cavargas10 committed on
Commit
d4ca03c
·
verified ·
1 Parent(s): ec252b1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +128 -106
app.py CHANGED
@@ -1,3 +1,5 @@
 
 
1
  import gradio as gr
2
  import spaces
3
  import os
@@ -17,6 +19,10 @@ import torchvision.transforms.functional as TF
17
  from diffusers import ControlNetModel, StableDiffusionXLControlNetPipeline, AutoencoderKL
18
  from diffusers import DDIMScheduler, EulerAncestralDiscreteScheduler
19
  from pathlib import Path
 
 
 
 
20
  style_list = [
21
  {
22
  "name": "(No style)",
@@ -41,29 +47,42 @@ style_list = [
41
  ]
42
  styles = {k["name"]: (k["prompt"], k["negative_prompt"]) for k in style_list}
43
  STYLE_NAMES = list(styles.keys())
44
- DEFAULT_STYLE_NAME = "(No style)"
45
  MAX_SEED = np.iinfo(np.int32).max
46
  TMP_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'tmp')
47
  os.makedirs(TMP_DIR, exist_ok=True)
48
 
49
- def reset_canvas():
50
- return gr.update(value={"background":Image.new("RGB", (512, 512), (255, 255, 255)), "layers":[Image.new("RGB", (512, 512), (255, 255, 255))], "composite":Image.new("RGB", (512, 512), (255, 255, 255))})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
 
52
  def apply_style(style_name: str, positive: str, negative: str = "") -> tuple[str, str]:
53
  p, n = styles.get(style_name, styles[DEFAULT_STYLE_NAME])
54
  return p.replace("{prompt}", positive), n + negative
55
 
56
- def start_session(req: gr.Request):
57
- user_dir = os.path.join(TMP_DIR, str(req.session_hash))
58
- os.makedirs(user_dir, exist_ok=True)
59
-
60
- def end_session(req: gr.Request):
61
- user_dir = os.path.join(TMP_DIR, str(req.session_hash))
62
- shutil.rmtree(user_dir)
63
 
64
  @spaces.GPU
65
  def preprocess_image(
66
- image: Image.Image,
67
  prompt: str = "",
68
  negative_prompt: str = "",
69
  style_name: str = "",
@@ -71,24 +90,28 @@ def preprocess_image(
71
  guidance_scale: float = 5,
72
  controlnet_conditioning_scale: float = 1.0,
73
  req: gr.Request = None,
74
- ) -> Image.Image:
75
- user_dir = os.path.join(TMP_DIR, str(req.session_hash))
76
- os.makedirs(user_dir, exist_ok=True)
77
-
78
- width, height = image['composite'].size
 
 
 
 
 
 
79
  ratio = np.sqrt(1024.0 * 1024.0 / (width * height))
80
  new_width, new_height = int(width * ratio), int(height * ratio)
81
- image = image['composite'].resize((new_width, new_height))
82
- image = ImageOps.invert(image)
83
- print("image:", type(image))
84
 
85
  prompt, negative_prompt = apply_style(style_name, prompt, negative_prompt)
86
- print("params:", prompt, negative_prompt, style_name, num_steps, guidance_scale, controlnet_conditioning_scale)
87
 
88
- output = pipe_control(
89
  prompt=prompt,
90
  negative_prompt=negative_prompt,
91
- image=image,
92
  num_inference_steps=num_steps,
93
  controlnet_conditioning_scale=controlnet_conditioning_scale,
94
  guidance_scale=guidance_scale,
@@ -97,54 +120,14 @@ def preprocess_image(
97
  ).images[0]
98
 
99
  processed_image_path = os.path.join(user_dir, 'processed_image.png')
100
- output.save(processed_image_path)
101
-
102
- processed_image = pipeline.preprocess_image(output)
103
 
104
- return processed_image
105
-
106
- def pack_state(gs: Gaussian, mesh: MeshExtractResult) -> dict:
107
- return {
108
- 'gaussian': {
109
- **gs.init_params,
110
- '_xyz': gs._xyz.cpu().numpy(),
111
- '_features_dc': gs._features_dc.cpu().numpy(),
112
- '_scaling': gs._scaling.cpu().numpy(),
113
- '_rotation': gs._rotation.cpu().numpy(),
114
- '_opacity': gs._opacity.cpu().numpy(),
115
- },
116
- 'mesh': {
117
- 'vertices': mesh.vertices.cpu().numpy(),
118
- 'faces': mesh.faces.cpu().numpy(),
119
- },
120
- }
121
-
122
- def unpack_state(state: dict) -> Tuple[Gaussian, edict, str]:
123
- gs = Gaussian(
124
- aabb=state['gaussian']['aabb'],
125
- sh_degree=state['gaussian']['sh_degree'],
126
- mininum_kernel_size=state['gaussian']['mininum_kernel_size'],
127
- scaling_bias=state['gaussian']['scaling_bias'],
128
- opacity_bias=state['gaussian']['opacity_bias'],
129
- scaling_activation=state['gaussian']['scaling_activation'],
130
- )
131
- gs._xyz = torch.tensor(state['gaussian']['_xyz'], device='cuda')
132
- gs._features_dc = torch.tensor(state['gaussian']['_features_dc'], device='cuda')
133
- gs._scaling = torch.tensor(state['gaussian']['_scaling'], device='cuda')
134
- gs._rotation = torch.tensor(state['gaussian']['_rotation'], device='cuda')
135
- gs._opacity = torch.tensor(state['gaussian']['_opacity'], device='cuda')
136
- mesh = edict(
137
- vertices=torch.tensor(state['mesh']['vertices'], device='cuda'),
138
- faces=torch.tensor(state['mesh']['faces'], device='cuda'),
139
- )
140
- return gs, mesh
141
-
142
- def get_seed(randomize_seed: bool, seed: int) -> int:
143
- return np.random.randint(0, MAX_SEED) if randomize_seed else seed
144
 
145
  @spaces.GPU
146
  def image_to_3d(
147
- image: Image.Image,
148
  seed: int,
149
  ss_guidance_strength: float,
150
  ss_sampling_steps: int,
@@ -152,9 +135,14 @@ def image_to_3d(
152
  slat_sampling_steps: int,
153
  req: gr.Request,
154
  ) -> Tuple[dict, str]:
155
- user_dir = os.path.join(TMP_DIR, str(req.session_hash))
 
 
 
 
 
156
  outputs = pipeline.run(
157
- image,
158
  seed=seed,
159
  formats=["gaussian", "mesh"],
160
  preprocess_image=False,
@@ -167,13 +155,17 @@ def image_to_3d(
167
  "cfg_strength": slat_guidance_strength,
168
  },
169
  )
 
 
170
  video = render_utils.render_video(outputs['gaussian'][0], num_frames=120)['color']
171
  video_geo = render_utils.render_video(outputs['mesh'][0], num_frames=120)['normal']
172
  video = [np.concatenate([video[i], video_geo[i]], axis=1) for i in range(len(video))]
173
  video_path = os.path.join(user_dir, 'sample.mp4')
174
  imageio.mimsave(video_path, video, fps=15)
 
175
  state = pack_state(outputs['gaussian'][0], outputs['mesh'][0])
176
  torch.cuda.empty_cache()
 
177
  return state, video_path
178
 
179
  @spaces.GPU(duration=90)
@@ -183,17 +175,56 @@ def extract_glb(
183
  texture_size: int,
184
  req: gr.Request,
185
  ) -> Tuple[str, str]:
186
- user_dir = os.path.join(TMP_DIR, str(req.session_hash))
 
 
 
187
  gs, mesh = unpack_state(state)
188
  glb = postprocessing_utils.to_glb(gs, mesh, simplify=mesh_simplify, texture_size=texture_size, verbose=False)
189
  glb_path = os.path.join(user_dir, 'sample.glb')
190
  glb.export(glb_path)
 
191
  torch.cuda.empty_cache()
 
192
  return glb_path, glb_path
193
 
194
- def reset_do_preprocess():
195
- return True
 
 
 
 
 
 
 
 
 
 
 
 
 
196
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
197
  def extract_gaussian(state: dict, req: gr.Request) -> Tuple[str, str]:
198
  user_dir = os.path.join(TMP_DIR, str(req.session_hash))
199
  gs, _ = unpack_state(state)
@@ -201,7 +232,7 @@ def extract_gaussian(state: dict, req: gr.Request) -> Tuple[str, str]:
201
  gs.save_ply(gaussian_path)
202
  torch.cuda.empty_cache()
203
  return gaussian_path, gaussian_path
204
-
205
  with gr.Blocks(delete_cache=(600, 600)) as demo:
206
  gr.Markdown("""
207
  # UTPL - Conversión de Boceto a objetos 3D usando IA
@@ -213,20 +244,20 @@ with gr.Blocks(delete_cache=(600, 600)) as demo:
213
  with gr.Row():
214
  with gr.Column():
215
  with gr.Column():
216
- image_prompt = gr.ImageEditor(label="Input sketch", type="pil", image_mode="RGB", height=512, value={"background":Image.new("RGB", (512, 512), (255, 255, 255)), "layers":[Image.new("RGB", (512, 512), (255, 255, 255))], "composite":Image.new("RGB", (512, 512), (255, 255, 255))})
217
  with gr.Row():
218
- sketch_btn = gr.Button("process sketch")
219
  generate_btn = gr.Button("Generate 3D")
220
  with gr.Row():
221
  prompt = gr.Textbox(label="Prompt")
222
  style = gr.Dropdown(label="Style", choices=STYLE_NAMES, value=DEFAULT_STYLE_NAME)
223
  with gr.Accordion(label="Generation Settings", open=False):
224
- with gr.Tab(label="sketch-to-image generation"):
225
  negative_prompt = gr.Textbox(label="Negative prompt")
226
  num_steps = gr.Slider(1, 20, label="Number of steps", value=8, step=1)
227
  guidance_scale = gr.Slider(0.1, 10.0, label="Guidance scale", value=5, step=0.1)
228
- controlnet_conditioning_scale = gr.Slider(0.5, 5.0, label="controlnet conditioning scale", value=0.85, step=0.01)
229
- with gr.Tab(label="3D generation"):
230
  seed = gr.Slider(0, MAX_SEED, label="Seed", value=0, step=1)
231
  randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
232
  gr.Markdown("Stage 1: Sparse Structure Generation")
@@ -245,31 +276,22 @@ with gr.Blocks(delete_cache=(600, 600)) as demo:
245
  extract_gs_btn = gr.Button("Extract Gaussian", interactive=False)
246
  with gr.Column():
247
  video_output = gr.Video(label="Generated 3D Asset", autoplay=True, loop=True, height=300)
248
- image_prompt_processed = gr.Image(label="processed sketch", interactive=False, type="pil", height=512)
249
  model_output = gr.Model3D(label="Extracted GLB/Gaussian", height=300)
250
  with gr.Row():
251
  download_glb = gr.DownloadButton(label="Download GLB", interactive=False)
252
  download_gs = gr.DownloadButton(label="Download Gaussian", interactive=False)
253
 
254
- do_preprocess = gr.State(True)
255
  output_buf = gr.State()
256
 
257
  demo.load(start_session)
258
  demo.unload(end_session)
259
 
260
- image_prompt.clear(
261
- fn=reset_canvas,
262
- outputs = [image_prompt]
263
- )
264
-
265
  sketch_btn.click(
266
- get_seed,
267
- inputs=[randomize_seed, seed],
268
- outputs=[seed],
269
- ).then(
270
  preprocess_image,
271
  inputs=[image_prompt, prompt, negative_prompt, style, num_steps, guidance_scale, controlnet_conditioning_scale],
272
  outputs=[image_prompt_processed],
 
273
  )
274
 
275
  generate_btn.click(
@@ -280,7 +302,10 @@ with gr.Blocks(delete_cache=(600, 600)) as demo:
280
  image_to_3d,
281
  inputs=[image_prompt_processed, seed, ss_guidance_strength, ss_sampling_steps, slat_guidance_strength, slat_sampling_steps],
282
  outputs=[output_buf, video_output],
283
- ).then(
 
 
 
284
  lambda: tuple([gr.Button(interactive=True), gr.Button(interactive=True)]),
285
  outputs=[extract_glb_btn, extract_gs_btn],
286
  )
@@ -294,18 +319,24 @@ with gr.Blocks(delete_cache=(600, 600)) as demo:
294
  extract_glb,
295
  inputs=[output_buf, mesh_simplify, texture_size],
296
  outputs=[model_output, download_glb],
297
- ).then(
 
 
 
298
  lambda: gr.Button(interactive=True),
299
- outputs=[download_glb],
300
  )
301
-
302
  extract_gs_btn.click(
303
  extract_gaussian,
304
  inputs=[output_buf],
305
  outputs=[model_output, download_gs],
306
- ).then(
 
 
 
307
  lambda: gr.Button(interactive=True),
308
- outputs=[download_gs],
309
  )
310
 
311
  model_output.clear(
@@ -317,22 +348,13 @@ if __name__ == "__main__":
317
  pipeline = TrellisImageTo3DPipeline.from_pretrained("cavargas10/TRELLIS")
318
  pipeline.cuda()
319
  device = "cuda" if torch.cuda.is_available() else "cpu"
320
- #scribble controlnet
321
- controlnet = ControlNetModel.from_pretrained(
322
- "xinsir/controlnet-scribble-sdxl-1.0",
323
- torch_dtype=torch.float16
324
- )
325
  vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16)
326
- pipe_control = StableDiffusionXLControlNetPipeline.from_pretrained(
327
- "sd-community/sdxl-flash",
328
- controlnet=controlnet,
329
- vae=vae,
330
- torch_dtype=torch.float16,
331
- )
332
  pipe_control.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe_control.scheduler.config)
333
  pipe_control.to(device)
334
  try:
335
- pipeline.preprocess_image(Image.fromarray(np.zeros((512, 512, 3), dtype=np.uint8))) # Preload rembg
336
  except:
337
  pass
338
  demo.launch()
 
1
+ # app.py (en el Space de Hugging Face para Boceto a 3D)
2
+
3
  import gradio as gr
4
  import spaces
5
  import os
 
19
  from diffusers import ControlNetModel, StableDiffusionXLControlNetPipeline, AutoencoderKL
20
  from diffusers import DDIMScheduler, EulerAncestralDiscreteScheduler
21
  from pathlib import Path
22
+ import logging
23
+
24
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - HF_SPACE_BOCETO - %(levelname)s - %(message)s')
25
+
26
  style_list = [
27
  {
28
  "name": "(No style)",
 
47
  ]
48
  styles = {k["name"]: (k["prompt"], k["negative_prompt"]) for k in style_list}
49
  STYLE_NAMES = list(styles.keys())
50
+ DEFAULT_STYLE_NAME = "3D Model"
51
  MAX_SEED = np.iinfo(np.int32).max
52
  TMP_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'tmp')
53
  os.makedirs(TMP_DIR, exist_ok=True)
54
 
55
def start_session(req: gr.Request) -> None:
    """Create the scratch directory for the session that just connected.

    The directory lives under TMP_DIR and is named after the request's
    session hash; it is safe to call when the directory already exists.
    """
    session_hash = str(req.session_hash)
    user_dir = os.path.join(TMP_DIR, session_hash)
    logging.info(f"START SESSION: Creando directorio para la sesión {session_hash} en {user_dir}")
    # exist_ok=True: an already-present directory is not an error.
    os.makedirs(user_dir, exist_ok=True)
60
+
61
def end_session(req: gr.Request) -> None:
    """Remove the scratch directory of the session that is disconnecting.

    Cleanup is best-effort: a missing directory is logged as a warning and
    any failure while deleting is logged as an error; nothing is raised.
    """
    session_hash = str(req.session_hash)
    user_dir = os.path.join(TMP_DIR, session_hash)
    logging.info(f"END SESSION: Intentando eliminar el directorio de la sesión {session_hash} en {user_dir}")

    # Guard clause: nothing to delete, just record it and leave.
    if not os.path.exists(user_dir):
        logging.warning(f"El directorio de la sesión {session_hash} no fue encontrado al intentar eliminarlo. Es posible que ya haya sido limpiado.")
        return

    try:
        shutil.rmtree(user_dir)
        logging.info(f"Directorio de la sesión {session_hash} eliminado correctamente.")
    except Exception as e:
        # Deliberately broad: a failed cleanup must not break session teardown.
        logging.error(f"Error al eliminar el directorio de la sesión {session_hash}: {e}")
73
 
def apply_style(style_name: str, positive: str, negative: str = "") -> tuple[str, str]:
    """Expand a style template into a (prompt, negative_prompt) pair.

    Looks up ``style_name`` in ``styles``, falling back to the entry for
    DEFAULT_STYLE_NAME when the name is unknown. The ``{prompt}`` placeholder
    in the style's prompt is replaced with ``positive``; ``negative`` is
    appended directly to the style's negative prompt (no separator is added).
    """
    fallback = styles[DEFAULT_STYLE_NAME]
    template, style_negative = styles.get(style_name, fallback)
    styled_prompt = template.replace("{prompt}", positive)
    return styled_prompt, style_negative + negative
77
 
78
def get_seed(randomize_seed: bool, seed: int) -> int:
    """Return the seed to use for generation and log it.

    When ``randomize_seed`` is true a fresh value is drawn from
    ``np.random.randint(0, MAX_SEED)``; otherwise ``seed`` is returned as-is.
    """
    if randomize_seed:
        new_seed = np.random.randint(0, MAX_SEED)
    else:
        new_seed = seed
    logging.info(f"Usando seed: {new_seed}")
    return new_seed
 
 
 
82
 
83
  @spaces.GPU
84
  def preprocess_image(
85
+ image: dict,
86
  prompt: str = "",
87
  negative_prompt: str = "",
88
  style_name: str = "",
 
90
  guidance_scale: float = 5,
91
  controlnet_conditioning_scale: float = 1.0,
92
  req: gr.Request = None,
93
+ ) -> str:
94
+ session_hash = str(req.session_hash)
95
+ user_dir = os.path.join(TMP_DIR, session_hash)
96
+ logging.info(f"[{session_hash}] Iniciando preprocess_image con prompt: '{prompt[:50]}...'")
97
+
98
+ if not image or 'composite' not in image or not isinstance(image['composite'], Image.Image):
99
+ logging.error(f"[{session_hash}] La entrada de imagen no es válida o está vacía.")
100
+ raise ValueError("Entrada de boceto no válida. Por favor, dibuja algo.")
101
+
102
+ input_image = image['composite']
103
+ width, height = input_image.size
104
  ratio = np.sqrt(1024.0 * 1024.0 / (width * height))
105
  new_width, new_height = int(width * ratio), int(height * ratio)
106
+ input_image = input_image.resize((new_width, new_height))
107
+ input_image = ImageOps.invert(input_image)
 
108
 
109
  prompt, negative_prompt = apply_style(style_name, prompt, negative_prompt)
 
110
 
111
+ output_image = pipe_control(
112
  prompt=prompt,
113
  negative_prompt=negative_prompt,
114
+ image=input_image,
115
  num_inference_steps=num_steps,
116
  controlnet_conditioning_scale=controlnet_conditioning_scale,
117
  guidance_scale=guidance_scale,
 
120
  ).images[0]
121
 
122
  processed_image_path = os.path.join(user_dir, 'processed_image.png')
123
+ output_image.save(processed_image_path)
124
+ logging.info(f"[{session_hash}] Imagen preprocesada y guardada en: {processed_image_path}")
 
125
 
126
+ return processed_image_path
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
127
 
128
  @spaces.GPU
129
  def image_to_3d(
130
+ image_path: str,
131
  seed: int,
132
  ss_guidance_strength: float,
133
  ss_sampling_steps: int,
 
135
  slat_sampling_steps: int,
136
  req: gr.Request,
137
  ) -> Tuple[dict, str]:
138
+ session_hash = str(req.session_hash)
139
+ user_dir = os.path.join(TMP_DIR, session_hash)
140
+ logging.info(f"[{session_hash}] Iniciando image_to_3d desde la imagen: {image_path}")
141
+
142
+ processed_image = pipeline.preprocess_image(Image.open(image_path))
143
+
144
  outputs = pipeline.run(
145
+ processed_image,
146
  seed=seed,
147
  formats=["gaussian", "mesh"],
148
  preprocess_image=False,
 
155
  "cfg_strength": slat_guidance_strength,
156
  },
157
  )
158
+
159
+ logging.info(f"[{session_hash}] Generación del modelo completada. Renderizando video...")
160
  video = render_utils.render_video(outputs['gaussian'][0], num_frames=120)['color']
161
  video_geo = render_utils.render_video(outputs['mesh'][0], num_frames=120)['normal']
162
  video = [np.concatenate([video[i], video_geo[i]], axis=1) for i in range(len(video))]
163
  video_path = os.path.join(user_dir, 'sample.mp4')
164
  imageio.mimsave(video_path, video, fps=15)
165
+
166
  state = pack_state(outputs['gaussian'][0], outputs['mesh'][0])
167
  torch.cuda.empty_cache()
168
+ logging.info(f"[{session_hash}] Video renderizado y estado empaquetado. Devolviendo: {video_path}")
169
  return state, video_path
170
 
171
  @spaces.GPU(duration=90)
 
175
  texture_size: int,
176
  req: gr.Request,
177
  ) -> Tuple[str, str]:
178
+ session_hash = str(req.session_hash)
179
+ user_dir = os.path.join(TMP_DIR, session_hash)
180
+ logging.info(f"[{session_hash}] Iniciando extract_glb...")
181
+
182
  gs, mesh = unpack_state(state)
183
  glb = postprocessing_utils.to_glb(gs, mesh, simplify=mesh_simplify, texture_size=texture_size, verbose=False)
184
  glb_path = os.path.join(user_dir, 'sample.glb')
185
  glb.export(glb_path)
186
+
187
  torch.cuda.empty_cache()
188
+ logging.info(f"[{session_hash}] GLB extraído. Devolviendo: {glb_path}")
189
  return glb_path, glb_path
190
 
191
def pack_state(gs: Gaussian, mesh: MeshExtractResult) -> dict:
    """Flatten a Gaussian and a mesh into a plain dict of NumPy arrays.

    The 'gaussian' entry holds ``gs.init_params`` followed by the per-point
    tensors, each moved to the CPU and converted with ``.numpy()``; the
    'mesh' entry holds the vertices and faces converted the same way.
    """
    gaussian_tensor_names = ('_xyz', '_features_dc', '_scaling', '_rotation', '_opacity')
    mesh_tensor_names = ('vertices', 'faces')

    # init_params go first so the tensor entries below keep the same key
    # order (and the same precedence on a name clash) as a literal dict.
    gaussian_state = {
        **gs.init_params,
        **{name: getattr(gs, name).cpu().numpy() for name in gaussian_tensor_names},
    }
    mesh_state = {name: getattr(mesh, name).cpu().numpy() for name in mesh_tensor_names}

    return {'gaussian': gaussian_state, 'mesh': mesh_state}
206
 
207
def unpack_state(state: dict) -> Tuple[Gaussian, edict]:
    """Rebuild the Gaussian and mesh objects from a packed state dict.

    Args:
        state: Dict with a 'gaussian' entry (constructor parameters plus the
            '_xyz', '_features_dc', '_scaling', '_rotation' and '_opacity'
            arrays) and a 'mesh' entry ('vertices' and 'faces' arrays).

    Returns:
        A ``(gs, mesh)`` pair: the reconstructed ``Gaussian`` and an ``edict``
        holding the mesh vertices and faces. The return annotation previously
        declared a third ``str`` element that was never returned.

    Note:
        All tensors are created with ``device='cuda'``.
    """
    gaussian_state = state['gaussian']
    gs = Gaussian(
        aabb=gaussian_state['aabb'],
        sh_degree=gaussian_state['sh_degree'],
        # 'mininum' (sic) is the spelling used by both the key and the keyword.
        mininum_kernel_size=gaussian_state['mininum_kernel_size'],
        scaling_bias=gaussian_state['scaling_bias'],
        opacity_bias=gaussian_state['opacity_bias'],
        scaling_activation=gaussian_state['scaling_activation'],
    )
    # Restore the per-point tensors onto the freshly constructed object.
    for attr in ('_xyz', '_features_dc', '_scaling', '_rotation', '_opacity'):
        setattr(gs, attr, torch.tensor(gaussian_state[attr], device='cuda'))

    mesh_state = state['mesh']
    mesh = edict(
        vertices=torch.tensor(mesh_state['vertices'], device='cuda'),
        faces=torch.tensor(mesh_state['faces'], device='cuda'),
    )
    return gs, mesh
226
+
227
+ @spaces.GPU
228
  def extract_gaussian(state: dict, req: gr.Request) -> Tuple[str, str]:
229
  user_dir = os.path.join(TMP_DIR, str(req.session_hash))
230
  gs, _ = unpack_state(state)
 
232
  gs.save_ply(gaussian_path)
233
  torch.cuda.empty_cache()
234
  return gaussian_path, gaussian_path
235
+
236
  with gr.Blocks(delete_cache=(600, 600)) as demo:
237
  gr.Markdown("""
238
  # UTPL - Conversión de Boceto a objetos 3D usando IA
 
244
  with gr.Row():
245
  with gr.Column():
246
  with gr.Column():
247
+ image_prompt = gr.ImageEditor(label="Input sketch", type="pil", image_mode="RGB", height=512)
248
  with gr.Row():
249
+ sketch_btn = gr.Button("Process Sketch")
250
  generate_btn = gr.Button("Generate 3D")
251
  with gr.Row():
252
  prompt = gr.Textbox(label="Prompt")
253
  style = gr.Dropdown(label="Style", choices=STYLE_NAMES, value=DEFAULT_STYLE_NAME)
254
  with gr.Accordion(label="Generation Settings", open=False):
255
+ with gr.Tab(label="Sketch-to-Image Generation"):
256
  negative_prompt = gr.Textbox(label="Negative prompt")
257
  num_steps = gr.Slider(1, 20, label="Number of steps", value=8, step=1)
258
  guidance_scale = gr.Slider(0.1, 10.0, label="Guidance scale", value=5, step=0.1)
259
+ controlnet_conditioning_scale = gr.Slider(0.5, 5.0, label="ControlNet Conditioning Scale", value=0.85, step=0.01)
260
+ with gr.Tab(label="3D Generation"):
261
  seed = gr.Slider(0, MAX_SEED, label="Seed", value=0, step=1)
262
  randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
263
  gr.Markdown("Stage 1: Sparse Structure Generation")
 
276
  extract_gs_btn = gr.Button("Extract Gaussian", interactive=False)
277
  with gr.Column():
278
  video_output = gr.Video(label="Generated 3D Asset", autoplay=True, loop=True, height=300)
279
+ image_prompt_processed = gr.Image(label="Processed Sketch", interactive=False, type="filepath", height=512)
280
  model_output = gr.Model3D(label="Extracted GLB/Gaussian", height=300)
281
  with gr.Row():
282
  download_glb = gr.DownloadButton(label="Download GLB", interactive=False)
283
  download_gs = gr.DownloadButton(label="Download Gaussian", interactive=False)
284
 
 
285
  output_buf = gr.State()
286
 
287
  demo.load(start_session)
288
  demo.unload(end_session)
289
 
 
 
 
 
 
290
  sketch_btn.click(
 
 
 
 
291
  preprocess_image,
292
  inputs=[image_prompt, prompt, negative_prompt, style, num_steps, guidance_scale, controlnet_conditioning_scale],
293
  outputs=[image_prompt_processed],
294
+ api_name="preprocess_image"
295
  )
296
 
297
  generate_btn.click(
 
302
  image_to_3d,
303
  inputs=[image_prompt_processed, seed, ss_guidance_strength, ss_sampling_steps, slat_guidance_strength, slat_sampling_steps],
304
  outputs=[output_buf, video_output],
305
+ api_name="image_to_3d"
306
+ )
307
+
308
+ generate_btn.click(
309
  lambda: tuple([gr.Button(interactive=True), gr.Button(interactive=True)]),
310
  outputs=[extract_glb_btn, extract_gs_btn],
311
  )
 
319
  extract_glb,
320
  inputs=[output_buf, mesh_simplify, texture_size],
321
  outputs=[model_output, download_glb],
322
+ api_name="extract_glb"
323
+ )
324
+
325
+ extract_glb_btn.click(
326
  lambda: gr.Button(interactive=True),
327
+ outputs=[download_glb]
328
  )
329
+
# Chain the button update with .then() so the download button is enabled
# only after extract_gaussian has finished. Two independent .click()
# listeners are both triggered by the same click, which enables the
# button before the .ply file has been produced.
extract_gs_btn.click(
    extract_gaussian,
    inputs=[output_buf],
    outputs=[model_output, download_gs],
    api_name="extract_gaussian",
).then(
    lambda: gr.Button(interactive=True),
    outputs=[download_gs],
)
341
 
342
  model_output.clear(
 
348
  pipeline = TrellisImageTo3DPipeline.from_pretrained("cavargas10/TRELLIS")
349
  pipeline.cuda()
350
  device = "cuda" if torch.cuda.is_available() else "cpu"
351
+ controlnet = ControlNetModel.from_pretrained("xinsir/controlnet-scribble-sdxl-1.0", torch_dtype=torch.float16)
 
 
 
 
352
  vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16)
353
+ pipe_control = StableDiffusionXLControlNetPipeline.from_pretrained("sd-community/sdxl-flash", controlnet=controlnet, vae=vae, torch_dtype=torch.float16)
 
 
 
 
 
354
  pipe_control.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe_control.scheduler.config)
355
  pipe_control.to(device)
356
  try:
357
+ pipeline.preprocess_image(Image.fromarray(np.zeros((512, 512, 3), dtype=np.uint8)))
358
  except:
359
  pass
360
  demo.launch()