DeepBeepMeep committed on
Commit
8de1a69
·
1 Parent(s): 710c845

Added Vace Sliding Window

Browse files
README.md CHANGED
@@ -14,10 +14,14 @@
14
 
15
 
16
  ## 🔥 Latest News!!
17
- * April 9 2025: 👋 Wan 2.1GP v4.0: lots of goodies for you !
18
  - A new queuing system that lets you stack in a queue as many text2video and image2video tasks as you want. Each task can rely on completely different generation parameters (different number of frames, steps, loras, ...).
19
  - Temporal upsampling (Rife) and spatial upsampling (Lanczos) for a smoother video (32 fps or 64 fps) and to enlarge your video by x2 or x4. Check these new advanced options.
20
  - Wan Vace Control Net support : with Vace you can inject in the scene people or objects, animate a person, perform inpainting or outpainting, continue a video, ... I have provided an introduction guide below.
 
 
 
 
21
  * Mar 27 2025: 👋 Added support for the new Wan Fun InP models (image2video). The 14B Fun InP has probably better end image support but unfortunately existing loras do not work so well with it. The great novelty is the Fun InP image2 1.3B model : Image 2 Video is now accessible to even lower hardware configuration. It is not as good as the 14B models but very impressive for its size. You can choose any of those models in the Configuration tab. Many thanks to the VideoX-Fun team (https://github.com/aigc-apps/VideoX-Fun)
22
  * Mar 26 2025: 👋 Good news ! Official support for RTX 50xx please check the installation instructions below.
23
  * Mar 24 2025: 👋 Wan2.1GP v3.2:
 
14
 
15
 
16
  ## 🔥 Latest News!!
17
+ * April 13 2025: 👋 Wan 2.1GP v4.0: lots of goodies for you !
18
  - A new queuing system that lets you stack in a queue as many text2video and image2video tasks as you want. Each task can rely on completely different generation parameters (different number of frames, steps, loras, ...).
19
  - Temporal upsampling (Rife) and spatial upsampling (Lanczos) for a smoother video (32 fps or 64 fps) and to enlarge your video by x2 or x4. Check these new advanced options.
20
  - Wan Vace Control Net support : with Vace you can inject in the scene people or objects, animate a person, perform inpainting or outpainting, continue a video, ... I have provided an introduction guide below.
21
+ - Integrated *Matanyone* tool directly inside WanGP so that you can easily create inpainting masks
22
+ - Sliding Window generation for Vace, create windows that can last dozens of seconds
23
+ - A new UI, tabs were replaced by a Dropdown box to easily switch models
24
+
25
  * Mar 27 2025: 👋 Added support for the new Wan Fun InP models (image2video). The 14B Fun InP has probably better end image support but unfortunately existing loras do not work so well with it. The great novelty is the Fun InP image2 1.3B model : Image 2 Video is now accessible to even lower hardware configuration. It is not as good as the 14B models but very impressive for its size. You can choose any of those models in the Configuration tab. Many thanks to the VideoX-Fun team (https://github.com/aigc-apps/VideoX-Fun)
26
  * Mar 26 2025: 👋 Good news ! Official support for RTX 50xx please check the installation instructions below.
27
  * Mar 24 2025: 👋 Wan2.1GP v3.2:
preprocessing/matanyone/app.py CHANGED
@@ -163,10 +163,10 @@ def get_frames_from_video(video_input, video_state):
163
  model.samcontroler.sam_controler.set_image(video_state["origin_images"][0])
164
  return video_state, video_info, video_state["origin_images"][0], \
165
  gr.update(visible=True, maximum=len(frames), value=1), gr.update(visible=True, maximum=len(frames), value=len(frames)), gr.update(visible=False, maximum=len(frames), value=len(frames)), \
166
- gr.update(visible=True), gr.update(visible=True), \
167
  gr.update(visible=True), gr.update(visible=True),\
168
- gr.update(visible=True), gr.update(visible=True), \
169
  gr.update(visible=True), gr.update(visible=False), \
 
170
  gr.update(visible=False), gr.update(visible=True), \
171
  gr.update(visible=True)
172
 
@@ -273,7 +273,7 @@ def save_video(frames, output_path, fps):
273
  return output_path
274
 
275
  # video matting
276
- def video_matting(video_state, end_slider, interactive_state, mask_dropdown, erode_kernel_size, dilate_kernel_size):
277
  matanyone_processor = InferenceCore(matanyone_model, cfg=matanyone_model.cfg)
278
  # if interactive_state["track_end_number"]:
279
  # following_frames = video_state["origin_images"][video_state["select_frame_number"]:interactive_state["track_end_number"]]
@@ -301,9 +301,16 @@ def video_matting(video_state, end_slider, interactive_state, mask_dropdown, ero
301
  template_mask[0][0]=1
302
  foreground, alpha = matanyone(matanyone_processor, following_frames, template_mask*255, r_erode=erode_kernel_size, r_dilate=dilate_kernel_size)
303
  output_frames = []
 
304
  for frame_origin, frame_alpha in zip(following_frames, alpha):
305
- frame_alpha[frame_alpha > 127] = 255
306
- frame_alpha[frame_alpha <= 127] = 0
 
 
 
 
 
 
307
  output_frame = np.bitwise_and(frame_origin, 255-frame_alpha)
308
  frame_grey = frame_alpha.copy()
309
  frame_grey[frame_alpha == 255] = 127
@@ -314,14 +321,18 @@ def video_matting(video_state, end_slider, interactive_state, mask_dropdown, ero
314
  if not os.path.exists("mask_outputs"):
315
  os.makedirs("mask_outputs")
316
 
317
-
318
- foreground_output = save_video(foreground, output_path="./mask_outputs/{}_fg.mp4".format(video_state["video_name"]), fps=fps)
 
319
  # foreground_output = generate_video_from_frames(foreground, output_path="./results/{}_fg.mp4".format(video_state["video_name"]), fps=fps, audio_path=audio_path) # import video_input to name the output video
320
- alpha_output = save_video(alpha, output_path="./mask_outputs/{}_alpha.mp4".format(video_state["video_name"]), fps=fps)
321
  # alpha_output = generate_video_from_frames(alpha, output_path="./results/{}_alpha.mp4".format(video_state["video_name"]), fps=fps, gray2rgb=True, audio_path=audio_path) # import video_input to name the output video
322
 
323
- return foreground_output, alpha_output
 
324
 
 
 
325
 
326
  def add_audio_to_video(video_path, audio_path, output_path):
327
  try:
@@ -392,8 +403,8 @@ def restart():
392
  },
393
  "track_end_number": None,
394
  }, [[],[]], None, None, \
395
- gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False),\
396
- gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), \
397
  gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), \
398
  gr.update(visible=False), gr.update(visible=False, choices=[], value=[]), "", gr.update(visible=False)
399
 
@@ -529,7 +540,16 @@ def display(vace_video_input, vace_video_mask, video_prompt_video_guide_trigger)
529
  visible=False,
530
  min_width=100,
531
  scale=1)
532
- mask_dropdown = gr.Dropdown(multiselect=True, value=[], label="Mask Selection", info="Choose 1~all mask(s) added in Step 2", visible=False)
 
 
 
 
 
 
 
 
 
533
 
534
  gr.Markdown("---")
535
 
@@ -549,9 +569,9 @@ def display(vace_video_input, vace_video_mask, video_prompt_video_guide_trigger)
549
  template_frame = gr.Image(label="Start Frame", type="pil",interactive=True, elem_id="template_frame", visible=False, elem_classes="image")
550
  with gr.Row():
551
  clear_button_click = gr.Button(value="Clear Clicks", interactive=True, visible=False, min_width=100)
552
- add_mask_button = gr.Button(value="Add Mask", interactive=True, visible=False, min_width=100)
553
  remove_mask_button = gr.Button(value="Remove Mask", interactive=True, visible=False, min_width=100) # no use
554
- matting_button = gr.Button(value="Video Matting", interactive=True, visible=False, min_width=100)
555
  with gr.Row():
556
  gr.Markdown("")
557
 
@@ -560,11 +580,11 @@ def display(vace_video_input, vace_video_mask, video_prompt_video_guide_trigger)
560
  with gr.Column(scale=2):
561
  foreground_video_output = gr.Video(label="Masked Video Output", visible=False, elem_classes="video")
562
  foreground_output_button = gr.Button(value="Black & White Video Output", visible=False, elem_classes="new_button")
563
- export_to_vace_video_input_btn = gr.Button("Export to Vace Video Input Video For Inpainting")
564
  with gr.Column(scale=2):
565
  alpha_video_output = gr.Video(label="B & W Mask Video Output", visible=False, elem_classes="video")
566
  alpha_output_button = gr.Button(value="Alpha Mask Output", visible=False, elem_classes="new_button")
567
- export_to_vace_video_mask_btn = gr.Button("Export to Vace Video Input and Video Mask for stronger Inpainting")
568
 
569
  export_to_vace_video_input_btn.click(fn=export_to_vace_video_input, inputs= [foreground_video_output], outputs= [video_prompt_video_guide_trigger, vace_video_input])
570
  export_to_vace_video_mask_btn.click(fn=export_to_vace_video_mask, inputs= [foreground_video_output, alpha_video_output], outputs= [video_prompt_video_guide_trigger, vace_video_input, vace_video_mask])
@@ -575,7 +595,7 @@ def display(vace_video_input, vace_video_mask, video_prompt_video_guide_trigger)
575
  video_input, video_state
576
  ],
577
  outputs=[video_state, video_info, template_frame,
578
- image_selection_slider, end_selection_slider, track_pause_number_slider, point_prompt, clear_button_click, add_mask_button, matting_button, template_frame,
579
  foreground_video_output, alpha_video_output, foreground_output_button, alpha_output_button, mask_dropdown, step2_title]
580
  )
581
 
@@ -609,9 +629,12 @@ def display(vace_video_input, vace_video_mask, video_prompt_video_guide_trigger)
609
 
610
  # video matting
611
  matting_button.click(
 
 
 
612
  fn=video_matting,
613
- inputs=[video_state, end_selection_slider, interactive_state, mask_dropdown, erode_kernel_size, dilate_kernel_size],
614
- outputs=[foreground_video_output, alpha_video_output]
615
  )
616
 
617
  # click to get mask
@@ -631,7 +654,7 @@ def display(vace_video_input, vace_video_mask, video_prompt_video_guide_trigger)
631
  click_state,
632
  foreground_video_output, alpha_video_output,
633
  template_frame,
634
- image_selection_slider , track_pause_number_slider,point_prompt, clear_button_click,
635
  add_mask_button, matting_button, template_frame, foreground_video_output, alpha_video_output, remove_mask_button, foreground_output_button, alpha_output_button, mask_dropdown, video_info, step2_title
636
  ],
637
  queue=False,
@@ -646,7 +669,7 @@ def display(vace_video_input, vace_video_mask, video_prompt_video_guide_trigger)
646
  click_state,
647
  foreground_video_output, alpha_video_output,
648
  template_frame,
649
- image_selection_slider , track_pause_number_slider,point_prompt, clear_button_click,
650
  add_mask_button, matting_button, template_frame, foreground_video_output, alpha_video_output, remove_mask_button, foreground_output_button, alpha_output_button, mask_dropdown, video_info, step2_title
651
  ],
652
  queue=False,
 
163
  model.samcontroler.sam_controler.set_image(video_state["origin_images"][0])
164
  return video_state, video_info, video_state["origin_images"][0], \
165
  gr.update(visible=True, maximum=len(frames), value=1), gr.update(visible=True, maximum=len(frames), value=len(frames)), gr.update(visible=False, maximum=len(frames), value=len(frames)), \
166
+ gr.update(visible=True), gr.update(visible=True), gr.update(visible=True), \
167
  gr.update(visible=True), gr.update(visible=True),\
 
168
  gr.update(visible=True), gr.update(visible=False), \
169
+ gr.update(visible=False), gr.update(visible=False), \
170
  gr.update(visible=False), gr.update(visible=True), \
171
  gr.update(visible=True)
172
 
 
273
  return output_path
274
 
275
  # video matting
276
+ def video_matting(video_state, end_slider, matting_type, interactive_state, mask_dropdown, erode_kernel_size, dilate_kernel_size):
277
  matanyone_processor = InferenceCore(matanyone_model, cfg=matanyone_model.cfg)
278
  # if interactive_state["track_end_number"]:
279
  # following_frames = video_state["origin_images"][video_state["select_frame_number"]:interactive_state["track_end_number"]]
 
301
  template_mask[0][0]=1
302
  foreground, alpha = matanyone(matanyone_processor, following_frames, template_mask*255, r_erode=erode_kernel_size, r_dilate=dilate_kernel_size)
303
  output_frames = []
304
+ foreground_mat = matting_type == "Foreground"
305
  for frame_origin, frame_alpha in zip(following_frames, alpha):
306
+ if foreground_mat:
307
+ frame_alpha[frame_alpha > 127] = 255
308
+ frame_alpha[frame_alpha <= 127] = 0
309
+ else:
310
+ frame_temp = frame_alpha.copy()
311
+ frame_alpha[frame_temp > 127] = 0
312
+ frame_alpha[frame_temp <= 127] = 255
313
+
314
  output_frame = np.bitwise_and(frame_origin, 255-frame_alpha)
315
  frame_grey = frame_alpha.copy()
316
  frame_grey[frame_alpha == 255] = 127
 
321
  if not os.path.exists("mask_outputs"):
322
  os.makedirs("mask_outputs")
323
 
324
+ file_name= video_state["video_name"]
325
+ file_name = ".".join(file_name.split(".")[:-1])
326
+ foreground_output = save_video(foreground, output_path="./mask_outputs/{}_fg.mp4".format(file_name), fps=fps)
327
  # foreground_output = generate_video_from_frames(foreground, output_path="./results/{}_fg.mp4".format(video_state["video_name"]), fps=fps, audio_path=audio_path) # import video_input to name the output video
328
+ alpha_output = save_video(alpha, output_path="./mask_outputs/{}_alpha.mp4".format(file_name), fps=fps)
329
  # alpha_output = generate_video_from_frames(alpha, output_path="./results/{}_alpha.mp4".format(video_state["video_name"]), fps=fps, gray2rgb=True, audio_path=audio_path) # import video_input to name the output video
330
 
331
+ return foreground_output, alpha_output, gr.update(visible=True), gr.update(visible=True), gr.update(visible=True), gr.update(visible=True)
332
+
333
 
334
+ def show_outputs():
335
+ return gr.update(visible=True), gr.update(visible=True)
336
 
337
  def add_audio_to_video(video_path, audio_path, output_path):
338
  try:
 
403
  },
404
  "track_end_number": None,
405
  }, [[],[]], None, None, \
406
+ gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False),\
407
+ gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), \
408
  gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), \
409
  gr.update(visible=False), gr.update(visible=False, choices=[], value=[]), "", gr.update(visible=False)
410
 
 
540
  visible=False,
541
  min_width=100,
542
  scale=1)
543
+ matting_type = gr.Radio(
544
+ choices=["Foreground", "Background"],
545
+ value="Foreground",
546
+ label="Matting Type",
547
+ info="Type of Video Matting to Generate",
548
+ interactive=True,
549
+ visible=False,
550
+ min_width=100,
551
+ scale=1)
552
+ mask_dropdown = gr.Dropdown(multiselect=True, value=[], label="Mask Selection", info="Choose 1~all mask(s) added in Step 2", visible=False, scale=2)
553
 
554
  gr.Markdown("---")
555
 
 
569
  template_frame = gr.Image(label="Start Frame", type="pil",interactive=True, elem_id="template_frame", visible=False, elem_classes="image")
570
  with gr.Row():
571
  clear_button_click = gr.Button(value="Clear Clicks", interactive=True, visible=False, min_width=100)
572
+ add_mask_button = gr.Button(value="Set Mask", interactive=True, visible=False, min_width=100)
573
  remove_mask_button = gr.Button(value="Remove Mask", interactive=True, visible=False, min_width=100) # no use
574
+ matting_button = gr.Button(value="Generate Video Matting", interactive=True, visible=False, min_width=100)
575
  with gr.Row():
576
  gr.Markdown("")
577
 
 
580
  with gr.Column(scale=2):
581
  foreground_video_output = gr.Video(label="Masked Video Output", visible=False, elem_classes="video")
582
  foreground_output_button = gr.Button(value="Black & White Video Output", visible=False, elem_classes="new_button")
583
+ export_to_vace_video_input_btn = gr.Button("Export to Vace Video Input Video For Inpainting", visible= False)
584
  with gr.Column(scale=2):
585
  alpha_video_output = gr.Video(label="B & W Mask Video Output", visible=False, elem_classes="video")
586
  alpha_output_button = gr.Button(value="Alpha Mask Output", visible=False, elem_classes="new_button")
587
+ export_to_vace_video_mask_btn = gr.Button("Export to Vace Video Input and Video Mask for stronger Inpainting", visible= False)
588
 
589
  export_to_vace_video_input_btn.click(fn=export_to_vace_video_input, inputs= [foreground_video_output], outputs= [video_prompt_video_guide_trigger, vace_video_input])
590
  export_to_vace_video_mask_btn.click(fn=export_to_vace_video_mask, inputs= [foreground_video_output, alpha_video_output], outputs= [video_prompt_video_guide_trigger, vace_video_input, vace_video_mask])
 
595
  video_input, video_state
596
  ],
597
  outputs=[video_state, video_info, template_frame,
598
+ image_selection_slider, end_selection_slider, track_pause_number_slider, point_prompt, matting_type, clear_button_click, add_mask_button, matting_button, template_frame,
599
  foreground_video_output, alpha_video_output, foreground_output_button, alpha_output_button, mask_dropdown, step2_title]
600
  )
601
 
 
629
 
630
  # video matting
631
  matting_button.click(
632
+ fn=show_outputs,
633
+ inputs=[],
634
+ outputs=[foreground_video_output, alpha_video_output]).then(
635
  fn=video_matting,
636
+ inputs=[video_state, end_selection_slider, matting_type, interactive_state, mask_dropdown, erode_kernel_size, dilate_kernel_size],
637
+ outputs=[foreground_video_output, alpha_video_output,foreground_video_output, alpha_video_output, export_to_vace_video_input_btn, export_to_vace_video_mask_btn]
638
  )
639
 
640
  # click to get mask
 
654
  click_state,
655
  foreground_video_output, alpha_video_output,
656
  template_frame,
657
+ image_selection_slider, end_selection_slider, track_pause_number_slider,point_prompt, export_to_vace_video_input_btn, export_to_vace_video_mask_btn, matting_type, clear_button_click,
658
  add_mask_button, matting_button, template_frame, foreground_video_output, alpha_video_output, remove_mask_button, foreground_output_button, alpha_output_button, mask_dropdown, video_info, step2_title
659
  ],
660
  queue=False,
 
669
  click_state,
670
  foreground_video_output, alpha_video_output,
671
  template_frame,
672
+ image_selection_slider , end_selection_slider, track_pause_number_slider,point_prompt, export_to_vace_video_input_btn, export_to_vace_video_mask_btn, matting_type, clear_button_click,
673
  add_mask_button, matting_button, template_frame, foreground_video_output, alpha_video_output, remove_mask_button, foreground_output_button, alpha_output_button, mask_dropdown, video_info, step2_title
674
  ],
675
  queue=False,
wan/text2video.py CHANGED
@@ -209,34 +209,47 @@ class WanT2V:
209
  def vace_latent(self, z, m):
210
  return [torch.cat([zz, mm], dim=0) for zz, mm in zip(z, m)]
211
 
212
- def prepare_source(self, src_video, src_mask, src_ref_images, num_frames, image_size, device, original_video = False, keep_frames= []):
213
  image_sizes = []
214
  trim_video = len(keep_frames)
215
- for i, (sub_src_video, sub_src_mask) in enumerate(zip(src_video, src_mask)):
 
 
 
216
  if sub_src_mask is not None and sub_src_video is not None:
217
- src_video[i], src_mask[i], _, _, _ = self.vid_proc.load_video_pair(sub_src_video, sub_src_mask, max_frames= num_frames, trim_video = trim_video)
218
  # src_video is [-1, 1], 0 = inpainting area (in fact 127 in [0, 255])
219
  # src_mask is [-1, 1], 0 = preserve original video (in fact 127 in [0, 255]) and 1 = Inpainting (in fact 255 in [0, 255])
220
  src_video[i] = src_video[i].to(device)
221
  src_mask[i] = src_mask[i].to(device)
 
 
 
222
  src_video_shape = src_video[i].shape
223
- if src_video_shape[1] != num_frames:
224
- src_video[i] = torch.cat( [src_video[i], src_video[i].new_zeros(src_video_shape[0], num_frames -src_video_shape[1], *src_video_shape[-2:])], dim=1)
225
- src_mask[i] = torch.cat( [src_mask[i], src_mask[i].new_ones(src_video_shape[0], num_frames -src_video_shape[1], *src_video_shape[-2:])], dim=1)
226
  src_mask[i] = torch.clamp((src_mask[i][:1, :, :, :] + 1) / 2, min=0, max=1)
227
  image_sizes.append(src_video[i].shape[2:])
228
  elif sub_src_video is None:
229
- src_video[i] = torch.zeros((3, num_frames, image_size[0], image_size[1]), device=device)
230
- src_mask[i] = torch.ones_like(src_video[i], device=device)
 
 
 
 
231
  image_sizes.append(image_size)
232
  else:
233
- src_video[i], _, _, _ = self.vid_proc.load_video(sub_src_video, max_frames= num_frames, trim_video = trim_video)
234
  src_video[i] = src_video[i].to(device)
235
  src_mask[i] = torch.zeros_like(src_video[i], device=device) if original_video else torch.ones_like(src_video[i], device=device)
 
 
 
236
  src_video_shape = src_video[i].shape
237
- if src_video_shape[1] != num_frames:
238
- src_video[i] = torch.cat( [src_video[i], src_video[i].new_zeros(src_video_shape[0], num_frames -src_video_shape[1], *src_video_shape[-2:])], dim=1)
239
- src_mask[i] = torch.cat( [src_mask[i], src_mask[i].new_ones(src_video_shape[0], num_frames -src_video_shape[1], *src_video_shape[-2:])], dim=1)
240
  image_sizes.append(src_video[i].shape[2:])
241
  for k, keep in enumerate(keep_frames):
242
  if not keep:
 
209
  def vace_latent(self, z, m):
210
  return [torch.cat([zz, mm], dim=0) for zz, mm in zip(z, m)]
211
 
212
+ def prepare_source(self, src_video, src_mask, src_ref_images, total_frames, image_size, device, original_video = False, keep_frames= [], start_frame = 0, pre_src_video = None):
213
  image_sizes = []
214
  trim_video = len(keep_frames)
215
+
216
+ for i, (sub_src_video, sub_src_mask, sub_pre_src_video) in enumerate(zip(src_video, src_mask,pre_src_video)):
217
+ prepend_count = 0 if sub_pre_src_video == None else sub_pre_src_video.shape[1]
218
+ num_frames = total_frames - prepend_count
219
  if sub_src_mask is not None and sub_src_video is not None:
220
+ src_video[i], src_mask[i], _, _, _ = self.vid_proc.load_video_pair(sub_src_video, sub_src_mask, max_frames= num_frames, trim_video = trim_video - prepend_count, start_frame = start_frame)
221
  # src_video is [-1, 1], 0 = inpainting area (in fact 127 in [0, 255])
222
  # src_mask is [-1, 1], 0 = preserve original video (in fact 127 in [0, 255]) and 1 = Inpainting (in fact 255 in [0, 255])
223
  src_video[i] = src_video[i].to(device)
224
  src_mask[i] = src_mask[i].to(device)
225
+ if prepend_count > 0:
226
+ src_video[i] = torch.cat( [sub_pre_src_video, src_video[i]], dim=1)
227
+ src_mask[i] = torch.cat( [torch.zeros_like(sub_pre_src_video), src_mask[i]] ,1)
228
  src_video_shape = src_video[i].shape
229
+ if src_video_shape[1] != total_frames:
230
+ src_video[i] = torch.cat( [src_video[i], src_video[i].new_zeros(src_video_shape[0], total_frames -src_video_shape[1], *src_video_shape[-2:])], dim=1)
231
+ src_mask[i] = torch.cat( [src_mask[i], src_mask[i].new_ones(src_video_shape[0], total_frames -src_video_shape[1], *src_video_shape[-2:])], dim=1)
232
  src_mask[i] = torch.clamp((src_mask[i][:1, :, :, :] + 1) / 2, min=0, max=1)
233
  image_sizes.append(src_video[i].shape[2:])
234
  elif sub_src_video is None:
235
+ if prepend_count > 0:
236
+ src_video[i] = torch.cat( [sub_pre_src_video, torch.zeros((3, num_frames, image_size[0], image_size[1]), device=device)], dim=1)
237
+ src_mask[i] = torch.cat( [torch.zeros_like(sub_pre_src_video), torch.ones((3, num_frames, image_size[0], image_size[1]), device=device)] ,1)
238
+ else:
239
+ src_video[i] = torch.zeros((3, num_frames, image_size[0], image_size[1]), device=device)
240
+ src_mask[i] = torch.ones_like(src_video[i], device=device)
241
  image_sizes.append(image_size)
242
  else:
243
+ src_video[i], _, _, _ = self.vid_proc.load_video(sub_src_video, max_frames= num_frames, trim_video = trim_video - prepend_count, start_frame = start_frame)
244
  src_video[i] = src_video[i].to(device)
245
  src_mask[i] = torch.zeros_like(src_video[i], device=device) if original_video else torch.ones_like(src_video[i], device=device)
246
+ if prepend_count > 0:
247
+ src_video[i] = torch.cat( [sub_pre_src_video, src_video[i]], dim=1)
248
+ src_mask[i] = torch.cat( [torch.zeros_like(sub_pre_src_video), src_mask[i]] ,1)
249
  src_video_shape = src_video[i].shape
250
+ if src_video_shape[1] != total_frames:
251
+ src_video[i] = torch.cat( [src_video[i], src_video[i].new_zeros(src_video_shape[0], total_frames -src_video_shape[1], *src_video_shape[-2:])], dim=1)
252
+ src_mask[i] = torch.cat( [src_mask[i], src_mask[i].new_ones(src_video_shape[0], total_frames -src_video_shape[1], *src_video_shape[-2:])], dim=1)
253
  image_sizes.append(src_video[i].shape[2:])
254
  for k, keep in enumerate(keep_frames):
255
  if not keep:
wan/utils/utils.py CHANGED
@@ -22,18 +22,18 @@ __all__ = ['cache_video', 'cache_image', 'str2bool']
22
  from PIL import Image
23
 
24
 
25
- def resample(video_fps, video_frames_count, max_frames, target_fps):
26
  import math
27
 
28
  video_frame_duration = 1 /video_fps
29
  target_frame_duration = 1 / target_fps
30
 
31
- cur_time = 0
32
- target_time = 0
33
- frame_no = 0
34
  frame_ids =[]
35
  while True:
36
- if max_frames != 0 and len(frame_ids) >= max_frames:
37
  break
38
  add_frames_count = math.ceil( (target_time -cur_time) / video_frame_duration )
39
  frame_no += add_frames_count
@@ -42,6 +42,7 @@ def resample(video_fps, video_frames_count, max_frames, target_fps):
42
  frame_ids.append(frame_no)
43
  cur_time += add_frames_count * video_frame_duration
44
  target_time += target_frame_duration
 
45
  return frame_ids
46
 
47
  def get_video_frame(file_name, frame_no):
 
22
  from PIL import Image
23
 
24
 
25
+ def resample(video_fps, video_frames_count, max_target_frames_count, target_fps, start_target_frame ):
26
  import math
27
 
28
  video_frame_duration = 1 /video_fps
29
  target_frame_duration = 1 / target_fps
30
 
31
+ target_time = start_target_frame * target_frame_duration
32
+ frame_no = math.ceil(target_time / video_frame_duration)
33
+ cur_time = frame_no * video_frame_duration
34
  frame_ids =[]
35
  while True:
36
+ if max_target_frames_count != 0 and len(frame_ids) >= max_target_frames_count :
37
  break
38
  add_frames_count = math.ceil( (target_time -cur_time) / video_frame_duration )
39
  frame_no += add_frames_count
 
42
  frame_ids.append(frame_no)
43
  cur_time += add_frames_count * video_frame_duration
44
  target_time += target_frame_duration
45
+ frame_ids = frame_ids[:max_target_frames_count]
46
  return frame_ids
47
 
48
  def get_video_frame(file_name, frame_no):
wan/utils/vace_preprocessor.py CHANGED
@@ -182,14 +182,14 @@ class VaceVideoProcessor(object):
182
 
183
 
184
 
185
- def _get_frameid_bbox_adjust_last(self, fps, video_frames_count, h, w, crop_box, rng, max_frames= 0):
186
  from wan.utils.utils import resample
187
 
188
  target_fps = self.max_fps
189
 
190
  # video_frames_count = len(frame_timestamps)
191
 
192
- frame_ids= resample(fps, video_frames_count, max_frames, target_fps)
193
 
194
  x1, x2, y1, y2 = [0, w, 0, h] if crop_box is None else crop_box
195
  h, w = y2 - y1, x2 - x1
@@ -206,7 +206,7 @@ class VaceVideoProcessor(object):
206
  np.log2(np.sqrt(max_area_z))
207
  )))
208
 
209
- seq_len = max_area_z * ((max_frames- 1) // df +1)
210
 
211
  # of = min(
212
  # (len(frame_ids) - 1) // df + 1,
@@ -226,9 +226,9 @@ class VaceVideoProcessor(object):
226
 
227
  return frame_ids, (x1, x2, y1, y2), (oh, ow), target_fps
228
 
229
- def _get_frameid_bbox(self, fps, video_frames_count, h, w, crop_box, rng, max_frames= 0):
230
  if self.keep_last:
231
- return self._get_frameid_bbox_adjust_last(fps, video_frames_count, h, w, crop_box, rng, max_frames= max_frames)
232
  else:
233
  return self._get_frameid_bbox_default(fps, video_frames_count, h, w, crop_box, rng, max_frames= max_frames)
234
 
@@ -238,7 +238,7 @@ class VaceVideoProcessor(object):
238
  def load_video_pair(self, data_key, data_key2, crop_box=None, seed=2024, **kwargs):
239
  return self.load_video_batch(data_key, data_key2, crop_box=crop_box, seed=seed, **kwargs)
240
 
241
- def load_video_batch(self, *data_key_batch, crop_box=None, seed=2024, max_frames= 0, trim_video =0, **kwargs):
242
  rng = np.random.default_rng(seed + hash(data_key_batch[0]) % 10000)
243
  # read video
244
  import decord
@@ -254,7 +254,7 @@ class VaceVideoProcessor(object):
254
 
255
  if src_video != None:
256
  fps = 16
257
- length = src_video.shape[0]
258
  if len(readers) > 0:
259
  min_readers = min([len(r) for r in readers])
260
  length = min(length, min_readers )
@@ -269,7 +269,7 @@ class VaceVideoProcessor(object):
269
  h, w = src_video.shape[1:3]
270
  else:
271
  h, w = readers[0].next().shape[:2]
272
- frame_ids, (x1, x2, y1, y2), (oh, ow), fps = self._get_frameid_bbox(fps, length, h, w, crop_box, rng, max_frames=max_frames)
273
 
274
  # preprocess video
275
  videos = [reader.get_batch(frame_ids)[:, y1:y2, x1:x2, :] for reader in readers]
 
182
 
183
 
184
 
185
+ def _get_frameid_bbox_adjust_last(self, fps, video_frames_count, h, w, crop_box, rng, max_frames= 0, start_frame =0):
186
  from wan.utils.utils import resample
187
 
188
  target_fps = self.max_fps
189
 
190
  # video_frames_count = len(frame_timestamps)
191
 
192
+ frame_ids= resample(fps, video_frames_count, max_frames, target_fps, start_frame )
193
 
194
  x1, x2, y1, y2 = [0, w, 0, h] if crop_box is None else crop_box
195
  h, w = y2 - y1, x2 - x1
 
206
  np.log2(np.sqrt(max_area_z))
207
  )))
208
 
209
+ seq_len = max_area_z * ((max_frames- start_frame - 1) // df +1)
210
 
211
  # of = min(
212
  # (len(frame_ids) - 1) // df + 1,
 
226
 
227
  return frame_ids, (x1, x2, y1, y2), (oh, ow), target_fps
228
 
229
+ def _get_frameid_bbox(self, fps, video_frames_count, h, w, crop_box, rng, max_frames= 0, start_frame= 0):
230
  if self.keep_last:
231
+ return self._get_frameid_bbox_adjust_last(fps, video_frames_count, h, w, crop_box, rng, max_frames= max_frames, start_frame= start_frame)
232
  else:
233
  return self._get_frameid_bbox_default(fps, video_frames_count, h, w, crop_box, rng, max_frames= max_frames)
234
 
 
238
  def load_video_pair(self, data_key, data_key2, crop_box=None, seed=2024, **kwargs):
239
  return self.load_video_batch(data_key, data_key2, crop_box=crop_box, seed=seed, **kwargs)
240
 
241
+ def load_video_batch(self, *data_key_batch, crop_box=None, seed=2024, max_frames= 0, trim_video =0, start_frame = 0, **kwargs):
242
  rng = np.random.default_rng(seed + hash(data_key_batch[0]) % 10000)
243
  # read video
244
  import decord
 
254
 
255
  if src_video != None:
256
  fps = 16
257
+ length = src_video.shape[0] + start_frame
258
  if len(readers) > 0:
259
  min_readers = min([len(r) for r in readers])
260
  length = min(length, min_readers )
 
269
  h, w = src_video.shape[1:3]
270
  else:
271
  h, w = readers[0].next().shape[:2]
272
+ frame_ids, (x1, x2, y1, y2), (oh, ow), fps = self._get_frameid_bbox(fps, length, h, w, crop_box, rng, max_frames=max_frames, start_frame = start_frame )
273
 
274
  # preprocess video
275
  videos = [reader.get_batch(frame_ids)[:, y1:y2, x1:x2, :] for reader in readers]
wgp.py CHANGED
@@ -144,13 +144,23 @@ def process_prompt_and_add_tasks(state, model_choice):
144
  gr.Info("You must use the 14B model to generate videos with a resolution equivalent to 720P")
145
  return
146
 
147
-
 
148
  if "Vace" in model_filename:
149
  video_prompt_type = inputs["video_prompt_type"]
150
  image_refs = inputs["image_refs"]
151
  video_guide = inputs["video_guide"]
152
  video_mask = inputs["video_mask"]
153
- if "Vace" in model_filename and "1.3B" in model_filename :
 
 
 
 
 
 
 
 
 
154
  resolution_reformated = str(height) + "*" + str(width)
155
  if not resolution_reformated in VACE_SIZE_CONFIGS:
156
  res = (" and ").join(VACE_SIZE_CONFIGS.keys())
@@ -197,6 +207,9 @@ def process_prompt_and_add_tasks(state, model_choice):
197
  image_refs = resize_and_remove_background(image_refs, width, height, inputs["remove_background_image_ref"] ==1)
198
 
199
 
 
 
 
200
  for single_prompt in prompts:
201
  extra_inputs = {
202
  "prompt" : single_prompt,
@@ -2053,7 +2066,7 @@ def convert_image(image):
2053
  return cast(Image, ImageOps.exif_transpose(image))
2054
 
2055
 
2056
- def preprocess_video(process_type, height, width, video_in, max_frames):
2057
 
2058
  from wan.utils.utils import resample
2059
 
@@ -2063,8 +2076,10 @@ def preprocess_video(process_type, height, width, video_in, max_frames):
2063
 
2064
  fps = reader.get_avg_fps()
2065
 
2066
- frame_nos = resample(fps, len(reader), max_frames= max_frames, target_fps=16)
2067
  frames_list = reader.get_batch(frame_nos)
 
 
2068
  frame_height, frame_width, _ = frames_list[0].shape
2069
 
2070
  scale = ((height * width ) / (frame_height * frame_width))**(1/2)
@@ -2187,6 +2202,9 @@ def generate_video(
2187
  video_guide,
2188
  video_mask,
2189
  keep_frames,
 
 
 
2190
  remove_background_image_ref,
2191
  temporal_upsampling,
2192
  spatial_upsampling,
@@ -2342,41 +2360,6 @@ def generate_video(
2342
  else:
2343
  raise gr.Error("Teacache not supported for this model")
2344
 
2345
- if "Vace" in model_filename:
2346
- # video_prompt_type = video_prompt_type +"G"
2347
- if any(process in video_prompt_type for process in ("P", "D", "G")) :
2348
- prompts_max = gen["prompts_max"]
2349
-
2350
- status = get_generation_status(prompt_no, prompts_max, 1, 1)
2351
- preprocess_type = None
2352
- if "P" in video_prompt_type :
2353
- progress_args = [0, status + " - Extracting Open Pose Information"]
2354
- preprocess_type = "pose"
2355
- elif "D" in video_prompt_type :
2356
- progress_args = [0, status + " - Extracting Depth Information"]
2357
- preprocess_type = "depth"
2358
- elif "G" in video_prompt_type :
2359
- progress_args = [0, status + " - Extracting Gray Level Information"]
2360
- preprocess_type = "gray"
2361
-
2362
- if preprocess_type != None :
2363
- progress(*progress_args )
2364
- gen["progress_args"] = progress_args
2365
- video_guide = preprocess_video(preprocess_type, width=width, height=height,video_in=video_guide, max_frames= video_length)
2366
- image_refs = image_refs.copy() if image_refs != None else None # required since prepare_source do inplace modifications
2367
- keep_frames_parsed, error = parse_keep_frames(keep_frames, video_length)
2368
- if len(error) > 0:
2369
- raise gr.Error(f"invalid keep frames {keep_frames}")
2370
-
2371
- src_video, src_mask, src_ref_images = wan_model.prepare_source([video_guide],
2372
- [video_mask],
2373
- [image_refs],
2374
- video_length, VACE_SIZE_CONFIGS[resolution_reformated], "cpu",
2375
- original_video= "O" in video_prompt_type,
2376
- keep_frames=keep_frames_parsed)
2377
- else:
2378
- src_video, src_mask, src_ref_images = None, None, None
2379
-
2380
 
2381
  import random
2382
  if seed == None or seed <0:
@@ -2393,6 +2376,21 @@ def generate_video(
2393
  gen["prompt"] = prompt
2394
  repeat_no = 0
2395
  extra_generation = 0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2396
  while True:
2397
  extra_generation += gen.get("extra_orders",0)
2398
  gen["extra_orders"] = 0
@@ -2400,10 +2398,59 @@ def generate_video(
2400
  gen["total_generation"] = total_generation
2401
  if abort or repeat_no >= total_generation:
2402
  break
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2403
  repeat_no +=1
2404
  gen["repeat_no"] = repeat_no
2405
  prompts_max = gen["prompts_max"]
2406
- status = get_generation_status(prompt_no, prompts_max, repeat_no, total_generation)
2407
 
2408
  yield status
2409
 
@@ -2539,6 +2586,15 @@ def generate_video(
2539
  # yield f"Video generation was aborted. Total Generation Time: {end_time-start_time:.1f}s"
2540
  else:
2541
  sample = samples.cpu()
 
 
 
 
 
 
 
 
 
2542
 
2543
  time_flag = datetime.fromtimestamp(time.time()).strftime("%Y-%m-%d-%Hh%Mm%Ss")
2544
  if os.name == 'nt':
@@ -2565,7 +2621,13 @@ def generate_video(
2565
 
2566
  if exp > 0:
2567
  from rife.inference import temporal_interpolation
2568
- sample = temporal_interpolation( os.path.join("ckpts", "flownet.pkl"), sample, exp, device=processing_device)
 
 
 
 
 
 
2569
  fps = fps * 2**exp
2570
 
2571
  if len(spatial_upsampling) > 0:
@@ -2590,6 +2652,12 @@ def generate_video(
2590
  new_frames = None
2591
  sample = sample * 2 - 1
2592
 
 
 
 
 
 
 
2593
 
2594
  cache_video(
2595
  tensor=sample[None],
@@ -2616,7 +2684,8 @@ def generate_video(
2616
  print(f"New video saved to Path: "+video_path)
2617
  file_list.append(video_path)
2618
  state['update_gallery'] = True
2619
- seed += 1
 
2620
 
2621
  if temp_filename!= None and os.path.isfile(temp_filename):
2622
  os.remove(temp_filename)
@@ -2694,17 +2763,19 @@ def process_tasks(state, progress=gr.Progress()):
2694
  yield f"Total Generation Time: {end_time-start_time:.1f}s"
2695
 
2696
 
2697
- def get_generation_status(prompt_no, prompts_max, repeat_no, repeat_max):
2698
- if prompts_max == 1:
 
 
2699
  if repeat_max == 1:
2700
  return "Video"
2701
  else:
2702
- return f"Sample {repeat_no}/{repeat_max}"
2703
  else:
2704
  if repeat_max == 1:
2705
  return f"Prompt {prompt_no}/{prompts_max}"
2706
  else:
2707
- return f"Prompt {prompt_no}/{prompts_max}, Sample {repeat_no}/{repeat_max}"
2708
 
2709
 
2710
  refresh_id = 0
@@ -2720,7 +2791,8 @@ def update_status(state):
2720
  prompts_max = gen.get("prompts_max",0)
2721
  total_generation = gen["total_generation"]
2722
  repeat_no = gen["repeat_no"]
2723
- status = get_generation_status(prompt_no, prompts_max, repeat_no, total_generation)
 
2724
  gen["progress_status"] = status
2725
  gen["refresh"] = get_new_refresh_id()
2726
 
@@ -2737,7 +2809,7 @@ def one_more_sample(state):
2737
  prompts_max = gen.get("prompts_max",0)
2738
  total_generation = gen["total_generation"] + extra_orders
2739
  repeat_no = gen["repeat_no"]
2740
- status = get_generation_status(prompt_no, prompts_max, repeat_no, total_generation)
2741
 
2742
 
2743
  gen["progress_status"] = status
@@ -3059,7 +3131,7 @@ def prepare_inputs_dict(target, inputs ):
3059
 
3060
 
3061
  if not "Vace" in model_filename:
3062
- unsaved_params = ["video_prompt_type", "keep_frames", "remove_background_image_ref"]
3063
  for k in unsaved_params:
3064
  inputs.pop(k)
3065
 
@@ -3102,6 +3174,9 @@ def save_inputs(
3102
  video_guide,
3103
  video_mask,
3104
  keep_frames,
 
 
 
3105
  remove_background_image_ref,
3106
  temporal_upsampling,
3107
  spatial_upsampling,
@@ -3437,7 +3512,6 @@ def generate_video_tab(update_form = False, state_dict = None, ui_defaults = Non
3437
  # video_prompt_type_image_refs = gr.Checkbox(value="I" in video_prompt_type_value , label= "Use References Images (Faces, Objects) to customize New Video", scale =1 )
3438
 
3439
  video_guide = gr.Video(label= "Control Video", visible= "V" in video_prompt_type_value, value= ui_defaults.get("video_guide", None),)
3440
- # keep_frames = gr.Slider(0, 100, value=ui_defaults.get("keep_frames",0), step=1, label="Nb of frames in Control Video to use (0 = max)", visible= "V" in video_prompt_type_value, scale = 2 )
3441
  keep_frames = gr.Text(value=ui_defaults.get("keep_frames","") , visible= "V" in video_prompt_type_value, scale = 2, label= "Frames to keep in Control Video (empty=All, 1=first, a:b for a range, space to separate values)" ) #, -1=last
3442
  image_refs = gr.Gallery( label ="Reference Images",
3443
  type ="pil", show_label= True,
@@ -3513,28 +3587,32 @@ def generate_video_tab(update_form = False, state_dict = None, ui_defaults = Non
3513
  label="Resolution"
3514
  )
3515
  with gr.Row():
3516
- with gr.Column():
3517
- video_length = gr.Slider(5, 193, value=ui_defaults.get("video_length", 81), step=4, label="Number of frames (16 = 1s)")
3518
- with gr.Column():
3519
- num_inference_steps = gr.Slider(1, 100, value=ui_defaults.get("num_inference_steps",30), step=1, label="Number of Inference Steps")
 
3520
  show_advanced = gr.Checkbox(label="Advanced Mode", value=advanced_ui)
3521
- with gr.Row(visible=advanced_ui) as advanced_row:
3522
- with gr.Column():
3523
- seed = gr.Slider(-1, 999999999, value=ui_defaults["seed"], step=1, label="Seed (-1 for random)")
3524
- with gr.Row():
3525
- repeat_generation = gr.Slider(1, 25.0, value=ui_defaults.get("repeat_generation",1), step=1, label="Default Number of Generated Videos per Prompt")
3526
- multi_images_gen_type = gr.Dropdown( value=ui_defaults.get("multi_images_gen_type",0),
3527
- choices=[
3528
- ("Generate every combination of images and texts", 0),
3529
- ("Match images and text prompts", 1),
3530
- ], visible= args.multiple_images, label= "Multiple Images as Texts Prompts"
3531
- )
3532
- with gr.Row():
3533
- guidance_scale = gr.Slider(1.0, 20.0, value=ui_defaults.get("guidance_scale",5), step=0.5, label="Guidance Scale", visible=True)
3534
- embedded_guidance_scale = gr.Slider(1.0, 20.0, value=6.0, step=0.5, label="Embedded Guidance Scale", visible=False)
3535
- flow_shift = gr.Slider(0.0, 25.0, value=ui_defaults.get("flow_shift",3), step=0.1, label="Shift Scale")
3536
- with gr.Row():
3537
- negative_prompt = gr.Textbox(label="Negative Prompt", value=ui_defaults.get("negative_prompt", "") )
 
 
 
3538
  with gr.Column(visible = True): #as loras_column:
3539
  gr.Markdown("<B>Loras can be used to create special effects on the video by mentioning a trigger word in the Prompt. You can save Loras combinations in presets.</B>")
3540
  loras_choices = gr.Dropdown(
@@ -3548,7 +3626,10 @@ def generate_video_tab(update_form = False, state_dict = None, ui_defaults = Non
3548
  loras_multipliers = gr.Textbox(label="Loras Multipliers (1.0 by default) separated by space characters or carriage returns, line that starts with # are ignored", value=launch_multis_str)
3549
  with gr.Row():
3550
  gr.Markdown("<B>Tea Cache accelerates by skipping intelligently some steps, the more steps are skipped the lower the quality of the video (Tea Cache consumes also VRAM)</B>")
3551
- with gr.Row():
 
 
 
3552
  tea_cache_setting = gr.Dropdown(
3553
  choices=[
3554
  ("Tea Cache Disabled", 0),
@@ -3564,9 +3645,10 @@ def generate_video_tab(update_form = False, state_dict = None, ui_defaults = Non
3564
  )
3565
  tea_cache_start_step_perc = gr.Slider(0, 100, value=ui_defaults.get("tea_cache_start_step_perc",0), step=1, label="Tea Cache starting moment in % of generation")
3566
 
3567
- with gr.Row():
 
 
3568
  gr.Markdown("<B>Upsampling - postprocessing that may improve fluidity and the size of the video</B>")
3569
- with gr.Row():
3570
  temporal_upsampling = gr.Dropdown(
3571
  choices=[
3572
  ("Disabled", ""),
@@ -3590,6 +3672,59 @@ def generate_video_tab(update_form = False, state_dict = None, ui_defaults = Non
3590
  label="Spatial Upsampling"
3591
  )
3592
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3593
  gr.Markdown("<B>With Riflex you can generate videos longer than 5s which is the default duration of videos used to train the model</B>")
3594
  RIFLEx_setting = gr.Dropdown(
3595
  choices=[
@@ -3600,50 +3735,9 @@ def generate_video_tab(update_form = False, state_dict = None, ui_defaults = Non
3600
  value=ui_defaults.get("RIFLEx_setting",0),
3601
  label="RIFLEx positional embedding to generate long video"
3602
  )
3603
- with gr.Row():
3604
- gr.Markdown("<B>Experimental: Skip Layer Guidance, should improve video quality</B>")
3605
- with gr.Row():
3606
- slg_switch = gr.Dropdown(
3607
- choices=[
3608
- ("OFF", 0),
3609
- ("ON", 1),
3610
- ],
3611
- value=ui_defaults.get("slg_switch",0),
3612
- visible=True,
3613
- scale = 1,
3614
- label="Skip Layer guidance"
3615
- )
3616
- slg_layers = gr.Dropdown(
3617
- choices=[
3618
- (str(i), i ) for i in range(40)
3619
- ],
3620
- value=ui_defaults.get("slg_layers", ["9"]),
3621
- multiselect= True,
3622
- label="Skip Layers",
3623
- scale= 3
3624
- )
3625
- with gr.Row():
3626
- slg_start_perc = gr.Slider(0, 100, value=ui_defaults.get("slg_start_perc",10), step=1, label="Denoising Steps % start")
3627
- slg_end_perc = gr.Slider(0, 100, value=ui_defaults.get("slg_end_perc",90), step=1, label="Denoising Steps % end")
3628
-
3629
- with gr.Row():
3630
- gr.Markdown("<B>Experimental: Classifier-Free Guidance Zero Star, better adherence to Text Prompt")
3631
- with gr.Row():
3632
- cfg_star_switch = gr.Dropdown(
3633
- choices=[
3634
- ("OFF", 0),
3635
- ("ON", 1),
3636
- ],
3637
- value=ui_defaults.get("cfg_star_switch",0),
3638
- visible=True,
3639
- scale = 1,
3640
- label="CFG Star"
3641
- )
3642
- with gr.Row():
3643
- cfg_zero_step = gr.Slider(-1, 39, value=ui_defaults.get("cfg_zero_step",-1), step=1, label="CFG Zero below this Layer (Extra Process)")
3644
 
3645
- with gr.Row():
3646
- save_settings_btn = gr.Button("Set Settings as Default", visible = not args.lock_config)
3647
 
3648
  if not update_form:
3649
  with gr.Column():
@@ -3697,11 +3791,11 @@ def generate_video_tab(update_form = False, state_dict = None, ui_defaults = Non
3697
  let countdown = 5;
3698
  const label = document.getElementById('quit_timer_label');
3699
  if (label) {
3700
- label.innerText = `Quitting in ${countdown}...`;
3701
  window.quitCountdownInterval = setInterval(() => {
3702
  countdown--;
3703
  if (countdown > 0) {
3704
- label.innerText = `Quitting in ${countdown}...`;
3705
  } else {
3706
  clearInterval(window.quitCountdownInterval);
3707
  findAndClickGradioButton('comfirm_quit_btn_hidden');
@@ -3841,7 +3935,7 @@ def generate_video_tab(update_form = False, state_dict = None, ui_defaults = Non
3841
  )
3842
 
3843
  extra_inputs = prompt_vars + [wizard_prompt, wizard_variables_var, wizard_prompt_activated_var, video_prompt_column, image_prompt_column,
3844
- prompt_column_advanced, prompt_column_wizard_vars, prompt_column_wizard, lset_name, advanced_row] # show_advanced presets_column,
3845
  if update_form:
3846
  locals_dict = locals()
3847
  gen_inputs = [state_dict if k=="state" else locals_dict[k] for k in inputs_names] + [state_dict] + extra_inputs
@@ -4141,13 +4235,14 @@ def generate_about_tab():
4141
  gr.Markdown("- <B>Alibaba Wan team for the best open source video generator")
4142
  gr.Markdown("- <B>Alibaba Vace and Fun Teams for their incredible control net models")
4143
  gr.Markdown("- <B>Cocktail Peanuts</B> : QA and simple installation via Pinokio.computer")
4144
- gr.Markdown("- <B>Tophness</B> : created multi tabs and queuing frameworks")
4145
  gr.Markdown("- <B>AmericanPresidentJimmyCarter</B> : added original support for Skip Layer Guidance")
4146
  gr.Markdown("- <B>Remade_AI</B> : for their awesome Loras collection")
4147
  gr.Markdown("<BR>Huge acknowlegments to these great open source projects used in WanGP:")
4148
  gr.Markdown("- <B>Rife</B>: temporal upsampler (https://github.com/hzwer/ECCV2022-RIFE)")
4149
  gr.Markdown("- <B>DwPose</B>: Open Pose extractor (https://github.com/IDEA-Research/DWPose)")
4150
  gr.Markdown("- <B>Midas</B>: Depth extractor (https://github.com/isl-org/MiDaS")
 
4151
 
4152
 
4153
  def generate_info_tab():
 
144
  gr.Info("You must use the 14B model to generate videos with a resolution equivalent to 720P")
145
  return
146
 
147
+ sliding_window_repeat = inputs["sliding_window_repeat"]
148
+ sliding_window = sliding_window_repeat > 0
149
  if "Vace" in model_filename:
150
  video_prompt_type = inputs["video_prompt_type"]
151
  image_refs = inputs["image_refs"]
152
  video_guide = inputs["video_guide"]
153
  video_mask = inputs["video_mask"]
154
+
155
+ if sliding_window:
156
+ if inputs["repeat_generation"]!=1:
157
+ gr.Info("Only one Video generated per Prompt is supported when Sliding windows is used")
158
+ return
159
+ if inputs["sliding_window_overlap"]>=inputs["video_length"] :
160
+ gr.Info("The number of frames of the Sliding Window Overlap must be less than the Number of Frames to Generate")
161
+ return
162
+
163
+ if "1.3B" in model_filename :
164
  resolution_reformated = str(height) + "*" + str(width)
165
  if not resolution_reformated in VACE_SIZE_CONFIGS:
166
  res = (" and ").join(VACE_SIZE_CONFIGS.keys())
 
207
  image_refs = resize_and_remove_background(image_refs, width, height, inputs["remove_background_image_ref"] ==1)
208
 
209
 
210
+ if sliding_window and len(prompts) > 0:
211
+ prompts = ["\n".join(prompts)]
212
+
213
  for single_prompt in prompts:
214
  extra_inputs = {
215
  "prompt" : single_prompt,
 
2066
  return cast(Image, ImageOps.exif_transpose(image))
2067
 
2068
 
2069
+ def preprocess_video(process_type, height, width, video_in, max_frames, start_frame=0):
2070
 
2071
  from wan.utils.utils import resample
2072
 
 
2076
 
2077
  fps = reader.get_avg_fps()
2078
 
2079
+ frame_nos = resample(fps, len(reader), max_target_frames_count= max_frames, target_fps=16, start_target_frame= start_frame)
2080
  frames_list = reader.get_batch(frame_nos)
2081
+ if len(frames_list) == 0:
2082
+ return None
2083
  frame_height, frame_width, _ = frames_list[0].shape
2084
 
2085
  scale = ((height * width ) / (frame_height * frame_width))**(1/2)
 
2202
  video_guide,
2203
  video_mask,
2204
  keep_frames,
2205
+ sliding_window_repeat,
2206
+ sliding_window_overlap,
2207
+ sliding_window_discard_last_frames,
2208
  remove_background_image_ref,
2209
  temporal_upsampling,
2210
  spatial_upsampling,
 
2360
  else:
2361
  raise gr.Error("Teacache not supported for this model")
2362
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2363
 
2364
  import random
2365
  if seed == None or seed <0:
 
2376
  gen["prompt"] = prompt
2377
  repeat_no = 0
2378
  extra_generation = 0
2379
+ sliding_window = sliding_window_repeat > 0
2380
+ if sliding_window:
2381
+ start_frame = 0
2382
+ reuse_frames = sliding_window_overlap
2383
+ discard_last_frames = sliding_window_discard_last_frames #4
2384
+ repeat_generation = sliding_window_repeat
2385
# The task's prompts were joined with "\n" when sliding windows are enabled; split them back
# into one prompt per window and drop empty lines. The filter has to test each `part`:
# testing the whole `prompt` string (as before) kept every empty line in the list.
prompts = [part for part in prompt.split("\n") if len(part) > 0]
2387
+
2388
+
2389
+ gen["sliding_window"] = sliding_window
2390
+
2391
+ frames_already_processed = None
2392
+ pre_video_guide = None
2393
+
2394
  while True:
2395
  extra_generation += gen.get("extra_orders",0)
2396
  gen["extra_orders"] = 0
 
2398
  gen["total_generation"] = total_generation
2399
  if abort or repeat_no >= total_generation:
2400
  break
2401
+
2402
+ if "Vace" in model_filename and (repeat_no == 0 or sliding_window):
2403
+ if sliding_window:
2404
+ prompt = prompts[repeat_no] if repeat_no < len(prompts) else prompts[-1]
2405
+
2406
+ # video_prompt_type = video_prompt_type +"G"
2407
+ image_refs_copy = image_refs.copy() if image_refs != None else None # required since prepare_source do inplace modifications
2408
+ video_guide_copy = video_guide
2409
+ video_mask_copy = video_mask
2410
+ if any(process in video_prompt_type for process in ("P", "D", "G")) :
2411
+ prompts_max = gen["prompts_max"]
2412
+
2413
+ status = get_generation_status(prompt_no, prompts_max, 1, 1, sliding_window)
2414
+ preprocess_type = None
2415
+ if "P" in video_prompt_type :
2416
+ progress_args = [0, status + " - Extracting Open Pose Information"]
2417
+ preprocess_type = "pose"
2418
+ elif "D" in video_prompt_type :
2419
+ progress_args = [0, status + " - Extracting Depth Information"]
2420
+ preprocess_type = "depth"
2421
+ elif "G" in video_prompt_type :
2422
+ progress_args = [0, status + " - Extracting Gray Level Information"]
2423
+ preprocess_type = "gray"
2424
+
2425
+ if preprocess_type != None :
2426
+ progress(*progress_args )
2427
+ gen["progress_args"] = progress_args
2428
+ video_guide_copy = preprocess_video(preprocess_type, width=width, height=height,video_in=video_guide, max_frames= video_length if repeat_no ==0 else video_length - reuse_frames, start_frame = start_frame)
2429
+ keep_frames_parsed, error = parse_keep_frames(keep_frames, video_length)
2430
+ if len(error) > 0:
2431
+ raise gr.Error(f"invalid keep frames {keep_frames}")
2432
+ if repeat_no == 0:
2433
+ image_size = VACE_SIZE_CONFIGS[resolution_reformated] # default frame dimensions until it is set by video_src (if there is any)
2434
+ src_video, src_mask, src_ref_images = wan_model.prepare_source([video_guide_copy],
2435
+ [video_mask_copy ],
2436
+ [image_refs_copy],
2437
+ video_length, image_size = image_size, device ="cpu",
2438
+ original_video= "O" in video_prompt_type,
2439
+ keep_frames=keep_frames_parsed,
2440
+ start_frame = start_frame,
2441
+ pre_src_video = [pre_video_guide]
2442
+ )
2443
+ if repeat_no == 0 and src_video != None and len(src_video) > 0:
2444
+ image_size = src_video[0].shape[-2:]
2445
+
2446
+ else:
2447
+ src_video, src_mask, src_ref_images = None, None, None
2448
+
2449
+
2450
  repeat_no +=1
2451
  gen["repeat_no"] = repeat_no
2452
  prompts_max = gen["prompts_max"]
2453
+ status = get_generation_status(prompt_no, prompts_max, repeat_no, total_generation, sliding_window)
2454
 
2455
  yield status
2456
 
 
2586
  # yield f"Video generation was aborted. Total Generation Time: {end_time-start_time:.1f}s"
2587
  else:
2588
  sample = samples.cpu()
2589
+ if sliding_window :
2590
+ start_frame += video_length
2591
+ if discard_last_frames > 0:
2592
+ sample = sample[: , :-discard_last_frames]
2593
+ start_frame -= discard_last_frames
2594
+ pre_video_guide = sample[:, -reuse_frames:]
2595
+ if repeat_no > 1:
2596
+ sample = sample[: , reuse_frames:]
2597
+ start_frame -= reuse_frames
2598
 
2599
  time_flag = datetime.fromtimestamp(time.time()).strftime("%Y-%m-%d-%Hh%Mm%Ss")
2600
  if os.name == 'nt':
 
2621
 
2622
  if exp > 0:
2623
  from rife.inference import temporal_interpolation
2624
+ if sliding_window and repeat_no > 1:
2625
+ sample = torch.cat([frames_already_processed[:, -2:-1], sample], dim=1)
2626
+ sample = temporal_interpolation( os.path.join("ckpts", "flownet.pkl"), sample, exp, device=processing_device)
2627
+ sample = sample[:, 1:]
2628
+ else:
2629
+ sample = temporal_interpolation( os.path.join("ckpts", "flownet.pkl"), sample, exp, device=processing_device)
2630
+
2631
  fps = fps * 2**exp
2632
 
2633
  if len(spatial_upsampling) > 0:
 
2652
  new_frames = None
2653
  sample = sample * 2 - 1
2654
 
2655
+ if sliding_window :
2656
+ if repeat_no == 1:
2657
+ frames_already_processed = sample
2658
+ else:
2659
+ sample = torch.cat([frames_already_processed, sample], dim=1)
2660
+ frames_already_processed = sample
2661
 
2662
  cache_video(
2663
  tensor=sample[None],
 
2684
  print(f"New video saved to Path: "+video_path)
2685
  file_list.append(video_path)
2686
  state['update_gallery'] = True
2687
+ if not sliding_window:
2688
+ seed += 1
2689
 
2690
  if temp_filename!= None and os.path.isfile(temp_filename):
2691
  os.remove(temp_filename)
 
2763
  yield f"Total Generation Time: {end_time-start_time:.1f}s"
2764
 
2765
 
2766
def get_generation_status(prompt_no, prompts_max, repeat_no, repeat_max, sliding_window=False):
    """Build the short progress label shown while videos are being generated.

    Args:
        prompt_no: Number of the prompt currently being processed.
        prompts_max: Total number of prompts; the "Prompt x/y" part is omitted when it is 1.
        repeat_no: Number of the current generation for this prompt.
        repeat_max: Total number of generations; the repeat part is omitted when it is 1.
        sliding_window: When true the repeat part is labelled "Sliding Window" instead of
            "Sample". Defaults to False so the earlier four-argument call form keeps working.

    Returns:
        "Video" when there is a single prompt and a single generation, otherwise the
        applicable parts joined by ", " (e.g. "Prompt 2/5, Sample 3/4").
    """
    parts = []
    if prompts_max != 1:
        parts.append(f"Prompt {prompt_no}/{prompts_max}")
    if repeat_max != 1:
        item = "Sliding Window" if sliding_window else "Sample"
        parts.append(f"{item} {repeat_no}/{repeat_max}")
    # Neither counter is worth displaying: fall back to the generic label.
    return ", ".join(parts) if parts else "Video"
2779
 
2780
 
2781
  refresh_id = 0
 
2791
  prompts_max = gen.get("prompts_max",0)
2792
  total_generation = gen["total_generation"]
2793
  repeat_no = gen["repeat_no"]
2794
+ sliding_window = gen["sliding_window"]
2795
+ status = get_generation_status(prompt_no, prompts_max, repeat_no, total_generation, sliding_window)
2796
  gen["progress_status"] = status
2797
  gen["refresh"] = get_new_refresh_id()
2798
 
 
2809
  prompts_max = gen.get("prompts_max",0)
2810
  total_generation = gen["total_generation"] + extra_orders
2811
  repeat_no = gen["repeat_no"]
2812
+ status = get_generation_status(prompt_no, prompts_max, repeat_no, total_generation, gen.get("sliding_window",False))
2813
 
2814
 
2815
  gen["progress_status"] = status
 
3131
 
3132
 
3133
  if not "Vace" in model_filename:
3134
+ unsaved_params = ["video_prompt_type", "keep_frames", "remove_background_image_ref", "sliding_window_repeat", "sliding_window_overlap", "sliding_window_discard_last_frames"]
3135
  for k in unsaved_params:
3136
  inputs.pop(k)
3137
 
 
3174
  video_guide,
3175
  video_mask,
3176
  keep_frames,
3177
+ sliding_window_repeat,
3178
+ sliding_window_overlap,
3179
+ sliding_window_discard_last_frames,
3180
  remove_background_image_ref,
3181
  temporal_upsampling,
3182
  spatial_upsampling,
 
3512
  # video_prompt_type_image_refs = gr.Checkbox(value="I" in video_prompt_type_value , label= "Use References Images (Faces, Objects) to customize New Video", scale =1 )
3513
 
3514
  video_guide = gr.Video(label= "Control Video", visible= "V" in video_prompt_type_value, value= ui_defaults.get("video_guide", None),)
 
3515
  keep_frames = gr.Text(value=ui_defaults.get("keep_frames","") , visible= "V" in video_prompt_type_value, scale = 2, label= "Frames to keep in Control Video (empty=All, 1=first, a:b for a range, space to separate values)" ) #, -1=last
3516
  image_refs = gr.Gallery( label ="Reference Images",
3517
  type ="pil", show_label= True,
 
3587
  label="Resolution"
3588
  )
3589
  with gr.Row():
3590
+ video_length = gr.Slider(5, 193, value=ui_defaults.get("video_length", 81), step=4, label="Number of frames (16 = 1s)")
3591
+ num_inference_steps = gr.Slider(1, 100, value=ui_defaults.get("num_inference_steps",30), step=1, label="Number of Inference Steps")
3592
+
3593
+
3594
+
3595
  show_advanced = gr.Checkbox(label="Advanced Mode", value=advanced_ui)
3596
+ with gr.Tabs(visible=advanced_ui) as advanced_row:
3597
+ # with gr.Row(visible=advanced_ui) as advanced_row:
3598
+ with gr.Tab("Generation"):
3599
+ with gr.Column():
3600
+ seed = gr.Slider(-1, 999999999, value=ui_defaults["seed"], step=1, label="Seed (-1 for random)")
3601
+ with gr.Row():
3602
+ repeat_generation = gr.Slider(1, 25.0, value=ui_defaults.get("repeat_generation",1), step=1, label="Default Number of Generated Videos per Prompt")
3603
+ multi_images_gen_type = gr.Dropdown( value=ui_defaults.get("multi_images_gen_type",0),
3604
+ choices=[
3605
+ ("Generate every combination of images and texts", 0),
3606
+ ("Match images and text prompts", 1),
3607
+ ], visible= args.multiple_images, label= "Multiple Images as Texts Prompts"
3608
+ )
3609
+ with gr.Row():
3610
+ guidance_scale = gr.Slider(1.0, 20.0, value=ui_defaults.get("guidance_scale",5), step=0.5, label="Guidance Scale", visible=True)
3611
+ embedded_guidance_scale = gr.Slider(1.0, 20.0, value=6.0, step=0.5, label="Embedded Guidance Scale", visible=False)
3612
+ flow_shift = gr.Slider(0.0, 25.0, value=ui_defaults.get("flow_shift",3), step=0.1, label="Shift Scale")
3613
+ with gr.Row():
3614
+ negative_prompt = gr.Textbox(label="Negative Prompt", value=ui_defaults.get("negative_prompt", "") )
3615
+ with gr.Tab("Loras"):
3616
  with gr.Column(visible = True): #as loras_column:
3617
  gr.Markdown("<B>Loras can be used to create special effects on the video by mentioning a trigger word in the Prompt. You can save Loras combinations in presets.</B>")
3618
  loras_choices = gr.Dropdown(
 
3626
  loras_multipliers = gr.Textbox(label="Loras Multipliers (1.0 by default) separated by space characters or carriage returns, line that starts with # are ignored", value=launch_multis_str)
3627
  with gr.Row():
3628
  gr.Markdown("<B>Tea Cache accelerates by skipping intelligently some steps, the more steps are skipped the lower the quality of the video (Tea Cache consumes also VRAM)</B>")
3629
+ with gr.Tab("Speed"):
3630
+ with gr.Column():
3631
+ gr.Markdown("<B>Tea Cache accelerates the Video generation by skipping denoising steps. This may impact the quality</B>")
3632
+
3633
  tea_cache_setting = gr.Dropdown(
3634
  choices=[
3635
  ("Tea Cache Disabled", 0),
 
3645
  )
3646
  tea_cache_start_step_perc = gr.Slider(0, 100, value=ui_defaults.get("tea_cache_start_step_perc",0), step=1, label="Tea Cache starting moment in % of generation")
3647
 
3648
+ with gr.Tab("Upsampling"):
3649
+
3650
+ with gr.Column():
3651
  gr.Markdown("<B>Upsampling - postprocessing that may improve fluidity and the size of the video</B>")
 
3652
  temporal_upsampling = gr.Dropdown(
3653
  choices=[
3654
  ("Disabled", ""),
 
3672
  label="Spatial Upsampling"
3673
  )
3674
 
3675
+ with gr.Tab("Quality"):
3676
+ with gr.Row():
3677
+ gr.Markdown("<B>Experimental: Skip Layer Guidance, should improve video quality</B>")
3678
+ with gr.Row():
3679
+ slg_switch = gr.Dropdown(
3680
+ choices=[
3681
+ ("OFF", 0),
3682
+ ("ON", 1),
3683
+ ],
3684
+ value=ui_defaults.get("slg_switch",0),
3685
+ visible=True,
3686
+ scale = 1,
3687
+ label="Skip Layer guidance"
3688
+ )
3689
+ slg_layers = gr.Dropdown(
3690
+ choices=[
3691
+ (str(i), i ) for i in range(40)
3692
+ ],
3693
+ value=ui_defaults.get("slg_layers", ["9"]),
3694
+ multiselect= True,
3695
+ label="Skip Layers",
3696
+ scale= 3
3697
+ )
3698
+ with gr.Row():
3699
+ slg_start_perc = gr.Slider(0, 100, value=ui_defaults.get("slg_start_perc",10), step=1, label="Denoising Steps % start")
3700
+ slg_end_perc = gr.Slider(0, 100, value=ui_defaults.get("slg_end_perc",90), step=1, label="Denoising Steps % end")
3701
+
3702
+ with gr.Row():
3703
+ gr.Markdown("<B>Experimental: Classifier-Free Guidance Zero Star, better adherence to Text Prompt")
3704
+ with gr.Row():
3705
+ cfg_star_switch = gr.Dropdown(
3706
+ choices=[
3707
+ ("OFF", 0),
3708
+ ("ON", 1),
3709
+ ],
3710
+ value=ui_defaults.get("cfg_star_switch",0),
3711
+ visible=True,
3712
+ scale = 1,
3713
+ label="CFG Star"
3714
+ )
3715
+ with gr.Row():
3716
+ cfg_zero_step = gr.Slider(-1, 39, value=ui_defaults.get("cfg_zero_step",-1), step=1, label="CFG Zero below this Layer (Extra Process)")
3717
+
3718
+ with gr.Tab("Sliding Window", visible= "Vace" in model_filename ) as sliding_window_tab:
3719
+
3720
+ with gr.Column(visible= "Vace" in model_filename ) as sliding_window_row:
3721
+ gr.Markdown("<B>A Sliding Window allows you to generate video longer than those of the model limits</B>")
3722
+
3723
+ sliding_window_repeat = gr.Slider(0, 20, value=ui_defaults.get("sliding_window_repeat", 0), step=1, label="Sliding Window Iterations (O=Disabled)")
3724
+ sliding_window_overlap = gr.Slider(1, 32, value=ui_defaults.get("sliding_window_overlap",16), step=1, label="Windows Frames Overlap (needed to maintain continuity between windows, a higher value will require more windows)")
3725
+ sliding_window_discard_last_frames = gr.Slider(1, 10, value=ui_defaults.get("sliding_window_discard_last_frames", 4), step=1, label="Discard Last Frames of a Window (that may have bad quality)")
3726
+
3727
+ with gr.Tab("Miscellaneous"):
3728
  gr.Markdown("<B>With Riflex you can generate videos longer than 5s which is the default duration of videos used to train the model</B>")
3729
  RIFLEx_setting = gr.Dropdown(
3730
  choices=[
 
3735
  value=ui_defaults.get("RIFLEx_setting",0),
3736
  label="RIFLEx positional embedding to generate long video"
3737
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3738
 
3739
+ with gr.Row():
3740
+ save_settings_btn = gr.Button("Set Settings as Default", visible = not args.lock_config)
3741
 
3742
  if not update_form:
3743
  with gr.Column():
 
3791
  let countdown = 5;
3792
  const label = document.getElementById('quit_timer_label');
3793
  if (label) {
3794
+ label.innerText = `${countdown}...`;
3795
  window.quitCountdownInterval = setInterval(() => {
3796
  countdown--;
3797
  if (countdown > 0) {
3798
+ label.innerText = `${countdown}`;
3799
  } else {
3800
  clearInterval(window.quitCountdownInterval);
3801
  findAndClickGradioButton('comfirm_quit_btn_hidden');
 
3935
  )
3936
 
3937
  extra_inputs = prompt_vars + [wizard_prompt, wizard_variables_var, wizard_prompt_activated_var, video_prompt_column, image_prompt_column,
3938
+ prompt_column_advanced, prompt_column_wizard_vars, prompt_column_wizard, lset_name, advanced_row, sliding_window_tab] # show_advanced presets_column,
3939
  if update_form:
3940
  locals_dict = locals()
3941
  gen_inputs = [state_dict if k=="state" else locals_dict[k] for k in inputs_names] + [state_dict] + extra_inputs
 
4235
  gr.Markdown("- <B>Alibaba Wan team for the best open source video generator")
4236
  gr.Markdown("- <B>Alibaba Vace and Fun Teams for their incredible control net models")
4237
  gr.Markdown("- <B>Cocktail Peanuts</B> : QA and simple installation via Pinokio.computer")
4238
+ gr.Markdown("- <B>Tophness</B> : created (former) multi tabs and queuing frameworks")
4239
  gr.Markdown("- <B>AmericanPresidentJimmyCarter</B> : added original support for Skip Layer Guidance")
4240
  gr.Markdown("- <B>Remade_AI</B> : for their awesome Loras collection")
4241
  gr.Markdown("<BR>Huge acknowlegments to these great open source projects used in WanGP:")
4242
  gr.Markdown("- <B>Rife</B>: temporal upsampler (https://github.com/hzwer/ECCV2022-RIFE)")
4243
  gr.Markdown("- <B>DwPose</B>: Open Pose extractor (https://github.com/IDEA-Research/DWPose)")
4244
  gr.Markdown("- <B>Midas</B>: Depth extractor (https://github.com/isl-org/MiDaS")
4245
+ gr.Markdown("- <B>Matanyone</B> and <B>SAM2</B>: Mask Generation (https://github.com/pq-yang/MatAnyone) and (https://github.com/facebookresearch/sam2)")
4246
 
4247
 
4248
  def generate_info_tab():