primerz committed on
Commit
e9201b0
·
verified ·
1 Parent(s): d31bf62

Upload 2 files

Browse files
Files changed (2) hide show
  1. generator.py +163 -102
  2. models.py +50 -32
generator.py CHANGED
@@ -20,7 +20,7 @@ from models import (
20
  load_face_analysis, load_depth_detector, load_controlnets, load_image_encoder,
21
  load_sdxl_pipeline, load_lora, setup_ip_adapter, setup_compel,
22
  setup_scheduler, optimize_pipeline, load_caption_model, set_clip_skip,
23
- load_openpose_detector
24
  )
25
 
26
 
@@ -34,17 +34,24 @@ class RetroArtConverter:
34
  'custom_checkpoint': False,
35
  'lora': False,
36
  'instantid': False,
37
- 'zoe_depth': False,
 
38
  'ip_adapter': False,
39
- 'openpose': False
 
40
  }
41
 
42
- # Initialize face analysis
43
  self.face_app, self.face_detection_enabled = load_face_analysis()
44
 
45
- # Load Zoe Depth detector
46
- self.zoe_depth, zoe_success = load_depth_detector()
47
- self.models_loaded['zoe_depth'] = zoe_success
 
 
 
 
 
48
 
49
  # --- NEW: Load OpenPose detector ---
50
  self.openpose_detector, openpose_success = load_openpose_detector()
@@ -182,8 +189,11 @@ class RetroArtConverter:
182
  print("============================\n")
183
 
184
  def get_depth_map(self, image):
185
- """Generate depth map using Zoe Depth"""
186
- if self.zoe_depth is not None:
 
 
 
187
  try:
188
  if image.mode != 'RGB':
189
  image = image.convert('RGB')
@@ -203,40 +213,38 @@ class RetroArtConverter:
203
  image_for_depth = image.resize(size_for_depth, Image.LANCZOS)
204
 
205
  if target_width != orig_width or target_height != orig_height:
206
- print(f"[DEPTH] Resized for ZoeDetector: {orig_width}x{orig_height} -> {target_width}x{target_height}")
207
 
208
  # FIXED: Add torch.no_grad() wrapper
209
  with torch.no_grad():
210
- depth_image = self.zoe_depth(image_for_depth)
211
 
212
  depth_width, depth_height = depth_image.size
213
  if depth_width != orig_width or depth_height != orig_height:
214
  depth_image = depth_image.resize((int(orig_width), int(orig_height)), Image.LANCZOS)
215
 
216
- print(f"[DEPTH] Zoe depth map generated: {orig_width}x{orig_height}")
217
  return depth_image
218
 
219
  except Exception as e:
220
- print(f"[DEPTH] ZoeDetector failed ({e}), falling back to grayscale depth")
221
  gray = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2GRAY)
222
  depth_colored = cv2.cvtColor(gray, cv2.COLOR_GRAY2RGB)
223
  return Image.fromarray(depth_colored)
224
  else:
 
 
225
  gray = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2GRAY)
226
  depth_colored = cv2.cvtColor(gray, cv2.COLOR_GRAY2RGB)
227
  return Image.fromarray(depth_colored)
228
 
229
 
230
- def add_trigger_word(self, prompt, lora_name="retroart"):
231
  """Add trigger word to prompt if not present"""
232
- from config import LORA_CONFIGS
233
-
234
- trigger_word = LORA_CONFIGS.get(lora_name, {}).get("trigger", TRIGGER_WORD)
235
-
236
- if trigger_word.lower() not in prompt.lower():
237
  if not prompt or not prompt.strip():
238
- return trigger_word
239
- return f"{trigger_word}, {prompt}"
240
  return prompt
241
 
242
  def extract_multi_scale_face(self, face_crop, face):
@@ -487,7 +495,6 @@ class RetroArtConverter:
487
  identity_control_scale=0.85,
488
  expression_control_scale=0.6,
489
  lora_scale=1.0,
490
- lora_name="retroart",
491
  identity_preservation=0.8,
492
  strength=0.75,
493
  enable_color_matching=False,
@@ -514,7 +521,7 @@ class RetroArtConverter:
514
  )
515
 
516
  # Add trigger word
517
- prompt = self.add_trigger_word(prompt, lora_name)
518
 
519
  # Calculate optimal size with flexible aspect ratio support
520
  original_width, original_height = input_image.size
@@ -532,7 +539,7 @@ class RetroArtConverter:
532
  # Generate depth map
533
  depth_image = None
534
  if self.depth_active:
535
- print("Generating Zoe depth map...")
536
  depth_image = self.get_depth_map(resized_image)
537
  if depth_image.size != (target_width, target_height):
538
  depth_image = depth_image.resize((int(target_width), int(target_height)), Image.LANCZOS)
@@ -557,88 +564,142 @@ class RetroArtConverter:
557
  has_detected_faces = False
558
  face_bbox_original = None
559
 
560
- if self.instantid_active and self.face_app is not None: # <-- Check instantid_active
561
- print("Detecting faces and extracting keypoints...")
562
- img_array = cv2.cvtColor(np.array(resized_image), cv2.COLOR_RGB2BGR)
563
- faces = self.face_app.get(img_array)
564
 
565
- if len(faces) > 0:
566
- has_detected_faces = True
567
- print(f"Detected {len(faces)} face(s)")
568
-
569
- # Get largest face
570
- face = sorted(faces, key=lambda x: (x.bbox[2] - x.bbox[0]) * (x.bbox[3] - x.bbox[1]))[-1]
571
-
572
- # ADAPTIVE PARAMETERS
573
- adaptive_params = self.detect_face_quality(face)
574
- if adaptive_params is not None:
575
- print(f"[ADAPTIVE] {adaptive_params['reason']}")
576
- identity_preservation = adaptive_params['identity_preservation']
577
- identity_control_scale = adaptive_params['identity_control_scale']
578
- guidance_scale = adaptive_params['guidance_scale']
579
- lora_scale = adaptive_params['lora_scale']
580
-
581
- # Extract face embeddings
582
- face_embeddings_base = face.normed_embedding
583
-
584
- # Extract face crop
585
- bbox = face.bbox.astype(int)
586
- x1, y1, x2, y2 = bbox[0], bbox[1], bbox[2], bbox[3]
587
- face_bbox_original = [x1, y1, x2, y2]
588
-
589
- # Add padding
590
- face_width = x2 - x1
591
- face_height = y2 - y1
592
- padding_x = int(face_width * 0.3)
593
- padding_y = int(face_height * 0.3)
594
- x1 = max(0, x1 - padding_x)
595
- y1 = max(0, y1 - padding_y)
596
- x2 = min(resized_image.width, x2 + padding_x)
597
- y2 = min(resized_image.height, y2 + padding_y)
598
-
599
- # Crop face region
600
- face_crop = resized_image.crop((x1, y1, x2, y2))
601
-
602
- # MULTI-SCALE PROCESSING
603
- face_embeddings = self.extract_multi_scale_face(face_crop, face)
604
-
605
- # Enhance face crop
606
- face_crop_enhanced = enhance_face_crop(face_crop)
607
 
608
- # Draw keypoints
609
- face_kps = face.kps
610
- face_kps_image = draw_kps(resized_image, face_kps)
611
-
612
- # ENHANCED: Extract comprehensive facial attributes
613
- from utils import get_facial_attributes, build_enhanced_prompt
614
- facial_attrs = get_facial_attributes(face)
615
-
616
- # Update prompt with detected attributes
617
- prompt = build_enhanced_prompt(prompt, facial_attrs, TRIGGER_WORD)
618
-
619
- # Legacy output for compatibility
620
- age = facial_attrs['age']
621
- gender_code = facial_attrs['gender']
622
- det_score = facial_attrs['quality']
623
-
624
- gender_str = 'M' if gender_code == 1 else ('F' if gender_code == 0 else 'N/A')
625
- print(f"Face info: bbox={face.bbox}, age={age if age else 'N/A'}, gender={gender_str}")
626
- print(f"Face crop size: {face_crop.size}, enhanced: {face_crop_enhanced.size if face_crop_enhanced else 'N/A'}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
627
 
628
  # Set LORA scale
629
  if hasattr(self.pipe, 'set_adapters') and self.models_loaded['lora']:
630
  try:
631
- self.pipe.set_adapters([lora_name], adapter_weights=[lora_scale])
632
- print(f"LoRA ({lora_name}) scale: {lora_scale}")
633
  except Exception as e:
634
- print(f"Could not set LoRA scale: {e}")
635
- # Try fallback to retroart if selected LoRA fails
636
- if lora_name != "retroart":
637
- try:
638
- self.pipe.set_adapters(["retroart"], adapter_weights=[lora_scale])
639
- print(f"Fallback to RetroArt LoRA, scale: {lora_scale}")
640
- except:
641
- pass
642
 
643
  # Prepare generation kwargs
644
  pipe_kwargs = {
@@ -727,11 +788,11 @@ class RetroArtConverter:
727
  print(" Face detected but IP-Adapter/embeddings unavailable, using keypoints only")
728
 
729
  else:
730
- # No face, must add a blank image to keep list order
731
- print("Using blank map for InstantID (no face/disabled)")
732
  control_images.append(Image.new("RGB", (target_width, target_height), (0,0,0)))
733
  conditioning_scales.append(0.0) # Set scale to 0
734
- scale_debug_str.append("Identity: 0.00")
735
 
736
  # 2. Depth
737
  if self.depth_active:
 
20
  load_face_analysis, load_depth_detector, load_controlnets, load_image_encoder,
21
  load_sdxl_pipeline, load_lora, setup_ip_adapter, setup_compel,
22
  setup_scheduler, optimize_pipeline, load_caption_model, set_clip_skip,
23
+ load_openpose_detector, load_mediapipe_face_detector
24
  )
25
 
26
 
 
34
  'custom_checkpoint': False,
35
  'lora': False,
36
  'instantid': False,
37
+ 'depth_detector': False,
38
+ 'depth_type': None,
39
  'ip_adapter': False,
40
+ 'openpose': False,
41
+ 'mediapipe_face': False
42
  }
43
 
44
+ # Initialize face analysis (InsightFace)
45
  self.face_app, self.face_detection_enabled = load_face_analysis()
46
 
47
+ # Load MediapipeFaceDetector (alternative face detection)
48
+ self.mediapipe_face, mediapipe_success = load_mediapipe_face_detector()
49
+ self.models_loaded['mediapipe_face'] = mediapipe_success
50
+
51
+ # Load Depth detector with fallback hierarchy (Leres -> Midas)
52
+ self.depth_detector, self.depth_type, depth_success = load_depth_detector()
53
+ self.models_loaded['depth_detector'] = depth_success
54
+ self.models_loaded['depth_type'] = self.depth_type
55
 
56
  # --- NEW: Load OpenPose detector ---
57
  self.openpose_detector, openpose_success = load_openpose_detector()
 
189
  print("============================\n")
190
 
191
  def get_depth_map(self, image):
192
+ """
193
+ Generate depth map using available depth detector.
194
+ Supports: LeresDetector or MidasDetector.
195
+ """
196
+ if self.depth_detector is not None:
197
  try:
198
  if image.mode != 'RGB':
199
  image = image.convert('RGB')
 
213
  image_for_depth = image.resize(size_for_depth, Image.LANCZOS)
214
 
215
  if target_width != orig_width or target_height != orig_height:
216
+ print(f"[DEPTH] Resized for {self.depth_type.upper()}Detector: {orig_width}x{orig_height} -> {target_width}x{target_height}")
217
 
218
  # FIXED: Add torch.no_grad() wrapper
219
  with torch.no_grad():
220
+ depth_image = self.depth_detector(image_for_depth)
221
 
222
  depth_width, depth_height = depth_image.size
223
  if depth_width != orig_width or depth_height != orig_height:
224
  depth_image = depth_image.resize((int(orig_width), int(orig_height)), Image.LANCZOS)
225
 
226
+ print(f"[DEPTH] {self.depth_type.upper()} depth map generated: {orig_width}x{orig_height}")
227
  return depth_image
228
 
229
  except Exception as e:
230
+ print(f"[DEPTH] {self.depth_type.upper()}Detector failed ({e}), falling back to grayscale depth")
231
  gray = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2GRAY)
232
  depth_colored = cv2.cvtColor(gray, cv2.COLOR_GRAY2RGB)
233
  return Image.fromarray(depth_colored)
234
  else:
235
+ # No depth detector available, use grayscale fallback
236
+ print("[DEPTH] No depth detector available, using grayscale fallback")
237
  gray = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2GRAY)
238
  depth_colored = cv2.cvtColor(gray, cv2.COLOR_GRAY2RGB)
239
  return Image.fromarray(depth_colored)
240
 
241
 
242
+ def add_trigger_word(self, prompt):
243
  """Add trigger word to prompt if not present"""
244
+ if TRIGGER_WORD.lower() not in prompt.lower():
 
 
 
 
245
  if not prompt or not prompt.strip():
246
+ return TRIGGER_WORD
247
+ return f"{TRIGGER_WORD}, {prompt}"
248
  return prompt
249
 
250
  def extract_multi_scale_face(self, face_crop, face):
 
495
  identity_control_scale=0.85,
496
  expression_control_scale=0.6,
497
  lora_scale=1.0,
 
498
  identity_preservation=0.8,
499
  strength=0.75,
500
  enable_color_matching=False,
 
521
  )
522
 
523
  # Add trigger word
524
+ prompt = self.add_trigger_word(prompt)
525
 
526
  # Calculate optimal size with flexible aspect ratio support
527
  original_width, original_height = input_image.size
 
539
  # Generate depth map
540
  depth_image = None
541
  if self.depth_active:
542
+ print("Generating depth map...")
543
  depth_image = self.get_depth_map(resized_image)
544
  if depth_image.size != (target_width, target_height):
545
  depth_image = depth_image.resize((int(target_width), int(target_height)), Image.LANCZOS)
 
564
  has_detected_faces = False
565
  face_bbox_original = None
566
 
567
+ if self.instantid_active:
568
+ # Try InsightFace first (if available)
569
+ insightface_tried = False
570
+ insightface_success = False
571
 
572
+ if self.face_app is not None:
573
+ print("Detecting faces with InsightFace...")
574
+ insightface_tried = True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
575
 
576
+ try:
577
+ img_array = cv2.cvtColor(np.array(resized_image), cv2.COLOR_RGB2BGR)
578
+ faces = self.face_app.get(img_array)
579
+
580
+ if len(faces) > 0:
581
+ insightface_success = True
582
+ has_detected_faces = True
583
+ print(f"✓ InsightFace detected {len(faces)} face(s)")
584
+
585
+ # Get largest face
586
+ face = sorted(faces, key=lambda x: (x.bbox[2] - x.bbox[0]) * (x.bbox[3] - x.bbox[1]))[-1]
587
+
588
+ # ADAPTIVE PARAMETERS
589
+ adaptive_params = self.detect_face_quality(face)
590
+ if adaptive_params is not None:
591
+ print(f"[ADAPTIVE] {adaptive_params['reason']}")
592
+ identity_preservation = adaptive_params['identity_preservation']
593
+ identity_control_scale = adaptive_params['identity_control_scale']
594
+ guidance_scale = adaptive_params['guidance_scale']
595
+ lora_scale = adaptive_params['lora_scale']
596
+
597
+ # Extract face embeddings
598
+ face_embeddings_base = face.normed_embedding
599
+
600
+ # Extract face crop
601
+ bbox = face.bbox.astype(int)
602
+ x1, y1, x2, y2 = bbox[0], bbox[1], bbox[2], bbox[3]
603
+ face_bbox_original = [x1, y1, x2, y2]
604
+
605
+ # Add padding
606
+ face_width = x2 - x1
607
+ face_height = y2 - y1
608
+ padding_x = int(face_width * 0.3)
609
+ padding_y = int(face_height * 0.3)
610
+ x1 = max(0, x1 - padding_x)
611
+ y1 = max(0, y1 - padding_y)
612
+ x2 = min(resized_image.width, x2 + padding_x)
613
+ y2 = min(resized_image.height, y2 + padding_y)
614
+
615
+ # Crop face region
616
+ face_crop = resized_image.crop((x1, y1, x2, y2))
617
+
618
+ # MULTI-SCALE PROCESSING
619
+ face_embeddings = self.extract_multi_scale_face(face_crop, face)
620
+
621
+ # Enhance face crop
622
+ face_crop_enhanced = enhance_face_crop(face_crop)
623
+
624
+ # Draw keypoints
625
+ face_kps = face.kps
626
+ face_kps_image = draw_kps(resized_image, face_kps)
627
+
628
+ # ENHANCED: Extract comprehensive facial attributes
629
+ from utils import get_facial_attributes, build_enhanced_prompt
630
+ facial_attrs = get_facial_attributes(face)
631
+
632
+ # Update prompt with detected attributes
633
+ prompt = build_enhanced_prompt(prompt, facial_attrs, TRIGGER_WORD)
634
+
635
+ # Legacy output for compatibility
636
+ age = facial_attrs['age']
637
+ gender_code = facial_attrs['gender']
638
+ det_score = facial_attrs['quality']
639
+
640
+ gender_str = 'M' if gender_code == 1 else ('F' if gender_code == 0 else 'N/A')
641
+ print(f"Face info: bbox={face.bbox}, age={age if age else 'N/A'}, gender={gender_str}")
642
+ print(f"Face crop size: {face_crop.size}, enhanced: {face_crop_enhanced.size if face_crop_enhanced else 'N/A'}")
643
+ else:
644
+ print("✗ InsightFace found no faces")
645
+
646
+ except Exception as e:
647
+ print(f"[ERROR] InsightFace detection failed: {e}")
648
+ import traceback
649
+ traceback.print_exc()
650
+ else:
651
+ print("[INFO] InsightFace not available (face_app is None)")
652
+
653
+ # If InsightFace didn't succeed, try MediapipeFace
654
+ if not insightface_success:
655
+ if self.mediapipe_face is not None:
656
+ print("Trying MediapipeFaceDetector as fallback...")
657
+
658
+ try:
659
+ # MediapipeFace returns an annotated image with keypoints
660
+ mediapipe_result = self.mediapipe_face(resized_image)
661
+
662
+ # Check if face was detected (result is not blank/black)
663
+ mediapipe_array = np.array(mediapipe_result)
664
+ if mediapipe_array.sum() > 1000: # If image has significant content
665
+ has_detected_faces = True
666
+ face_kps_image = mediapipe_result
667
+ print(f"✓ MediapipeFace detected face(s)")
668
+ print(f"[INFO] Using MediapipeFace keypoints (no embeddings available)")
669
+
670
+ # Note: MediapipeFace doesn't provide embeddings or detailed info
671
+ # So face_embeddings, face_crop_enhanced remain None
672
+ # InstantID will work with keypoints only (reduced quality)
673
+ else:
674
+ print("✗ MediapipeFace found no faces")
675
+ except Exception as e:
676
+ print(f"[ERROR] MediapipeFace detection failed: {e}")
677
+ import traceback
678
+ traceback.print_exc()
679
+ else:
680
+ print("[INFO] MediapipeFaceDetector not available")
681
+
682
+ # Final summary
683
+ if not has_detected_faces:
684
+ print("\n[SUMMARY] No faces detected by any detector")
685
+ if insightface_tried:
686
+ print(" - InsightFace: tried, found nothing")
687
+ else:
688
+ print(" - InsightFace: not available")
689
+
690
+ if self.mediapipe_face is not None:
691
+ print(" - MediapipeFace: tried, found nothing")
692
+ else:
693
+ print(" - MediapipeFace: not available")
694
+ print()
695
 
696
  # Set LORA scale
697
  if hasattr(self.pipe, 'set_adapters') and self.models_loaded['lora']:
698
  try:
699
+ self.pipe.set_adapters(["retroart"], adapter_weights=[lora_scale])
700
+ print(f"LORA scale: {lora_scale}")
701
  except Exception as e:
702
+ print(f"Could not set LORA scale: {e}")
 
 
 
 
 
 
 
703
 
704
  # Prepare generation kwargs
705
  pipe_kwargs = {
 
788
  print(" Face detected but IP-Adapter/embeddings unavailable, using keypoints only")
789
 
790
  else:
791
+ # No face detected - blank map needed to maintain ControlNet list order
792
+ print("[INSTANTID] Using blank map (scale=0, no effect on generation)")
793
  control_images.append(Image.new("RGB", (target_width, target_height), (0,0,0)))
794
  conditioning_scales.append(0.0) # Set scale to 0
795
+ scale_debug_str.append("Identity: 0.00 (no face)")
796
 
797
  # 2. Depth
798
  if self.depth_active:
models.py CHANGED
@@ -13,7 +13,7 @@ from diffusers import (
13
  from diffusers.models.attention_processor import AttnProcessor2_0
14
  from transformers import CLIPVisionModelWithProjection
15
  from insightface.app import FaceAnalysis
16
- from controlnet_aux import ZoeDetector, OpenposeDetector # <-- NEW
17
  from huggingface_hub import hf_hub_download
18
  from compel import Compel, ReturnedEmbeddingsType
19
 
@@ -82,16 +82,34 @@ def load_face_analysis():
82
 
83
 
84
  def load_depth_detector():
85
- """Load Zoe Depth detector."""
86
- print("Loading Zoe Depth detector...")
 
 
 
 
 
87
  try:
88
- zoe_depth = ZoeDetector.from_pretrained("lllyasviel/Annotators")
89
- zoe_depth.to(device)
90
- print(" [OK] Zoe Depth loaded successfully")
91
- return zoe_depth, True
 
92
  except Exception as e:
93
- print(f" [WARNING] Zoe Depth not available: {e}")
94
- return None, False
 
 
 
 
 
 
 
 
 
 
 
 
95
 
96
  # --- NEW FUNCTION ---
97
  def load_openpose_detector():
@@ -107,11 +125,24 @@ def load_openpose_detector():
107
  return None, False
108
  # --- END NEW FUNCTION ---
109
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
  def load_controlnets():
111
  """Load ControlNet models."""
112
- print("Loading ControlNet Zoe Depth model...")
113
  controlnet_depth = ControlNetModel.from_pretrained(
114
- "diffusers/controlnet-zoe-depth-sdxl-1.0",
115
  torch_dtype=dtype
116
  ).to(device)
117
  print(" [OK] ControlNet Depth loaded")
@@ -188,29 +219,16 @@ def load_sdxl_pipeline(controlnets):
188
 
189
 
190
  def load_lora(pipe):
191
- """Load both LORA models from HuggingFace Hub."""
192
- print("Loading LoRAs from HuggingFace Hub...")
193
- success_count = 0
194
-
195
- # Load RetroArt LoRA
196
  try:
197
- lora_path_retroart = download_model_with_retry(MODEL_REPO, MODEL_FILES['lora_retroart'])
198
- pipe.load_lora_weights(lora_path_retroart, adapter_name="retroart")
199
- print(f" [OK] RetroArt LoRA loaded successfully")
200
- success_count += 1
201
  except Exception as e:
202
- print(f" [WARNING] Could not load RetroArt LoRA: {e}")
203
-
204
- # Load VGA LoRA
205
- try:
206
- lora_path_vga = download_model_with_retry(MODEL_REPO, MODEL_FILES['lora_vga'])
207
- pipe.load_lora_weights(lora_path_vga, adapter_name="vga")
208
- print(f" [OK] VGA LoRA loaded successfully")
209
- success_count += 1
210
- except Exception as e:
211
- print(f" [WARNING] Could not load VGA LoRA: {e}")
212
-
213
- return success_count > 0
214
 
215
 
216
  def setup_ip_adapter(pipe, image_encoder):
 
13
  from diffusers.models.attention_processor import AttnProcessor2_0
14
  from transformers import CLIPVisionModelWithProjection
15
  from insightface.app import FaceAnalysis
16
+ from controlnet_aux import OpenposeDetector, LeresDetector, MidasDetector, MediapipeFaceDetector
17
  from huggingface_hub import hf_hub_download
18
  from compel import Compel, ReturnedEmbeddingsType
19
 
 
82
 
83
 
84
  def load_depth_detector():
85
+ """
86
+ Load depth detector with fallback hierarchy: Leres -> Midas.
87
+ Returns (detector, detector_type, success).
88
+ """
89
+ print("Loading depth detector with fallback hierarchy...")
90
+
91
+ # Try LeresDetector first (best quality)
92
  try:
93
+ print(" Attempting LeresDetector (highest quality)...")
94
+ leres_depth = LeresDetector.from_pretrained("lllyasviel/Annotators")
95
+ leres_depth.to(device)
96
+ print(" [OK] LeresDetector loaded successfully")
97
+ return leres_depth, 'leres', True
98
  except Exception as e:
99
+ print(f" [INFO] LeresDetector not available: {e}")
100
+
101
+ # Fallback to MidasDetector
102
+ try:
103
+ print(" Attempting MidasDetector (fallback)...")
104
+ midas_depth = MidasDetector.from_pretrained("lllyasviel/Annotators")
105
+ midas_depth.to(device)
106
+ print(" [OK] MidasDetector loaded successfully")
107
+ return midas_depth, 'midas', True
108
+ except Exception as e:
109
+ print(f" [WARNING] MidasDetector not available: {e}")
110
+
111
+ print(" [ERROR] No depth detector available")
112
+ return None, None, False
113
 
114
  # --- NEW FUNCTION ---
115
  def load_openpose_detector():
 
125
  return None, False
126
  # --- END NEW FUNCTION ---
127
 
128
+ # --- NEW FUNCTION ---
129
+ def load_mediapipe_face_detector():
130
+ """Load MediapipeFaceDetector for advanced face detection."""
131
+ print("Loading MediapipeFaceDetector...")
132
+ try:
133
+ face_detector = MediapipeFaceDetector()
134
+ print(" [OK] MediapipeFaceDetector loaded successfully")
135
+ return face_detector, True
136
+ except Exception as e:
137
+ print(f" [WARNING] MediapipeFaceDetector not available: {e}")
138
+ return None, False
139
+ # --- END NEW FUNCTION ---
140
+
141
  def load_controlnets():
142
  """Load ControlNet models."""
143
+ print("Loading ControlNet Depth model...")
144
  controlnet_depth = ControlNetModel.from_pretrained(
145
+ "diffusers/controlnet-zoe-depth-sdxl-1.0", # Model repo name (not tied to detector)
146
  torch_dtype=dtype
147
  ).to(device)
148
  print(" [OK] ControlNet Depth loaded")
 
219
 
220
 
221
  def load_lora(pipe):
222
+ """Load LORA from HuggingFace Hub."""
223
+ print("Loading LORA (retroart) from HuggingFace Hub...")
 
 
 
224
  try:
225
+ lora_path = download_model_with_retry(MODEL_REPO, MODEL_FILES['lora'])
226
+ pipe.load_lora_weights(lora_path, adapter_name="retroart")
227
+ print(f" [OK] LORA loaded successfully")
228
+ return True
229
  except Exception as e:
230
+ print(f" [WARNING] Could not load LORA: {e}")
231
+ return False
 
 
 
 
 
 
 
 
 
 
232
 
233
 
234
  def setup_ip_adapter(pipe, image_encoder):