Spaces:
Runtime error
Runtime error
Upload 2 files
Browse files- generator.py +163 -102
- models.py +50 -32
generator.py
CHANGED
|
@@ -20,7 +20,7 @@ from models import (
|
|
| 20 |
load_face_analysis, load_depth_detector, load_controlnets, load_image_encoder,
|
| 21 |
load_sdxl_pipeline, load_lora, setup_ip_adapter, setup_compel,
|
| 22 |
setup_scheduler, optimize_pipeline, load_caption_model, set_clip_skip,
|
| 23 |
-
load_openpose_detector
|
| 24 |
)
|
| 25 |
|
| 26 |
|
|
@@ -34,17 +34,24 @@ class RetroArtConverter:
|
|
| 34 |
'custom_checkpoint': False,
|
| 35 |
'lora': False,
|
| 36 |
'instantid': False,
|
| 37 |
-
'
|
|
|
|
| 38 |
'ip_adapter': False,
|
| 39 |
-
'openpose': False
|
|
|
|
| 40 |
}
|
| 41 |
|
| 42 |
-
# Initialize face analysis
|
| 43 |
self.face_app, self.face_detection_enabled = load_face_analysis()
|
| 44 |
|
| 45 |
-
# Load
|
| 46 |
-
self.
|
| 47 |
-
self.models_loaded['
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
|
| 49 |
# --- NEW: Load OpenPose detector ---
|
| 50 |
self.openpose_detector, openpose_success = load_openpose_detector()
|
|
@@ -182,8 +189,11 @@ class RetroArtConverter:
|
|
| 182 |
print("============================\n")
|
| 183 |
|
| 184 |
def get_depth_map(self, image):
|
| 185 |
-
"""
|
| 186 |
-
|
|
|
|
|
|
|
|
|
|
| 187 |
try:
|
| 188 |
if image.mode != 'RGB':
|
| 189 |
image = image.convert('RGB')
|
|
@@ -203,40 +213,38 @@ class RetroArtConverter:
|
|
| 203 |
image_for_depth = image.resize(size_for_depth, Image.LANCZOS)
|
| 204 |
|
| 205 |
if target_width != orig_width or target_height != orig_height:
|
| 206 |
-
print(f"[DEPTH] Resized for
|
| 207 |
|
| 208 |
# FIXED: Add torch.no_grad() wrapper
|
| 209 |
with torch.no_grad():
|
| 210 |
-
depth_image = self.
|
| 211 |
|
| 212 |
depth_width, depth_height = depth_image.size
|
| 213 |
if depth_width != orig_width or depth_height != orig_height:
|
| 214 |
depth_image = depth_image.resize((int(orig_width), int(orig_height)), Image.LANCZOS)
|
| 215 |
|
| 216 |
-
print(f"[DEPTH]
|
| 217 |
return depth_image
|
| 218 |
|
| 219 |
except Exception as e:
|
| 220 |
-
print(f"[DEPTH]
|
| 221 |
gray = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2GRAY)
|
| 222 |
depth_colored = cv2.cvtColor(gray, cv2.COLOR_GRAY2RGB)
|
| 223 |
return Image.fromarray(depth_colored)
|
| 224 |
else:
|
|
|
|
|
|
|
| 225 |
gray = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2GRAY)
|
| 226 |
depth_colored = cv2.cvtColor(gray, cv2.COLOR_GRAY2RGB)
|
| 227 |
return Image.fromarray(depth_colored)
|
| 228 |
|
| 229 |
|
| 230 |
-
def add_trigger_word(self, prompt
|
| 231 |
"""Add trigger word to prompt if not present"""
|
| 232 |
-
|
| 233 |
-
|
| 234 |
-
trigger_word = LORA_CONFIGS.get(lora_name, {}).get("trigger", TRIGGER_WORD)
|
| 235 |
-
|
| 236 |
-
if trigger_word.lower() not in prompt.lower():
|
| 237 |
if not prompt or not prompt.strip():
|
| 238 |
-
return
|
| 239 |
-
return f"{
|
| 240 |
return prompt
|
| 241 |
|
| 242 |
def extract_multi_scale_face(self, face_crop, face):
|
|
@@ -487,7 +495,6 @@ class RetroArtConverter:
|
|
| 487 |
identity_control_scale=0.85,
|
| 488 |
expression_control_scale=0.6,
|
| 489 |
lora_scale=1.0,
|
| 490 |
-
lora_name="retroart",
|
| 491 |
identity_preservation=0.8,
|
| 492 |
strength=0.75,
|
| 493 |
enable_color_matching=False,
|
|
@@ -514,7 +521,7 @@ class RetroArtConverter:
|
|
| 514 |
)
|
| 515 |
|
| 516 |
# Add trigger word
|
| 517 |
-
prompt = self.add_trigger_word(prompt
|
| 518 |
|
| 519 |
# Calculate optimal size with flexible aspect ratio support
|
| 520 |
original_width, original_height = input_image.size
|
|
@@ -532,7 +539,7 @@ class RetroArtConverter:
|
|
| 532 |
# Generate depth map
|
| 533 |
depth_image = None
|
| 534 |
if self.depth_active:
|
| 535 |
-
print("Generating
|
| 536 |
depth_image = self.get_depth_map(resized_image)
|
| 537 |
if depth_image.size != (target_width, target_height):
|
| 538 |
depth_image = depth_image.resize((int(target_width), int(target_height)), Image.LANCZOS)
|
|
@@ -557,88 +564,142 @@ class RetroArtConverter:
|
|
| 557 |
has_detected_faces = False
|
| 558 |
face_bbox_original = None
|
| 559 |
|
| 560 |
-
if self.instantid_active
|
| 561 |
-
|
| 562 |
-
|
| 563 |
-
|
| 564 |
|
| 565 |
-
if
|
| 566 |
-
|
| 567 |
-
|
| 568 |
-
|
| 569 |
-
# Get largest face
|
| 570 |
-
face = sorted(faces, key=lambda x: (x.bbox[2] - x.bbox[0]) * (x.bbox[3] - x.bbox[1]))[-1]
|
| 571 |
-
|
| 572 |
-
# ADAPTIVE PARAMETERS
|
| 573 |
-
adaptive_params = self.detect_face_quality(face)
|
| 574 |
-
if adaptive_params is not None:
|
| 575 |
-
print(f"[ADAPTIVE] {adaptive_params['reason']}")
|
| 576 |
-
identity_preservation = adaptive_params['identity_preservation']
|
| 577 |
-
identity_control_scale = adaptive_params['identity_control_scale']
|
| 578 |
-
guidance_scale = adaptive_params['guidance_scale']
|
| 579 |
-
lora_scale = adaptive_params['lora_scale']
|
| 580 |
-
|
| 581 |
-
# Extract face embeddings
|
| 582 |
-
face_embeddings_base = face.normed_embedding
|
| 583 |
-
|
| 584 |
-
# Extract face crop
|
| 585 |
-
bbox = face.bbox.astype(int)
|
| 586 |
-
x1, y1, x2, y2 = bbox[0], bbox[1], bbox[2], bbox[3]
|
| 587 |
-
face_bbox_original = [x1, y1, x2, y2]
|
| 588 |
-
|
| 589 |
-
# Add padding
|
| 590 |
-
face_width = x2 - x1
|
| 591 |
-
face_height = y2 - y1
|
| 592 |
-
padding_x = int(face_width * 0.3)
|
| 593 |
-
padding_y = int(face_height * 0.3)
|
| 594 |
-
x1 = max(0, x1 - padding_x)
|
| 595 |
-
y1 = max(0, y1 - padding_y)
|
| 596 |
-
x2 = min(resized_image.width, x2 + padding_x)
|
| 597 |
-
y2 = min(resized_image.height, y2 + padding_y)
|
| 598 |
-
|
| 599 |
-
# Crop face region
|
| 600 |
-
face_crop = resized_image.crop((x1, y1, x2, y2))
|
| 601 |
-
|
| 602 |
-
# MULTI-SCALE PROCESSING
|
| 603 |
-
face_embeddings = self.extract_multi_scale_face(face_crop, face)
|
| 604 |
-
|
| 605 |
-
# Enhance face crop
|
| 606 |
-
face_crop_enhanced = enhance_face_crop(face_crop)
|
| 607 |
|
| 608 |
-
|
| 609 |
-
|
| 610 |
-
|
| 611 |
-
|
| 612 |
-
|
| 613 |
-
|
| 614 |
-
|
| 615 |
-
|
| 616 |
-
|
| 617 |
-
|
| 618 |
-
|
| 619 |
-
|
| 620 |
-
|
| 621 |
-
|
| 622 |
-
|
| 623 |
-
|
| 624 |
-
|
| 625 |
-
|
| 626 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 627 |
|
| 628 |
# Set LORA scale
|
| 629 |
if hasattr(self.pipe, 'set_adapters') and self.models_loaded['lora']:
|
| 630 |
try:
|
| 631 |
-
self.pipe.set_adapters([
|
| 632 |
-
print(f"
|
| 633 |
except Exception as e:
|
| 634 |
-
print(f"Could not set
|
| 635 |
-
# Try fallback to retroart if selected LoRA fails
|
| 636 |
-
if lora_name != "retroart":
|
| 637 |
-
try:
|
| 638 |
-
self.pipe.set_adapters(["retroart"], adapter_weights=[lora_scale])
|
| 639 |
-
print(f"Fallback to RetroArt LoRA, scale: {lora_scale}")
|
| 640 |
-
except:
|
| 641 |
-
pass
|
| 642 |
|
| 643 |
# Prepare generation kwargs
|
| 644 |
pipe_kwargs = {
|
|
@@ -727,11 +788,11 @@ class RetroArtConverter:
|
|
| 727 |
print(" Face detected but IP-Adapter/embeddings unavailable, using keypoints only")
|
| 728 |
|
| 729 |
else:
|
| 730 |
-
# No face
|
| 731 |
-
print("Using blank map
|
| 732 |
control_images.append(Image.new("RGB", (target_width, target_height), (0,0,0)))
|
| 733 |
conditioning_scales.append(0.0) # Set scale to 0
|
| 734 |
-
scale_debug_str.append("Identity: 0.00")
|
| 735 |
|
| 736 |
# 2. Depth
|
| 737 |
if self.depth_active:
|
|
|
|
| 20 |
load_face_analysis, load_depth_detector, load_controlnets, load_image_encoder,
|
| 21 |
load_sdxl_pipeline, load_lora, setup_ip_adapter, setup_compel,
|
| 22 |
setup_scheduler, optimize_pipeline, load_caption_model, set_clip_skip,
|
| 23 |
+
load_openpose_detector, load_mediapipe_face_detector
|
| 24 |
)
|
| 25 |
|
| 26 |
|
|
|
|
| 34 |
'custom_checkpoint': False,
|
| 35 |
'lora': False,
|
| 36 |
'instantid': False,
|
| 37 |
+
'depth_detector': False,
|
| 38 |
+
'depth_type': None,
|
| 39 |
'ip_adapter': False,
|
| 40 |
+
'openpose': False,
|
| 41 |
+
'mediapipe_face': False
|
| 42 |
}
|
| 43 |
|
| 44 |
+
# Initialize face analysis (InsightFace)
|
| 45 |
self.face_app, self.face_detection_enabled = load_face_analysis()
|
| 46 |
|
| 47 |
+
# Load MediapipeFaceDetector (alternative face detection)
|
| 48 |
+
self.mediapipe_face, mediapipe_success = load_mediapipe_face_detector()
|
| 49 |
+
self.models_loaded['mediapipe_face'] = mediapipe_success
|
| 50 |
+
|
| 51 |
+
# Load Depth detector with fallback hierarchy (Leres -> Midas)
|
| 52 |
+
self.depth_detector, self.depth_type, depth_success = load_depth_detector()
|
| 53 |
+
self.models_loaded['depth_detector'] = depth_success
|
| 54 |
+
self.models_loaded['depth_type'] = self.depth_type
|
| 55 |
|
| 56 |
# --- NEW: Load OpenPose detector ---
|
| 57 |
self.openpose_detector, openpose_success = load_openpose_detector()
|
|
|
|
| 189 |
print("============================\n")
|
| 190 |
|
| 191 |
def get_depth_map(self, image):
|
| 192 |
+
"""
|
| 193 |
+
Generate depth map using available depth detector.
|
| 194 |
+
Supports: LeresDetector or MidasDetector.
|
| 195 |
+
"""
|
| 196 |
+
if self.depth_detector is not None:
|
| 197 |
try:
|
| 198 |
if image.mode != 'RGB':
|
| 199 |
image = image.convert('RGB')
|
|
|
|
| 213 |
image_for_depth = image.resize(size_for_depth, Image.LANCZOS)
|
| 214 |
|
| 215 |
if target_width != orig_width or target_height != orig_height:
|
| 216 |
+
print(f"[DEPTH] Resized for {self.depth_type.upper()}Detector: {orig_width}x{orig_height} -> {target_width}x{target_height}")
|
| 217 |
|
| 218 |
# FIXED: Add torch.no_grad() wrapper
|
| 219 |
with torch.no_grad():
|
| 220 |
+
depth_image = self.depth_detector(image_for_depth)
|
| 221 |
|
| 222 |
depth_width, depth_height = depth_image.size
|
| 223 |
if depth_width != orig_width or depth_height != orig_height:
|
| 224 |
depth_image = depth_image.resize((int(orig_width), int(orig_height)), Image.LANCZOS)
|
| 225 |
|
| 226 |
+
print(f"[DEPTH] {self.depth_type.upper()} depth map generated: {orig_width}x{orig_height}")
|
| 227 |
return depth_image
|
| 228 |
|
| 229 |
except Exception as e:
|
| 230 |
+
print(f"[DEPTH] {self.depth_type.upper()}Detector failed ({e}), falling back to grayscale depth")
|
| 231 |
gray = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2GRAY)
|
| 232 |
depth_colored = cv2.cvtColor(gray, cv2.COLOR_GRAY2RGB)
|
| 233 |
return Image.fromarray(depth_colored)
|
| 234 |
else:
|
| 235 |
+
# No depth detector available, use grayscale fallback
|
| 236 |
+
print("[DEPTH] No depth detector available, using grayscale fallback")
|
| 237 |
gray = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2GRAY)
|
| 238 |
depth_colored = cv2.cvtColor(gray, cv2.COLOR_GRAY2RGB)
|
| 239 |
return Image.fromarray(depth_colored)
|
| 240 |
|
| 241 |
|
| 242 |
+
def add_trigger_word(self, prompt):
    """Add the LoRA trigger word to ``prompt`` if it is not already present.

    Args:
        prompt: User prompt text. May be ``None``, empty, or whitespace-only.

    Returns:
        ``TRIGGER_WORD`` alone when the prompt is ``None``/blank, the prompt
        unchanged when it already contains the trigger word (case-insensitive
        substring match), otherwise ``"<TRIGGER_WORD>, <prompt>"``.
    """
    # Check for a missing/blank prompt FIRST. Previously prompt.lower() was
    # evaluated before this guard, so a None prompt raised AttributeError
    # and the guard below it could never protect against it.
    if not prompt or not prompt.strip():
        return TRIGGER_WORD

    # Case-insensitive containment test; leave the prompt as-is on a hit.
    if TRIGGER_WORD.lower() in prompt.lower():
        return prompt

    return f"{TRIGGER_WORD}, {prompt}"
|
| 249 |
|
| 250 |
def extract_multi_scale_face(self, face_crop, face):
|
|
|
|
| 495 |
identity_control_scale=0.85,
|
| 496 |
expression_control_scale=0.6,
|
| 497 |
lora_scale=1.0,
|
|
|
|
| 498 |
identity_preservation=0.8,
|
| 499 |
strength=0.75,
|
| 500 |
enable_color_matching=False,
|
|
|
|
| 521 |
)
|
| 522 |
|
| 523 |
# Add trigger word
|
| 524 |
+
prompt = self.add_trigger_word(prompt)
|
| 525 |
|
| 526 |
# Calculate optimal size with flexible aspect ratio support
|
| 527 |
original_width, original_height = input_image.size
|
|
|
|
| 539 |
# Generate depth map
|
| 540 |
depth_image = None
|
| 541 |
if self.depth_active:
|
| 542 |
+
print("Generating depth map...")
|
| 543 |
depth_image = self.get_depth_map(resized_image)
|
| 544 |
if depth_image.size != (target_width, target_height):
|
| 545 |
depth_image = depth_image.resize((int(target_width), int(target_height)), Image.LANCZOS)
|
|
|
|
| 564 |
has_detected_faces = False
|
| 565 |
face_bbox_original = None
|
| 566 |
|
| 567 |
+
if self.instantid_active:
|
| 568 |
+
# Try InsightFace first (if available)
|
| 569 |
+
insightface_tried = False
|
| 570 |
+
insightface_success = False
|
| 571 |
|
| 572 |
+
if self.face_app is not None:
|
| 573 |
+
print("Detecting faces with InsightFace...")
|
| 574 |
+
insightface_tried = True
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 575 |
|
| 576 |
+
try:
|
| 577 |
+
img_array = cv2.cvtColor(np.array(resized_image), cv2.COLOR_RGB2BGR)
|
| 578 |
+
faces = self.face_app.get(img_array)
|
| 579 |
+
|
| 580 |
+
if len(faces) > 0:
|
| 581 |
+
insightface_success = True
|
| 582 |
+
has_detected_faces = True
|
| 583 |
+
print(f"✓ InsightFace detected {len(faces)} face(s)")
|
| 584 |
+
|
| 585 |
+
# Get largest face
|
| 586 |
+
face = sorted(faces, key=lambda x: (x.bbox[2] - x.bbox[0]) * (x.bbox[3] - x.bbox[1]))[-1]
|
| 587 |
+
|
| 588 |
+
# ADAPTIVE PARAMETERS
|
| 589 |
+
adaptive_params = self.detect_face_quality(face)
|
| 590 |
+
if adaptive_params is not None:
|
| 591 |
+
print(f"[ADAPTIVE] {adaptive_params['reason']}")
|
| 592 |
+
identity_preservation = adaptive_params['identity_preservation']
|
| 593 |
+
identity_control_scale = adaptive_params['identity_control_scale']
|
| 594 |
+
guidance_scale = adaptive_params['guidance_scale']
|
| 595 |
+
lora_scale = adaptive_params['lora_scale']
|
| 596 |
+
|
| 597 |
+
# Extract face embeddings
|
| 598 |
+
face_embeddings_base = face.normed_embedding
|
| 599 |
+
|
| 600 |
+
# Extract face crop
|
| 601 |
+
bbox = face.bbox.astype(int)
|
| 602 |
+
x1, y1, x2, y2 = bbox[0], bbox[1], bbox[2], bbox[3]
|
| 603 |
+
face_bbox_original = [x1, y1, x2, y2]
|
| 604 |
+
|
| 605 |
+
# Add padding
|
| 606 |
+
face_width = x2 - x1
|
| 607 |
+
face_height = y2 - y1
|
| 608 |
+
padding_x = int(face_width * 0.3)
|
| 609 |
+
padding_y = int(face_height * 0.3)
|
| 610 |
+
x1 = max(0, x1 - padding_x)
|
| 611 |
+
y1 = max(0, y1 - padding_y)
|
| 612 |
+
x2 = min(resized_image.width, x2 + padding_x)
|
| 613 |
+
y2 = min(resized_image.height, y2 + padding_y)
|
| 614 |
+
|
| 615 |
+
# Crop face region
|
| 616 |
+
face_crop = resized_image.crop((x1, y1, x2, y2))
|
| 617 |
+
|
| 618 |
+
# MULTI-SCALE PROCESSING
|
| 619 |
+
face_embeddings = self.extract_multi_scale_face(face_crop, face)
|
| 620 |
+
|
| 621 |
+
# Enhance face crop
|
| 622 |
+
face_crop_enhanced = enhance_face_crop(face_crop)
|
| 623 |
+
|
| 624 |
+
# Draw keypoints
|
| 625 |
+
face_kps = face.kps
|
| 626 |
+
face_kps_image = draw_kps(resized_image, face_kps)
|
| 627 |
+
|
| 628 |
+
# ENHANCED: Extract comprehensive facial attributes
|
| 629 |
+
from utils import get_facial_attributes, build_enhanced_prompt
|
| 630 |
+
facial_attrs = get_facial_attributes(face)
|
| 631 |
+
|
| 632 |
+
# Update prompt with detected attributes
|
| 633 |
+
prompt = build_enhanced_prompt(prompt, facial_attrs, TRIGGER_WORD)
|
| 634 |
+
|
| 635 |
+
# Legacy output for compatibility
|
| 636 |
+
age = facial_attrs['age']
|
| 637 |
+
gender_code = facial_attrs['gender']
|
| 638 |
+
det_score = facial_attrs['quality']
|
| 639 |
+
|
| 640 |
+
gender_str = 'M' if gender_code == 1 else ('F' if gender_code == 0 else 'N/A')
|
| 641 |
+
print(f"Face info: bbox={face.bbox}, age={age if age else 'N/A'}, gender={gender_str}")
|
| 642 |
+
print(f"Face crop size: {face_crop.size}, enhanced: {face_crop_enhanced.size if face_crop_enhanced else 'N/A'}")
|
| 643 |
+
else:
|
| 644 |
+
print("✗ InsightFace found no faces")
|
| 645 |
+
|
| 646 |
+
except Exception as e:
|
| 647 |
+
print(f"[ERROR] InsightFace detection failed: {e}")
|
| 648 |
+
import traceback
|
| 649 |
+
traceback.print_exc()
|
| 650 |
+
else:
|
| 651 |
+
print("[INFO] InsightFace not available (face_app is None)")
|
| 652 |
+
|
| 653 |
+
# If InsightFace didn't succeed, try MediapipeFace
|
| 654 |
+
if not insightface_success:
|
| 655 |
+
if self.mediapipe_face is not None:
|
| 656 |
+
print("Trying MediapipeFaceDetector as fallback...")
|
| 657 |
+
|
| 658 |
+
try:
|
| 659 |
+
# MediapipeFace returns an annotated image with keypoints
|
| 660 |
+
mediapipe_result = self.mediapipe_face(resized_image)
|
| 661 |
+
|
| 662 |
+
# Check if face was detected (result is not blank/black)
|
| 663 |
+
mediapipe_array = np.array(mediapipe_result)
|
| 664 |
+
if mediapipe_array.sum() > 1000: # If image has significant content
|
| 665 |
+
has_detected_faces = True
|
| 666 |
+
face_kps_image = mediapipe_result
|
| 667 |
+
print(f"✓ MediapipeFace detected face(s)")
|
| 668 |
+
print(f"[INFO] Using MediapipeFace keypoints (no embeddings available)")
|
| 669 |
+
|
| 670 |
+
# Note: MediapipeFace doesn't provide embeddings or detailed info
|
| 671 |
+
# So face_embeddings, face_crop_enhanced remain None
|
| 672 |
+
# InstantID will work with keypoints only (reduced quality)
|
| 673 |
+
else:
|
| 674 |
+
print("✗ MediapipeFace found no faces")
|
| 675 |
+
except Exception as e:
|
| 676 |
+
print(f"[ERROR] MediapipeFace detection failed: {e}")
|
| 677 |
+
import traceback
|
| 678 |
+
traceback.print_exc()
|
| 679 |
+
else:
|
| 680 |
+
print("[INFO] MediapipeFaceDetector not available")
|
| 681 |
+
|
| 682 |
+
# Final summary
|
| 683 |
+
if not has_detected_faces:
|
| 684 |
+
print("\n[SUMMARY] No faces detected by any detector")
|
| 685 |
+
if insightface_tried:
|
| 686 |
+
print(" - InsightFace: tried, found nothing")
|
| 687 |
+
else:
|
| 688 |
+
print(" - InsightFace: not available")
|
| 689 |
+
|
| 690 |
+
if self.mediapipe_face is not None:
|
| 691 |
+
print(" - MediapipeFace: tried, found nothing")
|
| 692 |
+
else:
|
| 693 |
+
print(" - MediapipeFace: not available")
|
| 694 |
+
print()
|
| 695 |
|
| 696 |
# Set LORA scale
|
| 697 |
if hasattr(self.pipe, 'set_adapters') and self.models_loaded['lora']:
|
| 698 |
try:
|
| 699 |
+
self.pipe.set_adapters(["retroart"], adapter_weights=[lora_scale])
|
| 700 |
+
print(f"LORA scale: {lora_scale}")
|
| 701 |
except Exception as e:
|
| 702 |
+
print(f"Could not set LORA scale: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 703 |
|
| 704 |
# Prepare generation kwargs
|
| 705 |
pipe_kwargs = {
|
|
|
|
| 788 |
print(" Face detected but IP-Adapter/embeddings unavailable, using keypoints only")
|
| 789 |
|
| 790 |
else:
|
| 791 |
+
# No face detected - blank map needed to maintain ControlNet list order
|
| 792 |
+
print("[INSTANTID] Using blank map (scale=0, no effect on generation)")
|
| 793 |
control_images.append(Image.new("RGB", (target_width, target_height), (0,0,0)))
|
| 794 |
conditioning_scales.append(0.0) # Set scale to 0
|
| 795 |
+
scale_debug_str.append("Identity: 0.00 (no face)")
|
| 796 |
|
| 797 |
# 2. Depth
|
| 798 |
if self.depth_active:
|
models.py
CHANGED
|
@@ -13,7 +13,7 @@ from diffusers import (
|
|
| 13 |
from diffusers.models.attention_processor import AttnProcessor2_0
|
| 14 |
from transformers import CLIPVisionModelWithProjection
|
| 15 |
from insightface.app import FaceAnalysis
|
| 16 |
-
from controlnet_aux import
|
| 17 |
from huggingface_hub import hf_hub_download
|
| 18 |
from compel import Compel, ReturnedEmbeddingsType
|
| 19 |
|
|
@@ -82,16 +82,34 @@ def load_face_analysis():
|
|
| 82 |
|
| 83 |
|
| 84 |
def load_depth_detector():
|
| 85 |
-
"""
|
| 86 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 87 |
try:
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
|
|
|
| 92 |
except Exception as e:
|
| 93 |
-
print(f" [
|
| 94 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 95 |
|
| 96 |
# --- NEW FUNCTION ---
|
| 97 |
def load_openpose_detector():
|
|
@@ -107,11 +125,24 @@ def load_openpose_detector():
|
|
| 107 |
return None, False
|
| 108 |
# --- END NEW FUNCTION ---
|
| 109 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 110 |
def load_controlnets():
|
| 111 |
"""Load ControlNet models."""
|
| 112 |
-
print("Loading ControlNet
|
| 113 |
controlnet_depth = ControlNetModel.from_pretrained(
|
| 114 |
-
"diffusers/controlnet-zoe-depth-sdxl-1.0",
|
| 115 |
torch_dtype=dtype
|
| 116 |
).to(device)
|
| 117 |
print(" [OK] ControlNet Depth loaded")
|
|
@@ -188,29 +219,16 @@ def load_sdxl_pipeline(controlnets):
|
|
| 188 |
|
| 189 |
|
| 190 |
def load_lora(pipe):
|
| 191 |
-
"""Load
|
| 192 |
-
print("Loading
|
| 193 |
-
success_count = 0
|
| 194 |
-
|
| 195 |
-
# Load RetroArt LoRA
|
| 196 |
try:
|
| 197 |
-
|
| 198 |
-
pipe.load_lora_weights(
|
| 199 |
-
print(f" [OK]
|
| 200 |
-
|
| 201 |
except Exception as e:
|
| 202 |
-
print(f" [WARNING] Could not load
|
| 203 |
-
|
| 204 |
-
# Load VGA LoRA
|
| 205 |
-
try:
|
| 206 |
-
lora_path_vga = download_model_with_retry(MODEL_REPO, MODEL_FILES['lora_vga'])
|
| 207 |
-
pipe.load_lora_weights(lora_path_vga, adapter_name="vga")
|
| 208 |
-
print(f" [OK] VGA LoRA loaded successfully")
|
| 209 |
-
success_count += 1
|
| 210 |
-
except Exception as e:
|
| 211 |
-
print(f" [WARNING] Could not load VGA LoRA: {e}")
|
| 212 |
-
|
| 213 |
-
return success_count > 0
|
| 214 |
|
| 215 |
|
| 216 |
def setup_ip_adapter(pipe, image_encoder):
|
|
|
|
| 13 |
from diffusers.models.attention_processor import AttnProcessor2_0
|
| 14 |
from transformers import CLIPVisionModelWithProjection
|
| 15 |
from insightface.app import FaceAnalysis
|
| 16 |
+
from controlnet_aux import OpenposeDetector, LeresDetector, MidasDetector, MediapipeFaceDetector
|
| 17 |
from huggingface_hub import hf_hub_download
|
| 18 |
from compel import Compel, ReturnedEmbeddingsType
|
| 19 |
|
|
|
|
| 82 |
|
| 83 |
|
| 84 |
def load_depth_detector():
    """
    Load a depth detector, trying LeresDetector first and MidasDetector second.

    Each candidate is created with ``from_pretrained("lllyasviel/Annotators")``
    and then moved to the module-level ``device``. The first candidate that
    loads without raising is returned.

    Returns:
        (detector, detector_type, success): ``detector_type`` is ``'leres'``
        or ``'midas'``; ``(None, None, False)`` when both candidates fail.
    """
    print("Loading depth detector with fallback hierarchy...")

    # (detector class, type tag, name used in logs, attempt note, failure log level)
    candidates = (
        (LeresDetector, 'leres', "LeresDetector", "highest quality", "INFO"),
        (MidasDetector, 'midas', "MidasDetector", "fallback", "WARNING"),
    )

    for detector_cls, tag, label, note, level in candidates:
        try:
            print(f" Attempting {label} ({note})...")
            detector = detector_cls.from_pretrained("lllyasviel/Annotators")
            detector.to(device)
            print(f" [OK] {label} loaded successfully")
            return detector, tag, True
        except Exception as exc:
            # A failed candidate is only logged; the loop moves on to the next one.
            print(f" [{level}] {label} not available: {exc}")

    print(" [ERROR] No depth detector available")
    return None, None, False
|
| 113 |
|
| 114 |
# --- NEW FUNCTION ---
|
| 115 |
def load_openpose_detector():
|
|
|
|
| 125 |
return None, False
|
| 126 |
# --- END NEW FUNCTION ---
|
| 127 |
|
| 128 |
+
# --- NEW FUNCTION ---
|
| 129 |
+
def load_mediapipe_face_detector():
    """Instantiate MediapipeFaceDetector.

    Returns:
        (detector, True) when construction succeeds, (None, False) when it
        raises; the exception is logged rather than propagated.
    """
    print("Loading MediapipeFaceDetector...")

    outcome = (None, False)
    try:
        detector = MediapipeFaceDetector()
        print(" [OK] MediapipeFaceDetector loaded successfully")
        outcome = (detector, True)
    except Exception as exc:
        print(f" [WARNING] MediapipeFaceDetector not available: {exc}")

    return outcome
|
| 139 |
+
# --- END NEW FUNCTION ---
|
| 140 |
+
|
| 141 |
def load_controlnets():
|
| 142 |
"""Load ControlNet models."""
|
| 143 |
+
print("Loading ControlNet Depth model...")
|
| 144 |
controlnet_depth = ControlNetModel.from_pretrained(
|
| 145 |
+
"diffusers/controlnet-zoe-depth-sdxl-1.0", # Model repo name (not tied to detector)
|
| 146 |
torch_dtype=dtype
|
| 147 |
).to(device)
|
| 148 |
print(" [OK] ControlNet Depth loaded")
|
|
|
|
| 219 |
|
| 220 |
|
| 221 |
def load_lora(pipe):
    """Download the LoRA weights and attach them to ``pipe`` as adapter "retroart".

    Args:
        pipe: Pipeline object exposing ``load_lora_weights``.

    Returns:
        True when the weights were downloaded and loaded, False when any step
        raised (the error is logged as a warning, not propagated).
    """
    print("Loading LORA (retroart) from HuggingFace Hub...")

    loaded = False
    try:
        weights_file = download_model_with_retry(MODEL_REPO, MODEL_FILES['lora'])
        pipe.load_lora_weights(weights_file, adapter_name="retroart")
        print(" [OK] LORA loaded successfully")
        loaded = True
    except Exception as exc:
        print(f" [WARNING] Could not load LORA: {exc}")

    return loaded
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 232 |
|
| 233 |
|
| 234 |
def setup_ip_adapter(pipe, image_encoder):
|