PranavCR01 committed on
Commit
233452b
·
1 Parent(s): f5c077e

feat: swap backbone from CLIP to SigLIP 2 (google/siglip2-base-patch16-224)

Browse files
Files changed (4) hide show
  1. app.py +1 -1
  2. clip_head.py +6 -6
  3. gradcam.py +5 -5
  4. model_loader.py +4 -4
app.py CHANGED
@@ -125,7 +125,7 @@ async def score(
125
  pixel_values = inputs["pixel_values"] # (1, 3, 224, 224), CPU
126
 
127
  with torch.no_grad():
128
- clip_out = model.clip(pixel_values=pixel_values)
129
  embedding = clip_out.pooler_output # (1, 768)
130
  outputs = model(embedding=embedding)
131
 
 
125
  pixel_values = inputs["pixel_values"] # (1, 3, 224, 224), CPU
126
 
127
  with torch.no_grad():
128
+ clip_out = model.backbone(pixel_values=pixel_values)
129
  embedding = clip_out.pooler_output # (1, 768)
130
  outputs = model(embedding=embedding)
131
 
clip_head.py CHANGED
@@ -1,21 +1,21 @@
1
  import torch
2
  import torch.nn as nn
3
- from transformers import CLIPVisionModel
4
 
5
 
6
  class CreativeScorer(nn.Module):
7
  def __init__(self):
8
  super().__init__()
9
- # Frozen CLIP backbone — NEVER set requires_grad=True on these params
10
- self.clip = CLIPVisionModel.from_pretrained(
11
- "openai/clip-vit-base-patch32",
12
  use_safetensors=True,
13
  )
14
- for param in self.clip.parameters():
15
  param.requires_grad = False
16
 
17
  # Fail fast if backbone accidentally gets unfrozen anywhere downstream
18
- assert not any(p.requires_grad for p in self.clip.parameters())
19
 
20
  # Trainable head only
21
  self.projection = nn.Sequential(
 
1
  import torch
2
  import torch.nn as nn
3
+ from transformers import SiglipVisionModel
4
 
5
 
6
  class CreativeScorer(nn.Module):
7
  def __init__(self):
8
  super().__init__()
9
+ # Frozen SigLIP 2 backbone — NEVER set requires_grad=True on these params
10
+ self.backbone = SiglipVisionModel.from_pretrained(
11
+ "google/siglip2-base-patch16-224",
12
  use_safetensors=True,
13
  )
14
+ for param in self.backbone.parameters():
15
  param.requires_grad = False
16
 
17
  # Fail fast if backbone accidentally gets unfrozen anywhere downstream
18
+ assert not any(p.requires_grad for p in self.backbone.parameters())
19
 
20
  # Trainable head only
21
  self.projection = nn.Sequential(
gradcam.py CHANGED
@@ -5,14 +5,14 @@ import cv2
5
  import numpy as np
6
  import torch
7
  from PIL import Image
8
- from transformers import CLIPProcessor
9
 
10
  from clip_head import CreativeScorer
11
 
12
 
13
  def _compute_cam(
14
  model: CreativeScorer,
15
- processor: CLIPProcessor,
16
  image: Image.Image,
17
  device: str,
18
  ) -> tuple[np.ndarray, np.ndarray]:
@@ -72,7 +72,7 @@ def _compute_cam(
72
 
73
  def generate_heatmap(
74
  model: CreativeScorer,
75
- processor: CLIPProcessor,
76
  image: Image.Image,
77
  device: str = "cpu",
78
  ) -> np.ndarray:
@@ -83,7 +83,7 @@ def generate_heatmap(
83
 
84
  def generate_heatmap_with_cam(
85
  model: CreativeScorer,
86
- processor: CLIPProcessor,
87
  image: Image.Image,
88
  device: str = "cpu",
89
  ) -> tuple[np.ndarray, np.ndarray]:
@@ -93,7 +93,7 @@ def generate_heatmap_with_cam(
93
 
94
  def save_heatmaps(
95
  model: CreativeScorer,
96
- processor: CLIPProcessor,
97
  image_paths: List[str],
98
  output_dir: str,
99
  device: str = "cpu",
 
5
  import numpy as np
6
  import torch
7
  from PIL import Image
8
+ from transformers import AutoProcessor
9
 
10
  from clip_head import CreativeScorer
11
 
12
 
13
  def _compute_cam(
14
  model: CreativeScorer,
15
+ processor: AutoProcessor,
16
  image: Image.Image,
17
  device: str,
18
  ) -> tuple[np.ndarray, np.ndarray]:
 
72
 
73
  def generate_heatmap(
74
  model: CreativeScorer,
75
+ processor: AutoProcessor,
76
  image: Image.Image,
77
  device: str = "cpu",
78
  ) -> np.ndarray:
 
83
 
84
  def generate_heatmap_with_cam(
85
  model: CreativeScorer,
86
+ processor: AutoProcessor,
87
  image: Image.Image,
88
  device: str = "cpu",
89
  ) -> tuple[np.ndarray, np.ndarray]:
 
93
 
94
  def save_heatmaps(
95
  model: CreativeScorer,
96
+ processor: AutoProcessor,
97
  image_paths: List[str],
98
  output_dir: str,
99
  device: str = "cpu",
model_loader.py CHANGED
@@ -2,15 +2,15 @@ import os
2
 
3
  import torch
4
  from huggingface_hub import hf_hub_download
5
- from transformers import CLIPProcessor
6
 
7
  from clip_head import CreativeScorer
8
 
9
  _model: CreativeScorer | None = None
10
- _processor: CLIPProcessor | None = None
11
 
12
 
13
- def get_model() -> tuple[CreativeScorer, CLIPProcessor]:
14
  global _model, _processor
15
  if _model is None:
16
  try:
@@ -19,7 +19,7 @@ def get_model() -> tuple[CreativeScorer, CLIPProcessor]:
19
 
20
  print(f"[model_loader] Loading from repo: {hf_repo}", flush=True)
21
 
22
- _processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
23
  print("[model_loader] Processor loaded", flush=True)
24
 
25
  _model = CreativeScorer()
 
2
 
3
  import torch
4
  from huggingface_hub import hf_hub_download
5
+ from transformers import AutoProcessor
6
 
7
  from clip_head import CreativeScorer
8
 
9
  _model: CreativeScorer | None = None
10
+ _processor: AutoProcessor | None = None
11
 
12
 
13
+ def get_model() -> tuple[CreativeScorer, AutoProcessor]:
14
  global _model, _processor
15
  if _model is None:
16
  try:
 
19
 
20
  print(f"[model_loader] Loading from repo: {hf_repo}", flush=True)
21
 
22
+ _processor = AutoProcessor.from_pretrained("google/siglip2-base-patch16-224")
23
  print("[model_loader] Processor loaded", flush=True)
24
 
25
  _model = CreativeScorer()