FoodDesert committed on
Commit
c4b9ff7
·
verified ·
1 Parent(s): 14ff5a0

Keep classifier fp16 on CPU by default

Browse files
Files changed (1) hide show
  1. app.py +23 -14
app.py CHANGED
@@ -806,15 +806,19 @@ def _load_tag_classifier_bundle() -> Optional[Dict[str, Any]]:
806
  labels_raw = json.loads(labels_path.read_text(encoding="utf-8"))
807
  labels = [_norm_tag_for_lookup(str(x)) for x in labels_raw]
808
  tokenizer = AutoTokenizer.from_pretrained(model_dir, local_files_only=True)
809
- model = AutoModelForSequenceClassification.from_pretrained(model_dir, local_files_only=True)
810
  device_raw = (os.environ.get("PSQ_TAG_CLASSIFIER_DEVICE", "auto") or "auto").strip().lower()
811
  if device_raw == "auto":
812
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
813
  else:
814
  device = torch.device(device_raw)
815
- if device.type == "cpu":
816
- # The deployed classifier may be stored as fp16 to fit Space storage.
817
- # Run CPU inference in fp32 for broader operator support.
 
 
 
 
 
818
  model.float()
819
  model.to(device)
820
  model.eval()
@@ -886,16 +890,21 @@ def _run_tag_classifier(
886
  if not text:
887
  return empty
888
 
889
- with torch.no_grad():
890
- enc = tokenizer(
891
- [text],
892
- padding=True,
893
- truncation=True,
894
- max_length=max_len,
895
- return_tensors="pt",
896
- )
897
- enc = {k: v.to(device) for k, v in enc.items()}
898
- probs = torch.sigmoid(model(**enc).logits)[0].detach().cpu().tolist()
 
 
 
 
 
899
 
900
  ranked = sorted(
901
  ((labels[i], float(score)) for i, score in enumerate(probs) if i < len(labels)),
 
806
  labels_raw = json.loads(labels_path.read_text(encoding="utf-8"))
807
  labels = [_norm_tag_for_lookup(str(x)) for x in labels_raw]
808
  tokenizer = AutoTokenizer.from_pretrained(model_dir, local_files_only=True)
 
809
  device_raw = (os.environ.get("PSQ_TAG_CLASSIFIER_DEVICE", "auto") or "auto").strip().lower()
810
  if device_raw == "auto":
811
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
812
  else:
813
  device = torch.device(device_raw)
814
+ load_kwargs: Dict[str, Any] = {"local_files_only": True}
815
+ cpu_dtype = (os.environ.get("PSQ_TAG_CLASSIFIER_CPU_DTYPE", "float16") or "").strip().lower()
816
+ if device.type == "cpu" and cpu_dtype in {"float16", "fp16", "half"}:
817
+ # Keep the deployed classifier resident in fp16 on CPU by default.
818
+ # This reduces RAM pressure on the Space; set PSQ_TAG_CLASSIFIER_CPU_DTYPE=float32 if needed.
819
+ load_kwargs["torch_dtype"] = torch.float16
820
+ model = AutoModelForSequenceClassification.from_pretrained(model_dir, **load_kwargs)
821
+ if device.type == "cpu" and cpu_dtype in {"float32", "fp32", "full"}:
822
  model.float()
823
  model.to(device)
824
  model.eval()
 
890
  if not text:
891
  return empty
892
 
893
+ try:
894
+ with torch.no_grad():
895
+ enc = tokenizer(
896
+ [text],
897
+ padding=True,
898
+ truncation=True,
899
+ max_length=max_len,
900
+ return_tensors="pt",
901
+ )
902
+ enc = {k: v.to(device) for k, v in enc.items()}
903
+ probs = torch.sigmoid(model(**enc).logits)[0].detach().cpu().tolist()
904
+ except Exception as e:
905
+ if log:
906
+ log(f"Classifier: failed during inference; skipping ({type(e).__name__}: {_redact_console_error_text(e)})")
907
+ return empty
908
 
909
  ranked = sorted(
910
  ((labels[i], float(score)) for i, score in enumerate(probs) if i < len(labels)),