Spaces:
Running
Running
Keep classifier fp16 on CPU by default
Browse files
app.py
CHANGED
|
@@ -806,15 +806,19 @@ def _load_tag_classifier_bundle() -> Optional[Dict[str, Any]]:
|
|
| 806 |
labels_raw = json.loads(labels_path.read_text(encoding="utf-8"))
|
| 807 |
labels = [_norm_tag_for_lookup(str(x)) for x in labels_raw]
|
| 808 |
tokenizer = AutoTokenizer.from_pretrained(model_dir, local_files_only=True)
|
| 809 |
-
model = AutoModelForSequenceClassification.from_pretrained(model_dir, local_files_only=True)
|
| 810 |
device_raw = (os.environ.get("PSQ_TAG_CLASSIFIER_DEVICE", "auto") or "auto").strip().lower()
|
| 811 |
if device_raw == "auto":
|
| 812 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 813 |
else:
|
| 814 |
device = torch.device(device_raw)
|
| 815 |
-
|
| 816 |
-
|
| 817 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 818 |
model.float()
|
| 819 |
model.to(device)
|
| 820 |
model.eval()
|
|
@@ -886,16 +890,21 @@ def _run_tag_classifier(
|
|
| 886 |
if not text:
|
| 887 |
return empty
|
| 888 |
|
| 889 |
-
|
| 890 |
-
|
| 891 |
-
|
| 892 |
-
|
| 893 |
-
|
| 894 |
-
|
| 895 |
-
|
| 896 |
-
|
| 897 |
-
|
| 898 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 899 |
|
| 900 |
ranked = sorted(
|
| 901 |
((labels[i], float(score)) for i, score in enumerate(probs) if i < len(labels)),
|
|
|
|
| 806 |
labels_raw = json.loads(labels_path.read_text(encoding="utf-8"))
|
| 807 |
labels = [_norm_tag_for_lookup(str(x)) for x in labels_raw]
|
| 808 |
tokenizer = AutoTokenizer.from_pretrained(model_dir, local_files_only=True)
|
|
|
|
| 809 |
device_raw = (os.environ.get("PSQ_TAG_CLASSIFIER_DEVICE", "auto") or "auto").strip().lower()
|
| 810 |
if device_raw == "auto":
|
| 811 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 812 |
else:
|
| 813 |
device = torch.device(device_raw)
|
| 814 |
+
load_kwargs: Dict[str, Any] = {"local_files_only": True}
|
| 815 |
+
cpu_dtype = (os.environ.get("PSQ_TAG_CLASSIFIER_CPU_DTYPE", "float16") or "").strip().lower()
|
| 816 |
+
if device.type == "cpu" and cpu_dtype in {"float16", "fp16", "half"}:
|
| 817 |
+
# Keep the deployed classifier resident in fp16 on CPU by default.
|
| 818 |
+
# This reduces RAM pressure on the Space; set PSQ_TAG_CLASSIFIER_CPU_DTYPE=float32 if needed.
|
| 819 |
+
load_kwargs["torch_dtype"] = torch.float16
|
| 820 |
+
model = AutoModelForSequenceClassification.from_pretrained(model_dir, **load_kwargs)
|
| 821 |
+
if device.type == "cpu" and cpu_dtype in {"float32", "fp32", "full"}:
|
| 822 |
model.float()
|
| 823 |
model.to(device)
|
| 824 |
model.eval()
|
|
|
|
| 890 |
if not text:
|
| 891 |
return empty
|
| 892 |
|
| 893 |
+
try:
|
| 894 |
+
with torch.no_grad():
|
| 895 |
+
enc = tokenizer(
|
| 896 |
+
[text],
|
| 897 |
+
padding=True,
|
| 898 |
+
truncation=True,
|
| 899 |
+
max_length=max_len,
|
| 900 |
+
return_tensors="pt",
|
| 901 |
+
)
|
| 902 |
+
enc = {k: v.to(device) for k, v in enc.items()}
|
| 903 |
+
probs = torch.sigmoid(model(**enc).logits)[0].detach().cpu().tolist()
|
| 904 |
+
except Exception as e:
|
| 905 |
+
if log:
|
| 906 |
+
log(f"Classifier: failed during inference; skipping ({type(e).__name__}: {_redact_console_error_text(e)})")
|
| 907 |
+
return empty
|
| 908 |
|
| 909 |
ranked = sorted(
|
| 910 |
((labels[i], float(score)) for i, score in enumerate(probs) if i < len(labels)),
|