bbkdevops's picture
download
raw
6.11 kB
from __future__ import annotations
import argparse
import json
import logging
import re
import sys
import time
import warnings
from pathlib import Path
# Reduce log/warning noise from the ML stack. These statements sit above the
# torch/peft/transformers imports in this file -- presumably so the filters are
# already installed when those packages are loaded (TODO confirm).
logging.getLogger("torch.utils.flop_counter").setLevel(logging.ERROR)
# Ignore the "_check_is_size will be removed ..." FutureWarning, but only when
# it is attributed to the bitsandbytes CUDA ops module.
warnings.filterwarnings(
    "ignore",
    message=r"_check_is_size will be removed in a future PyTorch release.*",
    category=FutureWarning,
    module=r"bitsandbytes\.backends\.cuda\.ops",
)
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
# NOTE(review): PROJECT_ROOT is a hard-coded absolute Windows path, so the
# script only runs unchanged on a machine that has this directory layout.
PROJECT_ROOT = Path(r"D:\ad\tinymind")
MODEL_ROOT = PROJECT_ROOT / "model" / "tinymind-12b"
# Put MODEL_ROOT first on the import path; the `tinymind_text_sanitize` import
# right after this presumably resolves from that directory (verify).
sys.path.insert(0, str(MODEL_ROOT))
from tinymind_text_sanitize import sanitize_generated_text
# Fixed probe set used for every adapter. Each entry has:
#   "id"     - short identifier written to the report and the console,
#   "prompt" - user message sent to the model,
#   "must"   - terms that score() looks for (case-insensitively) in the answer.
PROBES = [
    {
        "id": "language_semantics",
        "prompt": "อธิบายความต่างระหว่าง ambiguity, vagueness และ uncertainty เป็นภาษาไทย พร้อมตัวอย่างสั้น ๆ",
        "must": ["ambiguity", "vagueness", "uncertainty"],
    },
    {
        "id": "raw_code_bits",
        "prompt": "Explain how to sign-extend a packed signed 6-bit integer and name two boundary values.",
        "must": ["mask", "sign", "-32", "31"],
    },
    {
        "id": "systems_abi",
        "prompt": "Explain ABI compatibility for a Rust/C FFI boundary in one concise paragraph.",
        "must": ["calling", "layout", "symbol"],
    },
    {
        "id": "pure_math_bound",
        "prompt": "พิสูจน์สั้น ๆ ว่า m_t = c m_{t-1} + x_t มีขอบเขตเมื่อ 0<c<1 และ |x_t|<=B",
        "must": ["B", "1-c", "ขอบเขต"],
    },
    {
        "id": "entropy_relation",
        "prompt": "Explain the relation H(P,Q)=H(P)+KL(P||Q) and why it matters for eval loss.",
        "must": ["cross", "KL", "entropy"],
    },
]
def load_model(model_id: str, adapter: str):
    """Load the 4-bit quantized base model and attach a PEFT adapter to it.

    Args:
        model_id: Identifier passed to ``from_pretrained`` for the base model
            (and for the tokenizer when ``adapter`` is not an existing path).
        adapter: Adapter location passed to ``PeftModel.from_pretrained``.

    Returns:
        A ``(tokenizer, model)`` pair; the model has been switched to eval mode.
    """
    # Take the tokenizer from the adapter location when that path exists
    # locally, otherwise from the base model id.
    tokenizer_source = adapter if Path(adapter).exists() else model_id
    tokenizer = AutoTokenizer.from_pretrained(tokenizer_source, trust_remote_code=True)
    if tokenizer.pad_token is None:
        # No pad token configured: reuse EOS.
        tokenizer.pad_token = tokenizer.eos_token

    # bfloat16 when CUDA is available, float32 otherwise; used both as the
    # quantization compute dtype and as the model dtype.
    compute_dtype = torch.bfloat16 if torch.cuda.is_available() else torch.float32

    quant_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=compute_dtype,
        bnb_4bit_use_double_quant=True,
    )
    base_model = AutoModelForCausalLM.from_pretrained(
        model_id,
        quantization_config=quant_config,
        device_map="auto",
        trust_remote_code=True,
        dtype=compute_dtype,
    )

    peft_model = PeftModel.from_pretrained(base_model, adapter)
    peft_model.eval()
    return tokenizer, peft_model
def repeated_ngrams(text: str) -> bool:
    """Return True if any run of five consecutive words occurs more than once.

    Words are the ``\\w+`` matches of the lower-cased text, so punctuation and
    letter case are ignored. Texts with fewer than five words never repeat.
    """
    tokens = re.findall(r"\w+", text.lower(), flags=re.UNICODE)
    seen = set()
    # Slide a 5-word window over the tokens; stop at the first duplicate.
    for start in range(len(tokens) - 4):
        window = tuple(tokens[start : start + 5])
        if window in seen:
            return True
        seen.add(window)
    return False
def generate(tokenizer, model, prompt: str) -> str:
    """Generate one deterministic answer for *prompt* and return the cleaned text.

    The prompt is wrapped in a fixed system + user chat, rendered with the
    tokenizer's chat template, and decoded without sampling (at most 220 and
    at least 24 new tokens, with a repetition penalty and a 5-gram repeat
    block). Only the newly generated tokens are decoded; the result is
    stripped and passed through ``sanitize_generated_text``.

    Args:
        tokenizer: Tokenizer providing ``apply_chat_template`` and ``decode``.
        model: Model providing ``generate`` and a ``device`` attribute.
        prompt: User message to answer.

    Returns:
        The sanitized completion text.
    """
    messages = [
        {"role": "system", "content": "Answer precisely. Use constraints from the user. Avoid repetition."},
        {"role": "user", "content": prompt},
    ]
    text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    # The rendered template is expected to already contain the special tokens
    # it needs (e.g. BOS). Tokenizing it again with the default
    # add_special_tokens=True can prepend a second BOS, so disable it here as
    # the Transformers chat-templating guide recommends.
    inputs = tokenizer(text, return_tensors="pt", add_special_tokens=False).to(model.device)
    prompt_len = inputs["input_ids"].shape[1]
    with torch.no_grad():
        out = model.generate(
            **inputs,
            max_new_tokens=220,
            min_new_tokens=24,
            do_sample=False,
            repetition_penalty=1.16,
            no_repeat_ngram_size=5,
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id,
        )
    # Drop the prompt tokens so only the model's continuation is decoded.
    completion = tokenizer.decode(out[0][prompt_len:], skip_special_tokens=True)
    return sanitize_generated_text(completion.strip())
def score(sample: dict, response: str) -> tuple[int, list[str]]:
    """Score a response against a probe with simple heuristics.

    Starts from 4 points and removes one per raised flag; missing required
    terms cost one extra point. The result is clamped at 0.

    Flags:
        ``missing:<terms>``   - a ``sample["must"]`` term is absent
                                (case-insensitive substring check).
        ``repetition``        - ``repeated_ngrams`` reports a repeated 5-gram.
        ``too_short``         - fewer than 80 characters.
        ``broken_code_fence`` - odd number of triple-backtick fences.

    Returns:
        ``(points, flags)``.
    """
    haystack = response.lower()
    absent = [term for term in sample["must"] if term.lower() not in haystack]

    flags: list[str] = []
    if absent:
        flags.append("missing:" + ",".join(absent))
    if repeated_ngrams(response):
        flags.append("repetition")
    if len(response) < 80:
        flags.append("too_short")
    # An odd fence count means a code block was opened but never closed.
    if response.count("```") % 2 != 0:
        flags.append("broken_code_fence")

    # One point per flag, plus an extra point when required terms are missing.
    penalty = len(flags) + (1 if absent else 0)
    return max(4 - penalty, 0), flags
def main() -> int:
    """Run every probe against each adapter and write a JSON report.

    Command-line options:
        --model-id  Base model identifier.
        --adapter   Repeatable ``name=path`` pair; at least one is required.
        --out-dir   Directory that receives ``deep_core_probe_report.json``.

    Returns:
        0 on success. Malformed ``--adapter`` values end the program through
        ``parser.error`` (usage message, exit status 2).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--model-id", default="mistralai/Mistral-Nemo-Instruct-2407")
    parser.add_argument("--adapter", action="append", required=True, help="name=path")
    parser.add_argument("--out-dir", default=str(PROJECT_ROOT / "reports" / "deep_core_probe_manual"))
    args = parser.parse_args()

    # Validate every adapter spec before loading any model, so a typo in a
    # later argument fails immediately instead of after earlier adapters have
    # already been loaded and evaluated.
    adapters = []
    for item in args.adapter:
        name, sep, adapter = item.partition("=")
        if not sep or not name or not adapter:
            parser.error(f"--adapter expects name=path, got {item!r}")
        adapters.append((name, adapter))

    report = {"created_at": time.time(), "base_model": args.model_id, "probes": PROBES, "adapters": {}}
    for name, adapter in adapters:
        tokenizer, model = load_model(args.model_id, adapter)
        samples = []
        total = 0
        for probe in PROBES:
            response = generate(tokenizer, model, probe["prompt"])
            points, flags = score(probe, response)
            total += points
            samples.append({"id": probe["id"], "response": response, "score": points, "flags": flags})
            print(f"{name} {probe['id']} score={points} flags={flags}")
        report["adapters"][name] = {
            "adapter": adapter,
            "total_score": total,
            "max_score": len(PROBES) * 4,
            "samples": samples,
        }
        # Drop this adapter's model and release cached CUDA memory before the
        # next adapter is loaded.
        del model
        torch.cuda.empty_cache()

    out_dir = Path(args.out_dir)
    out_dir.mkdir(parents=True, exist_ok=True)
    out_path = out_dir / "deep_core_probe_report.json"
    # ensure_ascii=False keeps the non-ASCII prompts/responses readable.
    out_path.write_text(json.dumps(report, ensure_ascii=False, indent=2), encoding="utf-8")
    print(json.dumps({"report": str(out_path), "adapters": report["adapters"]}, ensure_ascii=False, indent=2))
    return 0
# Script entry point: run main() and use its return value as the exit status.
if __name__ == "__main__":
    sys.exit(main())

Xet Storage Details

Size:
6.11 kB
·
Xet hash:
67082277eb83925f6b5a677f538b51bc9def47d3777e66e7ff58aef7fb0ffe8e

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.