taf-agent / test_gguf.mjs
karlexmarin's picture
v0.9.3: GGUF repo autocomplete auto-lists quant files on select
22784b8
import { chromium } from "playwright";
// Address of the page under test (assumes a server is already listening there).
const BASE = "http://127.0.0.1:8000/index.html";

// One headless Chromium, one fresh browser context, one page for the whole run.
const b = await chromium.launch({ headless: true });
const ctx = await b.newContext();
const p = await ctx.newPage();

// Messages pushed by the console / pageerror listeners registered on `p`.
const errors = [];
/**
 * Tell whether a console error text is an ignorable HTTP 40x report.
 *
 * @param {string} text - Console message text.
 * @returns {boolean} true when the text mentions a "Failed to load resource"
 *   line followed by 40<digit>, or the phrase "status of 40<digit>".
 */
const benign = (text) => {
  const isFailedResource = /Failed to load resource.*40\d/.test(text);
  return isFailedResource || /status of 40\d/.test(text);
};
// Collect console messages of type "error", skipping the ones `benign` accepts.
p.on("console", (msg) => {
  if (msg.type() !== "error") return;
  const text = msg.text();
  if (benign(text)) return;
  errors.push(`[err] ${text}`);
});

// Collect uncaught page exceptions as well.
p.on("pageerror", (err) => {
  errors.push(`[pageerror] ${err.message}`);
});
/**
 * Print one line to stdout, terminated by a newline.
 *
 * @param {string} line - Text to print.
 */
const log = (line) => {
  process.stdout.write(line + "\n");
};

// Running totals of passed / failed checks.
let pass = 0;
let fail = 0;

/**
 * Report a single check result and update the pass/fail counters.
 *
 * @param {string} name - Human-readable check name.
 * @param {*} ok - Truthy counts as a pass, falsy as a failure.
 * @param {*} [detail=""] - Extra text appended after the name.
 */
const check = (name, ok, detail = "") => {
  const tag = ok ? " OK " : " FAIL";
  log(`${tag} ${name} ${detail}`);
  if (ok) {
    pass++;
  } else {
    fail++;
  }
};
// Navigate, then pause a fixed 2.5 s after DOMContentLoaded
// (presumably so the page's scripts can finish initialising).
await p.goto(BASE, { waitUntil: "domcontentloaded", timeout: 90000 });
await p.waitForTimeout(2500);

// Click the "en" language button before asserting on any text.
await p.click(`.lang-btn[data-lang="en"]`);
await p.waitForTimeout(200);
check(
  "module loads, 0 errors",
  errors.length === 0,
  `(errors=${errors.length})`,
);

// Open the GGUF mode via its link and confirm the section is displayed.
await p.click('[data-mode-link="gguf"]', { timeout: 5000 });
await p.waitForTimeout(500);
const secVis = await p.evaluate(() => {
  const section = document.querySelector("#gguf-section");
  if (!section) return section;
  return getComputedStyle(section).display !== "none";
});
check("gguf-section visible after tile click", secVis);
log("\n── List quant files (real repo) ──");

// Type a repo id, press the list button, and wait a fixed 4 s for the result.
await p.fill("#gguf-repo", "Qwen/Qwen2.5-0.5B-Instruct-GGUF");
await p.click("#gguf-list-btn");
await p.waitForTimeout(4000);

// Snapshot the file <select>, the analyze button state and the status text.
const listed = await p.evaluate(() => {
  const fileSelect = document.querySelector("#gguf-file");
  const count = fileSelect.options.length;
  const selected = fileSelect.value;
  const disabled = fileSelect.disabled;
  const analyzeEnabled = !document.querySelector("#gguf-analyze-btn").disabled;
  const status = document.querySelector("#gguf-status").innerText.slice(0, 60);
  return { count, selected, disabled, analyzeEnabled, status };
});

check("files listed in dropdown", listed.count > 0, `(${listed.count} files)`);
check("Q4_K_M auto-selected", /q4_k_m/i.test(listed.selected), listed.selected);
check("analyze button enabled", listed.analyzeEnabled);
log("\n── Autocomplete pick → auto-lists files (the 'no sale nada' fix) ──");
await p.fill("#gguf-repo", "");

// Empty and disable the file <select> so a later non-empty list can only
// come from the auto-list triggered by the autocomplete pick.
await p.evaluate(() => {
  const fileSelect = document.querySelector("#gguf-file");
  fileSelect.innerHTML = "";
  fileSelect.disabled = true;
});

// Focus the input, type a partial repo name, and wait for suggestions.
await p.click("#gguf-repo");
await p.fill("#gguf-repo", "Qwen2.5-0.5B-Instruct-GGUF");
await p.waitForTimeout(1300);

// In the first displayed dropdown, pick the exact repo if present, otherwise
// the first result, by dispatching a bubbling mousedown on it.
const picked = await p.evaluate(() => {
  const dropdowns = Array.from(document.querySelectorAll(".hf-autocomplete-dropdown"));
  const shown = dropdowns.find((el) => getComputedStyle(el).display !== "none");
  if (!shown) return null;
  const results = Array.from(shown.querySelectorAll(".hf-result"));
  const exact = results.find((el) => el.dataset.id === "Qwen/Qwen2.5-0.5B-Instruct-GGUF");
  const target = exact || results[0];
  if (!target) return null;
  target.dispatchEvent(new MouseEvent("mousedown", { bubbles: true }));
  return target.dataset.id;
});
check("autocomplete shows GGUF repos", Boolean(picked), picked || "none");

// Fixed wait for the list request that the selection is expected to start.
await p.waitForTimeout(4500);
const auto = await p.evaluate(() => {
  const fileSelect = document.querySelector("#gguf-file");
  return { count: fileSelect.options.length, sel: fileSelect.value };
});
check(
  "picking repo AUTO-lists quant files (no extra click)",
  auto.count > 0,
  `(${auto.count} files, sel=${auto.sel})`,
);

log("\n── Multiple repo naming conventions ──");
/**
 * Enter a repo id, press the list button, wait a fixed 3.5 s and report the
 * resulting state of the file dropdown and status line.
 *
 * @param {string} repo - Text typed into the `#gguf-repo` input.
 * @returns {Promise<{count: number, sel: string, status: string}>} Number of
 *   options in `#gguf-file`, its current value, and the first 40 characters
 *   of the `#gguf-status` text.
 */
async function listRepo(repo) {
  await p.fill("#gguf-repo", repo);
  // Presumably dismisses the autocomplete dropdown opened by typing — confirm.
  await p.keyboard.press("Escape");
  await p.click("#gguf-list-btn");
  await p.waitForTimeout(3500);

  const snapshot = await p.evaluate(() => {
    const fileSelect = document.querySelector("#gguf-file");
    const count = fileSelect.options.length;
    const sel = fileSelect.value;
    const status = document.querySelector("#gguf-status").innerText.slice(0, 40);
    return { count, sel, status };
  });
  return snapshot;
}
// Repos from two different publishers must both yield a non-empty file list.
const namingCases = [
  ["bartowski/* lists files", "bartowski/Qwen2.5-7B-Instruct-GGUF"],
  ["TheBloke/* lists files", "TheBloke/Llama-2-7B-Chat-GGUF"],
];
for (const [label, repo] of namingCases) {
  const state = await listRepo(repo);
  check(label, state.count > 0, `(${state.count})`);
}

log("\n── Error clears stale dropdown ──");
// After a repo id that should not resolve, the dropdown must hold no options.
const m3 = await listRepo("this/not-real-xyz999");
check("bad repo clears file dropdown (no stale)", m3.count === 0, `(count=${m3.count})`);

log("\n── reset to known-good for downstream tests ──");
await listRepo("Qwen/Qwen2.5-0.5B-Instruct-GGUF");
log("\n── Analyze GGUF (parse header + verdict) ──");
await p.click("#gguf-analyze-btn");
// Fixed 8 s allowance for the analysis to finish (range fetch + parse).
await p.waitForTimeout(8000);

// Capture visibility, verdict badge text, full output text and status line.
const r = await p.evaluate(() => {
  const output = document.querySelector("#gguf-output");
  const vis = getComputedStyle(output).display !== "none";
  const verdict = output.querySelector(".verdict-badge")?.innerText?.trim() || "";
  const text = output.innerText;
  const status = document.querySelector("#gguf-status").innerText;
  return { vis, verdict, text, status };
});

check("output rendered", r.vis && r.text.length > 50);
check("verdict present", r.verdict.length > 3, r.verdict);
check("shows architecture qwen2", /qwen2/.test(r.text));
{
  // Detail text: the "Trained context" line plus the token after it, if found.
  const found = r.text.match(/Trained context[^\n]*\n?\s*[\w.]+/);
  const snippet = found ? found[0] : "";
  check("shows trained context 32K", /32K|32768/.test(r.text), snippet.slice(0, 40));
}
check("shows quant Q4_K_M", /Q4_K_M/i.test(r.text));
check("shows γ-shift from quant", /γ-shift|shift/i.test(r.text));
check("shows ΔPPL", /ΔPPL|PPL/.test(r.text));
check(
  "header parsed status (MB)",
  /MB header|parsed|analizada|analysé|已解析/i.test(r.status),
  r.status.slice(0, 50),
);
log("\n── Target L override ──");
// Set the target field to 131072 and run the analysis again.
await p.fill("#gguf-target","131072");
await p.click("#gguf-analyze-btn");
await p.waitForTimeout(7000);
// The page expression yields undefined when no verdict badge exists. Fall
// back to "" so that case is reported as a FAIL instead of throwing on
// `.length` and aborting the run before the summary and browser close.
const r2 = (await p.evaluate(()=>document.querySelector("#gguf-output .verdict-badge")?.innerText?.trim())) ?? "";
check("re-analyze with L=131072", r2.length>3, r2);
log("\n── Compare all quants (one header parse → full table) ──");
await p.click("#gguf-all-btn");
await p.waitForTimeout(7000);

// Summarise the rendered comparison table: title, body-row count, the <code>
// label of each body row, and whether the header row mentions "Verdict".
const cmp = await p.evaluate(() => {
  const output = document.querySelector("#gguf-output");
  const [headerRow, ...bodyRows] = Array.from(output.querySelectorAll("table tr"));
  const title = output.querySelector("h3")?.innerText;
  const quants = [];
  for (const row of bodyRows) {
    const label = row.querySelector("code")?.innerText;
    if (label) quants.push(label);
  }
  const hasShift = /−0\.|—/.test(output.innerText);
  const hasVerdictCol = headerRow?.innerText?.includes("Verdict");
  return { title, rowCount: bodyRows.length, quants, hasShift, hasVerdictCol };
});

check("comparison table rendered", cmp.rowCount >= 3, `(${cmp.rowCount} rows)`);
check("lists multiple quant labels", cmp.quants.length >= 3, cmp.quants.join(", "));
check("has verdict column", cmp.hasVerdictCol, cmp.title);
{
  // Passes when either label is absent; otherwise Q8 must come before Q2.
  const q8Index = cmp.quants.findIndex((label) => /Q8/.test(label));
  const q2Index = cmp.quants.findIndex((label) => /Q2/.test(label));
  const ordered = q8Index < 0 || q2Index < 0 || q8Index < q2Index;
  check("rows sorted best→worst (Q8 before Q2)", ordered, cmp.quants.join(" > "));
}
// Verdicts must vary across quants (regression guard: a hard d_horizon gate
// once forced every row to DEGRADES even when γ@L was healthy).
// Reads the trimmed text of the last cell of every body row.
const verdicts = await p.evaluate(()=>[...document.querySelectorAll("#gguf-output table tr")].slice(1).map(r=>r.lastElementChild?.innerText?.trim()));
check("verdicts vary across quants (not all identical)", new Set(verdicts).size>=2, verdicts.join(" | "));
// γ@L must DECREASE for worse quants (Q8 γ@L > Q2 γ@L).
// Parses the third cell of every body row as a float; compares first vs last row.
const gammas = await p.evaluate(()=>[...document.querySelectorAll("#gguf-output table tr")].slice(1).map(r=>parseFloat(r.children[2]?.innerText)));
// Separate the two values in the detail text; without a separator they ran
// together (e.g. "0.950.71") and could not be read back from the log.
check("γ@L decreases for worse quant", gammas[0] > gammas[gammas.length-1], `${gammas[0]} > ${gammas[gammas.length-1]}`);
log("\n── 4-language verdict ──");
// For each language button, the GGUF mode button must show a label longer
// than three characters. Ends on "en".
const languages = ["es", "fr", "zh", "en"];
for (const lang of languages) {
  await p.click(`.lang-btn[data-lang="${lang}"]`);
  await p.waitForTimeout(300);
  const label = await p.evaluate(() => {
    const modeButton = document.querySelector('.mode-btn[data-mode="gguf"]');
    return modeButton?.textContent?.trim();
  });
  check(`${lang}: tab label localized`, label && label.length > 3, label);
}
log("\n── Error path: bad repo ──");
// Click the "en" language button so the status text can be matched in English.
await p.click(`.lang-btn[data-lang="en"]`);
await p.waitForTimeout(200);

// List a repo id that should not resolve and read the status line afterwards.
await p.fill("#gguf-repo", "this/definitely-not-a-real-repo-xyz123");
await p.click("#gguf-list-btn");
await p.waitForTimeout(3000);
const errStatus = await p.evaluate(() => {
  const statusEl = document.querySelector("#gguf-status");
  return statusEl.innerText;
});
check(
  "bad repo → error message",
  /❌|not found|HTTP/i.test(errStatus),
  errStatus.slice(0, 50),
);
// Print totals, then at most the first ten collected error messages.
log(`\n=== ${pass} passed, ${fail} failed · JS errors: ${errors.length} ===`);
for (const message of errors.slice(0, 10)) {
  log(message);
}

// Shut the browser down and exit with 1 if any check failed, else 0.
await b.close();
const exitCode = fail > 0 ? 1 : 0;
process.exit(exitCode);