// Extraction residue from the hosting page's file viewer, kept for reference:
// Spaces status "Running"; file size 8,164 bytes; revisions 2eb69cb, 22784b8.
import { chromium } from "playwright";
// URL of the page under test.
const BASE = "http://127.0.0.1:8000/index.html";

// Headless Chromium -> fresh context -> the single page every step drives.
const b = await chromium.launch({ headless: true });
const context = await b.newContext();
const p = await context.newPage();

// Messages from console errors and uncaught page exceptions are pushed here.
const errors = [];
/**
 * Decide whether a console error message is one that should be ignored.
 * A message is ignored when it is a "Failed to load resource" line mentioning
 * a 40x code, or contains "status of 40x".
 * @param {string} text - console message text
 * @returns {boolean} true when the message matches one of the ignored shapes
 */
const benign = (text) => {
  const ignoredPattern = /Failed to load resource.*40\d|status of 40\d/;
  return ignoredPattern.test(text);
};
// Record console messages of type "error", except those `benign` filters out.
p.on("console", (msg) => {
  if (msg.type() !== "error") return;
  const text = msg.text();
  if (benign(text)) return;
  errors.push(`[err] ${text}`);
});

// Uncaught exceptions raised inside the page are always recorded.
p.on("pageerror", (err) => {
  errors.push(`[pageerror] ${err.message}`);
});
// Write one line of report output to stdout.
const log = (line) => process.stdout.write(`${line}\n`);

// Running tallies of passed and failed checks.
let pass = 0;
let fail = 0;

/**
 * Print the outcome of one check and bump the matching tally.
 * @param {string} n - check name shown in the report line
 * @param {*} c - outcome; any truthy value counts as a pass
 * @param {*} [x=""] - extra detail appended after the name
 */
const check = (n, c, x = "") => {
  const tag = c ? " OK " : " FAIL";
  log(`${tag} ${n} ${x}`);
  if (c) {
    pass++;
  } else {
    fail++;
  }
};
// Load the page, then pause a fixed 2.5 s before interacting with it.
await p.goto(BASE, { waitUntil: "domcontentloaded", timeout: 90000 });
await p.waitForTimeout(2500);

// Click the "en" language button before asserting anything.
await p.click(`.lang-btn[data-lang="en"]`);
await p.waitForTimeout(200);
check("module loads, 0 errors", errors.length === 0, `(errors=${errors.length})`);

// Open the GGUF mode through its link, then confirm the section is displayed.
await p.click('[data-mode-link="gguf"]', { timeout: 5000 });
await p.waitForTimeout(500);
const secVis = await p.evaluate(() => {
  const section = document.querySelector("#gguf-section");
  return section && getComputedStyle(section).display !== "none";
});
check("gguf-section visible after tile click", secVis);
log("\n── List quant files (real repo) ──");
await p.fill("#gguf-repo", "Qwen/Qwen2.5-0.5B-Instruct-GGUF");
await p.click("#gguf-list-btn");
// Fixed pause; presumably long enough for the listing request to finish.
await p.waitForTimeout(4000);

// Snapshot the file dropdown, the analyze button and the status line at once.
const listed = await p.evaluate(() => {
  const fileSelect = document.querySelector("#gguf-file");
  const count = fileSelect.options.length;
  const selected = fileSelect.value;
  const disabled = fileSelect.disabled;
  const analyzeEnabled = !document.querySelector("#gguf-analyze-btn").disabled;
  const status = document.querySelector("#gguf-status").innerText.slice(0, 60);
  return { count, selected, disabled, analyzeEnabled, status };
});

check("files listed in dropdown", listed.count > 0, `(${listed.count} files)`);
check("Q4_K_M auto-selected", /q4_k_m/i.test(listed.selected), listed.selected);
check("analyze button enabled", listed.analyzeEnabled);
log("\n── Autocomplete pick → auto-lists files (the 'no sale nada' fix) ──");
await p.fill("#gguf-repo", "");

// Empty and disable the file <select> first, so a non-zero option count later
// can only come from the list being repopulated after the pick.
await p.evaluate(() => {
  const fileSelect = document.querySelector("#gguf-file");
  fileSelect.innerHTML = "";
  fileSelect.disabled = true;
});

await p.click("#gguf-repo");
await p.fill("#gguf-repo", "Qwen2.5-0.5B-Instruct-GGUF");
await p.waitForTimeout(1300);

// In the first displayed autocomplete dropdown, pick the exact repo if it is
// offered, otherwise the first result, by dispatching a bubbling mousedown.
// Resolves to the picked item's data-id, or null when nothing can be picked.
const picked = await p.evaluate(() => {
  const dropdowns = Array.from(document.querySelectorAll(".hf-autocomplete-dropdown"));
  const openDropdown = dropdowns.find((el) => getComputedStyle(el).display !== "none");
  if (!openDropdown) {
    return null;
  }
  const results = Array.from(openDropdown.querySelectorAll(".hf-result"));
  const exact = results.find((el) => el.dataset.id === "Qwen/Qwen2.5-0.5B-Instruct-GGUF");
  const choice = exact || results[0];
  if (!choice) {
    return null;
  }
  choice.dispatchEvent(new MouseEvent("mousedown", { bubbles: true }));
  return choice.dataset.id;
});
check("autocomplete shows GGUF repos", Boolean(picked), picked || "none");

await p.waitForTimeout(4500); // auto-list fires onSelect
const auto = await p.evaluate(() => {
  const count = document.querySelector("#gguf-file").options.length;
  const sel = document.querySelector("#gguf-file").value;
  return { count, sel };
});
check(
  "picking repo AUTO-lists quant files (no extra click)",
  auto.count > 0,
  `(${auto.count} files, sel=${auto.sel})`,
);

log("\n── Multiple repo naming conventions ──");
/**
 * Enter a repo id, press the list button, wait 3.5 s and read back the result.
 *
 * Escape is pressed after filling the field, presumably to dismiss the
 * autocomplete dropdown before clicking (confirm against the page code).
 *
 * @param {string} repo - value typed into `#gguf-repo`
 * @returns {Promise<{count: number, sel: string, status: string}>} number of
 *   options in `#gguf-file`, its current value, and the first 40 characters of
 *   the `#gguf-status` text
 */
async function listRepo(repo) {
  await p.fill("#gguf-repo", repo);
  await p.keyboard.press("Escape");
  await p.click("#gguf-list-btn");
  await p.waitForTimeout(3500);

  const snapshot = await p.evaluate(() => {
    const count = document.querySelector("#gguf-file").options.length;
    const sel = document.querySelector("#gguf-file").value;
    const status = document.querySelector("#gguf-status").innerText.slice(0, 40);
    return { count, sel, status };
  });
  return snapshot;
}
// Two differently named publisher repos must both yield a file list.
const m1 = await listRepo("bartowski/Qwen2.5-7B-Instruct-GGUF");
check("bartowski/* lists files", m1.count > 0, `(${m1.count})`);

const m2 = await listRepo("TheBloke/Llama-2-7B-Chat-GGUF");
check("TheBloke/* lists files", m2.count > 0, `(${m2.count})`);

log("\n── Error clears stale dropdown ──");
// A bad repo must leave the dropdown with zero options, not the previous list.
const m3 = await listRepo("this/not-real-xyz999");
check("bad repo clears file dropdown (no stale)", m3.count === 0, `(count=${m3.count})`);

log("\n── reset to known-good for downstream tests ──");
await listRepo("Qwen/Qwen2.5-0.5B-Instruct-GGUF");
log("\n── Analyze GGUF (parse header + verdict) ──");
await p.click("#gguf-analyze-btn");
await p.waitForTimeout(8000); // range fetch + parse

// Capture output visibility, verdict badge text, full output text and status.
const r = await p.evaluate(() => {
  const output = document.querySelector("#gguf-output");
  const vis = getComputedStyle(output).display !== "none";
  const badgeText = output.querySelector(".verdict-badge")?.innerText?.trim();
  return {
    vis,
    verdict: badgeText || "",
    text: output.innerText,
    status: document.querySelector("#gguf-status").innerText,
  };
});

check("output rendered", r.vis && r.text.length > 50);
check("verdict present", r.verdict.length > 3, r.verdict);
check("shows architecture qwen2", /qwen2/.test(r.text));

// Detail for the context check: the "Trained context" line plus the token
// after it, capped at 40 characters (empty when the label is not found).
const ctxMatch = r.text.match(/Trained context[^\n]*\n?\s*[\w.]+/);
const ctxSnippet = (ctxMatch ? ctxMatch[0] : "").slice(0, 40);
check("shows trained context 32K", /32K|32768/.test(r.text), ctxSnippet);

check("shows quant Q4_K_M", /Q4_K_M/i.test(r.text));
check("shows γ-shift from quant", /γ-shift|shift/i.test(r.text));
check("shows ΔPPL", /ΔPPL|PPL/.test(r.text));
check(
  "header parsed status (MB)",
  /MB header|parsed|analizada|analysé|已解析/i.test(r.status),
  r.status.slice(0, 50),
);
log("\n── Target L override ──");
await p.fill("#gguf-target", "131072");
await p.click("#gguf-analyze-btn");
await p.waitForTimeout(7000);

// Read the verdict badge text after re-analysis. When the badge is missing the
// optional chain yields undefined; fall back to "" so the check below reports
// a FAIL line instead of throwing on `.length` and aborting the whole run.
const r2 = await p.evaluate(() => {
  const badge = document.querySelector("#gguf-output .verdict-badge");
  return badge?.innerText?.trim() || "";
});
check("re-analyze with L=131072", r2.length > 3, r2);
log("\n── Compare all quants (one header parse → full table) ──");
await p.click("#gguf-all-btn");
await p.waitForTimeout(7000);

// Summarize the comparison table: the first <tr> is treated as the header,
// every later <tr> as one data row.
const cmp = await p.evaluate(() => {
  const output = document.querySelector("#gguf-output");
  const allRows = Array.from(output.querySelectorAll("table tr"));
  const headerRow = allRows[0];
  const bodyRows = allRows.slice(1); // minus header
  return {
    title: output.querySelector("h3")?.innerText,
    rowCount: bodyRows.length,
    quants: bodyRows
      .map((row) => row.querySelector("code")?.innerText)
      .filter(Boolean),
    hasShift: /−0\.|—/.test(output.innerText),
    hasVerdictCol: headerRow?.innerText?.includes("Verdict"),
  };
});

check("comparison table rendered", cmp.rowCount >= 3, `(${cmp.rowCount} rows)`);
check("lists multiple quant labels", cmp.quants.length >= 3, cmp.quants.join(", "));
check("has verdict column", cmp.hasVerdictCol, cmp.title);

// Ordering is only enforced when both a Q8 and a Q2 label are present;
// if either is missing the check passes.
const q8Index = cmp.quants.findIndex((label) => /Q8/.test(label));
const q2Index = cmp.quants.findIndex((label) => /Q2/.test(label));
const sortedBestFirst = q8Index < 0 || q2Index < 0 || q8Index < q2Index;
check("rows sorted best→worst (Q8 before Q2)", sortedBestFirst, cmp.quants.join(" > "));

// Verdicts must vary across quants (regression guard: a hard d_horizon gate
// once forced every row to DEGRADES even when γ@L was healthy).
const verdicts = await p.evaluate(() => {
  const dataRows = Array.from(document.querySelectorAll("#gguf-output table tr")).slice(1);
  return dataRows.map((row) => row.lastElementChild?.innerText?.trim());
});
const distinctVerdicts = new Set(verdicts);
check(
  "verdicts vary across quants (not all identical)",
  distinctVerdicts.size >= 2,
  verdicts.join(" | "),
);

// γ@L must DECREASE for worse quants (Q8 γ@L > Q2 γ@L): compare the third
// cell of the first data row against that of the last data row.
const gammas = await p.evaluate(() => {
  const dataRows = Array.from(document.querySelectorAll("#gguf-output table tr")).slice(1);
  return dataRows.map((row) => parseFloat(row.children[2]?.innerText));
});
const firstGamma = gammas[0];
const lastGamma = gammas[gammas.length - 1];
check("γ@L decreases for worse quant", firstGamma > lastGamma, `${firstGamma} → ${lastGamma}`);
log("\n── 4-language verdict ──");
// Click each language button in turn and require the GGUF mode button to show
// a label longer than three characters; "en" comes last in the list.
for (const lang of ["es", "fr", "zh", "en"]) {
  await p.click(`.lang-btn[data-lang="${lang}"]`);
  await p.waitForTimeout(300);
  const label = await p.evaluate(() => {
    const modeButton = document.querySelector('.mode-btn[data-mode="gguf"]');
    return modeButton?.textContent?.trim();
  });
  check(`${lang}: tab label localized`, label && label.length > 3, label);
}
log("\n── Error path: bad repo ──");
// Click the "en" button first; the expected status patterns below include
// English wording.
await p.click(`.lang-btn[data-lang="en"]`);
await p.waitForTimeout(200);

await p.fill("#gguf-repo", "this/definitely-not-a-real-repo-xyz123");
await p.click("#gguf-list-btn");
await p.waitForTimeout(3000);

const errStatus = await p.evaluate(() => {
  const statusEl = document.querySelector("#gguf-status");
  return statusEl.innerText;
});
check("bad repo → error message", /❌|not found|HTTP/i.test(errStatus), errStatus.slice(0, 50));
// Final summary line, followed by at most the first ten recorded JS errors.
log(`\n=== ${pass} passed, ${fail} failed · JS errors: ${errors.length} ===`);
for (const entry of errors.slice(0, 10)) {
  log(entry);
}

// Close the browser, then exit non-zero when any check failed.
// (The stray trailing "|" left by the page extraction was a syntax error at
// the end of the module and has been dropped.)
await b.close();
process.exit(fail > 0 ? 1 : 0);