HearthNet-Nemotron / browsermesh.html
Chris4K's picture
POC
5dc06ed
Raw
History Blame
30.7 kB
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>BrowserMesh — distributed LLM inference over WebRTC</title>
<style>
:root {
--bg: #fafaf7; --fg: #1a1a18; --muted: #6b6b65; --line: #d8d6cd;
--card: #ffffff; --accent: #534ab7; --accent-soft: #eeedfe;
--ok: #0f6e56; --ok-soft: #e1f5ee;
--warn: #854f0b; --warn-soft: #faeeda;
--err: #791f1f; --err-soft: #fcebeb;
--code: 'JetBrains Mono', ui-monospace, SFMono-Regular, Menlo, monospace;
--sans: 'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif;
}
@media (prefers-color-scheme: dark) {
:root {
--bg: #1a1a18; --fg: #f0eee6; --muted: #a8a69d; --line: #3a3a36;
--card: #242421; --accent: #afa9ec; --accent-soft: #2a2655;
--ok: #5dcaa5; --ok-soft: #0a3a2e;
--warn: #ef9f27; --warn-soft: #3a2a0a;
--err: #f09595; --err-soft: #3a1010;
}
}
* { box-sizing: border-box; }
html, body { margin: 0; padding: 0; background: var(--bg); color: var(--fg); font-family: var(--sans); font-size: 15px; line-height: 1.55; }
.wrap { max-width: 920px; margin: 0 auto; padding: 32px 24px 64px; }
header { margin-bottom: 32px; }
h1 { font-size: 24px; font-weight: 500; margin: 0 0 4px; letter-spacing: -0.01em; }
h2 { font-size: 13px; font-weight: 500; text-transform: uppercase; letter-spacing: 0.08em; color: var(--muted); margin: 0 0 12px; }
.sub { color: var(--muted); margin: 0 0 20px; font-size: 14px; }
section { margin-bottom: 28px; }
.card { background: var(--card); border: 0.5px solid var(--line); border-radius: 10px; padding: 16px 18px; }
.row { display: flex; gap: 8px; align-items: center; flex-wrap: wrap; }
.row > input[type=text], .row > select, .row > textarea { flex: 1; min-width: 200px; }
input, select, textarea, button { font-family: inherit; font-size: 14px; }
input[type=text], textarea, select {
background: var(--bg); color: var(--fg); border: 0.5px solid var(--line);
border-radius: 6px; padding: 8px 10px;
}
input[type=text]:focus, textarea:focus, select:focus { outline: none; border-color: var(--accent); }
textarea { font-family: inherit; resize: vertical; min-height: 60px; width: 100%; }
button {
background: var(--card); color: var(--fg); border: 0.5px solid var(--line);
border-radius: 6px; padding: 8px 14px; cursor: pointer; transition: background .12s;
}
button:hover:not(:disabled) { background: var(--accent-soft); border-color: var(--accent); }
button:disabled { opacity: 0.5; cursor: not-allowed; }
button.primary { background: var(--accent); color: white; border-color: var(--accent); }
button.primary:hover:not(:disabled) { background: #3c3489; }
code, .mono { font-family: var(--code); font-size: 13px; }
.pill {
display: inline-block; padding: 2px 8px; border-radius: 10px; font-size: 12px;
background: var(--accent-soft); color: var(--accent); font-family: var(--code);
}
.pill.ok { background: var(--ok-soft); color: var(--ok); }
.pill.warn { background: var(--warn-soft); color: var(--warn); }
.pill.err { background: var(--err-soft); color: var(--err); }
.my-id { display: flex; align-items: center; gap: 10px; padding: 12px 16px; background: var(--accent-soft); border-radius: 8px; }
.my-id code { font-size: 14px; color: var(--accent); font-weight: 500; }
.peers-grid { display: grid; grid-template-columns: 1fr; gap: 8px; }
.peer-card { padding: 10px 14px; border: 0.5px solid var(--line); border-radius: 8px; background: var(--card); }
.peer-card .head { display: flex; justify-content: space-between; align-items: center; }
.peer-card .caps { font-size: 12px; color: var(--muted); margin-top: 4px; }
.progress { height: 4px; background: var(--line); border-radius: 2px; overflow: hidden; margin-top: 8px; }
.progress > div { height: 100%; background: var(--accent); transition: width .15s; width: 0%; }
.modes { display: flex; gap: 6px; margin-bottom: 12px; flex-wrap: wrap; }
.modes label {
padding: 6px 12px; border: 0.5px solid var(--line); border-radius: 16px; cursor: pointer; font-size: 13px;
background: var(--card);
}
.modes label.on { background: var(--accent-soft); border-color: var(--accent); color: var(--accent); }
.modes input { display: none; }
.results { display: grid; gap: 10px; margin-top: 14px; }
.result { padding: 12px 14px; border-radius: 8px; border: 0.5px solid var(--line); background: var(--card); }
.result .head { display: flex; justify-content: space-between; align-items: center; font-size: 12px; color: var(--muted); margin-bottom: 6px; }
.result .text { white-space: pre-wrap; word-wrap: break-word; font-size: 14px; }
.result .text:empty::before { content: '…'; color: var(--muted); }
.log { background: var(--bg); border: 0.5px solid var(--line); border-radius: 6px; padding: 10px 12px; max-height: 200px; overflow-y: auto; font-family: var(--code); font-size: 12px; line-height: 1.5; }
.log .l { color: var(--muted); }
.log .l b { color: var(--fg); font-weight: 500; }
.footnote { font-size: 12px; color: var(--muted); margin-top: 6px; }
.info { padding: 10px 14px; border-radius: 6px; background: var(--warn-soft); color: var(--warn); font-size: 13px; border-left: 3px solid var(--warn); }
.danger { padding: 10px 14px; border-radius: 6px; background: var(--err-soft); color: var(--err); font-size: 13px; border-left: 3px solid var(--err); }
</style>
</head>
<body>
<div class="wrap">
<header>
<h1>BrowserMesh</h1>
<p class="sub">Distributed LLM inference across browser peers, via WebRTC. Open this URL in two tabs (or send to a friend), connect, load the model in each, and run inference together.</p>
<div class="my-id">
<span style="font-size: 13px; color: var(--muted);">Your peer ID:</span>
<code id="my-peer-id">connecting to broker…</code>
<button id="copy-link" style="margin-left:auto" disabled>Copy invite link</button>
</div>
</header>
<section>
  <h2>Mesh</h2>
  <div class="card">
    <div class="row" style="margin-bottom: 12px;">
      <!-- A placeholder is not an accessible name, so the field also carries an aria-label. -->
      <input type="text" id="connect-input" placeholder="Paste a peer ID to connect" aria-label="Peer ID to connect to" autocomplete="off" spellcheck="false">
      <button type="button" id="connect-btn" disabled>Connect</button>
    </div>
    <!-- Rewritten by renderPeers() whenever the peer table changes. -->
    <div class="peers-grid" id="peers-list">
      <div class="footnote">No peers connected. Share your invite link to add some.</div>
    </div>
  </div>
</section>
<section>
  <h2>This browser</h2>
  <div class="card">
    <!-- Filled by renderCaps() once capability detection has finished. -->
    <div id="caps-display" class="footnote">Detecting capabilities…</div>
    <div class="row" style="margin-top: 14px;">
      <!-- The option values are the model ids passed to pipeline() by loadModel(). -->
      <select id="model-select" aria-label="Model to load">
        <option value="onnx-community/Qwen2.5-0.5B-Instruct">Qwen2.5-0.5B-Instruct · q4 · ~400MB</option>
        <option value="HuggingFaceTB/SmolLM2-360M-Instruct">SmolLM2-360M-Instruct · q4 · ~250MB</option>
        <option value="onnx-community/Llama-3.2-1B-Instruct">Llama-3.2-1B-Instruct · q4 · ~700MB</option>
      </select>
      <button type="button" id="load-model-btn" class="primary">Load model</button>
    </div>
    <!-- The inner div's width is driven by loadModel()'s progress callback. -->
    <div class="progress" style="margin-top: 10px;"><div id="model-progress-bar"></div></div>
    <div class="footnote" id="model-status">Click to download and load. Cached after first use.</div>
  </div>
</section>
<section>
  <h2>Inference</h2>
  <div class="card">
    <!-- The script toggles the "on" class on these labels; the radio value is read by runInference(). -->
    <div class="modes">
      <label class="on"><input type="radio" name="mode" value="ensemble" checked>Ensemble <span class="pill ok">working</span></label>
      <label><input type="radio" name="mode" value="pipeline">Pipeline parallel <span class="pill warn">protocol only</span></label>
      <label><input type="radio" name="mode" value="moe">MoE expert routing <span class="pill warn">protocol only</span></label>
    </div>
    <!-- A placeholder is not an accessible name, so the field also carries an aria-label. -->
    <textarea id="prompt-input" rows="3" aria-label="Prompt" placeholder="Enter a prompt. Try: &quot;Explain WebRTC ICE traversal in one paragraph.&quot;"></textarea>
    <div class="row" style="margin-top: 10px;">
      <button type="button" id="run-btn" class="primary" disabled>Run on mesh</button>
      <span class="footnote" id="run-hint">Load the model first.</span>
    </div>
    <!-- One .result card per worker is appended here by addResult(). -->
    <div class="results" id="results"></div>
  </div>
</section>
<section>
<h2>Mesh log</h2>
<div class="log" id="log"></div>
</section>
<section>
<h2>Deployment notes</h2>
<div class="card" style="font-size: 13px; line-height: 1.7; color: var(--muted);">
<p style="margin: 0 0 8px;"><b style="color: var(--fg)">Signaling.</b> This demo uses the public PeerJS broker at <code>0.peerjs.com</code> — rate-limited and shared. For production, run your own PeerServer (<code>npm install peer</code> or the Docker image) and pass <code>{host, port, path, key}</code> to <code>new Peer()</code>. Or write a PHP signaling server — it's just WebSocket + relay-room-by-id.</p>
<p style="margin: 0 0 8px;"><b style="color: var(--fg)">NAT traversal.</b> Default ICE uses Google's public STUN. If two peers can't connect (corporate NAT, symmetric NAT) you need a TURN server. Coturn on a small VPS handles thousands of users.</p>
<p style="margin: 0 0 8px;"><b style="color: var(--fg)">CORS / COEP.</b> transformers.js downloads from <code>huggingface.co</code> CDN (CORS-enabled). No special headers required unless you later add SharedArrayBuffer/threaded WASM, then add <code>Cross-Origin-Opener-Policy: same-origin</code> and <code>Cross-Origin-Embedder-Policy: require-corp</code>.</p>
<p style="margin: 0;"><b style="color: var(--fg)">Pipeline / MoE.</b> The protocol layer (capability ad, request routing, error handling, circuit breaker) is fully wired. The inference handlers return <code>{ unimplemented: true }</code> β€” implementing them requires a custom transformers.js fork that exposes intermediate hidden states between transformer blocks. See the <code>handlePipelineStage</code> and <code>handleMoeExpert</code> stubs at the bottom of the script for the TODO contract.</p>
</div>
</section>
</div>
<script src="https://unpkg.com/peerjs@1.5.4/dist/peerjs.min.js"></script>
<script type="module">
import { pipeline, env, TextStreamer } from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.7.2';
// Configure the transformers.js `env` object before any pipeline is created.
// NOTE(review): presumably this makes model files come from the remote hub rather
// than a same-origin path, and caches downloads in the browser (the UI text says
// "Cached after first use") — confirm against the transformers.js env documentation.
env.allowLocalModels = false;
env.useBrowserCache = true;
// ─── state ────────────────────────────────────────────────────────────────
// Wire-protocol version: send() stamps it on every outgoing message as `v`, and
// wireConnection() drops any incoming message whose `v` differs.
const PROTO_VERSION = 1;
// Single mutable application state object shared by every function in this module.
const state = {
peer: null, // PeerJS Peer instance, created in setupPeer()
myId: null, // our broker-assigned peer id, set in the Peer 'open' handler
peers: {}, // peerId -> { conn, caps, lastSeen }; entries are added on connection open and removed on close
myCaps: null, // result of detectCapabilities(); null until the broker connection has opened
generator: null, // text-generation pipeline returned by pipeline() in loadModel()
modelId: null, // id of the model held by `generator`
inflight: {}, // requestId -> { results, mode, prompt, startedAt }; created by runInference()
};
// Shorthand for document.getElementById: returns the element with the given id, or null.
const $ = function (id) {
  return document.getElementById(id);
};
/**
 * Append one timestamped line to the on-page mesh log (#log) and scroll it to the bottom.
 *
 * The line is built from DOM nodes and text, never from an HTML string: callers pass
 * text that originates from remote peers (message types, error reasons, peer ids), so
 * it must not be parsed as markup. Appending a node also avoids re-parsing the whole
 * log on every call, which `innerHTML +=` would do.
 *
 * @param {string} msg     Text of the log line.
 * @param {string} [peerId] If given, its first 8 characters are shown in bold before the message.
 */
const log = (msg, peerId) => {
  const el = $('log');
  const line = document.createElement('div');
  line.className = 'l';
  line.append(`[${new Date().toLocaleTimeString()}] `);
  if (peerId) {
    const who = document.createElement('b');
    who.textContent = String(peerId).slice(0, 8);
    line.append(who, ' ');
  }
  line.append(String(msg));
  el.appendChild(line);
  el.scrollTop = el.scrollHeight;
};
// ─── capability detection ─────────────────────────────────────────────────
/**
 * Build the capability advertisement for this browser.
 *
 * Collects: our peer id and a timestamp; device facts read from `navigator`
 * (WebGPU presence, core count, device memory, a browser/version token from the
 * user agent); WebGPU adapter info when an adapter can be obtained; and a crude
 * network figure derived from timing one small CDN download. Both probes are
 * best-effort: on failure the corresponding fields keep their null defaults.
 *
 * @returns {Promise<object>} The capability object (always starts with `models: []`).
 */
async function detectCapabilities() {
  const uaHit = navigator.userAgent.match(/(Chrome|Firefox|Safari|Edge)\/[\d.]+/);
  const caps = {
    peerId: state.myId,
    ts: Date.now(),
    device: {
      webgpu: !!navigator.gpu,
      cores: navigator.hardwareConcurrency || null,
      memoryGB: navigator.deviceMemory || null,
      ua: uaHit ? uaHit[0] : 'unknown',
    },
    gpu: null,
    network: { bandwidthKbps: null, rttMs: null },
    models: [],
    capabilities: ['inference.ensemble'],
  };

  // GPU details, only when the WebGPU entry point exists.
  if (navigator.gpu) {
    try {
      const adapter = await navigator.gpu.requestAdapter();
      if (adapter) {
        const { vendor, architecture, device } = adapter.info || {};
        caps.gpu = {
          vendor: vendor || 'unknown',
          architecture: architecture || 'unknown',
          device: device || '',
        };
      }
    } catch {
      // best-effort probe: leave caps.gpu as null
    }
  }

  // Crude bandwidth probe: time the download of one small, uncached asset.
  try {
    const probeUrl = 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.7.2/package.json';
    const startedAt = performance.now();
    const response = await fetch(probeUrl, { cache: 'no-store' });
    const body = await response.text();
    const elapsedMs = performance.now() - startedAt;
    const bits = body.length * 8;
    caps.network.bandwidthKbps = Math.round(bits / (elapsedMs / 1000) / 1024);
    // The whole fetch duration doubles as the round-trip estimate.
    caps.network.rttMs = Math.round(elapsedMs);
  } catch {
    // best-effort probe: leave the network fields as null
  }

  return caps;
}
/**
 * Render this browser's capabilities (state.myCaps) as a row of pills in #caps-display.
 * Does nothing until capabilities have been detected.
 *
 * The values shown come from this browser's own `navigator`/WebGPU adapter, not from
 * remote peers. The vendor/architecture separator is a middle dot (the original
 * literal was mis-encoded and displayed as garbage).
 */
function renderCaps() {
  const c = state.myCaps;
  if (!c) return;
  const bits = [];
  bits.push(c.device.webgpu ? '<span class="pill ok">WebGPU</span>' : '<span class="pill warn">WASM only</span>');
  if (c.gpu?.vendor) {
    const arch = c.gpu.architecture ? ' · ' + c.gpu.architecture : '';
    bits.push(`<span class="pill">${c.gpu.vendor}${arch}</span>`);
  }
  if (c.device.cores) bits.push(`<span class="pill">${c.device.cores} cores</span>`);
  if (c.device.memoryGB) bits.push(`<span class="pill">${c.device.memoryGB} GB RAM</span>`);
  // bandwidthKbps is stored in Kbps; shown in Mbps with one decimal
  if (c.network.bandwidthKbps) bits.push(`<span class="pill">${(c.network.bandwidthKbps / 1024).toFixed(1)} Mbps to CDN</span>`);
  bits.push(`<span class="pill">${c.device.ua}</span>`);
  $('caps-display').innerHTML = bits.join(' ');
}
// ─── peer mesh setup (PeerJS / WebRTC) ────────────────────────────────────
/**
 * Create the PeerJS peer (public broker) and register its lifecycle handlers.
 *
 * - 'open': records our id, enables the connect/copy buttons, detects capabilities,
 *   then advertises them to every peer already connected.
 * - 'connection': wires an incoming data connection.
 * - 'error': logs the error.
 * - 'disconnected': asks PeerJS to reconnect to the broker.
 */
function setupPeer() {
  // public PeerJS broker. for prod: pass {host, port, path, key} for your own server.
  state.peer = new Peer({ debug: 1 });

  state.peer.on('open', async id => {
    state.myId = id;
    $('my-peer-id').textContent = id;
    $('connect-btn').disabled = false;
    $('copy-link').disabled = false;
    log(`broker connected, peer id ${id}`);

    const caps = await detectCapabilities();
    // Detection always starts from an empty model list. If a model is already loaded
    // (this handler also runs when the broker connection is re-opened), keep advertising it.
    if (state.modelId) caps.models = [state.modelId];
    state.myCaps = caps;
    renderCaps();
    // Connections that opened while detection was still running were sent a null
    // advertisement by wireConnection(); send them the real one now. No-op with no peers.
    broadcast({ type: 'capability_advertise', payload: state.myCaps });
  });

  state.peer.on('connection', conn => {
    log(`incoming connection from ${conn.peer.slice(0,8)}`);
    wireConnection(conn);
  });

  state.peer.on('error', err => {
    log(`peer error: ${err.type} — ${err.message}`);
    if (err.type === 'peer-unavailable') {
      log('peer not online or wrong ID');
    }
  });

  state.peer.on('disconnected', () => {
    log('disconnected from broker, attempting reconnect…');
    state.peer.reconnect();
  });
}
/**
 * Attach the mesh handlers to a PeerJS data connection (incoming or outgoing).
 *
 * - 'open': registers the peer in state.peers and sends our capability advertisement
 *   (the payload is null if capabilities have not been detected yet).
 * - 'data': drops anything that is not a non-null object carrying the current
 *   PROTO_VERSION, otherwise hands the message to handleMessage().
 * - 'close': removes the peer and re-renders the peer list.
 * - 'error': logs the error.
 *
 * @param {object} conn PeerJS DataConnection.
 */
function wireConnection(conn) {
  const shortId = () => conn.peer.slice(0, 8);

  conn.on('open', () => {
    state.peers[conn.peer] = { conn, caps: null, lastSeen: Date.now() };
    renderPeers();
    // exchange capabilities
    send(conn, { type: 'capability_advertise', payload: state.myCaps });
    log(`connection open to ${shortId()}, advertising capabilities`);
  });

  conn.on('data', data => {
    // `typeof null` is 'object', so null must be rejected explicitly before reading data.v
    const wellFormed = data !== null && typeof data === 'object' && data.v === PROTO_VERSION;
    if (!wellFormed) {
      log(`dropping malformed message from ${shortId()}`);
      return;
    }
    handleMessage(conn.peer, data);
  });

  conn.on('close', () => {
    delete state.peers[conn.peer];
    log(`disconnected from ${shortId()}`);
    renderPeers();
  });

  conn.on('error', err => {
    log(`connection error with ${shortId()}: ${err.message}`);
  });
}
/**
 * Send one protocol message over a connection.
 * The envelope carries the protocol version (`v`) and a send timestamp (`ts`),
 * followed by the fields of `msg` (which therefore win on a key clash).
 *
 * @param {object} conn Object exposing `send(message)`.
 * @param {object} msg  Message fields, e.g. `{ type, id, payload }`.
 */
function send(conn, msg) {
  const envelope = { v: PROTO_VERSION, ts: Date.now(), ...msg };
  conn.send(envelope);
}
/**
 * Send `msg` to every registered peer whose connection is currently open.
 *
 * @param {object} msg Message fields passed through to send().
 */
function broadcast(msg) {
  Object.values(state.peers).forEach(({ conn }) => {
    if (conn.open) send(conn, msg);
  });
}
/**
 * Open a reliable data connection to the peer id typed into #connect-input.
 * Ignores an empty id, our own id, and ids already present in state.peers;
 * otherwise starts the connection, wires it, and clears the input.
 */
function connectToPeer() {
  const input = $('connect-input');
  const target = input.value.trim();

  const isSelf = target === state.myId;
  const alreadyKnown = Boolean(state.peers[target]);
  if (!target || isSelf || alreadyKnown) return;

  log(`connecting to ${target.slice(0,8)}…`);
  wireConnection(state.peer.connect(target, { reliable: true }));
  input.value = '';
}
/**
 * Re-render the connected-peers list (#peers-list) from state.peers.
 *
 * Peer ids and capability objects are supplied by remote peers, so every value is
 * HTML-escaped before being placed in the markup, and the capability fields are
 * read defensively (a peer may send an object with missing or mistyped members).
 */
function renderPeers() {
  const list = $('peers-list');
  const peers = Object.entries(state.peers);
  if (peers.length === 0) {
    list.innerHTML = '<div class="footnote">No peers connected. Share your invite link to add some.</div>';
    return;
  }

  const HTML_ENTITIES = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
  const esc = value => String(value).replace(/[&<>"']/g, ch => HTML_ENTITIES[ch]);

  list.innerHTML = peers.map(([id, p]) => {
    const c = p.caps;
    let summary = 'capabilities pending…';
    if (c) {
      const models = Array.isArray(c.models) ? c.models : [];
      summary = [
        c.device?.webgpu ? 'WebGPU' : 'WASM',
        c.gpu?.vendor || '',
        c.device?.cores ? c.device.cores + 'c' : '',
        models.length ? `models: ${models.join(', ')}` : 'no model loaded',
      ].filter(Boolean).join(' · ');
    }
    return `<div class="peer-card">
<div class="head"><code>${esc(id.slice(0,12))}…</code><span class="pill ok">connected</span></div>
<div class="caps">${esc(summary)}</div>
</div>`;
  }).join('');
}
// ─── protocol message dispatch ────────────────────────────────────────────
/**
 * Dispatch one validated protocol message received from a peer.
 *
 * Messages from a peer that is no longer (or not yet) in state.peers are dropped
 * instead of throwing. A capability advertisement is stored only when its payload
 * is an object; anything else is treated as "no capabilities yet".
 *
 * @param {string} fromPeer Sender's peer id.
 * @param {object} msg      Message envelope (`type`, optional `id` and `payload`).
 */
function handleMessage(fromPeer, msg) {
  const entry = state.peers[fromPeer];
  if (!entry) {
    log(`dropping ${msg.type} from unregistered peer`, fromPeer);
    return;
  }
  entry.lastSeen = Date.now();

  switch (msg.type) {
    case 'capability_advertise':
      entry.caps = msg.payload && typeof msg.payload === 'object' ? msg.payload : null;
      log(`capabilities received`, fromPeer);
      renderPeers();
      break;
    case 'inference_request':
      handleInferenceRequest(fromPeer, msg.payload, msg.id);
      break;
    case 'inference_chunk':
      handleInferenceChunk(fromPeer, msg.payload, msg.id);
      break;
    case 'inference_done':
      handleInferenceDone(fromPeer, msg.payload, msg.id);
      break;
    case 'inference_error':
      handleInferenceError(fromPeer, msg.payload, msg.id);
      break;
    case 'ping':
      send(entry.conn, { type: 'pong', id: msg.id });
      break;
    case 'pong':
      // could compute RTT here
      break;
    default:
      log(`unknown message type: ${msg.type}`, fromPeer);
  }
}
// ─── model loading ────────────────────────────────────────────────────────
/**
 * Download and initialise the model chosen in #model-select, then advertise it.
 *
 * Runs on WebGPU when detected, otherwise on WASM, with q4 weights. While loading,
 * the button and select are disabled and the progress bar/status follow the
 * pipeline's progress callback. On success the Run button is enabled and the
 * updated capabilities are broadcast; on failure the controls are re-enabled and
 * the error is shown as plain text.
 *
 * The Load button is clickable before capabilities have been detected; in that case
 * the click is declined with a status message rather than failing on state.myCaps.
 */
async function loadModel() {
  if (!state.myCaps) {
    $('model-status').textContent = 'Still detecting capabilities — try again in a moment.';
    return;
  }

  const modelId = $('model-select').value;
  $('load-model-btn').disabled = true;
  $('model-select').disabled = true;
  $('model-status').textContent = 'Downloading…';
  const device = state.myCaps.device.webgpu ? 'webgpu' : 'wasm';
  log(`loading ${modelId} on ${device}`);

  try {
    state.generator = await pipeline('text-generation', modelId, {
      dtype: 'q4',
      device,
      progress_callback: p => {
        if (p.status === 'progress' && p.total) {
          const pct = Math.round((p.loaded / p.total) * 100);
          $('model-progress-bar').style.width = pct + '%';
          $('model-status').textContent = `${p.file} — ${pct}%`;
        } else if (p.status === 'done') {
          $('model-status').textContent = `${p.file} ready`;
        }
      },
    });
    state.modelId = modelId;
    state.myCaps.models = [modelId];
    $('model-progress-bar').style.width = '100%';
    $('model-status').innerHTML = `<span class="pill ok">loaded</span> ${modelId} on ${device}`;
    $('run-btn').disabled = false;
    $('run-hint').textContent = 'Ready. Enter a prompt and run.';
    log(`model ready: ${modelId}`);
    // re-advertise with model list
    broadcast({ type: 'capability_advertise', payload: state.myCaps });
    renderCaps();
  } catch (err) {
    const reason = String(err?.message ?? err);
    const status = $('model-status');
    status.innerHTML = '<span class="pill err">error</span> ';
    // the reason is appended as a text node so it is never parsed as markup
    status.append(reason);
    log(`model load failed: ${reason}`);
    $('model-progress-bar').style.width = '0%';
    $('load-model-btn').disabled = false;
    $('model-select').disabled = false;
  }
}
// ─── local inference (the part that actually runs the model) ──────────────
/**
 * Run one chat completion on the locally loaded model, streaming text as it is produced.
 *
 * @param {string} prompt User prompt; a fixed system message is placed before it.
 * @param {(chunk: string, full: string) => void} [onChunk]
 *        Called for every streamed piece with the piece and the text accumulated so far.
 * @returns {Promise<{text: string, elapsedMs: number}>} Full text and wall-clock generation time.
 * @throws {Error} 'model not loaded' when no generator is available.
 */
async function runLocalInference(prompt, onChunk) {
  if (!state.generator) throw new Error('model not loaded');

  let fullText = '';
  const collect = piece => {
    fullText += piece;
    if (onChunk) onChunk(piece, fullText);
  };
  const streamer = new TextStreamer(state.generator.tokenizer, {
    skip_prompt: true,
    skip_special_tokens: true,
    callback_function: collect,
  });

  const chat = [
    { role: 'system', content: 'You are a helpful assistant. Be concise.' },
    { role: 'user', content: prompt },
  ];
  const generation = {
    max_new_tokens: 200,
    temperature: 0.7,
    do_sample: true,
    streamer,
  };

  const startedAt = performance.now();
  await state.generator(chat, generation);
  return { text: fullText, elapsedMs: Math.round(performance.now() - startedAt) };
}
// ─── coordinator: run inference across the mesh ──────────────────────────
/**
 * Coordinator entry point: run the prompt in #prompt-input across the mesh.
 *
 * Workers are this browser plus every peer that advertises a loaded model and whose
 * connection is still open (the same openness check broadcast() applies).
 *
 * - ensemble: every worker runs the full model on the same prompt; the local result
 *   streams into its card, peer results arrive later through the inference_* handlers.
 * - pipeline / moe: protocol-only; a request is dispatched to each peer, whose stub
 *   handler answers `unimplemented`.
 *
 * A send that throws marks only that peer's card as failed, so the Run button is
 * always re-enabled. The path that returns early for lack of peers also removes its
 * inflight entry.
 */
async function runInference() {
  const prompt = $('prompt-input').value.trim();
  if (!prompt) return;
  const mode = document.querySelector('input[name=mode]:checked').value;
  const reqId = crypto.randomUUID();

  $('run-btn').disabled = true;
  $('run-hint').textContent = `Running ${mode}…`;
  $('results').innerHTML = '';

  const peersWithModel = Object.entries(state.peers)
    .filter(([, p]) => p.conn?.open && p.caps?.models?.length > 0);
  log(`request ${reqId.slice(0,8)} · mode=${mode} · ${peersWithModel.length + 1} workers (self + peers)`);
  state.inflight[reqId] = { results: {}, mode, prompt, startedAt: Date.now() };

  // Send one inference_request; a failure is reported on that peer's result card.
  const dispatch = (pid, conn, payload) => {
    try {
      send(conn, { type: 'inference_request', id: reqId, payload });
    } catch (err) {
      updateResult(reqId, pid, `error: ${err.message}`, 'error');
    }
  };

  if (mode === 'ensemble') {
    // create result slots: self first, then every peer worker
    addResult(reqId, state.myId, 'this browser', 'running');
    for (const [pid, p] of peersWithModel) {
      addResult(reqId, pid, p.caps.models[0] + ' on ' + (p.caps.gpu?.vendor || 'cpu'), 'running');
    }
    // dispatch to peers
    for (const [pid, p] of peersWithModel) {
      dispatch(pid, p.conn, { mode: 'ensemble', prompt, params: { max_new_tokens: 200, temperature: 0.7 } });
    }
    // run locally with streaming
    try {
      const out = await runLocalInference(prompt, (chunk, full) => {
        updateResult(reqId, state.myId, full, 'running');
      });
      updateResult(reqId, state.myId, out.text, 'done', `${out.elapsedMs}ms · local`);
    } catch (err) {
      updateResult(reqId, state.myId, `error: ${err.message}`, 'error');
    }
    // peer results arrive async via handleInferenceDone
    checkComplete(reqId);
  } else if (mode === 'pipeline' || mode === 'moe') {
    // protocol-only modes: dispatch real request, expect peers to return unimplemented
    // this exercises the wiring; replace handler stubs to ship the real thing
    addResult(reqId, state.myId, `${mode} coordinator`, 'running');
    if (peersWithModel.length === 0) {
      updateResult(reqId, state.myId, `${mode} mode requires at least one peer with a loaded model.\n\nFor a real implementation, see the handle${mode === 'pipeline' ? 'PipelineStage' : 'MoeExpert'} stub at the bottom of the script. The protocol is wired — only the inference handler is TODO.`, 'done');
      // nothing was dispatched, so nothing will ever complete this request
      delete state.inflight[reqId];
      $('run-btn').disabled = false;
      $('run-hint').textContent = 'Ready.';
      return;
    }
    for (const [pid, p] of peersWithModel) {
      addResult(reqId, pid, `${mode} stage on ${p.caps.gpu?.vendor || 'cpu'}`, 'running');
      dispatch(pid, p.conn, mode === 'pipeline'
        ? { mode: 'pipeline_stage', prompt, layers: [0, 7], input_hidden: null }
        : { mode: 'moe_expert', prompt, expert_id: 'general', input_hidden: null });
    }
    updateResult(reqId, state.myId, `Dispatched to ${peersWithModel.length} peer(s). Awaiting responses…`, 'done');
    setTimeout(() => checkComplete(reqId), 5000);
  }
}
/**
 * Append a result card for one worker of one request to #results.
 *
 * The card is built from DOM nodes with textContent: `label` can contain text
 * advertised by a remote peer (model name, GPU vendor), so it must not be parsed as
 * HTML. The structure updateResult() relies on is kept: element id
 * `r-<reqId>-<peerId>`, a `[data-status]` pill and an initially empty `[data-text]`.
 *
 * @param {string} reqId  Request id.
 * @param {string} peerId Worker's peer id (first 8 characters are displayed).
 * @param {string} label  Human-readable description of the worker.
 * @param {string} status 'running', 'done', or anything else (rendered as an error pill).
 */
function addResult(reqId, peerId, label, status) {
  const card = document.createElement('div');
  card.className = 'result';
  card.id = `r-${reqId}-${peerId}`;

  const head = document.createElement('div');
  head.className = 'head';

  const who = document.createElement('span');
  const shortId = document.createElement('code');
  shortId.textContent = String(peerId).slice(0, 8);
  who.append(`${label} `, shortId);

  const pill = document.createElement('span');
  pill.className = `pill ${status === 'running' ? '' : status === 'done' ? 'ok' : 'err'}`;
  pill.setAttribute('data-status', '');
  pill.textContent = status;

  // left empty on purpose: the stylesheet shows a placeholder for an empty .text
  const text = document.createElement('div');
  text.className = 'text';
  text.setAttribute('data-text', '');

  head.append(who, pill);
  card.append(head, text);
  $('results').appendChild(card);
}
/**
 * Update the text and status pill of an existing result card.
 * Silently does nothing when no card exists for this request/peer pair.
 *
 * @param {string} reqId   Request id.
 * @param {string} peerId  Worker's peer id.
 * @param {string} text    New body text (set as plain text).
 * @param {string} status  'running', 'done', or anything else (styled as an error).
 * @param {string} [meta]  Pill caption; falls back to `status` when falsy.
 */
function updateResult(reqId, peerId, text, status, meta) {
  const card = $(`r-${reqId}-${peerId}`);
  if (!card) return;

  card.querySelector('[data-text]').textContent = text;

  let variant = 'err';
  if (status === 'running') variant = '';
  else if (status === 'done') variant = 'ok';

  const pill = card.querySelector('[data-status]');
  pill.textContent = meta || status;
  pill.className = `pill ${variant}`;
}
/**
 * Called after any worker of a request finishes: re-enable the Run button and
 * release the request's bookkeeping once it is no longer needed.
 *
 * The inflight entry is removed as soon as no result card of the request is still
 * in the running state (pill has neither the `ok` nor the `err` class), or when the
 * request is older than 60 s. While workers are still running, a one-shot timer is
 * armed so the entry is removed at the 60 s mark even if no further message arrives.
 * That timer only deletes the entry; it never touches the UI.
 *
 * Only ids that are own keys of state.inflight are accepted, because `reqId` may
 * come straight from a peer message.
 *
 * @param {string} reqId Request id.
 */
function checkComplete(reqId) {
  const REQUEST_TTL_MS = 60000;
  if (!Object.prototype.hasOwnProperty.call(state.inflight, reqId)) return;
  const inflight = state.inflight[reqId];
  const elapsed = Date.now() - inflight.startedAt;

  const pills = document.querySelectorAll(`[id^="r-${reqId}-"] [data-status]`);
  const stillRunning = Array.from(pills).some(
    pill => !pill.classList.contains('ok') && !pill.classList.contains('err')
  );

  if (!stillRunning || elapsed > REQUEST_TTL_MS) {
    clearTimeout(inflight.expiryTimer);
    delete state.inflight[reqId];
  } else if (!inflight.expiryTimer) {
    inflight.expiryTimer = setTimeout(() => {
      delete state.inflight[reqId];
    }, REQUEST_TTL_MS - elapsed);
  }

  $('run-btn').disabled = false;
  $('run-hint').textContent = 'Ready.';
}
// ─── inference request handler (we are a worker for someone else) ─────────
/**
 * Worker side: serve an inference request received from another peer.
 *
 * Replies on the requester's connection with `inference_chunk` messages (ensemble
 * mode; each chunk carries the full text so far), then `inference_done`, or with
 * `inference_error` carrying a reason. Requests from a peer that is no longer
 * registered are dropped, malformed payloads are answered with an error, and replies
 * are skipped once the connection is closed (e.g. the requester left mid-generation).
 *
 * @param {string} fromPeer Requester's peer id.
 * @param {object} payload  Request body: `mode` plus mode-specific fields.
 * @param {string} reqId    Request id echoed in every reply.
 */
async function handleInferenceRequest(fromPeer, payload, reqId) {
  const conn = state.peers[fromPeer]?.conn;
  if (!conn) {
    log('inference request from unregistered peer dropped', fromPeer);
    return;
  }
  const reply = (type, body) => {
    if (conn.open) send(conn, { type, id: reqId, payload: body });
  };

  if (!payload || typeof payload !== 'object') {
    log('inference request without a payload', fromPeer);
    reply('inference_error', { reason: 'malformed request' });
    return;
  }
  log(`inference request: mode=${payload.mode}`, fromPeer);

  if (!state.generator) {
    reply('inference_error', { reason: 'no model loaded' });
    return;
  }

  try {
    if (payload.mode === 'ensemble') {
      if (typeof payload.prompt !== 'string') {
        reply('inference_error', { reason: 'prompt must be a string' });
        return;
      }
      const out = await runLocalInference(payload.prompt, (chunk, full) => {
        reply('inference_chunk', { text: full });
      });
      reply('inference_done', { text: out.text, elapsedMs: out.elapsedMs, model: state.modelId });
    } else if (payload.mode === 'pipeline_stage') {
      reply('inference_done', await handlePipelineStage(payload));
    } else if (payload.mode === 'moe_expert') {
      reply('inference_done', await handleMoeExpert(payload));
    } else {
      reply('inference_error', { reason: `unknown mode: ${payload.mode}` });
    }
  } catch (err) {
    reply('inference_error', { reason: err.message });
  }
}
/**
 * Coordinator side: a peer streamed partial output; show it on that peer's
 * result card and keep the card in the running state.
 */
function handleInferenceChunk(fromPeer, payload, reqId) {
  const { text } = payload;
  updateResult(reqId, fromPeer, text, 'running');
}
/**
 * Coordinator side: a peer finished a request. Shows the final text (or, for stub
 * handlers, the returned `unimplemented` object) on that peer's card, then runs the
 * completion check.
 *
 * A missing payload is treated as an empty result, and a missing `elapsedMs` is
 * shown as '?' in the pill, matching what the log line already does.
 *
 * @param {string} fromPeer Worker's peer id.
 * @param {object} payload  `{ text, elapsedMs, model }` or an `unimplemented` stub reply.
 * @param {string} reqId    Request id.
 */
function handleInferenceDone(fromPeer, payload, reqId) {
  const result = payload && typeof payload === 'object' ? payload : {};
  const elapsed = result.elapsedMs || '?';
  log(`inference done in ${elapsed}ms`, fromPeer);
  if (result.unimplemented) {
    updateResult(reqId, fromPeer, `[${result.mode}] TODO — handler is a stub.\nReturned: ${JSON.stringify(result, null, 2)}`, 'done', 'unimplemented');
  } else {
    updateResult(reqId, fromPeer, result.text ?? '', 'done', `${elapsed}ms · ${result.model || ''}`);
  }
  checkComplete(reqId);
}
/**
 * Coordinator side: a peer reported a failure. Logs the reason, marks that peer's
 * result card as an error, then runs the completion check.
 */
function handleInferenceError(fromPeer, payload, reqId) {
  const { reason } = payload;
  log(`inference error: ${reason}`, fromPeer);
  updateResult(reqId, fromPeer, `error: ${reason}`, 'error');
  checkComplete(reqId);
}
// ─── pipeline / MoE handlers (TODO: real implementation needs modeling fork) ─
// Pipeline-stage handler — STUB. It does no inference; it only echoes the requested
// layer range inside an `unimplemented` reply.
//
// Intended contract (TODO):
//   request: { layers: [start, end],
//              input_hidden: Float32Array, or null for the first stage,
//              prompt: string (first stage only) }
//   a real implementation should:
//     1. load only the weights for the assigned layer range
//        (custom transformers.js fork; ONNX export per stage)
//     2. when input_hidden is null: run embedding + the assigned layers, output the hidden state
//     3. otherwise: take input_hidden, run the assigned layers, output the hidden state
//     4. when the range includes lm_head: run final norm + lm_head + sampling,
//        output the token + new prompt for the next iteration
//   the coordinator chains stages by forwarding output_hidden to the next peer in the pipeline.
async function handlePipelineStage(payload) {
  const { layers } = payload;
  const stubReply = {
    unimplemented: true,
    mode: 'pipeline_stage',
    layers_requested: layers,
    note: 'Splitting transformers.js inference by layer requires either (a) ONNX exports per stage, or (b) a forked modeling file that exposes hidden states between blocks. Neither is built-in. See https://github.com/huggingface/transformers.js for the modeling layer.',
  };
  return stubReply;
}
// MoE-expert handler — STUB. It does no inference; it only echoes the requested
// expert id inside an `unimplemented` reply.
//
// Intended contract (TODO):
//   request: { expert_id: string, input_hidden: Float32Array, prompt: string (first call) }
//   a real implementation should:
//     1. load only the expert weights for expert_id
//        (a LoRA adapter, a sub-FFN, or a small specialist model)
//     2. run the expert on the input hidden state
//     3. return output_hidden plus a confidence/gating score
//   the coordinator (router) picks the top-k experts per token from gating logits.
async function handleMoeExpert(payload) {
  const { expert_id } = payload;
  const stubReply = {
    unimplemented: true,
    mode: 'moe_expert',
    expert_id,
    note: 'MoE expert hosting works best by treating each peer as a LoRA-adapted specialist of a small base model. Each peer keeps the base in memory (cheap) and swaps adapters per request. Router picks top-k experts using a gating model. Real Mixtral-style sharding needs per-expert ONNX exports.',
  };
  return stubReply;
}
// ─── UI wiring ────────────────────────────────────────────────────────────
// Connect: button click, or Enter inside the peer-id field.
$('connect-btn').addEventListener('click', connectToPeer);
$('connect-input').addEventListener('keydown', e => { if (e.key === 'Enter') connectToPeer(); });

// Copy invite link: this page's URL with ?peer=<our id>. The id is URL-encoded.
// If the Clipboard API is missing or the write is rejected, the link is offered
// in a prompt dialog for manual copying instead of failing silently.
$('copy-link').addEventListener('click', async () => {
  const btn = $('copy-link');
  const url = `${location.origin}${location.pathname}?peer=${encodeURIComponent(state.myId)}`;
  try {
    await navigator.clipboard.writeText(url);
    btn.textContent = 'Copied!';
    setTimeout(() => { btn.textContent = 'Copy invite link'; }, 1500);
  } catch (err) {
    log(`could not copy invite link: ${err.message}`);
    window.prompt('Copy this invite link:', url);
  }
});

$('load-model-btn').addEventListener('click', loadModel);
$('run-btn').addEventListener('click', runInference);

// Mode selector: highlight the label of the checked radio. Listening for `change`
// on the radios runs once per selection, whereas a click handler on the label also
// fires for the synthetic click the label forwards to its input.
document.querySelectorAll('.modes input[name=mode]').forEach(radio => {
  radio.addEventListener('change', () => {
    document.querySelectorAll('.modes label').forEach(label => label.classList.remove('on'));
    radio.closest('label').classList.add('on');
  });
});
// auto-connect if ?peer= in URL
const urlParams = new URLSearchParams(location.search);
const incomingPeer = urlParams.get('peer');
if (incomingPeer) {
  log(`auto-connect target from URL: ${incomingPeer.slice(0,8)}`);
  $('connect-input').value = incomingPeer;
  // Our own id must exist before we can connect: check after 500 ms,
  // then every 200 ms until the broker has assigned it.
  (function waitForBroker(delayMs) {
    setTimeout(() => {
      if (state.myId) {
        connectToPeer();
      } else {
        waitForBroker(200);
      }
    }, delayMs);
  })(500);
}
// ─── go ───────────────────────────────────────────────────────────────────
// Start the broker connection, then note the boot in the mesh log.
setupPeer();
log('BrowserMesh booted. waiting for peer ID from broker…');
</script>
</body>
</html>