| import os |
| import time |
| import random |
| import queue |
| from datetime import datetime, timezone |
| import html |
| from threading import Thread |
| from typing import Any |
|
|
| import gradio as gr |
| import torch |
| import torch.nn.functional as F |
| try: |
| from huggingface_hub import login |
| except ImportError: |
| login = None |
| from transformers import AutoTokenizer, AutoModelForCausalLM, StoppingCriteria, StoppingCriteriaList |
| try: |
| from transformers import TextIteratorStreamer |
| except ImportError: |
| from transformers.generation.streamers import TextIteratorStreamer |
|
|
| |
# CPU-only tuning, applied when torch reports no CUDA device.
# NOTE(review): indentation was lost in SOURCE; the try/except below is
# assumed to belong inside this `if` -- confirm against the original file.
if not torch.cuda.is_available():
    # Limit torch to two intra-op threads.
    torch.set_num_threads(2)
    # Best-effort request to flush denormal floats; any failure is ignored.
    try:
        torch.set_flush_denormal(True)
    except Exception:
        pass
|
|
| |
| try: |
| import tokenmonster |
| except ImportError: |
| tokenmonster = None |
|
|
class InterruptCallback(StoppingCriteria):
    """Stopping criterion backed by a mutable boolean flag.

    Calling the instance reports the current value of ``stop_signal``; code
    holding a reference to the instance requests a stop by setting that
    attribute to ``True``.
    """

    def __init__(self) -> None:
        # Starts cleared; callers flip it to request a stop.
        self.stop_signal = False

    def __call__(self, input_ids, scores, **kwargs):
        """Return the stop flag; ``input_ids``, ``scores`` and ``kwargs`` are ignored."""
        stop_requested = self.stop_signal
        return stop_requested
|
|
# Single module-level stop flag shared by the generation helpers in this file.
interrupt_callback = InterruptCallback()
|
|
| |
# Branding and model registry.
APP_TITLE = "Stentor Labs"
# Hub namespace of the first-party models; overridable through HF_MODEL_OWNER.
MODEL_OWNER = os.getenv("HF_MODEL_OWNER", "StentorLabs")
FLAGSHIP_MODELS = ["Portimbria-150M", "Stentor3-50M"]


# Display name -> Hub repo id. First-party repo names equal their display names.
STENTOR_MODELS = {
    display_name: f"{MODEL_OWNER}/{display_name}"
    for display_name in (
        "Portimbria-150M",
        "Stentor3-50M",
        "Stentor3-20M",
        "Stentor2-30M",
        "Stentor2-12M",
        "Stentor-30M",
        "Stentor-12M",
        "Stentor-30M-Instruct",
        "Stentor-12M-Instruct",
    )
}


# Third-party models: display name -> Hub repo id.
ARENA_MODELS = {
    "DistilGPT2": "distilbert/distilgpt2",
    "Pythia-14M": "EleutherAI/pythia-14m",
    "Pythia-31M": "EleutherAI/pythia-31m",
    "Pythia-70M": "EleutherAI/pythia-70m",
    "gpt2 small": "openai-community/gpt2",
    "SmolLM2-135M": "HuggingFaceTB/SmolLM2-135M",
    "NanoWhale-100M-Base": "HuggingFaceTB/nanowhale-100m-base",
    "Pythia-160M": "EleutherAI/pythia-160m",
    "OPT-125M": "facebook/opt-125m",
    "GPT-Neo 125M": "EleutherAI/gpt-neo-125M",
}


# Union of both registries; first-party entries come first.
ALL_MODELS = STENTOR_MODELS | ARENA_MODELS
DEFAULT_MODEL = "Portimbria-150M"
|
|
| def _max_tokens_cap(model_key: str) -> int: |
| if model_key.lower().startswith("portimbria") or model_key.lower().startswith("stentor3-"): |
| return 4096 |
| if model_key.lower().startswith("stentor2-"): |
| return 1024 |
| return 512 |
|
|
| |
# Default sampling settings.
DEFAULT_TEMP = 0.8
DEFAULT_REP_PENALTY = 1.35
DEFAULT_TOP_P = 0.9
DEFAULT_MAX_TOKENS = 100


# Token cap of the default model, computed once at import time.
INITIAL_MAX_TOKENS = _max_tokens_cap(DEFAULT_MODEL)
|
|
# Named sampling presets. They differ only in temperature and top_p; the token
# budget and repetition penalty come from the module defaults.
PRESETS = {
    label: {
        "temperature": preset_temperature,
        "top_p": preset_top_p,
        "max_tokens": DEFAULT_MAX_TOKENS,
        "repetition_penalty": DEFAULT_REP_PENALTY,
    }
    for label, preset_temperature, preset_top_p in (
        ("🎨 Creative", 1.1, 0.95),
        ("⚖️ Balanced", 0.8, 0.9),
        ("🎯 Focused", 0.6, 0.9),
    )
}


# Caption advising which preset to pick.
MODE_RECOMMENDATION_HTML = "".join(
    (
        '<p class="mode-caption">',
        '<strong>Balanced</strong> or <strong>Focused</strong> modes are recommended; ',
        'Creative mode can get chaotic.',
        '</p>',
    )
)


# (icon, prompt text) pairs.
EXAMPLE_PROMPTS = [
    ("📖", "Once upon a time in a world where"),
    ("📖", "The last explorer on Earth discovered"),
    ("🔬", "The theory of relativity states that"),
    ("🔬", "Scientists recently discovered that"),
    ("💻", "def quicksort(arr):\n "),
    ("💻", "class NeuralNetwork:\n def __init__"),
    ("🧠", "The most important thing about AI is"),
    ("🧠", "The philosophy of consciousness suggests"),
]


# repo id -> (tokenizer, model); filled lazily by _get_model.
_model_cache: dict[str, tuple[Any, Any]] = {}
|
|
| def _hf_auth_token(): |
| return os.getenv("HF_TOKEN") or os.getenv("HUGGING_FACE_HUB_TOKEN") |
|
|
| |
# Import-time Hub authentication: log in once when a token is configured,
# otherwise warn that private/gated repositories will not load.
_token = _hf_auth_token()
if not _token:
    print("[Stentor] WARNING: No HF_TOKEN found in secrets. Private/Gated models will fail to load.")
else:
    print(f"[Stentor] Authentication token found (ending in ...{_token[-4:]}).")
    # `login` is None when huggingface_hub could not be imported.
    if login:
        try:
            login(token=_token)
        except Exception as e:
            # A failed global login is reported but does not stop start-up.
            print(f"[Stentor] Global login failed: {e}")
|
|
|
|
def _hf_repo_kwargs() -> dict[str, Any]:
    """Return ``{"token": <token>}`` when a Hub token is configured, else ``{}``."""
    token = _hf_auth_token()
    if not token:
        return {}
    return {"token": token}
|
|
| |
def _load_tokenizer(model_id: str):
    """Load the tokenizer for *model_id*, retrying once with relaxed settings.

    Repos whose id contains "stentor" or "portimbria" (case-insensitive) are
    loaded with ``trust_remote_code=True`` and ``use_fast=False``; every other
    repo gets ``trust_remote_code=False`` and ``use_fast=True``.

    Raises:
        RuntimeError: if both attempts fail. The message carries both errors
            and the second failure is attached as ``__cause__``.
    """
    lowered = model_id.lower()
    is_stentor = any(tag in lowered for tag in ("stentor", "portimbria"))

    kwargs = {"trust_remote_code": is_stentor, "use_fast": not is_stentor}
    try:
        return AutoTokenizer.from_pretrained(model_id, **_hf_repo_kwargs(), **kwargs)
    except Exception as first_err:
        # NOTE(review): the retry enables trust_remote_code for *any* repo,
        # including third-party ones -- confirm this escalation is intended.
        try:
            return AutoTokenizer.from_pretrained(model_id, **_hf_repo_kwargs(), trust_remote_code=True)
        except Exception as second_err:
            # Chain explicitly so the traceback shows the real cause.
            raise RuntimeError(f"Tokenizer fail: {first_err} -> {second_err}") from second_err
|
|
|
|
def _load_model(model_id: str):
    """Load the causal LM for *model_id*, with one fallback attempt.

    The dtype is float32 without CUDA, bfloat16 with CUDA that supports it and
    float16 otherwise. If the first load raises, the load is retried in
    float32 with ``trust_remote_code=True``; an error from the retry propagates.
    """
    lowered = model_id.lower()
    is_stentor = "stentor" in lowered or "portimbria" in lowered

    if not torch.cuda.is_available():
        dtype = torch.float32
        print(f"[Stentor] Using float32 for {model_id} on CPU for maximum speed.")
    elif torch.cuda.is_bf16_supported():
        dtype = torch.bfloat16
        print(f"[Stentor] Using bfloat16 for {model_id} on CUDA.")
    else:
        dtype = torch.float16
        print(f"[Stentor] Using float16 for {model_id} on CUDA.")

    def attempt(load_dtype, trust):
        # One from_pretrained call; the auth kwargs are re-read per attempt.
        return AutoModelForCausalLM.from_pretrained(
            model_id,
            dtype=load_dtype,
            low_cpu_mem_usage=True,
            trust_remote_code=trust,
            **_hf_repo_kwargs(),
        )

    try:
        return attempt(dtype, is_stentor)
    except Exception as e:
        print(f"[Stentor] Failed to load {model_id} with {dtype}. Retrying with float32 and trust_remote_code=True. Error: {e}")
        return attempt(torch.float32, True)
|
|
|
|
def _get_model(model_id: str):
    """Return the cached ``(tokenizer, model)`` pair for *model_id*, loading on first use.

    On a cache miss the tokenizer and model are loaded, the model is moved to
    CUDA when available (``_load_model`` picks a CUDA dtype but does not place
    the weights), switched to eval mode and warmed up with a one-token
    generation. Warm-up failures are logged and ignored; load failures
    propagate to the caller.
    """
    cached = _model_cache.get(model_id)
    if cached is not None:
        return cached

    print(f"[Stentor] Loading {model_id}...")
    tok = _load_tokenizer(model_id)
    mdl = _load_model(model_id)
    if torch.cuda.is_available():
        # Without this the half-precision weights chosen for CUDA stay on CPU.
        mdl = mdl.to("cuda")
    mdl.eval()

    print(f"[Stentor] Warming up {model_id}...")
    try:
        # Same input preparation as real requests: drops token_type_ids and
        # puts the tensors on the model's device.
        dummy = _to_model_device(_prep_inputs(tok, "Hello"), mdl)
        with torch.no_grad():
            mdl.generate(**dummy, max_new_tokens=1, pad_token_id=tok.eos_token_id)
        print(f"[Stentor] {model_id} warmup complete")
    except Exception as e:
        # Warm-up is best-effort; a failure must not prevent serving.
        print(f"[Stentor] {model_id} warmup skipped ({e})")

    _model_cache[model_id] = (tok, mdl)
    print(f"[Stentor] {model_id} ready.")
    return tok, mdl
|
|
|
|
| def _prep_inputs(tokenizer, prompt): |
| inputs = tokenizer(prompt, return_tensors="pt") |
| inputs.pop("token_type_ids", None) |
| return inputs |
|
|
|
|
| def _to_model_device(inputs: dict[str, Any], model): |
| device = next(model.parameters()).device |
| for k, v in inputs.items(): |
| if hasattr(v, "to"): |
| inputs[k] = v.to(device) |
| if "attention_mask" not in inputs and "input_ids" in inputs: |
| inputs["attention_mask"] = torch.ones_like(inputs["input_ids"]) |
| return inputs |
|
|
|
|
| def _decode_response_only(tokenizer, full_ids, input_ids): |
| prompt_len = input_ids.shape[1] |
| new_ids = full_ids[0][prompt_len:] |
| return tokenizer.decode(new_ids, skip_special_tokens=True) |
|
|
|
|
def _generate_and_stream(repo_id, prompt, max_tokens, temperature, top_p, repetition_penalty):
    """Generate text for *prompt* and yield the accumulated output as it grows.

    Generation runs in a daemon thread feeding a ``TextIteratorStreamer``;
    each yielded value is the full text so far with the tokenizer's
    eos/pad/bos/unk strings removed. The shared stop flag is cleared on entry.

    If the worker raises, the stream is closed so this generator cannot block
    forever, and one final ``"❌ <message>"`` frame is yielded. The stop flag
    is raised only when the stream did not run to completion (user interrupt
    or the consumer abandoning the generator), so the error frame reaches
    consumers that check the flag on every frame.
    """
    tokenizer, model = _get_model(repo_id)
    interrupt_callback.stop_signal = False
    inputs = _to_model_device(_prep_inputs(tokenizer, prompt), model)

    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True)
    error_container = [None]

    def worker_fn():
        try:
            with torch.inference_mode():
                model.generate(
                    **inputs,
                    max_new_tokens=int(max_tokens),
                    temperature=float(temperature),
                    top_p=float(top_p),
                    repetition_penalty=float(repetition_penalty),
                    # Near-zero temperatures fall back to greedy decoding.
                    do_sample=float(temperature) >= 0.05,
                    pad_token_id=tokenizer.eos_token_id,
                    stopping_criteria=StoppingCriteriaList([interrupt_callback]),
                    streamer=streamer,
                )
        except Exception as e:
            error_container[0] = str(e)
            # generate() only ends the stream on success; end it here so the
            # consuming loop below wakes up instead of waiting indefinitely.
            streamer.end()

    worker = Thread(target=worker_fn, daemon=True)
    worker.start()
    full_text = ""

    special_tokens_to_remove = [
        tok
        for tok in (tokenizer.eos_token, tokenizer.pad_token, tokenizer.bos_token, tokenizer.unk_token)
        if tok is not None
    ]

    stream_finished = False
    try:
        for chunk in streamer:
            if interrupt_callback.stop_signal:
                break
            for st in special_tokens_to_remove:
                chunk = chunk.replace(st, "")
            full_text += chunk
            yield full_text
        else:
            # The streamer delivered its end marker: generation is over.
            stream_finished = True
    finally:
        if not stream_finished:
            # Interrupted or abandoned mid-stream: tell the worker to stop.
            interrupt_callback.stop_signal = True
        worker.join(timeout=1.0)

    if error_container[0]:
        yield f"❌ {error_container[0]}"
|
|
|
|
def parallel_config_generate(prompt, configs):
    """Run one generation per config, sequentially, streaming partial results.

    Each config is a mapping with ``model_key``, ``max_tokens``,
    ``temperature``, ``top_p`` and ``repetition_penalty``. Yields
    ``(results, elapsed_seconds)`` where ``results`` is one shared list with a
    text slot per config, updated in place. A blank prompt yields nothing.
    A failure for one config is written to its slot as ``"❌ <message>"``.
    """
    text = (prompt or "").strip()
    if not text:
        return

    results = [""] * len(configs)
    t0 = time.perf_counter()

    interrupt_callback.stop_signal = False
    for i, cfg in enumerate(configs):
        if interrupt_callback.stop_signal:
            break
        m_key = cfg["model_key"]
        error_container = [None]
        try:
            interrupt_callback.stop_signal = False
            tokenizer, model = _get_model(ALL_MODELS[m_key])
            actual_max = min(int(cfg["max_tokens"]), _max_tokens_cap(m_key))
            inputs = _to_model_device(_prep_inputs(tokenizer, text), model)

            streamer = TextIteratorStreamer(tokenizer, skip_prompt=True)

            # Bind this iteration's objects as defaults: a worker that outlives
            # the join timeout must never touch the next iteration's streamer
            # or error slot.
            def worker_fn(
                cfg=cfg,
                inputs=inputs,
                actual_max=actual_max,
                tokenizer=tokenizer,
                model=model,
                streamer=streamer,
                error_container=error_container,
            ):
                try:
                    with torch.inference_mode():
                        # Fresh seed per run so repeated configs differ.
                        torch.manual_seed(random.randint(0, 2**31 - 1))
                        model.generate(
                            **inputs,
                            max_new_tokens=actual_max,
                            temperature=float(cfg["temperature"]),
                            top_p=float(cfg["top_p"]),
                            repetition_penalty=float(cfg["repetition_penalty"]),
                            do_sample=float(cfg["temperature"]) >= 0.05,
                            pad_token_id=tokenizer.eos_token_id,
                            stopping_criteria=StoppingCriteriaList([interrupt_callback]),
                            streamer=streamer,
                        )
                except Exception as e:
                    error_container[0] = str(e)
                    # generate() only ends the stream on success; end it here
                    # so the consuming loop cannot block forever.
                    streamer.end()

            worker = Thread(target=worker_fn, daemon=True)
            worker.start()

            special_tokens_to_remove = [
                tok
                for tok in (tokenizer.eos_token, tokenizer.pad_token, tokenizer.bos_token, tokenizer.unk_token)
                if tok is not None
            ]

            try:
                for chunk in streamer:
                    if interrupt_callback.stop_signal:
                        break
                    for st in special_tokens_to_remove:
                        chunk = chunk.replace(st, "")
                    results[i] += chunk
                    yield results, time.perf_counter() - t0
            except GeneratorExit:
                # Consumer went away mid-stream: stop the worker too.
                interrupt_callback.stop_signal = True
                raise
            finally:
                worker.join(timeout=1.0)

            if error_container[0]:
                results[i] = f"❌ {error_container[0]}"
                yield results, time.perf_counter() - t0

            if interrupt_callback.stop_signal:
                break

        except Exception as e:
            results[i] = f"❌ {str(e)}"
            yield results, time.perf_counter() - t0
|
|
|
|
| |
def chat_generate(messages_state, user_message, model_key, max_tok, temp, tp, rep_penalty):
    """Append *user_message* to the chat, stream the reply, and yield ``(html, state)``.

    The conversation is flattened into ``<role>content</role>`` lines followed
    by an opening ``<assistant>`` tag and sent through
    ``parallel_config_generate`` as a single config. While streaming, the
    partial reply is rendered but not stored; the stripped final reply is
    appended to ``messages_state``, which is mutated in place.

    An empty *user_message* re-renders the existing transcript unchanged
    instead of blanking it (an empty transcript still renders as ``""``).
    """
    if messages_state is None:
        messages_state = []
    if not user_message:
        yield _render_chat_html(messages_state, model_key), messages_state
        return

    messages_state.append({"role": "user", "content": user_message})

    turns = []
    for msg in messages_state:
        role = msg.get("role", "user")
        content = msg.get("content", "")
        turns.append(f"<{role}>{content}</{role}>\n")
    conversation = "".join(turns) + "<assistant>"

    config = [{
        "model_key": model_key,
        "max_tokens": max_tok,
        "temperature": temp,
        "top_p": tp,
        "repetition_penalty": rep_penalty,
    }]

    assistant_response = ""
    for results, _ in parallel_config_generate(conversation, config):
        assistant_response = results[0]
        # Preview only: the reply joins the state once generation has ended.
        temp_messages = messages_state + [{"role": "assistant", "content": assistant_response}]
        yield _render_chat_html(temp_messages, model_key), messages_state

    messages_state.append({"role": "assistant", "content": assistant_response.strip()})
    yield _render_chat_html(messages_state, model_key), messages_state
|
|
|
|
| def _render_chat_html(messages, model_key): |
| html_parts = [] |
| for msg in messages: |
| role = msg.get("role", "user") |
| content = msg.get("content", "") |
| if role == "user": |
| html_parts.append( |
| f'<div style="margin-bottom:8px;padding:8px 12px;background:#0d1829;border:1px solid #1a2744;' |
| f'border-radius:8px;"><span style="font-weight:700;color:#38bdf8;font-size:11px;' |
| f'text-transform:uppercase;">You</span><p style="margin:4px 0 0;color:#ffffff;font-size:13px;">' |
| f'{content}</p></div>' |
| ) |
| else: |
| html_parts.append( |
| f'<div style="margin-bottom:8px;padding:8px 12px;background:#080d1a;border:1px solid rgba(245,158,11,0.25);' |
| f'border-radius:8px;"><span style="font-weight:700;color:#f59e0b;font-size:11px;' |
| f'text-transform:uppercase;">{model_key}</span><p style="margin:4px 0 0;color:#ffffff;font-size:13px;">' |
| f'{content}</p></div>' |
| ) |
| return "".join(html_parts) |
| |
|
|
def chat_clear():
    """Reset the chat: return an empty message list, the placeholder HTML and an empty string."""
    container_open = (
        '<div style="padding:20px;text-align:center;background:#060a14;border:1px solid #1e2d45;'
        'border-radius:10px;min-height:200px;display:flex;flex-direction:column;'
        'align-items:center;justify-content:center;">'
    )
    hint = (
        '<p style="font-family:Sora,sans-serif;font-size:13px;color:#374151;margin:0;">'
        'Start a conversation by typing a message below.</p>'
    )
    return [], container_open + hint + '</div>', ""
|
|
|
|
def apply_standard_preset(name: str):
    """Return ``(max_tokens, temperature, top_p, repetition_penalty)`` for preset *name*.

    Raises ``KeyError`` when *name* is not a key of ``PRESETS``.
    """
    preset = PRESETS[name]
    ordered_fields = ("max_tokens", "temperature", "top_p", "repetition_penalty")
    return tuple(preset[field] for field in ordered_fields)
|
|
|
|
def apply_sweep_preset(name: str):
    """Return ``(max_tokens, top_p, repetition_penalty)`` for preset *name*.

    The preset's temperature is deliberately left out of the result. Raises
    ``KeyError`` when *name* is not a key of ``PRESETS``.
    """
    preset = PRESETS[name]
    ordered_fields = ("max_tokens", "top_p", "repetition_penalty")
    return tuple(preset[field] for field in ordered_fields)
|
|
|
|
| |
def run_token_explorer(prompt, model_key, max_tokens, temperature, top_p, repetition_penalty):
    """Sample a continuation and return ``(heatmap_html, status_text)``.

    For each generated token, the score vector returned by ``generate`` is
    turned into probabilities with a softmax; the sampled token's probability
    and the top alternatives (up to 8) are recorded and rendered by
    ``_build_token_html``. An unknown *model_key* falls back to the default
    model. Load and generation errors are returned as an escaped HTML message
    with the status ``"Error"``.
    """
    text = (prompt or "").strip()
    if not text:
        return _explorer_placeholder(), "Enter a prompt."

    interrupt_callback.stop_signal = False
    try:
        tokenizer, model = _get_model(ALL_MODELS.get(model_key, ALL_MODELS[DEFAULT_MODEL]))
    except Exception as e:
        # Exception text can contain markup characters; escape before embedding.
        return f'<p style="color:#f87171;padding:20px;">Error loading model: {html.escape(str(e))}</p>', "Error"

    max_tokens = min(int(max_tokens), _max_tokens_cap(model_key))
    inputs = _to_model_device(_prep_inputs(tokenizer, text), model)
    try:
        with torch.inference_mode():
            outputs = model.generate(
                **inputs,
                max_new_tokens=int(max_tokens),
                do_sample=True,
                temperature=float(temperature),
                top_p=float(top_p),
                repetition_penalty=float(repetition_penalty),
                output_scores=True,
                return_dict_in_generate=True,
                pad_token_id=tokenizer.eos_token_id,
                stopping_criteria=StoppingCriteriaList([interrupt_callback]),
            )
    except Exception as e:
        return f'<p style="color:#f87171;padding:20px;">Generation error: {html.escape(str(e))}</p>', "Error"

    input_len = inputs["input_ids"].shape[1]
    generated_ids = outputs.sequences[0][input_len:]

    token_data = []
    for score_t, token_id in zip(outputs.scores, generated_ids):
        probs = F.softmax(score_t[0], dim=-1)
        # Never request more candidates than the vocabulary holds.
        top_k = torch.topk(probs, min(8, probs.shape[-1]))
        token_data.append({
            "token": tokenizer.decode([token_id.item()]),
            "prob": probs[token_id].item(),
            "alternatives": [
                {"token": tokenizer.decode([idx.item()]), "prob": p.item()}
                for idx, p in zip(top_k.indices, top_k.values)
            ],
        })

    # Named `rendered`, not `html`, so the stdlib module stays usable above.
    rendered = _build_token_html(text, token_data)
    return rendered, f"✓ {len(token_data)} tokens · sampled · {model_key}"
|
|
|
|
| def _tok_style(p): |
| if p >= 0.80: return "#10b981", "rgba(16,185,129,0.18)", "rgba(16,185,129,0.40)" |
| if p >= 0.50: return "#eab308", "rgba(234,179,8,0.18)", "rgba(234,179,8,0.40)" |
| if p >= 0.35: return "#f97316", "rgba(249,115,22,0.14)", "rgba(249,115,22,0.40)" |
| return "#b91c1c", "rgba(185,28,28,0.12)", "rgba(185,28,28,0.40)" |
|
|
|
|
| def _build_token_html(prompt_text, token_data): |
| if not token_data: |
| return '<p style="color:#64748b;padding:20px;">No tokens generated.</p>' |
|
|
| avg_p = sum(td["prob"] for td in token_data) / len(token_data) |
| high = sum(1 for td in token_data if td["prob"] >= 0.80) |
| med = sum(1 for td in token_data if 0.50 <= td["prob"] < 0.80) |
| unsure = sum(1 for td in token_data if 0.35 <= td["prob"] < 0.50) |
| low = sum(1 for td in token_data if td["prob"] < 0.35) |
|
|
| spans = [] |
| for td in token_data: |
| raw = td["token"] |
| p = td["prob"] |
| pct = int(p * 100) |
| col, bg, brd = _tok_style(p) |
| |
| disp = html.escape(raw).replace("\n", "↵") |
| if not disp.strip(): |
| disp = "·" |
|
|
| alts = " | ".join( |
| f'{html.escape(a["token"].strip() or "·")} {a["prob"]*100:.0f}%' |
| for a in td["alternatives"][:6] |
| ) |
| tip = html.escape(f"Token: {raw.strip() or repr(raw)} ({pct}%)\nAlternatives: {alts}").replace("\n", " ") |
|
|
| spans.append( |
| f'<span title="{tip}" ' |
| f'style="background:{bg};color:{col};border:1px solid {brd};' |
| f'padding:3px 7px;border-radius:5px;margin:2px 1px;display:inline-block;' |
| f'font-family:Space Mono,monospace;font-size:13px;line-height:2.2;' |
| f'cursor:help;transition:transform 0.1s,box-shadow 0.1s;" ' |
| f'onmouseover="this.style.transform=\'scale(1.1)\';this.style.boxShadow=\'0 0 12px {brd}\'" ' |
| f'onmouseout="this.style.transform=\'\';this.style.boxShadow=\'\'">' |
| f'{disp}<sup style="font-size:8px;opacity:0.6;margin-left:2px;">{pct}%</sup>' |
| f'</span>' |
| ) |
|
|
| prompt_span = ( |
| f'<span style="color:#374151;font-family:Space Mono,monospace;font-size:13px;">' |
| f'{html.escape(prompt_text)}' |
| f'</span>' |
| ) |
|
|
| token_block = ( |
| f'<div style="padding:20px;background:#060a14;border:1px solid #1e2d45;border-radius:10px;' |
| f'line-height:2.4;word-wrap:break-word;min-height:80px;">' |
| + prompt_span + "".join(spans) |
| + "</div>" |
| ) |
|
|
| legend = ( |
| f'<div style="margin-bottom:14px;">' |
| f'<div style="display:flex;gap:10px;margin-bottom:10px;flex-wrap:wrap;align-items:center;">' |
| f'<span style="font-family:Sora,sans-serif;font-size:13px;color:#64748b;">Avg confidence: ' |
| f'<strong style="color:#ffffff;">{int(avg_p*100)}%</strong></span>' |
| f'<span style="font-family:Sora,sans-serif;font-size:11px;color:#4a5568;margin-left:4px;">· Hover any token to see top alternatives</span>' |
| f'</div>' |
| f'<div style="display:flex;gap:8px;flex-wrap:wrap;">' |
| f'<div style="display:flex;align-items:center;gap:6px;">' |
| f'<div style="width:12px;height:12px;background:#10b981;border-radius:3px;"></div>' |
| f'<span style="font-family:Space Mono,monospace;font-size:11px;color:#94a3b8;">≥80% confident · {high} tokens</span></div>' |
| f'<div style="display:flex;align-items:center;gap:6px;">' |
| f'<div style="width:12px;height:12px;background:#eab308;border-radius:3px;"></div>' |
| f'<span style="font-family:Space Mono,monospace;font-size:11px;color:#94a3b8;">50–79% moderate · {med} tokens</span></div>' |
| f'<div style="display:flex;align-items:center;gap:6px;">' |
| f'<div style="width:12px;height:12px;background:#f97316;border-radius:3px;"></div>' |
| f'<span style="font-family:Space Mono,monospace;font-size:11px;color:#94a3b8;">35–49% uncertain · {unsure} tokens</span></div>' |
| f'<div style="display:flex;align-items:center;gap:6px;">' |
| f'<div style="width:12px;height:12px;background:#b91c1c;border-radius:3px;"></div>' |
| f'<span style="font-family:Space Mono,monospace;font-size:11px;color:#94a3b8;"><35% low · {low} tokens</span></div>' |
| f'</div></div>' |
| ) |
|
|
| return legend + token_block |
|
|
|
|
| def _explorer_placeholder(): |
| return ( |
| '<div style="padding:32px;text-align:center;background:#060a14;border:1px solid #1e2d45;' |
| 'border-radius:10px;min-height:120px;display:flex;flex-direction:column;align-items:center;justify-content:center;">' |
| '<div style="font-family:Bebas Neue,sans-serif;font-size:28px;letter-spacing:0.08em;color:#1e2d45;margin-bottom:8px;">TOKEN EXPLORER</div>' |
| '<p style="font-family:Sora,sans-serif;font-size:13px;color:#374151;margin:0;">' |
| 'Enter a prompt and click Explore to see per-token confidence heatmap</p>' |
| '</div>' |
| ) |
|
|
|
|
| |
def run_temp_sweep_streamed(prompt, model_key, max_tok, tp, rep_penalty, count, *temps):
    """Stream generations of one prompt at several temperatures.

    Only the first ``int(count)`` entries of *temps* are used. Every yielded
    value is a 5-tuple of texts; slots at or beyond ``count`` are empty
    strings. A blank prompt, or temperatures that collide after rounding to
    two decimals, produce a single 5-tuple carrying a message instead.
    """
    count_int = int(count)
    text = (prompt or "").strip()
    if not text:
        yield ("Enter a prompt.",) * 5
        return

    active_temps = [float(t) for t in temps[:count_int]]

    # Temperatures equal after rounding would give indistinguishable columns.
    distinct = {round(t, 2) for t in active_temps}
    if len(distinct) != len(active_temps):
        yield ("⚠️ Duplicate temperatures detected. Please make each temperature unique.",) * 5
        return

    configs = [
        {
            "model_key": model_key, "max_tokens": max_tok,
            "temperature": t, "top_p": tp, "repetition_penalty": rep_penalty
        }
        for t in active_temps
    ]

    for results, _ in parallel_config_generate(prompt, configs):
        yield tuple(results[slot] if slot < count_int else "" for slot in range(5))
|
|
|
|
| |
def add_to_history(history, prompt, output, model_key, temperature):
    """Return a new history list with this run prepended, capped at 10 entries.

    When *prompt* or *output* is empty the existing history (or ``[]``) is
    returned unchanged. The stored prompt is cut to 55 characters with an
    ellipsis appended when truncated; the stored output is stripped and cut to
    100 characters; the temperature is rounded to one decimal; the time is the
    current local time formatted ``HH:MM``.
    """
    previous = history or []
    if not output or not prompt:
        return previous

    short_prompt = prompt[:55]
    if len(prompt) > 55:
        short_prompt += "…"

    entry = {
        "prompt": short_prompt,
        "output": output.strip()[:100],
        "model": model_key,
        "temp": round(float(temperature), 1),
        "time": datetime.now().strftime("%H:%M"),
    }
    newest_first = [entry] + previous
    return newest_first[:10]
|
|
|
|
def build_history_html(history):
    """Render the run history as an HTML panel, newest entry first.

    Each entry is a mapping with ``time``, ``model``, ``temp``, ``prompt`` and
    ``output``. All entry fields are HTML-escaped because prompts and model
    output are free text. Rows fade by 0.07 opacity per position, never below
    0.35. An empty or missing history renders a "No generations yet" notice.
    """
    if not history:
        return (
            '<div style="padding:24px 16px;text-align:center;">'
            '<div style="font-family:Sora,sans-serif;font-size:12px;color:#374151;">No generations yet</div>'
            '</div>'
        )

    def esc(value):
        # Entries may hold non-string values (e.g. the float temperature).
        return html.escape(str(value))

    rows = []
    for i, e in enumerate(history):
        fade = max(0.35, 1.0 - i * 0.07)
        rows.append(
            f'<div style="padding:10px 14px;border-bottom:1px solid #0d1829;opacity:{fade:.2f};">'
            f'<div style="font-family:Space Mono,monospace;font-size:10px;color:#f59e0b;margin-bottom:3px;letter-spacing:0.04em;">'
            f'{esc(e["time"])} · {esc(e["model"])} · t={esc(e["temp"])}</div>'
            f'<div style="font-family:Sora,sans-serif;font-size:12px;color:#cbd5e1;white-space:nowrap;overflow:hidden;text-overflow:ellipsis;margin-bottom:2px;">'
            f'<strong style="color:#ffffff;">{esc(e["prompt"])}</strong></div>'
            f'<div style="font-family:Sora,sans-serif;font-size:11px;color:#4a5568;white-space:nowrap;overflow:hidden;text-overflow:ellipsis;">'
            f'{esc(e["output"] or "…")}</div>'
            f'</div>'
        )
    return (
        '<div style="background:#0a0f1e;border:1px solid #1e2d45;border-radius:10px;overflow:hidden;">'
        '<div style="padding:10px 14px;border-bottom:1px solid #1e2d45;background:#060a14;display:flex;align-items:center;gap:8px;">'
        '<span style="font-family:Bebas Neue,sans-serif;font-size:16px;letter-spacing:0.1em;color:#ffffff;">HISTORY</span>'
        f'<span style="font-family:Space Mono,monospace;font-size:10px;color:#374151;">{len(history)} runs</span>'
        '</div>'
        + "".join(rows)
        + '</div>'
    )
|
|
|
|
| |
def build_stats_html(tokens, elapsed, tps):
    """Render three stat pills (Tokens, Time, Speed) in a three-column grid.

    The values are interpolated as given; formatting is the caller's job.
    """
    pills = []
    for val, lbl in ((tokens, "Tokens"), (elapsed, "Time"), (tps, "Speed")):
        pills.append(
            f'<div style="background:#0d1829;border:1px solid #1e2d45;border-radius:8px;padding:10px;text-align:center;">'
            f'<span style="font-family:Space Mono,monospace;font-size:15px;font-weight:700;color:#f59e0b;display:block;line-height:1;margin-bottom:3px;">{val}</span>'
            f'<span style="font-family:Sora,sans-serif;font-size:10px;font-weight:600;letter-spacing:0.12em;text-transform:uppercase;color:#374151;">{lbl}</span>'
            f'</div>'
        )
    grid_open = '<div style="display:grid;grid-template-columns:repeat(3,1fr);gap:8px;margin-top:8px;">'
    return grid_open + "".join(pills) + '</div>'
|
|
|
|
def _on_generate_model_change(model_key: str, current_max_tokens: float):
    """Return the model card text and a slider update for the newly selected model.

    The slider maximum becomes the model's token cap; its value is the current
    value raised to at least ``DEFAULT_MAX_TOKENS`` and then limited to the cap.
    The card is looked up in ``MODEL_CARDS`` and defaults to ``""``.
    """
    cap = _max_tokens_cap(model_key)
    raised = max(DEFAULT_MAX_TOKENS, current_max_tokens)
    value = int(min(raised, cap))
    card = MODEL_CARDS.get(model_key, "")
    return card, gr.update(maximum=cap, value=value)
|
|
|
|
def _on_model_cap_change(model_key: str, current_max_tokens: float, min_value: int = 5):
    """Return a slider update that re-clamps the token count for *model_key*.

    The slider maximum becomes the model's token cap; its value is the current
    value raised to at least *min_value* and then limited to the cap.
    """
    cap = _max_tokens_cap(model_key)
    raised = max(min_value, current_max_tokens)
    clamped = int(min(raised, cap))
    return gr.update(maximum=cap, value=clamped)
|
|
|
|
| |
def _arena_random_pair(fair_match, internal_pair):
    """Pick a random first-party model and a random opponent for it.

    Without *fair_match* the opponent is any third-party model, plus the other
    first-party models when *internal_pair* is set. With *fair_match* the
    opponent comes from the allowlist of the first model's size tier, extended
    by same-tier first-party models when *internal_pair* is set. If that
    leaves no candidate, the unrestricted pool is used.
    """
    m1 = random.choice(list(STENTOR_MODELS.keys()))

    potential_m2 = list(ARENA_MODELS.keys())
    if internal_pair:
        potential_m2 += [k for k in STENTOR_MODELS if k != m1]

    if not fair_match:
        return m1, random.choice(potential_m2)

    tiers = {
        "12m": [k for k in STENTOR_MODELS if "12M" in k],
        "20m": [k for k in STENTOR_MODELS if "20M" in k],
        "30m": [k for k in STENTOR_MODELS if "30M" in k],
        "50m": [k for k in STENTOR_MODELS if "50M" in k],
        "150m": ["Portimbria-150M"],
    }
    fair_external_allowlist = {
        "12m": ["Pythia-14M"],
        "20m": ["Pythia-14M", "Pythia-31M"],
        "30m": ["Pythia-31M"],
        "50m": ["Pythia-31M", "Pythia-70M", "NanoWhale-100M-Base"],
        "150m": ["gpt2 small", "SmolLM2-135M", "NanoWhale-100M-Base", "Pythia-160M", "OPT-125M", "GPT-Neo 125M"],
    }

    for tier, members in tiers.items():
        if m1 in members:
            allowed_m2 = list(fair_external_allowlist[tier])
            if internal_pair:
                # Same-tier siblings; empty for the single-member 150m tier.
                allowed_m2 += [k for k in members if k != m1]
            break
    else:
        # Model outside every tier: anything but itself.
        allowed_m2 = [k for k in potential_m2 if k != m1]

    return m1, random.choice(allowed_m2 or potential_m2)


def arena_generate(prompt, mode, model1_key, model2_key, max_tok, temp, tp, rep_penalty, fair_match, internal_pair):
    """Run two models on the same prompt, one after the other, streaming both outputs.

    Every yielded frame is an 8-tuple: left output update, right output
    update, left status, right status, vote-panel visibility, a second
    visibility update (shown for an empty prompt and after a "Show"-mode
    run), the ``(left, right)`` model keys (``None`` for an empty prompt) and
    an empty string.

    Outside "Show" mode the model names are hidden behind "Model A"/"Model B"
    and the two sides are swapped at random. A stop request seen while the
    first model is streaming also skips the second model; this function
    remembers the interruption itself because ``_generate_and_stream`` clears
    the shared stop flag whenever it starts.
    """
    text = (prompt or "").strip()
    show_vote = (mode != "👀 Show")
    if not text:
        yield gr.update(value="", label="Left Model Output"), gr.update(value="", label="Right Model Output"), "Enter a prompt.", "Enter a prompt.", gr.update(visible=show_vote), gr.update(visible=True), None, ""
        return

    interrupt_callback.stop_signal = False
    m1 = model1_key
    m2 = model2_key

    if mode == "🎲 Random":
        m1, m2 = _arena_random_pair(fair_match, internal_pair)
    elif mode == "🙈 Blind":
        # Make sure at least one side of a blind battle is first-party.
        if m1 in ARENA_MODELS and m2 in ARENA_MODELS:
            m1 = random.choice(list(STENTOR_MODELS.keys()))

    # Randomise sides so position gives no hint when names are hidden.
    if mode != "👀 Show" and random.random() > 0.5:
        m1, m2 = m2, m1

    display_name1 = m1 if mode == "👀 Show" else "Model A"
    display_name2 = m2 if mode == "👀 Show" else "Model B"

    label1 = f"{display_name1} Output"
    label2 = f"{display_name2} Output"

    def frame(out1, out2, status1, status2, second_visible):
        # All frames share the labels, vote visibility and identities.
        return (
            gr.update(value=out1, label=label1),
            gr.update(value=out2, label=label2),
            status1,
            status2,
            gr.update(visible=show_vote),
            gr.update(visible=second_visible),
            (m1, m2),
            "",
        )

    thinking1 = f"⚡ {display_name1} is thinking…"
    waiting2 = f"Waiting for {display_name2}…"
    yield frame("", "", thinking1, waiting2, False)

    interrupted = False

    output1 = ""
    for partial in _generate_and_stream(ALL_MODELS[m1], text, max_tok, temp, tp, rep_penalty):
        if interrupt_callback.stop_signal:
            interrupted = True
            break
        output1 = partial
        yield frame(output1, "", thinking1, waiting2, False)

    status1 = f"⏹ {display_name1} Stopped" if interrupted else f"✓ {display_name1} Finished"

    output2 = ""
    if not interrupted:
        start2 = time.time()
        for partial in _generate_and_stream(ALL_MODELS[m2], text, max_tok, temp, tp, rep_penalty):
            if interrupt_callback.stop_signal:
                interrupted = True
                break
            output2 = partial
            elapsed = time.time() - start2
            yield frame(output1, output2, status1, f"⚡ {display_name2} is thinking… ({elapsed:.1f}s)", False)

    status2 = f"⏹ {display_name2} Stopped" if interrupted else f"✓ {display_name2} Finished"

    yield frame(output1, output2, status1, status2, mode == "👀 Show")
|
|
|
|
def arena_setup(mode):
    """Return the arena's idle state: blank outputs with generic labels and "Waiting..." statuses.

    The vote panel is visible in every mode except "Show"; the second
    visibility update is always hidden. The last two values are ``None`` and
    an empty string.
    """
    show_vote = (mode != "👀 Show")
    left_box = gr.update(value="", label="Left Model Output")
    right_box = gr.update(value="", label="Right Model Output")
    vote_panel = gr.update(visible=show_vote)
    hidden = gr.update(visible=False)
    return left_box, right_box, "Waiting...", "Waiting...", vote_panel, hidden, None, ""
|
|
|
|
def arena_vote(vote_type, identities):
    """Return the vote confirmation markdown and an update that hides a component.

    *identities* is the ``(left, right)`` model pair of the last battle; when
    it is missing or empty the message asks the user to run a battle first.
    """
    hide = gr.update(visible=False)
    if not identities:
        return "Please run a battle first.", hide

    left_model, right_model = identities
    result_text = "".join(
        (
            f"### Decision Recorded! \n\n**Winner:** {vote_type}\n\n",
            f"**Left was:** `{left_model}`\n**Right was:** `{right_model}`",
        )
    )
    return result_text, hide
|
|
|
|
| |
| CSS = """ |
| @import url('https://fonts.googleapis.com/css2?family=Bebas+Neue&family=Space+Mono:wght@400;700&family=Sora:wght@300;400;500;600;700&display=swap'); |
| |
| .gradio-container, .gradio-container * { box-sizing: border-box !important; } |
| |
| .gradio-container { |
| background: #04060e !important; |
| max-width: 1280px !important; |
| margin: 0 auto !important; |
| padding: 0 !important; |
| font-family: 'Sora', sans-serif !important; |
| color: #ffffff !important; |
| } |
| |
| body, |
| .gradio-container > div, |
| .gradio-container .contain, |
| .gradio-container .wrap, |
| .gradio-container section, |
| .gradio-container .tabs, |
| .gradio-container .tabitem, |
| .gradio-container > div > div, |
| .gradio-container .block { |
| background: #04060e !important; |
| border-color: #1a2744 !important; |
| } |
| |
| .gradio-container .block { |
| box-shadow: none !important; |
| border-radius: 0 !important; |
| padding: 0 !important; |
| border: none !important; |
| } |
| |
| footer { display: none !important; } |
| |
| .gradio-container p, |
| .gradio-container span, |
| .gradio-container div, |
| .gradio-container li, |
| .gradio-container td, |
| .gradio-container th { |
| color: #ffffff !important; |
| font-family: 'Sora', sans-serif !important; |
| } |
| |
| .gradio-container label, |
| .gradio-container label span, |
| .gradio-container .label-wrap span { |
| font-family: 'Sora', sans-serif !important; |
| font-size: 11px !important; |
| font-weight: 600 !important; |
| letter-spacing: 0.1em !important; |
| text-transform: uppercase !important; |
| color: #374151 !important; |
| } |
| |
| .gradio-container textarea, |
| .gradio-container input[type="text"], |
| .gradio-container input[type="number"] { |
| font-family: 'Sora', sans-serif !important; |
| background: #0d1829 !important; |
| border: 1px solid #1a2744 !important; |
| color: #ffffff !important; |
| border-radius: 8px !important; |
| font-size: 14px !important; |
| } |
| |
| .gradio-container textarea:focus, |
| .gradio-container input:focus { |
| border-color: #78490a !important; |
| box-shadow: 0 0 0 3px rgba(245,158,11,0.07) !important; |
| outline: none !important; |
| } |
| |
| #prompt-box textarea { |
| font-size: 15px !important; |
| line-height: 1.75 !important; |
| min-height: 120px !important; |
| } |
| |
| #output-box textarea { |
| font-family: 'Space Mono', monospace !important; |
| font-size: 13px !important; |
| line-height: 1.85 !important; |
| color: #ffffff !important; |
| background: #060a14 !important; |
| border-color: #1a2744 !important; |
| } |
| |
| /* Stop fading during generation updates */ |
| .gradio-container textarea { transition: none !important; opacity: 1 !important; } |
| |
| .status-bar textarea { |
| font-family: 'Space Mono', monospace !important; |
| font-size: 12px !important; |
| color: #374151 !important; |
| background: #0a0f1e !important; |
| border-color: #1a2744 !important; |
| padding: 6px 10px !important; |
| } |
| |
| .gradio-container input[type="range"] { accent-color: #f59e0b !important; } |
| .gradio-container input[type="number"] { |
| background: #0d1829 !important; |
| color: #e2e8f0 !important; |
| border: 1px solid #1a2744 !important; |
| font-family: 'Space Mono', monospace !important; |
| font-size: 13px !important; |
| width: 64px !important; |
| } |
| |
| .gradio-container [role="tablist"] { |
| background: #04060e !important; |
| border-bottom: 1px solid #1a2744 !important; |
| padding: 0 36px !important; |
| gap: 0 !important; |
| } |
| |
| .gradio-container [role="tab"] { |
| font-family: 'Sora', sans-serif !important; |
| font-size: 13px !important; |
| font-weight: 500 !important; |
| color: #374151 !important; |
| background: transparent !important; |
| border: none !important; |
| border-bottom: 2px solid transparent !important; |
| border-radius: 0 !important; |
| padding: 14px 20px !important; |
| letter-spacing: 0.03em !important; |
| transition: color 0.15s !important; |
| } |
| |
| .gradio-container [role="tab"]:hover { color: #94a3b8 !important; background: transparent !important; } |
| |
| .gradio-container [role="tab"][aria-selected="true"], |
| .gradio-container [role="tab"].selected { |
| color: #f59e0b !important; |
| border-bottom: 2px solid #f59e0b !important; |
| background: transparent !important; |
| } |
| |
| .gradio-container [role="tabpanel"], |
| .gradio-container .tabitem { |
| background: #04060e !important; |
| padding: 28px 36px !important; |
| border: none !important; |
| } |
| |
| .gradio-container fieldset { |
| background: transparent !important; |
| border: none !important; |
| padding: 0 !important; |
| gap: 6px !important; |
| } |
| |
| .gradio-container fieldset label { |
| background: #0d1829 !important; |
| border: 1px solid #1a2744 !important; |
| border-radius: 8px !important; |
| padding: 8px 14px !important; |
| cursor: pointer !important; |
| color: #64748b !important; |
| font-size: 13px !important; |
| font-weight: 500 !important; |
| text-transform: none !important; |
| letter-spacing: 0 !important; |
| transition: all 0.15s !important; |
| } |
| |
| .gradio-container fieldset label:has(input:checked) { |
| background: rgba(245,158,11,0.1) !important; |
| border-color: #f59e0b !important; |
| color: #f59e0b !important; |
| } |
| |
| .gradio-container button { |
| font-family: 'Sora', sans-serif !important; |
| cursor: pointer !important; |
| transition: all 0.18s !important; |
| border-radius: 8px !important; |
| } |
| |
| .gradio-container button.primary, |
| .gradio-container button[variant="primary"] { |
| background: #f59e0b !important; |
| color: #07090f !important; |
| border: none !important; |
| font-size: 13px !important; |
| font-weight: 700 !important; |
| letter-spacing: 0.07em !important; |
| text-transform: uppercase !important; |
| padding: 11px 22px !important; |
| position: relative !important; |
| overflow: hidden !important; |
| } |
| |
| .gradio-container button.primary::after { |
| content: '' !important; |
| position: absolute !important; |
| inset: 0 !important; |
| background: linear-gradient(120deg, transparent 30%, rgba(255,255,255,0.15) 50%, transparent 70%) !important; |
| transform: translateX(-100%) !important; |
| transition: transform 0.4s !important; |
| } |
| |
| .gradio-container button.primary:hover::after { transform: translateX(100%) !important; } |
| .gradio-container button.primary:hover { |
| background: #fbbf24 !important; |
| box-shadow: 0 0 28px rgba(245,158,11,0.4) !important; |
| transform: translateY(-1px) !important; |
| } |
| |
| .gradio-container button.secondary, |
| .gradio-container button[variant="secondary"] { |
| background: #0d1829 !important; |
| color: #64748b !important; |
| border: 1px solid #1a2744 !important; |
| font-size: 13px !important; |
| font-weight: 500 !important; |
| padding: 10px 18px !important; |
| } |
| |
| .gradio-container button.secondary:hover { |
| background: #111d30 !important; |
| color: #e2e8f0 !important; |
| border-color: #2a3f60 !important; |
| } |
| |
| .prompt-chip { |
| background: transparent !important; |
| border: 1px solid #1a2744 !important; |
| color: #374151 !important; |
| font-size: 11px !important; |
| font-weight: 400 !important; |
| padding: 5px 11px !important; |
| border-radius: 16px !important; |
| white-space: nowrap !important; |
| overflow: hidden !important; |
| text-overflow: ellipsis !important; |
| max-width: 200px !important; |
| text-transform: none !important; |
| letter-spacing: 0 !important; |
| } |
| |
| .prompt-chip:hover { |
| border-color: #0e4a6a !important; |
| color: #38bdf8 !important; |
| background: rgba(56,189,248,0.05) !important; |
| } |
| |
| .preset-chip { |
| background: #0d1829 !important; |
| border: 1px solid #1a2744 !important; |
| color: #64748b !important; |
| font-size: 12px !important; |
| font-weight: 600 !important; |
| padding: 6px 14px !important; |
| border-radius: 20px !important; |
| text-transform: none !important; |
| letter-spacing: 0 !important; |
| } |
| |
| .preset-chip:hover { |
| border-color: #78490a !important; |
| color: #f59e0b !important; |
| background: rgba(245,158,11,0.07) !important; |
| } |
| |
| .mode-caption { |
| margin: 10px 0 0 0; |
| font-family: 'Sora', sans-serif; |
| font-size: 12px; |
| line-height: 1.5; |
| color: #94a3b8; |
| } |
| |
| .mode-caption strong { |
| color: #f59e0b; |
| } |
| |
| @keyframes shimmer { 0%, 100% { opacity: 1; } 50% { opacity: 0.6; } } |
| @keyframes pulse-border { 0%,100%{border-color:#1a2744} 50%{border-color:#2a3f60} } |
| |
| .stentor-header { |
| position: relative; |
| padding: 52px 40px 44px; |
| overflow: hidden; |
| border-bottom: 1px solid #1a2744; |
| background: #04060e; |
| } |
| |
| .stentor-header::before { |
| content: ''; |
| position: absolute; |
| inset: 0; |
| background: |
| radial-gradient(ellipse 70% 55% at 50% -10%, rgba(245,158,11,0.06) 0%, transparent 65%), |
| repeating-linear-gradient(90deg, transparent, transparent 79px, rgba(26,39,68,0.2) 80px), |
| repeating-linear-gradient(0deg, transparent, transparent 79px, rgba(26,39,68,0.2) 80px); |
| pointer-events: none; |
| } |
| |
| .stentor-header::after { |
| content: ''; |
| position: absolute; |
| top: 0; left: 0; right: 0; height: 2px; |
| background: linear-gradient(90deg, transparent 0%, #f59e0b 50%, transparent 100%); |
| animation: shimmer 5s ease-in-out infinite; |
| } |
| |
| .header-inner { |
| position: relative; z-index: 1; |
| display: flex; align-items: flex-end; |
| justify-content: space-between; gap: 20px; flex-wrap: wrap; |
| } |
| |
| .stentor-header h1, |
| .stentor-header h1 * { |
| color: #ffffff !important; |
| -webkit-text-fill-color: #ffffff !important; |
| } |
| |
| .wordmark-eyebrow { |
| font-family: 'Sora', sans-serif; |
| font-size: 11px; font-weight: 600; |
| letter-spacing: 0.3em; text-transform: uppercase; |
| color: #f59e0b; display: block; margin-bottom: 6px; |
| } |
| |
| .wordmark-title { |
| font-family: 'Bebas Neue', sans-serif !important; |
| font-size: clamp(56px, 9vw, 96px) !important; |
| line-height: 0.88 !important; |
| color: #ffffff !important; |
| -webkit-text-fill-color: #ffffff !important; |
| margin: 0 !important; display: block !important; |
| letter-spacing: 0.02em !important; |
| } |
| |
| .wordmark-sub { |
| font-family: 'Space Mono', monospace; |
| font-size: 11px; color: #374151; |
| margin-top: 10px; display: block; letter-spacing: 0.04em; |
| } |
| |
| .header-badges { display: flex; flex-direction: column; align-items: flex-end; gap: 8px; } |
| .badge-row { display: flex; gap: 6px; flex-wrap: wrap; justify-content: flex-end; } |
| .badge { font-family: 'Space Mono', monospace; font-size: 10px; padding: 4px 10px; border-radius: 4px; font-weight: 700; display: inline-block; } |
| .badge-gold { background: rgba(245,158,11,0.12); color: #f59e0b; border: 1px solid rgba(245,158,11,0.3); } |
| .badge-ice { background: rgba(56,189,248,0.08); color: #38bdf8; border: 1px solid rgba(56,189,248,0.25); } |
| .badge-green { background: rgba(16,185,129,0.08); color: #10b981; border: 1px solid rgba(16,185,129,0.25); } |
| |
| .section-title { |
| font-family: 'Bebas Neue', sans-serif !important; |
| font-size: 24px !important; letter-spacing: 0.09em !important; |
| color: #ffffff !important; margin: 0 0 16px 0 !important; |
| padding-bottom: 10px !important; border-bottom: 1px solid #1a2744 !important; |
| line-height: 1 !important; display: block; |
| } |
| |
| .model-card { background: #080d1a; border: 1px solid #1a2744; border-radius: 10px; padding: 14px; margin-bottom: 12px; } |
| .model-card-title { font-family: 'Bebas Neue', sans-serif; font-size: 20px; letter-spacing: 0.06em; color: #ffffff; margin: 0 0 10px 0; line-height: 1; } |
| .model-attr { display: flex; justify-content: space-between; align-items: center; padding: 4px 0; border-bottom: 1px solid #0d1829; font-size: 12px; } |
| .model-attr:last-child { border-bottom: none; } |
| .attr-key { font-family: 'Sora', sans-serif; color: #374151; font-weight: 500; } |
| .attr-val { font-family: 'Space Mono', monospace; color: #38bdf8; font-size: 11px; } |
| |
| .explorer-info { |
| background: rgba(245,158,11,0.05); |
| border: 1px solid rgba(245,158,11,0.2); |
| border-radius: 8px; |
| padding: 12px 16px; |
| margin-bottom: 16px; |
| font-family: 'Sora', sans-serif; |
| font-size: 12px; |
| color: #94a3b8; |
| line-height: 1.6; |
| } |
| |
| .about-grid { display: grid; grid-template-columns: 1fr 1fr; gap: 16px; margin-top: 20px; } |
| .about-block { background: #080d1a; border: 1px solid #1a2744; border-radius: 10px; padding: 18px; } |
| .about-block h3 { font-family: 'Bebas Neue', sans-serif !important; font-size: 17px !important; letter-spacing: 0.07em !important; color: #f59e0b !important; margin: 0 0 12px 0 !important; } |
| .about-block p { font-size: 13px; color: #64748b; line-height: 1.7; margin: 0; } |
| .about-block li { font-size: 13px; color: #64748b; line-height: 1.85; } |
| .about-block a { color: #38bdf8 !important; text-decoration: none !important; } |
| .about-block a:hover { text-decoration: underline !important; } |
| |
| .arch-table { width: 100%; border-collapse: collapse; font-family: 'Space Mono', monospace; font-size: 12px; } |
| .arch-table td { padding: 5px 6px; border-bottom: 1px solid #1a2744; } |
| .arch-table td:first-child { color: #374151; font-size: 11px; } |
| .arch-table td:last-child { color: #38bdf8; text-align: right; } |
| |
| ::-webkit-scrollbar { width: 5px; height: 5px; } |
| ::-webkit-scrollbar-track { background: #080d1a; } |
| ::-webkit-scrollbar-thumb { background: #1a2744; border-radius: 3px; } |
| ::-webkit-scrollbar-thumb:hover { background: #2a3f60; } |
| |
| @media (max-width: 800px) { |
| .stentor-header { padding: 28px 16px 24px; } |
| .gradio-container [role="tabpanel"] { padding: 16px !important; } |
| .about-grid { grid-template-columns: 1fr; } |
| .header-badges { display: none; } |
| } |
| """ |
|
|
| |
# Hero banner markup shown at the top of the page (passed to gr.HTML).
# Left column: eyebrow, two-line wordmark and tagline. Right column: six
# badge rows summarising licence, architecture and the model line-up.
# The markup is deliberately kept flush-left, one element per line, so the
# string contains nothing but the tags themselves.
HEADER_HTML = """
<div class="stentor-header">
<div class="header-inner">
<div>
<span class="wordmark-eyebrow">Model Showcase</span>
<h1 class="wordmark-title">STENTOR<br>LABS</h1>
<span class="wordmark-sub">// compact llama models · cpu-native · open research</span>
</div>
<div class="header-badges">
<div class="badge-row">
<span class="badge badge-gold">Apache 2.0</span>
<span class="badge badge-ice">Llama Arch</span>
<span class="badge badge-green">CPU Native</span>
</div>
<div class="badge-row">
<span class="badge badge-gold">Stentor2-30M + Stentor2-12M</span>
<span class="badge badge-ice">1024 ctx</span>
<span class="badge badge-green">Edge Ready</span>
</div>
<div class="badge-row">
<span class="badge badge-gold">Stentor3-50M + Stentor3-20M</span>
<span class="badge badge-ice">4096 ctx</span>
<span class="badge badge-green">Next Gen</span>
</div>
<div class="badge-row">
<span class="badge badge-gold">Portimbria-150M</span>
<span class="badge badge-ice">4096 ctx</span>
</div>
<div class="badge-row">
<span class="badge badge-gold">Flagship: Portimbria-150M</span>
</div>
<div class="badge-row">
<span class="badge badge-gold">FineWeb-Edu</span>
<span class="badge badge-ice">Cosmopedia v2</span>
</div>
</div>
</div>
</div>
"""
|
|
def _card_attr_link(url, text="Open ↗"):
    """Return the anchor markup placed inside a card's value cell.

    Every card link opens in a new tab and carries the same inline colour.
    """
    return f'<a href="{url}" target="_blank" style="color:#38bdf8">{text}</a>'


def _render_model_card(title, rows):
    """Render one ``model-card`` block as a single-line HTML string.

    ``title`` is the heading text. ``rows`` is an iterable of ``(key, value)``
    pairs emitted, in order, as ``model-attr`` rows. Values are inserted
    verbatim (no escaping) because some of them are anchor markup.
    """
    attr_rows = "".join(
        '<div class="model-attr">'
        f'<span class="attr-key">{key}</span>'
        f'<span class="attr-val">{value}</span>'
        '</div>'
        for key, value in rows
    )
    return (
        '<div class="model-card">'
        f'<p class="model-card-title">{title}</p>'
        f'{attr_rows}'
        '</div>'
    )


# Per-model attribute rows, in display order. Keys are the model names used
# as MODEL_CARDS keys; the card heading is the upper-cased key.
_MODEL_CARD_ROWS = {
    "Portimbria-150M": [
        ("Parameters", "151M"),
        ("Family", "Portimbria"),
        ("Variant", "Base model"),
        ("Context", "4096 tokens"),
        ("Architecture", "Llama with GQA"),
        ("Model Card", _card_attr_link("https://huggingface.co/StentorLabs/Portimbria-150M")),
    ],
    "Stentor3-50M": [
        ("Parameters", "50M"),
        ("Family", "Stentor3"),
        ("Variant", "Base model"),
        ("Context", "4096 tokens"),
        ("Model Card", _card_attr_link("https://huggingface.co/StentorLabs/stentor3-50m")),
    ],
    "Stentor3-20M": [
        ("Parameters", "20M"),
        ("Family", "Stentor3"),
        ("Variant", "Base model"),
        ("Context", "4096 tokens"),
        ("Model Card", _card_attr_link("https://huggingface.co/StentorLabs/stentor3-20m")),
    ],
    "Stentor2-30M": [
        ("Family", "Stentor2"),
        ("Variant", "Base model"),
        ("Context", "1024 tokens"),
        ("Model Card", _card_attr_link("https://huggingface.co/StentorLabs/stentor2-30m")),
    ],
    "Stentor2-12M": [
        ("Family", "Stentor2"),
        ("Variant", "Base model"),
        ("Context", "1024 tokens"),
        ("Model Card", _card_attr_link("https://huggingface.co/StentorLabs/stentor2-12m")),
    ],
    "Stentor-30M": [
        ("Parameters", "30,419,712"),
        ("Architecture", "LlamaForCausalLM"),
        ("Layers", "21"),
        ("Hidden Size", "256"),
        ("Attn Heads", "4"),
        ("Context", "512 tokens"),
        ("Val Loss / PPL", "3.4971 / 33.02"),
        ("Trained On", "600M tokens"),
        ("Hardware", "1× Tesla T4 · 7.88h"),
        ("GGUF", _card_attr_link("https://huggingface.co/mradermacher/Stentor-30M-GGUF", "mradermacher ↗")),
    ],
    "Stentor-12M": [
        ("Parameters", "12,047,040"),
        ("Architecture", "LlamaForCausalLM"),
        ("Layers", "9"),
        ("Hidden Size", "192"),
        ("Attn Heads", "3"),
        ("Context", "512 tokens"),
        ("Val Loss / PPL", "4.4887 / 89.01"),
        ("Trained On", "200M tokens"),
        ("Hardware", "2× Tesla T4 · 1.3h"),
    ],
    "Stentor-30M-Instruct": [
        ("Variant", "Instruction-tuned"),
        ("Base Family", "Stentor-30M"),
        ("Architecture", "LlamaForCausalLM"),
        ("Context", "512 tokens"),
        ("Model Card", _card_attr_link("https://huggingface.co/StentorLabs/Stentor-30M-Instruct")),
        ("Status", "Featured in this Space"),
    ],
    "Stentor-12M-Instruct": [
        ("Variant", "Instruction-tuned"),
        ("Base Family", "Stentor-12M"),
        ("Architecture", "LlamaForCausalLM"),
        ("Context", "512 tokens"),
        ("Model Card", _card_attr_link("https://huggingface.co/StentorLabs/Stentor-12M-Instruct")),
        ("Status", "Featured in this Space"),
    ],
}

# Model name -> ready-to-render card HTML (one string per model, insertion
# order preserved from the table above).
MODEL_CARDS = {
    model_name: _render_model_card(model_name.upper(), attr_rows)
    for model_name, attr_rows in _MODEL_CARD_ROWS.items()
}
|
|
# Highlighted "flagship" card shown under the model selector. It reuses the
# model-card / model-attr classes and overrides border, background and title
# colour inline to give the card its amber accent.
FLAGSHIP_HTML = (
    '<div class="model-card" style="border-color:rgba(245,158,11,0.35);background:rgba(245,158,11,0.06);">'
    '<p class="model-card-title" style="color:#f59e0b;">FLAGSHIP MODEL</p>'
    '<div class="model-attr"><span class="attr-key">Primary</span><span class="attr-val">Portimbria-150M</span></div>'
    '<div class="model-attr"><span class="attr-key">Context Length</span><span class="attr-val">4,096 tokens</span></div>'
    '<div class="model-attr"><span class="attr-key">Training Data</span><span class="attr-val">6B tokens</span></div>'
    '<div class="model-attr"><span class="attr-key">Model Card</span><span class="attr-val"><a href="https://huggingface.co/StentorLabs/Portimbria-150M" target="_blank" style="color:#38bdf8">Portimbria-150M ↗</a></span></div>'
    '</div>'
)
|
|
# Static "About" page markup. Sections, top to bottom:
#   1. intro panel,
#   2. Portimbria-150M architecture diagram (pure HTML/CSS boxes),
#   3. a grid of about-block cards: one per UI mode, a parameter guide and
#      the links list.
# The links list carries one entry per model card referenced by the app,
# including the Stentor3 pair. Markup is kept flush-left, one element per
# line.
ABOUT_HTML = """
<div>
<div style="margin-bottom:24px;padding:20px;background:#080d1a;border:1px solid #1a2744;border-radius:10px;">
<div style="font-family:Bebas Neue,sans-serif;font-size:22px;letter-spacing:0.08em;color:#f59e0b;margin-bottom:8px;">STENTORLABS PLAYGROUND</div>
<p style="font-family:Sora,sans-serif;font-size:14px;color:#ffffff;line-height:1.85;margin:0;max-width:850px;">
Welcome to the official StentorLabs sandbox. This Hugging Face Space is a <strong style="color:#f59e0b;">free, comprehensive testing environment</strong>
designed to give anyone—from researchers to hobbyists—full access to our family of compact Llama models.
Unlike traditional demos, this Space provides deep diagnostic tools to help you understand how Small Language Models (SLMs)
actually process information, manage confidence, and respond to parameter shifts.
</p>
</div>

<div style="margin-bottom:24px;padding:20px;background:#080d1a;border:1px solid #1a2744;border-radius:10px;">
<div style="font-family:Bebas Neue,sans-serif;font-size:18px;letter-spacing:0.08em;color:#f59e0b;margin-bottom:16px;">ARCHITECTURE DIAGRAM · PORTIMBRIA-150M</div>
<div style="display:flex;flex-direction:column;align-items:center;gap:0;font-family:Space Mono,monospace;font-size:11px;">
<div style="background:#0d1829;border:1px solid #1a2744;border-radius:6px;padding:8px 28px;color:#94a3b8;text-align:center;">INPUT TOKENS</div>
<div style="color:#374151;padding:4px 0;font-size:16px;">↓</div>
<div style="background:rgba(245,158,11,0.1);border:1px solid rgba(245,158,11,0.3);border-radius:6px;padding:8px 20px;color:#f59e0b;text-align:center;">EMBEDDING LAYER<br><span style="font-size:10px;color:#78490a;">32,768 vocab × 768 hidden</span></div>
<div style="color:#374151;padding:4px 0;font-size:16px;">↓</div>
<div style="border:1px solid #1a2744;border-radius:8px;padding:12px 20px;background:#060a14;width:100%;max-width:420px;">
<div style="font-family:Bebas Neue,sans-serif;font-size:14px;letter-spacing:0.08em;color:#94a3b8;text-align:center;margin-bottom:8px;">× 20 TRANSFORMER BLOCKS</div>
<div style="display:grid;grid-template-columns:1fr 1fr;gap:6px;">
<div style="background:rgba(56,189,248,0.08);border:1px solid rgba(56,189,248,0.2);border-radius:5px;padding:6px 10px;text-align:center;color:#38bdf8;font-size:10px;">GQA ATTENTION<br><span style="color:#0e4a6a;">6 heads · 2 KV · RoPE θ=50000</span></div>
<div style="background:rgba(167,139,250,0.08);border:1px solid rgba(167,139,250,0.2);border-radius:5px;padding:6px 10px;text-align:center;color:#a78bfa;font-size:10px;">FEED-FORWARD<br><span style="color:#4c1d95;">768→2048→768 SiLU</span></div>
</div>
<div style="margin-top:6px;text-align:center;font-size:10px;color:#1a2744;">RMSNorm + Residual connections</div>
</div>
<div style="color:#374151;padding:4px 0;font-size:16px;">↓</div>
<div style="background:rgba(16,185,129,0.08);border:1px solid rgba(16,185,129,0.25);border-radius:6px;padding:8px 20px;color:#10b981;text-align:center;">OUTPUT LOGITS<br><span style="font-size:10px;color:#064e3b;">32,768 vocab (tied weights)</span></div>
</div>
</div>

<div class="about-grid">
<div class="about-block">
<h3>⚡ Mode: Generate</h3>
<p>The standard interface for text completion. Test how models handle creative writing, code drafting, or factual continuation.</p>
<ul style="margin-top:8px;">
<li><strong>Presets:</strong> Instantly switch between Creative (high temp), Balanced, and Focused (low temp) logic.</li>
<li><strong>Multi-Response:</strong> Generate up to 5 variations of the same prompt sequentially to test output variance.</li>
</ul>
</div>
<div class="about-block">
<h3>🔬 Mode: Token Explorer</h3>
<p>Peek "under the hood" of the model's decision-making process. This mode visualizes internal confidence levels.</p>
<ul style="margin-top:8px;">
<li><strong>Confidence Heatmap:</strong> See which tokens the model was certain about vs. which were random guesses.</li>
<li><strong>Alternatives:</strong> Hover over any generated token to see the <em>top 8 alternatives</em> the model was considering at that exact moment.</li>
</ul>
</div>
<div class="about-block">
<h3>🌡 Mode: Temp Sweep</h3>
<p>A visual study in creativity. Run the exact same prompt across 2–5 different temperature settings simultaneously.</p>
<ul style="margin-top:8px;">
<li><strong>Visual Divergence:</strong> Observe how low temperatures stay rigid and repetitive while high temperatures become increasingly chaotic.</li>
<li><strong>Dynamic Slots:</strong> Use the Number of Boxes slider to reveal up to five outputs.</li>
</ul>
</div>
<div class="about-block">
<h3>💬 Mode: Chat</h3>
<p>Interactive testing for all model variants, including both <strong>Base</strong> and <strong>Instruct</strong> versions. Test how models handle multi-turn dialogue and maintain context.</p>
<ul style="margin-top:8px;">
<li><strong>Memory:</strong> Test how the 512–4096 token context handles conversation history.</li>
<li><strong>Safety:</strong> Observe how small models handle refusals and helpfulness constraints.</li>
</ul>
</div>
<div class="about-block">
<h3>🛠 Parameter Guide</h3>
<ul style="margin-top:0;">
<li><strong>Temperature:</strong> Controls "creativity." 0.1 is nearly deterministic; 1.5+ is experimental/chaotic.</li>
<li><strong>Top P:</strong> Nucleus sampling. Limits the model to the most likely group of tokens whose cumulative probability is P. Helps prevent gibberish.</li>
<li><strong>Repetition Penalty:</strong> Penalizes tokens that have already appeared. Essential for preventing loops in very small models.</li>
<li><strong>Max Tokens:</strong> Each model has a physical cap (e.g., 4,096 for Portimbria). Setting this too low will cut off thoughts mid-sentence.</li>
</ul>
</div>
<div class="about-block">
<h3>Links & Resources</h3>
<ul>
<li><a href="https://huggingface.co/StentorLabs/Portimbria-150M" target="_blank">Portimbria-150M Model Card ↗</a></li>
<li><a href="https://huggingface.co/StentorLabs/Stentor-30M" target="_blank">Stentor-30M Model Card ↗</a></li>
<li><a href="https://huggingface.co/StentorLabs/Stentor-30M-Instruct" target="_blank">Stentor-30M-Instruct Model Card ↗</a></li>
<li><a href="https://huggingface.co/StentorLabs/Stentor-12M" target="_blank">Stentor-12M Model Card ↗</a></li>
<li><a href="https://huggingface.co/StentorLabs/Stentor-12M-Instruct" target="_blank">Stentor-12M-Instruct Model Card ↗</a></li>
<li><a href="https://huggingface.co/StentorLabs/stentor2-30m" target="_blank">Stentor2-30M Model Card ↗</a></li>
<li><a href="https://huggingface.co/StentorLabs/stentor2-12m" target="_blank">Stentor2-12M Model Card ↗</a></li>
<li><a href="https://huggingface.co/StentorLabs/stentor3-50m" target="_blank">Stentor3-50M Model Card ↗</a></li>
<li><a href="https://huggingface.co/StentorLabs/stentor3-20m" target="_blank">Stentor3-20M Model Card ↗</a></li>
<li><a href="https://huggingface.co/mradermacher/Stentor-30M-GGUF" target="_blank">GGUF Quantizations (mradermacher) ↗</a></li>
<li><a href="https://huggingface.co/StentorLabs" target="_blank">StentorLabs on Hugging Face ↗</a></li>
</ul>
<p style="margin-top:14px;font-size:12px;color:#1a2744;border-top:1px solid #0d1829;padding-top:12px;">
⚠ Includes both base and instruct variants · Always set max_new_tokens · Apache 2.0 · Built by Kai Izumoto
</p>
</div>
</div>
</div>
"""
|
|
| |
# Best-effort eager load of the default model while the module is imported.
# Both the ALL_MODELS lookup and the load itself sit inside the try, so a
# missing entry or a failed load is reported on stdout and the rest of the
# module keeps executing.
print(f"[Stentor] Preloading default model ({DEFAULT_MODEL}) at startup...")
try:
    _get_model(ALL_MODELS[DEFAULT_MODEL])
    print("[Stentor] Default model loaded and warmed up.")
except Exception as e:
    # Deliberately broad: startup must not abort because a preload failed.
    print(f"[Stentor] Could not preload default model: {e}")
|
|
# Best-effort eager load of every arena comparison model. Each model gets
# its own try/except so one failing repository does not stop the remaining
# ones from being attempted; outcomes are reported per model on stdout.
print("[Stentor] Preloading arena models at startup...")
for arena_name, arena_repo in ARENA_MODELS.items():
    try:
        _get_model(arena_repo)
        print(f"[Stentor] Arena model loaded: {arena_name}")
    except Exception as e:
        # Deliberately broad: a failed preload is logged and skipped.
        print(f"[Stentor] Could not preload arena model {arena_name}: {e}")
|
|
| |
# Root of the UI: components and event listeners created below belong to `demo`.
| with gr.Blocks(title=APP_TITLE) as demo: |
| gr.HTML(HEADER_HTML) |
# History list handed to and returned from the generate callback; starts empty.
| history_state = gr.State([]) |
|
|
| with gr.Tabs(): |
|
|
| |
# Generate tab: model picker and sampling parameters, prompt/output area,
# and a history panel.
| with gr.TabItem(" ▶ Generate "): |
| with gr.Row(): |
|
|
# Model selection and sampling parameters.
| with gr.Column(scale=1, min_width=240): |
| gr.HTML('<span class="section-title">MODEL</span>') |
| model_sel = gr.Radio( |
| choices=list(STENTOR_MODELS.keys()), |
| value=DEFAULT_MODEL, |
| label="", |
| interactive=True, |
| ) |
| gr.HTML(FLAGSHIP_HTML) |
# Starts on the default model's card; replaced when model_sel changes.
| model_card_html = gr.HTML(MODEL_CARDS[DEFAULT_MODEL]) |
| gr.HTML('<span class="section-title" style="margin-top:20px;">PARAMETERS</span>') |
| with gr.Row(): |
| btn_creative = gr.Button("🎨 Creative", size="sm", elem_classes=["preset-chip"]) |
| btn_balanced = gr.Button("⚖️ Balanced", size="sm", elem_classes=["preset-chip"]) |
| btn_focused = gr.Button("🎯 Focused", size="sm", elem_classes=["preset-chip"]) |
| gr.HTML(MODE_RECOMMENDATION_HTML) |
# The slider maximum starts at INITIAL_MAX_TOKENS and is re-capped per model
# by the model_sel.change handler.
| max_tokens = gr.Slider(10, INITIAL_MAX_TOKENS, value=DEFAULT_MAX_TOKENS, step=10, label="Max New Tokens") |
| temperature = gr.Slider(0.1, 2.0, value=DEFAULT_TEMP, step=0.05, label="Temperature") |
| top_p = gr.Slider(0.05, 1.0, value=DEFAULT_TOP_P, step=0.05, label="Top P") |
| repetition_penalty = gr.Slider(0.8, 2.0, value=DEFAULT_REP_PENALTY, step=0.05, label="Repetition Penalty") |
| num_responses = gr.Slider(1, 5, value=1, step=1, label="Number of Responses") |
|
|
# Prompt entry, example chips, run/stop buttons and output.
| with gr.Column(scale=3): |
| gr.HTML('<span class="section-title">GENERATE</span>') |
| prompt_box = gr.Textbox( |
| label="Prompt", |
| placeholder="Start writing or pick an example below…", |
| lines=4, |
| elem_id="prompt-box", |
| ) |
# (button, full prompt) pairs; the click handlers are attached later, once
# all tabs exist.
| example_btns = [] |
| with gr.Column(): |
# The first four examples go in one row, the remainder in a second row.
# Button labels show at most 26 characters of the prompt plus an ellipsis.
| with gr.Row(): |
| for emoji, p in EXAMPLE_PROMPTS[:4]: |
| short = p[:26] + ("…" if len(p) > 26 else "") |
| b = gr.Button(f"{emoji} {short}", size="sm", elem_classes=["prompt-chip"]) |
| example_btns.append((b, p)) |
| with gr.Row(): |
| for emoji, p in EXAMPLE_PROMPTS[4:]: |
| short = p[:26] + ("…" if len(p) > 26 else "") |
| b = gr.Button(f"{emoji} {short}", size="sm", elem_classes=["prompt-chip"]) |
| example_btns.append((b, p)) |
|
|
| with gr.Row(): |
| gen_btn = gr.Button("▶ Generate", variant="primary", scale=3) |
| stop_btn = gr.Button("⏹ Stop", variant="secondary", scale=1) |
|
|
| output_box = gr.Textbox( |
| label="Output", |
| lines=12, |
| interactive=False, |
| elem_id="output-box", |
| ) |
# Stats and status start with placeholder values until a run updates them.
| stats_html = gr.HTML(build_stats_html("—", "—", "—")) |
| status_box = gr.Textbox( |
| value="Ready.", label="", |
| interactive=False, elem_classes=["status-bar"], |
| ) |
|
|
# History panel, rendered from an empty history at startup.
| with gr.Column(scale=1, min_width=220): |
| gr.HTML('<span class="section-title">HISTORY</span>') |
| history_html = gr.HTML(build_history_html([])) |
|
|
| |
# Token Explorer tab: prompt, model and sampling controls, plus an HTML
# output area for the color-coded tokens described in the info box below.
| with gr.TabItem(" 🔬 Token Explorer "): |
| with gr.Column(): |
| gr.HTML('<span class="section-title">TOKEN PROBABILITY EXPLORER</span>') |
# Static legend. The thresholds quoted here are display text only.
# NOTE(review): confirm they match the bucketing done in run_token_explorer.
| gr.HTML( |
| '<div class="explorer-info">' |
| '🔬 <strong style="color:#f59e0b;">How it works:</strong> Samples tokens while capturing the full probability distribution over the vocabulary at each step. ' |
| 'Tokens are color-coded by confidence: ' |
| '<span style="color:#10b981;">●</span> green = confident (≥80%), ' |
| '<span style="color:#eab308;">●</span> yellow = moderate (50–79%), ' |
| '<span style="color:#f97316;">●</span> orange = uncertain (35–49%), ' |
| '<span style="color:#b91c1c;">●</span> dark red = low (<35%). ' |
| 'Hover any token to see the top alternatives the model considered.' |
| '</div>' |
| ) |
| with gr.Row(): |
| with gr.Column(scale=3): |
# NOTE(review): elem_id "prompt-box" is also assigned to the prompt
# textboxes of the other tabs, so the id is not unique on the page.
| exp_prompt = gr.Textbox( |
| label="Prompt", |
| placeholder="Enter a prompt to visualize token-by-token confidence…", |
| lines=3, |
| elem_id="prompt-box", |
| ) |
| with gr.Column(scale=1): |
| exp_model = gr.Radio(choices=list(STENTOR_MODELS.keys()), value=DEFAULT_MODEL, label="Model") |
| with gr.Row(): |
| exp_creative = gr.Button("🎨 Creative", size="sm", elem_classes=["preset-chip"]) |
| exp_balanced = gr.Button("⚖️ Balanced", size="sm", elem_classes=["preset-chip"]) |
| exp_focused = gr.Button("🎯 Focused", size="sm", elem_classes=["preset-chip"]) |
| gr.HTML(MODE_RECOMMENDATION_HTML) |
# Minimum is 5 here (the other tabs use 10) while the step is still 10.
| exp_tokens = gr.Slider(5, INITIAL_MAX_TOKENS, value=DEFAULT_MAX_TOKENS, step=10, label="Max Tokens") |
| exp_temp = gr.Slider(0.1, 2.0, value=DEFAULT_TEMP, step=0.05, label="Temperature") |
| exp_top_p = gr.Slider(0.05, 1.0, value=DEFAULT_TOP_P, step=0.05, label="Top P") |
| exp_rep_pen = gr.Slider(0.8, 2.0, value=DEFAULT_REP_PENALTY, step=0.05, label="Repetition Penalty") |
| exp_btn = gr.Button("🔬 Explore", variant="primary", scale=3) |
| exp_stop_btn = gr.Button("⏹ Stop", variant="secondary", scale=1) |
|
|
# Output starts as the placeholder markup; the status line starts empty.
| exp_output = gr.HTML(_explorer_placeholder()) |
| exp_status = gr.Textbox(value="", label="", interactive=False, elem_classes=["status-bar"]) |
|
|
| |
# Temp Sweep tab: one prompt run at several temperatures, one output box per
# temperature. All five boxes are built up front and shown/hidden by count.
| with gr.TabItem(" 🌡 Temp Sweep "): |
| with gr.Column(): |
| gr.HTML('<span class="section-title">TEMPERATURE SWEEP</span>') |
| gr.HTML( |
| '<div class="explorer-info">' |
| '🌡 <strong style="color:#f59e0b;">What this shows:</strong> The same prompt run at multiple different temperatures simultaneously. ' |
| 'Low temperature = conservative/repetitive. High temperature = creative/chaotic. ' |
| 'Choose between 2–5 temperature boxes below. <strong>No duplicate temperatures allowed.</strong>' |
| '</div>' |
| ) |
# NOTE(review): sweep_state is not passed to any event listener in this part
# of the file — confirm whether it is still needed.
| sweep_state = gr.State([0.5, 1.0, 1.5, 2.0]) |
| with gr.Row(): |
| with gr.Column(scale=3): |
| sweep_prompt = gr.Textbox( |
| label="Prompt", |
| placeholder="Enter a prompt to run across all temperatures…", |
| lines=3, |
| elem_id="prompt-box", |
| ) |
| with gr.Column(scale=1): |
| sweep_model = gr.Radio(choices=list(STENTOR_MODELS.keys()), value=DEFAULT_MODEL, label="Model") |
| with gr.Row(): |
| sweep_creative = gr.Button("🎨 Creative", size="sm", elem_classes=["preset-chip"]) |
| sweep_balanced = gr.Button("⚖️ Balanced", size="sm", elem_classes=["preset-chip"]) |
| sweep_focused = gr.Button("🎯 Focused", size="sm", elem_classes=["preset-chip"]) |
| gr.HTML(MODE_RECOMMENDATION_HTML) |
# No temperature slider on this tab: temperatures come from the per-box inputs.
| sweep_tokens = gr.Slider(10, INITIAL_MAX_TOKENS, value=DEFAULT_MAX_TOKENS, step=10, label="Max Tokens") |
| sweep_top_p = gr.Slider(0.05, 1.0, value=DEFAULT_TOP_P, step=0.05, label="Top P") |
| sweep_rep_pen = gr.Slider(0.8, 2.0, value=DEFAULT_REP_PENALTY, step=0.05, label="Repetition Penalty") |
# Default of 2 matches the two columns created visible in the loop below.
| sweep_count = gr.Slider(2, 5, value=2, step=1, label="Number of Boxes") |
| sweep_btn = gr.Button("🌡 Run Sweep", variant="primary", scale=3) |
| sweep_stop_btn = gr.Button("⏹ Stop", variant="secondary", scale=1) |
|
|
| with gr.Row(): |
# Parallel lists indexed by box number (0-4): temperature input, enclosing
# column and output textbox. They are reused as event inputs/outputs.
| sweep_temp_inputs = [] |
| sweep_temp_labels = ["1st Temp", "2nd Temp", "3rd Temp", "4th Temp", "5th Temp"] |
| default_temps_for_index = [0.5, 1.0, 1.5, 2.0, 2.5] |
| sweep_columns = [] |
| sweep_outputs_for_fn = [] |
| for i in range(5): |
# Only the first two boxes start visible.
| with gr.Column(visible=(i < 2)) as col: |
# One accent color per box; the list is rebuilt on every iteration.
| color_map = ["#38bdf8", "#10b981", "#f59e0b", "#f97316", "#f87171"] |
| gr.HTML( |
| f'<div style="text-align:center;padding:8px 0 4px;">' |
| f'<span style="font-family:Bebas Neue,sans-serif;font-size:20px;' |
| f'letter-spacing:0.06em;color:{color_map[i]};">BOX {i+1}</span>' |
| f'</div>' |
| ) |
# Per-box temperature, limited to 0.1-2.5 (the fifth default sits at the maximum).
| inp = gr.Number(value=default_temps_for_index[i], label=sweep_temp_labels[i], minimum=0.1, maximum=2.5, step=0.05) |
| sweep_temp_inputs.append(inp) |
| out = gr.Textbox(label="", lines=8, interactive=False, elem_id="output-box") |
| sweep_outputs_for_fn.append(out) |
| sweep_columns.append(col) |
|
|
| with gr.Row(): |
| sweep_add_btn = gr.Button("+ Add Box", variant="secondary", scale=1) |
| sweep_rm_btn = gr.Button("− Remove Box", variant="secondary", scale=1) |
|
|
def update_sweep_visibility(count, *temps):
    """Show the first ``count`` sweep boxes and keep their temperatures distinct.

    Args:
        count: Number of boxes to display (value of the "Number of Boxes"
            slider). ``None`` is treated as 2, the same fallback used by
            ``add_sweep_box`` and ``remove_sweep_box``.
        *temps: Current values of the per-box temperature inputs, in box
            order. Only the first five are considered and ``None`` entries
            are ignored when checking for duplicates.

    Returns:
        A list of ten ``gr.update`` objects: five column updates followed by
        five temperature-input updates, in the order expected by
        ``sweep_columns + sweep_temp_inputs``.
    """
    total_boxes = 5
    count_int = 2 if count is None else int(count)

    # Temperatures are compared at two-decimal precision, so 0.50 and 0.504
    # count as duplicates.
    active = [t for t in temps[:total_boxes][:count_int] if t is not None]
    has_duplicates = len(active) != len({round(float(t), 2) for t in active})

    col_updates = [gr.update(visible=(i < count_int)) for i in range(total_boxes)]

    if has_duplicates:
        # Replace the visible temperatures with an even spread over 0.5-2.0.
        step = 1.5 / max(count_int - 1, 1)
        temp_updates = [
            gr.update(visible=True, value=round(0.5 + i * step, 2))
            if i < count_int
            else gr.update(visible=False)
            for i in range(total_boxes)
        ]
    else:
        # Keep the user's values but still synchronise visibility. The
        # duplicate branch hides inputs beyond ``count``; without this, such
        # an input would stay hidden inside a column that is shown again.
        temp_updates = [gr.update(visible=(i < count_int)) for i in range(total_boxes)]

    return col_updates + temp_updates
|
|
def add_sweep_box(count):
    """Return an update raising the box count by one, never above five.

    A missing count (``None``) is treated as two boxes.
    """
    current = int(count) if count is not None else 2
    bumped = current + 1
    if bumped > 5:
        bumped = 5
    return gr.update(value=bumped)
|
|
def remove_sweep_box(count):
    """Return an update lowering the box count by one, never below two.

    A missing count (``None``) is treated as two boxes.
    """
    current = int(count) if count is not None else 2
    reduced = current - 1
    if reduced < 2:
        reduced = 2
    return gr.update(value=reduced)
|
|
# Alias for the list of per-box output textboxes; used as the sweep run's outputs.
| sweep_outs = sweep_outputs_for_fn |
|
|
# Moving the count slider re-evaluates which boxes are shown. The handler
# receives the count followed by the five temperature values and returns
# updates for the five columns followed by the five temperature inputs.
| sweep_count.change( |
| fn=update_sweep_visibility, |
| inputs=[sweep_count] + sweep_temp_inputs, |
| outputs=sweep_columns + sweep_temp_inputs |
| ) |
|
|
# The add/remove buttons first adjust the slider value, then run the same
# visibility update as a chained step.
# NOTE(review): if writing a new value to sweep_count also fires its change
# listener above, update_sweep_visibility runs twice per click — confirm.
| sweep_add_btn.click( |
| fn=add_sweep_box, |
| inputs=[sweep_count], |
| outputs=[sweep_count], |
| ).then( |
| fn=update_sweep_visibility, |
| inputs=[sweep_count] + sweep_temp_inputs, |
| outputs=sweep_columns + sweep_temp_inputs, |
| ) |
| sweep_rm_btn.click( |
| fn=remove_sweep_box, |
| inputs=[sweep_count], |
| outputs=[sweep_count], |
| ).then( |
| fn=update_sweep_visibility, |
| inputs=[sweep_count] + sweep_temp_inputs, |
| outputs=sweep_columns + sweep_temp_inputs, |
| ) |
|
|
| |
# Arena tab: runs one shared prompt through two models side by side and lets
# the user vote on the better response.
| with gr.TabItem(" 🏟 Arena "): |
| with gr.Column(): |
| gr.HTML('<span class="section-title">MODEL ARENA</span>') |
| gr.HTML( |
| '<div class="explorer-info">' |
| '🏟 <strong style="color:#f59e0b;">Model Arena:</strong> Benchmark performance via blind or open testing.<br>' |
| '• <strong>Show Mode:</strong> Pick models and see their names while generating.<br>' |
| '• <strong>Blind Mode:</strong> Pick models but their identities are hidden until you vote.<br>' |
| '• <strong>Random Mode:</strong> Let the arena pick a random Stentor vs a Baseline model.' |
| '</div>' |
| ) |
# Written by arena_setup/arena_generate and read back by the vote handlers.
| arena_identities = gr.State(None) |
| with gr.Row(): |
| with gr.Column(scale=3): |
| arena_prompt = gr.Textbox( |
| label="Shared Prompt", |
| placeholder="Enter a prompt to run through both models…", |
| lines=3, |
| elem_id="prompt-box", |
| ) |
| with gr.Column(scale=2): |
| with gr.Row(): |
| arena_creative = gr.Button("🎨 Creative", size="sm", elem_classes=["preset-chip"]) |
| arena_balanced = gr.Button("⚖️ Balanced", size="sm", elem_classes=["preset-chip"]) |
| arena_focused = gr.Button("🎯 Focused", size="sm", elem_classes=["preset-chip"]) |
| gr.HTML(MODE_RECOMMENDATION_HTML) |
| with gr.Row(): |
| arena_mode = gr.Dropdown( |
| choices=["👀 Show", "🙈 Blind", "🎲 Random"], |
| value="👀 Show", |
| label="Arena Mode" |
| ) |
# Matchmaking options; hidden at startup and shown only in Random mode by
# update_arena_ui_visibility.
| with gr.Column(visible=False) as arena_random_options: |
| arena_fair_match = gr.Checkbox(value=True, label="Fair Matchmaking", info="Pairs models with similar parameter counts for a balanced fight.") |
| gr.HTML('<p style="font-family:Sora,sans-serif;font-size:10px;color:#374151;margin-bottom:8px;text-transform:uppercase;font-weight:600;letter-spacing:0.05em;">Pairing Logic</p>') |
| arena_internal_pair = gr.Checkbox(value=True, label="Internal Pairings", info="Allows Stentor models to face other Stentor models.") |
|
|
# Fixed 1024 maximum here; the other tabs start from INITIAL_MAX_TOKENS and
# re-cap per model.
| arena_max = gr.Slider(10, 1024, value=DEFAULT_MAX_TOKENS, step=10, label="Max Tokens") |
| with gr.Row(): |
| arena_temp = gr.Slider(0.1, 2.0, value=DEFAULT_TEMP, step=0.05, label="Temperature") |
| arena_top_p = gr.Slider(0.05, 1.0, value=DEFAULT_TOP_P, step=0.05, label="Top P") |
| with gr.Row(): |
| arena_rep_pen = gr.Slider(0.8, 2.0, value=DEFAULT_REP_PENALTY, step=0.05, label="Repetition Penalty") |
|
|
| with gr.Row(): |
| arena_btn = gr.Button("🏟 Battle", variant="primary", scale=2) |
| arena_stop_btn = gr.Button("⏹ Stop", variant="secondary", scale=1) |
|
|
# Manual model pickers: one Stentor model against one external baseline,
# defaulting to the first entry of ARENA_MODELS.
| with gr.Row(elem_id="arena-selectors") as arena_selector_row: |
| with gr.Column(): |
| arena_model1 = gr.Dropdown( |
| choices=list(STENTOR_MODELS.keys()), |
| value=DEFAULT_MODEL, |
| label="Stentor Model", |
| interactive=True, |
| ) |
| with gr.Column(): |
| arena_model2 = gr.Dropdown( |
| choices=list(ARENA_MODELS.keys()), |
| value=list(ARENA_MODELS.keys())[0], |
| label="External Model", |
| interactive=True, |
| ) |
|
|
def update_arena_ui_visibility(mode):
    """Toggle the Random-mode options against the manual model selectors.

    Returns two updates: the first shows the random-matchmaking options only
    when ``mode`` is "🎲 Random"; the second shows the selector row in every
    other mode.
    """
    is_random = mode == "🎲 Random"
    random_options_update = gr.update(visible=is_random)
    selector_row_update = gr.update(visible=not is_random)
    return random_options_update, selector_row_update
|
|
# Switching the mode swaps between the random-matchmaking options and the
# manual selector row.
| arena_mode.change(fn=update_arena_ui_visibility, inputs=[arena_mode], outputs=[arena_random_options, arena_selector_row]) |
|
|
# Side-by-side outputs, each with its own status line.
| with gr.Row(): |
| with gr.Column(): |
| arena_output1 = gr.Textbox(label="Left Model Output", lines=12, interactive=False, elem_id="output-box") |
| arena_status1 = gr.Textbox(value="Ready.", label="", interactive=False, elem_classes=["status-bar"]) |
| with gr.Column(): |
| arena_output2 = gr.Textbox(label="Right Model Output", lines=12, interactive=False, elem_id="output-box") |
| arena_status2 = gr.Textbox(value="Ready.", label="", interactive=False, elem_classes=["status-bar"]) |
|
|
# Voting controls; hidden at startup. vote_col is an output of the battle
# callbacks and of the vote handlers, which control its visibility.
| with gr.Column(visible=False) as vote_col: |
| gr.HTML('<div style="text-align:center;margin-top:20px;"><span class="section-title">VOTE FOR THE BEST RESPONSE</span></div>') |
| with gr.Row(): |
| left_win = gr.Button("👈 Left is Better", variant="secondary") |
| right_win = gr.Button("Right is Better 👉", variant="secondary") |
| tie_win = gr.Button("🤝 It's a Tie", variant="secondary") |
| both_bad = gr.Button("👎 Both are Bad", variant="secondary") |
| |
| arena_results = gr.Markdown("") |
|
|
| |
# Battle runs in two steps: arena_setup receives only the mode, then
# arena_generate receives the prompt and all settings. Both write to the
# same eight outputs.
# NOTE(review): arena_event is bound to the result of the trailing .then();
# confirm that cancelling it also stops a still-running first step.
| arena_event = arena_btn.click( |
| fn=arena_setup, |
| inputs=[arena_mode], |
| outputs=[arena_output1, arena_output2, arena_status1, arena_status2, vote_col, arena_selector_row, arena_identities, arena_results], |
| ).then( |
| fn=arena_generate, |
| inputs=[arena_prompt, arena_mode, arena_model1, arena_model2, arena_max, arena_temp, arena_top_p, arena_rep_pen, arena_fair_match, arena_internal_pair], |
| outputs=[arena_output1, arena_output2, arena_status1, arena_status2, vote_col, arena_selector_row, arena_identities, arena_results], |
| ) |
|
|
# Each vote button passes a fixed verdict label plus the stored identities
# to arena_vote, which updates the results text and the vote column.
| vote_inputs = [arena_identities] |
| left_win.click(fn=lambda ids: arena_vote("Left Model", ids), inputs=vote_inputs, outputs=[arena_results, vote_col]) |
| right_win.click(fn=lambda ids: arena_vote("Right Model", ids), inputs=vote_inputs, outputs=[arena_results, vote_col]) |
| tie_win.click(fn=lambda ids: arena_vote("Tie", ids), inputs=vote_inputs, outputs=[arena_results, vote_col]) |
| both_bad.click(fn=lambda ids: arena_vote("Both Bad", ids), inputs=vote_inputs, outputs=[arena_results, vote_col]) |
|
|
# Stop cancels the battle event, writes "⏹ Stopped." into both output boxes,
# blanks both status lines and hides the vote column.
# NOTE(review): unlike the handle_stop_* handlers of the other tabs, this one
# does not set interrupt_callback.stop_signal — confirm arena generation does
# not keep running in the background after a cancel.
# NOTE(review): the value order replaces the generated text and clears the
# status lines; confirm that the outputs and statuses are not swapped.
| arena_stop_btn.click( |
| fn=lambda: ("⏹ Stopped.", "⏹ Stopped.", "", "", gr.update(visible=False)), |
| outputs=[arena_output1, arena_output2, arena_status1, arena_status2, vote_col], |
| cancels=[arena_event], |
| ) |
|
|
| |
# Chat tab: settings on the left, conversation view and message box on the right.
| with gr.TabItem(" 💬 Chat "): |
| with gr.Row(): |
| with gr.Column(scale=1, min_width=240): |
| gr.HTML('<span class="section-title">CHAT SETTINGS</span>') |
| with gr.Row(): |
| chat_creative = gr.Button("🎨 Creative", size="sm", elem_classes=["preset-chip"]) |
| chat_balanced = gr.Button("⚖️ Balanced", size="sm", elem_classes=["preset-chip"]) |
| chat_focused = gr.Button("🎯 Focused", size="sm", elem_classes=["preset-chip"]) |
| gr.HTML(MODE_RECOMMENDATION_HTML) |
| chat_model = gr.Radio( |
| choices=list(STENTOR_MODELS.keys()), |
| value=DEFAULT_MODEL, |
| label="Model", |
| interactive=True, |
| ) |
| chat_max_tokens = gr.Slider(10, INITIAL_MAX_TOKENS, value=DEFAULT_MAX_TOKENS, step=10, label="Max New Tokens") |
| chat_temperature = gr.Slider(0.1, 2.0, value=DEFAULT_TEMP, step=0.05, label="Temperature") |
| chat_top_p = gr.Slider(0.05, 1.0, value=DEFAULT_TOP_P, step=0.05, label="Top P") |
| chat_rep_penalty = gr.Slider(0.8, 2.0, value=DEFAULT_REP_PENALTY, step=0.05, label="Repetition Penalty") |
| with gr.Row(): |
| chat_stop_btn = gr.Button("⏹ Stop", variant="secondary") |
| chat_reset_btn = gr.Button("↺ Reset Chat", variant="secondary") |
|
|
| with gr.Column(scale=3): |
| gr.HTML('<span class="section-title">CONVERSATION</span>') |
# Conversation so far; passed into chat_generate and replaced by its output.
| chat_messages = gr.State([]) |
# Empty-state placeholder shown until chat_generate renders a conversation.
| chat_display = gr.HTML( |
| '<div style="padding:20px;text-align:center;background:#060a14;border:1px solid #1e2d45;' |
| 'border-radius:10px;min-height:200px;display:flex;flex-direction:column;align-items:center;' |
| 'justify-content:center;">' |
| '<p style="font-family:Sora,sans-serif;font-size:13px;color:#374151;margin:0;">' |
| 'Start a conversation by typing a message below.</p></div>' |
| ) |
| chat_input = gr.Textbox( |
| label="Your Message", |
| placeholder="Type your message here and press Send…", |
| lines=3, |
| elem_id="prompt-box", |
| ) |
| chat_send_btn = gr.Button("▶ Send", variant="primary") |
|
|
# Re-cap the token slider to the selected model's limit, pulling the current
# value down if it exceeds the new maximum.
# NOTE(review): the other model-change handlers in this file also enforce a
# lower bound and cast the value to int; this one does neither.
| chat_model.change( |
| fn=lambda m, cur: gr.update(maximum=_max_tokens_cap(m), value=min(cur, _max_tokens_cap(m))), |
| inputs=[chat_model, chat_max_tokens], |
| outputs=[chat_max_tokens], |
| ) |
|
|
| |
# About tab: static content only, no inputs or event listeners.
| with gr.TabItem(" ℹ About "): |
| with gr.Column(): |
| gr.HTML('<span class="section-title">THE STENTOR SERIES</span>') |
| gr.HTML(ABOUT_HTML) |
|
|
| |
|
|
# Changing the Generate-tab model swaps the model card (empty string for an
# unknown key) and re-caps the token slider to that model's limit.
# NOTE(review): the lower clamp is DEFAULT_MAX_TOKENS, not the slider minimum
# of 10, so a smaller user-chosen value is raised back to the default on
# every model change — confirm this is intended.
| model_sel.change( |
| fn=lambda m, cur: (MODEL_CARDS.get(m, ""), gr.update(maximum=_max_tokens_cap(m), value=int(min(max(DEFAULT_MAX_TOKENS, cur), _max_tokens_cap(m))))), |
| inputs=[model_sel, max_tokens], |
| outputs=[model_card_html, max_tokens], |
| ) |
|
|
# Explorer and sweep tabs: re-cap the token slider and clamp the current
# value between the slider minimum (5 and 10 respectively) and the model cap.
| exp_model.change( |
| fn=lambda m, cur: gr.update(maximum=_max_tokens_cap(m), value=int(min(max(5, cur), _max_tokens_cap(m)))), |
| inputs=[exp_model, exp_tokens], |
| outputs=[exp_tokens], |
| ) |
| sweep_model.change( |
| fn=lambda m, cur: gr.update(maximum=_max_tokens_cap(m), value=int(min(max(10, cur), _max_tokens_cap(m)))), |
| inputs=[sweep_model, sweep_tokens], |
| outputs=[sweep_tokens], |
| ) |
|
|
# Preset chips. Each click takes no inputs and writes the values returned by
# apply_standard_preset into that tab's four sliders, in the order
# (max tokens, temperature, top-p, repetition penalty).
| btn_creative.click(fn=lambda: apply_standard_preset("🎨 Creative"), outputs=[max_tokens, temperature, top_p, repetition_penalty]) |
| btn_balanced.click(fn=lambda: apply_standard_preset("⚖️ Balanced"), outputs=[max_tokens, temperature, top_p, repetition_penalty]) |
| btn_focused .click(fn=lambda: apply_standard_preset("🎯 Focused"), outputs=[max_tokens, temperature, top_p, repetition_penalty]) |
|
|
| exp_creative.click(fn=lambda: apply_standard_preset("🎨 Creative"), outputs=[exp_tokens, exp_temp, exp_top_p, exp_rep_pen]) |
| exp_balanced.click(fn=lambda: apply_standard_preset("⚖️ Balanced"), outputs=[exp_tokens, exp_temp, exp_top_p, exp_rep_pen]) |
| exp_focused .click(fn=lambda: apply_standard_preset("🎯 Focused"), outputs=[exp_tokens, exp_temp, exp_top_p, exp_rep_pen]) |
|
|
# The sweep tab has no single temperature control, so it uses
# apply_sweep_preset with three outputs instead of four.
| sweep_creative.click(fn=lambda: apply_sweep_preset("🎨 Creative"), outputs=[sweep_tokens, sweep_top_p, sweep_rep_pen]) |
| sweep_balanced.click(fn=lambda: apply_sweep_preset("⚖️ Balanced"), outputs=[sweep_tokens, sweep_top_p, sweep_rep_pen]) |
| sweep_focused .click(fn=lambda: apply_sweep_preset("🎯 Focused"), outputs=[sweep_tokens, sweep_top_p, sweep_rep_pen]) |
|
|
| arena_creative.click(fn=lambda: apply_standard_preset("🎨 Creative"), outputs=[arena_max, arena_temp, arena_top_p, arena_rep_pen]) |
| arena_balanced.click(fn=lambda: apply_standard_preset("⚖️ Balanced"), outputs=[arena_max, arena_temp, arena_top_p, arena_rep_pen]) |
| arena_focused .click(fn=lambda: apply_standard_preset("🎯 Focused"), outputs=[arena_max, arena_temp, arena_top_p, arena_rep_pen]) |
|
|
| chat_creative.click(fn=lambda: apply_standard_preset("🎨 Creative"), outputs=[chat_max_tokens, chat_temperature, chat_top_p, chat_rep_penalty]) |
| chat_balanced.click(fn=lambda: apply_standard_preset("⚖️ Balanced"), outputs=[chat_max_tokens, chat_temperature, chat_top_p, chat_rep_penalty]) |
| chat_focused .click(fn=lambda: apply_standard_preset("🎯 Focused"), outputs=[chat_max_tokens, chat_temperature, chat_top_p, chat_rep_penalty]) |
|
|
# Example chips copy their full (untruncated) prompt into the prompt box.
# The default argument binds each button's own prompt at definition time;
# a plain closure would make every button use the last prompt in the loop.
| for btn, full_prompt in example_btns: |
| btn.click(fn=lambda t=full_prompt: t, outputs=[prompt_box]) |
|
|
def handle_stop_gen():
    """Raise the shared stop flag and reset the Generate tab read-outs.

    Returns the status text and a stats panel filled with placeholder dashes.
    """
    interrupt_callback.stop_signal = True
    status_text = "⏹ Stopped."
    blank_stats = build_stats_html("—", "—", "—")
    return status_text, blank_stats
|
|
def run_generate_multi(prompt, model_key, max_tok, temp, tp, rep_penalty, num_resp, history):
    """Stream one or more completions for ``prompt`` into the Generate tab.

    Generator used as a Gradio event handler. Every yield is the 5-tuple
    ``(output_text, status_text, stats_html, history, history_html)``.

    Args:
        prompt: Text from the prompt box; ``None`` or whitespace-only input
            short-circuits with an "Enter a prompt." message.
        model_key: Key into ``STENTOR_MODELS`` selecting the model.
        max_tok: Maximum number of new tokens per response.
        temp: Sampling temperature.
        tp: Top-p value.
        rep_penalty: Repetition penalty.
        num_resp: Number of responses to generate; ``None`` or a value below
            one is treated as one.
        history: Current history list. It is passed through unchanged until
            the final yield, which carries the list returned by
            ``add_to_history``.

    Yields:
        Intermediate tuples with status "⚡ Generating…" for every update
        produced by ``parallel_config_generate``, then one final tuple with
        status "✓ Done · <model_key>" and the updated history.
    """
    if not (prompt or "").strip():
        yield (
            "Enter a prompt.",
            "Enter a prompt.",
            build_stats_html("—", "—", "—"),
            history,
            build_history_html(history),
        )
        return

    # Guard against a zero/negative count so results[0] below always exists.
    num = max(1, int(num_resp)) if num_resp is not None else 1
    tokenizer, _ = _get_model(STENTOR_MODELS[model_key])
    configs = [
        {
            "model_key": model_key,
            "max_tokens": max_tok,
            "temperature": temp,
            "top_p": tp,
            "repetition_penalty": rep_penalty,
        }
        for _ in range(num)
    ]

    def _render(results):
        # Several responses get a numbered separator each; a single response
        # is shown bare.
        if num > 1:
            return "".join(
                f"─── Response {idx} ───\n{text}\n\n"
                for idx, text in enumerate(results, start=1)
            )
        return results[0] if results else ""

    def _stats(results, elapsed, precision):
        # Token count is taken by re-encoding the generated text.
        total_tokens = sum(len(tokenizer.encode(r)) for r in results)
        tps = total_tokens / elapsed if elapsed > 0 else 0
        return build_stats_html(
            str(total_tokens), f"{elapsed:.{precision}f}s", f"{tps:.1f} t/s"
        )

    final_results = [""] * num
    final_elapsed = 0.0
    # Pre-bind so the final yield is safe even if the generator below yields
    # nothing (previously an UnboundLocalError).
    display = ""

    for results, elapsed in parallel_config_generate(prompt, configs):
        final_results, final_elapsed = results, elapsed
        display = _render(results)
        yield (
            display,
            "⚡ Generating…",
            _stats(results, elapsed, 1),
            history,
            build_history_html(history),
        )

    # Only the first response is recorded in the history.
    first_response = final_results[0] if final_results else ""
    new_history = add_to_history(history, prompt, first_response, model_key, temp)
    yield (
        display,
        f"✓ Done · {model_key}",
        _stats(final_results, final_elapsed, 2),
        new_history,
        build_history_html(new_history),
    )
|
|
# Generate button: run_generate_multi receives the prompt, model, the four
# sampling sliders, the response count and the history state, and writes to
# the output box, status line, stats panel, history state and history panel.
# The event handle is kept so the Stop button can cancel it.
| gen_event = gen_btn.click( |
| fn=run_generate_multi, |
| inputs=[prompt_box, model_sel, max_tokens, temperature, top_p, repetition_penalty, num_responses, history_state], |
| outputs=[output_box, status_box, stats_html, history_state, history_html], |
| ) |
|
|
# Stop cancels the running generate event and resets status/stats via
# handle_stop_gen; the output box is not among the outputs, so it is left as is.
| stop_btn.click( |
| fn=handle_stop_gen, |
| outputs=[status_box, stats_html], |
| cancels=[gen_event] |
| ) |
|
|
def handle_stop_exp():
    """Raise the shared stop flag and reset the Token Explorer view.

    Returns the status text followed by the explorer placeholder markup.
    """
    interrupt_callback.stop_signal = True
    status_text = "⏹ Stopped."
    placeholder_markup = _explorer_placeholder()
    return status_text, placeholder_markup
|
|
# Explore button: run_token_explorer writes (html, status) in that order.
| exp_event = exp_btn.click( |
| fn=run_token_explorer, |
| inputs=[exp_prompt, exp_model, exp_tokens, exp_temp, exp_top_p, exp_rep_pen], |
| outputs=[exp_output, exp_status], |
| ) |
# Note the reversed output order: handle_stop_exp returns (status, html).
| exp_stop_btn.click( |
| fn=handle_stop_exp, |
| outputs=[exp_status, exp_output], |
| cancels=[exp_event] |
| ) |
|
|
# Run Sweep: the callback receives the shared settings, the box count and
# then all five temperature inputs (including those of hidden boxes), and
# writes to the five output textboxes.
| sweep_event = sweep_btn.click( |
| fn=run_temp_sweep_streamed, |
| inputs=[sweep_prompt, sweep_model, sweep_tokens, sweep_top_p, sweep_rep_pen, sweep_count] + sweep_temp_inputs, |
| outputs=sweep_outs, |
| ) |
def handle_stop_sweep():
    """Raise the shared stop flag and mark every sweep output as stopped.

    Returns one update per sweep output textbox, each setting the text to
    "⏹ Stopped.".
    """
    interrupt_callback.stop_signal = True
    stopped_update = gr.update(value="⏹ Stopped.")
    box_count = len(sweep_outputs_for_fn)
    return [stopped_update] * box_count
# Stop cancels the running sweep and overwrites all five output boxes.
| sweep_stop_btn.click( |
| fn=handle_stop_sweep, |
| outputs=sweep_outputs_for_fn, |
| cancels=[sweep_event] |
| ) |
|
|
# Sending a message, by button or by submitting the textbox, runs
# chat_generate and then clears the input box as a chained step.
# NOTE(review): chat_event and chat_input_event are bound to the result of
# the trailing .then(); confirm that cancelling them also stops a running
# chat_generate step.
| chat_event = chat_send_btn.click( |
| fn=chat_generate, |
| inputs=[chat_messages, chat_input, chat_model, chat_max_tokens, chat_temperature, chat_top_p, chat_rep_penalty], |
| outputs=[chat_display, chat_messages], |
| ).then( |
| fn=lambda: "", outputs=[chat_input], |
| ) |
|
|
| chat_input_event = chat_input.submit( |
| fn=chat_generate, |
| inputs=[chat_messages, chat_input, chat_model, chat_max_tokens, chat_temperature, chat_top_p, chat_rep_penalty], |
| outputs=[chat_display, chat_messages], |
| ).then( |
| fn=lambda: "", outputs=[chat_input], |
| ) |
|
|
def handle_stop_chat():
    """Raise the shared stop flag for the Chat tab; updates no component."""
    interrupt_callback.stop_signal = True
    return None
|
|
# Stop raises the shared stop flag and cancels both chat send paths
# (button click and textbox submit). It has no outputs, so the conversation
# view is left untouched.
| chat_stop_btn.click( |
| fn=handle_stop_chat, |
| cancels=[chat_event, chat_input_event] |
| ) |
|
|
# Reset writes chat_clear's return values to the message state, the
# conversation view and the input box, in that order.
| chat_reset_btn.click( |
| fn=chat_clear, |
| outputs=[chat_messages, chat_display, chat_input], |
| ) |
|
|
|
|
if __name__ == "__main__":
    # Script entry point: launch the app with the base theme, the file's
    # custom CSS, and server-side rendering disabled.
    launch_options = {
        "theme": gr.themes.Base(),
        "css": CSS,
        "ssr_mode": False,
    }
    demo.launch(**launch_options)
|
|