"""Gradio chat front-end for a GGUF model served through llama-cpp-python."""

import gradio as gr
from llama_cpp import Llama

# Load the Q4_K_M quantization from the Hugging Face repo once, at import
# time, so every chat request reuses the same in-memory model.
llm = Llama.from_pretrained(
    repo_id="WithinUsAI/Gemma4-Overlooked.Thinker.Uncensored-E2B.gguf",
    filename="*Q4_K_M*",
    n_ctx=8192,
    n_threads=4,
    verbose=False,
)


def _content_to_text(content):
    """Flatten one history entry's content into plain text.

    Plain strings pass through unchanged and ``None`` becomes ``""``.
    Lists are treated as content parts: string items and the ``"text"``
    field of dict items are joined with newlines, other parts are dropped.
    NOTE(review): the list-of-parts shape is assumed from newer Gradio
    message formats -- confirm against the installed Gradio version.
    """
    if content is None:
        return ""
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        parts = []
        for part in content:
            if isinstance(part, str):
                parts.append(part)
            elif isinstance(part, dict) and isinstance(part.get("text"), str):
                parts.append(part["text"])
        return "\n".join(parts)
    return str(content)


def _add_turn(messages, role, content):
    """Append a turn to *messages*, merging with a preceding same-role turn.

    Empty turns are skipped. Merging keeps user/assistant roles strictly
    alternating, which some chat templates require.
    """
    text = _content_to_text(content)
    if not text:
        return
    if messages and messages[-1]["role"] == role:
        messages[-1]["content"] += "\n" + text
    else:
        messages.append({"role": role, "content": text})


def _history_to_messages(history):
    """Convert Gradio chat history into a list of role/content messages.

    Accepts both history shapes: ``{"role": ..., "content": ...}`` dicts and
    legacy ``(user_msg, assistant_msg)`` pairs. Entries whose role is neither
    ``"user"`` nor ``"assistant"`` are ignored.
    """
    messages = []
    for turn in history or []:
        if isinstance(turn, dict):
            role = turn.get("role")
            if role in ("user", "assistant"):
                _add_turn(messages, role, turn.get("content"))
        else:
            user_msg, assistant_msg = turn
            _add_turn(messages, "user", user_msg)
            _add_turn(messages, "assistant", assistant_msg)
    return messages


def chat(message, history):
    """Generate the assistant's reply to *message* given the prior *history*.

    Args:
        message: The newest user message.
        history: Earlier turns as supplied by ``gr.ChatInterface`` (either
            message dicts or ``(user, assistant)`` pairs).

    Returns:
        The model's reply with surrounding whitespace stripped.
    """
    messages = _history_to_messages(history)
    _add_turn(messages, "user", message)

    # Use the chat-completion API rather than a hand-built prompt: it applies
    # the model's chat template, so the correct turn delimiters and stop
    # tokens are used. The previous hand-built prompt had no turn markers and
    # passed empty strings as stop sequences.
    # NOTE(review): this relies on the GGUF file carrying a chat template in
    # its metadata -- confirm for this repo.
    output = llm.create_chat_completion(
        messages=messages,
        max_tokens=1024,
    )
    reply = output["choices"][0]["message"].get("content") or ""
    return reply.strip()


# Wrap the ChatInterface in gr.Blocks to safely apply the theme
with gr.Blocks(theme=gr.themes.Default(primary_hue="red")) as demo:
    gr.ChatInterface(
        fn=chat,
        title="🔥 Gemma4 Overlooked Thinker — Uncensored",
        description="5B uncensored reasoning model by **WithIn Us AI**. Abliterated with norm-preserving biprojection. 0.4% refusal rate.",
        examples=[
            "Explain quantum entanglement simply",
            "Write a Python web scraper",
            "What are the ethics of AI censorship?",
            "Debug this code: for i in range(10) print(i)",
        ],
    )

demo.launch()