import os
from typing import List, Optional, Tuple

import gradio as gr
from llama_cpp import Llama

# --- Load model from Hugging Face Hub (Q8 GGUF) ---
llm = Llama.from_pretrained(
    repo_id="owlninjam/nytheria-3b",
    filename="gguf_models/nytheria-3b-q8_0.gguf",
    n_ctx=2048,
    # CPU threads; free tier usually gives 2-4 cores, so never request more
    # threads than the host reports (oversubscription slows inference down).
    n_threads=min(8, os.cpu_count() or 1),
)

# Styling for the chat area and the message bubbles.
CUSTOM_CSS = """
#chatbot {height: 600px; overflow: auto;}
.message.user {background: #4f46e5; color: white; border-radius: 20px; padding: 10px 15px;}
.message.bot {background: #f3f4f6; color: black; border-radius: 20px; padding: 10px 15px;}
.wrap {display: flex; gap: 10px; margin-bottom: 10px;}
.avatar {font-size: 20px;}
"""


# --- Chat function ---
def chat(
    message: str,
    history: Optional[List[Tuple[str, str]]],
) -> Tuple[List[Tuple[str, str]], str]:
    """Generate a reply to ``message`` and append the exchange to the history.

    Only ``message`` is sent to the model as the prompt; earlier turns in
    ``history`` are kept for display but are not part of the prompt.

    Args:
        message: Text typed by the user.
        history: Previous ``(user, bot)`` display pairs, or ``None`` for a
            fresh conversation.

    Returns:
        A ``(history, "")`` pair: the updated history and an empty string
        used to clear the input textbox.
    """
    # Work on a copy so the caller's list is never mutated.
    turns = list(history) if history else []

    # Nothing to answer: leave the conversation untouched.
    if not message or not message.strip():
        return turns, ""

    # No explicit stop sequences: the original passed stop=[""], and an empty
    # string is contained in every output, which ends generation immediately
    # with an empty reply. The model's end-of-sequence token still applies.
    response = llm(message, max_tokens=512)
    output = response["choices"][0]["text"].strip()

    turns.append(("🧑 You: " + message, "🤖 Nytheria: " + output))
    return turns, ""


# --- Build chat UI ---
with gr.Blocks(css=CUSTOM_CSS) as demo:
    gr.Markdown(
        """
Sexy UI powered by llama.cpp GGUF
"""
    )

    chatbot = gr.Chatbot(
        elem_id="chatbot",
        bubble_full_width=False,
        label="Chat with Nytheria",
    )
    msg = gr.Textbox(placeholder="Type your message here...", show_label=False)
    clear = gr.Button("🧹 Clear Chat")

    def respond(message, chat_history):
        """Gradio submit handler: delegate to ``chat`` and pass its result on."""
        chat_history, new_msg = chat(message, chat_history)
        return chat_history, new_msg

    # Submitting the textbox updates the chat window and clears the textbox.
    msg.submit(respond, [msg, chatbot], [chatbot, msg])
    # Clearing resets the chat window to an empty history.
    clear.click(lambda: [], None, chatbot, queue=False)

demo.launch()