import os

import requests
from llama_cpp import Llama
import streamlit as st

MODEL_URL = "https://huggingface.co/MaziyarPanahi/BioMistral-7B-GGUF/resolve/main/BioMistral-7B.Q4_K_M.gguf"
MODEL_PATH = "BioMistral-7B.Q4_K_M.gguf"

# Streamlit app title (page config is issued before any other Streamlit call)
st.set_page_config(page_title="Medical Chatbot")
st.title("🩺 Medical Chatbot using BioMistral-7B")


# Download model manually if not present
def download_model():
    """Download the GGUF model from MODEL_URL into MODEL_PATH.

    The payload is streamed into a temporary ``<MODEL_PATH>.part`` file and
    only renamed to MODEL_PATH once the transfer finishes, so an interrupted
    download never leaves a truncated file that later looks like a valid
    model. On a non-200 status or a network/filesystem error the partial
    file is removed, the error is shown in the UI and the script run is
    stopped via ``st.stop()``.
    """
    partial_path = MODEL_PATH + ".part"
    error_message = None

    with st.spinner("🔄 Downloading model... please wait (few minutes)..."):
        try:
            # (connect timeout, read timeout) so a dead connection cannot
            # hang the app forever.
            with requests.get(MODEL_URL, stream=True, timeout=(10, 60)) as response:
                if response.status_code == 200:
                    with open(partial_path, "wb") as f:
                        for chunk in response.iter_content(chunk_size=8192):
                            if chunk:
                                f.write(chunk)
                    # Promote the finished download to its final name.
                    os.replace(partial_path, MODEL_PATH)
                else:
                    error_message = (
                        f"Failed to download model. Status: {response.status_code}"
                    )
        except (requests.RequestException, OSError) as exc:
            error_message = f"Failed to download model: {exc}"

        if error_message is not None:
            # Drop any partially written data before bailing out.
            if os.path.exists(partial_path):
                os.remove(partial_path)
            st.error(error_message)
            st.stop()


@st.cache_resource(show_spinner=False)
def _load_model():
    """Build the Llama instance once and reuse it across Streamlit reruns.

    Streamlit re-executes the whole script on every interaction; without
    caching, the model file would be reloaded for each question.
    """
    return Llama(
        model_path=MODEL_PATH,
        n_ctx=4096,
        n_threads=8,
        n_gpu_layers=35,  # Use 0 for CPU-only
    )


# Check and download
if not os.path.exists(MODEL_PATH):
    download_model()

# Verify model exists
if not os.path.exists(MODEL_PATH):
    st.error("❌ Model file missing after download. Exiting.")
    st.stop()

# Load model
with st.spinner("⚙️ Loading BioMistral model..."):
    llm = _load_model()

# UI
query = st.text_input("💬 Enter your medical question:")

if query:
    with st.spinner("🧠 Thinking..."):
        # NOTE(review): the previous stop list held an empty string, which
        # matches at every position and truncates the completion to nothing.
        # "</s>" is presumably the intended end-of-sequence marker — confirm
        # against the model's prompt template.
        response = llm(query, max_tokens=512, stop=["</s>"])
    st.markdown("**Answer:**")
    st.write(response["choices"][0]["text"].strip())