import os
import requests
from llama_cpp import Llama
import streamlit as st

# Remote location of the quantized BioMistral-7B weights (GGUF, Q4_K_M).
# The literal is split across two lines purely for readability; the
# adjacent string pieces are joined at compile time.
MODEL_URL = (
    "https://huggingface.co/MaziyarPanahi/BioMistral-7B-GGUF"
    "/resolve/main/BioMistral-7B.Q4_K_M.gguf"
)
# Local file the weights are saved to and later loaded from (relative to
# the process working directory).
MODEL_PATH = "BioMistral-7B.Q4_K_M.gguf"

# Streamlit app title
# Set the page title passed to Streamlit's page configuration, then render
# the visible heading at the top of the page.
# NOTE(review): Streamlit has historically required set_page_config to run
# before any other st.* call — keep it first unless the installed version is
# confirmed to allow otherwise.
st.set_page_config(page_title="Medical Chatbot")
st.title("🩺 Medical Chatbot using BioMistral-7B")

# Download model manually if not present
def download_model():
    """Download the model file from ``MODEL_URL`` and store it at ``MODEL_PATH``.

    The body is streamed into a temporary ``<MODEL_PATH>.part`` file and only
    renamed to ``MODEL_PATH`` once the transfer has finished, so an
    interrupted or failed download never leaves a truncated file that a
    later run would mistake for a complete model.

    On any failure (non-200 status, network error, disk error) the partial
    file is removed, an error is shown in the Streamlit page and the script
    run is halted via ``st.stop()``.
    """
    partial_path = MODEL_PATH + ".part"
    failure = None

    with st.spinner("🔄 Downloading model... please wait (few minutes)..."):
        try:
            # (connect, read) timeouts: without them a stalled connection
            # would block the app forever. The context manager guarantees
            # the connection is released even if writing fails.
            with requests.get(MODEL_URL, stream=True, timeout=(10, 60)) as response:
                if response.status_code == 200:
                    with open(partial_path, "wb") as f:
                        # 1 MiB chunks keep per-chunk Python overhead low
                        # for a multi-gigabyte file.
                        for chunk in response.iter_content(chunk_size=1024 * 1024):
                            if chunk:
                                f.write(chunk)
                else:
                    failure = f"Failed to download model. Status: {response.status_code}"

            if failure is None:
                # Atomic on the same filesystem: MODEL_PATH appears only
                # when the file is complete.
                os.replace(partial_path, MODEL_PATH)
        except (requests.RequestException, OSError) as exc:
            failure = f"Failed to download model: {exc}"

        if failure is not None:
            # Best-effort cleanup of whatever was written before the failure.
            try:
                os.remove(partial_path)
            except OSError:
                pass
            st.error(failure)
            st.stop()

# Fetch the weights on first run, i.e. when nothing exists at MODEL_PATH yet
model_on_disk = os.path.exists(MODEL_PATH)
if not model_on_disk:
    download_model()

# Look again after the download attempt; halt this script run if the file
# still is not there
model_on_disk = os.path.exists(MODEL_PATH)
if not model_on_disk:
    st.error("❌ Model file missing after download. Exiting.")
    st.stop()

# Load model
@st.cache_resource(show_spinner="⚙️ Loading BioMistral model...")
def load_model():
    """Create the llama.cpp model instance once per server process.

    Streamlit re-executes the whole script on every widget interaction, so
    constructing ``Llama`` at module level would reload the model file from
    disk for every submitted question. ``st.cache_resource`` keeps the
    instance alive across reruns; the spinner text is shown only while the
    model is actually being created (cache miss).

    NOTE(review): the cached instance is shared by all sessions of this
    server process — confirm that concurrent use of one ``Llama`` object is
    acceptable for the deployment, or serialize access if it is not.

    Returns:
        The ``Llama`` instance built from ``MODEL_PATH``.
    """
    return Llama(
        model_path=MODEL_PATH,
        n_ctx=4096,
        n_threads=8,
        n_gpu_layers=35,  # Use 0 for CPU-only
    )


llm = load_model()

# UI: a single text box; a completion is generated only when the box is
# non-empty
query = st.text_input("💬 Enter your medical question:")
if query:
    # Generation settings: at most 512 new tokens, stopping at the "</s>"
    # marker
    generation_kwargs = {"max_tokens": 512, "stop": ["</s>"]}
    with st.spinner("🧠 Thinking..."):
        response = llm(query, **generation_kwargs)
        st.markdown("**Answer:**")
        # Only the first returned choice is displayed, with surrounding
        # whitespace removed
        first_choice = response["choices"][0]
        st.write(first_choice["text"].strip())