import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

# model_id = "heyIamUmair/llama3-3b-merged-legal"
# model_id = "heyIamUmair/llama3-legal-lora-4epoch"
model_id = "unsloth/Llama-3.2-1B-Instruct"

from peft import PeftModel
from transformers import AutoModelForCausalLM

base_model = AutoModelForCausalLM.from_pretrained("unsloth/Llama-3.2-1B-Instruct",  device_map="auto", torch_dtype="auto")
model = PeftModel.from_pretrained(base_model, "heyIamUmair/llama3-legal-lora-4epoch")

tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
# model = AutoModelForCausalLM.from_pretrained(
#     model_id,
#     device_map="auto",
#     torch_dtype="auto"
# )

pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

def chat(message, history):  # ✅ Fix: Add history
    output = pipe(message, max_new_tokens=200, do_sample=True, temperature=0.7)
    return output[0]["generated_text"]

gr.ChatInterface(fn=chat, title="🧑‍⚖️ Pakistan Law Chatbot (LLama 3.2)").launch()