import gradio as gr from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline # model_id = "heyIamUmair/llama3-3b-merged-legal" # model_id = "heyIamUmair/llama3-legal-lora-4epoch" model_id = "unsloth/Llama-3.2-1B-Instruct" from peft import PeftModel from transformers import AutoModelForCausalLM base_model = AutoModelForCausalLM.from_pretrained("unsloth/Llama-3.2-1B-Instruct", device_map="auto", torch_dtype="auto") model = PeftModel.from_pretrained(base_model, "heyIamUmair/llama3-legal-lora-4epoch") tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True) # model = AutoModelForCausalLM.from_pretrained( # model_id, # device_map="auto", # torch_dtype="auto" # ) pipe = pipeline("text-generation", model=model, tokenizer=tokenizer) def chat(message, history): # ✅ Fix: Add history output = pipe(message, max_new_tokens=200, do_sample=True, temperature=0.7) return output[0]["generated_text"] gr.ChatInterface(fn=chat, title="🧑‍⚖️ Pakistan Law Chatbot (LLama 3.2)").launch()