"""Wrapper around a llama.cpp model that answers a query as a JSON function call."""

import json
import re

from llama_cpp import Llama

# Locates every position where a JSON object could begin inside model output.
_OBJECT_START = re.compile(r"\{")


class HealthFunctionLM:
    """Turn a free-text user query into a ``{"name", "parameters"}`` JSON call.

    The model is loaded once in ``__init__`` and reused for every ``query``.
    """

    def __init__(
        self,
        repo_id: str = "ramgovindv/health_function_call_llama3.2_3b_gguf",
        filename: str = "Llama-3.2-3B-Instruct.Q4_K_M.gguf",
        n_ctx: int = 2048,
        n_threads: int = 4,
    ) -> None:
        """Load the model through ``Llama.from_pretrained``.

        Args:
            repo_id: Repository identifier forwarded to ``from_pretrained``.
            filename: Model file name forwarded to ``from_pretrained``.
            n_ctx: Forwarded unchanged to ``from_pretrained``.
            n_threads: Forwarded unchanged to ``from_pretrained``.
        """
        self.llm = Llama.from_pretrained(
            repo_id=repo_id,
            filename=filename,
            n_ctx=n_ctx,
            n_threads=n_threads,
            chat_format=None,
            verbose=False,
        )

    def _build_prompt(self, query: str) -> str:
        """Return the instruction prompt with *query* embedded in it."""
        # NOTE(review): the prompt's line breaks were restored from a
        # whitespace-collapsed source -- confirm against the intended template.
        return (
            "\n"
            "You are an API generator.\n"
            "\n"
            "Return JSON:\n"
            "{\n"
            '  "name": "function_name",\n'
            '  "parameters": {}\n'
            "}\n"
            "\n"
            "User query:\n"
            f"{query}\n"
            "\n"
            "JSON:\n"
        )

    def _generate(self, prompt: str):
        """Send *prompt* as a single user message and return the reply content."""
        response = self.llm.create_chat_completion(
            messages=[{"role": "user", "content": prompt}],
            temperature=0.1,
        )
        return response["choices"][0]["message"]["content"]

    def _parse(self, text):
        """Decode model output as JSON, falling back to the raw text.

        The whole text is tried first. If that fails and *text* is a string,
        the first substring that decodes as a JSON object is returned, which
        tolerates code fences or prose around the object.

        Returns:
            The decoded JSON value, or ``{"raw_output": text}`` when nothing in
            *text* can be decoded.
        """
        try:
            return json.loads(text)
        except (TypeError, ValueError, RecursionError):
            # TypeError: non-text input such as None; ValueError covers
            # json.JSONDecodeError. Fall through to the lenient scan below.
            pass

        if not isinstance(text, str):
            return {"raw_output": text}

        decoder = json.JSONDecoder()
        for start in _OBJECT_START.finditer(text):
            try:
                obj, _end = decoder.raw_decode(text, start.start())
            except (ValueError, RecursionError):
                continue  # this brace did not open a valid object; try the next
            return obj

        return {"raw_output": text}

    def query(self, user_query: str) -> dict:
        """Run the full prompt -> generate -> parse pipeline for *user_query*.

        Returns:
            A dict with the original ``"query"`` and the parsed ``"result"``.
        """
        prompt = self._build_prompt(user_query)
        output = self._generate(prompt)
        parsed = self._parse(output)
        return {
            "query": user_query,
            "result": parsed,
        }


# simple entry point
def load_model() -> HealthFunctionLM:
    """Return a ``HealthFunctionLM`` built with its default arguments."""
    return HealthFunctionLM()