# file: model_loader.py
"""Lazy, process-wide loader for the finance-assistant GGUF chat model."""

import logging
import os

from huggingface_hub import hf_hub_download
from langchain_community.chat_models import ChatLlamaCpp
from langchain_core.callbacks import StreamingStdOutCallbackHandler

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Hugging Face Hub coordinates of the model file that get_model() downloads.
_MODEL_REPO_ID = "junaid17/qwen2.5-finance-assistant-gguf"
_MODEL_FILENAME = "qwen2.5-finance-assistant-q4_k_m.gguf"

# Module-level cache: holds the ChatLlamaCpp instance once it has been built.
_llm_instance: "ChatLlamaCpp | None" = None


def _default_thread_count() -> int:
    """Return the ``n_threads`` value: half the CPU count, but never below 4."""
    # os.cpu_count() returns None when the count cannot be determined; fall
    # back to 1 so the floor division below cannot raise TypeError.
    cpu_count = os.cpu_count() or 1
    return max(4, cpu_count // 2)


def get_model() -> "ChatLlamaCpp | None":
    """Return the shared ``ChatLlamaCpp`` instance, creating it on first use.

    The first call fetches the GGUF file via ``hf_hub_download`` and builds
    the model; the instance is stored in the module-level ``_llm_instance``
    and returned as-is by later calls.

    Returns:
        The cached model, or ``None`` if downloading or constructing it
        failed. Failures are logged with a traceback rather than raised,
        and nothing is cached, so the next call attempts the load again.
    """
    global _llm_instance

    # Already built: hand back the cached instance without redoing any work.
    if _llm_instance is not None:
        return _llm_instance

    try:
        model_path = hf_hub_download(
            repo_id=_MODEL_REPO_ID,
            filename=_MODEL_FILENAME,
        )
        logger.info("Loading model from: %s", model_path)

        _llm_instance = ChatLlamaCpp(
            model_path=model_path,
            temperature=0.5,
            max_tokens=2048,
            n_ctx=4096,
            n_batch=512,
            n_threads=_default_thread_count(),
            n_gpu_layers=0,
            verbose=False,
            streaming=True,
            callbacks=[StreamingStdOutCallbackHandler()],
        )
        logger.info("Model loaded successfully!")
    except Exception as e:
        # Best-effort boundary: callers receive None instead of an exception.
        logger.exception("Error while loading the model, %s", e)

    return _llm_instance