diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..c5c261271e3bb7e8668c7425a586b5938cc46dd6 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +sample_docs/Amar_Agnihotri_Resume.pdf filter=lfs diff=lfs merge=lfs -text diff --git a/Dockerfile b/Dockerfile index da5043e3e463622be0c0b69085e1e091939a2b11..880cf769df2c365c40262fe4a9e6b47ff91d001f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -37,7 +37,7 @@ ENV APP_ENV=production \ CELERY_CONCURRENCY=1 \ HF_INFERENCE_PROVIDER=auto \ OPENAI_BASE_URL=https://router.huggingface.co/v1 \ - OPENAI_MODEL=deepseek-ai/DeepSeek-R1 \ + OPENAI_MODEL=openai/gpt-oss-20b \ HF_EMBEDDING_MODEL=Qwen/Qwen3-Embedding-8B \ EMBEDDING_MODEL=infgrad/Jasper-Token-Compression-600M \ EMBEDDING_DEVICE=cpu \ diff --git a/README.md b/README.md index 662cafbaaa0152420167bc2bba8d1b9a6deaa727..dc2200595c92f178c91aac57ee3675249961b877 100644 --- a/README.md +++ b/README.md @@ -39,7 +39,7 @@ Required Space secrets: Recommended Space variables: - `OPENAI_BASE_URL=https://router.huggingface.co/v1` -- `OPENAI_MODEL=deepseek-ai/DeepSeek-R1` +- `OPENAI_MODEL=openai/gpt-oss-20b` - `HF_INFERENCE_PROVIDER=auto` - `HF_EMBEDDING_MODEL=Qwen/Qwen3-Embedding-8B` - `EMBEDDING_MODEL=infgrad/Jasper-Token-Compression-600M` diff --git a/app/__pycache__/__init__.cpython-312.pyc b/app/__pycache__/__init__.cpython-312.pyc deleted file mode 100644 index deb0aae25b40b3a2439a6226c18f79265cab2847..0000000000000000000000000000000000000000 Binary files a/app/__pycache__/__init__.cpython-312.pyc and /dev/null differ diff --git a/app/__pycache__/main.cpython-312.pyc b/app/__pycache__/main.cpython-312.pyc deleted file mode 100644 index daaa24b168a053ece6035efe6177695864747573..0000000000000000000000000000000000000000 Binary files a/app/__pycache__/main.cpython-312.pyc and /dev/null differ diff --git a/app/api/__pycache__/__init__.cpython-312.pyc b/app/api/__pycache__/__init__.cpython-312.pyc deleted file mode 100644 index 468b8b1f60e26bd10f7f2f99620091ba379f01e4..0000000000000000000000000000000000000000 Binary files a/app/api/__pycache__/__init__.cpython-312.pyc and /dev/null differ diff --git a/app/api/__pycache__/conversations.cpython-312.pyc b/app/api/__pycache__/conversations.cpython-312.pyc deleted file mode 100644 index 3f7dd597b9a4892d96122234a24ba1a71fde65a7..0000000000000000000000000000000000000000 Binary files a/app/api/__pycache__/conversations.cpython-312.pyc and /dev/null differ diff --git a/app/api/__pycache__/deps.cpython-312.pyc b/app/api/__pycache__/deps.cpython-312.pyc deleted file mode 100644 index 3c4c9bb2ce2b1718d662d3a5da688889d0f29412..0000000000000000000000000000000000000000 Binary files a/app/api/__pycache__/deps.cpython-312.pyc and /dev/null differ diff --git a/app/api/__pycache__/documents.cpython-312.pyc b/app/api/__pycache__/documents.cpython-312.pyc deleted file mode 100644 index 5443b860cfc5a983cfbca33736a6c97884f041ed..0000000000000000000000000000000000000000 Binary files a/app/api/__pycache__/documents.cpython-312.pyc and /dev/null differ diff --git a/app/api/__pycache__/health.cpython-312.pyc b/app/api/__pycache__/health.cpython-312.pyc deleted file mode 100644 index 696366ca6b479f97c4d1ac68bfcfcc7707bda9ed..0000000000000000000000000000000000000000 Binary files a/app/api/__pycache__/health.cpython-312.pyc and /dev/null differ diff --git a/app/api/__pycache__/questions.cpython-312.pyc b/app/api/__pycache__/questions.cpython-312.pyc deleted file mode 100644 index b972293e74edc82076a55d72314de5dab9e39259..0000000000000000000000000000000000000000 Binary files a/app/api/__pycache__/questions.cpython-312.pyc and /dev/null differ diff --git a/app/api/__pycache__/schemas.cpython-312.pyc b/app/api/__pycache__/schemas.cpython-312.pyc deleted file mode 100644 index d0592ad2fab4f8417981f3a20d9c54adcbd6cefd..0000000000000000000000000000000000000000 Binary files a/app/api/__pycache__/schemas.cpython-312.pyc and /dev/null differ diff --git a/app/core/__pycache__/__init__.cpython-312.pyc b/app/core/__pycache__/__init__.cpython-312.pyc deleted file mode 100644 index dedd644c1e7ac1ae5df22c5a9c67bbfdbcfeb12c..0000000000000000000000000000000000000000 Binary files a/app/core/__pycache__/__init__.cpython-312.pyc and /dev/null differ diff --git a/app/core/__pycache__/config.cpython-312.pyc b/app/core/__pycache__/config.cpython-312.pyc deleted file mode 100644 index f5b62f3290c3ba7e443b7bb7b1de3b36b6f03c7a..0000000000000000000000000000000000000000 Binary files a/app/core/__pycache__/config.cpython-312.pyc and /dev/null differ diff --git a/app/core/config.py b/app/core/config.py index fce5176cbd507a79e3df422de7526d9d102e4420..426945c9187dc7a0eb7da2f3b79d4a3d917d87a4 100644 --- a/app/core/config.py +++ b/app/core/config.py @@ -27,9 +27,9 @@ class Settings(BaseSettings): openai_api_key: str | None = None openai_model: str | None = None openai_timeout_seconds: int = 60 - local_llm_base_url: str = "http://host.docker.internal:11434/v1" + local_llm_base_url: str = "http://host.docker.internal:8000/v1" local_llm_api_key: str = "local-dev" - local_llm_model: str = "qwen3:0.6b" + local_llm_model: str = "Qwen/Qwen3-0.6B" local_llm_model_placeholder: str = "local-model" default_openai_model: str = "gpt-4.1-mini" @@ -121,6 +121,21 @@ class Settings(BaseSettings): ) return "OpenAI unavailable. Check OPENAI_API_KEY, OPENAI_BASE_URL, and network access." + @property + def local_llm_unavailable_message(self) -> str: + return ( + "Local LLM unavailable. " + f"Check {self.local_llm_base_url} and model {self.local_llm_model}." + ) + + @property + def hosted_then_local_llm_unavailable_message(self) -> str: + return ( + "Hosted LLM failed and local fallback is unavailable. " + f"Check OPENAI_BASE_URL/OPENAI_API_KEY plus local fallback {self.local_llm_base_url} " + f"with model {self.local_llm_model}." + ) + @property def use_hf_inference_embeddings(self) -> bool: return self._clean_optional(self.hf_token) is not None diff --git a/app/db/__pycache__/__init__.cpython-312.pyc b/app/db/__pycache__/__init__.cpython-312.pyc deleted file mode 100644 index 2f00704270fe0804d6c316a991cda167a392e020..0000000000000000000000000000000000000000 Binary files a/app/db/__pycache__/__init__.cpython-312.pyc and /dev/null differ diff --git a/app/db/__pycache__/base.cpython-312.pyc b/app/db/__pycache__/base.cpython-312.pyc deleted file mode 100644 index a6a9ea4f17b74cc11d27020456759c902bccbf82..0000000000000000000000000000000000000000 Binary files a/app/db/__pycache__/base.cpython-312.pyc and /dev/null differ diff --git a/app/db/__pycache__/session.cpython-312.pyc b/app/db/__pycache__/session.cpython-312.pyc deleted file mode 100644 index 9f3f871f16204a0880bbe307c989885c68150d16..0000000000000000000000000000000000000000 Binary files a/app/db/__pycache__/session.cpython-312.pyc and /dev/null differ diff --git a/app/models/__pycache__/__init__.cpython-312.pyc b/app/models/__pycache__/__init__.cpython-312.pyc deleted file mode 100644 index 4b95a9b9933d678da667530fcc1bd0aabed6fd4d..0000000000000000000000000000000000000000 Binary files a/app/models/__pycache__/__init__.cpython-312.pyc and /dev/null differ diff --git a/app/models/__pycache__/entities.cpython-312.pyc b/app/models/__pycache__/entities.cpython-312.pyc deleted file mode 100644 index abe3d4a3bd63c1b40c528af6d8abbe779822261d..0000000000000000000000000000000000000000 Binary files a/app/models/__pycache__/entities.cpython-312.pyc and /dev/null differ diff --git a/app/rag/__pycache__/__init__.cpython-312.pyc b/app/rag/__pycache__/__init__.cpython-312.pyc deleted file mode 100644 index 0a03a8fcd5566109e9a53e52118e6a0383edb7c0..0000000000000000000000000000000000000000 Binary files a/app/rag/__pycache__/__init__.cpython-312.pyc and /dev/null differ diff --git a/app/rag/__pycache__/chunk_verification.cpython-312.pyc b/app/rag/__pycache__/chunk_verification.cpython-312.pyc deleted file mode 100644 index b677e66199f0b04a4cc62b2f58308081e7f54c0c..0000000000000000000000000000000000000000 Binary files a/app/rag/__pycache__/chunk_verification.cpython-312.pyc and /dev/null differ diff --git a/app/rag/__pycache__/chunking.cpython-312.pyc b/app/rag/__pycache__/chunking.cpython-312.pyc deleted file mode 100644 index 0ce5195a3a3a58ade85fe1837c513994b0df20a5..0000000000000000000000000000000000000000 Binary files a/app/rag/__pycache__/chunking.cpython-312.pyc and /dev/null differ diff --git a/app/rag/__pycache__/document_profile.cpython-312.pyc b/app/rag/__pycache__/document_profile.cpython-312.pyc deleted file mode 100644 index aae841f001449aefac7d9925559cc5662303e9e1..0000000000000000000000000000000000000000 Binary files a/app/rag/__pycache__/document_profile.cpython-312.pyc and /dev/null differ diff --git a/app/rag/__pycache__/embeddings.cpython-312.pyc b/app/rag/__pycache__/embeddings.cpython-312.pyc deleted file mode 100644 index 22999a5858774a6cc984ce05050945ba8c35e471..0000000000000000000000000000000000000000 Binary files a/app/rag/__pycache__/embeddings.cpython-312.pyc and /dev/null differ diff --git a/app/rag/__pycache__/extraction.cpython-312.pyc b/app/rag/__pycache__/extraction.cpython-312.pyc deleted file mode 100644 index 6ce52baf16939eb46704c4f5c74419f11af5e42a..0000000000000000000000000000000000000000 Binary files a/app/rag/__pycache__/extraction.cpython-312.pyc and /dev/null differ diff --git a/app/rag/__pycache__/faiss_store.cpython-312.pyc b/app/rag/__pycache__/faiss_store.cpython-312.pyc deleted file mode 100644 index 7c297291a0ef779f8c5ec62cf99a47d55276c74e..0000000000000000000000000000000000000000 Binary files a/app/rag/__pycache__/faiss_store.cpython-312.pyc and /dev/null differ diff --git a/app/rag/__pycache__/grounding.cpython-312.pyc b/app/rag/__pycache__/grounding.cpython-312.pyc deleted file mode 100644 index b8c08d679f09ff3a8abe111dbf872fd7d9f2275f..0000000000000000000000000000000000000000 Binary files a/app/rag/__pycache__/grounding.cpython-312.pyc and /dev/null differ diff --git a/app/rag/__pycache__/prompts.cpython-312.pyc b/app/rag/__pycache__/prompts.cpython-312.pyc deleted file mode 100644 index 0993caa746ccd280dc9ef634ce058490de64e5e3..0000000000000000000000000000000000000000 Binary files a/app/rag/__pycache__/prompts.cpython-312.pyc and /dev/null differ diff --git a/app/rag/__pycache__/quality_retrieval.cpython-312.pyc b/app/rag/__pycache__/quality_retrieval.cpython-312.pyc deleted file mode 100644 index bbdbe01c6869a98bae50f7320577379e91cbaf27..0000000000000000000000000000000000000000 Binary files a/app/rag/__pycache__/quality_retrieval.cpython-312.pyc and /dev/null differ diff --git a/app/rag/__pycache__/query_expansion.cpython-312.pyc b/app/rag/__pycache__/query_expansion.cpython-312.pyc deleted file mode 100644 index 7f00a60182e3639476618151a02d6d3d16aec269..0000000000000000000000000000000000000000 Binary files a/app/rag/__pycache__/query_expansion.cpython-312.pyc and /dev/null differ diff --git a/app/rag/__pycache__/query_scope.cpython-312.pyc b/app/rag/__pycache__/query_scope.cpython-312.pyc deleted file mode 100644 index 0defe3570d79347de0081c83e61b419479defbc8..0000000000000000000000000000000000000000 Binary files a/app/rag/__pycache__/query_scope.cpython-312.pyc and /dev/null differ diff --git a/app/rag/__pycache__/retrieval.cpython-312.pyc b/app/rag/__pycache__/retrieval.cpython-312.pyc deleted file mode 100644 index 6d1dbe1c89783fc6716d44cbf1d18e2c89540259..0000000000000000000000000000000000000000 Binary files a/app/rag/__pycache__/retrieval.cpython-312.pyc and /dev/null differ diff --git a/app/rag/__pycache__/types.cpython-312.pyc b/app/rag/__pycache__/types.cpython-312.pyc deleted file mode 100644 index 5836ee8add29737a3a6bfcead108c8f0cdd7a3b9..0000000000000000000000000000000000000000 Binary files a/app/rag/__pycache__/types.cpython-312.pyc and /dev/null differ diff --git a/app/rag/chunk_verification.py b/app/rag/chunk_verification.py index 28f793bbe0adea31df7d24b50bc92c37badab14e..1faab307d74135532da707044d0caf3c63defcf0 100644 --- a/app/rag/chunk_verification.py +++ b/app/rag/chunk_verification.py @@ -14,19 +14,6 @@ from app.services.llm_client import LLMUnavailableError, OpenAICompatibleClient THINK_BLOCK_RE = re.compile(r".*?", re.IGNORECASE | re.DOTALL) JSON_BLOCK_RE = re.compile(r"\{.*\}", re.DOTALL) FENCED_BLOCK_RE = re.compile(r"```(?:json)?\s*(.*?)```", re.IGNORECASE | re.DOTALL) -CONTINUATION_PREFIXES = ( - "also ", - "another ", - "additionally ", - "further ", - "furthermore ", - "the system ", - "the platform ", - "this solution ", - "this system ", - "built on ", - "it ", -) def verify_semantic_chunks( @@ -41,44 +28,44 @@ def verify_semantic_chunks( if len(chunks) < 2: return annotate_chunks(chunks, verification_status="skipped_too_short") - candidate_windows = build_candidate_windows(chunks) - if not candidate_windows: - return annotate_chunks(chunks, verification_status="skipped_no_candidates") - + boundary_windows = build_boundary_windows(chunks) llm = llm or OpenAICompatibleClient() - try: - raw_response = llm.complete( - build_chunk_verification_prompt( - document_title=document_title, - chunks=chunks, - candidate_windows=candidate_windows, - chunk_max_chars=settings.chunk_max_chars, - ) - ) - except LLMUnavailableError: - return annotate_chunks(chunks, verification_status="skipped_llm_unavailable") - except Exception: - return annotate_chunks(chunks, verification_status="skipped_llm_error") - - decisions = parse_chunk_verification_response(raw_response) - if decisions is None: - return annotate_chunks(chunks, verification_status="skipped_invalid_response") - merge_boundaries: set[int] = set() decision_notes: dict[int, str] = {} - for decision in decisions: - left_index = decision.get("left_chunk_index") - right_index = decision.get("right_chunk_index") - action = str(decision.get("action", "")).strip().lower() - if not isinstance(left_index, int) or not isinstance(right_index, int): - continue - if right_index != left_index + 1: - continue - if action not in {"merge", "keep"}: - continue - decision_notes[left_index] = str(decision.get("reason", "")).strip() - if action == "merge": - merge_boundaries.add(left_index) + batch_size = max(1, settings.chunk_verification_max_windows) + + for boundary_batch in batch_boundary_windows(boundary_windows, batch_size): + try: + raw_response = llm.complete( + build_chunk_verification_prompt( + document_title=document_title, + chunks=chunks, + boundary_windows=boundary_batch, + chunk_max_chars=settings.chunk_max_chars, + ) + ) + except LLMUnavailableError: + return annotate_chunks(chunks, verification_status="skipped_llm_unavailable") + except Exception: + return annotate_chunks(chunks, verification_status="skipped_llm_error") + + decisions = parse_chunk_verification_response(raw_response) + if decisions is None: + return annotate_chunks(chunks, verification_status="skipped_invalid_response") + + for decision in decisions: + left_index = decision.get("left_chunk_index") + right_index = decision.get("right_chunk_index") + action = str(decision.get("action", "")).strip().lower() + if not isinstance(left_index, int) or not isinstance(right_index, int): + continue + if right_index != left_index + 1: + continue + if action not in {"merge", "keep"}: + continue + decision_notes[left_index] = str(decision.get("reason", "")).strip() + if action == "merge": + merge_boundaries.add(left_index) return apply_merge_decisions( document_title=document_title, @@ -88,42 +75,23 @@ def verify_semantic_chunks( ) -def build_candidate_windows(chunks: list[ChunkDraft]) -> list[dict[str, int]]: - settings = get_settings() - candidate_windows: list[dict[str, int]] = [] +def build_boundary_windows(chunks: list[ChunkDraft]) -> list[dict[str, int]]: + boundary_windows: list[dict[str, int]] = [] for left_index in range(len(chunks) - 1): - if is_verification_candidate(chunks[left_index], chunks[left_index + 1], settings.chunk_max_chars): - candidate_windows.append( - { - "left_index": left_index, - "right_index": left_index + 1, - } - ) - if len(candidate_windows) >= settings.chunk_verification_max_windows: - break - return candidate_windows - - -def is_verification_candidate(left: ChunkDraft, right: ChunkDraft, chunk_max_chars: int) -> bool: - if left.section_title != right.section_title and left.heading_path != right.heading_path: - return False - - combined_length = len(left.raw_text) + len(right.raw_text) + 2 - if combined_length > chunk_max_chars + 180: - return False - - shorter_chunk = min(len(left.raw_text), len(right.raw_text)) - if shorter_chunk <= max(180, chunk_max_chars // 4): - return True - - if left.raw_text.count("•") and right.raw_text.count("•"): - return True + boundary_windows.append( + { + "left_index": left_index, + "right_index": left_index + 1, + } + ) + return boundary_windows - right_lower = right.raw_text.strip().lower() - if any(right_lower.startswith(prefix) for prefix in CONTINUATION_PREFIXES): - return True - return left.section_title == right.section_title +def batch_boundary_windows(boundary_windows: list[dict[str, int]], batch_size: int) -> list[list[dict[str, int]]]: + return [ + boundary_windows[index : index + batch_size] + for index in range(0, len(boundary_windows), batch_size) + ] def parse_chunk_verification_response(raw_response: str) -> list[dict[str, object]] | None: diff --git a/app/rag/prompts.py b/app/rag/prompts.py index f7b92938798753dfa185a71a61d110b8bf04615d..c503e8ae83628c227a939941cd26af36401ae93b 100644 --- a/app/rag/prompts.py +++ b/app/rag/prompts.py @@ -187,11 +187,11 @@ def build_chunk_verification_prompt( *, document_title: str, chunks: list[ChunkDraft], - candidate_windows: list[dict[str, int]], + boundary_windows: list[dict[str, int]], chunk_max_chars: int, ) -> str: windows: list[str] = [] - for window in candidate_windows: + for window in boundary_windows: left_index = window["left_index"] right_index = window["right_index"] previous_chunk = chunks[left_index - 1] if left_index > 0 else None @@ -215,7 +215,7 @@ def build_chunk_verification_prompt( window_text = "\n\n---\n\n".join(windows) return ( "You are verifying document chunk boundaries after deterministic chunking.\n" - "For each candidate boundary, decide whether the right chunk should stay separate or merge with the left chunk.\n" + "For each chunk boundary, decide whether the right chunk should stay separate or merge with the left chunk.\n" "Choose merge only when the right chunk is a continuation of the same semantic unit, list item group, or subtopic.\n" "Choose keep when the right chunk starts a different role, project, section, or independently meaningful unit.\n" "Do not rewrite or add facts. Do not use outside knowledge.\n" @@ -223,7 +223,7 @@ def build_chunk_verification_prompt( '{"decisions":[{"left_chunk_index":0,"right_chunk_index":1,"action":"keep","reason":"..."},{"left_chunk_index":1,"right_chunk_index":2,"action":"merge","reason":"..."}]}\n\n' f"Document Title: {document_title}\n" f"Chunk Max Chars: {chunk_max_chars}\n\n" - f"Candidate Boundaries:\n{window_text}\n" + f"Chunk Boundaries:\n{window_text}\n" ) diff --git a/app/repositories/__pycache__/__init__.cpython-312.pyc b/app/repositories/__pycache__/__init__.cpython-312.pyc deleted file mode 100644 index b402e8d2acf7cfec4b4c18b576b32432b6d81270..0000000000000000000000000000000000000000 Binary files a/app/repositories/__pycache__/__init__.cpython-312.pyc and /dev/null differ diff --git a/app/repositories/__pycache__/conversations.cpython-312.pyc b/app/repositories/__pycache__/conversations.cpython-312.pyc deleted file mode 100644 index 6822f690a16b42c47c24f51e513373302872972d..0000000000000000000000000000000000000000 Binary files a/app/repositories/__pycache__/conversations.cpython-312.pyc and /dev/null differ diff --git a/app/repositories/__pycache__/documents.cpython-312.pyc b/app/repositories/__pycache__/documents.cpython-312.pyc deleted file mode 100644 index 2834a4411355b6fb1e430a7bb1483af1a79c522e..0000000000000000000000000000000000000000 Binary files a/app/repositories/__pycache__/documents.cpython-312.pyc and /dev/null differ diff --git a/app/services/__pycache__/__init__.cpython-312.pyc b/app/services/__pycache__/__init__.cpython-312.pyc deleted file mode 100644 index a015b945d9a3b71d5e3f4eeaffe310bfc64a01e1..0000000000000000000000000000000000000000 Binary files a/app/services/__pycache__/__init__.cpython-312.pyc and /dev/null differ diff --git a/app/services/__pycache__/document_processor.cpython-312.pyc b/app/services/__pycache__/document_processor.cpython-312.pyc deleted file mode 100644 index 0dd8dcb8acccd14033de7bbe314c632b65fbc269..0000000000000000000000000000000000000000 Binary files a/app/services/__pycache__/document_processor.cpython-312.pyc and /dev/null differ diff --git a/app/services/__pycache__/document_service.cpython-312.pyc b/app/services/__pycache__/document_service.cpython-312.pyc deleted file mode 100644 index 1efc6a1a92f4a35a783ad3d2e89f4a3dc36b79a3..0000000000000000000000000000000000000000 Binary files a/app/services/__pycache__/document_service.cpython-312.pyc and /dev/null differ diff --git a/app/services/__pycache__/llm_client.cpython-312.pyc b/app/services/__pycache__/llm_client.cpython-312.pyc deleted file mode 100644 index c26fce914634c905447d9cb774b475395d6785fc..0000000000000000000000000000000000000000 Binary files a/app/services/__pycache__/llm_client.cpython-312.pyc and /dev/null differ diff --git a/app/services/__pycache__/qa_service.cpython-312.pyc b/app/services/__pycache__/qa_service.cpython-312.pyc deleted file mode 100644 index e47fc59c08e09d226b08ea94cc4dbd308f9b86de..0000000000000000000000000000000000000000 Binary files a/app/services/__pycache__/qa_service.cpython-312.pyc and /dev/null differ diff --git a/app/services/llm_client.py b/app/services/llm_client.py index 23041e6a224294087cac21a808239c31334bfc12..4a05105c3703dd1805a69593d1880b1345fdecd4 100644 --- a/app/services/llm_client.py +++ b/app/services/llm_client.py @@ -1,9 +1,13 @@ from __future__ import annotations +import logging + from openai import OpenAI, OpenAIError from app.core.config import get_settings +logger = logging.getLogger(__name__) + class LLMUnavailableError(RuntimeError): """Raised when the configured OpenAI-compatible endpoint cannot complete a request.""" @@ -13,13 +17,16 @@ class OpenAICompatibleClient: def __init__(self) -> None: settings = get_settings() self.settings = settings - client_kwargs = { - "api_key": settings.effective_openai_api_key, - "timeout": settings.openai_timeout_seconds, - } - if settings.effective_openai_base_url: - client_kwargs["base_url"] = settings.effective_openai_base_url - self.client = OpenAI(**client_kwargs) + self.client = self._build_client( + api_key=settings.effective_openai_api_key, + base_url=settings.effective_openai_base_url, + ) + self.fallback_client = None + if settings.has_openai_api_key: + self.fallback_client = self._build_client( + api_key=settings.local_llm_api_key, + base_url=settings.local_llm_base_url, + ) def complete(self, prompt: str, *, system_prompt: str | None = None) -> str: messages = [] @@ -28,11 +35,43 @@ class OpenAICompatibleClient: messages.append({"role": "user", "content": prompt}) try: - response = self.client.chat.completions.create( + response = self._complete_with_client( + self.client, model=self.settings.effective_openai_model, - temperature=0.1, messages=messages, ) except OpenAIError as exc: - raise LLMUnavailableError(self.settings.llm_unavailable_message) from exc + if self.fallback_client is not None: + logger.warning( + "Hosted LLM request failed for model %s; falling back to local model %s: %s", + self.settings.effective_openai_model, + self.settings.local_llm_model, + exc, + ) + try: + response = self._complete_with_client( + self.fallback_client, + model=self.settings.local_llm_model, + messages=messages, + ) + except OpenAIError as fallback_exc: + raise LLMUnavailableError(self.settings.hosted_then_local_llm_unavailable_message) from fallback_exc + else: + raise LLMUnavailableError(self.settings.llm_unavailable_message) from exc return response.choices[0].message.content or "" + + def _build_client(self, *, api_key: str, base_url: str | None) -> OpenAI: + client_kwargs = { + "api_key": api_key, + "timeout": self.settings.openai_timeout_seconds, + } + if base_url: + client_kwargs["base_url"] = base_url + return OpenAI(**client_kwargs) + + def _complete_with_client(self, client: OpenAI, *, model: str, messages: list[dict[str, str]]): + return client.chat.completions.create( + model=model, + temperature=0.1, + messages=messages, + ) diff --git a/app/worker/__pycache__/__init__.cpython-312.pyc b/app/worker/__pycache__/__init__.cpython-312.pyc deleted file mode 100644 index d90dec3f3cda20d58831dfd566dfc9483a5d5e69..0000000000000000000000000000000000000000 Binary files a/app/worker/__pycache__/__init__.cpython-312.pyc and /dev/null differ diff --git a/app/worker/__pycache__/celery_app.cpython-312.pyc b/app/worker/__pycache__/celery_app.cpython-312.pyc deleted file mode 100644 index 0a68f9cc074ba9060ef59c39596e3dbb3db5d16f..0000000000000000000000000000000000000000 Binary files a/app/worker/__pycache__/celery_app.cpython-312.pyc and /dev/null differ diff --git a/app/worker/__pycache__/tasks.cpython-312.pyc b/app/worker/__pycache__/tasks.cpython-312.pyc deleted file mode 100644 index e5a7daca7c23781ba6f94a275e71ea1e4c98ab65..0000000000000000000000000000000000000000 Binary files a/app/worker/__pycache__/tasks.cpython-312.pyc and /dev/null differ diff --git a/sample_docs/Amar_Agnihotri_Resume.pdf b/sample_docs/Amar_Agnihotri_Resume.pdf new file mode 100644 index 0000000000000000000000000000000000000000..eff3878b34823ee27eae26c0df74f34466ef654f --- /dev/null +++ b/sample_docs/Amar_Agnihotri_Resume.pdf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33a2282d9930db1dd7ca8a98d1d4cca52ba1a307f30c0cab7a51570dfd102f24 +size 119196 diff --git a/sample_docs/candidate_profiles_packet.pdf b/sample_docs/candidate_profiles_packet.pdf deleted file mode 100644 index 11d9402d1db968f2bb2d34fad11a5b790d8b6c67..0000000000000000000000000000000000000000 --- a/sample_docs/candidate_profiles_packet.pdf +++ /dev/null @@ -1,95 +0,0 @@ -%PDF-1.4 -1 0 obj -<< /Type /Catalog /Pages 2 0 R >> -endobj -2 0 obj -<< /Type /Pages /Kids [3 0 R] /Count 1 >> -endobj -3 0 obj -<< /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] /Resources << /Font << /F1 5 0 R >> >> /Contents 4 0 R >> -endobj -4 0 obj -<< /Length 1306 >> -stream -BT -/F1 11 Tf -14 TL -50 760 Td -(Candidate Profiles Packet) Tj -T* -() Tj -T* -(Priya Nair) Tj -T* -() Tj -T* -(6 years experience. Built Python microservices with FastAPI, Celery, Redis, PostgreSQL, and) Tj -T* -(Docker. Led an incident reduction effort for asynchronous workflows.) Tj -T* -() Tj -T* -(Strengths: backend platform ownership, queue design, API reliability, production debugging.) Tj -T* -() Tj -T* -(Raghav Menon) Tj -T* -() Tj -T* -(5 years experience. Built semantic search and document question-answering systems using) Tj -T* -(sentence transformers, FAISS, reranking, and evaluation tooling.) Tj -T* -() Tj -T* -(Strengths: LLM products, retrieval quality tuning, prompt controls, source attribution.) Tj -T* -() Tj -T* -(Asha Kulkarni) Tj -T* -() Tj -T* -(7 years experience. Strong in Kubernetes, AWS, Terraform, CI/CD, and observability. Limited) Tj -T* -(recent Python API work and no direct Celery ownership.) Tj -T* -() Tj -T* -(Strengths: DevOps depth, platform automation. Risks: weaker application-layer backend fit.) Tj -T* -() Tj -T* -(Neel Shah) Tj -T* -() Tj -T* -(2 years experience. Built internal dashboards and simple Flask APIs. Good communication but) Tj -T* -(below the target experience band.) Tj -T* -() Tj -T* -(Strengths: learning speed. Risks: limited scale and insufficient backend depth for the role.) Tj -T* -() Tj -ET -endstream -endobj -5 0 obj -<< /Type /Font /Subtype /Type1 /BaseFont /Helvetica >> -endobj -xref -0 6 -0000000000 65535 f -0000000009 00000 n -0000000058 00000 n -0000000115 00000 n -0000000241 00000 n -0000001599 00000 n -trailer -<< /Size 6 /Root 1 0 R >> -startxref -1669 -%%EOF diff --git a/sample_docs/complete_backend_hiring_packet.pdf b/sample_docs/complete_backend_hiring_packet.pdf deleted file mode 100644 index ca08cc19af50094aa80c23809b7ff3db7d24eebe..0000000000000000000000000000000000000000 --- a/sample_docs/complete_backend_hiring_packet.pdf +++ /dev/null @@ -1,109 +0,0 @@ -%PDF-1.4 -1 0 obj -<< /Type /Catalog /Pages 2 0 R >> -endobj -2 0 obj -<< /Type /Pages /Kids [3 0 R] /Count 1 >> -endobj -3 0 obj -<< /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] /Resources << /Font << /F1 5 0 R >> >> /Contents 4 0 R >> -endobj -4 0 obj -<< /Length 1292 >> -stream -BT -/F1 11 Tf -14 TL -50 760 Td -(Backend Hiring Packet) Tj -T* -() Tj -T* -(Role Overview) Tj -T* -() Tj -T* -(Role: Senior Backend Engineer - Recruitment Automation Platform) Tj -T* -() Tj -T* -(Location: Bengaluru or Remote India) Tj -T* -() Tj -T* -(Experience Range: 4 to 7 years) Tj -T* -() Tj -T* -(Must-have Skills) Tj -T* -() Tj -T* -(Python, FastAPI, PostgreSQL, Redis, Docker, REST APIs, and Celery/background jobs are) Tj -T* -(mandatory.) Tj -T* -() Tj -T* -(Candidates should be comfortable owning production services, tracing incidents, and) Tj -T* -(reviewing schemas and API contracts.) Tj -T* -() Tj -T* -(Good-to-have Skills) Tj -T* -() Tj -T* -(Experience with LLM applications, retrieval-augmented generation, FAISS, Kubernetes, AWS,) Tj -T* -(and observability is helpful but not mandatory.) Tj -T* -() Tj -T* -(Interview Process) Tj -T* -() Tj -T* -(Recruiter screen, coding exercise, system design interview, and hiring manager round.) Tj -T* -() Tj -T* -(Evaluation Weights) Tj -T* -() Tj -T* -(Backend depth: 30 percent. System design: 25 percent. Production ownership: 20 percent.) Tj -T* -(Communication: 15 percent. Culture fit: 10 percent.) Tj -T* -() Tj -T* -(Decision Notes) Tj -T* -() Tj -T* -(Strong candidates should demonstrate careful API design, asynchronous job handling, and) Tj -T* -(evidence-based debugging.) Tj -T* -() Tj -ET -endstream -endobj -5 0 obj -<< /Type /Font /Subtype /Type1 /BaseFont /Helvetica >> -endobj -xref -0 6 -0000000000 65535 f -0000000009 00000 n -0000000058 00000 n -0000000115 00000 n -0000000241 00000 n -0000001585 00000 n -trailer -<< /Size 6 /Root 1 0 R >> -startxref -1655 -%%EOF diff --git a/scripts/__pycache__/evaluate_resume_questions.cpython-312.pyc b/scripts/__pycache__/evaluate_resume_questions.cpython-312.pyc deleted file mode 100644 index 6c600d0d6ccad2c17b559674f9da5aaa6a05c65c..0000000000000000000000000000000000000000 Binary files a/scripts/__pycache__/evaluate_resume_questions.cpython-312.pyc and /dev/null differ diff --git a/scripts/__pycache__/generate_sample_docs.cpython-312.pyc b/scripts/__pycache__/generate_sample_docs.cpython-312.pyc deleted file mode 100644 index 9367b4db15117c2eec4d9238ad715f88d50fa817..0000000000000000000000000000000000000000 Binary files a/scripts/__pycache__/generate_sample_docs.cpython-312.pyc and /dev/null differ diff --git a/start-space.sh b/start-space.sh index bf7f62a600fb22b7c7c88b1967362473c5749a95..d53ab4b98a053a4a2b65b2940cac6ed9db13b854 100644 --- a/start-space.sh +++ b/start-space.sh @@ -5,7 +5,7 @@ export PORT="${PORT:-7860}" export DATABASE_URL="${DATABASE_URL:-postgresql+psycopg://docqa:docqa@127.0.0.1:5432/docqa}" export REDIS_URL="${REDIS_URL:-redis://127.0.0.1:6379/0}" export OPENAI_BASE_URL="${OPENAI_BASE_URL:-https://router.huggingface.co/v1}" -export OPENAI_MODEL="${OPENAI_MODEL:-deepseek-ai/DeepSeek-R1}" +export OPENAI_MODEL="${OPENAI_MODEL:-openai/gpt-oss-20b}" export EMBEDDING_MODEL="${EMBEDDING_MODEL:-infgrad/Jasper-Token-Compression-600M}" export HF_INFERENCE_PROVIDER="${HF_INFERENCE_PROVIDER:-auto}" export HF_EMBEDDING_MODEL="${HF_EMBEDDING_MODEL:-Qwen/Qwen3-Embedding-8B}"