agni512 committed on
Commit
3811b74
·
verified ·
1 Parent(s): dfc8bda

Redeploy latest local changes

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +1 -0
  2. Dockerfile +1 -1
  3. README.md +1 -1
  4. app/__pycache__/__init__.cpython-312.pyc +0 -0
  5. app/__pycache__/main.cpython-312.pyc +0 -0
  6. app/api/__pycache__/__init__.cpython-312.pyc +0 -0
  7. app/api/__pycache__/conversations.cpython-312.pyc +0 -0
  8. app/api/__pycache__/deps.cpython-312.pyc +0 -0
  9. app/api/__pycache__/documents.cpython-312.pyc +0 -0
  10. app/api/__pycache__/health.cpython-312.pyc +0 -0
  11. app/api/__pycache__/questions.cpython-312.pyc +0 -0
  12. app/api/__pycache__/schemas.cpython-312.pyc +0 -0
  13. app/core/__pycache__/__init__.cpython-312.pyc +0 -0
  14. app/core/__pycache__/config.cpython-312.pyc +0 -0
  15. app/core/config.py +17 -2
  16. app/db/__pycache__/__init__.cpython-312.pyc +0 -0
  17. app/db/__pycache__/base.cpython-312.pyc +0 -0
  18. app/db/__pycache__/session.cpython-312.pyc +0 -0
  19. app/models/__pycache__/__init__.cpython-312.pyc +0 -0
  20. app/models/__pycache__/entities.cpython-312.pyc +0 -0
  21. app/rag/__pycache__/__init__.cpython-312.pyc +0 -0
  22. app/rag/__pycache__/chunk_verification.cpython-312.pyc +0 -0
  23. app/rag/__pycache__/chunking.cpython-312.pyc +0 -0
  24. app/rag/__pycache__/document_profile.cpython-312.pyc +0 -0
  25. app/rag/__pycache__/embeddings.cpython-312.pyc +0 -0
  26. app/rag/__pycache__/extraction.cpython-312.pyc +0 -0
  27. app/rag/__pycache__/faiss_store.cpython-312.pyc +0 -0
  28. app/rag/__pycache__/grounding.cpython-312.pyc +0 -0
  29. app/rag/__pycache__/prompts.cpython-312.pyc +0 -0
  30. app/rag/__pycache__/quality_retrieval.cpython-312.pyc +0 -0
  31. app/rag/__pycache__/query_expansion.cpython-312.pyc +0 -0
  32. app/rag/__pycache__/query_scope.cpython-312.pyc +0 -0
  33. app/rag/__pycache__/retrieval.cpython-312.pyc +0 -0
  34. app/rag/__pycache__/types.cpython-312.pyc +0 -0
  35. app/rag/chunk_verification.py +49 -81
  36. app/rag/prompts.py +4 -4
  37. app/repositories/__pycache__/__init__.cpython-312.pyc +0 -0
  38. app/repositories/__pycache__/conversations.cpython-312.pyc +0 -0
  39. app/repositories/__pycache__/documents.cpython-312.pyc +0 -0
  40. app/services/__pycache__/__init__.cpython-312.pyc +0 -0
  41. app/services/__pycache__/document_processor.cpython-312.pyc +0 -0
  42. app/services/__pycache__/document_service.cpython-312.pyc +0 -0
  43. app/services/__pycache__/llm_client.cpython-312.pyc +0 -0
  44. app/services/__pycache__/qa_service.cpython-312.pyc +0 -0
  45. app/services/llm_client.py +49 -10
  46. app/worker/__pycache__/__init__.cpython-312.pyc +0 -0
  47. app/worker/__pycache__/celery_app.cpython-312.pyc +0 -0
  48. app/worker/__pycache__/tasks.cpython-312.pyc +0 -0
  49. sample_docs/Amar_Agnihotri_Resume.pdf +3 -0
  50. sample_docs/candidate_profiles_packet.pdf +0 -95
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ sample_docs/Amar_Agnihotri_Resume.pdf filter=lfs diff=lfs merge=lfs -text
Dockerfile CHANGED
@@ -37,7 +37,7 @@ ENV APP_ENV=production \
37
  CELERY_CONCURRENCY=1 \
38
  HF_INFERENCE_PROVIDER=auto \
39
  OPENAI_BASE_URL=https://router.huggingface.co/v1 \
40
- OPENAI_MODEL=deepseek-ai/DeepSeek-R1 \
41
  HF_EMBEDDING_MODEL=Qwen/Qwen3-Embedding-8B \
42
  EMBEDDING_MODEL=infgrad/Jasper-Token-Compression-600M \
43
  EMBEDDING_DEVICE=cpu \
 
37
  CELERY_CONCURRENCY=1 \
38
  HF_INFERENCE_PROVIDER=auto \
39
  OPENAI_BASE_URL=https://router.huggingface.co/v1 \
40
+ OPENAI_MODEL=openai/gpt-oss-20b \
41
  HF_EMBEDDING_MODEL=Qwen/Qwen3-Embedding-8B \
42
  EMBEDDING_MODEL=infgrad/Jasper-Token-Compression-600M \
43
  EMBEDDING_DEVICE=cpu \
README.md CHANGED
@@ -39,7 +39,7 @@ Required Space secrets:
39
  Recommended Space variables:
40
 
41
  - `OPENAI_BASE_URL=https://router.huggingface.co/v1`
42
- - `OPENAI_MODEL=deepseek-ai/DeepSeek-R1`
43
  - `HF_INFERENCE_PROVIDER=auto`
44
  - `HF_EMBEDDING_MODEL=Qwen/Qwen3-Embedding-8B`
45
  - `EMBEDDING_MODEL=infgrad/Jasper-Token-Compression-600M`
 
39
  Recommended Space variables:
40
 
41
  - `OPENAI_BASE_URL=https://router.huggingface.co/v1`
42
+ - `OPENAI_MODEL=openai/gpt-oss-20b`
43
  - `HF_INFERENCE_PROVIDER=auto`
44
  - `HF_EMBEDDING_MODEL=Qwen/Qwen3-Embedding-8B`
45
  - `EMBEDDING_MODEL=infgrad/Jasper-Token-Compression-600M`
app/__pycache__/__init__.cpython-312.pyc DELETED
Binary file (181 Bytes)
 
app/__pycache__/main.cpython-312.pyc DELETED
Binary file (2.32 kB)
 
app/api/__pycache__/__init__.cpython-312.pyc DELETED
Binary file (155 Bytes)
 
app/api/__pycache__/conversations.cpython-312.pyc DELETED
Binary file (2.67 kB)
 
app/api/__pycache__/deps.cpython-312.pyc DELETED
Binary file (483 Bytes)
 
app/api/__pycache__/documents.cpython-312.pyc DELETED
Binary file (5.33 kB)
 
app/api/__pycache__/health.cpython-312.pyc DELETED
Binary file (836 Bytes)
 
app/api/__pycache__/questions.cpython-312.pyc DELETED
Binary file (2.27 kB)
 
app/api/__pycache__/schemas.cpython-312.pyc DELETED
Binary file (4.32 kB)
 
app/core/__pycache__/__init__.cpython-312.pyc DELETED
Binary file (172 Bytes)
 
app/core/__pycache__/config.cpython-312.pyc DELETED
Binary file (7.77 kB)
 
app/core/config.py CHANGED
@@ -27,9 +27,9 @@ class Settings(BaseSettings):
27
  openai_api_key: str | None = None
28
  openai_model: str | None = None
29
  openai_timeout_seconds: int = 60
30
- local_llm_base_url: str = "http://host.docker.internal:11434/v1"
31
  local_llm_api_key: str = "local-dev"
32
- local_llm_model: str = "qwen3:0.6b"
33
  local_llm_model_placeholder: str = "local-model"
34
  default_openai_model: str = "gpt-4.1-mini"
35
 
@@ -121,6 +121,21 @@ class Settings(BaseSettings):
121
  )
122
  return "OpenAI unavailable. Check OPENAI_API_KEY, OPENAI_BASE_URL, and network access."
123
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
124
  @property
125
  def use_hf_inference_embeddings(self) -> bool:
126
  return self._clean_optional(self.hf_token) is not None
 
27
  openai_api_key: str | None = None
28
  openai_model: str | None = None
29
  openai_timeout_seconds: int = 60
30
+ local_llm_base_url: str = "http://host.docker.internal:8000/v1"
31
  local_llm_api_key: str = "local-dev"
32
+ local_llm_model: str = "Qwen/Qwen3-0.6B"
33
  local_llm_model_placeholder: str = "local-model"
34
  default_openai_model: str = "gpt-4.1-mini"
35
 
 
121
  )
122
  return "OpenAI unavailable. Check OPENAI_API_KEY, OPENAI_BASE_URL, and network access."
123
 
124
+ @property
125
+ def local_llm_unavailable_message(self) -> str:
126
+ return (
127
+ "Local LLM unavailable. "
128
+ f"Check {self.local_llm_base_url} and model {self.local_llm_model}."
129
+ )
130
+
131
+ @property
132
+ def hosted_then_local_llm_unavailable_message(self) -> str:
133
+ return (
134
+ "Hosted LLM failed and local fallback is unavailable. "
135
+ f"Check OPENAI_BASE_URL/OPENAI_API_KEY plus local fallback {self.local_llm_base_url} "
136
+ f"with model {self.local_llm_model}."
137
+ )
138
+
139
  @property
140
  def use_hf_inference_embeddings(self) -> bool:
141
  return self._clean_optional(self.hf_token) is not None
app/db/__pycache__/__init__.cpython-312.pyc DELETED
Binary file (150 Bytes)
 
app/db/__pycache__/base.cpython-312.pyc DELETED
Binary file (389 Bytes)
 
app/db/__pycache__/session.cpython-312.pyc DELETED
Binary file (2.64 kB)
 
app/models/__pycache__/__init__.cpython-312.pyc DELETED
Binary file (359 Bytes)
 
app/models/__pycache__/entities.cpython-312.pyc DELETED
Binary file (8.66 kB)
 
app/rag/__pycache__/__init__.cpython-312.pyc DELETED
Binary file (165 Bytes)
 
app/rag/__pycache__/chunk_verification.cpython-312.pyc DELETED
Binary file (11.4 kB)
 
app/rag/__pycache__/chunking.cpython-312.pyc DELETED
Binary file (12.4 kB)
 
app/rag/__pycache__/document_profile.cpython-312.pyc DELETED
Binary file (9.07 kB)
 
app/rag/__pycache__/embeddings.cpython-312.pyc DELETED
Binary file (4.87 kB)
 
app/rag/__pycache__/extraction.cpython-312.pyc DELETED
Binary file (11.4 kB)
 
app/rag/__pycache__/faiss_store.cpython-312.pyc DELETED
Binary file (1.52 kB)
 
app/rag/__pycache__/grounding.cpython-312.pyc DELETED
Binary file (46.6 kB)
 
app/rag/__pycache__/prompts.cpython-312.pyc DELETED
Binary file (13.1 kB)
 
app/rag/__pycache__/quality_retrieval.cpython-312.pyc DELETED
Binary file (12.7 kB)
 
app/rag/__pycache__/query_expansion.cpython-312.pyc DELETED
Binary file (13.8 kB)
 
app/rag/__pycache__/query_scope.cpython-312.pyc DELETED
Binary file (11.7 kB)
 
app/rag/__pycache__/retrieval.cpython-312.pyc DELETED
Binary file (19.9 kB)
 
app/rag/__pycache__/types.cpython-312.pyc DELETED
Binary file (2.81 kB)
 
app/rag/chunk_verification.py CHANGED
@@ -14,19 +14,6 @@ from app.services.llm_client import LLMUnavailableError, OpenAICompatibleClient
14
  THINK_BLOCK_RE = re.compile(r"<think>.*?</think>", re.IGNORECASE | re.DOTALL)
15
  JSON_BLOCK_RE = re.compile(r"\{.*\}", re.DOTALL)
16
  FENCED_BLOCK_RE = re.compile(r"```(?:json)?\s*(.*?)```", re.IGNORECASE | re.DOTALL)
17
- CONTINUATION_PREFIXES = (
18
- "also ",
19
- "another ",
20
- "additionally ",
21
- "further ",
22
- "furthermore ",
23
- "the system ",
24
- "the platform ",
25
- "this solution ",
26
- "this system ",
27
- "built on ",
28
- "it ",
29
- )
30
 
31
 
32
  def verify_semantic_chunks(
@@ -41,44 +28,44 @@ def verify_semantic_chunks(
41
  if len(chunks) < 2:
42
  return annotate_chunks(chunks, verification_status="skipped_too_short")
43
 
44
- candidate_windows = build_candidate_windows(chunks)
45
- if not candidate_windows:
46
- return annotate_chunks(chunks, verification_status="skipped_no_candidates")
47
-
48
  llm = llm or OpenAICompatibleClient()
49
- try:
50
- raw_response = llm.complete(
51
- build_chunk_verification_prompt(
52
- document_title=document_title,
53
- chunks=chunks,
54
- candidate_windows=candidate_windows,
55
- chunk_max_chars=settings.chunk_max_chars,
56
- )
57
- )
58
- except LLMUnavailableError:
59
- return annotate_chunks(chunks, verification_status="skipped_llm_unavailable")
60
- except Exception:
61
- return annotate_chunks(chunks, verification_status="skipped_llm_error")
62
-
63
- decisions = parse_chunk_verification_response(raw_response)
64
- if decisions is None:
65
- return annotate_chunks(chunks, verification_status="skipped_invalid_response")
66
-
67
  merge_boundaries: set[int] = set()
68
  decision_notes: dict[int, str] = {}
69
- for decision in decisions:
70
- left_index = decision.get("left_chunk_index")
71
- right_index = decision.get("right_chunk_index")
72
- action = str(decision.get("action", "")).strip().lower()
73
- if not isinstance(left_index, int) or not isinstance(right_index, int):
74
- continue
75
- if right_index != left_index + 1:
76
- continue
77
- if action not in {"merge", "keep"}:
78
- continue
79
- decision_notes[left_index] = str(decision.get("reason", "")).strip()
80
- if action == "merge":
81
- merge_boundaries.add(left_index)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
 
83
  return apply_merge_decisions(
84
  document_title=document_title,
@@ -88,42 +75,23 @@ def verify_semantic_chunks(
88
  )
89
 
90
 
91
- def build_candidate_windows(chunks: list[ChunkDraft]) -> list[dict[str, int]]:
92
- settings = get_settings()
93
- candidate_windows: list[dict[str, int]] = []
94
  for left_index in range(len(chunks) - 1):
95
- if is_verification_candidate(chunks[left_index], chunks[left_index + 1], settings.chunk_max_chars):
96
- candidate_windows.append(
97
- {
98
- "left_index": left_index,
99
- "right_index": left_index + 1,
100
- }
101
- )
102
- if len(candidate_windows) >= settings.chunk_verification_max_windows:
103
- break
104
- return candidate_windows
105
-
106
-
107
- def is_verification_candidate(left: ChunkDraft, right: ChunkDraft, chunk_max_chars: int) -> bool:
108
- if left.section_title != right.section_title and left.heading_path != right.heading_path:
109
- return False
110
-
111
- combined_length = len(left.raw_text) + len(right.raw_text) + 2
112
- if combined_length > chunk_max_chars + 180:
113
- return False
114
-
115
- shorter_chunk = min(len(left.raw_text), len(right.raw_text))
116
- if shorter_chunk <= max(180, chunk_max_chars // 4):
117
- return True
118
-
119
- if left.raw_text.count("•") and right.raw_text.count("•"):
120
- return True
121
 
122
- right_lower = right.raw_text.strip().lower()
123
- if any(right_lower.startswith(prefix) for prefix in CONTINUATION_PREFIXES):
124
- return True
125
 
126
- return left.section_title == right.section_title
 
 
 
 
127
 
128
 
129
  def parse_chunk_verification_response(raw_response: str) -> list[dict[str, object]] | None:
 
14
  THINK_BLOCK_RE = re.compile(r"<think>.*?</think>", re.IGNORECASE | re.DOTALL)
15
  JSON_BLOCK_RE = re.compile(r"\{.*\}", re.DOTALL)
16
  FENCED_BLOCK_RE = re.compile(r"```(?:json)?\s*(.*?)```", re.IGNORECASE | re.DOTALL)
 
 
 
 
 
 
 
 
 
 
 
 
 
17
 
18
 
19
  def verify_semantic_chunks(
 
28
  if len(chunks) < 2:
29
  return annotate_chunks(chunks, verification_status="skipped_too_short")
30
 
31
+ boundary_windows = build_boundary_windows(chunks)
 
 
 
32
  llm = llm or OpenAICompatibleClient()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  merge_boundaries: set[int] = set()
34
  decision_notes: dict[int, str] = {}
35
+ batch_size = max(1, settings.chunk_verification_max_windows)
36
+
37
+ for boundary_batch in batch_boundary_windows(boundary_windows, batch_size):
38
+ try:
39
+ raw_response = llm.complete(
40
+ build_chunk_verification_prompt(
41
+ document_title=document_title,
42
+ chunks=chunks,
43
+ boundary_windows=boundary_batch,
44
+ chunk_max_chars=settings.chunk_max_chars,
45
+ )
46
+ )
47
+ except LLMUnavailableError:
48
+ return annotate_chunks(chunks, verification_status="skipped_llm_unavailable")
49
+ except Exception:
50
+ return annotate_chunks(chunks, verification_status="skipped_llm_error")
51
+
52
+ decisions = parse_chunk_verification_response(raw_response)
53
+ if decisions is None:
54
+ return annotate_chunks(chunks, verification_status="skipped_invalid_response")
55
+
56
+ for decision in decisions:
57
+ left_index = decision.get("left_chunk_index")
58
+ right_index = decision.get("right_chunk_index")
59
+ action = str(decision.get("action", "")).strip().lower()
60
+ if not isinstance(left_index, int) or not isinstance(right_index, int):
61
+ continue
62
+ if right_index != left_index + 1:
63
+ continue
64
+ if action not in {"merge", "keep"}:
65
+ continue
66
+ decision_notes[left_index] = str(decision.get("reason", "")).strip()
67
+ if action == "merge":
68
+ merge_boundaries.add(left_index)
69
 
70
  return apply_merge_decisions(
71
  document_title=document_title,
 
75
  )
76
 
77
 
78
+ def build_boundary_windows(chunks: list[ChunkDraft]) -> list[dict[str, int]]:
79
+ boundary_windows: list[dict[str, int]] = []
 
80
  for left_index in range(len(chunks) - 1):
81
+ boundary_windows.append(
82
+ {
83
+ "left_index": left_index,
84
+ "right_index": left_index + 1,
85
+ }
86
+ )
87
+ return boundary_windows
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
 
 
 
 
89
 
90
+ def batch_boundary_windows(boundary_windows: list[dict[str, int]], batch_size: int) -> list[list[dict[str, int]]]:
91
+ return [
92
+ boundary_windows[index : index + batch_size]
93
+ for index in range(0, len(boundary_windows), batch_size)
94
+ ]
95
 
96
 
97
  def parse_chunk_verification_response(raw_response: str) -> list[dict[str, object]] | None:
app/rag/prompts.py CHANGED
@@ -187,11 +187,11 @@ def build_chunk_verification_prompt(
187
  *,
188
  document_title: str,
189
  chunks: list[ChunkDraft],
190
- candidate_windows: list[dict[str, int]],
191
  chunk_max_chars: int,
192
  ) -> str:
193
  windows: list[str] = []
194
- for window in candidate_windows:
195
  left_index = window["left_index"]
196
  right_index = window["right_index"]
197
  previous_chunk = chunks[left_index - 1] if left_index > 0 else None
@@ -215,7 +215,7 @@ def build_chunk_verification_prompt(
215
  window_text = "\n\n---\n\n".join(windows)
216
  return (
217
  "You are verifying document chunk boundaries after deterministic chunking.\n"
218
- "For each candidate boundary, decide whether the right chunk should stay separate or merge with the left chunk.\n"
219
  "Choose merge only when the right chunk is a continuation of the same semantic unit, list item group, or subtopic.\n"
220
  "Choose keep when the right chunk starts a different role, project, section, or independently meaningful unit.\n"
221
  "Do not rewrite or add facts. Do not use outside knowledge.\n"
@@ -223,7 +223,7 @@ def build_chunk_verification_prompt(
223
  '{"decisions":[{"left_chunk_index":0,"right_chunk_index":1,"action":"keep","reason":"..."},{"left_chunk_index":1,"right_chunk_index":2,"action":"merge","reason":"..."}]}\n\n'
224
  f"Document Title: {document_title}\n"
225
  f"Chunk Max Chars: {chunk_max_chars}\n\n"
226
- f"Candidate Boundaries:\n{window_text}\n"
227
  )
228
 
229
 
 
187
  *,
188
  document_title: str,
189
  chunks: list[ChunkDraft],
190
+ boundary_windows: list[dict[str, int]],
191
  chunk_max_chars: int,
192
  ) -> str:
193
  windows: list[str] = []
194
+ for window in boundary_windows:
195
  left_index = window["left_index"]
196
  right_index = window["right_index"]
197
  previous_chunk = chunks[left_index - 1] if left_index > 0 else None
 
215
  window_text = "\n\n---\n\n".join(windows)
216
  return (
217
  "You are verifying document chunk boundaries after deterministic chunking.\n"
218
+ "For each chunk boundary, decide whether the right chunk should stay separate or merge with the left chunk.\n"
219
  "Choose merge only when the right chunk is a continuation of the same semantic unit, list item group, or subtopic.\n"
220
  "Choose keep when the right chunk starts a different role, project, section, or independently meaningful unit.\n"
221
  "Do not rewrite or add facts. Do not use outside knowledge.\n"
 
223
  '{"decisions":[{"left_chunk_index":0,"right_chunk_index":1,"action":"keep","reason":"..."},{"left_chunk_index":1,"right_chunk_index":2,"action":"merge","reason":"..."}]}\n\n'
224
  f"Document Title: {document_title}\n"
225
  f"Chunk Max Chars: {chunk_max_chars}\n\n"
226
+ f"Chunk Boundaries:\n{window_text}\n"
227
  )
228
 
229
 
app/repositories/__pycache__/__init__.cpython-312.pyc DELETED
Binary file (157 Bytes)
 
app/repositories/__pycache__/conversations.cpython-312.pyc DELETED
Binary file (4.29 kB)
 
app/repositories/__pycache__/documents.cpython-312.pyc DELETED
Binary file (7.49 kB)
 
app/services/__pycache__/__init__.cpython-312.pyc DELETED
Binary file (150 Bytes)
 
app/services/__pycache__/document_processor.cpython-312.pyc DELETED
Binary file (4.03 kB)
 
app/services/__pycache__/document_service.cpython-312.pyc DELETED
Binary file (4.08 kB)
 
app/services/__pycache__/llm_client.cpython-312.pyc DELETED
Binary file (2.4 kB)
 
app/services/__pycache__/qa_service.cpython-312.pyc DELETED
Binary file (14.5 kB)
 
app/services/llm_client.py CHANGED
@@ -1,9 +1,13 @@
1
  from __future__ import annotations
2
 
 
 
3
  from openai import OpenAI, OpenAIError
4
 
5
  from app.core.config import get_settings
6
 
 
 
7
 
8
  class LLMUnavailableError(RuntimeError):
9
  """Raised when the configured OpenAI-compatible endpoint cannot complete a request."""
@@ -13,13 +17,16 @@ class OpenAICompatibleClient:
13
  def __init__(self) -> None:
14
  settings = get_settings()
15
  self.settings = settings
16
- client_kwargs = {
17
- "api_key": settings.effective_openai_api_key,
18
- "timeout": settings.openai_timeout_seconds,
19
- }
20
- if settings.effective_openai_base_url:
21
- client_kwargs["base_url"] = settings.effective_openai_base_url
22
- self.client = OpenAI(**client_kwargs)
 
 
 
23
 
24
  def complete(self, prompt: str, *, system_prompt: str | None = None) -> str:
25
  messages = []
@@ -28,11 +35,43 @@ class OpenAICompatibleClient:
28
  messages.append({"role": "user", "content": prompt})
29
 
30
  try:
31
- response = self.client.chat.completions.create(
 
32
  model=self.settings.effective_openai_model,
33
- temperature=0.1,
34
  messages=messages,
35
  )
36
  except OpenAIError as exc:
37
- raise LLMUnavailableError(self.settings.llm_unavailable_message) from exc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  return response.choices[0].message.content or ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  from __future__ import annotations
2
 
3
+ import logging
4
+
5
  from openai import OpenAI, OpenAIError
6
 
7
  from app.core.config import get_settings
8
 
9
+ logger = logging.getLogger(__name__)
10
+
11
 
12
  class LLMUnavailableError(RuntimeError):
13
  """Raised when the configured OpenAI-compatible endpoint cannot complete a request."""
 
17
  def __init__(self) -> None:
18
  settings = get_settings()
19
  self.settings = settings
20
+ self.client = self._build_client(
21
+ api_key=settings.effective_openai_api_key,
22
+ base_url=settings.effective_openai_base_url,
23
+ )
24
+ self.fallback_client = None
25
+ if settings.has_openai_api_key:
26
+ self.fallback_client = self._build_client(
27
+ api_key=settings.local_llm_api_key,
28
+ base_url=settings.local_llm_base_url,
29
+ )
30
 
31
  def complete(self, prompt: str, *, system_prompt: str | None = None) -> str:
32
  messages = []
 
35
  messages.append({"role": "user", "content": prompt})
36
 
37
  try:
38
+ response = self._complete_with_client(
39
+ self.client,
40
  model=self.settings.effective_openai_model,
 
41
  messages=messages,
42
  )
43
  except OpenAIError as exc:
44
+ if self.fallback_client is not None:
45
+ logger.warning(
46
+ "Hosted LLM request failed for model %s; falling back to local model %s: %s",
47
+ self.settings.effective_openai_model,
48
+ self.settings.local_llm_model,
49
+ exc,
50
+ )
51
+ try:
52
+ response = self._complete_with_client(
53
+ self.fallback_client,
54
+ model=self.settings.local_llm_model,
55
+ messages=messages,
56
+ )
57
+ except OpenAIError as fallback_exc:
58
+ raise LLMUnavailableError(self.settings.hosted_then_local_llm_unavailable_message) from fallback_exc
59
+ else:
60
+ raise LLMUnavailableError(self.settings.llm_unavailable_message) from exc
61
  return response.choices[0].message.content or ""
62
+
63
+ def _build_client(self, *, api_key: str, base_url: str | None) -> OpenAI:
64
+ client_kwargs = {
65
+ "api_key": api_key,
66
+ "timeout": self.settings.openai_timeout_seconds,
67
+ }
68
+ if base_url:
69
+ client_kwargs["base_url"] = base_url
70
+ return OpenAI(**client_kwargs)
71
+
72
+ def _complete_with_client(self, client: OpenAI, *, model: str, messages: list[dict[str, str]]):
73
+ return client.chat.completions.create(
74
+ model=model,
75
+ temperature=0.1,
76
+ messages=messages,
77
+ )
app/worker/__pycache__/__init__.cpython-312.pyc DELETED
Binary file (155 Bytes)
 
app/worker/__pycache__/celery_app.cpython-312.pyc DELETED
Binary file (580 Bytes)
 
app/worker/__pycache__/tasks.cpython-312.pyc DELETED
Binary file (574 Bytes)
 
sample_docs/Amar_Agnihotri_Resume.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:33a2282d9930db1dd7ca8a98d1d4cca52ba1a307f30c0cab7a51570dfd102f24
3
+ size 119196
sample_docs/candidate_profiles_packet.pdf DELETED
@@ -1,95 +0,0 @@
1
- %PDF-1.4
2
- 1 0 obj
3
- << /Type /Catalog /Pages 2 0 R >>
4
- endobj
5
- 2 0 obj
6
- << /Type /Pages /Kids [3 0 R] /Count 1 >>
7
- endobj
8
- 3 0 obj
9
- << /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] /Resources << /Font << /F1 5 0 R >> >> /Contents 4 0 R >>
10
- endobj
11
- 4 0 obj
12
- << /Length 1306 >>
13
- stream
14
- BT
15
- /F1 11 Tf
16
- 14 TL
17
- 50 760 Td
18
- (Candidate Profiles Packet) Tj
19
- T*
20
- () Tj
21
- T*
22
- (Priya Nair) Tj
23
- T*
24
- () Tj
25
- T*
26
- (6 years experience. Built Python microservices with FastAPI, Celery, Redis, PostgreSQL, and) Tj
27
- T*
28
- (Docker. Led an incident reduction effort for asynchronous workflows.) Tj
29
- T*
30
- () Tj
31
- T*
32
- (Strengths: backend platform ownership, queue design, API reliability, production debugging.) Tj
33
- T*
34
- () Tj
35
- T*
36
- (Raghav Menon) Tj
37
- T*
38
- () Tj
39
- T*
40
- (5 years experience. Built semantic search and document question-answering systems using) Tj
41
- T*
42
- (sentence transformers, FAISS, reranking, and evaluation tooling.) Tj
43
- T*
44
- () Tj
45
- T*
46
- (Strengths: LLM products, retrieval quality tuning, prompt controls, source attribution.) Tj
47
- T*
48
- () Tj
49
- T*
50
- (Asha Kulkarni) Tj
51
- T*
52
- () Tj
53
- T*
54
- (7 years experience. Strong in Kubernetes, AWS, Terraform, CI/CD, and observability. Limited) Tj
55
- T*
56
- (recent Python API work and no direct Celery ownership.) Tj
57
- T*
58
- () Tj
59
- T*
60
- (Strengths: DevOps depth, platform automation. Risks: weaker application-layer backend fit.) Tj
61
- T*
62
- () Tj
63
- T*
64
- (Neel Shah) Tj
65
- T*
66
- () Tj
67
- T*
68
- (2 years experience. Built internal dashboards and simple Flask APIs. Good communication but) Tj
69
- T*
70
- (below the target experience band.) Tj
71
- T*
72
- () Tj
73
- T*
74
- (Strengths: learning speed. Risks: limited scale and insufficient backend depth for the role.) Tj
75
- T*
76
- () Tj
77
- ET
78
- endstream
79
- endobj
80
- 5 0 obj
81
- << /Type /Font /Subtype /Type1 /BaseFont /Helvetica >>
82
- endobj
83
- xref
84
- 0 6
85
- 0000000000 65535 f
86
- 0000000009 00000 n
87
- 0000000058 00000 n
88
- 0000000115 00000 n
89
- 0000000241 00000 n
90
- 0000001599 00000 n
91
- trailer
92
- << /Size 6 /Root 1 0 R >>
93
- startxref
94
- 1669
95
- %%EOF