raj999 committed on
Commit
b067322
·
1 Parent(s): cf95ade

added multiple llm option

Browse files
Files changed (4) hide show
  1. README.md +4 -4
  2. app.py +52 -5
  3. llm/client.py +80 -1
  4. llm/pipeline.py +16 -6
README.md CHANGED
@@ -12,11 +12,11 @@ license: mit
12
 
13
  # Smart Resume Builder
14
 
15
- Generate grounded, tailored resumes from a job description and a PDF resume using Gradio, OpenAI, and LaTeX templates. Suitable for local runs or Hugging Face Spaces.
16
 
17
  ## Features
18
  - PDF parsing with `pdfplumber` and `pymupdf` fallback
19
- - Strict, evidence-backed JSON extraction via OpenAI
20
  - Tailoring step that rewrites bullets without fabrication and reports missing items
21
  - Two LaTeX templates (modern single-column and classic two-column)
22
  - Gradio UI with API key storage (keyring preferred), template selector, and export buttons
@@ -33,7 +33,7 @@ uv run app.py
33
  ## Using the app
34
  1. Paste the job description.
35
  2. Upload a resume PDF.
36
- 3. Enter your OpenAI API key (optionally save it locally; system keychain is used when available).
37
  4. Choose a model name and LaTeX template.
38
  5. Click **Generate Tailored Resume**.
39
  6. Review the LaTeX preview, missing/needs-confirmation list, and keyword alignment.
@@ -63,4 +63,4 @@ uv run pytest
63
  ## Troubleshooting
64
  - Missing `latexmk`: install TeX Live/MikTeX.
65
  - If PDF parsing is poor, ensure the resume PDF is text-based; image-only scans are harder to extract.
66
- - For OpenAI errors, verify the API key and model name in the UI.
 
12
 
13
  # Smart Resume Builder
14
 
15
+ Generate grounded, tailored resumes from a job description and a PDF resume using Gradio, OpenAI or Hugging Face models, and LaTeX templates. Suitable for local runs or Hugging Face Spaces.
16
 
17
  ## Features
18
  - PDF parsing with `pdfplumber` and `pymupdf` fallback
19
+ - Strict, evidence-backed JSON extraction via OpenAI or Hugging Face Inference API
20
  - Tailoring step that rewrites bullets without fabrication and reports missing items
21
  - Two LaTeX templates (modern single-column and classic two-column)
22
  - Gradio UI with API key storage (keyring preferred), template selector, and export buttons
 
33
  ## Using the app
34
  1. Paste the job description.
35
  2. Upload a resume PDF.
36
+ 3. Pick a provider and enter your API key/token (optionally save it locally; system keychain is used when available).
37
  4. Choose a model name and LaTeX template.
38
  5. Click **Generate Tailored Resume**.
39
  6. Review the LaTeX preview, missing/needs-confirmation list, and keyword alignment.
 
63
  ## Troubleshooting
64
  - Missing `latexmk`: install TeX Live/MikTeX.
65
  - If PDF parsing is poor, ensure the resume PDF is text-based; image-only scans are harder to extract.
66
+ - For provider errors, verify the API key/token and model name in the UI.
app.py CHANGED
@@ -19,6 +19,13 @@ logger = logging.getLogger("smart_resume_builder")
19
 
20
  APP_TITLE = "Smart Resume Builder"
21
  LOCAL_KEY_PATH = Path.home() / ".smart_resume_builder_key"
 
 
 
 
 
 
 
22
 
23
 
24
  # Gradio 4.44.1 can emit JSON schema fragments with `additionalProperties: true`,
@@ -36,6 +43,12 @@ def _safe_json_schema_to_python_type(schema, defs=None):
36
  gr_client_utils._json_schema_to_python_type = _safe_json_schema_to_python_type
37
 
38
 
 
 
 
 
 
 
39
  def load_api_key() -> Optional[str]:
40
  try:
41
  import keyring # type: ignore
@@ -97,6 +110,7 @@ def generate_tailored_resume(
97
  job_description: str,
98
  pdf_file,
99
  api_key: str,
 
100
  model: str,
101
  template_choice: str,
102
  save_key: bool,
@@ -107,7 +121,16 @@ def generate_tailored_resume(
107
  logs.append(msg)
108
 
109
  if not api_key:
110
- return ("", "API key required.", "", {}, "\n".join(logs), None, None, {})
 
 
 
 
 
 
 
 
 
111
  if not pdf_file:
112
  return ("", "Please upload a resume PDF.", "", {}, "\n".join(logs), None, None, {})
113
  if not job_description.strip():
@@ -123,7 +146,7 @@ def generate_tailored_resume(
123
  pdf_path = Path(tmp.name)
124
  result = parse_resume_pdf(str(pdf_path))
125
  log(f"Extracted text using {result.method}")
126
- log("Starting OpenAI pipeline...")
127
 
128
  template_map = list_templates()
129
  template_source = template_map[template_choice].read_text(encoding="utf-8")
@@ -131,6 +154,7 @@ def generate_tailored_resume(
131
  resume, tailored = run_pipeline(
132
  api_key=api_key,
133
  model=model,
 
134
  raw_text=result.raw_text,
135
  job_description=job_description,
136
  template_name=template_choice,
@@ -173,7 +197,7 @@ def generate_tailored_resume(
173
  except Exception as exc:
174
  log(f"Error: {exc}")
175
  log(
176
- "If this persists, verify your OpenAI API key/model and that outbound network access is allowed."
177
  )
178
  return (
179
  "",
@@ -197,9 +221,19 @@ def build_ui():
197
  with gr.Row():
198
  with gr.Column():
199
  jd = gr.Textbox(label="Job Description", lines=12, placeholder="Paste JD here")
 
 
 
 
 
200
  api = gr.Textbox(label="OpenAI API Key", type="password", value=stored_key)
201
  save_key = gr.Checkbox(label="Save key locally (keyring preferred)", value=bool(stored_key))
202
- model = gr.Textbox(label="Model name", value="gpt-4o-mini")
 
 
 
 
 
203
  template_choice = gr.Dropdown(
204
  label="Template", choices=template_names, value=template_names[0]
205
  )
@@ -219,7 +253,7 @@ def build_ui():
219
 
220
  generate_btn.click(
221
  fn=generate_tailored_resume,
222
- inputs=[jd, pdf, api, model, template_choice, save_key],
223
  outputs=[
224
  latex_preview,
225
  missing_panel,
@@ -232,6 +266,19 @@ def build_ui():
232
  ],
233
  )
234
 
 
 
 
 
 
 
 
 
 
 
 
 
 
235
  clear_btn.click(fn=clear_api_key, inputs=None, outputs=api)
236
 
237
  return demo
 
19
 
20
  APP_TITLE = "Smart Resume Builder"
21
  LOCAL_KEY_PATH = Path.home() / ".smart_resume_builder_key"
22
+ OPENAI_MODELS = ["gpt-4o-mini", "gpt-4o", "gpt-4.1-mini"]
23
+ HF_MODELS = [
24
+ "mistralai/Mistral-7B-Instruct-v0.2",
25
+ "mistralai/Mixtral-8x7B-Instruct-v0.1",
26
+ "HuggingFaceH4/zephyr-7b-beta",
27
+ ]
28
+ HF_PROVIDER_LABEL = "Hugging Face (Inference API)"
29
 
30
 
31
  # Gradio 4.44.1 can emit JSON schema fragments with `additionalProperties: true`,
 
43
  gr_client_utils._json_schema_to_python_type = _safe_json_schema_to_python_type
44
 
45
 
46
def _provider_defaults(provider: str) -> Tuple[list[str], str, str]:
    """Return the UI defaults for the selected provider.

    The result is ``(model choices, default model, credential field label)``.
    Any provider other than the Hugging Face label falls back to the OpenAI
    defaults.
    """
    is_hugging_face = provider == HF_PROVIDER_LABEL
    models = HF_MODELS if is_hugging_face else OPENAI_MODELS
    credential_label = "Hugging Face Token" if is_hugging_face else "OpenAI API Key"
    # The first entry of each model list doubles as the default selection.
    return models, models[0], credential_label
50
+
51
+
52
  def load_api_key() -> Optional[str]:
53
  try:
54
  import keyring # type: ignore
 
110
  job_description: str,
111
  pdf_file,
112
  api_key: str,
113
+ provider: str,
114
  model: str,
115
  template_choice: str,
116
  save_key: bool,
 
121
  logs.append(msg)
122
 
123
  if not api_key:
124
+ return (
125
+ "",
126
+ "API key/token required.",
127
+ "",
128
+ {},
129
+ "\n".join(logs),
130
+ None,
131
+ None,
132
+ {},
133
+ )
134
  if not pdf_file:
135
  return ("", "Please upload a resume PDF.", "", {}, "\n".join(logs), None, None, {})
136
  if not job_description.strip():
 
146
  pdf_path = Path(tmp.name)
147
  result = parse_resume_pdf(str(pdf_path))
148
  log(f"Extracted text using {result.method}")
149
+ log(f"Starting LLM pipeline (provider={provider})...")
150
 
151
  template_map = list_templates()
152
  template_source = template_map[template_choice].read_text(encoding="utf-8")
 
154
  resume, tailored = run_pipeline(
155
  api_key=api_key,
156
  model=model,
157
+ provider=provider,
158
  raw_text=result.raw_text,
159
  job_description=job_description,
160
  template_name=template_choice,
 
197
  except Exception as exc:
198
  log(f"Error: {exc}")
199
  log(
200
+ "If this persists, verify your API key/token and model and that outbound network access is allowed."
201
  )
202
  return (
203
  "",
 
221
  with gr.Row():
222
  with gr.Column():
223
  jd = gr.Textbox(label="Job Description", lines=12, placeholder="Paste JD here")
224
+ provider = gr.Dropdown(
225
+ label="Provider",
226
+ choices=["OpenAI", HF_PROVIDER_LABEL],
227
+ value="OpenAI",
228
+ )
229
  api = gr.Textbox(label="OpenAI API Key", type="password", value=stored_key)
230
  save_key = gr.Checkbox(label="Save key locally (keyring preferred)", value=bool(stored_key))
231
+ model = gr.Dropdown(
232
+ label="Model name",
233
+ choices=OPENAI_MODELS,
234
+ value=OPENAI_MODELS[0],
235
+ allow_custom_value=True,
236
+ )
237
  template_choice = gr.Dropdown(
238
  label="Template", choices=template_names, value=template_names[0]
239
  )
 
253
 
254
  generate_btn.click(
255
  fn=generate_tailored_resume,
256
+ inputs=[jd, pdf, api, provider, model, template_choice, save_key],
257
  outputs=[
258
  latex_preview,
259
  missing_panel,
 
266
  ],
267
  )
268
 
269
def _update_provider_fields(selected: str):
    """Refresh the model dropdown and the credential label for a provider.

    Args:
        selected: The provider label currently chosen in the provider dropdown.

    Returns:
        A pair of Gradio update payloads: the first replaces the model
        dropdown's choices and value, the second relabels the API key/token
        textbox.
    """
    choices, value, key_label = _provider_defaults(selected)
    # The per-component ``Component.update()`` classmethods were removed in
    # Gradio 4, which this app targets; the module-level ``gr.update`` helper
    # produces the same update payload for any output component.
    return (
        gr.update(choices=choices, value=value),
        gr.update(label=key_label),
    )
275
+
276
+ provider.change(
277
+ fn=_update_provider_fields,
278
+ inputs=provider,
279
+ outputs=[model, api],
280
+ )
281
+
282
  clear_btn.click(fn=clear_api_key, inputs=None, outputs=api)
283
 
284
  return demo
llm/client.py CHANGED
@@ -3,11 +3,17 @@ from __future__ import annotations
3
  import json
4
  import logging
5
  import time
6
- from typing import Any, Dict, List
7
 
8
  logger = logging.getLogger(__name__)
9
 
10
 
 
 
 
 
 
 
11
  class OpenAIClient:
12
  def __init__(self, api_key: str, model: str = "gpt-4o-mini"):
13
  try:
@@ -87,6 +93,79 @@ class OpenAIClient:
87
  return repaired
88
 
89
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
  def _safe_json_parse(text: str) -> Dict[str, Any] | None:
91
  # Attempt direct parse
92
  try:
 
3
  import json
4
  import logging
5
  import time
6
+ from typing import Any, Dict, Protocol
7
 
8
  logger = logging.getLogger(__name__)
9
 
10
 
11
class LLMClient(Protocol):
    """Structural interface that every provider-specific client satisfies."""

    def chat(self, prompt: str, *, max_retries: int = 3) -> str:
        """Send *prompt* to the model and return its text reply."""
        ...

    def chat_json(self, prompt: str, *, max_retries: int = 3) -> Dict[str, Any]:
        """Send *prompt* and return the model's reply as a parsed JSON object."""
        ...
15
+
16
+
17
  class OpenAIClient:
18
  def __init__(self, api_key: str, model: str = "gpt-4o-mini"):
19
  try:
 
93
  return repaired
94
 
95
 
96
class HuggingFaceClient:
    """LLM client that sends prompts through ``huggingface_hub.InferenceClient``.

    Provides the same ``chat`` / ``chat_json`` methods as the other clients in
    this module so the pipeline can use either provider interchangeably.
    """

    def __init__(self, api_token: str, model: str):
        """Create an inference client bound to *model*.

        Args:
            api_token: Hugging Face access token; must be non-empty.
            model: Model identifier passed to ``InferenceClient``.

        Raises:
            ValueError: If *api_token* is empty.
            RuntimeError: If ``huggingface_hub`` cannot be imported.
        """
        if not api_token:
            raise ValueError("Hugging Face token required.")
        try:
            from huggingface_hub import InferenceClient  # type: ignore
        except Exception as exc:  # pragma: no cover - import guard
            raise RuntimeError(
                "huggingface_hub package is required. Install with `pip install huggingface_hub`."
            ) from exc

        self.client = InferenceClient(model=model, token=api_token)
        self.model = model

    def chat(self, prompt: str, *, max_retries: int = 3) -> str:
        """Generate a completion for *prompt*, retrying on failure.

        Rate-limit errors wait a fixed 30 seconds before the next attempt;
        any other error uses exponential back-off starting at one second.
        No wait is performed after the final attempt.

        Args:
            prompt: Text sent to the model.
            max_retries: Maximum number of attempts.

        Returns:
            The generated text, without the prompt echoed back.

        Raises:
            RuntimeError: If every attempt fails; the last underlying error
                is attached as the cause.
        """
        delay = 1.0
        last_error: Exception | None = None
        for attempt in range(max_retries):
            try:
                # NOTE(review): ``temperature`` is sent together with
                # ``do_sample=False``; confirm which one the backend honors.
                return self.client.text_generation(
                    prompt,
                    max_new_tokens=1024,
                    temperature=0.2,
                    do_sample=False,
                    return_full_text=False,
                )
            except Exception as exc:  # pragma: no cover - network call
                last_error = exc
                if attempt + 1 >= max_retries:
                    # Out of attempts: sleeping here would only delay the
                    # error reaching the caller.
                    logger.warning(
                        "Hugging Face call failed (attempt %s): %s", attempt + 1, exc
                    )
                    break
                if _is_rate_limit_error(exc):
                    wait_time = 30.0
                    logger.warning(
                        "Hugging Face rate limit encountered (attempt %s). Waiting %.1fs",
                        attempt + 1,
                        wait_time,
                    )
                    time.sleep(wait_time)
                else:
                    logger.warning(
                        "Hugging Face call failed (attempt %s): %s", attempt + 1, exc
                    )
                    time.sleep(delay)
                    delay *= 2
        raise RuntimeError(
            f"Hugging Face call failed after retries: {last_error}"
        ) from last_error  # pragma: no cover - network call

    def chat_json(self, prompt: str, *, max_retries: int = 3) -> Dict[str, Any]:
        """Return the model's reply to *prompt* parsed as JSON.

        If the first reply does not parse, the model is asked once to repair
        its own output.

        Raises:
            ValueError: If the repaired reply still is not valid JSON.
        """
        raw = self.chat(prompt, max_retries=max_retries)
        parsed = _safe_json_parse(raw)
        if parsed is not None:
            return parsed

        # Single repair round: hand the invalid output back to the model.
        repair_prompt = (
            "The previous response was invalid JSON. "
            "Return ONLY valid JSON that fixes it without adding new facts.\n"
            f"Original response:\n{raw}"
        )
        repaired_raw = self.chat(repair_prompt, max_retries=max_retries)
        repaired = _safe_json_parse(repaired_raw)
        if repaired is None:
            raise ValueError("Model did not return valid JSON after repair attempt")
        return repaired
158
+
159
+
160
def build_client(provider: str, api_key: str, model: str) -> LLMClient:
    """Instantiate the client matching a provider name or UI label.

    Matching ignores case and surrounding whitespace.

    Raises:
        ValueError: If *provider* is not a recognized alias.
    """
    openai_aliases = {"openai", "open ai"}
    hugging_face_aliases = {
        "huggingface",
        "hugging face",
        "hugging face (inference api)",
    }

    label = provider.strip().lower()
    if label in hugging_face_aliases:
        # The Hugging Face client calls its credential a token.
        return HuggingFaceClient(api_token=api_key, model=model)
    if label in openai_aliases:
        return OpenAIClient(api_key=api_key, model=model)
    raise ValueError(f"Unknown provider: {provider}")
167
+
168
+
169
  def _safe_json_parse(text: str) -> Dict[str, Any] | None:
170
  # Attempt direct parse
171
  try:
llm/pipeline.py CHANGED
@@ -5,12 +5,14 @@ from typing import Tuple
5
 
6
  from schemas.resume import Resume, TailoredResume
7
 
8
- from .client import OpenAIClient
9
  from .prompts import EXTRACTION_PROMPT, TAILORING_PROMPT
10
 
11
 
12
- def extract_resume_json(api_key: str, model: str, raw_text: str) -> Resume:
13
- client = OpenAIClient(api_key=api_key, model=model)
 
 
14
  prompt = EXTRACTION_PROMPT.format(resume_text=raw_text)
15
  data = client.chat_json(prompt)
16
  resume = Resume.parse_obj(data)
@@ -21,12 +23,13 @@ def extract_resume_json(api_key: str, model: str, raw_text: str) -> Resume:
21
  def tailor_resume(
22
  api_key: str,
23
  model: str,
 
24
  resume: Resume,
25
  job_description: str,
26
  template_name: str,
27
  template_source: str,
28
  ) -> TailoredResume:
29
- client = OpenAIClient(api_key=api_key, model=model)
30
  payload = json.loads(resume.json())
31
  prompt = TAILORING_PROMPT.format(
32
  resume_json=json.dumps(payload),
@@ -41,13 +44,20 @@ def tailor_resume(
41
  def run_pipeline(
42
  api_key: str,
43
  model: str,
 
44
  raw_text: str,
45
  job_description: str,
46
  template_name: str,
47
  template_source: str,
48
  ) -> Tuple[Resume, TailoredResume]:
49
- resume = extract_resume_json(api_key, model, raw_text)
50
  tailored = tailor_resume(
51
- api_key, model, resume, job_description, template_name, template_source
 
 
 
 
 
 
52
  )
53
  return resume, tailored
 
5
 
6
  from schemas.resume import Resume, TailoredResume
7
 
8
+ from .client import build_client
9
  from .prompts import EXTRACTION_PROMPT, TAILORING_PROMPT
10
 
11
 
12
+ def extract_resume_json(
13
+ api_key: str, model: str, provider: str, raw_text: str
14
+ ) -> Resume:
15
+ client = build_client(provider, api_key=api_key, model=model)
16
  prompt = EXTRACTION_PROMPT.format(resume_text=raw_text)
17
  data = client.chat_json(prompt)
18
  resume = Resume.parse_obj(data)
 
23
  def tailor_resume(
24
  api_key: str,
25
  model: str,
26
+ provider: str,
27
  resume: Resume,
28
  job_description: str,
29
  template_name: str,
30
  template_source: str,
31
  ) -> TailoredResume:
32
+ client = build_client(provider, api_key=api_key, model=model)
33
  payload = json.loads(resume.json())
34
  prompt = TAILORING_PROMPT.format(
35
  resume_json=json.dumps(payload),
 
44
def run_pipeline(
    api_key: str,
    model: str,
    provider: str,
    raw_text: str,
    job_description: str,
    template_name: str,
    template_source: str,
) -> Tuple[Resume, TailoredResume]:
    """Run extraction followed by tailoring with the chosen provider.

    Returns:
        The extracted resume and the tailored resume, in that order.
    """
    # Step 1: turn the raw resume text into a structured Resume.
    extracted = extract_resume_json(api_key, model, provider, raw_text)
    # Step 2: tailor that resume to the job description and template.
    tailored_resume = tailor_resume(
        api_key=api_key,
        model=model,
        provider=provider,
        resume=extracted,
        job_description=job_description,
        template_name=template_name,
        template_source=template_source,
    )
    return extracted, tailored_resume