Spaces:

ak0601
/

lightweight-job

Sleeping

App Files Files Community

ak0601 committed on May 18, 2025

Commit

b3ff1bd

verified ·

1 Parent(s): 265750f

Update app_job_copy_1.py

Browse files

Files changed (1) hide show

app_job_copy_1.py +447 -305

app_job_copy_1.py CHANGED Viewed

@@ -3,41 +3,87 @@ import pandas as pd
 import json
 import os
 from pydantic import BaseModel, Field
-from typing import List, Set, Dict, Any, Optional
-import time
 from langchain_openai import ChatOpenAI
-from langchain_core.messages import HumanMessage
 from langchain_core.prompts import ChatPromptTemplate
-from langchain_core.output_parsers import StrOutputParser
-from langchain_core.prompts import PromptTemplate
 import gspread
 from google.oauth2 import service_account
-os.environ["STREAMLIT_DISABLE_USAGE_STATS"] = "1"
 st.set_page_config(
     page_title="Candidate Matching App",
     page_icon="👨‍💻🎯",
     layout="wide"
 )
 # Define pydantic model for structured output
 class Shortlist(BaseModel):
-    fit_score: float = Field(description="A score between 0 and 10 indicating how closely the candidate profile matches the job requirements.")
     candidate_name: str = Field(description="The name of the candidate.")
     candidate_url: str = Field(description="The URL of the candidate's LinkedIn profile.")
     candidate_summary: str = Field(description="A brief summary of the candidate's skills and experience along with its educational background.")
     candidate_location: str = Field(description="The location of the candidate.")
     justification: str = Field(description="Justification for the shortlisted candidate with the fit score")
 # Function to parse and normalize tech stacks
 def parse_tech_stack(stack):
-    if pd.isna(stack) or stack == "" or stack is None:
-        return set()
-    if isinstance(stack, set):
-        return stack
     try:
-        # Handle potential string representation of sets
         if isinstance(stack, str) and stack.startswith("{") and stack.endswith("}"):
-            # This could be a string representation of a set
             items = stack.strip("{}").split(",")
             return set(item.strip().strip("'\"") for item in items if item.strip())
         return set(map(lambda x: x.strip().lower(), str(stack).split(',')))
@@ -46,38 +92,40 @@ def parse_tech_stack(stack):
         return set()
 def display_tech_stack(stack_set):
-    if isinstance(stack_set, set):
-        return ", ".join(sorted(stack_set))
-    return str(stack_set)
 def get_matching_candidates(job_stack, candidates_df):
-    """Find candidates with matching tech stack for a specific job"""
     matched = []
     job_stack_set = parse_tech_stack(job_stack)
     for _, candidate in candidates_df.iterrows():
         candidate_stack = parse_tech_stack(candidate['Key Tech Stack'])
         common = job_stack_set & candidate_stack
-        if len(common) >= 2:
             matched.append({
-                "Name": candidate["Full Name"],
-                "URL": candidate["LinkedIn URL"],
                 "Degree & Education": candidate["Degree & University"],
                 "Years of Experience": candidate["Years of Experience"],
                 "Current Title & Company": candidate['Current Title & Company'],
                 "Key Highlights": candidate["Key Highlights"],
                 "Location": candidate["Location (from most recent experience)"],
-                "Experience": str(candidate["Experience"]),
-                "Tech Stack": candidate_stack
             })
     return matched
 def setup_llm():
     """Set up the LangChain LLM with structured output"""
     # Create LLM instance
     llm = ChatOpenAI(
-        model="gpt-4o-mini",
-        temperature=0,
         max_tokens=None,
         timeout=None,
         max_retries=2,
@@ -87,30 +135,33 @@ def setup_llm():
     sum_llm = llm.with_structured_output(Shortlist)
     # Create system prompt
-    system = """You are an expert Recruitor, your task is to analyse the Candidate profile and determine if it matches with the job details and provide a score(out of 10) indicating how compatible the
     the profile is according to job.
 Try to ensure following points while estimating the candidate's fit score:
 For education:
 Tier1 - MIT, Stanford, CMU, UC Berkeley, Caltech, Harvard, IIT Bombay, IIT Delhi, Princeton, UIUC, University of Washington, Columbia, University of Chicago, Cornell, University of Michigan (Ann Arbor), UT Austin - Maximum points
 Tier2 - UC Davis, Georgia Tech, Purdue, UMass Amherst,etc - Moderate points
 Tier3 - Unknown or unranked institutions - Lower points or reject
 Startup Experience Requirement:
 Candidates must have worked  as a direct employee at a VC-backed startup (Seed to series C/D)
-preferred - Y Combinator, Sequoia,a16z,Accel,Founders Fund,LightSpeed,Greylock,Benchmark,Index Ventures,etc.
     The fit score signifies based on following metrics:
     1–5 - Poor Fit - Auto-reject
     6–7 - Weak Fit - Auto-reject
     8.0–8.7 - Moderate Fit - Auto-reject
     8.8–10 - STRONG Fit - Include in results
     """
     # Create query prompt
     query_prompt = ChatPromptTemplate.from_messages([
         ("system", system),
         ("human", """
-    You are an expert Recruitor, your task is to determine if the user is a correct match for the given job or not.
           For this you will be provided with the follwing inputs of job and candidates:
     Job Details
     Company: {Company}
@@ -120,7 +171,6 @@ preferred - Y Combinator, Sequoia,a16z,Accel,Founders Fund,LightSpeed,Greylock,B
     Tech Stack: {Tech_Stack}
     Industry: {Industry}
     Candidate Details:
     Full Name: {Full_Name}
     LinkedIn URL: {LinkedIn_URL}
@@ -131,10 +181,9 @@ preferred - Y Combinator, Sequoia,a16z,Accel,Founders Fund,LightSpeed,Greylock,B
     Key Highlights: {Key_Highlights}
     Location (from most recent experience): {cand_Location}
     Past_Experience: {Experience}
     Answer in the structured manner as per the schema.
     If any parameter is Unknown try not to include in the summary, only include those parameters which are known.
     """),
     ])
@@ -144,332 +193,425 @@ preferred - Y Combinator, Sequoia,a16z,Accel,Founders Fund,LightSpeed,Greylock,B
     return cat_class
 def call_llm(candidate_data, job_data, llm_chain):
-    """Call the actual LLM to evaluate the candidate"""
     try:
-        # Convert tech stacks to strings for the LLM payload
-        job_tech_stack = job_data.get("Tech_Stack", set())
-        candidate_tech_stack = candidate_data.get("Tech Stack", set())
-        if isinstance(job_tech_stack, set):
-            job_tech_stack = ", ".join(sorted(job_tech_stack))
-        if isinstance(candidate_tech_stack, set):
-            candidate_tech_stack = ", ".join(sorted(candidate_tech_stack))
-        # Prepare payload for LLM
         payload = {
-            "Company": job_data.get("Company", ""),
-            "Role": job_data.get("Role", ""),
-            "desc": job_data.get("desc", ""),
-            "Locations": job_data.get("Locations", ""),
-            "Tech_Stack": job_tech_stack,
-            "Industry": job_data.get("Industry", ""),
-            "Full_Name": candidate_data.get("Name", ""),
-            "LinkedIn_URL": candidate_data.get("URL", ""),
             "Current_Title_Company": candidate_data.get("Current Title & Company", ""),
             "Years_of_Experience": candidate_data.get("Years of Experience", ""),
             "Degree_University": candidate_data.get("Degree & Education", ""),
-            "Key_Tech_Stack": candidate_tech_stack,
-            "Key_Highlights": candidate_data.get("Key Highlights", ""),
-            "cand_Location": candidate_data.get("Location", ""),
-            "Experience": candidate_data.get("Experience", "")
         }
-        # Call LLM
         response = llm_chain.invoke(payload)
-        print(candidate_data.get("Experience", ""))
-        # Return response in expected format
         return {
-            "candidate_name": response.candidate_name,
-            "candidate_url": response.candidate_url,
-            "candidate_summary": response.candidate_summary,
-            "candidate_location": response.candidate_location,
-            "fit_score": response.fit_score,
-            "justification": response.justification
         }
     except Exception as e:
-        st.error(f"Error calling LLM: {e}")
-        # Fallback to a default response
         return {
-            "candidate_name": candidate_data.get("Name", "Unknown"),
-            "candidate_url": candidate_data.get("URL", ""),
-            "candidate_summary": "Error processing candidate profile",
-            "candidate_location": candidate_data.get("Location", "Unknown"),
-            "fit_score": 0.0,
-            "justification": f"Error in LLM processing: {str(e)}"
         }
 def process_candidates_for_job(job_row, candidates_df, llm_chain=None):
-    """Process candidates for a specific job using the LLM"""
     if llm_chain is None:
-        with st.spinner("Setting up LLM..."):
-            llm_chain = setup_llm()
     selected_candidates = []
-    try:
-        # Get job-specific data
-        job_data = {
-            "Company": job_row["Company"],
-            "Role": job_row["Role"],
-            "desc": job_row.get("One liner", ""),
-            "Locations": job_row.get("Locations", ""),
-            "Tech_Stack": job_row["Tech Stack"],
-            "Industry": job_row.get("Industry", "")
-        }
-        # Find matching candidates for this job
-        with st.spinner("Finding matching candidates based on tech stack..."):
-            matching_candidates = get_matching_candidates(job_row["Tech Stack"], candidates_df)
-        if not matching_candidates:
-            st.warning("No candidates with matching tech stack found for this job.")
-            return []
-        st.success(f"Found {len(matching_candidates)} candidates with matching tech stack.")
-        # Create progress elements
-        candidates_progress = st.progress(0)
-        candidate_status = st.empty()
-        # Process each candidate
-        for i, candidate_data in enumerate(matching_candidates):
-            # Update progress
-            candidates_progress.progress((i + 1) / len(matching_candidates))
-            candidate_status.text(f"Evaluating candidate {i+1}/{len(matching_candidates)}: {candidate_data.get('Name', 'Unknown')}")
-            # Process the candidate with the LLM
-            response = call_llm(candidate_data, job_data, llm_chain)
-            response_dict = {
-                "Name": response["candidate_name"],
-                "LinkedIn": response["candidate_url"],
-                "summary": response["candidate_summary"],
-                "Location": response["candidate_location"],
-                "Fit Score": response["fit_score"],
-                "justification": response["justification"],
-                # Add back original candidate data for context
-                "Educational Background": candidate_data.get("Degree & Education", ""),
-                "Years of Experience": candidate_data.get("Years of Experience", ""),
-                "Current Title & Company": candidate_data.get("Current Title & Company", "")
-            }
-            # Add to selected candidates if score is high enough
-            if response["fit_score"] >= 8.8:
-                selected_candidates.append(response_dict)
-                st.markdown(response_dict)
-            else:
-                st.write(f"Rejected candidate: {response_dict['Name']} with score: {response['fit_score']}")
-        # Clear progress indicators
-        candidates_progress.empty()
-        candidate_status.empty()
-        # Show results
         if selected_candidates:
-            st.success(f"✅ Found {len(selected_candidates)} suitable candidates for this job!")
         else:
-            st.info("No candidates met the minimum fit score threshold for this job.")
-        return selected_candidates
-    except Exception as e:
-        st.error(f"Error processing job: {e}")
-        return []
 def main():
     st.title("👨‍💻 Candidate Matching App")
-    # Initialize session state
-    if 'processed_jobs' not in st.session_state:
-        st.session_state.processed_jobs = {}
-    st.write("""
-    This app matches job listings with candidate profiles based on tech stack and other criteria.
-    Select a job to find matching candidates.
-    """)
-    # API Key input
     with st.sidebar:
         st.header("API Configuration")
-        api_key = st.text_input("Enter OpenAI API Key", type="password")
         if api_key:
             os.environ["OPENAI_API_KEY"] = api_key
-            st.success("API Key set!")
         else:
             st.warning("Please enter OpenAI API Key to use LLM features")
-    # Show API key warning if not set
-    secret_content = os.getenv("GCP_SERVICE_ACCOUNT")
-    # secret_content = secret_content.replace("\n", "\\n")
-    secret_content = json.loads(secret_content)
-    SCOPES = ['https://www.googleapis.com/auth/spreadsheets']
-    creds = service_account.Credentials.from_service_account_info(secret_content, scopes=SCOPES)
-    gc = gspread.authorize(creds)
-    job_sheet = gc.open_by_key('1BZlvbtFyiQ9Pgr_lpepDJua1ZeVEqrCLjssNd6OiG9k')
-    candidates_sheet = gc.open_by_key('1u_9o5f0MPHFUSScjEcnA8Lojm4Y9m9LuWhvjYm6ytF4')
-    if not api_key:
         st.warning("⚠️ You need to provide an OpenAI API key in the sidebar to use this app.")
-    if api_key:
-        try:
-            # Load data from Google Sheets
-            job_worksheet = job_sheet.worksheet('paraform_jobs_formatted')
-            job_data = job_worksheet.get_all_values()
-            candidate_worksheet = candidates_sheet.worksheet('transformed_candidates_updated')
-            candidate_data = candidate_worksheet.get_all_values()
-            # Convert to DataFrames
-            jobs_df = pd.DataFrame(job_data[1:], columns=job_data[0])
-            candidates_df = pd.DataFrame(candidate_data[1:], columns=candidate_data[0])
-            candidates_df = candidates_df.fillna("Unknown")
-            # Display data preview
-            with st.expander("Preview uploaded data"):
-                st.subheader("Jobs Data Preview")
-                st.dataframe(jobs_df.head(3))
-                st.subheader("Candidates Data Preview")
-                st.dataframe(candidates_df.head(3))
-            # Map column names if needed
-            column_mapping = {
-                "Full Name": "Full Name",
-                "LinkedIn URL": "LinkedIn URL",
-                "Current Title & Company": "Current Title & Company",
-                "Years of Experience": "Years of Experience",
-                "Degree & University": "Degree & University",
-                "Key Tech Stack": "Key Tech Stack",
-                "Key Highlights": "Key Highlights",
-                "Location (from most recent experience)": "Location (from most recent experience)"
-            }
-            # Rename columns if they don't match expected
-            candidates_df = candidates_df.rename(columns={
-                col: mapping for col, mapping in column_mapping.items()
-                if col in candidates_df.columns and col != mapping
-            })
-            # Now, instead of processing all jobs upfront, we'll display job selection
-            # and only process the selected job when the user chooses it
-            display_job_selection(jobs_df, candidates_df)
-        except Exception as e:
-            st.error(f"Error processing files: {e}")
     st.divider()
-def display_job_selection(jobs_df, candidates_df):
-    # Store the LLM chain as a session state to avoid recreating it
-    if 'llm_chain' not in st.session_state:
-        st.session_state.llm_chain = None
-    st.subheader("Select a job to view potential matches")
-    # Create job options - but don't compute matches yet
-    job_options = []
-    for i, row in jobs_df.iterrows():
-        job_options.append(f"{row['Role']} at {row['Company']}")
-    if job_options:
-        selected_job_index = st.selectbox("Jobs:",
-                                      range(len(job_options)),
-                                      format_func=lambda x: job_options[x])
-        # Display job details
-        job_row = jobs_df.iloc[selected_job_index]
-        # Parse tech stack for display
-        job_row_stack = parse_tech_stack(job_row["Tech Stack"])
-        col1, col2 = st.columns([2, 1])
-        with col1:
-            st.subheader(f"Job Details: {job_row['Role']}")
-            job_details = {
-                "Company": job_row["Company"],
-                "Role": job_row["Role"],
-                "Description": job_row.get("One liner", "N/A"),
-                "Locations": job_row.get("Locations", "N/A"),
-                "Industry": job_row.get("Industry", "N/A"),
-                "Tech Stack": display_tech_stack(job_row_stack)
-            }
-            for key, value in job_details.items():
-                st.markdown(f"**{key}:** {value}")
-        # Create a key for this job in session state
-        job_key = f"job_{selected_job_index}_processed"
-        if job_key not in st.session_state:
-            st.session_state[job_key] = False
-        # Add a process button for this job
-        if not st.session_state[job_key]:
-            if st.button(f"Find Matching Candidates for this Job"):
-                if "OPENAI_API_KEY" not in os.environ or not os.environ["OPENAI_API_KEY"]:
-                    st.error("Please enter your OpenAI API key in the sidebar before processing")
-                else:
-                    # Process candidates for this job (only when requested)
-                    selected_candidates = process_candidates_for_job(
-                        job_row,
-                        candidates_df,
-                        st.session_state.llm_chain
-                    )
-                    # Store the results and set as processed
-                    if 'Selected_Candidates' not in st.session_state:
-                        st.session_state.Selected_Candidates = {}
-                    st.session_state.Selected_Candidates[selected_job_index] = selected_candidates
-                    st.session_state[job_key] = True
-                    # Store the LLM chain for reuse
-                    if st.session_state.llm_chain is None:
-                        st.session_state.llm_chain = setup_llm()
-                    # Force refresh
-                    st.rerun()
-        # Display selected candidates if already processed
-        if st.session_state[job_key] and 'Selected_Candidates' in st.session_state:
-            selected_candidates = st.session_state.Selected_Candidates.get(selected_job_index, [])
-            # Display selected candidates
-            st.subheader("Selected Candidates")
-            if len(selected_candidates) > 0:
-                for i, candidate in enumerate(selected_candidates):
-                    with st.expander(f"{i+1}. {candidate['Name']} (Score: {candidate['Fit Score']})"):
-                        col1, col2 = st.columns([3, 1])
-                        with col1:
-                            st.markdown(f"**Summary:** {candidate['summary']}")
-                            st.markdown(f"**Current:** {candidate['Current Title & Company']}")
-                            st.markdown(f"**Education:** {candidate['Educational Background']}")
-                            st.markdown(f"**Experience:** {candidate['Years of Experience']}")
-                            st.markdown(f"**Location:** {candidate['Location']}")
-                            st.markdown(f"**[LinkedIn Profile]({candidate['LinkedIn']})**")
-                        with col2:
-                            st.markdown(f"**Fit Score:** {candidate['Fit Score']}")
                         st.markdown("**Justification:**")
                         st.info(candidate['justification'])
-            else:
-                st.info("No candidates met the minimum score threshold (8.8) for this job.")
-                # We don't show tech-matched candidates here since they are generated
-                # during the LLM matching process now
-            # Add a reset button to start over
-            if st.button("Reset and Process Again"):
-                st.session_state[job_key] = False
-                st.rerun()
 if __name__ == "__main__":
-    main()

 import json
 import os
 from pydantic import BaseModel, Field
+from typing import List, Set, Dict, Any, Optional # Already have these, but commented for brevity if not all used
+import time # Added for potential small delays if needed
 from langchain_openai import ChatOpenAI
+from langchain_core.messages import HumanMessage # Not directly used in provided snippet
 from langchain_core.prompts import ChatPromptTemplate
+from langchain_core.output_parsers import StrOutputParser # Not directly used in provided snippet
+from langchain_core.prompts import PromptTemplate # Not directly used in provided snippet
 import gspread
+import tempfile
 from google.oauth2 import service_account
+import tiktoken
 st.set_page_config(
     page_title="Candidate Matching App",
     page_icon="👨‍💻🎯",
     layout="wide"
 )
+os.environ["STREAMLIT_HOME"] = tempfile.gettempdir()
+os.environ["STREAMLIT_DISABLE_TELEMETRY"] = "1"
 # Define pydantic model for structured output
 class Shortlist(BaseModel):
+    fit_score: float = Field(description="A score between 0 and 10 indicating how closely the candidate profile matches the job requirements upto 3 decimal points.")
     candidate_name: str = Field(description="The name of the candidate.")
     candidate_url: str = Field(description="The URL of the candidate's LinkedIn profile.")
     candidate_summary: str = Field(description="A brief summary of the candidate's skills and experience along with its educational background.")
     candidate_location: str = Field(description="The location of the candidate.")
     justification: str = Field(description="Justification for the shortlisted candidate with the fit score")
+# Function to calculate tokens
+def calculate_tokens(text, model="gpt-4o-mini"):
+    try:
+        if "gpt-4" in model:
+            encoding = tiktoken.encoding_for_model("gpt-4o-mini")
+        elif "gpt-3.5" in model:
+            encoding = tiktoken.encoding_for_model("gpt-3.5-turbo")
+        else:
+            encoding = tiktoken.get_encoding("cl100k_base")
+        return len(encoding.encode(text))
+    except Exception as e:
+        return len(text) // 4
+# Function to display token usage
+def display_token_usage():
+    if 'total_input_tokens' not in st.session_state:
+        st.session_state.total_input_tokens = 0
+    if 'total_output_tokens' not in st.session_state:
+        st.session_state.total_output_tokens = 0
+    total_input = st.session_state.total_input_tokens
+    total_output = st.session_state.total_output_tokens
+    total_tokens = total_input + total_output
+    model_to_check = st.session_state.get('model_name', "gpt-4o-mini") # Use a default if not set
+    if model_to_check == "gpt-4o-mini":
+        input_cost_per_1k = 0.00015 # Adjusted to example rates ($0.15 / 1M tokens)
+        output_cost_per_1k = 0.0006  # Adjusted to example rates ($0.60 / 1M tokens)
+    elif "gpt-4" in model_to_check: # Fallback for other gpt-4
+        input_cost_per_1k = 0.005
+        output_cost_per_1k = 0.015 # General gpt-4 pricing can vary
+    else:  # Assume gpt-3.5-turbo pricing
+        input_cost_per_1k = 0.0005  # $0.0005 per 1K input tokens
+        output_cost_per_1k = 0.0015   # $0.0015 per 1K output tokens
+    estimated_cost = (total_input / 1000 * input_cost_per_1k) + (total_output / 1000 * output_cost_per_1k)
+    st.subheader("📊 Token Usage Statistics (for last processed job)")
+    col1, col2, col3 = st.columns(3)
+    with col1: st.metric("Input Tokens", f"{total_input:,}")
+    with col2: st.metric("Output Tokens", f"{total_output:,}")
+    with col3: st.metric("Total Tokens", f"{total_tokens:,}")
+    st.markdown(f"**Estimated Cost:** ${estimated_cost:.4f}")
+    return total_tokens
 # Function to parse and normalize tech stacks
 def parse_tech_stack(stack):
+    if pd.isna(stack) or stack == "" or stack is None: return set()
+    if isinstance(stack, set): return stack
     try:
         if isinstance(stack, str) and stack.startswith("{") and stack.endswith("}"):
             items = stack.strip("{}").split(",")
             return set(item.strip().strip("'\"") for item in items if item.strip())
         return set(map(lambda x: x.strip().lower(), str(stack).split(',')))
         return set()
 def display_tech_stack(stack_set):
+    return ", ".join(sorted(list(stack_set))) if isinstance(stack_set, set) else str(stack_set)
 def get_matching_candidates(job_stack, candidates_df):
     matched = []
     job_stack_set = parse_tech_stack(job_stack)
     for _, candidate in candidates_df.iterrows():
         candidate_stack = parse_tech_stack(candidate['Key Tech Stack'])
         common = job_stack_set & candidate_stack
+        if len(common) >= 2: # Original condition
             matched.append({
+                "Name": candidate["Full Name"], "URL": candidate["LinkedIn URL"],
                 "Degree & Education": candidate["Degree & University"],
                 "Years of Experience": candidate["Years of Experience"],
                 "Current Title & Company": candidate['Current Title & Company'],
                 "Key Highlights": candidate["Key Highlights"],
                 "Location": candidate["Location (from most recent experience)"],
+                "Experience": str(candidate["Experience"]), "Tech Stack": candidate_stack
             })
     return matched
 def setup_llm():
     """Set up the LangChain LLM with structured output"""
+    # Define the model to use
+    model_name = "gpt-4o-mini"
+    # Store model name in session state for token calculation
+    if 'model_name' not in st.session_state:
+        st.session_state.model_name = model_name
     # Create LLM instance
     llm = ChatOpenAI(
+        model=model_name,
+        temperature=0.3,
         max_tokens=None,
         timeout=None,
         max_retries=2,
     sum_llm = llm.with_structured_output(Shortlist)
     # Create system prompt
+    system = """You are an expert Tech Recruitor, your task is to analyse the Candidate profile and determine if it matches with the job details and provide a score(out of 10) indicating how compatible the
     the profile is according to job.
+    First of all check the location of the candidate, if the location is not in the range of the job location then reject the candidate directly without any further analysis.
+    for example if the job location is New York and the candidate is in San Francisco then reject the candidate. Similarly for other states as well.
 Try to ensure following points while estimating the candidate's fit score:
 For education:
 Tier1 - MIT, Stanford, CMU, UC Berkeley, Caltech, Harvard, IIT Bombay, IIT Delhi, Princeton, UIUC, University of Washington, Columbia, University of Chicago, Cornell, University of Michigan (Ann Arbor), UT Austin - Maximum points
 Tier2 - UC Davis, Georgia Tech, Purdue, UMass Amherst,etc - Moderate points
 Tier3 - Unknown or unranked institutions - Lower points or reject
 Startup Experience Requirement:
 Candidates must have worked  as a direct employee at a VC-backed startup (Seed to series C/D)
+preferred - Y Combinator, Sequoia,a16z,Accel,Founders Fund,LightSpeed,Greylock,Benchmark,Index Ventures,etc.
     The fit score signifies based on following metrics:
     1–5 - Poor Fit - Auto-reject
     6–7 - Weak Fit - Auto-reject
     8.0–8.7 - Moderate Fit - Auto-reject
     8.8–10 - STRONG Fit - Include in results
+    Each candidate's fit score should be calculated based on a weighted evaluation of their background and must be distinct even if candidates have similar profiles.
     """
     # Create query prompt
     query_prompt = ChatPromptTemplate.from_messages([
         ("system", system),
         ("human", """
+    You are an expert Recruitor. Your task is to determine if the candidate matches the given job.
+Provide the score as a `float` rounded to exactly **three decimal places** (e.g., 8.943, 9.211, etc.).
+Avoid rounding to whole or one-decimal numbers. Every candidate should have a **unique** fit score.
           For this you will be provided with the follwing inputs of job and candidates:
     Job Details
     Company: {Company}
     Tech Stack: {Tech_Stack}
     Industry: {Industry}
     Candidate Details:
     Full Name: {Full_Name}
     LinkedIn URL: {LinkedIn_URL}
     Key Highlights: {Key_Highlights}
     Location (from most recent experience): {cand_Location}
     Past_Experience: {Experience}
     Answer in the structured manner as per the schema.
     If any parameter is Unknown try not to include in the summary, only include those parameters which are known.
+    The `fit_score` must be a float with **exactly three decimal digits** (e.g. 8.812, 9.006). Do not round to 1 or 2 decimals.
     """),
     ])
     return cat_class
 def call_llm(candidate_data, job_data, llm_chain):
     try:
+        job_tech_stack = ", ".join(sorted(list(job_data.get("Tech_Stack", set())))) if isinstance(job_data.get("Tech_Stack"), set) else job_data.get("Tech_Stack", "")
+        candidate_tech_stack = ", ".join(sorted(list(candidate_data.get("Tech Stack", set())))) if isinstance(candidate_data.get("Tech Stack"), set) else candidate_data.get("Tech Stack", "")
         payload = {
+            "Company": job_data.get("Company", ""), "Role": job_data.get("Role", ""),
+            "desc": job_data.get("desc", ""), "Locations": job_data.get("Locations", ""),
+            "Tech_Stack": job_tech_stack, "Industry": job_data.get("Industry", ""),
+            "Full_Name": candidate_data.get("Name", ""), "LinkedIn_URL": candidate_data.get("URL", ""),
             "Current_Title_Company": candidate_data.get("Current Title & Company", ""),
             "Years_of_Experience": candidate_data.get("Years of Experience", ""),
             "Degree_University": candidate_data.get("Degree & Education", ""),
+            "Key_Tech_Stack": candidate_tech_stack, "Key_Highlights": candidate_data.get("Key Highlights", ""),
+            "cand_Location": candidate_data.get("Location", ""), "Experience": candidate_data.get("Experience", "")
         }
+        payload_str = json.dumps(payload)
+        input_tokens = calculate_tokens(payload_str, st.session_state.model_name)
         response = llm_chain.invoke(payload)
+        # print(candidate_data.get("Experience", "")) # Kept for your debugging if needed
+        response_str = f"candidate_name: {response.candidate_name} ... fit_score: {float(f'{response.fit_score:.3f}')} ..." # Truncated
+        output_tokens = calculate_tokens(response_str, st.session_state.model_name)
+        if 'total_input_tokens' not in st.session_state: st.session_state.total_input_tokens = 0
+        if 'total_output_tokens' not in st.session_state: st.session_state.total_output_tokens = 0
+        st.session_state.total_input_tokens += input_tokens
+        st.session_state.total_output_tokens += output_tokens
         return {
+            "candidate_name": response.candidate_name, "candidate_url": response.candidate_url,
+            "candidate_summary": response.candidate_summary, "candidate_location": response.candidate_location,
+            "fit_score": response.fit_score, "justification": response.justification
         }
     except Exception as e:
+        st.error(f"Error calling LLM for {candidate_data.get('Name', 'Unknown')}: {e}")
         return {
+            "candidate_name": candidate_data.get("Name", "Unknown"), "candidate_url": candidate_data.get("URL", ""),
+            "candidate_summary": "Error processing candidate profile", "candidate_location": candidate_data.get("Location", "Unknown"),
+            "fit_score": 0.0, "justification": f"Error in LLM processing: {str(e)}"
         }
 def process_candidates_for_job(job_row, candidates_df, llm_chain=None, min_fit_score=8.8):
     """Evaluate every tech-stack-matched candidate for one job with the LLM.
     The function resets the per-job token counters in ``st.session_state``,
     pre-filters candidates with ``get_matching_candidates`` and then calls
     ``call_llm`` once per remaining candidate, rendering live progress as it
     goes. The loop exits early when ``st.session_state.stop_processing_flag``
     is set.
     Args:
         job_row: Row describing the job. It is indexed with "Company", "Role"
             and "Tech Stack" and queried with ``.get`` for the optional
             "One liner", "Locations" and "Industry" fields.
         candidates_df: Candidate table handed to ``get_matching_candidates``.
         llm_chain: Chain passed to ``call_llm``; built with ``setup_llm()``
             when omitted.
         min_fit_score: Minimum (3-decimal rounded) fit score a candidate needs
             to be selected. Defaults to the previously hard-coded 8.8.
     Returns:
         A list of dicts, one per selected candidate, in evaluation order.
         Empty when nothing matched, nothing reached the threshold, or the run
         was stopped before any candidate was selected.
     """
     # Imported locally so the stop path below cannot fail on a missing
     # module-level ``import time``.
     import time
     st.session_state.total_input_tokens = 0  # Reset for this job
     st.session_state.total_output_tokens = 0
     if llm_chain is None:
         with st.spinner("Setting up LLM..."):
             llm_chain = setup_llm()
     job_data = {
         "Company": job_row["Company"],
         "Role": job_row["Role"],
         "desc": job_row.get("One liner", ""),
         "Locations": job_row.get("Locations", ""),
         "Tech_Stack": job_row["Tech Stack"],
         "Industry": job_row.get("Industry", ""),
     }
     with st.spinner("Sourcing candidates based on tech stack..."):
         matching_candidates = get_matching_candidates(job_row["Tech Stack"], candidates_df)
     if not matching_candidates:
         st.warning("No candidates with matching tech stack found for this job.")
         return []
     total = len(matching_candidates)
     st.success(f"Found {total} candidates with matching tech stack. Evaluating with LLM...")
     candidates_progress = st.progress(0)
     candidate_status = st.empty()  # Placeholder for live status updates
     selected_candidates = []
     for position, candidate_data in enumerate(matching_candidates, start=1):
         if st.session_state.get('stop_processing_flag', False):
             candidate_status.warning("Processing stopped by user.")
             time.sleep(1)  # Allow message to be seen
             break
         candidate_status.text(
             f"Evaluating candidate {position}/{total}: {candidate_data.get('Name', 'Unknown')}"
         )
         response = call_llm(candidate_data, job_data, llm_chain)
         # Round once and use the rounded value everywhere (threshold, live
         # output, returned dict) so the decision always agrees with the score
         # the user sees.
         fit_score = float(f"{response['fit_score']:.3f}")
         response_dict = {
             "Name": response["candidate_name"],
             "LinkedIn": response["candidate_url"],
             "summary": response["candidate_summary"],
             "Location": response["candidate_location"],
             "Fit Score": fit_score,
             "justification": response["justification"],
             "Educational Background": candidate_data.get("Degree & Education", ""),
             "Years of Experience": candidate_data.get("Years of Experience", ""),
             "Current Title & Company": candidate_data.get("Current Title & Company", ""),
         }
         # The per-candidate lines below are only visible while processing;
         # they are cleared by the next full script rerun.
         if fit_score >= min_fit_score:
             selected_candidates.append(response_dict)
             st.markdown(
                 f"**Selected Candidate:** [{response_dict['Name']}]({response_dict['LinkedIn']}) "
                 f"(Score: {fit_score:.3f}, Location: {response_dict['Location']})"
             )
         else:
             st.write(
                 f"Rejected candidate: {response_dict['Name']} with score: {fit_score:.3f}, "
                 f"Location: {response_dict['Location']}"
             )
         candidates_progress.progress(position / total)
     candidates_progress.empty()
     candidate_status.empty()
     if not st.session_state.get('stop_processing_flag', False):  # Only show if not stopped
         if selected_candidates:
             st.success(f"✅ LLM evaluation complete. Found {len(selected_candidates)} suitable candidates for this job!")
         else:
             st.info("LLM evaluation complete. No candidates met the minimum fit score threshold for this job.")
     return selected_candidates
 def main():
     """Render the app: sidebar API-key setup, Google Sheets load, job selection.
     Steps, in order:
       1. Seed the ``st.session_state`` keys the rest of the app reads.
       2. Read the OpenAI API key from the sidebar, export it through
          ``os.environ`` and build the LLM chain with ``setup_llm()``.
       3. Open the jobs and candidates spreadsheets via a service account;
          stop the script if that fails or if no API key is available.
       4. Load both worksheets into DataFrames and hand them to
          ``display_job_selection``.
     """
     st.title("👨‍💻 Candidate Matching App")
     if 'processed_jobs' not in st.session_state:
         st.session_state.processed_jobs = {}
     if 'Selected_Candidates' not in st.session_state:
         st.session_state.Selected_Candidates = {}
     if 'llm_chain' not in st.session_state:
         st.session_state.llm_chain = None
     if 'stop_processing_flag' not in st.session_state:
         st.session_state.stop_processing_flag = False
     st.write("This app matches job listings with candidate profiles...")
     with st.sidebar:
         st.header("API Configuration")
         api_key = st.text_input("Enter OpenAI API Key", type="password", key="api_key_input")
         if api_key:
             os.environ["OPENAI_API_KEY"] = api_key
             # Build the chain once per session after a key is supplied.
             if st.session_state.llm_chain is None:
                 with st.spinner("Setting up LLM..."):
                     st.session_state.llm_chain = setup_llm()
             st.success("API Key set")
         else:
             st.warning("Please enter OpenAI API Key to use LLM features")
             st.session_state.llm_chain = None  # Clear chain if key removed
     SERVICE_ACCOUNT_FILE = 'src/synapse-recruitment-e94255ca76fd.json'
     SCOPES = ['https://www.googleapis.com/auth/spreadsheets']
     try:
         creds = service_account.Credentials.from_service_account_file(SERVICE_ACCOUNT_FILE, scopes=SCOPES)
         gc = gspread.authorize(creds)
         job_sheet = gc.open_by_key('1BZlvbtFyiQ9Pgr_lpepDJua1ZeVEqrCLjssNd6OiG9k')
         candidates_sheet = gc.open_by_key('1u_9o5f0MPHFUSScjEcnA8Lojm4Y9m9LuWhvjYm6ytF4')
     except Exception as e:
         st.error(f"Failed to connect to Google Sheets. Ensure '{SERVICE_ACCOUNT_FILE}' is valid and has permissions. Error: {e}")
         st.stop()
     if not os.environ.get("OPENAI_API_KEY"):
         st.warning("⚠️ You need to provide an OpenAI API key in the sidebar to use this app.")
         st.stop()
     # A key can be present in the environment while the sidebar input is empty
     # (set by the deployment, or left over from an earlier entry). The sidebar
     # branch above resets llm_chain to None on every such run, so the chain is
     # rebuilt here. There is deliberately no st.rerun() afterwards: rerunning
     # would hit the same reset and rebuild again, looping forever. The chain is
     # already in session state before anything below uses it.
     if st.session_state.llm_chain is None:
         with st.spinner("Setting up LLM..."):
             st.session_state.llm_chain = setup_llm()
     try:
         job_worksheet = job_sheet.worksheet('paraform_jobs_formatted')
         job_data = job_worksheet.get_all_values()
         candidate_worksheet = candidates_sheet.worksheet('transformed_candidates_updated')
         candidate_data = candidate_worksheet.get_all_values()
         # First row of each worksheet is the header row.
         jobs_df = pd.DataFrame(job_data[1:], columns=job_data[0]).drop(["Link"], axis=1, errors='ignore')
         jobs_df1 = jobs_df[["Company", "Role", "One liner", "Locations", "Tech Stack", "Workplace", "Industry", "YOE"]]
         candidates_df = pd.DataFrame(candidate_data[1:], columns=candidate_data[0]).fillna("Unknown")
         candidates_df.drop_duplicates(subset=['LinkedIn URL'], keep='first', inplace=True)
         with st.expander("Preview uploaded data"):
             st.subheader("Jobs Data Preview")
             st.dataframe(jobs_df1.head(3))
         # The jobs spreadsheet doubles as the destination for per-job result tabs.
         display_job_selection(jobs_df, candidates_df, job_sheet)
     except Exception as e:
         st.error(f"Error processing files or data: {e}")
     st.divider()
+def display_job_selection(jobs_df, candidates_df, sh): # 'sh' is the Google Sheets client
+    st.subheader("Select a job to Source for potential matches")
+    job_options = [f"{row['Role']} at {row['Company']}" for _, row in jobs_df.iterrows()]
+    if not job_options:
+        st.warning("No jobs found to display.")
+        return
+    selected_job_index = st.selectbox("Jobs:", range(len(job_options)), format_func=lambda x: job_options[x], key="job_selectbox")
+    job_row = jobs_df.iloc[selected_job_index]
+    job_row_stack = parse_tech_stack(job_row["Tech Stack"]) # Assuming parse_tech_stack is defined
+    col_job_details_display, _ = st.columns([2,1])
+    with col_job_details_display:
+        st.subheader(f"Job Details: {job_row['Role']}")
+        job_details_dict = {
+            "Company": job_row["Company"], "Role": job_row["Role"], "Description": job_row.get("One liner", "N/A"),
+            "Locations": job_row.get("Locations", "N/A"), "Industry": job_row.get("Industry", "N/A"),
+            "Tech Stack": display_tech_stack(job_row_stack) # Assuming display_tech_stack is defined
+        }
+        for key, value in job_details_dict.items(): st.markdown(f"**{key}:** {value}")
+    # State keys for the selected job
+    job_processed_key = f"job_{selected_job_index}_processed_successfully"
+    job_is_processing_key = f"job_{selected_job_index}_is_currently_processing"
+    # Initialize states if they don't exist for this job
+    if job_processed_key not in st.session_state: st.session_state[job_processed_key] = False
+    if job_is_processing_key not in st.session_state: st.session_state[job_is_processing_key] = False
+    sheet_name = f"{job_row['Role']} at {job_row['Company']}".strip()[:100]
+    worksheet_exists = False
+    existing_candidates_from_sheet = [] # This will store raw data from sheet
+    try:
+        cand_worksheet = sh.worksheet(sheet_name)
+        worksheet_exists = True
+        existing_data = cand_worksheet.get_all_values() # Get all values as list of lists
+        if len(existing_data) > 1: # Has data beyond header
+            existing_candidates_from_sheet = existing_data # Store raw data
+    except gspread.exceptions.WorksheetNotFound:
+        pass
+    # --- Processing Control Area ---
+    # Show controls if not successfully processed in this session OR if sheet exists (allow re-process/overwrite)
+    if not st.session_state.get(job_processed_key, False) or existing_candidates_from_sheet:
+        if existing_candidates_from_sheet and not st.session_state.get(job_is_processing_key, False) and not st.session_state.get(job_processed_key, False):
+             st.info(f"Processing ('{sheet_name}')")
+        col_find, col_stop = st.columns(2)
+        with col_find:
+            if st.button(f"Find Matching Candidates for this Job", key=f"find_btn_{selected_job_index}", disabled=st.session_state.get(job_is_processing_key, False)):
+                if not os.environ.get("OPENAI_API_KEY") or st.session_state.llm_chain is None: # Assuming llm_chain is in session_state
+                    st.error("OpenAI API key not set or LLM not initialized. Please check sidebar.")
+                else:
+                    st.session_state[job_is_processing_key] = True
+                    st.session_state.stop_processing_flag = False # Reset for new run, assuming stop_processing_flag is used
+                    st.session_state.Selected_Candidates[selected_job_index] = [] # Clear previous run for this job
+                    st.session_state[job_processed_key] = False # Mark as not successfully processed yet for this attempt
+                    st.rerun()
+        with col_stop:
+            if st.session_state.get(job_is_processing_key, False): # Show STOP only if "Find" was clicked and currently processing
+                if st.button("STOP Processing", key=f"stop_btn_{selected_job_index}"):
+                    st.session_state.stop_processing_flag = True # Assuming stop_processing_flag is used
+                    st.warning("Stop request sent. Processing will halt shortly.")
+    # --- Actual Processing Logic ---
+    if st.session_state.get(job_is_processing_key, False):
+        with st.spinner(f"Sourcing candidates for {job_row['Role']} at {job_row['Company']}..."):
+            # Assuming process_candidates_for_job is defined and handles stop_processing_flag
+            processed_candidates_list = process_candidates_for_job(
+                job_row, candidates_df, st.session_state.llm_chain # Assuming llm_chain from session_state
+            )
+        st.session_state[job_is_processing_key] = False # Mark as no longer actively processing
+        if not st.session_state.get('stop_processing_flag', False): # If processing was NOT stopped
+            if processed_candidates_list:
+                # Ensure Fit Score is float for reliable sorting
+                for cand in processed_candidates_list:
+                    if 'Fit Score' in cand and isinstance(cand['Fit Score'], str):
+                        try: cand['Fit Score'] = float(cand['Fit Score'])
+                        except ValueError: cand['Fit Score'] = 0.0 # Default if conversion fails
+                    elif 'Fit Score' not in cand:
+                        cand['Fit Score'] = 0.0
+                processed_candidates_list.sort(key=lambda x: x.get("Fit Score", 0.0), reverse=True)
+                st.session_state.Selected_Candidates[selected_job_index] = processed_candidates_list
+                st.session_state[job_processed_key] = True # Mark as successfully processed
+                # Save to Google Sheet
+                try:
+                    target_worksheet = None
+                    if not worksheet_exists:
+                        target_worksheet = sh.add_worksheet(title=sheet_name, rows=max(100, len(processed_candidates_list) + 10), cols=20)
+                    else:
+                        target_worksheet = sh.worksheet(sheet_name)
+                    headers = list(processed_candidates_list[0].keys())
+                    # Ensure all values are converted to strings for gspread
+                    rows_to_write = [headers] + [[str(candidate.get(h, "")) for h in headers] for candidate in processed_candidates_list]
+                    target_worksheet.clear()
+                    target_worksheet.update('A1', rows_to_write)
+                    st.success(f"Results saved to Google Sheet: '{sheet_name}'")
+                except Exception as e:
+                    st.error(f"Error writing to Google Sheet '{sheet_name}': {e}")
+            else:
+                st.info("No suitable candidates found after processing.")
+                st.session_state.Selected_Candidates[selected_job_index] = []
+                st.session_state[job_processed_key] = True # Mark as processed, even if no results
+        else: # If processing WAS stopped
+            st.info("Processing was stopped by user. Results (if any) were not saved. You can try processing again.")
+            st.session_state.Selected_Candidates[selected_job_index] = [] # Clear any partial results
+            st.session_state[job_processed_key] = False # Not successfully processed
+        st.session_state.pop('stop_processing_flag', None) # Clean up flag
+        st.rerun() # Rerun to update UI based on new state
+    # --- Display Results Area ---
+    should_display_results_area = False
+    final_candidates_to_display = [] # Initialize to ensure it's always defined
+    if st.session_state.get(job_is_processing_key, False):
+        should_display_results_area = False # Not if actively processing
+    elif st.session_state.get(job_processed_key, False): # If successfully processed in this session
+        should_display_results_area = True
+        final_candidates_to_display = st.session_state.Selected_Candidates.get(selected_job_index, [])
+    elif existing_candidates_from_sheet: # If not processed in this session, but sheet has data
+        should_display_results_area = True
+        headers = existing_candidates_from_sheet[0]
+        parsed_sheet_candidates = []
+        for row_idx, row_data in enumerate(existing_candidates_from_sheet[1:]): # Skip header row
+            candidate_dict = {}
+            for col_idx, header_name in enumerate(headers):
+                candidate_dict[header_name] = row_data[col_idx] if col_idx < len(row_data) else None
+            # Convert Fit Score from string to float for consistent handling
+            if 'Fit Score' in candidate_dict and isinstance(candidate_dict['Fit Score'], str):
+                try:
+                    candidate_dict['Fit Score'] = float(candidate_dict['Fit Score'])
+                except ValueError:
+                    st.warning(f"Could not convert Fit Score '{candidate_dict['Fit Score']}' to float for candidate in sheet row {row_idx+2}.")
+                    candidate_dict['Fit Score'] = 0.0 # Default if conversion fails
+            elif 'Fit Score' not in candidate_dict:
+                 candidate_dict['Fit Score'] = 0.0
+            parsed_sheet_candidates.append(candidate_dict)
+        final_candidates_to_display = sorted(parsed_sheet_candidates, key=lambda x: x.get("Fit Score", 0.0), reverse=True)
+        if not st.session_state.get(job_processed_key, False): # Inform if loading from sheet and not explicitly processed
+             st.info(f"Displaying: '{sheet_name}'.")
+    if should_display_results_area:
+        st.subheader("Selected Candidates")
+        # Display token usage if it was just processed (job_processed_key is True and tokens exist)
+        if st.session_state.get(job_processed_key, False) and \
+           (st.session_state.get('total_input_tokens', 0) > 0 or st.session_state.get('total_output_tokens', 0) > 0):
+            display_token_usage() # Assuming display_token_usage is defined
+        if final_candidates_to_display:
+            for i, candidate in enumerate(final_candidates_to_display):
+                score_display = candidate.get('Fit Score', 'N/A')
+                if isinstance(score_display, (float, int)):
+                    score_display = f"{score_display:.3f}"
+                # If score_display is still a string (e.g. 'N/A' or failed float conversion), it will be displayed as is.
+                expander_title = f"{i+1}. {candidate.get('Name', 'N/A')} (Score: {score_display})"
+                with st.expander(expander_title):
+                    text_to_copy = f"""Candidate: {candidate.get('Name', 'N/A')} (Score: {score_display})
+Summary: {candidate.get('summary', 'N/A')}
+Current: {candidate.get('Current Title & Company', 'N/A')}
+Education: {candidate.get('Educational Background', 'N/A')}
+Experience: {candidate.get('Years of Experience', 'N/A')}
+Location: {candidate.get('Location', 'N/A')}
+LinkedIn: {candidate.get('LinkedIn', 'N/A')}
+Justification: {candidate.get('justification', 'N/A')}
+"""
+                    js_text_to_copy = json.dumps(text_to_copy)
+                    button_unique_id = f"copy_btn_job{selected_job_index}_cand{i}"
+                    copy_button_html = f"""
+                    <script>
+                        function copyToClipboard_{button_unique_id}() {{
+                            const textToCopy = {js_text_to_copy};
+                            navigator.clipboard.writeText(textToCopy).then(function() {{
+                                const btn = document.getElementById('{button_unique_id}');
+                                if (btn) {{ // Check if button exists
+                                    const originalText = btn.innerText;
+                                    btn.innerText = 'Copied!';
+                                    setTimeout(function() {{ btn.innerText = originalText; }}, 1500);
+                                }}
+                            }}, function(err) {{
+                                console.error('Could not copy text: ', err);
+                                alert('Failed to copy text. Please use Ctrl+C or your browser\\'s copy function.');
+                            }});
+                        }}
+                    </script>
+                    <button id="{button_unique_id}" onclick="copyToClipboard_{button_unique_id}()">📋 Copy Details</button>
+                    """
+                    expander_cols = st.columns([0.82, 0.18])
+                    with expander_cols[1]:
+                        st.components.v1.html(copy_button_html, height=40)
+                    with expander_cols[0]:
+                        st.markdown(f"**Summary:** {candidate.get('summary', 'N/A')}")
+                        st.markdown(f"**Current:** {candidate.get('Current Title & Company', 'N/A')}")
+                        st.markdown(f"**Education:** {candidate.get('Educational Background', 'N/A')}")
+                        st.markdown(f"**Experience:** {candidate.get('Years of Experience', 'N/A')}")
+                        st.markdown(f"**Location:** {candidate.get('Location', 'N/A')}")
+                        if 'LinkedIn' in candidate and candidate.get('LinkedIn'):
+                            st.markdown(f"**[LinkedIn Profile]({candidate['LinkedIn']})**")
+                        else:
+                            st.markdown("**LinkedIn Profile:** N/A")
+                    if 'justification' in candidate and candidate.get('justification'):
                         st.markdown("**Justification:**")
                         st.info(candidate['justification'])
+        elif st.session_state.get(job_processed_key, False): # Processed but no candidates
+            st.info("No candidates met the criteria for this job after processing.")
+        # This "Reset" button is now governed by should_display_results_area
+        if st.button("Reset and Process Again", key=f"reset_btn_{selected_job_index}"):
+            st.session_state[job_processed_key] = False
+            st.session_state.pop(job_is_processing_key, None)
+            if selected_job_index in st.session_state.Selected_Candidates:
+                del st.session_state.Selected_Candidates[selected_job_index]
+            try:
+                sh.worksheet(sheet_name).clear()
+                st.info(f"Cleared Google Sheet '{sheet_name}' as part of reset.")
+            except: pass # Ignore if sheet not found or error
+            st.rerun()
 # Script entry point: build the Streamlit page by calling main() only when this
 # file is executed as the main module, not when it is imported.
 if __name__ == "__main__":
+    main()