"""
Test Chatbot Quality on Real Recruiter Scenarios

This script tests the chatbot on 3 real recruiter scenarios:
1. "Explain why candidate X matches job Y"
2. "Compare candidate A vs candidate B for role Z"
3. "What is the ideal profile for this job?"

Requires: the ANTHROPIC_API_KEY environment variable to be set.
"""

import json
import os
import sys
import textwrap
from pathlib import Path
from typing import Optional

# Put ./backend first on sys.path before the project imports that follow.
sys.path.insert(0, str(Path(__file__).parent / "backend"))

from anthropic import Anthropic
from app.models import Job, Candidate
from app.services.matching_service import MatchingService
from app.schemas import CandidateProfile
from ai_module.nlp.enhanced_skill_extractor import EnhancedSkillExtractor
from ai_module.matching.semantic_matcher import SemanticSkillMatcher


class ChatbotQualityTester:
    """Run scripted recruiter conversations against Claude and collect the replies.

    Each ``scenario_*`` method starts a fresh conversation, sends one opening
    prompt plus one follow-up question, prints the exchange, and returns a
    result dict with the keys ``scenario``, ``initial_response``,
    ``followup_response`` and ``status``.
    """

    DEFAULT_MODEL = "claude-3-5-sonnet-20241022"
    DEFAULT_MAX_TOKENS = 1024

    # Status value stored in the result dict of every scenario that completes.
    SUCCESS_STATUS = "✅ SUCCESS"

    SYSTEM_PROMPT = (
        "You are an expert HR recruiter assistant. You help recruiters "
        "understand candidate-job matches,\n"
        "compare candidates, and define ideal profiles. Be concise but "
        "insightful. Focus on:\n"
        "- Technical skill alignment\n"
        "- Experience relevance\n"
        "- Growth potential\n"
        "- Risk factors"
    )

    def __init__(
        self,
        api_key: Optional[str] = None,
        model: str = DEFAULT_MODEL,
        max_tokens: int = DEFAULT_MAX_TOKENS,
    ):
        """Create the API client and the skill extractor.

        Args:
            api_key: Anthropic API key. Falls back to the ``ANTHROPIC_API_KEY``
                environment variable when omitted or empty.
            model: Model identifier sent with every request.
            max_tokens: Upper bound on tokens requested per reply.

        Raises:
            ValueError: If no API key is supplied and the environment variable
                is not set.
        """
        api_key = api_key or os.getenv("ANTHROPIC_API_KEY")
        if not api_key:
            raise ValueError("ANTHROPIC_API_KEY not set")

        # Hand the resolved key to the client explicitly; otherwise a key
        # passed by the caller (and absent from the environment) is ignored.
        self.client = Anthropic(api_key=api_key)
        self.model = model
        self.max_tokens = max_tokens
        self.conversation_history = []
        self.skill_extractor = EnhancedSkillExtractor(load_ner=False)

    def reset_conversation(self):
        """Reset conversation history for a new scenario."""
        self.conversation_history = []

    def _chat(self, user_message: str) -> str:
        """Send ``user_message`` to Claude and return the reply text.

        The full conversation history is sent with every request, and both the
        user turn and the assistant reply are appended to it on success.

        Raises:
            RuntimeError: If the response carries no text content.
            Exception: Whatever the API client raises is propagated unchanged.
        """
        self.conversation_history.append({
            "role": "user",
            "content": user_message,
        })

        try:
            response = self.client.messages.create(
                model=self.model,
                max_tokens=self.max_tokens,
                system=self.SYSTEM_PROMPT,
                messages=self.conversation_history,
            )
            text_parts = [
                part.text
                for part in response.content
                if getattr(part, "text", None)
            ]
            if not text_parts:
                raise RuntimeError("Claude response contained no text content")
        except Exception:
            # Drop the unanswered user turn so the history keeps alternating
            # user/assistant and a later call does not send two user turns.
            self.conversation_history.pop()
            raise

        assistant_message = "".join(text_parts)
        self.conversation_history.append({
            "role": "assistant",
            "content": assistant_message,
        })
        return assistant_message

    @staticmethod
    def _print_banner(title: str) -> None:
        """Print a scenario title framed by two 70-character rules."""
        print("\n" + "=" * 70)
        print(title)
        print("=" * 70)

    @staticmethod
    def _format_skills(skills, limit: int) -> str:
        """Join at most ``limit`` skills into a comma-separated string.

        NOTE(review): the return type of ``extract_skills_hybrid`` is not
        visible in this file, so the value is coerced to a list and each item
        to ``str`` instead of assuming a sliceable list of strings.
        """
        return ", ".join(str(skill) for skill in list(skills or [])[:limit])

    def _run_dialogue(
        self,
        scenario: str,
        prompt: str,
        answer_label: str,
        follow_up: str,
    ) -> dict:
        """Send the opening prompt and one follow-up, print both, build the result."""
        response = self._chat(prompt)
        print(f"\n💬 {answer_label}:\n{response}")

        print(f"\n❓ Follow-up: {follow_up}")
        follow_up_response = self._chat(follow_up)
        print(f"💬 Response:\n{follow_up_response}")

        return {
            "scenario": scenario,
            "initial_response": response,
            "followup_response": follow_up_response,
            "status": self.SUCCESS_STATUS,
        }

    def scenario_1_explain_match(self) -> dict:
        """Scenario 1: explain why a candidate is or is not a fit for a job."""
        self._print_banner("SCENARIO 1: Explain Candidate-Job Match")
        self.reset_conversation()

        candidate_cv = textwrap.dedent("""
            Senior Python Developer
            Skills: Python 10 years, FastAPI 4 years, Docker, Kubernetes, PostgreSQL, Redis
            Experience:
            - Led team of 5 developers at TechCorp (3 years)
            - Built microservices architecture serving 1M+ users
            - Open source contributor (Flask, requests)
        """)

        job_description = textwrap.dedent("""
            Senior Backend Engineer - Python/FastAPI
            Location: Remote
            Responsibilities:
            - Design and implement scalable APIs
            - Lead technical decisions for backend team
            - Mentor junior developers
            Requirements:
            - 5+ years Python experience
            - FastAPI or similar framework
            - Docker & container orchestration knowledge
            - Team leadership experience
        """)

        extracted_skills = self.skill_extractor.extract_skills_hybrid(candidate_cv)

        prompt = (
            "\n"
            "I have a candidate with this profile:\n"
            f"{candidate_cv}\n"
            "\n"
            f"Extracted skills: {self._format_skills(extracted_skills, 10)}\n"
            "\n"
            "For this job:\n"
            f"{job_description}\n"
            "\n"
            "Explain why this candidate is a good or bad fit, in 3-4 sentences. "
            "Focus on skill alignment and experience.\n"
        )

        print("\n📋 Candidate CV:")
        print(candidate_cv)
        print("\n📋 Job Description:")
        print(job_description)
        print(f"\n🔍 Extracted skills: {self._format_skills(extracted_skills, 8)}")

        return self._run_dialogue(
            "explain_match",
            prompt,
            "Chatbot Analysis",
            "What are the top 3 risks with this candidate?",
        )

    def scenario_2_compare_candidates(self) -> dict:
        """Scenario 2: compare two candidates for the same role."""
        self._print_banner("SCENARIO 2: Compare Candidates for Same Role")
        self.reset_conversation()

        candidate_a = textwrap.dedent("""
            Software Engineer
            Skills: Python 8 years, Django 5 years, JavaScript, React, AWS, PostgreSQL
            Experience:
            - Full-stack developer at StartupX (4 years)
            - Shipped 3 major products
            - No team leadership experience
            - Bachelor's in CS
        """)

        candidate_b = textwrap.dedent("""
            Tech Lead
            Skills: Python 6 years, FastAPI 3 years, Docker, Kubernetes, AWS, Team leadership
            Experience:
            - Led backend team of 3 at EstablishedCorp (2 years)
            - Backend architect, migrated monolith to microservices
            - 2 years team leadership
            - Master's in Computer Science
        """)

        role_desc = textwrap.dedent("""
            Senior Backend Engineer - Team Leadership Track
            - 5+ years backend development
            - Team leadership experience preferred
            - FastAPI or similar modern framework
            - Cloud deployment (AWS)
        """)

        prompt = (
            "\n"
            "Compare these 2 candidates for this role:\n"
            "\n"
            "**Candidate A:**\n"
            f"{candidate_a}\n"
            "\n"
            "**Candidate B:**\n"
            f"{candidate_b}\n"
            "\n"
            "**Role:**\n"
            f"{role_desc}\n"
            "\n"
            "Which candidate is better suited? Create a quick comparison table "
            "with pros/cons.\n"
        )

        print("\n👤 Candidate A:")
        print(candidate_a)
        print("\n👤 Candidate B:")
        print(candidate_b)
        print("\n📋 Role Description:")
        print(role_desc)

        return self._run_dialogue(
            "compare_candidates",
            prompt,
            "Comparison",
            "If I can only hire one, who should it be and why?",
        )

    def scenario_3_ideal_profile(self) -> dict:
        """Scenario 3: describe the ideal candidate profile for a role."""
        self._print_banner("SCENARIO 3: Define Ideal Profile for Role")
        self.reset_conversation()

        job_description = textwrap.dedent("""
            Data Engineer
            Location: San Francisco
            We're building a real-time data pipeline for a high-frequency trading platform.

            Responsibilities:
            - Design and maintain ETL pipelines
            - Build data infrastructure on cloud
            - Optimize query performance
            - Mentor data analysts

            Tech stack: Python, Spark, Kafka, PostgreSQL, GCP, Airflow

            Company: 5-year-old fintech startup, $200M funding
        """)

        prompt = (
            "\n"
            "Describe the ideal candidate profile for this role. Consider:\n"
            "- Technical skills (specific tools, languages)\n"
            "- Experience depth needed\n"
            "- Soft skills\n"
            "- Team fit\n"
            "- Growth potential\n"
            "\n"
            "Role details:\n"
            f"{job_description}\n"
            "\n"
            "Be specific: what's the exact experience level, what tools matter most?\n"
        )

        print("\n📋 Job Description:")
        print(job_description)

        return self._run_dialogue(
            "ideal_profile",
            prompt,
            "Ideal Profile",
            "How would you weight these requirements? Which are must-have vs nice-to-have?",
        )


def main():
    """Run all chatbot scenarios, print a summary and write a JSON report.

    Returns:
        int: ``0`` when every scenario completed without raising; ``1`` when
        the API key is missing, the tester cannot be initialised, or at least
        one scenario failed.
    """
    print("\n🤖 AI Talent Finder — Chatbot Quality Testing")
    print("Testing 3 real recruiter scenarios")

    api_key = os.getenv("ANTHROPIC_API_KEY")
    if not api_key:
        print("\n❌ ERROR: ANTHROPIC_API_KEY not set")
        print("Export it: export ANTHROPIC_API_KEY='sk-...'")
        return 1

    print("✅ Anthropic API configured")

    try:
        tester = ChatbotQualityTester(api_key)
    except Exception as e:
        print(f"❌ Failed to initialize chatbot: {e}")
        return 1

    # (result key, bound scenario method), run in this order.
    scenarios = (
        ("explain_match", tester.scenario_1_explain_match),
        ("compare_candidates", tester.scenario_2_compare_candidates),
        ("ideal_profile", tester.scenario_3_ideal_profile),
    )

    results = []
    success_count = 0
    for number, (scenario_key, run_scenario) in enumerate(scenarios, start=1):
        # One failing scenario must not stop the remaining ones, so every
        # exception is recorded in the report instead of propagating.
        try:
            results.append(run_scenario())
        except Exception as e:
            print(f"\n❌ Scenario {number} failed: {e}")
            results.append({"scenario": scenario_key, "status": f"❌ FAILED: {e}"})
        else:
            # A scenario passes when it returns without raising; this avoids
            # depending on the exact text of its status string.
            success_count += 1

    print("\n" + "=" * 70)
    print("TEST SUMMARY")
    print("=" * 70)

    total_count = len(results)

    for r in results:
        scenario = r.get("scenario", "unknown").replace("_", " ").title()
        status = r.get("status", "?")
        print(f"{status} — {scenario}")

    print(f"\n📊 Result: {success_count}/{total_count} scenarios passed")

    report_path = Path(__file__).parent / "reports" / "chatbot_quality_test.json"
    report_path.parent.mkdir(exist_ok=True)

    # Statuses and model replies contain non-ASCII text; write it as UTF-8
    # rather than relying on the platform default encoding.
    with open(report_path, "w", encoding="utf-8") as f:
        json.dump(results, f, indent=2, ensure_ascii=False)

    print(f"📄 Report saved to: {report_path}")

    return 0 if success_count == total_count else 1


if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    sys.exit(main())