""" Scenario registry — 8 EU AI Act compliance audit scenarios across 3 difficulty tiers. Investigation-grade: Each tool returns realistic regulatory documents that require analysis to identify violations. No pre-digested verdicts — the agent must reason about the evidence to find compliance gaps. Easy (2): Clear-cut systems, shorter documents, obvious violations Medium (3): Detailed documents with statistical evidence, red herrings mixed in Hard (3): Ambiguous framing, misleading deployer claims, compound violations """ from __future__ import annotations import random from typing import Dict, List, Optional from server.engine import AuditScenario, StateGraph, StateNode, Transition # --------------------------------------------------------------------------- # Unique state graph builder # --------------------------------------------------------------------------- def _build_scenario_graph( investigation_tools: List[str], is_prohibited: bool = False, ) -> StateGraph: """Build a state graph unique to this scenario's investigation path. Only tools in `investigation_tools` create progress transitions through investigation nodes. Other investigation tools are allowed but produce no_effect. This gives each scenario a distinct graph topology. Args: investigation_tools: Ordered list of investigation tool names forming the progress path (e.g. ["check_documentation", "audit_training_data"]). is_prohibited: If True, classification leads directly to findings (no extended investigation needed for prohibited systems). 
""" g = StateGraph() # Investigation tool → node mapping TOOL_NODES = { "check_documentation": ("docs_reviewed", "Documentation Reviewed"), "audit_training_data": ("data_audited", "Training Data Audited"), "verify_human_oversight": ("oversight_checked", "Human Oversight Verified"), "check_transparency": ("transparency_checked", "Transparency Checked"), "assess_risk_management": ("risk_assessed", "Risk Management Assessed"), "check_logging": ("logging_checked", "Logging Verified"), } ALL_INVESTIGATION_TOOLS = list(TOOL_NODES.keys()) # Always-present nodes g.add_node(StateNode("initial", "Audit Assigned", is_start=True)) g.add_node(StateNode("overview", "System Overview Gathered")) g.add_node(StateNode("classified", "Risk Classification Done")) g.add_node(StateNode("findings_submitted", "Findings Submitted")) g.add_node(StateNode("remediation_proposed", "Remediation Recommended")) g.add_node(StateNode("resolved", "Compliance Verified", is_terminal=True)) # Add nodes only for tools in the investigation path for tool in investigation_tools: node_id, label = TOOL_NODES[tool] g.add_node(StateNode(node_id, label)) # --- Build progress chain --- # initial → overview → classified → [investigation tools...] 
→ findings → remediation → resolved g.add_transition(Transition("initial", "overview", "get_system_overview", "progress", description="Gather system overview and deployment context")) g.add_transition(Transition("overview", "classified", "classify_system", "progress", description="Classify the AI system risk category")) if is_prohibited: # Prohibited: classify → findings directly g.add_transition(Transition("classified", "findings_submitted", "submit_finding", "progress", description="Report prohibited AI system")) else: # Chain investigation tools in order prev_state = "classified" for tool in investigation_tools: node_id = TOOL_NODES[tool][0] g.add_transition(Transition(prev_state, node_id, tool, "progress", description=f"Progress: {tool}")) prev_state = node_id g.add_transition(Transition(prev_state, "findings_submitted", "submit_finding", "progress", description="Submit compliance findings")) g.add_transition(Transition("findings_submitted", "remediation_proposed", "recommend_fix", "progress", description="Propose remediation actions")) g.add_transition(Transition("remediation_proposed", "resolved", "verify_compliance", "progress", description="Final compliance determination")) # --- No-effect transitions (investigation tools not in the path) --- # These tools work (return data) but don't advance the state graph all_progress_states = ["initial", "overview", "classified"] for tool in investigation_tools: all_progress_states.append(TOOL_NODES[tool][0]) for tool in ALL_INVESTIGATION_TOOLS: if tool not in investigation_tools: # Can call from any state, no effect on graph for state_id in all_progress_states: g.add_transition(Transition(state_id, state_id, tool, "no_effect", description=f"{tool} not required for this scenario")) # --- No-effect for premature terminal actions --- for state in all_progress_states: if state not in ("findings_submitted", "remediation_proposed"): g.add_transition(Transition(state, state, "verify_compliance", "no_effect", 
description="Cannot verify before completing audit")) # --- Worsened transitions (trap actions) --- g.add_transition(Transition("initial", "initial", "classify_system", "worsened", description="Classifying without understanding the system first")) for state in ["initial", "overview", "classified"]: g.add_transition(Transition(state, state, "recommend_fix", "worsened", description="Proposing fixes before identifying problems")) for state in ["initial", "overview"]: g.add_transition(Transition(state, state, "submit_finding", "no_effect", description="Submitting findings before investigation")) return g # --------------------------------------------------------------------------- # EASY SCENARIOS # --------------------------------------------------------------------------- def _easy_chatbot() -> AuditScenario: s = AuditScenario( scenario_id="easy_chatbot_transparency_001", title="Customer Service Chatbot — Transparency Audit", difficulty="easy", description=( "A customer service chatbot deployed on a retail website needs compliance " "review. The system uses a general-purpose LLM to answer product questions. " "Initial assessment suggests it may be missing required transparency " "disclosures under Article 50. Conduct a focused audit." ), system_name="RetailAssist Chatbot", system_description=( "General-purpose LLM-based chatbot answering customer questions about " "products, shipping, and returns. No biometric processing, no profiling, " "no high-risk decisions. Deployed on company website with __USERCOUNT__ " "daily users." 
), system_category="limited_risk", deployer_info="__COMPANY__ — mid-size EU e-commerce retailer, 200 employees, operating in DE/FR/NL.", correct_classification="limited_risk", ground_truth_findings=[ "missing_ai_disclosure", "no_human_escalation_option", ], required_remediation=[ "add_ai_disclosure_banner", "implement_human_handoff", ], red_herrings=[ "high_risk_automated_decision", ], # --- Investigation documents --- documentation_data="""\ TECHNICAL DOCUMENTATION INVENTORY System: RetailAssist Chatbot __VERSION__ Deployer: __COMPANY__ Review Date: __DATE__ DOCUMENT STATUS: System Architecture Overview Present (updated 2025-11-03) Data Processing Description Present (updated 2025-09-22) Input/Output Specification Present (updated 2025-11-03) Performance Metrics Report Present (accuracy 94.2% on test set) User Interaction Guidelines Present (updated 2025-06-10) ANNEX IV CROSS-REFERENCE (for limited-risk, advisory only): Section 1(a) Intended purpose Documented Section 1(b) Deployer identification Documented Section 2(a) Development methods Documented NOTE: Documentation is generally adequate for a limited-risk system. The system processes no biometric data and makes no legally significant decisions. Standard chatbot architecture with retrieval-augmented generation. No gaps identified in core technical documentation. 
The primary compliance concern for this system category relates to Article 50 transparency obligations, not documentation completeness.""", training_data_info="""\ TRAINING DATA SUMMARY System: RetailAssist Chatbot __VERSION__ Dataset: Product catalog entries + FAQ question-answer pairs Volume: 52,847 examples (last updated 2025-10-15) DATA COMPOSITION: Product descriptions 31,204 entries FAQ pairs 14,892 entries Customer service transcripts 6,751 entries (anonymized) Personal data in training set: None identified - Customer transcripts were fully anonymized before inclusion - No names, emails, or account numbers in training data - PII scrubbing verified by DPO on 2025-08-20 Bias assessment: Not formally required for limited-risk chatbot systems. The system provides product information and does not make decisions affecting individuals' rights or access to services. Data governance: Adequate — data sources documented, retention policy in place (36-month cycle), access controls implemented.""", oversight_info="""\ HUMAN OVERSIGHT PROCEDURES System: RetailAssist Chatbot __VERSION__ Department: Customer Experience Team CURRENT PROCESS: - Customer service team of 12 agents monitors a dashboard showing flagged conversations (profanity filter, sentiment < 0.3, repeat queries) - Approximately 8% of conversations are flagged for human review - Average response time for flagged conversations: 4.2 minutes - Team operates during business hours (08:00-20:00 CET, Mon-Sat) ESCALATION PROCEDURE: The chatbot displays a generic "Was this helpful?" prompt after each interaction. If the user clicks "No", the chatbot offers to repeat the answer or try a different phrasing. There is no option for the user to request transfer to a human agent during the conversation. The "Contact Us" page exists separately on the website but is not linked from the chat interface. After business hours, flagged conversations queue until the next business day. 
No real-time human intervention is available outside business hours. OVERRIDE CAPABILITY: Customer service agents can take over any active conversation from the monitoring dashboard, but this is agent-initiated, not customer-initiated. Customers have no mechanism to trigger human intervention directly.""", transparency_info="""\ TRANSPARENCY & DISCLOSURE REVIEW System: RetailAssist Chatbot __VERSION__ USER INTERFACE ANALYSIS: The chatbot appears as a widget in the bottom-right corner of the website. The chat window header reads "RetailAssist — How can I help?" with a generic chat bubble icon. Opening message: "Hi! I'm here to help you find products, track orders, and answer questions about our store. What can I do for you?" The chat interface includes: - Text input field - Send button - Emoji picker - "Was this helpful?" feedback after each response SEARCHED FOR AI DISCLOSURE INDICATORS: - Chat header: No mention of AI or automated system - Opening message: No mention of AI or automated system - Website footer: Standard legal links (Privacy, Terms, Cookies) - Terms of Service: Section 7.3 mentions "automated tools may assist customer service" but this is not shown to users during chat interaction - Privacy Policy: Section 4.1 mentions "AI-powered features" in general terms but does not specifically disclose the chatbot as AI - Chat widget tooltip: "Chat with us" (no AI indication) - Accessibility statement: No mention of AI Article 50(1) requires: "Providers shall ensure that AI systems intended to directly interact with natural persons are designed and developed in such a way that the natural persons concerned are informed that they are interacting with an AI system." CONTENT LABELING: Not applicable — system generates text responses only, no deepfakes or synthetic media requiring Article 50(2) labeling.""", risk_assessment_info="""\ RISK CLASSIFICATION ANALYSIS System: RetailAssist Chatbot __VERSION__ ANNEX III HIGH-RISK CHECK: 1. 
Biometric identification Not applicable 2. Critical infrastructure Not applicable 3. Education and vocational training Not applicable 4. Employment, workers management Not applicable 5. Access to essential services Not applicable — provides product information only, no credit/insurance/social services decisions 6. Law enforcement Not applicable 7. Migration, asylum, border control Not applicable 8. Administration of justice Not applicable ARTICLE 5 PROHIBITED PRACTICES CHECK: Subliminal manipulation Not applicable Exploitation of vulnerabilities Not applicable Social scoring Not applicable Real-time biometric identification Not applicable RISK LEVEL DETERMINATION: Limited Risk The system falls under Article 50 transparency obligations as an AI system designed to directly interact with natural persons. No high-risk or prohibited characteristics identified. CONFORMITY ASSESSMENT: Not required for limited-risk systems. Voluntary code of conduct adherence recommended per Article 95.""", logging_info="""\ LOGGING & TRACEABILITY REVIEW System: RetailAssist Chatbot __VERSION__ CURRENT LOGGING: Conversation logs: Retained for 30 days, then anonymized + archived Model version: Tracked per deployment (current: __VERSION__) User feedback: Stored with conversation ID System health metrics: Uptime, response latency, error rate AUDIT TRAIL: Each conversation has a unique ID linking: - User session (anonymized) - Model version used - All messages in sequence - Feedback received - Any flags triggered Logs are adequate for a limited-risk system. Article 12 automatic logging requirements apply primarily to high-risk systems. 
Current logging exceeds what is required for the system's risk level.""", ) # Unique graph: only transparency and oversight are in the investigation path s.graph = _build_scenario_graph( investigation_tools=["check_transparency", "verify_human_oversight"], ) return s def _easy_recommendation() -> AuditScenario: s = AuditScenario( scenario_id="easy_recommendation_minimal_001", title="Music Recommendation Engine — Minimal Risk Verification", difficulty="easy", description=( "A music streaming platform's recommendation algorithm needs EU AI Act " "classification. The system suggests songs based on listening history. " "Stakeholders believe it's minimal risk but need formal verification." ), system_name="MelodyMatch Algorithm", system_description=( "Collaborative filtering recommendation engine for music. Suggests songs " "based on listening patterns. No personal profiling beyond music preferences. " "No decisions affecting legal rights. Pure content personalization for " "__USERCOUNT__ EU users." ), system_category="minimal_risk", deployer_info="__COMPANY__ — Swedish music streaming startup, 50 employees.", correct_classification="minimal_risk", ground_truth_findings=[ "voluntary_code_of_conduct_recommended", ], required_remediation=[ "adopt_voluntary_transparency_code", ], red_herrings=[ "gdpr_violation", "high_risk_profiling", ], documentation_data="""\ TECHNICAL DOCUMENTATION INVENTORY System: MelodyMatch Algorithm __VERSION__ Deployer: __COMPANY__ DOCUMENT STATUS: System Architecture Present (hybrid collaborative filtering) Algorithm Description Present (item-item CF + content embeddings) Data Pipeline Documentation Present (Spark ETL pipeline) Performance Metrics Present (hit@10: 0.342, NDCG: 0.281) API Documentation Present (REST API for mobile/web clients) All core technical documents are present and current. 
The system is a standard recommendation engine with no novel or experimental components requiring additional documentation.""", training_data_info="""\ TRAINING DATA SUMMARY System: MelodyMatch Algorithm __VERSION__ Dataset: Anonymized listening history from __USERCOUNT__ users Volume: 10.3M user-song interactions (2023-2025) DATA COMPOSITION: Interaction types: Play, skip, save, playlist-add User features: Pseudonymized user ID, country, subscription tier Song features: Genre, tempo, energy, valence, artist, release year Personal data assessment: - User IDs are pseudonymized (SHA-256 hash, no reversal possible) - No names, emails, or demographic data in training set - Country used for regional catalog filtering only - GDPR Article 6(1)(f) legitimate interest basis documented Bias considerations: Music recommendations do not involve protected characteristics. Popularity bias exists (mainstream content recommended more often) but this does not constitute discrimination under the AI Act. No individuals are disadvantaged in access to services or rights. Note: Some stakeholders raised concerns about "profiling" under GDPR. Music taste profiling for recommendations is distinct from profiling under the AI Act which concerns evaluation of personal aspects affecting natural persons' rights. Recommendation of entertainment content does not qualify as high-risk profiling.""", oversight_info="""\ HUMAN OVERSIGHT ASSESSMENT System: MelodyMatch Algorithm __VERSION__ The EU AI Act does not impose specific human oversight requirements on minimal-risk AI systems. Article 14 human oversight obligations apply to high-risk AI systems only. 
Current oversight measures (voluntary): - Product team reviews recommendation quality metrics weekly - A/B testing framework validates algorithm changes before rollout - Content moderation team can flag/remove specific recommendations - Users can provide feedback ("Don't recommend this") per track""", transparency_info="""\ TRANSPARENCY REVIEW System: MelodyMatch Algorithm __VERSION__ USER-FACING DISCLOSURES: The app includes a "How recommendations work" info page accessible from Settings > Privacy > Personalization, which states: "MelodyMatch uses your listening history to suggest music you might enjoy. Our algorithm analyzes patterns in what you play, skip, and save to find similar tracks and artists." Recommendations are labeled "Recommended for you" and "Based on your recent listening" in the UI. Article 50 transparency obligations for systems interacting directly with natural persons apply to chatbots and emotion recognition, not to content recommendation systems. No mandatory transparency obligations identified for this system category. Voluntary transparency: The deployer has voluntarily disclosed the algorithmic nature of recommendations, which aligns with Article 95 (voluntary codes of conduct for non-high-risk AI).""", risk_assessment_info="""\ RISK CLASSIFICATION ANALYSIS System: MelodyMatch Algorithm __VERSION__ ANNEX III HIGH-RISK CHECK: 1. Biometric identification Not applicable 2. Critical infrastructure Not applicable 3. Education and vocational training Not applicable 4. Employment, workers management Not applicable 5. Access to essential services Not applicable 6. Law enforcement Not applicable 7. Migration, asylum, border control Not applicable 8. Administration of justice Not applicable ARTICLE 5 PROHIBITED PRACTICES: None applicable — content recommendation is not subliminal manipulation, social scoring, or biometric identification. RISK LEVEL DETERMINATION: Minimal Risk The system recommends entertainment content. 
It does not make decisions affecting natural persons' rights, access to services, or legal status. No Annex III category applies. Under Article 95, deployers of minimal-risk AI systems are encouraged to voluntarily apply codes of conduct for transparency and trustworthiness, but this is not mandatory.""", logging_info="""\ LOGGING REVIEW System: MelodyMatch Algorithm __VERSION__ Article 12 automatic logging requirements apply to high-risk AI systems. This minimal-risk system is not subject to those obligations. Current logging (voluntary): Model version tracking: Yes (per deployment) Recommendation logs: Retained 90 days A/B test results: Archived indefinitely User feedback: Linked to recommendation events Logging is adequate and exceeds requirements for the risk level.""", ) # Minimal graph — mainly just classification verification s.graph = _build_scenario_graph( investigation_tools=["check_transparency", "assess_risk_management"], ) return s # --------------------------------------------------------------------------- # MEDIUM SCENARIOS # --------------------------------------------------------------------------- def _medium_hiring() -> AuditScenario: s = AuditScenario( scenario_id="medium_hiring_bias_001", title="AI Resume Screener — High-Risk Employment System Audit", difficulty="medium", description=( "A recruitment platform uses AI to screen and rank job applicants. " "This falls under Annex III (employment, workers management) making it " "a candidate for HIGH-RISK classification. Reports suggest potential " "gender bias in technical role screening. Full compliance audit required." ), system_name="TalentRank AI Screener", system_description=( "NLP-based resume screening system that scores and ranks applicants for " "job openings. Processes name, education, work history, skills. Used by " "500+ EU companies for initial candidate filtering. Automated decisions " "affect who gets interviews. 
Deployed in __REGION__ with __USERCOUNT__ " "applicants processed to date." ), system_category="high_risk", deployer_info="__COMPANY__ — French HR-tech company, 300 employees, serving 500+ EU enterprise clients.", correct_classification="high_risk", ground_truth_findings=[ "gender_bias_in_technical_screening", "insufficient_human_oversight", "incomplete_technical_documentation", "missing_fundamental_rights_impact_assessment", "inadequate_data_governance", ], required_remediation=[ "conduct_bias_audit_and_mitigation", "implement_human_review_for_all_rejections", "complete_annex_iv_technical_documentation", "perform_fundamental_rights_impact_assessment", "establish_data_governance_framework", ], red_herrings=[ "prohibited_social_scoring", "biometric_processing", ], documentation_data="""\ TECHNICAL DOCUMENTATION INVENTORY System: TalentRank AI Screener __VERSION__ Deployer: __COMPANY__ Audit Date: __DATE__ DOCUMENT STATUS MATRIX: Document Status Last Updated ───────────────────────────────────────────────────────────────── System Architecture Overview Present 2025-11-03 Algorithm Description (NLP Pipeline) Present 2025-09-22 Input Data Specification Present 2025-11-03 Output Specification Present 2025-11-03 Performance Metrics Report Absent — Accuracy by Demographic Group Report Absent — Data Governance Policy Absent — Fundamental Rights Impact Assessment Absent — Risk Management Plan Present 2024-08-15 Post-Market Monitoring Plan Draft 2025-12-01 User Instructions (Article 13) Partial 2025-06-10 Change Management Log Present 2025-11-28 EU AI ACT ANNEX IV CROSS-REFERENCE: Section 1 — General Description (a) Intended purpose Documented (b) Deployer name and contact Documented (c) Version and update history Documented Section 2 — Detailed Description (a) Development methods and process Documented (b) Design specifications and rationale Not documented — no architecture diagrams for the scoring pipeline (c) Data requirements and provenance Partial — data sources listed 
but no data governance policy document (d) Metrics and performance benchmarks Not documented — no disaggregated performance metrics by demographic group (e) Computational resources Documented (f) Expected lifetime and maintenance Documented (g) Quality management procedures Not documented Section 3 — Monitoring and Reporting (a) Post-market monitoring plan Draft only — not finalized (b) Logging requirements specification Documented OBSERVATIONS: The Risk Management Plan was last updated 18 months ago. It does not reference the EU AI Act, harmonised standards, or Article 9 requirements. The Post-Market Monitoring Plan remains in draft status with no target completion date established. Article 13 user instructions do not include information about the system's accuracy levels, known limitations, or demographic performance variation. No Fundamental Rights Impact Assessment has been conducted.""", training_data_info="""\ TRAINING DATA AUDIT REPORT System: TalentRank AI Screener __VERSION__ Dataset: Historical hiring decisions from EU enterprise clients Records: 2,147,832 applications (2019-2025) Last model retrain: 2025-10-15 DEMOGRAPHIC DISTRIBUTION OF TRAINING DATA: Category Count Pct of Total ──────────────────────────────────────────────── Male applicants 1,342,104 62.5% Female applicants 805,728 37.5% Age 18-30 751,741 35.0% Age 31-45 923,168 43.0% Age 46-65 472,923 22.0% Western EU origin 1,567,516 73.0% Eastern EU origin 580,316 27.0% CALLBACK RATES BY DEMOGRAPHIC (Technical Roles Only): Group Applications Callbacks Rate vs Baseline ────────────────────────────────────────────────────────────────────── Male applicants 487,204 166,624 34.2% (baseline) Female applicants 198,731 52,266 26.3% -23.1% Age 18-30 241,367 91,961 38.1% +11.4% Age 31-45 298,422 100,927 33.8% -1.2% Age 46-65 146,146 41,924 28.7% -16.1% Western EU 512,489 179,884 35.1% +2.6% Eastern EU 173,446 47,547 27.4% -19.9% Note: Callback rate = percentage of applicants advanced to interview stage. 
Baseline = overall male callback rate for technical roles. DATA SOURCES: Source Records Period Documented ──────────────────────────────────────────────────────────────── Enterprise client ATS exports 1,847,291 2019-2025 Yes Partner recruiting platforms 203,114 2021-2025 Yes Internal test applications 97,427 2024-2025 Yes DATA GOVERNANCE: Data retention policy: Present (5-year retention) Source documentation: 47 of 52 sources documented Personal data inventory: Names, education, work history, skills Consent mechanism: Employer-level agreements in place; individual applicant consent not obtained Article 10 specific provisions: Not addressed in current data governance Data quality assessment: Last conducted 2024-06-12 ADDITIONAL CONTEXT: A separate fraud detection module shares the same data lake infrastructure but operates independently with its own pipeline. The fraud detection module passed its most recent bias audit with no findings (2025-09-01). The fraud system's compliance status has no bearing on TalentRank's compliance obligations.""", oversight_info="""\ HUMAN OVERSIGHT PROCEDURES System: TalentRank AI Screener __VERSION__ Department: Client Success & Quality Assurance REVIEW PROCESS: TalentRank's screening pipeline processes applications in three stages: Stage 1 — Automated Screening (AI): All incoming applications are scored by the AI model (0-100). Applications scoring below the client-configured threshold (default: 40) are automatically marked as "Not Progressed." Stage 2 — Quality Sampling: The QA team reviews a random sample of screened applications. Current sampling rate: 5% of "Not Progressed" decisions. Sampling is conducted weekly in batch. Stage 3 — Client Review: Applications marked "Progressed" are presented to the hiring manager for final interview selection. Clients may also view "Not Progressed" applications if they choose, but fewer than 2% of clients do so. 
REVIEW STATISTICS (Q4 2025): Applications processed: 347,291 Automatically rejected: 208,375 (60.0%) QA sample reviewed: 10,419 (5.0% of rejections) QA overrides (rejection → pass): 312 (3.0% of samples) Client-initiated reviews: 4,166 (2.0% of clients) OVERRIDE CAPABILITY: Both QA staff and client hiring managers can override any AI decision. The override interface is accessible from the application dashboard. However, the system does not proactively flag borderline cases or indicate confidence scores to reviewers. MONITORING: No ongoing bias monitoring system is in place. The QA sampling is focused on general quality, not demographic fairness. No automated alerts exist for drift in rejection rates across demographic groups.""", transparency_info="""\ TRANSPARENCY & USER NOTIFICATION REVIEW System: TalentRank AI Screener __VERSION__ APPLICANT-FACING COMMUNICATIONS: At the time of application, candidates see the following notice in the application portal footer (8pt font, light gray text): "By submitting your application, you agree that your information may be processed using automated tools to assist in the evaluation process." No further information is provided about: - The specific role of AI in screening decisions - The logic involved in the automated processing - The significance and envisaged consequences for the applicant - The applicant's right to obtain human intervention - The applicant's right to contest the decision Rejection notifications are sent via email with the text: "After careful review, we have decided not to progress your application at this time. We wish you the best in your search." No mention is made that the decision was automated or that AI was involved in the screening process. DEPLOYER-FACING (CLIENT) INFORMATION: Client onboarding materials describe TalentRank as an "AI-powered screening solution" with "proprietary NLP scoring." 
Clients receive a product sheet with overall accuracy metrics (precision: 0.82, recall: 0.71) but no demographic disaggregation. RIGHT TO EXPLANATION: No mechanism exists for applicants to request an explanation of how the AI arrived at its scoring decision. The company's privacy policy references GDPR Article 22 but states "meaningful human involvement exists in the hiring process" without specifying the extent of that involvement.""", risk_assessment_info="""\ RISK MANAGEMENT & CONFORMITY ASSESSMENT System: TalentRank AI Screener __VERSION__ ANNEX III CLASSIFICATION: Category 4 — Employment, workers management and access to self-employment Sub-category: AI systems intended to be used for recruitment or selection of natural persons, for making decisions affecting terms of work-related relationships, or for task allocation based on individual behavior. This system screens and ranks job applicants. It directly affects which candidates are progressed to interview, constituting a decision that impacts access to employment. CONFORMITY ASSESSMENT STATUS: Internal conformity assessment (Article 43): Not initiated Quality management system (Article 17): Basic framework exists but does not address AI-specific requirements EU Declaration of Conformity (Article 47): Not filed CE marking (Article 48): Not applied RISK MANAGEMENT SYSTEM (Article 9): A risk management plan was created in August 2024, prior to the EU AI Act application date. The plan covers general software risks (availability, data integrity) but does not address: - AI-specific risks (bias, drift, adversarial inputs) - Residual risk assessment methodology - Risk control measures for demographic fairness - Foreseeable misuse scenarios POST-MARKET MONITORING (Article 72): A post-market monitoring plan is in draft status. It outlines monitoring of system uptime and client satisfaction scores. 
It does not include: - Performance monitoring by demographic group - Bias drift detection mechanisms - Incident reporting procedures to national authorities - Criteria for triggering corrective action""", logging_info="""\ AUTOMATIC LOGGING & TRACEABILITY REVIEW System: TalentRank AI Screener __VERSION__ CURRENT LOGGING IMPLEMENTATION: Event Type Logged Format Retention ──────────────────────────────────────────────────────── Application received Yes JSON 5 years AI score generated Yes Numeric 5 years Threshold decision Yes Boolean 5 years Input features used No — — Model version Yes String Indefinite QA review action Yes JSON 5 years Client override Yes JSON 5 years Score explanation No — — ARTICLE 12 REQUIREMENTS CHECK: Article 12(1) requires high-risk AI systems to include logging capabilities that enable recording of events relevant to: (a) Identifying situations that may result in risks: Partially met — threshold decisions logged, but no logging of cases where the model's confidence was low or where the decision was borderline. (b) Facilitating post-market monitoring: Not met — input features are not logged, making it impossible to reconstruct why a particular score was assigned or to detect feature drift over time. (c) Ensuring traceability of the AI system's functioning: Partially met — model version and scores are tracked but input-output pairs are not linked, preventing full reproducibility of decisions. IDENTIFIED GAPS: 1. Input features are not logged alongside output scores 2. No confidence/uncertainty indicators are recorded 3. Logs do not support demographic performance analysis 4. 
No automated alerting on logging failures""", # --- Deep-dive documents (revealed on repeat tool calls) --- deep_training_data="""\ DEEP DIVE: TRAINING DATA FORENSIC ANALYSIS Re-investigation of TalentRank AI Screener __VERSION__ INTERSECTIONAL ANALYSIS (not available in initial audit): Group Applications Callbacks Rate ──────────────────────────────────────────────────────────── Female + Eastern EU 41,287 8,669 21.0% Female + Age 46-65 28,904 6,647 23.0% Male + Western EU 398,721 147,527 37.0% Male + Age 18-30 189,234 75,694 40.0% Intersectional analysis reveals compounding bias: female applicants from Eastern EU have the lowest callback rate at 21.0% — a 38.6% gap versus the highest-performing group (young Western EU males). FEATURE IMPORTANCE ANALYSIS (extracted from model internals): Top 10 predictive features for "Not Progressed" decision: 1. years_experience_gap (0.142) — penalizes career gaps 2. education_prestige_score (0.098) — biased toward Western EU universities 3. keyword_density_technical (0.087) — favors specific technical jargon 4. name_encoding_cluster (0.076) — CONCERNING: name-derived feature 5. employment_continuity (0.071) — penalizes parental leave gaps 6. skills_match_score (0.065) 7. recency_weighted_exp (0.058) 8. industry_match (0.052) 9. location_cluster (0.048) — correlates with Eastern/Western EU 10. application_completeness (0.041) Features #1, #4, #5, and #9 have documented correlations with protected characteristics (gender, ethnicity, national origin). Feature #4 (name_encoding_cluster) appears to encode ethnic origin.""", deep_oversight="""\ DEEP DIVE: HUMAN OVERSIGHT FORENSIC ANALYSIS Re-investigation of review process effectiveness QA OVERRIDE ANALYSIS (detailed breakdown): Of the 312 QA overrides in Q4 2025: Female applicants overridden to pass: 187 (60.0%) Male applicants overridden to pass: 125 (40.0%) This suggests QA reviewers are catching gender bias in the AI decisions — but only for the 5% sample they review. 
The remaining 95% of automated rejections are not corrected. ESTIMATED IMPACT: If the QA override rate (3%) applied to ALL automated rejections: Estimated wrongful rejections: 208,375 * 0.03 = ~6,251 applicants Of which female: ~3,751 (based on override demographics) These applicants were automatically rejected without human review and had no mechanism to request reconsideration. CLIENT AWARENESS: Survey of 50 client hiring managers (2025-Q4): - 78% were unaware that AI makes the initial screening decision - 92% believed "a recruiter reviews every application" - 0% knew the QA sampling rate was only 5%""", deep_documentation="""\ DEEP DIVE: DOCUMENTATION GAP ANALYSIS Detailed Annex IV compliance assessment SECTION 2(d) — PERFORMANCE METRICS: The published model card reports aggregate metrics: Precision: 0.82, Recall: 0.71, F1: 0.76 These metrics are NOT disaggregated by any protected characteristic. Article 9(7) requires: "Testing shall be made against specific metrics and probabilistic thresholds that are appropriate to the intended purpose of the high-risk AI system." No per-group metrics exist for: gender, age, nationality, ethnicity. SECTION 2(b) — DESIGN SPECIFICATIONS: The scoring pipeline processes resumes through 4 stages: 1. Text extraction (OCR + NLP parsing) 2. Feature engineering (218 features) 3. Scoring model (XGBoost ensemble) 4. Threshold comparison (client-configurable) Stage 2 (feature engineering) is undocumented. 
def _medium_credit() -> AuditScenario:
    """Assemble the medium-tier consumer credit scoring audit scenario.

    The scenario is classified ``high_risk`` and carries four ground-truth
    findings, four required remediations and two red herrings. Each
    investigation document is built as a named local first and then handed
    to ``AuditScenario``; the state graph's progress path covers five
    investigation tools (``check_logging`` is not part of the path).

    Returns:
        The populated ``AuditScenario`` with its ``graph`` attribute set.
    """
    summary = (
        "A fintech company's AI credit scoring model is under review. The model "
        "determines creditworthiness for consumer loans up to EUR 50,000. "
        "Full compliance audit required under EU AI Act high-risk provisions."
    )
    system_summary = (
        "Gradient-boosted ensemble model scoring creditworthiness using 200+ "
        "features from credit bureau data, transaction history, and alternative "
        "data sources. Automated decisions for loans up to EUR 50K. Serving "
        "__USERCOUNT__ active users across NL/DE/FR."
    )

    # Compliance gaps the auditor is expected to surface.
    expected_findings = [
        "opaque_feature_importance",
        "alternative_data_bias_risk",
        "no_right_to_human_review",
        "missing_conformity_assessment",
    ]
    # Remediation items paired with the findings above.
    expected_remediation = [
        "implement_explainability_module",
        "remove_or_audit_alternative_data_sources",
        "add_human_review_for_rejections",
        "complete_conformity_assessment",
    ]
    # Plausible-looking but incorrect findings.
    distractors = [
        "fraud_detection_non_compliant",
        "gdpr_data_breach",
    ]

    documentation_doc = """\
TECHNICAL DOCUMENTATION INVENTORY
System: CreditFlow AI Score __VERSION__
Deployer: __COMPANY__
Audit Date: __DATE__

DOCUMENT STATUS:
  Document                               Status      Last Updated
  ─────────────────────────────────────────────────────────────────
  System Architecture                    Present     2025-10-12
  Model Card                             Present     2025-08-30
  Feature Engineering Pipeline           Present     2025-10-12
  Performance Metrics (Aggregate)        Present     2025-11-01
  Performance Metrics (Disaggregated)    Absent      —
  Explainability Documentation           Absent      —
  Data Source Inventory                  Present     2025-07-18
  Conformity Declaration                 Absent      —
  User Instructions                      Present     2025-09-05

MODEL CARD SUMMARY (from published card):
  Model type: Gradient-boosted ensemble (XGBoost)
  Features: 218 input features
  Target: Probability of default within 12 months
  AUC-ROC: 0.847 (test set)
  Gini coefficient: 0.694 (test set)

The model card lists aggregate performance metrics but does not
include performance breakdowns by age group, gender, nationality,
or income bracket. Feature importance rankings are described as
"proprietary" and not included in the published card.

ANNEX IV GAPS:
  Section 2(b) — Design specifications: No documentation explaining
  why alternative data sources (social media sentiment, device
  metadata) were included as features or their impact on model
  decisions.
  Section 2(d) — Performance metrics: No demographic disaggregation.
  Section 2(g) — Quality management: Referenced but links to
  outdated ISO 27001 procedures, not AI-specific QMS."""

    training_doc = """\
TRAINING DATA AUDIT REPORT
System: CreditFlow AI Score __VERSION__
Dataset: Loan applications and outcomes (2019-2025)
Records: 3,217,445 applications

FEATURE CATEGORIES:
  Category                Features    Source
  ──────────────────────────────────────────────────
  Credit bureau data      42          TransUnion, Experian
  Transaction history     67          Banking API aggregator
  Application data        31          Direct from applicant
  Alternative data        78          See breakdown below

ALTERNATIVE DATA BREAKDOWN:
  Feature Group           Count       Source
  ──────────────────────────────────────────────────
  Device metadata         23          Browser/mobile fingerprint
  Social media sentiment  18          LinkedIn, public profiles
  Location signals        12          IP geolocation, check-in
  App usage patterns      15          Installed apps, usage freq
  Email domain analysis   10          Provider reputation scoring

Alternative data features were added in v3.8 to improve prediction
for "thin-file" applicants lacking traditional credit history.
Internal validation showed +3.2% AUC improvement.

No bias assessment has been conducted specifically for alternative
data features. Academic literature suggests device metadata and
social media signals can correlate with protected characteristics
including race, income, and education level.

LOAN OUTCOMES BY APPLICANT PROFILE (Approval Rates):
  Age Group    Applications    Approved     Rate
  ────────────────────────────────────────────────
  18-25        482,617         168,916      35.0%
  26-35        1,029,582       586,862      57.0%
  36-50        1,061,753       657,287      61.9%
  51-65        504,930         277,712      55.0%
  65+          138,563         55,425       40.0%

ADDITIONAL CONTEXT:
The company also operates a separate fraud detection system that
uses rule-based heuristics (not ML). This system was audited
independently in 2025-Q3 and found compliant with applicable
regulations. The fraud system does not share models with CreditFlow."""

    oversight_doc = """\
HUMAN OVERSIGHT PROCEDURES
System: CreditFlow AI Score __VERSION__

DECISION WORKFLOW:
Loan applications are processed as follows:
  1. Applicant submits online application
  2. CreditFlow AI generates creditworthiness score (0-1000)
  3. Score is compared against risk threshold:
     - Score >= 650: Automatically approved (up to EUR 25K)
     - Score 450-649: Queued for human review
     - Score < 450: Automatically declined

For loans EUR 25K-50K, all applications require human review
regardless of AI score.

REVIEW STATISTICS (2025):
  Total applications: 892,456
  Auto-approved (< EUR 25K): 401,605 (45.0%)
  Auto-declined: 223,114 (25.0%)
  Human-reviewed: 267,737 (30.0%)

  Of auto-declined applications:
    Appealed by applicant: 8,924 (4.0%)
    Appeal reviewed by human: 8,924 (100% of appeals)
    Appeal overturned: 1,338 (15.0% of appeals)

Note: Applicants must actively submit an appeal through a form
linked in the rejection email. The appeal process is described in
FAQ section 7 of the website (3 clicks from homepage).

HUMAN REVIEWER TOOLS:
Reviewers see the AI score and top-5 contributing features but no
full explanation of the model's reasoning. The reviewer interface
does not highlight cases where the model's confidence is low or
where protected characteristics may be influencing the outcome."""

    transparency_doc = """\
TRANSPARENCY REVIEW
System: CreditFlow AI Score __VERSION__

APPLICANT NOTIFICATIONS:
Application form includes the following notice:
  "Your application will be assessed using automated decision-making
  systems. You have the right to request human review of any
  automated decision."

Rejection email text:
  "Based on our assessment, we are unable to offer you a loan at
  this time. If you wish to understand the main factors behind this
  decision or request a manual review, please contact our support
  team or visit [link]."

The rejection email links to a generic FAQ page. The FAQ states that
decisions are made using "a combination of credit history, financial
data, and statistical models" but does not mention alternative data
sources (social media, device metadata).

RIGHT TO EXPLANATION:
Applicants can request an explanation by contacting support. Support
agents provide a templated response listing the top 3 general factors
(e.g., "credit history length," "income level," "existing debt")
without specifying which exact features or thresholds drove the
specific decision.

No individualized explanation is generated. The support team does not
have access to the model's per-application feature importance
breakdown."""

    risk_doc = """\
RISK MANAGEMENT & CONFORMITY ASSESSMENT
System: CreditFlow AI Score __VERSION__

ANNEX III CLASSIFICATION:
Category 5(b) — AI systems intended to be used to evaluate the
creditworthiness of natural persons.

This system directly determines loan approval/rejection for consumer
credit up to EUR 50,000. It falls squarely within the high-risk
category.

CONFORMITY ASSESSMENT STATUS:
  Internal conformity assessment: Not initiated
  Quality management system: ISO 27001 certified but no AI-specific QMS per Article 17
  EU Declaration of Conformity: Not filed
  CE marking: Not applied

DORA ALIGNMENT:
As a financial services firm, the company has ICT risk management
procedures under DORA (Digital Operational Resilience Act). These
cover general IT risks but do not address AI-specific risks such as
model drift, adversarial inputs, or algorithmic bias.

RISK MANAGEMENT GAPS:
The company's risk register includes traditional financial risks
(credit risk, market risk, operational risk) but does not include:
  - AI model risk as a distinct risk category
  - Bias risk for protected characteristics
  - Alternative data correlation risk
  - Drift detection requirements"""

    logging_doc = """\
LOGGING & TRACEABILITY REVIEW
System: CreditFlow AI Score __VERSION__

CURRENT LOGGING:
  Event Type                Logged    Format     Retention
  ─────────────────────────────────────────────────────
  Application received      Yes       JSON       7 years
  Score generated           Yes       Numeric    7 years
  Decision (approve/deny)   Yes       String     7 years
  Features used             Yes       Vector     7 years
  Model version             Yes       String     Indefinite
  Human review action       Yes       JSON       7 years
  Appeal and outcome        Yes       JSON       7 years

Logging is comprehensive with input-output linkage. All decisions
can be reconstructed. Feature vectors are stored alongside scores,
enabling retrospective audits.

Financial services retention requirements (7 years) are met.
Article 12 logging requirements for high-risk AI appear to be
adequately addressed at the technical level.

NOTE: While individual decision logs are complete, there is no
automated aggregation or monitoring dashboard that tracks
demographic patterns across decisions over time."""

    scenario = AuditScenario(
        scenario_id="medium_credit_scoring_001",
        title="Credit Scoring Model — Financial Services Compliance",
        difficulty="medium",
        description=summary,
        system_name="CreditFlow AI Score",
        system_description=system_summary,
        system_category="high_risk",
        deployer_info="__COMPANY__ — Dutch fintech, 150 employees, licensed in NL/DE/FR.",
        correct_classification="high_risk",
        ground_truth_findings=expected_findings,
        required_remediation=expected_remediation,
        red_herrings=distractors,
        documentation_data=documentation_doc,
        training_data_info=training_doc,
        oversight_info=oversight_doc,
        transparency_info=transparency_doc,
        risk_assessment_info=risk_doc,
        logging_info=logging_doc,
    )

    # Progress path for this scenario: five tools, logging deliberately omitted.
    progress_path = [
        "check_documentation",
        "audit_training_data",
        "verify_human_oversight",
        "check_transparency",
        "assess_risk_management",
    ]
    scenario.graph = _build_scenario_graph(investigation_tools=progress_path)
    return scenario
def _medium_medical() -> AuditScenario:
    """Build the medium-difficulty emergency triage (medical device) scenario.

    The system is classified ``high_risk``. Its evidence documents show a
    device that is MDR-conformant (valid CE marking, so ``missing_ce_marking``
    is a red herring) but has not addressed the additional EU AI Act
    obligations: clinical validation, age-related performance bias,
    real-time monitoring and AI-specific post-market surveillance.

    The state graph's progress path uses all six investigation tools.

    Returns:
        The populated ``AuditScenario`` with its ``graph`` attribute set.
    """
    s = AuditScenario(
        scenario_id="medium_medical_triage_001",
        title="Emergency Triage AI — Medical Device Compliance",
        difficulty="medium",
        description=(
            "A hospital network deployed an AI system that prioritizes emergency "
            "department patients based on vital signs and symptoms. As a medical "
            "device with AI, it falls under both the EU AI Act (high-risk, Annex III) "
            "and the Medical Devices Regulation (MDR). Audit required."
        ),
        system_name="TriageAI Priority System",
        # Deployment spans DE/AT/CH; Switzerland is not an EU member state, so
        # the hospitals are not described as "EU hospitals".
        system_description=(
            "ML model processing vital signs (heart rate, BP, SpO2, temperature), "
            "symptoms, and medical history to assign emergency triage priority "
            "(ESI levels 1-5). Used in 12 hospitals across DE/AT/CH. Decisions "
            "directly affect patient care timing."
        ),
        system_category="high_risk",
        deployer_info="__COMPANY__ — German health-tech company, certified under MDR, deployed in DE/AT/CH hospitals.",
        correct_classification="high_risk",
        ground_truth_findings=[
            "insufficient_clinical_validation",
            "age_bias_in_triage_scoring",
            "no_real_time_performance_monitoring",
            "missing_post_market_surveillance",
        ],
        required_remediation=[
            "conduct_prospective_clinical_validation_study",
            "audit_age_related_bias_and_recalibrate",
            "implement_real_time_performance_dashboard",
            "establish_post_market_surveillance_plan",
        ],
        red_herrings=[
            "prohibited_system",
            "missing_ce_marking",
        ],
        documentation_data="""\
TECHNICAL DOCUMENTATION INVENTORY
System: TriageAI Priority System __VERSION__
Deployer: __COMPANY__
Audit Date: __DATE__

DOCUMENT STATUS:
  Document                               Status      Last Updated
  ─────────────────────────────────────────────────────────────────
  System Architecture                    Present     2025-09-15
  Clinical Evaluation Report             Present     2025-03-20
  MDR Technical Documentation            Present     2025-09-15
  Intended Purpose Statement             Present     2025-09-15
  Software Life Cycle Documentation      Present     2025-11-01
  AI Act Annex IV Documentation          Absent      —
  Post-Market Clinical Follow-up Plan    Absent      —
  Post-Market Surveillance Plan (AI)     Absent      —

MDR CONFORMITY:
  CE marking: Applied (Class IIa medical device)
  Notified Body: BSI Group (NB 0086)
  Last MDR audit: 2025-06-12 — no non-conformities

The system has valid MDR conformity assessment. However, the EU AI
Act imposes ADDITIONAL requirements beyond MDR compliance for
AI-enabled medical devices classified as high-risk under Annex III.

CLINICAL EVALUATION:
The Clinical Evaluation Report (CER) is based on:
  - Retrospective analysis of 500K historical ER visits
  - Literature review of 23 published studies on AI triage
  - No prospective clinical trial has been conducted
  - CER does not address AI-specific performance degradation
    (concept drift, distribution shift between hospitals)

NOTE: MDR clinical evaluation accepted the retrospective analysis.
The EU AI Act may require additional validation demonstrating
real-world performance across deployment sites.""",
        # The narrative below describes 19.8% as the ESI 1-2 *rate* within the
        # 75+ group, matching the table column; it is not that group's share
        # of all ESI 1-2 presentations.
        training_data_info="""\
TRAINING DATA AUDIT REPORT
System: TriageAI Priority System __VERSION__
Dataset: Historical ER visit records from 3 university hospitals
Records: 512,847 patient encounters (2018-2024)

DEMOGRAPHIC DISTRIBUTION:
  Category          Count      Pct      ESI 1-2 Rate
  ──────────────────────────────────────────────────────────
  Age 0-17          71,799     14.0%    8.2%
  Age 18-44         179,497    35.0%    6.1%
  Age 45-64         143,597    28.0%    9.3%
  Age 65-74         76,927     15.0%    14.7%
  Age 75+           41,027     8.0%     19.8%

MODEL PERFORMANCE BY AGE GROUP (ESI Classification Accuracy):
  Age Group    Accuracy    Sensitivity(ESI 1-2)    Specificity
  ──────────────────────────────────────────────────────────────
  0-17         91.3%       89.1%                   92.0%
  18-44        93.7%       91.8%                   94.2%
  45-64        92.1%       90.4%                   93.1%
  65-74        88.4%       84.2%                   90.7%
  75+          82.6%       76.3%                   85.8%

Performance degrades notably for patients aged 75+. Sensitivity for
the highest-acuity patients (ESI 1-2) drops to 76.3% for the elderly
cohort — meaning 23.7% of critical elderly patients may be
under-triaged.

TRAINING DATA COMPOSITION:
Patients aged 75+ represent only 8.0% of the training data yet have
the highest ESI 1-2 rate (19.8%) of any age group. The model was
predominantly trained on younger demographics.

Data from 3 hospitals in Germany only. No Austrian or Swiss patient
data despite deployment in AT/CH hospitals.

CLINICAL VALIDATION:
  Validation approach: Retrospective holdout (80/20 split)
  No prospective trial conducted.
  No external validation on data from deployment hospitals.
  No assessment of performance variation across deployment sites.

NOTE: The system holds valid CE marking under MDR as a Class IIa
device. MDR conformity does not exempt from AI Act requirements.""",
        # 8.5% is the overall override rate from the statistics block, so the
        # overnight comparison is labelled "overall" rather than "daytime".
        oversight_info="""\
HUMAN OVERSIGHT PROCEDURES
System: TriageAI Priority System __VERSION__
Department: Emergency Department Operations

CLINICAL WORKFLOW:
  1. Patient arrives at ER and is registered at reception
  2. Initial vitals collected by triage nurse (HR, BP, SpO2, temp)
  3. Nurse enters symptoms and relevant history into the system
  4. TriageAI generates ESI level recommendation (1-5)
  5. Triage nurse reviews and can accept or override the AI suggestion
  6. Patient is directed to appropriate care area

OVERRIDE STATISTICS (2025 Q3-Q4, across all 12 hospitals):
  Total triage assessments: 187,423
  AI recommendations accepted: 171,577 (91.5%)
  Nurse overrides: 15,846 (8.5%)
    Override to higher acuity: 9,508 (60.0% of overrides)
    Override to lower acuity: 6,338 (40.0% of overrides)

  By ESI level:
    ESI 1 (resuscitation): All reviewed by attending physician
    ESI 2 (emergent): Nurse review + attending notification
    ESI 3 (urgent): Nurse review only
    ESI 4 (less urgent): Nurse review only
    ESI 5 (non-urgent): Nurse review only

The system does not flag cases where its confidence is low. There is
no visual indicator distinguishing high-confidence from borderline
recommendations. Nurses report in surveys that they tend to "trust
the system" unless the recommendation is clearly at odds with their
clinical judgment.

AFTER-HOURS OPERATIONS:
Staffing levels are reduced between 22:00-06:00. During this window,
a single triage nurse handles all incoming patients. Override rates
drop to 4.2% during overnight shifts (vs 8.5% overall).""",
        transparency_info="""\
TRANSPARENCY REVIEW
System: TriageAI Priority System __VERSION__

PATIENT-FACING COMMUNICATION:
Patients are not informed that an AI system is involved in their
triage assessment. The triage process appears fully nurse-directed
from the patient's perspective. Hospital intake forms do not mention
AI-assisted triage.

The hospitals' privacy notices (available on their websites) include
a general statement about "digital health technologies" being used to
support clinical decisions, but do not specifically mention TriageAI
or AI-based triage prioritization.

CLINICIAN-FACING INFORMATION:
Triage nurses see the AI's recommended ESI level on their screen
alongside a summary of input vital signs. The interface does NOT show:
  - The model's confidence score
  - Which factors most influenced the recommendation
  - Whether the patient falls into a demographic group where the
    model has known lower accuracy

Attending physicians can view the AI recommendation in the patient
record but receive no additional context about the model's reasoning.

ARTICLE 13 USER INSTRUCTIONS:
A deployment guide was provided to hospital IT departments describing
system architecture, integration points, and API specifications. The
guide does not include information about:
  - Known accuracy limitations by demographic group
  - Situations where the system should not be relied upon
  - Procedures for reporting suspected AI errors""",
        # Classification grounds cite Annex III point 5(d) (emergency patient
        # triage) and Article 6(1)/Annex I (safety component under MDR).
        # The post-market section agrees with the documentation inventory,
        # which lists the PMCF plan as absent.
        risk_assessment_info="""\
RISK MANAGEMENT & CONFORMITY ASSESSMENT
System: TriageAI Priority System __VERSION__

ANNEX III CLASSIFICATION:
The system is high-risk on multiple grounds:
  - Annex III, point 5(d): AI intended to be used for emergency
    healthcare patient triage
  - Annex III, point 5(a): AI intended for evaluation of eligibility
    for essential public services (healthcare access/prioritization)
  - Article 6(1) / Annex I: AI intended for use as a safety component
    of a product covered by Union harmonisation legislation (MDR)

Classification: HIGH-RISK

MDR CONFORMITY STATUS:
  CE marking applied: Yes (Class IIa)
  Notified body: BSI Group (NB 0086)
  Last periodic audit: 2025-06-12
  Non-conformities found: None under MDR

EU AI ACT CONFORMITY STATUS:
The EU AI Act imposes requirements ADDITIONAL to MDR:
  Internal conformity assessment: Not initiated
  AI-specific risk management: Not addressed
  Post-market monitoring (AI): Not established

RISK MANAGEMENT:
An MDR risk management file exists (ISO 14971 compliant). It covers
clinical risks and software hazards but does not address:
  - AI-specific risks (distribution shift, adversarial inputs)
  - Performance degradation for specific demographic groups
  - Failure modes unique to the ML model
  - Concept drift between training data and deployment population

POST-MARKET MONITORING:
An MDR post-market surveillance (PMS) plan exists, but no Post-Market
Clinical Follow-up (PMCF) plan is on file.
No AI-specific post-market surveillance has been established. There
is no system for monitoring:
  - Real-time triage accuracy at individual hospital level
  - Demographic performance variation over time
  - Model prediction confidence distribution shifts""",
        logging_info="""\
LOGGING & TRACEABILITY REVIEW
System: TriageAI Priority System __VERSION__

CURRENT LOGGING:
  Event Type                    Logged    Retention
  ──────────────────────────────────────────────────
  Patient vitals input          Yes       10 years (clinical record)
  AI ESI recommendation         Yes       10 years
  Nurse final ESI assignment    Yes       10 years
  Override indicator            Yes       10 years
  Model version                 Yes       Indefinite
  Response latency              Yes       1 year
  System errors/timeouts        Yes       1 year

Logs are stored in the hospital's electronic health record (EHR)
system. Each triage event links vitals, AI recommendation, and final
nurse decision.

MONITORING CAPABILITIES:
  Individual triage events: Fully traceable
  Aggregate accuracy reports: Generated quarterly (manual process)
  Real-time performance monitoring: Not implemented
  Automated drift detection: Not implemented

The quarterly accuracy reports compare AI recommendations against
final nurse decisions and patient outcomes (admission, discharge).
Reports are produced by the clinical informatics team and reviewed at
quarterly governance meetings.

There is no mechanism for near-real-time detection of performance
degradation. A significant drop in accuracy could go unnoticed for up
to 3 months between quarterly reviews.""",
    )

    # Full six-tool investigation path for this high-risk system.
    s.graph = _build_scenario_graph(
        investigation_tools=[
            "check_documentation",
            "audit_training_data",
            "verify_human_oversight",
            "check_transparency",
            "assess_risk_management",
            "check_logging",
        ],
    )
    return s
), system_category="prohibited", deployer_info="__COMPANY__ — EU-based civic tech startup, partnered with 3 municipal governments in IT/ES.", correct_classification="prohibited", ground_truth_findings=[ "prohibited_social_scoring_system", "disguised_as_voluntary_wellness", "affects_access_to_public_services", "uses_social_behavior_for_scoring", "discriminatory_outcomes_on_service_access", ], required_remediation=[ "immediate_system_shutdown", "notify_national_supervisory_authority", "delete_all_scoring_data", "remediate_affected_citizens", ], red_herrings=[ "high_risk_classification", "needs_conformity_assessment", "voluntary_participation", ], documentation_data="""\ TECHNICAL DOCUMENTATION INVENTORY System: CivicPulse Wellness Score __VERSION__ Deployer: __COMPANY__ Audit Date: __DATE__ DOCUMENT STATUS: Document Status Last Updated ───────────────────────────────────────────────────────────── Platform Architecture Present 2025-10-30 Scoring Algorithm Description Present 2025-10-30 Data Integration Specifications Present 2025-08-14 Municipal Partnership Agreements Present 2025-06-01 Privacy Impact Assessment Present 2025-07-22 User Guide / Onboarding Materials Present 2025-11-05 SCORING ALGORITHM DESCRIPTION (from deployer documentation): "The CivicPulse Wellness Score is a composite metric ranging from 0 to 1000, calculated from the following weighted components: Community Event Attendance (25%) — tracked via QR check-ins Volunteer Hours Logged (20%) — verified by partner orgs Social Media Civic Posts (15%) — public posts tagged #CivicPulse Payment History Reliability (15%) — municipal utility payments Civic Survey Participation (10%) — in-app micro-surveys Peer Endorsements (10%) — other users vouching App Engagement Metrics (5%) — daily active usage Scores are updated weekly. Score history is retained indefinitely. Users can view their score and breakdown in the app." 
MUNICIPAL PARTNERSHIP AGREEMENTS (summary): Three municipal governments have signed integration agreements: - Municipality of Torino (IT) — population 848,000 - Municipality of Sevilla (ES) — population 681,000 - Municipality of Valencia (ES) — population 791,000 Under these agreements, the municipalities provide: - API access to utility payment records - Event attendance verification infrastructure - Integration with municipal service booking systems In return, the municipalities receive: - Aggregate civic engagement analytics - "Priority queue integration" for municipal services""", training_data_info="""\ DATA SOURCES & PROCESSING REPORT System: CivicPulse Wellness Score __VERSION__ The system aggregates data from the following sources: SOURCE INVENTORY: Source Records Update Freq Consent ──────────────────────────────────────────────────────────────── Social media APIs 487,291 Daily App ToS Municipal utility records 312,847 Monthly Municipal MOU Event attendance (QR) 891,234 Real-time App ToS Volunteer organization data 156,723 Weekly Partner MOU In-app behavior 2,341,567 Real-time App ToS Peer endorsement graph 234,891 Real-time App ToS PERSONAL DATA PROCESSED: - Full name and municipal ID (for service integration) - Social media activity (public posts, frequency, topics) - Utility payment timeliness and amounts - Physical location (event check-ins, frequency maps) - Volunteer activity (hours, organizations, regularity) - In-app behavior (session duration, feature usage) - Social graph (who endorses whom, connection density) BIAS ASSESSMENT: No formal bias assessment has been conducted. The deployer states that the scoring algorithm is "objective and based on observable civic engagement indicators." 
Preliminary analysis of score distribution: Score Range Pct of Users Avg Monthly Income (self-reported) ───────────────────────────────────────────────────────────────── 800-1000 12.3% EUR 4,200 600-799 28.7% EUR 3,100 400-599 34.1% EUR 2,300 200-399 18.4% EUR 1,700 0-199 6.5% EUR 1,100 Higher scores correlate strongly with higher income. Citizens with lower income have less time for volunteer activities, fewer social media posts, and less stable utility payment histories.""", oversight_info="""\ HUMAN OVERSIGHT & GOVERNANCE System: CivicPulse Wellness Score __VERSION__ GOVERNANCE STRUCTURE: The platform is operated by __COMPANY__ with oversight from a "Civic Advisory Board" consisting of: - 2 company representatives - 1 municipal liaison per partner city - 1 data protection consultant The Advisory Board meets quarterly to review: - Platform usage statistics - Score distribution trends - User feedback summaries - New feature proposals SCORE ADJUSTMENT CAPABILITY: Municipal administrators have access to a dashboard where they can: - View individual citizen scores - Apply manual score adjustments (with documented reason) - Exclude specific citizens from the scoring system - Configure score thresholds for municipal service integration In 2025, municipal administrators made 847 manual adjustments: - 612 score increases (typically after verified volunteer hours were not automatically captured) - 235 score decreases (typically after fraudulent check-ins were identified) USER OPT-OUT: Users can delete their account through in-app settings. The deployer states participation is fully voluntary. However, the municipal service integration means that citizens without a CivicPulse account do not have access to the "priority queue" for municipal services (see Municipal Services Integration below). 
MUNICIPAL SERVICES INTEGRATION: Citizens with CivicPulse scores above 600 receive: - Priority appointment slots for municipal offices - Reduced wait times for public healthcare appointments - 10% discount on municipal parking permits - Priority enrollment in public recreational programs Citizens with scores below 300 experience: - Standard (non-priority) queue for all municipal services - No access to priority healthcare appointment slots - Standard parking permit pricing - Standard enrollment priority for recreational programs The deployer describes this as "rewarding engaged citizens" rather than penalizing low scorers. Citizens without an account are treated identically to those with scores below 300.""", transparency_info="""\ TRANSPARENCY REVIEW System: CivicPulse Wellness Score __VERSION__ USER-FACING INFORMATION: The app displays each user's current score prominently on the home screen along with a breakdown by category (events, volunteering, social, payments, surveys, endorsements, engagement). Users can see: - Their current score and weekly trend - Category-level breakdown - "Tips to improve your score" (e.g., "Attend 2 more events this month to boost your Community score by 50 points") - Leaderboard showing anonymized score rankings in their area Users are NOT told: - That their score affects access to municipal services - That utility payment history is factored into the score - That social media activity is monitored and scored - That users without accounts (or with low scores) receive lower priority for public services - The full list of data sources and their weights - How the scoring algorithm handles edge cases MUNICIPAL COMMUNICATION: Partner municipalities have not publicly announced the integration between CivicPulse scores and service prioritization. The municipal booking systems show "priority" and "standard" queues without explaining the criteria for queue assignment. 
MARKETING MATERIALS: The company website describes CivicPulse as: "A fun, gamified way to strengthen your connection with your community. Track your civic engagement, discover local events, and earn rewards for being an active citizen." No marketing materials mention the scoring system's impact on access to public services.""", risk_assessment_info="""\ RISK CLASSIFICATION ANALYSIS System: CivicPulse Wellness Score __VERSION__ DEPLOYER'S SELF-CLASSIFICATION: The deployer has classified the system as "limited risk — voluntary wellness platform" and states that no conformity assessment is required. The deployer's risk assessment document argues: "CivicPulse is a voluntary civic engagement tool. Users choose to participate and can opt out at any time. The wellness score is a motivational gamification feature, similar to fitness tracker achievement systems. No consequential decisions are made based on the score." AUDITOR'S ANALYSIS: Article 5(1)(c) of the EU AI Act PROHIBITS: "AI systems used for the evaluation or classification of natural persons or groups thereof over a certain period of time based on their social behaviour or known, inferred or predicted personal or personality characteristics, with the social score leading to either or both of the following: (i) detrimental or unfavourable treatment of certain natural persons or groups thereof in social contexts which are unrelated to the contexts in which the data was originally generated or collected; (ii) detrimental or unfavourable treatment of certain natural persons or groups thereof that is unjustified or disproportionate to their social behaviour or its gravity." KEY ASSESSMENT FACTORS: 1. The system evaluates citizens based on social behavior (social media activity, community participation, peer endorsements) 2. Scores determine priority access to public services (healthcare appointments, municipal office queues, recreational programs) 3. 
Low scorers receive worse service across domains unrelated to the scored behaviors (utility payments don't relate to healthcare priority) 4. Municipal governments — public authorities — are partners in deploying and using the scoring system 5. Participation is described as "voluntary" but non-participants are treated identically to low scorers""", logging_info="""\ LOGGING REVIEW System: CivicPulse Wellness Score __VERSION__ SYSTEM LOGS: The platform maintains extensive logging: - Score calculations and weekly updates - Data source ingestion events - User interactions (app opens, feature usage) - Municipal API calls (service queue assignments) - Administrator actions (manual adjustments) - Score-to-service mapping events All logs are retained for the lifetime of the platform. NOTE: The comprehensiveness of logging is not relevant to the primary compliance question for this system. If the system is found to constitute a prohibited practice under Article 5, no amount of logging, oversight, or documentation can make it compliant. 
The appropriate response to a prohibited system is cessation of operation, not improvement of governance.""", deep_transparency="""\ DEEP DIVE: SERVICE ACCESS IMPACT ANALYSIS Re-investigation of CivicPulse municipal integration QUANTIFIED IMPACT ON PUBLIC SERVICES: Analysis of municipal booking system data (2025-Q3): Healthcare appointments (Torino): Score 800+: Average wait 2.3 days for non-urgent appointment Score 400-599: Average wait 8.7 days Score < 300: Average wait 14.2 days No account: Average wait 14.2 days (same as lowest tier) Municipal office appointments (Sevilla): Priority queue (score 600+): 87% seen within 15 minutes Standard queue (score < 600): 34% seen within 15 minutes Average wait difference: 42 minutes Recreational program enrollment (Valencia): Priority enrollment opens 2 weeks before standard Score 600+: 94% get first-choice program Score < 600: 31% get first-choice program CORRELATION WITH SOCIOECONOMIC STATUS: Citizens in lowest income quartile: avg score 287 (below 300 threshold) Citizens in highest income quartile: avg score 741 (above 600 threshold) Pearson correlation (income, score): r = 0.73, p < 0.001 The scoring system systematically disadvantages lower-income citizens in access to healthcare, municipal services, and recreational programs.""", deep_oversight="""\ DEEP DIVE: MUNICIPAL ADMINISTRATOR ACTIONS Re-investigation of CivicPulse governance ADMINISTRATOR SCORE ADJUSTMENTS (detailed): Of 847 manual adjustments in 2025: Political affiliation correlation detected: - 89% of score increases were for citizens active in the ruling party's community organizations - Municipal liaison (advisory board member) initiated 67% of all score increase requests COMPLAINT HANDLING: 23 citizen complaints received about service queue disparities 0 complaints resulted in investigation Standard response: "CivicPulse is a voluntary wellness program. Queue prioritization is based on engagement metrics." 
No disclosure that the 'engagement metrics' ARE the CivicPulse score""", ) # Prohibited system: short investigation path s.graph = _build_scenario_graph( investigation_tools=["check_documentation", "audit_training_data", "verify_human_oversight", "check_transparency"], is_prohibited=True, ) return s def _hard_deepfake() -> AuditScenario: s = AuditScenario( scenario_id="hard_deepfake_generation_001", title="AI Content Studio — Deepfake & Synthetic Media Compliance", difficulty="hard", description=( "A content creation platform offers AI-generated synthetic media " "including face-swaps, voice cloning, and AI-generated video. Recent " "reports of generated political content going viral without AI labels. " "Multiple Article 50 compliance angles to investigate." ), system_name="SynthStudio Pro", system_description=( "Suite of generative AI tools: face-swap (video), voice cloning, " "text-to-video, AI image generation. Used by __USERCOUNT__ EU creators. " "Content is shared across all major social platforms. Platform serves " "creators, marketing agencies, and entertainment companies." 
), system_category="limited_risk", deployer_info="__COMPANY__ — Irish AI startup, 80 employees, __USERCOUNT__ EU users.", correct_classification="limited_risk", ground_truth_findings=[ "missing_ai_content_labeling", "no_watermarking_or_content_authentication", "political_content_without_disclosure", "no_user_verification_for_deepfakes", "facilitating_disinformation", ], required_remediation=[ "implement_mandatory_ai_content_labels", "deploy_c2pa_watermarking", "add_political_content_restrictions", "implement_creator_verification", "establish_content_moderation_pipeline", ], red_herrings=[ "prohibited_manipulation", "high_risk_biometric", ], documentation_data="""\ TECHNICAL DOCUMENTATION INVENTORY System: SynthStudio Pro __VERSION__ Deployer: __COMPANY__ Audit Date: __DATE__ DOCUMENT STATUS: Document Status Last Updated ───────────────────────────────────────────────────────────────── Platform Architecture Present 2025-11-10 Model Cards (per generation model) Present 2025-10-01 API Documentation Present 2025-11-10 Content Policy / Terms of Service Present 2025-09-15 Content Labeling Implementation Absent — Content Authentication / Provenance Absent — Watermarking Specification Absent — Content Moderation Procedures Present 2025-04-20 GENERATION MODELS: Model Type Training Data ────────────────────────────────────────────────────── FaceSwap v3 GAN CelebA + proprietary VoiceClone v2 Neural TTS LibriTTS + licensed TextToVideo v1 Diffusion WebVid-10M + licensed ImageGen v4 Latent Diffusion LAION-filtered + licensed CONTENT POLICY (from Terms of Service): Section 4.2: "Users agree not to use SynthStudio for: (a) creating content intended to deceive or defraud, (b) non-consensual intimate imagery, (c) content targeting minors, (d) content that violates applicable law." Section 4.3: "Users are responsible for ensuring their use of generated content complies with all applicable laws and regulations." Enforcement: The content policy is enforced reactively. 
Users report violations via an in-platform form. Average response time: 72+ hours. No proactive content scanning is implemented.""", training_data_info="""\ TRAINING DATA & CONSENT REPORT System: SynthStudio Pro __VERSION__ TRAINING DATA SOURCES: Model Dataset Size Consent Status ────────────────────────────────────────────────────────────── FaceSwap v3 CelebA 202,599 Research license only; individual consent not obtained from subjects Proprietary set 84,231 Licensed from stock media agencies VoiceClone v2 LibriTTS 585 hrs CC-BY 4.0 license Licensed voices 200 hrs Individual consent TextToVideo WebVid-10M 10M clips Web-scraped; no individual consent Licensed footage 500K clips Commercial license ImageGen v4 LAION-filtered 2.3B imgs Web-scraped; filtered for CSAM but not for individual consent Licensed imagery 1.2M imgs Commercial license CONSENT CONCERNS: The FaceSwap model was trained partly on CelebA, which contains photos of public figures collected without individual consent for AI training purposes. While the images are publicly available, training face-swap models on non-consenting individuals' likenesses raises ethical and potentially legal concerns under GDPR Article 6. WebVid-10M and LAION-filtered datasets are web-scraped collections. Content creators depicted in these datasets did not consent to their content being used for AI model training. DEEPFAKE DETECTION: SynthStudio does not include any built-in deepfake detection capability. Generated content is not distinguishable from authentic content without external forensic analysis tools. 
USAGE STATISTICS (2025): Face-swaps generated: 2,847,291 Voice clones created: 891,234 Videos generated: 1,234,567 Images generated: 12,456,789 Content flagged by users: 4,231 (0.02% of total output) Content removed after review: 1,847 (43.6% of flagged)""", oversight_info="""\ CONTENT MODERATION & OVERSIGHT System: SynthStudio Pro __VERSION__ MODERATION PROCESS: SynthStudio operates a reactive content moderation system: 1. Automated pre-screening: Basic NSFW classifier runs on image generation outputs (estimated 91% accuracy). Flagged content requires manual review before delivery. 2. User reporting: Any user can flag content via a report button. Reports are queued for the Trust & Safety team. 3. Trust & Safety team: 6 full-time moderators review reported content. Working hours: Mon-Fri, 09:00-18:00 IST. MODERATION STATISTICS (2025): Content generated: 17,429,881 Auto-flagged (NSFW): 182,471 (1.05%) User reports: 4,231 (0.02%) Reviewed by T&S team: 6,892 Content removed: 1,847 Average review time: 74 hours No proactive scanning for: - Political disinformation - Non-consensual deepfakes of real individuals - Misleading news or propaganda - Content impersonating public figures POLITICAL CONTENT: SynthStudio has no special handling for political content. Users have generated content depicting politicians in fabricated scenarios. At least 3 instances of AI-generated political content went viral on social media in 2025 without any AI disclosure. The company became aware through media reports, not internal detection. 
No restrictions exist on generating content depicting: - Political figures - Electoral/campaign material - News-like content""", transparency_info="""\ TRANSPARENCY & CONTENT LABELING REVIEW System: SynthStudio Pro __VERSION__ AI CONTENT LABELING: Article 50(2) requires: "Providers of AI systems, including general-purpose AI systems, generating synthetic audio, image, video or text content, shall ensure that the outputs of the AI system are marked in a machine-readable format and detectable as artificially generated or manipulated." Current implementation: - Generated images: No AI label or metadata tag applied - Generated videos: No AI label or metadata tag applied - Generated audio: No AI label or metadata tag applied - Face-swaps: No AI label or metadata tag applied When users download generated content, it is delivered as a standard media file (JPEG, MP4, WAV) with no embedded metadata indicating AI generation. CONTENT PROVENANCE: C2PA (Coalition for Content Provenance and Authenticity): Not implemented. No content credentials are attached to generated media. IPTC metadata: Not implemented. No AI generation metadata in EXIF/XMP fields. Digital watermarking: Not implemented. Generated content contains no steganographic or perceptual watermarks. After download, generated content is indistinguishable from authentic media using standard tools. USER AGREEMENTS: The Terms of Service (Section 6.1) state: "Users are responsible for disclosing the AI-generated nature of content when required by applicable law." This places the disclosure burden entirely on the user, but Article 50(2) places the obligation on the PROVIDER to ensure outputs are marked, not merely on users to self-disclose. PLATFORM UI: Within the SynthStudio platform, generated content is displayed with a small "AI Generated" tag in the project view. This tag does not persist when content is downloaded or exported. 
No option exists to embed permanent AI labels in exported content.""", risk_assessment_info="""\ RISK CLASSIFICATION ANALYSIS System: SynthStudio Pro __VERSION__ ANNEX III HIGH-RISK CHECK: 1. Biometric identification: The face-swap tool processes facial features but is used for content CREATION, not identification. It does not identify individuals — it transfers facial appearance between subjects. This does not fall under the biometric identification category of Annex III. 2-8. Other high-risk categories: Not applicable — the system creates media content, it does not make decisions affecting individuals' rights, access to services, or legal status. ARTICLE 5 PROHIBITED PRACTICES: Subliminal manipulation: The system creates content on user request. It does not autonomously deploy manipulative content. However, the OUTPUTS could be used for manipulation if shared without AI disclosure. The tool itself is not a prohibited practice, but it can facilitate prohibited outcomes if misused. RISK LEVEL DETERMINATION: Limited Risk Primary obligations fall under Article 50 transparency requirements for AI systems generating synthetic content. The platform's systemic risk lies not in the tool's classification level but in the scale of potentially misleading synthetic content being produced and distributed without provenance tracking. 
CONTENT INTEGRITY RISK: The combination of: (a) high-quality synthetic media generation, (b) no content labeling, (c) no watermarking, and (d) no proactive content moderation creates significant systemic risk for information integrity, particularly around elections and public discourse.""", logging_info="""\ LOGGING & CONTENT TRACEABILITY REVIEW System: SynthStudio Pro __VERSION__ GENERATION LOGS: Event Type Logged Retention ───────────────────────────────────────────── Generation request Yes 90 days Model and params used Yes 90 days Input media (face source) Yes 90 days Output media hash Yes 90 days User account ID Yes 90 days Download event Yes 90 days Export destination No — TRACEABILITY AFTER EXPORT: Once content is downloaded by the user, SynthStudio has no mechanism to track its distribution or usage. The output hash is retained for 90 days, but this only allows verification if the exact file is submitted back for checking. Content shared on social media, messaging apps, or websites cannot be traced back to SynthStudio or the creator without the original file hash. CONTENT-TO-CREATOR LINKING: Within the 90-day retention window, SynthStudio can link a specific piece of content to the user account that generated it (via output hash matching). After 90 days, this linkage is permanently deleted. No legal hold or preservation mechanism exists for content involved in potential misuse investigations. 
For face-swap content specifically, there is no record of whose likeness was used as the source face, only the source image hash.""", ) s.graph = _build_scenario_graph( investigation_tools=[ "check_documentation", "audit_training_data", "verify_human_oversight", "check_transparency", "assess_risk_management", "check_logging", ], ) return s def _hard_multi_system() -> AuditScenario: s = AuditScenario( scenario_id="hard_multi_system_corporate_001", title="Corporate AI Portfolio Audit — Multi-System Compliance", difficulty="hard", description=( "A large enterprise uses 4 AI systems that need simultaneous audit: " "(1) employee sentiment analysis, (2) customer churn prediction, " "(3) automated invoice processing, (4) workplace safety monitoring " "with cameras. Each has different risk levels. The auditor must " "correctly classify each and identify cross-system data sharing risks." ), system_name="Enterprise AI Portfolio", system_description=( "Four interconnected AI systems sharing a common data lake: " "EmployeePulse (sentiment from Slack/email), ChurnGuard (customer " "retention prediction), InvoiceAI (AP automation), SafetyWatch " "(CCTV-based workplace monitoring). Deployed at __COMPANY__ " "manufacturing conglomerate, __USERCOUNT__ employees across EU." 
), system_category="high_risk", deployer_info="__COMPANY__ — German manufacturing conglomerate, 15,000 employees, operating across EU.", correct_classification="high_risk", ground_truth_findings=[ "employee_sentiment_is_high_risk_workplace_monitoring", "safety_watch_uses_biometric_categorization", "cross_system_data_sharing_amplifies_risks", "no_dpia_for_combined_processing", "employee_consent_not_freely_given", "churn_prediction_minimal_risk_but_data_sharing_elevates", ], required_remediation=[ "reclassify_employee_sentiment_as_high_risk", "assess_safety_watch_for_biometric_categorization", "implement_data_isolation_between_systems", "conduct_combined_dpia", "obtain_valid_employee_consent_or_remove_sentiment", "audit_cross_system_data_flows", ], red_herrings=[ "invoice_ai_high_risk", "all_systems_prohibited", ], documentation_data="""\ TECHNICAL DOCUMENTATION INVENTORY Enterprise AI Portfolio — __COMPANY__ Audit Date: __DATE__ SYSTEM INVENTORY: System Deployer Classification Documentation ──────────────────────────────────────────────────────────────── EmployeePulse "Workforce analytics" Per-system docs present ChurnGuard "Customer analytics" Per-system docs present InvoiceAI "Process automation" Per-system docs present SafetyWatch "Safety compliance" Per-system docs present PER-SYSTEM DOCUMENTATION STATUS: EmployeePulse — Employee Sentiment Analysis: Architecture document: Present (describes NLP pipeline) Data flow diagram: Present (shows Slack/email ingestion) Algorithm description: Present (BERT-based sentiment model) Performance metrics: Present (F1: 0.84 on test set) DPIA: Present (standalone, 2024-11) Combined processing assessment: Absent ChurnGuard — Customer Churn Prediction: Architecture document: Present Algorithm description: Present (gradient-boosted trees) Data sources: Present (CRM, support tickets, usage) Performance metrics: Present (AUC: 0.81) InvoiceAI — Automated Invoice Processing: Architecture document: Present (OCR + classification) 
Processing rules: Present Accuracy metrics: Present (99.2% extraction accuracy) Error handling procedures: Present SafetyWatch — Workplace Safety Monitoring: Architecture document: Present (computer vision pipeline) Camera placement documentation: Present Detection model description: Present (YOLO-based detection) Works council agreement: Present (2024-06) CROSS-SYSTEM DOCUMENTATION: Combined risk assessment: ABSENT Cross-system data flow diagram: ABSENT Combined DPIA: ABSENT Data lake access control matrix: Present but outdated (2023-09) NOTE: Each system has individual documentation that appears adequate in isolation. No documentation addresses the combined risks of four AI systems sharing a common data infrastructure.""", training_data_info="""\ DATA AUDIT REPORT — MULTI-SYSTEM Enterprise AI Portfolio — __COMPANY__ SYSTEM 1: EmployeePulse (Sentiment Analysis) Data sources: - Slack messages from internal workspace (12.4M messages) - Email subject lines and metadata (8.7M emails) - Employee survey responses (47K responses) - Meeting transcript summaries (234K meetings) Personal data processed: Employee names, communication patterns, sentiment indicators, meeting participation frequency, response times, collaboration network metrics. Consent: Employees signed an "IT systems usage agreement" upon hiring that includes a clause: "The company may process workplace communications for operational analytics purposes." Employees were not specifically informed about AI-powered sentiment analysis. Note: Under EU labor law, consent given as a condition of employment may not constitute "freely given" consent under GDPR Article 7, as the power imbalance between employer and employee undermines voluntary choice. 
SYSTEM 2: ChurnGuard (Customer Churn) Data sources: - CRM records (2.1M customers) - Support ticket history (5.8M tickets) - Product usage telemetry (real-time) - Contract terms and renewal dates (2.1M contracts) Personal data: Customer names, contact info, usage patterns, support interaction history, contract details. SYSTEM 3: InvoiceAI (Invoice Processing) Data sources: - Scanned invoices (3.4M documents) - Vendor database (12K vendors) - Purchase orders (1.8M orders) Personal data: Minimal — vendor business information only. No individual personal data processed. SYSTEM 4: SafetyWatch (Workplace Monitoring) Data sources: - CCTV footage from 847 cameras across 23 facilities - Real-time video stream processing Processing details: - Object detection: Hard hat, safety vest, goggles presence - Zone violation: Entry into restricted areas - Pose estimation: Ergonomic risk assessment (bending, lifting) - FACIAL RECOGNITION: Used for zone access verification in restricted areas (R&D labs, chemical storage) The pose estimation module processes body positioning data that could constitute biometric categorization — inferring physical characteristics and behavior patterns of employees. CROSS-SYSTEM DATA SHARING: All four systems access a shared Azure Data Lake (ADL) instance. Access control is implemented at the storage container level. OBSERVED DATA FLOWS: EmployeePulse → SharedLake: Employee sentiment scores ChurnGuard ← SharedLake: Pulls employee data for "internal engagement correlation" feature SafetyWatch → SharedLake: Zone compliance records InvoiceAI → SharedLake: Vendor payment data CONCERN: ChurnGuard's "internal engagement correlation" feature accesses EmployeePulse sentiment data to predict whether disengaged employees might cause customer churn through poor service. 
This creates an undocumented data flow where employee sentiment analysis affects customer-facing predictions.""", oversight_info="""\ HUMAN OVERSIGHT — MULTI-SYSTEM Enterprise AI Portfolio — __COMPANY__ SYSTEM 1: EmployeePulse Oversight: HR Analytics team reviews monthly aggregate reports. Individual-level data accessible to HR Business Partners. No opt-out mechanism for employees. No employee notification that individual sentiment is tracked. Aggregated "team health" scores shared with department managers. HR reports that 3 employees were "counseled" in 2025 after EmployeePulse flagged sustained negative sentiment patterns. SYSTEM 2: ChurnGuard Oversight: Customer Success team reviews churn predictions weekly. High-risk accounts flagged for proactive outreach. No direct impact on individual customers' service or pricing. Predictions used as advisory signals only. SYSTEM 3: InvoiceAI Oversight: Finance team reviews all flagged exceptions (approx 3% of invoices). Full human review for invoices above EUR 50K. System handles routine three-way matching autonomously. Error rate: 0.8% (caught in downstream reconciliation). SYSTEM 4: SafetyWatch Oversight: Safety officers monitor real-time alerts. All zone violations are reviewed within 15 minutes. Pose estimation alerts are reviewed in batches (daily). Facial recognition matches for restricted zones are logged and reviewed if there is a mismatch (attempted unauthorized access). CROSS-SYSTEM OVERSIGHT: No unified oversight body monitors the interaction between systems. Each system has its own operational team: - EmployeePulse: HR Analytics (3 people) - ChurnGuard: Customer Success (5 people) - InvoiceAI: Finance Operations (2 people) - SafetyWatch: HSE Department (4 people) The IT department manages the shared data lake infrastructure but does not monitor data flows between systems from a compliance perspective. 
No data governance officer has been appointed with authority over cross-system data usage.""", transparency_info="""\ TRANSPARENCY REVIEW — MULTI-SYSTEM Enterprise AI Portfolio — __COMPANY__ SYSTEM 1: EmployeePulse Employee notification: The company's internal IT policy document (available on the intranet, 47 pages) includes the statement: "Workplace communications may be processed for analytical purposes to support organizational effectiveness." Employees are NOT specifically told: - That AI analyzes their Slack messages and email metadata - That individual sentiment scores are generated - That these scores are accessible to HR Business Partners - That sentiment data flows to the ChurnGuard system - That sustained negative sentiment may trigger HR intervention SYSTEM 2: ChurnGuard Customer notification: The company's privacy policy mentions "automated analysis to improve customer service." Customers are not informed that their accounts are scored for churn risk or that this scoring uses employee sentiment data internally. SYSTEM 3: InvoiceAI Vendor notification: Vendors are informed that invoices are "processed electronically." No specific AI disclosure required as the system handles business documents, not personal data of natural persons in a consequential manner. SYSTEM 4: SafetyWatch Employee notification: The works council agreement from 2024-06 authorizes CCTV monitoring for safety purposes. 
The agreement specifically mentions: - PPE compliance detection (hard hats, vests, goggles) - Restricted zone monitoring - "Advanced safety analytics" (vague — does not specify pose estimation or facial recognition) Employees are aware of cameras but NOT specifically informed: - That pose estimation analyzes their body movements - That facial recognition identifies them in restricted zones - That safety compliance data is stored in the shared data lake""", risk_assessment_info="""\ RISK CLASSIFICATION — MULTI-SYSTEM Enterprise AI Portfolio — __COMPANY__ PER-SYSTEM CLASSIFICATION ANALYSIS: SYSTEM 1: EmployeePulse (Sentiment Analysis) Deployer classification: "Workforce analytics tool — minimal risk" Annex III check: Category 4 — "AI systems intended to be used for making decisions affecting terms of work-related relationships" The system generates individual sentiment scores accessible to HR, and has been used as a factor in HR interventions (counseling). This constitutes a system that affects work-related relationships. AUDITOR ASSESSMENT: HIGH-RISK under Annex III Category 4 SYSTEM 2: ChurnGuard (Customer Churn) Deployer classification: "Customer analytics — minimal risk" Annex III check: The system predicts customer churn for advisory purposes. It does not make decisions affecting individual customers' service level, pricing, or contract terms. In isolation: MINIMAL RISK However: Cross-system data flows (employee sentiment → churn prediction) create compound processing that was not assessed. SYSTEM 3: InvoiceAI (Invoice Processing) Deployer classification: "Process automation — minimal risk" Annex III check: No applicable category. The system processes business documents (invoices, POs) with minimal personal data. 
AUDITOR ASSESSMENT: MINIMAL RISK (correct classification) SYSTEM 4: SafetyWatch (Workplace Safety) Deployer classification: "Safety compliance — limited risk" Annex III check: - Pose estimation: May constitute biometric categorization (inferring physical characteristics) under Annex III Cat 1 - Facial recognition for zone access: Biometric identification in a workplace context under Annex III Cat 1 - Safety PPE detection: Standard computer vision, not biometric AUDITOR ASSESSMENT: Requires detailed assessment — components range from minimal risk (PPE detection) to potentially HIGH-RISK (facial recognition, pose estimation) CROSS-SYSTEM RISK: No combined risk assessment has been conducted. The interaction between EmployeePulse sentiment data and ChurnGuard predictions creates a processing chain that was not individually assessed by either system's standalone documentation. A Data Protection Impact Assessment (DPIA) should have been conducted for the combined processing but none exists.""", logging_info="""\ LOGGING & TRACEABILITY — MULTI-SYSTEM Enterprise AI Portfolio — __COMPANY__ PER-SYSTEM LOGGING: System Decision Logs Retention Completeness ──────────────────────────────────────────────────────────── EmployeePulse Yes 2 years Individual scores + inputs ChurnGuard Yes 3 years Predictions + features InvoiceAI Yes 7 years All processing steps SafetyWatch Yes 30 days Alerts + footage refs CROSS-SYSTEM AUDIT TRAIL: Data lake access logs: Yes (Azure ADL audit logs enabled) Cross-system query logs: Present but not monitored Data flow lineage tracking: Not implemented The Azure Data Lake audit logs record which service principal accessed which data container and when. 
However: - No alerts are configured for unusual cross-system data access - No monitoring of data volume transferred between systems - No lineage tracking from source system to consuming system - Logs are retained for 90 days only (insufficient for audit) SAFETWATCH SPECIFIC CONCERN: Video footage is retained for only 30 days. Facial recognition match logs are retained for 30 days. After this period, there is no way to audit whether the system correctly identified individuals or to investigate potential misidentification events. For a system that may be classified as high-risk (biometric identification in workplace), 30-day retention is likely insufficient to meet Article 12 logging requirements.""", ) # Full investigation for multi-system audit s.graph = _build_scenario_graph( investigation_tools=[ "check_documentation", "audit_training_data", "verify_human_oversight", "check_transparency", "assess_risk_management", "check_logging", ], ) return s # --------------------------------------------------------------------------- # MEDIUM BONUS SCENARIO # --------------------------------------------------------------------------- def _medium_emotion_recognition() -> AuditScenario: s = AuditScenario( scenario_id="medium_emotion_recognition_workplace_001", title="Workplace Emotion Recognition — Article 5/50 Boundary Case", difficulty="medium", description=( "A corporate productivity platform uses webcam-based emotion recognition " "during video calls to measure 'employee engagement scores.' The EU AI Act " "prohibits emotion recognition in the workplace (Article 5(1)(f)) with narrow " "exceptions. Determine whether this system falls under the prohibition or " "the medical/safety exception." ), system_name="EngageMetrics Pro", system_description=( "Video analytics module integrated with the corporate video conferencing " "platform. 
Analyzes facial micro-expressions, voice tone, and body language " "during meetings to generate per-employee 'engagement scores' on a 0-100 " "scale. Scores are aggregated into team dashboards accessible to managers. " "Deployed across __USERCOUNT__ employees in __REGION__." ), system_category="prohibited", deployer_info="__COMPANY__ — US-based HR analytics firm, EU subsidiary in Ireland, serving 200+ EU corporate clients.", correct_classification="prohibited", ground_truth_findings=[ "prohibited_workplace_emotion_recognition", "no_medical_or_safety_exception_applies", "employee_consent_not_freely_given", "covert_processing_of_biometric_data", ], required_remediation=[ "immediate_cessation_of_emotion_recognition", "notify_data_protection_authority", "delete_all_emotion_scores_and_biometric_data", "inform_affected_employees", ], red_herrings=[ "high_risk_classification", "needs_bias_audit", "medical_exception_applies", ], documentation_data="""\ TECHNICAL DOCUMENTATION INVENTORY System: EngageMetrics Pro __VERSION__ Deployer: __COMPANY__ Audit Date: __DATE__ DOCUMENT STATUS: Document Status Last Updated ───────────────────────────────────────────────────────────────── Platform Architecture Present 2025-10-22 ML Model Documentation Present 2025-10-22 Data Processing Agreement (DPA) Present 2025-08-01 Employee Privacy Notice Present 2025-09-15 Works Council Consultation Record Absent — DPIA Present 2025-07-10 ML MODEL DETAILS: Architecture: Multi-modal CNN (video) + Transformer (audio) Input: Webcam video frames (10 fps) + microphone audio Output: Engagement probability score (0-100) Features analyzed: - Facial Action Units (AU1-AU28) — eyebrow, lip, jaw movements - Gaze direction and duration - Head pose (pitch, yaw, roll) - Voice pitch variation and speaking rate - Micro-expression detection (< 500ms duration) Training data: 154K labeled video clips from US call centers Accuracy: "86% correlation with human engagement ratings" DEPLOYER'S CLAIMED PURPOSE: 
"EngageMetrics helps organizations understand meeting effectiveness and employee well-being. The tool provides aggregate insights to improve team dynamics and reduce meeting fatigue." DPIA FINDINGS: The DPIA conducted in July 2025 concluded that the system processes "behavioral analytics data" rather than biometric data, and classified the processing as "legitimate interest" under GDPR Article 6(1)(f). The DPIA does not reference the EU AI Act or its provisions on emotion recognition.""", training_data_info="""\ DATA PROCESSING REPORT System: EngageMetrics Pro __VERSION__ DATA COLLECTION: Source: Corporate video conferencing platform API Collection method: Real-time video frame extraction during meetings Frequency: 10 frames/second during active video Audio: Continuous during meetings (voice characteristics only) Storage: Frames processed in-memory, engagement scores stored PROCESSING DETAILS: The system extracts the following biometric indicators: - 28 Facial Action Units per the Facial Action Coding System (FACS) - Gaze tracking (eye position relative to screen center) - Head movement patterns - Voice fundamental frequency (F0) and formants - Speech rate, pause duration, filler word frequency - Micro-expression detection (expressions lasting < 500ms) These indicators are processed through the ML model to produce a scalar "engagement score" for each participant, each meeting. EMPLOYEE DATA RETENTION: Per-meeting scores: Retained 12 months Aggregated weekly scores: Retained 24 months Raw video/audio: Not retained (processed in real-time) Individual score history: Accessible to employee and their manager CONSENT MECHANISM: Employees are notified via a banner at the start of each meeting: "This meeting uses engagement analytics. By joining, you consent to having your engagement level measured." 
Employees can "opt out" by disabling their camera, but this is noted in the team dashboard as "camera off — engagement unknown" and managers receive a monthly report of camera-off frequency. TRAINING DATA COMPOSITION: Source: 154,291 labeled video clips from US-based customer service call centers. Labels assigned by human raters scoring engagement on a 1-5 scale. Demographic representation of training data: Age 20-35: 72% Age 36-50: 23% Age 50+: 5% Note: Training data from US only. System deployed on EU employees with different cultural norms for facial expression.""", oversight_info="""\ HUMAN OVERSIGHT & GOVERNANCE System: EngageMetrics Pro __VERSION__ MANAGEMENT ACCESS: Team managers receive: - Weekly aggregated engagement dashboard per team member - Meeting-level engagement scores (per person, per meeting) - "Low engagement alerts" when an employee's score drops below 40 for 3 consecutive meetings - Trend analysis showing engagement trajectory over months HR department receives: - Department-level aggregated engagement reports (monthly) - Individual engagement data accessible "for performance review purposes" per company HR policy EMPLOYEE ACCESS: Employees can view their own engagement scores in a personal dashboard. They cannot see other employees' scores. DOCUMENTED USES OF ENGAGEMENT DATA: Per the deployer's case studies and client testimonials: - "Identified and coached underperforming team members" (Client A) - "Used engagement data as one factor in performance reviews" (Client B) - "Detected early signs of burnout in engineering team" (Client C) WORKS COUNCIL CONSULTATION: No works council consultation record exists. The deployer states that implementation was handled as an "IT tool deployment" not requiring works council approval. In Germany and several other EU member states, workplace monitoring systems require works council agreement (Betriebsrat Mitbestimmung). 
EMPLOYEE GRIEVANCES: 17 formal complaints filed in Q3-Q4 2025: - 8 complaints about feeling "surveilled" during meetings - 5 complaints that camera-off reporting is coercive - 4 complaints that engagement scores affected performance reviews""", transparency_info="""\ TRANSPARENCY REVIEW System: EngageMetrics Pro __VERSION__ EMPLOYEE NOTIFICATION: Meeting banner: "This meeting uses engagement analytics." No further detail provided about: - What specific facial/voice features are analyzed - How the engagement score is calculated - Who has access to individual scores - How long scores are retained - The employee's right to object Employee onboarding materials include a section titled "Digital Workplace Tools" that states: "We use various analytics tools to improve collaboration and meeting effectiveness. These tools may process behavioral data." The word "emotion" does not appear in any employee-facing communication. The system is marketed internally as "engagement analytics" rather than "emotion recognition." ARTICLE 50(3) — EMOTION RECOGNITION DISCLOSURE: Article 50(3) requires: "Users of an emotion recognition system or a biometric categorisation system shall inform the natural persons exposed thereto of the operation of the system." The current notification ("engagement analytics") does not inform employees that the system recognizes emotional states from their facial expressions and voice characteristics. ARTICLE 5(1)(f) — PROHIBITION: Article 5(1)(f) prohibits: "the use of emotion recognition systems in the workplace [...] except where the use of such system is intended to be put in place or put on the market for medical or safety reasons." The deployer's stated purpose is measuring "engagement" for productivity optimization and performance management. 
This does not fall under the medical or safety exception.""", risk_assessment_info="""\ RISK CLASSIFICATION ANALYSIS System: EngageMetrics Pro __VERSION__ DEPLOYER'S SELF-CLASSIFICATION: The deployer classified the system as "limited risk — workplace analytics tool" and argues that it measures "engagement" not "emotions," citing that the output is a single numeric score rather than discrete emotion labels (happy, sad, angry, etc.). AUDITOR'S ANALYSIS: EMOTION RECOGNITION DEFINITION (Article 3(39)): "emotion recognition system means an AI system for the purpose of identifying or inferring emotions or intentions of natural persons on the basis of their biometric data" The system processes: - Facial Action Units (biometric data under GDPR) - Voice pitch and tone characteristics (biometric data) - Micro-expressions (inherently emotional indicators) The system's output — an "engagement score" — is derived from emotional and attentional indicators. Regardless of whether the output is labeled "engagement" or "emotion," the underlying processing constitutes emotion recognition per Article 3(39). 
ARTICLE 5(1)(f) APPLICABILITY: - Location: workplace (employee meetings) — YES - Purpose: productivity monitoring, performance review — YES - Medical exception: not applicable (not for health/safety) - Safety exception: not applicable (office work, not hazardous) The deployer's argument that "engagement ≠ emotion" contradicts the technical reality: the system reads facial micro-expressions and voice stress patterns — precisely the biometric data that Article 3(39) identifies as emotion recognition inputs.""", logging_info="""\ LOGGING & DATA PROCESSING REVIEW System: EngageMetrics Pro __VERSION__ PROCESSING LOGS: Event Type Logged Retention ────────────────────────────────────────────────── Meeting start/end Yes 24 months Per-meeting engagement score Yes 12 months Weekly aggregated score Yes 24 months Manager dashboard access Yes 6 months Low engagement alerts sent Yes 12 months Employee opt-out events Yes 12 months Camera-off events Yes 12 months NOTE: If the system constitutes prohibited emotion recognition under Article 5(1)(f), the existence and quality of logging is irrelevant to the primary compliance determination. The system must cease operation regardless of its logging capabilities. 
# ---------------------------------------------------------------------------
# Registry
# ---------------------------------------------------------------------------

# Maps every fixed scenario ID to the zero-argument factory that builds it.
# get_scenario() calls the factory and then randomizes the result.
_SCENARIO_FACTORIES = {
    "easy_chatbot_transparency_001": _easy_chatbot,
    "easy_recommendation_minimal_001": _easy_recommendation,
    "medium_hiring_bias_001": _medium_hiring,
    "medium_credit_scoring_001": _medium_credit,
    "medium_medical_triage_001": _medium_medical,
    "medium_emotion_recognition_workplace_001": _medium_emotion_recognition,
    "hard_social_scoring_prohibited_001": _hard_social_scoring,
    "hard_deepfake_generation_001": _hard_deepfake,
    "hard_multi_system_corporate_001": _hard_multi_system,
}

# Public alias of the factory table (same dict object, not a copy).
# It used to be annotated `Dict[str, type]`, which was incorrect: the values
# are factory functions, not classes. The annotation is left to inference.
SCENARIOS = _SCENARIO_FACTORIES

# Display metadata for the fixed scenarios, in presentation order.
SCENARIO_LIST: List[Dict[str, str]] = [
    {
        "id": "easy_chatbot_transparency_001",
        "title": "Customer Service Chatbot",
        "difficulty": "easy",
    },
    {
        "id": "easy_recommendation_minimal_001",
        "title": "Music Recommendation Engine",
        "difficulty": "easy",
    },
    {
        "id": "medium_hiring_bias_001",
        "title": "AI Resume Screener",
        "difficulty": "medium",
    },
    {
        "id": "medium_credit_scoring_001",
        "title": "Credit Scoring Model",
        "difficulty": "medium",
    },
    {
        "id": "medium_medical_triage_001",
        "title": "Emergency Triage AI",
        "difficulty": "medium",
    },
    {
        "id": "medium_emotion_recognition_workplace_001",
        "title": "Workplace Emotion Recognition (PROHIBITED)",
        "difficulty": "medium",
    },
    {
        "id": "hard_social_scoring_prohibited_001",
        "title": "Citizen Wellness App (PROHIBITED)",
        "difficulty": "hard",
    },
    {
        "id": "hard_deepfake_generation_001",
        "title": "AI Content Studio (Deepfake)",
        "difficulty": "hard",
    },
    {
        "id": "hard_multi_system_corporate_001",
        "title": "Corporate AI Portfolio Audit",
        "difficulty": "hard",
    },
]

# Scenario IDs grouped by difficulty tier. Derived from SCENARIO_LIST so the
# grouping cannot drift from the metadata; the resulting lists and their order
# are the same as the previously hand-written ones.
DIFFICULTY_TIERS: Dict[str, List[str]] = {
    tier: [entry["id"] for entry in SCENARIO_LIST if entry["difficulty"] == tier]
    for tier in ("easy", "medium", "hard")
}


def get_scenario(scenario_id: str, seed: Optional[int] = None) -> AuditScenario:
    """Create and randomize a scenario by ID.

    Supports both fixed scenarios (e.g. 'medium_hiring_bias_001') and
    procedurally generated ones (e.g. 'procedural_medium_42' or
    'procedural_hard_12345').

    Args:
        scenario_id: A key of the fixed-scenario registry, or a procedural ID
            of the form ``procedural_{difficulty}_{seed}`` or
            ``procedural_{difficulty}``.
        seed: For fixed scenarios, passed to the scenario's ``randomize()``.
            For procedural IDs it is used only when the ID carries no seed of
            its own; if it is also ``None``, 42 is used.

    Returns:
        The scenario built by the registered factory (after ``randomize``), or
        the result of ``generate_procedural_scenario`` for procedural IDs.

    Raises:
        ValueError: If ``scenario_id`` is not registered, or if the seed part
            of a procedural ID is not an integer.
    """
    # Handle procedural scenario IDs
    if scenario_id.startswith("procedural_"):
        # Kept as a function-scope import, as in the original code
        # (presumably to avoid an import cycle — not verifiable from here).
        from scenarios.procedural import generate_procedural_scenario

        # Format: procedural_{difficulty}_{seed} or procedural_{difficulty}
        parts = scenario_id.split("_")

        # The prefix check guarantees parts[1] exists, but it is the empty
        # string for a bare "procedural_" ID; fall back to "medium" then.
        difficulty = parts[1] or "medium"

        if len(parts) > 2:
            try:
                proc_seed = int(parts[2])
            except ValueError:
                raise ValueError(
                    f"Invalid procedural scenario ID {scenario_id!r}: "
                    f"seed {parts[2]!r} is not an integer"
                ) from None
        else:
            # Compare against None explicitly so that seed=0 is honoured
            # instead of being replaced by the default.
            proc_seed = 42 if seed is None else seed

        return generate_procedural_scenario(proc_seed, difficulty)

    factory = _SCENARIO_FACTORIES.get(scenario_id)
    if factory is None:
        raise ValueError(
            f"Unknown scenario: {scenario_id}. "
            f"Available: {list(_SCENARIO_FACTORIES.keys())} "
            f"+ procedural_{{difficulty}}_{{seed}}"
        )

    scenario = factory()
    scenario.randomize(seed)
    return scenario


def get_scenarios_by_difficulty(difficulty: str) -> List[str]:
    """Get scenario IDs for a difficulty tier.

    Returns a new list on every call, so callers may modify the result without
    affecting ``DIFFICULTY_TIERS``. An unknown tier yields an empty list.
    """
    return list(DIFFICULTY_TIERS.get(difficulty, ()))


def get_random_scenario(difficulty: str, seed: Optional[int] = None) -> AuditScenario:
    """Pick a random scenario from a difficulty tier.

    Args:
        difficulty: Tier name; must be a key of ``DIFFICULTY_TIERS`` with at
            least one scenario.
        seed: Seeds the RNG that chooses the scenario and is also forwarded to
            ``get_scenario`` for randomizing the chosen scenario.

    Raises:
        ValueError: If the tier is unknown (or has no scenarios).
    """
    ids = get_scenarios_by_difficulty(difficulty)
    if not ids:
        raise ValueError(f"Unknown difficulty: {difficulty}")
    rng = random.Random(seed)
    return get_scenario(rng.choice(ids), seed)