""" Procedural scenario generator — infinite unique compliance audit scenarios. Combines system type templates, violation templates, and red herring templates using seed-based randomization to produce coherent, graded scenarios that are unique for every seed. Impossible to memorize. Architecture: 1. SystemTemplate — defines a category of AI system (drone delivery, exam proctoring, etc.) 2. ViolationTemplate — a specific compliance violation with document injection text 3. RedHerringTemplate — misleading information that isn't a real violation 4. ProceduralGenerator.generate(seed, difficulty) → AuditScenario """ from __future__ import annotations import random from dataclasses import dataclass, field from typing import Dict, List, Optional, Tuple from server.engine import AuditScenario, StateGraph, StateNode, Transition # --------------------------------------------------------------------------- # Templates # --------------------------------------------------------------------------- @dataclass(frozen=True) class SystemTemplate: id: str name_template: str # e.g. "{company} DroneGuard" category: str # prohibited, high_risk, limited_risk, minimal_risk annex_ref: str # which Annex III category or article description_template: str deployer_template: str domain_keywords: Tuple[str, ...] = () @dataclass(frozen=True) class ViolationTemplate: id: str tool_area: str # documentation, training_data, oversight, transparency, risk_management, logging finding_id: str # ground truth finding string remediation_id: str # required remediation string doc_injection: str # text injected into the relevant document section severity: str = "high" @dataclass(frozen=True) class RedHerringTemplate: id: str tool_area: str # which document section contains it doc_injection: str # misleading text # --------------------------------------------------------------------------- # System type pool (5 types covering different AI Act categories) # --------------------------------------------------------------------------- SYSTEM_TEMPLATES: List[SystemTemplate] = [ SystemTemplate( id="drone_delivery", name_template="{company} SkyRoute Delivery AI", category="high_risk", annex_ref="Annex III Category 2 — Critical Infrastructure", description_template=( "Autonomous drone delivery system operating in urban areas across {region}. " "AI controls flight path planning, obstacle avoidance, and delivery routing " "for {user_count} packages per month. System makes real-time autonomous " "decisions affecting public safety in shared airspace." ), deployer_template="{company} — logistics-tech startup, drone operator license in {region}.", domain_keywords=("drone", "airspace", "safety", "autonomous", "delivery"), ), SystemTemplate( id="exam_proctoring", name_template="{company} ExamGuard AI", category="high_risk", annex_ref="Annex III Category 3 — Education and Vocational Training", description_template=( "AI-powered online exam proctoring system used by {user_count} students " "across {region}. Monitors webcam feeds, screen activity, and audio to " "detect cheating behavior. Automated flagging can result in exam " "invalidation and academic disciplinary proceedings." ), deployer_template="{company} — EdTech company, serving 200+ universities in {region}.", domain_keywords=("exam", "proctoring", "student", "cheating", "academic"), ), SystemTemplate( id="insurance_claims", name_template="{company} ClaimIQ Adjudicator", category="high_risk", annex_ref="Annex III Category 5(a) — Access to Essential Services (Insurance)", description_template=( "AI system that evaluates and adjudicates insurance claims for health, " "property, and vehicle policies. Processes {user_count} claims annually " "in {region}. Automated decisions include claim approval, denial, and " "payout amount determination up to EUR 100,000." ), deployer_template="{company} — InsurTech firm, licensed in {region}, {user_count} policyholders.", domain_keywords=("insurance", "claim", "adjudication", "payout", "policy"), ), SystemTemplate( id="legal_research", name_template="{company} LexAssist AI", category="limited_risk", annex_ref="Article 50 — Transparency obligations for AI interacting with persons", description_template=( "AI-powered legal research assistant used by law firms across {region}. " "Analyzes case law, statutes, and regulatory documents to provide " "research summaries and case strategy suggestions. Used by {user_count} " "attorneys. Does not make legal decisions — advisory role only." ), deployer_template="{company} — LegalTech startup, SaaS platform for law firms in {region}.", domain_keywords=("legal", "research", "case law", "attorney", "advisory"), ), SystemTemplate( id="predictive_policing", name_template="{company} SafeCity Predictor", category="prohibited", annex_ref="Article 5(1)(d) — Prohibited: individual predictive policing", description_template=( "AI system deployed by municipal police in {region} to predict individual " "criminal behavior based on personal characteristics, social network " "analysis, and neighborhood data. Generates 'risk scores' for {user_count} " "residents used to allocate patrol resources and inform stop-and-search decisions." ), deployer_template="{company} — public safety technology vendor, contracted by 5 municipalities in {region}.", domain_keywords=("policing", "prediction", "crime", "risk score", "patrol"), ), ] # --------------------------------------------------------------------------- # Violation pool (20 violations across all tool areas) # --------------------------------------------------------------------------- VIOLATION_TEMPLATES: List[ViolationTemplate] = [ # --- Documentation violations --- ViolationTemplate( id="missing_annex_iv_sections", tool_area="documentation", finding_id="incomplete_annex_iv_documentation", remediation_id="complete_missing_annex_iv_sections", doc_injection=( "ANNEX IV COMPLIANCE:\n" " Section 2(b) Design specifications: Not documented\n" " Section 2(d) Performance per group: Not documented\n" " Section 2(g) Quality management: Not documented\n" " 3 of 10 required sections are missing." ), ), ViolationTemplate( id="stale_documentation", tool_area="documentation", finding_id="outdated_technical_documentation", remediation_id="update_documentation_to_current_version", doc_injection=( "NOTE: Core technical documentation was last updated 22 months ago\n" "(prior to EU AI Act enforcement). It does not reference the AI Act,\n" "harmonised standards, or current deployment configuration." ), ), ViolationTemplate( id="no_fria", tool_area="documentation", finding_id="missing_fundamental_rights_impact_assessment", remediation_id="conduct_fundamental_rights_impact_assessment", doc_injection="Fundamental Rights Impact Assessment: ABSENT — not conducted", ), # --- Training data violations --- ViolationTemplate( id="gender_bias", tool_area="training_data", finding_id="gender_bias_in_automated_decisions", remediation_id="conduct_bias_audit_and_mitigation", doc_injection=( "OUTCOME RATES BY GENDER:\n" " Group Rate Delta\n" " Male 41.3% (baseline)\n" " Female 29.7% -28.1%\n" " Non-binary 31.2% -24.5%\n" "\n" " Statistically significant disparity detected (p < 0.001)." ), ), ViolationTemplate( id="age_bias", tool_area="training_data", finding_id="age_discrimination_in_model_outputs", remediation_id="recalibrate_model_for_age_fairness", doc_injection=( "PERFORMANCE BY AGE GROUP:\n" " Age 18-30: accuracy 94.2%\n" " Age 31-50: accuracy 91.8%\n" " Age 51-65: accuracy 83.4%\n" " Age 65+: accuracy 71.9%\n" "\n" " Performance degrades significantly for older demographics." ), ), ViolationTemplate( id="no_data_governance", tool_area="training_data", finding_id="inadequate_data_governance_framework", remediation_id="establish_article_10_data_governance", doc_injection=( "DATA GOVERNANCE (Article 10):\n" " Data quality assessment: Not conducted\n" " Bias testing protocol: Not established\n" " Data provenance documentation: Incomplete (23 of 47 sources undocumented)\n" " Personal data handling: No Article 10-specific provisions" ), ), ViolationTemplate( id="consent_issue", tool_area="training_data", finding_id="invalid_consent_for_training_data", remediation_id="obtain_valid_consent_or_remove_data", doc_injection=( "CONSENT STATUS:\n" " Data collected under employer/institutional agreement.\n" " Individual subjects did not provide specific consent for AI\n" " training. Under EU labor/education law, consent given as a\n" " condition of employment/enrollment may not be freely given." ), ), # --- Oversight violations --- ViolationTemplate( id="low_review_rate", tool_area="oversight", finding_id="insufficient_human_oversight_of_decisions", remediation_id="implement_human_review_for_all_adverse_decisions", doc_injection=( "REVIEW STATISTICS:\n" " Automated decisions: 482,917\n" " Adverse outcomes: 144,875 (30.0%)\n" " Human-reviewed: 7,244 (5.0% of adverse)\n" " Review overrides: 362 (5.0% of reviews)\n" "\n" " 95% of adverse decisions receive no human review." ), ), ViolationTemplate( id="no_override", tool_area="oversight", finding_id="no_meaningful_override_capability", remediation_id="implement_accessible_override_mechanism", doc_injection=( "OVERRIDE CAPABILITY:\n" " Technical override exists in admin panel but is not accessible\n" " to frontline operators. Override requires supervisor approval\n" " and written justification. Average override processing time:\n" " 3.2 business days. Affected individuals cannot request override." ), ), ViolationTemplate( id="no_bias_monitoring", tool_area="oversight", finding_id="no_ongoing_bias_monitoring", remediation_id="implement_continuous_fairness_monitoring", doc_injection=( "BIAS MONITORING:\n" " No automated fairness monitoring system in place.\n" " No alerts configured for demographic drift.\n" " Last manual fairness review: 14 months ago." ), ), # --- Transparency violations --- ViolationTemplate( id="missing_ai_disclosure", tool_area="transparency", finding_id="missing_ai_system_disclosure", remediation_id="implement_clear_ai_disclosure", doc_injection=( "USER-FACING DISCLOSURE AUDIT:\n" " Application interface: No AI mention\n" " Terms of Service: Generic 'automated tools' reference (Section 7)\n" " Privacy Policy: No specific AI disclosure\n" " Decision notifications: No mention of AI involvement\n" "\n" " Article 50(1) requires informing persons they interact with AI." ), ), ViolationTemplate( id="no_explanation", tool_area="transparency", finding_id="no_right_to_explanation_mechanism", remediation_id="implement_individualized_explanations", doc_injection=( "RIGHT TO EXPLANATION:\n" " No mechanism for affected individuals to request explanation\n" " of AI-assisted decisions. Support team provides templated\n" " responses listing generic factors, not individual-specific\n" " reasoning." ), ), # --- Risk management violations --- ViolationTemplate( id="no_conformity", tool_area="risk_management", finding_id="missing_conformity_assessment", remediation_id="complete_conformity_assessment_procedure", doc_injection=( "CONFORMITY ASSESSMENT:\n" " Internal assessment (Article 43): Not initiated\n" " EU Declaration of Conformity: Not filed\n" " CE marking: Not applied\n" " Quality management system: Does not meet Article 17" ), ), ViolationTemplate( id="no_post_market", tool_area="risk_management", finding_id="missing_post_market_monitoring", remediation_id="establish_post_market_monitoring_plan", doc_injection=( "POST-MARKET MONITORING (Article 72):\n" " Post-market monitoring plan: ABSENT\n" " Incident reporting procedures: Not established\n" " Corrective action criteria: Not defined" ), ), # --- Logging violations --- ViolationTemplate( id="incomplete_logging", tool_area="logging", finding_id="inadequate_automatic_logging", remediation_id="implement_article_12_compliant_logging", doc_injection=( "ARTICLE 12 COMPLIANCE:\n" " Input features logged: No\n" " Confidence scores logged: No\n" " Decision rationale logged: No\n" " Demographic tracking: No\n" "\n" " Only final decisions and timestamps are recorded.\n" " Cannot reconstruct why specific decisions were made." ), ), ViolationTemplate( id="short_retention", tool_area="logging", finding_id="insufficient_log_retention_period", remediation_id="extend_retention_to_regulatory_minimum", doc_injection=( "LOG RETENTION:\n" " Decision logs: 30 days (regulatory minimum varies by sector)\n" " Audit trails: 30 days\n" " Model versions: 90 days\n" "\n" " For high-risk systems, retention periods should support\n" " retrospective audits and regulatory investigations." ), ), ] # --------------------------------------------------------------------------- # Red herring pool # --------------------------------------------------------------------------- RED_HERRING_TEMPLATES: List[RedHerringTemplate] = [ RedHerringTemplate( id="gdpr_separate", tool_area="training_data", doc_injection=( "NOTE: A separate GDPR compliance audit was conducted in 2025-Q2\n" "and identified no critical findings. GDPR compliance is assessed\n" "independently from EU AI Act obligations." ), ), RedHerringTemplate( id="separate_compliant_system", tool_area="training_data", doc_injection=( "ADDITIONAL CONTEXT: The organization operates a separate AI system\n" "for internal reporting that shares the same data infrastructure.\n" "That system passed its own compliance audit (2025-08) with no findings.\n" "Its compliance has no bearing on this system's obligations." ), ), RedHerringTemplate( id="iso_certification", tool_area="documentation", doc_injection=( "The deployer holds ISO 27001 and ISO 42001 certifications.\n" "These certifications cover information security and AI management\n" "respectively, but do not constitute EU AI Act conformity assessment." ), ), RedHerringTemplate( id="voluntary_ethics_board", tool_area="oversight", doc_injection=( "The deployer has established a voluntary AI Ethics Advisory Board\n" "that meets quarterly to review ethical considerations.\n" "The board's recommendations are non-binding and do not substitute\n" "for the mandatory human oversight requirements of Article 14." ), ), RedHerringTemplate( id="high_accuracy_claim", tool_area="risk_management", doc_injection=( "The deployer emphasizes that the system achieves 96.3% overall\n" "accuracy on the test benchmark, exceeding industry standards.\n" "However, aggregate accuracy does not address per-group performance\n" "or the specific risk management requirements of Article 9." ), ), ] # --------------------------------------------------------------------------- # Document templates per tool area # --------------------------------------------------------------------------- def _base_doc_template(area: str) -> str: """Base document structure for each investigation tool area.""" templates = { "documentation": ( "TECHNICAL DOCUMENTATION INVENTORY\n" "System: {system_name} {version}\n" "Deployer: {deployer}\n" "Audit Date: {date}\n\n" "DOCUMENT STATUS:\n" " System Architecture Present (updated 2025-10-15)\n" " Algorithm Description Present (updated 2025-09-22)\n" " Input/Output Specification Present (updated 2025-10-15)\n" " Performance Metrics (Aggregate) Present (updated 2025-11-01)\n" " Data Source Inventory Present (updated 2025-07-18)\n" " Change Management Log Present (updated 2025-11-28)\n\n" "{violations}\n\n" "{red_herrings}" ), "training_data": ( "TRAINING DATA AUDIT REPORT\n" "System: {system_name} {version}\n" "Records: {record_count} entries ({data_period})\n\n" "DEMOGRAPHIC DISTRIBUTION:\n" " Category Count Pct\n" " Male {male_count} {male_pct}%\n" " Female {female_count} {female_pct}%\n" " Age 18-35 {young_count} {young_pct}%\n" " Age 36-55 {mid_count} {mid_pct}%\n" " Age 56+ {old_count} {old_pct}%\n\n" "{violations}\n\n" "DATA SOURCES:\n" " {data_source_1}\n" " {data_source_2}\n\n" "{red_herrings}" ), "oversight": ( "HUMAN OVERSIGHT PROCEDURES\n" "System: {system_name} {version}\n" "Department: Operations\n\n" "DECISION WORKFLOW:\n" " 1. Input data received and preprocessed\n" " 2. AI model generates recommendation/decision\n" " 3. Output delivered to end-user or downstream system\n\n" "{violations}\n\n" "{red_herrings}" ), "transparency": ( "TRANSPARENCY & DISCLOSURE REVIEW\n" "System: {system_name} {version}\n\n" "USER-FACING COMMUNICATIONS:\n" " The system's user interface and documentation were reviewed\n" " for compliance with EU AI Act transparency obligations.\n\n" "{violations}\n\n" "{red_herrings}" ), "risk_management": ( "RISK MANAGEMENT & CONFORMITY ASSESSMENT\n" "System: {system_name} {version}\n\n" "ANNEX III CLASSIFICATION:\n" " {annex_ref}\n\n" "RISK LEVEL DETERMINATION: {risk_level}\n\n" "{violations}\n\n" "{red_herrings}" ), "logging": ( "LOGGING & TRACEABILITY REVIEW\n" "System: {system_name} {version}\n\n" "CURRENT LOGGING IMPLEMENTATION:\n" " Event Type Logged Retention\n" " Application received Yes {retention}\n" " Decision generated Yes {retention}\n" " Model version Yes Indefinite\n\n" "{violations}\n\n" "{red_herrings}" ), } return templates.get(area, "") # --------------------------------------------------------------------------- # Procedural generator # --------------------------------------------------------------------------- # Difficulty → violation count range DIFFICULTY_VIOLATION_RANGE = { "easy": (1, 2), "medium": (3, 5), "hard": (4, 6), } DIFFICULTY_RED_HERRING_RANGE = { "easy": (0, 1), "medium": (1, 2), "hard": (2, 3), } def _build_procedural_graph( investigation_tools: List[str], is_prohibited: bool = False, ) -> StateGraph: """Build state graph for a procedural scenario (same logic as registry).""" # Import the shared graph builder from scenarios.registry import _build_scenario_graph return _build_scenario_graph(investigation_tools, is_prohibited) def generate_procedural_scenario( seed: int, difficulty: str = "medium", ) -> AuditScenario: """Generate a unique compliance audit scenario from seed. Every seed produces a different combination of system type, violations, red herrings, and document content. The ground truth, state graph, and reward computation are all coherent and valid. Args: seed: Random seed for reproducible generation. difficulty: "easy", "medium", or "hard". Returns: A fully populated AuditScenario ready for use. """ rng = random.Random(seed) # 1. Pick system type if difficulty == "easy": candidates = [s for s in SYSTEM_TEMPLATES if s.category in ("limited_risk", "minimal_risk")] if not candidates: candidates = [s for s in SYSTEM_TEMPLATES if s.category == "limited_risk"] elif difficulty == "hard": candidates = [s for s in SYSTEM_TEMPLATES if s.category in ("prohibited", "high_risk")] else: candidates = list(SYSTEM_TEMPLATES) system = rng.choice(candidates) # 2. Pick violations min_v, max_v = DIFFICULTY_VIOLATION_RANGE[difficulty] n_violations = rng.randint(min_v, max_v) available_violations = list(VIOLATION_TEMPLATES) rng.shuffle(available_violations) violations = available_violations[:n_violations] # 3. Pick red herrings min_r, max_r = DIFFICULTY_RED_HERRING_RANGE[difficulty] n_red_herrings = rng.randint(min_r, max_r) available_red_herrings = list(RED_HERRING_TEMPLATES) rng.shuffle(available_red_herrings) red_herrings = available_red_herrings[:n_red_herrings] # 4. Generate randomized parameters company_names = [ "TechNova Solutions", "QuantumLeap AI", "NeuralPath Inc", "DataForge Systems", "CogniTech Labs", "AlphaWave AI", "SynthMind Corp", "PrismAI Technologies", "Vertex Analytics", "OmniSense AI", "DeepCurrent Inc", "StrataLogic Systems", "AeroMind Labs", "CyberPulse Inc", "InnoVista AI", ] regions = ["EU-West (DE/FR/NL)", "EU-Central (DE/AT/CH)", "EU-North (SE/FI/DK)", "EU-South (IT/ES/PT)", "EU-East (PL/CZ/RO)"] company = rng.choice(company_names) region = rng.choice(regions) version = f"v{rng.randint(1,6)}.{rng.randint(0,9)}" date = f"2026-{rng.randint(1,3):02d}-{rng.randint(1,28):02d}" user_count = f"{rng.randint(10000, 5000000):,}" system_name = system.name_template.format(company=company) deployer = system.deployer_template.format( company=company, region=region, user_count=user_count ) description = system.description_template.format( company=company, region=region, user_count=user_count ) # 5. Group violations and red herrings by tool area area_violations: Dict[str, List[str]] = {} area_red_herrings: Dict[str, List[str]] = {} for v in violations: area_violations.setdefault(v.tool_area, []).append(v.doc_injection) for r in red_herrings: area_red_herrings.setdefault(r.tool_area, []).append(r.doc_injection) # 6. Generate documents fill_params = { "system_name": system_name, "version": version, "deployer": deployer, "date": date, "annex_ref": system.annex_ref, "risk_level": system.category.replace("_", " ").title(), "record_count": f"{rng.randint(100000, 5000000):,}", "data_period": f"20{rng.randint(19,23)}-2025", "male_count": f"{rng.randint(400000, 800000):,}", "male_pct": f"{rng.uniform(55, 68):.1f}", "female_count": f"{rng.randint(200000, 500000):,}", "female_pct": f"{rng.uniform(32, 45):.1f}", "young_count": f"{rng.randint(200000, 400000):,}", "young_pct": f"{rng.uniform(28, 40):.1f}", "mid_count": f"{rng.randint(300000, 500000):,}", "mid_pct": f"{rng.uniform(35, 48):.1f}", "old_count": f"{rng.randint(50000, 200000):,}", "old_pct": f"{rng.uniform(12, 25):.1f}", "data_source_1": f"Primary: {rng.choice(['Enterprise API exports', 'Partner platform data', 'Direct user submissions'])}", "data_source_2": f"Secondary: {rng.choice(['Public datasets (filtered)', 'Licensed commercial data', 'Internal test data'])}", "retention": rng.choice(["5 years", "7 years", "3 years", "10 years"]), } def _build_doc(area: str) -> str: template = _base_doc_template(area) v_text = "\n\n".join(area_violations.get(area, ["(No issues identified in this area.)"])) r_text = "\n\n".join(area_red_herrings.get(area, [""])) filled = template.format(violations=v_text, red_herrings=r_text, **fill_params) return filled docs = { "documentation_data": _build_doc("documentation"), "training_data_info": _build_doc("training_data"), "oversight_info": _build_doc("oversight"), "transparency_info": _build_doc("transparency"), "risk_assessment_info": _build_doc("risk_management"), "logging_info": _build_doc("logging"), } # 7. Determine investigation tools (areas that have violations) affected_areas = set(v.tool_area for v in violations) tool_map = { "documentation": "check_documentation", "training_data": "audit_training_data", "oversight": "verify_human_oversight", "transparency": "check_transparency", "risk_management": "assess_risk_management", "logging": "check_logging", } investigation_tools = [tool_map[a] for a in [ "documentation", "training_data", "oversight", "transparency", "risk_management", "logging" ] if a in affected_areas] # Ensure at least 2 investigation tools for meaningful audit if len(investigation_tools) < 2: extras = ["check_documentation", "check_transparency"] for e in extras: if e not in investigation_tools: investigation_tools.append(e) if len(investigation_tools) >= 2: break # 8. Build the scenario scenario = AuditScenario( scenario_id=f"procedural_{difficulty}_{seed:06d}", title=f"Procedural: {system_name} ({difficulty.title()})", difficulty=difficulty, description=description, system_name=system_name, system_description=description, system_category=system.category, deployer_info=deployer, correct_classification=system.category, ground_truth_findings=[v.finding_id for v in violations], required_remediation=[v.remediation_id for v in violations], red_herrings=[r.id for r in red_herrings], **docs, ) # 9. Build state graph scenario.graph = _build_procedural_graph( investigation_tools=investigation_tools, is_prohibited=(system.category == "prohibited"), ) # 10. Randomize (adds company/region/version params) scenario.randomize(seed) return scenario