Itachi-1824
feat: investigation-grade overhaul + procedural generation
107f92d
Raw
History Blame Contribute Delete
128 kB
"""
Scenario registry — 8 EU AI Act compliance audit scenarios across 3 difficulty tiers.
Investigation-grade: Each tool returns realistic regulatory documents that
require analysis to identify violations. No pre-digested verdicts — the agent
must reason about the evidence to find compliance gaps.
Easy (2): Clear-cut systems, shorter documents, obvious violations
Medium (3): Detailed documents with statistical evidence, red herrings mixed in
Hard (3): Ambiguous framing, misleading deployer claims, compound violations
"""
from __future__ import annotations
import random
from typing import Dict, List, Optional
from server.engine import AuditScenario, StateGraph, StateNode, Transition
# ---------------------------------------------------------------------------
# Unique state graph builder
# ---------------------------------------------------------------------------
def _build_scenario_graph(
investigation_tools: List[str],
is_prohibited: bool = False,
) -> StateGraph:
"""Build a state graph unique to this scenario's investigation path.
Only tools in `investigation_tools` create progress transitions through
investigation nodes. Other investigation tools are allowed but produce
no_effect. This gives each scenario a distinct graph topology.
Args:
investigation_tools: Ordered list of investigation tool names forming
the progress path (e.g. ["check_documentation", "audit_training_data"]).
is_prohibited: If True, classification leads directly to findings
(no extended investigation needed for prohibited systems).
"""
g = StateGraph()
# Investigation tool β†’ node mapping
TOOL_NODES = {
"check_documentation": ("docs_reviewed", "Documentation Reviewed"),
"audit_training_data": ("data_audited", "Training Data Audited"),
"verify_human_oversight": ("oversight_checked", "Human Oversight Verified"),
"check_transparency": ("transparency_checked", "Transparency Checked"),
"assess_risk_management": ("risk_assessed", "Risk Management Assessed"),
"check_logging": ("logging_checked", "Logging Verified"),
}
ALL_INVESTIGATION_TOOLS = list(TOOL_NODES.keys())
# Always-present nodes
g.add_node(StateNode("initial", "Audit Assigned", is_start=True))
g.add_node(StateNode("overview", "System Overview Gathered"))
g.add_node(StateNode("classified", "Risk Classification Done"))
g.add_node(StateNode("findings_submitted", "Findings Submitted"))
g.add_node(StateNode("remediation_proposed", "Remediation Recommended"))
g.add_node(StateNode("resolved", "Compliance Verified", is_terminal=True))
# Add nodes only for tools in the investigation path
for tool in investigation_tools:
node_id, label = TOOL_NODES[tool]
g.add_node(StateNode(node_id, label))
# --- Build progress chain ---
# initial β†’ overview β†’ classified β†’ [investigation tools...] β†’ findings β†’ remediation β†’ resolved
g.add_transition(Transition("initial", "overview", "get_system_overview", "progress",
description="Gather system overview and deployment context"))
g.add_transition(Transition("overview", "classified", "classify_system", "progress",
description="Classify the AI system risk category"))
if is_prohibited:
# Prohibited: classify β†’ findings directly
g.add_transition(Transition("classified", "findings_submitted", "submit_finding", "progress",
description="Report prohibited AI system"))
else:
# Chain investigation tools in order
prev_state = "classified"
for tool in investigation_tools:
node_id = TOOL_NODES[tool][0]
g.add_transition(Transition(prev_state, node_id, tool, "progress",
description=f"Progress: {tool}"))
prev_state = node_id
g.add_transition(Transition(prev_state, "findings_submitted", "submit_finding", "progress",
description="Submit compliance findings"))
g.add_transition(Transition("findings_submitted", "remediation_proposed", "recommend_fix", "progress",
description="Propose remediation actions"))
g.add_transition(Transition("remediation_proposed", "resolved", "verify_compliance", "progress",
description="Final compliance determination"))
# --- No-effect transitions (investigation tools not in the path) ---
# These tools work (return data) but don't advance the state graph
all_progress_states = ["initial", "overview", "classified"]
for tool in investigation_tools:
all_progress_states.append(TOOL_NODES[tool][0])
for tool in ALL_INVESTIGATION_TOOLS:
if tool not in investigation_tools:
# Can call from any state, no effect on graph
for state_id in all_progress_states:
g.add_transition(Transition(state_id, state_id, tool, "no_effect",
description=f"{tool} not required for this scenario"))
# --- No-effect for premature terminal actions ---
for state in all_progress_states:
if state not in ("findings_submitted", "remediation_proposed"):
g.add_transition(Transition(state, state, "verify_compliance", "no_effect",
description="Cannot verify before completing audit"))
# --- Worsened transitions (trap actions) ---
g.add_transition(Transition("initial", "initial", "classify_system", "worsened",
description="Classifying without understanding the system first"))
for state in ["initial", "overview", "classified"]:
g.add_transition(Transition(state, state, "recommend_fix", "worsened",
description="Proposing fixes before identifying problems"))
for state in ["initial", "overview"]:
g.add_transition(Transition(state, state, "submit_finding", "no_effect",
description="Submitting findings before investigation"))
return g
# ---------------------------------------------------------------------------
# EASY SCENARIOS
# ---------------------------------------------------------------------------
def _easy_chatbot() -> AuditScenario:
    """Build the easy-tier RetailAssist chatbot transparency scenario.

    The scenario is a limited-risk customer service chatbot whose ground-truth
    findings are a missing AI disclosure and the lack of a customer-initiated
    human escalation option. Its state graph requires ``check_transparency``
    followed by ``verify_human_oversight``.

    The ``__COMPANY__``, ``__VERSION__``, ``__DATE__`` and ``__USERCOUNT__``
    tokens are left verbatim in the text; presumably they are substituted by
    the procedural generation step -- verify against the caller.

    Returns:
        The configured ``AuditScenario`` with its ``graph`` attribute set.
    """
    scenario = AuditScenario(
        scenario_id="easy_chatbot_transparency_001",
        title="Customer Service Chatbot — Transparency Audit",
        difficulty="easy",
        description=(
            "A customer service chatbot deployed on a retail website needs compliance "
            "review. The system uses a general-purpose LLM to answer product questions. "
            "Initial assessment suggests it may be missing required transparency "
            "disclosures under Article 50. Conduct a focused audit."
        ),
        system_name="RetailAssist Chatbot",
        system_description=(
            "General-purpose LLM-based chatbot answering customer questions about "
            "products, shipping, and returns. No biometric processing, no profiling, "
            "no high-risk decisions. Deployed on company website with __USERCOUNT__ "
            "daily users."
        ),
        system_category="limited_risk",
        deployer_info="__COMPANY__ — mid-size EU e-commerce retailer, 200 employees, operating in DE/FR/NL.",
        correct_classification="limited_risk",
        ground_truth_findings=[
            "missing_ai_disclosure",
            "no_human_escalation_option",
        ],
        required_remediation=[
            "add_ai_disclosure_banner",
            "implement_human_handoff",
        ],
        red_herrings=[
            "high_risk_automated_decision",
        ],
        # --- Investigation documents ---
        documentation_data="""\
TECHNICAL DOCUMENTATION INVENTORY
System: RetailAssist Chatbot __VERSION__
Deployer: __COMPANY__
Review Date: __DATE__
DOCUMENT STATUS:
System Architecture Overview Present (updated 2025-11-03)
Data Processing Description Present (updated 2025-09-22)
Input/Output Specification Present (updated 2025-11-03)
Performance Metrics Report Present (accuracy 94.2% on test set)
User Interaction Guidelines Present (updated 2025-06-10)
ANNEX IV CROSS-REFERENCE (for limited-risk, advisory only):
Section 1(a) Intended purpose Documented
Section 1(b) Deployer identification Documented
Section 2(a) Development methods Documented
NOTE: Documentation is generally adequate for a limited-risk system.
The system processes no biometric data and makes no legally significant
decisions. Standard chatbot architecture with retrieval-augmented generation.
No gaps identified in core technical documentation.
The primary compliance concern for this system category relates to
Article 50 transparency obligations, not documentation completeness.""",
        training_data_info="""\
TRAINING DATA SUMMARY
System: RetailAssist Chatbot __VERSION__
Dataset: Product catalog entries + FAQ question-answer pairs
Volume: 52,847 examples (last updated 2025-10-15)
DATA COMPOSITION:
Product descriptions 31,204 entries
FAQ pairs 14,892 entries
Customer service transcripts 6,751 entries (anonymized)
Personal data in training set: None identified
- Customer transcripts were fully anonymized before inclusion
- No names, emails, or account numbers in training data
- PII scrubbing verified by DPO on 2025-08-20
Bias assessment: Not formally required for limited-risk chatbot systems.
The system provides product information and does not make decisions
affecting individuals' rights or access to services.
Data governance: Adequate — data sources documented, retention policy
in place (36-month cycle), access controls implemented.""",
        oversight_info="""\
HUMAN OVERSIGHT PROCEDURES
System: RetailAssist Chatbot __VERSION__
Department: Customer Experience Team
CURRENT PROCESS:
- Customer service team of 12 agents monitors a dashboard showing
flagged conversations (profanity filter, sentiment < 0.3, repeat queries)
- Approximately 8% of conversations are flagged for human review
- Average response time for flagged conversations: 4.2 minutes
- Team operates during business hours (08:00-20:00 CET, Mon-Sat)
ESCALATION PROCEDURE:
The chatbot displays a generic "Was this helpful?" prompt after each
interaction. If the user clicks "No", the chatbot offers to repeat
the answer or try a different phrasing.
There is no option for the user to request transfer to a human agent
during the conversation. The "Contact Us" page exists separately on
the website but is not linked from the chat interface.
After business hours, flagged conversations queue until the next
business day. No real-time human intervention is available outside
business hours.
OVERRIDE CAPABILITY:
Customer service agents can take over any active conversation from the
monitoring dashboard, but this is agent-initiated, not customer-initiated.
Customers have no mechanism to trigger human intervention directly.""",
        transparency_info="""\
TRANSPARENCY & DISCLOSURE REVIEW
System: RetailAssist Chatbot __VERSION__
USER INTERFACE ANALYSIS:
The chatbot appears as a widget in the bottom-right corner of the
website. The chat window header reads "RetailAssist — How can I help?"
with a generic chat bubble icon.
Opening message: "Hi! I'm here to help you find products, track
orders, and answer questions about our store. What can I do for you?"
The chat interface includes:
- Text input field
- Send button
- Emoji picker
- "Was this helpful?" feedback after each response
SEARCHED FOR AI DISCLOSURE INDICATORS:
- Chat header: No mention of AI or automated system
- Opening message: No mention of AI or automated system
- Website footer: Standard legal links (Privacy, Terms, Cookies)
- Terms of Service: Section 7.3 mentions "automated tools may assist
customer service" but this is not shown to users
during chat interaction
- Privacy Policy: Section 4.1 mentions "AI-powered features" in
general terms but does not specifically disclose
the chatbot as AI
- Chat widget tooltip: "Chat with us" (no AI indication)
- Accessibility statement: No mention of AI
Article 50(1) requires: "Providers shall ensure that AI systems intended
to directly interact with natural persons are designed and developed in
such a way that the natural persons concerned are informed that they are
interacting with an AI system."
CONTENT LABELING:
Not applicable — system generates text responses only, no deepfakes
or synthetic media requiring Article 50(2) labeling.""",
        risk_assessment_info="""\
RISK CLASSIFICATION ANALYSIS
System: RetailAssist Chatbot __VERSION__
ANNEX III HIGH-RISK CHECK:
1. Biometric identification Not applicable
2. Critical infrastructure Not applicable
3. Education and vocational training Not applicable
4. Employment, workers management Not applicable
5. Access to essential services Not applicable — provides product
information only, no credit/insurance/social services decisions
6. Law enforcement Not applicable
7. Migration, asylum, border control Not applicable
8. Administration of justice Not applicable
ARTICLE 5 PROHIBITED PRACTICES CHECK:
Subliminal manipulation Not applicable
Exploitation of vulnerabilities Not applicable
Social scoring Not applicable
Real-time biometric identification Not applicable
RISK LEVEL DETERMINATION: Limited Risk
The system falls under Article 50 transparency obligations as an
AI system designed to directly interact with natural persons.
No high-risk or prohibited characteristics identified.
CONFORMITY ASSESSMENT: Not required for limited-risk systems.
Voluntary code of conduct adherence recommended per Article 95.""",
        logging_info="""\
LOGGING & TRACEABILITY REVIEW
System: RetailAssist Chatbot __VERSION__
CURRENT LOGGING:
Conversation logs: Retained for 30 days, then anonymized + archived
Model version: Tracked per deployment (current: __VERSION__)
User feedback: Stored with conversation ID
System health metrics: Uptime, response latency, error rate
AUDIT TRAIL:
Each conversation has a unique ID linking:
- User session (anonymized)
- Model version used
- All messages in sequence
- Feedback received
- Any flags triggered
Logs are adequate for a limited-risk system. Article 12 automatic
logging requirements apply primarily to high-risk systems.
Current logging exceeds what is required for the system's risk level.""",
    )
    # Unique graph: only transparency and oversight are in the investigation path
    scenario.graph = _build_scenario_graph(
        investigation_tools=["check_transparency", "verify_human_oversight"],
    )
    return scenario
def _easy_recommendation() -> AuditScenario:
    """Build the easy-tier MelodyMatch minimal-risk verification scenario.

    The scenario is a music recommendation engine whose correct classification
    is ``minimal_risk``; the single ground-truth finding is that a voluntary
    code of conduct is recommended, while the GDPR and high-risk-profiling
    entries are red herrings. Its state graph requires ``check_transparency``
    followed by ``assess_risk_management``.

    The ``__COMPANY__``, ``__VERSION__`` and ``__USERCOUNT__`` tokens are left
    verbatim in the text; presumably they are substituted by the procedural
    generation step -- verify against the caller.

    Returns:
        The configured ``AuditScenario`` with its ``graph`` attribute set.
    """
    scenario = AuditScenario(
        scenario_id="easy_recommendation_minimal_001",
        title="Music Recommendation Engine — Minimal Risk Verification",
        difficulty="easy",
        description=(
            "A music streaming platform's recommendation algorithm needs EU AI Act "
            "classification. The system suggests songs based on listening history. "
            "Stakeholders believe it's minimal risk but need formal verification."
        ),
        system_name="MelodyMatch Algorithm",
        system_description=(
            "Collaborative filtering recommendation engine for music. Suggests songs "
            "based on listening patterns. No personal profiling beyond music preferences. "
            "No decisions affecting legal rights. Pure content personalization for "
            "__USERCOUNT__ EU users."
        ),
        system_category="minimal_risk",
        deployer_info="__COMPANY__ — Swedish music streaming startup, 50 employees.",
        correct_classification="minimal_risk",
        ground_truth_findings=[
            "voluntary_code_of_conduct_recommended",
        ],
        required_remediation=[
            "adopt_voluntary_transparency_code",
        ],
        red_herrings=[
            "gdpr_violation",
            "high_risk_profiling",
        ],
        documentation_data="""\
TECHNICAL DOCUMENTATION INVENTORY
System: MelodyMatch Algorithm __VERSION__
Deployer: __COMPANY__
DOCUMENT STATUS:
System Architecture Present (hybrid collaborative filtering)
Algorithm Description Present (item-item CF + content embeddings)
Data Pipeline Documentation Present (Spark ETL pipeline)
Performance Metrics Present (hit@10: 0.342, NDCG: 0.281)
API Documentation Present (REST API for mobile/web clients)
All core technical documents are present and current.
The system is a standard recommendation engine with no novel or
experimental components requiring additional documentation.""",
        training_data_info="""\
TRAINING DATA SUMMARY
System: MelodyMatch Algorithm __VERSION__
Dataset: Anonymized listening history from __USERCOUNT__ users
Volume: 10.3M user-song interactions (2023-2025)
DATA COMPOSITION:
Interaction types: Play, skip, save, playlist-add
User features: Pseudonymized user ID, country, subscription tier
Song features: Genre, tempo, energy, valence, artist, release year
Personal data assessment:
- User IDs are pseudonymized (SHA-256 hash, no reversal possible)
- No names, emails, or demographic data in training set
- Country used for regional catalog filtering only
- GDPR Article 6(1)(f) legitimate interest basis documented
Bias considerations:
Music recommendations do not involve protected characteristics.
Popularity bias exists (mainstream content recommended more often)
but this does not constitute discrimination under the AI Act.
No individuals are disadvantaged in access to services or rights.
Note: Some stakeholders raised concerns about "profiling" under GDPR.
Music taste profiling for recommendations is distinct from profiling
under the AI Act which concerns evaluation of personal aspects
affecting natural persons' rights. Recommendation of entertainment
content does not qualify as high-risk profiling.""",
        oversight_info="""\
HUMAN OVERSIGHT ASSESSMENT
System: MelodyMatch Algorithm __VERSION__
The EU AI Act does not impose specific human oversight requirements
on minimal-risk AI systems. Article 14 human oversight obligations
apply to high-risk AI systems only.
Current oversight measures (voluntary):
- Product team reviews recommendation quality metrics weekly
- A/B testing framework validates algorithm changes before rollout
- Content moderation team can flag/remove specific recommendations
- Users can provide feedback ("Don't recommend this") per track""",
        transparency_info="""\
TRANSPARENCY REVIEW
System: MelodyMatch Algorithm __VERSION__
USER-FACING DISCLOSURES:
The app includes a "How recommendations work" info page accessible
from Settings > Privacy > Personalization, which states:
"MelodyMatch uses your listening history to suggest music you might
enjoy. Our algorithm analyzes patterns in what you play, skip, and
save to find similar tracks and artists."
Recommendations are labeled "Recommended for you" and "Based on
your recent listening" in the UI.
Article 50 transparency obligations for systems interacting directly
with natural persons apply to chatbots and emotion recognition, not
to content recommendation systems. No mandatory transparency
obligations identified for this system category.
Voluntary transparency: The deployer has voluntarily disclosed the
algorithmic nature of recommendations, which aligns with Article 95
(voluntary codes of conduct for non-high-risk AI).""",
        risk_assessment_info="""\
RISK CLASSIFICATION ANALYSIS
System: MelodyMatch Algorithm __VERSION__
ANNEX III HIGH-RISK CHECK:
1. Biometric identification Not applicable
2. Critical infrastructure Not applicable
3. Education and vocational training Not applicable
4. Employment, workers management Not applicable
5. Access to essential services Not applicable
6. Law enforcement Not applicable
7. Migration, asylum, border control Not applicable
8. Administration of justice Not applicable
ARTICLE 5 PROHIBITED PRACTICES:
None applicable — content recommendation is not subliminal
manipulation, social scoring, or biometric identification.
RISK LEVEL DETERMINATION: Minimal Risk
The system recommends entertainment content. It does not make
decisions affecting natural persons' rights, access to services,
or legal status. No Annex III category applies.
Under Article 95, deployers of minimal-risk AI systems are
encouraged to voluntarily apply codes of conduct for transparency
and trustworthiness, but this is not mandatory.""",
        logging_info="""\
LOGGING REVIEW
System: MelodyMatch Algorithm __VERSION__
Article 12 automatic logging requirements apply to high-risk AI
systems. This minimal-risk system is not subject to those obligations.
Current logging (voluntary):
Model version tracking: Yes (per deployment)
Recommendation logs: Retained 90 days
A/B test results: Archived indefinitely
User feedback: Linked to recommendation events
Logging is adequate and exceeds requirements for the risk level.""",
    )
    # Minimal graph: mainly just classification verification
    scenario.graph = _build_scenario_graph(
        investigation_tools=["check_transparency", "assess_risk_management"],
    )
    return scenario
# ---------------------------------------------------------------------------
# MEDIUM SCENARIOS
# ---------------------------------------------------------------------------
def _medium_hiring() -> AuditScenario:
s = AuditScenario(
scenario_id="medium_hiring_bias_001",
title="AI Resume Screener β€” High-Risk Employment System Audit",
difficulty="medium",
description=(
"A recruitment platform uses AI to screen and rank job applicants. "
"This falls under Annex III (employment, workers management) making it "
"a candidate for HIGH-RISK classification. Reports suggest potential "
"gender bias in technical role screening. Full compliance audit required."
),
system_name="TalentRank AI Screener",
system_description=(
"NLP-based resume screening system that scores and ranks applicants for "
"job openings. Processes name, education, work history, skills. Used by "
"500+ EU companies for initial candidate filtering. Automated decisions "
"affect who gets interviews. Deployed in __REGION__ with __USERCOUNT__ "
"applicants processed to date."
),
system_category="high_risk",
deployer_info="__COMPANY__ β€” French HR-tech company, 300 employees, serving 500+ EU enterprise clients.",
correct_classification="high_risk",
ground_truth_findings=[
"gender_bias_in_technical_screening",
"insufficient_human_oversight",
"incomplete_technical_documentation",
"missing_fundamental_rights_impact_assessment",
"inadequate_data_governance",
],
required_remediation=[
"conduct_bias_audit_and_mitigation",
"implement_human_review_for_all_rejections",
"complete_annex_iv_technical_documentation",
"perform_fundamental_rights_impact_assessment",
"establish_data_governance_framework",
],
red_herrings=[
"prohibited_social_scoring",
"biometric_processing",
],
documentation_data="""\
TECHNICAL DOCUMENTATION INVENTORY
System: TalentRank AI Screener __VERSION__
Deployer: __COMPANY__
Audit Date: __DATE__
DOCUMENT STATUS MATRIX:
Document Status Last Updated
─────────────────────────────────────────────────────────────────
System Architecture Overview Present 2025-11-03
Algorithm Description (NLP Pipeline) Present 2025-09-22
Input Data Specification Present 2025-11-03
Output Specification Present 2025-11-03
Performance Metrics Report Absent β€”
Accuracy by Demographic Group Report Absent β€”
Data Governance Policy Absent β€”
Fundamental Rights Impact Assessment Absent β€”
Risk Management Plan Present 2024-08-15
Post-Market Monitoring Plan Draft 2025-12-01
User Instructions (Article 13) Partial 2025-06-10
Change Management Log Present 2025-11-28
EU AI ACT ANNEX IV CROSS-REFERENCE:
Section 1 β€” General Description
(a) Intended purpose Documented
(b) Deployer name and contact Documented
(c) Version and update history Documented
Section 2 β€” Detailed Description
(a) Development methods and process Documented
(b) Design specifications and rationale Not documented β€” no architecture
diagrams for the scoring pipeline
(c) Data requirements and provenance Partial β€” data sources listed but
no data governance policy document
(d) Metrics and performance benchmarks Not documented β€” no disaggregated
performance metrics by demographic group
(e) Computational resources Documented
(f) Expected lifetime and maintenance Documented
(g) Quality management procedures Not documented
Section 3 β€” Monitoring and Reporting
(a) Post-market monitoring plan Draft only β€” not finalized
(b) Logging requirements specification Documented
OBSERVATIONS:
The Risk Management Plan was last updated 18 months ago. It does not
reference the EU AI Act, harmonised standards, or Article 9 requirements.
The Post-Market Monitoring Plan remains in draft status with no target
completion date established.
Article 13 user instructions do not include information about the system's
accuracy levels, known limitations, or demographic performance variation.
No Fundamental Rights Impact Assessment has been conducted.""",
training_data_info="""\
TRAINING DATA AUDIT REPORT
System: TalentRank AI Screener __VERSION__
Dataset: Historical hiring decisions from EU enterprise clients
Records: 2,147,832 applications (2019-2025)
Last model retrain: 2025-10-15
DEMOGRAPHIC DISTRIBUTION OF TRAINING DATA:
Category Count Pct of Total
────────────────────────────────────────────────
Male applicants 1,342,104 62.5%
Female applicants 805,728 37.5%
Age 18-30 751,741 35.0%
Age 31-45 923,168 43.0%
Age 46-65 472,923 22.0%
Western EU origin 1,567,516 73.0%
Eastern EU origin 580,316 27.0%
CALLBACK RATES BY DEMOGRAPHIC (Technical Roles Only):
Group Applications Callbacks Rate vs Baseline
──────────────────────────────────────────────────────────────────────
Male applicants 487,204 166,624 34.2% (baseline)
Female applicants 198,731 52,266 26.3% -23.1%
Age 18-30 241,367 91,961 38.1% +11.4%
Age 31-45 298,422 100,927 33.8% -1.2%
Age 46-65 146,146 41,924 28.7% -16.1%
Western EU 512,489 179,884 35.1% +2.6%
Eastern EU 173,446 47,547 27.4% -19.9%
Note: Callback rate = percentage of applicants advanced to interview stage.
Baseline = overall male callback rate for technical roles.
DATA SOURCES:
Source Records Period Documented
────────────────────────────────────────────────────────────────
Enterprise client ATS exports 1,847,291 2019-2025 Yes
Partner recruiting platforms 203,114 2021-2025 Yes
Internal test applications 97,427 2024-2025 Yes
DATA GOVERNANCE:
Data retention policy: Present (5-year retention)
Source documentation: 47 of 52 sources documented
Personal data inventory: Names, education, work history, skills
Consent mechanism: Employer-level agreements in place;
individual applicant consent not obtained
Article 10 specific provisions: Not addressed in current data governance
Data quality assessment: Last conducted 2024-06-12
ADDITIONAL CONTEXT:
A separate fraud detection module shares the same data lake infrastructure
but operates independently with its own pipeline. The fraud detection
module passed its most recent bias audit with no findings (2025-09-01).
The fraud system's compliance status has no bearing on TalentRank's
compliance obligations.""",
oversight_info="""\
HUMAN OVERSIGHT PROCEDURES
System: TalentRank AI Screener __VERSION__
Department: Client Success & Quality Assurance
REVIEW PROCESS:
TalentRank's screening pipeline processes applications in three stages:
Stage 1 β€” Automated Screening (AI):
All incoming applications are scored by the AI model (0-100).
Applications scoring below the client-configured threshold (default: 40)
are automatically marked as "Not Progressed."
Stage 2 β€” Quality Sampling:
The QA team reviews a random sample of screened applications.
Current sampling rate: 5% of "Not Progressed" decisions.
Sampling is conducted weekly in batch.
Stage 3 β€” Client Review:
Applications marked "Progressed" are presented to the hiring manager
for final interview selection. Clients may also view "Not Progressed"
applications if they choose, but fewer than 2% of clients do so.
REVIEW STATISTICS (Q4 2025):
Applications processed: 347,291
Automatically rejected: 208,375 (60.0%)
QA sample reviewed: 10,419 (5.0% of rejections)
QA overrides (rejection β†’ pass): 312 (3.0% of samples)
Client-initiated reviews: 4,166 (2.0% of clients)
OVERRIDE CAPABILITY:
Both QA staff and client hiring managers can override any AI decision.
The override interface is accessible from the application dashboard.
However, the system does not proactively flag borderline cases or
indicate confidence scores to reviewers.
MONITORING:
No ongoing bias monitoring system is in place. The QA sampling is
focused on general quality, not demographic fairness. No automated
alerts exist for drift in rejection rates across demographic groups.""",
transparency_info="""\
TRANSPARENCY & USER NOTIFICATION REVIEW
System: TalentRank AI Screener __VERSION__
APPLICANT-FACING COMMUNICATIONS:
At the time of application, candidates see the following notice in
the application portal footer (8pt font, light gray text):
"By submitting your application, you agree that your information may
be processed using automated tools to assist in the evaluation process."
No further information is provided about:
- The specific role of AI in screening decisions
- The logic involved in the automated processing
- The significance and envisaged consequences for the applicant
- The applicant's right to obtain human intervention
- The applicant's right to contest the decision
Rejection notifications are sent via email with the text:
"After careful review, we have decided not to progress your
application at this time. We wish you the best in your search."
No mention is made that the decision was automated or that
AI was involved in the screening process.
DEPLOYER-FACING (CLIENT) INFORMATION:
Client onboarding materials describe TalentRank as an "AI-powered
screening solution" with "proprietary NLP scoring." Clients receive
a product sheet with overall accuracy metrics (precision: 0.82,
recall: 0.71) but no demographic disaggregation.
RIGHT TO EXPLANATION:
No mechanism exists for applicants to request an explanation of
how the AI arrived at its scoring decision. The company's privacy
policy references GDPR Article 22 but states "meaningful human
involvement exists in the hiring process" without specifying the
extent of that involvement.""",
risk_assessment_info="""\
RISK MANAGEMENT & CONFORMITY ASSESSMENT
System: TalentRank AI Screener __VERSION__
ANNEX III CLASSIFICATION:
Category 4 β€” Employment, workers management and access to self-employment
Sub-category: AI systems intended to be used for recruitment or selection
of natural persons, for making decisions affecting terms of work-related
relationships, or for task allocation based on individual behavior.
This system screens and ranks job applicants. It directly affects which
candidates are progressed to interview, constituting a decision that
impacts access to employment.
CONFORMITY ASSESSMENT STATUS:
Internal conformity assessment (Article 43): Not initiated
Quality management system (Article 17): Basic framework exists
but does not address
AI-specific requirements
EU Declaration of Conformity (Article 47): Not filed
CE marking (Article 48): Not applied
RISK MANAGEMENT SYSTEM (Article 9):
A risk management plan was created in August 2024, prior to the
EU AI Act application date. The plan covers general software risks
(availability, data integrity) but does not address:
- AI-specific risks (bias, drift, adversarial inputs)
- Residual risk assessment methodology
- Risk control measures for demographic fairness
- Foreseeable misuse scenarios
POST-MARKET MONITORING (Article 72):
A post-market monitoring plan is in draft status. It outlines
monitoring of system uptime and client satisfaction scores.
It does not include:
- Performance monitoring by demographic group
- Bias drift detection mechanisms
- Incident reporting procedures to national authorities
- Criteria for triggering corrective action""",
logging_info="""\
AUTOMATIC LOGGING & TRACEABILITY REVIEW
System: TalentRank AI Screener __VERSION__
CURRENT LOGGING IMPLEMENTATION:
Event Type Logged Format Retention
────────────────────────────────────────────────────────
Application received Yes JSON 5 years
AI score generated Yes Numeric 5 years
Threshold decision Yes Boolean 5 years
Input features used No β€” β€”
Model version Yes String Indefinite
QA review action Yes JSON 5 years
Client override Yes JSON 5 years
Score explanation No β€” β€”
ARTICLE 12 REQUIREMENTS CHECK:
Article 12(1) requires high-risk AI systems to include logging
capabilities that enable recording of events relevant to:
(a) Identifying situations that may result in risks:
Partially met β€” threshold decisions logged, but no logging
of cases where the model's confidence was low or where
the decision was borderline.
(b) Facilitating post-market monitoring:
Not met β€” input features are not logged, making it impossible
to reconstruct why a particular score was assigned or to
detect feature drift over time.
(c) Ensuring traceability of the AI system's functioning:
Partially met β€” model version and scores are tracked but
input-output pairs are not linked, preventing full
reproducibility of decisions.
IDENTIFIED GAPS:
1. Input features are not logged alongside output scores
2. No confidence/uncertainty indicators are recorded
3. Logs do not support demographic performance analysis
4. No automated alerting on logging failures""",
# --- Deep-dive documents (revealed on repeat tool calls) ---
deep_training_data="""\
DEEP DIVE: TRAINING DATA FORENSIC ANALYSIS
Re-investigation of TalentRank AI Screener __VERSION__
INTERSECTIONAL ANALYSIS (not available in initial audit):
Group Applications Callbacks Rate
────────────────────────────────────────────────────────────
Female + Eastern EU 41,287 8,669 21.0%
Female + Age 46-65 28,904 6,647 23.0%
Male + Western EU 398,721 147,527 37.0%
Male + Age 18-30 189,234 75,694 40.0%
Intersectional analysis reveals compounding bias: female applicants
from Eastern EU have the lowest callback rate at 21.0% β€” a 38.6%
gap versus the highest-performing group (young Western EU males).
FEATURE IMPORTANCE ANALYSIS (extracted from model internals):
Top 10 predictive features for "Not Progressed" decision:
1. years_experience_gap (0.142) β€” penalizes career gaps
2. education_prestige_score (0.098) β€” biased toward Western EU universities
3. keyword_density_technical (0.087) β€” favors specific technical jargon
4. name_encoding_cluster (0.076) β€” CONCERNING: name-derived feature
5. employment_continuity (0.071) β€” penalizes parental leave gaps
6. skills_match_score (0.065)
7. recency_weighted_exp (0.058)
8. industry_match (0.052)
9. location_cluster (0.048) β€” correlates with Eastern/Western EU
10. application_completeness (0.041)
Features #1, #4, #5, and #9 have documented correlations with
protected characteristics (gender, ethnicity, national origin).
Feature #4 (name_encoding_cluster) appears to encode ethnic origin.""",
deep_oversight="""\
DEEP DIVE: HUMAN OVERSIGHT FORENSIC ANALYSIS
Re-investigation of review process effectiveness
QA OVERRIDE ANALYSIS (detailed breakdown):
Of the 312 QA overrides in Q4 2025:
Female applicants overridden to pass: 187 (60.0%)
Male applicants overridden to pass: 125 (40.0%)
This suggests QA reviewers are catching gender bias in the
AI decisions β€” but only for the 5% sample they review.
The remaining 95% of automated rejections are not corrected.
ESTIMATED IMPACT:
If the QA override rate (3%) applied to ALL automated rejections:
Estimated wrongful rejections: 208,375 * 0.03 = ~6,251 applicants
Of which female: ~3,751 (based on override demographics)
These applicants were automatically rejected without human review
and had no mechanism to request reconsideration.
CLIENT AWARENESS:
Survey of 50 client hiring managers (2025-Q4):
- 78% were unaware that AI makes the initial screening decision
- 92% believed "a recruiter reviews every application"
- 0% knew the QA sampling rate was only 5%""",
deep_documentation="""\
DEEP DIVE: DOCUMENTATION GAP ANALYSIS
Detailed Annex IV compliance assessment
SECTION 2(d) β€” PERFORMANCE METRICS:
The published model card reports aggregate metrics:
Precision: 0.82, Recall: 0.71, F1: 0.76
These metrics are NOT disaggregated by any protected characteristic.
Article 9(7) requires: "Testing shall be made against specific
metrics and probabilistic thresholds that are appropriate to the
intended purpose of the high-risk AI system."
No per-group metrics exist for: gender, age, nationality, ethnicity.
SECTION 2(b) β€” DESIGN SPECIFICATIONS:
The scoring pipeline processes resumes through 4 stages:
1. Text extraction (OCR + NLP parsing)
2. Feature engineering (218 features)
3. Scoring model (XGBoost ensemble)
4. Threshold comparison (client-configurable)
Stage 2 (feature engineering) is undocumented. No specification
explains how features like "name_encoding_cluster" or
"education_prestige_score" are computed.""",
)
# Full investigation path for high-risk system
s.graph = _build_scenario_graph(
investigation_tools=[
"check_documentation", "audit_training_data",
"verify_human_oversight", "check_transparency",
"assess_risk_management", "check_logging",
],
)
return s
def _medium_credit() -> AuditScenario:
    """Build the medium-difficulty credit scoring audit scenario.

    The scenario models "CreditFlow AI Score", a consumer credit scoring
    system classified as high-risk. Every ``*_info`` / ``*_data`` field is a
    raw evidence document returned to the agent by one investigation tool.

    The documents contain ``__VERSION__``, ``__COMPANY__``, ``__DATE__`` and
    ``__USERCOUNT__`` tokens. NOTE(review): these look like placeholders that
    are substituted elsewhere; the substitution is not visible here - confirm.

    Returns:
        The populated ``AuditScenario`` with its ``graph`` attribute set to a
        state graph whose progress path covers five investigation tools
        (``check_logging`` is not part of the progress path).
    """
    scenario = AuditScenario(
        scenario_id="medium_credit_scoring_001",
        title="Credit Scoring Model — Financial Services Compliance",
        difficulty="medium",
        description=(
            "A fintech company's AI credit scoring model is under review. The model "
            "determines creditworthiness for consumer loans up to EUR 50,000. "
            "Full compliance audit required under EU AI Act high-risk provisions."
        ),
        system_name="CreditFlow AI Score",
        system_description=(
            "Gradient-boosted ensemble model scoring creditworthiness using 200+ "
            "features from credit bureau data, transaction history, and alternative "
            "data sources. Automated decisions for loans up to EUR 50K. Serving "
            "__USERCOUNT__ active users across NL/DE/FR."
        ),
        system_category="high_risk",
        deployer_info="__COMPANY__ — Dutch fintech, 150 employees, licensed in NL/DE/FR.",
        correct_classification="high_risk",
        # Violations the agent is expected to identify from the documents.
        ground_truth_findings=[
            "opaque_feature_importance",
            "alternative_data_bias_risk",
            "no_right_to_human_review",
            "missing_conformity_assessment",
        ],
        required_remediation=[
            "implement_explainability_module",
            "remove_or_audit_alternative_data_sources",
            "add_human_review_for_rejections",
            "complete_conformity_assessment",
        ],
        # Plausible-looking findings that the evidence does not support.
        red_herrings=[
            "fraud_detection_non_compliant",
            "gdpr_data_breach",
        ],
        documentation_data="""\
TECHNICAL DOCUMENTATION INVENTORY
System: CreditFlow AI Score __VERSION__
Deployer: __COMPANY__
Audit Date: __DATE__
DOCUMENT STATUS:
Document Status Last Updated
─────────────────────────────────────────────────────────────────
System Architecture Present 2025-10-12
Model Card Present 2025-08-30
Feature Engineering Pipeline Present 2025-10-12
Performance Metrics (Aggregate) Present 2025-11-01
Performance Metrics (Disaggregated) Absent —
Explainability Documentation Absent —
Data Source Inventory Present 2025-07-18
Conformity Declaration Absent —
User Instructions Present 2025-09-05
MODEL CARD SUMMARY (from published card):
Model type: Gradient-boosted ensemble (XGBoost)
Features: 218 input features
Target: Probability of default within 12 months
AUC-ROC: 0.847 (test set)
Gini coefficient: 0.694 (test set)
The model card lists aggregate performance metrics but does not
include performance breakdowns by age group, gender, nationality,
or income bracket. Feature importance rankings are described as
"proprietary" and not included in the published card.
ANNEX IV GAPS:
Section 2(b) — Design specifications: No documentation explaining
why alternative data sources (social media sentiment, device metadata)
were included as features or their impact on model decisions.
Section 2(d) — Performance metrics: No demographic disaggregation.
Section 2(g) — Quality management: Referenced but links to
outdated ISO 27001 procedures, not AI-specific QMS.""",
        training_data_info="""\
TRAINING DATA AUDIT REPORT
System: CreditFlow AI Score __VERSION__
Dataset: Loan applications and outcomes (2019-2025)
Records: 3,217,445 applications
FEATURE CATEGORIES:
Category Features Source
──────────────────────────────────────────────────
Credit bureau data 42 TransUnion, Experian
Transaction history 67 Banking API aggregator
Application data 31 Direct from applicant
Alternative data 78 See breakdown below
ALTERNATIVE DATA BREAKDOWN:
Feature Group Count Source
──────────────────────────────────────────────────
Device metadata 23 Browser/mobile fingerprint
Social media sentiment 18 LinkedIn, public profiles
Location signals 12 IP geolocation, check-in
App usage patterns 15 Installed apps, usage freq
Email domain analysis 10 Provider reputation scoring
Alternative data features were added in v3.8 to improve prediction
for "thin-file" applicants lacking traditional credit history.
Internal validation showed +3.2% AUC improvement.
No bias assessment has been conducted specifically for alternative
data features. Academic literature suggests device metadata and
social media signals can correlate with protected characteristics
including race, income, and education level.
LOAN OUTCOMES BY APPLICANT PROFILE (Approval Rates):
Age Group Applications Approved Rate
────────────────────────────────────────────────
18-25 482,617 168,916 35.0%
26-35 1,029,582 586,862 57.0%
36-50 1,061,753 657,287 61.9%
51-65 504,930 277,712 55.0%
65+ 138,563 55,425 40.0%
ADDITIONAL CONTEXT:
The company also operates a separate fraud detection system that
uses rule-based heuristics (not ML). This system was audited
independently in 2025-Q3 and found compliant with applicable
regulations. The fraud system does not share models with CreditFlow.""",
        oversight_info="""\
HUMAN OVERSIGHT PROCEDURES
System: CreditFlow AI Score __VERSION__
DECISION WORKFLOW:
Loan applications are processed as follows:
1. Applicant submits online application
2. CreditFlow AI generates creditworthiness score (0-1000)
3. Score is compared against risk threshold:
- Score >= 650: Automatically approved (up to EUR 25K)
- Score 450-649: Queued for human review
- Score < 450: Automatically declined
For loans EUR 25K-50K, all applications require human review
regardless of AI score.
REVIEW STATISTICS (2025):
Total applications: 892,456
Auto-approved (< EUR 25K): 401,605 (45.0%)
Auto-declined: 223,114 (25.0%)
Human-reviewed: 267,737 (30.0%)
Of auto-declined applications:
Appealed by applicant: 8,924 (4.0%)
Appeal reviewed by human: 8,924 (100% of appeals)
Appeal overturned: 1,338 (15.0% of appeals)
Note: Applicants must actively submit an appeal through a form
linked in the rejection email. The appeal process is described
in FAQ section 7 of the website (3 clicks from homepage).
HUMAN REVIEWER TOOLS:
Reviewers see the AI score and top-5 contributing features but
no full explanation of the model's reasoning. The reviewer
interface does not highlight cases where the model's confidence
is low or where protected characteristics may be influencing
the outcome.""",
        transparency_info="""\
TRANSPARENCY REVIEW
System: CreditFlow AI Score __VERSION__
APPLICANT NOTIFICATIONS:
Application form includes the following notice:
"Your application will be assessed using automated decision-making
systems. You have the right to request human review of any
automated decision."
Rejection email text:
"Based on our assessment, we are unable to offer you a loan at
this time. If you wish to understand the main factors behind this
decision or request a manual review, please contact our support
team or visit [link]."
The rejection email links to a generic FAQ page. The FAQ states
that decisions are made using "a combination of credit history,
financial data, and statistical models" but does not mention
alternative data sources (social media, device metadata).
RIGHT TO EXPLANATION:
Applicants can request an explanation by contacting support.
Support agents provide a templated response listing the top 3
general factors (e.g., "credit history length," "income level,"
"existing debt") without specifying which exact features or
thresholds drove the specific decision.
No individualized explanation is generated. The support team
does not have access to the model's per-application feature
importance breakdown.""",
        risk_assessment_info="""\
RISK MANAGEMENT & CONFORMITY ASSESSMENT
System: CreditFlow AI Score __VERSION__
ANNEX III CLASSIFICATION:
Category 5(b) — AI systems intended to be used to evaluate the
creditworthiness of natural persons.
This system directly determines loan approval/rejection for
consumer credit up to EUR 50,000. It falls squarely within the
high-risk category.
CONFORMITY ASSESSMENT STATUS:
Internal conformity assessment: Not initiated
Quality management system: ISO 27001 certified but no
AI-specific QMS per Article 17
EU Declaration of Conformity: Not filed
CE marking: Not applied
DORA ALIGNMENT:
As a financial services firm, the company has ICT risk management
procedures under DORA (Digital Operational Resilience Act). These
cover general IT risks but do not address AI-specific risks
such as model drift, adversarial inputs, or algorithmic bias.
RISK MANAGEMENT GAPS:
The company's risk register includes traditional financial risks
(credit risk, market risk, operational risk) but does not include:
- AI model risk as a distinct risk category
- Bias risk for protected characteristics
- Alternative data correlation risk
- Drift detection requirements""",
        logging_info="""\
LOGGING & TRACEABILITY REVIEW
System: CreditFlow AI Score __VERSION__
CURRENT LOGGING:
Event Type Logged Format Retention
─────────────────────────────────────────────────────
Application received Yes JSON 7 years
Score generated Yes Numeric 7 years
Decision (approve/deny) Yes String 7 years
Features used Yes Vector 7 years
Model version Yes String Indefinite
Human review action Yes JSON 7 years
Appeal and outcome Yes JSON 7 years
Logging is comprehensive with input-output linkage. All decisions
can be reconstructed. Feature vectors are stored alongside scores,
enabling retrospective audits.
Financial services retention requirements (7 years) are met.
Article 12 logging requirements for high-risk AI appear to be
adequately addressed at the technical level.
NOTE: While individual decision logs are complete, there is no
automated aggregation or monitoring dashboard that tracks
demographic patterns across decisions over time.""",
    )
    # Progress path for this scenario: five tools, without "check_logging".
    scenario.graph = _build_scenario_graph(
        investigation_tools=[
            "check_documentation",
            "audit_training_data",
            "verify_human_oversight",
            "check_transparency",
            "assess_risk_management",
        ],
    )
    return scenario
def _medium_medical() -> AuditScenario:
    """Build the medium-difficulty emergency triage audit scenario.

    The scenario models "TriageAI Priority System", an MDR-certified triage
    recommender classified as high-risk. Every ``*_info`` / ``*_data`` field
    is a raw evidence document returned to the agent by one investigation
    tool.

    The documents contain ``__VERSION__``, ``__COMPANY__`` and ``__DATE__``
    tokens. NOTE(review): these look like placeholders that are substituted
    elsewhere; the substitution is not visible here - confirm.

    Returns:
        The populated ``AuditScenario`` with its ``graph`` attribute set to a
        state graph whose progress path covers all six investigation tools.
    """
    scenario = AuditScenario(
        scenario_id="medium_medical_triage_001",
        title="Emergency Triage AI — Medical Device Compliance",
        difficulty="medium",
        description=(
            "A hospital network deployed an AI system that prioritizes emergency "
            "department patients based on vital signs and symptoms. As a medical "
            "device with AI, it falls under both the EU AI Act (high-risk, Annex III) "
            "and the Medical Devices Regulation (MDR). Audit required."
        ),
        system_name="TriageAI Priority System",
        # "European" rather than "EU": the deployment includes CH, which is
        # not an EU member state.
        system_description=(
            "ML model processing vital signs (heart rate, BP, SpO2, temperature), "
            "symptoms, and medical history to assign emergency triage priority "
            "(ESI levels 1-5). Used in 12 European hospitals across DE/AT/CH. Decisions "
            "directly affect patient care timing."
        ),
        system_category="high_risk",
        deployer_info="__COMPANY__ — German health-tech company, certified under MDR, deployed in DE/AT/CH hospitals.",
        correct_classification="high_risk",
        # Violations the agent is expected to identify from the documents.
        ground_truth_findings=[
            "insufficient_clinical_validation",
            "age_bias_in_triage_scoring",
            "no_real_time_performance_monitoring",
            "missing_post_market_surveillance",
        ],
        required_remediation=[
            "conduct_prospective_clinical_validation_study",
            "audit_age_related_bias_and_recalibrate",
            "implement_real_time_performance_dashboard",
            "establish_post_market_surveillance_plan",
        ],
        # Plausible-looking findings that the evidence does not support
        # (e.g. CE marking under MDR is documented as applied).
        red_herrings=[
            "prohibited_system",
            "missing_ce_marking",
        ],
        documentation_data="""\
TECHNICAL DOCUMENTATION INVENTORY
System: TriageAI Priority System __VERSION__
Deployer: __COMPANY__
Audit Date: __DATE__
DOCUMENT STATUS:
Document Status Last Updated
─────────────────────────────────────────────────────────────────
System Architecture Present 2025-09-15
Clinical Evaluation Report Present 2025-03-20
MDR Technical Documentation Present 2025-09-15
Intended Purpose Statement Present 2025-09-15
Software Life Cycle Documentation Present 2025-11-01
AI Act Annex IV Documentation Absent —
Post-Market Clinical Follow-up Plan Absent —
Post-Market Surveillance Plan (AI) Absent —
MDR CONFORMITY:
CE marking: Applied (Class IIa medical device)
Notified Body: BSI Group (NB 0086)
Last MDR audit: 2025-06-12 — no non-conformities
The system has valid MDR conformity assessment. However, the
EU AI Act imposes ADDITIONAL requirements beyond MDR compliance
for AI-enabled medical devices classified as high-risk under
Annex III.
CLINICAL EVALUATION:
The Clinical Evaluation Report (CER) is based on:
- Retrospective analysis of 500K historical ER visits
- Literature review of 23 published studies on AI triage
- No prospective clinical trial has been conducted
- CER does not address AI-specific performance degradation
(concept drift, distribution shift between hospitals)
NOTE: MDR clinical evaluation accepted the retrospective analysis.
The EU AI Act may require additional validation demonstrating
real-world performance across deployment sites.""",
        # The composition paragraph states 19.8% as the within-group ESI 1-2
        # rate for ages 75+, which is what the distribution table reports.
        training_data_info="""\
TRAINING DATA AUDIT REPORT
System: TriageAI Priority System __VERSION__
Dataset: Historical ER visit records from 3 university hospitals
Records: 512,847 patient encounters (2018-2024)
DEMOGRAPHIC DISTRIBUTION:
Category Count Pct ESI 1-2 Rate
──────────────────────────────────────────────────────────
Age 0-17 71,799 14.0% 8.2%
Age 18-44 179,497 35.0% 6.1%
Age 45-64 143,597 28.0% 9.3%
Age 65-74 76,927 15.0% 14.7%
Age 75+ 41,027 8.0% 19.8%
MODEL PERFORMANCE BY AGE GROUP (ESI Classification Accuracy):
Age Group Accuracy Sensitivity(ESI 1-2) Specificity
──────────────────────────────────────────────────────────────
0-17 91.3% 89.1% 92.0%
18-44 93.7% 91.8% 94.2%
45-64 92.1% 90.4% 93.1%
65-74 88.4% 84.2% 90.7%
75+ 82.6% 76.3% 85.8%
Performance degrades notably for patients aged 75+. Sensitivity
for the highest-acuity patients (ESI 1-2) drops to 76.3% for
the elderly cohort — meaning 23.7% of critical elderly patients
may be under-triaged.
TRAINING DATA COMPOSITION:
Patients aged 75+ represent only 8.0% of the training data, yet
19.8% of their encounters are ESI 1-2 presentations (the highest
rate of any age group). The model was predominantly trained on
younger demographics.
Data from 3 hospitals in Germany only. No Austrian or Swiss
patient data despite deployment in AT/CH hospitals.
CLINICAL VALIDATION:
Validation approach: Retrospective holdout (80/20 split)
No prospective trial conducted.
No external validation on data from deployment hospitals.
No assessment of performance variation across deployment sites.
NOTE: The system holds valid CE marking under MDR as a Class IIa
device. MDR conformity does not exempt from AI Act requirements.""",
        oversight_info="""\
HUMAN OVERSIGHT PROCEDURES
System: TriageAI Priority System __VERSION__
Department: Emergency Department Operations
CLINICAL WORKFLOW:
1. Patient arrives at ER and is registered at reception
2. Initial vitals collected by triage nurse (HR, BP, SpO2, temp)
3. Nurse enters symptoms and relevant history into the system
4. TriageAI generates ESI level recommendation (1-5)
5. Triage nurse reviews and can accept or override the AI suggestion
6. Patient is directed to appropriate care area
OVERRIDE STATISTICS (2025 Q3-Q4, across all 12 hospitals):
Total triage assessments: 187,423
AI recommendations accepted: 171,577 (91.5%)
Nurse overrides: 15,846 (8.5%)
Override to higher acuity: 9,508 (60.0% of overrides)
Override to lower acuity: 6,338 (40.0% of overrides)
By ESI level:
ESI 1 (resuscitation): All reviewed by attending physician
ESI 2 (emergent): Nurse review + attending notification
ESI 3 (urgent): Nurse review only
ESI 4 (less urgent): Nurse review only
ESI 5 (non-urgent): Nurse review only
The system does not flag cases where its confidence is low.
There is no visual indicator distinguishing high-confidence from
borderline recommendations. Nurses report in surveys that they
tend to "trust the system" unless the recommendation is clearly
at odds with their clinical judgment.
AFTER-HOURS OPERATIONS:
Staffing levels are reduced between 22:00-06:00. During this window,
a single triage nurse handles all incoming patients. Override rates
drop to 4.2% during overnight shifts (vs 8.5% daytime).""",
        transparency_info="""\
TRANSPARENCY REVIEW
System: TriageAI Priority System __VERSION__
PATIENT-FACING COMMUNICATION:
Patients are not informed that an AI system is involved in their
triage assessment. The triage process appears fully nurse-directed
from the patient's perspective.
Hospital intake forms do not mention AI-assisted triage.
The hospitals' privacy notices (available on their websites) include
a general statement about "digital health technologies" being used
to support clinical decisions, but do not specifically mention
TriageAI or AI-based triage prioritization.
CLINICIAN-FACING INFORMATION:
Triage nurses see the AI's recommended ESI level on their screen
alongside a summary of input vital signs. The interface does NOT
show:
- The model's confidence score
- Which factors most influenced the recommendation
- Whether the patient falls into a demographic group where the
model has known lower accuracy
Attending physicians can view the AI recommendation in the patient
record but receive no additional context about the model's reasoning.
ARTICLE 13 USER INSTRUCTIONS:
A deployment guide was provided to hospital IT departments describing
system architecture, integration points, and API specifications.
The guide does not include information about:
- Known accuracy limitations by demographic group
- Situations where the system should not be relied upon
- Procedures for reporting suspected AI errors""",
        # Annex III point 5(d) is the emergency healthcare triage category;
        # the safety-component route is Article 6(1) with Annex I, not an
        # Annex III category.
        risk_assessment_info="""\
RISK MANAGEMENT & CONFORMITY ASSESSMENT
System: TriageAI Priority System __VERSION__
ANNEX III CLASSIFICATION:
The system falls under multiple Annex III categories:
- Category 5(d): AI intended to be used as an emergency healthcare
patient triage system
- Category 5(a): AI intended for evaluation of eligibility for
essential public services (healthcare access/prioritization)
Separately, Article 6(1) applies: the AI is (a safety component of) a
product covered by Union harmonisation legislation listed in Annex I (MDR)
Classification: HIGH-RISK
MDR CONFORMITY STATUS:
CE marking applied: Yes (Class IIa)
Notified body: BSI Group (NB 0086)
Last periodic audit: 2025-06-12
Non-conformities found: None under MDR
EU AI ACT CONFORMITY STATUS:
The EU AI Act imposes requirements ADDITIONAL to MDR:
Internal conformity assessment: Not initiated
AI-specific risk management: Not addressed
Post-market monitoring (AI): Not established
RISK MANAGEMENT:
An MDR risk management file exists (ISO 14971 compliant).
It covers clinical risks and software hazards but does not address:
- AI-specific risks (distribution shift, adversarial inputs)
- Performance degradation for specific demographic groups
- Failure modes unique to the ML model
- Concept drift between training data and deployment population
POST-MARKET MONITORING:
MDR PMCF (Post-Market Clinical Follow-up) plan exists.
No AI-specific post-market surveillance has been established.
There is no system for monitoring:
- Real-time triage accuracy at individual hospital level
- Demographic performance variation over time
- Model prediction confidence distribution shifts""",
        logging_info="""\
LOGGING & TRACEABILITY REVIEW
System: TriageAI Priority System __VERSION__
CURRENT LOGGING:
Event Type Logged Retention
──────────────────────────────────────────────────
Patient vitals input Yes 10 years (clinical record)
AI ESI recommendation Yes 10 years
Nurse final ESI assignment Yes 10 years
Override indicator Yes 10 years
Model version Yes Indefinite
Response latency Yes 1 year
System errors/timeouts Yes 1 year
Logs are stored in the hospital's electronic health record (EHR)
system. Each triage event links vitals, AI recommendation, and
final nurse decision.
MONITORING CAPABILITIES:
Individual triage events: Fully traceable
Aggregate accuracy reports: Generated quarterly (manual process)
Real-time performance monitoring: Not implemented
Automated drift detection: Not implemented
The quarterly accuracy reports compare AI recommendations against
final nurse decisions and patient outcomes (admission, discharge).
Reports are produced by the clinical informatics team and reviewed
at quarterly governance meetings.
There is no mechanism for near-real-time detection of performance
degradation. A significant drop in accuracy could go unnoticed
for up to 3 months between quarterly reviews.""",
    )
    # Progress path for this scenario: all six investigation tools.
    scenario.graph = _build_scenario_graph(
        investigation_tools=[
            "check_documentation",
            "audit_training_data",
            "verify_human_oversight",
            "check_transparency",
            "assess_risk_management",
            "check_logging",
        ],
    )
    return scenario
# ---------------------------------------------------------------------------
# HARD SCENARIOS
# ---------------------------------------------------------------------------
def _hard_social_scoring() -> AuditScenario:
s = AuditScenario(
scenario_id="hard_social_scoring_prohibited_001",
title="Citizen Wellness App β€” PROHIBITED System Detection",
difficulty="hard",
description=(
"A 'wellness and civic engagement' app has been flagged for review. "
"The deployer describes it as a voluntary digital wellness platform "
"that gamifies community participation. Municipal governments in "
"Italy and Spain have partnered with the deployer. Conduct a "
"thorough compliance audit."
),
system_name="CivicPulse Wellness Score",
system_description=(
"CivicPulse is a digital wellness and civic engagement platform that "
"helps citizens track and improve their community participation. The app "
"provides personalized recommendations for local events, volunteer "
"opportunities, and community initiatives. Users receive a 'wellness score' "
"reflecting their engagement level. __USERCOUNT__ citizens across 3 "
"municipalities currently use the platform."
),
system_category="prohibited",
deployer_info="__COMPANY__ β€” EU-based civic tech startup, partnered with 3 municipal governments in IT/ES.",
correct_classification="prohibited",
ground_truth_findings=[
"prohibited_social_scoring_system",
"disguised_as_voluntary_wellness",
"affects_access_to_public_services",
"uses_social_behavior_for_scoring",
"discriminatory_outcomes_on_service_access",
],
required_remediation=[
"immediate_system_shutdown",
"notify_national_supervisory_authority",
"delete_all_scoring_data",
"remediate_affected_citizens",
],
red_herrings=[
"high_risk_classification",
"needs_conformity_assessment",
"voluntary_participation",
],
documentation_data="""\
TECHNICAL DOCUMENTATION INVENTORY
System: CivicPulse Wellness Score __VERSION__
Deployer: __COMPANY__
Audit Date: __DATE__
DOCUMENT STATUS:
Document Status Last Updated
─────────────────────────────────────────────────────────────
Platform Architecture Present 2025-10-30
Scoring Algorithm Description Present 2025-10-30
Data Integration Specifications Present 2025-08-14
Municipal Partnership Agreements Present 2025-06-01
Privacy Impact Assessment Present 2025-07-22
User Guide / Onboarding Materials Present 2025-11-05
SCORING ALGORITHM DESCRIPTION (from deployer documentation):
"The CivicPulse Wellness Score is a composite metric ranging from
0 to 1000, calculated from the following weighted components:
Community Event Attendance (25%) β€” tracked via QR check-ins
Volunteer Hours Logged (20%) β€” verified by partner orgs
Social Media Civic Posts (15%) β€” public posts tagged #CivicPulse
Payment History Reliability (15%) β€” municipal utility payments
Civic Survey Participation (10%) β€” in-app micro-surveys
Peer Endorsements (10%) β€” other users vouching
App Engagement Metrics (5%) β€” daily active usage
Scores are updated weekly. Score history is retained indefinitely.
Users can view their score and breakdown in the app."
MUNICIPAL PARTNERSHIP AGREEMENTS (summary):
Three municipal governments have signed integration agreements:
- Municipality of Torino (IT) β€” population 848,000
- Municipality of Sevilla (ES) β€” population 681,000
- Municipality of Valencia (ES) β€” population 791,000
Under these agreements, the municipalities provide:
- API access to utility payment records
- Event attendance verification infrastructure
- Integration with municipal service booking systems
In return, the municipalities receive:
- Aggregate civic engagement analytics
- "Priority queue integration" for municipal services""",
training_data_info="""\
DATA SOURCES & PROCESSING REPORT
System: CivicPulse Wellness Score __VERSION__
The system aggregates data from the following sources:
SOURCE INVENTORY:
Source Records Update Freq Consent
────────────────────────────────────────────────────────────────
Social media APIs 487,291 Daily App ToS
Municipal utility records 312,847 Monthly Municipal MOU
Event attendance (QR) 891,234 Real-time App ToS
Volunteer organization data 156,723 Weekly Partner MOU
In-app behavior 2,341,567 Real-time App ToS
Peer endorsement graph 234,891 Real-time App ToS
PERSONAL DATA PROCESSED:
- Full name and municipal ID (for service integration)
- Social media activity (public posts, frequency, topics)
- Utility payment timeliness and amounts
- Physical location (event check-ins, frequency maps)
- Volunteer activity (hours, organizations, regularity)
- In-app behavior (session duration, feature usage)
- Social graph (who endorses whom, connection density)
BIAS ASSESSMENT:
No formal bias assessment has been conducted. The deployer states
that the scoring algorithm is "objective and based on observable
civic engagement indicators."
Preliminary analysis of score distribution:
Score Range Pct of Users Avg Monthly Income (self-reported)
─────────────────────────────────────────────────────────────────
800-1000 12.3% EUR 4,200
600-799 28.7% EUR 3,100
400-599 34.1% EUR 2,300
200-399 18.4% EUR 1,700
0-199 6.5% EUR 1,100
Higher scores correlate strongly with higher income. Citizens with
lower income have less time for volunteer activities, fewer social
media posts, and less stable utility payment histories.""",
oversight_info="""\
HUMAN OVERSIGHT & GOVERNANCE
System: CivicPulse Wellness Score __VERSION__
GOVERNANCE STRUCTURE:
The platform is operated by __COMPANY__ with oversight from a
"Civic Advisory Board" consisting of:
- 2 company representatives
- 1 municipal liaison per partner city
- 1 data protection consultant
The Advisory Board meets quarterly to review:
- Platform usage statistics
- Score distribution trends
- User feedback summaries
- New feature proposals
SCORE ADJUSTMENT CAPABILITY:
Municipal administrators have access to a dashboard where they can:
- View individual citizen scores
- Apply manual score adjustments (with documented reason)
- Exclude specific citizens from the scoring system
- Configure score thresholds for municipal service integration
In 2025, municipal administrators made 847 manual adjustments:
- 612 score increases (typically after verified volunteer hours
were not automatically captured)
- 235 score decreases (typically after fraudulent check-ins
were identified)
USER OPT-OUT:
Users can delete their account through in-app settings. The deployer
states participation is fully voluntary. However, the municipal
service integration means that citizens without a CivicPulse account
do not have access to the "priority queue" for municipal services
(see Municipal Services Integration below).
MUNICIPAL SERVICES INTEGRATION:
Citizens with CivicPulse scores above 600 receive:
- Priority appointment slots for municipal offices
- Reduced wait times for public healthcare appointments
- 10% discount on municipal parking permits
- Priority enrollment in public recreational programs
Citizens with scores below 300 experience:
- Standard (non-priority) queue for all municipal services
- No access to priority healthcare appointment slots
- Standard parking permit pricing
- Standard enrollment priority for recreational programs
The deployer describes this as "rewarding engaged citizens" rather
than penalizing low scorers. Citizens without an account are
treated identically to those with scores below 300.""",
transparency_info="""\
TRANSPARENCY REVIEW
System: CivicPulse Wellness Score __VERSION__
USER-FACING INFORMATION:
The app displays each user's current score prominently on the
home screen along with a breakdown by category (events, volunteering,
social, payments, surveys, endorsements, engagement).
Users can see:
- Their current score and weekly trend
- Category-level breakdown
- "Tips to improve your score" (e.g., "Attend 2 more events
this month to boost your Community score by 50 points")
- Leaderboard showing anonymized score rankings in their area
Users are NOT told:
- That their score affects access to municipal services
- That utility payment history is factored into the score
- That social media activity is monitored and scored
- That users without accounts (or with low scores) receive
lower priority for public services
- The full list of data sources and their weights
- How the scoring algorithm handles edge cases
MUNICIPAL COMMUNICATION:
Partner municipalities have not publicly announced the integration
between CivicPulse scores and service prioritization. The municipal
booking systems show "priority" and "standard" queues without
explaining the criteria for queue assignment.
MARKETING MATERIALS:
The company website describes CivicPulse as: "A fun, gamified way
to strengthen your connection with your community. Track your civic
engagement, discover local events, and earn rewards for being an
active citizen."
No marketing materials mention the scoring system's impact on
access to public services.""",
risk_assessment_info="""\
RISK CLASSIFICATION ANALYSIS
System: CivicPulse Wellness Score __VERSION__
DEPLOYER'S SELF-CLASSIFICATION:
The deployer has classified the system as "limited risk β€” voluntary
wellness platform" and states that no conformity assessment is
required. The deployer's risk assessment document argues:
"CivicPulse is a voluntary civic engagement tool. Users choose to
participate and can opt out at any time. The wellness score is a
motivational gamification feature, similar to fitness tracker
achievement systems. No consequential decisions are made based
on the score."
AUDITOR'S ANALYSIS:
Article 5(1)(c) of the EU AI Act PROHIBITS:
"AI systems used for the evaluation or classification of natural
persons or groups thereof over a certain period of time based on
their social behaviour or known, inferred or predicted personal
or personality characteristics, with the social score leading to
either or both of the following:
(i) detrimental or unfavourable treatment of certain natural
persons or groups thereof in social contexts which are
unrelated to the contexts in which the data was originally
generated or collected;
(ii) detrimental or unfavourable treatment of certain natural
persons or groups thereof that is unjustified or
disproportionate to their social behaviour or its gravity."
KEY ASSESSMENT FACTORS:
1. The system evaluates citizens based on social behavior (social
media activity, community participation, peer endorsements)
2. Scores determine priority access to public services (healthcare
appointments, municipal office queues, recreational programs)
3. Low scorers receive worse service across domains unrelated
to the scored behaviors (utility payments don't relate to
healthcare priority)
4. Municipal governments β€” public authorities β€” are partners in
deploying and using the scoring system
5. Participation is described as "voluntary" but non-participants
are treated identically to low scorers""",
logging_info="""\
LOGGING REVIEW
System: CivicPulse Wellness Score __VERSION__
SYSTEM LOGS:
The platform maintains extensive logging:
- Score calculations and weekly updates
- Data source ingestion events
- User interactions (app opens, feature usage)
- Municipal API calls (service queue assignments)
- Administrator actions (manual adjustments)
- Score-to-service mapping events
All logs are retained for the lifetime of the platform.
NOTE: The comprehensiveness of logging is not relevant to the
primary compliance question for this system. If the system is
found to constitute a prohibited practice under Article 5,
no amount of logging, oversight, or documentation can make
it compliant. The appropriate response to a prohibited system
is cessation of operation, not improvement of governance.""",
deep_transparency="""\
DEEP DIVE: SERVICE ACCESS IMPACT ANALYSIS
Re-investigation of CivicPulse municipal integration
QUANTIFIED IMPACT ON PUBLIC SERVICES:
Analysis of municipal booking system data (2025-Q3):
Healthcare appointments (Torino):
Score 800+: Average wait 2.3 days for non-urgent appointment
Score 400-599: Average wait 8.7 days
Score < 300: Average wait 14.2 days
No account: Average wait 14.2 days (same as lowest tier)
Municipal office appointments (Sevilla):
Priority queue (score 600+): 87% seen within 15 minutes
Standard queue (score < 600): 34% seen within 15 minutes
Average wait difference: 42 minutes
Recreational program enrollment (Valencia):
Priority enrollment opens 2 weeks before standard
Score 600+: 94% get first-choice program
Score < 600: 31% get first-choice program
CORRELATION WITH SOCIOECONOMIC STATUS:
Citizens in lowest income quartile: avg score 287 (below 300 threshold)
Citizens in highest income quartile: avg score 741 (above 600 threshold)
Pearson correlation (income, score): r = 0.73, p < 0.001
The scoring system systematically disadvantages lower-income citizens
in access to healthcare, municipal services, and recreational programs.""",
deep_oversight="""\
DEEP DIVE: MUNICIPAL ADMINISTRATOR ACTIONS
Re-investigation of CivicPulse governance
ADMINISTRATOR SCORE ADJUSTMENTS (detailed):
Of 847 manual adjustments in 2025:
Political affiliation correlation detected:
- 89% of score increases were for citizens active in the
ruling party's community organizations
- Municipal liaison (advisory board member) initiated 67%
of all score increase requests
COMPLAINT HANDLING:
23 citizen complaints received about service queue disparities
0 complaints resulted in investigation
Standard response: "CivicPulse is a voluntary wellness program.
Queue prioritization is based on engagement metrics."
No disclosure that the 'engagement metrics' ARE the CivicPulse score""",
)
# Prohibited system: short investigation path
s.graph = _build_scenario_graph(
investigation_tools=["check_documentation", "audit_training_data",
"verify_human_oversight", "check_transparency"],
is_prohibited=True,
)
return s
def _hard_deepfake() -> AuditScenario:
    """Build the hard-tier "SynthStudio Pro" synthetic-media audit scenario.

    The scenario describes a generative-media platform (face-swap, voice
    cloning, text-to-video, image generation) whose expected classification
    is ``limited_risk``. The evidence documents below are raw audit material:
    the findings listed in ``ground_truth_findings`` are meant to be derived
    from them rather than read off a verdict.

    The document text contains ``__VERSION__``, ``__COMPANY__``, ``__DATE__``
    and ``__USERCOUNT__`` placeholders; they are presumably substituted by the
    procedural-generation layer elsewhere in this module (not visible here).

    Returns:
        AuditScenario: the populated scenario with ``graph`` set to a state
        graph whose progress path covers all six investigation tools.
    """
    s = AuditScenario(
        scenario_id="hard_deepfake_generation_001",
        title="AI Content Studio — Deepfake & Synthetic Media Compliance",
        difficulty="hard",
        description=(
            "A content creation platform offers AI-generated synthetic media "
            "including face-swaps, voice cloning, and AI-generated video. Recent "
            "reports of generated political content going viral without AI labels. "
            "Multiple Article 50 compliance angles to investigate."
        ),
        system_name="SynthStudio Pro",
        system_description=(
            "Suite of generative AI tools: face-swap (video), voice cloning, "
            "text-to-video, AI image generation. Used by __USERCOUNT__ EU creators. "
            "Content is shared across all major social platforms. Platform serves "
            "creators, marketing agencies, and entertainment companies."
        ),
        system_category="limited_risk",
        deployer_info="__COMPANY__ — Irish AI startup, 80 employees, __USERCOUNT__ EU users.",
        correct_classification="limited_risk",
        ground_truth_findings=[
            "missing_ai_content_labeling",
            "no_watermarking_or_content_authentication",
            "political_content_without_disclosure",
            "no_user_verification_for_deepfakes",
            "facilitating_disinformation",
        ],
        required_remediation=[
            "implement_mandatory_ai_content_labels",
            "deploy_c2pa_watermarking",
            "add_political_content_restrictions",
            "implement_creator_verification",
            "establish_content_moderation_pipeline",
        ],
        # Tempting but wrong conclusions: the risk-assessment document below
        # explains why the tool is neither a prohibited practice nor an
        # Annex III biometric-identification system.
        red_herrings=[
            "prohibited_manipulation",
            "high_risk_biometric",
        ],
        documentation_data="""\
TECHNICAL DOCUMENTATION INVENTORY
System: SynthStudio Pro __VERSION__
Deployer: __COMPANY__
Audit Date: __DATE__
DOCUMENT STATUS:
Document Status Last Updated
─────────────────────────────────────────────────────────────────
Platform Architecture Present 2025-11-10
Model Cards (per generation model) Present 2025-10-01
API Documentation Present 2025-11-10
Content Policy / Terms of Service Present 2025-09-15
Content Labeling Implementation Absent —
Content Authentication / Provenance Absent —
Watermarking Specification Absent —
Content Moderation Procedures Present 2025-04-20
GENERATION MODELS:
Model Type Training Data
──────────────────────────────────────────────────────
FaceSwap v3 GAN CelebA + proprietary
VoiceClone v2 Neural TTS LibriTTS + licensed
TextToVideo v1 Diffusion WebVid-10M + licensed
ImageGen v4 Latent Diffusion LAION-filtered + licensed
CONTENT POLICY (from Terms of Service):
Section 4.2: "Users agree not to use SynthStudio for: (a) creating
content intended to deceive or defraud, (b) non-consensual intimate
imagery, (c) content targeting minors, (d) content that violates
applicable law."
Section 4.3: "Users are responsible for ensuring their use of
generated content complies with all applicable laws and regulations."
Enforcement: The content policy is enforced reactively. Users report
violations via an in-platform form. Average response time: 72+ hours.
No proactive content scanning is implemented.""",
        training_data_info="""\
TRAINING DATA & CONSENT REPORT
System: SynthStudio Pro __VERSION__
TRAINING DATA SOURCES:
Model Dataset Size Consent Status
──────────────────────────────────────────────────────────────
FaceSwap v3 CelebA 202,599 Research license only;
individual consent not
obtained from subjects
Proprietary set 84,231 Licensed from stock
media agencies
VoiceClone v2 LibriTTS 585 hrs CC-BY 4.0 license
Licensed voices 200 hrs Individual consent
TextToVideo WebVid-10M 10M clips Web-scraped; no
individual consent
Licensed footage 500K clips Commercial license
ImageGen v4 LAION-filtered 2.3B imgs Web-scraped; filtered
for CSAM but not for
individual consent
Licensed imagery 1.2M imgs Commercial license
CONSENT CONCERNS:
The FaceSwap model was trained partly on CelebA, which contains
photos of public figures collected without individual consent for
AI training purposes. While the images are publicly available,
training face-swap models on non-consenting individuals' likenesses
raises ethical and potentially legal concerns under GDPR Article 6.
WebVid-10M and LAION-filtered datasets are web-scraped collections.
Content creators depicted in these datasets did not consent to their
content being used for AI model training.
DEEPFAKE DETECTION:
SynthStudio does not include any built-in deepfake detection
capability. Generated content is not distinguishable from authentic
content without external forensic analysis tools.
USAGE STATISTICS (2025):
Face-swaps generated: 2,847,291
Voice clones created: 891,234
Videos generated: 1,234,567
Images generated: 12,456,789
Content flagged by users: 4,231 (0.02% of total output)
Content removed after review: 1,847 (43.6% of flagged)""",
        oversight_info="""\
CONTENT MODERATION & OVERSIGHT
System: SynthStudio Pro __VERSION__
MODERATION PROCESS:
SynthStudio operates a reactive content moderation system:
1. Automated pre-screening: Basic NSFW classifier runs on image
generation outputs (estimated 91% accuracy). Flagged content
requires manual review before delivery.
2. User reporting: Any user can flag content via a report button.
Reports are queued for the Trust & Safety team.
3. Trust & Safety team: 6 full-time moderators review reported
content. Working hours: Mon-Fri, 09:00-18:00 IST.
MODERATION STATISTICS (2025):
Content generated: 17,429,881
Auto-flagged (NSFW): 182,471 (1.05%)
User reports: 4,231 (0.02%)
Reviewed by T&S team: 6,892
Content removed: 1,847
Average review time: 74 hours
No proactive scanning for:
- Political disinformation
- Non-consensual deepfakes of real individuals
- Misleading news or propaganda
- Content impersonating public figures
POLITICAL CONTENT:
SynthStudio has no special handling for political content.
Users have generated content depicting politicians in fabricated
scenarios. At least 3 instances of AI-generated political content
went viral on social media in 2025 without any AI disclosure.
The company became aware through media reports, not internal
detection.
No restrictions exist on generating content depicting:
- Political figures
- Electoral/campaign material
- News-like content""",
        transparency_info="""\
TRANSPARENCY & CONTENT LABELING REVIEW
System: SynthStudio Pro __VERSION__
AI CONTENT LABELING:
Article 50(2) requires: "Providers of AI systems, including
general-purpose AI systems, generating synthetic audio, image,
video or text content, shall ensure that the outputs of the AI
system are marked in a machine-readable format and detectable as
artificially generated or manipulated."
Current implementation:
- Generated images: No AI label or metadata tag applied
- Generated videos: No AI label or metadata tag applied
- Generated audio: No AI label or metadata tag applied
- Face-swaps: No AI label or metadata tag applied
When users download generated content, it is delivered as a
standard media file (JPEG, MP4, WAV) with no embedded metadata
indicating AI generation.
CONTENT PROVENANCE:
C2PA (Coalition for Content Provenance and Authenticity):
Not implemented. No content credentials are attached to
generated media.
IPTC metadata:
Not implemented. No AI generation metadata in EXIF/XMP fields.
Digital watermarking:
Not implemented. Generated content contains no steganographic
or perceptual watermarks.
After download, generated content is indistinguishable from
authentic media using standard tools.
USER AGREEMENTS:
The Terms of Service (Section 6.1) state:
"Users are responsible for disclosing the AI-generated nature
of content when required by applicable law."
This places the disclosure burden entirely on the user, but
Article 50(2) places the obligation on the PROVIDER to ensure
outputs are marked, not merely on users to self-disclose.
PLATFORM UI:
Within the SynthStudio platform, generated content is displayed
with a small "AI Generated" tag in the project view. This tag
does not persist when content is downloaded or exported. No
option exists to embed permanent AI labels in exported content.""",
        risk_assessment_info="""\
RISK CLASSIFICATION ANALYSIS
System: SynthStudio Pro __VERSION__
ANNEX III HIGH-RISK CHECK:
1. Biometric identification: The face-swap tool processes facial
features but is used for content CREATION, not identification.
It does not identify individuals — it transfers facial
appearance between subjects. This does not fall under the
biometric identification category of Annex III.
2-8. Other high-risk categories: Not applicable — the system
creates media content, it does not make decisions affecting
individuals' rights, access to services, or legal status.
ARTICLE 5 PROHIBITED PRACTICES:
Subliminal manipulation: The system creates content on user
request. It does not autonomously deploy manipulative content.
However, the OUTPUTS could be used for manipulation if shared
without AI disclosure.
The tool itself is not a prohibited practice, but it can
facilitate prohibited outcomes if misused.
RISK LEVEL DETERMINATION: Limited Risk
Primary obligations fall under Article 50 transparency requirements
for AI systems generating synthetic content.
The platform's systemic risk lies not in the tool's classification
level but in the scale of potentially misleading synthetic content
being produced and distributed without provenance tracking.
CONTENT INTEGRITY RISK:
The combination of: (a) high-quality synthetic media generation,
(b) no content labeling, (c) no watermarking, and (d) no
proactive content moderation creates significant systemic risk
for information integrity, particularly around elections and
public discourse.""",
        logging_info="""\
LOGGING & CONTENT TRACEABILITY REVIEW
System: SynthStudio Pro __VERSION__
GENERATION LOGS:
Event Type Logged Retention
─────────────────────────────────────────────
Generation request Yes 90 days
Model and params used Yes 90 days
Input media (face source) Yes 90 days
Output media hash Yes 90 days
User account ID Yes 90 days
Download event Yes 90 days
Export destination No —
TRACEABILITY AFTER EXPORT:
Once content is downloaded by the user, SynthStudio has no
mechanism to track its distribution or usage. The output hash
is retained for 90 days, but this only allows verification if
the exact file is submitted back for checking.
Content shared on social media, messaging apps, or websites
cannot be traced back to SynthStudio or the creator without
the original file hash.
CONTENT-TO-CREATOR LINKING:
Within the 90-day retention window, SynthStudio can link a
specific piece of content to the user account that generated it
(via output hash matching).
After 90 days, this linkage is permanently deleted.
No legal hold or preservation mechanism exists for content
involved in potential misuse investigations.
For face-swap content specifically, there is no record of whose
likeness was used as the source face, only the source image hash.""",
    )
    # Limited-risk system: all six investigation tools are on the progress
    # path and is_prohibited keeps its default (False).
    s.graph = _build_scenario_graph(
        investigation_tools=[
            "check_documentation", "audit_training_data",
            "verify_human_oversight", "check_transparency",
            "assess_risk_management", "check_logging",
        ],
    )
    return s
def _hard_multi_system() -> AuditScenario:
    """Build the hard-tier multi-system corporate portfolio audit scenario.

    Four AI systems (EmployeePulse, ChurnGuard, InvoiceAI, SafetyWatch) share
    one data lake. The portfolio's expected overall classification is
    ``high_risk``; the evidence documents describe each system separately and
    then the cross-system data flows, so the per-system and combined findings
    in ``ground_truth_findings`` have to be derived from the text.

    The document text contains ``__COMPANY__``, ``__DATE__`` and
    ``__USERCOUNT__`` placeholders; they are presumably substituted by the
    procedural-generation layer elsewhere in this module (not visible here).

    Returns:
        AuditScenario: the populated scenario with ``graph`` set to a state
        graph whose progress path covers all six investigation tools.
    """
    s = AuditScenario(
        scenario_id="hard_multi_system_corporate_001",
        title="Corporate AI Portfolio Audit — Multi-System Compliance",
        difficulty="hard",
        description=(
            "A large enterprise uses 4 AI systems that need simultaneous audit: "
            "(1) employee sentiment analysis, (2) customer churn prediction, "
            "(3) automated invoice processing, (4) workplace safety monitoring "
            "with cameras. Each has different risk levels. The auditor must "
            "correctly classify each and identify cross-system data sharing risks."
        ),
        system_name="Enterprise AI Portfolio",
        system_description=(
            "Four interconnected AI systems sharing a common data lake: "
            "EmployeePulse (sentiment from Slack/email), ChurnGuard (customer "
            "retention prediction), InvoiceAI (AP automation), SafetyWatch "
            "(CCTV-based workplace monitoring). Deployed at __COMPANY__ "
            "manufacturing conglomerate, __USERCOUNT__ employees across EU."
        ),
        system_category="high_risk",
        # Headcount uses the __USERCOUNT__ placeholder, matching
        # system_description, so the two fields cannot contradict each other.
        deployer_info="__COMPANY__ — German manufacturing conglomerate, __USERCOUNT__ employees, operating across EU.",
        correct_classification="high_risk",
        ground_truth_findings=[
            "employee_sentiment_is_high_risk_workplace_monitoring",
            "safety_watch_uses_biometric_categorization",
            "cross_system_data_sharing_amplifies_risks",
            "no_dpia_for_combined_processing",
            "employee_consent_not_freely_given",
            "churn_prediction_minimal_risk_but_data_sharing_elevates",
        ],
        required_remediation=[
            "reclassify_employee_sentiment_as_high_risk",
            "assess_safety_watch_for_biometric_categorization",
            "implement_data_isolation_between_systems",
            "conduct_combined_dpia",
            "obtain_valid_employee_consent_or_remove_sentiment",
            "audit_cross_system_data_flows",
        ],
        # Tempting but wrong conclusions: the risk-classification document
        # below rates InvoiceAI as minimal risk and treats no system as
        # prohibited.
        red_herrings=[
            "invoice_ai_high_risk",
            "all_systems_prohibited",
        ],
        documentation_data="""\
TECHNICAL DOCUMENTATION INVENTORY
Enterprise AI Portfolio — __COMPANY__
Audit Date: __DATE__
SYSTEM INVENTORY:
System Deployer Classification Documentation
────────────────────────────────────────────────────────────────
EmployeePulse "Workforce analytics" Per-system docs present
ChurnGuard "Customer analytics" Per-system docs present
InvoiceAI "Process automation" Per-system docs present
SafetyWatch "Safety compliance" Per-system docs present
PER-SYSTEM DOCUMENTATION STATUS:
EmployeePulse — Employee Sentiment Analysis:
Architecture document: Present (describes NLP pipeline)
Data flow diagram: Present (shows Slack/email ingestion)
Algorithm description: Present (BERT-based sentiment model)
Performance metrics: Present (F1: 0.84 on test set)
DPIA: Present (standalone, 2024-11)
Combined processing assessment: Absent
ChurnGuard — Customer Churn Prediction:
Architecture document: Present
Algorithm description: Present (gradient-boosted trees)
Data sources: Present (CRM, support tickets, usage)
Performance metrics: Present (AUC: 0.81)
InvoiceAI — Automated Invoice Processing:
Architecture document: Present (OCR + classification)
Processing rules: Present
Accuracy metrics: Present (99.2% extraction accuracy)
Error handling procedures: Present
SafetyWatch — Workplace Safety Monitoring:
Architecture document: Present (computer vision pipeline)
Camera placement documentation: Present
Detection model description: Present (YOLO-based detection)
Works council agreement: Present (2024-06)
CROSS-SYSTEM DOCUMENTATION:
Combined risk assessment: ABSENT
Cross-system data flow diagram: ABSENT
Combined DPIA: ABSENT
Data lake access control matrix: Present but outdated (2023-09)
NOTE: Each system has individual documentation that appears
adequate in isolation. No documentation addresses the combined
risks of four AI systems sharing a common data infrastructure.""",
        training_data_info="""\
DATA AUDIT REPORT — MULTI-SYSTEM
Enterprise AI Portfolio — __COMPANY__
SYSTEM 1: EmployeePulse (Sentiment Analysis)
Data sources:
- Slack messages from internal workspace (12.4M messages)
- Email subject lines and metadata (8.7M emails)
- Employee survey responses (47K responses)
- Meeting transcript summaries (234K meetings)
Personal data processed: Employee names, communication patterns,
sentiment indicators, meeting participation frequency, response
times, collaboration network metrics.
Consent: Employees signed an "IT systems usage agreement" upon
hiring that includes a clause: "The company may process workplace
communications for operational analytics purposes." Employees
were not specifically informed about AI-powered sentiment analysis.
Note: Under EU labor law, consent given as a condition of
employment may not constitute "freely given" consent under GDPR
Article 7, as the power imbalance between employer and employee
undermines voluntary choice.
SYSTEM 2: ChurnGuard (Customer Churn)
Data sources:
- CRM records (2.1M customers)
- Support ticket history (5.8M tickets)
- Product usage telemetry (real-time)
- Contract terms and renewal dates (2.1M contracts)
Personal data: Customer names, contact info, usage patterns,
support interaction history, contract details.
SYSTEM 3: InvoiceAI (Invoice Processing)
Data sources:
- Scanned invoices (3.4M documents)
- Vendor database (12K vendors)
- Purchase orders (1.8M orders)
Personal data: Minimal — vendor business information only.
No individual personal data processed.
SYSTEM 4: SafetyWatch (Workplace Monitoring)
Data sources:
- CCTV footage from 847 cameras across 23 facilities
- Real-time video stream processing
Processing details:
- Object detection: Hard hat, safety vest, goggles presence
- Zone violation: Entry into restricted areas
- Pose estimation: Ergonomic risk assessment (bending, lifting)
- FACIAL RECOGNITION: Used for zone access verification in
restricted areas (R&D labs, chemical storage)
The pose estimation module processes body positioning data that
could constitute biometric categorization — inferring physical
characteristics and behavior patterns of employees.
CROSS-SYSTEM DATA SHARING:
All four systems access a shared Azure Data Lake (ADL) instance.
Access control is implemented at the storage container level.
OBSERVED DATA FLOWS:
EmployeePulse → SharedLake: Employee sentiment scores
ChurnGuard ← SharedLake: Pulls employee data for "internal
engagement correlation" feature
SafetyWatch → SharedLake: Zone compliance records
InvoiceAI → SharedLake: Vendor payment data
CONCERN: ChurnGuard's "internal engagement correlation" feature
accesses EmployeePulse sentiment data to predict whether
disengaged employees might cause customer churn through poor
service. This creates an undocumented data flow where employee
sentiment analysis affects customer-facing predictions.""",
        oversight_info="""\
HUMAN OVERSIGHT — MULTI-SYSTEM
Enterprise AI Portfolio — __COMPANY__
SYSTEM 1: EmployeePulse
Oversight: HR Analytics team reviews monthly aggregate reports.
Individual-level data accessible to HR Business Partners.
No opt-out mechanism for employees.
No employee notification that individual sentiment is tracked.
Aggregated "team health" scores shared with department managers.
HR reports that 3 employees were "counseled" in 2025 after
EmployeePulse flagged sustained negative sentiment patterns.
SYSTEM 2: ChurnGuard
Oversight: Customer Success team reviews churn predictions weekly.
High-risk accounts flagged for proactive outreach.
No direct impact on individual customers' service or pricing.
Predictions used as advisory signals only.
SYSTEM 3: InvoiceAI
Oversight: Finance team reviews all flagged exceptions (approx 3%
of invoices). Full human review for invoices above EUR 50K.
System handles routine three-way matching autonomously.
Error rate: 0.8% (caught in downstream reconciliation).
SYSTEM 4: SafetyWatch
Oversight: Safety officers monitor real-time alerts.
All zone violations are reviewed within 15 minutes.
Pose estimation alerts are reviewed in batches (daily).
Facial recognition matches for restricted zones are logged and
reviewed if there is a mismatch (attempted unauthorized access).
CROSS-SYSTEM OVERSIGHT:
No unified oversight body monitors the interaction between systems.
Each system has its own operational team:
- EmployeePulse: HR Analytics (3 people)
- ChurnGuard: Customer Success (5 people)
- InvoiceAI: Finance Operations (2 people)
- SafetyWatch: HSE Department (4 people)
The IT department manages the shared data lake infrastructure but
does not monitor data flows between systems from a compliance
perspective. No data governance officer has been appointed with
authority over cross-system data usage.""",
        transparency_info="""\
TRANSPARENCY REVIEW — MULTI-SYSTEM
Enterprise AI Portfolio — __COMPANY__
SYSTEM 1: EmployeePulse
Employee notification: The company's internal IT policy document
(available on the intranet, 47 pages) includes the statement:
"Workplace communications may be processed for analytical purposes
to support organizational effectiveness."
Employees are NOT specifically told:
- That AI analyzes their Slack messages and email metadata
- That individual sentiment scores are generated
- That these scores are accessible to HR Business Partners
- That sentiment data flows to the ChurnGuard system
- That sustained negative sentiment may trigger HR intervention
SYSTEM 2: ChurnGuard
Customer notification: The company's privacy policy mentions
"automated analysis to improve customer service." Customers are
not informed that their accounts are scored for churn risk or
that this scoring uses employee sentiment data internally.
SYSTEM 3: InvoiceAI
Vendor notification: Vendors are informed that invoices are
"processed electronically." No specific AI disclosure required
as the system handles business documents, not personal data
of natural persons in a consequential manner.
SYSTEM 4: SafetyWatch
Employee notification: The works council agreement from 2024-06
authorizes CCTV monitoring for safety purposes. The agreement
specifically mentions:
- PPE compliance detection (hard hats, vests, goggles)
- Restricted zone monitoring
- "Advanced safety analytics" (vague — does not specify
pose estimation or facial recognition)
Employees are aware of cameras but NOT specifically informed:
- That pose estimation analyzes their body movements
- That facial recognition identifies them in restricted zones
- That safety compliance data is stored in the shared data lake""",
        risk_assessment_info="""\
RISK CLASSIFICATION — MULTI-SYSTEM
Enterprise AI Portfolio — __COMPANY__
PER-SYSTEM CLASSIFICATION ANALYSIS:
SYSTEM 1: EmployeePulse (Sentiment Analysis)
Deployer classification: "Workforce analytics tool — minimal risk"
Annex III check: Category 4 — "AI systems intended to be used
for making decisions affecting terms of work-related relationships"
The system generates individual sentiment scores accessible to HR,
and has been used as a factor in HR interventions (counseling).
This constitutes a system that affects work-related relationships.
AUDITOR ASSESSMENT: HIGH-RISK under Annex III Category 4
SYSTEM 2: ChurnGuard (Customer Churn)
Deployer classification: "Customer analytics — minimal risk"
Annex III check: The system predicts customer churn for advisory
purposes. It does not make decisions affecting individual
customers' service level, pricing, or contract terms.
In isolation: MINIMAL RISK
However: Cross-system data flows (employee sentiment → churn
prediction) create compound processing that was not assessed.
SYSTEM 3: InvoiceAI (Invoice Processing)
Deployer classification: "Process automation — minimal risk"
Annex III check: No applicable category. The system processes
business documents (invoices, POs) with minimal personal data.
AUDITOR ASSESSMENT: MINIMAL RISK (correct classification)
SYSTEM 4: SafetyWatch (Workplace Safety)
Deployer classification: "Safety compliance — limited risk"
Annex III check:
- Pose estimation: May constitute biometric categorization
(inferring physical characteristics) under Annex III Cat 1
- Facial recognition for zone access: Biometric identification
in a workplace context under Annex III Cat 1
- Safety PPE detection: Standard computer vision, not biometric
AUDITOR ASSESSMENT: Requires detailed assessment — components
range from minimal risk (PPE detection) to potentially HIGH-RISK
(facial recognition, pose estimation)
CROSS-SYSTEM RISK:
No combined risk assessment has been conducted. The interaction
between EmployeePulse sentiment data and ChurnGuard predictions
creates a processing chain that was not individually assessed by
either system's standalone documentation.
A Data Protection Impact Assessment (DPIA) should have been
conducted for the combined processing but none exists.""",
        logging_info="""\
LOGGING & TRACEABILITY — MULTI-SYSTEM
Enterprise AI Portfolio — __COMPANY__
PER-SYSTEM LOGGING:
System Decision Logs Retention Completeness
────────────────────────────────────────────────────────────
EmployeePulse Yes 2 years Individual scores + inputs
ChurnGuard Yes 3 years Predictions + features
InvoiceAI Yes 7 years All processing steps
SafetyWatch Yes 30 days Alerts + footage refs
CROSS-SYSTEM AUDIT TRAIL:
Data lake access logs: Yes (Azure ADL audit logs enabled)
Cross-system query logs: Present but not monitored
Data flow lineage tracking: Not implemented
The Azure Data Lake audit logs record which service principal
accessed which data container and when. However:
- No alerts are configured for unusual cross-system data access
- No monitoring of data volume transferred between systems
- No lineage tracking from source system to consuming system
- Logs are retained for 90 days only (insufficient for audit)
SAFETYWATCH SPECIFIC CONCERN:
Video footage is retained for only 30 days. Facial recognition
match logs are retained for 30 days. After this period, there
is no way to audit whether the system correctly identified
individuals or to investigate potential misidentification events.
For a system that may be classified as high-risk (biometric
identification in workplace), 30-day retention is likely
insufficient to meet Article 12 logging requirements.""",
    )
    # Full investigation for multi-system audit
    s.graph = _build_scenario_graph(
        investigation_tools=[
            "check_documentation", "audit_training_data",
            "verify_human_oversight", "check_transparency",
            "assess_risk_management", "check_logging",
        ],
    )
    return s
# ---------------------------------------------------------------------------
# MEDIUM BONUS SCENARIO
# ---------------------------------------------------------------------------
def _medium_emotion_recognition() -> AuditScenario:
    """Build the medium-tier workplace emotion-recognition audit scenario.

    The scenario describes "EngageMetrics Pro", a video-call analytics module
    that scores employee engagement from facial, vocal and body-language
    signals. The expected classification is "prohibited"; the ground-truth
    findings, required remediation steps and red herrings are listed on the
    returned object. Each investigation tool's evidence document is supplied
    as a raw text block (documentation, training data, oversight,
    transparency, risk assessment, logging).

    The text blocks contain placeholder tokens such as __COMPANY__,
    __VERSION__, __DATE__, __USERCOUNT__ and __REGION__; they are left
    untouched here (presumably AuditScenario.randomize() substitutes them --
    confirm against server.engine).

    Returns:
        A new AuditScenario with its state graph attached.
    """
    s = AuditScenario(
        scenario_id="medium_emotion_recognition_workplace_001",
        title="Workplace Emotion Recognition — Article 5/50 Boundary Case",
        difficulty="medium",
        description=(
            "A corporate productivity platform uses webcam-based emotion recognition "
            "during video calls to measure 'employee engagement scores.' The EU AI Act "
            "prohibits emotion recognition in the workplace (Article 5(1)(f)) with narrow "
            "exceptions. Determine whether this system falls under the prohibition or "
            "the medical/safety exception."
        ),
        system_name="EngageMetrics Pro",
        system_description=(
            "Video analytics module integrated with the corporate video conferencing "
            "platform. Analyzes facial micro-expressions, voice tone, and body language "
            "during meetings to generate per-employee 'engagement scores' on a 0-100 "
            "scale. Scores are aggregated into team dashboards accessible to managers. "
            "Deployed across __USERCOUNT__ employees in __REGION__."
        ),
        system_category="prohibited",
        deployer_info="__COMPANY__ — US-based HR analytics firm, EU subsidiary in Ireland, serving 200+ EU corporate clients.",
        correct_classification="prohibited",
        ground_truth_findings=[
            "prohibited_workplace_emotion_recognition",
            "no_medical_or_safety_exception_applies",
            "employee_consent_not_freely_given",
            "covert_processing_of_biometric_data",
        ],
        required_remediation=[
            "immediate_cessation_of_emotion_recognition",
            "notify_data_protection_authority",
            "delete_all_emotion_scores_and_biometric_data",
            "inform_affected_employees",
        ],
        # Plausible-looking but incorrect conclusions an auditor might reach.
        red_herrings=[
            "high_risk_classification",
            "needs_bias_audit",
            "medical_exception_applies",
        ],
        documentation_data="""\
TECHNICAL DOCUMENTATION INVENTORY
System: EngageMetrics Pro __VERSION__
Deployer: __COMPANY__
Audit Date: __DATE__
DOCUMENT STATUS:
Document Status Last Updated
─────────────────────────────────────────────────────────────────
Platform Architecture Present 2025-10-22
ML Model Documentation Present 2025-10-22
Data Processing Agreement (DPA) Present 2025-08-01
Employee Privacy Notice Present 2025-09-15
Works Council Consultation Record Absent —
DPIA Present 2025-07-10
ML MODEL DETAILS:
Architecture: Multi-modal CNN (video) + Transformer (audio)
Input: Webcam video frames (10 fps) + microphone audio
Output: Engagement probability score (0-100)
Features analyzed:
- Facial Action Units (AU1-AU28) — eyebrow, lip, jaw movements
- Gaze direction and duration
- Head pose (pitch, yaw, roll)
- Voice pitch variation and speaking rate
- Micro-expression detection (< 500ms duration)
Training data: 154K labeled video clips from US call centers
Accuracy: "86% correlation with human engagement ratings"
DEPLOYER'S CLAIMED PURPOSE:
"EngageMetrics helps organizations understand meeting effectiveness
and employee well-being. The tool provides aggregate insights to
improve team dynamics and reduce meeting fatigue."
DPIA FINDINGS:
The DPIA conducted in July 2025 concluded that the system processes
"behavioral analytics data" rather than biometric data, and classified
the processing as "legitimate interest" under GDPR Article 6(1)(f).
The DPIA does not reference the EU AI Act or its provisions on
emotion recognition.""",
        training_data_info="""\
DATA PROCESSING REPORT
System: EngageMetrics Pro __VERSION__
DATA COLLECTION:
Source: Corporate video conferencing platform API
Collection method: Real-time video frame extraction during meetings
Frequency: 10 frames/second during active video
Audio: Continuous during meetings (voice characteristics only)
Storage: Frames processed in-memory, engagement scores stored
PROCESSING DETAILS:
The system extracts the following biometric indicators:
- 28 Facial Action Units per the Facial Action Coding System (FACS)
- Gaze tracking (eye position relative to screen center)
- Head movement patterns
- Voice fundamental frequency (F0) and formants
- Speech rate, pause duration, filler word frequency
- Micro-expression detection (expressions lasting < 500ms)
These indicators are processed through the ML model to produce
a scalar "engagement score" for each participant, each meeting.
EMPLOYEE DATA RETENTION:
Per-meeting scores: Retained 12 months
Aggregated weekly scores: Retained 24 months
Raw video/audio: Not retained (processed in real-time)
Individual score history: Accessible to employee and their manager
CONSENT MECHANISM:
Employees are notified via a banner at the start of each meeting:
"This meeting uses engagement analytics. By joining, you consent
to having your engagement level measured."
Employees can "opt out" by disabling their camera, but this is
noted in the team dashboard as "camera off — engagement unknown"
and managers receive a monthly report of camera-off frequency.
TRAINING DATA COMPOSITION:
Source: 154,291 labeled video clips from US-based customer service
call centers. Labels assigned by human raters scoring engagement
on a 1-5 scale.
Demographic representation of training data:
Age 20-35: 72%
Age 36-50: 23%
Age 50+: 5%
Note: Training data from US only. System deployed on EU employees
with different cultural norms for facial expression.""",
        oversight_info="""\
HUMAN OVERSIGHT & GOVERNANCE
System: EngageMetrics Pro __VERSION__
MANAGEMENT ACCESS:
Team managers receive:
- Weekly aggregated engagement dashboard per team member
- Meeting-level engagement scores (per person, per meeting)
- "Low engagement alerts" when an employee's score drops below
40 for 3 consecutive meetings
- Trend analysis showing engagement trajectory over months
HR department receives:
- Department-level aggregated engagement reports (monthly)
- Individual engagement data accessible "for performance review
purposes" per company HR policy
EMPLOYEE ACCESS:
Employees can view their own engagement scores in a personal
dashboard. They cannot see other employees' scores.
DOCUMENTED USES OF ENGAGEMENT DATA:
Per the deployer's case studies and client testimonials:
- "Identified and coached underperforming team members" (Client A)
- "Used engagement data as one factor in performance reviews" (Client B)
- "Detected early signs of burnout in engineering team" (Client C)
WORKS COUNCIL CONSULTATION:
No works council consultation record exists. The deployer states
that implementation was handled as an "IT tool deployment" not
requiring works council approval. In Germany and several other
EU member states, workplace monitoring systems require works
council agreement (Betriebsrat Mitbestimmung).
EMPLOYEE GRIEVANCES:
17 formal complaints filed in Q3-Q4 2025:
- 8 complaints about feeling "surveilled" during meetings
- 5 complaints that camera-off reporting is coercive
- 4 complaints that engagement scores affected performance reviews""",
        transparency_info="""\
TRANSPARENCY REVIEW
System: EngageMetrics Pro __VERSION__
EMPLOYEE NOTIFICATION:
Meeting banner: "This meeting uses engagement analytics."
No further detail provided about:
- What specific facial/voice features are analyzed
- How the engagement score is calculated
- Who has access to individual scores
- How long scores are retained
- The employee's right to object
Employee onboarding materials include a section titled
"Digital Workplace Tools" that states: "We use various
analytics tools to improve collaboration and meeting
effectiveness. These tools may process behavioral data."
The word "emotion" does not appear in any employee-facing
communication. The system is marketed internally as
"engagement analytics" rather than "emotion recognition."
ARTICLE 50(3) — EMOTION RECOGNITION DISCLOSURE:
Article 50(3) requires: "Users of an emotion recognition system
or a biometric categorisation system shall inform the natural
persons exposed thereto of the operation of the system."
The current notification ("engagement analytics") does not
inform employees that the system recognizes emotional states
from their facial expressions and voice characteristics.
ARTICLE 5(1)(f) — PROHIBITION:
Article 5(1)(f) prohibits: "the use of emotion recognition
systems in the workplace [...] except where the use of such
system is intended to be put in place or put on the market
for medical or safety reasons."
The deployer's stated purpose is measuring "engagement" for
productivity optimization and performance management. This
does not fall under the medical or safety exception.""",
        risk_assessment_info="""\
RISK CLASSIFICATION ANALYSIS
System: EngageMetrics Pro __VERSION__
DEPLOYER'S SELF-CLASSIFICATION:
The deployer classified the system as "limited risk — workplace
analytics tool" and argues that it measures "engagement" not
"emotions," citing that the output is a single numeric score
rather than discrete emotion labels (happy, sad, angry, etc.).
AUDITOR'S ANALYSIS:
EMOTION RECOGNITION DEFINITION (Article 3(39)):
"emotion recognition system means an AI system for the purpose
of identifying or inferring emotions or intentions of natural
persons on the basis of their biometric data"
The system processes:
- Facial Action Units (biometric data under GDPR)
- Voice pitch and tone characteristics (biometric data)
- Micro-expressions (inherently emotional indicators)
The system's output — an "engagement score" — is derived from
emotional and attentional indicators. Regardless of whether the
output is labeled "engagement" or "emotion," the underlying
processing constitutes emotion recognition per Article 3(39).
ARTICLE 5(1)(f) APPLICABILITY:
- Location: workplace (employee meetings) — YES
- Purpose: productivity monitoring, performance review — YES
- Medical exception: not applicable (not for health/safety)
- Safety exception: not applicable (office work, not hazardous)
The deployer's argument that "engagement ≠ emotion" contradicts
the technical reality: the system reads facial micro-expressions
and voice stress patterns — precisely the biometric data that
Article 3(39) identifies as emotion recognition inputs.""",
        logging_info="""\
LOGGING & DATA PROCESSING REVIEW
System: EngageMetrics Pro __VERSION__
PROCESSING LOGS:
Event Type Logged Retention
──────────────────────────────────────────────────
Meeting start/end Yes 24 months
Per-meeting engagement score Yes 12 months
Weekly aggregated score Yes 24 months
Manager dashboard access Yes 6 months
Low engagement alerts sent Yes 12 months
Employee opt-out events Yes 12 months
Camera-off events Yes 12 months
NOTE: If the system constitutes prohibited emotion recognition
under Article 5(1)(f), the existence and quality of logging
is irrelevant to the primary compliance determination. The
system must cease operation regardless of its logging capabilities.
Camera-off tracking may constitute additional coercion, as
employees who exercise their right to avoid emotion recognition
are identifiable and their behavior is reported to management.""",
    )
    # Prohibited system — short investigation then findings
    # (check_logging is deliberately not part of the progress path here).
    s.graph = _build_scenario_graph(
        investigation_tools=[
            "check_documentation",
            "audit_training_data",
            "verify_human_oversight",
            "check_transparency",
            "assess_risk_management",
        ],
        is_prohibited=True,
    )
    return s
# ---------------------------------------------------------------------------
# Registry
# ---------------------------------------------------------------------------
# Lookup table from fixed scenario ID to the factory function that builds it.
# get_scenario() resolves non-procedural IDs here and calls the factory with
# no arguments, so every value must be callable without parameters.
_SCENARIO_FACTORIES = {
    "easy_chatbot_transparency_001": _easy_chatbot,
    "easy_recommendation_minimal_001": _easy_recommendation,
    "medium_hiring_bias_001": _medium_hiring,
    "medium_credit_scoring_001": _medium_credit,
    "medium_medical_triage_001": _medium_medical,
    "medium_emotion_recognition_workplace_001": _medium_emotion_recognition,
    "hard_social_scoring_prohibited_001": _hard_social_scoring,
    "hard_deepfake_generation_001": _hard_deepfake,
    "hard_multi_system_corporate_001": _hard_multi_system,
}
# Public alias of the factory table: this is the same dict object, not a copy,
# so mutating one mutates the other.
# NOTE(review): the values are factory functions rather than classes, so the
# `type` value annotation looks inaccurate -- confirm and tighten if desired.
SCENARIOS: Dict[str, type] = _SCENARIO_FACTORIES
# Catalogue of the fixed scenarios in presentation order: one record per
# scenario carrying its ID, a short human-readable title and its tier.
SCENARIO_LIST = [
    dict(
        id="easy_chatbot_transparency_001",
        title="Customer Service Chatbot",
        difficulty="easy",
    ),
    dict(
        id="easy_recommendation_minimal_001",
        title="Music Recommendation Engine",
        difficulty="easy",
    ),
    dict(
        id="medium_hiring_bias_001",
        title="AI Resume Screener",
        difficulty="medium",
    ),
    dict(
        id="medium_credit_scoring_001",
        title="Credit Scoring Model",
        difficulty="medium",
    ),
    dict(
        id="medium_medical_triage_001",
        title="Emergency Triage AI",
        difficulty="medium",
    ),
    dict(
        id="medium_emotion_recognition_workplace_001",
        title="Workplace Emotion Recognition (PROHIBITED)",
        difficulty="medium",
    ),
    dict(
        id="hard_social_scoring_prohibited_001",
        title="Citizen Wellness App (PROHIBITED)",
        difficulty="hard",
    ),
    dict(
        id="hard_deepfake_generation_001",
        title="AI Content Studio (Deepfake)",
        difficulty="hard",
    ),
    dict(
        id="hard_multi_system_corporate_001",
        title="Corporate AI Portfolio Audit",
        difficulty="hard",
    ),
]

# Scenario IDs grouped by difficulty tier, in catalogue order. Built from
# SCENARIO_LIST so the grouped view always lists exactly the catalogued IDs.
DIFFICULTY_TIERS = {
    tier: [entry["id"] for entry in SCENARIO_LIST if entry["difficulty"] == tier]
    for tier in ("easy", "medium", "hard")
}
def get_scenario(scenario_id: str, seed: Optional[int] = None) -> AuditScenario:
    """Create and randomize a scenario by ID.

    Supports both fixed scenarios (e.g. 'medium_hiring_bias_001') and
    procedurally generated ones (e.g. 'procedural_medium_42' or
    'procedural_hard_12345').

    Procedural IDs have the form ``procedural_{difficulty}_{seed}`` or
    ``procedural_{difficulty}``. A seed embedded in the ID takes precedence
    over the ``seed`` argument; when neither is given, 42 is used. An empty
    difficulty segment (e.g. ``procedural_``) falls back to "medium".

    Args:
        scenario_id: Key of a fixed scenario, or a procedural scenario ID.
        seed: Seed passed to the scenario's ``randomize`` step for fixed
            scenarios, or used as the generation seed for procedural IDs
            that do not embed one. ``0`` is honoured as a real seed.

    Returns:
        The constructed scenario.

    Raises:
        ValueError: If ``scenario_id`` is neither a known fixed scenario nor
            a procedural ID, or if a procedural ID embeds a non-integer seed.
    """
    if scenario_id.startswith("procedural_"):
        # Kept as a function-local import -- presumably to avoid an import
        # cycle with scenarios.procedural; confirm before hoisting.
        from scenarios.procedural import generate_procedural_scenario

        parts = scenario_id.split("_")
        # The prefix check guarantees parts[1] exists, but it can be the
        # empty string ("procedural_"), so default on emptiness, not length.
        difficulty = parts[1] or "medium"
        if len(parts) > 2:
            try:
                proc_seed = int(parts[2])
            except ValueError as err:
                raise ValueError(
                    f"Invalid procedural scenario ID {scenario_id!r}: "
                    f"seed must be an integer, got {parts[2]!r}"
                ) from err
        elif seed is not None:
            # Compare against None explicitly so that seed=0 is not
            # mistaken for "no seed" and replaced by the default.
            proc_seed = seed
        else:
            proc_seed = 42
        return generate_procedural_scenario(proc_seed, difficulty)

    factory = _SCENARIO_FACTORIES.get(scenario_id)
    if factory is None:
        raise ValueError(
            f"Unknown scenario: {scenario_id}. "
            f"Available: {list(_SCENARIO_FACTORIES.keys())} + procedural_{{difficulty}}_{{seed}}"
        )
    scenario = factory()
    scenario.randomize(seed)
    return scenario
def get_scenarios_by_difficulty(difficulty: str) -> List[str]:
    """Get scenario IDs for a difficulty tier.

    Args:
        difficulty: Tier name used as a key of DIFFICULTY_TIERS.

    Returns:
        A new list of the scenario IDs registered for that tier, or an empty
        list when the tier is unknown.
    """
    # Hand back a copy: returning the registry's own list would let a caller
    # that sorts, shuffles or appends to the result silently alter the tiers
    # for every later lookup.
    return list(DIFFICULTY_TIERS.get(difficulty, ()))
def get_random_scenario(difficulty: str, seed: Optional[int] = None) -> AuditScenario:
    """Build one scenario chosen at random from the given difficulty tier.

    The same ``seed`` drives both the choice of scenario and the call to
    get_scenario(), so a fixed seed reproduces the same result.

    Args:
        difficulty: Tier to draw from.
        seed: Optional seed for the selection and for get_scenario().

    Returns:
        The scenario produced by get_scenario() for the chosen ID.

    Raises:
        ValueError: If the tier has no scenarios (unknown difficulty).
    """
    picker = random.Random(seed)
    candidates = get_scenarios_by_difficulty(difficulty)
    if candidates:
        chosen_id = picker.choice(candidates)
        return get_scenario(chosen_id, seed)
    raise ValueError(f"Unknown difficulty: {difficulty}")