""" Toxic Content Detector - Bahasa Indonesia ========================================== Deteksi kata kasar, hate speech, dan konten tidak pantas. """ import re from typing import Dict, List # Kata kasar / offensive words Bahasa Indonesia (censored for safety) TOXIC_WORDS = [ "bodoh", "goblok", "tolol", "idiot", "dungu", "brengsek", "bangsat", "bajingan", "keparat", "bedebah", "sialan", "setan", "iblis", "laknat", "babi", "anjing", "monyet", "binatang", # when used as insults "sampah", "busuk", "mampus", "matilah", "bacot", "bego", "geblek", "kampret", "tai", "tahi", "najis", "pembunuh", "bunuh", "hajar", "bantai", ] # Pola hate speech HATE_PATTERNS = [ r"semua\s+(orang\s+)?(suku|ras|agama|etnis)\s+\w+\s+(adalah|itu)\s+(jahat|bodoh|buruk)", r"(usir|basmi|habisi|musnahkan)\s+(semua\s+)?(orang\s+)?\w+", r"(bunuh|hajar|bantai)\s+(semua|mereka)", ] # Ancaman THREAT_PATTERNS = [ r"(akan|mau|ingin)\s+(membunuh|menghancurkan|membakar|meledakkan)", r"(bom|senjata|racun)\s+(untuk|buat)\s+(membunuh|menyerang)", ] class ToxicDetector: """Deteksi konten toxic dalam Bahasa Indonesia.""" def __init__(self, custom_words: List[str] = None, sensitivity: str = "medium"): """ Args: custom_words: Kata-kata tambahan yang dianggap toxic sensitivity: 'low', 'medium', 'high' """ self.toxic_words = set(TOXIC_WORDS) if custom_words: self.toxic_words.update(custom_words) self.sensitivity = sensitivity self.hate_patterns = [re.compile(p, re.IGNORECASE) for p in HATE_PATTERNS] self.threat_patterns = [re.compile(p, re.IGNORECASE) for p in THREAT_PATTERNS] def check(self, text: str) -> Dict: """ Cek teks untuk konten toxic. Returns: { "safe": bool, "score": float (0-1, higher = more toxic), "violations": list of violation details, "flagged_words": list of detected toxic words, } """ text_lower = text.lower() violations = [] flagged_words = [] score = 0.0 # 1. Check toxic words words = re.findall(r'\w+', text_lower) for word in words: if word in self.toxic_words: flagged_words.append(word) if flagged_words: word_score = min(len(flagged_words) * 0.2, 0.6) score += word_score violations.append({ "type": "toxic_words", "severity": "medium" if len(flagged_words) <= 2 else "high", "detail": f"Kata tidak pantas terdeteksi: {', '.join(flagged_words[:5])}", }) # 2. Check hate speech patterns for pattern in self.hate_patterns: match = pattern.search(text_lower) if match: score += 0.4 violations.append({ "type": "hate_speech", "severity": "high", "detail": f"Pola hate speech terdeteksi", }) break # 3. Check threats for pattern in self.threat_patterns: match = pattern.search(text_lower) if match: score += 0.5 violations.append({ "type": "threat", "severity": "critical", "detail": f"Ancaman terdeteksi", }) break # 4. Check excessive caps (shouting) if len(text) > 10: upper_ratio = sum(1 for c in text if c.isupper()) / len(text) if upper_ratio > 0.7: score += 0.1 violations.append({ "type": "shouting", "severity": "low", "detail": "Terlalu banyak huruf kapital (kemungkinan berteriak)", }) # Adjust by sensitivity threshold = {"low": 0.5, "medium": 0.3, "high": 0.1}[self.sensitivity] score = min(score, 1.0) return { "safe": score < threshold, "score": round(score, 2), "violations": violations, "flagged_words": flagged_words, }