Spaces:
Runtime error
Runtime error
import csv
import tempfile

import gradio as gr
from transformers import pipeline
# ---------- Pipelines ----------
def _text_classifier(model_id):
    """Build a text-classification pipeline from a single checkpoint id.

    The same id is used for both the model and the tokenizer.
    """
    return pipeline(
        "text-classification",
        model=model_id,
        tokenizer=model_id,
        return_all_scores=False,
    )


# NOTE(review): analyze() indexes each call result with [0] and reads the
# "label" and "score" keys from it, so the result shape must stay as-is.
hate = _text_classifier("hossam87/bert-base-arabic-hate-speech")
dialect = _text_classifier("IbrahimAmin/marbertv2-arabic-written-dialect-classifier")
# ---------- Inference ----------
# Severity weight per hate-speech label; labels missing from the table weigh 0.0.
_SEVERITY_WEIGHTS = {
    "Neutral": 0.0,
    "Offensive": 0.5,
    "Sexism": 1.0,
    "Racism": 1.0,
    "Religious Discrimination": 1.0,
}


def _recommend_action(hate_label, weight):
    """Map a hate-speech label and its severity weight to a moderation message."""
    if hate_label == "Neutral":
        return "✅ Safe — No action needed."
    if weight >= 1.0:
        return "🚨 Immediate Review — Severe content detected. Escalate to moderators."
    # Every other non-neutral label, including labels absent from the weight table.
    return "⚠️ Potentially Harmful — Contains offensive content. Please review."


def analyze(text: str):
    """Classify one piece of text and recommend a moderation action.

    Args:
        text: The text to analyze. Empty, whitespace-only, or ``None`` input
            is answered with a prompt instead of being sent to the models.

    Returns:
        A 6-tuple of strings:
        ``(hate_label, hate_confidence, dialect_label, dialect_confidence,
        threat_score, action)``. Confidences and the threat score are
        formatted with two decimals; the threat score is the hate-label
        confidence multiplied by the label's severity weight. On empty input
        or on any error the first five items are empty strings and the last
        one carries the message.
    """
    try:
        if not text or not text.strip():
            return ("", "", "", "", "", "Please enter some Arabic text.")

        hate_result = hate(text)[0]
        dialect_result = dialect(text)[0]

        hate_label = hate_result.get("label", "")
        hate_conf = float(hate_result.get("score", 0.0))
        dial_label = dialect_result.get("label", "")
        dial_conf = float(dialect_result.get("score", 0.0))

        weight = _SEVERITY_WEIGHTS.get(hate_label, 0.0)
        score = hate_conf * weight
        # A former "Borderline" branch (score >= 0.49) was dropped: it was only
        # evaluated for the "Neutral" label, whose score is always 0.0, so it
        # could never fire.
        action = _recommend_action(hate_label, weight)

        return (
            hate_label,
            f"{hate_conf:.2f}",
            dial_label,
            f"{dial_conf:.2f}",
            f"{score:.2f}",
            action,
        )
    except Exception as e:
        # keep the UI alive even if a model throws
        return ("", "", "", "", "", f"Runtime error: {e}")
def analyze_file(file):
    """Run ``analyze`` on each non-empty line of an uploaded text file.

    Args:
        file: The upload handed over by the file input: either a filesystem
            path string or an object exposing the path as ``.name``.
            ``None`` means nothing was uploaded.

    Returns:
        A ``(rows, csv_path, status)`` tuple. ``rows`` holds one list per
        analyzed line (index, truncated text, then the six ``analyze``
        results), ``csv_path`` is the path of a temporary CSV file with the
        same rows plus a header, and ``status`` is a human-readable summary.
        When nothing was uploaded or an error occurs, ``rows`` is empty and
        ``csv_path`` is ``None``.
    """
    if file is None:
        return [], None, "Please upload a .txt file."

    max_lines = 200  # process a bounded number of lines to keep the UI responsive
    try:
        # Accept both a plain path string and a file-like wrapper with `.name`.
        path = file if isinstance(file, str) else file.name

        # "utf-8-sig" drops a leading byte-order mark so it does not end up
        # glued to the first entry; undecodable bytes are skipped.
        with open(path, "r", encoding="utf-8-sig", errors="ignore") as src:
            text = src.read()
        lines = [ln.strip() for ln in text.splitlines() if ln.strip()]

        headers = ["#", "Text (truncated)", "Hate Label", "Label Conf.",
                   "Dialect", "Dialect Conf.", "Threat Score", "Recommended Action"]
        rows = []
        for i, ln in enumerate(lines[:max_lines], start=1):
            hate_label, hate_conf, dial_label, dial_conf, score, action = analyze(ln)
            rows.append([
                i,
                (ln[:80] + "…") if len(ln) > 80 else ln,
                hate_label, hate_conf, dial_label, dial_conf, score, action,
            ])

        # Write the CSV through the temp-file handle itself so the handle is
        # closed on exit; delete=False keeps the file available for download.
        with tempfile.NamedTemporaryFile(
            "w", delete=False, suffix=".csv", encoding="utf-8", newline=""
        ) as out:
            writer = csv.writer(out)
            writer.writerow(headers)
            writer.writerows(rows)
            csv_path = out.name

        status = f"Processed {len(rows)} lines."
        if len(lines) > max_lines:
            # Make the cap visible instead of silently dropping the remainder.
            status += f" Only the first {max_lines} of {len(lines)} lines were analyzed."
        return rows, csv_path, status
    except Exception as e:
        return [], None, f"Error reading file: {e}"
# ---------- CSS (sidebar only) ----------
# Custom stylesheet handed to gr.Blocks(css=...). The single rule targets the
# element whose elem_id is "sidebar-box".
CSS = """
/* Only style the sidebar box; leave main area/theme untouched */
#sidebar-box {
border: 1px solid rgba(255,255,255,0.08);
border-radius: 10px;
padding: 20px;
}
"""
# ---------- UI ----------
# Sidebar help text. Kept at column 0 so no source indentation leaks into the
# Markdown, and with blank lines between blocks so that headings, paragraphs,
# lists and the table are parsed as separate elements.
_SIDEBAR_MARKDOWN = """
## 🛡️ AI Watchdog: Arabic Content Safety Analyzer

**Purpose**

Analyze Arabic text for harmful or threatening language.

**Features**

- **Hate Speech Classification**
- **Dialect Detection**
- **Threat Severity Score**
- **Recommended Action** (rule-based)

**How to Use**

- Type text or
- Upload a .txt file (one entry per line)

### Dialect Abbreviation Guide

| Abbreviation | Meaning |
|--------------|---------|
| **MAGHREB** | North African dialects |
| **LEV** | Levantine dialects |
| **MSA** | Modern Standard Arabic |
| **GLF** | Gulf dialects |
| **EGY** | Egyptian Arabic |
"""

with gr.Blocks(css=CSS) as demo:
    with gr.Row(equal_height=True):
        # Sidebar
        with gr.Column(scale=1):
            with gr.Group(elem_id="sidebar-box"):
                gr.Markdown(_SIDEBAR_MARKDOWN)

        # Main area
        with gr.Column(scale=3):
            with gr.Tabs():
                # Single-text tab: one input box, six read-only result boxes.
                with gr.Tab("Text"):
                    gr.Markdown("### Enter Arabic Text for Analysis")
                    input_text = gr.Textbox(lines=4, placeholder="اكتب هنا...", label="Arabic Text")
                    out_hate = gr.Textbox(label="Hate Speech Label", interactive=False)
                    out_hate_conf = gr.Textbox(label="Label Confidence", interactive=False)
                    out_dialect = gr.Textbox(label="Dialect", interactive=False)
                    out_dialect_conf = gr.Textbox(label="Dialect Confidence", interactive=False)
                    out_score = gr.Textbox(label="Threat Score", interactive=False)
                    out_action = gr.Textbox(label="Recommended Action", interactive=False)
                    analyze_btn = gr.Button("Analyze", variant="primary")
                    # Output order matches the 6-tuple returned by analyze().
                    analyze_btn.click(
                        fn=analyze,
                        inputs=input_text,
                        outputs=[out_hate, out_hate_conf, out_dialect, out_dialect_conf, out_score, out_action],
                    )

                # Batch tab: results table, CSV download and a status line.
                with gr.Tab("Upload"):
                    gr.Markdown("### Upload a .txt file (one entry per line)")
                    file_input = gr.File(file_types=[".txt"], label="Upload .txt")
                    table = gr.Dataframe(
                        headers=["#", "Text (truncated)", "Hate Label", "Label Conf.", "Dialect",
                                 "Dialect Conf.", "Threat Score", "Recommended Action"],
                        row_count=1, col_count=8, wrap=True, interactive=False
                    )
                    download_csv = gr.File(label="Download Results (CSV)")
                    status = gr.Markdown()
                    run_file = gr.Button("Analyze File", variant="primary")
                    # Output order matches (rows, csv_path, status) from analyze_file().
                    run_file.click(
                        fn=analyze_file,
                        inputs=file_input,
                        outputs=[table, download_csv, status]
                    )

if __name__ == "__main__":
    demo.launch()