Spaces:

boffire
/

Kabyle-POS-tagger

Sleeping

App Files Files Community

boffire committed 22 days ago

Commit

f3bda9e

verified ·

1 Parent(s): c8f7466

Create app.py

Browse files

Files changed (1) hide show

app.py +134 -0

app.py ADDED Viewed

	@@ -0,0 +1,134 @@

+import gradio as gr
+from transformers import pipeline
+# Load the model once at import time; tag_text() calls this shared pipeline for every request.
+tagger = pipeline(
+    "token-classification",
+    model="boffire/kabyle-pos",
+    aggregation_strategy="simple"  # NOTE(review): tag_text() reads "entity_group" from each result - presumably produced by this setting; confirm
+)
+# Universal Dependencies POS tagset: tag -> human-readable name; tag_text() falls back to the raw tag when a label is missing here.
+POS_DESCRIPTIONS = {
+    "ADJ": "Adjective",
+    "ADP": "Adposition",
+    "ADV": "Adverb",
+    "AUX": "Auxiliary verb",
+    "CCONJ": "Coordinating conjunction",
+    "DET": "Determiner",
+    "INTJ": "Interjection",
+    "NOUN": "Noun",
+    "NUM": "Numeral",
+    "PART": "Particle",
+    "PRON": "Pronoun",
+    "PROPN": "Proper noun",
+    "PUNCT": "Punctuation",
+    "SCONJ": "Subordinating conjunction",
+    "SYM": "Symbol",
+    "VERB": "Verb",
+    "X": "Other"
+}
+# Color mapping for POS tags
+POS_COLORS = {
+    "NOUN": "#e3f2fd", "PROPN": "#bbdefb", "PRON": "#90caf9",
+    "VERB": "#e8f5e9", "AUX": "#c8e6c9",
+    "ADJ": "#fff3e0", "ADV": "#ffe0b2",
+    "ADP": "#f3e5f5", "PART": "#e1bee7",
+    "DET": "#fce4ec", "NUM": "#f8bbd0",
+    "CCONJ": "#e0f7fa", "SCONJ": "#b2ebf2",
+    "INTJ": "#fff9c4", "PUNCT": "#f5f5f5",
+    "SYM": "#eeeeee", "X": "#ffccbc"
+}
+def tag_text(text):
+    if not text or not text.strip():
+        return "", "Please enter some Kabyle text."
+    try:
+        results = tagger(text.strip())
+    except Exception as e:
+        return "", f"Error: {str(e)}"
+    if not results:
+        return "", "No tokens found."
+    # Build HTML visualization
+    html_parts = ['<div style="font-size: 1.15rem; line-height: 2.2; font-family: system-ui, sans-serif; padding: 10px;">']
+    for token in results:
+        word = token["word"]
+        label = token["entity_group"]
+        score = token["score"]
+        desc = POS_DESCRIPTIONS.get(label, label)
+        color = POS_COLORS.get(label, "#ffffff")
+        block = (
+            '<span style="display: inline-block; margin: 2px; vertical-align: top;">'
+            '<span style="background: ' + color + '; border: 1px solid #ccc; border-radius: 6px 6px 0 0; padding: 4px 8px; display: block; text-align: center; font-weight: 500;">'
+            + word +
+            '</span>'
+            '<span style="background: #333; color: #fff; border-radius: 0 0 6px 6px; padding: 2px 8px; display: block; text-align: center; font-size: 0.75rem;">'
+            + label + ' <span style="opacity: 0.7;">(' + f"{score:.2f}" + ')</span>'
+            '</span>'
+            '</span>'
+        )
+        html_parts.append(block)
+    html_parts.append('</div>')
+    # Build markdown table
+    table_lines = [
+        "| Token | POS Tag | Description | Confidence |",
+        "|-------|---------|-------------|------------|"
+    ]
+    for token in results:
+        word = token["word"]
+        label = token["entity_group"]
+        desc = POS_DESCRIPTIONS.get(label, label)
+        score = token["score"]
+        table_lines.append(f"| {word} | `{label}` | {desc} | {score:.3f} |")
+    return "\n".join(html_parts), "\n".join(table_lines)
+# Example sentences in Kabyle; handed to gr.Interface below through its examples= argument.
+examples = [
+    "Aṭas n medden i yessen.",
+    "Taqbaylit d tutlayt tamezwarut deg Lezzayer.",
+    "Yella wuccen ameqran deg wexxam.",
+    "Tameddakt-nneɣ teɣra adlis ameqran.",
+    "D nekkni i d-yusan d imezwura.",
+]
+demo = gr.Interface(  # UI wiring: one text box in, an HTML view and a Markdown table out
+    fn=tag_text,  # returns an (html, markdown) pair, matching the two outputs below in order
+    inputs=gr.Textbox(
+        label="Kabyle Text",
+        placeholder="Enter a sentence in Kabyle (e.g., Aṭas n medden i yessen.)",
+        lines=2
+    ),
+    outputs=[
+        gr.HTML(label="Tagged Visualization"),  # receives the first element returned by tag_text
+        gr.Markdown(label="Results Table")  # receives the second element (table or status message)
+    ],
+    title="Kabyle POS Tagger",
+    description="""
+    <div style="text-align: center;">
+        <h2>Kabyle Part-of-Speech Tagger</h2>
+        <p>Enter a sentence in <strong>Kabyle</strong> (Berber language) to see POS tags predicted by
+        <a href="https://huggingface.co/boffire/kabyle-pos" target="_blank">boffire/kabyle-pos</a>
+        (XLM-RoBERTa-base, F1: 87.5%).</p>
+        <p style="font-size: 0.9rem; color: #666;">
+            Tags follow the <a href="https://universaldependencies.org/u/pos/" target="_blank">Universal Dependencies</a> POS tagset.
+        </p>
+    </div>
+    """,
+    examples=examples,
+    allow_flagging="never",  # NOTE(review): newer Gradio releases appear to rename this to flagging_mode - confirm against the installed version
+    cache_examples=False,
+)
+if __name__ == "__main__":  # start the server only when run as a script, not on import
+    demo.launch()