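# Page residue from the hosting site's file viewer, kept as comments so the module parses:
# boffire's picture | Create app.py | f3bda9e verified | Raw | History Blame | 4.22 kB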
import html

import gradio as gr
from transformers import pipeline
# Build the token-classification pipeline once at import time so every
# request handled by the UI reuses the same loaded model.
# NOTE(review): with an aggregation strategy other than "none" the pipeline
# groups adjacent tokens; for POS labels without B-/I- prefixes this may merge
# neighbouring words that share a tag into a single entry -- confirm against
# the transformers TokenClassificationPipeline documentation.
tagger = pipeline(
    "token-classification",
    model="boffire/kabyle-pos",
    aggregation_strategy="simple",
)
# Human-readable name for each Universal Dependencies POS tag.
# Used to fill the "Description" column of the results table; a tag that is
# missing here is shown as-is by the lookup in tag_text.
POS_DESCRIPTIONS = {
    "ADJ": "Adjective",
    "ADP": "Adposition",
    "ADV": "Adverb",
    "AUX": "Auxiliary verb",
    "CCONJ": "Coordinating conjunction",
    "DET": "Determiner",
    "INTJ": "Interjection",
    "NOUN": "Noun",
    "NUM": "Numeral",
    "PART": "Particle",
    "PRON": "Pronoun",
    "PROPN": "Proper noun",
    "PUNCT": "Punctuation",
    "SCONJ": "Subordinating conjunction",
    "SYM": "Symbol",
    "VERB": "Verb",
    "X": "Other",
}
# Background colour (CSS hex) for the word box of each POS tag in the HTML
# visualization. Related tags share a hue family.
POS_COLORS = {
    # Nominals: blues
    "NOUN": "#e3f2fd",
    "PROPN": "#bbdefb",
    "PRON": "#90caf9",
    # Verbal: greens
    "VERB": "#e8f5e9",
    "AUX": "#c8e6c9",
    # Modifiers: oranges
    "ADJ": "#fff3e0",
    "ADV": "#ffe0b2",
    # Adpositions and particles: purples
    "ADP": "#f3e5f5",
    "PART": "#e1bee7",
    # Determiners and numerals: pinks
    "DET": "#fce4ec",
    "NUM": "#f8bbd0",
    # Conjunctions: cyans
    "CCONJ": "#e0f7fa",
    "SCONJ": "#b2ebf2",
    # Everything else
    "INTJ": "#fff9c4",
    "PUNCT": "#f5f5f5",
    "SYM": "#eeeeee",
    "X": "#ffccbc",
}
def _token_html(word, label, score):
    """Return the HTML badge for one token: the word above its tag and score."""
    color = POS_COLORS.get(label, "#ffffff")
    # The token text comes from user input and the label from the model config;
    # escape both so characters such as "<" or "&" are displayed, not parsed.
    safe_word = html.escape(str(word))
    safe_label = html.escape(str(label))
    return (
        '<span style="display: inline-block; margin: 2px; vertical-align: top;">'
        f'<span style="background: {color}; border: 1px solid #ccc; border-radius: 6px 6px 0 0; padding: 4px 8px; display: block; text-align: center; font-weight: 500;">'
        f"{safe_word}"
        "</span>"
        '<span style="background: #333; color: #fff; border-radius: 0 0 6px 6px; padding: 2px 8px; display: block; text-align: center; font-size: 0.75rem;">'
        f'{safe_label} <span style="opacity: 0.7;">({score:.2f})</span>'
        "</span>"
        "</span>"
    )


def _md_cell(text):
    """Escape *text* so it renders literally inside one Markdown table cell."""
    # HTML-escape so "<" / "&" are not read as inline HTML, then escape the
    # backslash before the pipe so an input like "\|" cannot end the cell.
    escaped = html.escape(str(text), quote=False)
    return escaped.replace("\\", "\\\\").replace("|", "\\|")


def tag_text(text):
    """Tag *text* with the module-level ``tagger`` and format the result.

    Args:
        text: Sentence to tag. ``None``, empty and whitespace-only input are
            rejected with a prompt message instead of being sent to the model.

    Returns:
        A ``(html, markdown)`` tuple of strings. ``html`` is a row of coloured
        token badges; ``markdown`` is a table with token, tag, description and
        confidence. When there is nothing to show, ``html`` is ``""`` and
        ``markdown`` carries a prompt, "no tokens" or error message.
    """
    if not text or not text.strip():
        return "", "Please enter some Kabyle text."

    try:
        results = tagger(text.strip())
    except Exception as e:
        # UI boundary: report the failure in the results pane rather than
        # letting the exception propagate out of the handler.
        return "", f"Error: {str(e)}"

    if not results:
        return "", "No tokens found."

    html_parts = ['<div style="font-size: 1.15rem; line-height: 2.2; font-family: system-ui, sans-serif; padding: 10px;">']
    table_lines = [
        "| Token | POS Tag | Description | Confidence |",
        "|-------|---------|-------------|------------|",
    ]

    # Single pass: each prediction feeds both the badge row and the table.
    for token in results:
        word = token["word"]
        label = token["entity_group"]
        score = token["score"]
        desc = POS_DESCRIPTIONS.get(label, label)

        html_parts.append(_token_html(word, label, score))
        table_lines.append(
            f"| {_md_cell(word)} | `{label}` | {desc} | {score:.3f} |"
        )

    html_parts.append("</div>")
    return "\n".join(html_parts), "\n".join(table_lines)
# Sample Kabyle sentences offered as one-click examples in the UI.
examples = [
    "Aṭas n medden i yessen.",
    "Taqbaylit d tutlayt tamezwarut deg Lezzayer.",
    "Yella wuccen ameqran deg wexxam.",
    "Tameddakt-nneɣ teɣra adlis ameqran.",
    "D nekkni i d-yusan d imezwura.",
]
# Header shown under the title: what the demo does, which model it uses and
# which tagset the labels follow.
_DESCRIPTION_HTML = """
<div style="text-align: center;">
<h2>Kabyle Part-of-Speech Tagger</h2>
<p>Enter a sentence in <strong>Kabyle</strong> (Berber language) to see POS tags predicted by
<a href="https://huggingface.co/boffire/kabyle-pos" target="_blank">boffire/kabyle-pos</a>
(XLM-RoBERTa-base, F1: 87.5%).</p>
<p style="font-size: 0.9rem; color: #666;">
Tags follow the <a href="https://universaldependencies.org/u/pos/" target="_blank">Universal Dependencies</a> POS tagset.
</p>
</div>
"""

# Single text input; its value is passed to tag_text.
_text_input = gr.Textbox(
    label="Kabyle Text",
    placeholder="Enter a sentence in Kabyle (e.g., Aṭas n medden i yessen.)",
    lines=2,
)

# Two outputs, in the same order as the (html, markdown) tuple from tag_text.
_result_views = [
    gr.HTML(label="Tagged Visualization"),
    gr.Markdown(label="Results Table"),
]

# NOTE(review): newer Gradio releases appear to rename `allow_flagging` to
# `flagging_mode` -- confirm against the Gradio version this app is pinned to.
demo = gr.Interface(
    fn=tag_text,
    inputs=_text_input,
    outputs=_result_views,
    title="Kabyle POS Tagger",
    description=_DESCRIPTION_HTML,
    examples=examples,
    allow_flagging="never",
    cache_examples=False,
)

# Start the web server only when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()