"""Gradio demo that tags Kabyle text with the boffire/kabyle-pos model."""

import html

import gradio as gr
from transformers import pipeline

# Load the model once at import time so every request reuses the same pipeline.
tagger = pipeline(
    "token-classification",
    model="boffire/kabyle-pos",
    aggregation_strategy="simple",
)

# Universal Dependencies POS tagset with descriptions
POS_DESCRIPTIONS = {
    "ADJ": "Adjective",
    "ADP": "Adposition",
    "ADV": "Adverb",
    "AUX": "Auxiliary verb",
    "CCONJ": "Coordinating conjunction",
    "DET": "Determiner",
    "INTJ": "Interjection",
    "NOUN": "Noun",
    "NUM": "Numeral",
    "PART": "Particle",
    "PRON": "Pronoun",
    "PROPN": "Proper noun",
    "PUNCT": "Punctuation",
    "SCONJ": "Subordinating conjunction",
    "SYM": "Symbol",
    "VERB": "Verb",
    "X": "Other",
}

# Color mapping for POS tags (background color of each token box)
POS_COLORS = {
    "NOUN": "#e3f2fd",
    "PROPN": "#bbdefb",
    "PRON": "#90caf9",
    "VERB": "#e8f5e9",
    "AUX": "#c8e6c9",
    "ADJ": "#fff3e0",
    "ADV": "#ffe0b2",
    "ADP": "#f3e5f5",
    "PART": "#e1bee7",
    "DET": "#fce4ec",
    "NUM": "#f8bbd0",
    "CCONJ": "#e0f7fa",
    "SCONJ": "#b2ebf2",
    "INTJ": "#fff9c4",
    "PUNCT": "#f5f5f5",
    "SYM": "#eeeeee",
    "X": "#ffccbc",
}


def _markdown_cell(value):
    """Return *value* as text that cannot break out of a Markdown table cell."""
    # An unescaped pipe would start a new column; a newline would end the row.
    return str(value).replace("|", "\\|").replace("\n", " ")


def _token_html(token):
    """Render one tagged token as a colored box holding the word and its tag."""
    label = token["entity_group"]
    score = token["score"]
    desc = POS_DESCRIPTIONS.get(label, label)
    color = POS_COLORS.get(label, "#ffffff")
    # The word is user input and the label is model output: escape both before
    # placing them inside markup. `color` only ever comes from POS_COLORS or
    # the literal fallback above, so it is safe to interpolate as-is.
    safe_word = html.escape(token["word"])
    safe_label = html.escape(label)
    safe_desc = html.escape(desc)
    return (
        '<div style="display:inline-flex;flex-direction:column;'
        "align-items:center;padding:4px 8px;border-radius:6px;"
        f'color:#000000;background:{color};" title="{safe_desc}">'
        f'<span style="font-weight:600;">{safe_word}</span>'
        f'<span style="font-size:0.8em;">{safe_label} ({score:.2f})</span>'
        "</div>"
    )


def _table_row(token):
    """Render one tagged token as a row of the Markdown results table."""
    label = token["entity_group"]
    desc = POS_DESCRIPTIONS.get(label, label)
    word = _markdown_cell(token["word"])
    return (
        f"| {word} | `{_markdown_cell(label)}` | {_markdown_cell(desc)} "
        f"| {token['score']:.3f} |"
    )


def tag_text(text):
    """Tag *text* and return ``(html, markdown)`` for the two output widgets.

    The first element is an HTML fragment with one colored box per token; the
    second is a Markdown table listing token, tag, description and confidence.
    When there is nothing to show (blank input, tagger failure, no tokens) the
    HTML element is an empty string and the Markdown element carries the
    message for the user.
    """
    if not text or not text.strip():
        return "", "Please enter some Kabyle text."

    try:
        results = tagger(text.strip())
    except Exception as e:
        # UI boundary: report the failure in the output pane instead of
        # letting the exception propagate out of the callback.
        return "", f"Error: {str(e)}"

    if not results:
        return "", "No tokens found."

    # Build HTML visualization
    html_parts = ['<div style="display:flex;flex-wrap:wrap;gap:8px;">']
    html_parts.extend(_token_html(token) for token in results)
    html_parts.append("</div>")

    # Build markdown table
    table_lines = [
        "| Token | POS Tag | Description | Confidence |",
        "|-------|---------|-------------|------------|",
    ]
    table_lines.extend(_table_row(token) for token in results)

    return "\n".join(html_parts), "\n".join(table_lines)


# Example sentences in Kabyle
examples = [
    "Aṭas n medden i yessen.",
    "Taqbaylit d tutlayt tamezwarut deg Lezzayer.",
    "Yella wuccen ameqran deg wexxam.",
    "Tameddakt-nneɣ teɣra adlis ameqran.",
    "D nekkni i d-yusan d imezwura.",
]

demo = gr.Interface(
    fn=tag_text,
    inputs=gr.Textbox(
        label="Kabyle Text",
        placeholder="Enter a sentence in Kabyle (e.g., Aṭas n medden i yessen.)",
        lines=2,
    ),
    outputs=[
        gr.HTML(label="Tagged Visualization"),
        gr.Markdown(label="Results Table"),
    ],
    title="Kabyle POS Tagger",
    description="""

Kabyle Part-of-Speech Tagger

Enter a sentence in Kabyle (Berber language) to see POS tags predicted by boffire/kabyle-pos (XLM-RoBERTa-base, F1: 87.5%).

Tags follow the Universal Dependencies POS tagset.

""",
    examples=examples,
    # NOTE(review): newer Gradio releases appear to rename this argument to
    # `flagging_mode` - confirm against the pinned Gradio version.
    allow_flagging="never",
    cache_examples=False,
)

if __name__ == "__main__":
    demo.launch()