# Spaces: Sleeping
import html

import gradio as gr
from transformers import pipeline
# Build the token-classification pipeline once at import time so every
# request reuses the same loaded model.
tagger = pipeline(
    task="token-classification",
    model="boffire/kabyle-pos",
    aggregation_strategy="simple",
)
# Human-readable names for the Universal Dependencies POS tags, keyed by tag.
POS_DESCRIPTIONS = dict(
    ADJ="Adjective",
    ADP="Adposition",
    ADV="Adverb",
    AUX="Auxiliary verb",
    CCONJ="Coordinating conjunction",
    DET="Determiner",
    INTJ="Interjection",
    NOUN="Noun",
    NUM="Numeral",
    PART="Particle",
    PRON="Pronoun",
    PROPN="Proper noun",
    PUNCT="Punctuation",
    SCONJ="Subordinating conjunction",
    SYM="Symbol",
    VERB="Verb",
    X="Other",
)
# Background colour (CSS hex) used for each POS tag in the HTML view.
# Related tags are kept next to each other.
POS_COLORS = {
    # Nominals
    "NOUN": "#e3f2fd",
    "PROPN": "#bbdefb",
    "PRON": "#90caf9",
    # Verbs and auxiliaries
    "VERB": "#e8f5e9",
    "AUX": "#c8e6c9",
    # Adjectives and adverbs
    "ADJ": "#fff3e0",
    "ADV": "#ffe0b2",
    # Adpositions and particles
    "ADP": "#f3e5f5",
    "PART": "#e1bee7",
    # Determiners and numerals
    "DET": "#fce4ec",
    "NUM": "#f8bbd0",
    # Conjunctions
    "CCONJ": "#e0f7fa",
    "SCONJ": "#b2ebf2",
    # Everything else
    "INTJ": "#fff9c4",
    "PUNCT": "#f5f5f5",
    "SYM": "#eeeeee",
    "X": "#ffccbc",
}
def tag_text(text):
    """Tag a sentence and render the predictions as HTML and Markdown.

    Args:
        text: The sentence to tag. ``None``, empty and whitespace-only
            values are rejected with a prompt message.

    Returns:
        A ``(html, markdown)`` tuple of strings. On success ``html`` holds
        one coloured box per token and ``markdown`` a four-column results
        table. When there is nothing to render (blank input, the tagger
        raised, or it returned no tokens) ``html`` is ``""`` and
        ``markdown`` carries the message to show instead.
    """
    if not text or not text.strip():
        return "", "Please enter some Kabyle text."

    try:
        results = tagger(text.strip())
    except Exception as exc:
        # UI boundary: surface the failure as a message rather than letting
        # the exception propagate out of the handler.
        return "", f"Error: {exc}"

    if not results:
        return "", "No tokens found."

    html_parts = [
        '<div style="font-size: 1.15rem; line-height: 2.2; font-family: system-ui, sans-serif; padding: 10px;">'
    ]
    table_lines = [
        "| Token | POS Tag | Description | Confidence |",
        "|-------|---------|-------------|------------|",
    ]

    # Single pass: each token contributes one HTML box and one table row.
    for token in results:
        label = token["entity_group"]
        score = token["score"]
        desc = POS_DESCRIPTIONS.get(label, label)
        color = POS_COLORS.get(label, "#ffffff")

        # The token text comes from the sentence the user typed, so it must
        # be escaped before being embedded in markup; otherwise input such
        # as "<b>" would be interpreted as HTML instead of being displayed.
        safe_word = html.escape(token["word"])
        safe_label = html.escape(label)

        html_parts.append(
            '<span style="display: inline-block; margin: 2px; vertical-align: top;">'
            f'<span style="background: {color}; border: 1px solid #ccc; border-radius: 6px 6px 0 0; padding: 4px 8px; display: block; text-align: center; font-weight: 500;">'
            f"{safe_word}"
            "</span>"
            '<span style="background: #333; color: #fff; border-radius: 0 0 6px 6px; padding: 2px 8px; display: block; text-align: center; font-size: 0.75rem;">'
            f'{safe_label} <span style="opacity: 0.7;">({score:.2f})</span>'
            "</span>"
            "</span>"
        )

        # A literal "|" inside a cell would otherwise end the cell early and
        # break the table row.
        cell_word = safe_word.replace("|", "\\|")
        table_lines.append(f"| {cell_word} | `{label}` | {desc} | {score:.3f} |")

    html_parts.append("</div>")
    return "\n".join(html_parts), "\n".join(table_lines)
# Sample Kabyle sentences offered as one-click inputs in the UI.
examples = [
    "Aṭas n medden i yessen.",
    "Taqbaylit d tutlayt tamezwarut deg Lezzayer.",
    "Yella wuccen ameqran deg wexxam.",
    "Tameddakt-nneɣ teɣra adlis ameqran.",
    "D nekkni i d-yusan d imezwura.",
]
# Input widget: a two-line textbox for the sentence to tag.
_sentence_box = gr.Textbox(
    label="Kabyle Text",
    placeholder="Enter a sentence in Kabyle (e.g., Aṭas n medden i yessen.)",
    lines=2,
)

# Output widgets, in the same order as the tuple returned by tag_text:
# the HTML visualization first, then the Markdown table.
_result_views = [
    gr.HTML(label="Tagged Visualization"),
    gr.Markdown(label="Results Table"),
]

# Header markup shown under the title.
_DESCRIPTION_HTML = """
<div style="text-align: center;">
<h2>Kabyle Part-of-Speech Tagger</h2>
<p>Enter a sentence in <strong>Kabyle</strong> (Berber language) to see POS tags predicted by
<a href="https://huggingface.co/boffire/kabyle-pos" target="_blank">boffire/kabyle-pos</a>
(XLM-RoBERTa-base, F1: 87.5%).</p>
<p style="font-size: 0.9rem; color: #666;">
Tags follow the <a href="https://universaldependencies.org/u/pos/" target="_blank">Universal Dependencies</a> POS tagset.
</p>
</div>
"""

# Wire everything into a single-function interface; flagging and example
# caching are both switched off.
demo = gr.Interface(
    fn=tag_text,
    inputs=_sentence_box,
    outputs=_result_views,
    title="Kabyle POS Tagger",
    description=_DESCRIPTION_HTML,
    examples=examples,
    allow_flagging="never",
    cache_examples=False,
)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()