boffire committed on
Commit
f3bda9e
·
verified ·
1 Parent(s): c8f7466

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +134 -0
app.py ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import pipeline
3
+
4
# Build the token-classification pipeline once at import time so every
# request reuses the same loaded model. "simple" aggregation makes the
# pipeline return grouped entries (with an "entity_group" key) instead of
# raw sub-word pieces.
tagger = pipeline(
    task="token-classification",
    model="boffire/kabyle-pos",
    aggregation_strategy="simple",
)
10
+
11
# Human-readable names for the Universal Dependencies POS tags, keyed by
# the tag string used to look them up when rendering results.
POS_DESCRIPTIONS = {
    "ADJ": "Adjective",
    "ADP": "Adposition",
    "ADV": "Adverb",
    "AUX": "Auxiliary verb",
    "CCONJ": "Coordinating conjunction",
    "DET": "Determiner",
    "INTJ": "Interjection",
    "NOUN": "Noun",
    "NUM": "Numeral",
    "PART": "Particle",
    "PRON": "Pronoun",
    "PROPN": "Proper noun",
    "PUNCT": "Punctuation",
    "SYM": "Symbol",
    "VERB": "Verb",
    "X": "Other",
} if False else {
    "ADJ": "Adjective",
    "ADP": "Adposition",
    "ADV": "Adverb",
    "AUX": "Auxiliary verb",
    "CCONJ": "Coordinating conjunction",
    "DET": "Determiner",
    "INTJ": "Interjection",
    "NOUN": "Noun",
    "NUM": "Numeral",
    "PART": "Particle",
    "PRON": "Pronoun",
    "PROPN": "Proper noun",
    "PUNCT": "Punctuation",
    "SCONJ": "Subordinating conjunction",
    "SYM": "Symbol",
    "VERB": "Verb",
    "X": "Other",
}

# Background colour (CSS hex) for each POS tag; related tags share a hue.
POS_COLORS = {
    # Nominals: blues
    "NOUN": "#e3f2fd",
    "PROPN": "#bbdefb",
    "PRON": "#90caf9",
    # Verbal: greens
    "VERB": "#e8f5e9",
    "AUX": "#c8e6c9",
    # Modifiers: oranges
    "ADJ": "#fff3e0",
    "ADV": "#ffe0b2",
    # Adpositions and particles: purples
    "ADP": "#f3e5f5",
    "PART": "#e1bee7",
    # Determiners and numerals: pinks
    "DET": "#fce4ec",
    "NUM": "#f8bbd0",
    # Conjunctions: cyans
    "CCONJ": "#e0f7fa",
    "SCONJ": "#b2ebf2",
    # Everything else
    "INTJ": "#fff9c4",
    "PUNCT": "#f5f5f5",
    "SYM": "#eeeeee",
    "X": "#ffccbc",
}
43
+
44
+
45
def tag_text(text):
    """Tag Kabyle text and render the predictions as HTML and Markdown.

    Args:
        text: Raw input from the UI textbox; may be ``None``, empty or blank.

    Returns:
        A ``(html, markdown)`` tuple of strings. On success ``html`` is a row
        of token/tag badges and ``markdown`` is a table with one row per
        token. When there is nothing to display, ``html`` is ``""`` and
        ``markdown`` carries a message instead (a prompt for blank input,
        the error text if the tagger raised, or "No tokens found.").
    """
    # Function-scope import so this block does not depend on a change to
    # the file's top-level import section.
    from html import escape

    if not text or not text.strip():
        return "", "Please enter some Kabyle text."

    try:
        results = tagger(text.strip())
    except Exception as e:
        # UI boundary: show the failure to the user rather than crash the app.
        return "", f"Error: {str(e)}"

    if not results:
        return "", "No tokens found."

    html_parts = [
        '<div style="font-size: 1.15rem; line-height: 2.2; '
        'font-family: system-ui, sans-serif; padding: 10px;">'
    ]
    table_lines = [
        "| Token | POS Tag | Description | Confidence |",
        "|-------|---------|-------------|------------|",
    ]

    for token in results:
        label = token["entity_group"]
        score = token["score"]
        desc = POS_DESCRIPTIONS.get(label, label)
        color = POS_COLORS.get(label, "#ffffff")

        # Token text originates from user input, so it must never reach the
        # page as raw markup: "<", ">" and "&" would otherwise be parsed as
        # HTML by the browser.
        safe_word = escape(token["word"])
        safe_label = escape(label)

        html_parts.append(
            '<span style="display: inline-block; margin: 2px; vertical-align: top;">'
            f'<span style="background: {color}; border: 1px solid #ccc; '
            'border-radius: 6px 6px 0 0; padding: 4px 8px; display: block; '
            'text-align: center; font-weight: 500;">'
            f'{safe_word}'
            '</span>'
            '<span style="background: #333; color: #fff; '
            'border-radius: 0 0 6px 6px; padding: 2px 8px; display: block; '
            'text-align: center; font-size: 0.75rem;">'
            f'{safe_label} <span style="opacity: 0.7;">({score:.2f})</span>'
            '</span>'
            '</span>'
        )

        # A literal "|" inside a cell would split the Markdown table row.
        cell_word = safe_word.replace("|", "\\|")
        table_lines.append(f"| {cell_word} | `{label}` | {desc} | {score:.3f} |")

    html_parts.append('</div>')

    return "\n".join(html_parts), "\n".join(table_lines)
94
+
95
+
96
# Sample Kabyle sentences offered as one-click examples in the UI.
examples = [
    "Aṭas n medden i yessen.",
    "Taqbaylit d tutlayt tamezwarut deg Lezzayer.",
    "Yella wuccen ameqran deg wexxam.",
    "Tameddakt-nneɣ teɣra adlis ameqran.",
    "D nekkni i d-yusan d imezwura.",
]

# Intro block passed as the interface description (HTML): names the model
# and links to the tagset reference.
_description_html = """
<div style="text-align: center;">
<h2>Kabyle Part-of-Speech Tagger</h2>
<p>Enter a sentence in <strong>Kabyle</strong> (Berber language) to see POS tags predicted by
<a href="https://huggingface.co/boffire/kabyle-pos" target="_blank">boffire/kabyle-pos</a>
(XLM-RoBERTa-base, F1: 87.5%).</p>
<p style="font-size: 0.9rem; color: #666;">
Tags follow the <a href="https://universaldependencies.org/u/pos/" target="_blank">Universal Dependencies</a> POS tagset.
</p>
</div>
"""

# Single text input; the two outputs line up with the (html, markdown)
# tuple returned by tag_text.
_sentence_box = gr.Textbox(
    label="Kabyle Text",
    placeholder="Enter a sentence in Kabyle (e.g., Aṭas n medden i yessen.)",
    lines=2,
)
_result_views = [
    gr.HTML(label="Tagged Visualization"),
    gr.Markdown(label="Results Table"),
]

demo = gr.Interface(
    fn=tag_text,
    inputs=_sentence_box,
    outputs=_result_views,
    title="Kabyle POS Tagger",
    description=_description_html,
    examples=examples,
    allow_flagging="never",
    cache_examples=False,
)

# Start the web app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()