Spaces:

boffire
/

Kabyle-POS-tagger

Sleeping

App Files Files Community

Kabyle-POS-tagger / app.py

boffire

Create app.py

f3bda9e verified 22 days ago

Raw

History Blame

4.22 kB

	import gradio as gr
	from transformers import pipeline

	# Build the token-classification pipeline once, at import time, so every
	# request reuses the same loaded model. tag_text reads the "word",
	# "entity_group" and "score" keys of each item the pipeline returns.
	tagger = pipeline(
	    task="token-classification",
	    aggregation_strategy="simple",
	    model="boffire/kabyle-pos",
	)

	# Human-readable name for each of the 17 Universal Dependencies POS tags,
	# keyed by the tag string.
	POS_DESCRIPTIONS = dict(
	    ADJ="Adjective",
	    ADP="Adposition",
	    ADV="Adverb",
	    AUX="Auxiliary verb",
	    CCONJ="Coordinating conjunction",
	    DET="Determiner",
	    INTJ="Interjection",
	    NOUN="Noun",
	    NUM="Numeral",
	    PART="Particle",
	    PRON="Pronoun",
	    PROPN="Proper noun",
	    PUNCT="Punctuation",
	    SCONJ="Subordinating conjunction",
	    SYM="Symbol",
	    VERB="Verb",
	    X="Other",
	)

	# CSS hex background colour for each POS tag. Tags that are listed
	# together below were grouped on one line in the original palette.
	POS_COLORS = {
	    "NOUN": "#e3f2fd",
	    "PROPN": "#bbdefb",
	    "PRON": "#90caf9",

	    "VERB": "#e8f5e9",
	    "AUX": "#c8e6c9",

	    "ADJ": "#fff3e0",
	    "ADV": "#ffe0b2",

	    "ADP": "#f3e5f5",
	    "PART": "#e1bee7",

	    "DET": "#fce4ec",
	    "NUM": "#f8bbd0",

	    "CCONJ": "#e0f7fa",
	    "SCONJ": "#b2ebf2",

	    "INTJ": "#fff9c4",
	    "PUNCT": "#f5f5f5",

	    "SYM": "#eeeeee",
	    "X": "#ffccbc",
	}


	def tag_text(text):
	    """Tag Kabyle text and render the predictions as HTML and Markdown.

	    Args:
	        text: Raw textbox input; ``None``, empty and whitespace-only values
	            are all treated as "nothing entered".

	    Returns:
	        A ``(html, markdown)`` pair of strings. When the input is blank, the
	        tagger raises, or it yields no tokens, ``html`` is ``""`` and
	        ``markdown`` holds a short message instead of a table.
	    """
	    # Stdlib helper, imported locally so the file's import block is untouched.
	    from html import escape

	    if not text or not text.strip():
	        return "", "Please enter some Kabyle text."

	    # UI boundary: surface any tagger failure as a message to the user
	    # instead of letting the callback raise.
	    try:
	        results = tagger(text.strip())
	    except Exception as e:
	        return "", f"Error: {e}"

	    if not results:
	        return "", "No tokens found."

	    html_parts = ['<div style="font-size: 1.15rem; line-height: 2.2; font-family: system-ui, sans-serif; padding: 10px;">']
	    # Plain "|" delimiters: a backslash-escaped pipe is literal text in
	    # Markdown and would stop these rows from being parsed as a table.
	    table_lines = [
	        "| Token | POS Tag | Description | Confidence |",
	        "|-------|---------|-------------|------------|",
	    ]

	    # One pass builds both views of each token.
	    for token in results:
	        label = token["entity_group"]
	        score = token["score"]
	        # The word echoes user input, so escape it before embedding it in
	        # markup; the label is escaped too so no raw value reaches the HTML.
	        safe_word = escape(token["word"])
	        safe_label = escape(label)
	        color = POS_COLORS.get(label, "#ffffff")
	        desc = POS_DESCRIPTIONS.get(label, label)

	        html_parts.append(
	            '<span style="display: inline-block; margin: 2px; vertical-align: top;">'
	            f'<span style="background: {color}; border: 1px solid #ccc; border-radius: 6px 6px 0 0; padding: 4px 8px; display: block; text-align: center; font-weight: 500;">'
	            f'{safe_word}'
	            '</span>'
	            '<span style="background: #333; color: #fff; border-radius: 0 0 6px 6px; padding: 2px 8px; display: block; text-align: center; font-size: 0.75rem;">'
	            f'{safe_label} <span style="opacity: 0.7;">({score:.2f})</span>'
	            '</span>'
	            '</span>'
	        )

	        # A literal pipe inside a cell would otherwise split the row.
	        cell_word = safe_word.replace("|", "\\|")
	        table_lines.append(f"| {cell_word} | `{label}` | {desc} | {score:.3f} |")

	    html_parts.append('</div>')

	    return "\n".join(html_parts), "\n".join(table_lines)


	# Sample Kabyle sentences passed to the interface as clickable examples.
	examples = [
	    "Aṭas n medden i yessen.",
	    "Taqbaylit d tutlayt tamezwarut deg Lezzayer.",
	    "Yella wuccen ameqran deg wexxam.",
	    "Tameddakt-nneɣ teɣra adlis ameqran.",
	    "D nekkni i d-yusan d imezwura.",
	]

	# Gradio interface: one Kabyle textbox in; tag_text's HTML view and
	# Markdown table out. Flagging and example caching are both switched off.
	demo = gr.Interface(
	    title="Kabyle POS Tagger",
	    description="""
	<div style="text-align: center;">
	<h2>Kabyle Part-of-Speech Tagger</h2>
	<p>Enter a sentence in <strong>Kabyle</strong> (Berber language) to see POS tags predicted by
	<a href="https://huggingface.co/boffire/kabyle-pos" target="_blank">boffire/kabyle-pos</a>
	(XLM-RoBERTa-base, F1: 87.5%).</p>
	<p style="font-size: 0.9rem; color: #666;">
	Tags follow the <a href="https://universaldependencies.org/u/pos/" target="_blank">Universal Dependencies</a> POS tagset.
	</p>
	</div>
	""",
	    fn=tag_text,
	    inputs=gr.Textbox(
	        lines=2,
	        label="Kabyle Text",
	        placeholder="Enter a sentence in Kabyle (e.g., Aṭas n medden i yessen.)",
	    ),
	    outputs=[
	        gr.HTML(label="Tagged Visualization"),
	        gr.Markdown(label="Results Table"),
	    ],
	    examples=examples,
	    cache_examples=False,
	    allow_flagging="never",
	)

	# Launch the Gradio app only when this file is run as a script, so that
	# importing the module does not start it.
	if __name__ == "__main__":
	    demo.launch()