musaw

Sync main snapshot to Hugging Face (no local binary banner)

2f53244 4 months ago

79 kB

	{
	"generated_on": "2026-02-17T00:00:00Z",
	"count": 95,
	"resources": [
	{
	"id": "dataset-common-voice-ps-v24",
	"title": "Common Voice Scripted Speech 24.0 - Pashto",
	"url": "https://datacollective.mozillafoundation.org/datasets/cmj8u3pnb00llnxxbfvxo3b14",
	"category": "dataset",
	"source": "mozilla",
	"status": "verified",
	"summary": "Large open Pashto speech dataset for ASR training and evaluation.",
	"primary_use": "ASR training and evaluation",
	"tasks": [
	"asr"
	],
	"tags": [
	"pashto",
	"speech",
	"asr"
	],
	"evidence_text": "Official dataset page is for Pashto.",
	"evidence_url": "https://datacollective.mozillafoundation.org/datasets/cmj8u3pnb00llnxxbfvxo3b14",
	"markers": [
	"Pashto"
	]
	},
	{
	"id": "dataset-google-fleurs",
	"title": "Google FLEURS",
	"url": "https://huggingface.co/datasets/google/fleurs",
	"category": "dataset",
	"source": "huggingface",
	"status": "verified",
	"summary": "Standard multilingual speech benchmark dataset with Pashto subset.",
	"primary_use": "Speech benchmark and external evaluation",
	"tasks": [
	"asr",
	"benchmarking"
	],
	"tags": [
	"pashto",
	"speech",
	"benchmark"
	],
	"evidence_text": "Dataset config includes ps_af.",
	"evidence_url": "https://huggingface.co/datasets/google/fleurs/blob/main/fleurs.py",
	"markers": [
	"ps_af"
	]
	},
	{
	"id": "dataset-oscar-ps",
	"title": "OSCAR Corpus",
	"url": "https://huggingface.co/datasets/oscar-corpus/oscar",
	"category": "dataset",
	"source": "huggingface",
	"status": "verified",
	"summary": "Large web text corpus that includes Pashto text split.",
	"primary_use": "Language modeling and lexicon expansion",
	"tasks": [
	"nlp"
	],
	"tags": [
	"pashto",
	"text",
	"nlp"
	],
	"evidence_text": "Dataset includes unshuffled_deduplicated_ps split.",
	"evidence_url": "https://huggingface.co/datasets/oscar-corpus/oscar",
	"markers": [
	"unshuffled_deduplicated_ps"
	]
	},
	{
	"id": "dataset-wikipedia-ps",
	"title": "Wikimedia Wikipedia",
	"url": "https://huggingface.co/datasets/wikimedia/wikipedia",
	"category": "dataset",
	"source": "huggingface",
	"status": "verified",
	"summary": "Wikipedia corpus with Pashto edition for cleaner text resources.",
	"primary_use": "Terminology and balanced text corpus",
	"tasks": [
	"nlp"
	],
	"tags": [
	"pashto",
	"text",
	"nlp"
	],
	"evidence_text": "Dataset includes 20231101.ps subset.",
	"evidence_url": "https://huggingface.co/datasets/wikimedia/wikipedia",
	"markers": [
	"20231101.ps"
	]
	},
	{
	"id": "dataset-belebele-pbt-arab",
	"title": "Belebele",
	"url": "https://huggingface.co/datasets/facebook/belebele",
	"category": "dataset",
	"source": "huggingface",
	"status": "verified",
	"summary": "Reading comprehension dataset with Pashto script subset.",
	"primary_use": "Comprehension and multilingual NLP benchmark",
	"tasks": [
	"nlp",
	"benchmarking"
	],
	"tags": [
	"pashto",
	"nlp",
	"benchmark"
	],
	"evidence_text": "Dataset includes pbt_Arab subset.",
	"evidence_url": "https://huggingface.co/datasets/facebook/belebele",
	"markers": [
	"pbt_Arab"
	]
	},
	{
	"id": "dataset-opus100-en-ps",
	"title": "OPUS-100",
	"url": "https://huggingface.co/datasets/Helsinki-NLP/opus-100",
	"category": "dataset",
	"source": "huggingface",
	"status": "verified",
	"summary": "Parallel corpus with English to Pashto split for MT tasks.",
	"primary_use": "Machine translation training and evaluation",
	"tasks": [
	"mt",
	"nlp"
	],
	"tags": [
	"pashto",
	"mt",
	"parallel-corpus"
	],
	"evidence_text": "Dataset viewer includes en-ps split.",
	"evidence_url": "https://huggingface.co/datasets/Helsinki-NLP/opus-100/viewer/en-ps",
	"markers": [
	"en-ps"
	]
	},
	{
	"id": "dataset-kaggle-pashto-isolated-words",
	"title": "Pashto Isolated Words Speech Dataset",
	"url": "https://www.kaggle.com/datasets/engrirf/pashto-isolated-words-speech-dataset",
	"category": "dataset",
	"source": "kaggle",
	"status": "verified",
	"summary": "Speech dataset focused on isolated Pashto words.",
	"primary_use": "Keyword spotting and constrained ASR experiments",
	"tasks": [
	"asr"
	],
	"tags": [
	"pashto",
	"speech",
	"kaggle"
	],
	"evidence_text": "Dataset title explicitly states Pashto speech dataset.",
	"evidence_url": "https://www.kaggle.com/datasets/engrirf/pashto-isolated-words-speech-dataset",
	"markers": [
	"Pashto"
	]
	},
	{
	"id": "dataset-kaggle-pashto-word-embeddings",
	"title": "Pashto Word Embeddings",
	"url": "https://www.kaggle.com/datasets/drijaz/pashto-word-embeddings",
	"category": "dataset",
	"source": "kaggle",
	"status": "verified",
	"summary": "Pretrained Pashto word vectors for classic NLP baselines.",
	"primary_use": "Lexical semantics and lightweight NLP baselines",
	"tasks": [
	"nlp"
	],
	"tags": [
	"pashto",
	"nlp",
	"embeddings",
	"kaggle"
	],
	"evidence_text": "Dataset description states pretrained Pashto embeddings.",
	"evidence_url": "https://www.kaggle.com/datasets/drijaz/pashto-word-embeddings",
	"markers": [
	"Pashto"
	]
	},
	{
	"id": "model-pashto-bert",
	"title": "PashtoBERT",
	"url": "https://huggingface.co/mdarhri/pashto-bert",
	"category": "model",
	"source": "huggingface",
	"status": "verified",
	"summary": "Pashto-specific encoder model for NLP transfer tasks.",
	"primary_use": "Pashto NLP baseline encoder",
	"tasks": [
	"nlp"
	],
	"tags": [
	"pashto",
	"nlp",
	"bert"
	],
	"evidence_text": "Model card states training on Pashto corpus data.",
	"evidence_url": "https://huggingface.co/mdarhri/pashto-bert",
	"markers": [
	"Pashto"
	]
	},
	{
	"id": "benchmark-fleurs-ps-af",
	"title": "FLEURS Pashto Benchmark",
	"url": "https://huggingface.co/datasets/google/fleurs",
	"category": "benchmark",
	"source": "huggingface",
	"status": "verified",
	"summary": "Fixed multilingual speech benchmark with Pashto subset for WER and CER.",
	"primary_use": "ASR benchmark reporting",
	"tasks": [
	"asr",
	"benchmarking"
	],
	"tags": [
	"pashto",
	"benchmark",
	"asr"
	],
	"evidence_text": "Dataset includes ps_af split.",
	"evidence_url": "https://huggingface.co/datasets/google/fleurs/blob/main/fleurs.py",
	"markers": [
	"ps_af"
	]
	},
	{
	"id": "benchmark-common-voice-ps-v24",
	"title": "Common Voice Pashto v24 Benchmark",
	"url": "https://datacollective.mozillafoundation.org/datasets/cmj8u3pnb00llnxxbfvxo3b14",
	"category": "benchmark",
	"source": "mozilla",
	"status": "verified",
	"summary": "Core benchmark reference for project-level Pashto ASR tracking.",
	"primary_use": "ASR baseline tracking",
	"tasks": [
	"asr",
	"benchmarking"
	],
	"tags": [
	"pashto",
	"benchmark",
	"asr"
	],
	"evidence_text": "Official Pashto split and versioned release.",
	"evidence_url": "https://datacollective.mozillafoundation.org/datasets/cmj8u3pnb00llnxxbfvxo3b14",
	"markers": [
	"Pashto"
	]
	},
	{
	"id": "benchmark-belebele-pbt-arab",
	"title": "Belebele Pashto Benchmark",
	"url": "https://huggingface.co/datasets/facebook/belebele",
	"category": "benchmark",
	"source": "huggingface",
	"status": "verified",
	"summary": "Comprehension benchmark for multilingual NLP with Pashto variant.",
	"primary_use": "NLP benchmark reporting",
	"tasks": [
	"nlp",
	"benchmarking"
	],
	"tags": [
	"pashto",
	"benchmark",
	"nlp"
	],
	"evidence_text": "Includes pbt_Arab language variant.",
	"evidence_url": "https://huggingface.co/datasets/facebook/belebele",
	"markers": [
	"pbt_Arab"
	]
	},
	{
	"id": "benchmark-flores-200-pbt-arab",
	"title": "FLORES-200 Pashto Benchmark",
	"url": "https://github.com/facebookresearch/flores/tree/main/flores200",
	"category": "benchmark",
	"source": "github",
	"status": "verified",
	"summary": "Translation benchmark language inventory including Pashto script variant.",
	"primary_use": "MT benchmark with BLEU and chrF",
	"tasks": [
	"mt",
	"benchmarking"
	],
	"tags": [
	"pashto",
	"benchmark",
	"mt"
	],
	"evidence_text": "Language list includes pbt_Arab.",
	"evidence_url": "https://raw.githubusercontent.com/facebookresearch/flores/main/flores200/README.md",
	"markers": [
	"pbt_Arab"
	]
	},
	{
	"id": "dataset-nexdata-99h-pashto-dialogue",
	"title": "99 Hours Pashto Spontaneous Dialogue Smartphone Speech Dataset",
	"url": "https://huggingface.co/datasets/Nexdata/99_Hours_Pashto_Spontaneous_Dialogue_Smartphone_speech_dataset",
	"category": "dataset",
	"source": "huggingface",
	"status": "verified",
	"summary": "Large spontaneous Pashto smartphone speech dataset for robust ASR experimentation.",
	"primary_use": "Spontaneous speech ASR training and robustness evaluation",
	"tasks": [
	"asr"
	],
	"tags": [
	"pashto",
	"speech",
	"asr",
	"dialogue"
	],
	"evidence_text": "Dataset title explicitly includes Pashto and API metadata marks audio and text modalities.",
	"evidence_url": "https://huggingface.co/datasets/Nexdata/99_Hours_Pashto_Spontaneous_Dialogue_Smartphone_speech_dataset",
	"markers": [
	"Pashto"
	]
	},
	{
	"id": "dataset-zirak-ai-pashto-ocr",
	"title": "Zirak-AI PashtoOCR",
	"url": "https://huggingface.co/datasets/zirak-ai/PashtoOCR",
	"category": "dataset",
	"source": "huggingface",
	"status": "verified",
	"summary": "Pashto-focused OCR dataset with image-text pairs for document understanding tasks.",
	"primary_use": "OCR and text extraction benchmarking",
	"tasks": [
	"ocr",
	"nlp"
	],
	"tags": [
	"pashto",
	"ocr",
	"nlp",
	"vision"
	],
	"evidence_text": "Dataset tags include language:ps and the dataset name is PashtoOCR.",
	"evidence_url": "https://huggingface.co/datasets/zirak-ai/PashtoOCR",
	"markers": [
	"ps",
	"PashtoOCR"
	]
	},
	{
	"id": "dataset-ihanif-pashto-wikipedia-corpus",
	"title": "Pashto Wikipedia Corpus",
	"url": "https://huggingface.co/datasets/ihanif/pashto-wikipedia-corpus",
	"category": "dataset",
	"source": "huggingface",
	"status": "verified",
	"summary": "Pashto text corpus prepared from Wikipedia data for NLP and language modeling.",
	"primary_use": "Pashto text corpus for NLP baselines",
	"tasks": [
	"nlp"
	],
	"tags": [
	"pashto",
	"text",
	"nlp",
	"wikipedia"
	],
	"evidence_text": "Dataset metadata includes language:ps and the title specifies Pashto corpus.",
	"evidence_url": "https://huggingface.co/datasets/ihanif/pashto-wikipedia-corpus",
	"markers": [
	"ps",
	"Pashto"
	]
	},
	{
	"id": "model-ihanif-wav2vec2-xls-r-300m-pashto",
	"title": "wav2vec2 XLS-R 300M Pashto",
	"url": "https://huggingface.co/ihanif/wav2vec2-xls-r-300m-pashto",
	"category": "model",
	"source": "huggingface",
	"status": "verified",
	"summary": "Fine-tuned wav2vec2 XLS-R model for Pashto ASR with published FLEURS evaluation tags.",
	"primary_use": "Pashto ASR baseline and comparative experiments",
	"tasks": [
	"asr"
	],
	"tags": [
	"pashto",
	"asr",
	"wav2vec2",
	"fleurs"
	],
	"evidence_text": "Model tags include pashto and ps, and model index references FLEURS config ps_af.",
	"evidence_url": "https://huggingface.co/ihanif/wav2vec2-xls-r-300m-pashto",
	"markers": [
	"pashto",
	"ps",
	"ps_af"
	]
	},
	{
	"id": "model-ihanif-whisper-medium-pashto",
	"title": "Whisper Medium Pashto",
	"url": "https://huggingface.co/ihanif/whisper-medium-pashto",
	"category": "model",
	"source": "huggingface",
	"status": "verified",
	"summary": "Fine-tuned Whisper Medium checkpoint for Pashto ASR with benchmark metadata.",
	"primary_use": "Pashto ASR baseline and transcription quality comparisons",
	"tasks": [
	"asr"
	],
	"tags": [
	"pashto",
	"asr",
	"whisper",
	"fleurs"
	],
	"evidence_text": "Model tags include pashto and ps, and model index uses FLEURS ps_af split.",
	"evidence_url": "https://huggingface.co/ihanif/whisper-medium-pashto",
	"markers": [
	"pashto",
	"ps",
	"ps_af"
	]
	},
	{
	"id": "dataset-kaggle-pold-pashto-offensive",
	"title": "POLD - Pashto Offensive Language Dataset",
	"url": "https://www.kaggle.com/datasets/drijaz/pold-pashto-offensive-language-dataset",
	"category": "dataset",
	"source": "kaggle",
	"status": "verified",
	"summary": "Benchmark dataset for offensive content detection in Pashto social text.",
	"primary_use": "Pashto toxicity and moderation NLP benchmarks",
	"tasks": [
	"nlp",
	"classification"
	],
	"tags": [
	"pashto",
	"kaggle",
	"nlp",
	"toxicity"
	],
	"evidence_text": "Kaggle title and description explicitly state Pashto offensive language benchmark dataset.",
	"evidence_url": "https://www.kaggle.com/api/v1/datasets/view/drijaz/pold-pashto-offensive-language-dataset",
	"markers": [
	"Pashto"
	]
	},
	{
	"id": "dataset-kaggle-pashto-english-sentiment-corpus",
	"title": "Pashto English Bilingual Sentiment Corpus",
	"url": "https://www.kaggle.com/datasets/farhadkhan66/pashto-translated-corpus",
	"category": "dataset",
	"source": "kaggle",
	"status": "verified",
	"summary": "Pashto to English bilingual sentiment corpus useful for low-resource sentiment tasks.",
	"primary_use": "Sentiment analysis and bilingual NLP experiments",
	"tasks": [
	"nlp",
	"sentiment"
	],
	"tags": [
	"pashto",
	"kaggle",
	"sentiment",
	"bilingual"
	],
	"evidence_text": "Kaggle dataset title and description identify the corpus as Pashto-English sentiment data.",
	"evidence_url": "https://www.kaggle.com/api/v1/datasets/view/farhadkhan66/pashto-translated-corpus",
	"markers": [
	"Pashto"
	]
	},
	{
	"id": "dataset-kaggle-urdu-pashto-lexicon",
	"title": "Urdu-Pashto Lexicon Dataset",
	"url": "https://www.kaggle.com/datasets/shafeeqgigyani/urdu-pashto-lexicon-dataset",
	"category": "dataset",
	"source": "kaggle",
	"status": "verified",
	"summary": "Lexicon of Urdu words with Pashto translations for dictionary and MT support.",
	"primary_use": "Lexicon and translation lexeme mapping",
	"tasks": [
	"nlp",
	"mt"
	],
	"tags": [
	"pashto",
	"kaggle",
	"lexicon",
	"translation"
	],
	"evidence_text": "Kaggle metadata describes 7,601 Urdu entries with Pashto translations.",
	"evidence_url": "https://www.kaggle.com/api/v1/datasets/view/shafeeqgigyani/urdu-pashto-lexicon-dataset",
	"markers": [
	"Pashto"
	]
	},
	{
	"id": "project-hf-space-ihanif-pashto-asr-v3",
	"title": "Pashto ASR V3 Space",
	"url": "https://huggingface.co/spaces/ihanif/pashto-asr-v3",
	"category": "project",
	"source": "huggingface",
	"status": "verified",
	"summary": "Interactive Hugging Face Space for Pashto automatic speech recognition demos.",
	"primary_use": "Project demo for Pashto ASR user testing",
	"tasks": [
	"asr",
	"demo"
	],
	"tags": [
	"pashto",
	"project",
	"huggingface-space",
	"asr"
	],
	"evidence_text": "Space card title is Pashto ASR V3 and short description states Pashto ASR.",
	"evidence_url": "https://huggingface.co/api/spaces/ihanif/pashto-asr-v3",
	"markers": [
	"Pashto",
	"ASR"
	]
	},
	{
	"id": "project-hf-space-pashto2english-dictionary",
	"title": "Pashto to English Dictionary Space",
	"url": "https://huggingface.co/spaces/EngrAamirBangash/Pashto2English-Dictionary",
	"category": "project",
	"source": "huggingface",
	"status": "verified",
	"summary": "Streamlit project for Pashto to English dictionary lookups.",
	"primary_use": "Interactive bilingual lookup project",
	"tasks": [
	"dictionary",
	"translation",
	"demo"
	],
	"tags": [
	"pashto",
	"project",
	"huggingface-space",
	"dictionary"
	],
	"evidence_text": "Space metadata title states Pashto to English Dictionary.",
	"evidence_url": "https://huggingface.co/api/spaces/EngrAamirBangash/Pashto2English-Dictionary",
	"markers": [
	"Pashto"
	]
	},
	{
	"id": "project-hf-space-umar4321-pashto-translator",
	"title": "Pashto Translator Space",
	"url": "https://huggingface.co/spaces/Umar4321/Pashto-Translator",
	"category": "project",
	"source": "huggingface",
	"status": "verified",
	"summary": "Streamlit translator project for Pashto to English and Urdu conversion.",
	"primary_use": "Interactive translation project demo",
	"tasks": [
	"translation",
	"demo"
	],
	"tags": [
	"pashto",
	"project",
	"huggingface-space",
	"translation"
	],
	"evidence_text": "Space title is Pashto Translator and description states Pashto to English and Urdu translation.",
	"evidence_url": "https://huggingface.co/api/spaces/Umar4321/Pashto-Translator",
	"markers": [
	"Pashto"
	]
	},
	{
	"id": "code-github-ijazul-haq-nlpashto",
	"title": "nlpashto Toolkit",
	"url": "https://github.com/ijazul-haq/nlpashto",
	"category": "code",
	"source": "github",
	"status": "verified",
	"summary": "Pashto NLP toolkit codebase for tokenization, embeddings, and downstream NLP workflows.",
	"primary_use": "Pashto NLP code integration and experimentation",
	"tasks": [
	"nlp",
	"tooling"
	],
	"tags": [
	"pashto",
	"code",
	"github",
	"nlp"
	],
	"evidence_text": "Repository name and description explicitly identify a Pashto NLP toolkit.",
	"evidence_url": "https://api.github.com/repos/ijazul-haq/nlpashto",
	"markers": [
	"Pashto",
	"NLP"
	]
	},
	{
	"id": "dataset-kaggle-drijaz-pashtoocr",
	"title": "PashtoOCR (Kaggle)",
	"url": "https://www.kaggle.com/datasets/drijaz/pashtoocr",
	"category": "dataset",
	"source": "kaggle",
	"status": "verified",
	"summary": "Synthetic OCR dataset focused on Pashto ligatures and text recognition tasks.",
	"primary_use": "Pashto OCR dataset benchmarking and training",
	"tasks": [
	"ocr",
	"nlp"
	],
	"tags": [
	"pashto",
	"kaggle",
	"ocr",
	"dataset"
	],
	"evidence_text": "Kaggle dataset title and subtitle explicitly identify a Pashto OCR dataset.",
	"evidence_url": "https://www.kaggle.com/api/v1/datasets/view/drijaz/pashtoocr",
	"markers": [
	"Pashto",
	"OCR"
	]
	},
	{
	"id": "model-hf-zirak-ai-pashto-bert-v1",
	"title": "zirak-ai/pashto-bert-v1",
	"url": "https://huggingface.co/zirak-ai/pashto-bert-v1",
	"category": "model",
	"source": "huggingface",
	"status": "verified",
	"summary": "Pashto BERT model checkpoint for low-resource Pashto NLP experiments.",
	"primary_use": "Pashto encoder baseline for NLP tasks",
	"tasks": [
	"nlp"
	],
	"tags": [
	"pashto",
	"huggingface",
	"bert",
	"nlp"
	],
	"evidence_text": "Hugging Face model ID and search tags explicitly include pashto marker.",
	"evidence_url": "https://huggingface.co/zirak-ai/pashto-bert-v1",
	"markers": [
	"pashto"
	]
	},
	{
	"id": "project-hf-space-ihanif-pashto-asr",
	"title": "Pashto ASR Space",
	"url": "https://huggingface.co/spaces/ihanif/pashto-asr",
	"category": "project",
	"source": "huggingface",
	"status": "verified",
	"summary": "Interactive Hugging Face Space for Pashto ASR inference demos.",
	"primary_use": "Live Pashto speech-to-text demo project",
	"tasks": [
	"asr",
	"demo"
	],
	"tags": [
	"pashto",
	"project",
	"huggingface-space",
	"asr"
	],
	"evidence_text": "Space ID includes pashto-asr and is returned by Hugging Face Pashto space search.",
	"evidence_url": "https://huggingface.co/api/spaces/ihanif/pashto-asr",
	"markers": [
	"pashto",
	"asr"
	]
	},
	{
	"id": "paper-s2-psocr-lmm-pashto",
	"title": "PsOCR: Benchmarking Large Multimodal Models for Optical Character Recognition in Low-resource Pashto Language",
	"url": "https://www.semanticscholar.org/paper/d2743c0dcdbc65f5b46fcec2f0ba7cb379c4134f",
	"category": "paper",
	"source": "other",
	"status": "verified",
	"summary": "Research paper benchmarking multimodal OCR models on low-resource Pashto OCR tasks.",
	"primary_use": "Pashto OCR research baseline and evaluation reference",
	"tasks": [
	"ocr",
	"research"
	],
	"tags": [
	"pashto",
	"paper",
	"ocr",
	"multimodal"
	],
	"evidence_text": "Paper title explicitly references low-resource Pashto language OCR benchmarking.",
	"evidence_url": "https://www.semanticscholar.org/paper/d2743c0dcdbc65f5b46fcec2f0ba7cb379c4134f",
	"markers": [
	"Pashto",
	"OCR"
	]
	},
	{
	"id": "dataset-hf-adnankhan769-english-to-pashto",
	"title": "English to Pashto Sentences Dataset",
	"url": "https://huggingface.co/datasets/adnankhan769/english_to_pashto_sentences_dataset",
	"category": "dataset",
	"source": "huggingface",
	"status": "verified",
	"summary": "Parallel English-Pashto sentence dataset for bilingual NLP and translation experiments.",
	"primary_use": "MT and bilingual sentence alignment baseline",
	"tasks": [
	"mt",
	"nlp"
	],
	"tags": [
	"pashto",
	"dataset",
	"huggingface",
	"translation"
	],
	"evidence_text": "Dataset ID explicitly states English-to-Pashto and includes Pashto-script sentence column.",
	"evidence_url": "https://huggingface.co/api/datasets/adnankhan769/english_to_pashto_sentences_dataset",
	"markers": [
	"Pashto"
	]
	},
	{
	"id": "dataset-hf-saillab-alpaca-pashto-cleaned",
	"title": "alpaca-pashto-cleaned",
	"url": "https://huggingface.co/datasets/saillab/alpaca-pashto-cleaned",
	"category": "dataset",
	"source": "huggingface",
	"status": "verified",
	"summary": "Instruction-style Pashto text dataset suitable for LLM tuning and instruction-following research.",
	"primary_use": "Pashto instruction tuning and conversational NLP experiments",
	"tasks": [
	"nlp",
	"llm"
	],
	"tags": [
	"pashto",
	"dataset",
	"huggingface",
	"instruction"
	],
	"evidence_text": "Dataset metadata includes language:ps and dataset name includes Pashto.",
	"evidence_url": "https://huggingface.co/api/datasets/saillab/alpaca-pashto-cleaned",
	"markers": [
	"ps",
	"Pashto"
	]
	},
	{
	"id": "model-hf-ihanif-whisper-base-pashto",
	"title": "Whisper Base Pashto",
	"url": "https://huggingface.co/ihanif/whisper-base-pashto",
	"category": "model",
	"source": "huggingface",
	"status": "verified",
	"summary": "Fine-tuned Whisper Base checkpoint for Pashto ASR with FLEURS ps_af evaluation metadata.",
	"primary_use": "Pashto ASR baseline and speed-accuracy comparison",
	"tasks": [
	"asr"
	],
	"tags": [
	"pashto",
	"model",
	"huggingface",
	"asr"
	],
	"evidence_text": "Model ID includes Pashto and card metadata references FLEURS config ps_af.",
	"evidence_url": "https://huggingface.co/api/models/ihanif/whisper-base-pashto",
	"markers": [
	"Pashto",
	"ps_af"
	]
	},
	{
	"id": "project-hf-space-zamai-mistral-7b-pashto",
	"title": "ZamAI-Mistral-7B-Pashto Space",
	"url": "https://huggingface.co/spaces/tasal9/ZamAI-Mistral-7B-Pashto-space",
	"category": "project",
	"source": "huggingface",
	"status": "verified",
	"summary": "Gradio project space demonstrating a Pashto-adapted Mistral 7B interface.",
	"primary_use": "Interactive Pashto LLM project demo",
	"tasks": [
	"llm",
	"demo"
	],
	"tags": [
	"pashto",
	"project",
	"huggingface-space",
	"llm"
	],
	"evidence_text": "Space title and ID explicitly include Pashto and model card metadata exposes project details.",
	"evidence_url": "https://huggingface.co/api/spaces/tasal9/ZamAI-Mistral-7B-Pashto-space",
	"markers": [
	"Pashto"
	]
	},
	{
	"id": "dataset-hf-adnankhan769-proper-dataset-english-2-pashto",
	"title": "adnankhan769/proper_dataset_english_2_pashto",
	"url": "https://huggingface.co/datasets/adnankhan769/proper_dataset_english_2_pashto",
	"category": "dataset",
	"source": "huggingface",
	"status": "verified",
	"summary": "Pashto bilingual/translation dataset discovered from huggingface for MT experimentation.",
	"primary_use": "Machine translation and bilingual corpus development",
	"tasks": [
	"mt"
	],
	"tags": [
	"pashto",
	"dataset",
	"huggingface",
	"mt"
	],
	"evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
	"evidence_url": "https://huggingface.co/datasets/adnankhan769/proper_dataset_english_2_pashto",
	"markers": [
	"pashto"
	]
	},
	{
	"id": "dataset-hf-ihanif-pashto-asr-wer",
	"title": "ihanif/pashto_asr_wer",
	"url": "https://huggingface.co/datasets/ihanif/pashto_asr_wer",
	"category": "dataset",
	"source": "huggingface",
	"status": "verified",
	"summary": "Pashto speech dataset discovered from huggingface for ASR training and evaluation.",
	"primary_use": "ASR training and evaluation data source",
	"tasks": [
	"asr"
	],
	"tags": [
	"pashto",
	"dataset",
	"huggingface",
	"asr"
	],
	"evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
	"evidence_url": "https://huggingface.co/datasets/ihanif/pashto_asr_wer",
	"markers": [
	"pashto"
	]
	},
	{
	"id": "dataset-hf-ihanif-pashto-speech-ds",
	"title": "ihanif/pashto_speech_ds",
	"url": "https://huggingface.co/datasets/ihanif/pashto_speech_ds",
	"category": "dataset",
	"source": "huggingface",
	"status": "verified",
	"summary": "Pashto speech dataset discovered from huggingface for ASR training and evaluation.",
	"primary_use": "ASR training and evaluation data source",
	"tasks": [
	"asr"
	],
	"tags": [
	"pashto",
	"dataset",
	"huggingface",
	"asr"
	],
	"evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
	"evidence_url": "https://huggingface.co/datasets/ihanif/pashto_speech_ds",
	"markers": [
	"pashto"
	]
	},
	{
	"id": "dataset-hf-ihanif-pashto-speech-parquet-10k",
	"title": "ihanif/pashto_speech_parquet_10k",
	"url": "https://huggingface.co/datasets/ihanif/pashto_speech_parquet_10k",
	"category": "dataset",
	"source": "huggingface",
	"status": "verified",
	"summary": "Pashto speech dataset discovered from huggingface for ASR training and evaluation.",
	"primary_use": "ASR training and evaluation data source",
	"tasks": [
	"asr"
	],
	"tags": [
	"pashto",
	"dataset",
	"huggingface",
	"asr"
	],
	"evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
	"evidence_url": "https://huggingface.co/datasets/ihanif/pashto_speech_parquet_10k",
	"markers": [
	"pashto"
	]
	},
	{
	"id": "dataset-hf-saillab-alpaca-pashto-taco",
	"title": "saillab/alpaca_pashto_taco",
	"url": "https://huggingface.co/datasets/saillab/alpaca_pashto_taco",
	"category": "dataset",
	"source": "huggingface",
	"status": "verified",
	"summary": "Pashto-focused dataset discovered from huggingface candidate sync.",
	"primary_use": "Instruction tuning and LLM adaptation data source",
	"tasks": [
	"llm"
	],
	"tags": [
	"pashto",
	"dataset",
	"huggingface",
	"llm"
	],
	"evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
	"evidence_url": "https://huggingface.co/datasets/saillab/alpaca_pashto_taco",
	"markers": [
	"pashto"
	]
	},
	{
	"id": "dataset-hf-sherwindesouza-pashto-common-voice-20",
	"title": "SherwinDesouza/pashto-common-voice-20",
	"url": "https://huggingface.co/datasets/SherwinDesouza/pashto-common-voice-20",
	"category": "dataset",
	"source": "huggingface",
	"status": "verified",
	"summary": "Pashto-focused dataset discovered from huggingface candidate sync.",
	"primary_use": "Pashto data source for NLP experimentation",
	"tasks": [
	"nlp"
	],
	"tags": [
	"pashto",
	"dataset",
	"huggingface",
	"nlp"
	],
	"evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
	"evidence_url": "https://huggingface.co/datasets/SherwinDesouza/pashto-common-voice-20",
	"markers": [
	"pashto"
	]
	},
	{
	"id": "dataset-hf-tasal9-zamai-pashto-dataset",
	"title": "tasal9/ZamAI_Pashto_Dataset",
	"url": "https://huggingface.co/datasets/tasal9/ZamAI_Pashto_Dataset",
	"category": "dataset",
	"source": "huggingface",
	"status": "verified",
	"summary": "Pashto-focused dataset discovered from huggingface candidate sync.",
	"primary_use": "Pashto data source for NLP experimentation",
	"tasks": [
	"nlp"
	],
	"tags": [
	"pashto",
	"dataset",
	"huggingface",
	"nlp"
	],
	"evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
	"evidence_url": "https://huggingface.co/datasets/tasal9/ZamAI_Pashto_Dataset",
	"markers": [
	"pashto"
	]
	},
	{
	"id": "dataset-kaggle-english-pashto-language-dataset-epld",
	"title": "English-Pashto Language Dataset (EPLD)",
	"url": "https://www.kaggle.com/datasets/rabiakhan827/english-pashto-language-dataset-epld",
	"category": "dataset",
	"source": "kaggle",
	"status": "verified",
	"summary": "Pashto bilingual/translation dataset discovered from kaggle for MT experimentation.",
	"primary_use": "Machine translation and bilingual corpus development",
	"tasks": [
	"mt"
	],
	"tags": [
	"pashto",
	"dataset",
	"kaggle",
	"mt"
	],
	"evidence_text": "Kaggle dataset title/subtitle includes Pashto keyword.",
	"evidence_url": "https://www.kaggle.com/datasets/rabiakhan827/english-pashto-language-dataset-epld",
	"markers": [
	"Pashto"
	]
	},
	{
	"id": "dataset-kaggle-katib-s-pashto-text-imagebase-kpti",
	"title": "Katib's Pashto Text Imagebase (KPTI)",
	"url": "https://www.kaggle.com/datasets/hassanamin/katibs-pashto-text-imagebase-kpti",
	"category": "dataset",
	"source": "kaggle",
	"status": "verified",
	"summary": "Pashto OCR-oriented dataset discovered from kaggle for document and script recognition work.",
	"primary_use": "OCR training and evaluation data source",
	"tasks": [
	"ocr"
	],
	"tags": [
	"pashto",
	"dataset",
	"kaggle",
	"ocr"
	],
	"evidence_text": "Kaggle dataset title/subtitle includes Pashto keyword.",
	"evidence_url": "https://www.kaggle.com/datasets/hassanamin/katibs-pashto-text-imagebase-kpti",
	"markers": [
	"Pashto"
	]
	},
	{
	"id": "dataset-kaggle-pashto-ocr",
	"title": "Pashto OCR",
	"url": "https://www.kaggle.com/datasets/hassanamin/pashto-ocr",
	"category": "dataset",
	"source": "kaggle",
	"status": "verified",
	"summary": "Pashto OCR-oriented dataset discovered from kaggle for document and script recognition work.",
	"primary_use": "OCR training and evaluation data source",
	"tasks": [
	"ocr"
	],
	"tags": [
	"pashto",
	"dataset",
	"kaggle",
	"ocr"
	],
	"evidence_text": "Kaggle dataset title/subtitle includes Pashto keyword.",
	"evidence_url": "https://www.kaggle.com/datasets/hassanamin/pashto-ocr",
	"markers": [
	"Pashto"
	]
	},
	{
	"id": "dataset-kaggle-common-voice-24-0-pashto-speech-dataset",
	"title": "Common Voice 24.0: Pashto Speech Dataset",
	"url": "https://www.kaggle.com/datasets/ataullahaali/common-voice-scripted-speech-24-0-pashto",
	"category": "dataset",
	"source": "kaggle",
	"status": "verified",
	"summary": "Pashto speech dataset discovered from kaggle for ASR training and evaluation.",
	"primary_use": "ASR training and evaluation data source",
	"tasks": [
	"asr"
	],
	"tags": [
	"pashto",
	"dataset",
	"kaggle",
	"asr"
	],
	"evidence_text": "Kaggle dataset title/subtitle includes Pashto keyword.",
	"evidence_url": "https://www.kaggle.com/datasets/ataullahaali/common-voice-scripted-speech-24-0-pashto",
	"markers": [
	"Pashto"
	]
	},
	{
	"id": "model-hf-ihanif-pashto-asr-base",
	"title": "ihanif/pashto-asr-base",
	"url": "https://huggingface.co/ihanif/pashto-asr-base",
	"category": "model",
	"source": "huggingface",
	"status": "verified",
	"summary": "Pashto ASR model checkpoint discovered from huggingface candidate sync.",
	"primary_use": "Pashto ASR baseline and model comparison",
	"tasks": [
	"asr"
	],
	"tags": [
	"pashto",
	"model",
	"huggingface",
	"asr"
	],
	"evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
	"evidence_url": "https://huggingface.co/ihanif/pashto-asr-base",
	"markers": [
	"pashto"
	]
	},
	{
	"id": "model-hf-ihanif-wav2vec2-xls-r-300m-pashto-lm",
	"title": "ihanif/wav2vec2-xls-r-300m-pashto-lm",
	"url": "https://huggingface.co/ihanif/wav2vec2-xls-r-300m-pashto-lm",
	"category": "model",
	"source": "huggingface",
	"status": "verified",
	"summary": "Pashto ASR model checkpoint discovered from huggingface candidate sync.",
	"primary_use": "Pashto ASR baseline and model comparison",
	"tasks": [
	"asr"
	],
	"tags": [
	"pashto",
	"model",
	"huggingface",
	"asr"
	],
	"evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
	"evidence_url": "https://huggingface.co/ihanif/wav2vec2-xls-r-300m-pashto-lm",
	"markers": [
	"pashto"
	]
	},
	{
	"id": "model-hf-ihanif-whisper-large-pashto",
	"title": "ihanif/whisper-large-pashto",
	"url": "https://huggingface.co/ihanif/whisper-large-pashto",
	"category": "model",
	"source": "huggingface",
	"status": "verified",
	"summary": "Pashto ASR model checkpoint discovered from huggingface candidate sync.",
	"primary_use": "Pashto ASR baseline and model comparison",
	"tasks": [
	"asr"
	],
	"tags": [
	"pashto",
	"model",
	"huggingface",
	"asr"
	],
	"evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
	"evidence_url": "https://huggingface.co/ihanif/whisper-large-pashto",
	"markers": [
	"pashto"
	]
	},
	{
	"id": "model-hf-ihanif-whisper-medium-pashto-3e-7",
	"title": "ihanif/whisper-medium-pashto-3e-7",
	"url": "https://huggingface.co/ihanif/whisper-medium-pashto-3e-7",
	"category": "model",
	"source": "huggingface",
	"status": "verified",
	"summary": "Pashto ASR model checkpoint discovered from huggingface candidate sync.",
	"primary_use": "Pashto ASR baseline and model comparison",
	"tasks": [
	"asr"
	],
	"tags": [
	"pashto",
	"model",
	"huggingface",
	"asr"
	],
	"evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
	"evidence_url": "https://huggingface.co/ihanif/whisper-medium-pashto-3e-7",
	"markers": [
	"pashto"
	]
	},
	{
	"id": "model-hf-ihanif-whisper-small-pashto",
	"title": "ihanif/whisper-small-pashto",
	"url": "https://huggingface.co/ihanif/whisper-small-pashto",
	"category": "model",
	"source": "huggingface",
	"status": "verified",
	"summary": "Pashto ASR model checkpoint discovered from huggingface candidate sync.",
	"primary_use": "Pashto ASR baseline and model comparison",
	"tasks": [
	"asr"
	],
	"tags": [
	"pashto",
	"model",
	"huggingface",
	"asr"
	],
	"evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
	"evidence_url": "https://huggingface.co/ihanif/whisper-small-pashto",
	"markers": [
	"pashto"
	]
	},
	{
	"id": "model-hf-ihanif-xls-r-1b-pashto",
	"title": "ihanif/xls-r-1b-pashto",
	"url": "https://huggingface.co/ihanif/xls-r-1b-pashto",
	"category": "model",
	"source": "huggingface",
	"status": "verified",
	"summary": "Pashto ASR model checkpoint discovered from huggingface candidate sync.",
	"primary_use": "Pashto ASR baseline and model comparison",
	"tasks": [
	"asr"
	],
	"tags": [
	"pashto",
	"model",
	"huggingface",
	"asr"
	],
	"evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
	"evidence_url": "https://huggingface.co/ihanif/xls-r-1b-pashto",
	"markers": [
	"pashto"
	]
	},
	{
	"id": "model-hf-ijazulhaq-bert-base-pashto-v1",
	"title": "ijazulhaq/bert-base-pashto-v1",
	"url": "https://huggingface.co/ijazulhaq/bert-base-pashto-v1",
	"category": "model",
	"source": "huggingface",
	"status": "verified",
	"summary": "Pashto NLP model checkpoint discovered from huggingface candidate sync.",
	"primary_use": "Pashto model baseline for downstream NLP tasks",
	"tasks": [
	"nlp"
	],
	"tags": [
	"pashto",
	"model",
	"huggingface",
	"nlp"
	],
	"evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
	"evidence_url": "https://huggingface.co/ijazulhaq/bert-base-pashto-v1",
	"markers": [
	"pashto"
	]
	},
	{
	"id": "project-hf-space-ihanif-wav2vec2-bert-pashto-asr",
	"title": "ihanif/wav2vec2-bert-pashto-asr",
	"url": "https://huggingface.co/spaces/ihanif/wav2vec2-bert-pashto-asr",
	"category": "project",
	"source": "huggingface",
	"status": "verified",
	"summary": "Pashto-focused interactive project discovered from huggingface for demonstration and quick evaluation.",
	"primary_use": "Interactive Pashto demo and quick qualitative validation",
	"tasks": [
	"asr",
	"nlp",
	"demo"
	],
	"tags": [
	"pashto",
	"project",
	"huggingface",
	"asr",
	"nlp",
	"demo"
	],
	"evidence_text": "Matched by Pashto keyword in Hugging Face Spaces search.",
	"evidence_url": "https://huggingface.co/spaces/ihanif/wav2vec2-bert-pashto-asr",
	"markers": [
	"pashto"
	]
	},
	{
	"id": "project-hf-space-nasirkhansayyad-pashto-whisper-demo",
	"title": "nasirkhansayyad/pashto-whisper-demo",
	"url": "https://huggingface.co/spaces/nasirkhansayyad/pashto-whisper-demo",
	"category": "project",
	"source": "huggingface",
	"status": "verified",
	"summary": "Pashto-focused interactive project discovered from huggingface for demonstration and quick evaluation.",
	"primary_use": "Interactive Pashto demo and quick qualitative validation",
	"tasks": [
	"asr",
	"demo"
	],
	"tags": [
	"pashto",
	"project",
	"huggingface",
	"asr",
	"demo"
	],
	"evidence_text": "Matched by Pashto keyword in Hugging Face Spaces search.",
	"evidence_url": "https://huggingface.co/spaces/nasirkhansayyad/pashto-whisper-demo",
	"markers": [
	"pashto"
	]
	},
	{
	"id": "project-hf-space-tasal9-zamai-phi3-mini-pashto-demo",
	"title": "tasal9/ZamAI-Phi3-Mini-Pashto-Demo",
	"url": "https://huggingface.co/spaces/tasal9/ZamAI-Phi3-Mini-Pashto-Demo",
	"category": "project",
	"source": "huggingface",
	"status": "verified",
	"summary": "Pashto-focused interactive project discovered from huggingface for demonstration and quick evaluation.",
	"primary_use": "Interactive Pashto demo and quick qualitative validation",
	"tasks": [
	"llm",
	"demo"
	],
	"tags": [
	"pashto",
	"project",
	"huggingface",
	"llm",
	"demo"
	],
	"evidence_text": "Matched by Pashto keyword in Hugging Face Spaces search.",
	"evidence_url": "https://huggingface.co/spaces/tasal9/ZamAI-Phi3-Mini-Pashto-Demo",
	"markers": [
	"pashto"
	]
	},
	{
	"id": "project-hf-space-umar4321-pashto-to-english-urdu",
	"title": "Umar4321/Pashto-To-English-Urdu",
	"url": "https://huggingface.co/spaces/Umar4321/Pashto-To-English-Urdu",
	"category": "project",
	"source": "huggingface",
	"status": "verified",
	"summary": "Pashto-focused interactive project discovered from huggingface for demonstration and quick evaluation.",
	"primary_use": "Interactive Pashto demo and quick qualitative validation",
	"tasks": [
	"mt",
	"demo"
	],
	"tags": [
	"pashto",
	"project",
	"huggingface",
	"mt",
	"demo"
	],
	"evidence_text": "Matched by Pashto keyword in Hugging Face Spaces search.",
	"evidence_url": "https://huggingface.co/spaces/Umar4321/Pashto-To-English-Urdu",
	"markers": [
	"pashto"
	]
	},
	{
	"id": "project-github-fazlullahmamond-pashto-typing",
	"title": "Fazlullahmamond/Pashto-Typing",
	"url": "https://github.com/Fazlullahmamond/Pashto-Typing",
	"category": "project",
	"source": "github",
	"status": "verified",
	"summary": "Pashto-focused interactive project discovered from github for demonstration and quick evaluation.",
	"primary_use": "Interactive Pashto demo and quick qualitative validation",
	"tasks": [
	"demo"
	],
	"tags": [
	"pashto",
	"project",
	"github",
	"demo"
	],
	"evidence_text": "Repository metadata (name/description/topics) includes Pashto markers.",
	"evidence_url": "https://github.com/Fazlullahmamond/Pashto-Typing",
	"markers": [
	"pashto"
	]
	},
	{
	"id": "paper-s2-benchmarking-whisper-for-low-resource-speech-recognition-an-n-shot-evaluation-on-pashto-pu",
	"title": "Benchmarking Whisper for Low-Resource Speech Recognition: An N-Shot Evaluation on Pashto, Punjabi, and Urdu",
	"url": "https://www.semanticscholar.org/paper/13104eddc785756132a19242ac7e74442b145693",
	"category": "paper",
	"source": "other",
	"status": "verified",
	"summary": "Pashto language technology paper discovered from other for research reference.",
	"primary_use": "Pashto research reference for methods and benchmarking",
	"tasks": [
	"asr",
	"mt"
	],
	"tags": [
	"pashto",
	"paper",
	"other",
	"asr",
	"mt"
	],
	"evidence_text": "Matched by Semantic Scholar query: pashto.",
	"evidence_url": "https://www.semanticscholar.org/paper/13104eddc785756132a19242ac7e74442b145693",
	"markers": [
	"pashto"
	]
	},
	{
	"id": "paper-s2-deep-learning-based-detection-of-one-and-two-column-textual-blocks-in-camera-captured-pash",
	"title": "Deep Learning-Based Detection of One and Two-Column Textual Blocks in Camera-Captured Pashto Documents Images",
	"url": "https://www.semanticscholar.org/paper/8c9d2628e23d5c27edc656071f11f0e78124d182",
	"category": "paper",
	"source": "other",
	"status": "verified",
	"summary": "Pashto language technology paper discovered from other for research reference.",
	"primary_use": "Pashto research reference for methods and benchmarking",
	"tasks": [
	"ocr"
	],
	"tags": [
	"pashto",
	"paper",
	"other",
	"ocr"
	],
	"evidence_text": "Matched by Semantic Scholar query: pashto.",
	"evidence_url": "https://www.semanticscholar.org/paper/8c9d2628e23d5c27edc656071f11f0e78124d182",
	"markers": [
	"pashto"
	]
	},
	{
	"id": "paper-s2-out-of-vocabulary-pashto-spell-checker-using-morphological-operations",
	"title": "Out-of-Vocabulary Pashto Spell Checker using Morphological Operations",
	"url": "https://www.semanticscholar.org/paper/802aae68a6a7fdfb29d51be03fb2b09e29311fa7",
	"category": "paper",
	"source": "other",
	"status": "verified",
	"summary": "Pashto language technology paper discovered from other for research reference.",
	"primary_use": "Pashto research reference for methods and benchmarking",
	"tasks": [
	"nlp"
	],
	"tags": [
	"pashto",
	"paper",
	"other",
	"nlp"
	],
	"evidence_text": "Matched by Semantic Scholar query: pashto.",
	"evidence_url": "https://www.semanticscholar.org/paper/802aae68a6a7fdfb29d51be03fb2b09e29311fa7",
	"markers": [
	"pashto"
	]
	},
	{
	"id": "paper-s2-pashto-shallow-parsing-a-deep-learning-approach",
	"title": "Pashto Shallow Parsing: A Deep Learning Approach",
	"url": "https://www.semanticscholar.org/paper/be36455bb4eca60accb3e6866f345132f0dac1e5",
	"category": "paper",
	"source": "other",
	"status": "verified",
	"summary": "Pashto language technology paper discovered from other for research reference.",
	"primary_use": "Pashto research reference for methods and benchmarking",
	"tasks": [
	"nlp"
	],
	"tags": [
	"pashto",
	"paper",
	"other",
	"nlp"
	],
	"evidence_text": "Matched by Semantic Scholar query: pashto.",
	"evidence_url": "https://www.semanticscholar.org/paper/be36455bb4eca60accb3e6866f345132f0dac1e5",
	"markers": [
	"pashto"
	]
	},
	{
	"id": "paper-s2-pos-tagging-of-low-resource-pashto-language-annotated-corpus-and-bert-based-model",
	"title": "POS tagging of low-resource Pashto language: annotated corpus and BERT-based model",
	"url": "https://www.semanticscholar.org/paper/1b2d5c896fec735483e8c8fb0a75e13125e08769",
	"category": "paper",
	"source": "other",
	"status": "verified",
	"summary": "Pashto language technology paper discovered from other for research reference.",
	"primary_use": "Pashto research reference for methods and benchmarking",
	"tasks": [
	"nlp"
	],
	"tags": [
	"pashto",
	"paper",
	"other",
	"nlp"
	],
	"evidence_text": "Matched by Semantic Scholar query: pashto.",
	"evidence_url": "https://www.semanticscholar.org/paper/1b2d5c896fec735483e8c8fb0a75e13125e08769",
	"markers": [
	"pashto"
	]
	},
	{
	"id": "paper-arxiv-enhancing-pashto-text-classification-using-language-processing-techniques-for-single-and-m",
	"title": "Enhancing Pashto Text Classification using Language Processing Techniques for Single And Multi-Label Analysis",
	"url": "http://arxiv.org/abs/2305.03201v1",
	"category": "paper",
	"source": "arxiv",
	"status": "verified",
	"summary": "Pashto language technology paper discovered from arxiv for research reference.",
	"primary_use": "Pashto research reference for methods and benchmarking",
	"tasks": [
	"nlp"
	],
	"tags": [
	"pashto",
	"paper",
	"arxiv",
	"nlp"
	],
	"evidence_text": "Matched by arXiv query: all:pashto.",
	"evidence_url": "http://arxiv.org/abs/2305.03201v1",
	"markers": [
	"pashto"
	]
	},
	{
	"id": "paper-arxiv-knn-and-ann-based-recognition-of-handwritten-pashto-letters-using-zoning-features",
	"title": "KNN and ANN-based Recognition of Handwritten Pashto Letters using Zoning Features",
	"url": "http://arxiv.org/abs/1904.03391v2",
	"category": "paper",
	"source": "arxiv",
	"status": "verified",
	"summary": "Pashto language technology paper discovered from arxiv for research reference.",
	"primary_use": "Pashto research reference for methods and benchmarking",
	"tasks": [
	"ocr"
	],
	"tags": [
	"pashto",
	"paper",
	"arxiv",
	"ocr"
	],
	"evidence_text": "Matched by arXiv query: all:pashto.",
	"evidence_url": "http://arxiv.org/abs/1904.03391v2",
	"markers": [
	"pashto"
	]
	},
	{
	"id": "dataset-hf-oowais-pushto-text-to-speech-dataset",
	"title": "oowais/pushto-text-to-speech-dataset",
	"url": "https://huggingface.co/datasets/oowais/pushto-text-to-speech-dataset",
	"category": "dataset",
	"source": "huggingface",
	"status": "verified",
	"summary": "Pashto speech dataset discovered from huggingface candidate sync for ASR training and evaluation.",
	"primary_use": "ASR training and evaluation data source",
	"tasks": [
	"asr",
	"tts"
	],
	"tags": [
	"pashto",
	"dataset",
	"huggingface",
	"asr",
	"tts"
	],
	"evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
	"evidence_url": "https://huggingface.co/datasets/oowais/pushto-text-to-speech-dataset",
	"markers": [
	"pashto"
	]
	},
	{
	"id": "dataset-hf-ihanif-pashto-speech-20k",
	"title": "ihanif/pashto_speech_20k",
	"url": "https://huggingface.co/datasets/ihanif/pashto_speech_20k",
	"category": "dataset",
	"source": "huggingface",
	"status": "verified",
	"summary": "Pashto speech dataset discovered from huggingface candidate sync for ASR training and evaluation.",
	"primary_use": "ASR training and evaluation data source",
	"tasks": [
	"asr"
	],
	"tags": [
	"pashto",
	"dataset",
	"huggingface",
	"asr"
	],
	"evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
	"evidence_url": "https://huggingface.co/datasets/ihanif/pashto_speech_20k",
	"markers": [
	"pashto"
	]
	},
	{
	"id": "dataset-hf-ihanif-pashto-speech-5k",
	"title": "ihanif/pashto_speech_5k",
	"url": "https://huggingface.co/datasets/ihanif/pashto_speech_5k",
	"category": "dataset",
	"source": "huggingface",
	"status": "verified",
	"summary": "Pashto speech dataset discovered from huggingface candidate sync for ASR training and evaluation.",
	"primary_use": "ASR training and evaluation data source",
	"tasks": [
	"asr"
	],
	"tags": [
	"pashto",
	"dataset",
	"huggingface",
	"asr"
	],
	"evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
	"evidence_url": "https://huggingface.co/datasets/ihanif/pashto_speech_5k",
	"markers": [
	"pashto"
	]
	},
	{
	"id": "dataset-hf-tasal9-pashto-dataset",
	"title": "tasal9/Pashto_Dataset",
	"url": "https://huggingface.co/datasets/tasal9/Pashto_Dataset",
	"category": "dataset",
	"source": "huggingface",
	"status": "verified",
	"summary": "Pashto-focused dataset discovered from huggingface candidate sync.",
	"primary_use": "Pashto data source for NLP experimentation",
	"tasks": [
	"nlp"
	],
	"tags": [
	"pashto",
	"dataset",
	"huggingface",
	"nlp"
	],
	"evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
	"evidence_url": "https://huggingface.co/datasets/tasal9/Pashto_Dataset",
	"markers": [
	"pashto"
	]
	},
	{
	"id": "model-hf-ijazulhaq-bert-base-pashto",
	"title": "ijazulhaq/bert-base-pashto",
	"url": "https://huggingface.co/ijazulhaq/bert-base-pashto",
	"category": "model",
	"source": "huggingface",
	"status": "verified",
	"summary": "Pashto NLP model checkpoint discovered from huggingface candidate sync.",
	"primary_use": "Pashto model baseline for downstream NLP tasks",
	"tasks": [
	"nlp"
	],
	"tags": [
	"pashto",
	"model",
	"huggingface",
	"nlp"
	],
	"evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
	"evidence_url": "https://huggingface.co/ijazulhaq/bert-base-pashto",
	"markers": [
	"pashto"
	]
	},
	{
	"id": "model-hf-ihanif-whisper-small-pashto-dropout",
	"title": "ihanif/whisper-small-pashto-dropout",
	"url": "https://huggingface.co/ihanif/whisper-small-pashto-dropout",
	"category": "model",
	"source": "huggingface",
	"status": "verified",
	"summary": "Pashto ASR model checkpoint discovered from huggingface candidate sync.",
	"primary_use": "Pashto ASR baseline and model comparison",
	"tasks": [
	"asr"
	],
	"tags": [
	"pashto",
	"model",
	"huggingface",
	"asr"
	],
	"evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
	"evidence_url": "https://huggingface.co/ihanif/whisper-small-pashto-dropout",
	"markers": [
	"pashto"
	]
	},
	{
	"id": "model-hf-koochikoo25-pashto-whisper-large",
	"title": "koochikoo25/pashto-whisper-large",
	"url": "https://huggingface.co/koochikoo25/pashto-whisper-large",
	"category": "model",
	"source": "huggingface",
	"status": "verified",
	"summary": "Pashto ASR model checkpoint discovered from huggingface candidate sync.",
	"primary_use": "Pashto ASR baseline and model comparison",
	"tasks": [
	"asr"
	],
	"tags": [
	"pashto",
	"model",
	"huggingface",
	"asr"
	],
	"evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
	"evidence_url": "https://huggingface.co/koochikoo25/pashto-whisper-large",
	"markers": [
	"pashto"
	]
	},
	{
	"id": "project-hf-space-ihanif-wav2vec-pashto-asr",
	"title": "ihanif/wav2vec-pashto-asr",
	"url": "https://huggingface.co/spaces/ihanif/wav2vec-pashto-asr",
	"category": "project",
	"source": "huggingface",
	"status": "verified",
	"summary": "Pashto-focused interactive project discovered from huggingface for demonstration and evaluation.",
	"primary_use": "Interactive Pashto demo and quick qualitative validation",
	"tasks": [
	"asr",
	"demo"
	],
	"tags": [
	"pashto",
	"project",
	"huggingface",
	"asr",
	"demo"
	],
	"evidence_text": "Matched by Pashto keyword in Hugging Face Spaces search.",
	"evidence_url": "https://huggingface.co/spaces/ihanif/wav2vec-pashto-asr",
	"markers": [
	"pashto"
	]
	},
	{
	"id": "project-hf-space-afaqalinagra-pashto-asr-model",
	"title": "afaqalinagra/PASHTO-ASR-MODEL",
	"url": "https://huggingface.co/spaces/afaqalinagra/PASHTO-ASR-MODEL",
	"category": "project",
	"source": "huggingface",
	"status": "verified",
	"summary": "Pashto-focused interactive project discovered from huggingface for demonstration and evaluation.",
	"primary_use": "Interactive Pashto demo and quick qualitative validation",
	"tasks": [
	"asr",
	"demo"
	],
	"tags": [
	"pashto",
	"project",
	"huggingface",
	"asr",
	"demo"
	],
	"evidence_text": "Matched by Pashto keyword in Hugging Face Spaces search.",
	"evidence_url": "https://huggingface.co/spaces/afaqalinagra/PASHTO-ASR-MODEL",
	"markers": [
	"pashto"
	]
	},
	{
	"id": "project-hf-space-ilyas02828-pashto-sign-language",
	"title": "ilyas02828/Pashto_Sign_Language",
	"url": "https://huggingface.co/spaces/ilyas02828/Pashto_Sign_Language",
	"category": "project",
	"source": "huggingface",
	"status": "verified",
	"summary": "Pashto-focused interactive project discovered from huggingface for demonstration and evaluation.",
	"primary_use": "Interactive Pashto demo and quick qualitative validation",
	"tasks": [
	"demo"
	],
	"tags": [
	"pashto",
	"project",
	"huggingface",
	"demo"
	],
	"evidence_text": "Matched by Pashto keyword in Hugging Face Spaces search.",
	"evidence_url": "https://huggingface.co/spaces/ilyas02828/Pashto_Sign_Language",
	"markers": [
	"pashto"
	]
	},
	{
	"id": "project-hf-space-mahmudaq-pashtoasrnmt1",
	"title": "mahmudaq/PashtoASRNMT1",
	"url": "https://huggingface.co/spaces/mahmudaq/PashtoASRNMT1",
	"category": "project",
	"source": "huggingface",
	"status": "verified",
	"summary": "Pashto-focused interactive project discovered from huggingface for demonstration and evaluation.",
	"primary_use": "Interactive Pashto demo and quick qualitative validation",
	"tasks": [
	"asr",
	"mt",
	"demo"
	],
	"tags": [
	"pashto",
	"project",
	"huggingface",
	"asr",
	"mt",
	"demo"
	],
	"evidence_text": "Matched by Pashto keyword in Hugging Face Spaces search.",
	"evidence_url": "https://huggingface.co/spaces/mahmudaq/PashtoASRNMT1",
	"markers": [
	"pashto"
	]
	},
	{
	"id": "paper-s2-enhancing-pashto-ner-using-machine-labeled-data-and-transformer-based-models",
	"title": "Enhancing Pashto NER Using Machine-Labeled Data and Transformer-Based Models",
	"url": "https://www.semanticscholar.org/paper/be851ecf9197ef9bb8bf764abf4db0dda95cd9da",
	"category": "paper",
	"source": "other",
	"status": "verified",
	"summary": "Pashto language technology paper discovered from other for research reference.",
	"primary_use": "Pashto research reference for methods and benchmarking",
	"tasks": [
	"nlp"
	],
	"tags": [
	"pashto",
	"paper",
	"other",
	"nlp"
	],
	"evidence_text": "Matched by explicit Pashto marker in paper title from Semantic Scholar search.",
	"evidence_url": "https://www.semanticscholar.org/paper/be851ecf9197ef9bb8bf764abf4db0dda95cd9da",
	"markers": [
	"pashto"
	]
	},
	{
	"id": "dataset-hf-aamirhs-pashto-audio-wav2vec",
	"title": "aamirhs/pashto-audio-wav2vec",
	"url": "https://huggingface.co/datasets/aamirhs/pashto-audio-wav2vec",
	"category": "dataset",
	"source": "huggingface",
	"status": "verified",
	"summary": "Pashto speech dataset surfaced from Hugging Face candidate sync for ASR experiments.",
	"primary_use": "Pashto ASR data exploration and baseline training",
	"tasks": [
	"asr"
	],
	"tags": [
	"pashto",
	"dataset",
	"huggingface",
	"speech",
	"asr"
	],
	"evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
	"evidence_url": "https://huggingface.co/datasets/aamirhs/pashto-audio-wav2vec",
	"markers": [
	"pashto"
	]
	},
	{
	"id": "dataset-hf-alimuhammad73-pashto-poetry",
	"title": "AliMuhammad73/Pashto-Poetry",
	"url": "https://huggingface.co/datasets/AliMuhammad73/Pashto-Poetry",
	"category": "dataset",
	"source": "huggingface",
	"status": "verified",
	"summary": "Pashto poetry text dataset surfaced from Hugging Face candidate sync for NLP experiments.",
	"primary_use": "Pashto poetry corpus for language modeling and text analysis",
	"tasks": [
	"nlp"
	],
	"tags": [
	"pashto",
	"dataset",
	"huggingface",
	"text",
	"poetry",
	"nlp"
	],
	"evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
	"evidence_url": "https://huggingface.co/datasets/AliMuhammad73/Pashto-Poetry",
	"markers": [
	"pashto"
	]
	},
	{
	"id": "model-hf-aamirhs-wav2vec2-large-xls-r-300m-pashto-colab",
	"title": "aamirhs/wav2vec2-large-xls-r-300m-pashto-colab",
	"url": "https://huggingface.co/aamirhs/wav2vec2-large-xls-r-300m-pashto-colab",
	"category": "model",
	"source": "huggingface",
	"status": "verified",
	"summary": "Pashto ASR model checkpoint surfaced from Hugging Face candidate sync.",
	"primary_use": "Pashto ASR baseline and transfer-learning comparison",
	"tasks": [
	"asr"
	],
	"tags": [
	"pashto",
	"model",
	"huggingface",
	"asr"
	],
	"evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
	"evidence_url": "https://huggingface.co/aamirhs/wav2vec2-large-xls-r-300m-pashto-colab",
	"markers": [
	"pashto"
	]
	},
	{
	"id": "project-hf-space-aizazayyubi-pashto-asr",
	"title": "Aizazayyubi/pashto_asr",
	"url": "https://huggingface.co/spaces/Aizazayyubi/pashto_asr",
	"category": "project",
	"source": "huggingface",
	"status": "verified",
	"summary": "Pashto ASR interactive demo surfaced from Hugging Face Spaces candidate sync.",
	"primary_use": "Interactive Pashto ASR demo for qualitative evaluation",
	"tasks": [
	"asr",
	"demo"
	],
	"tags": [
	"pashto",
	"project",
	"huggingface",
	"asr",
	"demo"
	],
	"evidence_text": "Matched by Pashto keyword in Hugging Face Spaces search.",
	"evidence_url": "https://huggingface.co/spaces/Aizazayyubi/pashto_asr",
	"markers": [
	"pashto"
	]
	},
	{
	"id": "paper-arxiv-from-scarcity-to-scale-pashto-common-voice",
	"title": "From Scarcity to Scale: A Release-Level Analysis of the Pashto Common Voice Dataset",
	"url": "http://arxiv.org/abs/2602.14062v1",
	"category": "paper",
	"source": "arxiv",
	"status": "verified",
	"summary": "Research paper analyzing Pashto Common Voice releases and dataset scaling characteristics.",
	"primary_use": "ASR data quality and release trend reference",
	"tasks": [
	"asr",
	"benchmarking"
	],
	"tags": [
	"pashto",
	"paper",
	"arxiv",
	"asr",
	"common-voice"
	],
	"evidence_text": "Matched by Pashto marker in paper title from arXiv query results.",
	"evidence_url": "http://arxiv.org/abs/2602.14062v1",
	"markers": [
	"pashto"
	]
	},
	{
	"id": "paper-arxiv-tuning-traditional-pashto-text-classification",
	"title": "Tuning Traditional Language Processing Approaches for Pashto Text Classification",
	"url": "http://arxiv.org/abs/2305.03737v1",
	"category": "paper",
	"source": "arxiv",
	"status": "verified",
	"summary": "Research paper focused on Pashto text classification using traditional NLP approaches.",
	"primary_use": "Pashto text classification method reference",
	"tasks": [
	"nlp"
	],
	"tags": [
	"pashto",
	"paper",
	"arxiv",
	"nlp",
	"classification"
	],
	"evidence_text": "Matched by Pashto marker in paper title from arXiv query results.",
	"evidence_url": "http://arxiv.org/abs/2305.03737v1",
	"markers": [
	"pashto"
	]
	},
	{
	"id": "dataset-dataverse-iarpa-babel-pashto-language-pack-v0-4by",
	"title": "IARPA Babel Pashto Language Pack IARPA-babel104b-v0.4bY",
	"url": "https://hdl.handle.net/11272.1/AB2/GLFN3X",
	"category": "dataset",
	"source": "dataverse",
	"status": "verified",
	"summary": "Pashto Babel language pack dataset for speech and language processing evaluation.",
	"primary_use": "Pashto speech dataset for ASR and language identification experiments",
	"tasks": [
	"asr",
	"benchmarking"
	],
	"tags": [
	"pashto",
	"dataset",
	"dataverse",
	"speech",
	"asr",
	"babel"
	],
	"evidence_text": "Dataverse metadata includes Pashto markers in dataset title or description.",
	"evidence_url": "https://hdl.handle.net/11272.1/AB2/GLFN3X",
	"markers": [
	"pashto"
	]
	},
	{
	"id": "paper-arxiv-image-to-text-pashto-farsi-traditional-chinese",
	"title": "Development of a New Image-to-text Conversion System for Pashto, Farsi and Traditional Chinese",
	"url": "http://arxiv.org/abs/2005.08650v1",
	"category": "paper",
	"source": "arxiv",
	"status": "verified",
	"summary": "Research paper on image-to-text conversion including Pashto OCR.",
	"primary_use": "Pashto OCR method reference",
	"tasks": [
	"ocr",
	"nlp"
	],
	"tags": [
	"pashto",
	"paper",
	"arxiv",
	"ocr"
	],
	"evidence_text": "Matched by Pashto marker in paper title from arXiv query results.",
	"evidence_url": "http://arxiv.org/abs/2005.08650v1",
	"markers": [
	"pashto"
	]
	},
	{
	"id": "paper-openalex-benchmark-pashto-handwritten-character-dataset-ocr",
	"title": "Benchmark Pashto Handwritten Character Dataset and Pashto Object Character Recognition (OCR) Using Deep Neural Network with Rule Activation Function",
	"url": "https://doi.org/10.1155/2021/6669672",
	"category": "paper",
	"source": "openalex",
	"status": "verified",
	"summary": "Research paper introducing a benchmark dataset and OCR approach for Pashto handwritten characters.",
	"primary_use": "Pashto handwritten OCR benchmark and methodology reference",
	"tasks": [
	"ocr",
	"benchmarking"
	],
	"tags": [
	"pashto",
	"paper",
	"openalex",
	"ocr",
	"benchmark"
	],
	"evidence_text": "Matched by explicit Pashto marker in title from OpenAlex works search.",
	"evidence_url": "https://doi.org/10.1155/2021/6669672",
	"markers": [
	"pashto"
	]
	},
	{
	"id": "paper-openalex-asr-isolated-pashto-spoken-digits-mfcc-knn",
	"title": "Database development and automatic speech recognition of isolated Pashto spoken digits using MFCC and K-NN",
	"url": "https://doi.org/10.1007/s10772-014-9267-z",
	"category": "paper",
	"source": "openalex",
	"status": "verified",
	"summary": "Research paper on isolated Pashto spoken-digit ASR with MFCC and K-NN.",
	"primary_use": "Pashto ASR baseline method reference for digit recognition",
	"tasks": [
	"asr"
	],
	"tags": [
	"pashto",
	"paper",
	"openalex",
	"asr",
	"speech"
	],
	"evidence_text": "Matched by explicit Pashto marker in title from OpenAlex works search.",
	"evidence_url": "https://doi.org/10.1007/s10772-014-9267-z",
	"markers": [
	"pashto"
	]
	},
	{
	"id": "paper-openalex-pashto-isolated-digits-recognition-dcnn",
	"title": "Pashto isolated digits recognition using deep convolutional neural network",
	"url": "https://doi.org/10.1016/j.heliyon.2020.e03372",
	"category": "paper",
	"source": "openalex",
	"status": "verified",
	"summary": "Research paper on Pashto isolated-digit recognition using deep convolutional neural networks.",
	"primary_use": "Pashto speech recognition research reference",
	"tasks": [
	"asr"
	],
	"tags": [
	"pashto",
	"paper",
	"openalex",
	"asr",
	"deep-learning"
	],
	"evidence_text": "Matched by explicit Pashto marker in title from OpenAlex works search.",
	"evidence_url": "https://doi.org/10.1016/j.heliyon.2020.e03372",
	"markers": [
	"pashto"
	]
	},
	{
	"id": "paper-openalex-pashto-offensive-language-detection-benchmark-bert",
	"title": "Pashto offensive language detection: a benchmark dataset and monolingual Pashto BERT",
	"url": "https://doi.org/10.7717/peerj-cs.1617",
	"category": "paper",
	"source": "openalex",
	"status": "verified",
	"summary": "Research paper on Pashto offensive language detection with benchmark dataset and monolingual BERT model.",
	"primary_use": "Pashto NLP toxicity detection benchmark and model reference",
	"tasks": [
	"nlp",
	"benchmarking"
	],
	"tags": [
	"pashto",
	"paper",
	"openalex",
	"nlp",
	"bert",
	"benchmark"
	],
	"evidence_text": "Matched by explicit Pashto marker in title from OpenAlex works search.",
	"evidence_url": "https://doi.org/10.7717/peerj-cs.1617",
	"markers": [
	"pashto"
	]
	},
	{
	"id": "paper-openalex-phti-pashto-handwritten-text-imagebase",
	"title": "PHTI: Pashto Handwritten Text Imagebase for Deep Learning Applications",
	"url": "https://doi.org/10.1109/access.2022.3216881",
	"category": "paper",
	"source": "openalex",
	"status": "verified",
	"summary": "Research paper describing PHTI, a Pashto handwritten text imagebase for deep learning.",
	"primary_use": "Pashto OCR dataset and benchmark reference",
	"tasks": [
	"ocr",
	"benchmarking"
	],
	"tags": [
	"pashto",
	"paper",
	"openalex",
	"ocr",
	"dataset"
	],
	"evidence_text": "Matched by explicit Pashto marker in title from OpenAlex works search.",
	"evidence_url": "https://doi.org/10.1109/access.2022.3216881",
	"markers": [
	"pashto"
	]
	},
	{
	"id": "paper-openalex-recognition-of-pashto-handwritten-characters-deep-learning",
	"title": "Recognition of Pashto Handwritten Characters Based on Deep Learning",
	"url": "https://doi.org/10.3390/s20205884",
	"category": "paper",
	"source": "openalex",
	"status": "verified",
	"summary": "Research paper on deep-learning-based recognition of Pashto handwritten characters.",
	"primary_use": "Pashto OCR model reference for handwritten character recognition",
	"tasks": [
	"ocr"
	],
	"tags": [
	"pashto",
	"paper",
	"openalex",
	"ocr",
	"deep-learning"
	],
	"evidence_text": "Matched by explicit Pashto marker in title from OpenAlex works search.",
	"evidence_url": "https://doi.org/10.3390/s20205884",
	"markers": [
	"pashto"
	]
	},
	{
	"id": "paper-openalex-kpti-katib-pashto-text-imagebase-benchmark",
	"title": "KPTI: Katib's Pashto Text Imagebase and Deep Learning Benchmark",
	"url": "https://doi.org/10.1109/icfhr.2016.0090",
	"category": "paper",
	"source": "openalex",
	"status": "verified",
	"summary": "Research paper introducing KPTI, a Pashto text imagebase and benchmark for handwritten recognition.",
	"primary_use": "Pashto OCR dataset and benchmarking reference",
	"tasks": [
	"ocr",
	"benchmarking"
	],
	"tags": [
	"pashto",
	"paper",
	"openalex",
	"ocr",
	"benchmark"
	],
	"evidence_text": "Matched by explicit Pashto marker in title from OpenAlex works search.",
	"evidence_url": "https://doi.org/10.1109/icfhr.2016.0090",
	"markers": [
	"pashto"
	]
	},
	{
	"id": "paper-openalex-pioneer-dataset-handwritten-pashto-cnn",
	"title": "Pioneer dataset and recognition of Handwritten Pashto characters using Convolution Neural Networks",
	"url": "https://doi.org/10.1177/0020294020964826",
	"category": "paper",
	"source": "openalex",
	"status": "verified",
	"summary": "Research paper on a pioneer handwritten Pashto character dataset with CNN-based recognition.",
	"primary_use": "Pashto handwritten character recognition reference",
	"tasks": [
	"ocr",
	"benchmarking"
	],
	"tags": [
	"pashto",
	"paper",
	"openalex",
	"ocr",
	"deep-learning"
	],
	"evidence_text": "Matched by explicit Pashto marker in title from OpenAlex works search.",
	"evidence_url": "https://doi.org/10.1177/0020294020964826",
	"markers": [
	"pashto"
	]
	},
	{
	"id": "paper-openalex-scale-rotation-invariant-ocr-pashto-mdlstm",
	"title": "Scale and rotation invariant OCR for Pashto cursive script using MDLSTM network",
	"url": "https://doi.org/10.1109/icdar.2015.7333931",
	"category": "paper",
	"source": "openalex",
	"status": "verified",
	"summary": "Research paper on scale- and rotation-invariant OCR for cursive Pashto using MDLSTM.",
	"primary_use": "Pashto OCR model architecture reference",
	"tasks": [
	"ocr"
	],
	"tags": [
	"pashto",
	"paper",
	"openalex",
	"ocr",
	"mdlstm"
	],
	"evidence_text": "Matched by explicit Pashto marker in title from OpenAlex works search.",
	"evidence_url": "https://doi.org/10.1109/icdar.2015.7333931",
	"markers": [
	"pashto"
	]
	},
	{
	"id": "paper-openalex-recognizable-units-pashto-ocr",
	"title": "Recognizable units in Pashto language for OCR",
	"url": "https://doi.org/10.1109/icdar.2015.7333963",
	"category": "paper",
	"source": "openalex",
	"status": "verified",
	"summary": "Research paper defining recognizable units in Pashto for OCR workflows.",
	"primary_use": "Pashto OCR preprocessing and unit-design reference",
	"tasks": [
	"ocr"
	],
	"tags": [
	"pashto",
	"paper",
	"openalex",
	"ocr"
	],
	"evidence_text": "Matched by explicit Pashto marker in title from OpenAlex works search.",
	"evidence_url": "https://doi.org/10.1109/icdar.2015.7333963",
	"markers": [
	"pashto"
	]
	},
	{
	"id": "paper-openalex-shape-analysis-pashto-script-image-database-ocr",
	"title": "Shape analysis of Pashto script and creation of image database for OCR",
	"url": "https://doi.org/10.1109/icet.2009.5353160",
	"category": "paper",
	"source": "openalex",
	"status": "verified",
	"summary": "Research paper on Pashto script shape analysis and image database creation for OCR.",
	"primary_use": "Pashto OCR dataset design and feature reference",
	"tasks": [
	"ocr",
	"benchmarking"
	],
	"tags": [
	"pashto",
	"paper",
	"openalex",
	"ocr",
	"dataset"
	],
	"evidence_text": "Matched by explicit Pashto marker in title from OpenAlex works search.",
	"evidence_url": "https://doi.org/10.1109/icet.2009.5353160",
	"markers": [
	"pashto"
	]
	},
	{
	"id": "paper-openalex-speech-translation-low-resource-case-pashto",
	"title": "Speech translation for low-resource languages: the case of Pashto",
	"url": "https://doi.org/10.21437/interspeech.2005-723",
	"category": "paper",
	"source": "openalex",
	"status": "verified",
	"summary": "Research paper on speech translation for low-resource languages, including Pashto.",
	"primary_use": "Pashto speech translation and low-resource MT reference",
	"tasks": [
	"asr",
	"mt"
	],
	"tags": [
	"pashto",
	"paper",
	"openalex",
	"speech",
	"translation"
	],
	"evidence_text": "Matched by explicit Pashto marker in title from OpenAlex works search.",
	"evidence_url": "https://doi.org/10.21437/interspeech.2005-723",
	"markers": [
	"pashto"
	]
	}
	]
	}