musaw
Sync main snapshot to Hugging Face (no local binary banner)
2f53244
Raw
History Blame
79 kB
{
"generated_on": "2026-02-17T00:00:00Z",
"count": 95,
"resources": [
{
"id": "dataset-common-voice-ps-v24",
"title": "Common Voice Scripted Speech 24.0 - Pashto",
"url": "https://datacollective.mozillafoundation.org/datasets/cmj8u3pnb00llnxxbfvxo3b14",
"category": "dataset",
"source": "mozilla",
"status": "verified",
"summary": "Large open Pashto speech dataset for ASR training and evaluation.",
"primary_use": "ASR training and evaluation",
"tasks": [
"asr"
],
"tags": [
"pashto",
"speech",
"asr"
],
"evidence_text": "Official dataset page is for Pashto.",
"evidence_url": "https://datacollective.mozillafoundation.org/datasets/cmj8u3pnb00llnxxbfvxo3b14",
"markers": [
"Pashto"
]
},
{
"id": "dataset-google-fleurs",
"title": "Google FLEURS",
"url": "https://huggingface.co/datasets/google/fleurs",
"category": "dataset",
"source": "huggingface",
"status": "verified",
"summary": "Standard multilingual speech benchmark dataset with Pashto subset.",
"primary_use": "Speech benchmark and external evaluation",
"tasks": [
"asr",
"benchmarking"
],
"tags": [
"pashto",
"speech",
"benchmark"
],
"evidence_text": "Dataset config includes ps_af.",
"evidence_url": "https://huggingface.co/datasets/google/fleurs/blob/main/fleurs.py",
"markers": [
"ps_af"
]
},
{
"id": "dataset-oscar-ps",
"title": "OSCAR Corpus",
"url": "https://huggingface.co/datasets/oscar-corpus/oscar",
"category": "dataset",
"source": "huggingface",
"status": "verified",
"summary": "Large web text corpus that includes Pashto text split.",
"primary_use": "Language modeling and lexicon expansion",
"tasks": [
"nlp"
],
"tags": [
"pashto",
"text",
"nlp"
],
"evidence_text": "Dataset includes unshuffled_deduplicated_ps split.",
"evidence_url": "https://huggingface.co/datasets/oscar-corpus/oscar",
"markers": [
"unshuffled_deduplicated_ps"
]
},
{
"id": "dataset-wikipedia-ps",
"title": "Wikimedia Wikipedia",
"url": "https://huggingface.co/datasets/wikimedia/wikipedia",
"category": "dataset",
"source": "huggingface",
"status": "verified",
"summary": "Wikipedia corpus with Pashto edition for cleaner text resources.",
"primary_use": "Terminology and balanced text corpus",
"tasks": [
"nlp"
],
"tags": [
"pashto",
"text",
"nlp"
],
"evidence_text": "Dataset includes 20231101.ps subset.",
"evidence_url": "https://huggingface.co/datasets/wikimedia/wikipedia",
"markers": [
"20231101.ps"
]
},
{
"id": "dataset-belebele-pbt-arab",
"title": "Belebele",
"url": "https://huggingface.co/datasets/facebook/belebele",
"category": "dataset",
"source": "huggingface",
"status": "verified",
"summary": "Reading comprehension dataset with Pashto script subset.",
"primary_use": "Comprehension and multilingual NLP benchmark",
"tasks": [
"nlp",
"benchmarking"
],
"tags": [
"pashto",
"nlp",
"benchmark"
],
"evidence_text": "Dataset includes pbt_Arab subset.",
"evidence_url": "https://huggingface.co/datasets/facebook/belebele",
"markers": [
"pbt_Arab"
]
},
{
"id": "dataset-opus100-en-ps",
"title": "OPUS-100",
"url": "https://huggingface.co/datasets/Helsinki-NLP/opus-100",
"category": "dataset",
"source": "huggingface",
"status": "verified",
"summary": "Parallel corpus with English to Pashto split for MT tasks.",
"primary_use": "Machine translation training and evaluation",
"tasks": [
"mt",
"nlp"
],
"tags": [
"pashto",
"mt",
"parallel-corpus"
],
"evidence_text": "Dataset viewer includes en-ps split.",
"evidence_url": "https://huggingface.co/datasets/Helsinki-NLP/opus-100/viewer/en-ps",
"markers": [
"en-ps"
]
},
{
"id": "dataset-kaggle-pashto-isolated-words",
"title": "Pashto Isolated Words Speech Dataset",
"url": "https://www.kaggle.com/datasets/engrirf/pashto-isolated-words-speech-dataset",
"category": "dataset",
"source": "kaggle",
"status": "verified",
"summary": "Speech dataset focused on isolated Pashto words.",
"primary_use": "Keyword spotting and constrained ASR experiments",
"tasks": [
"asr"
],
"tags": [
"pashto",
"speech",
"kaggle"
],
"evidence_text": "Dataset title explicitly states Pashto speech dataset.",
"evidence_url": "https://www.kaggle.com/datasets/engrirf/pashto-isolated-words-speech-dataset",
"markers": [
"Pashto"
]
},
{
"id": "dataset-kaggle-pashto-word-embeddings",
"title": "Pashto Word Embeddings",
"url": "https://www.kaggle.com/datasets/drijaz/pashto-word-embeddings",
"category": "dataset",
"source": "kaggle",
"status": "verified",
"summary": "Pretrained Pashto word vectors for classic NLP baselines.",
"primary_use": "Lexical semantics and lightweight NLP baselines",
"tasks": [
"nlp"
],
"tags": [
"pashto",
"nlp",
"embeddings",
"kaggle"
],
"evidence_text": "Dataset description states pretrained Pashto embeddings.",
"evidence_url": "https://www.kaggle.com/datasets/drijaz/pashto-word-embeddings",
"markers": [
"Pashto"
]
},
{
"id": "model-pashto-bert",
"title": "PashtoBERT",
"url": "https://huggingface.co/mdarhri/pashto-bert",
"category": "model",
"source": "huggingface",
"status": "verified",
"summary": "Pashto-specific encoder model for NLP transfer tasks.",
"primary_use": "Pashto NLP baseline encoder",
"tasks": [
"nlp"
],
"tags": [
"pashto",
"nlp",
"bert"
],
"evidence_text": "Model card states training on Pashto corpus data.",
"evidence_url": "https://huggingface.co/mdarhri/pashto-bert",
"markers": [
"Pashto"
]
},
{
"id": "benchmark-fleurs-ps-af",
"title": "FLEURS Pashto Benchmark",
"url": "https://huggingface.co/datasets/google/fleurs",
"category": "benchmark",
"source": "huggingface",
"status": "verified",
"summary": "Fixed multilingual speech benchmark with Pashto subset for WER and CER.",
"primary_use": "ASR benchmark reporting",
"tasks": [
"asr",
"benchmarking"
],
"tags": [
"pashto",
"benchmark",
"asr"
],
"evidence_text": "Dataset includes ps_af split.",
"evidence_url": "https://huggingface.co/datasets/google/fleurs/blob/main/fleurs.py",
"markers": [
"ps_af"
]
},
{
"id": "benchmark-common-voice-ps-v24",
"title": "Common Voice Pashto v24 Benchmark",
"url": "https://datacollective.mozillafoundation.org/datasets/cmj8u3pnb00llnxxbfvxo3b14",
"category": "benchmark",
"source": "mozilla",
"status": "verified",
"summary": "Core benchmark reference for project-level Pashto ASR tracking.",
"primary_use": "ASR baseline tracking",
"tasks": [
"asr",
"benchmarking"
],
"tags": [
"pashto",
"benchmark",
"asr"
],
"evidence_text": "Official Pashto split and versioned release.",
"evidence_url": "https://datacollective.mozillafoundation.org/datasets/cmj8u3pnb00llnxxbfvxo3b14",
"markers": [
"Pashto"
]
},
{
"id": "benchmark-belebele-pbt-arab",
"title": "Belebele Pashto Benchmark",
"url": "https://huggingface.co/datasets/facebook/belebele",
"category": "benchmark",
"source": "huggingface",
"status": "verified",
"summary": "Comprehension benchmark for multilingual NLP with Pashto variant.",
"primary_use": "NLP benchmark reporting",
"tasks": [
"nlp",
"benchmarking"
],
"tags": [
"pashto",
"benchmark",
"nlp"
],
"evidence_text": "Includes pbt_Arab language variant.",
"evidence_url": "https://huggingface.co/datasets/facebook/belebele",
"markers": [
"pbt_Arab"
]
},
{
"id": "benchmark-flores-200-pbt-arab",
"title": "FLORES-200 Pashto Benchmark",
"url": "https://github.com/facebookresearch/flores/tree/main/flores200",
"category": "benchmark",
"source": "github",
"status": "verified",
"summary": "Translation benchmark language inventory including Pashto script variant.",
"primary_use": "MT benchmark with BLEU and chrF",
"tasks": [
"mt",
"benchmarking"
],
"tags": [
"pashto",
"benchmark",
"mt"
],
"evidence_text": "Language list includes pbt_Arab.",
"evidence_url": "https://raw.githubusercontent.com/facebookresearch/flores/main/flores200/README.md",
"markers": [
"pbt_Arab"
]
},
{
"id": "dataset-nexdata-99h-pashto-dialogue",
"title": "99 Hours Pashto Spontaneous Dialogue Smartphone Speech Dataset",
"url": "https://huggingface.co/datasets/Nexdata/99_Hours_Pashto_Spontaneous_Dialogue_Smartphone_speech_dataset",
"category": "dataset",
"source": "huggingface",
"status": "verified",
"summary": "Large spontaneous Pashto smartphone speech dataset for robust ASR experimentation.",
"primary_use": "Spontaneous speech ASR training and robustness evaluation",
"tasks": [
"asr"
],
"tags": [
"pashto",
"speech",
"asr",
"dialogue"
],
"evidence_text": "Dataset title explicitly includes Pashto and API metadata marks audio and text modalities.",
"evidence_url": "https://huggingface.co/datasets/Nexdata/99_Hours_Pashto_Spontaneous_Dialogue_Smartphone_speech_dataset",
"markers": [
"Pashto"
]
},
{
"id": "dataset-zirak-ai-pashto-ocr",
"title": "Zirak-AI PashtoOCR",
"url": "https://huggingface.co/datasets/zirak-ai/PashtoOCR",
"category": "dataset",
"source": "huggingface",
"status": "verified",
"summary": "Pashto-focused OCR dataset with image-text pairs for document understanding tasks.",
"primary_use": "OCR and text extraction benchmarking",
"tasks": [
"ocr",
"nlp"
],
"tags": [
"pashto",
"ocr",
"nlp",
"vision"
],
"evidence_text": "Dataset tags include language:ps and the dataset name is PashtoOCR.",
"evidence_url": "https://huggingface.co/datasets/zirak-ai/PashtoOCR",
"markers": [
"ps",
"PashtoOCR"
]
},
{
"id": "dataset-ihanif-pashto-wikipedia-corpus",
"title": "Pashto Wikipedia Corpus",
"url": "https://huggingface.co/datasets/ihanif/pashto-wikipedia-corpus",
"category": "dataset",
"source": "huggingface",
"status": "verified",
"summary": "Pashto text corpus prepared from Wikipedia data for NLP and language modeling.",
"primary_use": "Pashto text corpus for NLP baselines",
"tasks": [
"nlp"
],
"tags": [
"pashto",
"text",
"nlp",
"wikipedia"
],
"evidence_text": "Dataset metadata includes language:ps and the title specifies Pashto corpus.",
"evidence_url": "https://huggingface.co/datasets/ihanif/pashto-wikipedia-corpus",
"markers": [
"ps",
"Pashto"
]
},
{
"id": "model-ihanif-wav2vec2-xls-r-300m-pashto",
"title": "wav2vec2 XLS-R 300M Pashto",
"url": "https://huggingface.co/ihanif/wav2vec2-xls-r-300m-pashto",
"category": "model",
"source": "huggingface",
"status": "verified",
"summary": "Fine-tuned wav2vec2 XLS-R model for Pashto ASR with published FLEURS evaluation tags.",
"primary_use": "Pashto ASR baseline and comparative experiments",
"tasks": [
"asr"
],
"tags": [
"pashto",
"asr",
"wav2vec2",
"fleurs"
],
"evidence_text": "Model tags include pashto and ps, and model index references FLEURS config ps_af.",
"evidence_url": "https://huggingface.co/ihanif/wav2vec2-xls-r-300m-pashto",
"markers": [
"pashto",
"ps",
"ps_af"
]
},
{
"id": "model-ihanif-whisper-medium-pashto",
"title": "Whisper Medium Pashto",
"url": "https://huggingface.co/ihanif/whisper-medium-pashto",
"category": "model",
"source": "huggingface",
"status": "verified",
"summary": "Fine-tuned Whisper Medium checkpoint for Pashto ASR with benchmark metadata.",
"primary_use": "Pashto ASR baseline and transcription quality comparisons",
"tasks": [
"asr"
],
"tags": [
"pashto",
"asr",
"whisper",
"fleurs"
],
"evidence_text": "Model tags include pashto and ps, and model index uses FLEURS ps_af split.",
"evidence_url": "https://huggingface.co/ihanif/whisper-medium-pashto",
"markers": [
"pashto",
"ps",
"ps_af"
]
},
{
"id": "dataset-kaggle-pold-pashto-offensive",
"title": "POLD - Pashto Offensive Language Dataset",
"url": "https://www.kaggle.com/datasets/drijaz/pold-pashto-offensive-language-dataset",
"category": "dataset",
"source": "kaggle",
"status": "verified",
"summary": "Benchmark dataset for offensive content detection in Pashto social text.",
"primary_use": "Pashto toxicity and moderation NLP benchmarks",
"tasks": [
"nlp",
"classification"
],
"tags": [
"pashto",
"kaggle",
"nlp",
"toxicity"
],
"evidence_text": "Kaggle title and description explicitly state Pashto offensive language benchmark dataset.",
"evidence_url": "https://www.kaggle.com/api/v1/datasets/view/drijaz/pold-pashto-offensive-language-dataset",
"markers": [
"Pashto"
]
},
{
"id": "dataset-kaggle-pashto-english-sentiment-corpus",
"title": "Pashto English Bilingual Sentiment Corpus",
"url": "https://www.kaggle.com/datasets/farhadkhan66/pashto-translated-corpus",
"category": "dataset",
"source": "kaggle",
"status": "verified",
"summary": "Pashto to English bilingual sentiment corpus useful for low-resource sentiment tasks.",
"primary_use": "Sentiment analysis and bilingual NLP experiments",
"tasks": [
"nlp",
"sentiment"
],
"tags": [
"pashto",
"kaggle",
"sentiment",
"bilingual"
],
"evidence_text": "Kaggle dataset title and description identify the corpus as Pashto-English sentiment data.",
"evidence_url": "https://www.kaggle.com/api/v1/datasets/view/farhadkhan66/pashto-translated-corpus",
"markers": [
"Pashto"
]
},
{
"id": "dataset-kaggle-urdu-pashto-lexicon",
"title": "Urdu-Pashto Lexicon Dataset",
"url": "https://www.kaggle.com/datasets/shafeeqgigyani/urdu-pashto-lexicon-dataset",
"category": "dataset",
"source": "kaggle",
"status": "verified",
"summary": "Lexicon of Urdu words with Pashto translations for dictionary and MT support.",
"primary_use": "Lexicon and translation lexeme mapping",
"tasks": [
"nlp",
"mt"
],
"tags": [
"pashto",
"kaggle",
"lexicon",
"translation"
],
"evidence_text": "Kaggle metadata describes 7,601 Urdu entries with Pashto translations.",
"evidence_url": "https://www.kaggle.com/api/v1/datasets/view/shafeeqgigyani/urdu-pashto-lexicon-dataset",
"markers": [
"Pashto"
]
},
{
"id": "project-hf-space-ihanif-pashto-asr-v3",
"title": "Pashto ASR V3 Space",
"url": "https://huggingface.co/spaces/ihanif/pashto-asr-v3",
"category": "project",
"source": "huggingface",
"status": "verified",
"summary": "Interactive Hugging Face Space for Pashto automatic speech recognition demos.",
"primary_use": "Project demo for Pashto ASR user testing",
"tasks": [
"asr",
"demo"
],
"tags": [
"pashto",
"project",
"huggingface-space",
"asr"
],
"evidence_text": "Space card title is Pashto ASR V3 and short description states Pashto ASR.",
"evidence_url": "https://huggingface.co/api/spaces/ihanif/pashto-asr-v3",
"markers": [
"Pashto",
"ASR"
]
},
{
"id": "project-hf-space-pashto2english-dictionary",
"title": "Pashto to English Dictionary Space",
"url": "https://huggingface.co/spaces/EngrAamirBangash/Pashto2English-Dictionary",
"category": "project",
"source": "huggingface",
"status": "verified",
"summary": "Streamlit project for Pashto to English dictionary lookups.",
"primary_use": "Interactive bilingual lookup project",
"tasks": [
"dictionary",
"translation",
"demo"
],
"tags": [
"pashto",
"project",
"huggingface-space",
"dictionary"
],
"evidence_text": "Space metadata title states Pashto to English Dictionary.",
"evidence_url": "https://huggingface.co/api/spaces/EngrAamirBangash/Pashto2English-Dictionary",
"markers": [
"Pashto"
]
},
{
"id": "project-hf-space-umar4321-pashto-translator",
"title": "Pashto Translator Space",
"url": "https://huggingface.co/spaces/Umar4321/Pashto-Translator",
"category": "project",
"source": "huggingface",
"status": "verified",
"summary": "Streamlit translator project for Pashto to English and Urdu conversion.",
"primary_use": "Interactive translation project demo",
"tasks": [
"translation",
"demo"
],
"tags": [
"pashto",
"project",
"huggingface-space",
"translation"
],
"evidence_text": "Space title is Pashto Translator and description states Pashto to English and Urdu translation.",
"evidence_url": "https://huggingface.co/api/spaces/Umar4321/Pashto-Translator",
"markers": [
"Pashto"
]
},
{
"id": "code-github-ijazul-haq-nlpashto",
"title": "nlpashto Toolkit",
"url": "https://github.com/ijazul-haq/nlpashto",
"category": "code",
"source": "github",
"status": "verified",
"summary": "Pashto NLP toolkit codebase for tokenization, embeddings, and downstream NLP workflows.",
"primary_use": "Pashto NLP code integration and experimentation",
"tasks": [
"nlp",
"tooling"
],
"tags": [
"pashto",
"code",
"github",
"nlp"
],
"evidence_text": "Repository name and description explicitly identify a Pashto NLP toolkit.",
"evidence_url": "https://api.github.com/repos/ijazul-haq/nlpashto",
"markers": [
"Pashto",
"NLP"
]
},
{
"id": "dataset-kaggle-drijaz-pashtoocr",
"title": "PashtoOCR (Kaggle)",
"url": "https://www.kaggle.com/datasets/drijaz/pashtoocr",
"category": "dataset",
"source": "kaggle",
"status": "verified",
"summary": "Synthetic OCR dataset focused on Pashto ligatures and text recognition tasks.",
"primary_use": "Pashto OCR dataset benchmarking and training",
"tasks": [
"ocr",
"nlp"
],
"tags": [
"pashto",
"kaggle",
"ocr",
"dataset"
],
"evidence_text": "Kaggle dataset title and subtitle explicitly identify a Pashto OCR dataset.",
"evidence_url": "https://www.kaggle.com/api/v1/datasets/view/drijaz/pashtoocr",
"markers": [
"Pashto",
"OCR"
]
},
{
"id": "model-hf-zirak-ai-pashto-bert-v1",
"title": "zirak-ai/pashto-bert-v1",
"url": "https://huggingface.co/zirak-ai/pashto-bert-v1",
"category": "model",
"source": "huggingface",
"status": "verified",
"summary": "Pashto BERT model checkpoint for low-resource Pashto NLP experiments.",
"primary_use": "Pashto encoder baseline for NLP tasks",
"tasks": [
"nlp"
],
"tags": [
"pashto",
"huggingface",
"bert",
"nlp"
],
"evidence_text": "Hugging Face model ID and search tags explicitly include pashto marker.",
"evidence_url": "https://huggingface.co/zirak-ai/pashto-bert-v1",
"markers": [
"pashto"
]
},
{
"id": "project-hf-space-ihanif-pashto-asr",
"title": "Pashto ASR Space",
"url": "https://huggingface.co/spaces/ihanif/pashto-asr",
"category": "project",
"source": "huggingface",
"status": "verified",
"summary": "Interactive Hugging Face Space for Pashto ASR inference demos.",
"primary_use": "Live Pashto speech-to-text demo project",
"tasks": [
"asr",
"demo"
],
"tags": [
"pashto",
"project",
"huggingface-space",
"asr"
],
"evidence_text": "Space ID includes pashto-asr and is returned by Hugging Face Pashto space search.",
"evidence_url": "https://huggingface.co/api/spaces/ihanif/pashto-asr",
"markers": [
"pashto",
"asr"
]
},
{
"id": "paper-s2-psocr-lmm-pashto",
"title": "PsOCR: Benchmarking Large Multimodal Models for Optical Character Recognition in Low-resource Pashto Language",
"url": "https://www.semanticscholar.org/paper/d2743c0dcdbc65f5b46fcec2f0ba7cb379c4134f",
"category": "paper",
"source": "other",
"status": "verified",
"summary": "Research paper benchmarking multimodal OCR models on low-resource Pashto OCR tasks.",
"primary_use": "Pashto OCR research baseline and evaluation reference",
"tasks": [
"ocr",
"research"
],
"tags": [
"pashto",
"paper",
"ocr",
"multimodal"
],
"evidence_text": "Paper title explicitly references low-resource Pashto language OCR benchmarking.",
"evidence_url": "https://www.semanticscholar.org/paper/d2743c0dcdbc65f5b46fcec2f0ba7cb379c4134f",
"markers": [
"Pashto",
"OCR"
]
},
{
"id": "dataset-hf-adnankhan769-english-to-pashto",
"title": "English to Pashto Sentences Dataset",
"url": "https://huggingface.co/datasets/adnankhan769/english_to_pashto_sentences_dataset",
"category": "dataset",
"source": "huggingface",
"status": "verified",
"summary": "Parallel English-Pashto sentence dataset for bilingual NLP and translation experiments.",
"primary_use": "MT and bilingual sentence alignment baseline",
"tasks": [
"mt",
"nlp"
],
"tags": [
"pashto",
"dataset",
"huggingface",
"translation"
],
"evidence_text": "Dataset ID explicitly states English-to-Pashto and includes Pashto-script sentence column.",
"evidence_url": "https://huggingface.co/api/datasets/adnankhan769/english_to_pashto_sentences_dataset",
"markers": [
"Pashto"
]
},
{
"id": "dataset-hf-saillab-alpaca-pashto-cleaned",
"title": "alpaca-pashto-cleaned",
"url": "https://huggingface.co/datasets/saillab/alpaca-pashto-cleaned",
"category": "dataset",
"source": "huggingface",
"status": "verified",
"summary": "Instruction-style Pashto text dataset suitable for LLM tuning and instruction-following research.",
"primary_use": "Pashto instruction tuning and conversational NLP experiments",
"tasks": [
"nlp",
"llm"
],
"tags": [
"pashto",
"dataset",
"huggingface",
"instruction"
],
"evidence_text": "Dataset metadata includes language:ps and dataset name includes Pashto.",
"evidence_url": "https://huggingface.co/api/datasets/saillab/alpaca-pashto-cleaned",
"markers": [
"ps",
"Pashto"
]
},
{
"id": "model-hf-ihanif-whisper-base-pashto",
"title": "Whisper Base Pashto",
"url": "https://huggingface.co/ihanif/whisper-base-pashto",
"category": "model",
"source": "huggingface",
"status": "verified",
"summary": "Fine-tuned Whisper Base checkpoint for Pashto ASR with FLEURS ps_af evaluation metadata.",
"primary_use": "Pashto ASR baseline and speed-accuracy comparison",
"tasks": [
"asr"
],
"tags": [
"pashto",
"model",
"huggingface",
"asr"
],
"evidence_text": "Model ID includes Pashto and card metadata references FLEURS config ps_af.",
"evidence_url": "https://huggingface.co/api/models/ihanif/whisper-base-pashto",
"markers": [
"Pashto",
"ps_af"
]
},
{
"id": "project-hf-space-zamai-mistral-7b-pashto",
"title": "ZamAI-Mistral-7B-Pashto Space",
"url": "https://huggingface.co/spaces/tasal9/ZamAI-Mistral-7B-Pashto-space",
"category": "project",
"source": "huggingface",
"status": "verified",
"summary": "Gradio project space demonstrating a Pashto-adapted Mistral 7B interface.",
"primary_use": "Interactive Pashto LLM project demo",
"tasks": [
"llm",
"demo"
],
"tags": [
"pashto",
"project",
"huggingface-space",
"llm"
],
"evidence_text": "Space title and ID explicitly include Pashto and model card metadata exposes project details.",
"evidence_url": "https://huggingface.co/api/spaces/tasal9/ZamAI-Mistral-7B-Pashto-space",
"markers": [
"Pashto"
]
},
{
"id": "dataset-hf-adnankhan769-proper-dataset-english-2-pashto",
"title": "adnankhan769/proper_dataset_english_2_pashto",
"url": "https://huggingface.co/datasets/adnankhan769/proper_dataset_english_2_pashto",
"category": "dataset",
"source": "huggingface",
"status": "verified",
"summary": "Pashto bilingual/translation dataset discovered from huggingface for MT experimentation.",
"primary_use": "Machine translation and bilingual corpus development",
"tasks": [
"mt"
],
"tags": [
"pashto",
"dataset",
"huggingface",
"mt"
],
"evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
"evidence_url": "https://huggingface.co/datasets/adnankhan769/proper_dataset_english_2_pashto",
"markers": [
"pashto"
]
},
{
"id": "dataset-hf-ihanif-pashto-asr-wer",
"title": "ihanif/pashto_asr_wer",
"url": "https://huggingface.co/datasets/ihanif/pashto_asr_wer",
"category": "dataset",
"source": "huggingface",
"status": "verified",
"summary": "Pashto speech dataset discovered from huggingface for ASR training and evaluation.",
"primary_use": "ASR training and evaluation data source",
"tasks": [
"asr"
],
"tags": [
"pashto",
"dataset",
"huggingface",
"asr"
],
"evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
"evidence_url": "https://huggingface.co/datasets/ihanif/pashto_asr_wer",
"markers": [
"pashto"
]
},
{
"id": "dataset-hf-ihanif-pashto-speech-ds",
"title": "ihanif/pashto_speech_ds",
"url": "https://huggingface.co/datasets/ihanif/pashto_speech_ds",
"category": "dataset",
"source": "huggingface",
"status": "verified",
"summary": "Pashto speech dataset discovered from huggingface for ASR training and evaluation.",
"primary_use": "ASR training and evaluation data source",
"tasks": [
"asr"
],
"tags": [
"pashto",
"dataset",
"huggingface",
"asr"
],
"evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
"evidence_url": "https://huggingface.co/datasets/ihanif/pashto_speech_ds",
"markers": [
"pashto"
]
},
{
"id": "dataset-hf-ihanif-pashto-speech-parquet-10k",
"title": "ihanif/pashto_speech_parquet_10k",
"url": "https://huggingface.co/datasets/ihanif/pashto_speech_parquet_10k",
"category": "dataset",
"source": "huggingface",
"status": "verified",
"summary": "Pashto speech dataset discovered from huggingface for ASR training and evaluation.",
"primary_use": "ASR training and evaluation data source",
"tasks": [
"asr"
],
"tags": [
"pashto",
"dataset",
"huggingface",
"asr"
],
"evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
"evidence_url": "https://huggingface.co/datasets/ihanif/pashto_speech_parquet_10k",
"markers": [
"pashto"
]
},
{
"id": "dataset-hf-saillab-alpaca-pashto-taco",
"title": "saillab/alpaca_pashto_taco",
"url": "https://huggingface.co/datasets/saillab/alpaca_pashto_taco",
"category": "dataset",
"source": "huggingface",
"status": "verified",
"summary": "Pashto-focused dataset discovered from huggingface candidate sync.",
"primary_use": "Instruction tuning and LLM adaptation data source",
"tasks": [
"llm"
],
"tags": [
"pashto",
"dataset",
"huggingface",
"llm"
],
"evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
"evidence_url": "https://huggingface.co/datasets/saillab/alpaca_pashto_taco",
"markers": [
"pashto"
]
},
{
"id": "dataset-hf-sherwindesouza-pashto-common-voice-20",
"title": "SherwinDesouza/pashto-common-voice-20",
"url": "https://huggingface.co/datasets/SherwinDesouza/pashto-common-voice-20",
"category": "dataset",
"source": "huggingface",
"status": "verified",
"summary": "Pashto-focused dataset discovered from huggingface candidate sync.",
"primary_use": "Pashto data source for NLP experimentation",
"tasks": [
"nlp"
],
"tags": [
"pashto",
"dataset",
"huggingface",
"nlp"
],
"evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
"evidence_url": "https://huggingface.co/datasets/SherwinDesouza/pashto-common-voice-20",
"markers": [
"pashto"
]
},
{
"id": "dataset-hf-tasal9-zamai-pashto-dataset",
"title": "tasal9/ZamAI_Pashto_Dataset",
"url": "https://huggingface.co/datasets/tasal9/ZamAI_Pashto_Dataset",
"category": "dataset",
"source": "huggingface",
"status": "verified",
"summary": "Pashto-focused dataset discovered from huggingface candidate sync.",
"primary_use": "Pashto data source for NLP experimentation",
"tasks": [
"nlp"
],
"tags": [
"pashto",
"dataset",
"huggingface",
"nlp"
],
"evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
"evidence_url": "https://huggingface.co/datasets/tasal9/ZamAI_Pashto_Dataset",
"markers": [
"pashto"
]
},
{
"id": "dataset-kaggle-english-pashto-language-dataset-epld",
"title": "English-Pashto Language Dataset (EPLD)",
"url": "https://www.kaggle.com/datasets/rabiakhan827/english-pashto-language-dataset-epld",
"category": "dataset",
"source": "kaggle",
"status": "verified",
"summary": "Pashto bilingual/translation dataset discovered from kaggle for MT experimentation.",
"primary_use": "Machine translation and bilingual corpus development",
"tasks": [
"mt"
],
"tags": [
"pashto",
"dataset",
"kaggle",
"mt"
],
"evidence_text": "Kaggle dataset title/subtitle includes Pashto keyword.",
"evidence_url": "https://www.kaggle.com/datasets/rabiakhan827/english-pashto-language-dataset-epld",
"markers": [
"Pashto"
]
},
{
"id": "dataset-kaggle-katib-s-pashto-text-imagebase-kpti",
"title": "Katib's Pashto Text Imagebase (KPTI)",
"url": "https://www.kaggle.com/datasets/hassanamin/katibs-pashto-text-imagebase-kpti",
"category": "dataset",
"source": "kaggle",
"status": "verified",
"summary": "Pashto OCR-oriented dataset discovered from kaggle for document and script recognition work.",
"primary_use": "OCR training and evaluation data source",
"tasks": [
"ocr"
],
"tags": [
"pashto",
"dataset",
"kaggle",
"ocr"
],
"evidence_text": "Kaggle dataset title/subtitle includes Pashto keyword.",
"evidence_url": "https://www.kaggle.com/datasets/hassanamin/katibs-pashto-text-imagebase-kpti",
"markers": [
"Pashto"
]
},
{
"id": "dataset-kaggle-pashto-ocr",
"title": "Pashto OCR",
"url": "https://www.kaggle.com/datasets/hassanamin/pashto-ocr",
"category": "dataset",
"source": "kaggle",
"status": "verified",
"summary": "Pashto OCR-oriented dataset discovered from kaggle for document and script recognition work.",
"primary_use": "OCR training and evaluation data source",
"tasks": [
"ocr"
],
"tags": [
"pashto",
"dataset",
"kaggle",
"ocr"
],
"evidence_text": "Kaggle dataset title/subtitle includes Pashto keyword.",
"evidence_url": "https://www.kaggle.com/datasets/hassanamin/pashto-ocr",
"markers": [
"Pashto"
]
},
{
"id": "dataset-kaggle-common-voice-24-0-pashto-speech-dataset",
"title": "Common Voice 24.0: Pashto Speech Dataset",
"url": "https://www.kaggle.com/datasets/ataullahaali/common-voice-scripted-speech-24-0-pashto",
"category": "dataset",
"source": "kaggle",
"status": "verified",
"summary": "Pashto speech dataset discovered from kaggle for ASR training and evaluation.",
"primary_use": "ASR training and evaluation data source",
"tasks": [
"asr"
],
"tags": [
"pashto",
"dataset",
"kaggle",
"asr"
],
"evidence_text": "Kaggle dataset title/subtitle includes Pashto keyword.",
"evidence_url": "https://www.kaggle.com/datasets/ataullahaali/common-voice-scripted-speech-24-0-pashto",
"markers": [
"Pashto"
]
},
{
"id": "model-hf-ihanif-pashto-asr-base",
"title": "ihanif/pashto-asr-base",
"url": "https://huggingface.co/ihanif/pashto-asr-base",
"category": "model",
"source": "huggingface",
"status": "verified",
"summary": "Pashto ASR model checkpoint discovered from huggingface candidate sync.",
"primary_use": "Pashto ASR baseline and model comparison",
"tasks": [
"asr"
],
"tags": [
"pashto",
"model",
"huggingface",
"asr"
],
"evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
"evidence_url": "https://huggingface.co/ihanif/pashto-asr-base",
"markers": [
"pashto"
]
},
{
"id": "model-hf-ihanif-wav2vec2-xls-r-300m-pashto-lm",
"title": "ihanif/wav2vec2-xls-r-300m-pashto-lm",
"url": "https://huggingface.co/ihanif/wav2vec2-xls-r-300m-pashto-lm",
"category": "model",
"source": "huggingface",
"status": "verified",
"summary": "Pashto ASR model checkpoint discovered from huggingface candidate sync.",
"primary_use": "Pashto ASR baseline and model comparison",
"tasks": [
"asr"
],
"tags": [
"pashto",
"model",
"huggingface",
"asr"
],
"evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
"evidence_url": "https://huggingface.co/ihanif/wav2vec2-xls-r-300m-pashto-lm",
"markers": [
"pashto"
]
},
{
"id": "model-hf-ihanif-whisper-large-pashto",
"title": "ihanif/whisper-large-pashto",
"url": "https://huggingface.co/ihanif/whisper-large-pashto",
"category": "model",
"source": "huggingface",
"status": "verified",
"summary": "Pashto ASR model checkpoint discovered from huggingface candidate sync.",
"primary_use": "Pashto ASR baseline and model comparison",
"tasks": [
"asr"
],
"tags": [
"pashto",
"model",
"huggingface",
"asr"
],
"evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
"evidence_url": "https://huggingface.co/ihanif/whisper-large-pashto",
"markers": [
"pashto"
]
},
{
"id": "model-hf-ihanif-whisper-medium-pashto-3e-7",
"title": "ihanif/whisper-medium-pashto-3e-7",
"url": "https://huggingface.co/ihanif/whisper-medium-pashto-3e-7",
"category": "model",
"source": "huggingface",
"status": "verified",
"summary": "Pashto ASR model checkpoint discovered from huggingface candidate sync.",
"primary_use": "Pashto ASR baseline and model comparison",
"tasks": [
"asr"
],
"tags": [
"pashto",
"model",
"huggingface",
"asr"
],
"evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
"evidence_url": "https://huggingface.co/ihanif/whisper-medium-pashto-3e-7",
"markers": [
"pashto"
]
},
{
"id": "model-hf-ihanif-whisper-small-pashto",
"title": "ihanif/whisper-small-pashto",
"url": "https://huggingface.co/ihanif/whisper-small-pashto",
"category": "model",
"source": "huggingface",
"status": "verified",
"summary": "Pashto ASR model checkpoint discovered from huggingface candidate sync.",
"primary_use": "Pashto ASR baseline and model comparison",
"tasks": [
"asr"
],
"tags": [
"pashto",
"model",
"huggingface",
"asr"
],
"evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
"evidence_url": "https://huggingface.co/ihanif/whisper-small-pashto",
"markers": [
"pashto"
]
},
{
"id": "model-hf-ihanif-xls-r-1b-pashto",
"title": "ihanif/xls-r-1b-pashto",
"url": "https://huggingface.co/ihanif/xls-r-1b-pashto",
"category": "model",
"source": "huggingface",
"status": "verified",
"summary": "Pashto ASR model checkpoint discovered from huggingface candidate sync.",
"primary_use": "Pashto ASR baseline and model comparison",
"tasks": [
"asr"
],
"tags": [
"pashto",
"model",
"huggingface",
"asr"
],
"evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
"evidence_url": "https://huggingface.co/ihanif/xls-r-1b-pashto",
"markers": [
"pashto"
]
},
{
"id": "model-hf-ijazulhaq-bert-base-pashto-v1",
"title": "ijazulhaq/bert-base-pashto-v1",
"url": "https://huggingface.co/ijazulhaq/bert-base-pashto-v1",
"category": "model",
"source": "huggingface",
"status": "verified",
"summary": "Pashto NLP model checkpoint discovered from huggingface candidate sync.",
"primary_use": "Pashto model baseline for downstream NLP tasks",
"tasks": [
"nlp"
],
"tags": [
"pashto",
"model",
"huggingface",
"nlp"
],
"evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
"evidence_url": "https://huggingface.co/ijazulhaq/bert-base-pashto-v1",
"markers": [
"pashto"
]
},
{
"id": "project-hf-space-ihanif-wav2vec2-bert-pashto-asr",
"title": "ihanif/wav2vec2-bert-pashto-asr",
"url": "https://huggingface.co/spaces/ihanif/wav2vec2-bert-pashto-asr",
"category": "project",
"source": "huggingface",
"status": "verified",
"summary": "Pashto-focused interactive project discovered from huggingface for demonstration and quick evaluation.",
"primary_use": "Interactive Pashto demo and quick qualitative validation",
"tasks": [
"asr",
"nlp",
"demo"
],
"tags": [
"pashto",
"project",
"huggingface",
"asr",
"nlp",
"demo"
],
"evidence_text": "Matched by Pashto keyword in Hugging Face Spaces search.",
"evidence_url": "https://huggingface.co/spaces/ihanif/wav2vec2-bert-pashto-asr",
"markers": [
"pashto"
]
},
{
"id": "project-hf-space-nasirkhansayyad-pashto-whisper-demo",
"title": "nasirkhansayyad/pashto-whisper-demo",
"url": "https://huggingface.co/spaces/nasirkhansayyad/pashto-whisper-demo",
"category": "project",
"source": "huggingface",
"status": "verified",
"summary": "Pashto-focused interactive project discovered from huggingface for demonstration and quick evaluation.",
"primary_use": "Interactive Pashto demo and quick qualitative validation",
"tasks": [
"asr",
"demo"
],
"tags": [
"pashto",
"project",
"huggingface",
"asr",
"demo"
],
"evidence_text": "Matched by Pashto keyword in Hugging Face Spaces search.",
"evidence_url": "https://huggingface.co/spaces/nasirkhansayyad/pashto-whisper-demo",
"markers": [
"pashto"
]
},
{
"id": "project-hf-space-tasal9-zamai-phi3-mini-pashto-demo",
"title": "tasal9/ZamAI-Phi3-Mini-Pashto-Demo",
"url": "https://huggingface.co/spaces/tasal9/ZamAI-Phi3-Mini-Pashto-Demo",
"category": "project",
"source": "huggingface",
"status": "verified",
"summary": "Pashto-focused interactive project discovered from huggingface for demonstration and quick evaluation.",
"primary_use": "Interactive Pashto demo and quick qualitative validation",
"tasks": [
"llm",
"demo"
],
"tags": [
"pashto",
"project",
"huggingface",
"llm",
"demo"
],
"evidence_text": "Matched by Pashto keyword in Hugging Face Spaces search.",
"evidence_url": "https://huggingface.co/spaces/tasal9/ZamAI-Phi3-Mini-Pashto-Demo",
"markers": [
"pashto"
]
},
{
"id": "project-hf-space-umar4321-pashto-to-english-urdu",
"title": "Umar4321/Pashto-To-English-Urdu",
"url": "https://huggingface.co/spaces/Umar4321/Pashto-To-English-Urdu",
"category": "project",
"source": "huggingface",
"status": "verified",
"summary": "Pashto-focused interactive project discovered from huggingface for demonstration and quick evaluation.",
"primary_use": "Interactive Pashto demo and quick qualitative validation",
"tasks": [
"mt",
"demo"
],
"tags": [
"pashto",
"project",
"huggingface",
"mt",
"demo"
],
"evidence_text": "Matched by Pashto keyword in Hugging Face Spaces search.",
"evidence_url": "https://huggingface.co/spaces/Umar4321/Pashto-To-English-Urdu",
"markers": [
"pashto"
]
},
{
"id": "project-github-fazlullahmamond-pashto-typing",
"title": "Fazlullahmamond/Pashto-Typing",
"url": "https://github.com/Fazlullahmamond/Pashto-Typing",
"category": "project",
"source": "github",
"status": "verified",
"summary": "Pashto-focused interactive project discovered from github for demonstration and quick evaluation.",
"primary_use": "Interactive Pashto demo and quick qualitative validation",
"tasks": [
"demo"
],
"tags": [
"pashto",
"project",
"github",
"demo"
],
"evidence_text": "Repository metadata (name/description/topics) includes Pashto markers.",
"evidence_url": "https://github.com/Fazlullahmamond/Pashto-Typing",
"markers": [
"pashto"
]
},
{
"id": "paper-s2-benchmarking-whisper-for-low-resource-speech-recognition-an-n-shot-evaluation-on-pashto-pu",
"title": "Benchmarking Whisper for Low-Resource Speech Recognition: An N-Shot Evaluation on Pashto, Punjabi, and Urdu",
"url": "https://www.semanticscholar.org/paper/13104eddc785756132a19242ac7e74442b145693",
"category": "paper",
"source": "other",
"status": "verified",
"summary": "Pashto language technology paper discovered via Semantic Scholar for research reference.",
"primary_use": "Pashto research reference for methods and benchmarking",
"tasks": [
"asr",
"mt"
],
"tags": [
"pashto",
"paper",
"other",
"asr",
"mt"
],
"evidence_text": "Matched by Semantic Scholar query: pashto.",
"evidence_url": "https://www.semanticscholar.org/paper/13104eddc785756132a19242ac7e74442b145693",
"markers": [
"pashto"
]
},
{
"id": "paper-s2-deep-learning-based-detection-of-one-and-two-column-textual-blocks-in-camera-captured-pash",
"title": "Deep Learning-Based Detection of One and Two-Column Textual Blocks in Camera-Captured Pashto Documents Images",
"url": "https://www.semanticscholar.org/paper/8c9d2628e23d5c27edc656071f11f0e78124d182",
"category": "paper",
"source": "other",
"status": "verified",
"summary": "Pashto language technology paper discovered via Semantic Scholar for research reference.",
"primary_use": "Pashto research reference for methods and benchmarking",
"tasks": [
"ocr"
],
"tags": [
"pashto",
"paper",
"other",
"ocr"
],
"evidence_text": "Matched by Semantic Scholar query: pashto.",
"evidence_url": "https://www.semanticscholar.org/paper/8c9d2628e23d5c27edc656071f11f0e78124d182",
"markers": [
"pashto"
]
},
{
"id": "paper-s2-out-of-vocabulary-pashto-spell-checker-using-morphological-operations",
"title": "Out-of-Vocabulary Pashto Spell Checker using Morphological Operations",
"url": "https://www.semanticscholar.org/paper/802aae68a6a7fdfb29d51be03fb2b09e29311fa7",
"category": "paper",
"source": "other",
"status": "verified",
"summary": "Pashto language technology paper discovered via Semantic Scholar for research reference.",
"primary_use": "Pashto research reference for methods and benchmarking",
"tasks": [
"nlp"
],
"tags": [
"pashto",
"paper",
"other",
"nlp"
],
"evidence_text": "Matched by Semantic Scholar query: pashto.",
"evidence_url": "https://www.semanticscholar.org/paper/802aae68a6a7fdfb29d51be03fb2b09e29311fa7",
"markers": [
"pashto"
]
},
{
"id": "paper-s2-pashto-shallow-parsing-a-deep-learning-approach",
"title": "Pashto Shallow Parsing: A Deep Learning Approach",
"url": "https://www.semanticscholar.org/paper/be36455bb4eca60accb3e6866f345132f0dac1e5",
"category": "paper",
"source": "other",
"status": "verified",
"summary": "Pashto language technology paper discovered via Semantic Scholar for research reference.",
"primary_use": "Pashto research reference for methods and benchmarking",
"tasks": [
"nlp"
],
"tags": [
"pashto",
"paper",
"other",
"nlp"
],
"evidence_text": "Matched by Semantic Scholar query: pashto.",
"evidence_url": "https://www.semanticscholar.org/paper/be36455bb4eca60accb3e6866f345132f0dac1e5",
"markers": [
"pashto"
]
},
{
"id": "paper-s2-pos-tagging-of-low-resource-pashto-language-annotated-corpus-and-bert-based-model",
"title": "POS tagging of low-resource Pashto language: annotated corpus and BERT-based model",
"url": "https://www.semanticscholar.org/paper/1b2d5c896fec735483e8c8fb0a75e13125e08769",
"category": "paper",
"source": "other",
"status": "verified",
"summary": "Pashto language technology paper discovered via Semantic Scholar for research reference.",
"primary_use": "Pashto research reference for methods and benchmarking",
"tasks": [
"nlp"
],
"tags": [
"pashto",
"paper",
"other",
"nlp"
],
"evidence_text": "Matched by Semantic Scholar query: pashto.",
"evidence_url": "https://www.semanticscholar.org/paper/1b2d5c896fec735483e8c8fb0a75e13125e08769",
"markers": [
"pashto"
]
},
{
"id": "paper-arxiv-enhancing-pashto-text-classification-using-language-processing-techniques-for-single-and-m",
"title": "Enhancing Pashto Text Classification using Language Processing Techniques for Single And Multi-Label Analysis",
"url": "http://arxiv.org/abs/2305.03201v1",
"category": "paper",
"source": "arxiv",
"status": "verified",
"summary": "Pashto language technology paper discovered from arxiv for research reference.",
"primary_use": "Pashto research reference for methods and benchmarking",
"tasks": [
"nlp"
],
"tags": [
"pashto",
"paper",
"arxiv",
"nlp"
],
"evidence_text": "Matched by arXiv query: all:pashto.",
"evidence_url": "http://arxiv.org/abs/2305.03201v1",
"markers": [
"pashto"
]
},
{
"id": "paper-arxiv-knn-and-ann-based-recognition-of-handwritten-pashto-letters-using-zoning-features",
"title": "KNN and ANN-based Recognition of Handwritten Pashto Letters using Zoning Features",
"url": "http://arxiv.org/abs/1904.03391v2",
"category": "paper",
"source": "arxiv",
"status": "verified",
"summary": "Pashto language technology paper discovered from arxiv for research reference.",
"primary_use": "Pashto research reference for methods and benchmarking",
"tasks": [
"ocr"
],
"tags": [
"pashto",
"paper",
"arxiv",
"ocr"
],
"evidence_text": "Matched by arXiv query: all:pashto.",
"evidence_url": "http://arxiv.org/abs/1904.03391v2",
"markers": [
"pashto"
]
},
{
"id": "dataset-hf-oowais-pushto-text-to-speech-dataset",
"title": "oowais/pushto-text-to-speech-dataset",
"url": "https://huggingface.co/datasets/oowais/pushto-text-to-speech-dataset",
"category": "dataset",
"source": "huggingface",
"status": "verified",
"summary": "Pashto speech dataset discovered from huggingface candidate sync for ASR and TTS training and evaluation.",
"primary_use": "ASR and TTS training and evaluation data source",
"tasks": [
"asr",
"tts"
],
"tags": [
"pashto",
"dataset",
"huggingface",
"asr",
"tts"
],
"evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
"evidence_url": "https://huggingface.co/datasets/oowais/pushto-text-to-speech-dataset",
"markers": [
"pashto"
]
},
{
"id": "dataset-hf-ihanif-pashto-speech-20k",
"title": "ihanif/pashto_speech_20k",
"url": "https://huggingface.co/datasets/ihanif/pashto_speech_20k",
"category": "dataset",
"source": "huggingface",
"status": "verified",
"summary": "Pashto speech dataset discovered from huggingface candidate sync for ASR training and evaluation.",
"primary_use": "ASR training and evaluation data source",
"tasks": [
"asr"
],
"tags": [
"pashto",
"dataset",
"huggingface",
"asr"
],
"evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
"evidence_url": "https://huggingface.co/datasets/ihanif/pashto_speech_20k",
"markers": [
"pashto"
]
},
{
"id": "dataset-hf-ihanif-pashto-speech-5k",
"title": "ihanif/pashto_speech_5k",
"url": "https://huggingface.co/datasets/ihanif/pashto_speech_5k",
"category": "dataset",
"source": "huggingface",
"status": "verified",
"summary": "Pashto speech dataset discovered from huggingface candidate sync for ASR training and evaluation.",
"primary_use": "ASR training and evaluation data source",
"tasks": [
"asr"
],
"tags": [
"pashto",
"dataset",
"huggingface",
"asr"
],
"evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
"evidence_url": "https://huggingface.co/datasets/ihanif/pashto_speech_5k",
"markers": [
"pashto"
]
},
{
"id": "dataset-hf-tasal9-pashto-dataset",
"title": "tasal9/Pashto_Dataset",
"url": "https://huggingface.co/datasets/tasal9/Pashto_Dataset",
"category": "dataset",
"source": "huggingface",
"status": "verified",
"summary": "Pashto-focused dataset discovered from huggingface candidate sync.",
"primary_use": "Pashto data source for NLP experimentation",
"tasks": [
"nlp"
],
"tags": [
"pashto",
"dataset",
"huggingface",
"nlp"
],
"evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
"evidence_url": "https://huggingface.co/datasets/tasal9/Pashto_Dataset",
"markers": [
"pashto"
]
},
{
"id": "model-hf-ijazulhaq-bert-base-pashto",
"title": "ijazulhaq/bert-base-pashto",
"url": "https://huggingface.co/ijazulhaq/bert-base-pashto",
"category": "model",
"source": "huggingface",
"status": "verified",
"summary": "Pashto NLP model checkpoint discovered from huggingface candidate sync.",
"primary_use": "Pashto model baseline for downstream NLP tasks",
"tasks": [
"nlp"
],
"tags": [
"pashto",
"model",
"huggingface",
"nlp"
],
"evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
"evidence_url": "https://huggingface.co/ijazulhaq/bert-base-pashto",
"markers": [
"pashto"
]
},
{
"id": "model-hf-ihanif-whisper-small-pashto-dropout",
"title": "ihanif/whisper-small-pashto-dropout",
"url": "https://huggingface.co/ihanif/whisper-small-pashto-dropout",
"category": "model",
"source": "huggingface",
"status": "verified",
"summary": "Pashto ASR model checkpoint discovered from huggingface candidate sync.",
"primary_use": "Pashto ASR baseline and model comparison",
"tasks": [
"asr"
],
"tags": [
"pashto",
"model",
"huggingface",
"asr"
],
"evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
"evidence_url": "https://huggingface.co/ihanif/whisper-small-pashto-dropout",
"markers": [
"pashto"
]
},
{
"id": "model-hf-koochikoo25-pashto-whisper-large",
"title": "koochikoo25/pashto-whisper-large",
"url": "https://huggingface.co/koochikoo25/pashto-whisper-large",
"category": "model",
"source": "huggingface",
"status": "verified",
"summary": "Pashto ASR model checkpoint discovered from huggingface candidate sync.",
"primary_use": "Pashto ASR baseline and model comparison",
"tasks": [
"asr"
],
"tags": [
"pashto",
"model",
"huggingface",
"asr"
],
"evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
"evidence_url": "https://huggingface.co/koochikoo25/pashto-whisper-large",
"markers": [
"pashto"
]
},
{
"id": "project-hf-space-ihanif-wav2vec-pashto-asr",
"title": "ihanif/wav2vec-pashto-asr",
"url": "https://huggingface.co/spaces/ihanif/wav2vec-pashto-asr",
"category": "project",
"source": "huggingface",
"status": "verified",
"summary": "Pashto-focused interactive project discovered from huggingface for demonstration and evaluation.",
"primary_use": "Interactive Pashto demo and quick qualitative validation",
"tasks": [
"asr",
"demo"
],
"tags": [
"pashto",
"project",
"huggingface",
"asr",
"demo"
],
"evidence_text": "Matched by Pashto keyword in Hugging Face Spaces search.",
"evidence_url": "https://huggingface.co/spaces/ihanif/wav2vec-pashto-asr",
"markers": [
"pashto"
]
},
{
"id": "project-hf-space-afaqalinagra-pashto-asr-model",
"title": "afaqalinagra/PASHTO-ASR-MODEL",
"url": "https://huggingface.co/spaces/afaqalinagra/PASHTO-ASR-MODEL",
"category": "project",
"source": "huggingface",
"status": "verified",
"summary": "Pashto-focused interactive project discovered from huggingface for demonstration and evaluation.",
"primary_use": "Interactive Pashto demo and quick qualitative validation",
"tasks": [
"asr",
"demo"
],
"tags": [
"pashto",
"project",
"huggingface",
"asr",
"demo"
],
"evidence_text": "Matched by Pashto keyword in Hugging Face Spaces search.",
"evidence_url": "https://huggingface.co/spaces/afaqalinagra/PASHTO-ASR-MODEL",
"markers": [
"pashto"
]
},
{
"id": "project-hf-space-ilyas02828-pashto-sign-language",
"title": "ilyas02828/Pashto_Sign_Language",
"url": "https://huggingface.co/spaces/ilyas02828/Pashto_Sign_Language",
"category": "project",
"source": "huggingface",
"status": "verified",
"summary": "Pashto-focused interactive project discovered from huggingface for demonstration and evaluation.",
"primary_use": "Interactive Pashto demo and quick qualitative validation",
"tasks": [
"demo"
],
"tags": [
"pashto",
"project",
"huggingface",
"demo"
],
"evidence_text": "Matched by Pashto keyword in Hugging Face Spaces search.",
"evidence_url": "https://huggingface.co/spaces/ilyas02828/Pashto_Sign_Language",
"markers": [
"pashto"
]
},
{
"id": "project-hf-space-mahmudaq-pashtoasrnmt1",
"title": "mahmudaq/PashtoASRNMT1",
"url": "https://huggingface.co/spaces/mahmudaq/PashtoASRNMT1",
"category": "project",
"source": "huggingface",
"status": "verified",
"summary": "Pashto-focused interactive project discovered from huggingface for demonstration and evaluation.",
"primary_use": "Interactive Pashto demo and quick qualitative validation",
"tasks": [
"asr",
"mt",
"demo"
],
"tags": [
"pashto",
"project",
"huggingface",
"asr",
"mt",
"demo"
],
"evidence_text": "Matched by Pashto keyword in Hugging Face Spaces search.",
"evidence_url": "https://huggingface.co/spaces/mahmudaq/PashtoASRNMT1",
"markers": [
"pashto"
]
},
{
"id": "paper-s2-enhancing-pashto-ner-using-machine-labeled-data-and-transformer-based-models",
"title": "Enhancing Pashto NER Using Machine-Labeled Data and Transformer-Based Models",
"url": "https://www.semanticscholar.org/paper/be851ecf9197ef9bb8bf764abf4db0dda95cd9da",
"category": "paper",
"source": "other",
"status": "verified",
"summary": "Pashto language technology paper discovered via Semantic Scholar for research reference.",
"primary_use": "Pashto research reference for methods and benchmarking",
"tasks": [
"nlp"
],
"tags": [
"pashto",
"paper",
"other",
"nlp"
],
"evidence_text": "Matched by explicit Pashto marker in paper title from Semantic Scholar search.",
"evidence_url": "https://www.semanticscholar.org/paper/be851ecf9197ef9bb8bf764abf4db0dda95cd9da",
"markers": [
"pashto"
]
},
{
"id": "dataset-hf-aamirhs-pashto-audio-wav2vec",
"title": "aamirhs/pashto-audio-wav2vec",
"url": "https://huggingface.co/datasets/aamirhs/pashto-audio-wav2vec",
"category": "dataset",
"source": "huggingface",
"status": "verified",
"summary": "Pashto speech dataset surfaced from Hugging Face candidate sync for ASR experiments.",
"primary_use": "Pashto ASR data exploration and baseline training",
"tasks": [
"asr"
],
"tags": [
"pashto",
"dataset",
"huggingface",
"speech",
"asr"
],
"evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
"evidence_url": "https://huggingface.co/datasets/aamirhs/pashto-audio-wav2vec",
"markers": [
"pashto"
]
},
{
"id": "dataset-hf-alimuhammad73-pashto-poetry",
"title": "AliMuhammad73/Pashto-Poetry",
"url": "https://huggingface.co/datasets/AliMuhammad73/Pashto-Poetry",
"category": "dataset",
"source": "huggingface",
"status": "verified",
"summary": "Pashto poetry text dataset surfaced from Hugging Face candidate sync for NLP experiments.",
"primary_use": "Pashto poetry corpus for language modeling and text analysis",
"tasks": [
"nlp"
],
"tags": [
"pashto",
"dataset",
"huggingface",
"text",
"poetry",
"nlp"
],
"evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
"evidence_url": "https://huggingface.co/datasets/AliMuhammad73/Pashto-Poetry",
"markers": [
"pashto"
]
},
{
"id": "model-hf-aamirhs-wav2vec2-large-xls-r-300m-pashto-colab",
"title": "aamirhs/wav2vec2-large-xls-r-300m-pashto-colab",
"url": "https://huggingface.co/aamirhs/wav2vec2-large-xls-r-300m-pashto-colab",
"category": "model",
"source": "huggingface",
"status": "verified",
"summary": "Pashto ASR model checkpoint surfaced from Hugging Face candidate sync.",
"primary_use": "Pashto ASR baseline and transfer-learning comparison",
"tasks": [
"asr"
],
"tags": [
"pashto",
"model",
"huggingface",
"asr"
],
"evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
"evidence_url": "https://huggingface.co/aamirhs/wav2vec2-large-xls-r-300m-pashto-colab",
"markers": [
"pashto"
]
},
{
"id": "project-hf-space-aizazayyubi-pashto-asr",
"title": "Aizazayyubi/pashto_asr",
"url": "https://huggingface.co/spaces/Aizazayyubi/pashto_asr",
"category": "project",
"source": "huggingface",
"status": "verified",
"summary": "Pashto ASR interactive demo surfaced from Hugging Face Spaces candidate sync.",
"primary_use": "Interactive Pashto ASR demo for qualitative evaluation",
"tasks": [
"asr",
"demo"
],
"tags": [
"pashto",
"project",
"huggingface",
"asr",
"demo"
],
"evidence_text": "Matched by Pashto keyword in Hugging Face Spaces search.",
"evidence_url": "https://huggingface.co/spaces/Aizazayyubi/pashto_asr",
"markers": [
"pashto"
]
},
{
"id": "paper-arxiv-from-scarcity-to-scale-pashto-common-voice",
"title": "From Scarcity to Scale: A Release-Level Analysis of the Pashto Common Voice Dataset",
"url": "http://arxiv.org/abs/2602.14062v1",
"category": "paper",
"source": "arxiv",
"status": "verified",
"summary": "Research paper analyzing Pashto Common Voice releases and dataset scaling characteristics.",
"primary_use": "ASR data quality and release trend reference",
"tasks": [
"asr",
"benchmarking"
],
"tags": [
"pashto",
"paper",
"arxiv",
"asr",
"common-voice"
],
"evidence_text": "Matched by Pashto marker in paper title from arXiv query results.",
"evidence_url": "http://arxiv.org/abs/2602.14062v1",
"markers": [
"pashto"
]
},
{
"id": "paper-arxiv-tuning-traditional-pashto-text-classification",
"title": "Tuning Traditional Language Processing Approaches for Pashto Text Classification",
"url": "http://arxiv.org/abs/2305.03737v1",
"category": "paper",
"source": "arxiv",
"status": "verified",
"summary": "Research paper focused on Pashto text classification using traditional NLP approaches.",
"primary_use": "Pashto text classification method reference",
"tasks": [
"nlp"
],
"tags": [
"pashto",
"paper",
"arxiv",
"nlp",
"classification"
],
"evidence_text": "Matched by Pashto marker in paper title from arXiv query results.",
"evidence_url": "http://arxiv.org/abs/2305.03737v1",
"markers": [
"pashto"
]
},
{
"id": "dataset-dataverse-iarpa-babel-pashto-language-pack-v0-4by",
"title": "IARPA Babel Pashto Language Pack IARPA-babel104b-v0.4bY",
"url": "https://hdl.handle.net/11272.1/AB2/GLFN3X",
"category": "dataset",
"source": "dataverse",
"status": "verified",
"summary": "Pashto Babel language pack dataset for speech and language processing evaluation.",
"primary_use": "Pashto speech dataset for ASR and language identification experiments",
"tasks": [
"asr",
"benchmarking"
],
"tags": [
"pashto",
"dataset",
"dataverse",
"speech",
"asr",
"babel"
],
"evidence_text": "Dataverse metadata includes Pashto markers in dataset title or description.",
"evidence_url": "https://hdl.handle.net/11272.1/AB2/GLFN3X",
"markers": [
"pashto"
]
},
{
"id": "paper-arxiv-image-to-text-pashto-farsi-traditional-chinese",
"title": "Development of a New Image-to-text Conversion System for Pashto, Farsi and Traditional Chinese",
"url": "http://arxiv.org/abs/2005.08650v1",
"category": "paper",
"source": "arxiv",
"status": "verified",
"summary": "Research paper on image-to-text conversion including Pashto OCR.",
"primary_use": "Pashto OCR method reference",
"tasks": [
"ocr",
"nlp"
],
"tags": [
"pashto",
"paper",
"arxiv",
"ocr"
],
"evidence_text": "Matched by Pashto marker in paper title from arXiv query results.",
"evidence_url": "http://arxiv.org/abs/2005.08650v1",
"markers": [
"pashto"
]
},
{
"id": "paper-openalex-benchmark-pashto-handwritten-character-dataset-ocr",
"title": "Benchmark Pashto Handwritten Character Dataset and Pashto Object Character Recognition (OCR) Using Deep Neural Network with Rule Activation Function",
"url": "https://doi.org/10.1155/2021/6669672",
"category": "paper",
"source": "openalex",
"status": "verified",
"summary": "Research paper introducing a benchmark dataset and OCR approach for Pashto handwritten characters.",
"primary_use": "Pashto handwritten OCR benchmark and methodology reference",
"tasks": [
"ocr",
"benchmarking"
],
"tags": [
"pashto",
"paper",
"openalex",
"ocr",
"benchmark"
],
"evidence_text": "Matched by explicit Pashto marker in title from OpenAlex works search.",
"evidence_url": "https://doi.org/10.1155/2021/6669672",
"markers": [
"pashto"
]
},
{
"id": "paper-openalex-asr-isolated-pashto-spoken-digits-mfcc-knn",
"title": "Database development and automatic speech recognition of isolated Pashto spoken digits using MFCC and K-NN",
"url": "https://doi.org/10.1007/s10772-014-9267-z",
"category": "paper",
"source": "openalex",
"status": "verified",
"summary": "Research paper on isolated Pashto spoken-digit ASR with MFCC and K-NN.",
"primary_use": "Pashto ASR baseline method reference for digit recognition",
"tasks": [
"asr"
],
"tags": [
"pashto",
"paper",
"openalex",
"asr",
"speech"
],
"evidence_text": "Matched by explicit Pashto marker in title from OpenAlex works search.",
"evidence_url": "https://doi.org/10.1007/s10772-014-9267-z",
"markers": [
"pashto"
]
},
{
"id": "paper-openalex-pashto-isolated-digits-recognition-dcnn",
"title": "Pashto isolated digits recognition using deep convolutional neural network",
"url": "https://doi.org/10.1016/j.heliyon.2020.e03372",
"category": "paper",
"source": "openalex",
"status": "verified",
"summary": "Research paper on Pashto isolated-digit recognition using deep convolutional neural networks.",
"primary_use": "Pashto speech recognition research reference",
"tasks": [
"asr"
],
"tags": [
"pashto",
"paper",
"openalex",
"asr",
"deep-learning"
],
"evidence_text": "Matched by explicit Pashto marker in title from OpenAlex works search.",
"evidence_url": "https://doi.org/10.1016/j.heliyon.2020.e03372",
"markers": [
"pashto"
]
},
{
"id": "paper-openalex-pashto-offensive-language-detection-benchmark-bert",
"title": "Pashto offensive language detection: a benchmark dataset and monolingual Pashto BERT",
"url": "https://doi.org/10.7717/peerj-cs.1617",
"category": "paper",
"source": "openalex",
"status": "verified",
"summary": "Research paper on Pashto offensive language detection with benchmark dataset and monolingual BERT model.",
"primary_use": "Pashto NLP toxicity detection benchmark and model reference",
"tasks": [
"nlp",
"benchmarking"
],
"tags": [
"pashto",
"paper",
"openalex",
"nlp",
"bert",
"benchmark"
],
"evidence_text": "Matched by explicit Pashto marker in title from OpenAlex works search.",
"evidence_url": "https://doi.org/10.7717/peerj-cs.1617",
"markers": [
"pashto"
]
},
{
"id": "paper-openalex-phti-pashto-handwritten-text-imagebase",
"title": "PHTI: Pashto Handwritten Text Imagebase for Deep Learning Applications",
"url": "https://doi.org/10.1109/access.2022.3216881",
"category": "paper",
"source": "openalex",
"status": "verified",
"summary": "Research paper describing PHTI, a Pashto handwritten text imagebase for deep learning.",
"primary_use": "Pashto OCR dataset and benchmark reference",
"tasks": [
"ocr",
"benchmarking"
],
"tags": [
"pashto",
"paper",
"openalex",
"ocr",
"dataset"
],
"evidence_text": "Matched by explicit Pashto marker in title from OpenAlex works search.",
"evidence_url": "https://doi.org/10.1109/access.2022.3216881",
"markers": [
"pashto"
]
},
{
"id": "paper-openalex-recognition-of-pashto-handwritten-characters-deep-learning",
"title": "Recognition of Pashto Handwritten Characters Based on Deep Learning",
"url": "https://doi.org/10.3390/s20205884",
"category": "paper",
"source": "openalex",
"status": "verified",
"summary": "Research paper on deep-learning-based recognition of Pashto handwritten characters.",
"primary_use": "Pashto OCR model reference for handwritten character recognition",
"tasks": [
"ocr"
],
"tags": [
"pashto",
"paper",
"openalex",
"ocr",
"deep-learning"
],
"evidence_text": "Matched by explicit Pashto marker in title from OpenAlex works search.",
"evidence_url": "https://doi.org/10.3390/s20205884",
"markers": [
"pashto"
]
},
{
"id": "paper-openalex-kpti-katib-pashto-text-imagebase-benchmark",
"title": "KPTI: Katib's Pashto Text Imagebase and Deep Learning Benchmark",
"url": "https://doi.org/10.1109/icfhr.2016.0090",
"category": "paper",
"source": "openalex",
"status": "verified",
"summary": "Research paper introducing KPTI, a Pashto text imagebase and benchmark for handwritten recognition.",
"primary_use": "Pashto OCR dataset and benchmarking reference",
"tasks": [
"ocr",
"benchmarking"
],
"tags": [
"pashto",
"paper",
"openalex",
"ocr",
"benchmark"
],
"evidence_text": "Matched by explicit Pashto marker in title from OpenAlex works search.",
"evidence_url": "https://doi.org/10.1109/icfhr.2016.0090",
"markers": [
"pashto"
]
},
{
"id": "paper-openalex-pioneer-dataset-handwritten-pashto-cnn",
"title": "Pioneer dataset and recognition of Handwritten Pashto characters using Convolution Neural Networks",
"url": "https://doi.org/10.1177/0020294020964826",
"category": "paper",
"source": "openalex",
"status": "verified",
"summary": "Research paper on a pioneer handwritten Pashto character dataset with CNN-based recognition.",
"primary_use": "Pashto handwritten character recognition reference",
"tasks": [
"ocr",
"benchmarking"
],
"tags": [
"pashto",
"paper",
"openalex",
"ocr",
"deep-learning"
],
"evidence_text": "Matched by explicit Pashto marker in title from OpenAlex works search.",
"evidence_url": "https://doi.org/10.1177/0020294020964826",
"markers": [
"pashto"
]
},
{
"id": "paper-openalex-scale-rotation-invariant-ocr-pashto-mdlstm",
"title": "Scale and rotation invariant OCR for Pashto cursive script using MDLSTM network",
"url": "https://doi.org/10.1109/icdar.2015.7333931",
"category": "paper",
"source": "openalex",
"status": "verified",
"summary": "Research paper on scale- and rotation-invariant OCR for cursive Pashto using MDLSTM.",
"primary_use": "Pashto OCR model architecture reference",
"tasks": [
"ocr"
],
"tags": [
"pashto",
"paper",
"openalex",
"ocr",
"mdlstm"
],
"evidence_text": "Matched by explicit Pashto marker in title from OpenAlex works search.",
"evidence_url": "https://doi.org/10.1109/icdar.2015.7333931",
"markers": [
"pashto"
]
},
{
"id": "paper-openalex-recognizable-units-pashto-ocr",
"title": "Recognizable units in Pashto language for OCR",
"url": "https://doi.org/10.1109/icdar.2015.7333963",
"category": "paper",
"source": "openalex",
"status": "verified",
"summary": "Research paper defining recognizable units in Pashto for OCR workflows.",
"primary_use": "Pashto OCR preprocessing and unit-design reference",
"tasks": [
"ocr"
],
"tags": [
"pashto",
"paper",
"openalex",
"ocr"
],
"evidence_text": "Matched by explicit Pashto marker in title from OpenAlex works search.",
"evidence_url": "https://doi.org/10.1109/icdar.2015.7333963",
"markers": [
"pashto"
]
},
{
"id": "paper-openalex-shape-analysis-pashto-script-image-database-ocr",
"title": "Shape analysis of Pashto script and creation of image database for OCR",
"url": "https://doi.org/10.1109/icet.2009.5353160",
"category": "paper",
"source": "openalex",
"status": "verified",
"summary": "Research paper on Pashto script shape analysis and image database creation for OCR.",
"primary_use": "Pashto OCR dataset design and feature reference",
"tasks": [
"ocr",
"benchmarking"
],
"tags": [
"pashto",
"paper",
"openalex",
"ocr",
"dataset"
],
"evidence_text": "Matched by explicit Pashto marker in title from OpenAlex works search.",
"evidence_url": "https://doi.org/10.1109/icet.2009.5353160",
"markers": [
"pashto"
]
},
{
"id": "paper-openalex-speech-translation-low-resource-case-pashto",
"title": "Speech translation for low-resource languages: the case of Pashto",
"url": "https://doi.org/10.21437/interspeech.2005-723",
"category": "paper",
"source": "openalex",
"status": "verified",
"summary": "Research paper on speech translation for low-resource languages, including Pashto.",
"primary_use": "Pashto speech translation and low-resource MT reference",
"tasks": [
"asr",
"mt"
],
"tags": [
"pashto",
"paper",
"openalex",
"speech",
"translation"
],
"evidence_text": "Matched by explicit Pashto marker in title from OpenAlex works search.",
"evidence_url": "https://doi.org/10.21437/interspeech.2005-723",
"markers": [
"pashto"
]
}
]
}