{ "generated_on": "2026-02-17T00:00:00Z", "count": 95, "resources": [ { "id": "dataset-common-voice-ps-v24", "title": "Common Voice Scripted Speech 24.0 - Pashto", "url": "https://datacollective.mozillafoundation.org/datasets/cmj8u3pnb00llnxxbfvxo3b14", "category": "dataset", "source": "mozilla", "status": "verified", "summary": "Large open Pashto speech dataset for ASR training and evaluation.", "primary_use": "ASR training and evaluation", "tasks": [ "asr" ], "tags": [ "pashto", "speech", "asr" ], "evidence_text": "Official dataset page is for Pashto.", "evidence_url": "https://datacollective.mozillafoundation.org/datasets/cmj8u3pnb00llnxxbfvxo3b14", "markers": [ "Pashto" ] }, { "id": "dataset-google-fleurs", "title": "Google FLEURS", "url": "https://huggingface.co/datasets/google/fleurs", "category": "dataset", "source": "huggingface", "status": "verified", "summary": "Standard multilingual speech benchmark dataset with Pashto subset.", "primary_use": "Speech benchmark and external evaluation", "tasks": [ "asr", "benchmarking" ], "tags": [ "pashto", "speech", "benchmark" ], "evidence_text": "Dataset config includes ps_af.", "evidence_url": "https://huggingface.co/datasets/google/fleurs/blob/main/fleurs.py", "markers": [ "ps_af" ] }, { "id": "dataset-oscar-ps", "title": "OSCAR Corpus", "url": "https://huggingface.co/datasets/oscar-corpus/oscar", "category": "dataset", "source": "huggingface", "status": "verified", "summary": "Large web text corpus that includes Pashto text split.", "primary_use": "Language modeling and lexicon expansion", "tasks": [ "nlp" ], "tags": [ "pashto", "text", "nlp" ], "evidence_text": "Dataset includes unshuffled_deduplicated_ps split.", "evidence_url": "https://huggingface.co/datasets/oscar-corpus/oscar", "markers": [ "unshuffled_deduplicated_ps" ] }, { "id": "dataset-wikipedia-ps", "title": "Wikimedia Wikipedia", "url": "https://huggingface.co/datasets/wikimedia/wikipedia", "category": "dataset", "source": "huggingface", "status": 
"verified", "summary": "Wikipedia corpus with Pashto edition for cleaner text resources.", "primary_use": "Terminology and balanced text corpus", "tasks": [ "nlp" ], "tags": [ "pashto", "text", "nlp" ], "evidence_text": "Dataset includes 20231101.ps subset.", "evidence_url": "https://huggingface.co/datasets/wikimedia/wikipedia", "markers": [ "20231101.ps" ] }, { "id": "dataset-belebele-pbt-arab", "title": "Belebele", "url": "https://huggingface.co/datasets/facebook/belebele", "category": "dataset", "source": "huggingface", "status": "verified", "summary": "Reading comprehension dataset with Pashto script subset.", "primary_use": "Comprehension and multilingual NLP benchmark", "tasks": [ "nlp", "benchmarking" ], "tags": [ "pashto", "nlp", "benchmark" ], "evidence_text": "Dataset includes pbt_Arab subset.", "evidence_url": "https://huggingface.co/datasets/facebook/belebele", "markers": [ "pbt_Arab" ] }, { "id": "dataset-opus100-en-ps", "title": "OPUS-100", "url": "https://huggingface.co/datasets/Helsinki-NLP/opus-100", "category": "dataset", "source": "huggingface", "status": "verified", "summary": "Parallel corpus with English to Pashto split for MT tasks.", "primary_use": "Machine translation training and evaluation", "tasks": [ "mt", "nlp" ], "tags": [ "pashto", "mt", "parallel-corpus" ], "evidence_text": "Dataset viewer includes en-ps split.", "evidence_url": "https://huggingface.co/datasets/Helsinki-NLP/opus-100/viewer/en-ps", "markers": [ "en-ps" ] }, { "id": "dataset-kaggle-pashto-isolated-words", "title": "Pashto Isolated Words Speech Dataset", "url": "https://www.kaggle.com/datasets/engrirf/pashto-isolated-words-speech-dataset", "category": "dataset", "source": "kaggle", "status": "verified", "summary": "Speech dataset focused on isolated Pashto words.", "primary_use": "Keyword spotting and constrained ASR experiments", "tasks": [ "asr" ], "tags": [ "pashto", "speech", "kaggle" ], "evidence_text": "Dataset title explicitly states Pashto speech dataset.", 
"evidence_url": "https://www.kaggle.com/datasets/engrirf/pashto-isolated-words-speech-dataset", "markers": [ "Pashto" ] }, { "id": "dataset-kaggle-pashto-word-embeddings", "title": "Pashto Word Embeddings", "url": "https://www.kaggle.com/datasets/drijaz/pashto-word-embeddings", "category": "dataset", "source": "kaggle", "status": "verified", "summary": "Pretrained Pashto word vectors for classic NLP baselines.", "primary_use": "Lexical semantics and lightweight NLP baselines", "tasks": [ "nlp" ], "tags": [ "pashto", "nlp", "embeddings", "kaggle" ], "evidence_text": "Dataset description states pretrained Pashto embeddings.", "evidence_url": "https://www.kaggle.com/datasets/drijaz/pashto-word-embeddings", "markers": [ "Pashto" ] }, { "id": "model-pashto-bert", "title": "PashtoBERT", "url": "https://huggingface.co/mdarhri/pashto-bert", "category": "model", "source": "huggingface", "status": "verified", "summary": "Pashto-specific encoder model for NLP transfer tasks.", "primary_use": "Pashto NLP baseline encoder", "tasks": [ "nlp" ], "tags": [ "pashto", "nlp", "bert" ], "evidence_text": "Model card states training on Pashto corpus data.", "evidence_url": "https://huggingface.co/mdarhri/pashto-bert", "markers": [ "Pashto" ] }, { "id": "benchmark-fleurs-ps-af", "title": "FLEURS Pashto Benchmark", "url": "https://huggingface.co/datasets/google/fleurs", "category": "benchmark", "source": "huggingface", "status": "verified", "summary": "Fixed multilingual speech benchmark with Pashto subset for WER and CER.", "primary_use": "ASR benchmark reporting", "tasks": [ "asr", "benchmarking" ], "tags": [ "pashto", "benchmark", "asr" ], "evidence_text": "Dataset includes ps_af split.", "evidence_url": "https://huggingface.co/datasets/google/fleurs/blob/main/fleurs.py", "markers": [ "ps_af" ] }, { "id": "benchmark-common-voice-ps-v24", "title": "Common Voice Pashto v24 Benchmark", "url": "https://datacollective.mozillafoundation.org/datasets/cmj8u3pnb00llnxxbfvxo3b14", "category": 
"benchmark", "source": "mozilla", "status": "verified", "summary": "Core benchmark reference for project-level Pashto ASR tracking.", "primary_use": "ASR baseline tracking", "tasks": [ "asr", "benchmarking" ], "tags": [ "pashto", "benchmark", "asr" ], "evidence_text": "Official Pashto split and versioned release.", "evidence_url": "https://datacollective.mozillafoundation.org/datasets/cmj8u3pnb00llnxxbfvxo3b14", "markers": [ "Pashto" ] }, { "id": "benchmark-belebele-pbt-arab", "title": "Belebele Pashto Benchmark", "url": "https://huggingface.co/datasets/facebook/belebele", "category": "benchmark", "source": "huggingface", "status": "verified", "summary": "Comprehension benchmark for multilingual NLP with Pashto variant.", "primary_use": "NLP benchmark reporting", "tasks": [ "nlp", "benchmarking" ], "tags": [ "pashto", "benchmark", "nlp" ], "evidence_text": "Includes pbt_Arab language variant.", "evidence_url": "https://huggingface.co/datasets/facebook/belebele", "markers": [ "pbt_Arab" ] }, { "id": "benchmark-flores-200-pbt-arab", "title": "FLORES-200 Pashto Benchmark", "url": "https://github.com/facebookresearch/flores/tree/main/flores200", "category": "benchmark", "source": "github", "status": "verified", "summary": "Translation benchmark language inventory including Pashto script variant.", "primary_use": "MT benchmark with BLEU and chrF", "tasks": [ "mt", "benchmarking" ], "tags": [ "pashto", "benchmark", "mt" ], "evidence_text": "Language list includes pbt_Arab.", "evidence_url": "https://raw.githubusercontent.com/facebookresearch/flores/main/flores200/README.md", "markers": [ "pbt_Arab" ] }, { "id": "dataset-nexdata-99h-pashto-dialogue", "title": "99 Hours Pashto Spontaneous Dialogue Smartphone Speech Dataset", "url": "https://huggingface.co/datasets/Nexdata/99_Hours_Pashto_Spontaneous_Dialogue_Smartphone_speech_dataset", "category": "dataset", "source": "huggingface", "status": "verified", "summary": "Large spontaneous Pashto smartphone speech dataset for 
robust ASR experimentation.", "primary_use": "Spontaneous speech ASR training and robustness evaluation", "tasks": [ "asr" ], "tags": [ "pashto", "speech", "asr", "dialogue" ], "evidence_text": "Dataset title explicitly includes Pashto and API metadata marks audio and text modalities.", "evidence_url": "https://huggingface.co/datasets/Nexdata/99_Hours_Pashto_Spontaneous_Dialogue_Smartphone_speech_dataset", "markers": [ "Pashto" ] }, { "id": "dataset-zirak-ai-pashto-ocr", "title": "Zirak-AI PashtoOCR", "url": "https://huggingface.co/datasets/zirak-ai/PashtoOCR", "category": "dataset", "source": "huggingface", "status": "verified", "summary": "Pashto-focused OCR dataset with image-text pairs for document understanding tasks.", "primary_use": "OCR and text extraction benchmarking", "tasks": [ "ocr", "nlp" ], "tags": [ "pashto", "ocr", "nlp", "vision" ], "evidence_text": "Dataset tags include language:ps and the dataset name is PashtoOCR.", "evidence_url": "https://huggingface.co/datasets/zirak-ai/PashtoOCR", "markers": [ "ps", "PashtoOCR" ] }, { "id": "dataset-ihanif-pashto-wikipedia-corpus", "title": "Pashto Wikipedia Corpus", "url": "https://huggingface.co/datasets/ihanif/pashto-wikipedia-corpus", "category": "dataset", "source": "huggingface", "status": "verified", "summary": "Pashto text corpus prepared from Wikipedia data for NLP and language modeling.", "primary_use": "Pashto text corpus for NLP baselines", "tasks": [ "nlp" ], "tags": [ "pashto", "text", "nlp", "wikipedia" ], "evidence_text": "Dataset metadata includes language:ps and the title specifies Pashto corpus.", "evidence_url": "https://huggingface.co/datasets/ihanif/pashto-wikipedia-corpus", "markers": [ "ps", "Pashto" ] }, { "id": "model-ihanif-wav2vec2-xls-r-300m-pashto", "title": "wav2vec2 XLS-R 300M Pashto", "url": "https://huggingface.co/ihanif/wav2vec2-xls-r-300m-pashto", "category": "model", "source": "huggingface", "status": "verified", "summary": "Fine-tuned wav2vec2 XLS-R model for Pashto ASR 
with published FLEURS evaluation tags.", "primary_use": "Pashto ASR baseline and comparative experiments", "tasks": [ "asr" ], "tags": [ "pashto", "asr", "wav2vec2", "fleurs" ], "evidence_text": "Model tags include pashto and ps, and model index references FLEURS config ps_af.", "evidence_url": "https://huggingface.co/ihanif/wav2vec2-xls-r-300m-pashto", "markers": [ "pashto", "ps", "ps_af" ] }, { "id": "model-ihanif-whisper-medium-pashto", "title": "Whisper Medium Pashto", "url": "https://huggingface.co/ihanif/whisper-medium-pashto", "category": "model", "source": "huggingface", "status": "verified", "summary": "Fine-tuned Whisper Medium checkpoint for Pashto ASR with benchmark metadata.", "primary_use": "Pashto ASR baseline and transcription quality comparisons", "tasks": [ "asr" ], "tags": [ "pashto", "asr", "whisper", "fleurs" ], "evidence_text": "Model tags include pashto and ps, and model index uses FLEURS ps_af split.", "evidence_url": "https://huggingface.co/ihanif/whisper-medium-pashto", "markers": [ "pashto", "ps", "ps_af" ] }, { "id": "dataset-kaggle-pold-pashto-offensive", "title": "POLD - Pashto Offensive Language Dataset", "url": "https://www.kaggle.com/datasets/drijaz/pold-pashto-offensive-language-dataset", "category": "dataset", "source": "kaggle", "status": "verified", "summary": "Benchmark dataset for offensive content detection in Pashto social text.", "primary_use": "Pashto toxicity and moderation NLP benchmarks", "tasks": [ "nlp", "classification" ], "tags": [ "pashto", "kaggle", "nlp", "toxicity" ], "evidence_text": "Kaggle title and description explicitly state Pashto offensive language benchmark dataset.", "evidence_url": "https://www.kaggle.com/api/v1/datasets/view/drijaz/pold-pashto-offensive-language-dataset", "markers": [ "Pashto" ] }, { "id": "dataset-kaggle-pashto-english-sentiment-corpus", "title": "Pashto English Bilingual Sentiment Corpus", "url": "https://www.kaggle.com/datasets/farhadkhan66/pashto-translated-corpus", "category": 
"dataset", "source": "kaggle", "status": "verified", "summary": "Pashto to English bilingual sentiment corpus useful for low-resource sentiment tasks.", "primary_use": "Sentiment analysis and bilingual NLP experiments", "tasks": [ "nlp", "sentiment" ], "tags": [ "pashto", "kaggle", "sentiment", "bilingual" ], "evidence_text": "Kaggle dataset title and description identify the corpus as Pashto-English sentiment data.", "evidence_url": "https://www.kaggle.com/api/v1/datasets/view/farhadkhan66/pashto-translated-corpus", "markers": [ "Pashto" ] }, { "id": "dataset-kaggle-urdu-pashto-lexicon", "title": "Urdu-Pashto Lexicon Dataset", "url": "https://www.kaggle.com/datasets/shafeeqgigyani/urdu-pashto-lexicon-dataset", "category": "dataset", "source": "kaggle", "status": "verified", "summary": "Lexicon of Urdu words with Pashto translations for dictionary and MT support.", "primary_use": "Lexicon and translation lexeme mapping", "tasks": [ "nlp", "mt" ], "tags": [ "pashto", "kaggle", "lexicon", "translation" ], "evidence_text": "Kaggle metadata describes 7,601 Urdu entries with Pashto translations.", "evidence_url": "https://www.kaggle.com/api/v1/datasets/view/shafeeqgigyani/urdu-pashto-lexicon-dataset", "markers": [ "Pashto" ] }, { "id": "project-hf-space-ihanif-pashto-asr-v3", "title": "Pashto ASR V3 Space", "url": "https://huggingface.co/spaces/ihanif/pashto-asr-v3", "category": "project", "source": "huggingface", "status": "verified", "summary": "Interactive Hugging Face Space for Pashto automatic speech recognition demos.", "primary_use": "Project demo for Pashto ASR user testing", "tasks": [ "asr", "demo" ], "tags": [ "pashto", "project", "huggingface-space", "asr" ], "evidence_text": "Space card title is Pashto ASR V3 and short description states Pashto ASR.", "evidence_url": "https://huggingface.co/api/spaces/ihanif/pashto-asr-v3", "markers": [ "Pashto", "ASR" ] }, { "id": "project-hf-space-pashto2english-dictionary", "title": "Pashto to English Dictionary Space", 
"url": "https://huggingface.co/spaces/EngrAamirBangash/Pashto2English-Dictionary", "category": "project", "source": "huggingface", "status": "verified", "summary": "Streamlit project for Pashto to English dictionary lookups.", "primary_use": "Interactive bilingual lookup project", "tasks": [ "dictionary", "translation", "demo" ], "tags": [ "pashto", "project", "huggingface-space", "dictionary" ], "evidence_text": "Space metadata title states Pashto to English Dictionary.", "evidence_url": "https://huggingface.co/api/spaces/EngrAamirBangash/Pashto2English-Dictionary", "markers": [ "Pashto" ] }, { "id": "project-hf-space-umar4321-pashto-translator", "title": "Pashto Translator Space", "url": "https://huggingface.co/spaces/Umar4321/Pashto-Translator", "category": "project", "source": "huggingface", "status": "verified", "summary": "Streamlit translator project for Pashto to English and Urdu conversion.", "primary_use": "Interactive translation project demo", "tasks": [ "translation", "demo" ], "tags": [ "pashto", "project", "huggingface-space", "translation" ], "evidence_text": "Space title is Pashto Translator and description states Pashto to English and Urdu translation.", "evidence_url": "https://huggingface.co/api/spaces/Umar4321/Pashto-Translator", "markers": [ "Pashto" ] }, { "id": "code-github-ijazul-haq-nlpashto", "title": "nlpashto Toolkit", "url": "https://github.com/ijazul-haq/nlpashto", "category": "code", "source": "github", "status": "verified", "summary": "Pashto NLP toolkit codebase for tokenization, embeddings, and downstream NLP workflows.", "primary_use": "Pashto NLP code integration and experimentation", "tasks": [ "nlp", "tooling" ], "tags": [ "pashto", "code", "github", "nlp" ], "evidence_text": "Repository name and description explicitly identify a Pashto NLP toolkit.", "evidence_url": "https://api.github.com/repos/ijazul-haq/nlpashto", "markers": [ "Pashto", "NLP" ] }, { "id": "dataset-kaggle-drijaz-pashtoocr", "title": "PashtoOCR (Kaggle)", 
"url": "https://www.kaggle.com/datasets/drijaz/pashtoocr", "category": "dataset", "source": "kaggle", "status": "verified", "summary": "Synthetic OCR dataset focused on Pashto ligatures and text recognition tasks.", "primary_use": "Pashto OCR dataset benchmarking and training", "tasks": [ "ocr", "nlp" ], "tags": [ "pashto", "kaggle", "ocr", "dataset" ], "evidence_text": "Kaggle dataset title and subtitle explicitly identify a Pashto OCR dataset.", "evidence_url": "https://www.kaggle.com/api/v1/datasets/view/drijaz/pashtoocr", "markers": [ "Pashto", "OCR" ] }, { "id": "model-hf-zirak-ai-pashto-bert-v1", "title": "zirak-ai/pashto-bert-v1", "url": "https://huggingface.co/zirak-ai/pashto-bert-v1", "category": "model", "source": "huggingface", "status": "verified", "summary": "Pashto BERT model checkpoint for low-resource Pashto NLP experiments.", "primary_use": "Pashto encoder baseline for NLP tasks", "tasks": [ "nlp" ], "tags": [ "pashto", "huggingface", "bert", "nlp" ], "evidence_text": "Hugging Face model ID and search tags explicitly include pashto marker.", "evidence_url": "https://huggingface.co/zirak-ai/pashto-bert-v1", "markers": [ "pashto" ] }, { "id": "project-hf-space-ihanif-pashto-asr", "title": "Pashto ASR Space", "url": "https://huggingface.co/spaces/ihanif/pashto-asr", "category": "project", "source": "huggingface", "status": "verified", "summary": "Interactive Hugging Face Space for Pashto ASR inference demos.", "primary_use": "Live Pashto speech-to-text demo project", "tasks": [ "asr", "demo" ], "tags": [ "pashto", "project", "huggingface-space", "asr" ], "evidence_text": "Space ID includes pashto-asr and is returned by Hugging Face Pashto space search.", "evidence_url": "https://huggingface.co/api/spaces/ihanif/pashto-asr", "markers": [ "pashto", "asr" ] }, { "id": "paper-s2-psocr-lmm-pashto", "title": "PsOCR: Benchmarking Large Multimodal Models for Optical Character Recognition in Low-resource Pashto Language", "url": 
"https://www.semanticscholar.org/paper/d2743c0dcdbc65f5b46fcec2f0ba7cb379c4134f", "category": "paper", "source": "other", "status": "verified", "summary": "Research paper benchmarking multimodal OCR models on low-resource Pashto OCR tasks.", "primary_use": "Pashto OCR research baseline and evaluation reference", "tasks": [ "ocr", "research" ], "tags": [ "pashto", "paper", "ocr", "multimodal" ], "evidence_text": "Paper title explicitly references low-resource Pashto language OCR benchmarking.", "evidence_url": "https://www.semanticscholar.org/paper/d2743c0dcdbc65f5b46fcec2f0ba7cb379c4134f", "markers": [ "Pashto", "OCR" ] }, { "id": "dataset-hf-adnankhan769-english-to-pashto", "title": "English to Pashto Sentences Dataset", "url": "https://huggingface.co/datasets/adnankhan769/english_to_pashto_sentences_dataset", "category": "dataset", "source": "huggingface", "status": "verified", "summary": "Parallel English-Pashto sentence dataset for bilingual NLP and translation experiments.", "primary_use": "MT and bilingual sentence alignment baseline", "tasks": [ "mt", "nlp" ], "tags": [ "pashto", "dataset", "huggingface", "translation" ], "evidence_text": "Dataset ID explicitly states English-to-Pashto and includes Pashto-script sentence column.", "evidence_url": "https://huggingface.co/api/datasets/adnankhan769/english_to_pashto_sentences_dataset", "markers": [ "Pashto" ] }, { "id": "dataset-hf-saillab-alpaca-pashto-cleaned", "title": "alpaca-pashto-cleaned", "url": "https://huggingface.co/datasets/saillab/alpaca-pashto-cleaned", "category": "dataset", "source": "huggingface", "status": "verified", "summary": "Instruction-style Pashto text dataset suitable for LLM tuning and instruction-following research.", "primary_use": "Pashto instruction tuning and conversational NLP experiments", "tasks": [ "nlp", "llm" ], "tags": [ "pashto", "dataset", "huggingface", "instruction" ], "evidence_text": "Dataset metadata includes language:ps and dataset name includes Pashto.", 
"evidence_url": "https://huggingface.co/api/datasets/saillab/alpaca-pashto-cleaned", "markers": [ "ps", "Pashto" ] }, { "id": "model-hf-ihanif-whisper-base-pashto", "title": "Whisper Base Pashto", "url": "https://huggingface.co/ihanif/whisper-base-pashto", "category": "model", "source": "huggingface", "status": "verified", "summary": "Fine-tuned Whisper Base checkpoint for Pashto ASR with FLEURS ps_af evaluation metadata.", "primary_use": "Pashto ASR baseline and speed-accuracy comparison", "tasks": [ "asr" ], "tags": [ "pashto", "model", "huggingface", "asr" ], "evidence_text": "Model ID includes Pashto and card metadata references FLEURS config ps_af.", "evidence_url": "https://huggingface.co/api/models/ihanif/whisper-base-pashto", "markers": [ "Pashto", "ps_af" ] }, { "id": "project-hf-space-zamai-mistral-7b-pashto", "title": "ZamAI-Mistral-7B-Pashto Space", "url": "https://huggingface.co/spaces/tasal9/ZamAI-Mistral-7B-Pashto-space", "category": "project", "source": "huggingface", "status": "verified", "summary": "Gradio project space demonstrating a Pashto-adapted Mistral 7B interface.", "primary_use": "Interactive Pashto LLM project demo", "tasks": [ "llm", "demo" ], "tags": [ "pashto", "project", "huggingface-space", "llm" ], "evidence_text": "Space title and ID explicitly include Pashto and model card metadata exposes project details.", "evidence_url": "https://huggingface.co/api/spaces/tasal9/ZamAI-Mistral-7B-Pashto-space", "markers": [ "Pashto" ] }, { "id": "dataset-hf-adnankhan769-proper-dataset-english-2-pashto", "title": "adnankhan769/proper_dataset_english_2_pashto", "url": "https://huggingface.co/datasets/adnankhan769/proper_dataset_english_2_pashto", "category": "dataset", "source": "huggingface", "status": "verified", "summary": "Pashto bilingual/translation dataset discovered from huggingface for MT experimentation.", "primary_use": "Machine translation and bilingual corpus development", "tasks": [ "mt" ], "tags": [ "pashto", "dataset", 
"huggingface", "mt" ], "evidence_text": "Matched by Pashto keyword in Hugging Face search results.", "evidence_url": "https://huggingface.co/datasets/adnankhan769/proper_dataset_english_2_pashto", "markers": [ "pashto" ] }, { "id": "dataset-hf-ihanif-pashto-asr-wer", "title": "ihanif/pashto_asr_wer", "url": "https://huggingface.co/datasets/ihanif/pashto_asr_wer", "category": "dataset", "source": "huggingface", "status": "verified", "summary": "Pashto speech dataset discovered from huggingface for ASR training and evaluation.", "primary_use": "ASR training and evaluation data source", "tasks": [ "asr" ], "tags": [ "pashto", "dataset", "huggingface", "asr" ], "evidence_text": "Matched by Pashto keyword in Hugging Face search results.", "evidence_url": "https://huggingface.co/datasets/ihanif/pashto_asr_wer", "markers": [ "pashto" ] }, { "id": "dataset-hf-ihanif-pashto-speech-ds", "title": "ihanif/pashto_speech_ds", "url": "https://huggingface.co/datasets/ihanif/pashto_speech_ds", "category": "dataset", "source": "huggingface", "status": "verified", "summary": "Pashto speech dataset discovered from huggingface for ASR training and evaluation.", "primary_use": "ASR training and evaluation data source", "tasks": [ "asr" ], "tags": [ "pashto", "dataset", "huggingface", "asr" ], "evidence_text": "Matched by Pashto keyword in Hugging Face search results.", "evidence_url": "https://huggingface.co/datasets/ihanif/pashto_speech_ds", "markers": [ "pashto" ] }, { "id": "dataset-hf-ihanif-pashto-speech-parquet-10k", "title": "ihanif/pashto_speech_parquet_10k", "url": "https://huggingface.co/datasets/ihanif/pashto_speech_parquet_10k", "category": "dataset", "source": "huggingface", "status": "verified", "summary": "Pashto speech dataset discovered from huggingface for ASR training and evaluation.", "primary_use": "ASR training and evaluation data source", "tasks": [ "asr" ], "tags": [ "pashto", "dataset", "huggingface", "asr" ], "evidence_text": "Matched by Pashto keyword in 
Hugging Face search results.", "evidence_url": "https://huggingface.co/datasets/ihanif/pashto_speech_parquet_10k", "markers": [ "pashto" ] }, { "id": "dataset-hf-saillab-alpaca-pashto-taco", "title": "saillab/alpaca_pashto_taco", "url": "https://huggingface.co/datasets/saillab/alpaca_pashto_taco", "category": "dataset", "source": "huggingface", "status": "verified", "summary": "Pashto-focused dataset discovered from huggingface candidate sync.", "primary_use": "Instruction tuning and LLM adaptation data source", "tasks": [ "llm" ], "tags": [ "pashto", "dataset", "huggingface", "llm" ], "evidence_text": "Matched by Pashto keyword in Hugging Face search results.", "evidence_url": "https://huggingface.co/datasets/saillab/alpaca_pashto_taco", "markers": [ "pashto" ] }, { "id": "dataset-hf-sherwindesouza-pashto-common-voice-20", "title": "SherwinDesouza/pashto-common-voice-20", "url": "https://huggingface.co/datasets/SherwinDesouza/pashto-common-voice-20", "category": "dataset", "source": "huggingface", "status": "verified", "summary": "Pashto-focused dataset discovered from huggingface candidate sync.", "primary_use": "Pashto data source for NLP experimentation", "tasks": [ "nlp" ], "tags": [ "pashto", "dataset", "huggingface", "nlp" ], "evidence_text": "Matched by Pashto keyword in Hugging Face search results.", "evidence_url": "https://huggingface.co/datasets/SherwinDesouza/pashto-common-voice-20", "markers": [ "pashto" ] }, { "id": "dataset-hf-tasal9-zamai-pashto-dataset", "title": "tasal9/ZamAI_Pashto_Dataset", "url": "https://huggingface.co/datasets/tasal9/ZamAI_Pashto_Dataset", "category": "dataset", "source": "huggingface", "status": "verified", "summary": "Pashto-focused dataset discovered from huggingface candidate sync.", "primary_use": "Pashto data source for NLP experimentation", "tasks": [ "nlp" ], "tags": [ "pashto", "dataset", "huggingface", "nlp" ], "evidence_text": "Matched by Pashto keyword in Hugging Face search results.", "evidence_url": 
"https://huggingface.co/datasets/tasal9/ZamAI_Pashto_Dataset", "markers": [ "pashto" ] }, { "id": "dataset-kaggle-english-pashto-language-dataset-epld", "title": "English-Pashto Language Dataset (EPLD)", "url": "https://www.kaggle.com/datasets/rabiakhan827/english-pashto-language-dataset-epld", "category": "dataset", "source": "kaggle", "status": "verified", "summary": "Pashto bilingual/translation dataset discovered from kaggle for MT experimentation.", "primary_use": "Machine translation and bilingual corpus development", "tasks": [ "mt" ], "tags": [ "pashto", "dataset", "kaggle", "mt" ], "evidence_text": "Kaggle dataset title/subtitle includes Pashto keyword.", "evidence_url": "https://www.kaggle.com/datasets/rabiakhan827/english-pashto-language-dataset-epld", "markers": [ "Pashto" ] }, { "id": "dataset-kaggle-katib-s-pashto-text-imagebase-kpti", "title": "Katib's Pashto Text Imagebase (KPTI)", "url": "https://www.kaggle.com/datasets/hassanamin/katibs-pashto-text-imagebase-kpti", "category": "dataset", "source": "kaggle", "status": "verified", "summary": "Pashto OCR-oriented dataset discovered from kaggle for document and script recognition work.", "primary_use": "OCR training and evaluation data source", "tasks": [ "ocr" ], "tags": [ "pashto", "dataset", "kaggle", "ocr" ], "evidence_text": "Kaggle dataset title/subtitle includes Pashto keyword.", "evidence_url": "https://www.kaggle.com/datasets/hassanamin/katibs-pashto-text-imagebase-kpti", "markers": [ "Pashto" ] }, { "id": "dataset-kaggle-pashto-ocr", "title": "Pashto OCR", "url": "https://www.kaggle.com/datasets/hassanamin/pashto-ocr", "category": "dataset", "source": "kaggle", "status": "verified", "summary": "Pashto OCR-oriented dataset discovered from kaggle for document and script recognition work.", "primary_use": "OCR training and evaluation data source", "tasks": [ "ocr" ], "tags": [ "pashto", "dataset", "kaggle", "ocr" ], "evidence_text": "Kaggle dataset title/subtitle includes Pashto keyword.", 
"evidence_url": "https://www.kaggle.com/datasets/hassanamin/pashto-ocr", "markers": [ "Pashto" ] }, { "id": "dataset-kaggle-common-voice-24-0-pashto-speech-dataset", "title": "Common Voice 24.0: Pashto Speech Dataset", "url": "https://www.kaggle.com/datasets/ataullahaali/common-voice-scripted-speech-24-0-pashto", "category": "dataset", "source": "kaggle", "status": "verified", "summary": "Pashto speech dataset discovered from kaggle for ASR training and evaluation.", "primary_use": "ASR training and evaluation data source", "tasks": [ "asr" ], "tags": [ "pashto", "dataset", "kaggle", "asr" ], "evidence_text": "Kaggle dataset title/subtitle includes Pashto keyword.", "evidence_url": "https://www.kaggle.com/datasets/ataullahaali/common-voice-scripted-speech-24-0-pashto", "markers": [ "Pashto" ] }, { "id": "model-hf-ihanif-pashto-asr-base", "title": "ihanif/pashto-asr-base", "url": "https://huggingface.co/ihanif/pashto-asr-base", "category": "model", "source": "huggingface", "status": "verified", "summary": "Pashto ASR model checkpoint discovered from huggingface candidate sync.", "primary_use": "Pashto ASR baseline and model comparison", "tasks": [ "asr" ], "tags": [ "pashto", "model", "huggingface", "asr" ], "evidence_text": "Matched by Pashto keyword in Hugging Face search results.", "evidence_url": "https://huggingface.co/ihanif/pashto-asr-base", "markers": [ "pashto" ] }, { "id": "model-hf-ihanif-wav2vec2-xls-r-300m-pashto-lm", "title": "ihanif/wav2vec2-xls-r-300m-pashto-lm", "url": "https://huggingface.co/ihanif/wav2vec2-xls-r-300m-pashto-lm", "category": "model", "source": "huggingface", "status": "verified", "summary": "Pashto ASR model checkpoint discovered from huggingface candidate sync.", "primary_use": "Pashto ASR baseline and model comparison", "tasks": [ "asr" ], "tags": [ "pashto", "model", "huggingface", "asr" ], "evidence_text": "Matched by Pashto keyword in Hugging Face search results.", "evidence_url": 
"https://huggingface.co/ihanif/wav2vec2-xls-r-300m-pashto-lm", "markers": [ "pashto" ] }, { "id": "model-hf-ihanif-whisper-large-pashto", "title": "ihanif/whisper-large-pashto", "url": "https://huggingface.co/ihanif/whisper-large-pashto", "category": "model", "source": "huggingface", "status": "verified", "summary": "Pashto ASR model checkpoint discovered from huggingface candidate sync.", "primary_use": "Pashto ASR baseline and model comparison", "tasks": [ "asr" ], "tags": [ "pashto", "model", "huggingface", "asr" ], "evidence_text": "Matched by Pashto keyword in Hugging Face search results.", "evidence_url": "https://huggingface.co/ihanif/whisper-large-pashto", "markers": [ "pashto" ] }, { "id": "model-hf-ihanif-whisper-medium-pashto-3e-7", "title": "ihanif/whisper-medium-pashto-3e-7", "url": "https://huggingface.co/ihanif/whisper-medium-pashto-3e-7", "category": "model", "source": "huggingface", "status": "verified", "summary": "Pashto ASR model checkpoint discovered from huggingface candidate sync.", "primary_use": "Pashto ASR baseline and model comparison", "tasks": [ "asr" ], "tags": [ "pashto", "model", "huggingface", "asr" ], "evidence_text": "Matched by Pashto keyword in Hugging Face search results.", "evidence_url": "https://huggingface.co/ihanif/whisper-medium-pashto-3e-7", "markers": [ "pashto" ] }, { "id": "model-hf-ihanif-whisper-small-pashto", "title": "ihanif/whisper-small-pashto", "url": "https://huggingface.co/ihanif/whisper-small-pashto", "category": "model", "source": "huggingface", "status": "verified", "summary": "Pashto ASR model checkpoint discovered from huggingface candidate sync.", "primary_use": "Pashto ASR baseline and model comparison", "tasks": [ "asr" ], "tags": [ "pashto", "model", "huggingface", "asr" ], "evidence_text": "Matched by Pashto keyword in Hugging Face search results.", "evidence_url": "https://huggingface.co/ihanif/whisper-small-pashto", "markers": [ "pashto" ] }, { "id": "model-hf-ihanif-xls-r-1b-pashto", "title": 
"ihanif/xls-r-1b-pashto", "url": "https://huggingface.co/ihanif/xls-r-1b-pashto", "category": "model", "source": "huggingface", "status": "verified", "summary": "Pashto ASR model checkpoint discovered from huggingface candidate sync.", "primary_use": "Pashto ASR baseline and model comparison", "tasks": [ "asr" ], "tags": [ "pashto", "model", "huggingface", "asr" ], "evidence_text": "Matched by Pashto keyword in Hugging Face search results.", "evidence_url": "https://huggingface.co/ihanif/xls-r-1b-pashto", "markers": [ "pashto" ] }, { "id": "model-hf-ijazulhaq-bert-base-pashto-v1", "title": "ijazulhaq/bert-base-pashto-v1", "url": "https://huggingface.co/ijazulhaq/bert-base-pashto-v1", "category": "model", "source": "huggingface", "status": "verified", "summary": "Pashto NLP model checkpoint discovered from huggingface candidate sync.", "primary_use": "Pashto model baseline for downstream NLP tasks", "tasks": [ "nlp" ], "tags": [ "pashto", "model", "huggingface", "nlp" ], "evidence_text": "Matched by Pashto keyword in Hugging Face search results.", "evidence_url": "https://huggingface.co/ijazulhaq/bert-base-pashto-v1", "markers": [ "pashto" ] }, { "id": "project-hf-space-ihanif-wav2vec2-bert-pashto-asr", "title": "ihanif/wav2vec2-bert-pashto-asr", "url": "https://huggingface.co/spaces/ihanif/wav2vec2-bert-pashto-asr", "category": "project", "source": "huggingface", "status": "verified", "summary": "Pashto-focused interactive project discovered from huggingface for demonstration and quick evaluation.", "primary_use": "Interactive Pashto demo and quick qualitative validation", "tasks": [ "asr", "nlp", "demo" ], "tags": [ "pashto", "project", "huggingface", "asr", "nlp", "demo" ], "evidence_text": "Matched by Pashto keyword in Hugging Face Spaces search.", "evidence_url": "https://huggingface.co/spaces/ihanif/wav2vec2-bert-pashto-asr", "markers": [ "pashto" ] }, { "id": "project-hf-space-nasirkhansayyad-pashto-whisper-demo", "title": 
"nasirkhansayyad/pashto-whisper-demo", "url": "https://huggingface.co/spaces/nasirkhansayyad/pashto-whisper-demo", "category": "project", "source": "huggingface", "status": "verified", "summary": "Pashto-focused interactive project discovered from huggingface for demonstration and quick evaluation.", "primary_use": "Interactive Pashto demo and quick qualitative validation", "tasks": [ "asr", "demo" ], "tags": [ "pashto", "project", "huggingface", "asr", "demo" ], "evidence_text": "Matched by Pashto keyword in Hugging Face Spaces search.", "evidence_url": "https://huggingface.co/spaces/nasirkhansayyad/pashto-whisper-demo", "markers": [ "pashto" ] }, { "id": "project-hf-space-tasal9-zamai-phi3-mini-pashto-demo", "title": "tasal9/ZamAI-Phi3-Mini-Pashto-Demo", "url": "https://huggingface.co/spaces/tasal9/ZamAI-Phi3-Mini-Pashto-Demo", "category": "project", "source": "huggingface", "status": "verified", "summary": "Pashto-focused interactive project discovered from huggingface for demonstration and quick evaluation.", "primary_use": "Interactive Pashto demo and quick qualitative validation", "tasks": [ "llm", "demo" ], "tags": [ "pashto", "project", "huggingface", "llm", "demo" ], "evidence_text": "Matched by Pashto keyword in Hugging Face Spaces search.", "evidence_url": "https://huggingface.co/spaces/tasal9/ZamAI-Phi3-Mini-Pashto-Demo", "markers": [ "pashto" ] }, { "id": "project-hf-space-umar4321-pashto-to-english-urdu", "title": "Umar4321/Pashto-To-English-Urdu", "url": "https://huggingface.co/spaces/Umar4321/Pashto-To-English-Urdu", "category": "project", "source": "huggingface", "status": "verified", "summary": "Pashto-focused interactive project discovered from huggingface for demonstration and quick evaluation.", "primary_use": "Interactive Pashto demo and quick qualitative validation", "tasks": [ "mt", "demo" ], "tags": [ "pashto", "project", "huggingface", "mt", "demo" ], "evidence_text": "Matched by Pashto keyword in Hugging Face Spaces search.", 
"evidence_url": "https://huggingface.co/spaces/Umar4321/Pashto-To-English-Urdu", "markers": [ "pashto" ] }, { "id": "project-github-fazlullahmamond-pashto-typing", "title": "Fazlullahmamond/Pashto-Typing", "url": "https://github.com/Fazlullahmamond/Pashto-Typing", "category": "project", "source": "github", "status": "verified", "summary": "Pashto-focused interactive project discovered from github for demonstration and quick evaluation.", "primary_use": "Interactive Pashto demo and quick qualitative validation", "tasks": [ "demo" ], "tags": [ "pashto", "project", "github", "demo" ], "evidence_text": "Repository metadata (name/description/topics) includes Pashto markers.", "evidence_url": "https://github.com/Fazlullahmamond/Pashto-Typing", "markers": [ "pashto" ] }, { "id": "paper-s2-benchmarking-whisper-for-low-resource-speech-recognition-an-n-shot-evaluation-on-pashto-pu", "title": "Benchmarking Whisper for Low-Resource Speech Recognition: An N-Shot Evaluation on Pashto, Punjabi, and Urdu", "url": "https://www.semanticscholar.org/paper/13104eddc785756132a19242ac7e74442b145693", "category": "paper", "source": "other", "status": "verified", "summary": "Pashto language technology paper discovered from other for research reference.", "primary_use": "Pashto research reference for methods and benchmarking", "tasks": [ "asr", "mt" ], "tags": [ "pashto", "paper", "other", "asr", "mt" ], "evidence_text": "Matched by Semantic Scholar query: pashto.", "evidence_url": "https://www.semanticscholar.org/paper/13104eddc785756132a19242ac7e74442b145693", "markers": [ "pashto" ] }, { "id": "paper-s2-deep-learning-based-detection-of-one-and-two-column-textual-blocks-in-camera-captured-pash", "title": "Deep Learning-Based Detection of One and Two-Column Textual Blocks in Camera-Captured Pashto Documents Images", "url": "https://www.semanticscholar.org/paper/8c9d2628e23d5c27edc656071f11f0e78124d182", "category": "paper", "source": "other", "status": "verified", "summary": "Pashto 
language technology paper discovered via Semantic Scholar for research reference.", "primary_use": "Pashto research reference for methods and benchmarking", "tasks": [ "ocr" ], "tags": [ "pashto", "paper", "other", "ocr" ], "evidence_text": "Matched by Semantic Scholar query: pashto.", "evidence_url": "https://www.semanticscholar.org/paper/8c9d2628e23d5c27edc656071f11f0e78124d182", "markers": [ "pashto" ] }, { "id": "paper-s2-out-of-vocabulary-pashto-spell-checker-using-morphological-operations", "title": "Out-of-Vocabulary Pashto Spell Checker using Morphological Operations", "url": "https://www.semanticscholar.org/paper/802aae68a6a7fdfb29d51be03fb2b09e29311fa7", "category": "paper", "source": "other", "status": "verified", "summary": "Pashto language technology paper discovered via Semantic Scholar for research reference.", "primary_use": "Pashto research reference for methods and benchmarking", "tasks": [ "nlp" ], "tags": [ "pashto", "paper", "other", "nlp" ], "evidence_text": "Matched by Semantic Scholar query: pashto.", "evidence_url": "https://www.semanticscholar.org/paper/802aae68a6a7fdfb29d51be03fb2b09e29311fa7", "markers": [ "pashto" ] }, { "id": "paper-s2-pashto-shallow-parsing-a-deep-learning-approach", "title": "Pashto Shallow Parsing: A Deep Learning Approach", "url": "https://www.semanticscholar.org/paper/be36455bb4eca60accb3e6866f345132f0dac1e5", "category": "paper", "source": "other", "status": "verified", "summary": "Pashto language technology paper discovered via Semantic Scholar for research reference.", "primary_use": "Pashto research reference for methods and benchmarking", "tasks": [ "nlp" ], "tags": [ "pashto", "paper", "other", "nlp" ], "evidence_text": "Matched by Semantic Scholar query: pashto.", "evidence_url": "https://www.semanticscholar.org/paper/be36455bb4eca60accb3e6866f345132f0dac1e5", "markers": [ "pashto" ] }, { "id": "paper-s2-pos-tagging-of-low-resource-pashto-language-annotated-corpus-and-bert-based-model", "title": "POS tagging of low-resource Pashto 
language: annotated corpus and BERT-based model", "url": "https://www.semanticscholar.org/paper/1b2d5c896fec735483e8c8fb0a75e13125e08769", "category": "paper", "source": "other", "status": "verified", "summary": "Pashto language technology paper discovered via Semantic Scholar for research reference.", "primary_use": "Pashto research reference for methods and benchmarking", "tasks": [ "nlp" ], "tags": [ "pashto", "paper", "other", "nlp" ], "evidence_text": "Matched by Semantic Scholar query: pashto.", "evidence_url": "https://www.semanticscholar.org/paper/1b2d5c896fec735483e8c8fb0a75e13125e08769", "markers": [ "pashto" ] }, { "id": "paper-arxiv-enhancing-pashto-text-classification-using-language-processing-techniques-for-single-and-m", "title": "Enhancing Pashto Text Classification using Language Processing Techniques for Single And Multi-Label Analysis", "url": "http://arxiv.org/abs/2305.03201v1", "category": "paper", "source": "arxiv", "status": "verified", "summary": "Pashto language technology paper discovered from arxiv for research reference.", "primary_use": "Pashto research reference for methods and benchmarking", "tasks": [ "nlp" ], "tags": [ "pashto", "paper", "arxiv", "nlp" ], "evidence_text": "Matched by arXiv query: all:pashto.", "evidence_url": "http://arxiv.org/abs/2305.03201v1", "markers": [ "pashto" ] }, { "id": "paper-arxiv-knn-and-ann-based-recognition-of-handwritten-pashto-letters-using-zoning-features", "title": "KNN and ANN-based Recognition of Handwritten Pashto Letters using Zoning Features", "url": "http://arxiv.org/abs/1904.03391v2", "category": "paper", "source": "arxiv", "status": "verified", "summary": "Pashto language technology paper discovered from arxiv for research reference.", "primary_use": "Pashto research reference for methods and benchmarking", "tasks": [ "ocr" ], "tags": [ "pashto", "paper", "arxiv", "ocr" ], "evidence_text": "Matched by arXiv query: all:pashto.", "evidence_url": "http://arxiv.org/abs/1904.03391v2", "markers": [ 
"pashto" ] }, { "id": "dataset-hf-oowais-pushto-text-to-speech-dataset", "title": "oowais/pushto-text-to-speech-dataset", "url": "https://huggingface.co/datasets/oowais/pushto-text-to-speech-dataset", "category": "dataset", "source": "huggingface", "status": "verified", "summary": "Pashto speech dataset discovered from huggingface candidate sync for ASR training and evaluation.", "primary_use": "ASR training and evaluation data source", "tasks": [ "asr", "tts" ], "tags": [ "pashto", "dataset", "huggingface", "asr", "tts" ], "evidence_text": "Matched by Pashto keyword in Hugging Face search results.", "evidence_url": "https://huggingface.co/datasets/oowais/pushto-text-to-speech-dataset", "markers": [ "pashto" ] }, { "id": "dataset-hf-ihanif-pashto-speech-20k", "title": "ihanif/pashto_speech_20k", "url": "https://huggingface.co/datasets/ihanif/pashto_speech_20k", "category": "dataset", "source": "huggingface", "status": "verified", "summary": "Pashto speech dataset discovered from huggingface candidate sync for ASR training and evaluation.", "primary_use": "ASR training and evaluation data source", "tasks": [ "asr" ], "tags": [ "pashto", "dataset", "huggingface", "asr" ], "evidence_text": "Matched by Pashto keyword in Hugging Face search results.", "evidence_url": "https://huggingface.co/datasets/ihanif/pashto_speech_20k", "markers": [ "pashto" ] }, { "id": "dataset-hf-ihanif-pashto-speech-5k", "title": "ihanif/pashto_speech_5k", "url": "https://huggingface.co/datasets/ihanif/pashto_speech_5k", "category": "dataset", "source": "huggingface", "status": "verified", "summary": "Pashto speech dataset discovered from huggingface candidate sync for ASR training and evaluation.", "primary_use": "ASR training and evaluation data source", "tasks": [ "asr" ], "tags": [ "pashto", "dataset", "huggingface", "asr" ], "evidence_text": "Matched by Pashto keyword in Hugging Face search results.", "evidence_url": "https://huggingface.co/datasets/ihanif/pashto_speech_5k", "markers": [ 
"pashto" ] }, { "id": "dataset-hf-tasal9-pashto-dataset", "title": "tasal9/Pashto_Dataset", "url": "https://huggingface.co/datasets/tasal9/Pashto_Dataset", "category": "dataset", "source": "huggingface", "status": "verified", "summary": "Pashto-focused dataset discovered from huggingface candidate sync.", "primary_use": "Pashto data source for NLP experimentation", "tasks": [ "nlp" ], "tags": [ "pashto", "dataset", "huggingface", "nlp" ], "evidence_text": "Matched by Pashto keyword in Hugging Face search results.", "evidence_url": "https://huggingface.co/datasets/tasal9/Pashto_Dataset", "markers": [ "pashto" ] }, { "id": "model-hf-ijazulhaq-bert-base-pashto", "title": "ijazulhaq/bert-base-pashto", "url": "https://huggingface.co/ijazulhaq/bert-base-pashto", "category": "model", "source": "huggingface", "status": "verified", "summary": "Pashto NLP model checkpoint discovered from huggingface candidate sync.", "primary_use": "Pashto model baseline for downstream NLP tasks", "tasks": [ "nlp" ], "tags": [ "pashto", "model", "huggingface", "nlp" ], "evidence_text": "Matched by Pashto keyword in Hugging Face search results.", "evidence_url": "https://huggingface.co/ijazulhaq/bert-base-pashto", "markers": [ "pashto" ] }, { "id": "model-hf-ihanif-whisper-small-pashto-dropout", "title": "ihanif/whisper-small-pashto-dropout", "url": "https://huggingface.co/ihanif/whisper-small-pashto-dropout", "category": "model", "source": "huggingface", "status": "verified", "summary": "Pashto ASR model checkpoint discovered from huggingface candidate sync.", "primary_use": "Pashto ASR baseline and model comparison", "tasks": [ "asr" ], "tags": [ "pashto", "model", "huggingface", "asr" ], "evidence_text": "Matched by Pashto keyword in Hugging Face search results.", "evidence_url": "https://huggingface.co/ihanif/whisper-small-pashto-dropout", "markers": [ "pashto" ] }, { "id": "model-hf-koochikoo25-pashto-whisper-large", "title": "koochikoo25/pashto-whisper-large", "url": 
"https://huggingface.co/koochikoo25/pashto-whisper-large", "category": "model", "source": "huggingface", "status": "verified", "summary": "Pashto ASR model checkpoint discovered from huggingface candidate sync.", "primary_use": "Pashto ASR baseline and model comparison", "tasks": [ "asr" ], "tags": [ "pashto", "model", "huggingface", "asr" ], "evidence_text": "Matched by Pashto keyword in Hugging Face search results.", "evidence_url": "https://huggingface.co/koochikoo25/pashto-whisper-large", "markers": [ "pashto" ] }, { "id": "project-hf-space-ihanif-wav2vec-pashto-asr", "title": "ihanif/wav2vec-pashto-asr", "url": "https://huggingface.co/spaces/ihanif/wav2vec-pashto-asr", "category": "project", "source": "huggingface", "status": "verified", "summary": "Pashto-focused interactive project discovered from huggingface for demonstration and evaluation.", "primary_use": "Interactive Pashto demo and quick qualitative validation", "tasks": [ "asr", "demo" ], "tags": [ "pashto", "project", "huggingface", "asr", "demo" ], "evidence_text": "Matched by Pashto keyword in Hugging Face Spaces search.", "evidence_url": "https://huggingface.co/spaces/ihanif/wav2vec-pashto-asr", "markers": [ "pashto" ] }, { "id": "project-hf-space-afaqalinagra-pashto-asr-model", "title": "afaqalinagra/PASHTO-ASR-MODEL", "url": "https://huggingface.co/spaces/afaqalinagra/PASHTO-ASR-MODEL", "category": "project", "source": "huggingface", "status": "verified", "summary": "Pashto-focused interactive project discovered from huggingface for demonstration and evaluation.", "primary_use": "Interactive Pashto demo and quick qualitative validation", "tasks": [ "asr", "demo" ], "tags": [ "pashto", "project", "huggingface", "asr", "demo" ], "evidence_text": "Matched by Pashto keyword in Hugging Face Spaces search.", "evidence_url": "https://huggingface.co/spaces/afaqalinagra/PASHTO-ASR-MODEL", "markers": [ "pashto" ] }, { "id": "project-hf-space-ilyas02828-pashto-sign-language", "title": 
"ilyas02828/Pashto_Sign_Language", "url": "https://huggingface.co/spaces/ilyas02828/Pashto_Sign_Language", "category": "project", "source": "huggingface", "status": "verified", "summary": "Pashto-focused interactive project discovered from huggingface for demonstration and evaluation.", "primary_use": "Interactive Pashto demo and quick qualitative validation", "tasks": [ "demo" ], "tags": [ "pashto", "project", "huggingface", "demo" ], "evidence_text": "Matched by Pashto keyword in Hugging Face Spaces search.", "evidence_url": "https://huggingface.co/spaces/ilyas02828/Pashto_Sign_Language", "markers": [ "pashto" ] }, { "id": "project-hf-space-mahmudaq-pashtoasrnmt1", "title": "mahmudaq/PashtoASRNMT1", "url": "https://huggingface.co/spaces/mahmudaq/PashtoASRNMT1", "category": "project", "source": "huggingface", "status": "verified", "summary": "Pashto-focused interactive project discovered from huggingface for demonstration and evaluation.", "primary_use": "Interactive Pashto demo and quick qualitative validation", "tasks": [ "asr", "mt", "demo" ], "tags": [ "pashto", "project", "huggingface", "asr", "mt", "demo" ], "evidence_text": "Matched by Pashto keyword in Hugging Face Spaces search.", "evidence_url": "https://huggingface.co/spaces/mahmudaq/PashtoASRNMT1", "markers": [ "pashto" ] }, { "id": "paper-s2-enhancing-pashto-ner-using-machine-labeled-data-and-transformer-based-models", "title": "Enhancing Pashto NER Using Machine-Labeled Data and Transformer-Based Models", "url": "https://www.semanticscholar.org/paper/be851ecf9197ef9bb8bf764abf4db0dda95cd9da", "category": "paper", "source": "other", "status": "verified", "summary": "Pashto language technology paper discovered from other for research reference.", "primary_use": "Pashto research reference for methods and benchmarking", "tasks": [ "nlp" ], "tags": [ "pashto", "paper", "other", "nlp" ], "evidence_text": "Matched by explicit Pashto marker in paper title from Semantic Scholar search.", "evidence_url": 
"https://www.semanticscholar.org/paper/be851ecf9197ef9bb8bf764abf4db0dda95cd9da", "markers": [ "pashto" ] }, { "id": "dataset-hf-aamirhs-pashto-audio-wav2vec", "title": "aamirhs/pashto-audio-wav2vec", "url": "https://huggingface.co/datasets/aamirhs/pashto-audio-wav2vec", "category": "dataset", "source": "huggingface", "status": "verified", "summary": "Pashto speech dataset surfaced from Hugging Face candidate sync for ASR experiments.", "primary_use": "Pashto ASR data exploration and baseline training", "tasks": [ "asr" ], "tags": [ "pashto", "dataset", "huggingface", "speech", "asr" ], "evidence_text": "Matched by Pashto keyword in Hugging Face search results.", "evidence_url": "https://huggingface.co/datasets/aamirhs/pashto-audio-wav2vec", "markers": [ "pashto" ] }, { "id": "dataset-hf-alimuhammad73-pashto-poetry", "title": "AliMuhammad73/Pashto-Poetry", "url": "https://huggingface.co/datasets/AliMuhammad73/Pashto-Poetry", "category": "dataset", "source": "huggingface", "status": "verified", "summary": "Pashto poetry text dataset surfaced from Hugging Face candidate sync for NLP experiments.", "primary_use": "Pashto poetry corpus for language modeling and text analysis", "tasks": [ "nlp" ], "tags": [ "pashto", "dataset", "huggingface", "text", "poetry", "nlp" ], "evidence_text": "Matched by Pashto keyword in Hugging Face search results.", "evidence_url": "https://huggingface.co/datasets/AliMuhammad73/Pashto-Poetry", "markers": [ "pashto" ] }, { "id": "model-hf-aamirhs-wav2vec2-large-xls-r-300m-pashto-colab", "title": "aamirhs/wav2vec2-large-xls-r-300m-pashto-colab", "url": "https://huggingface.co/aamirhs/wav2vec2-large-xls-r-300m-pashto-colab", "category": "model", "source": "huggingface", "status": "verified", "summary": "Pashto ASR model checkpoint surfaced from Hugging Face candidate sync.", "primary_use": "Pashto ASR baseline and transfer-learning comparison", "tasks": [ "asr" ], "tags": [ "pashto", "model", "huggingface", "asr" ], "evidence_text": "Matched 
by Pashto keyword in Hugging Face search results.", "evidence_url": "https://huggingface.co/aamirhs/wav2vec2-large-xls-r-300m-pashto-colab", "markers": [ "pashto" ] }, { "id": "project-hf-space-aizazayyubi-pashto-asr", "title": "Aizazayyubi/pashto_asr", "url": "https://huggingface.co/spaces/Aizazayyubi/pashto_asr", "category": "project", "source": "huggingface", "status": "verified", "summary": "Pashto ASR interactive demo surfaced from Hugging Face Spaces candidate sync.", "primary_use": "Interactive Pashto ASR demo for qualitative evaluation", "tasks": [ "asr", "demo" ], "tags": [ "pashto", "project", "huggingface", "asr", "demo" ], "evidence_text": "Matched by Pashto keyword in Hugging Face Spaces search.", "evidence_url": "https://huggingface.co/spaces/Aizazayyubi/pashto_asr", "markers": [ "pashto" ] }, { "id": "paper-arxiv-from-scarcity-to-scale-pashto-common-voice", "title": "From Scarcity to Scale: A Release-Level Analysis of the Pashto Common Voice Dataset", "url": "http://arxiv.org/abs/2602.14062v1", "category": "paper", "source": "arxiv", "status": "verified", "summary": "Research paper analyzing Pashto Common Voice releases and dataset scaling characteristics.", "primary_use": "ASR data quality and release trend reference", "tasks": [ "asr", "benchmarking" ], "tags": [ "pashto", "paper", "arxiv", "asr", "common-voice" ], "evidence_text": "Matched by Pashto marker in paper title from arXiv query results.", "evidence_url": "http://arxiv.org/abs/2602.14062v1", "markers": [ "pashto" ] }, { "id": "paper-arxiv-tuning-traditional-pashto-text-classification", "title": "Tuning Traditional Language Processing Approaches for Pashto Text Classification", "url": "http://arxiv.org/abs/2305.03737v1", "category": "paper", "source": "arxiv", "status": "verified", "summary": "Research paper focused on Pashto text classification using traditional NLP approaches.", "primary_use": "Pashto text classification method reference", "tasks": [ "nlp" ], "tags": [ "pashto", "paper", 
"arxiv", "nlp", "classification" ], "evidence_text": "Matched by Pashto marker in paper title from arXiv query results.", "evidence_url": "http://arxiv.org/abs/2305.03737v1", "markers": [ "pashto" ] }, { "id": "dataset-dataverse-iarpa-babel-pashto-language-pack-v0-4by", "title": "IARPA Babel Pashto Language Pack IARPA-babel104b-v0.4bY", "url": "https://hdl.handle.net/11272.1/AB2/GLFN3X", "category": "dataset", "source": "dataverse", "status": "verified", "summary": "Pashto Babel language pack dataset for speech and language processing evaluation.", "primary_use": "Pashto speech dataset for ASR and language identification experiments", "tasks": [ "asr", "benchmarking" ], "tags": [ "pashto", "dataset", "dataverse", "speech", "asr", "babel" ], "evidence_text": "Dataverse metadata includes Pashto markers in dataset title or description.", "evidence_url": "https://hdl.handle.net/11272.1/AB2/GLFN3X", "markers": [ "pashto" ] }, { "id": "paper-arxiv-image-to-text-pashto-farsi-traditional-chinese", "title": "Development of a New Image-to-text Conversion System for Pashto, Farsi and Traditional Chinese", "url": "http://arxiv.org/abs/2005.08650v1", "category": "paper", "source": "arxiv", "status": "verified", "summary": "Research paper on image-to-text conversion including Pashto OCR.", "primary_use": "Pashto OCR method reference", "tasks": [ "ocr", "nlp" ], "tags": [ "pashto", "paper", "arxiv", "ocr" ], "evidence_text": "Matched by Pashto marker in paper title from arXiv query results.", "evidence_url": "http://arxiv.org/abs/2005.08650v1", "markers": [ "pashto" ] }, { "id": "paper-openalex-benchmark-pashto-handwritten-character-dataset-ocr", "title": "Benchmark Pashto Handwritten Character Dataset and Pashto Object Character Recognition (OCR) Using Deep Neural Network with Rule Activation Function", "url": "https://doi.org/10.1155/2021/6669672", "category": "paper", "source": "openalex", "status": "verified", "summary": "Research paper introducing a benchmark dataset and OCR 
approach for Pashto handwritten characters.", "primary_use": "Pashto handwritten OCR benchmark and methodology reference", "tasks": [ "ocr", "benchmarking" ], "tags": [ "pashto", "paper", "openalex", "ocr", "benchmark" ], "evidence_text": "Matched by explicit Pashto marker in title from OpenAlex works search.", "evidence_url": "https://doi.org/10.1155/2021/6669672", "markers": [ "pashto" ] }, { "id": "paper-openalex-asr-isolated-pashto-spoken-digits-mfcc-knn", "title": "Database development and automatic speech recognition of isolated Pashto spoken digits using MFCC and K-NN", "url": "https://doi.org/10.1007/s10772-014-9267-z", "category": "paper", "source": "openalex", "status": "verified", "summary": "Research paper on isolated Pashto spoken-digit ASR with MFCC and K-NN.", "primary_use": "Pashto ASR baseline method reference for digit recognition", "tasks": [ "asr" ], "tags": [ "pashto", "paper", "openalex", "asr", "speech" ], "evidence_text": "Matched by explicit Pashto marker in title from OpenAlex works search.", "evidence_url": "https://doi.org/10.1007/s10772-014-9267-z", "markers": [ "pashto" ] }, { "id": "paper-openalex-pashto-isolated-digits-recognition-dcnn", "title": "Pashto isolated digits recognition using deep convolutional neural network", "url": "https://doi.org/10.1016/j.heliyon.2020.e03372", "category": "paper", "source": "openalex", "status": "verified", "summary": "Research paper on Pashto isolated-digit recognition using deep convolutional neural networks.", "primary_use": "Pashto speech recognition research reference", "tasks": [ "asr" ], "tags": [ "pashto", "paper", "openalex", "asr", "deep-learning" ], "evidence_text": "Matched by explicit Pashto marker in title from OpenAlex works search.", "evidence_url": "https://doi.org/10.1016/j.heliyon.2020.e03372", "markers": [ "pashto" ] }, { "id": "paper-openalex-pashto-offensive-language-detection-benchmark-bert", "title": "Pashto offensive language detection: a benchmark dataset and monolingual 
Pashto BERT", "url": "https://doi.org/10.7717/peerj-cs.1617", "category": "paper", "source": "openalex", "status": "verified", "summary": "Research paper on Pashto offensive language detection with benchmark dataset and monolingual BERT model.", "primary_use": "Pashto NLP toxicity detection benchmark and model reference", "tasks": [ "nlp", "benchmarking" ], "tags": [ "pashto", "paper", "openalex", "nlp", "bert", "benchmark" ], "evidence_text": "Matched by explicit Pashto marker in title from OpenAlex works search.", "evidence_url": "https://doi.org/10.7717/peerj-cs.1617", "markers": [ "pashto" ] }, { "id": "paper-openalex-phti-pashto-handwritten-text-imagebase", "title": "PHTI: Pashto Handwritten Text Imagebase for Deep Learning Applications", "url": "https://doi.org/10.1109/access.2022.3216881", "category": "paper", "source": "openalex", "status": "verified", "summary": "Research paper describing PHTI, a Pashto handwritten text imagebase for deep learning.", "primary_use": "Pashto OCR dataset and benchmark reference", "tasks": [ "ocr", "benchmarking" ], "tags": [ "pashto", "paper", "openalex", "ocr", "dataset" ], "evidence_text": "Matched by explicit Pashto marker in title from OpenAlex works search.", "evidence_url": "https://doi.org/10.1109/access.2022.3216881", "markers": [ "pashto" ] }, { "id": "paper-openalex-recognition-of-pashto-handwritten-characters-deep-learning", "title": "Recognition of Pashto Handwritten Characters Based on Deep Learning", "url": "https://doi.org/10.3390/s20205884", "category": "paper", "source": "openalex", "status": "verified", "summary": "Research paper on deep-learning-based recognition of Pashto handwritten characters.", "primary_use": "Pashto OCR model reference for handwritten character recognition", "tasks": [ "ocr" ], "tags": [ "pashto", "paper", "openalex", "ocr", "deep-learning" ], "evidence_text": "Matched by explicit Pashto marker in title from OpenAlex works search.", "evidence_url": "https://doi.org/10.3390/s20205884", 
"markers": [ "pashto" ] }, { "id": "paper-openalex-kpti-katib-pashto-text-imagebase-benchmark", "title": "KPTI: Katib's Pashto Text Imagebase and Deep Learning Benchmark", "url": "https://doi.org/10.1109/icfhr.2016.0090", "category": "paper", "source": "openalex", "status": "verified", "summary": "Research paper introducing KPTI, a Pashto text imagebase and benchmark for handwritten recognition.", "primary_use": "Pashto OCR dataset and benchmarking reference", "tasks": [ "ocr", "benchmarking" ], "tags": [ "pashto", "paper", "openalex", "ocr", "benchmark" ], "evidence_text": "Matched by explicit Pashto marker in title from OpenAlex works search.", "evidence_url": "https://doi.org/10.1109/icfhr.2016.0090", "markers": [ "pashto" ] }, { "id": "paper-openalex-pioneer-dataset-handwritten-pashto-cnn", "title": "Pioneer dataset and recognition of Handwritten Pashto characters using Convolution Neural Networks", "url": "https://doi.org/10.1177/0020294020964826", "category": "paper", "source": "openalex", "status": "verified", "summary": "Research paper on a pioneer handwritten Pashto character dataset with CNN-based recognition.", "primary_use": "Pashto handwritten character recognition reference", "tasks": [ "ocr", "benchmarking" ], "tags": [ "pashto", "paper", "openalex", "ocr", "deep-learning" ], "evidence_text": "Matched by explicit Pashto marker in title from OpenAlex works search.", "evidence_url": "https://doi.org/10.1177/0020294020964826", "markers": [ "pashto" ] }, { "id": "paper-openalex-scale-rotation-invariant-ocr-pashto-mdlstm", "title": "Scale and rotation invariant OCR for Pashto cursive script using MDLSTM network", "url": "https://doi.org/10.1109/icdar.2015.7333931", "category": "paper", "source": "openalex", "status": "verified", "summary": "Research paper on scale- and rotation-invariant OCR for cursive Pashto using MDLSTM.", "primary_use": "Pashto OCR model architecture reference", "tasks": [ "ocr" ], "tags": [ "pashto", "paper", "openalex", "ocr", 
"mdlstm" ], "evidence_text": "Matched by explicit Pashto marker in title from OpenAlex works search.", "evidence_url": "https://doi.org/10.1109/icdar.2015.7333931", "markers": [ "pashto" ] }, { "id": "paper-openalex-recognizable-units-pashto-ocr", "title": "Recognizable units in Pashto language for OCR", "url": "https://doi.org/10.1109/icdar.2015.7333963", "category": "paper", "source": "openalex", "status": "verified", "summary": "Research paper defining recognizable units in Pashto for OCR workflows.", "primary_use": "Pashto OCR preprocessing and unit-design reference", "tasks": [ "ocr" ], "tags": [ "pashto", "paper", "openalex", "ocr" ], "evidence_text": "Matched by explicit Pashto marker in title from OpenAlex works search.", "evidence_url": "https://doi.org/10.1109/icdar.2015.7333963", "markers": [ "pashto" ] }, { "id": "paper-openalex-shape-analysis-pashto-script-image-database-ocr", "title": "Shape analysis of Pashto script and creation of image database for OCR", "url": "https://doi.org/10.1109/icet.2009.5353160", "category": "paper", "source": "openalex", "status": "verified", "summary": "Research paper on Pashto script shape analysis and image database creation for OCR.", "primary_use": "Pashto OCR dataset design and feature reference", "tasks": [ "ocr", "benchmarking" ], "tags": [ "pashto", "paper", "openalex", "ocr", "dataset" ], "evidence_text": "Matched by explicit Pashto marker in title from OpenAlex works search.", "evidence_url": "https://doi.org/10.1109/icet.2009.5353160", "markers": [ "pashto" ] }, { "id": "paper-openalex-speech-translation-low-resource-case-pashto", "title": "Speech translation for low-resource languages: the case of Pashto", "url": "https://doi.org/10.21437/interspeech.2005-723", "category": "paper", "source": "openalex", "status": "verified", "summary": "Research paper on speech translation for low-resource languages, including Pashto.", "primary_use": "Pashto speech translation and low-resource MT reference", "tasks": [ 
"asr", "mt" ], "tags": [ "pashto", "paper", "openalex", "speech", "translation" ], "evidence_text": "Matched by explicit Pashto marker in title from OpenAlex works search.", "evidence_url": "https://doi.org/10.21437/interspeech.2005-723", "markers": [ "pashto" ] } ] }