{
  "generated_on": "2026-02-17T00:00:00Z",
  "count": 95,
  "resources": [
    {
      "id": "dataset-common-voice-ps-v24",
      "title": "Common Voice Scripted Speech 24.0 - Pashto",
      "url": "https://datacollective.mozillafoundation.org/datasets/cmj8u3pnb00llnxxbfvxo3b14",
      "category": "dataset",
      "source": "mozilla",
      "status": "verified",
      "summary": "Large open Pashto speech dataset for ASR training and evaluation.",
      "primary_use": "ASR training and evaluation",
      "tasks": [
        "asr"
      ],
      "tags": [
        "pashto",
        "speech",
        "asr"
      ],
      "evidence_text": "Official dataset page is for Pashto.",
      "evidence_url": "https://datacollective.mozillafoundation.org/datasets/cmj8u3pnb00llnxxbfvxo3b14",
      "markers": [
        "Pashto"
      ]
    },
    {
      "id": "dataset-google-fleurs",
      "title": "Google FLEURS",
      "url": "https://huggingface.co/datasets/google/fleurs",
      "category": "dataset",
      "source": "huggingface",
      "status": "verified",
      "summary": "Standard multilingual speech benchmark dataset with Pashto subset.",
      "primary_use": "Speech benchmark and external evaluation",
      "tasks": [
        "asr",
        "benchmarking"
      ],
      "tags": [
        "pashto",
        "speech",
        "benchmark"
      ],
      "evidence_text": "Dataset config includes ps_af.",
      "evidence_url": "https://huggingface.co/datasets/google/fleurs/blob/main/fleurs.py",
      "markers": [
        "ps_af"
      ]
    },
    {
      "id": "dataset-oscar-ps",
      "title": "OSCAR Corpus",
      "url": "https://huggingface.co/datasets/oscar-corpus/oscar",
      "category": "dataset",
      "source": "huggingface",
      "status": "verified",
      "summary": "Large web text corpus that includes Pashto text split.",
      "primary_use": "Language modeling and lexicon expansion",
      "tasks": [
        "nlp"
      ],
      "tags": [
        "pashto",
        "text",
        "nlp"
      ],
      "evidence_text": "Dataset includes unshuffled_deduplicated_ps split.",
      "evidence_url": "https://huggingface.co/datasets/oscar-corpus/oscar",
      "markers": [
        "unshuffled_deduplicated_ps"
      ]
    },
    {
      "id": "dataset-wikipedia-ps",
      "title": "Wikimedia Wikipedia",
      "url": "https://huggingface.co/datasets/wikimedia/wikipedia",
      "category": "dataset",
      "source": "huggingface",
      "status": "verified",
      "summary": "Wikipedia corpus with Pashto edition for cleaner text resources.",
      "primary_use": "Terminology and balanced text corpus",
      "tasks": [
        "nlp"
      ],
      "tags": [
        "pashto",
        "text",
        "nlp"
      ],
      "evidence_text": "Dataset includes 20231101.ps subset.",
      "evidence_url": "https://huggingface.co/datasets/wikimedia/wikipedia",
      "markers": [
        "20231101.ps"
      ]
    },
    {
      "id": "dataset-belebele-pbt-arab",
      "title": "Belebele",
      "url": "https://huggingface.co/datasets/facebook/belebele",
      "category": "dataset",
      "source": "huggingface",
      "status": "verified",
      "summary": "Reading comprehension dataset with Pashto script subset.",
      "primary_use": "Comprehension and multilingual NLP benchmark",
      "tasks": [
        "nlp",
        "benchmarking"
      ],
      "tags": [
        "pashto",
        "nlp",
        "benchmark"
      ],
      "evidence_text": "Dataset includes pbt_Arab subset.",
      "evidence_url": "https://huggingface.co/datasets/facebook/belebele",
      "markers": [
        "pbt_Arab"
      ]
    },
    {
      "id": "dataset-opus100-en-ps",
      "title": "OPUS-100",
      "url": "https://huggingface.co/datasets/Helsinki-NLP/opus-100",
      "category": "dataset",
      "source": "huggingface",
      "status": "verified",
      "summary": "Parallel corpus with English to Pashto split for MT tasks.",
      "primary_use": "Machine translation training and evaluation",
      "tasks": [
        "mt",
        "nlp"
      ],
      "tags": [
        "pashto",
        "mt",
        "parallel-corpus"
      ],
      "evidence_text": "Dataset viewer includes en-ps split.",
      "evidence_url": "https://huggingface.co/datasets/Helsinki-NLP/opus-100/viewer/en-ps",
      "markers": [
        "en-ps"
      ]
    },
    {
      "id": "dataset-kaggle-pashto-isolated-words",
      "title": "Pashto Isolated Words Speech Dataset",
      "url": "https://www.kaggle.com/datasets/engrirf/pashto-isolated-words-speech-dataset",
      "category": "dataset",
      "source": "kaggle",
      "status": "verified",
      "summary": "Speech dataset focused on isolated Pashto words.",
      "primary_use": "Keyword spotting and constrained ASR experiments",
      "tasks": [
        "asr"
      ],
      "tags": [
        "pashto",
        "speech",
        "kaggle"
      ],
      "evidence_text": "Dataset title explicitly states Pashto speech dataset.",
      "evidence_url": "https://www.kaggle.com/datasets/engrirf/pashto-isolated-words-speech-dataset",
      "markers": [
        "Pashto"
      ]
    },
    {
      "id": "dataset-kaggle-pashto-word-embeddings",
      "title": "Pashto Word Embeddings",
      "url": "https://www.kaggle.com/datasets/drijaz/pashto-word-embeddings",
      "category": "dataset",
      "source": "kaggle",
      "status": "verified",
      "summary": "Pretrained Pashto word vectors for classic NLP baselines.",
      "primary_use": "Lexical semantics and lightweight NLP baselines",
      "tasks": [
        "nlp"
      ],
      "tags": [
        "pashto",
        "nlp",
        "embeddings",
        "kaggle"
      ],
      "evidence_text": "Dataset description states pretrained Pashto embeddings.",
      "evidence_url": "https://www.kaggle.com/datasets/drijaz/pashto-word-embeddings",
      "markers": [
        "Pashto"
      ]
    },
    {
      "id": "model-pashto-bert",
      "title": "PashtoBERT",
      "url": "https://huggingface.co/mdarhri/pashto-bert",
      "category": "model",
      "source": "huggingface",
      "status": "verified",
      "summary": "Pashto-specific encoder model for NLP transfer tasks.",
      "primary_use": "Pashto NLP baseline encoder",
      "tasks": [
        "nlp"
      ],
      "tags": [
        "pashto",
        "nlp",
        "bert"
      ],
      "evidence_text": "Model card states training on Pashto corpus data.",
      "evidence_url": "https://huggingface.co/mdarhri/pashto-bert",
      "markers": [
        "Pashto"
      ]
    },
    {
      "id": "benchmark-fleurs-ps-af",
      "title": "FLEURS Pashto Benchmark",
      "url": "https://huggingface.co/datasets/google/fleurs",
      "category": "benchmark",
      "source": "huggingface",
      "status": "verified",
      "summary": "Fixed multilingual speech benchmark with Pashto subset for WER and CER.",
      "primary_use": "ASR benchmark reporting",
      "tasks": [
        "asr",
        "benchmarking"
      ],
      "tags": [
        "pashto",
        "benchmark",
        "asr"
      ],
      "evidence_text": "Dataset includes ps_af split.",
      "evidence_url": "https://huggingface.co/datasets/google/fleurs/blob/main/fleurs.py",
      "markers": [
        "ps_af"
      ]
    },
    {
      "id": "benchmark-common-voice-ps-v24",
      "title": "Common Voice Pashto v24 Benchmark",
      "url": "https://datacollective.mozillafoundation.org/datasets/cmj8u3pnb00llnxxbfvxo3b14",
      "category": "benchmark",
      "source": "mozilla",
      "status": "verified",
      "summary": "Core benchmark reference for project-level Pashto ASR tracking.",
      "primary_use": "ASR baseline tracking",
      "tasks": [
        "asr",
        "benchmarking"
      ],
      "tags": [
        "pashto",
        "benchmark",
        "asr"
      ],
      "evidence_text": "Official Pashto split and versioned release.",
      "evidence_url": "https://datacollective.mozillafoundation.org/datasets/cmj8u3pnb00llnxxbfvxo3b14",
      "markers": [
        "Pashto"
      ]
    },
    {
      "id": "benchmark-belebele-pbt-arab",
      "title": "Belebele Pashto Benchmark",
      "url": "https://huggingface.co/datasets/facebook/belebele",
      "category": "benchmark",
      "source": "huggingface",
      "status": "verified",
      "summary": "Comprehension benchmark for multilingual NLP with Pashto variant.",
      "primary_use": "NLP benchmark reporting",
      "tasks": [
        "nlp",
        "benchmarking"
      ],
      "tags": [
        "pashto",
        "benchmark",
        "nlp"
      ],
      "evidence_text": "Includes pbt_Arab language variant.",
      "evidence_url": "https://huggingface.co/datasets/facebook/belebele",
      "markers": [
        "pbt_Arab"
      ]
    },
    {
      "id": "benchmark-flores-200-pbt-arab",
      "title": "FLORES-200 Pashto Benchmark",
      "url": "https://github.com/facebookresearch/flores/tree/main/flores200",
      "category": "benchmark",
      "source": "github",
      "status": "verified",
      "summary": "Translation benchmark language inventory including Pashto script variant.",
      "primary_use": "MT benchmark with BLEU and chrF",
      "tasks": [
        "mt",
        "benchmarking"
      ],
      "tags": [
        "pashto",
        "benchmark",
        "mt"
      ],
      "evidence_text": "Language list includes pbt_Arab.",
      "evidence_url": "https://raw.githubusercontent.com/facebookresearch/flores/main/flores200/README.md",
      "markers": [
        "pbt_Arab"
      ]
    },
    {
      "id": "dataset-nexdata-99h-pashto-dialogue",
      "title": "99 Hours Pashto Spontaneous Dialogue Smartphone Speech Dataset",
      "url": "https://huggingface.co/datasets/Nexdata/99_Hours_Pashto_Spontaneous_Dialogue_Smartphone_speech_dataset",
      "category": "dataset",
      "source": "huggingface",
      "status": "verified",
      "summary": "Large spontaneous Pashto smartphone speech dataset for robust ASR experimentation.",
      "primary_use": "Spontaneous speech ASR training and robustness evaluation",
      "tasks": [
        "asr"
      ],
      "tags": [
        "pashto",
        "speech",
        "asr",
        "dialogue"
      ],
      "evidence_text": "Dataset title explicitly includes Pashto and API metadata marks audio and text modalities.",
      "evidence_url": "https://huggingface.co/datasets/Nexdata/99_Hours_Pashto_Spontaneous_Dialogue_Smartphone_speech_dataset",
      "markers": [
        "Pashto"
      ]
    },
    {
      "id": "dataset-zirak-ai-pashto-ocr",
      "title": "Zirak-AI PashtoOCR",
      "url": "https://huggingface.co/datasets/zirak-ai/PashtoOCR",
      "category": "dataset",
      "source": "huggingface",
      "status": "verified",
      "summary": "Pashto-focused OCR dataset with image-text pairs for document understanding tasks.",
      "primary_use": "OCR and text extraction benchmarking",
      "tasks": [
        "ocr",
        "nlp"
      ],
      "tags": [
        "pashto",
        "ocr",
        "nlp",
        "vision"
      ],
      "evidence_text": "Dataset tags include language:ps and the dataset name is PashtoOCR.",
      "evidence_url": "https://huggingface.co/datasets/zirak-ai/PashtoOCR",
      "markers": [
        "ps",
        "PashtoOCR"
      ]
    },
    {
      "id": "dataset-ihanif-pashto-wikipedia-corpus",
      "title": "Pashto Wikipedia Corpus",
      "url": "https://huggingface.co/datasets/ihanif/pashto-wikipedia-corpus",
      "category": "dataset",
      "source": "huggingface",
      "status": "verified",
      "summary": "Pashto text corpus prepared from Wikipedia data for NLP and language modeling.",
      "primary_use": "Pashto text corpus for NLP baselines",
      "tasks": [
        "nlp"
      ],
      "tags": [
        "pashto",
        "text",
        "nlp",
        "wikipedia"
      ],
      "evidence_text": "Dataset metadata includes language:ps and the title specifies Pashto corpus.",
      "evidence_url": "https://huggingface.co/datasets/ihanif/pashto-wikipedia-corpus",
      "markers": [
        "ps",
        "Pashto"
      ]
    },
    {
      "id": "model-ihanif-wav2vec2-xls-r-300m-pashto",
      "title": "wav2vec2 XLS-R 300M Pashto",
      "url": "https://huggingface.co/ihanif/wav2vec2-xls-r-300m-pashto",
      "category": "model",
      "source": "huggingface",
      "status": "verified",
      "summary": "Fine-tuned wav2vec2 XLS-R model for Pashto ASR with published FLEURS evaluation tags.",
      "primary_use": "Pashto ASR baseline and comparative experiments",
      "tasks": [
        "asr"
      ],
      "tags": [
        "pashto",
        "asr",
        "wav2vec2",
        "fleurs"
      ],
      "evidence_text": "Model tags include pashto and ps, and model index references FLEURS config ps_af.",
      "evidence_url": "https://huggingface.co/ihanif/wav2vec2-xls-r-300m-pashto",
      "markers": [
        "pashto",
        "ps",
        "ps_af"
      ]
    },
    {
      "id": "model-ihanif-whisper-medium-pashto",
      "title": "Whisper Medium Pashto",
      "url": "https://huggingface.co/ihanif/whisper-medium-pashto",
      "category": "model",
      "source": "huggingface",
      "status": "verified",
      "summary": "Fine-tuned Whisper Medium checkpoint for Pashto ASR with benchmark metadata.",
      "primary_use": "Pashto ASR baseline and transcription quality comparisons",
      "tasks": [
        "asr"
      ],
      "tags": [
        "pashto",
        "asr",
        "whisper",
        "fleurs"
      ],
      "evidence_text": "Model tags include pashto and ps, and model index uses FLEURS ps_af split.",
      "evidence_url": "https://huggingface.co/ihanif/whisper-medium-pashto",
      "markers": [
        "pashto",
        "ps",
        "ps_af"
      ]
    },
    {
      "id": "dataset-kaggle-pold-pashto-offensive",
      "title": "POLD - Pashto Offensive Language Dataset",
      "url": "https://www.kaggle.com/datasets/drijaz/pold-pashto-offensive-language-dataset",
      "category": "dataset",
      "source": "kaggle",
      "status": "verified",
      "summary": "Benchmark dataset for offensive content detection in Pashto social text.",
      "primary_use": "Pashto toxicity and moderation NLP benchmarks",
      "tasks": [
        "nlp",
        "classification"
      ],
      "tags": [
        "pashto",
        "kaggle",
        "nlp",
        "toxicity"
      ],
      "evidence_text": "Kaggle title and description explicitly state Pashto offensive language benchmark dataset.",
      "evidence_url": "https://www.kaggle.com/api/v1/datasets/view/drijaz/pold-pashto-offensive-language-dataset",
      "markers": [
        "Pashto"
      ]
    },
    {
      "id": "dataset-kaggle-pashto-english-sentiment-corpus",
      "title": "Pashto English Bilingual Sentiment Corpus",
      "url": "https://www.kaggle.com/datasets/farhadkhan66/pashto-translated-corpus",
      "category": "dataset",
      "source": "kaggle",
      "status": "verified",
      "summary": "Pashto to English bilingual sentiment corpus useful for low-resource sentiment tasks.",
      "primary_use": "Sentiment analysis and bilingual NLP experiments",
      "tasks": [
        "nlp",
        "sentiment"
      ],
      "tags": [
        "pashto",
        "kaggle",
        "sentiment",
        "bilingual"
      ],
      "evidence_text": "Kaggle dataset title and description identify the corpus as Pashto-English sentiment data.",
      "evidence_url": "https://www.kaggle.com/api/v1/datasets/view/farhadkhan66/pashto-translated-corpus",
      "markers": [
        "Pashto"
      ]
    },
    {
      "id": "dataset-kaggle-urdu-pashto-lexicon",
      "title": "Urdu-Pashto Lexicon Dataset",
      "url": "https://www.kaggle.com/datasets/shafeeqgigyani/urdu-pashto-lexicon-dataset",
      "category": "dataset",
      "source": "kaggle",
      "status": "verified",
      "summary": "Lexicon of Urdu words with Pashto translations for dictionary and MT support.",
      "primary_use": "Lexicon and translation lexeme mapping",
      "tasks": [
        "nlp",
        "mt"
      ],
      "tags": [
        "pashto",
        "kaggle",
        "lexicon",
        "translation"
      ],
      "evidence_text": "Kaggle metadata describes 7,601 Urdu entries with Pashto translations.",
      "evidence_url": "https://www.kaggle.com/api/v1/datasets/view/shafeeqgigyani/urdu-pashto-lexicon-dataset",
      "markers": [
        "Pashto"
      ]
    },
    {
      "id": "project-hf-space-ihanif-pashto-asr-v3",
      "title": "Pashto ASR V3 Space",
      "url": "https://huggingface.co/spaces/ihanif/pashto-asr-v3",
      "category": "project",
      "source": "huggingface",
      "status": "verified",
      "summary": "Interactive Hugging Face Space for Pashto automatic speech recognition demos.",
      "primary_use": "Project demo for Pashto ASR user testing",
      "tasks": [
        "asr",
        "demo"
      ],
      "tags": [
        "pashto",
        "project",
        "huggingface-space",
        "asr"
      ],
      "evidence_text": "Space card title is Pashto ASR V3 and short description states Pashto ASR.",
      "evidence_url": "https://huggingface.co/api/spaces/ihanif/pashto-asr-v3",
      "markers": [
        "Pashto",
        "ASR"
      ]
    },
    {
      "id": "project-hf-space-pashto2english-dictionary",
      "title": "Pashto to English Dictionary Space",
      "url": "https://huggingface.co/spaces/EngrAamirBangash/Pashto2English-Dictionary",
      "category": "project",
      "source": "huggingface",
      "status": "verified",
      "summary": "Streamlit project for Pashto to English dictionary lookups.",
      "primary_use": "Interactive bilingual lookup project",
      "tasks": [
        "dictionary",
        "translation",
        "demo"
      ],
      "tags": [
        "pashto",
        "project",
        "huggingface-space",
        "dictionary"
      ],
      "evidence_text": "Space metadata title states Pashto to English Dictionary.",
      "evidence_url": "https://huggingface.co/api/spaces/EngrAamirBangash/Pashto2English-Dictionary",
      "markers": [
        "Pashto"
      ]
    },
    {
      "id": "project-hf-space-umar4321-pashto-translator",
      "title": "Pashto Translator Space",
      "url": "https://huggingface.co/spaces/Umar4321/Pashto-Translator",
      "category": "project",
      "source": "huggingface",
      "status": "verified",
      "summary": "Streamlit translator project for Pashto to English and Urdu conversion.",
      "primary_use": "Interactive translation project demo",
      "tasks": [
        "translation",
        "demo"
      ],
      "tags": [
        "pashto",
        "project",
        "huggingface-space",
        "translation"
      ],
      "evidence_text": "Space title is Pashto Translator and description states Pashto to English and Urdu translation.",
      "evidence_url": "https://huggingface.co/api/spaces/Umar4321/Pashto-Translator",
      "markers": [
        "Pashto"
      ]
    },
    {
      "id": "code-github-ijazul-haq-nlpashto",
      "title": "nlpashto Toolkit",
      "url": "https://github.com/ijazul-haq/nlpashto",
      "category": "code",
      "source": "github",
      "status": "verified",
      "summary": "Pashto NLP toolkit codebase for tokenization, embeddings, and downstream NLP workflows.",
      "primary_use": "Pashto NLP code integration and experimentation",
      "tasks": [
        "nlp",
        "tooling"
      ],
      "tags": [
        "pashto",
        "code",
        "github",
        "nlp"
      ],
      "evidence_text": "Repository name and description explicitly identify a Pashto NLP toolkit.",
      "evidence_url": "https://api.github.com/repos/ijazul-haq/nlpashto",
      "markers": [
        "Pashto",
        "NLP"
      ]
    },
    {
      "id": "dataset-kaggle-drijaz-pashtoocr",
      "title": "PashtoOCR (Kaggle)",
      "url": "https://www.kaggle.com/datasets/drijaz/pashtoocr",
      "category": "dataset",
      "source": "kaggle",
      "status": "verified",
      "summary": "Synthetic OCR dataset focused on Pashto ligatures and text recognition tasks.",
      "primary_use": "Pashto OCR dataset benchmarking and training",
      "tasks": [
        "ocr",
        "nlp"
      ],
      "tags": [
        "pashto",
        "kaggle",
        "ocr",
        "dataset"
      ],
      "evidence_text": "Kaggle dataset title and subtitle explicitly identify a Pashto OCR dataset.",
      "evidence_url": "https://www.kaggle.com/api/v1/datasets/view/drijaz/pashtoocr",
      "markers": [
        "Pashto",
        "OCR"
      ]
    },
    {
      "id": "model-hf-zirak-ai-pashto-bert-v1",
      "title": "zirak-ai/pashto-bert-v1",
      "url": "https://huggingface.co/zirak-ai/pashto-bert-v1",
      "category": "model",
      "source": "huggingface",
      "status": "verified",
      "summary": "Pashto BERT model checkpoint for low-resource Pashto NLP experiments.",
      "primary_use": "Pashto encoder baseline for NLP tasks",
      "tasks": [
        "nlp"
      ],
      "tags": [
        "pashto",
        "huggingface",
        "bert",
        "nlp"
      ],
      "evidence_text": "Hugging Face model ID and search tags explicitly include pashto marker.",
      "evidence_url": "https://huggingface.co/zirak-ai/pashto-bert-v1",
      "markers": [
        "pashto"
      ]
    },
    {
      "id": "project-hf-space-ihanif-pashto-asr",
      "title": "Pashto ASR Space",
      "url": "https://huggingface.co/spaces/ihanif/pashto-asr",
      "category": "project",
      "source": "huggingface",
      "status": "verified",
      "summary": "Interactive Hugging Face Space for Pashto ASR inference demos.",
      "primary_use": "Live Pashto speech-to-text demo project",
      "tasks": [
        "asr",
        "demo"
      ],
      "tags": [
        "pashto",
        "project",
        "huggingface-space",
        "asr"
      ],
      "evidence_text": "Space ID includes pashto-asr and is returned by Hugging Face Pashto space search.",
      "evidence_url": "https://huggingface.co/api/spaces/ihanif/pashto-asr",
      "markers": [
        "pashto",
        "asr"
      ]
    },
    {
      "id": "paper-s2-psocr-lmm-pashto",
      "title": "PsOCR: Benchmarking Large Multimodal Models for Optical Character Recognition in Low-resource Pashto Language",
      "url": "https://www.semanticscholar.org/paper/d2743c0dcdbc65f5b46fcec2f0ba7cb379c4134f",
      "category": "paper",
      "source": "other",
      "status": "verified",
      "summary": "Research paper benchmarking multimodal OCR models on low-resource Pashto OCR tasks.",
      "primary_use": "Pashto OCR research baseline and evaluation reference",
      "tasks": [
        "ocr",
        "research"
      ],
      "tags": [
        "pashto",
        "paper",
        "ocr",
        "multimodal"
      ],
      "evidence_text": "Paper title explicitly references low-resource Pashto language OCR benchmarking.",
      "evidence_url": "https://www.semanticscholar.org/paper/d2743c0dcdbc65f5b46fcec2f0ba7cb379c4134f",
      "markers": [
        "Pashto",
        "OCR"
      ]
    },
    {
      "id": "dataset-hf-adnankhan769-english-to-pashto",
      "title": "English to Pashto Sentences Dataset",
      "url": "https://huggingface.co/datasets/adnankhan769/english_to_pashto_sentences_dataset",
      "category": "dataset",
      "source": "huggingface",
      "status": "verified",
      "summary": "Parallel English-Pashto sentence dataset for bilingual NLP and translation experiments.",
      "primary_use": "MT and bilingual sentence alignment baseline",
      "tasks": [
        "mt",
        "nlp"
      ],
      "tags": [
        "pashto",
        "dataset",
        "huggingface",
        "translation"
      ],
      "evidence_text": "Dataset ID explicitly states English-to-Pashto and includes Pashto-script sentence column.",
      "evidence_url": "https://huggingface.co/api/datasets/adnankhan769/english_to_pashto_sentences_dataset",
      "markers": [
        "Pashto"
      ]
    },
    {
      "id": "dataset-hf-saillab-alpaca-pashto-cleaned",
      "title": "alpaca-pashto-cleaned",
      "url": "https://huggingface.co/datasets/saillab/alpaca-pashto-cleaned",
      "category": "dataset",
      "source": "huggingface",
      "status": "verified",
      "summary": "Instruction-style Pashto text dataset suitable for LLM tuning and instruction-following research.",
      "primary_use": "Pashto instruction tuning and conversational NLP experiments",
      "tasks": [
        "nlp",
        "llm"
      ],
      "tags": [
        "pashto",
        "dataset",
        "huggingface",
        "instruction"
      ],
      "evidence_text": "Dataset metadata includes language:ps and dataset name includes Pashto.",
      "evidence_url": "https://huggingface.co/api/datasets/saillab/alpaca-pashto-cleaned",
      "markers": [
        "ps",
        "Pashto"
      ]
    },
    {
      "id": "model-hf-ihanif-whisper-base-pashto",
      "title": "Whisper Base Pashto",
      "url": "https://huggingface.co/ihanif/whisper-base-pashto",
      "category": "model",
      "source": "huggingface",
      "status": "verified",
      "summary": "Fine-tuned Whisper Base checkpoint for Pashto ASR with FLEURS ps_af evaluation metadata.",
      "primary_use": "Pashto ASR baseline and speed-accuracy comparison",
      "tasks": [
        "asr"
      ],
      "tags": [
        "pashto",
        "model",
        "huggingface",
        "asr"
      ],
      "evidence_text": "Model ID includes Pashto and card metadata references FLEURS config ps_af.",
      "evidence_url": "https://huggingface.co/api/models/ihanif/whisper-base-pashto",
      "markers": [
        "Pashto",
        "ps_af"
      ]
    },
    {
      "id": "project-hf-space-zamai-mistral-7b-pashto",
      "title": "ZamAI-Mistral-7B-Pashto Space",
      "url": "https://huggingface.co/spaces/tasal9/ZamAI-Mistral-7B-Pashto-space",
      "category": "project",
      "source": "huggingface",
      "status": "verified",
      "summary": "Gradio project space demonstrating a Pashto-adapted Mistral 7B interface.",
      "primary_use": "Interactive Pashto LLM project demo",
      "tasks": [
        "llm",
        "demo"
      ],
      "tags": [
        "pashto",
        "project",
        "huggingface-space",
        "llm"
      ],
      "evidence_text": "Space title and ID explicitly include Pashto and model card metadata exposes project details.",
      "evidence_url": "https://huggingface.co/api/spaces/tasal9/ZamAI-Mistral-7B-Pashto-space",
      "markers": [
        "Pashto"
      ]
    },
    {
      "id": "dataset-hf-adnankhan769-proper-dataset-english-2-pashto",
      "title": "adnankhan769/proper_dataset_english_2_pashto",
      "url": "https://huggingface.co/datasets/adnankhan769/proper_dataset_english_2_pashto",
      "category": "dataset",
      "source": "huggingface",
      "status": "verified",
      "summary": "Pashto bilingual/translation dataset discovered on Hugging Face for MT experimentation.",
      "primary_use": "Machine translation and bilingual corpus development",
      "tasks": [
        "mt"
      ],
      "tags": [
        "pashto",
        "dataset",
        "huggingface",
        "mt"
      ],
      "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
      "evidence_url": "https://huggingface.co/datasets/adnankhan769/proper_dataset_english_2_pashto",
      "markers": [
        "pashto"
      ]
    },
    {
      "id": "dataset-hf-ihanif-pashto-asr-wer",
      "title": "ihanif/pashto_asr_wer",
      "url": "https://huggingface.co/datasets/ihanif/pashto_asr_wer",
      "category": "dataset",
      "source": "huggingface",
      "status": "verified",
      "summary": "Pashto speech dataset discovered on Hugging Face for ASR training and evaluation.",
      "primary_use": "ASR training and evaluation data source",
      "tasks": [
        "asr"
      ],
      "tags": [
        "pashto",
        "dataset",
        "huggingface",
        "asr"
      ],
      "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
      "evidence_url": "https://huggingface.co/datasets/ihanif/pashto_asr_wer",
      "markers": [
        "pashto"
      ]
    },
    {
      "id": "dataset-hf-ihanif-pashto-speech-ds",
      "title": "ihanif/pashto_speech_ds",
      "url": "https://huggingface.co/datasets/ihanif/pashto_speech_ds",
      "category": "dataset",
      "source": "huggingface",
      "status": "verified",
      "summary": "Pashto speech dataset discovered on Hugging Face for ASR training and evaluation.",
      "primary_use": "ASR training and evaluation data source",
      "tasks": [
        "asr"
      ],
      "tags": [
        "pashto",
        "dataset",
        "huggingface",
        "asr"
      ],
      "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
      "evidence_url": "https://huggingface.co/datasets/ihanif/pashto_speech_ds",
      "markers": [
        "pashto"
      ]
    },
    {
      "id": "dataset-hf-ihanif-pashto-speech-parquet-10k",
      "title": "ihanif/pashto_speech_parquet_10k",
      "url": "https://huggingface.co/datasets/ihanif/pashto_speech_parquet_10k",
      "category": "dataset",
      "source": "huggingface",
      "status": "verified",
      "summary": "Pashto speech dataset discovered on Hugging Face for ASR training and evaluation.",
      "primary_use": "ASR training and evaluation data source",
      "tasks": [
        "asr"
      ],
      "tags": [
        "pashto",
        "dataset",
        "huggingface",
        "asr"
      ],
      "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
      "evidence_url": "https://huggingface.co/datasets/ihanif/pashto_speech_parquet_10k",
      "markers": [
        "pashto"
      ]
    },
    {
      "id": "dataset-hf-saillab-alpaca-pashto-taco",
      "title": "saillab/alpaca_pashto_taco",
      "url": "https://huggingface.co/datasets/saillab/alpaca_pashto_taco",
      "category": "dataset",
      "source": "huggingface",
      "status": "verified",
      "summary": "Pashto-focused dataset discovered via the Hugging Face candidate sync.",
      "primary_use": "Instruction tuning and LLM adaptation data source",
      "tasks": [
        "llm"
      ],
      "tags": [
        "pashto",
        "dataset",
        "huggingface",
        "llm"
      ],
      "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
      "evidence_url": "https://huggingface.co/datasets/saillab/alpaca_pashto_taco",
      "markers": [
        "pashto"
      ]
    },
    {
      "id": "dataset-hf-sherwindesouza-pashto-common-voice-20",
      "title": "SherwinDesouza/pashto-common-voice-20",
      "url": "https://huggingface.co/datasets/SherwinDesouza/pashto-common-voice-20",
      "category": "dataset",
      "source": "huggingface",
      "status": "verified",
      "summary": "Pashto-focused dataset discovered via the Hugging Face candidate sync.",
      "primary_use": "Pashto data source for NLP experimentation",
      "tasks": [
        "nlp"
      ],
      "tags": [
        "pashto",
        "dataset",
        "huggingface",
        "nlp"
      ],
      "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
      "evidence_url": "https://huggingface.co/datasets/SherwinDesouza/pashto-common-voice-20",
      "markers": [
        "pashto"
      ]
    },
    {
      "id": "dataset-hf-tasal9-zamai-pashto-dataset",
      "title": "tasal9/ZamAI_Pashto_Dataset",
      "url": "https://huggingface.co/datasets/tasal9/ZamAI_Pashto_Dataset",
      "category": "dataset",
      "source": "huggingface",
      "status": "verified",
      "summary": "Pashto-focused dataset discovered via the Hugging Face candidate sync.",
      "primary_use": "Pashto data source for NLP experimentation",
      "tasks": [
        "nlp"
      ],
      "tags": [
        "pashto",
        "dataset",
        "huggingface",
        "nlp"
      ],
      "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
      "evidence_url": "https://huggingface.co/datasets/tasal9/ZamAI_Pashto_Dataset",
      "markers": [
        "pashto"
      ]
    },
    {
      "id": "dataset-kaggle-english-pashto-language-dataset-epld",
      "title": "English-Pashto Language Dataset (EPLD)",
      "url": "https://www.kaggle.com/datasets/rabiakhan827/english-pashto-language-dataset-epld",
      "category": "dataset",
      "source": "kaggle",
      "status": "verified",
      "summary": "Pashto bilingual/translation dataset discovered from kaggle for MT experimentation.",
      "primary_use": "Machine translation and bilingual corpus development",
      "tasks": [
        "mt"
      ],
      "tags": [
        "pashto",
        "dataset",
        "kaggle",
        "mt"
      ],
      "evidence_text": "Kaggle dataset title/subtitle includes Pashto keyword.",
      "evidence_url": "https://www.kaggle.com/datasets/rabiakhan827/english-pashto-language-dataset-epld",
      "markers": [
        "Pashto"
      ]
    },
    {
      "id": "dataset-kaggle-katib-s-pashto-text-imagebase-kpti",
      "title": "Katib's Pashto Text Imagebase (KPTI)",
      "url": "https://www.kaggle.com/datasets/hassanamin/katibs-pashto-text-imagebase-kpti",
      "category": "dataset",
      "source": "kaggle",
      "status": "verified",
      "summary": "Pashto OCR-oriented dataset discovered from kaggle for document and script recognition work.",
      "primary_use": "OCR training and evaluation data source",
      "tasks": [
        "ocr"
      ],
      "tags": [
        "pashto",
        "dataset",
        "kaggle",
        "ocr"
      ],
      "evidence_text": "Kaggle dataset title/subtitle includes Pashto keyword.",
      "evidence_url": "https://www.kaggle.com/datasets/hassanamin/katibs-pashto-text-imagebase-kpti",
      "markers": [
        "Pashto"
      ]
    },
    {
      "id": "dataset-kaggle-pashto-ocr",
      "title": "Pashto OCR",
      "url": "https://www.kaggle.com/datasets/hassanamin/pashto-ocr",
      "category": "dataset",
      "source": "kaggle",
      "status": "verified",
      "summary": "Pashto OCR-oriented dataset discovered from kaggle for document and script recognition work.",
      "primary_use": "OCR training and evaluation data source",
      "tasks": [
        "ocr"
      ],
      "tags": [
        "pashto",
        "dataset",
        "kaggle",
        "ocr"
      ],
      "evidence_text": "Kaggle dataset title/subtitle includes Pashto keyword.",
      "evidence_url": "https://www.kaggle.com/datasets/hassanamin/pashto-ocr",
      "markers": [
        "Pashto"
      ]
    },
    {
      "id": "dataset-kaggle-common-voice-24-0-pashto-speech-dataset",
      "title": "Common Voice 24.0: Pashto Speech Dataset",
      "url": "https://www.kaggle.com/datasets/ataullahaali/common-voice-scripted-speech-24-0-pashto",
      "category": "dataset",
      "source": "kaggle",
      "status": "verified",
      "summary": "Pashto speech dataset discovered from kaggle for ASR training and evaluation.",
      "primary_use": "ASR training and evaluation data source",
      "tasks": [
        "asr"
      ],
      "tags": [
        "pashto",
        "dataset",
        "kaggle",
        "asr"
      ],
      "evidence_text": "Kaggle dataset title/subtitle includes Pashto keyword.",
      "evidence_url": "https://www.kaggle.com/datasets/ataullahaali/common-voice-scripted-speech-24-0-pashto",
      "markers": [
        "Pashto"
      ]
    },
    {
      "id": "model-hf-ihanif-pashto-asr-base",
      "title": "ihanif/pashto-asr-base",
      "url": "https://huggingface.co/ihanif/pashto-asr-base",
      "category": "model",
      "source": "huggingface",
      "status": "verified",
      "summary": "Pashto ASR model checkpoint discovered from huggingface candidate sync.",
      "primary_use": "Pashto ASR baseline and model comparison",
      "tasks": [
        "asr"
      ],
      "tags": [
        "pashto",
        "model",
        "huggingface",
        "asr"
      ],
      "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
      "evidence_url": "https://huggingface.co/ihanif/pashto-asr-base",
      "markers": [
        "pashto"
      ]
    },
    {
      "id": "model-hf-ihanif-wav2vec2-xls-r-300m-pashto-lm",
      "title": "ihanif/wav2vec2-xls-r-300m-pashto-lm",
      "url": "https://huggingface.co/ihanif/wav2vec2-xls-r-300m-pashto-lm",
      "category": "model",
      "source": "huggingface",
      "status": "verified",
      "summary": "Pashto ASR model checkpoint discovered from huggingface candidate sync.",
      "primary_use": "Pashto ASR baseline and model comparison",
      "tasks": [
        "asr"
      ],
      "tags": [
        "pashto",
        "model",
        "huggingface",
        "asr"
      ],
      "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
      "evidence_url": "https://huggingface.co/ihanif/wav2vec2-xls-r-300m-pashto-lm",
      "markers": [
        "pashto"
      ]
    },
    {
      "id": "model-hf-ihanif-whisper-large-pashto",
      "title": "ihanif/whisper-large-pashto",
      "url": "https://huggingface.co/ihanif/whisper-large-pashto",
      "category": "model",
      "source": "huggingface",
      "status": "verified",
      "summary": "Pashto ASR model checkpoint discovered from huggingface candidate sync.",
      "primary_use": "Pashto ASR baseline and model comparison",
      "tasks": [
        "asr"
      ],
      "tags": [
        "pashto",
        "model",
        "huggingface",
        "asr"
      ],
      "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
      "evidence_url": "https://huggingface.co/ihanif/whisper-large-pashto",
      "markers": [
        "pashto"
      ]
    },
    {
      "id": "model-hf-ihanif-whisper-medium-pashto-3e-7",
      "title": "ihanif/whisper-medium-pashto-3e-7",
      "url": "https://huggingface.co/ihanif/whisper-medium-pashto-3e-7",
      "category": "model",
      "source": "huggingface",
      "status": "verified",
      "summary": "Pashto ASR model checkpoint discovered from huggingface candidate sync.",
      "primary_use": "Pashto ASR baseline and model comparison",
      "tasks": [
        "asr"
      ],
      "tags": [
        "pashto",
        "model",
        "huggingface",
        "asr"
      ],
      "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
      "evidence_url": "https://huggingface.co/ihanif/whisper-medium-pashto-3e-7",
      "markers": [
        "pashto"
      ]
    },
    {
      "id": "model-hf-ihanif-whisper-small-pashto",
      "title": "ihanif/whisper-small-pashto",
      "url": "https://huggingface.co/ihanif/whisper-small-pashto",
      "category": "model",
      "source": "huggingface",
      "status": "verified",
      "summary": "Pashto ASR model checkpoint discovered from huggingface candidate sync.",
      "primary_use": "Pashto ASR baseline and model comparison",
      "tasks": [
        "asr"
      ],
      "tags": [
        "pashto",
        "model",
        "huggingface",
        "asr"
      ],
      "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
      "evidence_url": "https://huggingface.co/ihanif/whisper-small-pashto",
      "markers": [
        "pashto"
      ]
    },
    {
      "id": "model-hf-ihanif-xls-r-1b-pashto",
      "title": "ihanif/xls-r-1b-pashto",
      "url": "https://huggingface.co/ihanif/xls-r-1b-pashto",
      "category": "model",
      "source": "huggingface",
      "status": "verified",
      "summary": "Pashto ASR model checkpoint discovered from huggingface candidate sync.",
      "primary_use": "Pashto ASR baseline and model comparison",
      "tasks": [
        "asr"
      ],
      "tags": [
        "pashto",
        "model",
        "huggingface",
        "asr"
      ],
      "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
      "evidence_url": "https://huggingface.co/ihanif/xls-r-1b-pashto",
      "markers": [
        "pashto"
      ]
    },
    {
      "id": "model-hf-ijazulhaq-bert-base-pashto-v1",
      "title": "ijazulhaq/bert-base-pashto-v1",
      "url": "https://huggingface.co/ijazulhaq/bert-base-pashto-v1",
      "category": "model",
      "source": "huggingface",
      "status": "verified",
      "summary": "Pashto NLP model checkpoint discovered from huggingface candidate sync.",
      "primary_use": "Pashto model baseline for downstream NLP tasks",
      "tasks": [
        "nlp"
      ],
      "tags": [
        "pashto",
        "model",
        "huggingface",
        "nlp"
      ],
      "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
      "evidence_url": "https://huggingface.co/ijazulhaq/bert-base-pashto-v1",
      "markers": [
        "pashto"
      ]
    },
    {
      "id": "project-hf-space-ihanif-wav2vec2-bert-pashto-asr",
      "title": "ihanif/wav2vec2-bert-pashto-asr",
      "url": "https://huggingface.co/spaces/ihanif/wav2vec2-bert-pashto-asr",
      "category": "project",
      "source": "huggingface",
      "status": "verified",
      "summary": "Pashto-focused interactive project discovered from huggingface for demonstration and quick evaluation.",
      "primary_use": "Interactive Pashto demo and quick qualitative validation",
      "tasks": [
        "asr",
        "nlp",
        "demo"
      ],
      "tags": [
        "pashto",
        "project",
        "huggingface",
        "asr",
        "nlp",
        "demo"
      ],
      "evidence_text": "Matched by Pashto keyword in Hugging Face Spaces search.",
      "evidence_url": "https://huggingface.co/spaces/ihanif/wav2vec2-bert-pashto-asr",
      "markers": [
        "pashto"
      ]
    },
    {
      "id": "project-hf-space-nasirkhansayyad-pashto-whisper-demo",
      "title": "nasirkhansayyad/pashto-whisper-demo",
      "url": "https://huggingface.co/spaces/nasirkhansayyad/pashto-whisper-demo",
      "category": "project",
      "source": "huggingface",
      "status": "verified",
      "summary": "Pashto-focused interactive project discovered from huggingface for demonstration and quick evaluation.",
      "primary_use": "Interactive Pashto demo and quick qualitative validation",
      "tasks": [
        "asr",
        "demo"
      ],
      "tags": [
        "pashto",
        "project",
        "huggingface",
        "asr",
        "demo"
      ],
      "evidence_text": "Matched by Pashto keyword in Hugging Face Spaces search.",
      "evidence_url": "https://huggingface.co/spaces/nasirkhansayyad/pashto-whisper-demo",
      "markers": [
        "pashto"
      ]
    },
    {
      "id": "project-hf-space-tasal9-zamai-phi3-mini-pashto-demo",
      "title": "tasal9/ZamAI-Phi3-Mini-Pashto-Demo",
      "url": "https://huggingface.co/spaces/tasal9/ZamAI-Phi3-Mini-Pashto-Demo",
      "category": "project",
      "source": "huggingface",
      "status": "verified",
      "summary": "Pashto-focused interactive project discovered from huggingface for demonstration and quick evaluation.",
      "primary_use": "Interactive Pashto demo and quick qualitative validation",
      "tasks": [
        "llm",
        "demo"
      ],
      "tags": [
        "pashto",
        "project",
        "huggingface",
        "llm",
        "demo"
      ],
      "evidence_text": "Matched by Pashto keyword in Hugging Face Spaces search.",
      "evidence_url": "https://huggingface.co/spaces/tasal9/ZamAI-Phi3-Mini-Pashto-Demo",
      "markers": [
        "pashto"
      ]
    },
    {
      "id": "project-hf-space-umar4321-pashto-to-english-urdu",
      "title": "Umar4321/Pashto-To-English-Urdu",
      "url": "https://huggingface.co/spaces/Umar4321/Pashto-To-English-Urdu",
      "category": "project",
      "source": "huggingface",
      "status": "verified",
      "summary": "Pashto-focused interactive project discovered from huggingface for demonstration and quick evaluation.",
      "primary_use": "Interactive Pashto demo and quick qualitative validation",
      "tasks": [
        "mt",
        "demo"
      ],
      "tags": [
        "pashto",
        "project",
        "huggingface",
        "mt",
        "demo"
      ],
      "evidence_text": "Matched by Pashto keyword in Hugging Face Spaces search.",
      "evidence_url": "https://huggingface.co/spaces/Umar4321/Pashto-To-English-Urdu",
      "markers": [
        "pashto"
      ]
    },
    {
      "id": "project-github-fazlullahmamond-pashto-typing",
      "title": "Fazlullahmamond/Pashto-Typing",
      "url": "https://github.com/Fazlullahmamond/Pashto-Typing",
      "category": "project",
      "source": "github",
      "status": "verified",
      "summary": "Pashto-focused interactive project discovered from github for demonstration and quick evaluation.",
      "primary_use": "Interactive Pashto demo and quick qualitative validation",
      "tasks": [
        "demo"
      ],
      "tags": [
        "pashto",
        "project",
        "github",
        "demo"
      ],
      "evidence_text": "Repository metadata (name/description/topics) includes Pashto markers.",
      "evidence_url": "https://github.com/Fazlullahmamond/Pashto-Typing",
      "markers": [
        "pashto"
      ]
    },
    {
      "id": "paper-s2-benchmarking-whisper-for-low-resource-speech-recognition-an-n-shot-evaluation-on-pashto-pu",
      "title": "Benchmarking Whisper for Low-Resource Speech Recognition: An N-Shot Evaluation on Pashto, Punjabi, and Urdu",
      "url": "https://www.semanticscholar.org/paper/13104eddc785756132a19242ac7e74442b145693",
      "category": "paper",
      "source": "other",
      "status": "verified",
      "summary": "Pashto language technology paper discovered from other for research reference.",
      "primary_use": "Pashto research reference for methods and benchmarking",
      "tasks": [
        "asr",
        "mt"
      ],
      "tags": [
        "pashto",
        "paper",
        "other",
        "asr",
        "mt"
      ],
      "evidence_text": "Matched by Semantic Scholar query: pashto.",
      "evidence_url": "https://www.semanticscholar.org/paper/13104eddc785756132a19242ac7e74442b145693",
      "markers": [
        "pashto"
      ]
    },
    {
      "id": "paper-s2-deep-learning-based-detection-of-one-and-two-column-textual-blocks-in-camera-captured-pash",
      "title": "Deep Learning-Based Detection of One and Two-Column Textual Blocks in Camera-Captured Pashto Documents Images",
      "url": "https://www.semanticscholar.org/paper/8c9d2628e23d5c27edc656071f11f0e78124d182",
      "category": "paper",
      "source": "other",
      "status": "verified",
      "summary": "Pashto language technology paper discovered from other for research reference.",
      "primary_use": "Pashto research reference for methods and benchmarking",
      "tasks": [
        "ocr"
      ],
      "tags": [
        "pashto",
        "paper",
        "other",
        "ocr"
      ],
      "evidence_text": "Matched by Semantic Scholar query: pashto.",
      "evidence_url": "https://www.semanticscholar.org/paper/8c9d2628e23d5c27edc656071f11f0e78124d182",
      "markers": [
        "pashto"
      ]
    },
    {
      "id": "paper-s2-out-of-vocabulary-pashto-spell-checker-using-morphological-operations",
      "title": "Out-of-Vocabulary Pashto Spell Checker using Morphological Operations",
      "url": "https://www.semanticscholar.org/paper/802aae68a6a7fdfb29d51be03fb2b09e29311fa7",
      "category": "paper",
      "source": "other",
      "status": "verified",
      "summary": "Pashto language technology paper discovered from other for research reference.",
      "primary_use": "Pashto research reference for methods and benchmarking",
      "tasks": [
        "nlp"
      ],
      "tags": [
        "pashto",
        "paper",
        "other",
        "nlp"
      ],
      "evidence_text": "Matched by Semantic Scholar query: pashto.",
      "evidence_url": "https://www.semanticscholar.org/paper/802aae68a6a7fdfb29d51be03fb2b09e29311fa7",
      "markers": [
        "pashto"
      ]
    },
    {
      "id": "paper-s2-pashto-shallow-parsing-a-deep-learning-approach",
      "title": "Pashto Shallow Parsing: A Deep Learning Approach",
      "url": "https://www.semanticscholar.org/paper/be36455bb4eca60accb3e6866f345132f0dac1e5",
      "category": "paper",
      "source": "other",
      "status": "verified",
      "summary": "Pashto language technology paper discovered from other for research reference.",
      "primary_use": "Pashto research reference for methods and benchmarking",
      "tasks": [
        "nlp"
      ],
      "tags": [
        "pashto",
        "paper",
        "other",
        "nlp"
      ],
      "evidence_text": "Matched by Semantic Scholar query: pashto.",
      "evidence_url": "https://www.semanticscholar.org/paper/be36455bb4eca60accb3e6866f345132f0dac1e5",
      "markers": [
        "pashto"
      ]
    },
    {
      "id": "paper-s2-pos-tagging-of-low-resource-pashto-language-annotated-corpus-and-bert-based-model",
      "title": "POS tagging of low-resource Pashto language: annotated corpus and BERT-based model",
      "url": "https://www.semanticscholar.org/paper/1b2d5c896fec735483e8c8fb0a75e13125e08769",
      "category": "paper",
      "source": "other",
      "status": "verified",
      "summary": "Pashto language technology paper discovered from other for research reference.",
      "primary_use": "Pashto research reference for methods and benchmarking",
      "tasks": [
        "nlp"
      ],
      "tags": [
        "pashto",
        "paper",
        "other",
        "nlp"
      ],
      "evidence_text": "Matched by Semantic Scholar query: pashto.",
      "evidence_url": "https://www.semanticscholar.org/paper/1b2d5c896fec735483e8c8fb0a75e13125e08769",
      "markers": [
        "pashto"
      ]
    },
    {
      "id": "paper-arxiv-enhancing-pashto-text-classification-using-language-processing-techniques-for-single-and-m",
      "title": "Enhancing Pashto Text Classification using Language Processing Techniques for Single And Multi-Label Analysis",
      "url": "http://arxiv.org/abs/2305.03201v1",
      "category": "paper",
      "source": "arxiv",
      "status": "verified",
      "summary": "Pashto language technology paper discovered from arxiv for research reference.",
      "primary_use": "Pashto research reference for methods and benchmarking",
      "tasks": [
        "nlp"
      ],
      "tags": [
        "pashto",
        "paper",
        "arxiv",
        "nlp"
      ],
      "evidence_text": "Matched by arXiv query: all:pashto.",
      "evidence_url": "http://arxiv.org/abs/2305.03201v1",
      "markers": [
        "pashto"
      ]
    },
    {
      "id": "paper-arxiv-knn-and-ann-based-recognition-of-handwritten-pashto-letters-using-zoning-features",
      "title": "KNN and ANN-based Recognition of Handwritten Pashto Letters using Zoning Features",
      "url": "http://arxiv.org/abs/1904.03391v2",
      "category": "paper",
      "source": "arxiv",
      "status": "verified",
      "summary": "Pashto language technology paper discovered from arxiv for research reference.",
      "primary_use": "Pashto research reference for methods and benchmarking",
      "tasks": [
        "ocr"
      ],
      "tags": [
        "pashto",
        "paper",
        "arxiv",
        "ocr"
      ],
      "evidence_text": "Matched by arXiv query: all:pashto.",
      "evidence_url": "http://arxiv.org/abs/1904.03391v2",
      "markers": [
        "pashto"
      ]
    },
    {
      "id": "dataset-hf-oowais-pushto-text-to-speech-dataset",
      "title": "oowais/pushto-text-to-speech-dataset",
      "url": "https://huggingface.co/datasets/oowais/pushto-text-to-speech-dataset",
      "category": "dataset",
      "source": "huggingface",
      "status": "verified",
      "summary": "Pashto speech dataset discovered from huggingface candidate sync for ASR training and evaluation.",
      "primary_use": "ASR training and evaluation data source",
      "tasks": [
        "asr",
        "tts"
      ],
      "tags": [
        "pashto",
        "dataset",
        "huggingface",
        "asr",
        "tts"
      ],
      "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
      "evidence_url": "https://huggingface.co/datasets/oowais/pushto-text-to-speech-dataset",
      "markers": [
        "pashto"
      ]
    },
    {
      "id": "dataset-hf-ihanif-pashto-speech-20k",
      "title": "ihanif/pashto_speech_20k",
      "url": "https://huggingface.co/datasets/ihanif/pashto_speech_20k",
      "category": "dataset",
      "source": "huggingface",
      "status": "verified",
      "summary": "Pashto speech dataset discovered from huggingface candidate sync for ASR training and evaluation.",
      "primary_use": "ASR training and evaluation data source",
      "tasks": [
        "asr"
      ],
      "tags": [
        "pashto",
        "dataset",
        "huggingface",
        "asr"
      ],
      "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
      "evidence_url": "https://huggingface.co/datasets/ihanif/pashto_speech_20k",
      "markers": [
        "pashto"
      ]
    },
    {
      "id": "dataset-hf-ihanif-pashto-speech-5k",
      "title": "ihanif/pashto_speech_5k",
      "url": "https://huggingface.co/datasets/ihanif/pashto_speech_5k",
      "category": "dataset",
      "source": "huggingface",
      "status": "verified",
      "summary": "Pashto speech dataset discovered from huggingface candidate sync for ASR training and evaluation.",
      "primary_use": "ASR training and evaluation data source",
      "tasks": [
        "asr"
      ],
      "tags": [
        "pashto",
        "dataset",
        "huggingface",
        "asr"
      ],
      "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
      "evidence_url": "https://huggingface.co/datasets/ihanif/pashto_speech_5k",
      "markers": [
        "pashto"
      ]
    },
    {
      "id": "dataset-hf-tasal9-pashto-dataset",
      "title": "tasal9/Pashto_Dataset",
      "url": "https://huggingface.co/datasets/tasal9/Pashto_Dataset",
      "category": "dataset",
      "source": "huggingface",
      "status": "verified",
      "summary": "Pashto-focused dataset discovered from huggingface candidate sync.",
      "primary_use": "Pashto data source for NLP experimentation",
      "tasks": [
        "nlp"
      ],
      "tags": [
        "pashto",
        "dataset",
        "huggingface",
        "nlp"
      ],
      "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
      "evidence_url": "https://huggingface.co/datasets/tasal9/Pashto_Dataset",
      "markers": [
        "pashto"
      ]
    },
    {
      "id": "model-hf-ijazulhaq-bert-base-pashto",
      "title": "ijazulhaq/bert-base-pashto",
      "url": "https://huggingface.co/ijazulhaq/bert-base-pashto",
      "category": "model",
      "source": "huggingface",
      "status": "verified",
      "summary": "Pashto NLP model checkpoint discovered from huggingface candidate sync.",
      "primary_use": "Pashto model baseline for downstream NLP tasks",
      "tasks": [
        "nlp"
      ],
      "tags": [
        "pashto",
        "model",
        "huggingface",
        "nlp"
      ],
      "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
      "evidence_url": "https://huggingface.co/ijazulhaq/bert-base-pashto",
      "markers": [
        "pashto"
      ]
    },
    {
      "id": "model-hf-ihanif-whisper-small-pashto-dropout",
      "title": "ihanif/whisper-small-pashto-dropout",
      "url": "https://huggingface.co/ihanif/whisper-small-pashto-dropout",
      "category": "model",
      "source": "huggingface",
      "status": "verified",
      "summary": "Pashto ASR model checkpoint discovered from huggingface candidate sync.",
      "primary_use": "Pashto ASR baseline and model comparison",
      "tasks": [
        "asr"
      ],
      "tags": [
        "pashto",
        "model",
        "huggingface",
        "asr"
      ],
      "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
      "evidence_url": "https://huggingface.co/ihanif/whisper-small-pashto-dropout",
      "markers": [
        "pashto"
      ]
    },
    {
      "id": "model-hf-koochikoo25-pashto-whisper-large",
      "title": "koochikoo25/pashto-whisper-large",
      "url": "https://huggingface.co/koochikoo25/pashto-whisper-large",
      "category": "model",
      "source": "huggingface",
      "status": "verified",
      "summary": "Pashto ASR model checkpoint discovered from huggingface candidate sync.",
      "primary_use": "Pashto ASR baseline and model comparison",
      "tasks": [
        "asr"
      ],
      "tags": [
        "pashto",
        "model",
        "huggingface",
        "asr"
      ],
      "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
      "evidence_url": "https://huggingface.co/koochikoo25/pashto-whisper-large",
      "markers": [
        "pashto"
      ]
    },
    {
      "id": "project-hf-space-ihanif-wav2vec-pashto-asr",
      "title": "ihanif/wav2vec-pashto-asr",
      "url": "https://huggingface.co/spaces/ihanif/wav2vec-pashto-asr",
      "category": "project",
      "source": "huggingface",
      "status": "verified",
      "summary": "Pashto-focused interactive project discovered from huggingface for demonstration and evaluation.",
      "primary_use": "Interactive Pashto demo and quick qualitative validation",
      "tasks": [
        "asr",
        "demo"
      ],
      "tags": [
        "pashto",
        "project",
        "huggingface",
        "asr",
        "demo"
      ],
      "evidence_text": "Matched by Pashto keyword in Hugging Face Spaces search.",
      "evidence_url": "https://huggingface.co/spaces/ihanif/wav2vec-pashto-asr",
      "markers": [
        "pashto"
      ]
    },
    {
      "id": "project-hf-space-afaqalinagra-pashto-asr-model",
      "title": "afaqalinagra/PASHTO-ASR-MODEL",
      "url": "https://huggingface.co/spaces/afaqalinagra/PASHTO-ASR-MODEL",
      "category": "project",
      "source": "huggingface",
      "status": "verified",
      "summary": "Pashto-focused interactive project discovered from huggingface for demonstration and evaluation.",
      "primary_use": "Interactive Pashto demo and quick qualitative validation",
      "tasks": [
        "asr",
        "demo"
      ],
      "tags": [
        "pashto",
        "project",
        "huggingface",
        "asr",
        "demo"
      ],
      "evidence_text": "Matched by Pashto keyword in Hugging Face Spaces search.",
      "evidence_url": "https://huggingface.co/spaces/afaqalinagra/PASHTO-ASR-MODEL",
      "markers": [
        "pashto"
      ]
    },
    {
      "id": "project-hf-space-ilyas02828-pashto-sign-language",
      "title": "ilyas02828/Pashto_Sign_Language",
      "url": "https://huggingface.co/spaces/ilyas02828/Pashto_Sign_Language",
      "category": "project",
      "source": "huggingface",
      "status": "verified",
      "summary": "Pashto-focused interactive project discovered from huggingface for demonstration and evaluation.",
      "primary_use": "Interactive Pashto demo and quick qualitative validation",
      "tasks": [
        "demo"
      ],
      "tags": [
        "pashto",
        "project",
        "huggingface",
        "demo"
      ],
      "evidence_text": "Matched by Pashto keyword in Hugging Face Spaces search.",
      "evidence_url": "https://huggingface.co/spaces/ilyas02828/Pashto_Sign_Language",
      "markers": [
        "pashto"
      ]
    },
    {
      "id": "project-hf-space-mahmudaq-pashtoasrnmt1",
      "title": "mahmudaq/PashtoASRNMT1",
      "url": "https://huggingface.co/spaces/mahmudaq/PashtoASRNMT1",
      "category": "project",
      "source": "huggingface",
      "status": "verified",
      "summary": "Pashto-focused interactive project discovered from huggingface for demonstration and evaluation.",
      "primary_use": "Interactive Pashto demo and quick qualitative validation",
      "tasks": [
        "asr",
        "mt",
        "demo"
      ],
      "tags": [
        "pashto",
        "project",
        "huggingface",
        "asr",
        "mt",
        "demo"
      ],
      "evidence_text": "Matched by Pashto keyword in Hugging Face Spaces search.",
      "evidence_url": "https://huggingface.co/spaces/mahmudaq/PashtoASRNMT1",
      "markers": [
        "pashto"
      ]
    },
    {
      "id": "paper-s2-enhancing-pashto-ner-using-machine-labeled-data-and-transformer-based-models",
      "title": "Enhancing Pashto NER Using Machine-Labeled Data and Transformer-Based Models",
      "url": "https://www.semanticscholar.org/paper/be851ecf9197ef9bb8bf764abf4db0dda95cd9da",
      "category": "paper",
      "source": "other",
      "status": "verified",
      "summary": "Pashto language technology paper discovered from other for research reference.",
      "primary_use": "Pashto research reference for methods and benchmarking",
      "tasks": [
        "nlp"
      ],
      "tags": [
        "pashto",
        "paper",
        "other",
        "nlp"
      ],
      "evidence_text": "Matched by explicit Pashto marker in paper title from Semantic Scholar search.",
      "evidence_url": "https://www.semanticscholar.org/paper/be851ecf9197ef9bb8bf764abf4db0dda95cd9da",
      "markers": [
        "pashto"
      ]
    },
    {
      "id": "dataset-hf-aamirhs-pashto-audio-wav2vec",
      "title": "aamirhs/pashto-audio-wav2vec",
      "url": "https://huggingface.co/datasets/aamirhs/pashto-audio-wav2vec",
      "category": "dataset",
      "source": "huggingface",
      "status": "verified",
      "summary": "Pashto speech dataset surfaced from Hugging Face candidate sync for ASR experiments.",
      "primary_use": "Pashto ASR data exploration and baseline training",
      "tasks": [
        "asr"
      ],
      "tags": [
        "pashto",
        "dataset",
        "huggingface",
        "speech",
        "asr"
      ],
      "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
      "evidence_url": "https://huggingface.co/datasets/aamirhs/pashto-audio-wav2vec",
      "markers": [
        "pashto"
      ]
    },
    {
      "id": "dataset-hf-alimuhammad73-pashto-poetry",
      "title": "AliMuhammad73/Pashto-Poetry",
      "url": "https://huggingface.co/datasets/AliMuhammad73/Pashto-Poetry",
      "category": "dataset",
      "source": "huggingface",
      "status": "verified",
      "summary": "Pashto poetry text dataset surfaced from Hugging Face candidate sync for NLP experiments.",
      "primary_use": "Pashto poetry corpus for language modeling and text analysis",
      "tasks": [
        "nlp"
      ],
      "tags": [
        "pashto",
        "dataset",
        "huggingface",
        "text",
        "poetry",
        "nlp"
      ],
      "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
      "evidence_url": "https://huggingface.co/datasets/AliMuhammad73/Pashto-Poetry",
      "markers": [
        "pashto"
      ]
    },
    {
      "id": "model-hf-aamirhs-wav2vec2-large-xls-r-300m-pashto-colab",
      "title": "aamirhs/wav2vec2-large-xls-r-300m-pashto-colab",
      "url": "https://huggingface.co/aamirhs/wav2vec2-large-xls-r-300m-pashto-colab",
      "category": "model",
      "source": "huggingface",
      "status": "verified",
      "summary": "Pashto ASR model checkpoint surfaced from Hugging Face candidate sync.",
      "primary_use": "Pashto ASR baseline and transfer-learning comparison",
      "tasks": [
        "asr"
      ],
      "tags": [
        "pashto",
        "model",
        "huggingface",
        "asr"
      ],
      "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
      "evidence_url": "https://huggingface.co/aamirhs/wav2vec2-large-xls-r-300m-pashto-colab",
      "markers": [
        "pashto"
      ]
    },
    {
      "id": "project-hf-space-aizazayyubi-pashto-asr",
      "title": "Aizazayyubi/pashto_asr",
      "url": "https://huggingface.co/spaces/Aizazayyubi/pashto_asr",
      "category": "project",
      "source": "huggingface",
      "status": "verified",
      "summary": "Pashto ASR interactive demo surfaced from Hugging Face Spaces candidate sync.",
      "primary_use": "Interactive Pashto ASR demo for qualitative evaluation",
      "tasks": [
        "asr",
        "demo"
      ],
      "tags": [
        "pashto",
        "project",
        "huggingface",
        "asr",
        "demo"
      ],
      "evidence_text": "Matched by Pashto keyword in Hugging Face Spaces search.",
      "evidence_url": "https://huggingface.co/spaces/Aizazayyubi/pashto_asr",
      "markers": [
        "pashto"
      ]
    },
    {
      "id": "paper-arxiv-from-scarcity-to-scale-pashto-common-voice",
      "title": "From Scarcity to Scale: A Release-Level Analysis of the Pashto Common Voice Dataset",
      "url": "http://arxiv.org/abs/2602.14062v1",
      "category": "paper",
      "source": "arxiv",
      "status": "verified",
      "summary": "Research paper analyzing Pashto Common Voice releases and dataset scaling characteristics.",
      "primary_use": "ASR data quality and release trend reference",
      "tasks": [
        "asr",
        "benchmarking"
      ],
      "tags": [
        "pashto",
        "paper",
        "arxiv",
        "asr",
        "common-voice"
      ],
      "evidence_text": "Matched by Pashto marker in paper title from arXiv query results.",
      "evidence_url": "http://arxiv.org/abs/2602.14062v1",
      "markers": [
        "pashto"
      ]
    },
    {
      "id": "paper-arxiv-tuning-traditional-pashto-text-classification",
      "title": "Tuning Traditional Language Processing Approaches for Pashto Text Classification",
      "url": "http://arxiv.org/abs/2305.03737v1",
      "category": "paper",
      "source": "arxiv",
      "status": "verified",
      "summary": "Research paper focused on Pashto text classification using traditional NLP approaches.",
      "primary_use": "Pashto text classification method reference",
      "tasks": [
        "nlp"
      ],
      "tags": [
        "pashto",
        "paper",
        "arxiv",
        "nlp",
        "classification"
      ],
      "evidence_text": "Matched by Pashto marker in paper title from arXiv query results.",
      "evidence_url": "http://arxiv.org/abs/2305.03737v1",
      "markers": [
        "pashto"
      ]
    },
    {
      "id": "dataset-dataverse-iarpa-babel-pashto-language-pack-v0-4by",
      "title": "IARPA Babel Pashto Language Pack IARPA-babel104b-v0.4bY",
      "url": "https://hdl.handle.net/11272.1/AB2/GLFN3X",
      "category": "dataset",
      "source": "dataverse",
      "status": "verified",
      "summary": "Pashto Babel language pack dataset for speech and language processing evaluation.",
      "primary_use": "Pashto speech dataset for ASR and language identification experiments",
      "tasks": [
        "asr",
        "benchmarking"
      ],
      "tags": [
        "pashto",
        "dataset",
        "dataverse",
        "speech",
        "asr",
        "babel"
      ],
      "evidence_text": "Dataverse metadata includes Pashto markers in dataset title or description.",
      "evidence_url": "https://hdl.handle.net/11272.1/AB2/GLFN3X",
      "markers": [
        "pashto"
      ]
    },
    {
      "id": "paper-arxiv-image-to-text-pashto-farsi-traditional-chinese",
      "title": "Development of a New Image-to-text Conversion System for Pashto, Farsi and Traditional Chinese",
      "url": "http://arxiv.org/abs/2005.08650v1",
      "category": "paper",
      "source": "arxiv",
      "status": "verified",
      "summary": "Research paper on image-to-text conversion including Pashto OCR.",
      "primary_use": "Pashto OCR method reference",
      "tasks": [
        "ocr",
        "nlp"
      ],
      "tags": [
        "pashto",
        "paper",
        "arxiv",
        "ocr"
      ],
      "evidence_text": "Matched by Pashto marker in paper title from arXiv query results.",
      "evidence_url": "http://arxiv.org/abs/2005.08650v1",
      "markers": [
        "pashto"
      ]
    },
    {
      "id": "paper-openalex-benchmark-pashto-handwritten-character-dataset-ocr",
      "title": "Benchmark Pashto Handwritten Character Dataset and Pashto Object Character Recognition (OCR) Using Deep Neural Network with Rule Activation Function",
      "url": "https://doi.org/10.1155/2021/6669672",
      "category": "paper",
      "source": "openalex",
      "status": "verified",
      "summary": "Research paper introducing a benchmark dataset and OCR approach for Pashto handwritten characters.",
      "primary_use": "Pashto handwritten OCR benchmark and methodology reference",
      "tasks": [
        "ocr",
        "benchmarking"
      ],
      "tags": [
        "pashto",
        "paper",
        "openalex",
        "ocr",
        "benchmark"
      ],
      "evidence_text": "Matched by explicit Pashto marker in title from OpenAlex works search.",
      "evidence_url": "https://doi.org/10.1155/2021/6669672",
      "markers": [
        "pashto"
      ]
    },
    {
      "id": "paper-openalex-asr-isolated-pashto-spoken-digits-mfcc-knn",
      "title": "Database development and automatic speech recognition of isolated Pashto spoken digits using MFCC and K-NN",
      "url": "https://doi.org/10.1007/s10772-014-9267-z",
      "category": "paper",
      "source": "openalex",
      "status": "verified",
      "summary": "Research paper on isolated Pashto spoken-digit ASR with MFCC and K-NN.",
      "primary_use": "Pashto ASR baseline method reference for digit recognition",
      "tasks": [
        "asr"
      ],
      "tags": [
        "pashto",
        "paper",
        "openalex",
        "asr",
        "speech"
      ],
      "evidence_text": "Matched by explicit Pashto marker in title from OpenAlex works search.",
      "evidence_url": "https://doi.org/10.1007/s10772-014-9267-z",
      "markers": [
        "pashto"
      ]
    },
    {
      "id": "paper-openalex-pashto-isolated-digits-recognition-dcnn",
      "title": "Pashto isolated digits recognition using deep convolutional neural network",
      "url": "https://doi.org/10.1016/j.heliyon.2020.e03372",
      "category": "paper",
      "source": "openalex",
      "status": "verified",
      "summary": "Research paper on Pashto isolated-digit recognition using deep convolutional neural networks.",
      "primary_use": "Pashto speech recognition research reference",
      "tasks": [
        "asr"
      ],
      "tags": [
        "pashto",
        "paper",
        "openalex",
        "asr",
        "deep-learning"
      ],
      "evidence_text": "Matched by explicit Pashto marker in title from OpenAlex works search.",
      "evidence_url": "https://doi.org/10.1016/j.heliyon.2020.e03372",
      "markers": [
        "pashto"
      ]
    },
    {
      "id": "paper-openalex-pashto-offensive-language-detection-benchmark-bert",
      "title": "Pashto offensive language detection: a benchmark dataset and monolingual Pashto BERT",
      "url": "https://doi.org/10.7717/peerj-cs.1617",
      "category": "paper",
      "source": "openalex",
      "status": "verified",
      "summary": "Research paper on Pashto offensive language detection with benchmark dataset and monolingual BERT model.",
      "primary_use": "Pashto NLP toxicity detection benchmark and model reference",
      "tasks": [
        "nlp",
        "benchmarking"
      ],
      "tags": [
        "pashto",
        "paper",
        "openalex",
        "nlp",
        "bert",
        "benchmark"
      ],
      "evidence_text": "Matched by explicit Pashto marker in title from OpenAlex works search.",
      "evidence_url": "https://doi.org/10.7717/peerj-cs.1617",
      "markers": [
        "pashto"
      ]
    },
    {
      "id": "paper-openalex-phti-pashto-handwritten-text-imagebase",
      "title": "PHTI: Pashto Handwritten Text Imagebase for Deep Learning Applications",
      "url": "https://doi.org/10.1109/access.2022.3216881",
      "category": "paper",
      "source": "openalex",
      "status": "verified",
      "summary": "Research paper describing PHTI, a Pashto handwritten text imagebase for deep learning.",
      "primary_use": "Pashto OCR dataset and benchmark reference",
      "tasks": [
        "ocr",
        "benchmarking"
      ],
      "tags": [
        "pashto",
        "paper",
        "openalex",
        "ocr",
        "dataset"
      ],
      "evidence_text": "Matched by explicit Pashto marker in title from OpenAlex works search.",
      "evidence_url": "https://doi.org/10.1109/access.2022.3216881",
      "markers": [
        "pashto"
      ]
    },
    {
      "id": "paper-openalex-recognition-of-pashto-handwritten-characters-deep-learning",
      "title": "Recognition of Pashto Handwritten Characters Based on Deep Learning",
      "url": "https://doi.org/10.3390/s20205884",
      "category": "paper",
      "source": "openalex",
      "status": "verified",
      "summary": "Research paper on deep-learning-based recognition of Pashto handwritten characters.",
      "primary_use": "Pashto OCR model reference for handwritten character recognition",
      "tasks": [
        "ocr"
      ],
      "tags": [
        "pashto",
        "paper",
        "openalex",
        "ocr",
        "deep-learning"
      ],
      "evidence_text": "Matched by explicit Pashto marker in title from OpenAlex works search.",
      "evidence_url": "https://doi.org/10.3390/s20205884",
      "markers": [
        "pashto"
      ]
    },
    {
      "id": "paper-openalex-kpti-katib-pashto-text-imagebase-benchmark",
      "title": "KPTI: Katib's Pashto Text Imagebase and Deep Learning Benchmark",
      "url": "https://doi.org/10.1109/icfhr.2016.0090",
      "category": "paper",
      "source": "openalex",
      "status": "verified",
      "summary": "Research paper introducing KPTI, a Pashto text imagebase and benchmark for handwritten recognition.",
      "primary_use": "Pashto OCR dataset and benchmarking reference",
      "tasks": [
        "ocr",
        "benchmarking"
      ],
      "tags": [
        "pashto",
        "paper",
        "openalex",
        "ocr",
        "benchmark"
      ],
      "evidence_text": "Matched by explicit Pashto marker in title from OpenAlex works search.",
      "evidence_url": "https://doi.org/10.1109/icfhr.2016.0090",
      "markers": [
        "pashto"
      ]
    },
    {
      "id": "paper-openalex-pioneer-dataset-handwritten-pashto-cnn",
      "title": "Pioneer dataset and recognition of Handwritten Pashto characters using Convolution Neural Networks",
      "url": "https://doi.org/10.1177/0020294020964826",
      "category": "paper",
      "source": "openalex",
      "status": "verified",
      "summary": "Research paper on a pioneer handwritten Pashto character dataset with CNN-based recognition.",
      "primary_use": "Pashto handwritten character recognition reference",
      "tasks": [
        "ocr",
        "benchmarking"
      ],
      "tags": [
        "pashto",
        "paper",
        "openalex",
        "ocr",
        "deep-learning"
      ],
      "evidence_text": "Matched by explicit Pashto marker in title from OpenAlex works search.",
      "evidence_url": "https://doi.org/10.1177/0020294020964826",
      "markers": [
        "pashto"
      ]
    },
    {
      "id": "paper-openalex-scale-rotation-invariant-ocr-pashto-mdlstm",
      "title": "Scale and rotation invariant OCR for Pashto cursive script using MDLSTM network",
      "url": "https://doi.org/10.1109/icdar.2015.7333931",
      "category": "paper",
      "source": "openalex",
      "status": "verified",
      "summary": "Research paper on scale- and rotation-invariant OCR for cursive Pashto using MDLSTM.",
      "primary_use": "Pashto OCR model architecture reference",
      "tasks": [
        "ocr"
      ],
      "tags": [
        "pashto",
        "paper",
        "openalex",
        "ocr",
        "mdlstm"
      ],
      "evidence_text": "Matched by explicit Pashto marker in title from OpenAlex works search.",
      "evidence_url": "https://doi.org/10.1109/icdar.2015.7333931",
      "markers": [
        "pashto"
      ]
    },
    {
      "id": "paper-openalex-recognizable-units-pashto-ocr",
      "title": "Recognizable units in Pashto language for OCR",
      "url": "https://doi.org/10.1109/icdar.2015.7333963",
      "category": "paper",
      "source": "openalex",
      "status": "verified",
      "summary": "Research paper defining recognizable units in Pashto for OCR workflows.",
      "primary_use": "Pashto OCR preprocessing and unit-design reference",
      "tasks": [
        "ocr"
      ],
      "tags": [
        "pashto",
        "paper",
        "openalex",
        "ocr"
      ],
      "evidence_text": "Matched by explicit Pashto marker in title from OpenAlex works search.",
      "evidence_url": "https://doi.org/10.1109/icdar.2015.7333963",
      "markers": [
        "pashto"
      ]
    },
    {
      "id": "paper-openalex-shape-analysis-pashto-script-image-database-ocr",
      "title": "Shape analysis of Pashto script and creation of image database for OCR",
      "url": "https://doi.org/10.1109/icet.2009.5353160",
      "category": "paper",
      "source": "openalex",
      "status": "verified",
      "summary": "Research paper on Pashto script shape analysis and image database creation for OCR.",
      "primary_use": "Pashto OCR dataset design and feature reference",
      "tasks": [
        "ocr",
        "benchmarking"
      ],
      "tags": [
        "pashto",
        "paper",
        "openalex",
        "ocr",
        "dataset"
      ],
      "evidence_text": "Matched by explicit Pashto marker in title from OpenAlex works search.",
      "evidence_url": "https://doi.org/10.1109/icet.2009.5353160",
      "markers": [
        "pashto"
      ]
    },
    {
      "id": "paper-openalex-speech-translation-low-resource-case-pashto",
      "title": "Speech translation for low-resource languages: the case of Pashto",
      "url": "https://doi.org/10.21437/interspeech.2005-723",
      "category": "paper",
      "source": "openalex",
      "status": "verified",
      "summary": "Research paper on speech translation for low-resource languages, including Pashto.",
      "primary_use": "Pashto speech translation and low-resource MT reference",
      "tasks": [
        "asr",
        "mt"
      ],
      "tags": [
        "pashto",
        "paper",
        "openalex",
        "speech",
        "translation"
      ],
      "evidence_text": "Matched by explicit Pashto marker in title from OpenAlex works search.",
      "evidence_url": "https://doi.org/10.21437/interspeech.2005-723",
      "markers": [
        "pashto"
      ]
    }
  ]
}