| { |
| "generated_on": "2026-02-17T00:00:00Z", |
| "count": 95, |
| "resources": [ |
| { |
| "id": "dataset-common-voice-ps-v24", |
| "title": "Common Voice Scripted Speech 24.0 - Pashto", |
| "url": "https://datacollective.mozillafoundation.org/datasets/cmj8u3pnb00llnxxbfvxo3b14", |
| "category": "dataset", |
| "source": "mozilla", |
| "status": "verified", |
| "summary": "Large open Pashto speech dataset for ASR training and evaluation.", |
| "primary_use": "ASR training and evaluation", |
| "tasks": [ |
| "asr" |
| ], |
| "tags": [ |
| "pashto", |
| "speech", |
| "asr" |
| ], |
| "evidence_text": "Official dataset page is for Pashto.", |
| "evidence_url": "https://datacollective.mozillafoundation.org/datasets/cmj8u3pnb00llnxxbfvxo3b14", |
| "markers": [ |
| "Pashto" |
| ] |
| }, |
| { |
| "id": "dataset-google-fleurs", |
| "title": "Google FLEURS", |
| "url": "https://huggingface.co/datasets/google/fleurs", |
| "category": "dataset", |
| "source": "huggingface", |
| "status": "verified", |
| "summary": "Standard multilingual speech benchmark dataset with Pashto subset.", |
| "primary_use": "Speech benchmark and external evaluation", |
| "tasks": [ |
| "asr", |
| "benchmarking" |
| ], |
| "tags": [ |
| "pashto", |
| "speech", |
| "benchmark" |
| ], |
| "evidence_text": "Dataset config includes ps_af.", |
| "evidence_url": "https://huggingface.co/datasets/google/fleurs/blob/main/fleurs.py", |
| "markers": [ |
| "ps_af" |
| ] |
| }, |
| { |
| "id": "dataset-oscar-ps", |
| "title": "OSCAR Corpus", |
| "url": "https://huggingface.co/datasets/oscar-corpus/oscar", |
| "category": "dataset", |
| "source": "huggingface", |
| "status": "verified", |
| "summary": "Large web text corpus that includes Pashto text split.", |
| "primary_use": "Language modeling and lexicon expansion", |
| "tasks": [ |
| "nlp" |
| ], |
| "tags": [ |
| "pashto", |
| "text", |
| "nlp" |
| ], |
| "evidence_text": "Dataset includes unshuffled_deduplicated_ps split.", |
| "evidence_url": "https://huggingface.co/datasets/oscar-corpus/oscar", |
| "markers": [ |
| "unshuffled_deduplicated_ps" |
| ] |
| }, |
| { |
| "id": "dataset-wikipedia-ps", |
| "title": "Wikimedia Wikipedia", |
| "url": "https://huggingface.co/datasets/wikimedia/wikipedia", |
| "category": "dataset", |
| "source": "huggingface", |
| "status": "verified", |
| "summary": "Wikipedia corpus with Pashto edition for cleaner text resources.", |
| "primary_use": "Terminology and balanced text corpus", |
| "tasks": [ |
| "nlp" |
| ], |
| "tags": [ |
| "pashto", |
| "text", |
| "nlp" |
| ], |
| "evidence_text": "Dataset includes 20231101.ps subset.", |
| "evidence_url": "https://huggingface.co/datasets/wikimedia/wikipedia", |
| "markers": [ |
| "20231101.ps" |
| ] |
| }, |
| { |
| "id": "dataset-belebele-pbt-arab", |
| "title": "Belebele", |
| "url": "https://huggingface.co/datasets/facebook/belebele", |
| "category": "dataset", |
| "source": "huggingface", |
| "status": "verified", |
| "summary": "Reading comprehension dataset with Pashto script subset.", |
| "primary_use": "Comprehension and multilingual NLP benchmark", |
| "tasks": [ |
| "nlp", |
| "benchmarking" |
| ], |
| "tags": [ |
| "pashto", |
| "nlp", |
| "benchmark" |
| ], |
| "evidence_text": "Dataset includes pbt_Arab subset.", |
| "evidence_url": "https://huggingface.co/datasets/facebook/belebele", |
| "markers": [ |
| "pbt_Arab" |
| ] |
| }, |
| { |
| "id": "dataset-opus100-en-ps", |
| "title": "OPUS-100", |
| "url": "https://huggingface.co/datasets/Helsinki-NLP/opus-100", |
| "category": "dataset", |
| "source": "huggingface", |
| "status": "verified", |
| "summary": "Parallel corpus with English to Pashto split for MT tasks.", |
| "primary_use": "Machine translation training and evaluation", |
| "tasks": [ |
| "mt", |
| "nlp" |
| ], |
| "tags": [ |
| "pashto", |
| "mt", |
| "parallel-corpus" |
| ], |
| "evidence_text": "Dataset viewer includes en-ps split.", |
| "evidence_url": "https://huggingface.co/datasets/Helsinki-NLP/opus-100/viewer/en-ps", |
| "markers": [ |
| "en-ps" |
| ] |
| }, |
| { |
| "id": "dataset-kaggle-pashto-isolated-words", |
| "title": "Pashto Isolated Words Speech Dataset", |
| "url": "https://www.kaggle.com/datasets/engrirf/pashto-isolated-words-speech-dataset", |
| "category": "dataset", |
| "source": "kaggle", |
| "status": "verified", |
| "summary": "Speech dataset focused on isolated Pashto words.", |
| "primary_use": "Keyword spotting and constrained ASR experiments", |
| "tasks": [ |
| "asr" |
| ], |
| "tags": [ |
| "pashto", |
| "speech", |
| "kaggle" |
| ], |
| "evidence_text": "Dataset title explicitly states Pashto speech dataset.", |
| "evidence_url": "https://www.kaggle.com/datasets/engrirf/pashto-isolated-words-speech-dataset", |
| "markers": [ |
| "Pashto" |
| ] |
| }, |
| { |
| "id": "dataset-kaggle-pashto-word-embeddings", |
| "title": "Pashto Word Embeddings", |
| "url": "https://www.kaggle.com/datasets/drijaz/pashto-word-embeddings", |
| "category": "dataset", |
| "source": "kaggle", |
| "status": "verified", |
| "summary": "Pretrained Pashto word vectors for classic NLP baselines.", |
| "primary_use": "Lexical semantics and lightweight NLP baselines", |
| "tasks": [ |
| "nlp" |
| ], |
| "tags": [ |
| "pashto", |
| "nlp", |
| "embeddings", |
| "kaggle" |
| ], |
| "evidence_text": "Dataset description states pretrained Pashto embeddings.", |
| "evidence_url": "https://www.kaggle.com/datasets/drijaz/pashto-word-embeddings", |
| "markers": [ |
| "Pashto" |
| ] |
| }, |
| { |
| "id": "model-pashto-bert", |
| "title": "PashtoBERT", |
| "url": "https://huggingface.co/mdarhri/pashto-bert", |
| "category": "model", |
| "source": "huggingface", |
| "status": "verified", |
| "summary": "Pashto-specific encoder model for NLP transfer tasks.", |
| "primary_use": "Pashto NLP baseline encoder", |
| "tasks": [ |
| "nlp" |
| ], |
| "tags": [ |
| "pashto", |
| "nlp", |
| "bert" |
| ], |
| "evidence_text": "Model card states training on Pashto corpus data.", |
| "evidence_url": "https://huggingface.co/mdarhri/pashto-bert", |
| "markers": [ |
| "Pashto" |
| ] |
| }, |
| { |
| "id": "benchmark-fleurs-ps-af", |
| "title": "FLEURS Pashto Benchmark", |
| "url": "https://huggingface.co/datasets/google/fleurs", |
| "category": "benchmark", |
| "source": "huggingface", |
| "status": "verified", |
| "summary": "Fixed multilingual speech benchmark with Pashto subset for WER and CER.", |
| "primary_use": "ASR benchmark reporting", |
| "tasks": [ |
| "asr", |
| "benchmarking" |
| ], |
| "tags": [ |
| "pashto", |
| "benchmark", |
| "asr" |
| ], |
| "evidence_text": "Dataset includes ps_af split.", |
| "evidence_url": "https://huggingface.co/datasets/google/fleurs/blob/main/fleurs.py", |
| "markers": [ |
| "ps_af" |
| ] |
| }, |
| { |
| "id": "benchmark-common-voice-ps-v24", |
| "title": "Common Voice Pashto v24 Benchmark", |
| "url": "https://datacollective.mozillafoundation.org/datasets/cmj8u3pnb00llnxxbfvxo3b14", |
| "category": "benchmark", |
| "source": "mozilla", |
| "status": "verified", |
| "summary": "Core benchmark reference for project-level Pashto ASR tracking.", |
| "primary_use": "ASR baseline tracking", |
| "tasks": [ |
| "asr", |
| "benchmarking" |
| ], |
| "tags": [ |
| "pashto", |
| "benchmark", |
| "asr" |
| ], |
| "evidence_text": "Official Pashto split and versioned release.", |
| "evidence_url": "https://datacollective.mozillafoundation.org/datasets/cmj8u3pnb00llnxxbfvxo3b14", |
| "markers": [ |
| "Pashto" |
| ] |
| }, |
| { |
| "id": "benchmark-belebele-pbt-arab", |
| "title": "Belebele Pashto Benchmark", |
| "url": "https://huggingface.co/datasets/facebook/belebele", |
| "category": "benchmark", |
| "source": "huggingface", |
| "status": "verified", |
| "summary": "Comprehension benchmark for multilingual NLP with Pashto variant.", |
| "primary_use": "NLP benchmark reporting", |
| "tasks": [ |
| "nlp", |
| "benchmarking" |
| ], |
| "tags": [ |
| "pashto", |
| "benchmark", |
| "nlp" |
| ], |
| "evidence_text": "Includes pbt_Arab language variant.", |
| "evidence_url": "https://huggingface.co/datasets/facebook/belebele", |
| "markers": [ |
| "pbt_Arab" |
| ] |
| }, |
| { |
| "id": "benchmark-flores-200-pbt-arab", |
| "title": "FLORES-200 Pashto Benchmark", |
| "url": "https://github.com/facebookresearch/flores/tree/main/flores200", |
| "category": "benchmark", |
| "source": "github", |
| "status": "verified", |
| "summary": "Multilingual translation benchmark whose language inventory includes a Pashto variant in Arabic script (pbt_Arab).", |
| "primary_use": "MT benchmark with BLEU and chrF", |
| "tasks": [ |
| "mt", |
| "benchmarking" |
| ], |
| "tags": [ |
| "pashto", |
| "benchmark", |
| "mt" |
| ], |
| "evidence_text": "Language list includes pbt_Arab.", |
| "evidence_url": "https://raw.githubusercontent.com/facebookresearch/flores/main/flores200/README.md", |
| "markers": [ |
| "pbt_Arab" |
| ] |
| }, |
| { |
| "id": "dataset-nexdata-99h-pashto-dialogue", |
| "title": "99 Hours Pashto Spontaneous Dialogue Smartphone Speech Dataset", |
| "url": "https://huggingface.co/datasets/Nexdata/99_Hours_Pashto_Spontaneous_Dialogue_Smartphone_speech_dataset", |
| "category": "dataset", |
| "source": "huggingface", |
| "status": "verified", |
| "summary": "Large spontaneous Pashto smartphone speech dataset for robust ASR experimentation.", |
| "primary_use": "Spontaneous speech ASR training and robustness evaluation", |
| "tasks": [ |
| "asr" |
| ], |
| "tags": [ |
| "pashto", |
| "speech", |
| "asr", |
| "dialogue" |
| ], |
| "evidence_text": "Dataset title explicitly includes Pashto and API metadata marks audio and text modalities.", |
| "evidence_url": "https://huggingface.co/datasets/Nexdata/99_Hours_Pashto_Spontaneous_Dialogue_Smartphone_speech_dataset", |
| "markers": [ |
| "Pashto" |
| ] |
| }, |
| { |
| "id": "dataset-zirak-ai-pashto-ocr", |
| "title": "Zirak-AI PashtoOCR", |
| "url": "https://huggingface.co/datasets/zirak-ai/PashtoOCR", |
| "category": "dataset", |
| "source": "huggingface", |
| "status": "verified", |
| "summary": "Pashto-focused OCR dataset with image-text pairs for document understanding tasks.", |
| "primary_use": "OCR and text extraction benchmarking", |
| "tasks": [ |
| "ocr", |
| "nlp" |
| ], |
| "tags": [ |
| "pashto", |
| "ocr", |
| "nlp", |
| "vision" |
| ], |
| "evidence_text": "Dataset tags include language:ps and the dataset name is PashtoOCR.", |
| "evidence_url": "https://huggingface.co/datasets/zirak-ai/PashtoOCR", |
| "markers": [ |
| "ps", |
| "PashtoOCR" |
| ] |
| }, |
| { |
| "id": "dataset-ihanif-pashto-wikipedia-corpus", |
| "title": "Pashto Wikipedia Corpus", |
| "url": "https://huggingface.co/datasets/ihanif/pashto-wikipedia-corpus", |
| "category": "dataset", |
| "source": "huggingface", |
| "status": "verified", |
| "summary": "Pashto text corpus prepared from Wikipedia data for NLP and language modeling.", |
| "primary_use": "Pashto text corpus for NLP baselines", |
| "tasks": [ |
| "nlp" |
| ], |
| "tags": [ |
| "pashto", |
| "text", |
| "nlp", |
| "wikipedia" |
| ], |
| "evidence_text": "Dataset metadata includes language:ps and the title specifies Pashto corpus.", |
| "evidence_url": "https://huggingface.co/datasets/ihanif/pashto-wikipedia-corpus", |
| "markers": [ |
| "ps", |
| "Pashto" |
| ] |
| }, |
| { |
| "id": "model-ihanif-wav2vec2-xls-r-300m-pashto", |
| "title": "wav2vec2 XLS-R 300M Pashto", |
| "url": "https://huggingface.co/ihanif/wav2vec2-xls-r-300m-pashto", |
| "category": "model", |
| "source": "huggingface", |
| "status": "verified", |
| "summary": "Fine-tuned wav2vec2 XLS-R model for Pashto ASR with published FLEURS evaluation tags.", |
| "primary_use": "Pashto ASR baseline and comparative experiments", |
| "tasks": [ |
| "asr" |
| ], |
| "tags": [ |
| "pashto", |
| "asr", |
| "wav2vec2", |
| "fleurs" |
| ], |
| "evidence_text": "Model tags include pashto and ps, and model index references FLEURS config ps_af.", |
| "evidence_url": "https://huggingface.co/ihanif/wav2vec2-xls-r-300m-pashto", |
| "markers": [ |
| "pashto", |
| "ps", |
| "ps_af" |
| ] |
| }, |
| { |
| "id": "model-ihanif-whisper-medium-pashto", |
| "title": "Whisper Medium Pashto", |
| "url": "https://huggingface.co/ihanif/whisper-medium-pashto", |
| "category": "model", |
| "source": "huggingface", |
| "status": "verified", |
| "summary": "Fine-tuned Whisper Medium checkpoint for Pashto ASR with benchmark metadata.", |
| "primary_use": "Pashto ASR baseline and transcription quality comparisons", |
| "tasks": [ |
| "asr" |
| ], |
| "tags": [ |
| "pashto", |
| "asr", |
| "whisper", |
| "fleurs" |
| ], |
| "evidence_text": "Model tags include pashto and ps, and model index uses FLEURS ps_af split.", |
| "evidence_url": "https://huggingface.co/ihanif/whisper-medium-pashto", |
| "markers": [ |
| "pashto", |
| "ps", |
| "ps_af" |
| ] |
| }, |
| { |
| "id": "dataset-kaggle-pold-pashto-offensive", |
| "title": "POLD - Pashto Offensive Language Dataset", |
| "url": "https://www.kaggle.com/datasets/drijaz/pold-pashto-offensive-language-dataset", |
| "category": "dataset", |
| "source": "kaggle", |
| "status": "verified", |
| "summary": "Benchmark dataset for offensive content detection in Pashto social text.", |
| "primary_use": "Pashto toxicity and moderation NLP benchmarks", |
| "tasks": [ |
| "nlp", |
| "classification" |
| ], |
| "tags": [ |
| "pashto", |
| "kaggle", |
| "nlp", |
| "toxicity" |
| ], |
| "evidence_text": "Kaggle title and description explicitly state Pashto offensive language benchmark dataset.", |
| "evidence_url": "https://www.kaggle.com/api/v1/datasets/view/drijaz/pold-pashto-offensive-language-dataset", |
| "markers": [ |
| "Pashto" |
| ] |
| }, |
| { |
| "id": "dataset-kaggle-pashto-english-sentiment-corpus", |
| "title": "Pashto English Bilingual Sentiment Corpus", |
| "url": "https://www.kaggle.com/datasets/farhadkhan66/pashto-translated-corpus", |
| "category": "dataset", |
| "source": "kaggle", |
| "status": "verified", |
| "summary": "Pashto to English bilingual sentiment corpus useful for low-resource sentiment tasks.", |
| "primary_use": "Sentiment analysis and bilingual NLP experiments", |
| "tasks": [ |
| "nlp", |
| "sentiment" |
| ], |
| "tags": [ |
| "pashto", |
| "kaggle", |
| "sentiment", |
| "bilingual" |
| ], |
| "evidence_text": "Kaggle dataset title and description identify the corpus as Pashto-English sentiment data.", |
| "evidence_url": "https://www.kaggle.com/api/v1/datasets/view/farhadkhan66/pashto-translated-corpus", |
| "markers": [ |
| "Pashto" |
| ] |
| }, |
| { |
| "id": "dataset-kaggle-urdu-pashto-lexicon", |
| "title": "Urdu-Pashto Lexicon Dataset", |
| "url": "https://www.kaggle.com/datasets/shafeeqgigyani/urdu-pashto-lexicon-dataset", |
| "category": "dataset", |
| "source": "kaggle", |
| "status": "verified", |
| "summary": "Lexicon of Urdu words with Pashto translations for dictionary and MT support.", |
| "primary_use": "Lexicon and translation lexeme mapping", |
| "tasks": [ |
| "nlp", |
| "mt" |
| ], |
| "tags": [ |
| "pashto", |
| "kaggle", |
| "lexicon", |
| "translation" |
| ], |
| "evidence_text": "Kaggle metadata describes 7,601 Urdu entries with Pashto translations.", |
| "evidence_url": "https://www.kaggle.com/api/v1/datasets/view/shafeeqgigyani/urdu-pashto-lexicon-dataset", |
| "markers": [ |
| "Pashto" |
| ] |
| }, |
| { |
| "id": "project-hf-space-ihanif-pashto-asr-v3", |
| "title": "Pashto ASR V3 Space", |
| "url": "https://huggingface.co/spaces/ihanif/pashto-asr-v3", |
| "category": "project", |
| "source": "huggingface", |
| "status": "verified", |
| "summary": "Interactive Hugging Face Space for Pashto automatic speech recognition demos.", |
| "primary_use": "Project demo for Pashto ASR user testing", |
| "tasks": [ |
| "asr", |
| "demo" |
| ], |
| "tags": [ |
| "pashto", |
| "project", |
| "huggingface-space", |
| "asr" |
| ], |
| "evidence_text": "Space card title is Pashto ASR V3 and short description states Pashto ASR.", |
| "evidence_url": "https://huggingface.co/api/spaces/ihanif/pashto-asr-v3", |
| "markers": [ |
| "Pashto", |
| "ASR" |
| ] |
| }, |
| { |
| "id": "project-hf-space-pashto2english-dictionary", |
| "title": "Pashto to English Dictionary Space", |
| "url": "https://huggingface.co/spaces/EngrAamirBangash/Pashto2English-Dictionary", |
| "category": "project", |
| "source": "huggingface", |
| "status": "verified", |
| "summary": "Streamlit project for Pashto to English dictionary lookups.", |
| "primary_use": "Interactive bilingual lookup project", |
| "tasks": [ |
| "dictionary", |
| "translation", |
| "demo" |
| ], |
| "tags": [ |
| "pashto", |
| "project", |
| "huggingface-space", |
| "dictionary" |
| ], |
| "evidence_text": "Space metadata title states Pashto to English Dictionary.", |
| "evidence_url": "https://huggingface.co/api/spaces/EngrAamirBangash/Pashto2English-Dictionary", |
| "markers": [ |
| "Pashto" |
| ] |
| }, |
| { |
| "id": "project-hf-space-umar4321-pashto-translator", |
| "title": "Pashto Translator Space", |
| "url": "https://huggingface.co/spaces/Umar4321/Pashto-Translator", |
| "category": "project", |
| "source": "huggingface", |
| "status": "verified", |
| "summary": "Streamlit translator project for Pashto to English and Urdu conversion.", |
| "primary_use": "Interactive translation project demo", |
| "tasks": [ |
| "translation", |
| "demo" |
| ], |
| "tags": [ |
| "pashto", |
| "project", |
| "huggingface-space", |
| "translation" |
| ], |
| "evidence_text": "Space title is Pashto Translator and description states Pashto to English and Urdu translation.", |
| "evidence_url": "https://huggingface.co/api/spaces/Umar4321/Pashto-Translator", |
| "markers": [ |
| "Pashto" |
| ] |
| }, |
| { |
| "id": "code-github-ijazul-haq-nlpashto", |
| "title": "nlpashto Toolkit", |
| "url": "https://github.com/ijazul-haq/nlpashto", |
| "category": "code", |
| "source": "github", |
| "status": "verified", |
| "summary": "Pashto NLP toolkit codebase for tokenization, embeddings, and downstream NLP workflows.", |
| "primary_use": "Pashto NLP code integration and experimentation", |
| "tasks": [ |
| "nlp", |
| "tooling" |
| ], |
| "tags": [ |
| "pashto", |
| "code", |
| "github", |
| "nlp" |
| ], |
| "evidence_text": "Repository name and description explicitly identify a Pashto NLP toolkit.", |
| "evidence_url": "https://api.github.com/repos/ijazul-haq/nlpashto", |
| "markers": [ |
| "Pashto", |
| "NLP" |
| ] |
| }, |
| { |
| "id": "dataset-kaggle-drijaz-pashtoocr", |
| "title": "PashtoOCR (Kaggle)", |
| "url": "https://www.kaggle.com/datasets/drijaz/pashtoocr", |
| "category": "dataset", |
| "source": "kaggle", |
| "status": "verified", |
| "summary": "Synthetic OCR dataset focused on Pashto ligatures and text recognition tasks.", |
| "primary_use": "Pashto OCR dataset benchmarking and training", |
| "tasks": [ |
| "ocr", |
| "nlp" |
| ], |
| "tags": [ |
| "pashto", |
| "kaggle", |
| "ocr", |
| "dataset" |
| ], |
| "evidence_text": "Kaggle dataset title and subtitle explicitly identify a Pashto OCR dataset.", |
| "evidence_url": "https://www.kaggle.com/api/v1/datasets/view/drijaz/pashtoocr", |
| "markers": [ |
| "Pashto", |
| "OCR" |
| ] |
| }, |
| { |
| "id": "model-hf-zirak-ai-pashto-bert-v1", |
| "title": "zirak-ai/pashto-bert-v1", |
| "url": "https://huggingface.co/zirak-ai/pashto-bert-v1", |
| "category": "model", |
| "source": "huggingface", |
| "status": "verified", |
| "summary": "Pashto BERT model checkpoint for low-resource Pashto NLP experiments.", |
| "primary_use": "Pashto encoder baseline for NLP tasks", |
| "tasks": [ |
| "nlp" |
| ], |
| "tags": [ |
| "pashto", |
| "huggingface", |
| "bert", |
| "nlp" |
| ], |
| "evidence_text": "Hugging Face model ID and search tags explicitly include pashto marker.", |
| "evidence_url": "https://huggingface.co/zirak-ai/pashto-bert-v1", |
| "markers": [ |
| "pashto" |
| ] |
| }, |
| { |
| "id": "project-hf-space-ihanif-pashto-asr", |
| "title": "Pashto ASR Space", |
| "url": "https://huggingface.co/spaces/ihanif/pashto-asr", |
| "category": "project", |
| "source": "huggingface", |
| "status": "verified", |
| "summary": "Interactive Hugging Face Space for Pashto ASR inference demos.", |
| "primary_use": "Live Pashto speech-to-text demo project", |
| "tasks": [ |
| "asr", |
| "demo" |
| ], |
| "tags": [ |
| "pashto", |
| "project", |
| "huggingface-space", |
| "asr" |
| ], |
| "evidence_text": "Space ID includes pashto-asr and is returned by Hugging Face Pashto space search.", |
| "evidence_url": "https://huggingface.co/api/spaces/ihanif/pashto-asr", |
| "markers": [ |
| "pashto", |
| "asr" |
| ] |
| }, |
| { |
| "id": "paper-s2-psocr-lmm-pashto", |
| "title": "PsOCR: Benchmarking Large Multimodal Models for Optical Character Recognition in Low-resource Pashto Language", |
| "url": "https://www.semanticscholar.org/paper/d2743c0dcdbc65f5b46fcec2f0ba7cb379c4134f", |
| "category": "paper", |
| "source": "other", |
| "status": "verified", |
| "summary": "Research paper benchmarking multimodal OCR models on low-resource Pashto OCR tasks.", |
| "primary_use": "Pashto OCR research baseline and evaluation reference", |
| "tasks": [ |
| "ocr", |
| "research" |
| ], |
| "tags": [ |
| "pashto", |
| "paper", |
| "ocr", |
| "multimodal" |
| ], |
| "evidence_text": "Paper title explicitly references low-resource Pashto language OCR benchmarking.", |
| "evidence_url": "https://www.semanticscholar.org/paper/d2743c0dcdbc65f5b46fcec2f0ba7cb379c4134f", |
| "markers": [ |
| "Pashto", |
| "OCR" |
| ] |
| }, |
| { |
| "id": "dataset-hf-adnankhan769-english-to-pashto", |
| "title": "English to Pashto Sentences Dataset", |
| "url": "https://huggingface.co/datasets/adnankhan769/english_to_pashto_sentences_dataset", |
| "category": "dataset", |
| "source": "huggingface", |
| "status": "verified", |
| "summary": "Parallel English-Pashto sentence dataset for bilingual NLP and translation experiments.", |
| "primary_use": "MT and bilingual sentence alignment baseline", |
| "tasks": [ |
| "mt", |
| "nlp" |
| ], |
| "tags": [ |
| "pashto", |
| "dataset", |
| "huggingface", |
| "translation" |
| ], |
| "evidence_text": "Dataset ID explicitly states English-to-Pashto and includes Pashto-script sentence column.", |
| "evidence_url": "https://huggingface.co/api/datasets/adnankhan769/english_to_pashto_sentences_dataset", |
| "markers": [ |
| "Pashto" |
| ] |
| }, |
| { |
| "id": "dataset-hf-saillab-alpaca-pashto-cleaned", |
| "title": "alpaca-pashto-cleaned", |
| "url": "https://huggingface.co/datasets/saillab/alpaca-pashto-cleaned", |
| "category": "dataset", |
| "source": "huggingface", |
| "status": "verified", |
| "summary": "Instruction-style Pashto text dataset suitable for LLM tuning and instruction-following research.", |
| "primary_use": "Pashto instruction tuning and conversational NLP experiments", |
| "tasks": [ |
| "nlp", |
| "llm" |
| ], |
| "tags": [ |
| "pashto", |
| "dataset", |
| "huggingface", |
| "instruction" |
| ], |
| "evidence_text": "Dataset metadata includes language:ps and dataset name includes Pashto.", |
| "evidence_url": "https://huggingface.co/api/datasets/saillab/alpaca-pashto-cleaned", |
| "markers": [ |
| "ps", |
| "Pashto" |
| ] |
| }, |
| { |
| "id": "model-hf-ihanif-whisper-base-pashto", |
| "title": "Whisper Base Pashto", |
| "url": "https://huggingface.co/ihanif/whisper-base-pashto", |
| "category": "model", |
| "source": "huggingface", |
| "status": "verified", |
| "summary": "Fine-tuned Whisper Base checkpoint for Pashto ASR with FLEURS ps_af evaluation metadata.", |
| "primary_use": "Pashto ASR baseline and speed-accuracy comparison", |
| "tasks": [ |
| "asr" |
| ], |
| "tags": [ |
| "pashto", |
| "model", |
| "huggingface", |
| "asr" |
| ], |
| "evidence_text": "Model ID includes Pashto and card metadata references FLEURS config ps_af.", |
| "evidence_url": "https://huggingface.co/api/models/ihanif/whisper-base-pashto", |
| "markers": [ |
| "Pashto", |
| "ps_af" |
| ] |
| }, |
| { |
| "id": "project-hf-space-zamai-mistral-7b-pashto", |
| "title": "ZamAI-Mistral-7B-Pashto Space", |
| "url": "https://huggingface.co/spaces/tasal9/ZamAI-Mistral-7B-Pashto-space", |
| "category": "project", |
| "source": "huggingface", |
| "status": "verified", |
| "summary": "Gradio project space demonstrating a Pashto-adapted Mistral 7B interface.", |
| "primary_use": "Interactive Pashto LLM project demo", |
| "tasks": [ |
| "llm", |
| "demo" |
| ], |
| "tags": [ |
| "pashto", |
| "project", |
| "huggingface-space", |
| "llm" |
| ], |
| "evidence_text": "Space title and ID explicitly include Pashto and model card metadata exposes project details.", |
| "evidence_url": "https://huggingface.co/api/spaces/tasal9/ZamAI-Mistral-7B-Pashto-space", |
| "markers": [ |
| "Pashto" |
| ] |
| }, |
| { |
| "id": "dataset-hf-adnankhan769-proper-dataset-english-2-pashto", |
| "title": "adnankhan769/proper_dataset_english_2_pashto", |
| "url": "https://huggingface.co/datasets/adnankhan769/proper_dataset_english_2_pashto", |
| "category": "dataset", |
| "source": "huggingface", |
| "status": "verified", |
| "summary": "Pashto bilingual/translation dataset discovered on Hugging Face for MT experimentation.", |
| "primary_use": "Machine translation and bilingual corpus development", |
| "tasks": [ |
| "mt" |
| ], |
| "tags": [ |
| "pashto", |
| "dataset", |
| "huggingface", |
| "mt" |
| ], |
| "evidence_text": "Matched by Pashto keyword in Hugging Face search results.", |
| "evidence_url": "https://huggingface.co/datasets/adnankhan769/proper_dataset_english_2_pashto", |
| "markers": [ |
| "pashto" |
| ] |
| }, |
| { |
| "id": "dataset-hf-ihanif-pashto-asr-wer", |
| "title": "ihanif/pashto_asr_wer", |
| "url": "https://huggingface.co/datasets/ihanif/pashto_asr_wer", |
| "category": "dataset", |
| "source": "huggingface", |
| "status": "verified", |
| "summary": "Pashto speech dataset discovered on Hugging Face for ASR training and evaluation.", |
| "primary_use": "ASR training and evaluation data source", |
| "tasks": [ |
| "asr" |
| ], |
| "tags": [ |
| "pashto", |
| "dataset", |
| "huggingface", |
| "asr" |
| ], |
| "evidence_text": "Matched by Pashto keyword in Hugging Face search results.", |
| "evidence_url": "https://huggingface.co/datasets/ihanif/pashto_asr_wer", |
| "markers": [ |
| "pashto" |
| ] |
| }, |
| { |
| "id": "dataset-hf-ihanif-pashto-speech-ds", |
| "title": "ihanif/pashto_speech_ds", |
| "url": "https://huggingface.co/datasets/ihanif/pashto_speech_ds", |
| "category": "dataset", |
| "source": "huggingface", |
| "status": "verified", |
| "summary": "Pashto speech dataset discovered on Hugging Face for ASR training and evaluation.", |
| "primary_use": "ASR training and evaluation data source", |
| "tasks": [ |
| "asr" |
| ], |
| "tags": [ |
| "pashto", |
| "dataset", |
| "huggingface", |
| "asr" |
| ], |
| "evidence_text": "Matched by Pashto keyword in Hugging Face search results.", |
| "evidence_url": "https://huggingface.co/datasets/ihanif/pashto_speech_ds", |
| "markers": [ |
| "pashto" |
| ] |
| }, |
| { |
| "id": "dataset-hf-ihanif-pashto-speech-parquet-10k", |
| "title": "ihanif/pashto_speech_parquet_10k", |
| "url": "https://huggingface.co/datasets/ihanif/pashto_speech_parquet_10k", |
| "category": "dataset", |
| "source": "huggingface", |
| "status": "verified", |
| "summary": "Pashto speech dataset discovered on Hugging Face for ASR training and evaluation.", |
| "primary_use": "ASR training and evaluation data source", |
| "tasks": [ |
| "asr" |
| ], |
| "tags": [ |
| "pashto", |
| "dataset", |
| "huggingface", |
| "asr" |
| ], |
| "evidence_text": "Matched by Pashto keyword in Hugging Face search results.", |
| "evidence_url": "https://huggingface.co/datasets/ihanif/pashto_speech_parquet_10k", |
| "markers": [ |
| "pashto" |
| ] |
| }, |
| { |
| "id": "dataset-hf-saillab-alpaca-pashto-taco", |
| "title": "saillab/alpaca_pashto_taco", |
| "url": "https://huggingface.co/datasets/saillab/alpaca_pashto_taco", |
| "category": "dataset", |
| "source": "huggingface", |
| "status": "verified", |
| "summary": "Pashto instruction-tuning dataset discovered via the Hugging Face candidate sync.", |
| "primary_use": "Instruction tuning and LLM adaptation data source", |
| "tasks": [ |
| "llm" |
| ], |
| "tags": [ |
| "pashto", |
| "dataset", |
| "huggingface", |
| "llm" |
| ], |
| "evidence_text": "Matched by Pashto keyword in Hugging Face search results.", |
| "evidence_url": "https://huggingface.co/datasets/saillab/alpaca_pashto_taco", |
| "markers": [ |
| "pashto" |
| ] |
| }, |
| { |
| "id": "dataset-hf-sherwindesouza-pashto-common-voice-20", |
| "title": "SherwinDesouza/pashto-common-voice-20", |
| "url": "https://huggingface.co/datasets/SherwinDesouza/pashto-common-voice-20", |
| "category": "dataset", |
| "source": "huggingface", |
| "status": "verified", |
| "summary": "Pashto speech dataset discovered on Hugging Face for ASR training and evaluation.", |
| "primary_use": "ASR training and evaluation data source", |
| "tasks": [ |
| "asr" |
| ], |
| "tags": [ |
| "pashto", |
| "dataset", |
| "huggingface", |
| "asr" |
| ], |
| "evidence_text": "Matched by Pashto keyword in Hugging Face search results.", |
| "evidence_url": "https://huggingface.co/datasets/SherwinDesouza/pashto-common-voice-20", |
| "markers": [ |
| "pashto" |
| ] |
| }, |
| { |
| "id": "dataset-hf-tasal9-zamai-pashto-dataset", |
| "title": "tasal9/ZamAI_Pashto_Dataset", |
| "url": "https://huggingface.co/datasets/tasal9/ZamAI_Pashto_Dataset", |
| "category": "dataset", |
| "source": "huggingface", |
| "status": "verified", |
| "summary": "Pashto-focused dataset discovered via the Hugging Face candidate sync.", |
| "primary_use": "Pashto data source for NLP experimentation", |
| "tasks": [ |
| "nlp" |
| ], |
| "tags": [ |
| "pashto", |
| "dataset", |
| "huggingface", |
| "nlp" |
| ], |
| "evidence_text": "Matched by Pashto keyword in Hugging Face search results.", |
| "evidence_url": "https://huggingface.co/datasets/tasal9/ZamAI_Pashto_Dataset", |
| "markers": [ |
| "pashto" |
| ] |
| }, |
| { |
| "id": "dataset-kaggle-english-pashto-language-dataset-epld", |
| "title": "English-Pashto Language Dataset (EPLD)", |
| "url": "https://www.kaggle.com/datasets/rabiakhan827/english-pashto-language-dataset-epld", |
| "category": "dataset", |
| "source": "kaggle", |
| "status": "verified", |
| "summary": "Pashto bilingual/translation dataset discovered on Kaggle for MT experimentation.", |
| "primary_use": "Machine translation and bilingual corpus development", |
| "tasks": [ |
| "mt" |
| ], |
| "tags": [ |
| "pashto", |
| "dataset", |
| "kaggle", |
| "mt" |
| ], |
| "evidence_text": "Kaggle dataset title/subtitle includes Pashto keyword.", |
| "evidence_url": "https://www.kaggle.com/datasets/rabiakhan827/english-pashto-language-dataset-epld", |
| "markers": [ |
| "Pashto" |
| ] |
| }, |
| { |
| "id": "dataset-kaggle-katib-s-pashto-text-imagebase-kpti", |
| "title": "Katib's Pashto Text Imagebase (KPTI)", |
| "url": "https://www.kaggle.com/datasets/hassanamin/katibs-pashto-text-imagebase-kpti", |
| "category": "dataset", |
| "source": "kaggle", |
| "status": "verified", |
| "summary": "Pashto OCR-oriented dataset discovered on Kaggle for document and script recognition work.", |
| "primary_use": "OCR training and evaluation data source", |
| "tasks": [ |
| "ocr" |
| ], |
| "tags": [ |
| "pashto", |
| "dataset", |
| "kaggle", |
| "ocr" |
| ], |
| "evidence_text": "Kaggle dataset title/subtitle includes Pashto keyword.", |
| "evidence_url": "https://www.kaggle.com/datasets/hassanamin/katibs-pashto-text-imagebase-kpti", |
| "markers": [ |
| "Pashto" |
| ] |
| }, |
| { |
| "id": "dataset-kaggle-pashto-ocr", |
| "title": "Pashto OCR", |
| "url": "https://www.kaggle.com/datasets/hassanamin/pashto-ocr", |
| "category": "dataset", |
| "source": "kaggle", |
| "status": "verified", |
| "summary": "Pashto OCR-oriented dataset discovered on Kaggle for document and script recognition work.", |
| "primary_use": "OCR training and evaluation data source", |
| "tasks": [ |
| "ocr" |
| ], |
| "tags": [ |
| "pashto", |
| "dataset", |
| "kaggle", |
| "ocr" |
| ], |
| "evidence_text": "Kaggle dataset title/subtitle includes Pashto keyword.", |
| "evidence_url": "https://www.kaggle.com/datasets/hassanamin/pashto-ocr", |
| "markers": [ |
| "Pashto" |
| ] |
| }, |
| { |
| "id": "dataset-kaggle-common-voice-24-0-pashto-speech-dataset", |
| "title": "Common Voice 24.0: Pashto Speech Dataset", |
| "url": "https://www.kaggle.com/datasets/ataullahaali/common-voice-scripted-speech-24-0-pashto", |
| "category": "dataset", |
| "source": "kaggle", |
| "status": "verified", |
| "summary": "Pashto speech dataset discovered from kaggle for ASR training and evaluation.", |
| "primary_use": "ASR training and evaluation data source", |
| "tasks": [ |
| "asr" |
| ], |
| "tags": [ |
| "pashto", |
| "dataset", |
| "kaggle", |
| "asr" |
| ], |
| "evidence_text": "Kaggle dataset title/subtitle includes Pashto keyword.", |
| "evidence_url": "https://www.kaggle.com/datasets/ataullahaali/common-voice-scripted-speech-24-0-pashto", |
| "markers": [ |
| "Pashto" |
| ] |
| }, |
| { |
| "id": "model-hf-ihanif-pashto-asr-base", |
| "title": "ihanif/pashto-asr-base", |
| "url": "https://huggingface.co/ihanif/pashto-asr-base", |
| "category": "model", |
| "source": "huggingface", |
| "status": "verified", |
| "summary": "Pashto ASR model checkpoint discovered from huggingface candidate sync.", |
| "primary_use": "Pashto ASR baseline and model comparison", |
| "tasks": [ |
| "asr" |
| ], |
| "tags": [ |
| "pashto", |
| "model", |
| "huggingface", |
| "asr" |
| ], |
| "evidence_text": "Matched by Pashto keyword in Hugging Face search results.", |
| "evidence_url": "https://huggingface.co/ihanif/pashto-asr-base", |
| "markers": [ |
| "pashto" |
| ] |
| }, |
| { |
| "id": "model-hf-ihanif-wav2vec2-xls-r-300m-pashto-lm", |
| "title": "ihanif/wav2vec2-xls-r-300m-pashto-lm", |
| "url": "https://huggingface.co/ihanif/wav2vec2-xls-r-300m-pashto-lm", |
| "category": "model", |
| "source": "huggingface", |
| "status": "verified", |
| "summary": "Pashto ASR model checkpoint discovered from huggingface candidate sync.", |
| "primary_use": "Pashto ASR baseline and model comparison", |
| "tasks": [ |
| "asr" |
| ], |
| "tags": [ |
| "pashto", |
| "model", |
| "huggingface", |
| "asr" |
| ], |
| "evidence_text": "Matched by Pashto keyword in Hugging Face search results.", |
| "evidence_url": "https://huggingface.co/ihanif/wav2vec2-xls-r-300m-pashto-lm", |
| "markers": [ |
| "pashto" |
| ] |
| }, |
| { |
| "id": "model-hf-ihanif-whisper-large-pashto", |
| "title": "ihanif/whisper-large-pashto", |
| "url": "https://huggingface.co/ihanif/whisper-large-pashto", |
| "category": "model", |
| "source": "huggingface", |
| "status": "verified", |
| "summary": "Pashto ASR model checkpoint discovered from huggingface candidate sync.", |
| "primary_use": "Pashto ASR baseline and model comparison", |
| "tasks": [ |
| "asr" |
| ], |
| "tags": [ |
| "pashto", |
| "model", |
| "huggingface", |
| "asr" |
| ], |
| "evidence_text": "Matched by Pashto keyword in Hugging Face search results.", |
| "evidence_url": "https://huggingface.co/ihanif/whisper-large-pashto", |
| "markers": [ |
| "pashto" |
| ] |
| }, |
| { |
| "id": "model-hf-ihanif-whisper-medium-pashto-3e-7", |
| "title": "ihanif/whisper-medium-pashto-3e-7", |
| "url": "https://huggingface.co/ihanif/whisper-medium-pashto-3e-7", |
| "category": "model", |
| "source": "huggingface", |
| "status": "verified", |
| "summary": "Pashto ASR model checkpoint discovered from huggingface candidate sync.", |
| "primary_use": "Pashto ASR baseline and model comparison", |
| "tasks": [ |
| "asr" |
| ], |
| "tags": [ |
| "pashto", |
| "model", |
| "huggingface", |
| "asr" |
| ], |
| "evidence_text": "Matched by Pashto keyword in Hugging Face search results.", |
| "evidence_url": "https://huggingface.co/ihanif/whisper-medium-pashto-3e-7", |
| "markers": [ |
| "pashto" |
| ] |
| }, |
| { |
| "id": "model-hf-ihanif-whisper-small-pashto", |
| "title": "ihanif/whisper-small-pashto", |
| "url": "https://huggingface.co/ihanif/whisper-small-pashto", |
| "category": "model", |
| "source": "huggingface", |
| "status": "verified", |
| "summary": "Pashto ASR model checkpoint discovered from huggingface candidate sync.", |
| "primary_use": "Pashto ASR baseline and model comparison", |
| "tasks": [ |
| "asr" |
| ], |
| "tags": [ |
| "pashto", |
| "model", |
| "huggingface", |
| "asr" |
| ], |
| "evidence_text": "Matched by Pashto keyword in Hugging Face search results.", |
| "evidence_url": "https://huggingface.co/ihanif/whisper-small-pashto", |
| "markers": [ |
| "pashto" |
| ] |
| }, |
| { |
| "id": "model-hf-ihanif-xls-r-1b-pashto", |
| "title": "ihanif/xls-r-1b-pashto", |
| "url": "https://huggingface.co/ihanif/xls-r-1b-pashto", |
| "category": "model", |
| "source": "huggingface", |
| "status": "verified", |
| "summary": "Pashto ASR model checkpoint discovered from huggingface candidate sync.", |
| "primary_use": "Pashto ASR baseline and model comparison", |
| "tasks": [ |
| "asr" |
| ], |
| "tags": [ |
| "pashto", |
| "model", |
| "huggingface", |
| "asr" |
| ], |
| "evidence_text": "Matched by Pashto keyword in Hugging Face search results.", |
| "evidence_url": "https://huggingface.co/ihanif/xls-r-1b-pashto", |
| "markers": [ |
| "pashto" |
| ] |
| }, |
| { |
| "id": "model-hf-ijazulhaq-bert-base-pashto-v1", |
| "title": "ijazulhaq/bert-base-pashto-v1", |
| "url": "https://huggingface.co/ijazulhaq/bert-base-pashto-v1", |
| "category": "model", |
| "source": "huggingface", |
| "status": "verified", |
| "summary": "Pashto NLP model checkpoint discovered from huggingface candidate sync.", |
| "primary_use": "Pashto model baseline for downstream NLP tasks", |
| "tasks": [ |
| "nlp" |
| ], |
| "tags": [ |
| "pashto", |
| "model", |
| "huggingface", |
| "nlp" |
| ], |
| "evidence_text": "Matched by Pashto keyword in Hugging Face search results.", |
| "evidence_url": "https://huggingface.co/ijazulhaq/bert-base-pashto-v1", |
| "markers": [ |
| "pashto" |
| ] |
| }, |
| { |
| "id": "project-hf-space-ihanif-wav2vec2-bert-pashto-asr", |
| "title": "ihanif/wav2vec2-bert-pashto-asr", |
| "url": "https://huggingface.co/spaces/ihanif/wav2vec2-bert-pashto-asr", |
| "category": "project", |
| "source": "huggingface", |
| "status": "verified", |
| "summary": "Pashto-focused interactive project discovered from huggingface for demonstration and quick evaluation.", |
| "primary_use": "Interactive Pashto demo and quick qualitative validation", |
| "tasks": [ |
| "asr", |
| "nlp", |
| "demo" |
| ], |
| "tags": [ |
| "pashto", |
| "project", |
| "huggingface", |
| "asr", |
| "nlp", |
| "demo" |
| ], |
| "evidence_text": "Matched by Pashto keyword in Hugging Face Spaces search.", |
| "evidence_url": "https://huggingface.co/spaces/ihanif/wav2vec2-bert-pashto-asr", |
| "markers": [ |
| "pashto" |
| ] |
| }, |
| { |
| "id": "project-hf-space-nasirkhansayyad-pashto-whisper-demo", |
| "title": "nasirkhansayyad/pashto-whisper-demo", |
| "url": "https://huggingface.co/spaces/nasirkhansayyad/pashto-whisper-demo", |
| "category": "project", |
| "source": "huggingface", |
| "status": "verified", |
| "summary": "Pashto-focused interactive project discovered from huggingface for demonstration and quick evaluation.", |
| "primary_use": "Interactive Pashto demo and quick qualitative validation", |
| "tasks": [ |
| "asr", |
| "demo" |
| ], |
| "tags": [ |
| "pashto", |
| "project", |
| "huggingface", |
| "asr", |
| "demo" |
| ], |
| "evidence_text": "Matched by Pashto keyword in Hugging Face Spaces search.", |
| "evidence_url": "https://huggingface.co/spaces/nasirkhansayyad/pashto-whisper-demo", |
| "markers": [ |
| "pashto" |
| ] |
| }, |
| { |
| "id": "project-hf-space-tasal9-zamai-phi3-mini-pashto-demo", |
| "title": "tasal9/ZamAI-Phi3-Mini-Pashto-Demo", |
| "url": "https://huggingface.co/spaces/tasal9/ZamAI-Phi3-Mini-Pashto-Demo", |
| "category": "project", |
| "source": "huggingface", |
| "status": "verified", |
| "summary": "Pashto-focused interactive project discovered from huggingface for demonstration and quick evaluation.", |
| "primary_use": "Interactive Pashto demo and quick qualitative validation", |
| "tasks": [ |
| "llm", |
| "demo" |
| ], |
| "tags": [ |
| "pashto", |
| "project", |
| "huggingface", |
| "llm", |
| "demo" |
| ], |
| "evidence_text": "Matched by Pashto keyword in Hugging Face Spaces search.", |
| "evidence_url": "https://huggingface.co/spaces/tasal9/ZamAI-Phi3-Mini-Pashto-Demo", |
| "markers": [ |
| "pashto" |
| ] |
| }, |
| { |
| "id": "project-hf-space-umar4321-pashto-to-english-urdu", |
| "title": "Umar4321/Pashto-To-English-Urdu", |
| "url": "https://huggingface.co/spaces/Umar4321/Pashto-To-English-Urdu", |
| "category": "project", |
| "source": "huggingface", |
| "status": "verified", |
| "summary": "Pashto-focused interactive project discovered from huggingface for demonstration and quick evaluation.", |
| "primary_use": "Interactive Pashto demo and quick qualitative validation", |
| "tasks": [ |
| "mt", |
| "demo" |
| ], |
| "tags": [ |
| "pashto", |
| "project", |
| "huggingface", |
| "mt", |
| "demo" |
| ], |
| "evidence_text": "Matched by Pashto keyword in Hugging Face Spaces search.", |
| "evidence_url": "https://huggingface.co/spaces/Umar4321/Pashto-To-English-Urdu", |
| "markers": [ |
| "pashto" |
| ] |
| }, |
| { |
| "id": "project-github-fazlullahmamond-pashto-typing", |
| "title": "Fazlullahmamond/Pashto-Typing", |
| "url": "https://github.com/Fazlullahmamond/Pashto-Typing", |
| "category": "project", |
| "source": "github", |
| "status": "verified", |
| "summary": "Pashto-focused interactive project discovered from github for demonstration and quick evaluation.", |
| "primary_use": "Interactive Pashto demo and quick qualitative validation", |
| "tasks": [ |
| "demo" |
| ], |
| "tags": [ |
| "pashto", |
| "project", |
| "github", |
| "demo" |
| ], |
| "evidence_text": "Repository metadata (name/description/topics) includes Pashto markers.", |
| "evidence_url": "https://github.com/Fazlullahmamond/Pashto-Typing", |
| "markers": [ |
| "pashto" |
| ] |
| }, |
| { |
| "id": "paper-s2-benchmarking-whisper-for-low-resource-speech-recognition-an-n-shot-evaluation-on-pashto-pu", |
| "title": "Benchmarking Whisper for Low-Resource Speech Recognition: An N-Shot Evaluation on Pashto, Punjabi, and Urdu", |
| "url": "https://www.semanticscholar.org/paper/13104eddc785756132a19242ac7e74442b145693", |
| "category": "paper", |
| "source": "other", |
| "status": "verified", |
| "summary": "Pashto language technology paper discovered via Semantic Scholar for research reference.", |
| "primary_use": "Pashto research reference for methods and benchmarking", |
| "tasks": [ |
| "asr", |
| "mt" |
| ], |
| "tags": [ |
| "pashto", |
| "paper", |
| "other", |
| "asr", |
| "mt" |
| ], |
| "evidence_text": "Matched by Semantic Scholar query: pashto.", |
| "evidence_url": "https://www.semanticscholar.org/paper/13104eddc785756132a19242ac7e74442b145693", |
| "markers": [ |
| "pashto" |
| ] |
| }, |
| { |
| "id": "paper-s2-deep-learning-based-detection-of-one-and-two-column-textual-blocks-in-camera-captured-pash", |
| "title": "Deep Learning-Based Detection of One and Two-Column Textual Blocks in Camera-Captured Pashto Documents Images", |
| "url": "https://www.semanticscholar.org/paper/8c9d2628e23d5c27edc656071f11f0e78124d182", |
| "category": "paper", |
| "source": "other", |
| "status": "verified", |
| "summary": "Pashto language technology paper discovered via Semantic Scholar for research reference.", |
| "primary_use": "Pashto research reference for methods and benchmarking", |
| "tasks": [ |
| "ocr" |
| ], |
| "tags": [ |
| "pashto", |
| "paper", |
| "other", |
| "ocr" |
| ], |
| "evidence_text": "Matched by Semantic Scholar query: pashto.", |
| "evidence_url": "https://www.semanticscholar.org/paper/8c9d2628e23d5c27edc656071f11f0e78124d182", |
| "markers": [ |
| "pashto" |
| ] |
| }, |
| { |
| "id": "paper-s2-out-of-vocabulary-pashto-spell-checker-using-morphological-operations", |
| "title": "Out-of-Vocabulary Pashto Spell Checker using Morphological Operations", |
| "url": "https://www.semanticscholar.org/paper/802aae68a6a7fdfb29d51be03fb2b09e29311fa7", |
| "category": "paper", |
| "source": "other", |
| "status": "verified", |
| "summary": "Pashto language technology paper discovered via Semantic Scholar for research reference.", |
| "primary_use": "Pashto research reference for methods and benchmarking", |
| "tasks": [ |
| "nlp" |
| ], |
| "tags": [ |
| "pashto", |
| "paper", |
| "other", |
| "nlp" |
| ], |
| "evidence_text": "Matched by Semantic Scholar query: pashto.", |
| "evidence_url": "https://www.semanticscholar.org/paper/802aae68a6a7fdfb29d51be03fb2b09e29311fa7", |
| "markers": [ |
| "pashto" |
| ] |
| }, |
| { |
| "id": "paper-s2-pashto-shallow-parsing-a-deep-learning-approach", |
| "title": "Pashto Shallow Parsing: A Deep Learning Approach", |
| "url": "https://www.semanticscholar.org/paper/be36455bb4eca60accb3e6866f345132f0dac1e5", |
| "category": "paper", |
| "source": "other", |
| "status": "verified", |
| "summary": "Pashto language technology paper discovered via Semantic Scholar for research reference.", |
| "primary_use": "Pashto research reference for methods and benchmarking", |
| "tasks": [ |
| "nlp" |
| ], |
| "tags": [ |
| "pashto", |
| "paper", |
| "other", |
| "nlp" |
| ], |
| "evidence_text": "Matched by Semantic Scholar query: pashto.", |
| "evidence_url": "https://www.semanticscholar.org/paper/be36455bb4eca60accb3e6866f345132f0dac1e5", |
| "markers": [ |
| "pashto" |
| ] |
| }, |
| { |
| "id": "paper-s2-pos-tagging-of-low-resource-pashto-language-annotated-corpus-and-bert-based-model", |
| "title": "POS tagging of low-resource Pashto language: annotated corpus and BERT-based model", |
| "url": "https://www.semanticscholar.org/paper/1b2d5c896fec735483e8c8fb0a75e13125e08769", |
| "category": "paper", |
| "source": "other", |
| "status": "verified", |
| "summary": "Pashto language technology paper discovered via Semantic Scholar for research reference.", |
| "primary_use": "Pashto research reference for methods and benchmarking", |
| "tasks": [ |
| "nlp" |
| ], |
| "tags": [ |
| "pashto", |
| "paper", |
| "other", |
| "nlp" |
| ], |
| "evidence_text": "Matched by Semantic Scholar query: pashto.", |
| "evidence_url": "https://www.semanticscholar.org/paper/1b2d5c896fec735483e8c8fb0a75e13125e08769", |
| "markers": [ |
| "pashto" |
| ] |
| }, |
| { |
| "id": "paper-arxiv-enhancing-pashto-text-classification-using-language-processing-techniques-for-single-and-m", |
| "title": "Enhancing Pashto Text Classification using Language Processing Techniques for Single And Multi-Label Analysis", |
| "url": "http://arxiv.org/abs/2305.03201v1", |
| "category": "paper", |
| "source": "arxiv", |
| "status": "verified", |
| "summary": "Pashto language technology paper discovered from arxiv for research reference.", |
| "primary_use": "Pashto research reference for methods and benchmarking", |
| "tasks": [ |
| "nlp" |
| ], |
| "tags": [ |
| "pashto", |
| "paper", |
| "arxiv", |
| "nlp" |
| ], |
| "evidence_text": "Matched by arXiv query: all:pashto.", |
| "evidence_url": "http://arxiv.org/abs/2305.03201v1", |
| "markers": [ |
| "pashto" |
| ] |
| }, |
| { |
| "id": "paper-arxiv-knn-and-ann-based-recognition-of-handwritten-pashto-letters-using-zoning-features", |
| "title": "KNN and ANN-based Recognition of Handwritten Pashto Letters using Zoning Features", |
| "url": "http://arxiv.org/abs/1904.03391v2", |
| "category": "paper", |
| "source": "arxiv", |
| "status": "verified", |
| "summary": "Pashto language technology paper discovered from arxiv for research reference.", |
| "primary_use": "Pashto research reference for methods and benchmarking", |
| "tasks": [ |
| "ocr" |
| ], |
| "tags": [ |
| "pashto", |
| "paper", |
| "arxiv", |
| "ocr" |
| ], |
| "evidence_text": "Matched by arXiv query: all:pashto.", |
| "evidence_url": "http://arxiv.org/abs/1904.03391v2", |
| "markers": [ |
| "pashto" |
| ] |
| }, |
| { |
| "id": "dataset-hf-oowais-pushto-text-to-speech-dataset", |
| "title": "oowais/pushto-text-to-speech-dataset", |
| "url": "https://huggingface.co/datasets/oowais/pushto-text-to-speech-dataset", |
| "category": "dataset", |
| "source": "huggingface", |
| "status": "verified", |
| "summary": "Pashto speech dataset discovered from huggingface candidate sync for ASR and TTS training and evaluation.", |
| "primary_use": "ASR and TTS training and evaluation data source", |
| "tasks": [ |
| "asr", |
| "tts" |
| ], |
| "tags": [ |
| "pashto", |
| "dataset", |
| "huggingface", |
| "asr", |
| "tts" |
| ], |
| "evidence_text": "Matched by Pashto keyword in Hugging Face search results.", |
| "evidence_url": "https://huggingface.co/datasets/oowais/pushto-text-to-speech-dataset", |
| "markers": [ |
| "pashto" |
| ] |
| }, |
| { |
| "id": "dataset-hf-ihanif-pashto-speech-20k", |
| "title": "ihanif/pashto_speech_20k", |
| "url": "https://huggingface.co/datasets/ihanif/pashto_speech_20k", |
| "category": "dataset", |
| "source": "huggingface", |
| "status": "verified", |
| "summary": "Pashto speech dataset discovered from huggingface candidate sync for ASR training and evaluation.", |
| "primary_use": "ASR training and evaluation data source", |
| "tasks": [ |
| "asr" |
| ], |
| "tags": [ |
| "pashto", |
| "dataset", |
| "huggingface", |
| "asr" |
| ], |
| "evidence_text": "Matched by Pashto keyword in Hugging Face search results.", |
| "evidence_url": "https://huggingface.co/datasets/ihanif/pashto_speech_20k", |
| "markers": [ |
| "pashto" |
| ] |
| }, |
| { |
| "id": "dataset-hf-ihanif-pashto-speech-5k", |
| "title": "ihanif/pashto_speech_5k", |
| "url": "https://huggingface.co/datasets/ihanif/pashto_speech_5k", |
| "category": "dataset", |
| "source": "huggingface", |
| "status": "verified", |
| "summary": "Pashto speech dataset discovered from huggingface candidate sync for ASR training and evaluation.", |
| "primary_use": "ASR training and evaluation data source", |
| "tasks": [ |
| "asr" |
| ], |
| "tags": [ |
| "pashto", |
| "dataset", |
| "huggingface", |
| "asr" |
| ], |
| "evidence_text": "Matched by Pashto keyword in Hugging Face search results.", |
| "evidence_url": "https://huggingface.co/datasets/ihanif/pashto_speech_5k", |
| "markers": [ |
| "pashto" |
| ] |
| }, |
| { |
| "id": "dataset-hf-tasal9-pashto-dataset", |
| "title": "tasal9/Pashto_Dataset", |
| "url": "https://huggingface.co/datasets/tasal9/Pashto_Dataset", |
| "category": "dataset", |
| "source": "huggingface", |
| "status": "verified", |
| "summary": "Pashto-focused dataset discovered from huggingface candidate sync.", |
| "primary_use": "Pashto data source for NLP experimentation", |
| "tasks": [ |
| "nlp" |
| ], |
| "tags": [ |
| "pashto", |
| "dataset", |
| "huggingface", |
| "nlp" |
| ], |
| "evidence_text": "Matched by Pashto keyword in Hugging Face search results.", |
| "evidence_url": "https://huggingface.co/datasets/tasal9/Pashto_Dataset", |
| "markers": [ |
| "pashto" |
| ] |
| }, |
| { |
| "id": "model-hf-ijazulhaq-bert-base-pashto", |
| "title": "ijazulhaq/bert-base-pashto", |
| "url": "https://huggingface.co/ijazulhaq/bert-base-pashto", |
| "category": "model", |
| "source": "huggingface", |
| "status": "verified", |
| "summary": "Pashto NLP model checkpoint discovered from huggingface candidate sync.", |
| "primary_use": "Pashto model baseline for downstream NLP tasks", |
| "tasks": [ |
| "nlp" |
| ], |
| "tags": [ |
| "pashto", |
| "model", |
| "huggingface", |
| "nlp" |
| ], |
| "evidence_text": "Matched by Pashto keyword in Hugging Face search results.", |
| "evidence_url": "https://huggingface.co/ijazulhaq/bert-base-pashto", |
| "markers": [ |
| "pashto" |
| ] |
| }, |
| { |
| "id": "model-hf-ihanif-whisper-small-pashto-dropout", |
| "title": "ihanif/whisper-small-pashto-dropout", |
| "url": "https://huggingface.co/ihanif/whisper-small-pashto-dropout", |
| "category": "model", |
| "source": "huggingface", |
| "status": "verified", |
| "summary": "Pashto ASR model checkpoint discovered from huggingface candidate sync.", |
| "primary_use": "Pashto ASR baseline and model comparison", |
| "tasks": [ |
| "asr" |
| ], |
| "tags": [ |
| "pashto", |
| "model", |
| "huggingface", |
| "asr" |
| ], |
| "evidence_text": "Matched by Pashto keyword in Hugging Face search results.", |
| "evidence_url": "https://huggingface.co/ihanif/whisper-small-pashto-dropout", |
| "markers": [ |
| "pashto" |
| ] |
| }, |
| { |
| "id": "model-hf-koochikoo25-pashto-whisper-large", |
| "title": "koochikoo25/pashto-whisper-large", |
| "url": "https://huggingface.co/koochikoo25/pashto-whisper-large", |
| "category": "model", |
| "source": "huggingface", |
| "status": "verified", |
| "summary": "Pashto ASR model checkpoint discovered from huggingface candidate sync.", |
| "primary_use": "Pashto ASR baseline and model comparison", |
| "tasks": [ |
| "asr" |
| ], |
| "tags": [ |
| "pashto", |
| "model", |
| "huggingface", |
| "asr" |
| ], |
| "evidence_text": "Matched by Pashto keyword in Hugging Face search results.", |
| "evidence_url": "https://huggingface.co/koochikoo25/pashto-whisper-large", |
| "markers": [ |
| "pashto" |
| ] |
| }, |
| { |
| "id": "project-hf-space-ihanif-wav2vec-pashto-asr", |
| "title": "ihanif/wav2vec-pashto-asr", |
| "url": "https://huggingface.co/spaces/ihanif/wav2vec-pashto-asr", |
| "category": "project", |
| "source": "huggingface", |
| "status": "verified", |
| "summary": "Pashto-focused interactive project discovered from huggingface for demonstration and evaluation.", |
| "primary_use": "Interactive Pashto demo and quick qualitative validation", |
| "tasks": [ |
| "asr", |
| "demo" |
| ], |
| "tags": [ |
| "pashto", |
| "project", |
| "huggingface", |
| "asr", |
| "demo" |
| ], |
| "evidence_text": "Matched by Pashto keyword in Hugging Face Spaces search.", |
| "evidence_url": "https://huggingface.co/spaces/ihanif/wav2vec-pashto-asr", |
| "markers": [ |
| "pashto" |
| ] |
| }, |
| { |
| "id": "project-hf-space-afaqalinagra-pashto-asr-model", |
| "title": "afaqalinagra/PASHTO-ASR-MODEL", |
| "url": "https://huggingface.co/spaces/afaqalinagra/PASHTO-ASR-MODEL", |
| "category": "project", |
| "source": "huggingface", |
| "status": "verified", |
| "summary": "Pashto-focused interactive project discovered from huggingface for demonstration and evaluation.", |
| "primary_use": "Interactive Pashto demo and quick qualitative validation", |
| "tasks": [ |
| "asr", |
| "demo" |
| ], |
| "tags": [ |
| "pashto", |
| "project", |
| "huggingface", |
| "asr", |
| "demo" |
| ], |
| "evidence_text": "Matched by Pashto keyword in Hugging Face Spaces search.", |
| "evidence_url": "https://huggingface.co/spaces/afaqalinagra/PASHTO-ASR-MODEL", |
| "markers": [ |
| "pashto" |
| ] |
| }, |
| { |
| "id": "project-hf-space-ilyas02828-pashto-sign-language", |
| "title": "ilyas02828/Pashto_Sign_Language", |
| "url": "https://huggingface.co/spaces/ilyas02828/Pashto_Sign_Language", |
| "category": "project", |
| "source": "huggingface", |
| "status": "verified", |
| "summary": "Pashto-focused interactive project discovered from huggingface for demonstration and evaluation.", |
| "primary_use": "Interactive Pashto demo and quick qualitative validation", |
| "tasks": [ |
| "demo" |
| ], |
| "tags": [ |
| "pashto", |
| "project", |
| "huggingface", |
| "demo" |
| ], |
| "evidence_text": "Matched by Pashto keyword in Hugging Face Spaces search.", |
| "evidence_url": "https://huggingface.co/spaces/ilyas02828/Pashto_Sign_Language", |
| "markers": [ |
| "pashto" |
| ] |
| }, |
| { |
| "id": "project-hf-space-mahmudaq-pashtoasrnmt1", |
| "title": "mahmudaq/PashtoASRNMT1", |
| "url": "https://huggingface.co/spaces/mahmudaq/PashtoASRNMT1", |
| "category": "project", |
| "source": "huggingface", |
| "status": "verified", |
| "summary": "Pashto-focused interactive project discovered from huggingface for demonstration and evaluation.", |
| "primary_use": "Interactive Pashto demo and quick qualitative validation", |
| "tasks": [ |
| "asr", |
| "mt", |
| "demo" |
| ], |
| "tags": [ |
| "pashto", |
| "project", |
| "huggingface", |
| "asr", |
| "mt", |
| "demo" |
| ], |
| "evidence_text": "Matched by Pashto keyword in Hugging Face Spaces search.", |
| "evidence_url": "https://huggingface.co/spaces/mahmudaq/PashtoASRNMT1", |
| "markers": [ |
| "pashto" |
| ] |
| }, |
| { |
| "id": "paper-s2-enhancing-pashto-ner-using-machine-labeled-data-and-transformer-based-models", |
| "title": "Enhancing Pashto NER Using Machine-Labeled Data and Transformer-Based Models", |
| "url": "https://www.semanticscholar.org/paper/be851ecf9197ef9bb8bf764abf4db0dda95cd9da", |
| "category": "paper", |
| "source": "other", |
| "status": "verified", |
| "summary": "Pashto language technology paper discovered via Semantic Scholar for research reference.", |
| "primary_use": "Pashto research reference for methods and benchmarking", |
| "tasks": [ |
| "nlp" |
| ], |
| "tags": [ |
| "pashto", |
| "paper", |
| "other", |
| "nlp" |
| ], |
| "evidence_text": "Matched by explicit Pashto marker in paper title from Semantic Scholar search.", |
| "evidence_url": "https://www.semanticscholar.org/paper/be851ecf9197ef9bb8bf764abf4db0dda95cd9da", |
| "markers": [ |
| "pashto" |
| ] |
| }, |
| { |
| "id": "dataset-hf-aamirhs-pashto-audio-wav2vec", |
| "title": "aamirhs/pashto-audio-wav2vec", |
| "url": "https://huggingface.co/datasets/aamirhs/pashto-audio-wav2vec", |
| "category": "dataset", |
| "source": "huggingface", |
| "status": "verified", |
| "summary": "Pashto speech dataset surfaced from Hugging Face candidate sync for ASR experiments.", |
| "primary_use": "Pashto ASR data exploration and baseline training", |
| "tasks": [ |
| "asr" |
| ], |
| "tags": [ |
| "pashto", |
| "dataset", |
| "huggingface", |
| "speech", |
| "asr" |
| ], |
| "evidence_text": "Matched by Pashto keyword in Hugging Face search results.", |
| "evidence_url": "https://huggingface.co/datasets/aamirhs/pashto-audio-wav2vec", |
| "markers": [ |
| "pashto" |
| ] |
| }, |
| { |
| "id": "dataset-hf-alimuhammad73-pashto-poetry", |
| "title": "AliMuhammad73/Pashto-Poetry", |
| "url": "https://huggingface.co/datasets/AliMuhammad73/Pashto-Poetry", |
| "category": "dataset", |
| "source": "huggingface", |
| "status": "verified", |
| "summary": "Pashto poetry text dataset surfaced from Hugging Face candidate sync for NLP experiments.", |
| "primary_use": "Pashto poetry corpus for language modeling and text analysis", |
| "tasks": [ |
| "nlp" |
| ], |
| "tags": [ |
| "pashto", |
| "dataset", |
| "huggingface", |
| "text", |
| "poetry", |
| "nlp" |
| ], |
| "evidence_text": "Matched by Pashto keyword in Hugging Face search results.", |
| "evidence_url": "https://huggingface.co/datasets/AliMuhammad73/Pashto-Poetry", |
| "markers": [ |
| "pashto" |
| ] |
| }, |
| { |
| "id": "model-hf-aamirhs-wav2vec2-large-xls-r-300m-pashto-colab", |
| "title": "aamirhs/wav2vec2-large-xls-r-300m-pashto-colab", |
| "url": "https://huggingface.co/aamirhs/wav2vec2-large-xls-r-300m-pashto-colab", |
| "category": "model", |
| "source": "huggingface", |
| "status": "verified", |
| "summary": "Pashto ASR model checkpoint surfaced from Hugging Face candidate sync.", |
| "primary_use": "Pashto ASR baseline and transfer-learning comparison", |
| "tasks": [ |
| "asr" |
| ], |
| "tags": [ |
| "pashto", |
| "model", |
| "huggingface", |
| "asr" |
| ], |
| "evidence_text": "Matched by Pashto keyword in Hugging Face search results.", |
| "evidence_url": "https://huggingface.co/aamirhs/wav2vec2-large-xls-r-300m-pashto-colab", |
| "markers": [ |
| "pashto" |
| ] |
| }, |
| { |
| "id": "project-hf-space-aizazayyubi-pashto-asr", |
| "title": "Aizazayyubi/pashto_asr", |
| "url": "https://huggingface.co/spaces/Aizazayyubi/pashto_asr", |
| "category": "project", |
| "source": "huggingface", |
| "status": "verified", |
| "summary": "Pashto ASR interactive demo surfaced from Hugging Face Spaces candidate sync.", |
| "primary_use": "Interactive Pashto ASR demo for qualitative evaluation", |
| "tasks": [ |
| "asr", |
| "demo" |
| ], |
| "tags": [ |
| "pashto", |
| "project", |
| "huggingface", |
| "asr", |
| "demo" |
| ], |
| "evidence_text": "Matched by Pashto keyword in Hugging Face Spaces search.", |
| "evidence_url": "https://huggingface.co/spaces/Aizazayyubi/pashto_asr", |
| "markers": [ |
| "pashto" |
| ] |
| }, |
| { |
| "id": "paper-arxiv-from-scarcity-to-scale-pashto-common-voice", |
| "title": "From Scarcity to Scale: A Release-Level Analysis of the Pashto Common Voice Dataset", |
| "url": "http://arxiv.org/abs/2602.14062v1", |
| "category": "paper", |
| "source": "arxiv", |
| "status": "verified", |
| "summary": "Research paper analyzing Pashto Common Voice releases and dataset scaling characteristics.", |
| "primary_use": "ASR data quality and release trend reference", |
| "tasks": [ |
| "asr", |
| "benchmarking" |
| ], |
| "tags": [ |
| "pashto", |
| "paper", |
| "arxiv", |
| "asr", |
| "common-voice" |
| ], |
| "evidence_text": "Matched by Pashto marker in paper title from arXiv query results.", |
| "evidence_url": "http://arxiv.org/abs/2602.14062v1", |
| "markers": [ |
| "pashto" |
| ] |
| }, |
| { |
| "id": "paper-arxiv-tuning-traditional-pashto-text-classification", |
| "title": "Tuning Traditional Language Processing Approaches for Pashto Text Classification", |
| "url": "http://arxiv.org/abs/2305.03737v1", |
| "category": "paper", |
| "source": "arxiv", |
| "status": "verified", |
| "summary": "Research paper focused on Pashto text classification using traditional NLP approaches.", |
| "primary_use": "Pashto text classification method reference", |
| "tasks": [ |
| "nlp" |
| ], |
| "tags": [ |
| "pashto", |
| "paper", |
| "arxiv", |
| "nlp", |
| "classification" |
| ], |
| "evidence_text": "Matched by Pashto marker in paper title from arXiv query results.", |
| "evidence_url": "http://arxiv.org/abs/2305.03737v1", |
| "markers": [ |
| "pashto" |
| ] |
| }, |
| { |
| "id": "dataset-dataverse-iarpa-babel-pashto-language-pack-v0-4by", |
| "title": "IARPA Babel Pashto Language Pack IARPA-babel104b-v0.4bY", |
| "url": "https://hdl.handle.net/11272.1/AB2/GLFN3X", |
| "category": "dataset", |
| "source": "dataverse", |
| "status": "verified", |
| "summary": "Pashto Babel language pack dataset for speech and language processing evaluation.", |
| "primary_use": "Pashto speech dataset for ASR and language identification experiments", |
| "tasks": [ |
| "asr", |
| "benchmarking" |
| ], |
| "tags": [ |
| "pashto", |
| "dataset", |
| "dataverse", |
| "speech", |
| "asr", |
| "babel" |
| ], |
| "evidence_text": "Dataverse metadata includes Pashto markers in dataset title or description.", |
| "evidence_url": "https://hdl.handle.net/11272.1/AB2/GLFN3X", |
| "markers": [ |
| "pashto" |
| ] |
| }, |
| { |
| "id": "paper-arxiv-image-to-text-pashto-farsi-traditional-chinese", |
| "title": "Development of a New Image-to-text Conversion System for Pashto, Farsi and Traditional Chinese", |
| "url": "http://arxiv.org/abs/2005.08650v1", |
| "category": "paper", |
| "source": "arxiv", |
| "status": "verified", |
| "summary": "Research paper on image-to-text conversion including Pashto OCR.", |
| "primary_use": "Pashto OCR method reference", |
| "tasks": [ |
| "ocr", |
| "nlp" |
| ], |
| "tags": [ |
| "pashto", |
| "paper", |
| "arxiv", |
| "ocr" |
| ], |
| "evidence_text": "Matched by Pashto marker in paper title from arXiv query results.", |
| "evidence_url": "http://arxiv.org/abs/2005.08650v1", |
| "markers": [ |
| "pashto" |
| ] |
| }, |
| { |
| "id": "paper-openalex-benchmark-pashto-handwritten-character-dataset-ocr", |
| "title": "Benchmark Pashto Handwritten Character Dataset and Pashto Object Character Recognition (OCR) Using Deep Neural Network with Rule Activation Function", |
| "url": "https://doi.org/10.1155/2021/6669672", |
| "category": "paper", |
| "source": "openalex", |
| "status": "verified", |
| "summary": "Research paper introducing a benchmark dataset and OCR approach for Pashto handwritten characters.", |
| "primary_use": "Pashto handwritten OCR benchmark and methodology reference", |
| "tasks": [ |
| "ocr", |
| "benchmarking" |
| ], |
| "tags": [ |
| "pashto", |
| "paper", |
| "openalex", |
| "ocr", |
| "benchmark" |
| ], |
| "evidence_text": "Matched by explicit Pashto marker in title from OpenAlex works search.", |
| "evidence_url": "https://doi.org/10.1155/2021/6669672", |
| "markers": [ |
| "pashto" |
| ] |
| }, |
| { |
| "id": "paper-openalex-asr-isolated-pashto-spoken-digits-mfcc-knn", |
| "title": "Database development and automatic speech recognition of isolated Pashto spoken digits using MFCC and K-NN", |
| "url": "https://doi.org/10.1007/s10772-014-9267-z", |
| "category": "paper", |
| "source": "openalex", |
| "status": "verified", |
| "summary": "Research paper on isolated Pashto spoken-digit ASR with MFCC and K-NN.", |
| "primary_use": "Pashto ASR baseline method reference for digit recognition", |
| "tasks": [ |
| "asr" |
| ], |
| "tags": [ |
| "pashto", |
| "paper", |
| "openalex", |
| "asr", |
| "speech" |
| ], |
| "evidence_text": "Matched by explicit Pashto marker in title from OpenAlex works search.", |
| "evidence_url": "https://doi.org/10.1007/s10772-014-9267-z", |
| "markers": [ |
| "pashto" |
| ] |
| }, |
| { |
| "id": "paper-openalex-pashto-isolated-digits-recognition-dcnn", |
| "title": "Pashto isolated digits recognition using deep convolutional neural network", |
| "url": "https://doi.org/10.1016/j.heliyon.2020.e03372", |
| "category": "paper", |
| "source": "openalex", |
| "status": "verified", |
| "summary": "Research paper on Pashto isolated-digit recognition using deep convolutional neural networks.", |
| "primary_use": "Pashto speech recognition research reference", |
| "tasks": [ |
| "asr" |
| ], |
| "tags": [ |
| "pashto", |
| "paper", |
| "openalex", |
| "asr", |
| "deep-learning" |
| ], |
| "evidence_text": "Matched by explicit Pashto marker in title from OpenAlex works search.", |
| "evidence_url": "https://doi.org/10.1016/j.heliyon.2020.e03372", |
| "markers": [ |
| "pashto" |
| ] |
| }, |
| { |
| "id": "paper-openalex-pashto-offensive-language-detection-benchmark-bert", |
| "title": "Pashto offensive language detection: a benchmark dataset and monolingual Pashto BERT", |
| "url": "https://doi.org/10.7717/peerj-cs.1617", |
| "category": "paper", |
| "source": "openalex", |
| "status": "verified", |
| "summary": "Research paper on Pashto offensive language detection with benchmark dataset and monolingual BERT model.", |
| "primary_use": "Pashto NLP toxicity detection benchmark and model reference", |
| "tasks": [ |
| "nlp", |
| "benchmarking" |
| ], |
| "tags": [ |
| "pashto", |
| "paper", |
| "openalex", |
| "nlp", |
| "bert", |
| "benchmark" |
| ], |
| "evidence_text": "Matched by explicit Pashto marker in title from OpenAlex works search.", |
| "evidence_url": "https://doi.org/10.7717/peerj-cs.1617", |
| "markers": [ |
| "pashto" |
| ] |
| }, |
| { |
| "id": "paper-openalex-phti-pashto-handwritten-text-imagebase", |
| "title": "PHTI: Pashto Handwritten Text Imagebase for Deep Learning Applications", |
| "url": "https://doi.org/10.1109/access.2022.3216881", |
| "category": "paper", |
| "source": "openalex", |
| "status": "verified", |
| "summary": "Research paper describing PHTI, a Pashto handwritten text imagebase for deep learning.", |
| "primary_use": "Pashto OCR dataset and benchmark reference", |
| "tasks": [ |
| "ocr", |
| "benchmarking" |
| ], |
| "tags": [ |
| "pashto", |
| "paper", |
| "openalex", |
| "ocr", |
| "dataset" |
| ], |
| "evidence_text": "Matched by explicit Pashto marker in title from OpenAlex works search.", |
| "evidence_url": "https://doi.org/10.1109/access.2022.3216881", |
| "markers": [ |
| "pashto" |
| ] |
| }, |
| { |
| "id": "paper-openalex-recognition-of-pashto-handwritten-characters-deep-learning", |
| "title": "Recognition of Pashto Handwritten Characters Based on Deep Learning", |
| "url": "https://doi.org/10.3390/s20205884", |
| "category": "paper", |
| "source": "openalex", |
| "status": "verified", |
| "summary": "Research paper on deep-learning-based recognition of Pashto handwritten characters.", |
| "primary_use": "Pashto OCR model reference for handwritten character recognition", |
| "tasks": [ |
| "ocr" |
| ], |
| "tags": [ |
| "pashto", |
| "paper", |
| "openalex", |
| "ocr", |
| "deep-learning" |
| ], |
| "evidence_text": "Matched by explicit Pashto marker in title from OpenAlex works search.", |
| "evidence_url": "https://doi.org/10.3390/s20205884", |
| "markers": [ |
| "pashto" |
| ] |
| }, |
| { |
| "id": "paper-openalex-kpti-katib-pashto-text-imagebase-benchmark", |
| "title": "KPTI: Katib's Pashto Text Imagebase and Deep Learning Benchmark", |
| "url": "https://doi.org/10.1109/icfhr.2016.0090", |
| "category": "paper", |
| "source": "openalex", |
| "status": "verified", |
| "summary": "Research paper introducing KPTI, a Pashto text imagebase and benchmark for handwritten text recognition.", |
| "primary_use": "Pashto OCR dataset and benchmarking reference", |
| "tasks": [ |
| "ocr", |
| "benchmarking" |
| ], |
| "tags": [ |
| "pashto", |
| "paper", |
| "openalex", |
| "ocr", |
| "benchmark" |
| ], |
| "evidence_text": "Matched by explicit Pashto marker in title from OpenAlex works search.", |
| "evidence_url": "https://doi.org/10.1109/icfhr.2016.0090", |
| "markers": [ |
| "pashto" |
| ] |
| }, |
| { |
| "id": "paper-openalex-pioneer-dataset-handwritten-pashto-cnn", |
| "title": "Pioneer dataset and recognition of Handwritten Pashto characters using Convolution Neural Networks", |
| "url": "https://doi.org/10.1177/0020294020964826", |
| "category": "paper", |
| "source": "openalex", |
| "status": "verified", |
| "summary": "Research paper on a pioneering handwritten Pashto character dataset with CNN-based recognition.", |
| "primary_use": "Pashto handwritten character recognition reference", |
| "tasks": [ |
| "ocr", |
| "benchmarking" |
| ], |
| "tags": [ |
| "pashto", |
| "paper", |
| "openalex", |
| "ocr", |
| "deep-learning" |
| ], |
| "evidence_text": "Matched by explicit Pashto marker in title from OpenAlex works search.", |
| "evidence_url": "https://doi.org/10.1177/0020294020964826", |
| "markers": [ |
| "pashto" |
| ] |
| }, |
| { |
| "id": "paper-openalex-scale-rotation-invariant-ocr-pashto-mdlstm", |
| "title": "Scale and rotation invariant OCR for Pashto cursive script using MDLSTM network", |
| "url": "https://doi.org/10.1109/icdar.2015.7333931", |
| "category": "paper", |
| "source": "openalex", |
| "status": "verified", |
| "summary": "Research paper on scale- and rotation-invariant OCR for cursive Pashto using MDLSTM.", |
| "primary_use": "Pashto OCR model architecture reference", |
| "tasks": [ |
| "ocr" |
| ], |
| "tags": [ |
| "pashto", |
| "paper", |
| "openalex", |
| "ocr", |
| "mdlstm" |
| ], |
| "evidence_text": "Matched by explicit Pashto marker in title from OpenAlex works search.", |
| "evidence_url": "https://doi.org/10.1109/icdar.2015.7333931", |
| "markers": [ |
| "pashto" |
| ] |
| }, |
| { |
| "id": "paper-openalex-recognizable-units-pashto-ocr", |
| "title": "Recognizable units in Pashto language for OCR", |
| "url": "https://doi.org/10.1109/icdar.2015.7333963", |
| "category": "paper", |
| "source": "openalex", |
| "status": "verified", |
| "summary": "Research paper defining recognizable units in Pashto for OCR workflows.", |
| "primary_use": "Pashto OCR preprocessing and unit-design reference", |
| "tasks": [ |
| "ocr" |
| ], |
| "tags": [ |
| "pashto", |
| "paper", |
| "openalex", |
| "ocr" |
| ], |
| "evidence_text": "Matched by explicit Pashto marker in title from OpenAlex works search.", |
| "evidence_url": "https://doi.org/10.1109/icdar.2015.7333963", |
| "markers": [ |
| "pashto" |
| ] |
| }, |
| { |
| "id": "paper-openalex-shape-analysis-pashto-script-image-database-ocr", |
| "title": "Shape analysis of Pashto script and creation of image database for OCR", |
| "url": "https://doi.org/10.1109/icet.2009.5353160", |
| "category": "paper", |
| "source": "openalex", |
| "status": "verified", |
| "summary": "Research paper on Pashto script shape analysis and image database creation for OCR.", |
| "primary_use": "Pashto OCR dataset design and feature reference", |
| "tasks": [ |
| "ocr", |
| "benchmarking" |
| ], |
| "tags": [ |
| "pashto", |
| "paper", |
| "openalex", |
| "ocr", |
| "dataset" |
| ], |
| "evidence_text": "Matched by explicit Pashto marker in title from OpenAlex works search.", |
| "evidence_url": "https://doi.org/10.1109/icet.2009.5353160", |
| "markers": [ |
| "pashto" |
| ] |
| }, |
| { |
| "id": "paper-openalex-speech-translation-low-resource-case-pashto", |
| "title": "Speech translation for low-resource languages: the case of Pashto", |
| "url": "https://doi.org/10.21437/interspeech.2005-723", |
| "category": "paper", |
| "source": "openalex", |
| "status": "verified", |
| "summary": "Research paper on speech translation for low-resource languages, including Pashto.", |
| "primary_use": "Pashto speech translation and low-resource MT reference", |
| "tasks": [ |
| "asr", |
| "mt" |
| ], |
| "tags": [ |
| "pashto", |
| "paper", |
| "openalex", |
| "speech", |
| "translation" |
| ], |
| "evidence_text": "Matched by explicit Pashto marker in title from OpenAlex works search.", |
| "evidence_url": "https://doi.org/10.21437/interspeech.2005-723", |
| "markers": [ |
| "pashto" |
| ] |
| } |
| ] |
| } |
|
|