Run one-time resource sync and promote new Pashto resources

Files changed (6) hide show

docs/search/resources.json +128 -1
resources/README.md +3 -3
resources/catalog/pending_candidates.json +645 -70
resources/catalog/resources.json +142 -0
resources/datasets/README.md +3 -0
resources/models/README.md +2 -0

docs/search/resources.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "generated_on": "2026-02-15T00:00:00Z",
-  "count": 25,
   "resources": [
     {
       "id": "dataset-common-voice-ps-v24",
@@ -590,6 +590,133 @@
       "markers": [
         "ps_af"
       ]
     }
   ]
 }

 {
   "generated_on": "2026-02-15T00:00:00Z",
+  "count": 30,
   "resources": [
     {
       "id": "dataset-common-voice-ps-v24",
       "markers": [
         "ps_af"
       ]
+    },
+    {
+      "id": "dataset-nexdata-99h-pashto-dialogue",
+      "title": "99 Hours Pashto Spontaneous Dialogue Smartphone Speech Dataset",
+      "url": "https://huggingface.co/datasets/Nexdata/99_Hours_Pashto_Spontaneous_Dialogue_Smartphone_speech_dataset",
+      "category": "dataset",
+      "source": "huggingface",
+      "status": "verified",
+      "summary": "Large spontaneous Pashto smartphone speech dataset for robust ASR experimentation.",
+      "primary_use": "Spontaneous speech ASR training and robustness evaluation",
+      "tasks": [
+        "asr"
+      ],
+      "tags": [
+        "pashto",
+        "speech",
+        "asr",
+        "dialogue"
+      ],
+      "evidence_text": "Dataset title explicitly includes Pashto and API metadata marks audio and text modalities.",
+      "evidence_url": "https://huggingface.co/datasets/Nexdata/99_Hours_Pashto_Spontaneous_Dialogue_Smartphone_speech_dataset",
+      "markers": [
+        "Pashto"
+      ]
+    },
+    {
+      "id": "dataset-zirak-ai-pashto-ocr",
+      "title": "Zirak-AI PashtoOCR",
+      "url": "https://huggingface.co/datasets/zirak-ai/PashtoOCR",
+      "category": "dataset",
+      "source": "huggingface",
+      "status": "verified",
+      "summary": "Pashto-focused OCR dataset with image-text pairs for document understanding tasks.",
+      "primary_use": "OCR and text extraction benchmarking",
+      "tasks": [
+        "ocr",
+        "nlp"
+      ],
+      "tags": [
+        "pashto",
+        "ocr",
+        "nlp",
+        "vision"
+      ],
+      "evidence_text": "Dataset tags include language:ps and the dataset name is PashtoOCR.",
+      "evidence_url": "https://huggingface.co/datasets/zirak-ai/PashtoOCR",
+      "markers": [
+        "ps",
+        "PashtoOCR"
+      ]
+    },
+    {
+      "id": "dataset-ihanif-pashto-wikipedia-corpus",
+      "title": "Pashto Wikipedia Corpus",
+      "url": "https://huggingface.co/datasets/ihanif/pashto-wikipedia-corpus",
+      "category": "dataset",
+      "source": "huggingface",
+      "status": "verified",
+      "summary": "Pashto text corpus prepared from Wikipedia data for NLP and language modeling.",
+      "primary_use": "Pashto text corpus for NLP baselines",
+      "tasks": [
+        "nlp"
+      ],
+      "tags": [
+        "pashto",
+        "text",
+        "nlp",
+        "wikipedia"
+      ],
+      "evidence_text": "Dataset metadata includes language:ps and the title specifies Pashto corpus.",
+      "evidence_url": "https://huggingface.co/datasets/ihanif/pashto-wikipedia-corpus",
+      "markers": [
+        "ps",
+        "Pashto"
+      ]
+    },
+    {
+      "id": "model-ihanif-wav2vec2-xls-r-300m-pashto",
+      "title": "wav2vec2 XLS-R 300M Pashto",
+      "url": "https://huggingface.co/ihanif/wav2vec2-xls-r-300m-pashto",
+      "category": "model",
+      "source": "huggingface",
+      "status": "verified",
+      "summary": "Fine-tuned wav2vec2 XLS-R model for Pashto ASR with published FLEURS evaluation tags.",
+      "primary_use": "Pashto ASR baseline and comparative experiments",
+      "tasks": [
+        "asr"
+      ],
+      "tags": [
+        "pashto",
+        "asr",
+        "wav2vec2",
+        "fleurs"
+      ],
+      "evidence_text": "Model tags include pashto and ps, and model index references FLEURS config ps_af.",
+      "evidence_url": "https://huggingface.co/ihanif/wav2vec2-xls-r-300m-pashto",
+      "markers": [
+        "pashto",
+        "ps",
+        "ps_af"
+      ]
+    },
+    {
+      "id": "model-ihanif-whisper-medium-pashto",
+      "title": "Whisper Medium Pashto",
+      "url": "https://huggingface.co/ihanif/whisper-medium-pashto",
+      "category": "model",
+      "source": "huggingface",
+      "status": "verified",
+      "summary": "Fine-tuned Whisper Medium checkpoint for Pashto ASR with benchmark metadata.",
+      "primary_use": "Pashto ASR baseline and transcription quality comparisons",
+      "tasks": [
+        "asr"
+      ],
+      "tags": [
+        "pashto",
+        "asr",
+        "whisper",
+        "fleurs"
+      ],
+      "evidence_text": "Model tags include pashto and ps, and model index uses FLEURS ps_af split.",
+      "evidence_url": "https://huggingface.co/ihanif/whisper-medium-pashto",
+      "markers": [
+        "pashto",
+        "ps",
+        "ps_af"
+      ]
     }
   ]
 }

resources/README.md CHANGED Viewed

@@ -3,8 +3,8 @@
 Structured, Pashto-focused resource tracking lives in this folder.
 ## Sections
-- Datasets (8): [datasets/README.md](datasets/README.md)
-- Models (7): [models/README.md](models/README.md)
 - Benchmarks (4): [benchmarks/README.md](benchmarks/README.md)
 - Tools (2): [tools/README.md](tools/README.md)
 - Papers (4): [papers/README.md](papers/README.md)
@@ -20,4 +20,4 @@ Structured, Pashto-focused resource tracking lives in this folder.
 - Run `python scripts/validate_resource_catalog.py` before opening a PR.
 - Run `python scripts/generate_resource_views.py` after catalog changes.
-Verified resource count: `25`

 Structured, Pashto-focused resource tracking lives in this folder.
 ## Sections
+- Datasets (11): [datasets/README.md](datasets/README.md)
+- Models (9): [models/README.md](models/README.md)
 - Benchmarks (4): [benchmarks/README.md](benchmarks/README.md)
 - Tools (2): [tools/README.md](tools/README.md)
 - Papers (4): [papers/README.md](papers/README.md)
 - Run `python scripts/validate_resource_catalog.py` before opening a PR.
 - Run `python scripts/generate_resource_views.py` after catalog changes.
+Verified resource count: `30`

resources/catalog/pending_candidates.json CHANGED Viewed

@@ -1,10 +1,10 @@
 {
-  "generated_on": "2026-02-15T09:45:32.641403+00:00",
   "sources": [
     "huggingface-datasets",
     "huggingface-models"
   ],
-  "candidate_count": 20,
   "candidates": [
     {
       "id": "candidate-hf-dataset-aamirhs-pashto",
@@ -75,6 +75,121 @@
         "dataset"
       ]
     },
     {
       "id": "candidate-hf-dataset-arsalagrey-pashto",
       "title": "arsalagrey/pashto",
@@ -82,12 +197,288 @@
       "category": "dataset",
       "source": "huggingface",
       "status": "candidate",
-      "summary": "Candidate dataset returned from Hugging Face search for Pashto.",
       "primary_use": "Needs maintainer review before promotion to verified catalog.",
       "tasks": [],
       "pashto_evidence": {
         "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
-        "evidence_url": "https://huggingface.co/datasets/arsalagrey/pashto",
         "markers": [
           "pashto"
         ]
@@ -95,22 +486,22 @@
       "tags": [
         "pashto",
         "candidate",
-        "dataset"
       ]
     },
     {
-      "id": "candidate-hf-dataset-arsalagrey-pashto-books",
-      "title": "arsalagrey/pashto-books",
-      "url": "https://huggingface.co/datasets/arsalagrey/pashto-books",
-      "category": "dataset",
       "source": "huggingface",
       "status": "candidate",
-      "summary": "Candidate dataset returned from Hugging Face search for Pashto.",
       "primary_use": "Needs maintainer review before promotion to verified catalog.",
       "tasks": [],
       "pashto_evidence": {
         "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
-        "evidence_url": "https://huggingface.co/datasets/arsalagrey/pashto-books",
         "markers": [
           "pashto"
         ]
@@ -118,22 +509,22 @@
       "tags": [
         "pashto",
         "candidate",
-        "dataset"
       ]
     },
     {
-      "id": "candidate-hf-dataset-arsalagrey-pashto-books-json",
-      "title": "arsalagrey/pashto-books-json",
-      "url": "https://huggingface.co/datasets/arsalagrey/pashto-books-json",
-      "category": "dataset",
       "source": "huggingface",
       "status": "candidate",
-      "summary": "Candidate dataset returned from Hugging Face search for Pashto.",
       "primary_use": "Needs maintainer review before promotion to verified catalog.",
       "tasks": [],
       "pashto_evidence": {
         "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
-        "evidence_url": "https://huggingface.co/datasets/arsalagrey/pashto-books-json",
         "markers": [
           "pashto"
         ]
@@ -141,13 +532,13 @@
       "tags": [
         "pashto",
         "candidate",
-        "dataset"
       ]
     },
     {
-      "id": "candidate-hf-model-ihanif-wav2vec2-xls-r-300m-pashto",
-      "title": "ihanif/wav2vec2-xls-r-300m-pashto",
-      "url": "https://huggingface.co/ihanif/wav2vec2-xls-r-300m-pashto",
       "category": "model",
       "source": "huggingface",
       "status": "candidate",
@@ -156,7 +547,7 @@
       "tasks": [],
       "pashto_evidence": {
         "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
-        "evidence_url": "https://huggingface.co/ihanif/wav2vec2-xls-r-300m-pashto",
         "markers": [
           "pashto"
         ]
@@ -168,9 +559,9 @@
       ]
     },
     {
-      "id": "candidate-hf-model-ihanif-wav2vec2-xls-r-300m-pashto-lm",
-      "title": "ihanif/wav2vec2-xls-r-300m-pashto-lm",
-      "url": "https://huggingface.co/ihanif/wav2vec2-xls-r-300m-pashto-lm",
       "category": "model",
       "source": "huggingface",
       "status": "candidate",
@@ -179,7 +570,7 @@
       "tasks": [],
       "pashto_evidence": {
         "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
-        "evidence_url": "https://huggingface.co/ihanif/wav2vec2-xls-r-300m-pashto-lm",
         "markers": [
           "pashto"
         ]
@@ -191,9 +582,9 @@
       ]
     },
     {
-      "id": "candidate-hf-model-ihanif-whisper-base-pashto",
-      "title": "ihanif/whisper-base-pashto",
-      "url": "https://huggingface.co/ihanif/whisper-base-pashto",
       "category": "model",
       "source": "huggingface",
       "status": "candidate",
@@ -202,7 +593,7 @@
       "tasks": [],
       "pashto_evidence": {
         "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
-        "evidence_url": "https://huggingface.co/ihanif/whisper-base-pashto",
         "markers": [
           "pashto"
         ]
@@ -214,9 +605,9 @@
       ]
     },
     {
-      "id": "candidate-hf-model-ihanif-whisper-large-pashto",
-      "title": "ihanif/whisper-large-pashto",
-      "url": "https://huggingface.co/ihanif/whisper-large-pashto",
       "category": "model",
       "source": "huggingface",
       "status": "candidate",
@@ -225,7 +616,7 @@
       "tasks": [],
       "pashto_evidence": {
         "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
-        "evidence_url": "https://huggingface.co/ihanif/whisper-large-pashto",
         "markers": [
           "pashto"
         ]
@@ -237,9 +628,9 @@
       ]
     },
     {
-      "id": "candidate-hf-model-ihanif-whisper-medium-pashto",
-      "title": "ihanif/whisper-medium-pashto",
-      "url": "https://huggingface.co/ihanif/whisper-medium-pashto",
       "category": "model",
       "source": "huggingface",
       "status": "candidate",
@@ -248,7 +639,7 @@
       "tasks": [],
       "pashto_evidence": {
         "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
-        "evidence_url": "https://huggingface.co/ihanif/whisper-medium-pashto",
         "markers": [
           "pashto"
         ]
@@ -260,9 +651,9 @@
       ]
     },
     {
-      "id": "candidate-hf-model-ihanif-whisper-medium-pashto-3e-7",
-      "title": "ihanif/whisper-medium-pashto-3e-7",
-      "url": "https://huggingface.co/ihanif/whisper-medium-pashto-3e-7",
       "category": "model",
       "source": "huggingface",
       "status": "candidate",
@@ -271,7 +662,7 @@
       "tasks": [],
       "pashto_evidence": {
         "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
-        "evidence_url": "https://huggingface.co/ihanif/whisper-medium-pashto-3e-7",
         "markers": [
           "pashto"
         ]
@@ -283,9 +674,9 @@
       ]
     },
     {
-      "id": "candidate-hf-model-ihanif-whisper-small-pashto",
-      "title": "ihanif/whisper-small-pashto",
-      "url": "https://huggingface.co/ihanif/whisper-small-pashto",
       "category": "model",
       "source": "huggingface",
       "status": "candidate",
@@ -294,7 +685,7 @@
       "tasks": [],
       "pashto_evidence": {
         "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
-        "evidence_url": "https://huggingface.co/ihanif/whisper-small-pashto",
         "markers": [
           "pashto"
         ]
@@ -306,9 +697,9 @@
       ]
     },
     {
-      "id": "candidate-hf-model-ihanif-whisper-small-pashto-dropout",
-      "title": "ihanif/whisper-small-pashto-dropout",
-      "url": "https://huggingface.co/ihanif/whisper-small-pashto-dropout",
       "category": "model",
       "source": "huggingface",
       "status": "candidate",
@@ -317,7 +708,7 @@
       "tasks": [],
       "pashto_evidence": {
         "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
-        "evidence_url": "https://huggingface.co/ihanif/whisper-small-pashto-dropout",
         "markers": [
           "pashto"
         ]
@@ -329,9 +720,9 @@
       ]
     },
     {
-      "id": "candidate-hf-model-ihanif-xls-r-1b-pashto",
-      "title": "ihanif/xls-r-1b-pashto",
-      "url": "https://huggingface.co/ihanif/xls-r-1b-pashto",
       "category": "model",
       "source": "huggingface",
       "status": "candidate",
@@ -340,7 +731,99 @@
       "tasks": [],
       "pashto_evidence": {
         "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
-        "evidence_url": "https://huggingface.co/ihanif/xls-r-1b-pashto",
         "markers": [
           "pashto"
         ]
@@ -375,9 +858,55 @@
       ]
     },
     {
-      "id": "candidate-hf-dataset-nexdata-99-hours-pashto-spontaneous-dialogue-smartphone-speech-dataset",
-      "title": "Nexdata/99_Hours_Pashto_Spontaneous_Dialogue_Smartphone_speech_dataset",
-      "url": "https://huggingface.co/datasets/Nexdata/99_Hours_Pashto_Spontaneous_Dialogue_Smartphone_speech_dataset",
       "category": "dataset",
       "source": "huggingface",
       "status": "candidate",
@@ -386,7 +915,7 @@
       "tasks": [],
       "pashto_evidence": {
         "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
-        "evidence_url": "https://huggingface.co/datasets/Nexdata/99_Hours_Pashto_Spontaneous_Dialogue_Smartphone_speech_dataset",
         "markers": [
           "pashto"
         ]
@@ -421,18 +950,18 @@
       ]
     },
     {
-      "id": "candidate-hf-model-zirak-ai-pashto-bert-v1",
-      "title": "zirak-ai/pashto-bert-v1",
-      "url": "https://huggingface.co/zirak-ai/pashto-bert-v1",
-      "category": "model",
       "source": "huggingface",
       "status": "candidate",
-      "summary": "Candidate model returned from Hugging Face search for Pashto.",
       "primary_use": "Needs maintainer review before promotion to verified catalog.",
       "tasks": [],
       "pashto_evidence": {
         "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
-        "evidence_url": "https://huggingface.co/zirak-ai/pashto-bert-v1",
         "markers": [
           "pashto"
         ]
@@ -440,13 +969,36 @@
       "tags": [
         "pashto",
         "candidate",
-        "model"
       ]
     },
     {
-      "id": "candidate-hf-dataset-zirak-ai-pashtoocr",
-      "title": "zirak-ai/PashtoOCR",
-      "url": "https://huggingface.co/datasets/zirak-ai/PashtoOCR",
       "category": "dataset",
       "source": "huggingface",
       "status": "candidate",
@@ -455,7 +1007,7 @@
       "tasks": [],
       "pashto_evidence": {
         "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
-        "evidence_url": "https://huggingface.co/datasets/zirak-ai/PashtoOCR",
         "markers": [
           "pashto"
         ]
@@ -465,6 +1017,29 @@
         "candidate",
         "dataset"
       ]
     }
   ],
   "errors": [

 {
+  "generated_on": "2026-02-15T10:06:14.796338+00:00",
   "sources": [
     "huggingface-datasets",
     "huggingface-models"
   ],
+  "candidate_count": 45,
   "candidates": [
     {
       "id": "candidate-hf-dataset-aamirhs-pashto",
         "dataset"
       ]
     },
+    {
+      "id": "candidate-hf-model-aamirhs-wav2vec2-large-xls-r-300m-pashto-colab",
+      "title": "aamirhs/wav2vec2-large-xls-r-300m-pashto-colab",
+      "url": "https://huggingface.co/aamirhs/wav2vec2-large-xls-r-300m-pashto-colab",
+      "category": "model",
+      "source": "huggingface",
+      "status": "candidate",
+      "summary": "Candidate model returned from Hugging Face search for Pashto.",
+      "primary_use": "Needs maintainer review before promotion to verified catalog.",
+      "tasks": [],
+      "pashto_evidence": {
+        "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
+        "evidence_url": "https://huggingface.co/aamirhs/wav2vec2-large-xls-r-300m-pashto-colab",
+        "markers": [
+          "pashto"
+        ]
+      },
+      "tags": [
+        "pashto",
+        "candidate",
+        "model"
+      ]
+    },
+    {
+      "id": "candidate-hf-model-aamirhs-wav2vec2-large-xls-r-300m-pashto-colab-test-2",
+      "title": "aamirhs/wav2vec2-large-xls-r-300m-pashto-colab-test-2",
+      "url": "https://huggingface.co/aamirhs/wav2vec2-large-xls-r-300m-pashto-colab-test-2",
+      "category": "model",
+      "source": "huggingface",
+      "status": "candidate",
+      "summary": "Candidate model returned from Hugging Face search for Pashto.",
+      "primary_use": "Needs maintainer review before promotion to verified catalog.",
+      "tasks": [],
+      "pashto_evidence": {
+        "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
+        "evidence_url": "https://huggingface.co/aamirhs/wav2vec2-large-xls-r-300m-pashto-colab-test-2",
+        "markers": [
+          "pashto"
+        ]
+      },
+      "tags": [
+        "pashto",
+        "candidate",
+        "model"
+      ]
+    },
+    {
+      "id": "candidate-hf-dataset-adnankhan769-english-to-pashto-sentences-dataset",
+      "title": "adnankhan769/english_to_pashto_sentences_dataset",
+      "url": "https://huggingface.co/datasets/adnankhan769/english_to_pashto_sentences_dataset",
+      "category": "dataset",
+      "source": "huggingface",
+      "status": "candidate",
+      "summary": "Candidate dataset returned from Hugging Face search for Pashto.",
+      "primary_use": "Needs maintainer review before promotion to verified catalog.",
+      "tasks": [],
+      "pashto_evidence": {
+        "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
+        "evidence_url": "https://huggingface.co/datasets/adnankhan769/english_to_pashto_sentences_dataset",
+        "markers": [
+          "pashto"
+        ]
+      },
+      "tags": [
+        "pashto",
+        "candidate",
+        "dataset"
+      ]
+    },
+    {
+      "id": "candidate-hf-dataset-adnankhan769-proper-dataset-english-2-pashto",
+      "title": "adnankhan769/proper_dataset_english_2_pashto",
+      "url": "https://huggingface.co/datasets/adnankhan769/proper_dataset_english_2_pashto",
+      "category": "dataset",
+      "source": "huggingface",
+      "status": "candidate",
+      "summary": "Candidate dataset returned from Hugging Face search for Pashto.",
+      "primary_use": "Needs maintainer review before promotion to verified catalog.",
+      "tasks": [],
+      "pashto_evidence": {
+        "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
+        "evidence_url": "https://huggingface.co/datasets/adnankhan769/proper_dataset_english_2_pashto",
+        "markers": [
+          "pashto"
+        ]
+      },
+      "tags": [
+        "pashto",
+        "candidate",
+        "dataset"
+      ]
+    },
+    {
+      "id": "candidate-hf-dataset-alimuhammad73-pashto-poetry",
+      "title": "AliMuhammad73/Pashto-Poetry",
+      "url": "https://huggingface.co/datasets/AliMuhammad73/Pashto-Poetry",
+      "category": "dataset",
+      "source": "huggingface",
+      "status": "candidate",
+      "summary": "Candidate dataset returned from Hugging Face search for Pashto.",
+      "primary_use": "Needs maintainer review before promotion to verified catalog.",
+      "tasks": [],
+      "pashto_evidence": {
+        "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
+        "evidence_url": "https://huggingface.co/datasets/AliMuhammad73/Pashto-Poetry",
+        "markers": [
+          "pashto"
+        ]
+      },
+      "tags": [
+        "pashto",
+        "candidate",
+        "dataset"
+      ]
+    },
     {
       "id": "candidate-hf-dataset-arsalagrey-pashto",
       "title": "arsalagrey/pashto",
       "category": "dataset",
       "source": "huggingface",
       "status": "candidate",
+      "summary": "Candidate dataset returned from Hugging Face search for Pashto.",
+      "primary_use": "Needs maintainer review before promotion to verified catalog.",
+      "tasks": [],
+      "pashto_evidence": {
+        "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
+        "evidence_url": "https://huggingface.co/datasets/arsalagrey/pashto",
+        "markers": [
+          "pashto"
+        ]
+      },
+      "tags": [
+        "pashto",
+        "candidate",
+        "dataset"
+      ]
+    },
+    {
+      "id": "candidate-hf-dataset-arsalagrey-pashto-books",
+      "title": "arsalagrey/pashto-books",
+      "url": "https://huggingface.co/datasets/arsalagrey/pashto-books",
+      "category": "dataset",
+      "source": "huggingface",
+      "status": "candidate",
+      "summary": "Candidate dataset returned from Hugging Face search for Pashto.",
+      "primary_use": "Needs maintainer review before promotion to verified catalog.",
+      "tasks": [],
+      "pashto_evidence": {
+        "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
+        "evidence_url": "https://huggingface.co/datasets/arsalagrey/pashto-books",
+        "markers": [
+          "pashto"
+        ]
+      },
+      "tags": [
+        "pashto",
+        "candidate",
+        "dataset"
+      ]
+    },
+    {
+      "id": "candidate-hf-dataset-arsalagrey-pashto-books-json",
+      "title": "arsalagrey/pashto-books-json",
+      "url": "https://huggingface.co/datasets/arsalagrey/pashto-books-json",
+      "category": "dataset",
+      "source": "huggingface",
+      "status": "candidate",
+      "summary": "Candidate dataset returned from Hugging Face search for Pashto.",
+      "primary_use": "Needs maintainer review before promotion to verified catalog.",
+      "tasks": [],
+      "pashto_evidence": {
+        "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
+        "evidence_url": "https://huggingface.co/datasets/arsalagrey/pashto-books-json",
+        "markers": [
+          "pashto"
+        ]
+      },
+      "tags": [
+        "pashto",
+        "candidate",
+        "dataset"
+      ]
+    },
+    {
+      "id": "candidate-hf-model-ihanif-pashto-asr-base",
+      "title": "ihanif/pashto-asr-base",
+      "url": "https://huggingface.co/ihanif/pashto-asr-base",
+      "category": "model",
+      "source": "huggingface",
+      "status": "candidate",
+      "summary": "Candidate model returned from Hugging Face search for Pashto.",
+      "primary_use": "Needs maintainer review before promotion to verified catalog.",
+      "tasks": [],
+      "pashto_evidence": {
+        "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
+        "evidence_url": "https://huggingface.co/ihanif/pashto-asr-base",
+        "markers": [
+          "pashto"
+        ]
+      },
+      "tags": [
+        "pashto",
+        "candidate",
+        "model"
+      ]
+    },
+    {
+      "id": "candidate-hf-dataset-ihanif-pashto-asr-wer",
+      "title": "ihanif/pashto_asr_wer",
+      "url": "https://huggingface.co/datasets/ihanif/pashto_asr_wer",
+      "category": "dataset",
+      "source": "huggingface",
+      "status": "candidate",
+      "summary": "Candidate dataset returned from Hugging Face search for Pashto.",
+      "primary_use": "Needs maintainer review before promotion to verified catalog.",
+      "tasks": [],
+      "pashto_evidence": {
+        "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
+        "evidence_url": "https://huggingface.co/datasets/ihanif/pashto_asr_wer",
+        "markers": [
+          "pashto"
+        ]
+      },
+      "tags": [
+        "pashto",
+        "candidate",
+        "dataset"
+      ]
+    },
+    {
+      "id": "candidate-hf-dataset-ihanif-pashto-speech-20k",
+      "title": "ihanif/pashto_speech_20k",
+      "url": "https://huggingface.co/datasets/ihanif/pashto_speech_20k",
+      "category": "dataset",
+      "source": "huggingface",
+      "status": "candidate",
+      "summary": "Candidate dataset returned from Hugging Face search for Pashto.",
+      "primary_use": "Needs maintainer review before promotion to verified catalog.",
+      "tasks": [],
+      "pashto_evidence": {
+        "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
+        "evidence_url": "https://huggingface.co/datasets/ihanif/pashto_speech_20k",
+        "markers": [
+          "pashto"
+        ]
+      },
+      "tags": [
+        "pashto",
+        "candidate",
+        "dataset"
+      ]
+    },
+    {
+      "id": "candidate-hf-dataset-ihanif-pashto-speech-2k",
+      "title": "ihanif/pashto_speech_2k",
+      "url": "https://huggingface.co/datasets/ihanif/pashto_speech_2k",
+      "category": "dataset",
+      "source": "huggingface",
+      "status": "candidate",
+      "summary": "Candidate dataset returned from Hugging Face search for Pashto.",
+      "primary_use": "Needs maintainer review before promotion to verified catalog.",
+      "tasks": [],
+      "pashto_evidence": {
+        "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
+        "evidence_url": "https://huggingface.co/datasets/ihanif/pashto_speech_2k",
+        "markers": [
+          "pashto"
+        ]
+      },
+      "tags": [
+        "pashto",
+        "candidate",
+        "dataset"
+      ]
+    },
+    {
+      "id": "candidate-hf-dataset-ihanif-pashto-speech-3k",
+      "title": "ihanif/pashto_speech_3k",
+      "url": "https://huggingface.co/datasets/ihanif/pashto_speech_3k",
+      "category": "dataset",
+      "source": "huggingface",
+      "status": "candidate",
+      "summary": "Candidate dataset returned from Hugging Face search for Pashto.",
+      "primary_use": "Needs maintainer review before promotion to verified catalog.",
+      "tasks": [],
+      "pashto_evidence": {
+        "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
+        "evidence_url": "https://huggingface.co/datasets/ihanif/pashto_speech_3k",
+        "markers": [
+          "pashto"
+        ]
+      },
+      "tags": [
+        "pashto",
+        "candidate",
+        "dataset"
+      ]
+    },
+    {
+      "id": "candidate-hf-dataset-ihanif-pashto-speech-5k",
+      "title": "ihanif/pashto_speech_5k",
+      "url": "https://huggingface.co/datasets/ihanif/pashto_speech_5k",
+      "category": "dataset",
+      "source": "huggingface",
+      "status": "candidate",
+      "summary": "Candidate dataset returned from Hugging Face search for Pashto.",
+      "primary_use": "Needs maintainer review before promotion to verified catalog.",
+      "tasks": [],
+      "pashto_evidence": {
+        "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
+        "evidence_url": "https://huggingface.co/datasets/ihanif/pashto_speech_5k",
+        "markers": [
+          "pashto"
+        ]
+      },
+      "tags": [
+        "pashto",
+        "candidate",
+        "dataset"
+      ]
+    },
+    {
+      "id": "candidate-hf-dataset-ihanif-pashto-speech-ds",
+      "title": "ihanif/pashto_speech_ds",
+      "url": "https://huggingface.co/datasets/ihanif/pashto_speech_ds",
+      "category": "dataset",
+      "source": "huggingface",
+      "status": "candidate",
+      "summary": "Candidate dataset returned from Hugging Face search for Pashto.",
+      "primary_use": "Needs maintainer review before promotion to verified catalog.",
+      "tasks": [],
+      "pashto_evidence": {
+        "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
+        "evidence_url": "https://huggingface.co/datasets/ihanif/pashto_speech_ds",
+        "markers": [
+          "pashto"
+        ]
+      },
+      "tags": [
+        "pashto",
+        "candidate",
+        "dataset"
+      ]
+    },
+    {
+      "id": "candidate-hf-dataset-ihanif-pashto-speech-parquet-10k",
+      "title": "ihanif/pashto_speech_parquet_10k",
+      "url": "https://huggingface.co/datasets/ihanif/pashto_speech_parquet_10k",
+      "category": "dataset",
+      "source": "huggingface",
+      "status": "candidate",
+      "summary": "Candidate dataset returned from Hugging Face search for Pashto.",
+      "primary_use": "Needs maintainer review before promotion to verified catalog.",
+      "tasks": [],
+      "pashto_evidence": {
+        "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
+        "evidence_url": "https://huggingface.co/datasets/ihanif/pashto_speech_parquet_10k",
+        "markers": [
+          "pashto"
+        ]
+      },
+      "tags": [
+        "pashto",
+        "candidate",
+        "dataset"
+      ]
+    },
+    {
+      "id": "candidate-hf-model-ihanif-wav2vec2-xls-r-300m-pashto-lm",
+      "title": "ihanif/wav2vec2-xls-r-300m-pashto-lm",
+      "url": "https://huggingface.co/ihanif/wav2vec2-xls-r-300m-pashto-lm",
+      "category": "model",
+      "source": "huggingface",
+      "status": "candidate",
+      "summary": "Candidate model returned from Hugging Face search for Pashto.",
+      "primary_use": "Needs maintainer review before promotion to verified catalog.",
+      "tasks": [],
+      "pashto_evidence": {
+        "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
+        "evidence_url": "https://huggingface.co/ihanif/wav2vec2-xls-r-300m-pashto-lm",
+        "markers": [
+          "pashto"
+        ]
+      },
+      "tags": [
+        "pashto",
+        "candidate",
+        "model"
+      ]
+    },
+    {
+      "id": "candidate-hf-model-ihanif-whisper-base-pashto",
+      "title": "ihanif/whisper-base-pashto",
+      "url": "https://huggingface.co/ihanif/whisper-base-pashto",
+      "category": "model",
+      "source": "huggingface",
+      "status": "candidate",
+      "summary": "Candidate model returned from Hugging Face search for Pashto.",
       "primary_use": "Needs maintainer review before promotion to verified catalog.",
       "tasks": [],
       "pashto_evidence": {
         "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
+        "evidence_url": "https://huggingface.co/ihanif/whisper-base-pashto",
         "markers": [
           "pashto"
         ]
       "tags": [
         "pashto",
         "candidate",
+        "model"
       ]
     },
     {
+      "id": "candidate-hf-model-ihanif-whisper-large-pashto",
+      "title": "ihanif/whisper-large-pashto",
+      "url": "https://huggingface.co/ihanif/whisper-large-pashto",
+      "category": "model",
       "source": "huggingface",
       "status": "candidate",
+      "summary": "Candidate model returned from Hugging Face search for Pashto.",
       "primary_use": "Needs maintainer review before promotion to verified catalog.",
       "tasks": [],
       "pashto_evidence": {
         "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
+        "evidence_url": "https://huggingface.co/ihanif/whisper-large-pashto",
         "markers": [
           "pashto"
         ]
       "tags": [
         "pashto",
         "candidate",
+        "model"
       ]
     },
     {
+      "id": "candidate-hf-model-ihanif-whisper-medium-pashto-3e-7",
+      "title": "ihanif/whisper-medium-pashto-3e-7",
+      "url": "https://huggingface.co/ihanif/whisper-medium-pashto-3e-7",
+      "category": "model",
       "source": "huggingface",
       "status": "candidate",
+      "summary": "Candidate model returned from Hugging Face search for Pashto.",
       "primary_use": "Needs maintainer review before promotion to verified catalog.",
       "tasks": [],
       "pashto_evidence": {
         "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
+        "evidence_url": "https://huggingface.co/ihanif/whisper-medium-pashto-3e-7",
         "markers": [
           "pashto"
         ]
       "tags": [
         "pashto",
         "candidate",
+        "model"
       ]
     },
     {
+      "id": "candidate-hf-model-ihanif-whisper-small-pashto",
+      "title": "ihanif/whisper-small-pashto",
+      "url": "https://huggingface.co/ihanif/whisper-small-pashto",
       "category": "model",
       "source": "huggingface",
       "status": "candidate",
       "tasks": [],
       "pashto_evidence": {
         "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
+        "evidence_url": "https://huggingface.co/ihanif/whisper-small-pashto",
         "markers": [
           "pashto"
         ]
       ]
     },
     {
+      "id": "candidate-hf-model-ihanif-whisper-small-pashto-dropout",
+      "title": "ihanif/whisper-small-pashto-dropout",
+      "url": "https://huggingface.co/ihanif/whisper-small-pashto-dropout",
       "category": "model",
       "source": "huggingface",
       "status": "candidate",
       "tasks": [],
       "pashto_evidence": {
         "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
+        "evidence_url": "https://huggingface.co/ihanif/whisper-small-pashto-dropout",
         "markers": [
           "pashto"
         ]
       ]
     },
     {
+      "id": "candidate-hf-model-ihanif-xls-r-1b-pashto",
+      "title": "ihanif/xls-r-1b-pashto",
+      "url": "https://huggingface.co/ihanif/xls-r-1b-pashto",
       "category": "model",
       "source": "huggingface",
       "status": "candidate",
       "tasks": [],
       "pashto_evidence": {
         "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
+        "evidence_url": "https://huggingface.co/ihanif/xls-r-1b-pashto",
         "markers": [
           "pashto"
         ]
       ]
     },
     {
+      "id": "candidate-hf-model-ijazulhaq-bert-base-pashto",
+      "title": "ijazulhaq/bert-base-pashto",
+      "url": "https://huggingface.co/ijazulhaq/bert-base-pashto",
       "category": "model",
       "source": "huggingface",
       "status": "candidate",
       "tasks": [],
       "pashto_evidence": {
         "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
+        "evidence_url": "https://huggingface.co/ijazulhaq/bert-base-pashto",
         "markers": [
           "pashto"
         ]
       ]
     },
     {
+      "id": "candidate-hf-model-ijazulhaq-bert-base-pashto-v1",
+      "title": "ijazulhaq/bert-base-pashto-v1",
+      "url": "https://huggingface.co/ijazulhaq/bert-base-pashto-v1",
       "category": "model",
       "source": "huggingface",
       "status": "candidate",
       "tasks": [],
       "pashto_evidence": {
         "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
+        "evidence_url": "https://huggingface.co/ijazulhaq/bert-base-pashto-v1",
         "markers": [
           "pashto"
         ]
       ]
     },
     {
+      "id": "candidate-hf-model-jawaria-wav2vec2-large-xls-r-300m-pashto-colab-final-1",
+      "title": "Jawaria/wav2vec2-large-xls-r-300m-pashto-colab-final-1",
+      "url": "https://huggingface.co/Jawaria/wav2vec2-large-xls-r-300m-pashto-colab-final-1",
       "category": "model",
       "source": "huggingface",
       "status": "candidate",
       "tasks": [],
       "pashto_evidence": {
         "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
+        "evidence_url": "https://huggingface.co/Jawaria/wav2vec2-large-xls-r-300m-pashto-colab-final-1",
         "markers": [
           "pashto"
         ]
       ]
     },
     {
+      "id": "candidate-hf-model-jawaria-wav2vec2-large-xls-r-300m-pashto-colab-test-2",
+      "title": "Jawaria/wav2vec2-large-xls-r-300m-pashto-colab-test-2",
+      "url": "https://huggingface.co/Jawaria/wav2vec2-large-xls-r-300m-pashto-colab-test-2",
       "category": "model",
       "source": "huggingface",
       "status": "candidate",
       "tasks": [],
       "pashto_evidence": {
         "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
+        "evidence_url": "https://huggingface.co/Jawaria/wav2vec2-large-xls-r-300m-pashto-colab-test-2",
         "markers": [
           "pashto"
         ]
       ]
     },
     {
+      "id": "candidate-hf-model-jawaria-wav2vec2-large-xls-r-300m-pashto-colab-test-3",
+      "title": "Jawaria/wav2vec2-large-xls-r-300m-pashto-colab-test-3",
+      "url": "https://huggingface.co/Jawaria/wav2vec2-large-xls-r-300m-pashto-colab-test-3",
       "category": "model",
       "source": "huggingface",
       "status": "candidate",
       "tasks": [],
       "pashto_evidence": {
         "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
+        "evidence_url": "https://huggingface.co/Jawaria/wav2vec2-large-xls-r-300m-pashto-colab-test-3",
         "markers": [
           "pashto"
         ]
       ]
     },
     {
+      "id": "candidate-hf-model-jawaria-wav2vec2-large-xls-r-300m-pashto-colab-test-4",
+      "title": "Jawaria/wav2vec2-large-xls-r-300m-pashto-colab-test-4",
+      "url": "https://huggingface.co/Jawaria/wav2vec2-large-xls-r-300m-pashto-colab-test-4",
       "category": "model",
       "source": "huggingface",
       "status": "candidate",
       "tasks": [],
       "pashto_evidence": {
         "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
+        "evidence_url": "https://huggingface.co/Jawaria/wav2vec2-large-xls-r-300m-pashto-colab-test-4",
+        "markers": [
+          "pashto"
+        ]
+      },
+      "tags": [
+        "pashto",
+        "candidate",
+        "model"
+      ]
+    },
+    {
+      "id": "candidate-hf-model-jawaria-wav2vec2-large-xls-r-300m-pashto-colab-test-5",
+      "title": "Jawaria/wav2vec2-large-xls-r-300m-pashto-colab-test-5",
+      "url": "https://huggingface.co/Jawaria/wav2vec2-large-xls-r-300m-pashto-colab-test-5",
+      "category": "model",
+      "source": "huggingface",
+      "status": "candidate",
+      "summary": "Candidate model returned from Hugging Face search for Pashto.",
+      "primary_use": "Needs maintainer review before promotion to verified catalog.",
+      "tasks": [],
+      "pashto_evidence": {
+        "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
+        "evidence_url": "https://huggingface.co/Jawaria/wav2vec2-large-xls-r-300m-pashto-colab-test-5",
+        "markers": [
+          "pashto"
+        ]
+      },
+      "tags": [
+        "pashto",
+        "candidate",
+        "model"
+      ]
+    },
+    {
+      "id": "candidate-hf-model-jawaria-wav2vec2-large-xls-r-300m-pashto-colab-test-6",
+      "title": "Jawaria/wav2vec2-large-xls-r-300m-pashto-colab-test-6",
+      "url": "https://huggingface.co/Jawaria/wav2vec2-large-xls-r-300m-pashto-colab-test-6",
+      "category": "model",
+      "source": "huggingface",
+      "status": "candidate",
+      "summary": "Candidate model returned from Hugging Face search for Pashto.",
+      "primary_use": "Needs maintainer review before promotion to verified catalog.",
+      "tasks": [],
+      "pashto_evidence": {
+        "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
+        "evidence_url": "https://huggingface.co/Jawaria/wav2vec2-large-xls-r-300m-pashto-colab-test-6",
+        "markers": [
+          "pashto"
+        ]
+      },
+      "tags": [
+        "pashto",
+        "candidate",
+        "model"
+      ]
+    },
+    {
+      "id": "candidate-hf-model-jawaria-wav2vec2-large-xls-r-300m-pashto-colab-test-7",
+      "title": "Jawaria/wav2vec2-large-xls-r-300m-pashto-colab-test-7",
+      "url": "https://huggingface.co/Jawaria/wav2vec2-large-xls-r-300m-pashto-colab-test-7",
+      "category": "model",
+      "source": "huggingface",
+      "status": "candidate",
+      "summary": "Candidate model returned from Hugging Face search for Pashto.",
+      "primary_use": "Needs maintainer review before promotion to verified catalog.",
+      "tasks": [],
+      "pashto_evidence": {
+        "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
+        "evidence_url": "https://huggingface.co/Jawaria/wav2vec2-large-xls-r-300m-pashto-colab-test-7",
+        "markers": [
+          "pashto"
+        ]
+      },
+      "tags": [
+        "pashto",
+        "candidate",
+        "model"
+      ]
+    },
+    {
+      "id": "candidate-hf-model-jawaria-wav2vec2-large-xls-r-300m-pashto-colab-test-8",
+      "title": "Jawaria/wav2vec2-large-xls-r-300m-pashto-colab-test-8",
+      "url": "https://huggingface.co/Jawaria/wav2vec2-large-xls-r-300m-pashto-colab-test-8",
+      "category": "model",
+      "source": "huggingface",
+      "status": "candidate",
+      "summary": "Candidate model returned from Hugging Face search for Pashto.",
+      "primary_use": "Needs maintainer review before promotion to verified catalog.",
+      "tasks": [],
+      "pashto_evidence": {
+        "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
+        "evidence_url": "https://huggingface.co/Jawaria/wav2vec2-large-xls-r-300m-pashto-colab-test-8",
         "markers": [
           "pashto"
         ]
       ]
     },
     {
+      "id": "candidate-hf-model-koochikoo25-pashto-whisper-large",
+      "title": "koochikoo25/pashto-whisper-large",
+      "url": "https://huggingface.co/koochikoo25/pashto-whisper-large",
+      "category": "model",
+      "source": "huggingface",
+      "status": "candidate",
+      "summary": "Candidate model returned from Hugging Face search for Pashto.",
+      "primary_use": "Needs maintainer review before promotion to verified catalog.",
+      "tasks": [],
+      "pashto_evidence": {
+        "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
+        "evidence_url": "https://huggingface.co/koochikoo25/pashto-whisper-large",
+        "markers": [
+          "pashto"
+        ]
+      },
+      "tags": [
+        "pashto",
+        "candidate",
+        "model"
+      ]
+    },
+    {
+      "id": "candidate-hf-model-koochikoo25-whisper-medium-pashto",
+      "title": "koochikoo25/Whisper-medium-pashto",
+      "url": "https://huggingface.co/koochikoo25/Whisper-medium-pashto",
+      "category": "model",
+      "source": "huggingface",
+      "status": "candidate",
+      "summary": "Candidate model returned from Hugging Face search for Pashto.",
+      "primary_use": "Needs maintainer review before promotion to verified catalog.",
+      "tasks": [],
+      "pashto_evidence": {
+        "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
+        "evidence_url": "https://huggingface.co/koochikoo25/Whisper-medium-pashto",
+        "markers": [
+          "pashto"
+        ]
+      },
+      "tags": [
+        "pashto",
+        "candidate",
+        "model"
+      ]
+    },
+    {
+      "id": "candidate-hf-dataset-saillab-alpaca-pashto-cleaned",
+      "title": "saillab/alpaca-pashto-cleaned",
+      "url": "https://huggingface.co/datasets/saillab/alpaca-pashto-cleaned",
       "category": "dataset",
       "source": "huggingface",
       "status": "candidate",
       "tasks": [],
       "pashto_evidence": {
         "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
+        "evidence_url": "https://huggingface.co/datasets/saillab/alpaca-pashto-cleaned",
         "markers": [
           "pashto"
         ]
       ]
     },
     {
+      "id": "candidate-hf-dataset-sherwindesouza-pashto-common-voice-20",
+      "title": "SherwinDesouza/pashto-common-voice-20",
+      "url": "https://huggingface.co/datasets/SherwinDesouza/pashto-common-voice-20",
+      "category": "dataset",
       "source": "huggingface",
       "status": "candidate",
+      "summary": "Candidate dataset returned from Hugging Face search for Pashto.",
       "primary_use": "Needs maintainer review before promotion to verified catalog.",
       "tasks": [],
       "pashto_evidence": {
         "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
+        "evidence_url": "https://huggingface.co/datasets/SherwinDesouza/pashto-common-voice-20",
         "markers": [
           "pashto"
         ]
       "tags": [
         "pashto",
         "candidate",
+        "dataset"
+      ]
+    },
+    {
+      "id": "candidate-hf-dataset-tasal9-pashto-dataset",
+      "title": "tasal9/Pashto_Dataset",
+      "url": "https://huggingface.co/datasets/tasal9/Pashto_Dataset",
+      "category": "dataset",
+      "source": "huggingface",
+      "status": "candidate",
+      "summary": "Candidate dataset returned from Hugging Face search for Pashto.",
+      "primary_use": "Needs maintainer review before promotion to verified catalog.",
+      "tasks": [],
+      "pashto_evidence": {
+        "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
+        "evidence_url": "https://huggingface.co/datasets/tasal9/Pashto_Dataset",
+        "markers": [
+          "pashto"
+        ]
+      },
+      "tags": [
+        "pashto",
+        "candidate",
+        "dataset"
       ]
     },
     {
+      "id": "candidate-hf-dataset-tasal9-zamai-pashto-dataset",
+      "title": "tasal9/ZamAI_Pashto_Dataset",
+      "url": "https://huggingface.co/datasets/tasal9/ZamAI_Pashto_Dataset",
       "category": "dataset",
       "source": "huggingface",
       "status": "candidate",
       "tasks": [],
       "pashto_evidence": {
         "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
+        "evidence_url": "https://huggingface.co/datasets/tasal9/ZamAI_Pashto_Dataset",
         "markers": [
           "pashto"
         ]
         "candidate",
         "dataset"
       ]
+    },
+    {
+      "id": "candidate-hf-model-zirak-ai-pashto-bert-v1",
+      "title": "zirak-ai/pashto-bert-v1",
+      "url": "https://huggingface.co/zirak-ai/pashto-bert-v1",
+      "category": "model",
+      "source": "huggingface",
+      "status": "candidate",
+      "summary": "Candidate model returned from Hugging Face search for Pashto.",
+      "primary_use": "Needs maintainer review before promotion to verified catalog.",
+      "tasks": [],
+      "pashto_evidence": {
+        "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
+        "evidence_url": "https://huggingface.co/zirak-ai/pashto-bert-v1",
+        "markers": [
+          "pashto"
+        ]
+      },
+      "tags": [
+        "pashto",
+        "candidate",
+        "model"
+      ]
     }
   ],
   "errors": [

resources/catalog/resources.json CHANGED Viewed

@@ -640,6 +640,148 @@
         "paper",
         "benchmark"
       ]
     }
   ]
 }

         "paper",
         "benchmark"
       ]
+    },
+    {
+      "id": "dataset-nexdata-99h-pashto-dialogue",
+      "title": "99 Hours Pashto Spontaneous Dialogue Smartphone Speech Dataset",
+      "url": "https://huggingface.co/datasets/Nexdata/99_Hours_Pashto_Spontaneous_Dialogue_Smartphone_speech_dataset",
+      "category": "dataset",
+      "source": "huggingface",
+      "status": "verified",
+      "summary": "Large spontaneous Pashto smartphone speech dataset for robust ASR experimentation.",
+      "primary_use": "Spontaneous speech ASR training and robustness evaluation",
+      "license": "cc-by-nc-nd-4.0",
+      "tasks": [
+        "asr"
+      ],
+      "pashto_evidence": {
+        "evidence_text": "Dataset title explicitly includes Pashto and API metadata marks audio and text modalities.",
+        "evidence_url": "https://huggingface.co/datasets/Nexdata/99_Hours_Pashto_Spontaneous_Dialogue_Smartphone_speech_dataset",
+        "markers": [
+          "Pashto"
+        ]
+      },
+      "tags": [
+        "pashto",
+        "speech",
+        "asr",
+        "dialogue"
+      ]
+    },
+    {
+      "id": "dataset-zirak-ai-pashto-ocr",
+      "title": "Zirak-AI PashtoOCR",
+      "url": "https://huggingface.co/datasets/zirak-ai/PashtoOCR",
+      "category": "dataset",
+      "source": "huggingface",
+      "status": "verified",
+      "summary": "Pashto-focused OCR dataset with image-text pairs for document understanding tasks.",
+      "primary_use": "OCR and text extraction benchmarking",
+      "license": "mit",
+      "tasks": [
+        "ocr",
+        "nlp"
+      ],
+      "pashto_evidence": {
+        "evidence_text": "Dataset tags include language:ps and the dataset name is PashtoOCR.",
+        "evidence_url": "https://huggingface.co/datasets/zirak-ai/PashtoOCR",
+        "markers": [
+          "ps",
+          "PashtoOCR"
+        ]
+      },
+      "tags": [
+        "pashto",
+        "ocr",
+        "nlp",
+        "vision"
+      ]
+    },
+    {
+      "id": "dataset-ihanif-pashto-wikipedia-corpus",
+      "title": "Pashto Wikipedia Corpus",
+      "url": "https://huggingface.co/datasets/ihanif/pashto-wikipedia-corpus",
+      "category": "dataset",
+      "source": "huggingface",
+      "status": "verified",
+      "summary": "Pashto text corpus prepared from Wikipedia data for NLP and language modeling.",
+      "primary_use": "Pashto text corpus for NLP baselines",
+      "license": "cc-by-sa-4.0",
+      "tasks": [
+        "nlp"
+      ],
+      "pashto_evidence": {
+        "evidence_text": "Dataset metadata includes language:ps and the title specifies Pashto corpus.",
+        "evidence_url": "https://huggingface.co/datasets/ihanif/pashto-wikipedia-corpus",
+        "markers": [
+          "ps",
+          "Pashto"
+        ]
+      },
+      "tags": [
+        "pashto",
+        "text",
+        "nlp",
+        "wikipedia"
+      ]
+    },
+    {
+      "id": "model-ihanif-wav2vec2-xls-r-300m-pashto",
+      "title": "wav2vec2 XLS-R 300M Pashto",
+      "url": "https://huggingface.co/ihanif/wav2vec2-xls-r-300m-pashto",
+      "category": "model",
+      "source": "huggingface",
+      "status": "verified",
+      "summary": "Fine-tuned wav2vec2 XLS-R model for Pashto ASR with published FLEURS evaluation tags.",
+      "primary_use": "Pashto ASR baseline and comparative experiments",
+      "license": "apache-2.0",
+      "tasks": [
+        "asr"
+      ],
+      "pashto_evidence": {
+        "evidence_text": "Model tags include pashto and ps, and model index references FLEURS config ps_af.",
+        "evidence_url": "https://huggingface.co/ihanif/wav2vec2-xls-r-300m-pashto",
+        "markers": [
+          "pashto",
+          "ps",
+          "ps_af"
+        ]
+      },
+      "tags": [
+        "pashto",
+        "asr",
+        "wav2vec2",
+        "fleurs"
+      ]
+    },
+    {
+      "id": "model-ihanif-whisper-medium-pashto",
+      "title": "Whisper Medium Pashto",
+      "url": "https://huggingface.co/ihanif/whisper-medium-pashto",
+      "category": "model",
+      "source": "huggingface",
+      "status": "verified",
+      "summary": "Fine-tuned Whisper Medium checkpoint for Pashto ASR with benchmark metadata.",
+      "primary_use": "Pashto ASR baseline and transcription quality comparisons",
+      "license": "apache-2.0",
+      "tasks": [
+        "asr"
+      ],
+      "pashto_evidence": {
+        "evidence_text": "Model tags include pashto and ps, and model index uses FLEURS ps_af split.",
+        "evidence_url": "https://huggingface.co/ihanif/whisper-medium-pashto",
+        "markers": [
+          "pashto",
+          "ps",
+          "ps_af"
+        ]
+      },
+      "tags": [
+        "pashto",
+        "asr",
+        "whisper",
+        "fleurs"
+      ]
     }
   ]
 }

resources/datasets/README.md CHANGED Viewed

@@ -4,14 +4,17 @@
 | Resource | Link | Pashto Evidence | Primary Use |
 |---|---|---|---|
 | Belebele | [huggingface](https://huggingface.co/datasets/facebook/belebele) | [Dataset includes pbt_Arab subset. (`pbt_Arab`)](https://huggingface.co/datasets/facebook/belebele) | Comprehension and multilingual NLP benchmark |
 | Common Voice Scripted Speech 24.0 - Pashto | [mozilla](https://datacollective.mozillafoundation.org/datasets/cmj8u3pnb00llnxxbfvxo3b14) | [Official dataset page is for Pashto. (`Pashto`)](https://datacollective.mozillafoundation.org/datasets/cmj8u3pnb00llnxxbfvxo3b14) | ASR training and evaluation |
 | Google FLEURS | [huggingface](https://huggingface.co/datasets/google/fleurs) | [Dataset config includes ps_af. (`ps_af`)](https://huggingface.co/datasets/google/fleurs/blob/main/fleurs.py) | Speech benchmark and external evaluation |
 | OPUS-100 | [huggingface](https://huggingface.co/datasets/Helsinki-NLP/opus-100) | [Dataset viewer includes en-ps split. (`en-ps`)](https://huggingface.co/datasets/Helsinki-NLP/opus-100/viewer/en-ps) | Machine translation training and evaluation |
 | OSCAR Corpus | [huggingface](https://huggingface.co/datasets/oscar-corpus/oscar) | [Dataset includes unshuffled_deduplicated_ps split. (`unshuffled_deduplicated_ps`)](https://huggingface.co/datasets/oscar-corpus/oscar) | Language modeling and lexicon expansion |
 | Pashto Isolated Words Speech Dataset | [kaggle](https://www.kaggle.com/datasets/engrirf/pashto-isolated-words-speech-dataset) | [Dataset title explicitly states Pashto speech dataset. (`Pashto`)](https://www.kaggle.com/datasets/engrirf/pashto-isolated-words-speech-dataset) | Keyword spotting and constrained ASR experiments |
 | Pashto Word Embeddings | [kaggle](https://www.kaggle.com/datasets/drijaz/pashto-word-embeddings) | [Dataset description states pretrained Pashto embeddings. (`Pashto`)](https://www.kaggle.com/datasets/drijaz/pashto-word-embeddings) | Lexical semantics and lightweight NLP baselines |
 | Wikimedia Wikipedia | [huggingface](https://huggingface.co/datasets/wikimedia/wikipedia) | [Dataset includes 20231101.ps subset. (`20231101.ps`)](https://huggingface.co/datasets/wikimedia/wikipedia) | Terminology and balanced text corpus |
 ## Maintenance
 - Source of truth: [../catalog/resources.json](../catalog/resources.json)

 | Resource | Link | Pashto Evidence | Primary Use |
 |---|---|---|---|
+| 99 Hours Pashto Spontaneous Dialogue Smartphone Speech Dataset | [huggingface](https://huggingface.co/datasets/Nexdata/99_Hours_Pashto_Spontaneous_Dialogue_Smartphone_speech_dataset) | [Dataset title explicitly includes Pashto and API metadata marks audio and text modalities. (`Pashto`)](https://huggingface.co/datasets/Nexdata/99_Hours_Pashto_Spontaneous_Dialogue_Smartphone_speech_dataset) | Spontaneous speech ASR training and robustness evaluation |
 | Belebele | [huggingface](https://huggingface.co/datasets/facebook/belebele) | [Dataset includes pbt_Arab subset. (`pbt_Arab`)](https://huggingface.co/datasets/facebook/belebele) | Comprehension and multilingual NLP benchmark |
 | Common Voice Scripted Speech 24.0 - Pashto | [mozilla](https://datacollective.mozillafoundation.org/datasets/cmj8u3pnb00llnxxbfvxo3b14) | [Official dataset page is for Pashto. (`Pashto`)](https://datacollective.mozillafoundation.org/datasets/cmj8u3pnb00llnxxbfvxo3b14) | ASR training and evaluation |
 | Google FLEURS | [huggingface](https://huggingface.co/datasets/google/fleurs) | [Dataset config includes ps_af. (`ps_af`)](https://huggingface.co/datasets/google/fleurs/blob/main/fleurs.py) | Speech benchmark and external evaluation |
 | OPUS-100 | [huggingface](https://huggingface.co/datasets/Helsinki-NLP/opus-100) | [Dataset viewer includes en-ps split. (`en-ps`)](https://huggingface.co/datasets/Helsinki-NLP/opus-100/viewer/en-ps) | Machine translation training and evaluation |
 | OSCAR Corpus | [huggingface](https://huggingface.co/datasets/oscar-corpus/oscar) | [Dataset includes unshuffled_deduplicated_ps split. (`unshuffled_deduplicated_ps`)](https://huggingface.co/datasets/oscar-corpus/oscar) | Language modeling and lexicon expansion |
 | Pashto Isolated Words Speech Dataset | [kaggle](https://www.kaggle.com/datasets/engrirf/pashto-isolated-words-speech-dataset) | [Dataset title explicitly states Pashto speech dataset. (`Pashto`)](https://www.kaggle.com/datasets/engrirf/pashto-isolated-words-speech-dataset) | Keyword spotting and constrained ASR experiments |
+| Pashto Wikipedia Corpus | [huggingface](https://huggingface.co/datasets/ihanif/pashto-wikipedia-corpus) | [Dataset metadata includes language:ps and the title specifies Pashto corpus. (`ps`, `Pashto`)](https://huggingface.co/datasets/ihanif/pashto-wikipedia-corpus) | Pashto text corpus for NLP baselines |
 | Pashto Word Embeddings | [kaggle](https://www.kaggle.com/datasets/drijaz/pashto-word-embeddings) | [Dataset description states pretrained Pashto embeddings. (`Pashto`)](https://www.kaggle.com/datasets/drijaz/pashto-word-embeddings) | Lexical semantics and lightweight NLP baselines |
 | Wikimedia Wikipedia | [huggingface](https://huggingface.co/datasets/wikimedia/wikipedia) | [Dataset includes 20231101.ps subset. (`20231101.ps`)](https://huggingface.co/datasets/wikimedia/wikipedia) | Terminology and balanced text corpus |
+| Zirak-AI PashtoOCR | [huggingface](https://huggingface.co/datasets/zirak-ai/PashtoOCR) | [Dataset tags include language:ps and the dataset name is PashtoOCR. (`ps`, `PashtoOCR`)](https://huggingface.co/datasets/zirak-ai/PashtoOCR) | OCR and text extraction benchmarking |
 ## Maintenance
 - Source of truth: [../catalog/resources.json](../catalog/resources.json)

resources/models/README.md CHANGED Viewed

@@ -10,7 +10,9 @@
 | OPUS MT en-mul | [huggingface](https://huggingface.co/Helsinki-NLP/opus-mt-en-mul) | [Language list includes pus code. (`pus`)](https://huggingface.co/Helsinki-NLP/opus-mt-en-mul) | English to Pashto translation path |
 | OPUS MT mul-en | [huggingface](https://huggingface.co/Helsinki-NLP/opus-mt-mul-en) | [Language list includes pus code. (`pus`)](https://huggingface.co/Helsinki-NLP/opus-mt-mul-en) | Pashto to English translation path |
 | PashtoBERT | [huggingface](https://huggingface.co/mdarhri/pashto-bert) | [Model card states training on Pashto corpus data. (`Pashto`)](https://huggingface.co/mdarhri/pashto-bert) | Pashto NLP baseline encoder |
 | Whisper Large v3 | [huggingface](https://huggingface.co/openai/whisper-large-v3) | [Whisper tokenizer map includes ps language key. (`ps`)](https://raw.githubusercontent.com/openai/whisper/main/whisper/tokenizer.py) | ASR baseline and pseudo-labeling |
 ## Maintenance
 - Source of truth: [../catalog/resources.json](../catalog/resources.json)

 | OPUS MT en-mul | [huggingface](https://huggingface.co/Helsinki-NLP/opus-mt-en-mul) | [Language list includes pus code. (`pus`)](https://huggingface.co/Helsinki-NLP/opus-mt-en-mul) | English to Pashto translation path |
 | OPUS MT mul-en | [huggingface](https://huggingface.co/Helsinki-NLP/opus-mt-mul-en) | [Language list includes pus code. (`pus`)](https://huggingface.co/Helsinki-NLP/opus-mt-mul-en) | Pashto to English translation path |
 | PashtoBERT | [huggingface](https://huggingface.co/mdarhri/pashto-bert) | [Model card states training on Pashto corpus data. (`Pashto`)](https://huggingface.co/mdarhri/pashto-bert) | Pashto NLP baseline encoder |
+| wav2vec2 XLS-R 300M Pashto | [huggingface](https://huggingface.co/ihanif/wav2vec2-xls-r-300m-pashto) | [Model tags include pashto and ps, and model index references FLEURS config ps_af. (`pashto`, `ps`, `ps_af`)](https://huggingface.co/ihanif/wav2vec2-xls-r-300m-pashto) | Pashto ASR baseline and comparative experiments |
 | Whisper Large v3 | [huggingface](https://huggingface.co/openai/whisper-large-v3) | [Whisper tokenizer map includes ps language key. (`ps`)](https://raw.githubusercontent.com/openai/whisper/main/whisper/tokenizer.py) | ASR baseline and pseudo-labeling |
+| Whisper Medium Pashto | [huggingface](https://huggingface.co/ihanif/whisper-medium-pashto) | [Model tags include pashto and ps, and model index uses FLEURS ps_af split. (`pashto`, `ps`, `ps_af`)](https://huggingface.co/ihanif/whisper-medium-pashto) | Pashto ASR baseline and transcription quality comparisons |
 ## Maintenance
 - Source of truth: [../catalog/resources.json](../catalog/resources.json)