musaw committed on
Commit
16953e6
·
1 Parent(s): 4363902

Run one-time resource sync and promote new Pashto resources

Browse files
docs/search/resources.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "generated_on": "2026-02-15T00:00:00Z",
3
- "count": 25,
4
  "resources": [
5
  {
6
  "id": "dataset-common-voice-ps-v24",
@@ -590,6 +590,133 @@
590
  "markers": [
591
  "ps_af"
592
  ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
593
  }
594
  ]
595
  }
 
1
  {
2
  "generated_on": "2026-02-15T00:00:00Z",
3
+ "count": 30,
4
  "resources": [
5
  {
6
  "id": "dataset-common-voice-ps-v24",
 
590
  "markers": [
591
  "ps_af"
592
  ]
593
+ },
594
+ {
595
+ "id": "dataset-nexdata-99h-pashto-dialogue",
596
+ "title": "99 Hours Pashto Spontaneous Dialogue Smartphone Speech Dataset",
597
+ "url": "https://huggingface.co/datasets/Nexdata/99_Hours_Pashto_Spontaneous_Dialogue_Smartphone_speech_dataset",
598
+ "category": "dataset",
599
+ "source": "huggingface",
600
+ "status": "verified",
601
+ "summary": "Large spontaneous Pashto smartphone speech dataset for robust ASR experimentation.",
602
+ "primary_use": "Spontaneous speech ASR training and robustness evaluation",
603
+ "tasks": [
604
+ "asr"
605
+ ],
606
+ "tags": [
607
+ "pashto",
608
+ "speech",
609
+ "asr",
610
+ "dialogue"
611
+ ],
612
+ "evidence_text": "Dataset title explicitly includes Pashto and API metadata marks audio and text modalities.",
613
+ "evidence_url": "https://huggingface.co/datasets/Nexdata/99_Hours_Pashto_Spontaneous_Dialogue_Smartphone_speech_dataset",
614
+ "markers": [
615
+ "Pashto"
616
+ ]
617
+ },
618
+ {
619
+ "id": "dataset-zirak-ai-pashto-ocr",
620
+ "title": "Zirak-AI PashtoOCR",
621
+ "url": "https://huggingface.co/datasets/zirak-ai/PashtoOCR",
622
+ "category": "dataset",
623
+ "source": "huggingface",
624
+ "status": "verified",
625
+ "summary": "Pashto-focused OCR dataset with image-text pairs for document understanding tasks.",
626
+ "primary_use": "OCR and text extraction benchmarking",
627
+ "tasks": [
628
+ "ocr",
629
+ "nlp"
630
+ ],
631
+ "tags": [
632
+ "pashto",
633
+ "ocr",
634
+ "nlp",
635
+ "vision"
636
+ ],
637
+ "evidence_text": "Dataset tags include language:ps and the dataset name is PashtoOCR.",
638
+ "evidence_url": "https://huggingface.co/datasets/zirak-ai/PashtoOCR",
639
+ "markers": [
640
+ "ps",
641
+ "PashtoOCR"
642
+ ]
643
+ },
644
+ {
645
+ "id": "dataset-ihanif-pashto-wikipedia-corpus",
646
+ "title": "Pashto Wikipedia Corpus",
647
+ "url": "https://huggingface.co/datasets/ihanif/pashto-wikipedia-corpus",
648
+ "category": "dataset",
649
+ "source": "huggingface",
650
+ "status": "verified",
651
+ "summary": "Pashto text corpus prepared from Wikipedia data for NLP and language modeling.",
652
+ "primary_use": "Pashto text corpus for NLP baselines",
653
+ "tasks": [
654
+ "nlp"
655
+ ],
656
+ "tags": [
657
+ "pashto",
658
+ "text",
659
+ "nlp",
660
+ "wikipedia"
661
+ ],
662
+ "evidence_text": "Dataset metadata includes language:ps and the title specifies Pashto corpus.",
663
+ "evidence_url": "https://huggingface.co/datasets/ihanif/pashto-wikipedia-corpus",
664
+ "markers": [
665
+ "ps",
666
+ "Pashto"
667
+ ]
668
+ },
669
+ {
670
+ "id": "model-ihanif-wav2vec2-xls-r-300m-pashto",
671
+ "title": "wav2vec2 XLS-R 300M Pashto",
672
+ "url": "https://huggingface.co/ihanif/wav2vec2-xls-r-300m-pashto",
673
+ "category": "model",
674
+ "source": "huggingface",
675
+ "status": "verified",
676
+ "summary": "Fine-tuned wav2vec2 XLS-R model for Pashto ASR with published FLEURS evaluation tags.",
677
+ "primary_use": "Pashto ASR baseline and comparative experiments",
678
+ "tasks": [
679
+ "asr"
680
+ ],
681
+ "tags": [
682
+ "pashto",
683
+ "asr",
684
+ "wav2vec2",
685
+ "fleurs"
686
+ ],
687
+ "evidence_text": "Model tags include pashto and ps, and model index references FLEURS config ps_af.",
688
+ "evidence_url": "https://huggingface.co/ihanif/wav2vec2-xls-r-300m-pashto",
689
+ "markers": [
690
+ "pashto",
691
+ "ps",
692
+ "ps_af"
693
+ ]
694
+ },
695
+ {
696
+ "id": "model-ihanif-whisper-medium-pashto",
697
+ "title": "Whisper Medium Pashto",
698
+ "url": "https://huggingface.co/ihanif/whisper-medium-pashto",
699
+ "category": "model",
700
+ "source": "huggingface",
701
+ "status": "verified",
702
+ "summary": "Fine-tuned Whisper Medium checkpoint for Pashto ASR with benchmark metadata.",
703
+ "primary_use": "Pashto ASR baseline and transcription quality comparisons",
704
+ "tasks": [
705
+ "asr"
706
+ ],
707
+ "tags": [
708
+ "pashto",
709
+ "asr",
710
+ "whisper",
711
+ "fleurs"
712
+ ],
713
+ "evidence_text": "Model tags include pashto and ps, and model index uses FLEURS ps_af split.",
714
+ "evidence_url": "https://huggingface.co/ihanif/whisper-medium-pashto",
715
+ "markers": [
716
+ "pashto",
717
+ "ps",
718
+ "ps_af"
719
+ ]
720
  }
721
  ]
722
  }
resources/README.md CHANGED
@@ -3,8 +3,8 @@
3
  Structured, Pashto-focused resource tracking lives in this folder.
4
 
5
  ## Sections
6
- - Datasets (8): [datasets/README.md](datasets/README.md)
7
- - Models (7): [models/README.md](models/README.md)
8
  - Benchmarks (4): [benchmarks/README.md](benchmarks/README.md)
9
  - Tools (2): [tools/README.md](tools/README.md)
10
  - Papers (4): [papers/README.md](papers/README.md)
@@ -20,4 +20,4 @@ Structured, Pashto-focused resource tracking lives in this folder.
20
  - Run `python scripts/validate_resource_catalog.py` before opening a PR.
21
  - Run `python scripts/generate_resource_views.py` after catalog changes.
22
 
23
- Verified resource count: `25`
 
3
  Structured, Pashto-focused resource tracking lives in this folder.
4
 
5
  ## Sections
6
+ - Datasets (11): [datasets/README.md](datasets/README.md)
7
+ - Models (9): [models/README.md](models/README.md)
8
  - Benchmarks (4): [benchmarks/README.md](benchmarks/README.md)
9
  - Tools (2): [tools/README.md](tools/README.md)
10
  - Papers (4): [papers/README.md](papers/README.md)
 
20
  - Run `python scripts/validate_resource_catalog.py` before opening a PR.
21
  - Run `python scripts/generate_resource_views.py` after catalog changes.
22
 
23
+ Verified resource count: `30`
resources/catalog/pending_candidates.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "generated_on": "2026-02-15T09:45:32.641403+00:00",
3
  "sources": [
4
  "huggingface-datasets",
5
  "huggingface-models"
6
  ],
7
- "candidate_count": 20,
8
  "candidates": [
9
  {
10
  "id": "candidate-hf-dataset-aamirhs-pashto",
@@ -75,6 +75,121 @@
75
  "dataset"
76
  ]
77
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
  {
79
  "id": "candidate-hf-dataset-arsalagrey-pashto",
80
  "title": "arsalagrey/pashto",
@@ -82,12 +197,288 @@
82
  "category": "dataset",
83
  "source": "huggingface",
84
  "status": "candidate",
85
- "summary": "Candidate dataset returned from Hugging Face search for Pashto.",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
  "primary_use": "Needs maintainer review before promotion to verified catalog.",
87
  "tasks": [],
88
  "pashto_evidence": {
89
  "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
90
- "evidence_url": "https://huggingface.co/datasets/arsalagrey/pashto",
91
  "markers": [
92
  "pashto"
93
  ]
@@ -95,22 +486,22 @@
95
  "tags": [
96
  "pashto",
97
  "candidate",
98
- "dataset"
99
  ]
100
  },
101
  {
102
- "id": "candidate-hf-dataset-arsalagrey-pashto-books",
103
- "title": "arsalagrey/pashto-books",
104
- "url": "https://huggingface.co/datasets/arsalagrey/pashto-books",
105
- "category": "dataset",
106
  "source": "huggingface",
107
  "status": "candidate",
108
- "summary": "Candidate dataset returned from Hugging Face search for Pashto.",
109
  "primary_use": "Needs maintainer review before promotion to verified catalog.",
110
  "tasks": [],
111
  "pashto_evidence": {
112
  "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
113
- "evidence_url": "https://huggingface.co/datasets/arsalagrey/pashto-books",
114
  "markers": [
115
  "pashto"
116
  ]
@@ -118,22 +509,22 @@
118
  "tags": [
119
  "pashto",
120
  "candidate",
121
- "dataset"
122
  ]
123
  },
124
  {
125
- "id": "candidate-hf-dataset-arsalagrey-pashto-books-json",
126
- "title": "arsalagrey/pashto-books-json",
127
- "url": "https://huggingface.co/datasets/arsalagrey/pashto-books-json",
128
- "category": "dataset",
129
  "source": "huggingface",
130
  "status": "candidate",
131
- "summary": "Candidate dataset returned from Hugging Face search for Pashto.",
132
  "primary_use": "Needs maintainer review before promotion to verified catalog.",
133
  "tasks": [],
134
  "pashto_evidence": {
135
  "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
136
- "evidence_url": "https://huggingface.co/datasets/arsalagrey/pashto-books-json",
137
  "markers": [
138
  "pashto"
139
  ]
@@ -141,13 +532,13 @@
141
  "tags": [
142
  "pashto",
143
  "candidate",
144
- "dataset"
145
  ]
146
  },
147
  {
148
- "id": "candidate-hf-model-ihanif-wav2vec2-xls-r-300m-pashto",
149
- "title": "ihanif/wav2vec2-xls-r-300m-pashto",
150
- "url": "https://huggingface.co/ihanif/wav2vec2-xls-r-300m-pashto",
151
  "category": "model",
152
  "source": "huggingface",
153
  "status": "candidate",
@@ -156,7 +547,7 @@
156
  "tasks": [],
157
  "pashto_evidence": {
158
  "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
159
- "evidence_url": "https://huggingface.co/ihanif/wav2vec2-xls-r-300m-pashto",
160
  "markers": [
161
  "pashto"
162
  ]
@@ -168,9 +559,9 @@
168
  ]
169
  },
170
  {
171
- "id": "candidate-hf-model-ihanif-wav2vec2-xls-r-300m-pashto-lm",
172
- "title": "ihanif/wav2vec2-xls-r-300m-pashto-lm",
173
- "url": "https://huggingface.co/ihanif/wav2vec2-xls-r-300m-pashto-lm",
174
  "category": "model",
175
  "source": "huggingface",
176
  "status": "candidate",
@@ -179,7 +570,7 @@
179
  "tasks": [],
180
  "pashto_evidence": {
181
  "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
182
- "evidence_url": "https://huggingface.co/ihanif/wav2vec2-xls-r-300m-pashto-lm",
183
  "markers": [
184
  "pashto"
185
  ]
@@ -191,9 +582,9 @@
191
  ]
192
  },
193
  {
194
- "id": "candidate-hf-model-ihanif-whisper-base-pashto",
195
- "title": "ihanif/whisper-base-pashto",
196
- "url": "https://huggingface.co/ihanif/whisper-base-pashto",
197
  "category": "model",
198
  "source": "huggingface",
199
  "status": "candidate",
@@ -202,7 +593,7 @@
202
  "tasks": [],
203
  "pashto_evidence": {
204
  "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
205
- "evidence_url": "https://huggingface.co/ihanif/whisper-base-pashto",
206
  "markers": [
207
  "pashto"
208
  ]
@@ -214,9 +605,9 @@
214
  ]
215
  },
216
  {
217
- "id": "candidate-hf-model-ihanif-whisper-large-pashto",
218
- "title": "ihanif/whisper-large-pashto",
219
- "url": "https://huggingface.co/ihanif/whisper-large-pashto",
220
  "category": "model",
221
  "source": "huggingface",
222
  "status": "candidate",
@@ -225,7 +616,7 @@
225
  "tasks": [],
226
  "pashto_evidence": {
227
  "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
228
- "evidence_url": "https://huggingface.co/ihanif/whisper-large-pashto",
229
  "markers": [
230
  "pashto"
231
  ]
@@ -237,9 +628,9 @@
237
  ]
238
  },
239
  {
240
- "id": "candidate-hf-model-ihanif-whisper-medium-pashto",
241
- "title": "ihanif/whisper-medium-pashto",
242
- "url": "https://huggingface.co/ihanif/whisper-medium-pashto",
243
  "category": "model",
244
  "source": "huggingface",
245
  "status": "candidate",
@@ -248,7 +639,7 @@
248
  "tasks": [],
249
  "pashto_evidence": {
250
  "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
251
- "evidence_url": "https://huggingface.co/ihanif/whisper-medium-pashto",
252
  "markers": [
253
  "pashto"
254
  ]
@@ -260,9 +651,9 @@
260
  ]
261
  },
262
  {
263
- "id": "candidate-hf-model-ihanif-whisper-medium-pashto-3e-7",
264
- "title": "ihanif/whisper-medium-pashto-3e-7",
265
- "url": "https://huggingface.co/ihanif/whisper-medium-pashto-3e-7",
266
  "category": "model",
267
  "source": "huggingface",
268
  "status": "candidate",
@@ -271,7 +662,7 @@
271
  "tasks": [],
272
  "pashto_evidence": {
273
  "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
274
- "evidence_url": "https://huggingface.co/ihanif/whisper-medium-pashto-3e-7",
275
  "markers": [
276
  "pashto"
277
  ]
@@ -283,9 +674,9 @@
283
  ]
284
  },
285
  {
286
- "id": "candidate-hf-model-ihanif-whisper-small-pashto",
287
- "title": "ihanif/whisper-small-pashto",
288
- "url": "https://huggingface.co/ihanif/whisper-small-pashto",
289
  "category": "model",
290
  "source": "huggingface",
291
  "status": "candidate",
@@ -294,7 +685,7 @@
294
  "tasks": [],
295
  "pashto_evidence": {
296
  "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
297
- "evidence_url": "https://huggingface.co/ihanif/whisper-small-pashto",
298
  "markers": [
299
  "pashto"
300
  ]
@@ -306,9 +697,9 @@
306
  ]
307
  },
308
  {
309
- "id": "candidate-hf-model-ihanif-whisper-small-pashto-dropout",
310
- "title": "ihanif/whisper-small-pashto-dropout",
311
- "url": "https://huggingface.co/ihanif/whisper-small-pashto-dropout",
312
  "category": "model",
313
  "source": "huggingface",
314
  "status": "candidate",
@@ -317,7 +708,7 @@
317
  "tasks": [],
318
  "pashto_evidence": {
319
  "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
320
- "evidence_url": "https://huggingface.co/ihanif/whisper-small-pashto-dropout",
321
  "markers": [
322
  "pashto"
323
  ]
@@ -329,9 +720,9 @@
329
  ]
330
  },
331
  {
332
- "id": "candidate-hf-model-ihanif-xls-r-1b-pashto",
333
- "title": "ihanif/xls-r-1b-pashto",
334
- "url": "https://huggingface.co/ihanif/xls-r-1b-pashto",
335
  "category": "model",
336
  "source": "huggingface",
337
  "status": "candidate",
@@ -340,7 +731,99 @@
340
  "tasks": [],
341
  "pashto_evidence": {
342
  "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
343
- "evidence_url": "https://huggingface.co/ihanif/xls-r-1b-pashto",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
344
  "markers": [
345
  "pashto"
346
  ]
@@ -375,9 +858,55 @@
375
  ]
376
  },
377
  {
378
- "id": "candidate-hf-dataset-nexdata-99-hours-pashto-spontaneous-dialogue-smartphone-speech-dataset",
379
- "title": "Nexdata/99_Hours_Pashto_Spontaneous_Dialogue_Smartphone_speech_dataset",
380
- "url": "https://huggingface.co/datasets/Nexdata/99_Hours_Pashto_Spontaneous_Dialogue_Smartphone_speech_dataset",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
381
  "category": "dataset",
382
  "source": "huggingface",
383
  "status": "candidate",
@@ -386,7 +915,7 @@
386
  "tasks": [],
387
  "pashto_evidence": {
388
  "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
389
- "evidence_url": "https://huggingface.co/datasets/Nexdata/99_Hours_Pashto_Spontaneous_Dialogue_Smartphone_speech_dataset",
390
  "markers": [
391
  "pashto"
392
  ]
@@ -421,18 +950,18 @@
421
  ]
422
  },
423
  {
424
- "id": "candidate-hf-model-zirak-ai-pashto-bert-v1",
425
- "title": "zirak-ai/pashto-bert-v1",
426
- "url": "https://huggingface.co/zirak-ai/pashto-bert-v1",
427
- "category": "model",
428
  "source": "huggingface",
429
  "status": "candidate",
430
- "summary": "Candidate model returned from Hugging Face search for Pashto.",
431
  "primary_use": "Needs maintainer review before promotion to verified catalog.",
432
  "tasks": [],
433
  "pashto_evidence": {
434
  "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
435
- "evidence_url": "https://huggingface.co/zirak-ai/pashto-bert-v1",
436
  "markers": [
437
  "pashto"
438
  ]
@@ -440,13 +969,36 @@
440
  "tags": [
441
  "pashto",
442
  "candidate",
443
- "model"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
444
  ]
445
  },
446
  {
447
- "id": "candidate-hf-dataset-zirak-ai-pashtoocr",
448
- "title": "zirak-ai/PashtoOCR",
449
- "url": "https://huggingface.co/datasets/zirak-ai/PashtoOCR",
450
  "category": "dataset",
451
  "source": "huggingface",
452
  "status": "candidate",
@@ -455,7 +1007,7 @@
455
  "tasks": [],
456
  "pashto_evidence": {
457
  "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
458
- "evidence_url": "https://huggingface.co/datasets/zirak-ai/PashtoOCR",
459
  "markers": [
460
  "pashto"
461
  ]
@@ -465,6 +1017,29 @@
465
  "candidate",
466
  "dataset"
467
  ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
468
  }
469
  ],
470
  "errors": [
 
1
  {
2
+ "generated_on": "2026-02-15T10:06:14.796338+00:00",
3
  "sources": [
4
  "huggingface-datasets",
5
  "huggingface-models"
6
  ],
7
+ "candidate_count": 45,
8
  "candidates": [
9
  {
10
  "id": "candidate-hf-dataset-aamirhs-pashto",
 
75
  "dataset"
76
  ]
77
  },
78
+ {
79
+ "id": "candidate-hf-model-aamirhs-wav2vec2-large-xls-r-300m-pashto-colab",
80
+ "title": "aamirhs/wav2vec2-large-xls-r-300m-pashto-colab",
81
+ "url": "https://huggingface.co/aamirhs/wav2vec2-large-xls-r-300m-pashto-colab",
82
+ "category": "model",
83
+ "source": "huggingface",
84
+ "status": "candidate",
85
+ "summary": "Candidate model returned from Hugging Face search for Pashto.",
86
+ "primary_use": "Needs maintainer review before promotion to verified catalog.",
87
+ "tasks": [],
88
+ "pashto_evidence": {
89
+ "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
90
+ "evidence_url": "https://huggingface.co/aamirhs/wav2vec2-large-xls-r-300m-pashto-colab",
91
+ "markers": [
92
+ "pashto"
93
+ ]
94
+ },
95
+ "tags": [
96
+ "pashto",
97
+ "candidate",
98
+ "model"
99
+ ]
100
+ },
101
+ {
102
+ "id": "candidate-hf-model-aamirhs-wav2vec2-large-xls-r-300m-pashto-colab-test-2",
103
+ "title": "aamirhs/wav2vec2-large-xls-r-300m-pashto-colab-test-2",
104
+ "url": "https://huggingface.co/aamirhs/wav2vec2-large-xls-r-300m-pashto-colab-test-2",
105
+ "category": "model",
106
+ "source": "huggingface",
107
+ "status": "candidate",
108
+ "summary": "Candidate model returned from Hugging Face search for Pashto.",
109
+ "primary_use": "Needs maintainer review before promotion to verified catalog.",
110
+ "tasks": [],
111
+ "pashto_evidence": {
112
+ "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
113
+ "evidence_url": "https://huggingface.co/aamirhs/wav2vec2-large-xls-r-300m-pashto-colab-test-2",
114
+ "markers": [
115
+ "pashto"
116
+ ]
117
+ },
118
+ "tags": [
119
+ "pashto",
120
+ "candidate",
121
+ "model"
122
+ ]
123
+ },
124
+ {
125
+ "id": "candidate-hf-dataset-adnankhan769-english-to-pashto-sentences-dataset",
126
+ "title": "adnankhan769/english_to_pashto_sentences_dataset",
127
+ "url": "https://huggingface.co/datasets/adnankhan769/english_to_pashto_sentences_dataset",
128
+ "category": "dataset",
129
+ "source": "huggingface",
130
+ "status": "candidate",
131
+ "summary": "Candidate dataset returned from Hugging Face search for Pashto.",
132
+ "primary_use": "Needs maintainer review before promotion to verified catalog.",
133
+ "tasks": [],
134
+ "pashto_evidence": {
135
+ "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
136
+ "evidence_url": "https://huggingface.co/datasets/adnankhan769/english_to_pashto_sentences_dataset",
137
+ "markers": [
138
+ "pashto"
139
+ ]
140
+ },
141
+ "tags": [
142
+ "pashto",
143
+ "candidate",
144
+ "dataset"
145
+ ]
146
+ },
147
+ {
148
+ "id": "candidate-hf-dataset-adnankhan769-proper-dataset-english-2-pashto",
149
+ "title": "adnankhan769/proper_dataset_english_2_pashto",
150
+ "url": "https://huggingface.co/datasets/adnankhan769/proper_dataset_english_2_pashto",
151
+ "category": "dataset",
152
+ "source": "huggingface",
153
+ "status": "candidate",
154
+ "summary": "Candidate dataset returned from Hugging Face search for Pashto.",
155
+ "primary_use": "Needs maintainer review before promotion to verified catalog.",
156
+ "tasks": [],
157
+ "pashto_evidence": {
158
+ "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
159
+ "evidence_url": "https://huggingface.co/datasets/adnankhan769/proper_dataset_english_2_pashto",
160
+ "markers": [
161
+ "pashto"
162
+ ]
163
+ },
164
+ "tags": [
165
+ "pashto",
166
+ "candidate",
167
+ "dataset"
168
+ ]
169
+ },
170
+ {
171
+ "id": "candidate-hf-dataset-alimuhammad73-pashto-poetry",
172
+ "title": "AliMuhammad73/Pashto-Poetry",
173
+ "url": "https://huggingface.co/datasets/AliMuhammad73/Pashto-Poetry",
174
+ "category": "dataset",
175
+ "source": "huggingface",
176
+ "status": "candidate",
177
+ "summary": "Candidate dataset returned from Hugging Face search for Pashto.",
178
+ "primary_use": "Needs maintainer review before promotion to verified catalog.",
179
+ "tasks": [],
180
+ "pashto_evidence": {
181
+ "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
182
+ "evidence_url": "https://huggingface.co/datasets/AliMuhammad73/Pashto-Poetry",
183
+ "markers": [
184
+ "pashto"
185
+ ]
186
+ },
187
+ "tags": [
188
+ "pashto",
189
+ "candidate",
190
+ "dataset"
191
+ ]
192
+ },
193
  {
194
  "id": "candidate-hf-dataset-arsalagrey-pashto",
195
  "title": "arsalagrey/pashto",
 
197
  "category": "dataset",
198
  "source": "huggingface",
199
  "status": "candidate",
200
+ "summary": "Candidate dataset returned from Hugging Face search for Pashto.",
201
+ "primary_use": "Needs maintainer review before promotion to verified catalog.",
202
+ "tasks": [],
203
+ "pashto_evidence": {
204
+ "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
205
+ "evidence_url": "https://huggingface.co/datasets/arsalagrey/pashto",
206
+ "markers": [
207
+ "pashto"
208
+ ]
209
+ },
210
+ "tags": [
211
+ "pashto",
212
+ "candidate",
213
+ "dataset"
214
+ ]
215
+ },
216
+ {
217
+ "id": "candidate-hf-dataset-arsalagrey-pashto-books",
218
+ "title": "arsalagrey/pashto-books",
219
+ "url": "https://huggingface.co/datasets/arsalagrey/pashto-books",
220
+ "category": "dataset",
221
+ "source": "huggingface",
222
+ "status": "candidate",
223
+ "summary": "Candidate dataset returned from Hugging Face search for Pashto.",
224
+ "primary_use": "Needs maintainer review before promotion to verified catalog.",
225
+ "tasks": [],
226
+ "pashto_evidence": {
227
+ "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
228
+ "evidence_url": "https://huggingface.co/datasets/arsalagrey/pashto-books",
229
+ "markers": [
230
+ "pashto"
231
+ ]
232
+ },
233
+ "tags": [
234
+ "pashto",
235
+ "candidate",
236
+ "dataset"
237
+ ]
238
+ },
239
+ {
240
+ "id": "candidate-hf-dataset-arsalagrey-pashto-books-json",
241
+ "title": "arsalagrey/pashto-books-json",
242
+ "url": "https://huggingface.co/datasets/arsalagrey/pashto-books-json",
243
+ "category": "dataset",
244
+ "source": "huggingface",
245
+ "status": "candidate",
246
+ "summary": "Candidate dataset returned from Hugging Face search for Pashto.",
247
+ "primary_use": "Needs maintainer review before promotion to verified catalog.",
248
+ "tasks": [],
249
+ "pashto_evidence": {
250
+ "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
251
+ "evidence_url": "https://huggingface.co/datasets/arsalagrey/pashto-books-json",
252
+ "markers": [
253
+ "pashto"
254
+ ]
255
+ },
256
+ "tags": [
257
+ "pashto",
258
+ "candidate",
259
+ "dataset"
260
+ ]
261
+ },
262
+ {
263
+ "id": "candidate-hf-model-ihanif-pashto-asr-base",
264
+ "title": "ihanif/pashto-asr-base",
265
+ "url": "https://huggingface.co/ihanif/pashto-asr-base",
266
+ "category": "model",
267
+ "source": "huggingface",
268
+ "status": "candidate",
269
+ "summary": "Candidate model returned from Hugging Face search for Pashto.",
270
+ "primary_use": "Needs maintainer review before promotion to verified catalog.",
271
+ "tasks": [],
272
+ "pashto_evidence": {
273
+ "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
274
+ "evidence_url": "https://huggingface.co/ihanif/pashto-asr-base",
275
+ "markers": [
276
+ "pashto"
277
+ ]
278
+ },
279
+ "tags": [
280
+ "pashto",
281
+ "candidate",
282
+ "model"
283
+ ]
284
+ },
285
+ {
286
+ "id": "candidate-hf-dataset-ihanif-pashto-asr-wer",
287
+ "title": "ihanif/pashto_asr_wer",
288
+ "url": "https://huggingface.co/datasets/ihanif/pashto_asr_wer",
289
+ "category": "dataset",
290
+ "source": "huggingface",
291
+ "status": "candidate",
292
+ "summary": "Candidate dataset returned from Hugging Face search for Pashto.",
293
+ "primary_use": "Needs maintainer review before promotion to verified catalog.",
294
+ "tasks": [],
295
+ "pashto_evidence": {
296
+ "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
297
+ "evidence_url": "https://huggingface.co/datasets/ihanif/pashto_asr_wer",
298
+ "markers": [
299
+ "pashto"
300
+ ]
301
+ },
302
+ "tags": [
303
+ "pashto",
304
+ "candidate",
305
+ "dataset"
306
+ ]
307
+ },
308
+ {
309
+ "id": "candidate-hf-dataset-ihanif-pashto-speech-20k",
310
+ "title": "ihanif/pashto_speech_20k",
311
+ "url": "https://huggingface.co/datasets/ihanif/pashto_speech_20k",
312
+ "category": "dataset",
313
+ "source": "huggingface",
314
+ "status": "candidate",
315
+ "summary": "Candidate dataset returned from Hugging Face search for Pashto.",
316
+ "primary_use": "Needs maintainer review before promotion to verified catalog.",
317
+ "tasks": [],
318
+ "pashto_evidence": {
319
+ "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
320
+ "evidence_url": "https://huggingface.co/datasets/ihanif/pashto_speech_20k",
321
+ "markers": [
322
+ "pashto"
323
+ ]
324
+ },
325
+ "tags": [
326
+ "pashto",
327
+ "candidate",
328
+ "dataset"
329
+ ]
330
+ },
331
+ {
332
+ "id": "candidate-hf-dataset-ihanif-pashto-speech-2k",
333
+ "title": "ihanif/pashto_speech_2k",
334
+ "url": "https://huggingface.co/datasets/ihanif/pashto_speech_2k",
335
+ "category": "dataset",
336
+ "source": "huggingface",
337
+ "status": "candidate",
338
+ "summary": "Candidate dataset returned from Hugging Face search for Pashto.",
339
+ "primary_use": "Needs maintainer review before promotion to verified catalog.",
340
+ "tasks": [],
341
+ "pashto_evidence": {
342
+ "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
343
+ "evidence_url": "https://huggingface.co/datasets/ihanif/pashto_speech_2k",
344
+ "markers": [
345
+ "pashto"
346
+ ]
347
+ },
348
+ "tags": [
349
+ "pashto",
350
+ "candidate",
351
+ "dataset"
352
+ ]
353
+ },
354
+ {
355
+ "id": "candidate-hf-dataset-ihanif-pashto-speech-3k",
356
+ "title": "ihanif/pashto_speech_3k",
357
+ "url": "https://huggingface.co/datasets/ihanif/pashto_speech_3k",
358
+ "category": "dataset",
359
+ "source": "huggingface",
360
+ "status": "candidate",
361
+ "summary": "Candidate dataset returned from Hugging Face search for Pashto.",
362
+ "primary_use": "Needs maintainer review before promotion to verified catalog.",
363
+ "tasks": [],
364
+ "pashto_evidence": {
365
+ "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
366
+ "evidence_url": "https://huggingface.co/datasets/ihanif/pashto_speech_3k",
367
+ "markers": [
368
+ "pashto"
369
+ ]
370
+ },
371
+ "tags": [
372
+ "pashto",
373
+ "candidate",
374
+ "dataset"
375
+ ]
376
+ },
377
+ {
378
+ "id": "candidate-hf-dataset-ihanif-pashto-speech-5k",
379
+ "title": "ihanif/pashto_speech_5k",
380
+ "url": "https://huggingface.co/datasets/ihanif/pashto_speech_5k",
381
+ "category": "dataset",
382
+ "source": "huggingface",
383
+ "status": "candidate",
384
+ "summary": "Candidate dataset returned from Hugging Face search for Pashto.",
385
+ "primary_use": "Needs maintainer review before promotion to verified catalog.",
386
+ "tasks": [],
387
+ "pashto_evidence": {
388
+ "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
389
+ "evidence_url": "https://huggingface.co/datasets/ihanif/pashto_speech_5k",
390
+ "markers": [
391
+ "pashto"
392
+ ]
393
+ },
394
+ "tags": [
395
+ "pashto",
396
+ "candidate",
397
+ "dataset"
398
+ ]
399
+ },
400
+ {
401
+ "id": "candidate-hf-dataset-ihanif-pashto-speech-ds",
402
+ "title": "ihanif/pashto_speech_ds",
403
+ "url": "https://huggingface.co/datasets/ihanif/pashto_speech_ds",
404
+ "category": "dataset",
405
+ "source": "huggingface",
406
+ "status": "candidate",
407
+ "summary": "Candidate dataset returned from Hugging Face search for Pashto.",
408
+ "primary_use": "Needs maintainer review before promotion to verified catalog.",
409
+ "tasks": [],
410
+ "pashto_evidence": {
411
+ "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
412
+ "evidence_url": "https://huggingface.co/datasets/ihanif/pashto_speech_ds",
413
+ "markers": [
414
+ "pashto"
415
+ ]
416
+ },
417
+ "tags": [
418
+ "pashto",
419
+ "candidate",
420
+ "dataset"
421
+ ]
422
+ },
423
+ {
424
+ "id": "candidate-hf-dataset-ihanif-pashto-speech-parquet-10k",
425
+ "title": "ihanif/pashto_speech_parquet_10k",
426
+ "url": "https://huggingface.co/datasets/ihanif/pashto_speech_parquet_10k",
427
+ "category": "dataset",
428
+ "source": "huggingface",
429
+ "status": "candidate",
430
+ "summary": "Candidate dataset returned from Hugging Face search for Pashto.",
431
+ "primary_use": "Needs maintainer review before promotion to verified catalog.",
432
+ "tasks": [],
433
+ "pashto_evidence": {
434
+ "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
435
+ "evidence_url": "https://huggingface.co/datasets/ihanif/pashto_speech_parquet_10k",
436
+ "markers": [
437
+ "pashto"
438
+ ]
439
+ },
440
+ "tags": [
441
+ "pashto",
442
+ "candidate",
443
+ "dataset"
444
+ ]
445
+ },
446
+ {
447
+ "id": "candidate-hf-model-ihanif-wav2vec2-xls-r-300m-pashto-lm",
448
+ "title": "ihanif/wav2vec2-xls-r-300m-pashto-lm",
449
+ "url": "https://huggingface.co/ihanif/wav2vec2-xls-r-300m-pashto-lm",
450
+ "category": "model",
451
+ "source": "huggingface",
452
+ "status": "candidate",
453
+ "summary": "Candidate model returned from Hugging Face search for Pashto.",
454
+ "primary_use": "Needs maintainer review before promotion to verified catalog.",
455
+ "tasks": [],
456
+ "pashto_evidence": {
457
+ "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
458
+ "evidence_url": "https://huggingface.co/ihanif/wav2vec2-xls-r-300m-pashto-lm",
459
+ "markers": [
460
+ "pashto"
461
+ ]
462
+ },
463
+ "tags": [
464
+ "pashto",
465
+ "candidate",
466
+ "model"
467
+ ]
468
+ },
469
+ {
470
+ "id": "candidate-hf-model-ihanif-whisper-base-pashto",
471
+ "title": "ihanif/whisper-base-pashto",
472
+ "url": "https://huggingface.co/ihanif/whisper-base-pashto",
473
+ "category": "model",
474
+ "source": "huggingface",
475
+ "status": "candidate",
476
+ "summary": "Candidate model returned from Hugging Face search for Pashto.",
477
  "primary_use": "Needs maintainer review before promotion to verified catalog.",
478
  "tasks": [],
479
  "pashto_evidence": {
480
  "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
481
+ "evidence_url": "https://huggingface.co/ihanif/whisper-base-pashto",
482
  "markers": [
483
  "pashto"
484
  ]
 
486
  "tags": [
487
  "pashto",
488
  "candidate",
489
+ "model"
490
  ]
491
  },
492
  {
493
+ "id": "candidate-hf-model-ihanif-whisper-large-pashto",
494
+ "title": "ihanif/whisper-large-pashto",
495
+ "url": "https://huggingface.co/ihanif/whisper-large-pashto",
496
+ "category": "model",
497
  "source": "huggingface",
498
  "status": "candidate",
499
+ "summary": "Candidate model returned from Hugging Face search for Pashto.",
500
  "primary_use": "Needs maintainer review before promotion to verified catalog.",
501
  "tasks": [],
502
  "pashto_evidence": {
503
  "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
504
+ "evidence_url": "https://huggingface.co/ihanif/whisper-large-pashto",
505
  "markers": [
506
  "pashto"
507
  ]
 
509
  "tags": [
510
  "pashto",
511
  "candidate",
512
+ "model"
513
  ]
514
  },
515
  {
516
+ "id": "candidate-hf-model-ihanif-whisper-medium-pashto-3e-7",
517
+ "title": "ihanif/whisper-medium-pashto-3e-7",
518
+ "url": "https://huggingface.co/ihanif/whisper-medium-pashto-3e-7",
519
+ "category": "model",
520
  "source": "huggingface",
521
  "status": "candidate",
522
+ "summary": "Candidate model returned from Hugging Face search for Pashto.",
523
  "primary_use": "Needs maintainer review before promotion to verified catalog.",
524
  "tasks": [],
525
  "pashto_evidence": {
526
  "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
527
+ "evidence_url": "https://huggingface.co/ihanif/whisper-medium-pashto-3e-7",
528
  "markers": [
529
  "pashto"
530
  ]
 
532
  "tags": [
533
  "pashto",
534
  "candidate",
535
+ "model"
536
  ]
537
  },
538
  {
539
+ "id": "candidate-hf-model-ihanif-whisper-small-pashto",
540
+ "title": "ihanif/whisper-small-pashto",
541
+ "url": "https://huggingface.co/ihanif/whisper-small-pashto",
542
  "category": "model",
543
  "source": "huggingface",
544
  "status": "candidate",
 
547
  "tasks": [],
548
  "pashto_evidence": {
549
  "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
550
+ "evidence_url": "https://huggingface.co/ihanif/whisper-small-pashto",
551
  "markers": [
552
  "pashto"
553
  ]
 
559
  ]
560
  },
561
  {
562
+ "id": "candidate-hf-model-ihanif-whisper-small-pashto-dropout",
563
+ "title": "ihanif/whisper-small-pashto-dropout",
564
+ "url": "https://huggingface.co/ihanif/whisper-small-pashto-dropout",
565
  "category": "model",
566
  "source": "huggingface",
567
  "status": "candidate",
 
570
  "tasks": [],
571
  "pashto_evidence": {
572
  "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
573
+ "evidence_url": "https://huggingface.co/ihanif/whisper-small-pashto-dropout",
574
  "markers": [
575
  "pashto"
576
  ]
 
582
  ]
583
  },
584
  {
585
+ "id": "candidate-hf-model-ihanif-xls-r-1b-pashto",
586
+ "title": "ihanif/xls-r-1b-pashto",
587
+ "url": "https://huggingface.co/ihanif/xls-r-1b-pashto",
588
  "category": "model",
589
  "source": "huggingface",
590
  "status": "candidate",
 
593
  "tasks": [],
594
  "pashto_evidence": {
595
  "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
596
+ "evidence_url": "https://huggingface.co/ihanif/xls-r-1b-pashto",
597
  "markers": [
598
  "pashto"
599
  ]
 
605
  ]
606
  },
607
  {
608
+ "id": "candidate-hf-model-ijazulhaq-bert-base-pashto",
609
+ "title": "ijazulhaq/bert-base-pashto",
610
+ "url": "https://huggingface.co/ijazulhaq/bert-base-pashto",
611
  "category": "model",
612
  "source": "huggingface",
613
  "status": "candidate",
 
616
  "tasks": [],
617
  "pashto_evidence": {
618
  "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
619
+ "evidence_url": "https://huggingface.co/ijazulhaq/bert-base-pashto",
620
  "markers": [
621
  "pashto"
622
  ]
 
628
  ]
629
  },
630
  {
631
+ "id": "candidate-hf-model-ijazulhaq-bert-base-pashto-v1",
632
+ "title": "ijazulhaq/bert-base-pashto-v1",
633
+ "url": "https://huggingface.co/ijazulhaq/bert-base-pashto-v1",
634
  "category": "model",
635
  "source": "huggingface",
636
  "status": "candidate",
 
639
  "tasks": [],
640
  "pashto_evidence": {
641
  "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
642
+ "evidence_url": "https://huggingface.co/ijazulhaq/bert-base-pashto-v1",
643
  "markers": [
644
  "pashto"
645
  ]
 
651
  ]
652
  },
653
  {
654
+ "id": "candidate-hf-model-jawaria-wav2vec2-large-xls-r-300m-pashto-colab-final-1",
655
+ "title": "Jawaria/wav2vec2-large-xls-r-300m-pashto-colab-final-1",
656
+ "url": "https://huggingface.co/Jawaria/wav2vec2-large-xls-r-300m-pashto-colab-final-1",
657
  "category": "model",
658
  "source": "huggingface",
659
  "status": "candidate",
 
662
  "tasks": [],
663
  "pashto_evidence": {
664
  "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
665
+ "evidence_url": "https://huggingface.co/Jawaria/wav2vec2-large-xls-r-300m-pashto-colab-final-1",
666
  "markers": [
667
  "pashto"
668
  ]
 
674
  ]
675
  },
676
  {
677
+ "id": "candidate-hf-model-jawaria-wav2vec2-large-xls-r-300m-pashto-colab-test-2",
678
+ "title": "Jawaria/wav2vec2-large-xls-r-300m-pashto-colab-test-2",
679
+ "url": "https://huggingface.co/Jawaria/wav2vec2-large-xls-r-300m-pashto-colab-test-2",
680
  "category": "model",
681
  "source": "huggingface",
682
  "status": "candidate",
 
685
  "tasks": [],
686
  "pashto_evidence": {
687
  "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
688
+ "evidence_url": "https://huggingface.co/Jawaria/wav2vec2-large-xls-r-300m-pashto-colab-test-2",
689
  "markers": [
690
  "pashto"
691
  ]
 
697
  ]
698
  },
699
  {
700
+ "id": "candidate-hf-model-jawaria-wav2vec2-large-xls-r-300m-pashto-colab-test-3",
701
+ "title": "Jawaria/wav2vec2-large-xls-r-300m-pashto-colab-test-3",
702
+ "url": "https://huggingface.co/Jawaria/wav2vec2-large-xls-r-300m-pashto-colab-test-3",
703
  "category": "model",
704
  "source": "huggingface",
705
  "status": "candidate",
 
708
  "tasks": [],
709
  "pashto_evidence": {
710
  "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
711
+ "evidence_url": "https://huggingface.co/Jawaria/wav2vec2-large-xls-r-300m-pashto-colab-test-3",
712
  "markers": [
713
  "pashto"
714
  ]
 
720
  ]
721
  },
722
  {
723
+ "id": "candidate-hf-model-jawaria-wav2vec2-large-xls-r-300m-pashto-colab-test-4",
724
+ "title": "Jawaria/wav2vec2-large-xls-r-300m-pashto-colab-test-4",
725
+ "url": "https://huggingface.co/Jawaria/wav2vec2-large-xls-r-300m-pashto-colab-test-4",
726
  "category": "model",
727
  "source": "huggingface",
728
  "status": "candidate",
 
731
  "tasks": [],
732
  "pashto_evidence": {
733
  "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
734
+ "evidence_url": "https://huggingface.co/Jawaria/wav2vec2-large-xls-r-300m-pashto-colab-test-4",
735
+ "markers": [
736
+ "pashto"
737
+ ]
738
+ },
739
+ "tags": [
740
+ "pashto",
741
+ "candidate",
742
+ "model"
743
+ ]
744
+ },
745
+ {
746
+ "id": "candidate-hf-model-jawaria-wav2vec2-large-xls-r-300m-pashto-colab-test-5",
747
+ "title": "Jawaria/wav2vec2-large-xls-r-300m-pashto-colab-test-5",
748
+ "url": "https://huggingface.co/Jawaria/wav2vec2-large-xls-r-300m-pashto-colab-test-5",
749
+ "category": "model",
750
+ "source": "huggingface",
751
+ "status": "candidate",
752
+ "summary": "Candidate model returned from Hugging Face search for Pashto.",
753
+ "primary_use": "Needs maintainer review before promotion to verified catalog.",
754
+ "tasks": [],
755
+ "pashto_evidence": {
756
+ "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
757
+ "evidence_url": "https://huggingface.co/Jawaria/wav2vec2-large-xls-r-300m-pashto-colab-test-5",
758
+ "markers": [
759
+ "pashto"
760
+ ]
761
+ },
762
+ "tags": [
763
+ "pashto",
764
+ "candidate",
765
+ "model"
766
+ ]
767
+ },
768
+ {
769
+ "id": "candidate-hf-model-jawaria-wav2vec2-large-xls-r-300m-pashto-colab-test-6",
770
+ "title": "Jawaria/wav2vec2-large-xls-r-300m-pashto-colab-test-6",
771
+ "url": "https://huggingface.co/Jawaria/wav2vec2-large-xls-r-300m-pashto-colab-test-6",
772
+ "category": "model",
773
+ "source": "huggingface",
774
+ "status": "candidate",
775
+ "summary": "Candidate model returned from Hugging Face search for Pashto.",
776
+ "primary_use": "Needs maintainer review before promotion to verified catalog.",
777
+ "tasks": [],
778
+ "pashto_evidence": {
779
+ "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
780
+ "evidence_url": "https://huggingface.co/Jawaria/wav2vec2-large-xls-r-300m-pashto-colab-test-6",
781
+ "markers": [
782
+ "pashto"
783
+ ]
784
+ },
785
+ "tags": [
786
+ "pashto",
787
+ "candidate",
788
+ "model"
789
+ ]
790
+ },
791
+ {
792
+ "id": "candidate-hf-model-jawaria-wav2vec2-large-xls-r-300m-pashto-colab-test-7",
793
+ "title": "Jawaria/wav2vec2-large-xls-r-300m-pashto-colab-test-7",
794
+ "url": "https://huggingface.co/Jawaria/wav2vec2-large-xls-r-300m-pashto-colab-test-7",
795
+ "category": "model",
796
+ "source": "huggingface",
797
+ "status": "candidate",
798
+ "summary": "Candidate model returned from Hugging Face search for Pashto.",
799
+ "primary_use": "Needs maintainer review before promotion to verified catalog.",
800
+ "tasks": [],
801
+ "pashto_evidence": {
802
+ "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
803
+ "evidence_url": "https://huggingface.co/Jawaria/wav2vec2-large-xls-r-300m-pashto-colab-test-7",
804
+ "markers": [
805
+ "pashto"
806
+ ]
807
+ },
808
+ "tags": [
809
+ "pashto",
810
+ "candidate",
811
+ "model"
812
+ ]
813
+ },
814
+ {
815
+ "id": "candidate-hf-model-jawaria-wav2vec2-large-xls-r-300m-pashto-colab-test-8",
816
+ "title": "Jawaria/wav2vec2-large-xls-r-300m-pashto-colab-test-8",
817
+ "url": "https://huggingface.co/Jawaria/wav2vec2-large-xls-r-300m-pashto-colab-test-8",
818
+ "category": "model",
819
+ "source": "huggingface",
820
+ "status": "candidate",
821
+ "summary": "Candidate model returned from Hugging Face search for Pashto.",
822
+ "primary_use": "Needs maintainer review before promotion to verified catalog.",
823
+ "tasks": [],
824
+ "pashto_evidence": {
825
+ "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
826
+ "evidence_url": "https://huggingface.co/Jawaria/wav2vec2-large-xls-r-300m-pashto-colab-test-8",
827
  "markers": [
828
  "pashto"
829
  ]
 
858
  ]
859
  },
860
  {
861
+ "id": "candidate-hf-model-koochikoo25-pashto-whisper-large",
862
+ "title": "koochikoo25/pashto-whisper-large",
863
+ "url": "https://huggingface.co/koochikoo25/pashto-whisper-large",
864
+ "category": "model",
865
+ "source": "huggingface",
866
+ "status": "candidate",
867
+ "summary": "Candidate model returned from Hugging Face search for Pashto.",
868
+ "primary_use": "Needs maintainer review before promotion to verified catalog.",
869
+ "tasks": [],
870
+ "pashto_evidence": {
871
+ "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
872
+ "evidence_url": "https://huggingface.co/koochikoo25/pashto-whisper-large",
873
+ "markers": [
874
+ "pashto"
875
+ ]
876
+ },
877
+ "tags": [
878
+ "pashto",
879
+ "candidate",
880
+ "model"
881
+ ]
882
+ },
883
+ {
884
+ "id": "candidate-hf-model-koochikoo25-whisper-medium-pashto",
885
+ "title": "koochikoo25/Whisper-medium-pashto",
886
+ "url": "https://huggingface.co/koochikoo25/Whisper-medium-pashto",
887
+ "category": "model",
888
+ "source": "huggingface",
889
+ "status": "candidate",
890
+ "summary": "Candidate model returned from Hugging Face search for Pashto.",
891
+ "primary_use": "Needs maintainer review before promotion to verified catalog.",
892
+ "tasks": [],
893
+ "pashto_evidence": {
894
+ "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
895
+ "evidence_url": "https://huggingface.co/koochikoo25/Whisper-medium-pashto",
896
+ "markers": [
897
+ "pashto"
898
+ ]
899
+ },
900
+ "tags": [
901
+ "pashto",
902
+ "candidate",
903
+ "model"
904
+ ]
905
+ },
906
+ {
907
+ "id": "candidate-hf-dataset-saillab-alpaca-pashto-cleaned",
908
+ "title": "saillab/alpaca-pashto-cleaned",
909
+ "url": "https://huggingface.co/datasets/saillab/alpaca-pashto-cleaned",
910
  "category": "dataset",
911
  "source": "huggingface",
912
  "status": "candidate",
 
915
  "tasks": [],
916
  "pashto_evidence": {
917
  "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
918
+ "evidence_url": "https://huggingface.co/datasets/saillab/alpaca-pashto-cleaned",
919
  "markers": [
920
  "pashto"
921
  ]
 
950
  ]
951
  },
952
  {
953
+ "id": "candidate-hf-dataset-sherwindesouza-pashto-common-voice-20",
954
+ "title": "SherwinDesouza/pashto-common-voice-20",
955
+ "url": "https://huggingface.co/datasets/SherwinDesouza/pashto-common-voice-20",
956
+ "category": "dataset",
957
  "source": "huggingface",
958
  "status": "candidate",
959
+ "summary": "Candidate dataset returned from Hugging Face search for Pashto.",
960
  "primary_use": "Needs maintainer review before promotion to verified catalog.",
961
  "tasks": [],
962
  "pashto_evidence": {
963
  "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
964
+ "evidence_url": "https://huggingface.co/datasets/SherwinDesouza/pashto-common-voice-20",
965
  "markers": [
966
  "pashto"
967
  ]
 
969
  "tags": [
970
  "pashto",
971
  "candidate",
972
+ "dataset"
973
+ ]
974
+ },
975
+ {
976
+ "id": "candidate-hf-dataset-tasal9-pashto-dataset",
977
+ "title": "tasal9/Pashto_Dataset",
978
+ "url": "https://huggingface.co/datasets/tasal9/Pashto_Dataset",
979
+ "category": "dataset",
980
+ "source": "huggingface",
981
+ "status": "candidate",
982
+ "summary": "Candidate dataset returned from Hugging Face search for Pashto.",
983
+ "primary_use": "Needs maintainer review before promotion to verified catalog.",
984
+ "tasks": [],
985
+ "pashto_evidence": {
986
+ "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
987
+ "evidence_url": "https://huggingface.co/datasets/tasal9/Pashto_Dataset",
988
+ "markers": [
989
+ "pashto"
990
+ ]
991
+ },
992
+ "tags": [
993
+ "pashto",
994
+ "candidate",
995
+ "dataset"
996
  ]
997
  },
998
  {
999
+ "id": "candidate-hf-dataset-tasal9-zamai-pashto-dataset",
1000
+ "title": "tasal9/ZamAI_Pashto_Dataset",
1001
+ "url": "https://huggingface.co/datasets/tasal9/ZamAI_Pashto_Dataset",
1002
  "category": "dataset",
1003
  "source": "huggingface",
1004
  "status": "candidate",
 
1007
  "tasks": [],
1008
  "pashto_evidence": {
1009
  "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
1010
+ "evidence_url": "https://huggingface.co/datasets/tasal9/ZamAI_Pashto_Dataset",
1011
  "markers": [
1012
  "pashto"
1013
  ]
 
1017
  "candidate",
1018
  "dataset"
1019
  ]
1020
+ },
1021
+ {
1022
+ "id": "candidate-hf-model-zirak-ai-pashto-bert-v1",
1023
+ "title": "zirak-ai/pashto-bert-v1",
1024
+ "url": "https://huggingface.co/zirak-ai/pashto-bert-v1",
1025
+ "category": "model",
1026
+ "source": "huggingface",
1027
+ "status": "candidate",
1028
+ "summary": "Candidate model returned from Hugging Face search for Pashto.",
1029
+ "primary_use": "Needs maintainer review before promotion to verified catalog.",
1030
+ "tasks": [],
1031
+ "pashto_evidence": {
1032
+ "evidence_text": "Matched by Pashto keyword in Hugging Face search results.",
1033
+ "evidence_url": "https://huggingface.co/zirak-ai/pashto-bert-v1",
1034
+ "markers": [
1035
+ "pashto"
1036
+ ]
1037
+ },
1038
+ "tags": [
1039
+ "pashto",
1040
+ "candidate",
1041
+ "model"
1042
+ ]
1043
  }
1044
  ],
1045
  "errors": [
resources/catalog/resources.json CHANGED
@@ -640,6 +640,148 @@
640
  "paper",
641
  "benchmark"
642
  ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
643
  }
644
  ]
645
  }
 
640
  "paper",
641
  "benchmark"
642
  ]
643
+ },
644
+ {
645
+ "id": "dataset-nexdata-99h-pashto-dialogue",
646
+ "title": "99 Hours Pashto Spontaneous Dialogue Smartphone Speech Dataset",
647
+ "url": "https://huggingface.co/datasets/Nexdata/99_Hours_Pashto_Spontaneous_Dialogue_Smartphone_speech_dataset",
648
+ "category": "dataset",
649
+ "source": "huggingface",
650
+ "status": "verified",
651
+ "summary": "Large spontaneous Pashto smartphone speech dataset for robust ASR experimentation.",
652
+ "primary_use": "Spontaneous speech ASR training and robustness evaluation",
653
+ "license": "cc-by-nc-nd-4.0",
654
+ "tasks": [
655
+ "asr"
656
+ ],
657
+ "pashto_evidence": {
658
+ "evidence_text": "Dataset title explicitly includes Pashto and API metadata marks audio and text modalities.",
659
+ "evidence_url": "https://huggingface.co/datasets/Nexdata/99_Hours_Pashto_Spontaneous_Dialogue_Smartphone_speech_dataset",
660
+ "markers": [
661
+ "Pashto"
662
+ ]
663
+ },
664
+ "tags": [
665
+ "pashto",
666
+ "speech",
667
+ "asr",
668
+ "dialogue"
669
+ ]
670
+ },
671
+ {
672
+ "id": "dataset-zirak-ai-pashto-ocr",
673
+ "title": "Zirak-AI PashtoOCR",
674
+ "url": "https://huggingface.co/datasets/zirak-ai/PashtoOCR",
675
+ "category": "dataset",
676
+ "source": "huggingface",
677
+ "status": "verified",
678
+ "summary": "Pashto-focused OCR dataset with image-text pairs for document understanding tasks.",
679
+ "primary_use": "OCR and text extraction benchmarking",
680
+ "license": "mit",
681
+ "tasks": [
682
+ "ocr",
683
+ "nlp"
684
+ ],
685
+ "pashto_evidence": {
686
+ "evidence_text": "Dataset tags include language:ps and the dataset name is PashtoOCR.",
687
+ "evidence_url": "https://huggingface.co/datasets/zirak-ai/PashtoOCR",
688
+ "markers": [
689
+ "ps",
690
+ "PashtoOCR"
691
+ ]
692
+ },
693
+ "tags": [
694
+ "pashto",
695
+ "ocr",
696
+ "nlp",
697
+ "vision"
698
+ ]
699
+ },
700
+ {
701
+ "id": "dataset-ihanif-pashto-wikipedia-corpus",
702
+ "title": "Pashto Wikipedia Corpus",
703
+ "url": "https://huggingface.co/datasets/ihanif/pashto-wikipedia-corpus",
704
+ "category": "dataset",
705
+ "source": "huggingface",
706
+ "status": "verified",
707
+ "summary": "Pashto text corpus prepared from Wikipedia data for NLP and language modeling.",
708
+ "primary_use": "Pashto text corpus for NLP baselines",
709
+ "license": "cc-by-sa-4.0",
710
+ "tasks": [
711
+ "nlp"
712
+ ],
713
+ "pashto_evidence": {
714
+ "evidence_text": "Dataset metadata includes language:ps and the title specifies Pashto corpus.",
715
+ "evidence_url": "https://huggingface.co/datasets/ihanif/pashto-wikipedia-corpus",
716
+ "markers": [
717
+ "ps",
718
+ "Pashto"
719
+ ]
720
+ },
721
+ "tags": [
722
+ "pashto",
723
+ "text",
724
+ "nlp",
725
+ "wikipedia"
726
+ ]
727
+ },
728
+ {
729
+ "id": "model-ihanif-wav2vec2-xls-r-300m-pashto",
730
+ "title": "wav2vec2 XLS-R 300M Pashto",
731
+ "url": "https://huggingface.co/ihanif/wav2vec2-xls-r-300m-pashto",
732
+ "category": "model",
733
+ "source": "huggingface",
734
+ "status": "verified",
735
+ "summary": "Fine-tuned wav2vec2 XLS-R model for Pashto ASR with published FLEURS evaluation tags.",
736
+ "primary_use": "Pashto ASR baseline and comparative experiments",
737
+ "license": "apache-2.0",
738
+ "tasks": [
739
+ "asr"
740
+ ],
741
+ "pashto_evidence": {
742
+ "evidence_text": "Model tags include pashto and ps, and model index references FLEURS config ps_af.",
743
+ "evidence_url": "https://huggingface.co/ihanif/wav2vec2-xls-r-300m-pashto",
744
+ "markers": [
745
+ "pashto",
746
+ "ps",
747
+ "ps_af"
748
+ ]
749
+ },
750
+ "tags": [
751
+ "pashto",
752
+ "asr",
753
+ "wav2vec2",
754
+ "fleurs"
755
+ ]
756
+ },
757
+ {
758
+ "id": "model-ihanif-whisper-medium-pashto",
759
+ "title": "Whisper Medium Pashto",
760
+ "url": "https://huggingface.co/ihanif/whisper-medium-pashto",
761
+ "category": "model",
762
+ "source": "huggingface",
763
+ "status": "verified",
764
+ "summary": "Fine-tuned Whisper Medium checkpoint for Pashto ASR with benchmark metadata.",
765
+ "primary_use": "Pashto ASR baseline and transcription quality comparisons",
766
+ "license": "apache-2.0",
767
+ "tasks": [
768
+ "asr"
769
+ ],
770
+ "pashto_evidence": {
771
+ "evidence_text": "Model tags include pashto and ps, and model index uses FLEURS ps_af split.",
772
+ "evidence_url": "https://huggingface.co/ihanif/whisper-medium-pashto",
773
+ "markers": [
774
+ "pashto",
775
+ "ps",
776
+ "ps_af"
777
+ ]
778
+ },
779
+ "tags": [
780
+ "pashto",
781
+ "asr",
782
+ "whisper",
783
+ "fleurs"
784
+ ]
785
  }
786
  ]
787
  }
resources/datasets/README.md CHANGED
@@ -4,14 +4,17 @@
4
 
5
  | Resource | Link | Pashto Evidence | Primary Use |
6
  |---|---|---|---|
 
7
  | Belebele | [huggingface](https://huggingface.co/datasets/facebook/belebele) | [Dataset includes pbt_Arab subset. (`pbt_Arab`)](https://huggingface.co/datasets/facebook/belebele) | Comprehension and multilingual NLP benchmark |
8
  | Common Voice Scripted Speech 24.0 - Pashto | [mozilla](https://datacollective.mozillafoundation.org/datasets/cmj8u3pnb00llnxxbfvxo3b14) | [Official dataset page is for Pashto. (`Pashto`)](https://datacollective.mozillafoundation.org/datasets/cmj8u3pnb00llnxxbfvxo3b14) | ASR training and evaluation |
9
  | Google FLEURS | [huggingface](https://huggingface.co/datasets/google/fleurs) | [Dataset config includes ps_af. (`ps_af`)](https://huggingface.co/datasets/google/fleurs/blob/main/fleurs.py) | Speech benchmark and external evaluation |
10
  | OPUS-100 | [huggingface](https://huggingface.co/datasets/Helsinki-NLP/opus-100) | [Dataset viewer includes en-ps split. (`en-ps`)](https://huggingface.co/datasets/Helsinki-NLP/opus-100/viewer/en-ps) | Machine translation training and evaluation |
11
  | OSCAR Corpus | [huggingface](https://huggingface.co/datasets/oscar-corpus/oscar) | [Dataset includes unshuffled_deduplicated_ps split. (`unshuffled_deduplicated_ps`)](https://huggingface.co/datasets/oscar-corpus/oscar) | Language modeling and lexicon expansion |
12
  | Pashto Isolated Words Speech Dataset | [kaggle](https://www.kaggle.com/datasets/engrirf/pashto-isolated-words-speech-dataset) | [Dataset title explicitly states Pashto speech dataset. (`Pashto`)](https://www.kaggle.com/datasets/engrirf/pashto-isolated-words-speech-dataset) | Keyword spotting and constrained ASR experiments |
 
13
  | Pashto Word Embeddings | [kaggle](https://www.kaggle.com/datasets/drijaz/pashto-word-embeddings) | [Dataset description states pretrained Pashto embeddings. (`Pashto`)](https://www.kaggle.com/datasets/drijaz/pashto-word-embeddings) | Lexical semantics and lightweight NLP baselines |
14
  | Wikimedia Wikipedia | [huggingface](https://huggingface.co/datasets/wikimedia/wikipedia) | [Dataset includes 20231101.ps subset. (`20231101.ps`)](https://huggingface.co/datasets/wikimedia/wikipedia) | Terminology and balanced text corpus |
 
15
 
16
  ## Maintenance
17
  - Source of truth: [../catalog/resources.json](../catalog/resources.json)
 
4
 
5
  | Resource | Link | Pashto Evidence | Primary Use |
6
  |---|---|---|---|
7
+ | 99 Hours Pashto Spontaneous Dialogue Smartphone Speech Dataset | [huggingface](https://huggingface.co/datasets/Nexdata/99_Hours_Pashto_Spontaneous_Dialogue_Smartphone_speech_dataset) | [Dataset title explicitly includes Pashto and API metadata marks audio and text modalities. (`Pashto`)](https://huggingface.co/datasets/Nexdata/99_Hours_Pashto_Spontaneous_Dialogue_Smartphone_speech_dataset) | Spontaneous speech ASR training and robustness evaluation |
8
  | Belebele | [huggingface](https://huggingface.co/datasets/facebook/belebele) | [Dataset includes pbt_Arab subset. (`pbt_Arab`)](https://huggingface.co/datasets/facebook/belebele) | Comprehension and multilingual NLP benchmark |
9
  | Common Voice Scripted Speech 24.0 - Pashto | [mozilla](https://datacollective.mozillafoundation.org/datasets/cmj8u3pnb00llnxxbfvxo3b14) | [Official dataset page is for Pashto. (`Pashto`)](https://datacollective.mozillafoundation.org/datasets/cmj8u3pnb00llnxxbfvxo3b14) | ASR training and evaluation |
10
  | Google FLEURS | [huggingface](https://huggingface.co/datasets/google/fleurs) | [Dataset config includes ps_af. (`ps_af`)](https://huggingface.co/datasets/google/fleurs/blob/main/fleurs.py) | Speech benchmark and external evaluation |
11
  | OPUS-100 | [huggingface](https://huggingface.co/datasets/Helsinki-NLP/opus-100) | [Dataset viewer includes en-ps split. (`en-ps`)](https://huggingface.co/datasets/Helsinki-NLP/opus-100/viewer/en-ps) | Machine translation training and evaluation |
12
  | OSCAR Corpus | [huggingface](https://huggingface.co/datasets/oscar-corpus/oscar) | [Dataset includes unshuffled_deduplicated_ps split. (`unshuffled_deduplicated_ps`)](https://huggingface.co/datasets/oscar-corpus/oscar) | Language modeling and lexicon expansion |
13
  | Pashto Isolated Words Speech Dataset | [kaggle](https://www.kaggle.com/datasets/engrirf/pashto-isolated-words-speech-dataset) | [Dataset title explicitly states Pashto speech dataset. (`Pashto`)](https://www.kaggle.com/datasets/engrirf/pashto-isolated-words-speech-dataset) | Keyword spotting and constrained ASR experiments |
14
+ | Pashto Wikipedia Corpus | [huggingface](https://huggingface.co/datasets/ihanif/pashto-wikipedia-corpus) | [Dataset metadata includes language:ps and the title specifies Pashto corpus. (`ps`, `Pashto`)](https://huggingface.co/datasets/ihanif/pashto-wikipedia-corpus) | Pashto text corpus for NLP baselines |
15
  | Pashto Word Embeddings | [kaggle](https://www.kaggle.com/datasets/drijaz/pashto-word-embeddings) | [Dataset description states pretrained Pashto embeddings. (`Pashto`)](https://www.kaggle.com/datasets/drijaz/pashto-word-embeddings) | Lexical semantics and lightweight NLP baselines |
16
  | Wikimedia Wikipedia | [huggingface](https://huggingface.co/datasets/wikimedia/wikipedia) | [Dataset includes 20231101.ps subset. (`20231101.ps`)](https://huggingface.co/datasets/wikimedia/wikipedia) | Terminology and balanced text corpus |
17
+ | Zirak-AI PashtoOCR | [huggingface](https://huggingface.co/datasets/zirak-ai/PashtoOCR) | [Dataset tags include language:ps and the dataset name is PashtoOCR. (`ps`, `PashtoOCR`)](https://huggingface.co/datasets/zirak-ai/PashtoOCR) | OCR and text extraction benchmarking |
18
 
19
  ## Maintenance
20
  - Source of truth: [../catalog/resources.json](../catalog/resources.json)
resources/models/README.md CHANGED
@@ -10,7 +10,9 @@
10
  | OPUS MT en-mul | [huggingface](https://huggingface.co/Helsinki-NLP/opus-mt-en-mul) | [Language list includes pus code. (`pus`)](https://huggingface.co/Helsinki-NLP/opus-mt-en-mul) | English to Pashto translation path |
11
  | OPUS MT mul-en | [huggingface](https://huggingface.co/Helsinki-NLP/opus-mt-mul-en) | [Language list includes pus code. (`pus`)](https://huggingface.co/Helsinki-NLP/opus-mt-mul-en) | Pashto to English translation path |
12
  | PashtoBERT | [huggingface](https://huggingface.co/mdarhri/pashto-bert) | [Model card states training on Pashto corpus data. (`Pashto`)](https://huggingface.co/mdarhri/pashto-bert) | Pashto NLP baseline encoder |
 
13
  | Whisper Large v3 | [huggingface](https://huggingface.co/openai/whisper-large-v3) | [Whisper tokenizer map includes ps language key. (`ps`)](https://raw.githubusercontent.com/openai/whisper/main/whisper/tokenizer.py) | ASR baseline and pseudo-labeling |
 
14
 
15
  ## Maintenance
16
  - Source of truth: [../catalog/resources.json](../catalog/resources.json)
 
10
  | OPUS MT en-mul | [huggingface](https://huggingface.co/Helsinki-NLP/opus-mt-en-mul) | [Language list includes pus code. (`pus`)](https://huggingface.co/Helsinki-NLP/opus-mt-en-mul) | English to Pashto translation path |
11
  | OPUS MT mul-en | [huggingface](https://huggingface.co/Helsinki-NLP/opus-mt-mul-en) | [Language list includes pus code. (`pus`)](https://huggingface.co/Helsinki-NLP/opus-mt-mul-en) | Pashto to English translation path |
12
  | PashtoBERT | [huggingface](https://huggingface.co/mdarhri/pashto-bert) | [Model card states training on Pashto corpus data. (`Pashto`)](https://huggingface.co/mdarhri/pashto-bert) | Pashto NLP baseline encoder |
13
+ | wav2vec2 XLS-R 300M Pashto | [huggingface](https://huggingface.co/ihanif/wav2vec2-xls-r-300m-pashto) | [Model tags include pashto and ps, and model index references FLEURS config ps_af. (`pashto`, `ps`, `ps_af`)](https://huggingface.co/ihanif/wav2vec2-xls-r-300m-pashto) | Pashto ASR baseline and comparative experiments |
14
  | Whisper Large v3 | [huggingface](https://huggingface.co/openai/whisper-large-v3) | [Whisper tokenizer map includes ps language key. (`ps`)](https://raw.githubusercontent.com/openai/whisper/main/whisper/tokenizer.py) | ASR baseline and pseudo-labeling |
15
+ | Whisper Medium Pashto | [huggingface](https://huggingface.co/ihanif/whisper-medium-pashto) | [Model tags include pashto and ps, and model index uses FLEURS ps_af split. (`pashto`, `ps`, `ps_af`)](https://huggingface.co/ihanif/whisper-medium-pashto) | Pashto ASR baseline and transcription quality comparisons |
16
 
17
  ## Maintenance
18
  - Source of truth: [../catalog/resources.json](../catalog/resources.json)