File size: 3,264 Bytes
from scripts.validate_resource_catalog import validate_catalog
def _minimal_catalog() -> dict:
return {
"version": "1.0.0",
"updated_on": "2026-02-15",
"resources": [
{
"id": "dataset-example",
"title": "Example Dataset",
"url": "https://example.org/dataset",
"category": "dataset",
"source": "other",
"status": "verified",
"summary": "Useful Pashto example dataset for testing the validator.",
"primary_use": "Testing",
"pashto_evidence": {
"evidence_text": "Mentions Pashto in title.",
"evidence_url": "https://example.org/dataset",
"markers": ["Pashto"],
},
"tags": ["pashto", "test"],
}
],
}
def test_validate_catalog_passes_for_minimal_valid_catalog() -> None:
    """The unmodified minimal catalog must produce an empty error list."""
    errors = validate_catalog(_minimal_catalog())

    assert errors == []
def test_validate_catalog_fails_for_duplicate_ids() -> None:
    """Two resources sharing one ``id`` must yield a duplicate-id error."""
    catalog = _minimal_catalog()
    # Shallow copy of the first resource: same "id", hence a duplicate.
    duplicate = dict(catalog["resources"][0])
    catalog["resources"].append(duplicate)

    errors = validate_catalog(catalog)

    assert any("duplicate resource id" in error for error in errors)
def test_validate_catalog_fails_for_invalid_evidence_url() -> None:
    """A non-URL ``evidence_url`` must yield an error mentioning that field."""
    catalog = _minimal_catalog()
    evidence = catalog["resources"][0]["pashto_evidence"]
    evidence["evidence_url"] = "not-a-url"

    errors = validate_catalog(catalog)

    assert any("evidence_url" in error for error in errors)
def test_validate_catalog_fails_for_non_pashto_centric_model() -> None:
    """A model with no Pashto in its title, URL or evidence must be rejected.

    The resource is turned into a ``model`` whose title, URL, evidence text
    and markers all avoid the word "Pashto"; the validator is expected to
    report an error containing "must be Pashto-centric".
    """
    catalog = _minimal_catalog()
    resource = catalog["resources"][0]
    resource["category"] = "model"
    resource["title"] = "Generic Multilingual Model"
    resource["url"] = "https://example.org/model"

    evidence = resource["pashto_evidence"]
    evidence["evidence_text"] = "Language support listed in docs."
    evidence["evidence_url"] = "https://example.org/model-docs"
    evidence["markers"] = ["multilingual"]

    errors = validate_catalog(catalog)

    assert any("must be Pashto-centric" in error for error in errors)
def test_validate_catalog_allows_pashto_centric_model() -> None:
    """A model whose title and URL name Pashto must validate without errors.

    Only category, title and URL are changed; the Pashto evidence from the
    minimal catalog is kept as-is.
    """
    catalog = _minimal_catalog()
    resource = catalog["resources"][0]
    resource["category"] = "model"
    resource["title"] = "Pashto ASR Model"
    resource["url"] = "https://example.org/pashto-model"

    errors = validate_catalog(catalog)

    assert errors == []
def test_validate_catalog_allows_multilingual_model_with_pashto_evidence() -> None:
    """A generic multilingual model is accepted when its evidence names Pashto.

    Title and URL carry no Pashto reference, but the evidence text and the
    markers (``"Pashto"``, ``"ps"``) do; the validator is expected to return
    no errors.
    """
    catalog = _minimal_catalog()
    resource = catalog["resources"][0]
    resource["category"] = "model"
    resource["title"] = "Generic Multilingual Model"
    resource["url"] = "https://example.org/model"

    evidence = resource["pashto_evidence"]
    evidence["evidence_text"] = "Language table explicitly includes Pashto."
    evidence["evidence_url"] = "https://example.org/model/languages"
    evidence["markers"] = ["Pashto", "ps"]

    errors = validate_catalog(catalog)

    assert errors == []
|