File size: 3,264 Bytes
f13fd7c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ed6f1f9
 
 
 
 
 
 
c55938e
 
 
ed6f1f9
 
 
 
 
 
 
 
 
 
 
c55938e
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
from scripts.validate_resource_catalog import validate_catalog


def _minimal_catalog() -> dict:
    return {
        "version": "1.0.0",
        "updated_on": "2026-02-15",
        "resources": [
            {
                "id": "dataset-example",
                "title": "Example Dataset",
                "url": "https://example.org/dataset",
                "category": "dataset",
                "source": "other",
                "status": "verified",
                "summary": "Useful Pashto example dataset for testing the validator.",
                "primary_use": "Testing",
                "pashto_evidence": {
                    "evidence_text": "Mentions Pashto in title.",
                    "evidence_url": "https://example.org/dataset",
                    "markers": ["Pashto"],
                },
                "tags": ["pashto", "test"],
            }
        ],
    }


def test_validate_catalog_passes_for_minimal_valid_catalog() -> None:
    """The untouched baseline fixture must produce an empty error list."""
    baseline = _minimal_catalog()
    assert validate_catalog(baseline) == []


def test_validate_catalog_fails_for_duplicate_ids() -> None:
    """A second resource reusing the first one's id must be reported."""
    catalog = _minimal_catalog()
    resources = catalog["resources"]

    # A shallow copy suffices: only the repeated "id" value matters here.
    clone = resources[0].copy()
    resources.append(clone)

    messages = validate_catalog(catalog)
    assert any("duplicate resource id" in message for message in messages)


def test_validate_catalog_fails_for_invalid_evidence_url() -> None:
    """A non-URL ``evidence_url`` must yield an error naming that field."""
    catalog = _minimal_catalog()
    evidence = catalog["resources"][0]["pashto_evidence"]
    evidence["evidence_url"] = "not-a-url"

    problems = validate_catalog(catalog)
    assert any("evidence_url" in problem for problem in problems)


def test_validate_catalog_fails_for_non_pashto_centric_model() -> None:
    """A model whose title, URL and evidence never mention Pashto is rejected."""
    catalog = _minimal_catalog()
    model = catalog["resources"][0]

    # Turn the dataset fixture into a generic multilingual model entry.
    model.update(
        category="model",
        title="Generic Multilingual Model",
        url="https://example.org/model",
    )
    model["pashto_evidence"].update(
        evidence_text="Language support listed in docs.",
        evidence_url="https://example.org/model-docs",
        markers=["multilingual"],
    )

    messages = validate_catalog(catalog)
    assert any("must be Pashto-centric" in message for message in messages)


def test_validate_catalog_allows_pashto_centric_model() -> None:
    """A model entry with Pashto in its title and URL validates cleanly."""
    catalog = _minimal_catalog()
    model = catalog["resources"][0]

    # Only category, title and url change; the fixture's evidence is kept.
    model.update(
        category="model",
        title="Pashto ASR Model",
        url="https://example.org/pashto-model",
    )

    assert validate_catalog(catalog) == []


def test_validate_catalog_allows_multilingual_model_with_pashto_evidence() -> None:
    """A generically titled model passes when its evidence names Pashto."""
    catalog = _minimal_catalog()
    model = catalog["resources"][0]

    # Title and URL carry no Pashto reference; only the evidence block does.
    model.update(
        category="model",
        title="Generic Multilingual Model",
        url="https://example.org/model",
    )
    model["pashto_evidence"].update(
        evidence_text="Language table explicitly includes Pashto.",
        evidence_url="https://example.org/model/languages",
        markers=["Pashto", "ps"],
    )

    assert validate_catalog(catalog) == []