File size: 2,214 Bytes
043ecd7
 
 
 
 
 
 
 
5650cd5
043ecd7
 
 
 
 
 
 
 
5650cd5
043ecd7
5650cd5
 
 
 
 
 
 
 
 
 
 
 
 
043ecd7
 
 
 
5650cd5
 
043ecd7
 
 
5650cd5
043ecd7
 
 
5650cd5
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
from langmap.langid_mapping import langid_to_language


def test_langid_mapping_is_nonempty():
    """The mapping must contain at least one entry."""
    entry_count = len(langid_to_language)
    assert entry_count > 0


def test_langid_mapping_count():
    """The mapping must hold at least 400 entries."""
    minimum_expected = 400
    total = len(langid_to_language)
    assert total >= minimum_expected, f"Expected ~419 languages, got {total}"


def test_keys_are_bcp47_tokens():
    """Every key must start with '<2' and end with '>'."""
    for token in langid_to_language:
        # Check the opening marker first so its message takes precedence.
        opens_correctly = token.startswith("<2")
        assert opens_correctly, f"Key {token} does not start with '<2'"

        closes_correctly = token.endswith(">")
        assert closes_correctly, f"Key {token} does not end with '>'"


def test_values_are_dicts_with_name_and_region():
    """Every value must be a dict carrying both a 'name' and a 'region' key."""
    required_fields = ("name", "region")
    for code, entry in langid_to_language.items():
        assert isinstance(entry, dict), f"Value for {code} is not a dict"
        # 'name' is checked before 'region', matching the order of the tuple.
        for field in required_fields:
            assert field in entry, f"Value for {code} missing '{field}' key"


def test_names_are_nonempty_strings():
    """Every entry's 'name' must be a string with non-whitespace content."""
    for code, entry in langid_to_language.items():
        name = entry["name"]
        # The isinstance guard short-circuits before .strip() on non-strings.
        assert isinstance(name, str) and name.strip(), f"Empty name for {code}"


def test_regions_are_nonempty_strings():
    """Every entry's 'region' must be a string with non-whitespace content."""
    for code, entry in langid_to_language.items():
        region = entry["region"]
        # The isinstance guard short-circuits before .strip() on non-strings.
        assert isinstance(region, str) and region.strip(), f"Empty region for {code}"


def test_no_leading_or_trailing_whitespace():
    """Names and regions must already be stripped of surrounding whitespace."""
    for code, entry in langid_to_language.items():
        name = entry["name"]
        assert name == name.strip(), f"Name for {code} has whitespace: {name!r}"

        # Read the region only after the name check has passed.
        region = entry["region"]
        assert region == region.strip(), f"Region for {code} has whitespace: {region!r}"


def test_no_duplicate_language_names():
    """No two entries may share the same 'name'.

    On failure the message lists each repeated name (in first-repeat order)
    so the offending entries can be located without re-deriving them.
    """
    seen = set()
    duplicates = []
    for entry in langid_to_language.values():
        name = entry["name"]
        if name in seen:
            # Record each repeated name once, however many times it recurs.
            if name not in duplicates:
                duplicates.append(name)
        else:
            seen.add(name)
    assert not duplicates, f"Duplicate language names found: {duplicates}"


def test_key_languages_present():
    """Spot-check that a fixed set of language tokens is present in the mapping."""
    expected_tokens = (
        "<2en>",
        "<2fr>",
        "<2de>",
        "<2es>",
        "<2ja>",
        "<2zh>",
        "<2ar>",
        "<2hi>",
        "<2pt>",
        "<2ru>",
    )
    for token in expected_tokens:
        assert token in langid_to_language, f"Missing language code: {token}"


def test_no_other_region():
    """No entry may use the literal string 'Other' as its region."""
    for code, entry in langid_to_language.items():
        region = entry["region"]
        assert region != "Other", f"{code} ({entry['name']}) has region 'Other'"