madlad-400-translate / tests /test_langmap.py
Daryl Lim
test: update langmap tests for expanded dict shape with regions
5650cd5
Raw
History Blame
2.21 kB
from langmap.langid_mapping import langid_to_language
def test_langid_mapping_is_nonempty():
    """The language mapping must contain at least one entry."""
    entry_count = len(langid_to_language)
    assert entry_count > 0
def test_langid_mapping_count():
    """The language mapping must hold at least 400 entries."""
    # The failure message quotes ~419 as the expected size; the hard floor
    # enforced here is 400.
    minimum_entries = 400
    assert len(langid_to_language) >= minimum_entries, (
        f"Expected ~419 languages, got {len(langid_to_language)}"
    )
def test_keys_are_bcp47_tokens():
    """Every key must start with ``<2`` and end with ``>``."""
    for key in langid_to_language:
        # Check the opening marker first so a malformed prefix is reported
        # before a malformed suffix.
        has_prefix = key.startswith("<2")
        assert has_prefix, f"Key {key} does not start with '<2'"

        has_suffix = key.endswith(">")
        assert has_suffix, f"Key {key} does not end with '>'"
def test_values_are_dicts_with_name_and_region():
    """Every value must be a dict exposing both a 'name' and a 'region' key."""
    for key, value in langid_to_language.items():
        is_mapping = isinstance(value, dict)
        assert is_mapping, f"Value for {key} is not a dict"

        has_name = "name" in value
        assert has_name, f"Value for {key} missing 'name' key"

        has_region = "region" in value
        assert has_region, f"Value for {key} missing 'region' key"
def test_names_are_nonempty_strings():
    """Every entry's 'name' must be a str with non-whitespace content."""
    for key, value in langid_to_language.items():
        name = value["name"]
        # The isinstance check short-circuits, so .strip() is only called on str.
        assert isinstance(name, str) and name.strip(), f"Empty name for {key}"
def test_regions_are_nonempty_strings():
    """Every entry's 'region' must be a str with non-whitespace content."""
    for key, value in langid_to_language.items():
        region = value["region"]
        # The isinstance check short-circuits, so .strip() is only called on str.
        assert isinstance(region, str) and region.strip(), f"Empty region for {key}"
def test_no_leading_or_trailing_whitespace():
    """Neither 'name' nor 'region' may carry leading or trailing whitespace."""
    for key, value in langid_to_language.items():
        name = value["name"]
        assert name.strip() == name, f"Name for {key} has whitespace: {value['name']!r}"

        # The region is looked up only after the name check has passed.
        region = value["region"]
        assert region.strip() == region, f"Region for {key} has whitespace: {value['region']!r}"
def test_no_duplicate_language_names():
    """No two entries may share the same 'name'.

    On failure the message lists the repeated names, so the offending
    entries can be located without re-running the check by hand.
    """
    names = [v["name"] for v in langid_to_language.values()]

    # Record each name the second (and any later) time it is seen.
    seen = set()
    duplicates = []
    for name in names:
        if name in seen:
            duplicates.append(name)
        seen.add(name)

    # An empty list is equivalent to len(names) == len(set(names)).
    assert not duplicates, f"Duplicate language names found: {duplicates}"
def test_key_languages_present():
    """Check that a fixed sample of ten language tokens exists in the mapping."""
    expected_codes = (
        "<2en>",
        "<2fr>",
        "<2de>",
        "<2es>",
        "<2ja>",
        "<2zh>",
        "<2ar>",
        "<2hi>",
        "<2pt>",
        "<2ru>",
    )
    for code in expected_codes:
        assert code in langid_to_language, f"Missing language code: {code}"
def test_no_other_region():
    """No entry may use the literal string 'Other' as its region."""
    for key, value in langid_to_language.items():
        region = value["region"]
        assert region != "Other", f"{key} ({value['name']}) has region 'Other'"