"""Sanity checks for the ``langid_to_language`` mapping.

The tests pin the mapping to the set of Tier 1 language names and check the
basic shape of its keys (``<2...>`` tokens) and values (clean, unique names).
"""

from collections import Counter

from langmap.langid_mapping import langid_to_language

# Single source of truth for the Tier 1 language names.  Both the count test
# and the membership test derive from this set so they cannot drift apart.
EXPECTED_TIER1_LANGUAGES = frozenset(
    {
        "Afrikaans",
        "Bulgarian",
        "Bosnian",
        "Catalan",
        "Czech",
        "Danish",
        "German",
        "Spanish",
        "Filipino",
        "French",
        "Croatian",
        "Indonesian",
        "Italian",
        "Macedonian",
        "Malay",
        "Maltese",
        "Norwegian Nynorsk",
        "Norwegian",
        "Portuguese",
        "Romanian",
        "Slovak",
        "Swedish",
    }
)

# Delimiters every mapping key is required to carry.
_KEY_PREFIX = "<2"
_KEY_SUFFIX = ">"


def test_langid_mapping_is_nonempty():
    """The mapping must contain at least one entry."""
    assert len(langid_to_language) > 0


def test_langid_mapping_count():
    """The mapping must hold exactly as many entries as there are Tier 1 names."""
    expected_count = len(EXPECTED_TIER1_LANGUAGES)
    actual_count = len(langid_to_language)
    assert actual_count == expected_count, (
        f"Expected {expected_count} Tier 1 languages, got {actual_count}"
    )


def test_keys_are_bcp47_tokens():
    """Every key must look like ``<2TAG>`` with a non-empty ``TAG``.

    NOTE(review): only the delimiters and a non-empty tag are checked here;
    the tag itself is not validated as a BCP-47 code.
    """
    for key in langid_to_language:
        assert key.startswith(_KEY_PREFIX), f"Key {key} does not start with '<2'"
        assert key.endswith(_KEY_SUFFIX), f"Key {key} does not end with '>'"
        # "<2>" satisfies both checks above but carries no language tag.
        assert len(key) > len(_KEY_PREFIX) + len(_KEY_SUFFIX), (
            f"Key {key} has an empty language tag"
        )


def test_values_are_nonempty_strings():
    """Every value must be a string with at least one non-whitespace character."""
    for key, value in langid_to_language.items():
        assert isinstance(value, str) and value.strip(), f"Empty language name for {key}"


def test_no_leading_or_trailing_whitespace():
    """Language names must not be padded with whitespace."""
    for key, value in langid_to_language.items():
        assert value == value.strip(), (
            f"Language name for {key} has leading/trailing whitespace: {value!r}"
        )


def test_no_duplicate_language_names():
    """No two keys may map to the same language name."""
    name_counts = Counter(langid_to_language.values())
    # Collect the offenders so a failure says which names are duplicated.
    duplicates = [name for name, seen in name_counts.items() if seen > 1]
    assert not duplicates, f"Duplicate language names found: {duplicates}"


def test_all_tier1_languages_present():
    """Verify that the mapping's values are exactly the Tier 1 language names."""
    expected = EXPECTED_TIER1_LANGUAGES
    values = set(langid_to_language.values())
    assert values == expected, (
        f"Mismatch: missing={expected - values}, extra={values - expected}"
    )


def test_removed_languages_absent():
    """Languages outside Tier 1 must not be in the mapping."""
    values = set(langid_to_language.values())
    for lang in ("Japanese", "Hindi", "Swahili", "Arabic", "Russian", "Polish", "Dutch"):
        assert lang not in values, f"{lang} should have been removed (not Tier 1)"