from unittest.mock import MagicMock, patch

import pytest
import torch

# --- Fast tests (no model loading required) ---


def test_app_imports_without_model_load():
    """Importing ``app`` must succeed and expose translate() plus both lazy loaders.

    Only attribute presence is checked; none of the loaders is called here.
    """
    import app

    for attribute in ("translate", "_load_tokenizer", "_load_model"):
        assert hasattr(app, attribute)


def test_app_has_main_function():
    """``app.main`` (the UI entry point) must exist and be callable."""
    import app

    entry_point = app.main
    assert callable(entry_point)


def test_translate_accepts_generation_params():
    """translate() must declare each of the three generation parameters by name."""
    import inspect

    import app

    accepted = inspect.signature(app.translate).parameters
    for param_name in ("max_new_tokens", "num_beams", "temperature"):
        assert param_name in accepted


def test_translate_signature_defaults():
    """translate() defaults: 512 new tokens, a single beam, temperature 1.0."""
    import inspect

    import app

    parameters = inspect.signature(app.translate).parameters
    defaults = {name: parameters[name].default for name in ("max_new_tokens", "num_beams", "temperature")}
    assert defaults["max_new_tokens"] == 512
    assert defaults["num_beams"] == 1
    assert defaults["temperature"] == 1.0


def test_get_device_returns_cpu_when_no_cuda():
    """With CUDA reported unavailable, _get_device() must hand back a CPU device."""
    import app

    # pytest.warns absorbs the CPU-fallback warning so it does not leak into the test run.
    with patch("app.torch.cuda.is_available", return_value=False), pytest.warns(UserWarning):
        chosen = app._get_device()
    assert chosen.type == "cpu"


def test_get_device_warns_on_cpu():
    """The CPU fallback must emit a UserWarning whose text matches 'No GPU available'."""
    import app

    cuda_missing = patch("app.torch.cuda.is_available", return_value=False)
    expect_warning = pytest.warns(UserWarning, match="No GPU available")
    with cuda_missing, expect_warning:
        app._get_device()


def test_load_model_uses_bfloat16_on_cuda():
    """On a CUDA device the model must be requested in bfloat16 (T5/MADLAD is numerically
    unstable in float16)."""
    import app

    stub_model = MagicMock()
    stub_model.to.return_value = stub_model  # .to(...) chains back to the same stub
    app._load_model.cache_clear()  # start from an empty cache so from_pretrained is really invoked
    try:
        with (
            patch.object(app, "_get_device", return_value=torch.device("cuda")),
            patch.object(app.AutoModelForSeq2SeqLM, "from_pretrained", return_value=stub_model) as from_pretrained,
        ):
            app._load_model()
        requested_dtype = from_pretrained.call_args.kwargs["dtype"]
        assert requested_dtype == torch.bfloat16
    finally:
        # never leave the stub cached for later tests
        app._load_model.cache_clear()


def test_load_model_uses_float32_on_cpu():
    """On a CPU device the model must be requested in float32."""
    import app

    stub_model = MagicMock()
    stub_model.to.return_value = stub_model  # .to(...) chains back to the same stub
    app._load_model.cache_clear()  # start from an empty cache so from_pretrained is really invoked
    try:
        with (
            patch.object(app, "_get_device", return_value=torch.device("cpu")),
            patch.object(app.AutoModelForSeq2SeqLM, "from_pretrained", return_value=stub_model) as from_pretrained,
        ):
            app._load_model()
        requested_dtype = from_pretrained.call_args.kwargs["dtype"]
        assert requested_dtype == torch.float32
    finally:
        # never leave the stub cached for later tests
        app._load_model.cache_clear()


def test_maybe_eager_load_skipped_off_zerogpu(monkeypatch):
    """Without SPACES_ZERO_GPU in the environment, _maybe_eager_load() must touch neither loader."""
    import app

    monkeypatch.delenv("SPACES_ZERO_GPU", raising=False)
    with (
        patch.object(app, "_load_model") as model_loader,
        patch.object(app, "_load_tokenizer") as tokenizer_loader,
    ):
        app._maybe_eager_load()
    for loader in (model_loader, tokenizer_loader):
        loader.assert_not_called()


def test_maybe_eager_load_runs_on_zerogpu(monkeypatch):
    """With SPACES_ZERO_GPU=1, _maybe_eager_load() must call each loader exactly once."""
    import app

    monkeypatch.setenv("SPACES_ZERO_GPU", "1")
    with (
        patch.object(app, "_load_model") as model_loader,
        patch.object(app, "_load_tokenizer") as tokenizer_loader,
    ):
        app._maybe_eager_load()
    for loader in (model_loader, tokenizer_loader):
        loader.assert_called_once()


def test_estimate_duration_is_input_aware_and_capped():
    """The estimate grows with tokens*beams, is smaller for small requests, never exceeds
    120 seconds, and is always an int."""
    import app

    def estimate(tokens, beams):
        return app._estimate_duration("hi", "French (fr)", max_new_tokens=tokens, num_beams=beams)

    small = estimate(10, 1)
    default = estimate(512, 1)
    heavy = estimate(512, 8)
    assert small < default <= 120
    assert heavy == 120  # capped
    for duration in (small, default, heavy):
        assert isinstance(duration, int)


def test_estimate_duration_mirrors_translate_signature():
    """_estimate_duration must take the same parameter names, in the same order, as translate():
    ZeroGPU invokes the duration callable with translate()'s exact arguments."""
    import inspect

    import app

    duration_params = list(inspect.signature(app._estimate_duration).parameters)
    translate_params = list(inspect.signature(app.translate).parameters)
    assert duration_params == translate_params


def test_translate_greedy_by_default_samples_on_custom_temperature():
    """num_beams=1 should greedy-decode at the default temperature (deterministic) and enable
    sampling only when the user sets a non-default temperature.

    Uses the module's shared ``_run_translate`` helper rather than a private copy of the same
    mock setup, so every translate() test exercises one identical harness.
    """
    greedy, _ = _run_translate("Hello", "French (fr)", temperature=1.0)
    sampled, _ = _run_translate("Hello", "French (fr)", temperature=0.5)
    # _run_translate reports None when generate() was never reached; fail with a readable message
    assert greedy is not None and sampled is not None, "translate() never called model.generate"
    assert not greedy.get("do_sample", False), "default temperature should greedy-decode"
    assert sampled.get("do_sample") is True and sampled["temperature"] == 0.5


def _run_translate(text, target, **kwargs):
    """Run app.translate() with the model, tokenizer and language mappings all mocked.

    Returns a ``(generate_kwargs, result)`` pair: the keyword arguments the mocked
    ``model.generate`` received (``None`` when it was never called) and translate()'s return value.
    """
    import app

    fake_tokenizer = MagicMock()
    fake_tokenizer.decode.return_value = "out"
    fake_model = MagicMock()
    fake_model.device = torch.device("cpu")
    fake_model.generate.return_value = [[0]]
    # a single supported language is enough for these tests
    language_mappings = ({"French (fr)": "<2fr>"}, ["French (fr)"])
    with (
        patch("app._load_model", return_value=fake_model),
        patch("app._load_tokenizer", return_value=fake_tokenizer),
        patch("app._build_language_mappings", return_value=language_mappings),
    ):
        result = app.translate(text, target, **kwargs)
    generate = fake_model.generate
    if generate.called:
        return generate.call_args.kwargs, result
    return None, result


def test_translate_forwards_generation_params():
    """Explicit max_new_tokens/num_beams must arrive at model.generate, and a beam-search call
    must not carry do_sample."""
    generate_kwargs, _ = _run_translate("Hello", "French (fr)", max_new_tokens=10, num_beams=4)
    forwarded = (generate_kwargs["max_new_tokens"], generate_kwargs["num_beams"])
    assert forwarded == (10, 4)
    assert "do_sample" not in generate_kwargs, "beam search must not enable sampling"


def test_translate_applies_token_beam_cap():
    """An oversized token×beam request must reach model.generate already trimmed so that
    tokens * beams stays within app._MAX_TOKEN_BEAM_PRODUCT."""
    import app

    generate_kwargs, _ = _run_translate("Hello", "French (fr)", max_new_tokens=1024, num_beams=8)
    beams = generate_kwargs["num_beams"]
    assert beams == 8
    assert generate_kwargs["max_new_tokens"] * generate_kwargs["num_beams"] <= app._MAX_TOKEN_BEAM_PRODUCT


def test_translate_tolerates_near_default_temperature():
    """A temperature a hair below 1.0 (float drift) keeps greedy decoding, while a clearly
    different value such as 0.7 turns sampling on."""
    drifted_kwargs, _ = _run_translate("Hello", "French (fr)", temperature=1.0 - 1e-9)
    sampling_kwargs, _ = _run_translate("Hello", "French (fr)", temperature=0.7)
    assert "do_sample" not in drifted_kwargs, "near-1.0 temperature should stay greedy"
    assert sampling_kwargs.get("do_sample") is True


def test_normalize_params_clamps_and_defaults():
    """_normalize_params maps None/NaN to the defaults, clamps out-of-range values, and returns
    (int, int, float)."""
    import app

    nan = float("nan")
    expectations = [
        ((None, None, None), (512, 1, 1.0)),  # missing -> defaults
        ((nan, nan, nan), (512, 1, 1.0)),  # NaN -> defaults
        ((0, 0, 0.0), (1, 1, 0.1)),  # clamp low
        ((99999, 1, 9.0), (720, 1, 2.0)),  # tokens to product budget; temp clamped
        ((10, 99, 1.0), (10, 8, 1.0)),  # beams clamp high (product 80 within budget)
    ]
    for raw, expected in expectations:
        assert app._normalize_params(*raw) == expected

    # float inputs must come back with exact builtin types, not merely equal values
    tokens, beam_count, temperature = app._normalize_params(10.0, 4.0, 0.5)
    assert (tokens, beam_count, temperature) == (10, 4, 0.5)
    assert type(tokens) is int and type(beam_count) is int and type(temperature) is float


def test_normalize_params_caps_token_beam_product():
    """For a large token×beam request the token count is reduced (beams kept) so the product
    fits app._MAX_TOKEN_BEAM_PRODUCT, and the matching duration estimate stays within 120s."""
    import app

    tokens, beam_count, _ = app._normalize_params(1024, 8, 1.0)
    assert beam_count == 8 and tokens * beam_count <= app._MAX_TOKEN_BEAM_PRODUCT
    # the duration estimate for the same request must respect the 120s ceiling
    worst_case = app._estimate_duration("hi", "French (fr)", 1024, 8, 1.0)
    assert worst_case <= 120
    # a product well inside the budget passes through unchanged
    untouched = app._normalize_params(200, 2, 1.0)
    assert untouched == (200, 2, 1.0)


def test_translate_normalizes_invalid_params():
    """None for max_new_tokens/num_beams and NaN for temperature must be coerced to the defaults
    by translate() rather than crashing or enabling sampling."""
    generate_kwargs, translated = _run_translate(
        "Hello",
        "French (fr)",
        max_new_tokens=None,
        num_beams=None,
        temperature=float("nan"),
    )
    assert translated == "out"
    assert generate_kwargs["max_new_tokens"] == 512 and generate_kwargs["num_beams"] == 1
    assert "do_sample" not in generate_kwargs, "NaN temperature must fall back to greedy"


def test_estimate_duration_handles_none_params():
    """_estimate_duration must still return an int when all three generation params are None
    (it receives the same uncast arguments as translate())."""
    import app

    duration = app._estimate_duration("hi", "French (fr)", None, None, None)
    assert isinstance(duration, int)


@pytest.mark.parametrize("blank", ["", "   ", "\n\t", None])
def test_translate_skips_model_on_empty_input(blank):
    """Blank input (empty, whitespace-only or None) returns '' and neither loader is invoked."""
    import app

    with patch("app._load_model") as model_loader, patch("app._load_tokenizer") as tokenizer_loader:
        translated = app.translate(blank, "French (fr)")
    assert translated == ""
    for loader in (model_loader, tokenizer_loader):
        loader.assert_not_called()


def test_swap_flips_rtl_to_follow_text():
    """After an English->Arabic translation, swapping must exchange the two languages and move
    text direction with the text: the input box (now Arabic) becomes RTL/right-aligned and the
    output box (now English) becomes LTR/left-aligned."""
    import gradio as gr

    import app

    codes_by_name = {"English (en)": "<2en>", "Arabic (ar)": "<2ar>"}
    mappings = (codes_by_name, list(codes_by_name))
    with patch("app._build_language_mappings", return_value=mappings):
        swapped = app._swap_languages("English (en)", "Arabic (ar)", "Hello", "RTL-text")
    new_source, new_target, input_update, output_update = swapped

    assert (new_source, new_target) == ("Arabic (ar)", "English (en)")
    # the former output text moves into the input box and must read right-to-left
    expected_input = gr.update(value="RTL-text", rtl=True, text_align="right")
    assert input_update == expected_input
    # the former input text moves into the output box and must read left-to-right
    expected_output = gr.update(value="Hello", rtl=False, text_align="left")
    assert output_update == expected_output


def test_translate_with_loading_flips_rtl_for_rtl_target():
    """The last update yielded by _translate_with_loading must set rtl/text_align from the target
    language (RTL for Arabic, LTR for French) and carry the translated text in both cases."""
    import app

    def last_output_update(language, token):
        mappings = ({language: token}, [language])
        with (
            patch("app.translate", return_value="out"),
            patch("app._build_language_mappings", return_value=mappings),
        ):
            # drain the generator while the patches are active; only the final item matters
            *_, final = app._translate_with_loading("hi", language)
        return final[1]  # the output_text update payload

    arabic = last_output_update("Arabic (ar)", "<2ar>")
    french = last_output_update("French (fr)", "<2fr>")
    assert arabic["rtl"] is True and arabic["text_align"] == "right"
    assert french["rtl"] is False and french["text_align"] == "left"
    assert arabic["value"] == "out" and french["value"] == "out"  # both branches forward the result


def test_rtl_codes_are_valid_langmap_tokens():
    """Each token in app.RTL_CODES must be a key of the langmap, so a renamed or dropped token
    fails here instead of silently disabling an RTL flip."""
    import app
    from langmap.langid_mapping import langid_to_language

    known_tokens = set(langid_to_language)
    missing = app.RTL_CODES - known_tokens
    assert not missing, f"RTL_CODES not in langmap: {missing}"


def test_requirements_excludes_platform_packages():
    """gradio and spaces are provided by the HF Spaces runtime on every tier; pinning them in
    requirements.txt drifts the ZeroGPU runtime, so they must stay out (see requirements-dev.txt).

    The file is read as UTF-8 explicitly so the result does not depend on the machine's locale,
    and the name splitter also stops at ``@`` so a direct reference written without a space
    (``gradio@https://...``) is still recognised as ``gradio``.
    """
    import re
    from pathlib import Path

    reqs = Path(__file__).resolve().parent.parent / "requirements.txt"
    names = set()
    for raw_line in reqs.read_text(encoding="utf-8").splitlines():
        line = raw_line.strip()
        if not line or line.startswith("#"):
            continue  # blank lines and comments carry no requirement
        # the distribution name is everything before the first specifier/extras/marker/URL delimiter
        names.add(re.split(r"[<>=~!;@\[\s,]", line)[0].lower())
    assert "gradio" not in names, "gradio must not be in requirements.txt (locked by sdk_version)"
    assert "spaces" not in names, "spaces must not be in requirements.txt (platform-pinned)"


# --- UI component tests ---


@pytest.fixture
def demo():
    """Build a fresh UI via ``app._build_demo()`` for each test that requests it."""
    import app

    built = app._build_demo()
    return built


def test_demo_has_no_tabs(demo):
    """The UI must not contain any Tab component."""
    tab_count = sum(1 for block in demo.blocks.values() if type(block).__name__ == "Tab")
    assert tab_count == 0, f"Expected no tabs, found {tab_count}"


def test_demo_has_no_sliders(demo):
    """The UI must not contain any Slider component."""
    slider_count = sum(1 for block in demo.blocks.values() if type(block).__name__ == "Slider")
    assert slider_count == 0, f"Expected no sliders, found {slider_count}"


def test_demo_has_two_interactive_dropdowns(demo):
    """Exactly two Dropdown components exist and neither is explicitly non-interactive."""
    dropdowns = [block for block in demo.blocks.values() if type(block).__name__ == "Dropdown"]
    total = len(dropdowns)
    assert total == 2, f"Expected 2 dropdowns, found {total}"
    # interactive=None counts as interactive here; only an explicit False is excluded
    enabled = sum(1 for dropdown in dropdowns if dropdown.interactive is not False)
    assert enabled == 2, f"Expected 2 interactive dropdowns, found {enabled}"


def test_source_dropdown_default_is_english(demo):
    """Exactly one dropdown must start out with the value 'English (en)'."""
    defaults = [block.value for block in demo.blocks.values() if type(block).__name__ == "Dropdown"]
    english_count = sum(1 for value in defaults if value == "English (en)")
    assert english_count == 1, "Expected one dropdown defaulting to 'English (en)'"


def test_target_dropdown_default_is_french(demo):
    """Exactly one dropdown must start out with the value 'French (fr)'."""
    defaults = [block.value for block in demo.blocks.values() if type(block).__name__ == "Dropdown"]
    french_count = sum(1 for value in defaults if value == "French (fr)")
    assert french_count == 1, "Expected one dropdown defaulting to 'French (fr)'"


def test_both_dropdowns_filterable(demo):
    """Every interactive dropdown must have filterable set to True (type-to-search)."""
    interactive_dropdowns = [
        block
        for block in demo.blocks.values()
        if type(block).__name__ == "Dropdown" and block.interactive is not False
    ]
    not_filterable = [dropdown for dropdown in interactive_dropdowns if dropdown.filterable is not True]
    assert not not_filterable, "Both dropdowns should be filterable"


def test_dropdown_choices_include_locale_codes(demo):
    """Dropdown choices should include locale codes like 'French (fr)'.

    The first interactive dropdown's choice labels are inspected. Both the dropdown list and the
    label list are asserted non-empty first, because ``all()`` over an empty sequence is True and
    would let a UI with no choices pass silently.
    """
    dropdowns = [b for b in demo.blocks.values() if type(b).__name__ == "Dropdown"]
    interactive = [d for d in dropdowns if d.interactive is not False]
    assert interactive, "Expected at least one interactive dropdown"
    # Gradio stores choices as (label, value) tuples
    labels = [c[0] if isinstance(c, tuple) else c for c in interactive[0].choices]
    assert labels, "Expected the language dropdown to offer at least one choice"
    assert all("(" in label and ")" in label for label in labels), f"Expected locale codes in choices: {labels}"


def test_dropdowns_have_info_captions(demo):
    """Every dropdown needs a non-empty info caption, and at least one caption must mention
    'auto-detect' (the source-language disclosure)."""
    captions = [block.info for block in demo.blocks.values() if type(block).__name__ == "Dropdown"]
    assert all(captions), "both dropdowns should carry info captions"
    mentions_auto_detect = any("auto-detect" in (caption or "").lower() for caption in captions)
    assert mentions_auto_detect, "source must disclose auto-detection"


def test_textboxes_have_info_captions(demo):
    """All textboxes carry an info caption: the input one mentions Ctrl+Enter (any case) and the
    output one names the madlad400-3b-mt model."""
    textboxes = [block for block in demo.blocks.values() if type(block).__name__ == "Textbox"]
    assert all(box.info for box in textboxes), "input and output textboxes should carry info captions"
    # the input box is the first one not explicitly disabled; the output box is the first disabled one
    input_box = next(box for box in textboxes if box.interactive is not False)
    output_box = next(box for box in textboxes if box.interactive is False)
    input_caption = input_box.info.lower()
    assert "ctrl+enter" in input_caption
    assert "madlad400-3b-mt" in output_box.info


def test_demo_has_two_textboxes(demo):
    """The UI must contain exactly two Textbox components (input and output)."""
    textbox_count = sum(1 for block in demo.blocks.values() if type(block).__name__ == "Textbox")
    assert textbox_count == 2, f"Expected 2 textboxes, found {textbox_count}"


def test_input_textbox_height(demo):
    """The input (interactive) textbox must be configured with lines=6."""
    input_boxes = [
        block
        for block in demo.blocks.values()
        if type(block).__name__ == "Textbox" and block.interactive is not False
    ]
    input_box = input_boxes[0]
    assert input_box.lines == 6, f"Expected lines=6, got {input_box.lines}"


def test_input_textbox_max_length(demo):
    """The input (interactive) textbox must cap input at 2000 characters."""
    input_boxes = [
        block
        for block in demo.blocks.values()
        if type(block).__name__ == "Textbox" and block.interactive is not False
    ]
    input_box = input_boxes[0]
    assert input_box.max_length == 2000, f"Expected max_length=2000, got {input_box.max_length}"


def test_input_textbox_has_no_placeholder(demo):
    """The input (interactive) textbox must have placeholder=None."""
    input_boxes = [
        block
        for block in demo.blocks.values()
        if type(block).__name__ == "Textbox" and block.interactive is not False
    ]
    input_box = input_boxes[0]
    assert input_box.placeholder is None, f"Expected no placeholder, got {input_box.placeholder!r}"


def test_input_textbox_autofocus(demo):
    """The input (interactive) textbox must have autofocus=True so it is focused on page load."""
    input_boxes = [
        block
        for block in demo.blocks.values()
        if type(block).__name__ == "Textbox" and block.interactive is not False
    ]
    input_box = input_boxes[0]
    assert input_box.autofocus is True, f"Expected autofocus=True, got {input_box.autofocus!r}"


def test_input_textbox_has_no_buttons(demo):
    """The input (interactive) textbox must have an empty buttons list (gradio accepts invalid
    button values silently, so the exact value is pinned here)."""
    input_boxes = [
        block
        for block in demo.blocks.values()
        if type(block).__name__ == "Textbox" and block.interactive is not False
    ]
    input_box = input_boxes[0]
    assert input_box.buttons == [], f"Expected no buttons, got {input_box.buttons!r}"


def test_output_textbox_is_non_interactive(demo):
    """Exactly one textbox (the output) must be explicitly interactive=False."""
    disabled_count = sum(
        1 for block in demo.blocks.values() if type(block).__name__ == "Textbox" and block.interactive is False
    )
    assert disabled_count == 1, "Expected exactly one non-interactive textbox"


def test_output_textbox_height(demo):
    """The output (non-interactive) textbox must be configured with lines=6."""
    output_boxes = [
        block for block in demo.blocks.values() if type(block).__name__ == "Textbox" and block.interactive is False
    ]
    output_box = output_boxes[0]
    assert output_box.lines == 6, f"Expected lines=6, got {output_box.lines}"


def test_output_placeholder(demo):
    """The output (non-interactive) textbox must show 'Translation' as its placeholder."""
    output_boxes = [
        block for block in demo.blocks.values() if type(block).__name__ == "Textbox" and block.interactive is False
    ]
    placeholder = output_boxes[0].placeholder
    assert placeholder == "Translation"


def test_output_textbox_has_copy_button(demo):
    """The output (non-interactive) textbox must expose exactly one toolbar button: copy."""
    output_boxes = [
        block for block in demo.blocks.values() if type(block).__name__ == "Textbox" and block.interactive is False
    ]
    output_box = output_boxes[0]
    assert output_box.buttons == ["copy"], f"Expected ['copy'], got {output_box.buttons!r}"


def test_demo_has_translate_button(demo):
    """At least one Button in the UI must be labelled 'Translate'."""
    has_translate = any(
        block.value == "Translate" for block in demo.blocks.values() if type(block).__name__ == "Button"
    )
    assert has_translate, "Expected a 'Translate' button"


def test_translate_button_outside_columns(demo):
    """The single Translate button must have no Column anywhere in its chain of parents."""
    translate_buttons = [
        block for block in demo.blocks.values() if type(block).__name__ == "Button" and block.value == "Translate"
    ]
    assert len(translate_buttons) == 1
    # walk up the parent links until a block has no (or a None) parent
    ancestor = getattr(translate_buttons[0], "parent", None)
    while ancestor is not None:
        assert type(ancestor).__name__ != "Column", "Translate button should not be inside a Column"
        ancestor = getattr(ancestor, "parent", None)


def test_demo_has_no_html_elements(demo):
    """The UI must not contain any HTML component (hint/char count removed)."""
    html_count = sum(1 for block in demo.blocks.values() if type(block).__name__ == "HTML")
    assert html_count == 0, f"Expected no HTML blocks, found {html_count}"


def test_demo_has_swap_button(demo):
    """Exactly one Button must carry the swap glyph in its label."""
    button_labels = [str(block.value) for block in demo.blocks.values() if type(block).__name__ == "Button"]
    swap_count = sum(1 for label in button_labels if "⇄" in label)
    assert swap_count == 1, "Expected one swap button"


def test_swap_handler_wired(demo):
    """At least one registered handler must take 4 inputs and produce 4 outputs (the swap)."""
    has_swap_shape = any(len(fn.inputs) == 4 and len(fn.outputs) == 4 for fn in demo.fns.values())
    assert has_swap_shape, "Expected a handler with 4 inputs and 4 outputs (swap handler)"


def test_translate_handlers_wire_text_and_language(demo):
    """Exactly two handlers (click + submit) must begin their inputs with a Textbox followed by
    a Dropdown, i.e. the input text and the target language."""
    translate_fns = []
    for fn in demo.fns.values():
        leading_types = [type(component).__name__ for component in fn.inputs][:2]
        if leading_types == ["Textbox", "Dropdown"]:
            translate_fns.append(fn)
    found = len(translate_fns)
    assert found == 2, f"Expected 2 translate handlers (click + submit), found {found}"


def test_advanced_params_are_numbers(demo):
    """The advanced generation params must appear as exactly three Number components."""
    number_count = sum(1 for block in demo.blocks.values() if type(block).__name__ == "Number")
    assert number_count == 3, f"Expected 3 Number controls, found {number_count}"


def test_advanced_params_have_safe_bounds(demo):
    """Pin each Number control's min/max (and integer precision where applicable): on the public
    /translate path Gradio's component preprocess is the server-side range guard."""
    by_label = {block.label: block for block in demo.blocks.values() if type(block).__name__ == "Number"}

    max_tokens = by_label["Max new tokens"]
    assert max_tokens.minimum == 1 and max_tokens.maximum == 1024
    assert max_tokens.precision == 0

    beams = by_label["Beams"]
    assert beams.minimum == 1 and beams.maximum == 8
    assert beams.precision == 0

    temperature = by_label["Temperature"]
    assert temperature.minimum == 0.1 and temperature.maximum == 2.0


def test_advanced_params_wired_to_both_translate_handlers(demo):
    """Two handlers must take Textbox, Dropdown and three Numbers as inputs, and exactly one of
    them must be public (api_visibility == 'public') with api_name 'translate' — proving the
    advanced params reach the public endpoint."""
    expected_inputs = ["Textbox", "Dropdown", "Number", "Number", "Number"]
    full_input_fns = []
    for fn in demo.fns.values():
        if [type(component).__name__ for component in fn.inputs] == expected_inputs:
            full_input_fns.append(fn)
    assert len(full_input_fns) == 2, "Expected both translate handlers to carry the 3 advanced params"

    public_fns = [fn for fn in full_input_fns if getattr(fn, "api_visibility", None) == "public"]
    assert len(public_fns) == 1, "exactly one full-input handler should be the public endpoint"
    public_name = getattr(public_fns[0], "api_name", None)
    assert public_name == "translate", "the public one must be /translate"


def test_all_handlers_wired(demo):
    """Exactly three handlers must be registered: translate click, translate submit, and swap."""
    handler_count = len(demo.fns)
    assert handler_count == 3, f"Expected 3 handlers, found {handler_count}"


def test_generation_handlers_use_minimal_progress(demo):
    """Both translate handlers (inputs starting Textbox, Dropdown) must be registered with
    show_progress='minimal'."""
    generation_fns = []
    for fn in demo.fns.values():
        leading_types = [type(component).__name__ for component in fn.inputs][:2]
        if leading_types == ["Textbox", "Dropdown"]:
            generation_fns.append(fn)
    assert len(generation_fns) == 2
    progress_modes = [getattr(fn, "show_progress", None) for fn in generation_fns]
    assert all(mode == "minimal" for mode in progress_modes)


def test_translate_endpoint_has_stable_api_name(demo):
    """Exactly one handler is named 'translate'; it takes text, target language and the three
    advanced Numbers in a fixed order, and writes to a single Textbox."""
    named_translate = [fn for fn in demo.fns.values() if getattr(fn, "api_name", None) == "translate"]
    assert len(named_translate) == 1, "Expected exactly one handler with api_name='translate'"
    endpoint = named_translate[0]

    input_types = [type(component).__name__ for component in endpoint.inputs]
    assert input_types == ["Textbox", "Dropdown", "Number", "Number", "Number"]
    # Number inputs cannot be told apart by type, so their order is pinned by label; otherwise a
    # num_beams/temperature mix-up in the inputs= list would go unnoticed.
    number_labels = [component.label for component in endpoint.inputs[2:]]
    assert number_labels == ["Max new tokens", "Beams", "Temperature"]
    output_types = [type(component).__name__ for component in endpoint.outputs]
    assert output_types == ["Textbox"]


def test_only_translate_endpoint_is_public(demo):
    """The API info must list '/translate' as its only named endpoint; UI-only handlers stay private."""
    api_info = demo.get_api_info()
    named = list(api_info["named_endpoints"])
    assert named == ["/translate"], f"Expected only ['/translate'] exposed, found {named}"


def test_no_title(demo):
    """UI should not have an H1 title.

    Every Markdown block's value is checked for a leading ``# `` heading. A block whose value is
    not a string (for example ``None`` for an empty Markdown) cannot be a title, so it is treated
    as empty text instead of raising AttributeError on ``.startswith``.
    """
    markdowns = [b for b in demo.blocks.values() if type(b).__name__ == "Markdown"]
    for md in markdowns:
        text = md.value if isinstance(md.value, str) else ""
        assert not text.startswith("# "), f"Found unexpected title: {md.value}"


# --- Slow tests (require CUDA + model download) ---

# Evaluated once at import time; the skipif markers on the slow tests below read this flag.
gpu_available = torch.cuda.is_available()


@pytest.fixture(scope="module")
def loaded_app():
    """Module-scoped fixture: call both loaders once up front, then hand back the ``app`` module."""
    import app

    # tokenizer first, then model — same order as the loaders are used elsewhere in this fixture's history
    for force_load in (app._load_tokenizer, app._load_model):
        force_load()
    return app


@pytest.mark.slow
@pytest.mark.skipif(not gpu_available, reason="Requires CUDA")
def test_name_to_code_matches_language_names(loaded_app):
    """The name->code mapping and the language-name list must cover exactly the same names."""
    name_to_code, language_names = loaded_app._build_language_mappings()
    mapped_names = set(name_to_code.keys())
    listed_names = set(language_names)
    assert mapped_names == listed_names


@pytest.mark.slow
@pytest.mark.skipif(not gpu_available, reason="Requires CUDA")
def test_language_names_sorted_by_region(loaded_app):
    """The language-name list must already be ordered by (region, name)."""
    from langmap.langid_mapping import langid_to_language

    name_to_code, language_names = loaded_app._build_language_mappings()

    def region_then_name(name):
        region = langid_to_language[name_to_code[name]]["region"]
        return (region, name)

    expected = sorted(language_names, key=region_then_name)
    assert language_names == expected


@pytest.mark.slow
@pytest.mark.skipif(not gpu_available, reason="Requires CUDA")
def test_all_codes_are_bcp47_tokens(loaded_app):
    """Every language code must have the ``<2...>`` target-token shape."""
    name_to_code, _ = loaded_app._build_language_mappings()
    for name, code in name_to_code.items():
        well_formed = code.startswith("<2") and code.endswith(">")
        assert well_formed, f"Invalid code {code} for {name}"


@pytest.mark.slow
@pytest.mark.skipif(not gpu_available, reason="Requires CUDA")
def test_translate_unsupported_language(loaded_app):
    """An unknown target language must raise ValueError matching 'Unsupported language'."""
    expect_error = pytest.raises(ValueError, match="Unsupported language")
    with expect_error:
        loaded_app.translate("hello", "FakeLanguage")


@pytest.mark.slow
@pytest.mark.skipif(not gpu_available, reason="Requires CUDA")
def test_translate_returns_string(loaded_app):
    """A real translation with default settings must return a non-empty str."""
    translated = loaded_app.translate("Hello", "French (fr)")
    assert isinstance(translated, str)
    assert len(translated) > 0


@pytest.mark.slow
@pytest.mark.skipif(not gpu_available, reason="Requires CUDA")
def test_translate_with_beam_search(loaded_app):
    """A real translation with num_beams=4 must return a non-empty str."""
    translated = loaded_app.translate("Hello", "French (fr)", num_beams=4)
    assert isinstance(translated, str)
    assert len(translated) > 0


@pytest.mark.slow
@pytest.mark.skipif(not gpu_available, reason="Requires CUDA")
def test_translate_with_custom_temperature(loaded_app):
    """A real translation with temperature=0.5 must return a non-empty str."""
    translated = loaded_app.translate("Hello", "French (fr)", temperature=0.5)
    assert isinstance(translated, str)
    assert len(translated) > 0


@pytest.mark.slow
@pytest.mark.skipif(not gpu_available, reason="Requires CUDA")
def test_translate_with_custom_max_tokens(loaded_app):
    """A real translation with max_new_tokens=10 must return a str (length is not checked)."""
    translated = loaded_app.translate("Hello", "French (fr)", max_new_tokens=10)
    assert isinstance(translated, str)


@pytest.mark.slow
@pytest.mark.skipif(not gpu_available, reason="Requires CUDA")
def test_translate_empty_string(loaded_app):
    """Translating '' with the real app must return a str without raising."""
    translated = loaded_app.translate("", "French (fr)")
    assert isinstance(translated, str)


@pytest.mark.slow
@pytest.mark.skipif(not gpu_available, reason="Requires CUDA")
def test_translate_beam_search_ignores_temperature(loaded_app):
    """Beam search combined with a non-default temperature must call gr.Info exactly once and
    still return a str."""
    with patch("app.gr.Info") as info_popup:
        translated = loaded_app.translate("Hello", "French (fr)", num_beams=4, temperature=0.5)
    # the mock keeps its call record after the patch is undone
    info_popup.assert_called_once()
    assert isinstance(translated, str)