"""Minimal torch-native shim for flash_attn used by AIDC-AI/Ovis-U1-3B.

The upstream modeling file imports:
    from flash_attn.layers.rotary import apply_rotary_emb
    from flash_attn import flash_attn_varlen_func

Blackwell/CUDA-13 has no flash-attn prebuilt wheel for cp310+torch>=2.10, and the
package's CUDA build doesn't fit within the @spaces.GPU 1500s budget, so we
provide a small torch-native equivalent that satisfies the two call sites the
model actually exercises.

We also fake a version string within the range xformers tolerates so that
``xformers/ops/fmha/flash.py`` (loaded transitively by ``diffusers``) does not
explode at import time. The xformers FA backend it then registers will never
be invoked along the user-facing demo path (the model uses transformers SDPA
attention + this shim's varlen path; diffusers' xformers backend is only
engaged via an explicit ``set_use_memory_efficient_attention_xformers`` opt-in
which the demo never makes).
"""

# Version string reported by this shim. It is a placeholder rather than the
# version of a real flash-attn build: the value is chosen to fall inside the
# range xformers tolerates, so that xformers' flash backend module can be
# imported without raising.
__version__ = "2.8.3"

# Re-export at package level so that ``from flash_attn import
# flash_attn_varlen_func`` (the form used by the upstream modeling file)
# resolves against this shim.
from .funcs import flash_attn_varlen_func  # noqa: F401
# Importing the submodule here binds it as an attribute of the package, so
# ``flash_attn.flash_attn_interface`` is reachable after a plain
# ``import flash_attn`` without a separate submodule import.
from . import flash_attn_interface  # noqa: F401  -- expose submodule eagerly