"""Minimal torch-native shim for flash_attn used by AIDC-AI/Ovis-U1-3B. The upstream modeling file imports: from flash_attn.layers.rotary import apply_rotary_emb from flash_attn import flash_attn_varlen_func Blackwell/CUDA-13 has no flash-attn prebuilt wheel for cp310+torch>=2.10, and the package's CUDA build doesn't fit within the @spaces.GPU 1500s budget, so we provide a small torch-native equivalent that satisfies the two call sites the model actually exercises. We also fake a version string within the range xformers tolerates so that ``xformers/ops/fmha/flash.py`` (loaded transitively by ``diffusers``) does not explode at import time. The xformers FA backend it then registers will never be invoked along the user-facing demo path (the model uses transformers SDPA attention + this shim's varlen path; diffusers' xformers backend is only engaged via an explicit ``set_use_memory_efficient_attention_xformers`` opt-in which the demo never makes). """ __version__ = "2.8.3" from .funcs import flash_attn_varlen_func # noqa: F401 from . import flash_attn_interface # noqa: F401 -- expose submodule eagerly