from transformers.models.qwen3_vl.configuration_qwen3_vl import Qwen3VLConfig


class Qwen3VLEmbeddingConfig(Qwen3VLConfig):
    """Qwen3-VL configuration carrying extra embedding-related settings.

    Every keyword argument that is not named in ``__init__`` is forwarded
    unchanged to ``Qwen3VLConfig.__init__``. The named arguments are stored
    as attributes of the same name, and a handful of pixel budgets are
    derived from ``image_base_factor``:

    * ``image_factor      = image_base_factor * 2``
    * ``min_pixels        = 4    * image_factor * image_factor``
    * ``max_pixels        = 1800 * image_factor * image_factor``
    * ``frame_max_pixels  = 768  * image_factor * image_factor``
    * ``max_total_pixels  = 10   * frame_max_pixels``

    Args:
        max_length: Stored as ``self.max_length`` (presumably a token-length
            cap; it is not enforced in this class).
        image_base_factor: Base value from which ``image_factor`` and all
            pixel budgets above are computed.
        fps: Stored as ``self.fps``.
        num_frames: Stored as ``self.num_frames``.
        max_frames: Stored as ``self.max_frames``.
        default_instruction: Stored as ``self.default_instruction``.
        normalize_embeddings: Stored as ``self.normalize_embeddings``.
        **kwargs: Passed through to the parent configuration.
    """

    model_type = "qwen3_vl_embedding"

    def __init__(
        self,
        max_length: int = 8192,
        image_base_factor: int = 16,
        fps: float = 1.0,
        num_frames: int = 64,
        max_frames: int = 64,
        default_instruction: str = "Represent the user's input.",
        normalize_embeddings: bool = True,
        **kwargs,
    ):
        # Let the parent consume everything it knows about first; the
        # attributes below are then set on top of the initialised config.
        super().__init__(**kwargs)

        # Derived quantities are computed once as locals and then stored.
        image_factor = image_base_factor * 2
        min_pixels = 4 * image_factor * image_factor
        max_pixels = 1800 * image_factor * image_factor
        frame_max_pixels = 768 * image_factor * image_factor
        max_total_pixels = 10 * frame_max_pixels

        # Text-side setting.
        self.max_length = max_length

        # Image-side settings (base value plus everything derived from it).
        self.image_base_factor = image_base_factor
        self.image_factor = image_factor
        self.min_pixels = min_pixels
        self.max_pixels = max_pixels
        self.frame_max_pixels = frame_max_pixels
        self.max_total_pixels = max_total_pixels

        # Frame-related settings.
        self.fps = fps
        self.num_frames = num_frames
        self.max_frames = max_frames

        # Embedding behaviour.
        self.default_instruction = default_instruction
        self.normalize_embeddings = normalize_embeddings