Upload SLERP-merged checkpoint (alpha=0.5) from two adversarial-FT runs at step-1500

Browse files

Files changed (10) hide show

added_tokens.json +28 -0
config.json +76 -0
configuration.py +5 -0
merges.txt +0 -0
model.safetensors +3 -0
modeling.py +346 -0
special_tokens_map.json +45 -0
st_quantize.py +143 -0
tokenizer_config.json +249 -0
vocab.json +0 -0

added_tokens.json ADDED Viewed

	@@ -0,0 +1,28 @@

+{
+  "</think>": 151668,
+  "</tool_call>": 151658,
+  "</tool_response>": 151666,
+  "<think>": 151667,
+  "<tool_call>": 151657,
+  "<tool_response>": 151665,
+  "<|box_end|>": 151649,
+  "<|box_start|>": 151648,
+  "<|endoftext|>": 151643,
+  "<|file_sep|>": 151664,
+  "<|fim_middle|>": 151660,
+  "<|fim_pad|>": 151662,
+  "<|fim_prefix|>": 151659,
+  "<|fim_suffix|>": 151661,
+  "<|im_end|>": 151645,
+  "<|im_start|>": 151644,
+  "<|image_pad|>": 151655,
+  "<|object_ref_end|>": 151647,
+  "<|object_ref_start|>": 151646,
+  "<|quad_end|>": 151651,
+  "<|quad_start|>": 151650,
+  "<|repo_name|>": 151663,
+  "<|video_pad|>": 151656,
+  "<|vision_end|>": 151653,
+  "<|vision_pad|>": 151654,
+  "<|vision_start|>": 151652
+}

config.json ADDED Viewed

	@@ -0,0 +1,76 @@

+{
+  "architectures": [
+    "PPLXQwen3ContextualModel"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "auto_map": {
+    "AutoConfig": "configuration.PPLXQwen3Config",
+    "AutoModel": "modeling.PPLXQwen3ContextualModel"
+  },
+  "bos_token_id": 151643,
+  "dtype": "float32",
+  "eos_token_id": 151643,
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 32768,
+  "max_window_layers": 36,
+  "model_type": "bidirectional_pplx_qwen3",
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_parameters": {
+    "rope_theta": 1000000,
+    "rope_type": "default"
+  },
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "transformers_version": "5.0.0.dev0",
+  "use_bidirectional_attention": true,
+  "use_cache": false,
+  "use_sliding_window": false,
+  "vocab_size": 151936
+}

configuration.py ADDED Viewed

	@@ -0,0 +1,5 @@

+from transformers.models.qwen3.configuration_qwen3 import Qwen3Config
+class PPLXQwen3Config(Qwen3Config):
+    """Qwen3 configuration subclass that only overrides the ``model_type`` identifier."""
+    # Identifier string for this configuration class; everything else is inherited from Qwen3Config.
+    model_type = "bidirectional_pplx_qwen3"

merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fcb5ec8de5d8ca71dbea35cd7d055942537d70ac817d9346f7005a31e2082fec
+size 16089915848

modeling.py ADDED Viewed

	@@ -0,0 +1,346 @@

+from typing import Callable, Literal
+import numpy as np
+import torch
+from transformers import Qwen3Model
+from transformers.cache_utils import Cache
+from transformers.masking_utils import create_causal_mask
+from transformers.modeling_outputs import BaseModelOutputWithPooling
+from transformers.processing_utils import Unpack
+from transformers.utils import TransformersKwargs
+from .configuration import PPLXQwen3Config
+from transformers import AutoTokenizer
+from .st_quantize import FlexibleQuantizer
+# From modeling_t5gemma.py
+def bidirectional_mask_function(attention_mask: torch.Tensor | None) -> Callable:
+    """
+    This creates bidirectional attention mask.
+    """
+    def inner_mask(batch_idx: int, head_idx: int, q_idx: int, kv_idx: int) -> bool:
+        if attention_mask is None:
+            return torch.ones((), dtype=torch.bool)
+        return attention_mask[batch_idx, kv_idx].to(torch.bool)
+    return inner_mask
+class PPLXQwen3Model(Qwen3Model):
+    _supports_flash_attn = True
+    _supports_sdpa = True
+    config_class = PPLXQwen3Config
+    def __init__(self, config):
+        super().__init__(config)
+        self.post_init()
+    def post_init(self):
+        super().post_init()
+        # Override to set all layers to non-causal attention. This'll work with attn_implementation="flash_attention_2" or "sdpa"
+        for layer in self.layers:
+            layer.self_attn.is_causal = False
+    def forward(
+        self,
+        input_ids: torch.LongTensor | None = None,
+        attention_mask: torch.Tensor | None = None,
+        position_ids: torch.LongTensor | None = None,
+        past_key_values: Cache | None = None,
+        inputs_embeds: torch.FloatTensor | None = None,
+        use_cache: bool | None = None,
+        cache_position: torch.LongTensor | None = None,
+        **kwargs: Unpack[TransformersKwargs],
+    ) -> BaseModelOutputWithPooling:
+        if inputs_embeds is None:
+            inputs_embeds = self.embed_tokens(input_ids)
+            input_ids = None
+        # We construct a dummy tensor imitating initial positions
+        dummy_cache_position = torch.arange(
+            inputs_embeds.shape[1], device=inputs_embeds.device, dtype=torch.long
+        )
+        attention_mask = {
+            "full_attention": create_causal_mask(
+                config=self.config,
+                input_embeds=inputs_embeds,
+                attention_mask=attention_mask,
+                cache_position=dummy_cache_position,
+                past_key_values=None,
+                position_ids=position_ids,
+                or_mask_function=bidirectional_mask_function(attention_mask),
+            )
+        }
+        outputs = super().forward(
+            input_ids=input_ids,
+            attention_mask=attention_mask,
+            position_ids=position_ids,
+            past_key_values=past_key_values,
+            inputs_embeds=inputs_embeds,
+            use_cache=use_cache,
+            cache_position=cache_position,
+            **kwargs,
+        )
+        return outputs
+class PPLXQwen3ContextualModel(PPLXQwen3Model):
+    """
+    Qwen3 model with contextual encoding support for late chunking.
+    This model extends PPLXQwen3Model with an encode() method that supports both
+    standard encoding (list[str]) and contextual encoding (list[list[str]]) with late chunking.
+    IMPORTANT: This model MUST be loaded with trust_remote_code=True:
+        from transformers import AutoModel
+        model = AutoModel.from_pretrained(
+            "path/to/model",
+            trust_remote_code=True  # REQUIRED!
+        )
+        embeddings = model.encode([["chunk1", "chunk2"]])
+    Loading without trust_remote_code=True will fail to load this custom model class.
+    """
+    config_class = PPLXQwen3Config
+    def __init__(self, config):
+        super().__init__(config)
+        if not isinstance(config, PPLXQwen3Config):
+            raise TypeError(
+                f"PPLXQwen3ContextualModel requires PPLXQwen3Config, got {type(config).__name__}. "
+                f"Did you forget to load with trust_remote_code=True?"
+            )
+        self.tokenizer = AutoTokenizer.from_pretrained(config._name_or_path)
+        self._flexible_quantizer = FlexibleQuantizer()
+    @staticmethod
+    def mean_pooling(
+        token_embeddings: torch.Tensor, attention_mask: torch.Tensor
+    ) -> torch.Tensor:
+        """Apply mean pooling to token embeddings."""
+        input_mask_expanded = (
+            attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
+        )
+        return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(
+            input_mask_expanded.sum(1), min=1e-9
+        )
+    @torch.inference_mode()
+    def encode(
+        self,
+        documents: list[list[str]],
+        batch_size: int = 32,
+        show_progress_bar: bool = False,
+        device: str | torch.device | None = None,
+        normalize_embeddings: bool = False,
+        convert_to_numpy: bool = True,
+        quantization: Literal["int8", "binary", "ubinary"] = "int8",
+    ) -> list[np.ndarray] | list[torch.Tensor]:
+        """
+        Encode documents with late chunking (contextual embeddings).
+        This model is designed specifically for contextual encoding and always expects
+        documents as nested lists where each document is a list of text chunks.
+        The encoding process:
+        1. Concatenate chunks with separator tokens
+        2. Run forward pass to get token embeddings
+        3. Extract and pool individual chunk embeddings (late chunking)
+        4. Apply quantization (Int8 or binary, always enabled)
+        5. Normalize embeddings if requested (applied after quantization)
+        6. Convert to numpy or return as tensors
+        Args:
+            documents: List of documents, where each document is a list of text chunks.
+                Example: [["chunk1", "chunk2"], ["chunk1", "chunk2", "chunk3"]]
+            batch_size: Batch size for encoding
+            show_progress_bar: Show progress bar during encoding
+            device: Device to use for computation (defaults to model's device)
+            normalize_embeddings: Normalize embeddings to unit length (applied after quantization)
+            convert_to_numpy: If True, returns list[np.ndarray], otherwise list[torch.Tensor]
+            quantization: Quantization type to apply. Options:
+                - "int8": Int8 tanh quantization (default)
+                - "binary": Binary tanh quantization (-1.0 or 1.0)
+                - "ubinary": Unsigned packed binary (uint8, 8x compression)
+        Returns:
+            List of numpy arrays or tensors (preserves document structure).
+            Each element has shape (n_chunks, hidden_dim) or (n_chunks, hidden_dim // 8) for ubinary.
+            Example: embeddings[0].shape = (2, 1024), embeddings[1].shape = (3, 1024)
+            Output type depends on quantization method:
+            - "int8": int8 dtype, values in range [-128, 127], shape (..., hidden_dim)
+            - "binary": float32 dtype, values -1.0 or 1.0, shape (..., hidden_dim)
+            - "ubinary": uint8 dtype, packed bits (8x smaller), shape (..., hidden_dim // 8)
+        """
+        if not isinstance(documents, list) or not all(
+            isinstance(doc, list) for doc in documents
+        ):
+            raise TypeError(
+                "Input 'documents' must be a list of lists of strings for contextual encoding."
+            )
+        if quantization not in ["int8", "binary", "ubinary"]:
+            raise ValueError(
+                f"Unsupported quantization type: '{quantization}'. "
+                f"Supported types are: 'int8', 'binary', 'ubinary'. "
+                f"Got: {type(quantization).__name__} = '{quantization}'"
+            )
+        if normalize_embeddings and quantization == "ubinary":
+            raise ValueError(
+                "normalize_embeddings=True is incompatible with quantization='ubinary'. "
+                "Packed binary embeddings (uint8) cannot be normalized because each byte "
+                "represents 8 packed bits, not a single dimension. "
+                "Either set normalize_embeddings=False or use 'binary' quantization instead."
+            )
+        self.eval()
+        if device is None:
+            device = next(self.parameters()).device
+        all_embeddings = []
+        range_iter = range(0, len(documents), batch_size)
+        if show_progress_bar:
+            try:
+                from tqdm import tqdm
+                range_iter = tqdm(range_iter, desc="Encoding documents")
+            except ImportError:
+                pass
+        for i in range_iter:
+            batch_docs = documents[i : i + batch_size]
+            doc_strings = [
+                self.tokenizer.sep_token.join(chunks) for chunks in batch_docs
+            ]
+            inputs = self.tokenizer(
+                doc_strings,
+                padding=True,
+                truncation=True,
+                return_tensors="pt",
+            )
+            inputs = {k: v.to(device) for k, v in inputs.items()}
+            outputs = self.forward(**inputs)
+            token_embeddings = outputs.last_hidden_state
+            batch_chunk_embeddings = self._extract_chunks_from_concatenated(
+                input_ids=inputs["input_ids"],
+                token_embeddings=token_embeddings,
+                attention_mask=inputs["attention_mask"],
+            )
+            batch_chunk_embeddings = [
+                torch.stack([chunk for chunk in doc_chunks], dim=0)
+                for doc_chunks in batch_chunk_embeddings
+            ]
+            batch_chunk_embeddings = [
+                self._flexible_quantizer(
+                    {"sentence_embedding": emb}, quantization=quantization
+                )["sentence_embedding"]
+                for emb in batch_chunk_embeddings
+            ]
+            if normalize_embeddings:
+                batch_chunk_embeddings = [
+                    torch.nn.functional.normalize(emb, p=2, dim=-1)
+                    for emb in batch_chunk_embeddings
+                ]
+            batch_chunk_embeddings = [emb.cpu() for emb in batch_chunk_embeddings]
+            all_embeddings.extend(batch_chunk_embeddings)
+        if convert_to_numpy:
+            all_embeddings = [emb.numpy() for emb in all_embeddings]
+        return all_embeddings
+    def _extract_chunks_from_concatenated(
+        self,
+        input_ids: torch.Tensor,
+        token_embeddings: torch.Tensor,
+        attention_mask: torch.Tensor,
+    ) -> list[list[torch.Tensor]]:
+        """
+        Extract individual chunk embeddings from concatenated sequence using late chunking.
+        This method splits concatenated sequences like "[chunk1][SEP][chunk2][SEP]..."
+        back into individual chunk embeddings by finding SEP token positions.
+        Args:
+            input_ids: Token IDs (batch_size, seq_len)
+            token_embeddings: Token embeddings (batch_size, seq_len, hidden_dim)
+            attention_mask: Attention mask (batch_size, seq_len)
+        Returns:
+            list[list[torch.Tensor]]: List of documents, each containing list of chunk embeddings
+        Note:
+            The sep_token_id is retrieved from self.tokenizer.sep_token_id.
+            Common values: Qwen2=151643, BERT=102, varies by tokenizer.
+        """
+        sep_token_id = self.tokenizer.sep_token_id
+        batch_size = input_ids.shape[0]
+        all_doc_chunks = []
+        for batch_idx in range(batch_size):
+            # non-pad sep tokens
+            valid_positions = attention_mask[batch_idx].bool()
+            sep_positions = (
+                (input_ids[batch_idx] == sep_token_id) & valid_positions
+            ).nonzero(as_tuple=True)[0]
+            chunk_embeddings = []
+            start_pos = 0
+            for sep_pos in sep_positions:
+                chunk_tokens = token_embeddings[batch_idx, start_pos:sep_pos]
+                chunk_mask = attention_mask[batch_idx, start_pos:sep_pos]
+                chunk_emb = self.mean_pooling(
+                    chunk_tokens.unsqueeze(0), chunk_mask.unsqueeze(0)
+                ).squeeze(0)
+                chunk_embeddings.append(chunk_emb)
+                start_pos = sep_pos + 1
+            # Handle the last chunk (after the last SEP token)
+            last_valid_pos = attention_mask[batch_idx].sum().item()
+            chunk_tokens = token_embeddings[batch_idx, start_pos:last_valid_pos]
+            chunk_mask = attention_mask[batch_idx, start_pos:last_valid_pos]
+            if chunk_mask.sum() > 0:
+                chunk_emb = self.mean_pooling(
+                    chunk_tokens.unsqueeze(0), chunk_mask.unsqueeze(0)
+                ).squeeze(0)
+            else:
+                # Empty chunk - create zero embedding
+                chunk_emb = torch.zeros(
+                    token_embeddings.shape[-1],
+                    device=token_embeddings.device,
+                    dtype=token_embeddings.dtype,
+                )
+            chunk_embeddings.append(chunk_emb)
+            all_doc_chunks.append(chunk_embeddings)
+        return all_doc_chunks

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,45 @@

+{
+  "additional_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>",
+    "<|object_ref_start|>",
+    "<|object_ref_end|>",
+    "<|box_start|>",
+    "<|box_end|>",
+    "<|quad_start|>",
+    "<|quad_end|>",
+    "<|vision_start|>",
+    "<|vision_end|>",
+    "<|vision_pad|>",
+    "<|image_pad|>",
+    "<|video_pad|>"
+  ],
+  "eos_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "mask_token": {
+    "content": "â½Ĺ",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sep_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

st_quantize.py ADDED Viewed

	@@ -0,0 +1,143 @@

+import torch
+import numpy as np
+from typing import Literal
+from sentence_transformers.models import Module
+class Quantizer(torch.nn.Module):
+    def __init__(self, hard: bool = True):
+        """
+        Args:
+            hard: Whether to use hard or soft quantization. Defaults to True.
+        """
+        super().__init__()
+        self._hard = hard
+    def _hard_quantize(self, x, *args, **kwargs) -> torch.Tensor:
+        raise NotImplementedError
+    def _soft_quantize(self, x, *args, **kwargs) -> torch.Tensor:
+        raise NotImplementedError
+    def forward(self, x, *args, **kwargs) -> torch.Tensor:
+        soft = self._soft_quantize(x, *args, **kwargs)
+        if not self._hard:
+            result = soft
+        else:
+            result = (
+                self._hard_quantize(x, *args, **kwargs).detach() + soft - soft.detach()
+            )
+        return result
+class Int8TanhQuantizer(Quantizer):
+    def __init__(
+        self,
+        hard: bool = True,
+    ):
+        super().__init__(hard=hard)
+        self.qmin = -128
+        self.qmax = 127
+    def _soft_quantize(self, x, *args, **kwargs):
+        return torch.tanh(x)
+    def _hard_quantize(self, x, *args, **kwargs):
+        soft = self._soft_quantize(x)
+        int_x = torch.round(soft * self.qmax)
+        int_x = torch.clamp(int_x, self.qmin, self.qmax)
+        return int_x
+class BinaryTanhQuantizer(Quantizer):
+    def __init__(
+        self,
+        hard: bool = True,
+        scale: float = 1.0,
+    ):
+        super().__init__(hard)
+        self._scale = scale
+    def _soft_quantize(self, x, *args, **kwargs):
+        return torch.tanh(self._scale * x)
+    def _hard_quantize(self, x, *args, **kwargs):
+        return torch.where(x >= 0, 1.0, -1.0)
+class PackedBinaryQuantizer:
+    """
+    Packs binary embeddings into uint8 format for efficient storage.
+    This quantizer applies a binary threshold (x >= 0) and packs 8 consecutive
+    bits into a single uint8 byte using numpy.packbits. This reduces memory
+    usage by 8x compared to float32 and by 4x compared to int8.
+    IMPORTANT: This is an inference-only quantizer - it is not differentiable
+    and should only be used for encoding/inference, not during training.
+    Args:
+        x: Input tensor of any float dtype, shape (..., embedding_dim)
+    Returns:
+        Packed binary tensor of dtype uint8, shape (..., embedding_dim // 8)
+    Example:
+        >>> quantizer = PackedBinaryQuantizer()
+        >>> embeddings = torch.randn(2, 1024)  # float32
+        >>> packed = quantizer(embeddings)     # uint8, shape (2, 128)
+    """
+    def __call__(self, x: torch.Tensor) -> torch.Tensor:
+        bits = np.where(x.cpu().numpy() >= 0, True, False)
+        packed = np.packbits(bits, axis=-1)
+        return torch.from_numpy(packed).to(x.device)
+class FlexibleQuantizer(Module):
+    def __init__(self):
+        super().__init__()
+        self._int8_quantizer = Int8TanhQuantizer()
+        self._binary_quantizer = BinaryTanhQuantizer()
+        self._packed_binary_quantizer = PackedBinaryQuantizer()
+    def forward(
+        self,
+        features: dict[str, torch.Tensor],
+        quantization: Literal["int8", "binary", "ubinary"] = "int8",
+        **kwargs,
+    ) -> dict[str, torch.Tensor]:
+        if quantization == "int8":
+            features["sentence_embedding"] = self._int8_quantizer(
+                features["sentence_embedding"]
+            )
+        elif quantization == "binary":
+            features["sentence_embedding"] = self._binary_quantizer(
+                features["sentence_embedding"]
+            )
+        elif quantization == "ubinary":
+            features["sentence_embedding"] = self._packed_binary_quantizer(
+                features["sentence_embedding"]
+            )
+        else:
+            raise ValueError(
+                f"Invalid quantization type: {quantization}. Must be 'binary', 'ubinary', or 'int8'."
+            )
+        return features
+    @classmethod
+    def load(
+        cls,
+        model_name_or_path: str,
+        subfolder: str = "",
+        token: bool | str | None = None,
+        cache_folder: str | None = None,
+        revision: str | None = None,
+        local_files_only: bool = False,
+        **kwargs,
+    ):
+        return cls()
+    def save(self, output_path: str, *args, **kwargs) -> None:
+        return

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,249 @@

+{
+  "add_bos_token": false,
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "151642": {
+      "content": "â½Ĺ",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151643": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151644": {
+      "content": "<|im_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151645": {
+      "content": "<|im_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151646": {
+      "content": "<|object_ref_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151647": {
+      "content": "<|object_ref_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151648": {
+      "content": "<|box_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151649": {
+      "content": "<|box_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151650": {
+      "content": "<|quad_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151651": {
+      "content": "<|quad_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151652": {
+      "content": "<|vision_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151653": {
+      "content": "<|vision_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151654": {
+      "content": "<|vision_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151655": {
+      "content": "<|image_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151656": {
+      "content": "<|video_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151657": {
+      "content": "<tool_call>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151658": {
+      "content": "</tool_call>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151659": {
+      "content": "<|fim_prefix|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151660": {
+      "content": "<|fim_middle|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151661": {
+      "content": "<|fim_suffix|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151662": {
+      "content": "<|fim_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151663": {
+      "content": "<|repo_name|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151664": {
+      "content": "<|file_sep|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151665": {
+      "content": "<tool_response>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151666": {
+      "content": "</tool_response>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151667": {
+      "content": "<think>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151668": {
+      "content": "</think>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    }
+  },
+  "additional_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>",
+    "<|object_ref_start|>",
+    "<|object_ref_end|>",
+    "<|box_start|>",
+    "<|box_end|>",
+    "<|quad_start|>",
+    "<|quad_end|>",
+    "<|vision_start|>",
+    "<|vision_end|>",
+    "<|vision_pad|>",
+    "<|image_pad|>",
+    "<|video_pad|>"
+  ],
+  "bos_token": null,
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<|endoftext|>",
+  "errors": "replace",
+  "extra_special_tokens": {},
+  "mask_token": "â½Ĺ",
+  "model_max_length": 131072,
+  "pad_token": "<|endoftext|>",
+  "sep_token": "<|endoftext|>",
+  "split_special_tokens": false,
+  "tokenizer_class": "Qwen2Tokenizer",
+  "unk_token": null
+}

vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff