Instructions to use swadeshb/tivd-gsm8k-colocate with libraries, inference providers, notebooks, and local apps. Follow these links to get started.

Libraries

How to use swadeshb/tivd-gsm8k-colocate with PEFT:

from peft import PeftModel
from transformers import AutoModelForCausalLM

base_model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen3-1.7B")
model = PeftModel.from_pretrained(base_model, "swadeshb/tivd-gsm8k-colocate")

Transformers

How to use swadeshb/tivd-gsm8k-colocate with Transformers:

# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="swadeshb/tivd-gsm8k-colocate")
messages = [
    {"role": "user", "content": "Who are you?"},
]
pipe(messages)

# Load model directly
from transformers import AutoModel
model = AutoModel.from_pretrained("swadeshb/tivd-gsm8k-colocate", dtype="auto")

Notebooks
Google Colab
Kaggle
Local Apps Settings

vLLM

How to use swadeshb/tivd-gsm8k-colocate with vLLM:

Install from pip and serve model

# Install vLLM from pip:
pip install vllm
# Start the vLLM server:
vllm serve "swadeshb/tivd-gsm8k-colocate"
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
	-H "Content-Type: application/json" \
	--data '{
		"model": "swadeshb/tivd-gsm8k-colocate",
		"messages": [
			{
				"role": "user",
				"content": "What is the capital of France?"
			}
		]
	}'

Use Docker

docker model run hf.co/swadeshb/tivd-gsm8k-colocate

SGLang

How to use swadeshb/tivd-gsm8k-colocate with SGLang:

Install from pip and serve model

# Install SGLang from pip:
pip install sglang
# Start the SGLang server:
python3 -m sglang.launch_server \
    --model-path "swadeshb/tivd-gsm8k-colocate" \
    --host 0.0.0.0 \
    --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
	-H "Content-Type: application/json" \
	--data '{
		"model": "swadeshb/tivd-gsm8k-colocate",
		"messages": [
			{
				"role": "user",
				"content": "What is the capital of France?"
			}
		]
	}'

Use Docker images

docker run --gpus all \
    --shm-size 32g \
    -p 30000:30000 \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env "HF_TOKEN=<secret>" \
    --ipc=host \
    lmsysorg/sglang:latest \
    python3 -m sglang.launch_server \
        --model-path "swadeshb/tivd-gsm8k-colocate" \
        --host 0.0.0.0 \
        --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
	-H "Content-Type: application/json" \
	--data '{
		"model": "swadeshb/tivd-gsm8k-colocate",
		"messages": [
			{
				"role": "user",
				"content": "What is the capital of France?"
			}
		]
	}'

Docker Model Runner
How to use swadeshb/tivd-gsm8k-colocate with Docker Model Runner:
```
docker model run hf.co/swadeshb/tivd-gsm8k-colocate
```

swadeshb committed on Apr 15

Commit

11a6bae

verified ·

1 Parent(s): 9bb0012

Training in progress, step 100

Browse files

Files changed (12) hide show

.gitattributes +1 -0
adapter_config.json +46 -0
adapter_model.safetensors +3 -0
added_tokens.json +28 -0
chat_template.jinja +89 -0
merges.txt +0 -0
special_tokens_map.json +31 -0
tokenizer.json +3 -0
tokenizer_config.json +239 -0
train.py +169 -0
training_args.bin +3 -0
vocab.json +0 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text

adapter_config.json ADDED Viewed

	@@ -0,0 +1,46 @@

+{
+  "alora_invocation_tokens": null,
+  "alpha_pattern": {},
+  "arrow_config": null,
+  "auto_mapping": null,
+  "base_model_name_or_path": "Qwen/Qwen3-1.7B",
+  "bias": "none",
+  "corda_config": null,
+  "ensure_weight_tying": false,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_bias": false,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "peft_version": "0.18.1",
+  "qalora_group_size": 16,
+  "r": 16,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "o_proj",
+    "up_proj",
+    "v_proj",
+    "k_proj",
+    "down_proj",
+    "gate_proj"
+  ],
+  "target_parameters": null,
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}

adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:50840d5522aabae9de689ee8e71c195af20fbac5355e7a2522ce29b2773687e0
+size 69782384

added_tokens.json ADDED Viewed

	@@ -0,0 +1,28 @@

+{
+  "</think>": 151668,
+  "</tool_call>": 151658,
+  "</tool_response>": 151666,
+  "<think>": 151667,
+  "<tool_call>": 151657,
+  "<tool_response>": 151665,
+  "<|box_end|>": 151649,
+  "<|box_start|>": 151648,
+  "<|endoftext|>": 151643,
+  "<|file_sep|>": 151664,
+  "<|fim_middle|>": 151660,
+  "<|fim_pad|>": 151662,
+  "<|fim_prefix|>": 151659,
+  "<|fim_suffix|>": 151661,
+  "<|im_end|>": 151645,
+  "<|im_start|>": 151644,
+  "<|image_pad|>": 151655,
+  "<|object_ref_end|>": 151647,
+  "<|object_ref_start|>": 151646,
+  "<|quad_end|>": 151651,
+  "<|quad_start|>": 151650,
+  "<|repo_name|>": 151663,
+  "<|video_pad|>": 151656,
+  "<|vision_end|>": 151653,
+  "<|vision_pad|>": 151654,
+  "<|vision_start|>": 151652
+}

chat_template.jinja ADDED Viewed

	@@ -0,0 +1,89 @@

+{%- if tools %}
+    {{- '<|im_start|>system\n' }}
+    {%- if messages[0].role == 'system' %}
+        {{- messages[0].content + '\n\n' }}
+    {%- endif %}
+    {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
+    {%- for tool in tools %}
+        {{- "\n" }}
+        {{- tool | tojson }}
+    {%- endfor %}
+    {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
+{%- else %}
+    {%- if messages[0].role == 'system' %}
+        {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
+    {%- endif %}
+{%- endif %}
+{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
+{%- for message in messages[::-1] %}
+    {%- set index = (messages|length - 1) - loop.index0 %}
+    {%- if ns.multi_step_tool and message.role == "user" and message.content is string and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
+        {%- set ns.multi_step_tool = false %}
+        {%- set ns.last_query_index = index %}
+    {%- endif %}
+{%- endfor %}
+{%- for message in messages %}
+    {%- if message.content is string %}
+        {%- set content = message.content %}
+    {%- else %}
+        {%- set content = '' %}
+    {%- endif %}
+    {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
+        {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
+    {%- elif message.role == "assistant" %}
+        {%- set reasoning_content = '' %}
+        {%- if message.reasoning_content is string %}
+            {%- set reasoning_content = message.reasoning_content %}
+        {%- else %}
+            {%- if '</think>' in content %}
+                {%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
+                {%- set content = content.split('</think>')[-1].lstrip('\n') %}
+            {%- endif %}
+        {%- endif %}
+        {%- if loop.index0 > ns.last_query_index %}
+            {%- if loop.last or (not loop.last and reasoning_content) %}
+                {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
+            {%- else %}
+                {{- '<|im_start|>' + message.role + '\n' + content }}
+            {%- endif %}
+        {%- else %}
+            {{- '<|im_start|>' + message.role + '\n' + content }}
+        {%- endif %}
+        {%- if message.tool_calls %}
+            {%- for tool_call in message.tool_calls %}
+                {%- if (loop.first and content) or (not loop.first) %}
+                    {{- '\n' }}
+                {%- endif %}
+                {%- if tool_call.function %}
+                    {%- set tool_call = tool_call.function %}
+                {%- endif %}
+                {{- '<tool_call>\n{"name": "' }}
+                {{- tool_call.name }}
+                {{- '", "arguments": ' }}
+                {%- if tool_call.arguments is string %}
+                    {{- tool_call.arguments }}
+                {%- else %}
+                    {{- tool_call.arguments | tojson }}
+                {%- endif %}
+                {{- '}\n</tool_call>' }}
+            {%- endfor %}
+        {%- endif %}
+        {{- '<|im_end|>\n' }}
+    {%- elif message.role == "tool" %}
+        {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
+            {{- '<|im_start|>user' }}
+        {%- endif %}
+        {{- '\n<tool_response>\n' }}
+        {{- content }}
+        {{- '\n</tool_response>' }}
+        {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
+            {{- '<|im_end|>\n' }}
+        {%- endif %}
+    {%- endif %}
+{%- endfor %}
+{%- if add_generation_prompt %}
+    {{- '<|im_start|>assistant\n' }}
+    {%- if enable_thinking is defined and enable_thinking is false %}
+        {{- '<think>\n\n</think>\n\n' }}
+    {%- endif %}
+{%- endif %}

merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "additional_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>",
+    "<|object_ref_start|>",
+    "<|object_ref_end|>",
+    "<|box_start|>",
+    "<|box_end|>",
+    "<|quad_start|>",
+    "<|quad_end|>",
+    "<|vision_start|>",
+    "<|vision_end|>",
+    "<|vision_pad|>",
+    "<|image_pad|>",
+    "<|video_pad|>"
+  ],
+  "eos_token": {
+    "content": "<|im_end|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
+size 11422654

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,239 @@

+{
+  "add_bos_token": false,
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "151643": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151644": {
+      "content": "<|im_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151645": {
+      "content": "<|im_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151646": {
+      "content": "<|object_ref_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151647": {
+      "content": "<|object_ref_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151648": {
+      "content": "<|box_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151649": {
+      "content": "<|box_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151650": {
+      "content": "<|quad_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151651": {
+      "content": "<|quad_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151652": {
+      "content": "<|vision_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151653": {
+      "content": "<|vision_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151654": {
+      "content": "<|vision_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151655": {
+      "content": "<|image_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151656": {
+      "content": "<|video_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151657": {
+      "content": "<tool_call>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151658": {
+      "content": "</tool_call>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151659": {
+      "content": "<|fim_prefix|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151660": {
+      "content": "<|fim_middle|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151661": {
+      "content": "<|fim_suffix|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151662": {
+      "content": "<|fim_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151663": {
+      "content": "<|repo_name|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151664": {
+      "content": "<|file_sep|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151665": {
+      "content": "<tool_response>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151666": {
+      "content": "</tool_response>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151667": {
+      "content": "<think>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151668": {
+      "content": "</think>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    }
+  },
+  "additional_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>",
+    "<|object_ref_start|>",
+    "<|object_ref_end|>",
+    "<|box_start|>",
+    "<|box_end|>",
+    "<|quad_start|>",
+    "<|quad_end|>",
+    "<|vision_start|>",
+    "<|vision_end|>",
+    "<|vision_pad|>",
+    "<|image_pad|>",
+    "<|video_pad|>"
+  ],
+  "bos_token": null,
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<|im_end|>",
+  "errors": "replace",
+  "extra_special_tokens": {},
+  "model_max_length": 131072,
+  "pad_token": "<|endoftext|>",
+  "split_special_tokens": false,
+  "tokenizer_class": "Qwen2Tokenizer",
+  "unk_token": null
+}

train.py ADDED Viewed

	@@ -0,0 +1,169 @@

+from __future__ import annotations
+import os
+import re
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Optional
+from datasets import load_dataset
+from torch.utils.data import Dataset as TorchDataset
+from transformers import HfArgumentParser, set_seed
+from trainer.tivd.online_trainer import (
+    TIVDConfig,
+    TIVDTrainer,
+    assert_qwen_tokenizer_compatibility,
+    build_student_model,
+    build_teacher_model,
+    build_tokenizer,
+    copy_training_sources,
+    render_math_prompt,
+)
+@dataclass
+class DataArguments:
+    dataset_name: str = field(default="openai/gsm8k")
+    dataset_config_name: Optional[str] = field(default="main")
+    dataset_split: str = field(default="train")
+    question_column: str = field(default="question")
+    answer_column: str = field(default="answer")
+    final_answer_column: str = field(default="")
+    difficulty_column: str = field(default="")
+    topic_column: str = field(default="")
+    solution_columns: str = field(default="")
+    limit: Optional[int] = field(default=None)
+class PromptListDataset(TorchDataset):
+    """Simple Python dataset wrapper to avoid Arrow batched-indexing quirks in custom Trainer flows."""
+    def __init__(self, rows: list[dict]):
+        self.rows = rows
+    def __len__(self) -> int:
+        return len(self.rows)
+    def __getitem__(self, idx: int) -> dict:
+        return self.rows[idx]
+def _parse_gsm8k_final_answer(answer_text: Optional[str]) -> Optional[str]:
+    if not answer_text:
+        return None
+    match = re.search(r"####\s*(.+)$", answer_text.strip(), flags=re.MULTILINE)
+    if match:
+        return match.group(1).strip()
+    return answer_text.strip().splitlines()[-1].strip()
+def build_filtered_dataset(data_args: DataArguments, train_args: TIVDConfig) -> PromptListDataset:
+    load_kwargs = {"path": data_args.dataset_name, "split": data_args.dataset_split}
+    if data_args.dataset_config_name:
+        load_kwargs["name"] = data_args.dataset_config_name
+    dataset = load_dataset(**load_kwargs)
+    if data_args.difficulty_column and data_args.difficulty_column in dataset.column_names:
+        dataset = dataset.filter(
+            lambda ex: ex.get(data_args.difficulty_column) is not None
+            and float(ex[data_args.difficulty_column]) >= float(train_args.difficulty_threshold),
+            desc=f"Filtering difficulty >= {train_args.difficulty_threshold}",
+        )
+    if data_args.limit is not None:
+        dataset = dataset.select(range(min(len(dataset), data_args.limit)))
+    solution_columns = [col.strip() for col in data_args.solution_columns.split(",") if col.strip()]
+    rows: list[dict] = []
+    for example in dataset:
+        raw_answer = example.get(data_args.answer_column) if data_args.answer_column else None
+        if data_args.final_answer_column:
+            final_answer = example.get(data_args.final_answer_column)
+        else:
+            final_answer = _parse_gsm8k_final_answer(raw_answer)
+        row = {
+            "prompt": render_math_prompt(example[data_args.question_column]),
+            "question": example[data_args.question_column],
+            "final_answer": final_answer,
+            "answer": raw_answer,
+            "difficulty": float(example.get(data_args.difficulty_column, 0.0) or 0.0)
+            if data_args.difficulty_column and data_args.difficulty_column in example
+            else 0.0,
+            "topic": example.get(data_args.topic_column) if data_args.topic_column else None,
+        }
+        for col in solution_columns:
+            if col in example:
+                row[col] = example[col]
+        rows.append(row)
+    return PromptListDataset(rows)
+def main() -> None:
+    parser = HfArgumentParser((TIVDConfig, DataArguments))
+    train_args, data_args = parser.parse_args_into_dataclasses()
+    train_args.remove_unused_columns = False
+    train_args.label_names = []
+    if train_args.wandb_project:
+        os.environ.setdefault("WANDB_PROJECT", train_args.wandb_project)
+    if train_args.wandb_run_name:
+        os.environ.setdefault("WANDB_NAME", train_args.wandb_run_name)
+    Path(train_args.output_dir).mkdir(parents=True, exist_ok=True)
+    set_seed(train_args.seed)
+    world_size = int(os.environ.get("WORLD_SIZE", "1"))
+    if train_args.use_vllm and train_args.vllm_mode == "server" and world_size > 1:
+        raise ValueError(
+            "For this trainer, server-mode vLLM should be run with a single training process. "
+            "Use accelerate launch --num_processes 1 so training stays on one GPU and the vLLM server on another, "
+            "or use --vllm_mode colocate for same-GPU execution."
+        )
+    student_tokenizer = build_tokenizer(train_args.student_model_name_or_path, train_args.trust_remote_code)
+    teacher_tokenizer = build_tokenizer(train_args.teacher_model_name_or_path, train_args.trust_remote_code)
+    assert_qwen_tokenizer_compatibility(student_tokenizer, teacher_tokenizer)
+    train_dataset = build_filtered_dataset(data_args, train_args)
+    student_model = build_student_model(train_args)
+    teacher_model = build_teacher_model(train_args)
+    copy_training_sources(train_args.output_dir, __file__, Path(__file__).parent / "online_trainer.py")
+    trainer = TIVDTrainer(
+        model=student_model,
+        args=train_args,
+        tokenizer=student_tokenizer,
+        teacher_model=teacher_model,
+        target_model=None,
+        train_dataset=train_dataset,
+        eval_dataset=None,
+        ref_model=None,
+        source_file_paths=[__file__, str(Path(__file__).parent / "online_trainer.py")],
+    )
+    train_result = trainer.train(resume_from_checkpoint=train_args.resume_from_checkpoint)
+    trainer.save_model(train_args.output_dir)
+    student_tokenizer.save_pretrained(train_args.output_dir)
+    metrics = train_result.metrics
+    metrics["train_examples"] = len(train_dataset)
+    trainer.log_metrics("train", metrics)
+    trainer.save_metrics("train", metrics)
+    trainer.save_state()
+    if train_args.push_to_hub:
+        kwargs = {}
+        if train_args.hub_model_id:
+            kwargs["repo_id"] = train_args.hub_model_id
+        trainer.push_to_hub(**kwargs)
+if __name__ == "__main__":
+    main()

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ceef65fd996b93ee8eed0fe5e2a15deea11fd4f1913065b99c7b26e46bdde44f
+size 7761

vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff