ferrazzipietro committed on
Commit
4b54c6d
·
verified ·
1 Parent(s): 4864605

End of training

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: peft
3
+ base_model: ferrazzipietro/unsup-Qwen3-8B-datav3-only_mask_w_item_mesh
4
+ tags:
5
+ - base_model:adapter:ferrazzipietro/unsup-Qwen3-8B-datav3-only_mask_w_item_mesh
6
+ - lora
7
+ - transformers
8
+ pipeline_tag: text-generation
9
+ model-index:
10
+ - name: meshTask-unsup-Qwen3-8B-datav3-only_mask_w_item_mesh
11
+ results: []
12
+ ---
13
+
14
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
15
+ should probably proofread and complete it, then remove this comment. -->
16
+
17
+ # meshTask-unsup-Qwen3-8B-datav3-only_mask_w_item_mesh
18
+
19
+ This model is a fine-tuned version of [ferrazzipietro/unsup-Qwen3-8B-datav3-only_mask_w_item_mesh](https://huggingface.co/ferrazzipietro/unsup-Qwen3-8B-datav3-only_mask_w_item_mesh) on an unknown dataset.
20
+ It achieves the following results on the evaluation set:
21
+ - Loss: 1.5778
22
+ - F1 Micro: 0.8977
23
+ - F1 Macro: 0.8905
24
+ - F1 Weighted: 0.8977
25
+ - Class/f1 Results Per Class: {}
26
+ - Items/f1 Scores Per Item: {'Disease Models, Animal': 0.8571184000622544, 'Animals': 0.9414907872696818, 'Pregnancy': 0.9134651504285763, 'Aged': 0.874931822949444, 'Time Factors': 0.621755779322082, 'Surveys and Questionnaires': 0.8991391167031735, 'Cell Line, Tumor': 0.8556286549707601, 'Signal Transduction': 0.8322662440570523, 'Adolescent': 0.8287955699123212, 'Prognosis': 0.8414678860638821, 'Male': 0.7407382861687322, 'Risk Factors': 0.8782002726859567, 'Mice': 0.9083138977163718, 'Treatment Outcome': 0.8537806547787833}
27
+
28
+ ## Model description
29
+
30
+ More information needed
31
+
32
+ ## Intended uses & limitations
33
+
34
+ More information needed
35
+
36
+ ## Training and evaluation data
37
+
38
+ More information needed
39
+
40
+ ## Training procedure
41
+
42
+ ### Training hyperparameters
43
+
44
+ The following hyperparameters were used during training:
45
+ - learning_rate: 0.0003
46
+ - train_batch_size: 32
47
+ - eval_batch_size: 32
48
+ - seed: 42
49
+ - distributed_type: multi-GPU
50
+ - gradient_accumulation_steps: 2
51
+ - total_train_batch_size: 64
52
+ - optimizer: AdamW (OptimizerNames.ADAMW_TORCH) with betas=(0.9, 0.999) and epsilon=1e-07; no additional optimizer arguments
53
+ - lr_scheduler_type: cosine
54
+ - lr_scheduler_warmup_ratio: 0.1
55
+ - num_epochs: 1
56
+
57
+ ### Training results
58
+
59
+ | Training Loss | Epoch | Step | Validation Loss | F1 Micro | F1 Macro | F1 Weighted | Class/f1 Results Per Class | Items/f1 Scores Per Item |
60
+ |:-------------:|:------:|:----:|:---------------:|:--------:|:--------:|:-----------:|:--------------------------:|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|
61
+ | 4.1828 | 0.0517 | 20 | 2.1711 | 0.0 | 0.0 | 0.0 | {} | {'Pregnancy': 0.0, 'Animals': 0.0, 'Aged': 0.0, 'Disease Models, Animal': 0.0, 'Time Factors': 0.0, 'Surveys and Questionnaires': 0.0, 'Cell Line, Tumor': 0.0, 'Signal Transduction': 0.0, 'Adolescent': 0.0, 'Prognosis': 0.0, 'Male': 0.0, 'Risk Factors': 0.0, 'Mice': 0.0, 'Treatment Outcome': 0.0} |
62
+ | 3.2031 | 0.1034 | 40 | 1.6619 | 0.7409 | 0.6817 | 0.7123 | {} | {'Pregnancy': 0.9137502822307519, 'Animals': 0.8154411764705882, 'Aged': 0.34839842035980695, 'Disease Models, Animal': 0.8119457485654669, 'Time Factors': 0.577951388888889, 'Surveys and Questionnaires': 0.911839351707556, 'Cell Line, Tumor': 0.7375497567448033, 'Signal Transduction': 0.7842612700510916, 'Adolescent': 0.5668395668395668, 'Prognosis': 0.8145669517304712, 'Male': 0.2877874694066782, 'Risk Factors': 0.8322033492249827, 'Mice': 0.7040664442268001, 'Treatment Outcome': 0.6655943881786486} |
63
+ | 3.0344 | 0.1550 | 60 | 1.6264 | 0.8385 | 0.2059 | 0.8353 | {} | {'Pregnancy': 0.6353566591878398, 'Animals': 0.8834189608177866, 'Aged': 0.6418811934399662, 'Disease Models, Animal': 0.8171617817519456, 'Time Factors': 0.46356855995410207, 'Surveys and Questionnaires': 0.9200821290373529, 'Cell Line, Tumor': 0.6153378337286384, 'Signal Transduction': 0.7662248235496643, 'Adolescent': 0.6860733521415747, 'Prognosis': 0.26052005034505066, 'Male': 0.5031009812390635, 'Risk Factors': 0.8302347046413503, 'Mice': 0.7567812520230466, 'Treatment Outcome': 0.704342330956035} |
64
+ | 3.0719 | 0.2067 | 80 | 1.6134 | 0.8612 | 0.5699 | 0.8617 | {} | {'Pregnancy': 0.935798319327731, 'Animals': 0.9204848606109111, 'Aged': 0.5928352620830497, 'Disease Models, Animal': 0.8220279427219332, 'Time Factors': 0.5357781897316031, 'Surveys and Questionnaires': 0.9180774402648002, 'Cell Line, Tumor': 0.8269592476489028, 'Signal Transduction': 0.8583496769482862, 'Adolescent': 0.507077856420627, 'Prognosis': 0.8353105095541402, 'Male': 0.6488762559492333, 'Risk Factors': 0.8466263378315495, 'Mice': 0.8460322659471038, 'Treatment Outcome': 0.8440387777084868} |
65
+ | 3.0094 | 0.2584 | 100 | 1.6056 | 0.8625 | 0.8586 | 0.8639 | {} | {'Pregnancy': 0.935798319327731, 'Animals': 0.9129560271882369, 'Aged': 0.606174869448235, 'Disease Models, Animal': 0.8165394402035624, 'Time Factors': 0.6113454367626383, 'Surveys and Questionnaires': 0.9130405405405405, 'Cell Line, Tumor': 0.868859649122807, 'Signal Transduction': 0.843268509435051, 'Adolescent': 0.7964833520389076, 'Prognosis': 0.8244682911711153, 'Male': 0.6058505630905477, 'Risk Factors': 0.8467261904761905, 'Mice': 0.8845154845154846, 'Treatment Outcome': 0.8444458241817943} |
66
+ | 3.0125 | 0.3101 | 120 | 1.5999 | 0.8627 | 0.5730 | 0.8643 | {} | {'Pregnancy': 0.9474740807964139, 'Animals': 0.9125069715560513, 'Aged': 0.5767102615694165, 'Disease Models, Animal': 0.8112977099236641, 'Time Factors': 0.42706633031607066, 'Surveys and Questionnaires': 0.9191919191919191, 'Cell Line, Tumor': 0.868859649122807, 'Signal Transduction': 0.8505037587204773, 'Adolescent': 0.8275158533223049, 'Prognosis': 0.8305972482801751, 'Male': 0.6648556073938463, 'Risk Factors': 0.8260135135135135, 'Mice': 0.8729885057471265, 'Treatment Outcome': 0.8514155223519448} |
67
+ | 3.0234 | 0.3618 | 140 | 1.5959 | 0.8823 | 0.5829 | 0.8814 | {} | {'Pregnancy': 0.9276515151515152, 'Animals': 0.9300643799472296, 'Aged': 0.825487012987013, 'Disease Models, Animal': 0.8367159633716443, 'Time Factors': 0.3774724065280966, 'Surveys and Questionnaires': 0.9111685375111039, 'Cell Line, Tumor': 0.8269592476489028, 'Signal Transduction': 0.832628763695971, 'Adolescent': 0.763444739351148, 'Prognosis': 0.8012042113760973, 'Male': 0.7561162038645535, 'Risk Factors': 0.8419342462750257, 'Mice': 0.8750276316939385, 'Treatment Outcome': 0.857577734290063} |
68
+ | 3.0203 | 0.4134 | 160 | 1.5910 | 0.8850 | 0.8783 | 0.8847 | {} | {'Pregnancy': 0.9409673929840828, 'Animals': 0.9231318905675786, 'Aged': 0.8040116086844715, 'Disease Models, Animal': 0.8427006932583141, 'Time Factors': 0.5864715447154472, 'Surveys and Questionnaires': 0.9200821290373529, 'Cell Line, Tumor': 0.8568804077278653, 'Signal Transduction': 0.8706002181300307, 'Adolescent': 0.7920135938827527, 'Prognosis': 0.8318610506550591, 'Male': 0.7630274032187908, 'Risk Factors': 0.8485100890719854, 'Mice': 0.9004015077023926, 'Treatment Outcome': 0.855926055926056} |
69
+ | 2.9797 | 0.4651 | 180 | 1.5876 | 0.8886 | 0.8838 | 0.8891 | {} | {'Pregnancy': 0.9409673929840828, 'Animals': 0.9201746582259984, 'Aged': 0.8395160739881237, 'Disease Models, Animal': 0.8396327713948244, 'Time Factors': 0.5930467091295117, 'Surveys and Questionnaires': 0.9200821290373529, 'Cell Line, Tumor': 0.8610551751913009, 'Signal Transduction': 0.8706002181300307, 'Adolescent': 0.8173591114767585, 'Prognosis': 0.8175925925925926, 'Male': 0.7521625934324347, 'Risk Factors': 0.8388713021790302, 'Mice': 0.9027144341559046, 'Treatment Outcome': 0.8497952497952498} |
70
+ | 2.9703 | 0.5168 | 200 | 1.5854 | 0.8865 | 0.8797 | 0.8861 | {} | {'Pregnancy': 0.9276515151515152, 'Animals': 0.9269061445432276, 'Aged': 0.8180563269840233, 'Disease Models, Animal': 0.8438264585271719, 'Time Factors': 0.5962016260162601, 'Surveys and Questionnaires': 0.9200821290373529, 'Cell Line, Tumor': 0.8486956521739131, 'Signal Transduction': 0.8353187515916358, 'Adolescent': 0.8196286472148542, 'Prognosis': 0.8175925925925926, 'Male': 0.7602640264026403, 'Risk Factors': 0.8467261904761905, 'Mice': 0.9025708061002178, 'Treatment Outcome': 0.851726089417091} |
71
+ | 2.9609 | 0.5685 | 220 | 1.5826 | 0.8878 | 0.8834 | 0.8884 | {} | {'Pregnancy': 0.9276515151515152, 'Animals': 0.9164388842164284, 'Aged': 0.8321133412042503, 'Disease Models, Animal': 0.850780742816141, 'Time Factors': 0.5917817014446228, 'Surveys and Questionnaires': 0.9163026630970833, 'Cell Line, Tumor': 0.8610551751913009, 'Signal Transduction': 0.8784624334362554, 'Adolescent': 0.8175048355899419, 'Prognosis': 0.8300552104899931, 'Male': 0.7568753010511999, 'Risk Factors': 0.8320130475302889, 'Mice': 0.8981269494937079, 'Treatment Outcome': 0.8481004024282108} |
72
+ | 2.9797 | 0.6202 | 240 | 1.5811 | 0.8916 | 0.8870 | 0.8921 | {} | {'Pregnancy': 0.9409673929840828, 'Animals': 0.9162113252631023, 'Aged': 0.8606331076736886, 'Disease Models, Animal': 0.8500224014336917, 'Time Factors': 0.5865385995893814, 'Surveys and Questionnaires': 0.9163026630970833, 'Cell Line, Tumor': 0.8730665646293543, 'Signal Transduction': 0.8745464343452487, 'Adolescent': 0.8153679065978822, 'Prognosis': 0.8369257219268362, 'Male': 0.7538794265619533, 'Risk Factors': 0.8311228224271703, 'Mice': 0.9004015077023926, 'Treatment Outcome': 0.8468341527761123} |
73
+ | 2.9594 | 0.6718 | 260 | 1.5797 | 0.8901 | 0.8863 | 0.8909 | {} | {'Pregnancy': 0.9659930561737737, 'Animals': 0.9159795630725863, 'Aged': 0.8556186353625492, 'Disease Models, Animal': 0.854293588143838, 'Time Factors': 0.6112509549035078, 'Surveys and Questionnaires': 0.9206558005418544, 'Cell Line, Tumor': 0.8974159292035397, 'Signal Transduction': 0.8749727841982877, 'Adolescent': 0.8274987316083207, 'Prognosis': 0.8353564694491158, 'Male': 0.7484183791272223, 'Risk Factors': 0.8154121863799283, 'Mice': 0.9049287118977385, 'Treatment Outcome': 0.8429489077023267} |
74
+ | 3.0016 | 0.7235 | 280 | 1.5781 | 0.8881 | 0.8810 | 0.8875 | {} | {'Pregnancy': 0.9383930587362513, 'Animals': 0.9193819310314895, 'Aged': 0.8699763593380614, 'Disease Models, Animal': 0.8096759291882962, 'Time Factors': 0.5272428794221456, 'Surveys and Questionnaires': 0.8811702925731433, 'Cell Line, Tumor': 0.8486956521739131, 'Signal Transduction': 0.8067113024071417, 'Adolescent': 0.7924629016760969, 'Prognosis': 0.8133796463370624, 'Male': 0.7781196828729682, 'Risk Factors': 0.8494152046783625, 'Mice': 0.9094948502160247, 'Treatment Outcome': 0.8501525165226234} |
75
+ | 2.9047 | 0.7752 | 300 | 1.5768 | 0.8886 | 0.8844 | 0.8893 | {} | {'Pregnancy': 0.9537362238101005, 'Animals': 0.9162113252631023, 'Aged': 0.8386404968603095, 'Disease Models, Animal': 0.8488471096405308, 'Time Factors': 0.5969163274880495, 'Surveys and Questionnaires': 0.9200821290373529, 'Cell Line, Tumor': 0.8862218780917968, 'Signal Transduction': 0.8754280821917808, 'Adolescent': 0.8210526315789474, 'Prognosis': 0.8337296073284957, 'Male': 0.7484183791272223, 'Risk Factors': 0.8269930179426774, 'Mice': 0.9003790595225899, 'Treatment Outcome': 0.8455383428872294} |
76
+ | 3.0 | 0.8269 | 320 | 1.5761 | 0.8938 | 0.8888 | 0.8940 | {} | {'Pregnancy': 0.9517676767676768, 'Animals': 0.9145822698655777, 'Aged': 0.8628094870158229, 'Disease Models, Animal': 0.8497156957408003, 'Time Factors': 0.5910303701867701, 'Surveys and Questionnaires': 0.9200821290373529, 'Cell Line, Tumor': 0.868859649122807, 'Signal Transduction': 0.8686971235194585, 'Adolescent': 0.8293936785143279, 'Prognosis': 0.8452885054177677, 'Male': 0.7597883597883598, 'Risk Factors': 0.8345780133301213, 'Mice': 0.9004341415465269, 'Treatment Outcome': 0.8461607949412827} |
77
+ | 2.9703 | 0.8786 | 340 | 1.5758 | 0.8898 | 0.8854 | 0.8904 | {} | {'Pregnancy': 0.9537362238101005, 'Animals': 0.918527583680953, 'Aged': 0.8538615965989608, 'Disease Models, Animal': 0.8444952271152011, 'Time Factors': 0.5939826302729528, 'Surveys and Questionnaires': 0.9169085504458822, 'Cell Line, Tumor': 0.8918556936053801, 'Signal Transduction': 0.8784624334362554, 'Adolescent': 0.8208387206947867, 'Prognosis': 0.8320563069853515, 'Male': 0.7484183791272223, 'Risk Factors': 0.821128374483107, 'Mice': 0.9049072840897449, 'Treatment Outcome': 0.8455383428872294} |
78
+ | 2.9781 | 0.9302 | 360 | 1.5758 | 0.8928 | 0.8882 | 0.8933 | {} | {'Pregnancy': 0.9537362238101005, 'Animals': 0.9187471292023011, 'Aged': 0.8626156433978133, 'Disease Models, Animal': 0.8481744922578447, 'Time Factors': 0.58528276175335, 'Surveys and Questionnaires': 0.9200821290373529, 'Cell Line, Tumor': 0.8805125836989147, 'Signal Transduction': 0.8784624334362554, 'Adolescent': 0.8229934924078091, 'Prognosis': 0.8419341216216216, 'Male': 0.7568753010511999, 'Risk Factors': 0.8311948763288983, 'Mice': 0.9094559160930842, 'Treatment Outcome': 0.8474925373134329} |
79
+ | 2.9531 | 0.9819 | 380 | 1.5759 | 0.8929 | 0.8883 | 0.8934 | {} | {'Pregnancy': 0.9409673929840828, 'Animals': 0.9187471292023011, 'Aged': 0.8606331076736886, 'Disease Models, Animal': 0.855790770609319, 'Time Factors': 0.5918812745525971, 'Surveys and Questionnaires': 0.9200821290373529, 'Cell Line, Tumor': 0.8805125836989147, 'Signal Transduction': 0.8784624334362554, 'Adolescent': 0.8203830068236848, 'Prognosis': 0.8452885054177677, 'Male': 0.7545061283345349, 'Risk Factors': 0.8277989161766401, 'Mice': 0.9049287118977385, 'Treatment Outcome': 0.8501043279262301} |
80
+
81
+
82
+ ### Framework versions
83
+
84
+ - PEFT 0.18.1
85
+ - Transformers 4.51.0
86
+ - Pytorch 2.8.0+cu128
87
+ - Datasets 3.6.0
88
+ - Tokenizers 0.21.0
adapter_config.json ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alora_invocation_tokens": null,
3
+ "alpha_pattern": {},
4
+ "arrow_config": null,
5
+ "auto_mapping": null,
6
+ "base_model_name_or_path": "ferrazzipietro/unsup-Qwen3-8B-datav3-only_mask_w_item_mesh",
7
+ "bias": "none",
8
+ "corda_config": null,
9
+ "ensure_weight_tying": false,
10
+ "eva_config": null,
11
+ "exclude_modules": null,
12
+ "fan_in_fan_out": false,
13
+ "inference_mode": true,
14
+ "init_lora_weights": true,
15
+ "layer_replication": null,
16
+ "layers_pattern": null,
17
+ "layers_to_transform": null,
18
+ "loftq_config": {},
19
+ "lora_alpha": 16,
20
+ "lora_bias": false,
21
+ "lora_dropout": 0.05,
22
+ "megatron_config": null,
23
+ "megatron_core": "megatron.core",
24
+ "modules_to_save": null,
25
+ "peft_type": "LORA",
26
+ "peft_version": "0.18.1",
27
+ "qalora_group_size": 16,
28
+ "r": 32,
29
+ "rank_pattern": {},
30
+ "revision": null,
31
+ "target_modules": [
32
+ "up_proj",
33
+ "q_proj",
34
+ "gate_proj",
35
+ "down_proj",
36
+ "k_proj",
37
+ "v_proj",
38
+ "o_proj"
39
+ ],
40
+ "target_parameters": null,
41
+ "task_type": "CAUSAL_LM",
42
+ "trainable_token_indices": null,
43
+ "use_dora": false,
44
+ "use_qalora": false,
45
+ "use_rslora": false
46
+ }
adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:602376375acbff1ac14029cf089add7c842fae28d6327ece2d64bf77f378ec99
3
+ size 349243752
added_tokens.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "</think>": 151668,
3
+ "</tool_call>": 151658,
4
+ "</tool_response>": 151666,
5
+ "<think>": 151667,
6
+ "<tool_call>": 151657,
7
+ "<tool_response>": 151665,
8
+ "<|box_end|>": 151649,
9
+ "<|box_start|>": 151648,
10
+ "<|endoftext|>": 151643,
11
+ "<|file_sep|>": 151664,
12
+ "<|fim_middle|>": 151660,
13
+ "<|fim_pad|>": 151662,
14
+ "<|fim_prefix|>": 151659,
15
+ "<|fim_suffix|>": 151661,
16
+ "<|im_end|>": 151645,
17
+ "<|im_start|>": 151644,
18
+ "<|image_pad|>": 151655,
19
+ "<|object_ref_end|>": 151647,
20
+ "<|object_ref_start|>": 151646,
21
+ "<|quad_end|>": 151651,
22
+ "<|quad_start|>": 151650,
23
+ "<|repo_name|>": 151663,
24
+ "<|video_pad|>": 151656,
25
+ "<|vision_end|>": 151653,
26
+ "<|vision_pad|>": 151654,
27
+ "<|vision_start|>": 151652
28
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
special_tokens_map.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>",
5
+ "<|object_ref_start|>",
6
+ "<|object_ref_end|>",
7
+ "<|box_start|>",
8
+ "<|box_end|>",
9
+ "<|quad_start|>",
10
+ "<|quad_end|>",
11
+ "<|vision_start|>",
12
+ "<|vision_end|>",
13
+ "<|vision_pad|>",
14
+ "<|image_pad|>",
15
+ "<|video_pad|>"
16
+ ],
17
+ "eos_token": {
18
+ "content": "<|im_end|>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ },
24
+ "pad_token": {
25
+ "content": "<|endoftext|>",
26
+ "lstrip": false,
27
+ "normalized": false,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ }
31
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
3
+ size 11422654
tokenizer_config.json ADDED
@@ -0,0 +1,241 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_eos_token": false,
4
+ "add_prefix_space": false,
5
+ "added_tokens_decoder": {
6
+ "151643": {
7
+ "content": "<|endoftext|>",
8
+ "lstrip": false,
9
+ "normalized": false,
10
+ "rstrip": false,
11
+ "single_word": false,
12
+ "special": true
13
+ },
14
+ "151644": {
15
+ "content": "<|im_start|>",
16
+ "lstrip": false,
17
+ "normalized": false,
18
+ "rstrip": false,
19
+ "single_word": false,
20
+ "special": true
21
+ },
22
+ "151645": {
23
+ "content": "<|im_end|>",
24
+ "lstrip": false,
25
+ "normalized": false,
26
+ "rstrip": false,
27
+ "single_word": false,
28
+ "special": true
29
+ },
30
+ "151646": {
31
+ "content": "<|object_ref_start|>",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false,
36
+ "special": true
37
+ },
38
+ "151647": {
39
+ "content": "<|object_ref_end|>",
40
+ "lstrip": false,
41
+ "normalized": false,
42
+ "rstrip": false,
43
+ "single_word": false,
44
+ "special": true
45
+ },
46
+ "151648": {
47
+ "content": "<|box_start|>",
48
+ "lstrip": false,
49
+ "normalized": false,
50
+ "rstrip": false,
51
+ "single_word": false,
52
+ "special": true
53
+ },
54
+ "151649": {
55
+ "content": "<|box_end|>",
56
+ "lstrip": false,
57
+ "normalized": false,
58
+ "rstrip": false,
59
+ "single_word": false,
60
+ "special": true
61
+ },
62
+ "151650": {
63
+ "content": "<|quad_start|>",
64
+ "lstrip": false,
65
+ "normalized": false,
66
+ "rstrip": false,
67
+ "single_word": false,
68
+ "special": true
69
+ },
70
+ "151651": {
71
+ "content": "<|quad_end|>",
72
+ "lstrip": false,
73
+ "normalized": false,
74
+ "rstrip": false,
75
+ "single_word": false,
76
+ "special": true
77
+ },
78
+ "151652": {
79
+ "content": "<|vision_start|>",
80
+ "lstrip": false,
81
+ "normalized": false,
82
+ "rstrip": false,
83
+ "single_word": false,
84
+ "special": true
85
+ },
86
+ "151653": {
87
+ "content": "<|vision_end|>",
88
+ "lstrip": false,
89
+ "normalized": false,
90
+ "rstrip": false,
91
+ "single_word": false,
92
+ "special": true
93
+ },
94
+ "151654": {
95
+ "content": "<|vision_pad|>",
96
+ "lstrip": false,
97
+ "normalized": false,
98
+ "rstrip": false,
99
+ "single_word": false,
100
+ "special": true
101
+ },
102
+ "151655": {
103
+ "content": "<|image_pad|>",
104
+ "lstrip": false,
105
+ "normalized": false,
106
+ "rstrip": false,
107
+ "single_word": false,
108
+ "special": true
109
+ },
110
+ "151656": {
111
+ "content": "<|video_pad|>",
112
+ "lstrip": false,
113
+ "normalized": false,
114
+ "rstrip": false,
115
+ "single_word": false,
116
+ "special": true
117
+ },
118
+ "151657": {
119
+ "content": "<tool_call>",
120
+ "lstrip": false,
121
+ "normalized": false,
122
+ "rstrip": false,
123
+ "single_word": false,
124
+ "special": false
125
+ },
126
+ "151658": {
127
+ "content": "</tool_call>",
128
+ "lstrip": false,
129
+ "normalized": false,
130
+ "rstrip": false,
131
+ "single_word": false,
132
+ "special": false
133
+ },
134
+ "151659": {
135
+ "content": "<|fim_prefix|>",
136
+ "lstrip": false,
137
+ "normalized": false,
138
+ "rstrip": false,
139
+ "single_word": false,
140
+ "special": false
141
+ },
142
+ "151660": {
143
+ "content": "<|fim_middle|>",
144
+ "lstrip": false,
145
+ "normalized": false,
146
+ "rstrip": false,
147
+ "single_word": false,
148
+ "special": false
149
+ },
150
+ "151661": {
151
+ "content": "<|fim_suffix|>",
152
+ "lstrip": false,
153
+ "normalized": false,
154
+ "rstrip": false,
155
+ "single_word": false,
156
+ "special": false
157
+ },
158
+ "151662": {
159
+ "content": "<|fim_pad|>",
160
+ "lstrip": false,
161
+ "normalized": false,
162
+ "rstrip": false,
163
+ "single_word": false,
164
+ "special": false
165
+ },
166
+ "151663": {
167
+ "content": "<|repo_name|>",
168
+ "lstrip": false,
169
+ "normalized": false,
170
+ "rstrip": false,
171
+ "single_word": false,
172
+ "special": false
173
+ },
174
+ "151664": {
175
+ "content": "<|file_sep|>",
176
+ "lstrip": false,
177
+ "normalized": false,
178
+ "rstrip": false,
179
+ "single_word": false,
180
+ "special": false
181
+ },
182
+ "151665": {
183
+ "content": "<tool_response>",
184
+ "lstrip": false,
185
+ "normalized": false,
186
+ "rstrip": false,
187
+ "single_word": false,
188
+ "special": false
189
+ },
190
+ "151666": {
191
+ "content": "</tool_response>",
192
+ "lstrip": false,
193
+ "normalized": false,
194
+ "rstrip": false,
195
+ "single_word": false,
196
+ "special": false
197
+ },
198
+ "151667": {
199
+ "content": "<think>",
200
+ "lstrip": false,
201
+ "normalized": false,
202
+ "rstrip": false,
203
+ "single_word": false,
204
+ "special": false
205
+ },
206
+ "151668": {
207
+ "content": "</think>",
208
+ "lstrip": false,
209
+ "normalized": false,
210
+ "rstrip": false,
211
+ "single_word": false,
212
+ "special": false
213
+ }
214
+ },
215
+ "additional_special_tokens": [
216
+ "<|im_start|>",
217
+ "<|im_end|>",
218
+ "<|object_ref_start|>",
219
+ "<|object_ref_end|>",
220
+ "<|box_start|>",
221
+ "<|box_end|>",
222
+ "<|quad_start|>",
223
+ "<|quad_end|>",
224
+ "<|vision_start|>",
225
+ "<|vision_end|>",
226
+ "<|vision_pad|>",
227
+ "<|image_pad|>",
228
+ "<|video_pad|>"
229
+ ],
230
+ "bos_token": null,
231
+ "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0].role == 'system' %}\n {{- messages[0].content + '\\n\\n' }}\n {%- endif %}\n {{- \"# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0].role == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0].content + '<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}\n{%- for message in messages[::-1] %}\n {%- set index = (messages|length - 1) - loop.index0 %}\n {%- if ns.multi_step_tool and message.role == \"user\" and message.content is string and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}\n {%- set ns.multi_step_tool = false %}\n {%- set ns.last_query_index = index %}\n {%- endif %}\n{%- endfor %}\n{%- for message in messages %}\n {%- if message.content is string %}\n {%- set content = message.content %}\n {%- else %}\n {%- set content = '' %}\n {%- endif %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) %}\n {{- '<|im_start|>' + message.role + '\\n' + content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {%- set reasoning_content = '' %}\n {%- if message.reasoning_content is string %}\n {%- set reasoning_content = message.reasoning_content %}\n {%- else %}\n {%- if '</think>' in content %}\n {%- set reasoning_content = 
content.split('</think>')[0].rstrip('\\n').split('<think>')[-1].lstrip('\\n') %}\n {%- set content = content.split('</think>')[-1].lstrip('\\n') %}\n {%- endif %}\n {%- endif %}\n {%- if loop.index0 > ns.last_query_index %}\n {%- if loop.last or (not loop.last and reasoning_content) %}\n {{- '<|im_start|>' + message.role + '\\n<think>\\n' + reasoning_content.strip('\\n') + '\\n</think>\\n\\n' + content.lstrip('\\n') }}\n {%- else %}\n {{- '<|im_start|>' + message.role + '\\n' + content }}\n {%- endif %}\n {%- else %}\n {{- '<|im_start|>' + message.role + '\\n' + content }}\n {%- endif %}\n {%- if message.tool_calls %}\n {%- for tool_call in message.tool_calls %}\n {%- if (loop.first and content) or (not loop.first) %}\n {{- '\\n' }}\n {%- endif %}\n {%- if tool_call.function %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {%- if tool_call.arguments is string %}\n {{- tool_call.arguments }}\n {%- else %}\n {{- tool_call.arguments | tojson }}\n {%- endif %}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {%- endif %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if loop.first or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n {%- if enable_thinking is defined and enable_thinking is false %}\n {{- '<think>\\n\\n</think>\\n\\n' }}\n {%- endif %}\n{%- endif %}",
232
+ "clean_up_tokenization_spaces": false,
233
+ "eos_token": "<|im_end|>",
234
+ "errors": "replace",
235
+ "extra_special_tokens": {},
236
+ "model_max_length": 40960,
237
+ "pad_token": "<|endoftext|>",
238
+ "split_special_tokens": false,
239
+ "tokenizer_class": "Qwen2Tokenizer",
240
+ "unk_token": null
241
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3144bcf5b7f6ca4630834fe78a149dbe0798ba5d335ef2176e4c08023ec3f6d
3
+ size 7505
vocab.json ADDED
The diff for this file is too large to render. See raw diff