CaffeineThief committed on
Commit
5831358
·
verified ·
1 Parent(s): 1e739d9

Model save

Browse files
Files changed (2) hide show
  1. README.md +134 -0
  2. generation_config.json +8 -0
README.md ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: transformers
3
+ license: apache-2.0
4
+ base_model: kakaocorp/kanana-1.5-2.1b-instruct-2505
5
+ tags:
6
+ - axolotl
7
+ - generated_from_trainer
8
+ datasets:
9
+ - KO_100.jsonl
10
+ model-index:
11
+ - name: ttp_sft_kanana-1.5_ko_100
12
+   results: []
13
+ ---
14
+
15
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
16
+ should probably proofread and complete it, then remove this comment. -->
17
+
18
+ [<img src="https://raw.githubusercontent.com/axolotl-ai-cloud/axolotl/main/image/axolotl-badge-web.png" alt="Built with Axolotl" width="200" height="32"/>](https://github.com/axolotl-ai-cloud/axolotl)
19
+ <details><summary>See axolotl config</summary>
20
+
21
+ axolotl version: `0.12.2`
22
+ ```yaml
23
+ base_model: kakaocorp/kanana-1.5-2.1b-instruct-2505
24
+ hf_cache_dir: ../../../../data5/models
25
+
26
+ load_in_8bit: false
27
+ load_in_4bit: false
28
+
29
+ datasets:
30
+ - path: KO_100.jsonl
31
+   type: chat_template
32
+   split: train
33
+
34
+ dataset_prepared_path: preprocess
35
+ val_set_size: 0
36
+ output_dir: ./outputs-ko100
37
+ dataloader_num_workers: 32
38
+
39
+ sequence_len: 8192
40
+ sample_packing: false
41
+ eval_sample_packing: false
42
+ pad_to_sequence_len: false
43
+
44
+ plugins:
45
+ - axolotl.integrations.liger.LigerPlugin
46
+ liger_rope: true
47
+ liger_rms_norm: true
48
+ liger_swiglu: true
49
+ liger_fused_linear_cross_entropy: true
50
+
51
+ wandb_project: TTP_SFT_LLM_RE
52
+ wandb_entity:
53
+ wandb_watch:
54
+ wandb_name: CaffeineThief/ttp_sft_kanana-1.5_ko_100
55
+ wandb_log_model:
56
+ hub_model_id: CaffeineThief/ttp_sft_kanana-1.5_ko_100
57
+ hub_private_repo: false
58
+
59
+ gradient_accumulation_steps: 4
60
+ micro_batch_size: 4
61
+ num_epochs: 2
62
+ optimizer: adamw_torch_fused
63
+ lr_scheduler: cosine
64
+ learning_rate: 2e-5
65
+
66
+ bf16: auto
67
+ tf32: false
68
+
69
+ gradient_checkpointing: false
70
+ resume_from_checkpoint:
71
+ logging_steps: 1
72
+ flash_attention: false
73
+
74
+ warmup_ratio: 0.05
75
+ weight_decay: 0.01
76
+ # evals_per_epoch: 1
77
+ saves_per_epoch: 0
78
+
79
+ fsdp:
80
+ - full_shard
81
+ - auto_wrap
82
+ fsdp_config:
83
+   fsdp_state_dict_type: FULL_STATE_DICT
84
+   fsdp_transformer_layer_cls_to_wrap: LlamaDecoderLayer
85
+   fsdp_activation_checkpointing: true
86
+ ```
87
+
88
+ </details><br>
89
+
90
+ # ttp_sft_kanana-1.5_ko_100
91
+
92
+ This model is a fine-tuned version of [kakaocorp/kanana-1.5-2.1b-instruct-2505](https://huggingface.co/kakaocorp/kanana-1.5-2.1b-instruct-2505) on the KO_100.jsonl dataset.
93
+
94
+ ## Model description
95
+
96
+ More information needed
97
+
98
+ ## Intended uses & limitations
99
+
100
+ More information needed
101
+
102
+ ## Training and evaluation data
103
+
104
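+ The model was trained on the `train` split of `KO_100.jsonl`, formatted with the chat template (`type: chat_template`). No validation set was held out (`val_set_size: 0`), so no evaluation metrics are reported.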
105
+
106
+ ## Training procedure
107
+
108
+ ### Training hyperparameters
109
+
110
+ The following hyperparameters were used during training:
111
+ - learning_rate: 2e-05
112
+ - train_batch_size: 4
113
+ - eval_batch_size: 4
114
+ - seed: 42
115
+ - distributed_type: multi-GPU
116
+ - num_devices: 3
117
+ - gradient_accumulation_steps: 4
118
+ - total_train_batch_size: 48
119
+ - total_eval_batch_size: 12
120
+ - optimizer: adamw_torch_fused with betas=(0.9,0.999) and epsilon=1e-08 (no additional optimizer arguments)
121
+ - lr_scheduler_type: cosine
122
+ - lr_scheduler_warmup_steps: 2
123
+ - training_steps: 49
124
+
125
+ ### Training results
126
+
127
+
128
+
129
+ ### Framework versions
130
+
131
+ - Transformers 4.55.2
132
+ - Pytorch 2.6.0+cu124
133
+ - Datasets 4.0.0
134
+ - Tokenizers 0.21.4
generation_config.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 128000,
4
+ "do_sample": true,
5
+ "eos_token_id": 128009,
6
+ "pad_token_id": 128001,
7
+ "transformers_version": "4.55.2"
8
+ }