baohuynhbk14 committed on
Commit
4a3dda8
·
verified ·
1 Parent(s): d0477fb

Model save

Browse files
README.md CHANGED
@@ -33,17 +33,18 @@ More information needed
33
 
34
  The following hyperparameters were used during training:
35
  - learning_rate: 1e-06
36
- - train_batch_size: 8
37
- - eval_batch_size: 4
38
  - seed: 42
39
  - distributed_type: multi-GPU
40
  - num_devices: 2
 
41
  - total_train_batch_size: 16
42
- - total_eval_batch_size: 8
43
  - optimizer: Adam with betas=(0.9,0.95) and epsilon=1e-08
44
  - lr_scheduler_type: cosine
45
  - lr_scheduler_warmup_ratio: 0.01
46
- - num_epochs: 1.0
47
 
48
  ### Training results
49
 
@@ -52,7 +53,7 @@ The following hyperparameters were used during training:
52
  ### Framework versions
53
 
54
  - PEFT 0.12.0
55
- - Transformers 4.44.0
56
  - Pytorch 2.1.2
57
  - Datasets 2.20.0
58
  - Tokenizers 0.19.1
 
33
 
34
  The following hyperparameters were used during training:
35
  - learning_rate: 1e-06
36
+ - train_batch_size: 4
37
+ - eval_batch_size: 2
38
  - seed: 42
39
  - distributed_type: multi-GPU
40
  - num_devices: 2
41
+ - gradient_accumulation_steps: 2
42
  - total_train_batch_size: 16
43
+ - total_eval_batch_size: 4
44
  - optimizer: Adam with betas=(0.9,0.95) and epsilon=1e-08
45
  - lr_scheduler_type: cosine
46
  - lr_scheduler_warmup_ratio: 0.01
47
+ - num_epochs: 2.0
48
 
49
  ### Training results
50
 
 
53
  ### Framework versions
54
 
55
  - PEFT 0.12.0
56
+ - Transformers 4.40.0
57
  - Pytorch 2.1.2
58
  - Datasets 2.20.0
59
  - Tokenizers 0.19.1
adapter_config.json CHANGED
@@ -2,7 +2,7 @@
2
  "alpha_pattern": {},
3
  "auto_mapping": {
4
  "base_model_class": "MiniCPMV",
5
- "parent_library": "transformers_modules.openbmb.MiniCPM-V-2_6.ad23470e5e3b45a91ab9b253530c8b9c8cf36759.modeling_minicpmv"
6
  },
7
  "base_model_name_or_path": "openbmb/MiniCPM-V-2_6",
8
  "bias": "none",
@@ -19,7 +19,8 @@
19
  "megatron_core": "megatron.core",
20
  "modules_to_save": [
21
  "embed_tokens",
22
- "resampler"
 
23
  ],
24
  "peft_type": "LORA",
25
  "r": 64,
 
2
  "alpha_pattern": {},
3
  "auto_mapping": {
4
  "base_model_class": "MiniCPMV",
5
+ "parent_library": "transformers_modules.openbmb.MiniCPM-V-2_6.b6f4084e52b67fceb71d3b963cb481f515a7e101.modeling_minicpmv"
6
  },
7
  "base_model_name_or_path": "openbmb/MiniCPM-V-2_6",
8
  "bias": "none",
 
19
  "megatron_core": "megatron.core",
20
  "modules_to_save": [
21
  "embed_tokens",
22
+ "resampler",
23
+ "vpm"
24
  ],
25
  "peft_type": "LORA",
26
  "r": 64,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:55d57698119acf4285cd0482969db532ee6b89a2a4f12dbb14e12983bcb2890b
3
- size 1305154552
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef5bd84ec9d072204570d549b2d959789a1a5acb9dbf7fb3fb70843598f006d5
3
+ size 2140797224
trainer_state.json CHANGED
@@ -1,140 +1,226 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.0,
5
  "eval_steps": 1000,
6
- "global_step": 72,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.06944444444444445,
13
- "grad_norm": 4.683212757110596,
14
  "learning_rate": 1e-06,
15
- "loss": 2.5448,
16
  "step": 5
17
  },
18
  {
19
  "epoch": 0.1388888888888889,
20
- "grad_norm": 4.721557140350342,
21
  "learning_rate": 1e-06,
22
- "loss": 2.4235,
23
  "step": 10
24
  },
25
  {
26
  "epoch": 0.20833333333333334,
27
- "grad_norm": 3.7502753734588623,
28
  "learning_rate": 1e-06,
29
- "loss": 2.5413,
30
  "step": 15
31
  },
32
  {
33
  "epoch": 0.2777777777777778,
34
- "grad_norm": 4.788341045379639,
35
  "learning_rate": 1e-06,
36
- "loss": 2.7545,
37
  "step": 20
38
  },
39
  {
40
  "epoch": 0.3472222222222222,
41
- "grad_norm": 4.027582168579102,
42
  "learning_rate": 1e-06,
43
- "loss": 2.398,
44
  "step": 25
45
  },
46
  {
47
  "epoch": 0.4166666666666667,
48
- "grad_norm": 4.083437442779541,
49
  "learning_rate": 1e-06,
50
- "loss": 2.737,
51
  "step": 30
52
  },
53
  {
54
  "epoch": 0.4861111111111111,
55
- "grad_norm": 3.840348720550537,
56
  "learning_rate": 1e-06,
57
- "loss": 2.6366,
58
  "step": 35
59
  },
60
  {
61
  "epoch": 0.5555555555555556,
62
- "grad_norm": 3.376926898956299,
63
  "learning_rate": 1e-06,
64
- "loss": 2.5638,
65
  "step": 40
66
  },
67
  {
68
  "epoch": 0.625,
69
- "grad_norm": 3.656587600708008,
70
  "learning_rate": 1e-06,
71
- "loss": 2.7568,
72
  "step": 45
73
  },
74
  {
75
  "epoch": 0.6944444444444444,
76
- "grad_norm": 2.5398614406585693,
77
  "learning_rate": 1e-06,
78
- "loss": 2.3856,
79
  "step": 50
80
  },
81
  {
82
  "epoch": 0.7638888888888888,
83
- "grad_norm": 2.253296136856079,
84
  "learning_rate": 1e-06,
85
- "loss": 2.268,
86
  "step": 55
87
  },
88
  {
89
  "epoch": 0.8333333333333334,
90
- "grad_norm": 2.464299201965332,
91
  "learning_rate": 1e-06,
92
- "loss": 2.45,
93
  "step": 60
94
  },
95
  {
96
  "epoch": 0.9027777777777778,
97
- "grad_norm": 2.9303369522094727,
98
  "learning_rate": 1e-06,
99
- "loss": 2.5548,
100
  "step": 65
101
  },
102
  {
103
  "epoch": 0.9722222222222222,
104
- "grad_norm": 1.658677339553833,
105
  "learning_rate": 1e-06,
106
- "loss": 2.3566,
107
  "step": 70
108
  },
109
  {
110
- "epoch": 1.0,
111
- "step": 72,
112
- "total_flos": 4.988916731582874e+16,
113
- "train_loss": 2.5252017312579684,
114
- "train_runtime": 672.6439,
115
- "train_samples_per_second": 1.713,
116
- "train_steps_per_second": 0.107
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
  }
118
  ],
119
  "logging_steps": 5,
120
- "max_steps": 72,
121
  "num_input_tokens_seen": 0,
122
- "num_train_epochs": 1,
123
  "save_steps": 200,
124
- "stateful_callbacks": {
125
- "TrainerControl": {
126
- "args": {
127
- "should_epoch_stop": false,
128
- "should_evaluate": false,
129
- "should_log": false,
130
- "should_save": true,
131
- "should_training_stop": true
132
- },
133
- "attributes": {}
134
- }
135
- },
136
- "total_flos": 4.988916731582874e+16,
137
- "train_batch_size": 8,
138
  "trial_name": null,
139
  "trial_params": null
140
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.0,
5
  "eval_steps": 1000,
6
+ "global_step": 144,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.06944444444444445,
13
+ "grad_norm": 8.71909236907959,
14
  "learning_rate": 1e-06,
15
+ "loss": 2.6593,
16
  "step": 5
17
  },
18
  {
19
  "epoch": 0.1388888888888889,
20
+ "grad_norm": 8.853459358215332,
21
  "learning_rate": 1e-06,
22
+ "loss": 2.4766,
23
  "step": 10
24
  },
25
  {
26
  "epoch": 0.20833333333333334,
27
+ "grad_norm": 6.577568054199219,
28
  "learning_rate": 1e-06,
29
+ "loss": 2.495,
30
  "step": 15
31
  },
32
  {
33
  "epoch": 0.2777777777777778,
34
+ "grad_norm": 8.3203706741333,
35
  "learning_rate": 1e-06,
36
+ "loss": 2.6105,
37
  "step": 20
38
  },
39
  {
40
  "epoch": 0.3472222222222222,
41
+ "grad_norm": 7.732483863830566,
42
  "learning_rate": 1e-06,
43
+ "loss": 2.4744,
44
  "step": 25
45
  },
46
  {
47
  "epoch": 0.4166666666666667,
48
+ "grad_norm": 6.6942548751831055,
49
  "learning_rate": 1e-06,
50
+ "loss": 2.5163,
51
  "step": 30
52
  },
53
  {
54
  "epoch": 0.4861111111111111,
55
+ "grad_norm": 5.781284809112549,
56
  "learning_rate": 1e-06,
57
+ "loss": 2.3854,
58
  "step": 35
59
  },
60
  {
61
  "epoch": 0.5555555555555556,
62
+ "grad_norm": 7.311328887939453,
63
  "learning_rate": 1e-06,
64
+ "loss": 2.4442,
65
  "step": 40
66
  },
67
  {
68
  "epoch": 0.625,
69
+ "grad_norm": 6.254249572753906,
70
  "learning_rate": 1e-06,
71
+ "loss": 2.2468,
72
  "step": 45
73
  },
74
  {
75
  "epoch": 0.6944444444444444,
76
+ "grad_norm": 7.6778669357299805,
77
  "learning_rate": 1e-06,
78
+ "loss": 2.2565,
79
  "step": 50
80
  },
81
  {
82
  "epoch": 0.7638888888888888,
83
+ "grad_norm": 7.495645523071289,
84
  "learning_rate": 1e-06,
85
+ "loss": 2.4391,
86
  "step": 55
87
  },
88
  {
89
  "epoch": 0.8333333333333334,
90
+ "grad_norm": 5.458991527557373,
91
  "learning_rate": 1e-06,
92
+ "loss": 2.2362,
93
  "step": 60
94
  },
95
  {
96
  "epoch": 0.9027777777777778,
97
+ "grad_norm": 5.659170627593994,
98
  "learning_rate": 1e-06,
99
+ "loss": 2.2188,
100
  "step": 65
101
  },
102
  {
103
  "epoch": 0.9722222222222222,
104
+ "grad_norm": 4.8217997550964355,
105
  "learning_rate": 1e-06,
106
+ "loss": 2.154,
107
  "step": 70
108
  },
109
  {
110
+ "epoch": 1.0416666666666667,
111
+ "grad_norm": 6.693627834320068,
112
+ "learning_rate": 1e-06,
113
+ "loss": 2.1182,
114
+ "step": 75
115
+ },
116
+ {
117
+ "epoch": 1.1111111111111112,
118
+ "grad_norm": 6.67255973815918,
119
+ "learning_rate": 1e-06,
120
+ "loss": 2.1879,
121
+ "step": 80
122
+ },
123
+ {
124
+ "epoch": 1.1805555555555556,
125
+ "grad_norm": 4.831326007843018,
126
+ "learning_rate": 1e-06,
127
+ "loss": 2.1077,
128
+ "step": 85
129
+ },
130
+ {
131
+ "epoch": 1.25,
132
+ "grad_norm": 4.830414295196533,
133
+ "learning_rate": 1e-06,
134
+ "loss": 2.026,
135
+ "step": 90
136
+ },
137
+ {
138
+ "epoch": 1.3194444444444444,
139
+ "grad_norm": 5.039080619812012,
140
+ "learning_rate": 1e-06,
141
+ "loss": 2.0585,
142
+ "step": 95
143
+ },
144
+ {
145
+ "epoch": 1.3888888888888888,
146
+ "grad_norm": 5.749475002288818,
147
+ "learning_rate": 1e-06,
148
+ "loss": 2.0486,
149
+ "step": 100
150
+ },
151
+ {
152
+ "epoch": 1.4583333333333333,
153
+ "grad_norm": 5.0571770668029785,
154
+ "learning_rate": 1e-06,
155
+ "loss": 1.9616,
156
+ "step": 105
157
+ },
158
+ {
159
+ "epoch": 1.5277777777777777,
160
+ "grad_norm": 4.597809314727783,
161
+ "learning_rate": 1e-06,
162
+ "loss": 1.9063,
163
+ "step": 110
164
+ },
165
+ {
166
+ "epoch": 1.5972222222222223,
167
+ "grad_norm": 5.453224182128906,
168
+ "learning_rate": 1e-06,
169
+ "loss": 2.1802,
170
+ "step": 115
171
+ },
172
+ {
173
+ "epoch": 1.6666666666666665,
174
+ "grad_norm": 4.519564628601074,
175
+ "learning_rate": 1e-06,
176
+ "loss": 2.2049,
177
+ "step": 120
178
+ },
179
+ {
180
+ "epoch": 1.7361111111111112,
181
+ "grad_norm": 4.976806163787842,
182
+ "learning_rate": 1e-06,
183
+ "loss": 1.8881,
184
+ "step": 125
185
+ },
186
+ {
187
+ "epoch": 1.8055555555555556,
188
+ "grad_norm": 4.543058395385742,
189
+ "learning_rate": 1e-06,
190
+ "loss": 2.0673,
191
+ "step": 130
192
+ },
193
+ {
194
+ "epoch": 1.875,
195
+ "grad_norm": 4.89597225189209,
196
+ "learning_rate": 1e-06,
197
+ "loss": 2.0158,
198
+ "step": 135
199
+ },
200
+ {
201
+ "epoch": 1.9444444444444444,
202
+ "grad_norm": 4.95186185836792,
203
+ "learning_rate": 1e-06,
204
+ "loss": 2.0052,
205
+ "step": 140
206
+ },
207
+ {
208
+ "epoch": 2.0,
209
+ "step": 144,
210
+ "total_flos": 9.601343958286336e+16,
211
+ "train_loss": 2.2213990655210285,
212
+ "train_runtime": 2501.8045,
213
+ "train_samples_per_second": 0.921,
214
+ "train_steps_per_second": 0.058
215
  }
216
  ],
217
  "logging_steps": 5,
218
+ "max_steps": 144,
219
  "num_input_tokens_seen": 0,
220
+ "num_train_epochs": 2,
221
  "save_steps": 200,
222
+ "total_flos": 9.601343958286336e+16,
223
+ "train_batch_size": 4,
 
 
 
 
 
 
 
 
 
 
 
 
224
  "trial_name": null,
225
  "trial_params": null
226
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:95877359ada36ac411af5909de5cef10f22c8118e25725b3ac062bf5aec92941
3
- size 6968
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4bce47b54376c8018ec37390823c002788eed73a220893b64e1cf33e79ab99a9
3
+ size 6776