Instructions to use baohuynhbk14/miniCPM_finetune_lora_viet_vqa with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- PEFT
How to use baohuynhbk14/miniCPM_finetune_lora_viet_vqa with PEFT:
Task type is invalid.
- Notebooks
- Google Colab
- Kaggle
| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 5.0, | |
| "eval_steps": 100, | |
| "global_step": 360, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.06944444444444445, | |
| "grad_norm": 6.3636603355407715, | |
| "learning_rate": 1e-06, | |
| "loss": 2.6727, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.1388888888888889, | |
| "grad_norm": 7.486879825592041, | |
| "learning_rate": 1e-06, | |
| "loss": 2.3642, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.20833333333333334, | |
| "grad_norm": 6.5991997718811035, | |
| "learning_rate": 1e-06, | |
| "loss": 2.515, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.2777777777777778, | |
| "grad_norm": 7.580630779266357, | |
| "learning_rate": 1e-06, | |
| "loss": 2.3997, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.3472222222222222, | |
| "grad_norm": 7.5727410316467285, | |
| "learning_rate": 1e-06, | |
| "loss": 2.4551, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.4166666666666667, | |
| "grad_norm": 8.835946083068848, | |
| "learning_rate": 1e-06, | |
| "loss": 2.4476, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.4861111111111111, | |
| "grad_norm": 7.495606899261475, | |
| "learning_rate": 1e-06, | |
| "loss": 2.4191, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.5555555555555556, | |
| "grad_norm": 8.057035446166992, | |
| "learning_rate": 1e-06, | |
| "loss": 2.441, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.625, | |
| "grad_norm": 6.828744411468506, | |
| "learning_rate": 1e-06, | |
| "loss": 2.3052, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.6944444444444444, | |
| "grad_norm": 7.163251876831055, | |
| "learning_rate": 1e-06, | |
| "loss": 2.1357, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.7638888888888888, | |
| "grad_norm": 5.414941787719727, | |
| "learning_rate": 1e-06, | |
| "loss": 2.2248, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.8333333333333334, | |
| "grad_norm": 6.0801544189453125, | |
| "learning_rate": 1e-06, | |
| "loss": 2.2934, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.9027777777777778, | |
| "grad_norm": 6.054081439971924, | |
| "learning_rate": 1e-06, | |
| "loss": 2.3014, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.9722222222222222, | |
| "grad_norm": 5.827741622924805, | |
| "learning_rate": 1e-06, | |
| "loss": 2.2515, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 1.0416666666666667, | |
| "grad_norm": 3.5676162242889404, | |
| "learning_rate": 1e-06, | |
| "loss": 2.0915, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 1.1111111111111112, | |
| "grad_norm": 5.15900993347168, | |
| "learning_rate": 1e-06, | |
| "loss": 2.0749, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 1.1805555555555556, | |
| "grad_norm": 5.206437110900879, | |
| "learning_rate": 1e-06, | |
| "loss": 2.0539, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "grad_norm": 5.990969657897949, | |
| "learning_rate": 1e-06, | |
| "loss": 2.1308, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 1.3194444444444444, | |
| "grad_norm": 6.198008060455322, | |
| "learning_rate": 1e-06, | |
| "loss": 2.3256, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 1.3888888888888888, | |
| "grad_norm": 5.184628486633301, | |
| "learning_rate": 1e-06, | |
| "loss": 2.1566, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 1.3888888888888888, | |
| "eval_loss": 2.0880796909332275, | |
| "eval_runtime": 34.0667, | |
| "eval_samples_per_second": 2.935, | |
| "eval_steps_per_second": 0.734, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 1.4583333333333333, | |
| "grad_norm": 5.412724494934082, | |
| "learning_rate": 1e-06, | |
| "loss": 1.9085, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 1.5277777777777777, | |
| "grad_norm": 3.459959030151367, | |
| "learning_rate": 1e-06, | |
| "loss": 1.9494, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 1.5972222222222223, | |
| "grad_norm": 5.159445762634277, | |
| "learning_rate": 1e-06, | |
| "loss": 1.9334, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 1.6666666666666665, | |
| "grad_norm": 5.133082389831543, | |
| "learning_rate": 1e-06, | |
| "loss": 2.0826, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 1.7361111111111112, | |
| "grad_norm": 4.473026752471924, | |
| "learning_rate": 1e-06, | |
| "loss": 2.0585, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 1.8055555555555556, | |
| "grad_norm": 5.063863754272461, | |
| "learning_rate": 1e-06, | |
| "loss": 2.1289, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 1.875, | |
| "grad_norm": 4.927737236022949, | |
| "learning_rate": 1e-06, | |
| "loss": 1.9872, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 1.9444444444444444, | |
| "grad_norm": 5.563902854919434, | |
| "learning_rate": 1e-06, | |
| "loss": 1.9803, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 2.013888888888889, | |
| "grad_norm": 3.901442050933838, | |
| "learning_rate": 1e-06, | |
| "loss": 1.8309, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 2.0833333333333335, | |
| "grad_norm": 3.771136999130249, | |
| "learning_rate": 1e-06, | |
| "loss": 1.7758, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 2.1527777777777777, | |
| "grad_norm": 4.6159257888793945, | |
| "learning_rate": 1e-06, | |
| "loss": 1.9193, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 2.2222222222222223, | |
| "grad_norm": 3.758843183517456, | |
| "learning_rate": 1e-06, | |
| "loss": 1.9329, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 2.2916666666666665, | |
| "grad_norm": 4.267579078674316, | |
| "learning_rate": 1e-06, | |
| "loss": 2.0399, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 2.361111111111111, | |
| "grad_norm": 3.9819560050964355, | |
| "learning_rate": 1e-06, | |
| "loss": 1.9568, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 2.4305555555555554, | |
| "grad_norm": 3.8918192386627197, | |
| "learning_rate": 1e-06, | |
| "loss": 1.7377, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "grad_norm": 3.9746928215026855, | |
| "learning_rate": 1e-06, | |
| "loss": 1.8949, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 2.5694444444444446, | |
| "grad_norm": 3.328784704208374, | |
| "learning_rate": 1e-06, | |
| "loss": 1.6509, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 2.638888888888889, | |
| "grad_norm": 3.835324287414551, | |
| "learning_rate": 1e-06, | |
| "loss": 1.8321, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 2.7083333333333335, | |
| "grad_norm": 3.3603885173797607, | |
| "learning_rate": 1e-06, | |
| "loss": 1.8628, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 2.7777777777777777, | |
| "grad_norm": 3.7577502727508545, | |
| "learning_rate": 1e-06, | |
| "loss": 1.8447, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 2.7777777777777777, | |
| "eval_loss": 1.8452154397964478, | |
| "eval_runtime": 34.0911, | |
| "eval_samples_per_second": 2.933, | |
| "eval_steps_per_second": 0.733, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 2.8472222222222223, | |
| "grad_norm": 4.379385948181152, | |
| "learning_rate": 1e-06, | |
| "loss": 1.8212, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 2.9166666666666665, | |
| "grad_norm": 3.7095022201538086, | |
| "learning_rate": 1e-06, | |
| "loss": 1.7862, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 2.986111111111111, | |
| "grad_norm": 4.164438724517822, | |
| "learning_rate": 1e-06, | |
| "loss": 1.8046, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 3.0555555555555554, | |
| "grad_norm": 3.6749582290649414, | |
| "learning_rate": 1e-06, | |
| "loss": 1.6358, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 3.125, | |
| "grad_norm": 3.7247958183288574, | |
| "learning_rate": 1e-06, | |
| "loss": 1.791, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 3.1944444444444446, | |
| "grad_norm": 2.9533472061157227, | |
| "learning_rate": 1e-06, | |
| "loss": 1.6251, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 3.263888888888889, | |
| "grad_norm": 4.062502384185791, | |
| "learning_rate": 1e-06, | |
| "loss": 1.6976, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 3.3333333333333335, | |
| "grad_norm": 4.328882217407227, | |
| "learning_rate": 1e-06, | |
| "loss": 1.8438, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 3.4027777777777777, | |
| "grad_norm": 4.158596038818359, | |
| "learning_rate": 1e-06, | |
| "loss": 1.8998, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 3.4722222222222223, | |
| "grad_norm": 5.7752556800842285, | |
| "learning_rate": 1e-06, | |
| "loss": 1.7517, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 3.5416666666666665, | |
| "grad_norm": 4.568635940551758, | |
| "learning_rate": 1e-06, | |
| "loss": 1.6835, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 3.611111111111111, | |
| "grad_norm": 3.6611974239349365, | |
| "learning_rate": 1e-06, | |
| "loss": 1.7852, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 3.6805555555555554, | |
| "grad_norm": 4.026912212371826, | |
| "learning_rate": 1e-06, | |
| "loss": 1.7916, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 3.75, | |
| "grad_norm": 4.750195026397705, | |
| "learning_rate": 1e-06, | |
| "loss": 1.7584, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 3.8194444444444446, | |
| "grad_norm": 3.936798572540283, | |
| "learning_rate": 1e-06, | |
| "loss": 1.5877, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 3.888888888888889, | |
| "grad_norm": 4.1127800941467285, | |
| "learning_rate": 1e-06, | |
| "loss": 1.5392, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 3.9583333333333335, | |
| "grad_norm": 3.6437580585479736, | |
| "learning_rate": 1e-06, | |
| "loss": 1.6125, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 4.027777777777778, | |
| "grad_norm": 3.641177177429199, | |
| "learning_rate": 1e-06, | |
| "loss": 1.687, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 4.097222222222222, | |
| "grad_norm": 3.797327995300293, | |
| "learning_rate": 1e-06, | |
| "loss": 1.7779, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 4.166666666666667, | |
| "grad_norm": 5.071943283081055, | |
| "learning_rate": 1e-06, | |
| "loss": 1.7103, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 4.166666666666667, | |
| "eval_loss": 1.6850143671035767, | |
| "eval_runtime": 34.4694, | |
| "eval_samples_per_second": 2.901, | |
| "eval_steps_per_second": 0.725, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 4.236111111111111, | |
| "grad_norm": 6.09140682220459, | |
| "learning_rate": 1e-06, | |
| "loss": 1.6347, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 4.305555555555555, | |
| "grad_norm": 5.452902317047119, | |
| "learning_rate": 1e-06, | |
| "loss": 1.7689, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 4.375, | |
| "grad_norm": 3.5834009647369385, | |
| "learning_rate": 1e-06, | |
| "loss": 1.6514, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 4.444444444444445, | |
| "grad_norm": 3.288220167160034, | |
| "learning_rate": 1e-06, | |
| "loss": 1.4941, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 4.513888888888889, | |
| "grad_norm": 4.202756404876709, | |
| "learning_rate": 1e-06, | |
| "loss": 1.5374, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 4.583333333333333, | |
| "grad_norm": 3.9757556915283203, | |
| "learning_rate": 1e-06, | |
| "loss": 1.6289, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 4.652777777777778, | |
| "grad_norm": 3.3575947284698486, | |
| "learning_rate": 1e-06, | |
| "loss": 1.5446, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 4.722222222222222, | |
| "grad_norm": 4.207667350769043, | |
| "learning_rate": 1e-06, | |
| "loss": 1.5668, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 4.791666666666667, | |
| "grad_norm": 3.2263221740722656, | |
| "learning_rate": 1e-06, | |
| "loss": 1.4529, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 4.861111111111111, | |
| "grad_norm": 3.272395610809326, | |
| "learning_rate": 1e-06, | |
| "loss": 1.5215, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 4.930555555555555, | |
| "grad_norm": 3.4315106868743896, | |
| "learning_rate": 1e-06, | |
| "loss": 1.5781, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 3.9581406116485596, | |
| "learning_rate": 1e-06, | |
| "loss": 1.5001, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "step": 360, | |
| "total_flos": 2.3797808143060173e+17, | |
| "train_loss": 1.9143991947174073, | |
| "train_runtime": 6464.4185, | |
| "train_samples_per_second": 0.891, | |
| "train_steps_per_second": 0.056 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 360, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 5, | |
| "save_steps": 100, | |
| "total_flos": 2.3797808143060173e+17, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |