{ "best_metric": 2.8747918605804443, "best_model_checkpoint": "data/paligemma2-3b-pt-224-sft-lora-iphone_gates_lobby_small_ft_sub5/checkpoint-372", "epoch": 1.0, "eval_steps": 124, "global_step": 495, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00202020202020202, "grad_norm": 23.50887680053711, "learning_rate": 2.0000000000000003e-06, "loss": 4.6521, "mean_token_accuracy": 0.27272728085517883, "num_tokens": 1116.0, "step": 1 }, { "epoch": 0.04040404040404041, "grad_norm": 5.875630855560303, "learning_rate": 4e-05, "loss": 3.8258, "mean_token_accuracy": 0.2870813372888063, "num_tokens": 22320.0, "step": 20 }, { "epoch": 0.08080808080808081, "grad_norm": 5.26508903503418, "learning_rate": 8e-05, "loss": 3.4301, "mean_token_accuracy": 0.295454540848732, "num_tokens": 44640.0, "step": 40 }, { "epoch": 0.12121212121212122, "grad_norm": 4.6612935066223145, "learning_rate": 9.987545125753819e-05, "loss": 3.3982, "mean_token_accuracy": 0.2852272763848305, "num_tokens": 66960.0, "step": 60 }, { "epoch": 0.16161616161616163, "grad_norm": 4.843405723571777, "learning_rate": 9.888278119998573e-05, "loss": 3.3965, "mean_token_accuracy": 0.2886363595724106, "num_tokens": 89280.0, "step": 80 }, { "epoch": 0.20202020202020202, "grad_norm": 4.455775260925293, "learning_rate": 9.691719817616147e-05, "loss": 3.3327, "mean_token_accuracy": 0.2886363580822945, "num_tokens": 111600.0, "step": 100 }, { "epoch": 0.24242424242424243, "grad_norm": 3.7211084365844727, "learning_rate": 9.401782314365457e-05, "loss": 3.3342, "mean_token_accuracy": 0.28522727340459825, "num_tokens": 133920.0, "step": 120 }, { "epoch": 0.2505050505050505, "eval_loss": 3.278226137161255, "eval_mean_token_accuracy": 0.2953627150468152, "eval_num_tokens": 138384.0, "eval_runtime": 71.3329, "eval_samples_per_second": 27.757, "eval_steps_per_second": 6.939, "step": 124 }, { "epoch": 0.2828282828282828, "grad_norm": 3.5156655311584473, "learning_rate": 9.024236230276629e-05, "loss": 3.3199, "mean_token_accuracy": 0.28522727489471433, "num_tokens": 156240.0, "step": 140 }, { "epoch": 0.32323232323232326, "grad_norm": 4.203444957733154, "learning_rate": 8.566595857121902e-05, "loss": 3.2951, "mean_token_accuracy": 0.28295454680919646, "num_tokens": 178560.0, "step": 160 }, { "epoch": 0.36363636363636365, "grad_norm": 4.595535755157471, "learning_rate": 8.037969601624495e-05, "loss": 3.279, "mean_token_accuracy": 0.2897727251052856, "num_tokens": 200880.0, "step": 180 }, { "epoch": 0.40404040404040403, "grad_norm": 3.6226227283477783, "learning_rate": 7.448878701031142e-05, "loss": 3.2637, "mean_token_accuracy": 0.2988636329770088, "num_tokens": 223200.0, "step": 200 }, { "epoch": 0.4444444444444444, "grad_norm": 4.923504829406738, "learning_rate": 6.811047819148413e-05, "loss": 3.2352, "mean_token_accuracy": 0.2897727251052856, "num_tokens": 245520.0, "step": 220 }, { "epoch": 0.48484848484848486, "grad_norm": 5.1371893882751465, "learning_rate": 6.137171690605533e-05, "loss": 3.2122, "mean_token_accuracy": 0.30340908616781237, "num_tokens": 267840.0, "step": 240 }, { "epoch": 0.501010101010101, "eval_loss": 3.1343026161193848, "eval_mean_token_accuracy": 0.3041781412832665, "eval_num_tokens": 276768.0, "eval_runtime": 72.9196, "eval_samples_per_second": 27.153, "eval_steps_per_second": 6.788, "step": 248 }, { "epoch": 0.5252525252525253, "grad_norm": 4.763956546783447, "learning_rate": 5.4406624578180096e-05, "loss": 3.1213, "mean_token_accuracy": 0.3011363595724106, "num_tokens": 290160.0, "step": 260 }, { "epoch": 0.5656565656565656, "grad_norm": 7.0650787353515625, "learning_rate": 4.735382729399184e-05, "loss": 3.1887, "mean_token_accuracy": 0.2886363595724106, "num_tokens": 312480.0, "step": 280 }, { "epoch": 0.6060606060606061, "grad_norm": 4.818840980529785, "learning_rate": 4.035369672952516e-05, "loss": 3.1492, "mean_token_accuracy": 0.2988636329770088, "num_tokens": 334800.0, "step": 300 }, { "epoch": 0.6464646464646465, "grad_norm": 5.388601303100586, "learning_rate": 3.35455563361995e-05, "loss": 3.0821, "mean_token_accuracy": 0.2943181797862053, "num_tokens": 357120.0, "step": 320 }, { "epoch": 0.6868686868686869, "grad_norm": 10.147493362426758, "learning_rate": 2.7064908389095468e-05, "loss": 3.0187, "mean_token_accuracy": 0.30113635808229444, "num_tokens": 379440.0, "step": 340 }, { "epoch": 0.7272727272727273, "grad_norm": 8.179190635681152, "learning_rate": 2.1040737088023323e-05, "loss": 3.0241, "mean_token_accuracy": 0.29545454531908033, "num_tokens": 401760.0, "step": 360 }, { "epoch": 0.7515151515151515, "eval_loss": 2.8747918605804443, "eval_mean_token_accuracy": 0.31524333917733394, "eval_num_tokens": 415152.0, "eval_runtime": 72.7411, "eval_samples_per_second": 27.22, "eval_steps_per_second": 6.805, "step": 372 }, { "epoch": 0.7676767676767676, "grad_norm": 7.625748157501221, "learning_rate": 1.559294138770656e-05, "loss": 3.0052, "mean_token_accuracy": 0.30681817382574084, "num_tokens": 424080.0, "step": 380 }, { "epoch": 0.8080808080808081, "grad_norm": 5.577261924743652, "learning_rate": 1.0829948651407374e-05, "loss": 2.9856, "mean_token_accuracy": 0.307954540848732, "num_tokens": 446400.0, "step": 400 }, { "epoch": 0.8484848484848485, "grad_norm": 6.046720027923584, "learning_rate": 6.8465566233957945e-06, "loss": 2.9215, "mean_token_accuracy": 0.2988636314868927, "num_tokens": 468720.0, "step": 420 }, { "epoch": 0.8888888888888888, "grad_norm": 5.791171550750732, "learning_rate": 3.72204667143895e-06, "loss": 2.9103, "mean_token_accuracy": 0.29318181425333023, "num_tokens": 491040.0, "step": 440 }, { "epoch": 0.9292929292929293, "grad_norm": 6.761912822723389, "learning_rate": 1.5186058514055912e-06, "loss": 2.8869, "mean_token_accuracy": 0.2988636359572411, "num_tokens": 513360.0, "step": 460 }, { "epoch": 0.9696969696969697, "grad_norm": 6.14446496963501, "learning_rate": 2.800891996009025e-07, "loss": 2.9217, "mean_token_accuracy": 0.310227270424366, "num_tokens": 535680.0, "step": 480 }, { "epoch": 1.0, "mean_token_accuracy": 0.2969696938991547, "num_tokens": 552420.0, "step": 495, "total_flos": 8163909581144064.0, "train_loss": 3.1818671159069947, "train_runtime": 387.9339, "train_samples_per_second": 5.104, "train_steps_per_second": 1.276 } ], "logging_steps": 20, "max_steps": 495, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 124, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 8163909581144064.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }