{ "best_global_step": 7770, "best_metric": 0.06381073594093323, "best_model_checkpoint": "./results_albert_punctuation_casing/checkpoint-7770", "epoch": 2.0, "eval_steps": 500, "global_step": 7770, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.1287001287001287, "grad_norm": 4.2918195724487305, "learning_rate": 1.9143715143715144e-05, "loss": 0.2105482635498047, "step": 500 }, { "epoch": 0.2574002574002574, "grad_norm": 3.180814504623413, "learning_rate": 1.8285714285714288e-05, "loss": 0.09811102294921875, "step": 1000 }, { "epoch": 0.3861003861003861, "grad_norm": 4.069736480712891, "learning_rate": 1.742771342771343e-05, "loss": 0.08830223083496094, "step": 1500 }, { "epoch": 0.5148005148005148, "grad_norm": 2.3238089084625244, "learning_rate": 1.656971256971257e-05, "loss": 0.07262681579589844, "step": 2000 }, { "epoch": 0.6435006435006435, "grad_norm": 0.725167989730835, "learning_rate": 1.5711711711711713e-05, "loss": 0.07961682891845703, "step": 2500 }, { "epoch": 0.7722007722007722, "grad_norm": 13.671622276306152, "learning_rate": 1.4853710853710854e-05, "loss": 0.06993846893310547, "step": 3000 }, { "epoch": 0.9009009009009009, "grad_norm": 2.6098945140838623, "learning_rate": 1.3995709995709996e-05, "loss": 0.07217549133300781, "step": 3500 }, { "epoch": 1.0, "eval_casing_accuracy": 0.6387912059001499, "eval_loss": 0.07048454880714417, "eval_overall_accuracy": 0.6404223412931571, "eval_punctuation_accuracy": 0.6420534766861643, "eval_runtime": 86.2448, "eval_samples_per_second": 180.162, "eval_steps_per_second": 11.27, "step": 3885 }, { "epoch": 1.0296010296010296, "grad_norm": 1.678989052772522, "learning_rate": 1.3137709137709139e-05, "loss": 0.05899927520751953, "step": 4000 }, { "epoch": 1.1583011583011582, "grad_norm": 5.855215549468994, "learning_rate": 1.2279708279708281e-05, "loss": 0.05248377227783203, "step": 4500 }, { "epoch": 1.287001287001287, "grad_norm": 20.59808921813965, "learning_rate": 1.1421707421707422e-05, "loss": 0.05537939834594727, "step": 5000 }, { "epoch": 1.4157014157014158, "grad_norm": 3.922346830368042, "learning_rate": 1.0563706563706564e-05, "loss": 0.05087580490112305, "step": 5500 }, { "epoch": 1.5444015444015444, "grad_norm": 0.129458948969841, "learning_rate": 9.705705705705706e-06, "loss": 0.0524902229309082, "step": 6000 }, { "epoch": 1.673101673101673, "grad_norm": 0.10066387057304382, "learning_rate": 8.847704847704849e-06, "loss": 0.04880419921875, "step": 6500 }, { "epoch": 1.8018018018018018, "grad_norm": 1.1645872592926025, "learning_rate": 7.989703989703991e-06, "loss": 0.04735799407958984, "step": 7000 }, { "epoch": 1.9305019305019306, "grad_norm": 1.6507762670516968, "learning_rate": 7.1317031317031325e-06, "loss": 0.05284581756591797, "step": 7500 }, { "epoch": 2.0, "eval_casing_accuracy": 0.6404749585638992, "eval_loss": 0.06381073594093323, "eval_overall_accuracy": 0.6414089151195728, "eval_punctuation_accuracy": 0.6423428716752462, "eval_runtime": 85.5043, "eval_samples_per_second": 181.722, "eval_steps_per_second": 11.368, "step": 7770 } ], "logging_steps": 500, "max_steps": 11655, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }