Image-to-Text
Transformers
Safetensors
vision-encoder-decoder
image-text-to-text
vision
ocr
trocr
handwriting-recognition
document-processing
Instructions to use WARAJA/Tzefa-Word-OCR-TrOCR with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use WARAJA/Tzefa-Word-OCR-TrOCR with Transformers:
# Use a pipeline as a high-level helper
# Warning: Pipeline type "image-to-text" is no longer supported in transformers v5.
# You must load the model directly (see below) or downgrade to v4.x with:
# 'pip install "transformers<5.0.0"'
from transformers import pipeline
pipe = pipeline("image-to-text", model="WARAJA/Tzefa-Word-OCR-TrOCR")
# Load model directly
from transformers import AutoTokenizer, AutoModelForMultimodalLM
tokenizer = AutoTokenizer.from_pretrained("WARAJA/Tzefa-Word-OCR-TrOCR")
model = AutoModelForMultimodalLM.from_pretrained("WARAJA/Tzefa-Word-OCR-TrOCR")
- Notebooks
- Google Colab
- Kaggle
| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 3.6584924113449486, | |
| "eval_steps": 500, | |
| "global_step": 909000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 4.9985712158349534e-05, | |
| "loss": 2.2557, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 4.997133807218206e-05, | |
| "loss": 2.049, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.995699273418692e-05, | |
| "loss": 2.0487, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.9942618648019454e-05, | |
| "loss": 2.0357, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.9928244561851985e-05, | |
| "loss": 2.0048, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.991387047568451e-05, | |
| "loss": 1.919, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.989949638951703e-05, | |
| "loss": 1.9217, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.988512230334957e-05, | |
| "loss": 1.825, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.9870748217182095e-05, | |
| "loss": 1.8015, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.985637413101462e-05, | |
| "loss": 1.7936, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.984200004484715e-05, | |
| "loss": 1.7456, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.9827654706852015e-05, | |
| "loss": 1.7414, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.9813280620684546e-05, | |
| "loss": 1.728, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.979890653451707e-05, | |
| "loss": 1.7282, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.9784532448349594e-05, | |
| "loss": 1.7469, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.97702158585268e-05, | |
| "loss": 1.6573, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.975584177235932e-05, | |
| "loss": 1.7118, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.974146768619185e-05, | |
| "loss": 1.6161, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.9727093600024385e-05, | |
| "loss": 1.6275, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.971271951385691e-05, | |
| "loss": 1.6875, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.9698345427689434e-05, | |
| "loss": 1.6544, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.9683971341521965e-05, | |
| "loss": 1.6431, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.9669597255354495e-05, | |
| "loss": 1.6536, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.965522316918702e-05, | |
| "loss": 1.6471, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.9640877831191884e-05, | |
| "loss": 1.5942, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.962650374502441e-05, | |
| "loss": 1.6318, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.9612129658856946e-05, | |
| "loss": 1.5797, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.959775557268947e-05, | |
| "loss": 1.5792, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.9583410234694335e-05, | |
| "loss": 1.5361, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.956903614852686e-05, | |
| "loss": 1.6009, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.9554690810531724e-05, | |
| "loss": 1.5841, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.954031672436425e-05, | |
| "loss": 1.5948, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.952594263819678e-05, | |
| "loss": 1.6366, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.951156855202931e-05, | |
| "loss": 1.5873, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.9497194465861834e-05, | |
| "loss": 1.6046, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.94828491278667e-05, | |
| "loss": 1.6193, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.946847504169922e-05, | |
| "loss": 1.5759, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.9454100955531754e-05, | |
| "loss": 1.5702, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.9439726869364285e-05, | |
| "loss": 1.5179, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.942535278319681e-05, | |
| "loss": 1.5536, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.941097869702934e-05, | |
| "loss": 1.5235, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.939660461086187e-05, | |
| "loss": 1.5325, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.9382230524694395e-05, | |
| "loss": 1.5385, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.936788518669926e-05, | |
| "loss": 1.5189, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.9353511100531784e-05, | |
| "loss": 1.548, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.9339137014364315e-05, | |
| "loss": 1.5116, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.9324762928196846e-05, | |
| "loss": 1.5404, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.931041759020171e-05, | |
| "loss": 1.5425, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.929607225220657e-05, | |
| "loss": 1.5627, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.92816981660391e-05, | |
| "loss": 1.5173, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.926732407987162e-05, | |
| "loss": 1.5033, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.925294999370415e-05, | |
| "loss": 1.4979, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.9238575907536685e-05, | |
| "loss": 1.5482, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.922420182136921e-05, | |
| "loss": 1.5178, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.920982773520173e-05, | |
| "loss": 1.5487, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.919545364903427e-05, | |
| "loss": 1.5682, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.918110831103913e-05, | |
| "loss": 1.5289, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.916673422487166e-05, | |
| "loss": 1.5002, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.9152360138704184e-05, | |
| "loss": 1.5392, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.913798605253671e-05, | |
| "loss": 1.5209, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.9123611966369246e-05, | |
| "loss": 1.4972, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.910923788020177e-05, | |
| "loss": 1.4937, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.9094863794034294e-05, | |
| "loss": 1.4849, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.9080489707866825e-05, | |
| "loss": 1.4916, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.9066115621699356e-05, | |
| "loss": 1.4975, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.905174153553188e-05, | |
| "loss": 1.4753, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.903742494570908e-05, | |
| "loss": 1.4382, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.902305085954161e-05, | |
| "loss": 1.5305, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.9008676773374134e-05, | |
| "loss": 1.4588, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.8994302687206665e-05, | |
| "loss": 1.4831, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.897995734921152e-05, | |
| "loss": 1.5033, | |
| "step": 35500 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.896558326304406e-05, | |
| "loss": 1.5217, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.8951209176876584e-05, | |
| "loss": 1.4881, | |
| "step": 36500 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.893683509070911e-05, | |
| "loss": 1.4689, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.892246100454164e-05, | |
| "loss": 1.5241, | |
| "step": 37500 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.890808691837417e-05, | |
| "loss": 1.5023, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.8893712832206695e-05, | |
| "loss": 1.4979, | |
| "step": 38500 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.887933874603922e-05, | |
| "loss": 1.4906, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.8864964659871757e-05, | |
| "loss": 1.4677, | |
| "step": 39500 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.8850619321876614e-05, | |
| "loss": 1.4625, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.8836245235709145e-05, | |
| "loss": 1.4539, | |
| "step": 40500 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.882187114954167e-05, | |
| "loss": 1.456, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.8807497063374194e-05, | |
| "loss": 1.4562, | |
| "step": 41500 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.879315172537906e-05, | |
| "loss": 1.4844, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.877877763921159e-05, | |
| "loss": 1.4885, | |
| "step": 42500 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.876440355304412e-05, | |
| "loss": 1.4746, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.8750029466876644e-05, | |
| "loss": 1.4728, | |
| "step": 43500 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.873568412888151e-05, | |
| "loss": 1.4834, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.872131004271403e-05, | |
| "loss": 1.4744, | |
| "step": 44500 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.8706935956546564e-05, | |
| "loss": 1.496, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.8692561870379095e-05, | |
| "loss": 1.4814, | |
| "step": 45500 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.867821653238396e-05, | |
| "loss": 1.479, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.8663842446216484e-05, | |
| "loss": 1.4949, | |
| "step": 46500 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.864946836004901e-05, | |
| "loss": 1.4751, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.8635094273881546e-05, | |
| "loss": 1.4633, | |
| "step": 47500 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.8620748935886403e-05, | |
| "loss": 1.419, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.8606374849718934e-05, | |
| "loss": 1.4838, | |
| "step": 48500 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.859200076355146e-05, | |
| "loss": 1.4311, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.857765542555632e-05, | |
| "loss": 1.5105, | |
| "step": 49500 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.856328133938885e-05, | |
| "loss": 1.4417, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.854890725322138e-05, | |
| "loss": 1.4828, | |
| "step": 50500 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.853453316705391e-05, | |
| "loss": 1.4745, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.8520159080886433e-05, | |
| "loss": 1.4476, | |
| "step": 51500 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.8505784994718964e-05, | |
| "loss": 1.4907, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.849143965672382e-05, | |
| "loss": 1.4435, | |
| "step": 52500 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.847706557055635e-05, | |
| "loss": 1.4265, | |
| "step": 53000 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.8462691484388884e-05, | |
| "loss": 1.4731, | |
| "step": 53500 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.844831739822141e-05, | |
| "loss": 1.4475, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.843394331205394e-05, | |
| "loss": 1.4976, | |
| "step": 54500 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.841956922588647e-05, | |
| "loss": 1.4172, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.8405223887891335e-05, | |
| "loss": 1.4651, | |
| "step": 55500 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.839084980172386e-05, | |
| "loss": 1.4469, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.837647571555638e-05, | |
| "loss": 1.4732, | |
| "step": 56500 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.8362101629388914e-05, | |
| "loss": 1.4417, | |
| "step": 57000 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.8347727543221445e-05, | |
| "loss": 1.4229, | |
| "step": 57500 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.833335345705397e-05, | |
| "loss": 1.4677, | |
| "step": 58000 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.83189793708865e-05, | |
| "loss": 1.4511, | |
| "step": 58500 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.830460528471903e-05, | |
| "loss": 1.4449, | |
| "step": 59000 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.8290231198551555e-05, | |
| "loss": 1.4342, | |
| "step": 59500 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.827588586055642e-05, | |
| "loss": 1.4212, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.8261511774388944e-05, | |
| "loss": 1.4428, | |
| "step": 60500 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.824716643639381e-05, | |
| "loss": 1.4625, | |
| "step": 61000 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.823279235022633e-05, | |
| "loss": 1.4529, | |
| "step": 61500 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.8218418264058864e-05, | |
| "loss": 1.4375, | |
| "step": 62000 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.8204044177891395e-05, | |
| "loss": 1.4338, | |
| "step": 62500 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.818967009172392e-05, | |
| "loss": 1.4556, | |
| "step": 63000 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 4.817529600555645e-05, | |
| "loss": 1.4141, | |
| "step": 63500 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 4.816092191938898e-05, | |
| "loss": 1.4264, | |
| "step": 64000 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 4.8146547833221505e-05, | |
| "loss": 1.4223, | |
| "step": 64500 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 4.813220249522637e-05, | |
| "loss": 1.4082, | |
| "step": 65000 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 4.8117828409058894e-05, | |
| "loss": 1.4546, | |
| "step": 65500 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 4.8103454322891425e-05, | |
| "loss": 1.4737, | |
| "step": 66000 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 4.808910898489629e-05, | |
| "loss": 1.4299, | |
| "step": 66500 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 4.807473489872882e-05, | |
| "loss": 1.4541, | |
| "step": 67000 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 4.8060360812561344e-05, | |
| "loss": 1.444, | |
| "step": 67500 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 4.804598672639387e-05, | |
| "loss": 1.4292, | |
| "step": 68000 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 4.80316126402264e-05, | |
| "loss": 1.3933, | |
| "step": 68500 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 4.801723855405893e-05, | |
| "loss": 1.4456, | |
| "step": 69000 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 4.8002864467891455e-05, | |
| "loss": 1.4617, | |
| "step": 69500 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 4.7988490381723986e-05, | |
| "loss": 1.4573, | |
| "step": 70000 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 4.7974145043728843e-05, | |
| "loss": 1.4704, | |
| "step": 70500 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 4.7959770957561374e-05, | |
| "loss": 1.4401, | |
| "step": 71000 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 4.794542561956624e-05, | |
| "loss": 1.4131, | |
| "step": 71500 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 4.793105153339877e-05, | |
| "loss": 1.4041, | |
| "step": 72000 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 4.7916677447231294e-05, | |
| "loss": 1.4603, | |
| "step": 72500 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 4.7902303361063825e-05, | |
| "loss": 1.444, | |
| "step": 73000 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 4.788792927489635e-05, | |
| "loss": 1.4544, | |
| "step": 73500 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 4.787355518872888e-05, | |
| "loss": 1.4553, | |
| "step": 74000 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 4.7859181102561404e-05, | |
| "loss": 1.417, | |
| "step": 74500 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 4.784483576456627e-05, | |
| "loss": 1.4277, | |
| "step": 75000 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 4.78304616783988e-05, | |
| "loss": 1.4746, | |
| "step": 75500 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 4.781608759223133e-05, | |
| "loss": 1.4245, | |
| "step": 76000 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 4.7801713506063855e-05, | |
| "loss": 1.429, | |
| "step": 76500 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 4.778733941989638e-05, | |
| "loss": 1.39, | |
| "step": 77000 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 4.777296533372891e-05, | |
| "loss": 1.4128, | |
| "step": 77500 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 4.775859124756144e-05, | |
| "loss": 1.4054, | |
| "step": 78000 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 4.7744217161393965e-05, | |
| "loss": 1.4737, | |
| "step": 78500 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 4.772987182339883e-05, | |
| "loss": 1.4217, | |
| "step": 79000 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 4.7715497737231354e-05, | |
| "loss": 1.4723, | |
| "step": 79500 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 4.770115239923622e-05, | |
| "loss": 1.4435, | |
| "step": 80000 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 4.768677831306875e-05, | |
| "loss": 1.438, | |
| "step": 80500 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 4.767240422690128e-05, | |
| "loss": 1.4572, | |
| "step": 81000 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 4.7658030140733805e-05, | |
| "loss": 1.3813, | |
| "step": 81500 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 4.7643656054566336e-05, | |
| "loss": 1.45, | |
| "step": 82000 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 4.7629310716571193e-05, | |
| "loss": 1.4768, | |
| "step": 82500 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 4.7614936630403724e-05, | |
| "loss": 1.3856, | |
| "step": 83000 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 4.7600562544236255e-05, | |
| "loss": 1.4351, | |
| "step": 83500 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 4.758618845806878e-05, | |
| "loss": 1.4182, | |
| "step": 84000 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 4.757181437190131e-05, | |
| "loss": 1.4368, | |
| "step": 84500 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 4.7557440285733835e-05, | |
| "loss": 1.4184, | |
| "step": 85000 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 4.7543066199566366e-05, | |
| "loss": 1.432, | |
| "step": 85500 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 4.752869211339889e-05, | |
| "loss": 1.4598, | |
| "step": 86000 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 4.7514346775403754e-05, | |
| "loss": 1.4246, | |
| "step": 86500 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 4.7499972689236285e-05, | |
| "loss": 1.4359, | |
| "step": 87000 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 4.7485598603068816e-05, | |
| "loss": 1.4266, | |
| "step": 87500 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 4.747122451690134e-05, | |
| "loss": 1.4182, | |
| "step": 88000 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 4.7456879178906205e-05, | |
| "loss": 1.4571, | |
| "step": 88500 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 4.744250509273873e-05, | |
| "loss": 1.4457, | |
| "step": 89000 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 4.742813100657126e-05, | |
| "loss": 1.4522, | |
| "step": 89500 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 4.741375692040379e-05, | |
| "loss": 1.4193, | |
| "step": 90000 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 4.7399382834236315e-05, | |
| "loss": 1.3954, | |
| "step": 90500 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 4.7385008748068846e-05, | |
| "loss": 1.4555, | |
| "step": 91000 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 4.7370663410073704e-05, | |
| "loss": 1.3915, | |
| "step": 91500 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 4.7356289323906235e-05, | |
| "loss": 1.4263, | |
| "step": 92000 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 4.7341915237738766e-05, | |
| "loss": 1.4067, | |
| "step": 92500 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 4.732754115157129e-05, | |
| "loss": 1.4035, | |
| "step": 93000 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 4.7313195813576155e-05, | |
| "loss": 1.4041, | |
| "step": 93500 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 4.729882172740868e-05, | |
| "loss": 1.4253, | |
| "step": 94000 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 4.728444764124121e-05, | |
| "loss": 1.4418, | |
| "step": 94500 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 4.727007355507374e-05, | |
| "loss": 1.4457, | |
| "step": 95000 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 4.7255728217078605e-05, | |
| "loss": 1.4415, | |
| "step": 95500 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 4.724135413091113e-05, | |
| "loss": 1.4302, | |
| "step": 96000 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 4.722698004474366e-05, | |
| "loss": 1.4659, | |
| "step": 96500 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 4.7212605958576185e-05, | |
| "loss": 1.411, | |
| "step": 97000 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 4.7198231872408716e-05, | |
| "loss": 1.4336, | |
| "step": 97500 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 4.718385778624124e-05, | |
| "loss": 1.3472, | |
| "step": 98000 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 4.716948370007377e-05, | |
| "loss": 1.4557, | |
| "step": 98500 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 4.71551096139063e-05, | |
| "loss": 1.3925, | |
| "step": 99000 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 4.714076427591116e-05, | |
| "loss": 1.3936, | |
| "step": 99500 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 4.712639018974369e-05, | |
| "loss": 1.427, | |
| "step": 100000 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 4.7112016103576215e-05, | |
| "loss": 1.4013, | |
| "step": 100500 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 4.7097642017408746e-05, | |
| "loss": 1.4052, | |
| "step": 101000 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 4.708329667941361e-05, | |
| "loss": 1.4341, | |
| "step": 101500 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 4.7068922593246134e-05, | |
| "loss": 1.3709, | |
| "step": 102000 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 4.7054548507078665e-05, | |
| "loss": 1.3834, | |
| "step": 102500 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 4.704017442091119e-05, | |
| "loss": 1.4163, | |
| "step": 103000 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 4.702580033474372e-05, | |
| "loss": 1.4131, | |
| "step": 103500 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 4.701142624857625e-05, | |
| "loss": 1.4448, | |
| "step": 104000 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 4.699708091058111e-05, | |
| "loss": 1.4091, | |
| "step": 104500 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 4.698270682441364e-05, | |
| "loss": 1.3743, | |
| "step": 105000 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 4.6968361486418505e-05, | |
| "loss": 1.4378, | |
| "step": 105500 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 4.695398740025103e-05, | |
| "loss": 1.3978, | |
| "step": 106000 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 4.693961331408356e-05, | |
| "loss": 1.4279, | |
| "step": 106500 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 4.692523922791609e-05, | |
| "loss": 1.4038, | |
| "step": 107000 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 4.691089388992095e-05, | |
| "loss": 1.3691, | |
| "step": 107500 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 4.689651980375348e-05, | |
| "loss": 1.4098, | |
| "step": 108000 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 4.6882145717586004e-05, | |
| "loss": 1.4601, | |
| "step": 108500 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 4.6867771631418535e-05, | |
| "loss": 1.4051, | |
| "step": 109000 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 4.6853397545251066e-05, | |
| "loss": 1.3787, | |
| "step": 109500 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 4.683902345908359e-05, | |
| "loss": 1.3862, | |
| "step": 110000 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 4.682464937291612e-05, | |
| "loss": 1.3977, | |
| "step": 110500 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 4.6810275286748645e-05, | |
| "loss": 1.4151, | |
| "step": 111000 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 4.679592994875351e-05, | |
| "loss": 1.4034, | |
| "step": 111500 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 4.678155586258604e-05, | |
| "loss": 1.3965, | |
| "step": 112000 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 4.6767181776418565e-05, | |
| "loss": 1.4523, | |
| "step": 112500 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 4.6752807690251096e-05, | |
| "loss": 1.4077, | |
| "step": 113000 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 4.673843360408362e-05, | |
| "loss": 1.4365, | |
| "step": 113500 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 4.672405951791615e-05, | |
| "loss": 1.371, | |
| "step": 114000 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 4.670968543174868e-05, | |
| "loss": 1.3935, | |
| "step": 114500 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 4.6695311345581206e-05, | |
| "loss": 1.4184, | |
| "step": 115000 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 4.668096600758607e-05, | |
| "loss": 1.394, | |
| "step": 115500 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 4.6666591921418595e-05, | |
| "loss": 1.3801, | |
| "step": 116000 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 4.6652217835251126e-05, | |
| "loss": 1.3832, | |
| "step": 116500 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 4.663784374908366e-05, | |
| "loss": 1.4032, | |
| "step": 117000 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 4.662346966291618e-05, | |
| "loss": 1.4205, | |
| "step": 117500 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 4.660909557674871e-05, | |
| "loss": 1.3854, | |
| "step": 118000 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 4.6594750238753576e-05, | |
| "loss": 1.3982, | |
| "step": 118500 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 4.65803761525861e-05, | |
| "loss": 1.3992, | |
| "step": 119000 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 4.656600206641863e-05, | |
| "loss": 1.348, | |
| "step": 119500 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 4.6551627980251156e-05, | |
| "loss": 1.4251, | |
| "step": 120000 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 4.653725389408369e-05, | |
| "loss": 1.3713, | |
| "step": 120500 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 4.652290855608855e-05, | |
| "loss": 1.4152, | |
| "step": 121000 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 4.6508534469921075e-05, | |
| "loss": 1.4396, | |
| "step": 121500 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 4.6494160383753606e-05, | |
| "loss": 1.412, | |
| "step": 122000 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 4.647978629758613e-05, | |
| "loss": 1.4279, | |
| "step": 122500 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 4.646541221141866e-05, | |
| "loss": 1.4773, | |
| "step": 123000 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 4.645103812525119e-05, | |
| "loss": 1.4025, | |
| "step": 123500 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 4.643666403908372e-05, | |
| "loss": 1.4126, | |
| "step": 124000 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 4.642228995291625e-05, | |
| "loss": 1.3657, | |
| "step": 124500 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 4.6407944614921105e-05, | |
| "loss": 1.4284, | |
| "step": 125000 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 4.639359927692597e-05, | |
| "loss": 1.4351, | |
| "step": 125500 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 4.63792251907585e-05, | |
| "loss": 1.4265, | |
| "step": 126000 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 4.6364851104591025e-05, | |
| "loss": 1.3681, | |
| "step": 126500 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 4.6350477018423556e-05, | |
| "loss": 1.3806, | |
| "step": 127000 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 4.633610293225608e-05, | |
| "loss": 1.3462, | |
| "step": 127500 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 4.632172884608861e-05, | |
| "loss": 1.4283, | |
| "step": 128000 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 4.630735475992114e-05, | |
| "loss": 1.4105, | |
| "step": 128500 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 4.6292980673753666e-05, | |
| "loss": 1.3929, | |
| "step": 129000 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 4.627863533575853e-05, | |
| "loss": 1.4379, | |
| "step": 129500 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 4.626426124959106e-05, | |
| "loss": 1.4175, | |
| "step": 130000 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 4.6249887163423586e-05, | |
| "loss": 1.3625, | |
| "step": 130500 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 4.623551307725612e-05, | |
| "loss": 1.4202, | |
| "step": 131000 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 4.622113899108864e-05, | |
| "loss": 1.4135, | |
| "step": 131500 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 4.620676490492117e-05, | |
| "loss": 1.4075, | |
| "step": 132000 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 4.61923908187537e-05, | |
| "loss": 1.3812, | |
| "step": 132500 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 4.617804548075856e-05, | |
| "loss": 1.3856, | |
| "step": 133000 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 4.616367139459109e-05, | |
| "loss": 1.3601, | |
| "step": 133500 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 4.6149297308423616e-05, | |
| "loss": 1.3832, | |
| "step": 134000 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 4.613492322225615e-05, | |
| "loss": 1.3954, | |
| "step": 134500 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 4.612057788426101e-05, | |
| "loss": 1.3669, | |
| "step": 135000 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 4.6106203798093536e-05, | |
| "loss": 1.4357, | |
| "step": 135500 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 4.609182971192607e-05, | |
| "loss": 1.4157, | |
| "step": 136000 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 4.607745562575859e-05, | |
| "loss": 1.3635, | |
| "step": 136500 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 4.606308153959112e-05, | |
| "loss": 1.4251, | |
| "step": 137000 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 4.604870745342365e-05, | |
| "loss": 1.3557, | |
| "step": 137500 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 4.603436211542852e-05, | |
| "loss": 1.3947, | |
| "step": 138000 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 4.601998802926104e-05, | |
| "loss": 1.3881, | |
| "step": 138500 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 4.6005613943093566e-05, | |
| "loss": 1.3912, | |
| "step": 139000 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 4.59912398569261e-05, | |
| "loss": 1.3857, | |
| "step": 139500 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 4.597689451893096e-05, | |
| "loss": 1.4202, | |
| "step": 140000 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 4.596252043276349e-05, | |
| "loss": 1.3871, | |
| "step": 140500 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 4.5948146346596016e-05, | |
| "loss": 1.3563, | |
| "step": 141000 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 4.593380100860088e-05, | |
| "loss": 1.4027, | |
| "step": 141500 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 4.5919455670605745e-05, | |
| "loss": 1.399, | |
| "step": 142000 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 4.590508158443827e-05, | |
| "loss": 1.4522, | |
| "step": 142500 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 4.58907074982708e-05, | |
| "loss": 1.4005, | |
| "step": 143000 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 4.587633341210333e-05, | |
| "loss": 1.3592, | |
| "step": 143500 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 4.5861959325935856e-05, | |
| "loss": 1.401, | |
| "step": 144000 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 4.584758523976838e-05, | |
| "loss": 1.3642, | |
| "step": 144500 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 4.583321115360091e-05, | |
| "loss": 1.3673, | |
| "step": 145000 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 4.581883706743344e-05, | |
| "loss": 1.3732, | |
| "step": 145500 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 4.5804491729438306e-05, | |
| "loss": 1.3784, | |
| "step": 146000 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 4.579011764327083e-05, | |
| "loss": 1.3617, | |
| "step": 146500 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 4.5775743557103355e-05, | |
| "loss": 1.4072, | |
| "step": 147000 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 4.5761369470935886e-05, | |
| "loss": 1.387, | |
| "step": 147500 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 4.574699538476842e-05, | |
| "loss": 1.4189, | |
| "step": 148000 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 4.573262129860094e-05, | |
| "loss": 1.4086, | |
| "step": 148500 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 4.571824721243347e-05, | |
| "loss": 1.4118, | |
| "step": 149000 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 4.5703873126266e-05, | |
| "loss": 1.3505, | |
| "step": 149500 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 4.568949904009853e-05, | |
| "loss": 1.3993, | |
| "step": 150000 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 4.567515370210339e-05, | |
| "loss": 1.3766, | |
| "step": 150500 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 4.5660779615935916e-05, | |
| "loss": 1.3773, | |
| "step": 151000 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 4.564640552976845e-05, | |
| "loss": 1.3799, | |
| "step": 151500 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 4.563203144360098e-05, | |
| "loss": 1.4175, | |
| "step": 152000 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 4.56176573574335e-05, | |
| "loss": 1.3748, | |
| "step": 152500 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 4.560328327126603e-05, | |
| "loss": 1.4009, | |
| "step": 153000 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 4.558893793327089e-05, | |
| "loss": 1.3603, | |
| "step": 153500 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 4.557456384710342e-05, | |
| "loss": 1.3207, | |
| "step": 154000 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 4.556018976093595e-05, | |
| "loss": 1.4082, | |
| "step": 154500 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 4.554581567476848e-05, | |
| "loss": 1.3811, | |
| "step": 155000 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 4.553144158860101e-05, | |
| "loss": 1.4071, | |
| "step": 155500 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 4.551706750243354e-05, | |
| "loss": 1.3726, | |
| "step": 156000 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 4.550269341626606e-05, | |
| "loss": 1.4198, | |
| "step": 156500 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 4.548831933009859e-05, | |
| "loss": 1.3945, | |
| "step": 157000 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 4.547394524393112e-05, | |
| "loss": 1.4092, | |
| "step": 157500 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 4.545957115776365e-05, | |
| "loss": 1.3685, | |
| "step": 158000 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 4.5445225819768513e-05, | |
| "loss": 1.3967, | |
| "step": 158500 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 4.543085173360104e-05, | |
| "loss": 1.3583, | |
| "step": 159000 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 4.541647764743356e-05, | |
| "loss": 1.3435, | |
| "step": 159500 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 4.5402132309438426e-05, | |
| "loss": 1.3974, | |
| "step": 160000 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 4.538775822327096e-05, | |
| "loss": 1.3452, | |
| "step": 160500 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 4.537338413710349e-05, | |
| "loss": 1.3704, | |
| "step": 161000 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 4.535901005093601e-05, | |
| "loss": 1.4065, | |
| "step": 161500 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 4.534463596476854e-05, | |
| "loss": 1.3301, | |
| "step": 162000 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 4.5330261878601074e-05, | |
| "loss": 1.4155, | |
| "step": 162500 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 4.53158877924336e-05, | |
| "loss": 1.3922, | |
| "step": 163000 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 4.530151370626612e-05, | |
| "loss": 1.424, | |
| "step": 163500 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 4.5287139620098654e-05, | |
| "loss": 1.3749, | |
| "step": 164000 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 4.5272765533931185e-05, | |
| "loss": 1.3787, | |
| "step": 164500 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 4.525839144776371e-05, | |
| "loss": 1.4292, | |
| "step": 165000 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 4.5244046109768573e-05, | |
| "loss": 1.3708, | |
| "step": 165500 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 4.52296720236011e-05, | |
| "loss": 1.3503, | |
| "step": 166000 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 4.521529793743363e-05, | |
| "loss": 1.3939, | |
| "step": 166500 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 4.520092385126616e-05, | |
| "loss": 1.3795, | |
| "step": 167000 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 4.5186578513271024e-05, | |
| "loss": 1.317, | |
| "step": 167500 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 4.517223317527589e-05, | |
| "loss": 1.3537, | |
| "step": 168000 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 4.515785908910841e-05, | |
| "loss": 1.3657, | |
| "step": 168500 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 4.514348500294094e-05, | |
| "loss": 1.3708, | |
| "step": 169000 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 4.512911091677347e-05, | |
| "loss": 1.4122, | |
| "step": 169500 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 4.5114736830606e-05, | |
| "loss": 1.3816, | |
| "step": 170000 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 4.510036274443852e-05, | |
| "loss": 1.3861, | |
| "step": 170500 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 4.508598865827105e-05, | |
| "loss": 1.3518, | |
| "step": 171000 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 4.5071614572103585e-05, | |
| "loss": 1.3967, | |
| "step": 171500 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 4.505726923410844e-05, | |
| "loss": 1.3642, | |
| "step": 172000 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 4.504292389611331e-05, | |
| "loss": 1.353, | |
| "step": 172500 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 4.502854980994584e-05, | |
| "loss": 1.3855, | |
| "step": 173000 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 4.501417572377836e-05, | |
| "loss": 1.3736, | |
| "step": 173500 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 4.499980163761089e-05, | |
| "loss": 1.3663, | |
| "step": 174000 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 4.498542755144342e-05, | |
| "loss": 1.4056, | |
| "step": 174500 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 4.497105346527595e-05, | |
| "loss": 1.3703, | |
| "step": 175000 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 4.495667937910847e-05, | |
| "loss": 1.3636, | |
| "step": 175500 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 4.4942305292941004e-05, | |
| "loss": 1.3887, | |
| "step": 176000 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 4.4927931206773535e-05, | |
| "loss": 1.3568, | |
| "step": 176500 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 4.49135858687784e-05, | |
| "loss": 1.4145, | |
| "step": 177000 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 4.4899211782610923e-05, | |
| "loss": 1.3641, | |
| "step": 177500 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 4.488483769644345e-05, | |
| "loss": 1.3802, | |
| "step": 178000 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 4.487046361027598e-05, | |
| "loss": 1.3609, | |
| "step": 178500 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 4.4856118272280836e-05, | |
| "loss": 1.3743, | |
| "step": 179000 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 4.48417729342857e-05, | |
| "loss": 1.3967, | |
| "step": 179500 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 4.482739884811823e-05, | |
| "loss": 1.3887, | |
| "step": 180000 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 4.481302476195076e-05, | |
| "loss": 1.3803, | |
| "step": 180500 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 4.479865067578329e-05, | |
| "loss": 1.3843, | |
| "step": 181000 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 4.478427658961582e-05, | |
| "loss": 1.368, | |
| "step": 181500 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 4.4769931251620676e-05, | |
| "loss": 1.4185, | |
| "step": 182000 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 4.4755557165453214e-05, | |
| "loss": 1.3608, | |
| "step": 182500 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 4.474118307928574e-05, | |
| "loss": 1.3724, | |
| "step": 183000 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 4.472680899311826e-05, | |
| "loss": 1.3864, | |
| "step": 183500 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 4.471243490695079e-05, | |
| "loss": 1.3729, | |
| "step": 184000 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 4.4698060820783324e-05, | |
| "loss": 1.3833, | |
| "step": 184500 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 4.468368673461585e-05, | |
| "loss": 1.3593, | |
| "step": 185000 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 4.466931264844837e-05, | |
| "loss": 1.3443, | |
| "step": 185500 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 4.465496731045324e-05, | |
| "loss": 1.3494, | |
| "step": 186000 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 4.464059322428577e-05, | |
| "loss": 1.3314, | |
| "step": 186500 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 4.46262191381183e-05, | |
| "loss": 1.3666, | |
| "step": 187000 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 4.461184505195082e-05, | |
| "loss": 1.4079, | |
| "step": 187500 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 4.459749971395569e-05, | |
| "loss": 1.4069, | |
| "step": 188000 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 4.458312562778821e-05, | |
| "loss": 1.3429, | |
| "step": 188500 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 4.4568780289793076e-05, | |
| "loss": 1.365, | |
| "step": 189000 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 4.455440620362561e-05, | |
| "loss": 1.4167, | |
| "step": 189500 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 4.454003211745814e-05, | |
| "loss": 1.3715, | |
| "step": 190000 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 4.452565803129066e-05, | |
| "loss": 1.3698, | |
| "step": 190500 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 4.4511283945123186e-05, | |
| "loss": 1.3936, | |
| "step": 191000 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 4.4496909858955724e-05, | |
| "loss": 1.3526, | |
| "step": 191500 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 4.448253577278825e-05, | |
| "loss": 1.3401, | |
| "step": 192000 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 4.446819043479311e-05, | |
| "loss": 1.4083, | |
| "step": 192500 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 4.445381634862564e-05, | |
| "loss": 1.3706, | |
| "step": 193000 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 4.443944226245816e-05, | |
| "loss": 1.3702, | |
| "step": 193500 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 4.44250681762907e-05, | |
| "loss": 1.3915, | |
| "step": 194000 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 4.441069409012322e-05, | |
| "loss": 1.3347, | |
| "step": 194500 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 4.439632000395575e-05, | |
| "loss": 1.4214, | |
| "step": 195000 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 4.438197466596061e-05, | |
| "loss": 1.3836, | |
| "step": 195500 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 4.4367600579793136e-05, | |
| "loss": 1.3125, | |
| "step": 196000 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 4.4353226493625674e-05, | |
| "loss": 1.3719, | |
| "step": 196500 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 4.43388524074582e-05, | |
| "loss": 1.388, | |
| "step": 197000 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 4.432447832129072e-05, | |
| "loss": 1.3519, | |
| "step": 197500 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 4.431010423512325e-05, | |
| "loss": 1.352, | |
| "step": 198000 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 4.4295730148955784e-05, | |
| "loss": 1.3897, | |
| "step": 198500 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 4.428135606278831e-05, | |
| "loss": 1.3947, | |
| "step": 199000 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 4.426701072479317e-05, | |
| "loss": 1.4083, | |
| "step": 199500 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 4.42526366386257e-05, | |
| "loss": 1.3361, | |
| "step": 200000 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 4.4238262552458235e-05, | |
| "loss": 1.3415, | |
| "step": 200500 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 4.422388846629076e-05, | |
| "loss": 1.4054, | |
| "step": 201000 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 4.420951438012328e-05, | |
| "loss": 1.3871, | |
| "step": 201500 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 4.419516904212815e-05, | |
| "loss": 1.3916, | |
| "step": 202000 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 4.418079495596067e-05, | |
| "loss": 1.3449, | |
| "step": 202500 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 4.416642086979321e-05, | |
| "loss": 1.4411, | |
| "step": 203000 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 4.4152046783625734e-05, | |
| "loss": 1.3792, | |
| "step": 203500 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 4.41377014456306e-05, | |
| "loss": 1.3843, | |
| "step": 204000 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 4.412332735946312e-05, | |
| "loss": 1.3514, | |
| "step": 204500 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 4.410895327329565e-05, | |
| "loss": 1.3531, | |
| "step": 205000 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 4.4094579187128184e-05, | |
| "loss": 1.3575, | |
| "step": 205500 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 4.408023384913305e-05, | |
| "loss": 1.3671, | |
| "step": 206000 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 4.406585976296557e-05, | |
| "loss": 1.3818, | |
| "step": 206500 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 4.40514856767981e-05, | |
| "loss": 1.3625, | |
| "step": 207000 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 4.403711159063063e-05, | |
| "loss": 1.2734, | |
| "step": 207500 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 4.402273750446316e-05, | |
| "loss": 1.4119, | |
| "step": 208000 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 4.4008392166468024e-05, | |
| "loss": 1.3444, | |
| "step": 208500 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 4.399401808030055e-05, | |
| "loss": 1.3591, | |
| "step": 209000 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 4.397964399413307e-05, | |
| "loss": 1.3544, | |
| "step": 209500 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 4.39652699079656e-05, | |
| "loss": 1.3843, | |
| "step": 210000 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 4.3950895821798134e-05, | |
| "loss": 1.361, | |
| "step": 210500 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 4.393652173563066e-05, | |
| "loss": 1.3929, | |
| "step": 211000 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 4.392217639763552e-05, | |
| "loss": 1.3484, | |
| "step": 211500 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 4.390780231146805e-05, | |
| "loss": 1.3612, | |
| "step": 212000 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 4.389342822530058e-05, | |
| "loss": 1.3857, | |
| "step": 212500 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 4.387905413913311e-05, | |
| "loss": 1.3092, | |
| "step": 213000 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 4.386468005296563e-05, | |
| "loss": 1.3349, | |
| "step": 213500 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 4.38503347149705e-05, | |
| "loss": 1.3484, | |
| "step": 214000 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 4.383596062880302e-05, | |
| "loss": 1.3718, | |
| "step": 214500 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 4.3821615290807887e-05, | |
| "loss": 1.371, | |
| "step": 215000 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 4.380724120464042e-05, | |
| "loss": 1.3532, | |
| "step": 215500 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 4.379286711847295e-05, | |
| "loss": 1.3346, | |
| "step": 216000 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 4.377849303230547e-05, | |
| "loss": 1.4022, | |
| "step": 216500 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 4.3764118946138e-05, | |
| "loss": 1.2966, | |
| "step": 217000 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 4.3749744859970535e-05, | |
| "loss": 1.3618, | |
| "step": 217500 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 4.373537077380306e-05, | |
| "loss": 1.3389, | |
| "step": 218000 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 4.372099668763558e-05, | |
| "loss": 1.4182, | |
| "step": 218500 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 4.370665134964045e-05, | |
| "loss": 1.3107, | |
| "step": 219000 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 4.369230601164531e-05, | |
| "loss": 1.3784, | |
| "step": 219500 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 4.3677931925477836e-05, | |
| "loss": 1.3293, | |
| "step": 220000 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 4.3663557839310374e-05, | |
| "loss": 1.3381, | |
| "step": 220500 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 4.36491837531429e-05, | |
| "loss": 1.3526, | |
| "step": 221000 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 4.363483841514776e-05, | |
| "loss": 1.3405, | |
| "step": 221500 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 4.362046432898029e-05, | |
| "loss": 1.3746, | |
| "step": 222000 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 4.360609024281281e-05, | |
| "loss": 1.3648, | |
| "step": 222500 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 4.359171615664535e-05, | |
| "loss": 1.413, | |
| "step": 223000 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 4.357734207047787e-05, | |
| "loss": 1.3731, | |
| "step": 223500 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 4.356299673248274e-05, | |
| "loss": 1.3905, | |
| "step": 224000 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 4.354862264631526e-05, | |
| "loss": 1.3992, | |
| "step": 224500 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 4.3534248560147786e-05, | |
| "loss": 1.3332, | |
| "step": 225000 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 4.3519874473980324e-05, | |
| "loss": 1.3735, | |
| "step": 225500 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 4.350550038781285e-05, | |
| "loss": 1.3732, | |
| "step": 226000 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 4.349112630164537e-05, | |
| "loss": 1.3131, | |
| "step": 226500 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 4.3476780963650237e-05, | |
| "loss": 1.3553, | |
| "step": 227000 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 4.346240687748277e-05, | |
| "loss": 1.3415, | |
| "step": 227500 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 4.34480327913153e-05, | |
| "loss": 1.4036, | |
| "step": 228000 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 4.343365870514782e-05, | |
| "loss": 1.3283, | |
| "step": 228500 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 4.341928461898035e-05, | |
| "loss": 1.368, | |
| "step": 229000 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 4.340491053281288e-05, | |
| "loss": 1.3641, | |
| "step": 229500 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 4.339053644664541e-05, | |
| "loss": 1.3759, | |
| "step": 230000 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 4.337616236047793e-05, | |
| "loss": 1.3909, | |
| "step": 230500 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 4.3361788274310464e-05, | |
| "loss": 1.337, | |
| "step": 231000 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 4.334744293631532e-05, | |
| "loss": 1.3323, | |
| "step": 231500 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 4.333306885014786e-05, | |
| "loss": 1.3528, | |
| "step": 232000 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 4.3318694763980384e-05, | |
| "loss": 1.3617, | |
| "step": 232500 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 4.330434942598525e-05, | |
| "loss": 1.353, | |
| "step": 233000 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 4.328997533981777e-05, | |
| "loss": 1.3511, | |
| "step": 233500 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 4.3275601253650297e-05, | |
| "loss": 1.3907, | |
| "step": 234000 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 4.3261227167482834e-05, | |
| "loss": 1.3828, | |
| "step": 234500 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 4.324685308131536e-05, | |
| "loss": 1.3867, | |
| "step": 235000 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 4.323250774332022e-05, | |
| "loss": 1.4029, | |
| "step": 235500 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 4.321813365715275e-05, | |
| "loss": 1.336, | |
| "step": 236000 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 4.320375957098528e-05, | |
| "loss": 1.3467, | |
| "step": 236500 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 4.318938548481781e-05, | |
| "loss": 1.3369, | |
| "step": 237000 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 4.317501139865033e-05, | |
| "loss": 1.3248, | |
| "step": 237500 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 4.316063731248286e-05, | |
| "loss": 1.3209, | |
| "step": 238000 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 4.314626322631539e-05, | |
| "loss": 1.3802, | |
| "step": 238500 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 4.313188914014792e-05, | |
| "loss": 1.3491, | |
| "step": 239000 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 4.3117543802152784e-05, | |
| "loss": 1.3705, | |
| "step": 239500 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 4.310319846415765e-05, | |
| "loss": 1.3495, | |
| "step": 240000 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 4.308882437799017e-05, | |
| "loss": 1.3667, | |
| "step": 240500 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 4.30744502918227e-05, | |
| "loss": 1.3464, | |
| "step": 241000 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 4.306007620565523e-05, | |
| "loss": 1.3318, | |
| "step": 241500 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 4.304570211948776e-05, | |
| "loss": 1.3779, | |
| "step": 242000 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 4.303132803332028e-05, | |
| "loss": 1.3211, | |
| "step": 242500 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 4.301695394715281e-05, | |
| "loss": 1.3281, | |
| "step": 243000 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 4.3002579860985345e-05, | |
| "loss": 1.4052, | |
| "step": 243500 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 4.298820577481787e-05, | |
| "loss": 1.357, | |
| "step": 244000 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 4.297383168865039e-05, | |
| "loss": 1.3943, | |
| "step": 244500 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 4.295948635065526e-05, | |
| "loss": 1.338, | |
| "step": 245000 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 4.294511226448779e-05, | |
| "loss": 1.3568, | |
| "step": 245500 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 4.2930766926492647e-05, | |
| "loss": 1.3254, | |
| "step": 246000 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 4.291639284032518e-05, | |
| "loss": 1.3355, | |
| "step": 246500 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 4.290201875415771e-05, | |
| "loss": 1.3979, | |
| "step": 247000 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 4.288764466799023e-05, | |
| "loss": 1.3396, | |
| "step": 247500 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 4.2873270581822764e-05, | |
| "loss": 1.3746, | |
| "step": 248000 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_cer": 0.2530746846497492, | |
| "eval_loss": 1.026394009590149, | |
| "eval_runtime": 10837.1839, | |
| "eval_samples_per_second": 9.02, | |
| "eval_steps_per_second": 1.128, | |
| "step": 248463 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 4.2858896495655295e-05, | |
| "loss": 1.3325, | |
| "step": 248500 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 4.284452240948782e-05, | |
| "loss": 1.3003, | |
| "step": 249000 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 4.283014832332034e-05, | |
| "loss": 1.3294, | |
| "step": 249500 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 4.2815774237152874e-05, | |
| "loss": 1.3038, | |
| "step": 250000 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 4.280142889915774e-05, | |
| "loss": 1.3098, | |
| "step": 250500 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 4.278705481299027e-05, | |
| "loss": 1.2883, | |
| "step": 251000 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 4.2772709474995134e-05, | |
| "loss": 1.3391, | |
| "step": 251500 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 4.275833538882766e-05, | |
| "loss": 1.342, | |
| "step": 252000 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 4.274396130266018e-05, | |
| "loss": 1.2917, | |
| "step": 252500 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 4.272958721649271e-05, | |
| "loss": 1.3136, | |
| "step": 253000 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 4.2715213130325244e-05, | |
| "loss": 1.2974, | |
| "step": 253500 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 4.270083904415777e-05, | |
| "loss": 1.2851, | |
| "step": 254000 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 4.26864649579903e-05, | |
| "loss": 1.2778, | |
| "step": 254500 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 4.267209087182283e-05, | |
| "loss": 1.3019, | |
| "step": 255000 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 4.265774553382769e-05, | |
| "loss": 1.2946, | |
| "step": 255500 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 4.264337144766022e-05, | |
| "loss": 1.3452, | |
| "step": 256000 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 4.262899736149274e-05, | |
| "loss": 1.3797, | |
| "step": 256500 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 4.2614623275325274e-05, | |
| "loss": 1.3143, | |
| "step": 257000 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 4.260027793733013e-05, | |
| "loss": 1.2851, | |
| "step": 257500 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 4.258590385116266e-05, | |
| "loss": 1.3339, | |
| "step": 258000 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 4.2571529764995194e-05, | |
| "loss": 1.327, | |
| "step": 258500 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 4.255718442700006e-05, | |
| "loss": 1.3036, | |
| "step": 259000 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 4.254281034083258e-05, | |
| "loss": 1.2892, | |
| "step": 259500 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 4.2528436254665114e-05, | |
| "loss": 1.3411, | |
| "step": 260000 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 4.2514062168497645e-05, | |
| "loss": 1.3209, | |
| "step": 260500 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 4.24997168305025e-05, | |
| "loss": 1.321, | |
| "step": 261000 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 4.248534274433503e-05, | |
| "loss": 1.3048, | |
| "step": 261500 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 4.247096865816756e-05, | |
| "loss": 1.2998, | |
| "step": 262000 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 4.245659457200009e-05, | |
| "loss": 1.3151, | |
| "step": 262500 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 4.244222048583262e-05, | |
| "loss": 1.3393, | |
| "step": 263000 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 4.2427846399665144e-05, | |
| "loss": 1.3427, | |
| "step": 263500 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 4.241347231349767e-05, | |
| "loss": 1.3462, | |
| "step": 264000 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 4.23990982273302e-05, | |
| "loss": 1.2871, | |
| "step": 264500 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 4.238472414116273e-05, | |
| "loss": 1.3217, | |
| "step": 265000 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 4.2370378803167594e-05, | |
| "loss": 1.3224, | |
| "step": 265500 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 4.235600471700012e-05, | |
| "loss": 1.3642, | |
| "step": 266000 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 4.234163063083264e-05, | |
| "loss": 1.2768, | |
| "step": 266500 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 4.2327256544665174e-05, | |
| "loss": 1.3201, | |
| "step": 267000 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 4.231291120667004e-05, | |
| "loss": 1.3396, | |
| "step": 267500 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 4.22985658686749e-05, | |
| "loss": 1.3086, | |
| "step": 268000 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 4.2284191782507434e-05, | |
| "loss": 1.3079, | |
| "step": 268500 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 4.226981769633996e-05, | |
| "loss": 1.3209, | |
| "step": 269000 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 4.225544361017248e-05, | |
| "loss": 1.3255, | |
| "step": 269500 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 4.224106952400501e-05, | |
| "loss": 1.3124, | |
| "step": 270000 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 4.2226695437837544e-05, | |
| "loss": 1.3312, | |
| "step": 270500 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 4.221232135167007e-05, | |
| "loss": 1.3202, | |
| "step": 271000 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 4.21979472655026e-05, | |
| "loss": 1.3048, | |
| "step": 271500 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 4.218357317933513e-05, | |
| "loss": 1.2913, | |
| "step": 272000 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 4.216922784133999e-05, | |
| "loss": 1.3403, | |
| "step": 272500 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 4.215485375517252e-05, | |
| "loss": 1.3155, | |
| "step": 273000 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 4.214047966900504e-05, | |
| "loss": 1.3483, | |
| "step": 273500 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 4.2126105582837574e-05, | |
| "loss": 1.2946, | |
| "step": 274000 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 4.211176024484244e-05, | |
| "loss": 1.3293, | |
| "step": 274500 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 4.209738615867496e-05, | |
| "loss": 1.3216, | |
| "step": 275000 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 4.2083012072507494e-05, | |
| "loss": 1.3089, | |
| "step": 275500 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 4.206863798634002e-05, | |
| "loss": 1.2769, | |
| "step": 276000 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 4.205429264834488e-05, | |
| "loss": 1.3592, | |
| "step": 276500 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 4.203994731034975e-05, | |
| "loss": 1.3546, | |
| "step": 277000 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 4.202557322418227e-05, | |
| "loss": 1.2827, | |
| "step": 277500 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 4.20111991380148e-05, | |
| "loss": 1.3685, | |
| "step": 278000 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 4.199682505184733e-05, | |
| "loss": 1.3143, | |
| "step": 278500 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 4.198245096567986e-05, | |
| "loss": 1.3236, | |
| "step": 279000 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 4.196810562768472e-05, | |
| "loss": 1.3198, | |
| "step": 279500 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 4.1953731541517246e-05, | |
| "loss": 1.298, | |
| "step": 280000 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 4.193935745534978e-05, | |
| "loss": 1.2766, | |
| "step": 280500 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 4.192498336918231e-05, | |
| "loss": 1.2751, | |
| "step": 281000 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 4.191060928301483e-05, | |
| "loss": 1.3115, | |
| "step": 281500 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 4.189629269319203e-05, | |
| "loss": 1.3157, | |
| "step": 282000 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 4.188191860702456e-05, | |
| "loss": 1.3184, | |
| "step": 282500 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 4.1867544520857085e-05, | |
| "loss": 1.3392, | |
| "step": 283000 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 4.1853170434689616e-05, | |
| "loss": 1.3014, | |
| "step": 283500 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 4.183879634852215e-05, | |
| "loss": 1.3342, | |
| "step": 284000 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 4.182442226235467e-05, | |
| "loss": 1.2772, | |
| "step": 284500 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 4.18100481761872e-05, | |
| "loss": 1.3493, | |
| "step": 285000 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 4.179567409001973e-05, | |
| "loss": 1.3381, | |
| "step": 285500 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 4.178130000385226e-05, | |
| "loss": 1.3231, | |
| "step": 286000 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 4.176695466585712e-05, | |
| "loss": 1.3612, | |
| "step": 286500 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 4.1752580579689646e-05, | |
| "loss": 1.3584, | |
| "step": 287000 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 4.173820649352218e-05, | |
| "loss": 1.3164, | |
| "step": 287500 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 4.172383240735471e-05, | |
| "loss": 1.3132, | |
| "step": 288000 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 4.170945832118723e-05, | |
| "loss": 1.3136, | |
| "step": 288500 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 4.1695084235019763e-05, | |
| "loss": 1.3275, | |
| "step": 289000 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 4.168076764519696e-05, | |
| "loss": 1.3111, | |
| "step": 289500 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 4.1666393559029486e-05, | |
| "loss": 1.3326, | |
| "step": 290000 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 4.165201947286202e-05, | |
| "loss": 1.3284, | |
| "step": 290500 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 4.163764538669454e-05, | |
| "loss": 1.3265, | |
| "step": 291000 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 4.162327130052707e-05, | |
| "loss": 1.3103, | |
| "step": 291500 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 4.1608897214359596e-05, | |
| "loss": 1.2961, | |
| "step": 292000 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 4.159452312819213e-05, | |
| "loss": 1.2697, | |
| "step": 292500 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 4.158014904202466e-05, | |
| "loss": 1.3562, | |
| "step": 293000 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 4.156577495585718e-05, | |
| "loss": 1.3253, | |
| "step": 293500 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 4.155142961786205e-05, | |
| "loss": 1.3282, | |
| "step": 294000 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 4.153705553169457e-05, | |
| "loss": 1.3074, | |
| "step": 294500 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 4.15226814455271e-05, | |
| "loss": 1.2868, | |
| "step": 295000 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 4.150830735935963e-05, | |
| "loss": 1.3628, | |
| "step": 295500 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 4.149393327319216e-05, | |
| "loss": 1.3665, | |
| "step": 296000 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 4.147955918702469e-05, | |
| "loss": 1.2772, | |
| "step": 296500 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 4.146518510085721e-05, | |
| "loss": 1.3005, | |
| "step": 297000 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 4.145081101468974e-05, | |
| "loss": 1.3234, | |
| "step": 297500 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 4.1436436928522274e-05, | |
| "loss": 1.3169, | |
| "step": 298000 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 4.142209159052713e-05, | |
| "loss": 1.3383, | |
| "step": 298500 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 4.140771750435966e-05, | |
| "loss": 1.3168, | |
| "step": 299000 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 4.1393343418192194e-05, | |
| "loss": 1.3092, | |
| "step": 299500 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 4.137896933202472e-05, | |
| "loss": 1.2855, | |
| "step": 300000 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 4.136462399402958e-05, | |
| "loss": 1.3017, | |
| "step": 300500 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 4.135024990786211e-05, | |
| "loss": 1.3185, | |
| "step": 301000 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 4.133587582169464e-05, | |
| "loss": 1.3136, | |
| "step": 301500 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 4.132150173552717e-05, | |
| "loss": 1.302, | |
| "step": 302000 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 4.130712764935969e-05, | |
| "loss": 1.3137, | |
| "step": 302500 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 4.1292753563192224e-05, | |
| "loss": 1.3458, | |
| "step": 303000 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 4.127840822519708e-05, | |
| "loss": 1.3394, | |
| "step": 303500 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 4.126403413902961e-05, | |
| "loss": 1.3058, | |
| "step": 304000 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 4.1249660052862143e-05, | |
| "loss": 1.2944, | |
| "step": 304500 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 4.123528596669467e-05, | |
| "loss": 1.2933, | |
| "step": 305000 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 4.122094062869953e-05, | |
| "loss": 1.2685, | |
| "step": 305500 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 4.120656654253206e-05, | |
| "loss": 1.3184, | |
| "step": 306000 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 4.119222120453692e-05, | |
| "loss": 1.3046, | |
| "step": 306500 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 4.117784711836945e-05, | |
| "loss": 1.2728, | |
| "step": 307000 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 4.116347303220198e-05, | |
| "loss": 1.3227, | |
| "step": 307500 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 4.114909894603451e-05, | |
| "loss": 1.3138, | |
| "step": 308000 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 4.113472485986704e-05, | |
| "loss": 1.3364, | |
| "step": 308500 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 4.112035077369956e-05, | |
| "loss": 1.3093, | |
| "step": 309000 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 4.110597668753209e-05, | |
| "loss": 1.2981, | |
| "step": 309500 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 4.109160260136462e-05, | |
| "loss": 1.3095, | |
| "step": 310000 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 4.107722851519715e-05, | |
| "loss": 1.3385, | |
| "step": 310500 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 4.1062911925374346e-05, | |
| "loss": 1.2851, | |
| "step": 311000 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 4.104853783920688e-05, | |
| "loss": 1.3389, | |
| "step": 311500 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 4.10341637530394e-05, | |
| "loss": 1.321, | |
| "step": 312000 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 4.101978966687193e-05, | |
| "loss": 1.3117, | |
| "step": 312500 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 4.100541558070446e-05, | |
| "loss": 1.3384, | |
| "step": 313000 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 4.099104149453699e-05, | |
| "loss": 1.3208, | |
| "step": 313500 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 4.097666740836951e-05, | |
| "loss": 1.2958, | |
| "step": 314000 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 4.0962322070374376e-05, | |
| "loss": 1.3443, | |
| "step": 314500 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 4.094794798420691e-05, | |
| "loss": 1.3363, | |
| "step": 315000 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 4.093357389803943e-05, | |
| "loss": 1.2784, | |
| "step": 315500 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 4.091919981187196e-05, | |
| "loss": 1.3149, | |
| "step": 316000 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 4.090482572570449e-05, | |
| "loss": 1.3311, | |
| "step": 316500 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 4.089045163953702e-05, | |
| "loss": 1.3024, | |
| "step": 317000 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 4.087607755336955e-05, | |
| "loss": 1.2816, | |
| "step": 317500 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 4.086170346720207e-05, | |
| "loss": 1.3256, | |
| "step": 318000 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 4.084735812920694e-05, | |
| "loss": 1.3071, | |
| "step": 318500 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 4.083298404303947e-05, | |
| "loss": 1.3082, | |
| "step": 319000 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 4.081860995687199e-05, | |
| "loss": 1.325, | |
| "step": 319500 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 4.0804235870704524e-05, | |
| "loss": 1.3001, | |
| "step": 320000 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 4.078989053270939e-05, | |
| "loss": 1.2427, | |
| "step": 320500 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 4.077551644654191e-05, | |
| "loss": 1.3231, | |
| "step": 321000 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 4.076114236037444e-05, | |
| "loss": 1.2805, | |
| "step": 321500 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 4.074676827420697e-05, | |
| "loss": 1.303, | |
| "step": 322000 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 4.07323941880395e-05, | |
| "loss": 1.3498, | |
| "step": 322500 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 4.071804885004436e-05, | |
| "loss": 1.3334, | |
| "step": 323000 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 4.070367476387689e-05, | |
| "loss": 1.3092, | |
| "step": 323500 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 4.068930067770942e-05, | |
| "loss": 1.3639, | |
| "step": 324000 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 4.067492659154194e-05, | |
| "loss": 1.2504, | |
| "step": 324500 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 4.066055250537447e-05, | |
| "loss": 1.3246, | |
| "step": 325000 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 4.064620716737934e-05, | |
| "loss": 1.3483, | |
| "step": 325500 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 4.063183308121186e-05, | |
| "loss": 1.322, | |
| "step": 326000 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 4.061745899504439e-05, | |
| "loss": 1.299, | |
| "step": 326500 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 4.0603084908876924e-05, | |
| "loss": 1.269, | |
| "step": 327000 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 4.058873957088178e-05, | |
| "loss": 1.3026, | |
| "step": 327500 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 4.057436548471431e-05, | |
| "loss": 1.3286, | |
| "step": 328000 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 4.055999139854684e-05, | |
| "loss": 1.3145, | |
| "step": 328500 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 4.054561731237937e-05, | |
| "loss": 1.3127, | |
| "step": 329000 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 4.053127197438423e-05, | |
| "loss": 1.3519, | |
| "step": 329500 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 4.0516897888216757e-05, | |
| "loss": 1.3121, | |
| "step": 330000 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 4.050252380204929e-05, | |
| "loss": 1.3452, | |
| "step": 330500 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 4.048817846405415e-05, | |
| "loss": 1.2979, | |
| "step": 331000 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 4.0473804377886676e-05, | |
| "loss": 1.348, | |
| "step": 331500 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 4.045943029171921e-05, | |
| "loss": 1.337, | |
| "step": 332000 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 4.044505620555173e-05, | |
| "loss": 1.2797, | |
| "step": 332500 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 4.043068211938426e-05, | |
| "loss": 1.301, | |
| "step": 333000 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 4.0416308033216786e-05, | |
| "loss": 1.3433, | |
| "step": 333500 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 4.040193394704932e-05, | |
| "loss": 1.2581, | |
| "step": 334000 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 4.038755986088185e-05, | |
| "loss": 1.3248, | |
| "step": 334500 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 4.037318577471437e-05, | |
| "loss": 1.2734, | |
| "step": 335000 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 4.035884043671924e-05, | |
| "loss": 1.3842, | |
| "step": 335500 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 4.034446635055177e-05, | |
| "loss": 1.2941, | |
| "step": 336000 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 4.033009226438429e-05, | |
| "loss": 1.3248, | |
| "step": 336500 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 4.031574692638916e-05, | |
| "loss": 1.3258, | |
| "step": 337000 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 4.030137284022169e-05, | |
| "loss": 1.3629, | |
| "step": 337500 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 4.028699875405421e-05, | |
| "loss": 1.2897, | |
| "step": 338000 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 4.027262466788674e-05, | |
| "loss": 1.33, | |
| "step": 338500 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 4.025825058171927e-05, | |
| "loss": 1.3211, | |
| "step": 339000 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "learning_rate": 4.024390524372413e-05, | |
| "loss": 1.3184, | |
| "step": 339500 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "learning_rate": 4.022953115755666e-05, | |
| "loss": 1.3217, | |
| "step": 340000 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "learning_rate": 4.021518581956153e-05, | |
| "loss": 1.2926, | |
| "step": 340500 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "learning_rate": 4.020081173339405e-05, | |
| "loss": 1.304, | |
| "step": 341000 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "learning_rate": 4.0186437647226576e-05, | |
| "loss": 1.2539, | |
| "step": 341500 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 4.0172063561059107e-05, | |
| "loss": 1.3232, | |
| "step": 342000 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 4.015768947489164e-05, | |
| "loss": 1.3217, | |
| "step": 342500 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 4.014331538872416e-05, | |
| "loss": 1.2563, | |
| "step": 343000 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 4.012894130255669e-05, | |
| "loss": 1.2927, | |
| "step": 343500 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 4.0114567216389224e-05, | |
| "loss": 1.2866, | |
| "step": 344000 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 4.010019313022175e-05, | |
| "loss": 1.3148, | |
| "step": 344500 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 4.008584779222661e-05, | |
| "loss": 1.3109, | |
| "step": 345000 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 4.0071473706059137e-05, | |
| "loss": 1.2924, | |
| "step": 345500 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 4.005709961989167e-05, | |
| "loss": 1.3149, | |
| "step": 346000 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 4.00427255337242e-05, | |
| "loss": 1.3123, | |
| "step": 346500 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 4.002835144755672e-05, | |
| "loss": 1.3045, | |
| "step": 347000 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 4.0013977361389254e-05, | |
| "loss": 1.3122, | |
| "step": 347500 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 3.999963202339411e-05, | |
| "loss": 1.3002, | |
| "step": 348000 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 3.998525793722664e-05, | |
| "loss": 1.314, | |
| "step": 348500 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 3.997088385105917e-05, | |
| "loss": 1.2549, | |
| "step": 349000 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 3.99565097648917e-05, | |
| "loss": 1.288, | |
| "step": 349500 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 3.994213567872423e-05, | |
| "loss": 1.3279, | |
| "step": 350000 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 3.992776159255676e-05, | |
| "loss": 1.2969, | |
| "step": 350500 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 3.9913387506389284e-05, | |
| "loss": 1.2879, | |
| "step": 351000 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 3.989901342022181e-05, | |
| "loss": 1.3131, | |
| "step": 351500 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 3.988469683039901e-05, | |
| "loss": 1.3212, | |
| "step": 352000 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 3.987032274423154e-05, | |
| "loss": 1.3022, | |
| "step": 352500 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 3.985594865806406e-05, | |
| "loss": 1.313, | |
| "step": 353000 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 3.984157457189659e-05, | |
| "loss": 1.2912, | |
| "step": 353500 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 3.982720048572912e-05, | |
| "loss": 1.2996, | |
| "step": 354000 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "learning_rate": 3.981282639956165e-05, | |
| "loss": 1.2747, | |
| "step": 354500 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "learning_rate": 3.979845231339418e-05, | |
| "loss": 1.3257, | |
| "step": 355000 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "learning_rate": 3.978407822722671e-05, | |
| "loss": 1.2859, | |
| "step": 355500 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "learning_rate": 3.976970414105923e-05, | |
| "loss": 1.3374, | |
| "step": 356000 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "learning_rate": 3.97553588030641e-05, | |
| "loss": 1.324, | |
| "step": 356500 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 3.974098471689662e-05, | |
| "loss": 1.2956, | |
| "step": 357000 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 3.972661063072915e-05, | |
| "loss": 1.3514, | |
| "step": 357500 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 3.9712236544561684e-05, | |
| "loss": 1.2666, | |
| "step": 358000 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 3.969789120656655e-05, | |
| "loss": 1.3184, | |
| "step": 358500 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 3.968351712039907e-05, | |
| "loss": 1.3575, | |
| "step": 359000 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 3.96691430342316e-05, | |
| "loss": 1.3093, | |
| "step": 359500 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 3.965476894806413e-05, | |
| "loss": 1.3171, | |
| "step": 360000 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 3.964039486189666e-05, | |
| "loss": 1.3405, | |
| "step": 360500 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 3.962604952390152e-05, | |
| "loss": 1.3328, | |
| "step": 361000 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 3.961167543773405e-05, | |
| "loss": 1.2607, | |
| "step": 361500 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 3.959730135156657e-05, | |
| "loss": 1.2771, | |
| "step": 362000 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 3.95829272653991e-05, | |
| "loss": 1.2848, | |
| "step": 362500 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 3.9568553179231634e-05, | |
| "loss": 1.2904, | |
| "step": 363000 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 3.955417909306416e-05, | |
| "loss": 1.309, | |
| "step": 363500 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "learning_rate": 3.953980500689669e-05, | |
| "loss": 1.3402, | |
| "step": 364000 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "learning_rate": 3.9525459668901547e-05, | |
| "loss": 1.2667, | |
| "step": 364500 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "learning_rate": 3.9511085582734084e-05, | |
| "loss": 1.332, | |
| "step": 365000 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "learning_rate": 3.949671149656661e-05, | |
| "loss": 1.336, | |
| "step": 365500 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "learning_rate": 3.948233741039913e-05, | |
| "loss": 1.357, | |
| "step": 366000 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 3.9467963324231664e-05, | |
| "loss": 1.2626, | |
| "step": 366500 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 3.9453589238064195e-05, | |
| "loss": 1.291, | |
| "step": 367000 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 3.943921515189672e-05, | |
| "loss": 1.3226, | |
| "step": 367500 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 3.942484106572924e-05, | |
| "loss": 1.2919, | |
| "step": 368000 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 3.941046697956178e-05, | |
| "loss": 1.3069, | |
| "step": 368500 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 3.939612164156664e-05, | |
| "loss": 1.3492, | |
| "step": 369000 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 3.938174755539917e-05, | |
| "loss": 1.2857, | |
| "step": 369500 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 3.9367402217404034e-05, | |
| "loss": 1.296, | |
| "step": 370000 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 3.935302813123656e-05, | |
| "loss": 1.3178, | |
| "step": 370500 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 3.933865404506908e-05, | |
| "loss": 1.3425, | |
| "step": 371000 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 3.932427995890161e-05, | |
| "loss": 1.2953, | |
| "step": 371500 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 3.9309905872734144e-05, | |
| "loss": 1.3248, | |
| "step": 372000 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 3.929553178656667e-05, | |
| "loss": 1.2808, | |
| "step": 372500 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 3.92811577003992e-05, | |
| "loss": 1.2778, | |
| "step": 373000 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 3.926678361423173e-05, | |
| "loss": 1.3322, | |
| "step": 373500 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "learning_rate": 3.9252409528064255e-05, | |
| "loss": 1.3141, | |
| "step": 374000 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "learning_rate": 3.923806419006912e-05, | |
| "loss": 1.3462, | |
| "step": 374500 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "learning_rate": 3.922369010390164e-05, | |
| "loss": 1.303, | |
| "step": 375000 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "learning_rate": 3.9209316017734174e-05, | |
| "loss": 1.2846, | |
| "step": 375500 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "learning_rate": 3.9194941931566705e-05, | |
| "loss": 1.2977, | |
| "step": 376000 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 3.918059659357157e-05, | |
| "loss": 1.3207, | |
| "step": 376500 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 3.9166222507404094e-05, | |
| "loss": 1.3579, | |
| "step": 377000 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 3.915184842123662e-05, | |
| "loss": 1.2612, | |
| "step": 377500 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 3.913747433506915e-05, | |
| "loss": 1.2684, | |
| "step": 378000 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 3.912310024890168e-05, | |
| "loss": 1.3118, | |
| "step": 378500 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 3.9108754910906545e-05, | |
| "loss": 1.2694, | |
| "step": 379000 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 3.90944095729114e-05, | |
| "loss": 1.3283, | |
| "step": 379500 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 3.908003548674393e-05, | |
| "loss": 1.2881, | |
| "step": 380000 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 3.906566140057646e-05, | |
| "loss": 1.2595, | |
| "step": 380500 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 3.905128731440899e-05, | |
| "loss": 1.3154, | |
| "step": 381000 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 3.903691322824152e-05, | |
| "loss": 1.2472, | |
| "step": 381500 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 3.9022539142074044e-05, | |
| "loss": 1.2501, | |
| "step": 382000 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 3.900816505590657e-05, | |
| "loss": 1.2928, | |
| "step": 382500 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 3.8993790969739106e-05, | |
| "loss": 1.3238, | |
| "step": 383000 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 3.897944563174396e-05, | |
| "loss": 1.323, | |
| "step": 383500 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 3.8965071545576494e-05, | |
| "loss": 1.3102, | |
| "step": 384000 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 3.895069745940902e-05, | |
| "loss": 1.2707, | |
| "step": 384500 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 3.893635212141388e-05, | |
| "loss": 1.3345, | |
| "step": 385000 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 3.892197803524641e-05, | |
| "loss": 1.3071, | |
| "step": 385500 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 3.890760394907894e-05, | |
| "loss": 1.3225, | |
| "step": 386000 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 3.889322986291147e-05, | |
| "loss": 1.3136, | |
| "step": 386500 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 3.887885577674399e-05, | |
| "loss": 1.3166, | |
| "step": 387000 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 3.886448169057652e-05, | |
| "loss": 1.3347, | |
| "step": 387500 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 3.8850107604409055e-05, | |
| "loss": 1.3325, | |
| "step": 388000 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 3.883573351824158e-05, | |
| "loss": 1.244, | |
| "step": 388500 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "learning_rate": 3.8821359432074104e-05, | |
| "loss": 1.2885, | |
| "step": 389000 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "learning_rate": 3.880701409407897e-05, | |
| "loss": 1.2643, | |
| "step": 389500 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "learning_rate": 3.87926400079115e-05, | |
| "loss": 1.2765, | |
| "step": 390000 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "learning_rate": 3.877826592174403e-05, | |
| "loss": 1.2574, | |
| "step": 390500 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "learning_rate": 3.8763891835576554e-05, | |
| "loss": 1.3173, | |
| "step": 391000 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 3.874954649758142e-05, | |
| "loss": 1.2948, | |
| "step": 391500 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 3.873517241141394e-05, | |
| "loss": 1.2861, | |
| "step": 392000 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 3.8720798325246474e-05, | |
| "loss": 1.3029, | |
| "step": 392500 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 3.8706424239079005e-05, | |
| "loss": 1.3286, | |
| "step": 393000 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 3.869207890108387e-05, | |
| "loss": 1.3104, | |
| "step": 393500 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "learning_rate": 3.8677704814916394e-05, | |
| "loss": 1.3286, | |
| "step": 394000 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "learning_rate": 3.866333072874892e-05, | |
| "loss": 1.2915, | |
| "step": 394500 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "learning_rate": 3.864895664258145e-05, | |
| "loss": 1.3365, | |
| "step": 395000 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "learning_rate": 3.863458255641398e-05, | |
| "loss": 1.3339, | |
| "step": 395500 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "learning_rate": 3.8620237218418844e-05, | |
| "loss": 1.3178, | |
| "step": 396000 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 3.860586313225137e-05, | |
| "loss": 1.341, | |
| "step": 396500 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 3.859148904608389e-05, | |
| "loss": 1.2945, | |
| "step": 397000 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 3.857714370808876e-05, | |
| "loss": 1.2954, | |
| "step": 397500 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 3.856276962192129e-05, | |
| "loss": 1.2809, | |
| "step": 398000 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 3.854839553575382e-05, | |
| "loss": 1.2982, | |
| "step": 398500 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 3.853402144958634e-05, | |
| "loss": 1.2972, | |
| "step": 399000 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 3.851964736341887e-05, | |
| "loss": 1.3654, | |
| "step": 399500 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 3.8505273277251405e-05, | |
| "loss": 1.2658, | |
| "step": 400000 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 3.849089919108393e-05, | |
| "loss": 1.292, | |
| "step": 400500 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 3.8476525104916454e-05, | |
| "loss": 1.3061, | |
| "step": 401000 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 3.8462151018748985e-05, | |
| "loss": 1.2735, | |
| "step": 401500 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 3.8447776932581516e-05, | |
| "loss": 1.3375, | |
| "step": 402000 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 3.843340284641404e-05, | |
| "loss": 1.2889, | |
| "step": 402500 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 3.8419028760246564e-05, | |
| "loss": 1.3407, | |
| "step": 403000 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 3.840468342225143e-05, | |
| "loss": 1.309, | |
| "step": 403500 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "learning_rate": 3.839030933608396e-05, | |
| "loss": 1.2736, | |
| "step": 404000 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "learning_rate": 3.837593524991649e-05, | |
| "loss": 1.2761, | |
| "step": 404500 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "learning_rate": 3.8361589911921355e-05, | |
| "loss": 1.333, | |
| "step": 405000 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "learning_rate": 3.834721582575388e-05, | |
| "loss": 1.2702, | |
| "step": 405500 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "learning_rate": 3.83328417395864e-05, | |
| "loss": 1.3226, | |
| "step": 406000 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 3.831846765341894e-05, | |
| "loss": 1.3432, | |
| "step": 406500 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 3.8304093567251465e-05, | |
| "loss": 1.2957, | |
| "step": 407000 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 3.828971948108399e-05, | |
| "loss": 1.3351, | |
| "step": 407500 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 3.827534539491652e-05, | |
| "loss": 1.3261, | |
| "step": 408000 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 3.826097130874905e-05, | |
| "loss": 1.3281, | |
| "step": 408500 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 3.8246625970753916e-05, | |
| "loss": 1.3111, | |
| "step": 409000 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 3.823225188458644e-05, | |
| "loss": 1.3541, | |
| "step": 409500 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 3.8217877798418964e-05, | |
| "loss": 1.2615, | |
| "step": 410000 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 3.8203503712251495e-05, | |
| "loss": 1.2716, | |
| "step": 410500 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 3.818915837425635e-05, | |
| "loss": 1.3619, | |
| "step": 411000 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "learning_rate": 3.817478428808889e-05, | |
| "loss": 1.3252, | |
| "step": 411500 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "learning_rate": 3.8160410201921415e-05, | |
| "loss": 1.2901, | |
| "step": 412000 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "learning_rate": 3.814603611575394e-05, | |
| "loss": 1.2943, | |
| "step": 412500 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "learning_rate": 3.813166202958648e-05, | |
| "loss": 1.2663, | |
| "step": 413000 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "learning_rate": 3.8117316691591335e-05, | |
| "loss": 1.2747, | |
| "step": 413500 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 3.8102942605423866e-05, | |
| "loss": 1.3128, | |
| "step": 414000 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 3.808856851925639e-05, | |
| "loss": 1.3107, | |
| "step": 414500 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 3.8074194433088914e-05, | |
| "loss": 1.3281, | |
| "step": 415000 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 3.805984909509378e-05, | |
| "loss": 1.2913, | |
| "step": 415500 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 3.804547500892631e-05, | |
| "loss": 1.3163, | |
| "step": 416000 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "learning_rate": 3.803112967093117e-05, | |
| "loss": 1.2811, | |
| "step": 416500 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "learning_rate": 3.8016755584763705e-05, | |
| "loss": 1.3121, | |
| "step": 417000 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "learning_rate": 3.800238149859623e-05, | |
| "loss": 1.2716, | |
| "step": 417500 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "learning_rate": 3.798800741242875e-05, | |
| "loss": 1.3418, | |
| "step": 418000 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "learning_rate": 3.797366207443362e-05, | |
| "loss": 1.2755, | |
| "step": 418500 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 3.795928798826615e-05, | |
| "loss": 1.2791, | |
| "step": 419000 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 3.794491390209868e-05, | |
| "loss": 1.2886, | |
| "step": 419500 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 3.7930539815931204e-05, | |
| "loss": 1.2725, | |
| "step": 420000 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 3.791616572976373e-05, | |
| "loss": 1.2872, | |
| "step": 420500 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 3.7901791643596266e-05, | |
| "loss": 1.2549, | |
| "step": 421000 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 3.788741755742879e-05, | |
| "loss": 1.3114, | |
| "step": 421500 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 3.7873043471261314e-05, | |
| "loss": 1.2449, | |
| "step": 422000 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 3.785869813326618e-05, | |
| "loss": 1.2821, | |
| "step": 422500 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 3.78443240470987e-05, | |
| "loss": 1.3374, | |
| "step": 423000 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 3.782994996093124e-05, | |
| "loss": 1.2604, | |
| "step": 423500 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "learning_rate": 3.7815575874763765e-05, | |
| "loss": 1.2585, | |
| "step": 424000 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "learning_rate": 3.780123053676863e-05, | |
| "loss": 1.306, | |
| "step": 424500 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "learning_rate": 3.7786856450601154e-05, | |
| "loss": 1.2987, | |
| "step": 425000 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "learning_rate": 3.777248236443368e-05, | |
| "loss": 1.3073, | |
| "step": 425500 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "learning_rate": 3.7758108278266216e-05, | |
| "loss": 1.298, | |
| "step": 426000 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 3.774373419209874e-05, | |
| "loss": 1.301, | |
| "step": 426500 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 3.7729360105931264e-05, | |
| "loss": 1.3247, | |
| "step": 427000 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 3.7714986019763795e-05, | |
| "loss": 1.2696, | |
| "step": 427500 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 3.770064068176866e-05, | |
| "loss": 1.3062, | |
| "step": 428000 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 3.768626659560119e-05, | |
| "loss": 1.2945, | |
| "step": 428500 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "learning_rate": 3.7671892509433715e-05, | |
| "loss": 1.2748, | |
| "step": 429000 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "learning_rate": 3.765751842326624e-05, | |
| "loss": 1.2795, | |
| "step": 429500 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "learning_rate": 3.764314433709877e-05, | |
| "loss": 1.3429, | |
| "step": 430000 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "learning_rate": 3.76287702509313e-05, | |
| "loss": 1.365, | |
| "step": 430500 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "learning_rate": 3.7614396164763825e-05, | |
| "loss": 1.3175, | |
| "step": 431000 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 3.7600022078596356e-05, | |
| "loss": 1.2859, | |
| "step": 431500 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 3.7585676740601214e-05, | |
| "loss": 1.2628, | |
| "step": 432000 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 3.757130265443375e-05, | |
| "loss": 1.2823, | |
| "step": 432500 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 3.7556928568266276e-05, | |
| "loss": 1.2831, | |
| "step": 433000 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 3.75425544820988e-05, | |
| "loss": 1.31, | |
| "step": 433500 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "learning_rate": 3.7528209144103664e-05, | |
| "loss": 1.2884, | |
| "step": 434000 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "learning_rate": 3.751383505793619e-05, | |
| "loss": 1.3053, | |
| "step": 434500 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "learning_rate": 3.7499460971768726e-05, | |
| "loss": 1.288, | |
| "step": 435000 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "learning_rate": 3.748508688560125e-05, | |
| "loss": 1.3179, | |
| "step": 435500 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "learning_rate": 3.7470712799433775e-05, | |
| "loss": 1.3117, | |
| "step": 436000 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "learning_rate": 3.7456338713266306e-05, | |
| "loss": 1.3112, | |
| "step": 436500 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "learning_rate": 3.7441964627098837e-05, | |
| "loss": 1.2802, | |
| "step": 437000 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "learning_rate": 3.742759054093136e-05, | |
| "loss": 1.2868, | |
| "step": 437500 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "learning_rate": 3.741321645476389e-05, | |
| "loss": 1.2857, | |
| "step": 438000 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "learning_rate": 3.739887111676875e-05, | |
| "loss": 1.2808, | |
| "step": 438500 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "learning_rate": 3.738449703060128e-05, | |
| "loss": 1.309, | |
| "step": 439000 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "learning_rate": 3.737012294443381e-05, | |
| "loss": 1.3257, | |
| "step": 439500 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "learning_rate": 3.7355748858266336e-05, | |
| "loss": 1.3181, | |
| "step": 440000 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "learning_rate": 3.73414035202712e-05, | |
| "loss": 1.3189, | |
| "step": 440500 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "learning_rate": 3.7327029434103724e-05, | |
| "loss": 1.2565, | |
| "step": 441000 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 3.731265534793626e-05, | |
| "loss": 1.2688, | |
| "step": 441500 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 3.729831000994112e-05, | |
| "loss": 1.2885, | |
| "step": 442000 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 3.728393592377365e-05, | |
| "loss": 1.3076, | |
| "step": 442500 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 3.7269561837606175e-05, | |
| "loss": 1.2664, | |
| "step": 443000 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 3.7255187751438706e-05, | |
| "loss": 1.3208, | |
| "step": 443500 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "learning_rate": 3.724081366527124e-05, | |
| "loss": 1.2682, | |
| "step": 444000 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "learning_rate": 3.722643957910376e-05, | |
| "loss": 1.3087, | |
| "step": 444500 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "learning_rate": 3.7212065492936285e-05, | |
| "loss": 1.2755, | |
| "step": 445000 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "learning_rate": 3.7197691406768816e-05, | |
| "loss": 1.3005, | |
| "step": 445500 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "learning_rate": 3.7183374816946014e-05, | |
| "loss": 1.2834, | |
| "step": 446000 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "learning_rate": 3.716900073077854e-05, | |
| "loss": 1.2735, | |
| "step": 446500 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "learning_rate": 3.715462664461107e-05, | |
| "loss": 1.3396, | |
| "step": 447000 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "learning_rate": 3.71402525584436e-05, | |
| "loss": 1.2835, | |
| "step": 447500 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "learning_rate": 3.7125878472276125e-05, | |
| "loss": 1.2776, | |
| "step": 448000 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "learning_rate": 3.7111504386108656e-05, | |
| "loss": 1.2976, | |
| "step": 448500 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "learning_rate": 3.7097159048113513e-05, | |
| "loss": 1.3103, | |
| "step": 449000 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "learning_rate": 3.708278496194605e-05, | |
| "loss": 1.3003, | |
| "step": 449500 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "learning_rate": 3.7068410875778575e-05, | |
| "loss": 1.305, | |
| "step": 450000 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "learning_rate": 3.70540367896111e-05, | |
| "loss": 1.2685, | |
| "step": 450500 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 3.703966270344363e-05, | |
| "loss": 1.2824, | |
| "step": 451000 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 3.7025317365448495e-05, | |
| "loss": 1.2703, | |
| "step": 451500 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 3.7010943279281026e-05, | |
| "loss": 1.2796, | |
| "step": 452000 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 3.699656919311355e-05, | |
| "loss": 1.2921, | |
| "step": 452500 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 3.6982195106946074e-05, | |
| "loss": 1.2714, | |
| "step": 453000 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "learning_rate": 3.6967821020778605e-05, | |
| "loss": 1.2959, | |
| "step": 453500 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "learning_rate": 3.6953446934611136e-05, | |
| "loss": 1.283, | |
| "step": 454000 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "learning_rate": 3.693907284844366e-05, | |
| "loss": 1.3516, | |
| "step": 454500 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "learning_rate": 3.6924727510448525e-05, | |
| "loss": 1.2841, | |
| "step": 455000 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "learning_rate": 3.691035342428105e-05, | |
| "loss": 1.3314, | |
| "step": 455500 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "learning_rate": 3.689597933811358e-05, | |
| "loss": 1.3287, | |
| "step": 456000 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "learning_rate": 3.688160525194611e-05, | |
| "loss": 1.3005, | |
| "step": 456500 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "learning_rate": 3.6867231165778635e-05, | |
| "loss": 1.2771, | |
| "step": 457000 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "learning_rate": 3.6852857079611166e-05, | |
| "loss": 1.3057, | |
| "step": 457500 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "learning_rate": 3.68384829934437e-05, | |
| "loss": 1.299, | |
| "step": 458000 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "learning_rate": 3.682410890727622e-05, | |
| "loss": 1.2904, | |
| "step": 458500 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "learning_rate": 3.6809734821108746e-05, | |
| "loss": 1.2533, | |
| "step": 459000 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "learning_rate": 3.679538948311361e-05, | |
| "loss": 1.3131, | |
| "step": 459500 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "learning_rate": 3.678101539694614e-05, | |
| "loss": 1.2769, | |
| "step": 460000 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "learning_rate": 3.676664131077867e-05, | |
| "loss": 1.3289, | |
| "step": 460500 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "learning_rate": 3.675229597278354e-05, | |
| "loss": 1.3292, | |
| "step": 461000 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "learning_rate": 3.673792188661606e-05, | |
| "loss": 1.2642, | |
| "step": 461500 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "learning_rate": 3.6723547800448585e-05, | |
| "loss": 1.2938, | |
| "step": 462000 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "learning_rate": 3.6709173714281116e-05, | |
| "loss": 1.3634, | |
| "step": 462500 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "learning_rate": 3.669479962811365e-05, | |
| "loss": 1.2554, | |
| "step": 463000 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "learning_rate": 3.668042554194617e-05, | |
| "loss": 1.2683, | |
| "step": 463500 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "learning_rate": 3.66660514557787e-05, | |
| "loss": 1.2845, | |
| "step": 464000 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "learning_rate": 3.665167736961123e-05, | |
| "loss": 1.3046, | |
| "step": 464500 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "learning_rate": 3.663733203161609e-05, | |
| "loss": 1.2429, | |
| "step": 465000 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "learning_rate": 3.662295794544862e-05, | |
| "loss": 1.2988, | |
| "step": 465500 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "learning_rate": 3.6608583859281146e-05, | |
| "loss": 1.3175, | |
| "step": 466000 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "learning_rate": 3.659423852128601e-05, | |
| "loss": 1.279, | |
| "step": 466500 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "learning_rate": 3.6579893183290875e-05, | |
| "loss": 1.2663, | |
| "step": 467000 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "learning_rate": 3.65655190971234e-05, | |
| "loss": 1.2988, | |
| "step": 467500 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "learning_rate": 3.655114501095593e-05, | |
| "loss": 1.3224, | |
| "step": 468000 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "learning_rate": 3.653677092478846e-05, | |
| "loss": 1.2751, | |
| "step": 468500 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "learning_rate": 3.6522396838620985e-05, | |
| "loss": 1.2846, | |
| "step": 469000 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "learning_rate": 3.6508022752453516e-05, | |
| "loss": 1.3111, | |
| "step": 469500 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "learning_rate": 3.649364866628604e-05, | |
| "loss": 1.3045, | |
| "step": 470000 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "learning_rate": 3.647927458011857e-05, | |
| "loss": 1.2615, | |
| "step": 470500 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "learning_rate": 3.6464900493951096e-05, | |
| "loss": 1.264, | |
| "step": 471000 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "learning_rate": 3.6450526407783627e-05, | |
| "loss": 1.276, | |
| "step": 471500 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "learning_rate": 3.643615232161616e-05, | |
| "loss": 1.2798, | |
| "step": 472000 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "learning_rate": 3.642177823544868e-05, | |
| "loss": 1.2926, | |
| "step": 472500 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "learning_rate": 3.640740414928121e-05, | |
| "loss": 1.2795, | |
| "step": 473000 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "learning_rate": 3.639303006311374e-05, | |
| "loss": 1.239, | |
| "step": 473500 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "learning_rate": 3.6378713473290935e-05, | |
| "loss": 1.3506, | |
| "step": 474000 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "learning_rate": 3.6364339387123466e-05, | |
| "loss": 1.3075, | |
| "step": 474500 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "learning_rate": 3.6349965300956e-05, | |
| "loss": 1.3345, | |
| "step": 475000 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "learning_rate": 3.633559121478852e-05, | |
| "loss": 1.3233, | |
| "step": 475500 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 3.632121712862105e-05, | |
| "loss": 1.2644, | |
| "step": 476000 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 3.630687179062591e-05, | |
| "loss": 1.2507, | |
| "step": 476500 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 3.629249770445844e-05, | |
| "loss": 1.2983, | |
| "step": 477000 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 3.627812361829097e-05, | |
| "loss": 1.296, | |
| "step": 477500 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 3.6263749532123496e-05, | |
| "loss": 1.297, | |
| "step": 478000 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "learning_rate": 3.624937544595603e-05, | |
| "loss": 1.3237, | |
| "step": 478500 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "learning_rate": 3.6235030107960885e-05, | |
| "loss": 1.2638, | |
| "step": 479000 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "learning_rate": 3.6220656021793416e-05, | |
| "loss": 1.2876, | |
| "step": 479500 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "learning_rate": 3.620628193562595e-05, | |
| "loss": 1.2688, | |
| "step": 480000 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "learning_rate": 3.619193659763081e-05, | |
| "loss": 1.2915, | |
| "step": 480500 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "learning_rate": 3.6177562511463335e-05, | |
| "loss": 1.2574, | |
| "step": 481000 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "learning_rate": 3.6163188425295866e-05, | |
| "loss": 1.2486, | |
| "step": 481500 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "learning_rate": 3.614881433912839e-05, | |
| "loss": 1.3007, | |
| "step": 482000 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "learning_rate": 3.613444025296092e-05, | |
| "loss": 1.2465, | |
| "step": 482500 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "learning_rate": 3.6120066166793446e-05, | |
| "loss": 1.272, | |
| "step": 483000 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "learning_rate": 3.610569208062598e-05, | |
| "loss": 1.3023, | |
| "step": 483500 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "learning_rate": 3.609131799445851e-05, | |
| "loss": 1.2473, | |
| "step": 484000 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "learning_rate": 3.607694390829103e-05, | |
| "loss": 1.2563, | |
| "step": 484500 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "learning_rate": 3.606256982212356e-05, | |
| "loss": 1.2937, | |
| "step": 485000 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "learning_rate": 3.604819573595609e-05, | |
| "loss": 1.3179, | |
| "step": 485500 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "learning_rate": 3.603382164978862e-05, | |
| "loss": 1.2751, | |
| "step": 486000 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "learning_rate": 3.601947631179348e-05, | |
| "loss": 1.2914, | |
| "step": 486500 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "learning_rate": 3.600510222562601e-05, | |
| "loss": 1.3037, | |
| "step": 487000 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "learning_rate": 3.599072813945854e-05, | |
| "loss": 1.2894, | |
| "step": 487500 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "learning_rate": 3.597635405329106e-05, | |
| "loss": 1.2917, | |
| "step": 488000 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "learning_rate": 3.5962008715295926e-05, | |
| "loss": 1.2776, | |
| "step": 488500 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "learning_rate": 3.594763462912846e-05, | |
| "loss": 1.3065, | |
| "step": 489000 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "learning_rate": 3.5933289291133315e-05, | |
| "loss": 1.3115, | |
| "step": 489500 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "learning_rate": 3.5918915204965846e-05, | |
| "loss": 1.2485, | |
| "step": 490000 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "learning_rate": 3.590454111879838e-05, | |
| "loss": 1.2972, | |
| "step": 490500 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "learning_rate": 3.58901670326309e-05, | |
| "loss": 1.2865, | |
| "step": 491000 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "learning_rate": 3.587579294646343e-05, | |
| "loss": 1.3067, | |
| "step": 491500 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "learning_rate": 3.5861418860295956e-05, | |
| "loss": 1.2642, | |
| "step": 492000 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "learning_rate": 3.584704477412849e-05, | |
| "loss": 1.3229, | |
| "step": 492500 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "learning_rate": 3.583267068796101e-05, | |
| "loss": 1.2988, | |
| "step": 493000 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "learning_rate": 3.581835409813821e-05, | |
| "loss": 1.2913, | |
| "step": 493500 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "learning_rate": 3.580398001197074e-05, | |
| "loss": 1.2787, | |
| "step": 494000 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "learning_rate": 3.5789634673975605e-05, | |
| "loss": 1.2615, | |
| "step": 494500 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "learning_rate": 3.577526058780813e-05, | |
| "loss": 1.2474, | |
| "step": 495000 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "learning_rate": 3.576088650164066e-05, | |
| "loss": 1.2448, | |
| "step": 495500 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "learning_rate": 3.5746512415473184e-05, | |
| "loss": 1.3201, | |
| "step": 496000 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "learning_rate": 3.5732138329305715e-05, | |
| "loss": 1.3178, | |
| "step": 496500 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_cer": 0.24798620372209454, | |
| "eval_loss": 1.0018064975738525, | |
| "eval_runtime": 10926.7858, | |
| "eval_samples_per_second": 8.946, | |
| "eval_steps_per_second": 1.118, | |
| "step": 496926 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "learning_rate": 3.5717764243138246e-05, | |
| "loss": 1.304, | |
| "step": 497000 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "learning_rate": 3.570339015697077e-05, | |
| "loss": 1.2501, | |
| "step": 497500 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "learning_rate": 3.56890160708033e-05, | |
| "loss": 1.2717, | |
| "step": 498000 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "learning_rate": 3.5674641984635826e-05, | |
| "loss": 1.2588, | |
| "step": 498500 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "learning_rate": 3.566026789846836e-05, | |
| "loss": 1.2931, | |
| "step": 499000 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "learning_rate": 3.564589381230089e-05, | |
| "loss": 1.2257, | |
| "step": 499500 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "learning_rate": 3.5631548474305745e-05, | |
| "loss": 1.2627, | |
| "step": 500000 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "learning_rate": 3.5617174388138276e-05, | |
| "loss": 1.2737, | |
| "step": 500500 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "learning_rate": 3.56028003019708e-05, | |
| "loss": 1.2805, | |
| "step": 501000 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "learning_rate": 3.558842621580333e-05, | |
| "loss": 1.2255, | |
| "step": 501500 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "learning_rate": 3.557405212963586e-05, | |
| "loss": 1.2769, | |
| "step": 502000 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "learning_rate": 3.555967804346839e-05, | |
| "loss": 1.259, | |
| "step": 502500 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "learning_rate": 3.554533270547325e-05, | |
| "loss": 1.2578, | |
| "step": 503000 | |
| }, | |
| { | |
| "epoch": 2.03, | |
| "learning_rate": 3.553095861930578e-05, | |
| "loss": 1.2479, | |
| "step": 503500 | |
| }, | |
| { | |
| "epoch": 2.03, | |
| "learning_rate": 3.5516584533138306e-05, | |
| "loss": 1.2287, | |
| "step": 504000 | |
| }, | |
| { | |
| "epoch": 2.03, | |
| "learning_rate": 3.550221044697084e-05, | |
| "loss": 1.2565, | |
| "step": 504500 | |
| }, | |
| { | |
| "epoch": 2.03, | |
| "learning_rate": 3.548783636080336e-05, | |
| "loss": 1.2534, | |
| "step": 505000 | |
| }, | |
| { | |
| "epoch": 2.03, | |
| "learning_rate": 3.547346227463589e-05, | |
| "loss": 1.2356, | |
| "step": 505500 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "learning_rate": 3.545911693664076e-05, | |
| "loss": 1.2739, | |
| "step": 506000 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "learning_rate": 3.544474285047328e-05, | |
| "loss": 1.2215, | |
| "step": 506500 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "learning_rate": 3.543036876430581e-05, | |
| "loss": 1.2267, | |
| "step": 507000 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "learning_rate": 3.5415994678138336e-05, | |
| "loss": 1.2708, | |
| "step": 507500 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "learning_rate": 3.540162059197087e-05, | |
| "loss": 1.299, | |
| "step": 508000 | |
| }, | |
| { | |
| "epoch": 2.05, | |
| "learning_rate": 3.538727525397573e-05, | |
| "loss": 1.2797, | |
| "step": 508500 | |
| }, | |
| { | |
| "epoch": 2.05, | |
| "learning_rate": 3.5372901167808256e-05, | |
| "loss": 1.254, | |
| "step": 509000 | |
| }, | |
| { | |
| "epoch": 2.05, | |
| "learning_rate": 3.535852708164079e-05, | |
| "loss": 1.239, | |
| "step": 509500 | |
| }, | |
| { | |
| "epoch": 2.05, | |
| "learning_rate": 3.534415299547331e-05, | |
| "loss": 1.2227, | |
| "step": 510000 | |
| }, | |
| { | |
| "epoch": 2.05, | |
| "learning_rate": 3.532977890930584e-05, | |
| "loss": 1.2489, | |
| "step": 510500 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "learning_rate": 3.531540482313837e-05, | |
| "loss": 1.2213, | |
| "step": 511000 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "learning_rate": 3.530105948514323e-05, | |
| "loss": 1.2131, | |
| "step": 511500 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "learning_rate": 3.528668539897576e-05, | |
| "loss": 1.2442, | |
| "step": 512000 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "learning_rate": 3.5272311312808286e-05, | |
| "loss": 1.2223, | |
| "step": 512500 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "learning_rate": 3.525793722664082e-05, | |
| "loss": 1.2384, | |
| "step": 513000 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "learning_rate": 3.524356314047335e-05, | |
| "loss": 1.2598, | |
| "step": 513500 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "learning_rate": 3.522921780247821e-05, | |
| "loss": 1.3033, | |
| "step": 514000 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "learning_rate": 3.521484371631074e-05, | |
| "loss": 1.2349, | |
| "step": 514500 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "learning_rate": 3.520046963014327e-05, | |
| "loss": 1.2263, | |
| "step": 515000 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "learning_rate": 3.518609554397579e-05, | |
| "loss": 1.2533, | |
| "step": 515500 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "learning_rate": 3.517172145780832e-05, | |
| "loss": 1.2289, | |
| "step": 516000 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "learning_rate": 3.515734737164085e-05, | |
| "loss": 1.2387, | |
| "step": 516500 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "learning_rate": 3.514300203364571e-05, | |
| "loss": 1.2407, | |
| "step": 517000 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "learning_rate": 3.512862794747824e-05, | |
| "loss": 1.2843, | |
| "step": 517500 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "learning_rate": 3.511425386131077e-05, | |
| "loss": 1.2666, | |
| "step": 518000 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "learning_rate": 3.50998797751433e-05, | |
| "loss": 1.2308, | |
| "step": 518500 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "learning_rate": 3.508550568897582e-05, | |
| "loss": 1.2481, | |
| "step": 519000 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "learning_rate": 3.507113160280835e-05, | |
| "loss": 1.2184, | |
| "step": 519500 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "learning_rate": 3.5056757516640884e-05, | |
| "loss": 1.2723, | |
| "step": 520000 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "learning_rate": 3.504241217864574e-05, | |
| "loss": 1.248, | |
| "step": 520500 | |
| }, | |
| { | |
| "epoch": 2.1, | |
| "learning_rate": 3.502803809247827e-05, | |
| "loss": 1.2647, | |
| "step": 521000 | |
| }, | |
| { | |
| "epoch": 2.1, | |
| "learning_rate": 3.50136640063108e-05, | |
| "loss": 1.2281, | |
| "step": 521500 | |
| }, | |
| { | |
| "epoch": 2.1, | |
| "learning_rate": 3.499928992014333e-05, | |
| "loss": 1.2436, | |
| "step": 522000 | |
| }, | |
| { | |
| "epoch": 2.1, | |
| "learning_rate": 3.498491583397586e-05, | |
| "loss": 1.2255, | |
| "step": 522500 | |
| }, | |
| { | |
| "epoch": 2.1, | |
| "learning_rate": 3.497054174780838e-05, | |
| "loss": 1.2615, | |
| "step": 523000 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "learning_rate": 3.4956167661640914e-05, | |
| "loss": 1.238, | |
| "step": 523500 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "learning_rate": 3.4941793575473445e-05, | |
| "loss": 1.2343, | |
| "step": 524000 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "learning_rate": 3.49274482374783e-05, | |
| "loss": 1.2505, | |
| "step": 524500 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "learning_rate": 3.491310289948317e-05, | |
| "loss": 1.2915, | |
| "step": 525000 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "learning_rate": 3.48987288133157e-05, | |
| "loss": 1.2477, | |
| "step": 525500 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "learning_rate": 3.488435472714822e-05, | |
| "loss": 1.2653, | |
| "step": 526000 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "learning_rate": 3.486998064098075e-05, | |
| "loss": 1.2362, | |
| "step": 526500 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "learning_rate": 3.485563530298561e-05, | |
| "loss": 1.2141, | |
| "step": 527000 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "learning_rate": 3.484126121681814e-05, | |
| "loss": 1.2283, | |
| "step": 527500 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "learning_rate": 3.482688713065067e-05, | |
| "loss": 1.2756, | |
| "step": 528000 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "learning_rate": 3.48125130444832e-05, | |
| "loss": 1.2044, | |
| "step": 528500 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "learning_rate": 3.479813895831573e-05, | |
| "loss": 1.2843, | |
| "step": 529000 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "learning_rate": 3.478376487214826e-05, | |
| "loss": 1.2615, | |
| "step": 529500 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "learning_rate": 3.476941953415312e-05, | |
| "loss": 1.3152, | |
| "step": 530000 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "learning_rate": 3.475504544798565e-05, | |
| "loss": 1.2859, | |
| "step": 530500 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "learning_rate": 3.474067136181817e-05, | |
| "loss": 1.2408, | |
| "step": 531000 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "learning_rate": 3.47262972756507e-05, | |
| "loss": 1.251, | |
| "step": 531500 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "learning_rate": 3.4711923189483234e-05, | |
| "loss": 1.1937, | |
| "step": 532000 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "learning_rate": 3.469754910331576e-05, | |
| "loss": 1.2696, | |
| "step": 532500 | |
| }, | |
| { | |
| "epoch": 2.15, | |
| "learning_rate": 3.468317501714828e-05, | |
| "loss": 1.2626, | |
| "step": 533000 | |
| }, | |
| { | |
| "epoch": 2.15, | |
| "learning_rate": 3.466880093098081e-05, | |
| "loss": 1.2667, | |
| "step": 533500 | |
| }, | |
| { | |
| "epoch": 2.15, | |
| "learning_rate": 3.465445559298568e-05, | |
| "loss": 1.2286, | |
| "step": 534000 | |
| }, | |
| { | |
| "epoch": 2.15, | |
| "learning_rate": 3.464011025499054e-05, | |
| "loss": 1.3034, | |
| "step": 534500 | |
| }, | |
| { | |
| "epoch": 2.15, | |
| "learning_rate": 3.4625736168823066e-05, | |
| "loss": 1.2727, | |
| "step": 535000 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "learning_rate": 3.46113620826556e-05, | |
| "loss": 1.3009, | |
| "step": 535500 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "learning_rate": 3.459698799648812e-05, | |
| "loss": 1.2333, | |
| "step": 536000 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "learning_rate": 3.458261391032065e-05, | |
| "loss": 1.2414, | |
| "step": 536500 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "learning_rate": 3.4568239824153183e-05, | |
| "loss": 1.2648, | |
| "step": 537000 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "learning_rate": 3.455386573798571e-05, | |
| "loss": 1.2345, | |
| "step": 537500 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "learning_rate": 3.453949165181824e-05, | |
| "loss": 1.2002, | |
| "step": 538000 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "learning_rate": 3.4525146313823096e-05, | |
| "loss": 1.2801, | |
| "step": 538500 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "learning_rate": 3.451077222765563e-05, | |
| "loss": 1.2502, | |
| "step": 539000 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "learning_rate": 3.449639814148816e-05, | |
| "loss": 1.2946, | |
| "step": 539500 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "learning_rate": 3.448202405532068e-05, | |
| "loss": 1.2442, | |
| "step": 540000 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "learning_rate": 3.446770746549788e-05, | |
| "loss": 1.2646, | |
| "step": 540500 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "learning_rate": 3.445333337933041e-05, | |
| "loss": 1.2592, | |
| "step": 541000 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "learning_rate": 3.4438959293162936e-05, | |
| "loss": 1.2476, | |
| "step": 541500 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "learning_rate": 3.442458520699547e-05, | |
| "loss": 1.2312, | |
| "step": 542000 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "learning_rate": 3.4410211120828e-05, | |
| "loss": 1.2613, | |
| "step": 542500 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "learning_rate": 3.439586578283286e-05, | |
| "loss": 1.2881, | |
| "step": 543000 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "learning_rate": 3.4381491696665386e-05, | |
| "loss": 1.2507, | |
| "step": 543500 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "learning_rate": 3.436711761049791e-05, | |
| "loss": 1.2611, | |
| "step": 544000 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "learning_rate": 3.435274352433044e-05, | |
| "loss": 1.2903, | |
| "step": 544500 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "learning_rate": 3.433836943816297e-05, | |
| "loss": 1.2557, | |
| "step": 545000 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "learning_rate": 3.432402410016784e-05, | |
| "loss": 1.2547, | |
| "step": 545500 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "learning_rate": 3.430965001400036e-05, | |
| "loss": 1.2438, | |
| "step": 546000 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "learning_rate": 3.4295275927832886e-05, | |
| "loss": 1.2705, | |
| "step": 546500 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "learning_rate": 3.4280901841665416e-05, | |
| "loss": 1.2638, | |
| "step": 547000 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "learning_rate": 3.426652775549795e-05, | |
| "loss": 1.246, | |
| "step": 547500 | |
| }, | |
| { | |
| "epoch": 2.21, | |
| "learning_rate": 3.425215366933047e-05, | |
| "loss": 1.259, | |
| "step": 548000 | |
| }, | |
| { | |
| "epoch": 2.21, | |
| "learning_rate": 3.4237808331335336e-05, | |
| "loss": 1.2738, | |
| "step": 548500 | |
| }, | |
| { | |
| "epoch": 2.21, | |
| "learning_rate": 3.422343424516786e-05, | |
| "loss": 1.2558, | |
| "step": 549000 | |
| }, | |
| { | |
| "epoch": 2.21, | |
| "learning_rate": 3.420906015900039e-05, | |
| "loss": 1.2246, | |
| "step": 549500 | |
| }, | |
| { | |
| "epoch": 2.21, | |
| "learning_rate": 3.419468607283292e-05, | |
| "loss": 1.193, | |
| "step": 550000 | |
| }, | |
| { | |
| "epoch": 2.22, | |
| "learning_rate": 3.4180311986665446e-05, | |
| "loss": 1.2253, | |
| "step": 550500 | |
| }, | |
| { | |
| "epoch": 2.22, | |
| "learning_rate": 3.416593790049798e-05, | |
| "loss": 1.2249, | |
| "step": 551000 | |
| }, | |
| { | |
| "epoch": 2.22, | |
| "learning_rate": 3.415156381433051e-05, | |
| "loss": 1.2014, | |
| "step": 551500 | |
| }, | |
| { | |
| "epoch": 2.22, | |
| "learning_rate": 3.413718972816303e-05, | |
| "loss": 1.2901, | |
| "step": 552000 | |
| }, | |
| { | |
| "epoch": 2.22, | |
| "learning_rate": 3.41228443901679e-05, | |
| "loss": 1.259, | |
| "step": 552500 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "learning_rate": 3.410847030400042e-05, | |
| "loss": 1.2049, | |
| "step": 553000 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "learning_rate": 3.4094124966005286e-05, | |
| "loss": 1.2618, | |
| "step": 553500 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "learning_rate": 3.407975087983782e-05, | |
| "loss": 1.2664, | |
| "step": 554000 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "learning_rate": 3.406537679367035e-05, | |
| "loss": 1.2458, | |
| "step": 554500 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "learning_rate": 3.405100270750287e-05, | |
| "loss": 1.2345, | |
| "step": 555000 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "learning_rate": 3.4036657369507737e-05, | |
| "loss": 1.2623, | |
| "step": 555500 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "learning_rate": 3.402228328334026e-05, | |
| "loss": 1.2062, | |
| "step": 556000 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "learning_rate": 3.400790919717279e-05, | |
| "loss": 1.2831, | |
| "step": 556500 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "learning_rate": 3.399353511100532e-05, | |
| "loss": 1.2745, | |
| "step": 557000 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "learning_rate": 3.397916102483785e-05, | |
| "loss": 1.233, | |
| "step": 557500 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "learning_rate": 3.396481568684271e-05, | |
| "loss": 1.2692, | |
| "step": 558000 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "learning_rate": 3.3950441600675236e-05, | |
| "loss": 1.2596, | |
| "step": 558500 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "learning_rate": 3.3936067514507767e-05, | |
| "loss": 1.2565, | |
| "step": 559000 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "learning_rate": 3.39216934283403e-05, | |
| "loss": 1.2362, | |
| "step": 559500 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "learning_rate": 3.390731934217282e-05, | |
| "loss": 1.2155, | |
| "step": 560000 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "learning_rate": 3.3892974004177686e-05, | |
| "loss": 1.266, | |
| "step": 560500 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "learning_rate": 3.387859991801021e-05, | |
| "loss": 1.2511, | |
| "step": 561000 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "learning_rate": 3.386422583184274e-05, | |
| "loss": 1.2623, | |
| "step": 561500 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "learning_rate": 3.384985174567527e-05, | |
| "loss": 1.216, | |
| "step": 562000 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "learning_rate": 3.383550640768014e-05, | |
| "loss": 1.2525, | |
| "step": 562500 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "learning_rate": 3.382113232151266e-05, | |
| "loss": 1.2496, | |
| "step": 563000 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "learning_rate": 3.3806758235345185e-05, | |
| "loss": 1.2253, | |
| "step": 563500 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "learning_rate": 3.3792384149177716e-05, | |
| "loss": 1.288, | |
| "step": 564000 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "learning_rate": 3.377801006301025e-05, | |
| "loss": 1.242, | |
| "step": 564500 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "learning_rate": 3.376366472501511e-05, | |
| "loss": 1.2785, | |
| "step": 565000 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "learning_rate": 3.3749290638847636e-05, | |
| "loss": 1.2483, | |
| "step": 565500 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "learning_rate": 3.373491655268016e-05, | |
| "loss": 1.2293, | |
| "step": 566000 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "learning_rate": 3.37205424665127e-05, | |
| "loss": 1.2759, | |
| "step": 566500 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "learning_rate": 3.3706197128517556e-05, | |
| "loss": 1.2912, | |
| "step": 567000 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "learning_rate": 3.3691823042350087e-05, | |
| "loss": 1.2564, | |
| "step": 567500 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "learning_rate": 3.367744895618261e-05, | |
| "loss": 1.2192, | |
| "step": 568000 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "learning_rate": 3.3663074870015135e-05, | |
| "loss": 1.2384, | |
| "step": 568500 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "learning_rate": 3.364870078384767e-05, | |
| "loss": 1.2578, | |
| "step": 569000 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "learning_rate": 3.363435544585253e-05, | |
| "loss": 1.2857, | |
| "step": 569500 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "learning_rate": 3.361998135968506e-05, | |
| "loss": 1.236, | |
| "step": 570000 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "learning_rate": 3.3605607273517586e-05, | |
| "loss": 1.262, | |
| "step": 570500 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "learning_rate": 3.3591233187350117e-05, | |
| "loss": 1.2389, | |
| "step": 571000 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "learning_rate": 3.357685910118265e-05, | |
| "loss": 1.2537, | |
| "step": 571500 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "learning_rate": 3.356248501501517e-05, | |
| "loss": 1.2869, | |
| "step": 572000 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "learning_rate": 3.3548110928847696e-05, | |
| "loss": 1.2332, | |
| "step": 572500 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "learning_rate": 3.353373684268023e-05, | |
| "loss": 1.2552, | |
| "step": 573000 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "learning_rate": 3.351936275651276e-05, | |
| "loss": 1.281, | |
| "step": 573500 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "learning_rate": 3.350498867034528e-05, | |
| "loss": 1.2591, | |
| "step": 574000 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "learning_rate": 3.349061458417781e-05, | |
| "loss": 1.2511, | |
| "step": 574500 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "learning_rate": 3.3476240498010344e-05, | |
| "loss": 1.2287, | |
| "step": 575000 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "learning_rate": 3.346189516001521e-05, | |
| "loss": 1.2665, | |
| "step": 575500 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "learning_rate": 3.344752107384773e-05, | |
| "loss": 1.1707, | |
| "step": 576000 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "learning_rate": 3.343314698768026e-05, | |
| "loss": 1.2328, | |
| "step": 576500 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "learning_rate": 3.341877290151279e-05, | |
| "loss": 1.2742, | |
| "step": 577000 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "learning_rate": 3.3404456311689986e-05, | |
| "loss": 1.247, | |
| "step": 577500 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "learning_rate": 3.339008222552251e-05, | |
| "loss": 1.2432, | |
| "step": 578000 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "learning_rate": 3.337570813935504e-05, | |
| "loss": 1.2333, | |
| "step": 578500 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "learning_rate": 3.336133405318757e-05, | |
| "loss": 1.2269, | |
| "step": 579000 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "learning_rate": 3.3346959967020096e-05, | |
| "loss": 1.2434, | |
| "step": 579500 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "learning_rate": 3.333258588085262e-05, | |
| "loss": 1.2411, | |
| "step": 580000 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "learning_rate": 3.331821179468516e-05, | |
| "loss": 1.2331, | |
| "step": 580500 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "learning_rate": 3.330386645669002e-05, | |
| "loss": 1.2312, | |
| "step": 581000 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "learning_rate": 3.328949237052255e-05, | |
| "loss": 1.2393, | |
| "step": 581500 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "learning_rate": 3.327511828435507e-05, | |
| "loss": 1.2778, | |
| "step": 582000 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "learning_rate": 3.32607441981876e-05, | |
| "loss": 1.2703, | |
| "step": 582500 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "learning_rate": 3.324637011202013e-05, | |
| "loss": 1.296, | |
| "step": 583000 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "learning_rate": 3.323199602585266e-05, | |
| "loss": 1.2469, | |
| "step": 583500 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "learning_rate": 3.321762193968518e-05, | |
| "loss": 1.2738, | |
| "step": 584000 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "learning_rate": 3.320324785351772e-05, | |
| "loss": 1.2696, | |
| "step": 584500 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "learning_rate": 3.318890251552258e-05, | |
| "loss": 1.2645, | |
| "step": 585000 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "learning_rate": 3.317452842935511e-05, | |
| "loss": 1.2552, | |
| "step": 585500 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "learning_rate": 3.316015434318763e-05, | |
| "loss": 1.2484, | |
| "step": 586000 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "learning_rate": 3.3145780257020156e-05, | |
| "loss": 1.2692, | |
| "step": 586500 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "learning_rate": 3.313143491902502e-05, | |
| "loss": 1.2243, | |
| "step": 587000 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "learning_rate": 3.3117089581029885e-05, | |
| "loss": 1.213, | |
| "step": 587500 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "learning_rate": 3.3102715494862416e-05, | |
| "loss": 1.2585, | |
| "step": 588000 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "learning_rate": 3.308834140869495e-05, | |
| "loss": 1.2533, | |
| "step": 588500 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "learning_rate": 3.307396732252747e-05, | |
| "loss": 1.2547, | |
| "step": 589000 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "learning_rate": 3.3059593236359996e-05, | |
| "loss": 1.2081, | |
| "step": 589500 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "learning_rate": 3.304521915019253e-05, | |
| "loss": 1.2561, | |
| "step": 590000 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "learning_rate": 3.303084506402506e-05, | |
| "loss": 1.2344, | |
| "step": 590500 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "learning_rate": 3.301647097785758e-05, | |
| "loss": 1.287, | |
| "step": 591000 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "learning_rate": 3.300209689169011e-05, | |
| "loss": 1.2357, | |
| "step": 591500 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "learning_rate": 3.2987722805522644e-05, | |
| "loss": 1.2741, | |
| "step": 592000 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "learning_rate": 3.297334871935517e-05, | |
| "loss": 1.2655, | |
| "step": 592500 | |
| }, | |
| { | |
| "epoch": 2.39, | |
| "learning_rate": 3.295897463318769e-05, | |
| "loss": 1.2085, | |
| "step": 593000 | |
| }, | |
| { | |
| "epoch": 2.39, | |
| "learning_rate": 3.294460054702023e-05, | |
| "loss": 1.2697, | |
| "step": 593500 | |
| }, | |
| { | |
| "epoch": 2.39, | |
| "learning_rate": 3.293025520902509e-05, | |
| "loss": 1.226, | |
| "step": 594000 | |
| }, | |
| { | |
| "epoch": 2.39, | |
| "learning_rate": 3.291588112285762e-05, | |
| "loss": 1.2169, | |
| "step": 594500 | |
| }, | |
| { | |
| "epoch": 2.39, | |
| "learning_rate": 3.290150703669014e-05, | |
| "loss": 1.2318, | |
| "step": 595000 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "learning_rate": 3.288713295052267e-05, | |
| "loss": 1.2192, | |
| "step": 595500 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "learning_rate": 3.287278761252753e-05, | |
| "loss": 1.2522, | |
| "step": 596000 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "learning_rate": 3.285841352636006e-05, | |
| "loss": 1.2731, | |
| "step": 596500 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "learning_rate": 3.284403944019259e-05, | |
| "loss": 1.2572, | |
| "step": 597000 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "learning_rate": 3.282966535402512e-05, | |
| "loss": 1.2339, | |
| "step": 597500 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "learning_rate": 3.281532001602998e-05, | |
| "loss": 1.283, | |
| "step": 598000 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "learning_rate": 3.2800945929862506e-05, | |
| "loss": 1.2274, | |
| "step": 598500 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "learning_rate": 3.278660059186737e-05, | |
| "loss": 1.2292, | |
| "step": 599000 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "learning_rate": 3.27722265056999e-05, | |
| "loss": 1.2474, | |
| "step": 599500 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "learning_rate": 3.275785241953243e-05, | |
| "loss": 1.278, | |
| "step": 600000 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "learning_rate": 3.274347833336496e-05, | |
| "loss": 1.2438, | |
| "step": 600500 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "learning_rate": 3.272910424719748e-05, | |
| "loss": 1.2174, | |
| "step": 601000 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "learning_rate": 3.271473016103002e-05, | |
| "loss": 1.2063, | |
| "step": 601500 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "learning_rate": 3.270035607486254e-05, | |
| "loss": 1.188, | |
| "step": 602000 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "learning_rate": 3.268601073686741e-05, | |
| "loss": 1.2207, | |
| "step": 602500 | |
| }, | |
| { | |
| "epoch": 2.43, | |
| "learning_rate": 3.267163665069993e-05, | |
| "loss": 1.233, | |
| "step": 603000 | |
| }, | |
| { | |
| "epoch": 2.43, | |
| "learning_rate": 3.2657262564532456e-05, | |
| "loss": 1.2776, | |
| "step": 603500 | |
| }, | |
| { | |
| "epoch": 2.43, | |
| "learning_rate": 3.2642888478364994e-05, | |
| "loss": 1.2963, | |
| "step": 604000 | |
| }, | |
| { | |
| "epoch": 2.43, | |
| "learning_rate": 3.262851439219752e-05, | |
| "loss": 1.2479, | |
| "step": 604500 | |
| }, | |
| { | |
| "epoch": 2.43, | |
| "learning_rate": 3.261414030603004e-05, | |
| "loss": 1.1801, | |
| "step": 605000 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "learning_rate": 3.259976621986258e-05, | |
| "loss": 1.2522, | |
| "step": 605500 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "learning_rate": 3.2585392133695104e-05, | |
| "loss": 1.2482, | |
| "step": 606000 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "learning_rate": 3.257104679569997e-05, | |
| "loss": 1.2325, | |
| "step": 606500 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "learning_rate": 3.255667270953249e-05, | |
| "loss": 1.3044, | |
| "step": 607000 | |
| }, | |
| { | |
| "epoch": 2.45, | |
| "learning_rate": 3.254232737153736e-05, | |
| "loss": 1.2587, | |
| "step": 607500 | |
| }, | |
| { | |
| "epoch": 2.45, | |
| "learning_rate": 3.252795328536988e-05, | |
| "loss": 1.2764, | |
| "step": 608000 | |
| }, | |
| { | |
| "epoch": 2.45, | |
| "learning_rate": 3.251357919920241e-05, | |
| "loss": 1.2882, | |
| "step": 608500 | |
| }, | |
| { | |
| "epoch": 2.45, | |
| "learning_rate": 3.249920511303494e-05, | |
| "loss": 1.2507, | |
| "step": 609000 | |
| }, | |
| { | |
| "epoch": 2.45, | |
| "learning_rate": 3.248483102686747e-05, | |
| "loss": 1.1949, | |
| "step": 609500 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "learning_rate": 3.247045694069999e-05, | |
| "loss": 1.2653, | |
| "step": 610000 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "learning_rate": 3.245608285453253e-05, | |
| "loss": 1.291, | |
| "step": 610500 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "learning_rate": 3.2441708768365054e-05, | |
| "loss": 1.1922, | |
| "step": 611000 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "learning_rate": 3.242736343036992e-05, | |
| "loss": 1.2725, | |
| "step": 611500 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "learning_rate": 3.241298934420244e-05, | |
| "loss": 1.1928, | |
| "step": 612000 | |
| }, | |
| { | |
| "epoch": 2.47, | |
| "learning_rate": 3.239861525803497e-05, | |
| "loss": 1.2518, | |
| "step": 612500 | |
| }, | |
| { | |
| "epoch": 2.47, | |
| "learning_rate": 3.2384241171867504e-05, | |
| "loss": 1.229, | |
| "step": 613000 | |
| }, | |
| { | |
| "epoch": 2.47, | |
| "learning_rate": 3.236989583387237e-05, | |
| "loss": 1.2946, | |
| "step": 613500 | |
| }, | |
| { | |
| "epoch": 2.47, | |
| "learning_rate": 3.235552174770489e-05, | |
| "loss": 1.256, | |
| "step": 614000 | |
| }, | |
| { | |
| "epoch": 2.47, | |
| "learning_rate": 3.234114766153742e-05, | |
| "loss": 1.2156, | |
| "step": 614500 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "learning_rate": 3.232677357536995e-05, | |
| "loss": 1.2669, | |
| "step": 615000 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "learning_rate": 3.231239948920248e-05, | |
| "loss": 1.2413, | |
| "step": 615500 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "learning_rate": 3.2298025403035e-05, | |
| "loss": 1.2777, | |
| "step": 616000 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "learning_rate": 3.228368006503987e-05, | |
| "loss": 1.2184, | |
| "step": 616500 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "learning_rate": 3.226930597887239e-05, | |
| "loss": 1.2222, | |
| "step": 617000 | |
| }, | |
| { | |
| "epoch": 2.49, | |
| "learning_rate": 3.225493189270492e-05, | |
| "loss": 1.2481, | |
| "step": 617500 | |
| }, | |
| { | |
| "epoch": 2.49, | |
| "learning_rate": 3.2240557806537454e-05, | |
| "loss": 1.2333, | |
| "step": 618000 | |
| }, | |
| { | |
| "epoch": 2.49, | |
| "learning_rate": 3.222618372036998e-05, | |
| "loss": 1.2376, | |
| "step": 618500 | |
| }, | |
| { | |
| "epoch": 2.49, | |
| "learning_rate": 3.221183838237484e-05, | |
| "loss": 1.2284, | |
| "step": 619000 | |
| }, | |
| { | |
| "epoch": 2.49, | |
| "learning_rate": 3.219746429620737e-05, | |
| "loss": 1.2159, | |
| "step": 619500 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "learning_rate": 3.21830902100399e-05, | |
| "loss": 1.2351, | |
| "step": 620000 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "learning_rate": 3.216871612387243e-05, | |
| "loss": 1.2163, | |
| "step": 620500 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "learning_rate": 3.215434203770495e-05, | |
| "loss": 1.2584, | |
| "step": 621000 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "learning_rate": 3.2139967951537484e-05, | |
| "loss": 1.2286, | |
| "step": 621500 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "learning_rate": 3.2125593865370015e-05, | |
| "loss": 1.2537, | |
| "step": 622000 | |
| }, | |
| { | |
| "epoch": 2.51, | |
| "learning_rate": 3.211124852737488e-05, | |
| "loss": 1.2097, | |
| "step": 622500 | |
| }, | |
| { | |
| "epoch": 2.51, | |
| "learning_rate": 3.2096874441207404e-05, | |
| "loss": 1.2062, | |
| "step": 623000 | |
| }, | |
| { | |
| "epoch": 2.51, | |
| "learning_rate": 3.208250035503993e-05, | |
| "loss": 1.2346, | |
| "step": 623500 | |
| }, | |
| { | |
| "epoch": 2.51, | |
| "learning_rate": 3.206812626887246e-05, | |
| "loss": 1.2543, | |
| "step": 624000 | |
| }, | |
| { | |
| "epoch": 2.51, | |
| "learning_rate": 3.205375218270499e-05, | |
| "loss": 1.2531, | |
| "step": 624500 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "learning_rate": 3.2039406844709854e-05, | |
| "loss": 1.2416, | |
| "step": 625000 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "learning_rate": 3.202503275854238e-05, | |
| "loss": 1.2237, | |
| "step": 625500 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "learning_rate": 3.20106586723749e-05, | |
| "loss": 1.2425, | |
| "step": 626000 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "learning_rate": 3.1996284586207434e-05, | |
| "loss": 1.2313, | |
| "step": 626500 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "learning_rate": 3.1981910500039965e-05, | |
| "loss": 1.2602, | |
| "step": 627000 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "learning_rate": 3.196753641387249e-05, | |
| "loss": 1.2338, | |
| "step": 627500 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "learning_rate": 3.195316232770501e-05, | |
| "loss": 1.2783, | |
| "step": 628000 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "learning_rate": 3.193881698970988e-05, | |
| "loss": 1.2234, | |
| "step": 628500 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "learning_rate": 3.192444290354241e-05, | |
| "loss": 1.2668, | |
| "step": 629000 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "learning_rate": 3.191006881737494e-05, | |
| "loss": 1.2136, | |
| "step": 629500 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "learning_rate": 3.1895694731207464e-05, | |
| "loss": 1.23, | |
| "step": 630000 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "learning_rate": 3.1881320645039995e-05, | |
| "loss": 1.218, | |
| "step": 630500 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "learning_rate": 3.1866946558872526e-05, | |
| "loss": 1.2594, | |
| "step": 631000 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "learning_rate": 3.185257247270505e-05, | |
| "loss": 1.2191, | |
| "step": 631500 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "learning_rate": 3.1838227134709914e-05, | |
| "loss": 1.2261, | |
| "step": 632000 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "learning_rate": 3.182385304854244e-05, | |
| "loss": 1.2288, | |
| "step": 632500 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "learning_rate": 3.180947896237497e-05, | |
| "loss": 1.2882, | |
| "step": 633000 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "learning_rate": 3.17951048762075e-05, | |
| "loss": 1.2466, | |
| "step": 633500 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "learning_rate": 3.1780730790040025e-05, | |
| "loss": 1.2221, | |
| "step": 634000 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "learning_rate": 3.176635670387255e-05, | |
| "loss": 1.2453, | |
| "step": 634500 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "learning_rate": 3.175198261770508e-05, | |
| "loss": 1.2455, | |
| "step": 635000 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "learning_rate": 3.173760853153761e-05, | |
| "loss": 1.2269, | |
| "step": 635500 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "learning_rate": 3.1723263193542475e-05, | |
| "loss": 1.2535, | |
| "step": 636000 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "learning_rate": 3.1708889107375e-05, | |
| "loss": 1.2515, | |
| "step": 636500 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "learning_rate": 3.169451502120753e-05, | |
| "loss": 1.239, | |
| "step": 637000 | |
| }, | |
| { | |
| "epoch": 2.57, | |
| "learning_rate": 3.168014093504006e-05, | |
| "loss": 1.2717, | |
| "step": 637500 | |
| }, | |
| { | |
| "epoch": 2.57, | |
| "learning_rate": 3.1665766848872586e-05, | |
| "loss": 1.2414, | |
| "step": 638000 | |
| }, | |
| { | |
| "epoch": 2.57, | |
| "learning_rate": 3.165142151087745e-05, | |
| "loss": 1.2811, | |
| "step": 638500 | |
| }, | |
| { | |
| "epoch": 2.57, | |
| "learning_rate": 3.1637047424709974e-05, | |
| "loss": 1.2393, | |
| "step": 639000 | |
| }, | |
| { | |
| "epoch": 2.57, | |
| "learning_rate": 3.1622673338542505e-05, | |
| "loss": 1.1999, | |
| "step": 639500 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "learning_rate": 3.1608299252375036e-05, | |
| "loss": 1.2494, | |
| "step": 640000 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "learning_rate": 3.159392516620756e-05, | |
| "loss": 1.247, | |
| "step": 640500 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "learning_rate": 3.1579551080040085e-05, | |
| "loss": 1.2276, | |
| "step": 641000 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "learning_rate": 3.1565176993872616e-05, | |
| "loss": 1.2768, | |
| "step": 641500 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "learning_rate": 3.1550802907705146e-05, | |
| "loss": 1.2675, | |
| "step": 642000 | |
| }, | |
| { | |
| "epoch": 2.59, | |
| "learning_rate": 3.153642882153767e-05, | |
| "loss": 1.269, | |
| "step": 642500 | |
| }, | |
| { | |
| "epoch": 2.59, | |
| "learning_rate": 3.152211223171487e-05, | |
| "loss": 1.2432, | |
| "step": 643000 | |
| }, | |
| { | |
| "epoch": 2.59, | |
| "learning_rate": 3.15077381455474e-05, | |
| "loss": 1.2798, | |
| "step": 643500 | |
| }, | |
| { | |
| "epoch": 2.59, | |
| "learning_rate": 3.1493364059379924e-05, | |
| "loss": 1.2195, | |
| "step": 644000 | |
| }, | |
| { | |
| "epoch": 2.59, | |
| "learning_rate": 3.1478989973212455e-05, | |
| "loss": 1.2509, | |
| "step": 644500 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "learning_rate": 3.1464615887044986e-05, | |
| "loss": 1.2541, | |
| "step": 645000 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "learning_rate": 3.145024180087751e-05, | |
| "loss": 1.2381, | |
| "step": 645500 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "learning_rate": 3.143586771471004e-05, | |
| "loss": 1.274, | |
| "step": 646000 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "learning_rate": 3.1421493628542565e-05, | |
| "loss": 1.2519, | |
| "step": 646500 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "learning_rate": 3.140717703871976e-05, | |
| "loss": 1.2487, | |
| "step": 647000 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "learning_rate": 3.1392802952552294e-05, | |
| "loss": 1.2223, | |
| "step": 647500 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "learning_rate": 3.1378428866384825e-05, | |
| "loss": 1.2576, | |
| "step": 648000 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "learning_rate": 3.136405478021735e-05, | |
| "loss": 1.2481, | |
| "step": 648500 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "learning_rate": 3.1349680694049874e-05, | |
| "loss": 1.236, | |
| "step": 649000 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "learning_rate": 3.1335306607882405e-05, | |
| "loss": 1.1997, | |
| "step": 649500 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "learning_rate": 3.132096126988727e-05, | |
| "loss": 1.2026, | |
| "step": 650000 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "learning_rate": 3.13065871837198e-05, | |
| "loss": 1.2536, | |
| "step": 650500 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "learning_rate": 3.1292213097552324e-05, | |
| "loss": 1.2429, | |
| "step": 651000 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "learning_rate": 3.127783901138485e-05, | |
| "loss": 1.2246, | |
| "step": 651500 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "learning_rate": 3.126349367338971e-05, | |
| "loss": 1.2687, | |
| "step": 652000 | |
| }, | |
| { | |
| "epoch": 2.63, | |
| "learning_rate": 3.1249119587222244e-05, | |
| "loss": 1.2562, | |
| "step": 652500 | |
| }, | |
| { | |
| "epoch": 2.63, | |
| "learning_rate": 3.1234745501054775e-05, | |
| "loss": 1.2347, | |
| "step": 653000 | |
| }, | |
| { | |
| "epoch": 2.63, | |
| "learning_rate": 3.12203714148873e-05, | |
| "loss": 1.2105, | |
| "step": 653500 | |
| }, | |
| { | |
| "epoch": 2.63, | |
| "learning_rate": 3.120599732871983e-05, | |
| "loss": 1.2172, | |
| "step": 654000 | |
| }, | |
| { | |
| "epoch": 2.63, | |
| "learning_rate": 3.1191623242552354e-05, | |
| "loss": 1.2471, | |
| "step": 654500 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "learning_rate": 3.1177249156384885e-05, | |
| "loss": 1.2841, | |
| "step": 655000 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "learning_rate": 3.116290381838975e-05, | |
| "loss": 1.2743, | |
| "step": 655500 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "learning_rate": 3.1148529732222274e-05, | |
| "loss": 1.2504, | |
| "step": 656000 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "learning_rate": 3.1134155646054805e-05, | |
| "loss": 1.2194, | |
| "step": 656500 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "learning_rate": 3.1119781559887336e-05, | |
| "loss": 1.2181, | |
| "step": 657000 | |
| }, | |
| { | |
| "epoch": 2.65, | |
| "learning_rate": 3.110540747371986e-05, | |
| "loss": 1.294, | |
| "step": 657500 | |
| }, | |
| { | |
| "epoch": 2.65, | |
| "learning_rate": 3.1091033387552384e-05, | |
| "loss": 1.248, | |
| "step": 658000 | |
| }, | |
| { | |
| "epoch": 2.65, | |
| "learning_rate": 3.1076659301384915e-05, | |
| "loss": 1.2075, | |
| "step": 658500 | |
| }, | |
| { | |
| "epoch": 2.65, | |
| "learning_rate": 3.1062285215217446e-05, | |
| "loss": 1.2646, | |
| "step": 659000 | |
| }, | |
| { | |
| "epoch": 2.65, | |
| "learning_rate": 3.104791112904997e-05, | |
| "loss": 1.2633, | |
| "step": 659500 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "learning_rate": 3.10335370428825e-05, | |
| "loss": 1.2457, | |
| "step": 660000 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "learning_rate": 3.1019191704887366e-05, | |
| "loss": 1.224, | |
| "step": 660500 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "learning_rate": 3.100481761871989e-05, | |
| "loss": 1.2108, | |
| "step": 661000 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "learning_rate": 3.099044353255242e-05, | |
| "loss": 1.2735, | |
| "step": 661500 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "learning_rate": 3.0976069446384945e-05, | |
| "loss": 1.2517, | |
| "step": 662000 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "learning_rate": 3.096172410838981e-05, | |
| "loss": 1.2133, | |
| "step": 662500 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "learning_rate": 3.094735002222234e-05, | |
| "loss": 1.2374, | |
| "step": 663000 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "learning_rate": 3.09330046842272e-05, | |
| "loss": 1.2218, | |
| "step": 663500 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "learning_rate": 3.091863059805973e-05, | |
| "loss": 1.2535, | |
| "step": 664000 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "learning_rate": 3.090425651189226e-05, | |
| "loss": 1.2373, | |
| "step": 664500 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "learning_rate": 3.0889882425724785e-05, | |
| "loss": 1.2154, | |
| "step": 665000 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "learning_rate": 3.087553708772965e-05, | |
| "loss": 1.2471, | |
| "step": 665500 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "learning_rate": 3.0861163001562173e-05, | |
| "loss": 1.1906, | |
| "step": 666000 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "learning_rate": 3.0846788915394704e-05, | |
| "loss": 1.2542, | |
| "step": 666500 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "learning_rate": 3.0832414829227235e-05, | |
| "loss": 1.2244, | |
| "step": 667000 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "learning_rate": 3.081804074305976e-05, | |
| "loss": 1.2348, | |
| "step": 667500 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "learning_rate": 3.080366665689229e-05, | |
| "loss": 1.2351, | |
| "step": 668000 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "learning_rate": 3.078929257072482e-05, | |
| "loss": 1.2391, | |
| "step": 668500 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "learning_rate": 3.0774918484557346e-05, | |
| "loss": 1.2348, | |
| "step": 669000 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "learning_rate": 3.0760544398389877e-05, | |
| "loss": 1.234, | |
| "step": 669500 | |
| }, | |
| { | |
| "epoch": 2.7, | |
| "learning_rate": 3.07461703122224e-05, | |
| "loss": 1.2389, | |
| "step": 670000 | |
| }, | |
| { | |
| "epoch": 2.7, | |
| "learning_rate": 3.073179622605493e-05, | |
| "loss": 1.2821, | |
| "step": 670500 | |
| }, | |
| { | |
| "epoch": 2.7, | |
| "learning_rate": 3.0717422139887456e-05, | |
| "loss": 1.2589, | |
| "step": 671000 | |
| }, | |
| { | |
| "epoch": 2.7, | |
| "learning_rate": 3.070307680189232e-05, | |
| "loss": 1.2413, | |
| "step": 671500 | |
| }, | |
| { | |
| "epoch": 2.7, | |
| "learning_rate": 3.068870271572485e-05, | |
| "loss": 1.2379, | |
| "step": 672000 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "learning_rate": 3.0674328629557376e-05, | |
| "loss": 1.2484, | |
| "step": 672500 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "learning_rate": 3.0659954543389907e-05, | |
| "loss": 1.2199, | |
| "step": 673000 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "learning_rate": 3.064560920539477e-05, | |
| "loss": 1.2932, | |
| "step": 673500 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "learning_rate": 3.0631235119227295e-05, | |
| "loss": 1.3011, | |
| "step": 674000 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "learning_rate": 3.0616861033059826e-05, | |
| "loss": 1.2379, | |
| "step": 674500 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "learning_rate": 3.060248694689235e-05, | |
| "loss": 1.2357, | |
| "step": 675000 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "learning_rate": 3.0588141608897215e-05, | |
| "loss": 1.2198, | |
| "step": 675500 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "learning_rate": 3.0573767522729746e-05, | |
| "loss": 1.2762, | |
| "step": 676000 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "learning_rate": 3.055939343656227e-05, | |
| "loss": 1.2504, | |
| "step": 676500 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "learning_rate": 3.05450193503948e-05, | |
| "loss": 1.2453, | |
| "step": 677000 | |
| }, | |
| { | |
| "epoch": 2.73, | |
| "learning_rate": 3.0530645264227325e-05, | |
| "loss": 1.2447, | |
| "step": 677500 | |
| }, | |
| { | |
| "epoch": 2.73, | |
| "learning_rate": 3.051629992623219e-05, | |
| "loss": 1.213, | |
| "step": 678000 | |
| }, | |
| { | |
| "epoch": 2.73, | |
| "learning_rate": 3.050192584006472e-05, | |
| "loss": 1.2488, | |
| "step": 678500 | |
| }, | |
| { | |
| "epoch": 2.73, | |
| "learning_rate": 3.048755175389725e-05, | |
| "loss": 1.1872, | |
| "step": 679000 | |
| }, | |
| { | |
| "epoch": 2.73, | |
| "learning_rate": 3.0473177667729773e-05, | |
| "loss": 1.2731, | |
| "step": 679500 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "learning_rate": 3.0458832329734637e-05, | |
| "loss": 1.2094, | |
| "step": 680000 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "learning_rate": 3.0444458243567165e-05, | |
| "loss": 1.2595, | |
| "step": 680500 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "learning_rate": 3.0430084157399696e-05, | |
| "loss": 1.2945, | |
| "step": 681000 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "learning_rate": 3.0415710071232223e-05, | |
| "loss": 1.2121, | |
| "step": 681500 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "learning_rate": 3.040133598506475e-05, | |
| "loss": 1.2368, | |
| "step": 682000 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "learning_rate": 3.0386961898897282e-05, | |
| "loss": 1.22, | |
| "step": 682500 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "learning_rate": 3.037261656090214e-05, | |
| "loss": 1.2889, | |
| "step": 683000 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "learning_rate": 3.035824247473467e-05, | |
| "loss": 1.2652, | |
| "step": 683500 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "learning_rate": 3.0343868388567198e-05, | |
| "loss": 1.2319, | |
| "step": 684000 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "learning_rate": 3.0329494302399726e-05, | |
| "loss": 1.2568, | |
| "step": 684500 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "learning_rate": 3.0315120216232257e-05, | |
| "loss": 1.2336, | |
| "step": 685000 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "learning_rate": 3.0300774878237114e-05, | |
| "loss": 1.3042, | |
| "step": 685500 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "learning_rate": 3.0286400792069645e-05, | |
| "loss": 1.2508, | |
| "step": 686000 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "learning_rate": 3.0272026705902173e-05, | |
| "loss": 1.2772, | |
| "step": 686500 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "learning_rate": 3.02576526197347e-05, | |
| "loss": 1.2556, | |
| "step": 687000 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "learning_rate": 3.024327853356723e-05, | |
| "loss": 1.2766, | |
| "step": 687500 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "learning_rate": 3.022890444739976e-05, | |
| "loss": 1.2361, | |
| "step": 688000 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "learning_rate": 3.0214530361232287e-05, | |
| "loss": 1.2118, | |
| "step": 688500 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "learning_rate": 3.0200185023237148e-05, | |
| "loss": 1.2504, | |
| "step": 689000 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "learning_rate": 3.0185810937069675e-05, | |
| "loss": 1.2209, | |
| "step": 689500 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "learning_rate": 3.0171436850902206e-05, | |
| "loss": 1.2519, | |
| "step": 690000 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "learning_rate": 3.0157062764734734e-05, | |
| "loss": 1.2479, | |
| "step": 690500 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "learning_rate": 3.014268867856726e-05, | |
| "loss": 1.2803, | |
| "step": 691000 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "learning_rate": 3.0128314592399792e-05, | |
| "loss": 1.2125, | |
| "step": 691500 | |
| }, | |
| { | |
| "epoch": 2.79, | |
| "learning_rate": 3.011396925440465e-05, | |
| "loss": 1.3017, | |
| "step": 692000 | |
| }, | |
| { | |
| "epoch": 2.79, | |
| "learning_rate": 3.009959516823718e-05, | |
| "loss": 1.2246, | |
| "step": 692500 | |
| }, | |
| { | |
| "epoch": 2.79, | |
| "learning_rate": 3.008522108206971e-05, | |
| "loss": 1.2735, | |
| "step": 693000 | |
| }, | |
| { | |
| "epoch": 2.79, | |
| "learning_rate": 3.0070846995902236e-05, | |
| "loss": 1.2631, | |
| "step": 693500 | |
| }, | |
| { | |
| "epoch": 2.79, | |
| "learning_rate": 3.0056472909734767e-05, | |
| "loss": 1.1929, | |
| "step": 694000 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "learning_rate": 3.0042098823567295e-05, | |
| "loss": 1.1861, | |
| "step": 694500 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "learning_rate": 3.002772473739982e-05, | |
| "loss": 1.2358, | |
| "step": 695000 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "learning_rate": 3.0013350651232347e-05, | |
| "loss": 1.2334, | |
| "step": 695500 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "learning_rate": 2.999900531323721e-05, | |
| "loss": 1.1998, | |
| "step": 696000 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "learning_rate": 2.9984631227069742e-05, | |
| "loss": 1.2429, | |
| "step": 696500 | |
| }, | |
| { | |
| "epoch": 2.81, | |
| "learning_rate": 2.997025714090227e-05, | |
| "loss": 1.2357, | |
| "step": 697000 | |
| }, | |
| { | |
| "epoch": 2.81, | |
| "learning_rate": 2.9955883054734797e-05, | |
| "loss": 1.2471, | |
| "step": 697500 | |
| }, | |
| { | |
| "epoch": 2.81, | |
| "learning_rate": 2.994153771673966e-05, | |
| "loss": 1.2215, | |
| "step": 698000 | |
| }, | |
| { | |
| "epoch": 2.81, | |
| "learning_rate": 2.9927192378744523e-05, | |
| "loss": 1.219, | |
| "step": 698500 | |
| }, | |
| { | |
| "epoch": 2.81, | |
| "learning_rate": 2.991281829257705e-05, | |
| "loss": 1.23, | |
| "step": 699000 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "learning_rate": 2.989844420640958e-05, | |
| "loss": 1.2233, | |
| "step": 699500 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "learning_rate": 2.988407012024211e-05, | |
| "loss": 1.2892, | |
| "step": 700000 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "learning_rate": 2.9869696034074633e-05, | |
| "loss": 1.2555, | |
| "step": 700500 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "learning_rate": 2.9855350696079498e-05, | |
| "loss": 1.2268, | |
| "step": 701000 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "learning_rate": 2.9841005358084362e-05, | |
| "loss": 1.2577, | |
| "step": 701500 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "learning_rate": 2.982663127191689e-05, | |
| "loss": 1.2226, | |
| "step": 702000 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "learning_rate": 2.9812257185749414e-05, | |
| "loss": 1.2568, | |
| "step": 702500 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "learning_rate": 2.979788309958195e-05, | |
| "loss": 1.2055, | |
| "step": 703000 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "learning_rate": 2.9783509013414473e-05, | |
| "loss": 1.2711, | |
| "step": 703500 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "learning_rate": 2.9769134927247e-05, | |
| "loss": 1.2103, | |
| "step": 704000 | |
| }, | |
| { | |
| "epoch": 2.84, | |
| "learning_rate": 2.975476084107953e-05, | |
| "loss": 1.2523, | |
| "step": 704500 | |
| }, | |
| { | |
| "epoch": 2.84, | |
| "learning_rate": 2.974041550308439e-05, | |
| "loss": 1.2298, | |
| "step": 705000 | |
| }, | |
| { | |
| "epoch": 2.84, | |
| "learning_rate": 2.9726041416916923e-05, | |
| "loss": 1.2213, | |
| "step": 705500 | |
| }, | |
| { | |
| "epoch": 2.84, | |
| "learning_rate": 2.9711667330749447e-05, | |
| "loss": 1.2054, | |
| "step": 706000 | |
| }, | |
| { | |
| "epoch": 2.84, | |
| "learning_rate": 2.9697293244581975e-05, | |
| "loss": 1.2266, | |
| "step": 706500 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "learning_rate": 2.9682919158414506e-05, | |
| "loss": 1.2166, | |
| "step": 707000 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "learning_rate": 2.9668545072247034e-05, | |
| "loss": 1.2413, | |
| "step": 707500 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "learning_rate": 2.965417098607956e-05, | |
| "loss": 1.2661, | |
| "step": 708000 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "learning_rate": 2.9639796899912085e-05, | |
| "loss": 1.243, | |
| "step": 708500 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "learning_rate": 2.962542281374462e-05, | |
| "loss": 1.2561, | |
| "step": 709000 | |
| }, | |
| { | |
| "epoch": 2.86, | |
| "learning_rate": 2.961107747574948e-05, | |
| "loss": 1.2337, | |
| "step": 709500 | |
| }, | |
| { | |
| "epoch": 2.86, | |
| "learning_rate": 2.959670338958201e-05, | |
| "loss": 1.2169, | |
| "step": 710000 | |
| }, | |
| { | |
| "epoch": 2.86, | |
| "learning_rate": 2.9582329303414536e-05, | |
| "loss": 1.229, | |
| "step": 710500 | |
| }, | |
| { | |
| "epoch": 2.86, | |
| "learning_rate": 2.9567955217247067e-05, | |
| "loss": 1.2684, | |
| "step": 711000 | |
| }, | |
| { | |
| "epoch": 2.86, | |
| "learning_rate": 2.9553581131079595e-05, | |
| "loss": 1.2043, | |
| "step": 711500 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "learning_rate": 2.9539207044912122e-05, | |
| "loss": 1.2192, | |
| "step": 712000 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "learning_rate": 2.9524861706916983e-05, | |
| "loss": 1.2479, | |
| "step": 712500 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "learning_rate": 2.951048762074951e-05, | |
| "loss": 1.1925, | |
| "step": 713000 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "learning_rate": 2.9496113534582042e-05, | |
| "loss": 1.2559, | |
| "step": 713500 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "learning_rate": 2.948173944841457e-05, | |
| "loss": 1.2398, | |
| "step": 714000 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "learning_rate": 2.9467365362247097e-05, | |
| "loss": 1.2269, | |
| "step": 714500 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "learning_rate": 2.945299127607962e-05, | |
| "loss": 1.2711, | |
| "step": 715000 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "learning_rate": 2.9438617189912155e-05, | |
| "loss": 1.2233, | |
| "step": 715500 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "learning_rate": 2.942424310374468e-05, | |
| "loss": 1.2588, | |
| "step": 716000 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "learning_rate": 2.9409897765749544e-05, | |
| "loss": 1.2265, | |
| "step": 716500 | |
| }, | |
| { | |
| "epoch": 2.89, | |
| "learning_rate": 2.9395523679582072e-05, | |
| "loss": 1.23, | |
| "step": 717000 | |
| }, | |
| { | |
| "epoch": 2.89, | |
| "learning_rate": 2.9381149593414596e-05, | |
| "loss": 1.1803, | |
| "step": 717500 | |
| }, | |
| { | |
| "epoch": 2.89, | |
| "learning_rate": 2.936677550724713e-05, | |
| "loss": 1.2335, | |
| "step": 718000 | |
| }, | |
| { | |
| "epoch": 2.89, | |
| "learning_rate": 2.9352430169251995e-05, | |
| "loss": 1.2416, | |
| "step": 718500 | |
| }, | |
| { | |
| "epoch": 2.89, | |
| "learning_rate": 2.933805608308452e-05, | |
| "loss": 1.2309, | |
| "step": 719000 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "learning_rate": 2.9323681996917047e-05, | |
| "loss": 1.2603, | |
| "step": 719500 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "learning_rate": 2.9309307910749578e-05, | |
| "loss": 1.2577, | |
| "step": 720000 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "learning_rate": 2.9294933824582105e-05, | |
| "loss": 1.2358, | |
| "step": 720500 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "learning_rate": 2.9280559738414633e-05, | |
| "loss": 1.2698, | |
| "step": 721000 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "learning_rate": 2.9266185652247157e-05, | |
| "loss": 1.2195, | |
| "step": 721500 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "learning_rate": 2.925181156607969e-05, | |
| "loss": 1.2116, | |
| "step": 722000 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "learning_rate": 2.9237466228084552e-05, | |
| "loss": 1.3009, | |
| "step": 722500 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "learning_rate": 2.922309214191708e-05, | |
| "loss": 1.2121, | |
| "step": 723000 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "learning_rate": 2.9208718055749608e-05, | |
| "loss": 1.2387, | |
| "step": 723500 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "learning_rate": 2.9194343969582132e-05, | |
| "loss": 1.2452, | |
| "step": 724000 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "learning_rate": 2.9179998631586996e-05, | |
| "loss": 1.2374, | |
| "step": 724500 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "learning_rate": 2.9165624545419527e-05, | |
| "loss": 1.2658, | |
| "step": 725000 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "learning_rate": 2.9151250459252055e-05, | |
| "loss": 1.205, | |
| "step": 725500 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "learning_rate": 2.913690512125692e-05, | |
| "loss": 1.2435, | |
| "step": 726000 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "learning_rate": 2.9122531035089447e-05, | |
| "loss": 1.2219, | |
| "step": 726500 | |
| }, | |
| { | |
| "epoch": 2.93, | |
| "learning_rate": 2.910815694892197e-05, | |
| "loss": 1.2025, | |
| "step": 727000 | |
| }, | |
| { | |
| "epoch": 2.93, | |
| "learning_rate": 2.9093782862754506e-05, | |
| "loss": 1.2128, | |
| "step": 727500 | |
| }, | |
| { | |
| "epoch": 2.93, | |
| "learning_rate": 2.9079437524759367e-05, | |
| "loss": 1.2164, | |
| "step": 728000 | |
| }, | |
| { | |
| "epoch": 2.93, | |
| "learning_rate": 2.9065063438591894e-05, | |
| "loss": 1.2084, | |
| "step": 728500 | |
| }, | |
| { | |
| "epoch": 2.93, | |
| "learning_rate": 2.9050689352424422e-05, | |
| "loss": 1.2484, | |
| "step": 729000 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "learning_rate": 2.9036315266256946e-05, | |
| "loss": 1.2232, | |
| "step": 729500 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "learning_rate": 2.902194118008948e-05, | |
| "loss": 1.1881, | |
| "step": 730000 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "learning_rate": 2.9007567093922005e-05, | |
| "loss": 1.2966, | |
| "step": 730500 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "learning_rate": 2.899322175592687e-05, | |
| "loss": 1.2549, | |
| "step": 731000 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "learning_rate": 2.8978847669759397e-05, | |
| "loss": 1.2204, | |
| "step": 731500 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "learning_rate": 2.896447358359192e-05, | |
| "loss": 1.2822, | |
| "step": 732000 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "learning_rate": 2.8950099497424455e-05, | |
| "loss": 1.1844, | |
| "step": 732500 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "learning_rate": 2.893572541125698e-05, | |
| "loss": 1.2724, | |
| "step": 733000 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "learning_rate": 2.8921351325089507e-05, | |
| "loss": 1.2292, | |
| "step": 733500 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "learning_rate": 2.8906977238922038e-05, | |
| "loss": 1.2345, | |
| "step": 734000 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "learning_rate": 2.8892603152754565e-05, | |
| "loss": 1.1832, | |
| "step": 734500 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "learning_rate": 2.887825781475943e-05, | |
| "loss": 1.2625, | |
| "step": 735000 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "learning_rate": 2.8863883728591958e-05, | |
| "loss": 1.2347, | |
| "step": 735500 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "learning_rate": 2.8849509642424482e-05, | |
| "loss": 1.237, | |
| "step": 736000 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "learning_rate": 2.8835135556257016e-05, | |
| "loss": 1.249, | |
| "step": 736500 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "learning_rate": 2.8820790218261874e-05, | |
| "loss": 1.2297, | |
| "step": 737000 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "learning_rate": 2.8806416132094405e-05, | |
| "loss": 1.2588, | |
| "step": 737500 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "learning_rate": 2.8792042045926932e-05, | |
| "loss": 1.2088, | |
| "step": 738000 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "learning_rate": 2.8777667959759457e-05, | |
| "loss": 1.2394, | |
| "step": 738500 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "learning_rate": 2.876329387359199e-05, | |
| "loss": 1.2174, | |
| "step": 739000 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "learning_rate": 2.8748919787424515e-05, | |
| "loss": 1.2442, | |
| "step": 739500 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "learning_rate": 2.8734545701257043e-05, | |
| "loss": 1.245, | |
| "step": 740000 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "learning_rate": 2.872017161508957e-05, | |
| "loss": 1.2265, | |
| "step": 740500 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "learning_rate": 2.870582627709443e-05, | |
| "loss": 1.2302, | |
| "step": 741000 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "learning_rate": 2.8691452190926966e-05, | |
| "loss": 1.233, | |
| "step": 741500 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "learning_rate": 2.867707810475949e-05, | |
| "loss": 1.2119, | |
| "step": 742000 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "learning_rate": 2.8662704018592018e-05, | |
| "loss": 1.2313, | |
| "step": 742500 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "learning_rate": 2.864832993242455e-05, | |
| "loss": 1.2186, | |
| "step": 743000 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "learning_rate": 2.8633955846257076e-05, | |
| "loss": 1.2117, | |
| "step": 743500 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "learning_rate": 2.8619581760089604e-05, | |
| "loss": 1.2814, | |
| "step": 744000 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "learning_rate": 2.860520767392213e-05, | |
| "loss": 1.2375, | |
| "step": 744500 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "learning_rate": 2.8590862335926992e-05, | |
| "loss": 1.237, | |
| "step": 745000 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_cer": 0.2424137811448767, | |
| "eval_loss": 0.976024329662323, | |
| "eval_runtime": 10946.8543, | |
| "eval_samples_per_second": 8.929, | |
| "eval_steps_per_second": 1.116, | |
| "step": 745389 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "learning_rate": 2.8576488249759527e-05, | |
| "loss": 1.2107, | |
| "step": 745500 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "learning_rate": 2.856211416359205e-05, | |
| "loss": 1.1992, | |
| "step": 746000 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "learning_rate": 2.854774007742458e-05, | |
| "loss": 1.1948, | |
| "step": 746500 | |
| }, | |
| { | |
| "epoch": 3.01, | |
| "learning_rate": 2.8533365991257106e-05, | |
| "loss": 1.1713, | |
| "step": 747000 | |
| }, | |
| { | |
| "epoch": 3.01, | |
| "learning_rate": 2.8519020653261967e-05, | |
| "loss": 1.1483, | |
| "step": 747500 | |
| }, | |
| { | |
| "epoch": 3.01, | |
| "learning_rate": 2.85046465670945e-05, | |
| "loss": 1.2257, | |
| "step": 748000 | |
| }, | |
| { | |
| "epoch": 3.01, | |
| "learning_rate": 2.849030122909936e-05, | |
| "loss": 1.1686, | |
| "step": 748500 | |
| }, | |
| { | |
| "epoch": 3.01, | |
| "learning_rate": 2.847592714293189e-05, | |
| "loss": 1.1807, | |
| "step": 749000 | |
| }, | |
| { | |
| "epoch": 3.02, | |
| "learning_rate": 2.8461553056764418e-05, | |
| "loss": 1.1587, | |
| "step": 749500 | |
| }, | |
| { | |
| "epoch": 3.02, | |
| "learning_rate": 2.8447178970596946e-05, | |
| "loss": 1.2438, | |
| "step": 750000 | |
| }, | |
| { | |
| "epoch": 3.02, | |
| "learning_rate": 2.8432833632601807e-05, | |
| "loss": 1.1855, | |
| "step": 750500 | |
| }, | |
| { | |
| "epoch": 3.02, | |
| "learning_rate": 2.841845954643434e-05, | |
| "loss": 1.1944, | |
| "step": 751000 | |
| }, | |
| { | |
| "epoch": 3.02, | |
| "learning_rate": 2.84041142084392e-05, | |
| "loss": 1.1879, | |
| "step": 751500 | |
| }, | |
| { | |
| "epoch": 3.03, | |
| "learning_rate": 2.838974012227173e-05, | |
| "loss": 1.1839, | |
| "step": 752000 | |
| }, | |
| { | |
| "epoch": 3.03, | |
| "learning_rate": 2.8375366036104257e-05, | |
| "loss": 1.2428, | |
| "step": 752500 | |
| }, | |
| { | |
| "epoch": 3.03, | |
| "learning_rate": 2.836099194993678e-05, | |
| "loss": 1.2279, | |
| "step": 753000 | |
| }, | |
| { | |
| "epoch": 3.03, | |
| "learning_rate": 2.8346617863769316e-05, | |
| "loss": 1.2226, | |
| "step": 753500 | |
| }, | |
| { | |
| "epoch": 3.03, | |
| "learning_rate": 2.833224377760184e-05, | |
| "loss": 1.2016, | |
| "step": 754000 | |
| }, | |
| { | |
| "epoch": 3.04, | |
| "learning_rate": 2.8317869691434368e-05, | |
| "loss": 1.1895, | |
| "step": 754500 | |
| }, | |
| { | |
| "epoch": 3.04, | |
| "learning_rate": 2.8303495605266895e-05, | |
| "loss": 1.2318, | |
| "step": 755000 | |
| }, | |
| { | |
| "epoch": 3.04, | |
| "learning_rate": 2.8289121519099426e-05, | |
| "loss": 1.2281, | |
| "step": 755500 | |
| }, | |
| { | |
| "epoch": 3.04, | |
| "learning_rate": 2.8274747432931954e-05, | |
| "loss": 1.206, | |
| "step": 756000 | |
| }, | |
| { | |
| "epoch": 3.04, | |
| "learning_rate": 2.8260373346764478e-05, | |
| "loss": 1.1901, | |
| "step": 756500 | |
| }, | |
| { | |
| "epoch": 3.05, | |
| "learning_rate": 2.8245999260597012e-05, | |
| "loss": 1.1949, | |
| "step": 757000 | |
| }, | |
| { | |
| "epoch": 3.05, | |
| "learning_rate": 2.823165392260187e-05, | |
| "loss": 1.1934, | |
| "step": 757500 | |
| }, | |
| { | |
| "epoch": 3.05, | |
| "learning_rate": 2.82172798364344e-05, | |
| "loss": 1.1726, | |
| "step": 758000 | |
| }, | |
| { | |
| "epoch": 3.05, | |
| "learning_rate": 2.820290575026693e-05, | |
| "loss": 1.1841, | |
| "step": 758500 | |
| }, | |
| { | |
| "epoch": 3.05, | |
| "learning_rate": 2.8188531664099456e-05, | |
| "loss": 1.1345, | |
| "step": 759000 | |
| }, | |
| { | |
| "epoch": 3.06, | |
| "learning_rate": 2.8174186326104317e-05, | |
| "loss": 1.1785, | |
| "step": 759500 | |
| }, | |
| { | |
| "epoch": 3.06, | |
| "learning_rate": 2.8159812239936845e-05, | |
| "loss": 1.2117, | |
| "step": 760000 | |
| }, | |
| { | |
| "epoch": 3.06, | |
| "learning_rate": 2.8145438153769376e-05, | |
| "loss": 1.2459, | |
| "step": 760500 | |
| }, | |
| { | |
| "epoch": 3.06, | |
| "learning_rate": 2.813109281577424e-05, | |
| "loss": 1.2031, | |
| "step": 761000 | |
| }, | |
| { | |
| "epoch": 3.06, | |
| "learning_rate": 2.8116718729606768e-05, | |
| "loss": 1.1813, | |
| "step": 761500 | |
| }, | |
| { | |
| "epoch": 3.07, | |
| "learning_rate": 2.8102344643439292e-05, | |
| "loss": 1.1901, | |
| "step": 762000 | |
| }, | |
| { | |
| "epoch": 3.07, | |
| "learning_rate": 2.8087970557271827e-05, | |
| "loss": 1.1793, | |
| "step": 762500 | |
| }, | |
| { | |
| "epoch": 3.07, | |
| "learning_rate": 2.807359647110435e-05, | |
| "loss": 1.2231, | |
| "step": 763000 | |
| }, | |
| { | |
| "epoch": 3.07, | |
| "learning_rate": 2.8059222384936878e-05, | |
| "loss": 1.1651, | |
| "step": 763500 | |
| }, | |
| { | |
| "epoch": 3.07, | |
| "learning_rate": 2.8044848298769406e-05, | |
| "loss": 1.1767, | |
| "step": 764000 | |
| }, | |
| { | |
| "epoch": 3.08, | |
| "learning_rate": 2.8030474212601937e-05, | |
| "loss": 1.1947, | |
| "step": 764500 | |
| }, | |
| { | |
| "epoch": 3.08, | |
| "learning_rate": 2.801615762277913e-05, | |
| "loss": 1.2363, | |
| "step": 765000 | |
| }, | |
| { | |
| "epoch": 3.08, | |
| "learning_rate": 2.800178353661166e-05, | |
| "loss": 1.2033, | |
| "step": 765500 | |
| }, | |
| { | |
| "epoch": 3.08, | |
| "learning_rate": 2.798740945044419e-05, | |
| "loss": 1.1903, | |
| "step": 766000 | |
| }, | |
| { | |
| "epoch": 3.08, | |
| "learning_rate": 2.7973035364276718e-05, | |
| "loss": 1.1984, | |
| "step": 766500 | |
| }, | |
| { | |
| "epoch": 3.09, | |
| "learning_rate": 2.7958690026281582e-05, | |
| "loss": 1.2454, | |
| "step": 767000 | |
| }, | |
| { | |
| "epoch": 3.09, | |
| "learning_rate": 2.7944315940114106e-05, | |
| "loss": 1.1979, | |
| "step": 767500 | |
| }, | |
| { | |
| "epoch": 3.09, | |
| "learning_rate": 2.7929941853946634e-05, | |
| "loss": 1.1888, | |
| "step": 768000 | |
| }, | |
| { | |
| "epoch": 3.09, | |
| "learning_rate": 2.7915567767779165e-05, | |
| "loss": 1.1791, | |
| "step": 768500 | |
| }, | |
| { | |
| "epoch": 3.1, | |
| "learning_rate": 2.7901193681611693e-05, | |
| "loss": 1.2505, | |
| "step": 769000 | |
| }, | |
| { | |
| "epoch": 3.1, | |
| "learning_rate": 2.7886848343616557e-05, | |
| "loss": 1.2186, | |
| "step": 769500 | |
| }, | |
| { | |
| "epoch": 3.1, | |
| "learning_rate": 2.787247425744908e-05, | |
| "loss": 1.1915, | |
| "step": 770000 | |
| }, | |
| { | |
| "epoch": 3.1, | |
| "learning_rate": 2.7858100171281616e-05, | |
| "loss": 1.2007, | |
| "step": 770500 | |
| }, | |
| { | |
| "epoch": 3.1, | |
| "learning_rate": 2.784372608511414e-05, | |
| "loss": 1.1768, | |
| "step": 771000 | |
| }, | |
| { | |
| "epoch": 3.11, | |
| "learning_rate": 2.7829380747119004e-05, | |
| "loss": 1.1771, | |
| "step": 771500 | |
| }, | |
| { | |
| "epoch": 3.11, | |
| "learning_rate": 2.7815006660951532e-05, | |
| "loss": 1.165, | |
| "step": 772000 | |
| }, | |
| { | |
| "epoch": 3.11, | |
| "learning_rate": 2.780063257478406e-05, | |
| "loss": 1.2091, | |
| "step": 772500 | |
| }, | |
| { | |
| "epoch": 3.11, | |
| "learning_rate": 2.778625848861659e-05, | |
| "loss": 1.205, | |
| "step": 773000 | |
| }, | |
| { | |
| "epoch": 3.11, | |
| "learning_rate": 2.7771884402449118e-05, | |
| "loss": 1.1734, | |
| "step": 773500 | |
| }, | |
| { | |
| "epoch": 3.12, | |
| "learning_rate": 2.775753906445398e-05, | |
| "loss": 1.2123, | |
| "step": 774000 | |
| }, | |
| { | |
| "epoch": 3.12, | |
| "learning_rate": 2.7743164978286507e-05, | |
| "loss": 1.2031, | |
| "step": 774500 | |
| }, | |
| { | |
| "epoch": 3.12, | |
| "learning_rate": 2.7728790892119034e-05, | |
| "loss": 1.208, | |
| "step": 775000 | |
| }, | |
| { | |
| "epoch": 3.12, | |
| "learning_rate": 2.7714416805951565e-05, | |
| "loss": 1.2273, | |
| "step": 775500 | |
| }, | |
| { | |
| "epoch": 3.12, | |
| "learning_rate": 2.7700042719784093e-05, | |
| "loss": 1.1497, | |
| "step": 776000 | |
| }, | |
| { | |
| "epoch": 3.13, | |
| "learning_rate": 2.7685668633616617e-05, | |
| "loss": 1.2239, | |
| "step": 776500 | |
| }, | |
| { | |
| "epoch": 3.13, | |
| "learning_rate": 2.767132329562148e-05, | |
| "loss": 1.1874, | |
| "step": 777000 | |
| }, | |
| { | |
| "epoch": 3.13, | |
| "learning_rate": 2.765694920945401e-05, | |
| "loss": 1.168, | |
| "step": 777500 | |
| }, | |
| { | |
| "epoch": 3.13, | |
| "learning_rate": 2.764257512328654e-05, | |
| "loss": 1.2032, | |
| "step": 778000 | |
| }, | |
| { | |
| "epoch": 3.13, | |
| "learning_rate": 2.7628201037119068e-05, | |
| "loss": 1.1673, | |
| "step": 778500 | |
| }, | |
| { | |
| "epoch": 3.14, | |
| "learning_rate": 2.761385569912393e-05, | |
| "loss": 1.2331, | |
| "step": 779000 | |
| }, | |
| { | |
| "epoch": 3.14, | |
| "learning_rate": 2.7599481612956456e-05, | |
| "loss": 1.2009, | |
| "step": 779500 | |
| }, | |
| { | |
| "epoch": 3.14, | |
| "learning_rate": 2.7585107526788984e-05, | |
| "loss": 1.1931, | |
| "step": 780000 | |
| }, | |
| { | |
| "epoch": 3.14, | |
| "learning_rate": 2.7570733440621515e-05, | |
| "loss": 1.185, | |
| "step": 780500 | |
| }, | |
| { | |
| "epoch": 3.14, | |
| "learning_rate": 2.7556359354454043e-05, | |
| "loss": 1.2052, | |
| "step": 781000 | |
| }, | |
| { | |
| "epoch": 3.15, | |
| "learning_rate": 2.7542014016458907e-05, | |
| "loss": 1.1839, | |
| "step": 781500 | |
| }, | |
| { | |
| "epoch": 3.15, | |
| "learning_rate": 2.752763993029143e-05, | |
| "loss": 1.2023, | |
| "step": 782000 | |
| }, | |
| { | |
| "epoch": 3.15, | |
| "learning_rate": 2.751326584412396e-05, | |
| "loss": 1.2242, | |
| "step": 782500 | |
| }, | |
| { | |
| "epoch": 3.15, | |
| "learning_rate": 2.749889175795649e-05, | |
| "loss": 1.1282, | |
| "step": 783000 | |
| }, | |
| { | |
| "epoch": 3.15, | |
| "learning_rate": 2.7484546419961354e-05, | |
| "loss": 1.1759, | |
| "step": 783500 | |
| }, | |
| { | |
| "epoch": 3.16, | |
| "learning_rate": 2.7470172333793882e-05, | |
| "loss": 1.1955, | |
| "step": 784000 | |
| }, | |
| { | |
| "epoch": 3.16, | |
| "learning_rate": 2.7455798247626406e-05, | |
| "loss": 1.1752, | |
| "step": 784500 | |
| }, | |
| { | |
| "epoch": 3.16, | |
| "learning_rate": 2.7441424161458934e-05, | |
| "loss": 1.2358, | |
| "step": 785000 | |
| }, | |
| { | |
| "epoch": 3.16, | |
| "learning_rate": 2.7427050075291465e-05, | |
| "loss": 1.1965, | |
| "step": 785500 | |
| }, | |
| { | |
| "epoch": 3.16, | |
| "learning_rate": 2.7412675989123992e-05, | |
| "loss": 1.1938, | |
| "step": 786000 | |
| }, | |
| { | |
| "epoch": 3.17, | |
| "learning_rate": 2.739830190295652e-05, | |
| "loss": 1.1775, | |
| "step": 786500 | |
| }, | |
| { | |
| "epoch": 3.17, | |
| "learning_rate": 2.7383956564961384e-05, | |
| "loss": 1.2015, | |
| "step": 787000 | |
| }, | |
| { | |
| "epoch": 3.17, | |
| "learning_rate": 2.736958247879391e-05, | |
| "loss": 1.1769, | |
| "step": 787500 | |
| }, | |
| { | |
| "epoch": 3.17, | |
| "learning_rate": 2.7355208392626443e-05, | |
| "loss": 1.1922, | |
| "step": 788000 | |
| }, | |
| { | |
| "epoch": 3.17, | |
| "learning_rate": 2.7340834306458967e-05, | |
| "loss": 1.1821, | |
| "step": 788500 | |
| }, | |
| { | |
| "epoch": 3.18, | |
| "learning_rate": 2.7326460220291495e-05, | |
| "loss": 1.2219, | |
| "step": 789000 | |
| }, | |
| { | |
| "epoch": 3.18, | |
| "learning_rate": 2.731211488229636e-05, | |
| "loss": 1.1915, | |
| "step": 789500 | |
| }, | |
| { | |
| "epoch": 3.18, | |
| "learning_rate": 2.729774079612889e-05, | |
| "loss": 1.1633, | |
| "step": 790000 | |
| }, | |
| { | |
| "epoch": 3.18, | |
| "learning_rate": 2.7283366709961418e-05, | |
| "loss": 1.194, | |
| "step": 790500 | |
| }, | |
| { | |
| "epoch": 3.18, | |
| "learning_rate": 2.7268992623793942e-05, | |
| "loss": 1.2103, | |
| "step": 791000 | |
| }, | |
| { | |
| "epoch": 3.19, | |
| "learning_rate": 2.725461853762647e-05, | |
| "loss": 1.1906, | |
| "step": 791500 | |
| }, | |
| { | |
| "epoch": 3.19, | |
| "learning_rate": 2.7240273199631334e-05, | |
| "loss": 1.1998, | |
| "step": 792000 | |
| }, | |
| { | |
| "epoch": 3.19, | |
| "learning_rate": 2.7225899113463865e-05, | |
| "loss": 1.2401, | |
| "step": 792500 | |
| }, | |
| { | |
| "epoch": 3.19, | |
| "learning_rate": 2.7211525027296393e-05, | |
| "loss": 1.1735, | |
| "step": 793000 | |
| }, | |
| { | |
| "epoch": 3.19, | |
| "learning_rate": 2.7197150941128917e-05, | |
| "loss": 1.1767, | |
| "step": 793500 | |
| }, | |
| { | |
| "epoch": 3.2, | |
| "learning_rate": 2.7182776854961444e-05, | |
| "loss": 1.2027, | |
| "step": 794000 | |
| }, | |
| { | |
| "epoch": 3.2, | |
| "learning_rate": 2.7168402768793975e-05, | |
| "loss": 1.1756, | |
| "step": 794500 | |
| }, | |
| { | |
| "epoch": 3.2, | |
| "learning_rate": 2.7154028682626503e-05, | |
| "loss": 1.2002, | |
| "step": 795000 | |
| }, | |
| { | |
| "epoch": 3.2, | |
| "learning_rate": 2.713965459645903e-05, | |
| "loss": 1.2255, | |
| "step": 795500 | |
| }, | |
| { | |
| "epoch": 3.2, | |
| "learning_rate": 2.7125309258463895e-05, | |
| "loss": 1.2232, | |
| "step": 796000 | |
| }, | |
| { | |
| "epoch": 3.21, | |
| "learning_rate": 2.711093517229642e-05, | |
| "loss": 1.1852, | |
| "step": 796500 | |
| }, | |
| { | |
| "epoch": 3.21, | |
| "learning_rate": 2.7096561086128954e-05, | |
| "loss": 1.1968, | |
| "step": 797000 | |
| }, | |
| { | |
| "epoch": 3.21, | |
| "learning_rate": 2.7082215748133815e-05, | |
| "loss": 1.2214, | |
| "step": 797500 | |
| }, | |
| { | |
| "epoch": 3.21, | |
| "learning_rate": 2.7067841661966342e-05, | |
| "loss": 1.1843, | |
| "step": 798000 | |
| }, | |
| { | |
| "epoch": 3.21, | |
| "learning_rate": 2.705346757579887e-05, | |
| "loss": 1.1879, | |
| "step": 798500 | |
| }, | |
| { | |
| "epoch": 3.22, | |
| "learning_rate": 2.7039093489631394e-05, | |
| "loss": 1.1862, | |
| "step": 799000 | |
| }, | |
| { | |
| "epoch": 3.22, | |
| "learning_rate": 2.702471940346393e-05, | |
| "loss": 1.1976, | |
| "step": 799500 | |
| }, | |
| { | |
| "epoch": 3.22, | |
| "learning_rate": 2.7010345317296453e-05, | |
| "loss": 1.2242, | |
| "step": 800000 | |
| }, | |
| { | |
| "epoch": 3.22, | |
| "learning_rate": 2.699597123112898e-05, | |
| "loss": 1.2212, | |
| "step": 800500 | |
| }, | |
| { | |
| "epoch": 3.22, | |
| "learning_rate": 2.698159714496151e-05, | |
| "loss": 1.1921, | |
| "step": 801000 | |
| }, | |
| { | |
| "epoch": 3.23, | |
| "learning_rate": 2.6967251806966376e-05, | |
| "loss": 1.1946, | |
| "step": 801500 | |
| }, | |
| { | |
| "epoch": 3.23, | |
| "learning_rate": 2.6952877720798903e-05, | |
| "loss": 1.1886, | |
| "step": 802000 | |
| }, | |
| { | |
| "epoch": 3.23, | |
| "learning_rate": 2.6938503634631427e-05, | |
| "loss": 1.2393, | |
| "step": 802500 | |
| }, | |
| { | |
| "epoch": 3.23, | |
| "learning_rate": 2.6924129548463955e-05, | |
| "loss": 1.1932, | |
| "step": 803000 | |
| }, | |
| { | |
| "epoch": 3.23, | |
| "learning_rate": 2.6909755462296486e-05, | |
| "loss": 1.1931, | |
| "step": 803500 | |
| }, | |
| { | |
| "epoch": 3.24, | |
| "learning_rate": 2.689541012430135e-05, | |
| "loss": 1.1557, | |
| "step": 804000 | |
| }, | |
| { | |
| "epoch": 3.24, | |
| "learning_rate": 2.6881036038133878e-05, | |
| "loss": 1.203, | |
| "step": 804500 | |
| }, | |
| { | |
| "epoch": 3.24, | |
| "learning_rate": 2.6866661951966406e-05, | |
| "loss": 1.1944, | |
| "step": 805000 | |
| }, | |
| { | |
| "epoch": 3.24, | |
| "learning_rate": 2.685228786579893e-05, | |
| "loss": 1.2017, | |
| "step": 805500 | |
| }, | |
| { | |
| "epoch": 3.24, | |
| "learning_rate": 2.6837913779631464e-05, | |
| "loss": 1.2312, | |
| "step": 806000 | |
| }, | |
| { | |
| "epoch": 3.25, | |
| "learning_rate": 2.682353969346399e-05, | |
| "loss": 1.2017, | |
| "step": 806500 | |
| }, | |
| { | |
| "epoch": 3.25, | |
| "learning_rate": 2.6809194355468853e-05, | |
| "loss": 1.1889, | |
| "step": 807000 | |
| }, | |
| { | |
| "epoch": 3.25, | |
| "learning_rate": 2.679482026930138e-05, | |
| "loss": 1.2188, | |
| "step": 807500 | |
| }, | |
| { | |
| "epoch": 3.25, | |
| "learning_rate": 2.6780446183133905e-05, | |
| "loss": 1.1738, | |
| "step": 808000 | |
| }, | |
| { | |
| "epoch": 3.25, | |
| "learning_rate": 2.676610084513877e-05, | |
| "loss": 1.1761, | |
| "step": 808500 | |
| }, | |
| { | |
| "epoch": 3.26, | |
| "learning_rate": 2.67517267589713e-05, | |
| "loss": 1.1507, | |
| "step": 809000 | |
| }, | |
| { | |
| "epoch": 3.26, | |
| "learning_rate": 2.6737352672803828e-05, | |
| "loss": 1.1887, | |
| "step": 809500 | |
| }, | |
| { | |
| "epoch": 3.26, | |
| "learning_rate": 2.6722978586636355e-05, | |
| "loss": 1.2401, | |
| "step": 810000 | |
| }, | |
| { | |
| "epoch": 3.26, | |
| "learning_rate": 2.6708604500468886e-05, | |
| "loss": 1.2165, | |
| "step": 810500 | |
| }, | |
| { | |
| "epoch": 3.26, | |
| "learning_rate": 2.6694230414301414e-05, | |
| "loss": 1.1777, | |
| "step": 811000 | |
| }, | |
| { | |
| "epoch": 3.27, | |
| "learning_rate": 2.667985632813394e-05, | |
| "loss": 1.17, | |
| "step": 811500 | |
| }, | |
| { | |
| "epoch": 3.27, | |
| "learning_rate": 2.6665482241966466e-05, | |
| "loss": 1.209, | |
| "step": 812000 | |
| }, | |
| { | |
| "epoch": 3.27, | |
| "learning_rate": 2.6651108155799e-05, | |
| "loss": 1.1625, | |
| "step": 812500 | |
| }, | |
| { | |
| "epoch": 3.27, | |
| "learning_rate": 2.663676281780386e-05, | |
| "loss": 1.1877, | |
| "step": 813000 | |
| }, | |
| { | |
| "epoch": 3.27, | |
| "learning_rate": 2.662238873163639e-05, | |
| "loss": 1.2107, | |
| "step": 813500 | |
| }, | |
| { | |
| "epoch": 3.28, | |
| "learning_rate": 2.6608014645468916e-05, | |
| "loss": 1.2071, | |
| "step": 814000 | |
| }, | |
| { | |
| "epoch": 3.28, | |
| "learning_rate": 2.659364055930144e-05, | |
| "loss": 1.1744, | |
| "step": 814500 | |
| }, | |
| { | |
| "epoch": 3.28, | |
| "learning_rate": 2.6579266473133975e-05, | |
| "loss": 1.1599, | |
| "step": 815000 | |
| }, | |
| { | |
| "epoch": 3.28, | |
| "learning_rate": 2.6564921135138836e-05, | |
| "loss": 1.1937, | |
| "step": 815500 | |
| }, | |
| { | |
| "epoch": 3.28, | |
| "learning_rate": 2.6550547048971364e-05, | |
| "loss": 1.1839, | |
| "step": 816000 | |
| }, | |
| { | |
| "epoch": 3.29, | |
| "learning_rate": 2.653617296280389e-05, | |
| "loss": 1.231, | |
| "step": 816500 | |
| }, | |
| { | |
| "epoch": 3.29, | |
| "learning_rate": 2.6521798876636415e-05, | |
| "loss": 1.1725, | |
| "step": 817000 | |
| }, | |
| { | |
| "epoch": 3.29, | |
| "learning_rate": 2.650742479046895e-05, | |
| "loss": 1.2099, | |
| "step": 817500 | |
| }, | |
| { | |
| "epoch": 3.29, | |
| "learning_rate": 2.6493050704301474e-05, | |
| "loss": 1.1811, | |
| "step": 818000 | |
| }, | |
| { | |
| "epoch": 3.29, | |
| "learning_rate": 2.647870536630634e-05, | |
| "loss": 1.2299, | |
| "step": 818500 | |
| }, | |
| { | |
| "epoch": 3.3, | |
| "learning_rate": 2.6464331280138866e-05, | |
| "loss": 1.1692, | |
| "step": 819000 | |
| }, | |
| { | |
| "epoch": 3.3, | |
| "learning_rate": 2.6449957193971394e-05, | |
| "loss": 1.2198, | |
| "step": 819500 | |
| }, | |
| { | |
| "epoch": 3.3, | |
| "learning_rate": 2.6435583107803924e-05, | |
| "loss": 1.2231, | |
| "step": 820000 | |
| }, | |
| { | |
| "epoch": 3.3, | |
| "learning_rate": 2.6421209021636452e-05, | |
| "loss": 1.2063, | |
| "step": 820500 | |
| }, | |
| { | |
| "epoch": 3.3, | |
| "learning_rate": 2.6406834935468976e-05, | |
| "loss": 1.2099, | |
| "step": 821000 | |
| }, | |
| { | |
| "epoch": 3.31, | |
| "learning_rate": 2.639248959747384e-05, | |
| "loss": 1.2221, | |
| "step": 821500 | |
| }, | |
| { | |
| "epoch": 3.31, | |
| "learning_rate": 2.6378115511306372e-05, | |
| "loss": 1.2231, | |
| "step": 822000 | |
| }, | |
| { | |
| "epoch": 3.31, | |
| "learning_rate": 2.63637414251389e-05, | |
| "loss": 1.2349, | |
| "step": 822500 | |
| }, | |
| { | |
| "epoch": 3.31, | |
| "learning_rate": 2.6349367338971427e-05, | |
| "loss": 1.2288, | |
| "step": 823000 | |
| }, | |
| { | |
| "epoch": 3.31, | |
| "learning_rate": 2.633499325280395e-05, | |
| "loss": 1.2305, | |
| "step": 823500 | |
| }, | |
| { | |
| "epoch": 3.32, | |
| "learning_rate": 2.6320619166636485e-05, | |
| "loss": 1.1755, | |
| "step": 824000 | |
| }, | |
| { | |
| "epoch": 3.32, | |
| "learning_rate": 2.6306273828641347e-05, | |
| "loss": 1.2109, | |
| "step": 824500 | |
| }, | |
| { | |
| "epoch": 3.32, | |
| "learning_rate": 2.6291899742473874e-05, | |
| "loss": 1.2093, | |
| "step": 825000 | |
| }, | |
| { | |
| "epoch": 3.32, | |
| "learning_rate": 2.6277525656306402e-05, | |
| "loss": 1.1682, | |
| "step": 825500 | |
| }, | |
| { | |
| "epoch": 3.32, | |
| "learning_rate": 2.6263151570138926e-05, | |
| "loss": 1.1864, | |
| "step": 826000 | |
| }, | |
| { | |
| "epoch": 3.33, | |
| "learning_rate": 2.624877748397146e-05, | |
| "loss": 1.2548, | |
| "step": 826500 | |
| }, | |
| { | |
| "epoch": 3.33, | |
| "learning_rate": 2.6234403397803984e-05, | |
| "loss": 1.1723, | |
| "step": 827000 | |
| }, | |
| { | |
| "epoch": 3.33, | |
| "learning_rate": 2.6220029311636512e-05, | |
| "loss": 1.1182, | |
| "step": 827500 | |
| }, | |
| { | |
| "epoch": 3.33, | |
| "learning_rate": 2.6205655225469043e-05, | |
| "loss": 1.2548, | |
| "step": 828000 | |
| }, | |
| { | |
| "epoch": 3.33, | |
| "learning_rate": 2.6191309887473904e-05, | |
| "loss": 1.1878, | |
| "step": 828500 | |
| }, | |
| { | |
| "epoch": 3.34, | |
| "learning_rate": 2.6176935801306435e-05, | |
| "loss": 1.2124, | |
| "step": 829000 | |
| }, | |
| { | |
| "epoch": 3.34, | |
| "learning_rate": 2.6162561715138963e-05, | |
| "loss": 1.1506, | |
| "step": 829500 | |
| }, | |
| { | |
| "epoch": 3.34, | |
| "learning_rate": 2.6148187628971487e-05, | |
| "loss": 1.1477, | |
| "step": 830000 | |
| }, | |
| { | |
| "epoch": 3.34, | |
| "learning_rate": 2.613384229097635e-05, | |
| "loss": 1.1757, | |
| "step": 830500 | |
| }, | |
| { | |
| "epoch": 3.34, | |
| "learning_rate": 2.611946820480888e-05, | |
| "loss": 1.2022, | |
| "step": 831000 | |
| }, | |
| { | |
| "epoch": 3.35, | |
| "learning_rate": 2.610509411864141e-05, | |
| "loss": 1.1714, | |
| "step": 831500 | |
| }, | |
| { | |
| "epoch": 3.35, | |
| "learning_rate": 2.6090748780646275e-05, | |
| "loss": 1.2133, | |
| "step": 832000 | |
| }, | |
| { | |
| "epoch": 3.35, | |
| "learning_rate": 2.60763746944788e-05, | |
| "loss": 1.1997, | |
| "step": 832500 | |
| }, | |
| { | |
| "epoch": 3.35, | |
| "learning_rate": 2.6062000608311326e-05, | |
| "loss": 1.1625, | |
| "step": 833000 | |
| }, | |
| { | |
| "epoch": 3.35, | |
| "learning_rate": 2.6047626522143857e-05, | |
| "loss": 1.1425, | |
| "step": 833500 | |
| }, | |
| { | |
| "epoch": 3.36, | |
| "learning_rate": 2.603328118414872e-05, | |
| "loss": 1.1886, | |
| "step": 834000 | |
| }, | |
| { | |
| "epoch": 3.36, | |
| "learning_rate": 2.601890709798125e-05, | |
| "loss": 1.1943, | |
| "step": 834500 | |
| }, | |
| { | |
| "epoch": 3.36, | |
| "learning_rate": 2.6004533011813777e-05, | |
| "loss": 1.1922, | |
| "step": 835000 | |
| }, | |
| { | |
| "epoch": 3.36, | |
| "learning_rate": 2.59901589256463e-05, | |
| "loss": 1.1883, | |
| "step": 835500 | |
| }, | |
| { | |
| "epoch": 3.36, | |
| "learning_rate": 2.5975784839478835e-05, | |
| "loss": 1.1884, | |
| "step": 836000 | |
| }, | |
| { | |
| "epoch": 3.37, | |
| "learning_rate": 2.596141075331136e-05, | |
| "loss": 1.2316, | |
| "step": 836500 | |
| }, | |
| { | |
| "epoch": 3.37, | |
| "learning_rate": 2.5947036667143887e-05, | |
| "loss": 1.2248, | |
| "step": 837000 | |
| }, | |
| { | |
| "epoch": 3.37, | |
| "learning_rate": 2.5932691329148752e-05, | |
| "loss": 1.2145, | |
| "step": 837500 | |
| }, | |
| { | |
| "epoch": 3.37, | |
| "learning_rate": 2.5918317242981276e-05, | |
| "loss": 1.2003, | |
| "step": 838000 | |
| }, | |
| { | |
| "epoch": 3.37, | |
| "learning_rate": 2.590394315681381e-05, | |
| "loss": 1.2374, | |
| "step": 838500 | |
| }, | |
| { | |
| "epoch": 3.38, | |
| "learning_rate": 2.5889569070646335e-05, | |
| "loss": 1.194, | |
| "step": 839000 | |
| }, | |
| { | |
| "epoch": 3.38, | |
| "learning_rate": 2.5875194984478862e-05, | |
| "loss": 1.2262, | |
| "step": 839500 | |
| }, | |
| { | |
| "epoch": 3.38, | |
| "learning_rate": 2.5860849646483727e-05, | |
| "loss": 1.1755, | |
| "step": 840000 | |
| }, | |
| { | |
| "epoch": 3.38, | |
| "learning_rate": 2.584647556031625e-05, | |
| "loss": 1.193, | |
| "step": 840500 | |
| }, | |
| { | |
| "epoch": 3.38, | |
| "learning_rate": 2.5832101474148785e-05, | |
| "loss": 1.2141, | |
| "step": 841000 | |
| }, | |
| { | |
| "epoch": 3.39, | |
| "learning_rate": 2.581772738798131e-05, | |
| "loss": 1.1863, | |
| "step": 841500 | |
| }, | |
| { | |
| "epoch": 3.39, | |
| "learning_rate": 2.5803382049986174e-05, | |
| "loss": 1.1763, | |
| "step": 842000 | |
| }, | |
| { | |
| "epoch": 3.39, | |
| "learning_rate": 2.57890079638187e-05, | |
| "loss": 1.1525, | |
| "step": 842500 | |
| }, | |
| { | |
| "epoch": 3.39, | |
| "learning_rate": 2.577463387765123e-05, | |
| "loss": 1.1811, | |
| "step": 843000 | |
| }, | |
| { | |
| "epoch": 3.39, | |
| "learning_rate": 2.576025979148376e-05, | |
| "loss": 1.1807, | |
| "step": 843500 | |
| }, | |
| { | |
| "epoch": 3.4, | |
| "learning_rate": 2.5745885705316288e-05, | |
| "loss": 1.2043, | |
| "step": 844000 | |
| }, | |
| { | |
| "epoch": 3.4, | |
| "learning_rate": 2.573154036732115e-05, | |
| "loss": 1.1559, | |
| "step": 844500 | |
| }, | |
| { | |
| "epoch": 3.4, | |
| "learning_rate": 2.5717166281153676e-05, | |
| "loss": 1.2187, | |
| "step": 845000 | |
| }, | |
| { | |
| "epoch": 3.4, | |
| "learning_rate": 2.5702792194986204e-05, | |
| "loss": 1.1387, | |
| "step": 845500 | |
| }, | |
| { | |
| "epoch": 3.4, | |
| "learning_rate": 2.5688418108818735e-05, | |
| "loss": 1.165, | |
| "step": 846000 | |
| }, | |
| { | |
| "epoch": 3.41, | |
| "learning_rate": 2.5674044022651262e-05, | |
| "loss": 1.2342, | |
| "step": 846500 | |
| }, | |
| { | |
| "epoch": 3.41, | |
| "learning_rate": 2.5659698684656124e-05, | |
| "loss": 1.1913, | |
| "step": 847000 | |
| }, | |
| { | |
| "epoch": 3.41, | |
| "learning_rate": 2.564532459848865e-05, | |
| "loss": 1.2385, | |
| "step": 847500 | |
| }, | |
| { | |
| "epoch": 3.41, | |
| "learning_rate": 2.563095051232118e-05, | |
| "loss": 1.2333, | |
| "step": 848000 | |
| }, | |
| { | |
| "epoch": 3.41, | |
| "learning_rate": 2.561657642615371e-05, | |
| "loss": 1.2223, | |
| "step": 848500 | |
| }, | |
| { | |
| "epoch": 3.42, | |
| "learning_rate": 2.5602202339986237e-05, | |
| "loss": 1.2245, | |
| "step": 849000 | |
| }, | |
| { | |
| "epoch": 3.42, | |
| "learning_rate": 2.55878570019911e-05, | |
| "loss": 1.2579, | |
| "step": 849500 | |
| }, | |
| { | |
| "epoch": 3.42, | |
| "learning_rate": 2.5573482915823626e-05, | |
| "loss": 1.2277, | |
| "step": 850000 | |
| }, | |
| { | |
| "epoch": 3.42, | |
| "learning_rate": 2.5559108829656154e-05, | |
| "loss": 1.2517, | |
| "step": 850500 | |
| }, | |
| { | |
| "epoch": 3.43, | |
| "learning_rate": 2.5544763491661018e-05, | |
| "loss": 1.1861, | |
| "step": 851000 | |
| }, | |
| { | |
| "epoch": 3.43, | |
| "learning_rate": 2.553038940549355e-05, | |
| "loss": 1.2176, | |
| "step": 851500 | |
| }, | |
| { | |
| "epoch": 3.43, | |
| "learning_rate": 2.5516015319326077e-05, | |
| "loss": 1.208, | |
| "step": 852000 | |
| }, | |
| { | |
| "epoch": 3.43, | |
| "learning_rate": 2.55016412331586e-05, | |
| "loss": 1.169, | |
| "step": 852500 | |
| }, | |
| { | |
| "epoch": 3.43, | |
| "learning_rate": 2.5487267146991135e-05, | |
| "loss": 1.1972, | |
| "step": 853000 | |
| }, | |
| { | |
| "epoch": 3.44, | |
| "learning_rate": 2.547289306082366e-05, | |
| "loss": 1.1865, | |
| "step": 853500 | |
| }, | |
| { | |
| "epoch": 3.44, | |
| "learning_rate": 2.5458518974656187e-05, | |
| "loss": 1.1974, | |
| "step": 854000 | |
| }, | |
| { | |
| "epoch": 3.44, | |
| "learning_rate": 2.5444144888488715e-05, | |
| "loss": 1.1921, | |
| "step": 854500 | |
| }, | |
| { | |
| "epoch": 3.44, | |
| "learning_rate": 2.5429799550493576e-05, | |
| "loss": 1.1845, | |
| "step": 855000 | |
| }, | |
| { | |
| "epoch": 3.44, | |
| "learning_rate": 2.541542546432611e-05, | |
| "loss": 1.1969, | |
| "step": 855500 | |
| }, | |
| { | |
| "epoch": 3.45, | |
| "learning_rate": 2.5401051378158634e-05, | |
| "loss": 1.1579, | |
| "step": 856000 | |
| }, | |
| { | |
| "epoch": 3.45, | |
| "learning_rate": 2.5386677291991162e-05, | |
| "loss": 1.2033, | |
| "step": 856500 | |
| }, | |
| { | |
| "epoch": 3.45, | |
| "learning_rate": 2.537230320582369e-05, | |
| "loss": 1.1873, | |
| "step": 857000 | |
| }, | |
| { | |
| "epoch": 3.45, | |
| "learning_rate": 2.5357957867828554e-05, | |
| "loss": 1.262, | |
| "step": 857500 | |
| }, | |
| { | |
| "epoch": 3.45, | |
| "learning_rate": 2.5343612529833415e-05, | |
| "loss": 1.2345, | |
| "step": 858000 | |
| }, | |
| { | |
| "epoch": 3.46, | |
| "learning_rate": 2.5329238443665943e-05, | |
| "loss": 1.1708, | |
| "step": 858500 | |
| }, | |
| { | |
| "epoch": 3.46, | |
| "learning_rate": 2.5314864357498474e-05, | |
| "loss": 1.1934, | |
| "step": 859000 | |
| }, | |
| { | |
| "epoch": 3.46, | |
| "learning_rate": 2.5300490271331e-05, | |
| "loss": 1.2051, | |
| "step": 859500 | |
| }, | |
| { | |
| "epoch": 3.46, | |
| "learning_rate": 2.528611618516353e-05, | |
| "loss": 1.1626, | |
| "step": 860000 | |
| }, | |
| { | |
| "epoch": 3.46, | |
| "learning_rate": 2.527177084716839e-05, | |
| "loss": 1.2027, | |
| "step": 860500 | |
| }, | |
| { | |
| "epoch": 3.47, | |
| "learning_rate": 2.5257425509173255e-05, | |
| "loss": 1.1948, | |
| "step": 861000 | |
| }, | |
| { | |
| "epoch": 3.47, | |
| "learning_rate": 2.5243051423005782e-05, | |
| "loss": 1.1753, | |
| "step": 861500 | |
| }, | |
| { | |
| "epoch": 3.47, | |
| "learning_rate": 2.5228677336838313e-05, | |
| "loss": 1.1782, | |
| "step": 862000 | |
| }, | |
| { | |
| "epoch": 3.47, | |
| "learning_rate": 2.521430325067084e-05, | |
| "loss": 1.1961, | |
| "step": 862500 | |
| }, | |
| { | |
| "epoch": 3.47, | |
| "learning_rate": 2.5199929164503365e-05, | |
| "loss": 1.2003, | |
| "step": 863000 | |
| }, | |
| { | |
| "epoch": 3.48, | |
| "learning_rate": 2.51855550783359e-05, | |
| "loss": 1.2066, | |
| "step": 863500 | |
| }, | |
| { | |
| "epoch": 3.48, | |
| "learning_rate": 2.5171180992168423e-05, | |
| "loss": 1.1561, | |
| "step": 864000 | |
| }, | |
| { | |
| "epoch": 3.48, | |
| "learning_rate": 2.515680690600095e-05, | |
| "loss": 1.1935, | |
| "step": 864500 | |
| }, | |
| { | |
| "epoch": 3.48, | |
| "learning_rate": 2.514243281983348e-05, | |
| "loss": 1.1848, | |
| "step": 865000 | |
| }, | |
| { | |
| "epoch": 3.48, | |
| "learning_rate": 2.512805873366601e-05, | |
| "loss": 1.223, | |
| "step": 865500 | |
| }, | |
| { | |
| "epoch": 3.49, | |
| "learning_rate": 2.5113684647498537e-05, | |
| "loss": 1.2462, | |
| "step": 866000 | |
| }, | |
| { | |
| "epoch": 3.49, | |
| "learning_rate": 2.5099310561331065e-05, | |
| "loss": 1.1886, | |
| "step": 866500 | |
| }, | |
| { | |
| "epoch": 3.49, | |
| "learning_rate": 2.5084936475163596e-05, | |
| "loss": 1.2163, | |
| "step": 867000 | |
| }, | |
| { | |
| "epoch": 3.49, | |
| "learning_rate": 2.5070562388996123e-05, | |
| "loss": 1.1928, | |
| "step": 867500 | |
| }, | |
| { | |
| "epoch": 3.49, | |
| "learning_rate": 2.5056188302828647e-05, | |
| "loss": 1.1466, | |
| "step": 868000 | |
| }, | |
| { | |
| "epoch": 3.5, | |
| "learning_rate": 2.5041842964833512e-05, | |
| "loss": 1.1692, | |
| "step": 868500 | |
| }, | |
| { | |
| "epoch": 3.5, | |
| "learning_rate": 2.502746887866604e-05, | |
| "loss": 1.181, | |
| "step": 869000 | |
| }, | |
| { | |
| "epoch": 3.5, | |
| "learning_rate": 2.501309479249857e-05, | |
| "loss": 1.1604, | |
| "step": 869500 | |
| }, | |
| { | |
| "epoch": 3.5, | |
| "learning_rate": 2.4998720706331098e-05, | |
| "loss": 1.1684, | |
| "step": 870000 | |
| }, | |
| { | |
| "epoch": 3.5, | |
| "learning_rate": 2.4984346620163622e-05, | |
| "loss": 1.1951, | |
| "step": 870500 | |
| }, | |
| { | |
| "epoch": 3.51, | |
| "learning_rate": 2.4970001282168487e-05, | |
| "loss": 1.1963, | |
| "step": 871000 | |
| }, | |
| { | |
| "epoch": 3.51, | |
| "learning_rate": 2.4955627196001018e-05, | |
| "loss": 1.1888, | |
| "step": 871500 | |
| }, | |
| { | |
| "epoch": 3.51, | |
| "learning_rate": 2.4941253109833542e-05, | |
| "loss": 1.1906, | |
| "step": 872000 | |
| }, | |
| { | |
| "epoch": 3.51, | |
| "learning_rate": 2.4926879023666073e-05, | |
| "loss": 1.1896, | |
| "step": 872500 | |
| }, | |
| { | |
| "epoch": 3.51, | |
| "learning_rate": 2.4912504937498597e-05, | |
| "loss": 1.2349, | |
| "step": 873000 | |
| }, | |
| { | |
| "epoch": 3.52, | |
| "learning_rate": 2.4898130851331128e-05, | |
| "loss": 1.1894, | |
| "step": 873500 | |
| }, | |
| { | |
| "epoch": 3.52, | |
| "learning_rate": 2.4883785513335993e-05, | |
| "loss": 1.1912, | |
| "step": 874000 | |
| }, | |
| { | |
| "epoch": 3.52, | |
| "learning_rate": 2.4869411427168517e-05, | |
| "loss": 1.1669, | |
| "step": 874500 | |
| }, | |
| { | |
| "epoch": 3.52, | |
| "learning_rate": 2.4855037341001048e-05, | |
| "loss": 1.2058, | |
| "step": 875000 | |
| }, | |
| { | |
| "epoch": 3.52, | |
| "learning_rate": 2.4840663254833575e-05, | |
| "loss": 1.212, | |
| "step": 875500 | |
| }, | |
| { | |
| "epoch": 3.53, | |
| "learning_rate": 2.4826317916838436e-05, | |
| "loss": 1.1972, | |
| "step": 876000 | |
| }, | |
| { | |
| "epoch": 3.53, | |
| "learning_rate": 2.4811943830670967e-05, | |
| "loss": 1.1659, | |
| "step": 876500 | |
| }, | |
| { | |
| "epoch": 3.53, | |
| "learning_rate": 2.4797569744503495e-05, | |
| "loss": 1.1717, | |
| "step": 877000 | |
| }, | |
| { | |
| "epoch": 3.53, | |
| "learning_rate": 2.4783195658336022e-05, | |
| "loss": 1.1978, | |
| "step": 877500 | |
| }, | |
| { | |
| "epoch": 3.53, | |
| "learning_rate": 2.4768821572168553e-05, | |
| "loss": 1.1735, | |
| "step": 878000 | |
| }, | |
| { | |
| "epoch": 3.54, | |
| "learning_rate": 2.475447623417341e-05, | |
| "loss": 1.2148, | |
| "step": 878500 | |
| }, | |
| { | |
| "epoch": 3.54, | |
| "learning_rate": 2.4740102148005942e-05, | |
| "loss": 1.1704, | |
| "step": 879000 | |
| }, | |
| { | |
| "epoch": 3.54, | |
| "learning_rate": 2.472572806183847e-05, | |
| "loss": 1.2026, | |
| "step": 879500 | |
| }, | |
| { | |
| "epoch": 3.54, | |
| "learning_rate": 2.4711353975670997e-05, | |
| "loss": 1.1933, | |
| "step": 880000 | |
| }, | |
| { | |
| "epoch": 3.54, | |
| "learning_rate": 2.4696979889503528e-05, | |
| "loss": 1.2241, | |
| "step": 880500 | |
| }, | |
| { | |
| "epoch": 3.55, | |
| "learning_rate": 2.4682605803336052e-05, | |
| "loss": 1.169, | |
| "step": 881000 | |
| }, | |
| { | |
| "epoch": 3.55, | |
| "learning_rate": 2.4668231717168583e-05, | |
| "loss": 1.1719, | |
| "step": 881500 | |
| }, | |
| { | |
| "epoch": 3.55, | |
| "learning_rate": 2.465385763100111e-05, | |
| "loss": 1.1823, | |
| "step": 882000 | |
| }, | |
| { | |
| "epoch": 3.55, | |
| "learning_rate": 2.4639512293005972e-05, | |
| "loss": 1.1706, | |
| "step": 882500 | |
| }, | |
| { | |
| "epoch": 3.55, | |
| "learning_rate": 2.4625166955010837e-05, | |
| "loss": 1.1814, | |
| "step": 883000 | |
| }, | |
| { | |
| "epoch": 3.56, | |
| "learning_rate": 2.4610792868843364e-05, | |
| "loss": 1.2014, | |
| "step": 883500 | |
| }, | |
| { | |
| "epoch": 3.56, | |
| "learning_rate": 2.4596418782675892e-05, | |
| "loss": 1.1884, | |
| "step": 884000 | |
| }, | |
| { | |
| "epoch": 3.56, | |
| "learning_rate": 2.4582044696508423e-05, | |
| "loss": 1.1569, | |
| "step": 884500 | |
| }, | |
| { | |
| "epoch": 3.56, | |
| "learning_rate": 2.4567699358513284e-05, | |
| "loss": 1.2318, | |
| "step": 885000 | |
| }, | |
| { | |
| "epoch": 3.56, | |
| "learning_rate": 2.455332527234581e-05, | |
| "loss": 1.2312, | |
| "step": 885500 | |
| }, | |
| { | |
| "epoch": 3.57, | |
| "learning_rate": 2.4538951186178343e-05, | |
| "loss": 1.1868, | |
| "step": 886000 | |
| }, | |
| { | |
| "epoch": 3.57, | |
| "learning_rate": 2.45246058481832e-05, | |
| "loss": 1.1478, | |
| "step": 886500 | |
| }, | |
| { | |
| "epoch": 3.57, | |
| "learning_rate": 2.451023176201573e-05, | |
| "loss": 1.1534, | |
| "step": 887000 | |
| }, | |
| { | |
| "epoch": 3.57, | |
| "learning_rate": 2.449585767584826e-05, | |
| "loss": 1.2182, | |
| "step": 887500 | |
| }, | |
| { | |
| "epoch": 3.57, | |
| "learning_rate": 2.4481483589680786e-05, | |
| "loss": 1.1811, | |
| "step": 888000 | |
| }, | |
| { | |
| "epoch": 3.58, | |
| "learning_rate": 2.4467109503513317e-05, | |
| "loss": 1.2205, | |
| "step": 888500 | |
| }, | |
| { | |
| "epoch": 3.58, | |
| "learning_rate": 2.445273541734584e-05, | |
| "loss": 1.1841, | |
| "step": 889000 | |
| }, | |
| { | |
| "epoch": 3.58, | |
| "learning_rate": 2.4438361331178373e-05, | |
| "loss": 1.194, | |
| "step": 889500 | |
| }, | |
| { | |
| "epoch": 3.58, | |
| "learning_rate": 2.4424015993183237e-05, | |
| "loss": 1.1959, | |
| "step": 890000 | |
| }, | |
| { | |
| "epoch": 3.58, | |
| "learning_rate": 2.440964190701576e-05, | |
| "loss": 1.1874, | |
| "step": 890500 | |
| }, | |
| { | |
| "epoch": 3.59, | |
| "learning_rate": 2.4395267820848292e-05, | |
| "loss": 1.1992, | |
| "step": 891000 | |
| }, | |
| { | |
| "epoch": 3.59, | |
| "learning_rate": 2.4380893734680816e-05, | |
| "loss": 1.1908, | |
| "step": 891500 | |
| }, | |
| { | |
| "epoch": 3.59, | |
| "learning_rate": 2.4366519648513347e-05, | |
| "loss": 1.1908, | |
| "step": 892000 | |
| }, | |
| { | |
| "epoch": 3.59, | |
| "learning_rate": 2.4352145562345875e-05, | |
| "loss": 1.2308, | |
| "step": 892500 | |
| }, | |
| { | |
| "epoch": 3.59, | |
| "learning_rate": 2.4337771476178403e-05, | |
| "loss": 1.1986, | |
| "step": 893000 | |
| }, | |
| { | |
| "epoch": 3.6, | |
| "learning_rate": 2.4323397390010933e-05, | |
| "loss": 1.2446, | |
| "step": 893500 | |
| }, | |
| { | |
| "epoch": 3.6, | |
| "learning_rate": 2.4309023303843458e-05, | |
| "loss": 1.2184, | |
| "step": 894000 | |
| }, | |
| { | |
| "epoch": 3.6, | |
| "learning_rate": 2.4294677965848322e-05, | |
| "loss": 1.1718, | |
| "step": 894500 | |
| }, | |
| { | |
| "epoch": 3.6, | |
| "learning_rate": 2.428030387968085e-05, | |
| "loss": 1.226, | |
| "step": 895000 | |
| }, | |
| { | |
| "epoch": 3.6, | |
| "learning_rate": 2.4265929793513377e-05, | |
| "loss": 1.1927, | |
| "step": 895500 | |
| }, | |
| { | |
| "epoch": 3.61, | |
| "learning_rate": 2.425155570734591e-05, | |
| "loss": 1.2361, | |
| "step": 896000 | |
| }, | |
| { | |
| "epoch": 3.61, | |
| "learning_rate": 2.4237181621178433e-05, | |
| "loss": 1.1985, | |
| "step": 896500 | |
| }, | |
| { | |
| "epoch": 3.61, | |
| "learning_rate": 2.4222836283183297e-05, | |
| "loss": 1.1943, | |
| "step": 897000 | |
| }, | |
| { | |
| "epoch": 3.61, | |
| "learning_rate": 2.4208462197015828e-05, | |
| "loss": 1.1602, | |
| "step": 897500 | |
| }, | |
| { | |
| "epoch": 3.61, | |
| "learning_rate": 2.419411685902069e-05, | |
| "loss": 1.1945, | |
| "step": 898000 | |
| }, | |
| { | |
| "epoch": 3.62, | |
| "learning_rate": 2.4179742772853217e-05, | |
| "loss": 1.1917, | |
| "step": 898500 | |
| }, | |
| { | |
| "epoch": 3.62, | |
| "learning_rate": 2.4165368686685748e-05, | |
| "loss": 1.1912, | |
| "step": 899000 | |
| }, | |
| { | |
| "epoch": 3.62, | |
| "learning_rate": 2.4150994600518272e-05, | |
| "loss": 1.1658, | |
| "step": 899500 | |
| }, | |
| { | |
| "epoch": 3.62, | |
| "learning_rate": 2.4136620514350803e-05, | |
| "loss": 1.1596, | |
| "step": 900000 | |
| }, | |
| { | |
| "epoch": 3.62, | |
| "learning_rate": 2.412224642818333e-05, | |
| "loss": 1.1588, | |
| "step": 900500 | |
| }, | |
| { | |
| "epoch": 3.63, | |
| "learning_rate": 2.4107872342015858e-05, | |
| "loss": 1.1778, | |
| "step": 901000 | |
| }, | |
| { | |
| "epoch": 3.63, | |
| "learning_rate": 2.4093498255848386e-05, | |
| "loss": 1.2307, | |
| "step": 901500 | |
| }, | |
| { | |
| "epoch": 3.63, | |
| "learning_rate": 2.4079152917853247e-05, | |
| "loss": 1.2047, | |
| "step": 902000 | |
| }, | |
| { | |
| "epoch": 3.63, | |
| "learning_rate": 2.4064778831685778e-05, | |
| "loss": 1.2131, | |
| "step": 902500 | |
| }, | |
| { | |
| "epoch": 3.63, | |
| "learning_rate": 2.4050404745518305e-05, | |
| "loss": 1.1606, | |
| "step": 903000 | |
| }, | |
| { | |
| "epoch": 3.64, | |
| "learning_rate": 2.4036030659350833e-05, | |
| "loss": 1.1774, | |
| "step": 903500 | |
| }, | |
| { | |
| "epoch": 3.64, | |
| "learning_rate": 2.4021685321355697e-05, | |
| "loss": 1.1936, | |
| "step": 904000 | |
| }, | |
| { | |
| "epoch": 3.64, | |
| "learning_rate": 2.4007311235188225e-05, | |
| "loss": 1.2016, | |
| "step": 904500 | |
| }, | |
| { | |
| "epoch": 3.64, | |
| "learning_rate": 2.3992937149020753e-05, | |
| "loss": 1.2103, | |
| "step": 905000 | |
| }, | |
| { | |
| "epoch": 3.64, | |
| "learning_rate": 2.397856306285328e-05, | |
| "loss": 1.2546, | |
| "step": 905500 | |
| }, | |
| { | |
| "epoch": 3.65, | |
| "learning_rate": 2.3964188976685808e-05, | |
| "loss": 1.1628, | |
| "step": 906000 | |
| }, | |
| { | |
| "epoch": 3.65, | |
| "learning_rate": 2.3949843638690672e-05, | |
| "loss": 1.2363, | |
| "step": 906500 | |
| }, | |
| { | |
| "epoch": 3.65, | |
| "learning_rate": 2.39354695525232e-05, | |
| "loss": 1.1835, | |
| "step": 907000 | |
| }, | |
| { | |
| "epoch": 3.65, | |
| "learning_rate": 2.3921095466355727e-05, | |
| "loss": 1.1611, | |
| "step": 907500 | |
| }, | |
| { | |
| "epoch": 3.65, | |
| "learning_rate": 2.3906721380188255e-05, | |
| "loss": 1.1875, | |
| "step": 908000 | |
| }, | |
| { | |
| "epoch": 3.66, | |
| "learning_rate": 2.3892347294020783e-05, | |
| "loss": 1.1809, | |
| "step": 908500 | |
| }, | |
| { | |
| "epoch": 3.66, | |
| "learning_rate": 2.3877973207853314e-05, | |
| "loss": 1.1983, | |
| "step": 909000 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 1739241, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 7, | |
| "save_steps": 9000, | |
| "total_flos": 8.699092914175696e+20, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |