Sentence Similarity
sentence-transformers
TensorBoard
Safetensors
bert
feature-extraction
text-embeddings-inference
Instructions to use Jjzzzz/finetuned_bge-small-en-v1.5 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- sentence-transformers
How to use Jjzzzz/finetuned_bge-small-en-v1.5 with sentence-transformers:
```python
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Jjzzzz/finetuned_bge-small-en-v1.5")

sentences = [
    "The weather is lovely today.",
    "It's so sunny outside!",
    "He drove to the stadium."
]
embeddings = model.encode(sentences)

similarities = model.similarity(embeddings, embeddings)
print(similarities.shape)
# [3, 3]
```
- Notebooks
- Google Colab
- Kaggle
| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 16.3265306122449, | |
| "eval_steps": 500, | |
| "global_step": 4000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.04081632653061224, | |
| "grad_norm": 7.114395618438721, | |
| "learning_rate": 9.981632653061225e-06, | |
| "loss": 0.7362, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.08163265306122448, | |
| "grad_norm": 11.572301864624023, | |
| "learning_rate": 9.961224489795919e-06, | |
| "loss": 0.8729, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.12244897959183673, | |
| "grad_norm": 9.383491516113281, | |
| "learning_rate": 9.940816326530614e-06, | |
| "loss": 0.773, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.16326530612244897, | |
| "grad_norm": 7.83120059967041, | |
| "learning_rate": 9.920408163265307e-06, | |
| "loss": 0.7817, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.20408163265306123, | |
| "grad_norm": 10.92087173461914, | |
| "learning_rate": 9.9e-06, | |
| "loss": 0.6256, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.24489795918367346, | |
| "grad_norm": 3.8826725482940674, | |
| "learning_rate": 9.879591836734695e-06, | |
| "loss": 0.5759, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.2857142857142857, | |
| "grad_norm": 11.15483283996582, | |
| "learning_rate": 9.859183673469388e-06, | |
| "loss": 0.7333, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.32653061224489793, | |
| "grad_norm": 11.470726013183594, | |
| "learning_rate": 9.838775510204083e-06, | |
| "loss": 0.5943, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.3673469387755102, | |
| "grad_norm": 13.159674644470215, | |
| "learning_rate": 9.818367346938777e-06, | |
| "loss": 0.7804, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.40816326530612246, | |
| "grad_norm": 9.58558464050293, | |
| "learning_rate": 9.79795918367347e-06, | |
| "loss": 0.6491, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.4489795918367347, | |
| "grad_norm": 9.653897285461426, | |
| "learning_rate": 9.777551020408163e-06, | |
| "loss": 0.5919, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.4897959183673469, | |
| "grad_norm": 8.117432594299316, | |
| "learning_rate": 9.757142857142858e-06, | |
| "loss": 0.4571, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.5306122448979592, | |
| "grad_norm": 6.9328460693359375, | |
| "learning_rate": 9.736734693877551e-06, | |
| "loss": 0.6597, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.5714285714285714, | |
| "grad_norm": 7.962501049041748, | |
| "learning_rate": 9.716326530612246e-06, | |
| "loss": 0.5132, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.6122448979591837, | |
| "grad_norm": 10.508763313293457, | |
| "learning_rate": 9.69591836734694e-06, | |
| "loss": 0.6893, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.6530612244897959, | |
| "grad_norm": 7.637253761291504, | |
| "learning_rate": 9.675510204081635e-06, | |
| "loss": 0.6142, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.6938775510204082, | |
| "grad_norm": 10.0332670211792, | |
| "learning_rate": 9.655102040816328e-06, | |
| "loss": 0.582, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.7346938775510204, | |
| "grad_norm": 8.150875091552734, | |
| "learning_rate": 9.634693877551021e-06, | |
| "loss": 0.477, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.7755102040816326, | |
| "grad_norm": 10.330913543701172, | |
| "learning_rate": 9.614285714285714e-06, | |
| "loss": 0.5916, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.8163265306122449, | |
| "grad_norm": 11.654999732971191, | |
| "learning_rate": 9.593877551020408e-06, | |
| "loss": 0.6236, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.8571428571428571, | |
| "grad_norm": 8.048078536987305, | |
| "learning_rate": 9.573469387755103e-06, | |
| "loss": 0.6142, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.8979591836734694, | |
| "grad_norm": 9.869592666625977, | |
| "learning_rate": 9.553061224489798e-06, | |
| "loss": 0.625, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.9387755102040817, | |
| "grad_norm": 8.321409225463867, | |
| "learning_rate": 9.532653061224491e-06, | |
| "loss": 0.5767, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.9795918367346939, | |
| "grad_norm": 7.6769256591796875, | |
| "learning_rate": 9.512244897959184e-06, | |
| "loss": 0.5134, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 1.0204081632653061, | |
| "grad_norm": 9.609123229980469, | |
| "learning_rate": 9.491836734693877e-06, | |
| "loss": 0.5868, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 1.0612244897959184, | |
| "grad_norm": 9.19683837890625, | |
| "learning_rate": 9.471428571428572e-06, | |
| "loss": 0.5215, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 1.1020408163265305, | |
| "grad_norm": 7.328164577484131, | |
| "learning_rate": 9.451020408163266e-06, | |
| "loss": 0.5422, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 1.1428571428571428, | |
| "grad_norm": 6.913904190063477, | |
| "learning_rate": 9.430612244897959e-06, | |
| "loss": 0.5214, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 1.183673469387755, | |
| "grad_norm": 9.28811264038086, | |
| "learning_rate": 9.410204081632654e-06, | |
| "loss": 0.5319, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 1.2244897959183674, | |
| "grad_norm": 9.132966041564941, | |
| "learning_rate": 9.389795918367349e-06, | |
| "loss": 0.5581, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 1.2653061224489797, | |
| "grad_norm": 6.9722065925598145, | |
| "learning_rate": 9.369387755102042e-06, | |
| "loss": 0.431, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 1.306122448979592, | |
| "grad_norm": 5.06177282333374, | |
| "learning_rate": 9.348979591836736e-06, | |
| "loss": 0.4583, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 1.346938775510204, | |
| "grad_norm": 7.732840538024902, | |
| "learning_rate": 9.328571428571429e-06, | |
| "loss": 0.4194, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 1.3877551020408163, | |
| "grad_norm": 8.94101333618164, | |
| "learning_rate": 9.308163265306122e-06, | |
| "loss": 0.4519, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 1.4285714285714286, | |
| "grad_norm": 7.5437750816345215, | |
| "learning_rate": 9.287755102040817e-06, | |
| "loss": 0.5095, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 1.469387755102041, | |
| "grad_norm": 5.702700138092041, | |
| "learning_rate": 9.26734693877551e-06, | |
| "loss": 0.3936, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 1.510204081632653, | |
| "grad_norm": 9.153871536254883, | |
| "learning_rate": 9.246938775510205e-06, | |
| "loss": 0.4566, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 1.5510204081632653, | |
| "grad_norm": 13.249794006347656, | |
| "learning_rate": 9.226530612244899e-06, | |
| "loss": 0.5216, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 1.5918367346938775, | |
| "grad_norm": 7.065913200378418, | |
| "learning_rate": 9.206122448979594e-06, | |
| "loss": 0.4562, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 1.6326530612244898, | |
| "grad_norm": 7.559301853179932, | |
| "learning_rate": 9.185714285714287e-06, | |
| "loss": 0.3883, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.6734693877551021, | |
| "grad_norm": 12.103629112243652, | |
| "learning_rate": 9.16530612244898e-06, | |
| "loss": 0.4149, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 1.7142857142857144, | |
| "grad_norm": 7.9720072746276855, | |
| "learning_rate": 9.144897959183673e-06, | |
| "loss": 0.4718, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 1.7551020408163265, | |
| "grad_norm": 4.845782279968262, | |
| "learning_rate": 9.124489795918368e-06, | |
| "loss": 0.4304, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 1.7959183673469388, | |
| "grad_norm": 6.954368591308594, | |
| "learning_rate": 9.104081632653062e-06, | |
| "loss": 0.3436, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 1.836734693877551, | |
| "grad_norm": 4.751299858093262, | |
| "learning_rate": 9.083673469387757e-06, | |
| "loss": 0.4366, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 1.8775510204081631, | |
| "grad_norm": 6.507364273071289, | |
| "learning_rate": 9.06326530612245e-06, | |
| "loss": 0.5794, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 1.9183673469387754, | |
| "grad_norm": 8.891802787780762, | |
| "learning_rate": 9.042857142857143e-06, | |
| "loss": 0.4616, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 1.9591836734693877, | |
| "grad_norm": 10.056327819824219, | |
| "learning_rate": 9.022448979591838e-06, | |
| "loss": 0.4946, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 7.899660110473633, | |
| "learning_rate": 9.002040816326531e-06, | |
| "loss": 0.4437, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 2.0408163265306123, | |
| "grad_norm": 6.761326313018799, | |
| "learning_rate": 8.981632653061225e-06, | |
| "loss": 0.4303, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 2.0816326530612246, | |
| "grad_norm": 8.639615058898926, | |
| "learning_rate": 8.96122448979592e-06, | |
| "loss": 0.3267, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 2.122448979591837, | |
| "grad_norm": 7.710758209228516, | |
| "learning_rate": 8.940816326530613e-06, | |
| "loss": 0.3559, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 2.163265306122449, | |
| "grad_norm": 6.812905311584473, | |
| "learning_rate": 8.920408163265308e-06, | |
| "loss": 0.4761, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 2.204081632653061, | |
| "grad_norm": 7.2431511878967285, | |
| "learning_rate": 8.900000000000001e-06, | |
| "loss": 0.405, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 2.2448979591836733, | |
| "grad_norm": 7.230724811553955, | |
| "learning_rate": 8.879591836734694e-06, | |
| "loss": 0.3638, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 2.2857142857142856, | |
| "grad_norm": 9.520208358764648, | |
| "learning_rate": 8.859183673469388e-06, | |
| "loss": 0.3473, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 2.326530612244898, | |
| "grad_norm": 7.048585414886475, | |
| "learning_rate": 8.838775510204083e-06, | |
| "loss": 0.3652, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 2.36734693877551, | |
| "grad_norm": 6.979404449462891, | |
| "learning_rate": 8.818367346938776e-06, | |
| "loss": 0.3855, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 2.4081632653061225, | |
| "grad_norm": 3.765305280685425, | |
| "learning_rate": 8.797959183673471e-06, | |
| "loss": 0.3452, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 2.4489795918367347, | |
| "grad_norm": 10.533697128295898, | |
| "learning_rate": 8.777551020408164e-06, | |
| "loss": 0.3874, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 2.489795918367347, | |
| "grad_norm": 8.108145713806152, | |
| "learning_rate": 8.757142857142858e-06, | |
| "loss": 0.3695, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 2.5306122448979593, | |
| "grad_norm": 7.947360992431641, | |
| "learning_rate": 8.736734693877552e-06, | |
| "loss": 0.408, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 2.571428571428571, | |
| "grad_norm": 7.8081374168396, | |
| "learning_rate": 8.716326530612246e-06, | |
| "loss": 0.4059, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 2.612244897959184, | |
| "grad_norm": 8.579155921936035, | |
| "learning_rate": 8.695918367346939e-06, | |
| "loss": 0.3934, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 2.6530612244897958, | |
| "grad_norm": 6.4387712478637695, | |
| "learning_rate": 8.675510204081632e-06, | |
| "loss": 0.4256, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 2.693877551020408, | |
| "grad_norm": 8.415692329406738, | |
| "learning_rate": 8.655102040816327e-06, | |
| "loss": 0.3453, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 2.7346938775510203, | |
| "grad_norm": 8.50904369354248, | |
| "learning_rate": 8.63469387755102e-06, | |
| "loss": 0.4766, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 2.7755102040816326, | |
| "grad_norm": 4.662519931793213, | |
| "learning_rate": 8.614285714285716e-06, | |
| "loss": 0.3973, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 2.816326530612245, | |
| "grad_norm": 6.288435935974121, | |
| "learning_rate": 8.593877551020409e-06, | |
| "loss": 0.4408, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 2.857142857142857, | |
| "grad_norm": 6.625838279724121, | |
| "learning_rate": 8.573469387755102e-06, | |
| "loss": 0.2908, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 2.8979591836734695, | |
| "grad_norm": 8.510032653808594, | |
| "learning_rate": 8.553061224489797e-06, | |
| "loss": 0.3813, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 2.938775510204082, | |
| "grad_norm": 11.82463264465332, | |
| "learning_rate": 8.53265306122449e-06, | |
| "loss": 0.4352, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 2.979591836734694, | |
| "grad_norm": 8.821819305419922, | |
| "learning_rate": 8.512244897959184e-06, | |
| "loss": 0.4318, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 3.020408163265306, | |
| "grad_norm": 8.010713577270508, | |
| "learning_rate": 8.491836734693879e-06, | |
| "loss": 0.2323, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 3.061224489795918, | |
| "grad_norm": 9.03991985321045, | |
| "learning_rate": 8.471428571428572e-06, | |
| "loss": 0.3603, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 3.1020408163265305, | |
| "grad_norm": 10.94204044342041, | |
| "learning_rate": 8.451020408163267e-06, | |
| "loss": 0.3576, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 3.142857142857143, | |
| "grad_norm": 7.89410924911499, | |
| "learning_rate": 8.43061224489796e-06, | |
| "loss": 0.2851, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 3.183673469387755, | |
| "grad_norm": 6.53656005859375, | |
| "learning_rate": 8.410204081632653e-06, | |
| "loss": 0.318, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 3.2244897959183674, | |
| "grad_norm": 6.487284183502197, | |
| "learning_rate": 8.389795918367347e-06, | |
| "loss": 0.317, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 3.2653061224489797, | |
| "grad_norm": 6.947931289672852, | |
| "learning_rate": 8.369387755102042e-06, | |
| "loss": 0.2879, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 3.306122448979592, | |
| "grad_norm": 4.166048526763916, | |
| "learning_rate": 8.348979591836735e-06, | |
| "loss": 0.3392, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 3.3469387755102042, | |
| "grad_norm": 9.974846839904785, | |
| "learning_rate": 8.32857142857143e-06, | |
| "loss": 0.3663, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 3.387755102040816, | |
| "grad_norm": 9.668428421020508, | |
| "learning_rate": 8.308163265306123e-06, | |
| "loss": 0.3212, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 3.4285714285714284, | |
| "grad_norm": 11.81507396697998, | |
| "learning_rate": 8.287755102040816e-06, | |
| "loss": 0.3241, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 3.4693877551020407, | |
| "grad_norm": 13.690321922302246, | |
| "learning_rate": 8.267346938775511e-06, | |
| "loss": 0.4535, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 3.510204081632653, | |
| "grad_norm": 11.042778968811035, | |
| "learning_rate": 8.246938775510205e-06, | |
| "loss": 0.3826, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 3.5510204081632653, | |
| "grad_norm": 8.57719612121582, | |
| "learning_rate": 8.226530612244898e-06, | |
| "loss": 0.3905, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 3.5918367346938775, | |
| "grad_norm": 7.843425750732422, | |
| "learning_rate": 8.206122448979591e-06, | |
| "loss": 0.3125, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 3.63265306122449, | |
| "grad_norm": 5.9236931800842285, | |
| "learning_rate": 8.185714285714286e-06, | |
| "loss": 0.3512, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 3.673469387755102, | |
| "grad_norm": 8.213603973388672, | |
| "learning_rate": 8.165306122448981e-06, | |
| "loss": 0.4094, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 3.7142857142857144, | |
| "grad_norm": 3.8083949089050293, | |
| "learning_rate": 8.144897959183674e-06, | |
| "loss": 0.2751, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 3.7551020408163263, | |
| "grad_norm": 12.339240074157715, | |
| "learning_rate": 8.124489795918368e-06, | |
| "loss": 0.3296, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 3.795918367346939, | |
| "grad_norm": 9.532052040100098, | |
| "learning_rate": 8.104081632653061e-06, | |
| "loss": 0.3033, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 3.836734693877551, | |
| "grad_norm": 6.307032108306885, | |
| "learning_rate": 8.083673469387756e-06, | |
| "loss": 0.3765, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 3.877551020408163, | |
| "grad_norm": 7.3003010749816895, | |
| "learning_rate": 8.06326530612245e-06, | |
| "loss": 0.2161, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 3.9183673469387754, | |
| "grad_norm": 7.6572699546813965, | |
| "learning_rate": 8.042857142857143e-06, | |
| "loss": 0.2886, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 3.9591836734693877, | |
| "grad_norm": 6.745776176452637, | |
| "learning_rate": 8.022448979591838e-06, | |
| "loss": 0.3376, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 10.482270240783691, | |
| "learning_rate": 8.002040816326533e-06, | |
| "loss": 0.2657, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 4.040816326530612, | |
| "grad_norm": 6.213717460632324, | |
| "learning_rate": 7.981632653061226e-06, | |
| "loss": 0.2596, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 4.081632653061225, | |
| "grad_norm": 10.256094932556152, | |
| "learning_rate": 7.961224489795919e-06, | |
| "loss": 0.2338, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 4.122448979591836, | |
| "grad_norm": 8.640167236328125, | |
| "learning_rate": 7.940816326530612e-06, | |
| "loss": 0.2936, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 4.163265306122449, | |
| "grad_norm": 6.750300407409668, | |
| "learning_rate": 7.920408163265306e-06, | |
| "loss": 0.3511, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 4.204081632653061, | |
| "grad_norm": 9.488007545471191, | |
| "learning_rate": 7.9e-06, | |
| "loss": 0.2913, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 4.244897959183674, | |
| "grad_norm": 4.2671003341674805, | |
| "learning_rate": 7.879591836734694e-06, | |
| "loss": 0.2768, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 4.285714285714286, | |
| "grad_norm": 5.375782489776611, | |
| "learning_rate": 7.859183673469389e-06, | |
| "loss": 0.274, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 4.326530612244898, | |
| "grad_norm": 10.316515922546387, | |
| "learning_rate": 7.838775510204082e-06, | |
| "loss": 0.2373, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 4.36734693877551, | |
| "grad_norm": 6.733712673187256, | |
| "learning_rate": 7.818367346938777e-06, | |
| "loss": 0.32, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 4.408163265306122, | |
| "grad_norm": 6.432683944702148, | |
| "learning_rate": 7.79795918367347e-06, | |
| "loss": 0.3149, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 4.448979591836735, | |
| "grad_norm": 8.520115852355957, | |
| "learning_rate": 7.777551020408164e-06, | |
| "loss": 0.3369, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 4.489795918367347, | |
| "grad_norm": 3.52677845954895, | |
| "learning_rate": 7.757142857142857e-06, | |
| "loss": 0.2584, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 4.530612244897959, | |
| "grad_norm": 9.642569541931152, | |
| "learning_rate": 7.736734693877552e-06, | |
| "loss": 0.3076, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 4.571428571428571, | |
| "grad_norm": 11.233070373535156, | |
| "learning_rate": 7.716326530612245e-06, | |
| "loss": 0.3263, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 4.612244897959184, | |
| "grad_norm": 7.919038772583008, | |
| "learning_rate": 7.69591836734694e-06, | |
| "loss": 0.3152, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 4.653061224489796, | |
| "grad_norm": 7.116144180297852, | |
| "learning_rate": 7.675510204081633e-06, | |
| "loss": 0.2809, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 4.6938775510204085, | |
| "grad_norm": 6.757585525512695, | |
| "learning_rate": 7.655102040816327e-06, | |
| "loss": 0.2642, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 4.73469387755102, | |
| "grad_norm": 9.032824516296387, | |
| "learning_rate": 7.634693877551022e-06, | |
| "loss": 0.3546, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 4.775510204081632, | |
| "grad_norm": 7.837385654449463, | |
| "learning_rate": 7.614285714285715e-06, | |
| "loss": 0.3196, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 4.816326530612245, | |
| "grad_norm": 5.3134846687316895, | |
| "learning_rate": 7.593877551020409e-06, | |
| "loss": 0.2068, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 4.857142857142857, | |
| "grad_norm": 8.036781311035156, | |
| "learning_rate": 7.573469387755102e-06, | |
| "loss": 0.24, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 4.8979591836734695, | |
| "grad_norm": 12.13339900970459, | |
| "learning_rate": 7.5530612244897965e-06, | |
| "loss": 0.2795, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 4.938775510204081, | |
| "grad_norm": 5.253749370574951, | |
| "learning_rate": 7.532653061224491e-06, | |
| "loss": 0.2667, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 4.979591836734694, | |
| "grad_norm": 7.921318531036377, | |
| "learning_rate": 7.512244897959185e-06, | |
| "loss": 0.3218, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 5.020408163265306, | |
| "grad_norm": 9.056379318237305, | |
| "learning_rate": 7.491836734693878e-06, | |
| "loss": 0.2324, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 5.061224489795919, | |
| "grad_norm": 5.0674920082092285, | |
| "learning_rate": 7.471428571428571e-06, | |
| "loss": 0.3063, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 5.1020408163265305, | |
| "grad_norm": 12.247352600097656, | |
| "learning_rate": 7.451020408163266e-06, | |
| "loss": 0.3469, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 5.142857142857143, | |
| "grad_norm": 7.278679847717285, | |
| "learning_rate": 7.43061224489796e-06, | |
| "loss": 0.2851, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 5.183673469387755, | |
| "grad_norm": 11.491589546203613, | |
| "learning_rate": 7.410204081632654e-06, | |
| "loss": 0.245, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 5.224489795918367, | |
| "grad_norm": 8.652173042297363, | |
| "learning_rate": 7.389795918367347e-06, | |
| "loss": 0.2328, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 5.26530612244898, | |
| "grad_norm": 7.953402519226074, | |
| "learning_rate": 7.369387755102041e-06, | |
| "loss": 0.2784, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 5.3061224489795915, | |
| "grad_norm": 6.98084831237793, | |
| "learning_rate": 7.348979591836736e-06, | |
| "loss": 0.2379, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 5.346938775510204, | |
| "grad_norm": 7.400093078613281, | |
| "learning_rate": 7.328571428571429e-06, | |
| "loss": 0.2057, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 5.387755102040816, | |
| "grad_norm": 4.131850719451904, | |
| "learning_rate": 7.308163265306123e-06, | |
| "loss": 0.2792, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 5.428571428571429, | |
| "grad_norm": 10.25373363494873, | |
| "learning_rate": 7.287755102040817e-06, | |
| "loss": 0.2642, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 5.469387755102041, | |
| "grad_norm": 5.801217079162598, | |
| "learning_rate": 7.267346938775512e-06, | |
| "loss": 0.2301, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 5.510204081632653, | |
| "grad_norm": 3.3520116806030273, | |
| "learning_rate": 7.246938775510205e-06, | |
| "loss": 0.2728, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 5.551020408163265, | |
| "grad_norm": 5.335054874420166, | |
| "learning_rate": 7.226530612244898e-06, | |
| "loss": 0.2812, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 5.591836734693878, | |
| "grad_norm": 4.713162899017334, | |
| "learning_rate": 7.206122448979592e-06, | |
| "loss": 0.221, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 5.63265306122449, | |
| "grad_norm": 8.641826629638672, | |
| "learning_rate": 7.185714285714286e-06, | |
| "loss": 0.2204, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 5.673469387755102, | |
| "grad_norm": 4.286067485809326, | |
| "learning_rate": 7.165306122448981e-06, | |
| "loss": 0.2446, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 5.714285714285714, | |
| "grad_norm": 7.071314811706543, | |
| "learning_rate": 7.144897959183674e-06, | |
| "loss": 0.1788, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 5.755102040816326, | |
| "grad_norm": 9.792560577392578, | |
| "learning_rate": 7.124489795918368e-06, | |
| "loss": 0.272, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 5.795918367346939, | |
| "grad_norm": 6.726722240447998, | |
| "learning_rate": 7.104081632653061e-06, | |
| "loss": 0.18, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 5.836734693877551, | |
| "grad_norm": 7.378534317016602, | |
| "learning_rate": 7.083673469387755e-06, | |
| "loss": 0.2308, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 5.877551020408164, | |
| "grad_norm": 10.169758796691895, | |
| "learning_rate": 7.0632653061224495e-06, | |
| "loss": 0.296, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 5.918367346938775, | |
| "grad_norm": 5.440324306488037, | |
| "learning_rate": 7.042857142857144e-06, | |
| "loss": 0.2725, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 5.959183673469388, | |
| "grad_norm": 8.175464630126953, | |
| "learning_rate": 7.022448979591837e-06, | |
| "loss": 0.2251, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "grad_norm": 6.7653398513793945, | |
| "learning_rate": 7.002040816326531e-06, | |
| "loss": 0.3411, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 6.040816326530612, | |
| "grad_norm": 6.0152716636657715, | |
| "learning_rate": 6.981632653061225e-06, | |
| "loss": 0.2071, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 6.081632653061225, | |
| "grad_norm": 8.066520690917969, | |
| "learning_rate": 6.961224489795919e-06, | |
| "loss": 0.1639, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 6.122448979591836, | |
| "grad_norm": 4.027646541595459, | |
| "learning_rate": 6.940816326530613e-06, | |
| "loss": 0.2069, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 6.163265306122449, | |
| "grad_norm": 5.61140251159668, | |
| "learning_rate": 6.920408163265307e-06, | |
| "loss": 0.2413, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 6.204081632653061, | |
| "grad_norm": 9.809159278869629, | |
| "learning_rate": 6.9e-06, | |
| "loss": 0.26, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 6.244897959183674, | |
| "grad_norm": 6.755568504333496, | |
| "learning_rate": 6.879591836734695e-06, | |
| "loss": 0.1965, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 6.285714285714286, | |
| "grad_norm": 4.21774435043335, | |
| "learning_rate": 6.859183673469388e-06, | |
| "loss": 0.1959, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 6.326530612244898, | |
| "grad_norm": 5.166352272033691, | |
| "learning_rate": 6.838775510204082e-06, | |
| "loss": 0.1941, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 6.36734693877551, | |
| "grad_norm": 10.346336364746094, | |
| "learning_rate": 6.818367346938776e-06, | |
| "loss": 0.2278, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 6.408163265306122, | |
| "grad_norm": 7.703672409057617, | |
| "learning_rate": 6.797959183673471e-06, | |
| "loss": 0.2854, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 6.448979591836735, | |
| "grad_norm": 9.366389274597168, | |
| "learning_rate": 6.777551020408164e-06, | |
| "loss": 0.2229, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 6.489795918367347, | |
| "grad_norm": 7.013561248779297, | |
| "learning_rate": 6.757142857142858e-06, | |
| "loss": 0.2732, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 6.530612244897959, | |
| "grad_norm": 5.82119083404541, | |
| "learning_rate": 6.736734693877551e-06, | |
| "loss": 0.2389, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 6.571428571428571, | |
| "grad_norm": 8.112947463989258, | |
| "learning_rate": 6.716326530612245e-06, | |
| "loss": 0.193, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 6.612244897959184, | |
| "grad_norm": 8.238506317138672, | |
| "learning_rate": 6.6959183673469396e-06, | |
| "loss": 0.2344, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 6.653061224489796, | |
| "grad_norm": 6.531697750091553, | |
| "learning_rate": 6.675510204081634e-06, | |
| "loss": 0.2146, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 6.6938775510204085, | |
| "grad_norm": 6.525634288787842, | |
| "learning_rate": 6.655102040816327e-06, | |
| "loss": 0.1993, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 6.73469387755102, | |
| "grad_norm": 8.847986221313477, | |
| "learning_rate": 6.63469387755102e-06, | |
| "loss": 0.2884, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 6.775510204081632, | |
| "grad_norm": 9.006918907165527, | |
| "learning_rate": 6.614285714285715e-06, | |
| "loss": 0.2173, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 6.816326530612245, | |
| "grad_norm": 9.229476928710938, | |
| "learning_rate": 6.593877551020409e-06, | |
| "loss": 0.201, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 6.857142857142857, | |
| "grad_norm": 5.367541313171387, | |
| "learning_rate": 6.573469387755103e-06, | |
| "loss": 0.2505, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 6.8979591836734695, | |
| "grad_norm": 7.771108150482178, | |
| "learning_rate": 6.553061224489796e-06, | |
| "loss": 0.2225, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 6.938775510204081, | |
| "grad_norm": 5.306410789489746, | |
| "learning_rate": 6.53265306122449e-06, | |
| "loss": 0.2549, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 6.979591836734694, | |
| "grad_norm": 4.359670162200928, | |
| "learning_rate": 6.512244897959185e-06, | |
| "loss": 0.2585, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 7.020408163265306, | |
| "grad_norm": 4.528923034667969, | |
| "learning_rate": 6.491836734693878e-06, | |
| "loss": 0.2489, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 7.061224489795919, | |
| "grad_norm": 4.760287761688232, | |
| "learning_rate": 6.4714285714285715e-06, | |
| "loss": 0.1955, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 7.1020408163265305, | |
| "grad_norm": 9.205543518066406, | |
| "learning_rate": 6.451020408163266e-06, | |
| "loss": 0.2283, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 7.142857142857143, | |
| "grad_norm": 5.910974025726318, | |
| "learning_rate": 6.43061224489796e-06, | |
| "loss": 0.2308, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 7.183673469387755, | |
| "grad_norm": 5.042090892791748, | |
| "learning_rate": 6.410204081632654e-06, | |
| "loss": 0.1883, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 7.224489795918367, | |
| "grad_norm": 5.0842742919921875, | |
| "learning_rate": 6.389795918367347e-06, | |
| "loss": 0.1942, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 7.26530612244898, | |
| "grad_norm": 9.219313621520996, | |
| "learning_rate": 6.369387755102041e-06, | |
| "loss": 0.1946, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 7.3061224489795915, | |
| "grad_norm": 7.5656208992004395, | |
| "learning_rate": 6.348979591836735e-06, | |
| "loss": 0.2016, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 7.346938775510204, | |
| "grad_norm": 5.8213653564453125, | |
| "learning_rate": 6.3285714285714296e-06, | |
| "loss": 0.2808, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 7.387755102040816, | |
| "grad_norm": 8.004778861999512, | |
| "learning_rate": 6.308163265306123e-06, | |
| "loss": 0.2393, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 7.428571428571429, | |
| "grad_norm": 7.5001139640808105, | |
| "learning_rate": 6.287755102040817e-06, | |
| "loss": 0.2007, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 7.469387755102041, | |
| "grad_norm": 9.618229866027832, | |
| "learning_rate": 6.26734693877551e-06, | |
| "loss": 0.2464, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 7.510204081632653, | |
| "grad_norm": 7.257756233215332, | |
| "learning_rate": 6.246938775510205e-06, | |
| "loss": 0.1692, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 7.551020408163265, | |
| "grad_norm": 7.658279895782471, | |
| "learning_rate": 6.2265306122448985e-06, | |
| "loss": 0.2367, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 7.591836734693878, | |
| "grad_norm": 6.590469837188721, | |
| "learning_rate": 6.206122448979593e-06, | |
| "loss": 0.2508, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 7.63265306122449, | |
| "grad_norm": 8.601705551147461, | |
| "learning_rate": 6.185714285714286e-06, | |
| "loss": 0.2168, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 7.673469387755102, | |
| "grad_norm": 6.144942283630371, | |
| "learning_rate": 6.16530612244898e-06, | |
| "loss": 0.2308, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 7.714285714285714, | |
| "grad_norm": 3.256690502166748, | |
| "learning_rate": 6.144897959183674e-06, | |
| "loss": 0.1791, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 7.755102040816326, | |
| "grad_norm": 6.810645580291748, | |
| "learning_rate": 6.124489795918368e-06, | |
| "loss": 0.2335, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 7.795918367346939, | |
| "grad_norm": 3.4259018898010254, | |
| "learning_rate": 6.1040816326530616e-06, | |
| "loss": 0.212, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 7.836734693877551, | |
| "grad_norm": 8.353039741516113, | |
| "learning_rate": 6.083673469387756e-06, | |
| "loss": 0.2312, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 7.877551020408164, | |
| "grad_norm": 5.548733711242676, | |
| "learning_rate": 6.06326530612245e-06, | |
| "loss": 0.2666, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 7.918367346938775, | |
| "grad_norm": 8.386053085327148, | |
| "learning_rate": 6.042857142857144e-06, | |
| "loss": 0.2543, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 7.959183673469388, | |
| "grad_norm": 7.863219261169434, | |
| "learning_rate": 6.022448979591837e-06, | |
| "loss": 0.2171, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "grad_norm": 5.328917503356934, | |
| "learning_rate": 6.0020408163265305e-06, | |
| "loss": 0.1592, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 8.040816326530612, | |
| "grad_norm": 11.456307411193848, | |
| "learning_rate": 5.981632653061225e-06, | |
| "loss": 0.2345, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 8.081632653061224, | |
| "grad_norm": 3.9219276905059814, | |
| "learning_rate": 5.96122448979592e-06, | |
| "loss": 0.228, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 8.122448979591837, | |
| "grad_norm": 7.155372142791748, | |
| "learning_rate": 5.940816326530613e-06, | |
| "loss": 0.1879, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 8.16326530612245, | |
| "grad_norm": 8.530526161193848, | |
| "learning_rate": 5.920408163265306e-06, | |
| "loss": 0.1808, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 8.204081632653061, | |
| "grad_norm": 10.660292625427246, | |
| "learning_rate": 5.902040816326531e-06, | |
| "loss": 0.2545, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 8.244897959183673, | |
| "grad_norm": 2.4285929203033447, | |
| "learning_rate": 5.881632653061225e-06, | |
| "loss": 0.1697, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 8.285714285714286, | |
| "grad_norm": 7.390512943267822, | |
| "learning_rate": 5.861224489795919e-06, | |
| "loss": 0.1868, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 8.326530612244898, | |
| "grad_norm": 6.263368129730225, | |
| "learning_rate": 5.840816326530613e-06, | |
| "loss": 0.2508, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 8.36734693877551, | |
| "grad_norm": 0.5456896424293518, | |
| "learning_rate": 5.820408163265306e-06, | |
| "loss": 0.1737, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 8.408163265306122, | |
| "grad_norm": 9.859989166259766, | |
| "learning_rate": 5.8e-06, | |
| "loss": 0.2154, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 8.448979591836734, | |
| "grad_norm": 7.002127647399902, | |
| "learning_rate": 5.7795918367346945e-06, | |
| "loss": 0.178, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 8.489795918367347, | |
| "grad_norm": 11.321815490722656, | |
| "learning_rate": 5.759183673469389e-06, | |
| "loss": 0.2147, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 8.53061224489796, | |
| "grad_norm": 4.573301315307617, | |
| "learning_rate": 5.738775510204082e-06, | |
| "loss": 0.2011, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 8.571428571428571, | |
| "grad_norm": 5.418411731719971, | |
| "learning_rate": 5.718367346938776e-06, | |
| "loss": 0.1873, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 8.612244897959183, | |
| "grad_norm": 5.087385177612305, | |
| "learning_rate": 5.697959183673469e-06, | |
| "loss": 0.2519, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 8.653061224489797, | |
| "grad_norm": 5.11081600189209, | |
| "learning_rate": 5.677551020408164e-06, | |
| "loss": 0.1733, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 8.693877551020408, | |
| "grad_norm": 9.11174201965332, | |
| "learning_rate": 5.6571428571428576e-06, | |
| "loss": 0.2496, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 8.73469387755102, | |
| "grad_norm": 7.031070709228516, | |
| "learning_rate": 5.636734693877552e-06, | |
| "loss": 0.2239, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 8.775510204081632, | |
| "grad_norm": 7.756526947021484, | |
| "learning_rate": 5.616326530612245e-06, | |
| "loss": 0.1974, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 8.816326530612244, | |
| "grad_norm": 7.697716236114502, | |
| "learning_rate": 5.59591836734694e-06, | |
| "loss": 0.258, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 8.857142857142858, | |
| "grad_norm": 8.356952667236328, | |
| "learning_rate": 5.575510204081633e-06, | |
| "loss": 0.1936, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 8.89795918367347, | |
| "grad_norm": 7.454901695251465, | |
| "learning_rate": 5.555102040816327e-06, | |
| "loss": 0.1504, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 8.938775510204081, | |
| "grad_norm": 4.652848720550537, | |
| "learning_rate": 5.534693877551021e-06, | |
| "loss": 0.1793, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 8.979591836734693, | |
| "grad_norm": 6.558859348297119, | |
| "learning_rate": 5.514285714285714e-06, | |
| "loss": 0.1974, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 9.020408163265307, | |
| "grad_norm": 7.869504928588867, | |
| "learning_rate": 5.493877551020409e-06, | |
| "loss": 0.2128, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 9.061224489795919, | |
| "grad_norm": 8.646539688110352, | |
| "learning_rate": 5.473469387755103e-06, | |
| "loss": 0.1755, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 9.10204081632653, | |
| "grad_norm": 7.58027458190918, | |
| "learning_rate": 5.453061224489796e-06, | |
| "loss": 0.1701, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 9.142857142857142, | |
| "grad_norm": 8.236135482788086, | |
| "learning_rate": 5.4326530612244895e-06, | |
| "loss": 0.219, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 9.183673469387756, | |
| "grad_norm": 6.239899635314941, | |
| "learning_rate": 5.4122448979591845e-06, | |
| "loss": 0.1846, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 9.224489795918368, | |
| "grad_norm": 5.651657581329346, | |
| "learning_rate": 5.391836734693879e-06, | |
| "loss": 0.237, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 9.26530612244898, | |
| "grad_norm": 6.521792411804199, | |
| "learning_rate": 5.371428571428572e-06, | |
| "loss": 0.2158, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 9.306122448979592, | |
| "grad_norm": 12.538895606994629, | |
| "learning_rate": 5.351020408163265e-06, | |
| "loss": 0.2103, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 9.346938775510203, | |
| "grad_norm": 6.341105937957764, | |
| "learning_rate": 5.330612244897959e-06, | |
| "loss": 0.1468, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 9.387755102040817, | |
| "grad_norm": 8.830240249633789, | |
| "learning_rate": 5.310204081632654e-06, | |
| "loss": 0.165, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 9.428571428571429, | |
| "grad_norm": 7.01543664932251, | |
| "learning_rate": 5.2897959183673476e-06, | |
| "loss": 0.1742, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 9.46938775510204, | |
| "grad_norm": 7.823669910430908, | |
| "learning_rate": 5.269387755102041e-06, | |
| "loss": 0.2414, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 9.510204081632653, | |
| "grad_norm": 7.9082770347595215, | |
| "learning_rate": 5.248979591836735e-06, | |
| "loss": 0.1961, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 9.551020408163264, | |
| "grad_norm": 8.552570343017578, | |
| "learning_rate": 5.22857142857143e-06, | |
| "loss": 0.1862, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 9.591836734693878, | |
| "grad_norm": 10.67483901977539, | |
| "learning_rate": 5.208163265306123e-06, | |
| "loss": 0.1773, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 9.63265306122449, | |
| "grad_norm": 5.287507057189941, | |
| "learning_rate": 5.1877551020408165e-06, | |
| "loss": 0.2165, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 9.673469387755102, | |
| "grad_norm": 3.9037272930145264, | |
| "learning_rate": 5.167346938775511e-06, | |
| "loss": 0.1598, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 9.714285714285714, | |
| "grad_norm": 2.465554714202881, | |
| "learning_rate": 5.146938775510204e-06, | |
| "loss": 0.1398, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 9.755102040816327, | |
| "grad_norm": 4.5391950607299805, | |
| "learning_rate": 5.126530612244899e-06, | |
| "loss": 0.1642, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 9.795918367346939, | |
| "grad_norm": 9.52216911315918, | |
| "learning_rate": 5.106122448979592e-06, | |
| "loss": 0.2317, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 9.83673469387755, | |
| "grad_norm": 7.413267135620117, | |
| "learning_rate": 5.085714285714286e-06, | |
| "loss": 0.2339, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 9.877551020408163, | |
| "grad_norm": 11.318732261657715, | |
| "learning_rate": 5.0653061224489795e-06, | |
| "loss": 0.2225, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 9.918367346938776, | |
| "grad_norm": 11.242213249206543, | |
| "learning_rate": 5.0448979591836745e-06, | |
| "loss": 0.3078, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 9.959183673469388, | |
| "grad_norm": 9.637420654296875, | |
| "learning_rate": 5.024489795918368e-06, | |
| "loss": 0.1856, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "grad_norm": 10.665837287902832, | |
| "learning_rate": 5.004081632653062e-06, | |
| "loss": 0.2125, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 10.040816326530612, | |
| "grad_norm": 5.397568702697754, | |
| "learning_rate": 4.983673469387755e-06, | |
| "loss": 0.1722, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 10.081632653061224, | |
| "grad_norm": 5.377045631408691, | |
| "learning_rate": 4.963265306122449e-06, | |
| "loss": 0.1958, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 10.122448979591837, | |
| "grad_norm": 7.449840068817139, | |
| "learning_rate": 4.9428571428571435e-06, | |
| "loss": 0.1832, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 10.16326530612245, | |
| "grad_norm": 7.062196731567383, | |
| "learning_rate": 4.9224489795918376e-06, | |
| "loss": 0.1913, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 10.204081632653061, | |
| "grad_norm": 6.084071636199951, | |
| "learning_rate": 4.902040816326531e-06, | |
| "loss": 0.1187, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 10.244897959183673, | |
| "grad_norm": 4.809638023376465, | |
| "learning_rate": 4.881632653061225e-06, | |
| "loss": 0.1405, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 10.285714285714286, | |
| "grad_norm": 4.271588325500488, | |
| "learning_rate": 4.861224489795919e-06, | |
| "loss": 0.1357, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 10.326530612244898, | |
| "grad_norm": 8.498083114624023, | |
| "learning_rate": 4.840816326530612e-06, | |
| "loss": 0.2201, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 10.36734693877551, | |
| "grad_norm": 7.665253162384033, | |
| "learning_rate": 4.8204081632653065e-06, | |
| "loss": 0.1959, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 10.408163265306122, | |
| "grad_norm": 5.3332743644714355, | |
| "learning_rate": 4.800000000000001e-06, | |
| "loss": 0.1956, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 10.448979591836734, | |
| "grad_norm": 4.948959827423096, | |
| "learning_rate": 4.779591836734695e-06, | |
| "loss": 0.2092, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 10.489795918367347, | |
| "grad_norm": 7.226635456085205, | |
| "learning_rate": 4.759183673469388e-06, | |
| "loss": 0.1584, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 10.53061224489796, | |
| "grad_norm": 4.212419509887695, | |
| "learning_rate": 4.738775510204082e-06, | |
| "loss": 0.1476, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 10.571428571428571, | |
| "grad_norm": 7.545459270477295, | |
| "learning_rate": 4.7183673469387754e-06, | |
| "loss": 0.2103, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 10.612244897959183, | |
| "grad_norm": 1.853903889656067, | |
| "learning_rate": 4.69795918367347e-06, | |
| "loss": 0.2105, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 10.653061224489797, | |
| "grad_norm": 4.872255802154541, | |
| "learning_rate": 4.677551020408164e-06, | |
| "loss": 0.1979, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 10.693877551020408, | |
| "grad_norm": 8.121048927307129, | |
| "learning_rate": 4.657142857142857e-06, | |
| "loss": 0.1904, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 10.73469387755102, | |
| "grad_norm": 6.789455890655518, | |
| "learning_rate": 4.636734693877551e-06, | |
| "loss": 0.231, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 10.775510204081632, | |
| "grad_norm": 6.744252681732178, | |
| "learning_rate": 4.616326530612245e-06, | |
| "loss": 0.1028, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 10.816326530612244, | |
| "grad_norm": 1.3881915807724, | |
| "learning_rate": 4.595918367346939e-06, | |
| "loss": 0.1611, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 10.857142857142858, | |
| "grad_norm": 4.106182098388672, | |
| "learning_rate": 4.575510204081633e-06, | |
| "loss": 0.1721, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 10.89795918367347, | |
| "grad_norm": 6.046643257141113, | |
| "learning_rate": 4.555102040816327e-06, | |
| "loss": 0.2162, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 10.938775510204081, | |
| "grad_norm": 2.952366352081299, | |
| "learning_rate": 4.534693877551021e-06, | |
| "loss": 0.1941, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 10.979591836734693, | |
| "grad_norm": 5.679470539093018, | |
| "learning_rate": 4.514285714285714e-06, | |
| "loss": 0.1322, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 11.020408163265307, | |
| "grad_norm": 4.781472206115723, | |
| "learning_rate": 4.493877551020408e-06, | |
| "loss": 0.1756, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 11.061224489795919, | |
| "grad_norm": 4.453323841094971, | |
| "learning_rate": 4.473469387755102e-06, | |
| "loss": 0.1652, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 11.10204081632653, | |
| "grad_norm": 6.313192367553711, | |
| "learning_rate": 4.4530612244897965e-06, | |
| "loss": 0.2071, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 11.142857142857142, | |
| "grad_norm": 3.620354413986206, | |
| "learning_rate": 4.43265306122449e-06, | |
| "loss": 0.2166, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 11.183673469387756, | |
| "grad_norm": 5.309145450592041, | |
| "learning_rate": 4.412244897959184e-06, | |
| "loss": 0.1787, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 11.224489795918368, | |
| "grad_norm": 5.886495590209961, | |
| "learning_rate": 4.391836734693878e-06, | |
| "loss": 0.1699, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 11.26530612244898, | |
| "grad_norm": 6.391637325286865, | |
| "learning_rate": 4.371428571428572e-06, | |
| "loss": 0.1928, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 11.306122448979592, | |
| "grad_norm": 9.88926887512207, | |
| "learning_rate": 4.3510204081632654e-06, | |
| "loss": 0.2344, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 11.346938775510203, | |
| "grad_norm": 4.097028732299805, | |
| "learning_rate": 4.3306122448979596e-06, | |
| "loss": 0.1457, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 11.387755102040817, | |
| "grad_norm": 6.217812538146973, | |
| "learning_rate": 4.310204081632654e-06, | |
| "loss": 0.1773, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 11.428571428571429, | |
| "grad_norm": 6.636881351470947, | |
| "learning_rate": 4.289795918367347e-06, | |
| "loss": 0.1977, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 11.46938775510204, | |
| "grad_norm": 4.406877517700195, | |
| "learning_rate": 4.269387755102041e-06, | |
| "loss": 0.1331, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 11.510204081632653, | |
| "grad_norm": 7.739985466003418, | |
| "learning_rate": 4.248979591836735e-06, | |
| "loss": 0.1624, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 11.551020408163264, | |
| "grad_norm": 8.672730445861816, | |
| "learning_rate": 4.228571428571429e-06, | |
| "loss": 0.1435, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 11.591836734693878, | |
| "grad_norm": 8.240572929382324, | |
| "learning_rate": 4.208163265306123e-06, | |
| "loss": 0.1864, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 11.63265306122449, | |
| "grad_norm": 6.257174968719482, | |
| "learning_rate": 4.187755102040817e-06, | |
| "loss": 0.2008, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 11.673469387755102, | |
| "grad_norm": 7.619112014770508, | |
| "learning_rate": 4.167346938775511e-06, | |
| "loss": 0.1529, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 11.714285714285714, | |
| "grad_norm": 3.644754648208618, | |
| "learning_rate": 4.146938775510204e-06, | |
| "loss": 0.1434, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 11.755102040816327, | |
| "grad_norm": 3.984260082244873, | |
| "learning_rate": 4.126530612244898e-06, | |
| "loss": 0.1327, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 11.795918367346939, | |
| "grad_norm": 7.851540565490723, | |
| "learning_rate": 4.106122448979592e-06, | |
| "loss": 0.1709, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 11.83673469387755, | |
| "grad_norm": 2.9527790546417236, | |
| "learning_rate": 4.0857142857142865e-06, | |
| "loss": 0.1487, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 11.877551020408163, | |
| "grad_norm": 3.5094189643859863, | |
| "learning_rate": 4.06530612244898e-06, | |
| "loss": 0.1733, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 11.918367346938776, | |
| "grad_norm": 8.27623176574707, | |
| "learning_rate": 4.044897959183674e-06, | |
| "loss": 0.2207, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 11.959183673469388, | |
| "grad_norm": 6.333447456359863, | |
| "learning_rate": 4.024489795918368e-06, | |
| "loss": 0.1912, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "grad_norm": 3.803018569946289, | |
| "learning_rate": 4.004081632653062e-06, | |
| "loss": 0.1969, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 12.040816326530612, | |
| "grad_norm": 7.993055820465088, | |
| "learning_rate": 3.9836734693877555e-06, | |
| "loss": 0.1708, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 12.081632653061224, | |
| "grad_norm": 3.1507952213287354, | |
| "learning_rate": 3.963265306122449e-06, | |
| "loss": 0.0929, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 12.122448979591837, | |
| "grad_norm": 3.0786514282226562, | |
| "learning_rate": 3.942857142857143e-06, | |
| "loss": 0.1392, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 12.16326530612245, | |
| "grad_norm": 12.369327545166016, | |
| "learning_rate": 3.922448979591837e-06, | |
| "loss": 0.1887, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 12.204081632653061, | |
| "grad_norm": 11.310943603515625, | |
| "learning_rate": 3.902040816326531e-06, | |
| "loss": 0.2074, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 12.244897959183673, | |
| "grad_norm": 8.117951393127441, | |
| "learning_rate": 3.881632653061224e-06, | |
| "loss": 0.2066, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 12.285714285714286, | |
| "grad_norm": 0.911245584487915, | |
| "learning_rate": 3.8612244897959185e-06, | |
| "loss": 0.1504, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 12.326530612244898, | |
| "grad_norm": 7.755861282348633, | |
| "learning_rate": 3.840816326530613e-06, | |
| "loss": 0.187, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 12.36734693877551, | |
| "grad_norm": 11.341306686401367, | |
| "learning_rate": 3.820408163265306e-06, | |
| "loss": 0.1385, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 12.408163265306122, | |
| "grad_norm": 6.276268005371094, | |
| "learning_rate": 3.8000000000000005e-06, | |
| "loss": 0.1776, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 12.448979591836734, | |
| "grad_norm": 3.063488006591797, | |
| "learning_rate": 3.779591836734694e-06, | |
| "loss": 0.1903, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 12.489795918367347, | |
| "grad_norm": 6.759077548980713, | |
| "learning_rate": 3.7591836734693883e-06, | |
| "loss": 0.1504, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 12.53061224489796, | |
| "grad_norm": 9.553186416625977, | |
| "learning_rate": 3.738775510204082e-06, | |
| "loss": 0.1763, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 12.571428571428571, | |
| "grad_norm": 7.130514621734619, | |
| "learning_rate": 3.718367346938776e-06, | |
| "loss": 0.1464, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 12.612244897959183, | |
| "grad_norm": 6.302764415740967, | |
| "learning_rate": 3.69795918367347e-06, | |
| "loss": 0.1141, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 12.653061224489797, | |
| "grad_norm": 7.9323835372924805, | |
| "learning_rate": 3.677551020408164e-06, | |
| "loss": 0.2175, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 12.693877551020408, | |
| "grad_norm": 5.5850324630737305, | |
| "learning_rate": 3.6571428571428576e-06, | |
| "loss": 0.1602, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 12.73469387755102, | |
| "grad_norm": 7.268565654754639, | |
| "learning_rate": 3.636734693877551e-06, | |
| "loss": 0.181, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 12.775510204081632, | |
| "grad_norm": 5.973303318023682, | |
| "learning_rate": 3.616326530612245e-06, | |
| "loss": 0.1024, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 12.816326530612244, | |
| "grad_norm": 3.3456904888153076, | |
| "learning_rate": 3.5959183673469387e-06, | |
| "loss": 0.1756, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 12.857142857142858, | |
| "grad_norm": 7.198270797729492, | |
| "learning_rate": 3.575510204081633e-06, | |
| "loss": 0.156, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 12.89795918367347, | |
| "grad_norm": 3.496737003326416, | |
| "learning_rate": 3.5551020408163266e-06, | |
| "loss": 0.1868, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 12.938775510204081, | |
| "grad_norm": 9.160460472106934, | |
| "learning_rate": 3.5346938775510207e-06, | |
| "loss": 0.242, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 12.979591836734693, | |
| "grad_norm": 9.402756690979004, | |
| "learning_rate": 3.5142857142857144e-06, | |
| "loss": 0.1947, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 13.020408163265307, | |
| "grad_norm": 4.506322860717773, | |
| "learning_rate": 3.4938775510204085e-06, | |
| "loss": 0.1468, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 13.061224489795919, | |
| "grad_norm": 6.5064921379089355, | |
| "learning_rate": 3.4734693877551022e-06, | |
| "loss": 0.1914, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 13.10204081632653, | |
| "grad_norm": 5.771793842315674, | |
| "learning_rate": 3.453061224489796e-06, | |
| "loss": 0.1401, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 13.142857142857142, | |
| "grad_norm": 7.380532264709473, | |
| "learning_rate": 3.43265306122449e-06, | |
| "loss": 0.1248, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 13.183673469387756, | |
| "grad_norm": 3.246613025665283, | |
| "learning_rate": 3.4122448979591838e-06, | |
| "loss": 0.1464, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 13.224489795918368, | |
| "grad_norm": 7.83432674407959, | |
| "learning_rate": 3.391836734693878e-06, | |
| "loss": 0.1243, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 13.26530612244898, | |
| "grad_norm": 3.1438093185424805, | |
| "learning_rate": 3.3714285714285716e-06, | |
| "loss": 0.1624, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 13.306122448979592, | |
| "grad_norm": 11.76649284362793, | |
| "learning_rate": 3.3510204081632657e-06, | |
| "loss": 0.1864, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 13.346938775510203, | |
| "grad_norm": 9.060452461242676, | |
| "learning_rate": 3.3306122448979594e-06, | |
| "loss": 0.2151, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 13.387755102040817, | |
| "grad_norm": 4.446900367736816, | |
| "learning_rate": 3.310204081632653e-06, | |
| "loss": 0.1426, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 13.428571428571429, | |
| "grad_norm": 5.351714611053467, | |
| "learning_rate": 3.2897959183673472e-06, | |
| "loss": 0.1786, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 13.46938775510204, | |
| "grad_norm": 5.846502304077148, | |
| "learning_rate": 3.269387755102041e-06, | |
| "loss": 0.1612, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 13.510204081632653, | |
| "grad_norm": 1.2500088214874268, | |
| "learning_rate": 3.248979591836735e-06, | |
| "loss": 0.1413, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 13.551020408163264, | |
| "grad_norm": 8.840521812438965, | |
| "learning_rate": 3.2285714285714288e-06, | |
| "loss": 0.1483, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 13.591836734693878, | |
| "grad_norm": 6.333540916442871, | |
| "learning_rate": 3.208163265306123e-06, | |
| "loss": 0.1949, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 13.63265306122449, | |
| "grad_norm": 7.405364990234375, | |
| "learning_rate": 3.1877551020408166e-06, | |
| "loss": 0.1546, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 13.673469387755102, | |
| "grad_norm": 4.707162857055664, | |
| "learning_rate": 3.1673469387755107e-06, | |
| "loss": 0.1809, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 13.714285714285714, | |
| "grad_norm": 9.862110137939453, | |
| "learning_rate": 3.1469387755102044e-06, | |
| "loss": 0.1563, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 13.755102040816327, | |
| "grad_norm": 7.040682792663574, | |
| "learning_rate": 3.126530612244898e-06, | |
| "loss": 0.1717, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 13.795918367346939, | |
| "grad_norm": 8.885647773742676, | |
| "learning_rate": 3.1061224489795922e-06, | |
| "loss": 0.1468, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 13.83673469387755, | |
| "grad_norm": 5.706595420837402, | |
| "learning_rate": 3.085714285714286e-06, | |
| "loss": 0.1334, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 13.877551020408163, | |
| "grad_norm": 6.5305352210998535, | |
| "learning_rate": 3.06530612244898e-06, | |
| "loss": 0.1253, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 13.918367346938776, | |
| "grad_norm": 8.774666786193848, | |
| "learning_rate": 3.0448979591836738e-06, | |
| "loss": 0.2265, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 13.959183673469388, | |
| "grad_norm": 7.207103729248047, | |
| "learning_rate": 3.024489795918368e-06, | |
| "loss": 0.1446, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "grad_norm": 7.917428493499756, | |
| "learning_rate": 3.0040816326530616e-06, | |
| "loss": 0.1569, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 14.040816326530612, | |
| "grad_norm": 6.160830974578857, | |
| "learning_rate": 2.9836734693877557e-06, | |
| "loss": 0.1126, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 14.081632653061224, | |
| "grad_norm": 6.8933186531066895, | |
| "learning_rate": 2.9632653061224494e-06, | |
| "loss": 0.1646, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 14.122448979591837, | |
| "grad_norm": 6.392195701599121, | |
| "learning_rate": 2.9428571428571427e-06, | |
| "loss": 0.1732, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 14.16326530612245, | |
| "grad_norm": 4.3666462898254395, | |
| "learning_rate": 2.9224489795918372e-06, | |
| "loss": 0.1358, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 14.204081632653061, | |
| "grad_norm": 4.311633586883545, | |
| "learning_rate": 2.9020408163265305e-06, | |
| "loss": 0.1994, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 14.244897959183673, | |
| "grad_norm": 7.359527587890625, | |
| "learning_rate": 2.881632653061225e-06, | |
| "loss": 0.2524, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 14.285714285714286, | |
| "grad_norm": 7.441856861114502, | |
| "learning_rate": 2.8612244897959183e-06, | |
| "loss": 0.1356, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 14.326530612244898, | |
| "grad_norm": 7.622951507568359, | |
| "learning_rate": 2.8408163265306125e-06, | |
| "loss": 0.1732, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 14.36734693877551, | |
| "grad_norm": 4.243079662322998, | |
| "learning_rate": 2.820408163265306e-06, | |
| "loss": 0.1582, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 14.408163265306122, | |
| "grad_norm": 7.601721286773682, | |
| "learning_rate": 2.8000000000000003e-06, | |
| "loss": 0.17, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 14.448979591836734, | |
| "grad_norm": 8.382099151611328, | |
| "learning_rate": 2.779591836734694e-06, | |
| "loss": 0.1305, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 14.489795918367347, | |
| "grad_norm": 6.1426591873168945, | |
| "learning_rate": 2.7591836734693877e-06, | |
| "loss": 0.2164, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 14.53061224489796, | |
| "grad_norm": 3.3495421409606934, | |
| "learning_rate": 2.738775510204082e-06, | |
| "loss": 0.2136, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 14.571428571428571, | |
| "grad_norm": 5.564877986907959, | |
| "learning_rate": 2.7183673469387755e-06, | |
| "loss": 0.1569, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 14.612244897959183, | |
| "grad_norm": 2.329392910003662, | |
| "learning_rate": 2.6979591836734697e-06, | |
| "loss": 0.2099, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 14.653061224489797, | |
| "grad_norm": 4.251242160797119, | |
| "learning_rate": 2.6775510204081634e-06, | |
| "loss": 0.1837, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 14.693877551020408, | |
| "grad_norm": 3.6388752460479736, | |
| "learning_rate": 2.6571428571428575e-06, | |
| "loss": 0.1558, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 14.73469387755102, | |
| "grad_norm": 1.2794233560562134, | |
| "learning_rate": 2.636734693877551e-06, | |
| "loss": 0.1416, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 14.775510204081632, | |
| "grad_norm": 8.014337539672852, | |
| "learning_rate": 2.616326530612245e-06, | |
| "loss": 0.2179, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 14.816326530612244, | |
| "grad_norm": 8.611292839050293, | |
| "learning_rate": 2.595918367346939e-06, | |
| "loss": 0.1655, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 14.857142857142858, | |
| "grad_norm": 5.971241474151611, | |
| "learning_rate": 2.5755102040816327e-06, | |
| "loss": 0.1575, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 14.89795918367347, | |
| "grad_norm": 8.207197189331055, | |
| "learning_rate": 2.555102040816327e-06, | |
| "loss": 0.1448, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 14.938775510204081, | |
| "grad_norm": 7.183290958404541, | |
| "learning_rate": 2.5346938775510205e-06, | |
| "loss": 0.1438, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 14.979591836734693, | |
| "grad_norm": 5.2017669677734375, | |
| "learning_rate": 2.5142857142857147e-06, | |
| "loss": 0.1582, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 15.020408163265307, | |
| "grad_norm": 5.843130588531494, | |
| "learning_rate": 2.4938775510204084e-06, | |
| "loss": 0.1632, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 15.061224489795919, | |
| "grad_norm": 4.228123188018799, | |
| "learning_rate": 2.473469387755102e-06, | |
| "loss": 0.1471, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 15.10204081632653, | |
| "grad_norm": 7.808995723724365, | |
| "learning_rate": 2.453061224489796e-06, | |
| "loss": 0.1754, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 15.142857142857142, | |
| "grad_norm": 6.6370849609375, | |
| "learning_rate": 2.43265306122449e-06, | |
| "loss": 0.1385, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 15.183673469387756, | |
| "grad_norm": 10.751446723937988, | |
| "learning_rate": 2.412244897959184e-06, | |
| "loss": 0.2018, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 15.224489795918368, | |
| "grad_norm": 2.6137239933013916, | |
| "learning_rate": 2.3918367346938777e-06, | |
| "loss": 0.1027, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 15.26530612244898, | |
| "grad_norm": 8.975826263427734, | |
| "learning_rate": 2.371428571428572e-06, | |
| "loss": 0.1591, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 15.306122448979592, | |
| "grad_norm": 5.2156758308410645, | |
| "learning_rate": 2.3510204081632655e-06, | |
| "loss": 0.2063, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 15.346938775510203, | |
| "grad_norm": 1.501234531402588, | |
| "learning_rate": 2.3306122448979592e-06, | |
| "loss": 0.1375, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 15.387755102040817, | |
| "grad_norm": 8.037463188171387, | |
| "learning_rate": 2.3102040816326534e-06, | |
| "loss": 0.1607, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 15.428571428571429, | |
| "grad_norm": 3.0848886966705322, | |
| "learning_rate": 2.289795918367347e-06, | |
| "loss": 0.1313, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 15.46938775510204, | |
| "grad_norm": 7.0728254318237305, | |
| "learning_rate": 2.269387755102041e-06, | |
| "loss": 0.1805, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 15.510204081632653, | |
| "grad_norm": 4.4785003662109375, | |
| "learning_rate": 2.248979591836735e-06, | |
| "loss": 0.1491, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 15.551020408163264, | |
| "grad_norm": 6.812148094177246, | |
| "learning_rate": 2.228571428571429e-06, | |
| "loss": 0.1597, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 15.591836734693878, | |
| "grad_norm": 6.794986248016357, | |
| "learning_rate": 2.2081632653061227e-06, | |
| "loss": 0.1942, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 15.63265306122449, | |
| "grad_norm": 8.72322940826416, | |
| "learning_rate": 2.1877551020408164e-06, | |
| "loss": 0.1595, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 15.673469387755102, | |
| "grad_norm": 6.645898818969727, | |
| "learning_rate": 2.16734693877551e-06, | |
| "loss": 0.1789, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 15.714285714285714, | |
| "grad_norm": 2.4683022499084473, | |
| "learning_rate": 2.1469387755102042e-06, | |
| "loss": 0.1489, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 15.755102040816327, | |
| "grad_norm": 3.773542881011963, | |
| "learning_rate": 2.126530612244898e-06, | |
| "loss": 0.1196, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 15.795918367346939, | |
| "grad_norm": 12.576253890991211, | |
| "learning_rate": 2.106122448979592e-06, | |
| "loss": 0.1655, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 15.83673469387755, | |
| "grad_norm": 9.4144287109375, | |
| "learning_rate": 2.0857142857142858e-06, | |
| "loss": 0.1791, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 15.877551020408163, | |
| "grad_norm": 6.735049724578857, | |
| "learning_rate": 2.06530612244898e-06, | |
| "loss": 0.1278, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 15.918367346938776, | |
| "grad_norm": 5.6283183097839355, | |
| "learning_rate": 2.0448979591836736e-06, | |
| "loss": 0.1513, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 15.959183673469388, | |
| "grad_norm": 3.5016186237335205, | |
| "learning_rate": 2.0244897959183677e-06, | |
| "loss": 0.0998, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "grad_norm": 9.131093978881836, | |
| "learning_rate": 2.0040816326530614e-06, | |
| "loss": 0.1838, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 16.040816326530614, | |
| "grad_norm": 9.901714324951172, | |
| "learning_rate": 1.983673469387755e-06, | |
| "loss": 0.19, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 16.081632653061224, | |
| "grad_norm": 8.307121276855469, | |
| "learning_rate": 1.9632653061224492e-06, | |
| "loss": 0.153, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 16.122448979591837, | |
| "grad_norm": 5.754699230194092, | |
| "learning_rate": 1.942857142857143e-06, | |
| "loss": 0.1497, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 16.163265306122447, | |
| "grad_norm": 9.883703231811523, | |
| "learning_rate": 1.922448979591837e-06, | |
| "loss": 0.171, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 16.20408163265306, | |
| "grad_norm": 4.327215194702148, | |
| "learning_rate": 1.9020408163265308e-06, | |
| "loss": 0.1696, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 16.244897959183675, | |
| "grad_norm": 5.210219860076904, | |
| "learning_rate": 1.8816326530612247e-06, | |
| "loss": 0.16, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 16.285714285714285, | |
| "grad_norm": 4.966328144073486, | |
| "learning_rate": 1.8612244897959186e-06, | |
| "loss": 0.1657, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 16.3265306122449, | |
| "grad_norm": 4.904468059539795, | |
| "learning_rate": 1.8408163265306123e-06, | |
| "loss": 0.1356, | |
| "step": 4000 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 4900, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 20, | |
| "save_steps": 1000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 30, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |