mys's picture
Upload folder using huggingface_hub
141b8b7 verified
Raw
History Blame
193 kB
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 1033,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0009682885499878964,
"grad_norm": 0.6142507791519165,
"learning_rate": 0.0,
"loss": 0.7025314569473267,
"step": 1
},
{
"epoch": 0.0019365770999757927,
"grad_norm": 0.6211322546005249,
"learning_rate": 1.0000000000000002e-06,
"loss": 0.6656137704849243,
"step": 2
},
{
"epoch": 0.002904865649963689,
"grad_norm": 0.6215519905090332,
"learning_rate": 2.0000000000000003e-06,
"loss": 0.6469869017601013,
"step": 3
},
{
"epoch": 0.0038731541999515854,
"grad_norm": 0.6204696297645569,
"learning_rate": 3e-06,
"loss": 0.6729673147201538,
"step": 4
},
{
"epoch": 0.004841442749939482,
"grad_norm": 0.5724360942840576,
"learning_rate": 4.000000000000001e-06,
"loss": 0.6311185956001282,
"step": 5
},
{
"epoch": 0.005809731299927378,
"grad_norm": 0.6253241896629333,
"learning_rate": 5e-06,
"loss": 0.6582703590393066,
"step": 6
},
{
"epoch": 0.006778019849915275,
"grad_norm": 0.6960524320602417,
"learning_rate": 6e-06,
"loss": 0.6846659183502197,
"step": 7
},
{
"epoch": 0.007746308399903171,
"grad_norm": 0.669350802898407,
"learning_rate": 7e-06,
"loss": 0.690190315246582,
"step": 8
},
{
"epoch": 0.008714596949891068,
"grad_norm": 0.603227436542511,
"learning_rate": 8.000000000000001e-06,
"loss": 0.6193867325782776,
"step": 9
},
{
"epoch": 0.009682885499878963,
"grad_norm": 0.5855698585510254,
"learning_rate": 9e-06,
"loss": 0.5652514696121216,
"step": 10
},
{
"epoch": 0.01065117404986686,
"grad_norm": 0.652574360370636,
"learning_rate": 1e-05,
"loss": 0.5543577671051025,
"step": 11
},
{
"epoch": 0.011619462599854757,
"grad_norm": 0.6981928944587708,
"learning_rate": 9.990224828934506e-06,
"loss": 0.5915582180023193,
"step": 12
},
{
"epoch": 0.012587751149842653,
"grad_norm": 0.6892595291137695,
"learning_rate": 9.980449657869014e-06,
"loss": 0.5405319333076477,
"step": 13
},
{
"epoch": 0.01355603969983055,
"grad_norm": 0.6135081648826599,
"learning_rate": 9.97067448680352e-06,
"loss": 0.535025954246521,
"step": 14
},
{
"epoch": 0.014524328249818447,
"grad_norm": 0.6271191239356995,
"learning_rate": 9.960899315738027e-06,
"loss": 0.5299935340881348,
"step": 15
},
{
"epoch": 0.015492616799806342,
"grad_norm": 0.5334578156471252,
"learning_rate": 9.951124144672532e-06,
"loss": 0.4963300824165344,
"step": 16
},
{
"epoch": 0.01646090534979424,
"grad_norm": 0.5006250143051147,
"learning_rate": 9.94134897360704e-06,
"loss": 0.49313884973526,
"step": 17
},
{
"epoch": 0.017429193899782137,
"grad_norm": 0.3994006812572479,
"learning_rate": 9.931573802541545e-06,
"loss": 0.4737316966056824,
"step": 18
},
{
"epoch": 0.01839748244977003,
"grad_norm": 0.28798240423202515,
"learning_rate": 9.921798631476052e-06,
"loss": 0.4324286878108978,
"step": 19
},
{
"epoch": 0.019365770999757927,
"grad_norm": 0.26087912917137146,
"learning_rate": 9.912023460410558e-06,
"loss": 0.43165814876556396,
"step": 20
},
{
"epoch": 0.020334059549745823,
"grad_norm": 0.2219318300485611,
"learning_rate": 9.902248289345065e-06,
"loss": 0.44213879108428955,
"step": 21
},
{
"epoch": 0.02130234809973372,
"grad_norm": 0.2282973825931549,
"learning_rate": 9.89247311827957e-06,
"loss": 0.45201411843299866,
"step": 22
},
{
"epoch": 0.022270636649721617,
"grad_norm": 0.2014036774635315,
"learning_rate": 9.882697947214078e-06,
"loss": 0.41472718119621277,
"step": 23
},
{
"epoch": 0.023238925199709513,
"grad_norm": 0.21150773763656616,
"learning_rate": 9.872922776148584e-06,
"loss": 0.41946664452552795,
"step": 24
},
{
"epoch": 0.02420721374969741,
"grad_norm": 0.253612220287323,
"learning_rate": 9.863147605083089e-06,
"loss": 0.42294907569885254,
"step": 25
},
{
"epoch": 0.025175502299685307,
"grad_norm": 0.21278540790081024,
"learning_rate": 9.853372434017596e-06,
"loss": 0.4383317828178406,
"step": 26
},
{
"epoch": 0.026143790849673203,
"grad_norm": 0.24685898423194885,
"learning_rate": 9.843597262952102e-06,
"loss": 0.46812987327575684,
"step": 27
},
{
"epoch": 0.0271120793996611,
"grad_norm": 0.18908213078975677,
"learning_rate": 9.83382209188661e-06,
"loss": 0.40582185983657837,
"step": 28
},
{
"epoch": 0.028080367949648997,
"grad_norm": 0.20694321393966675,
"learning_rate": 9.824046920821115e-06,
"loss": 0.39531630277633667,
"step": 29
},
{
"epoch": 0.029048656499636893,
"grad_norm": 0.1862354725599289,
"learning_rate": 9.814271749755622e-06,
"loss": 0.4046899676322937,
"step": 30
},
{
"epoch": 0.030016945049624787,
"grad_norm": 0.21069619059562683,
"learning_rate": 9.804496578690128e-06,
"loss": 0.39400529861450195,
"step": 31
},
{
"epoch": 0.030985233599612683,
"grad_norm": 0.1739916056394577,
"learning_rate": 9.794721407624635e-06,
"loss": 0.40542545914649963,
"step": 32
},
{
"epoch": 0.03195352214960058,
"grad_norm": 0.16906821727752686,
"learning_rate": 9.78494623655914e-06,
"loss": 0.37384384870529175,
"step": 33
},
{
"epoch": 0.03292181069958848,
"grad_norm": 0.17224127054214478,
"learning_rate": 9.775171065493648e-06,
"loss": 0.3819228410720825,
"step": 34
},
{
"epoch": 0.03389009924957637,
"grad_norm": 0.16344308853149414,
"learning_rate": 9.765395894428153e-06,
"loss": 0.41173726320266724,
"step": 35
},
{
"epoch": 0.034858387799564274,
"grad_norm": 0.17100028693675995,
"learning_rate": 9.75562072336266e-06,
"loss": 0.39287662506103516,
"step": 36
},
{
"epoch": 0.03582667634955217,
"grad_norm": 0.15641067922115326,
"learning_rate": 9.745845552297166e-06,
"loss": 0.3993951976299286,
"step": 37
},
{
"epoch": 0.03679496489954006,
"grad_norm": 0.15000952780246735,
"learning_rate": 9.736070381231672e-06,
"loss": 0.37331604957580566,
"step": 38
},
{
"epoch": 0.03776325344952796,
"grad_norm": 0.16917653381824493,
"learning_rate": 9.726295210166179e-06,
"loss": 0.45155206322669983,
"step": 39
},
{
"epoch": 0.03873154199951585,
"grad_norm": 0.1585894376039505,
"learning_rate": 9.716520039100685e-06,
"loss": 0.3733840882778168,
"step": 40
},
{
"epoch": 0.03969983054950375,
"grad_norm": 0.14439353346824646,
"learning_rate": 9.706744868035192e-06,
"loss": 0.3886685073375702,
"step": 41
},
{
"epoch": 0.04066811909949165,
"grad_norm": 0.14183790981769562,
"learning_rate": 9.696969696969698e-06,
"loss": 0.3821936547756195,
"step": 42
},
{
"epoch": 0.04163640764947955,
"grad_norm": 0.16753076016902924,
"learning_rate": 9.687194525904205e-06,
"loss": 0.4522704780101776,
"step": 43
},
{
"epoch": 0.04260469619946744,
"grad_norm": 0.1615847498178482,
"learning_rate": 9.67741935483871e-06,
"loss": 0.3997975289821625,
"step": 44
},
{
"epoch": 0.04357298474945534,
"grad_norm": 0.1554916650056839,
"learning_rate": 9.667644183773218e-06,
"loss": 0.3691978454589844,
"step": 45
},
{
"epoch": 0.04454127329944323,
"grad_norm": 0.16841153800487518,
"learning_rate": 9.657869012707723e-06,
"loss": 0.41484490036964417,
"step": 46
},
{
"epoch": 0.045509561849431134,
"grad_norm": 0.1278965324163437,
"learning_rate": 9.64809384164223e-06,
"loss": 0.35556912422180176,
"step": 47
},
{
"epoch": 0.04647785039941903,
"grad_norm": 0.14681562781333923,
"learning_rate": 9.638318670576736e-06,
"loss": 0.40769901871681213,
"step": 48
},
{
"epoch": 0.04744613894940692,
"grad_norm": 0.14918000996112823,
"learning_rate": 9.628543499511243e-06,
"loss": 0.3773626387119293,
"step": 49
},
{
"epoch": 0.04841442749939482,
"grad_norm": 0.13720250129699707,
"learning_rate": 9.618768328445749e-06,
"loss": 0.3663005232810974,
"step": 50
},
{
"epoch": 0.04938271604938271,
"grad_norm": 0.14886170625686646,
"learning_rate": 9.608993157380255e-06,
"loss": 0.4067220985889435,
"step": 51
},
{
"epoch": 0.05035100459937061,
"grad_norm": 0.14274443686008453,
"learning_rate": 9.599217986314762e-06,
"loss": 0.3832167685031891,
"step": 52
},
{
"epoch": 0.05131929314935851,
"grad_norm": 0.15536513924598694,
"learning_rate": 9.589442815249267e-06,
"loss": 0.4395195245742798,
"step": 53
},
{
"epoch": 0.05228758169934641,
"grad_norm": 0.1393464207649231,
"learning_rate": 9.579667644183775e-06,
"loss": 0.3716701567173004,
"step": 54
},
{
"epoch": 0.0532558702493343,
"grad_norm": 0.1450338065624237,
"learning_rate": 9.56989247311828e-06,
"loss": 0.362257719039917,
"step": 55
},
{
"epoch": 0.0542241587993222,
"grad_norm": 0.14616632461547852,
"learning_rate": 9.560117302052788e-06,
"loss": 0.4077686369419098,
"step": 56
},
{
"epoch": 0.05519244734931009,
"grad_norm": 0.1374523639678955,
"learning_rate": 9.550342130987293e-06,
"loss": 0.3961605429649353,
"step": 57
},
{
"epoch": 0.056160735899297994,
"grad_norm": 0.1394190788269043,
"learning_rate": 9.5405669599218e-06,
"loss": 0.3706665635108948,
"step": 58
},
{
"epoch": 0.05712902444928589,
"grad_norm": 0.11874961853027344,
"learning_rate": 9.530791788856306e-06,
"loss": 0.30775582790374756,
"step": 59
},
{
"epoch": 0.05809731299927379,
"grad_norm": 0.1349610835313797,
"learning_rate": 9.521016617790813e-06,
"loss": 0.3784869313240051,
"step": 60
},
{
"epoch": 0.05906560154926168,
"grad_norm": 0.13463151454925537,
"learning_rate": 9.511241446725319e-06,
"loss": 0.3922792077064514,
"step": 61
},
{
"epoch": 0.06003389009924957,
"grad_norm": 0.12930694222450256,
"learning_rate": 9.501466275659824e-06,
"loss": 0.37777647376060486,
"step": 62
},
{
"epoch": 0.06100217864923747,
"grad_norm": 0.13075940310955048,
"learning_rate": 9.491691104594332e-06,
"loss": 0.38546550273895264,
"step": 63
},
{
"epoch": 0.06197046719922537,
"grad_norm": 0.13507235050201416,
"learning_rate": 9.481915933528837e-06,
"loss": 0.3709413707256317,
"step": 64
},
{
"epoch": 0.06293875574921326,
"grad_norm": 0.1276707798242569,
"learning_rate": 9.472140762463345e-06,
"loss": 0.3352872431278229,
"step": 65
},
{
"epoch": 0.06390704429920116,
"grad_norm": 0.11911962181329727,
"learning_rate": 9.46236559139785e-06,
"loss": 0.3386506140232086,
"step": 66
},
{
"epoch": 0.06487533284918906,
"grad_norm": 0.12956929206848145,
"learning_rate": 9.452590420332357e-06,
"loss": 0.3754933774471283,
"step": 67
},
{
"epoch": 0.06584362139917696,
"grad_norm": 0.13890986144542694,
"learning_rate": 9.442815249266863e-06,
"loss": 0.4128858745098114,
"step": 68
},
{
"epoch": 0.06681190994916485,
"grad_norm": 0.11394089460372925,
"learning_rate": 9.43304007820137e-06,
"loss": 0.3089035153388977,
"step": 69
},
{
"epoch": 0.06778019849915275,
"grad_norm": 0.1245599314570427,
"learning_rate": 9.423264907135876e-06,
"loss": 0.32959863543510437,
"step": 70
},
{
"epoch": 0.06874848704914065,
"grad_norm": 0.14015918970108032,
"learning_rate": 9.413489736070383e-06,
"loss": 0.3884163498878479,
"step": 71
},
{
"epoch": 0.06971677559912855,
"grad_norm": 0.1232111006975174,
"learning_rate": 9.403714565004889e-06,
"loss": 0.3284456431865692,
"step": 72
},
{
"epoch": 0.07068506414911643,
"grad_norm": 0.13799598813056946,
"learning_rate": 9.393939393939396e-06,
"loss": 0.3356078565120697,
"step": 73
},
{
"epoch": 0.07165335269910433,
"grad_norm": 0.1208195611834526,
"learning_rate": 9.384164222873902e-06,
"loss": 0.3578157126903534,
"step": 74
},
{
"epoch": 0.07262164124909223,
"grad_norm": 0.11411258578300476,
"learning_rate": 9.374389051808407e-06,
"loss": 0.31658506393432617,
"step": 75
},
{
"epoch": 0.07358992979908012,
"grad_norm": 0.11944623291492462,
"learning_rate": 9.364613880742913e-06,
"loss": 0.36029356718063354,
"step": 76
},
{
"epoch": 0.07455821834906802,
"grad_norm": 0.12852734327316284,
"learning_rate": 9.35483870967742e-06,
"loss": 0.3340183198451996,
"step": 77
},
{
"epoch": 0.07552650689905592,
"grad_norm": 0.11775587499141693,
"learning_rate": 9.345063538611926e-06,
"loss": 0.3083425760269165,
"step": 78
},
{
"epoch": 0.07649479544904382,
"grad_norm": 0.13742083311080933,
"learning_rate": 9.335288367546433e-06,
"loss": 0.3448983132839203,
"step": 79
},
{
"epoch": 0.0774630839990317,
"grad_norm": 0.12395518273115158,
"learning_rate": 9.325513196480938e-06,
"loss": 0.39519673585891724,
"step": 80
},
{
"epoch": 0.0784313725490196,
"grad_norm": 0.1322673112154007,
"learning_rate": 9.315738025415446e-06,
"loss": 0.3364032506942749,
"step": 81
},
{
"epoch": 0.0793996610990075,
"grad_norm": 0.13429760932922363,
"learning_rate": 9.305962854349951e-06,
"loss": 0.3513767719268799,
"step": 82
},
{
"epoch": 0.08036794964899541,
"grad_norm": 0.11903934180736542,
"learning_rate": 9.296187683284459e-06,
"loss": 0.3263617157936096,
"step": 83
},
{
"epoch": 0.0813362381989833,
"grad_norm": 0.11960665881633759,
"learning_rate": 9.286412512218964e-06,
"loss": 0.3449134826660156,
"step": 84
},
{
"epoch": 0.0823045267489712,
"grad_norm": 0.12201559543609619,
"learning_rate": 9.27663734115347e-06,
"loss": 0.32846352458000183,
"step": 85
},
{
"epoch": 0.0832728152989591,
"grad_norm": 0.12875522673130035,
"learning_rate": 9.266862170087977e-06,
"loss": 0.37101566791534424,
"step": 86
},
{
"epoch": 0.08424110384894698,
"grad_norm": 0.11964312195777893,
"learning_rate": 9.257086999022483e-06,
"loss": 0.31812378764152527,
"step": 87
},
{
"epoch": 0.08520939239893488,
"grad_norm": 0.15213125944137573,
"learning_rate": 9.24731182795699e-06,
"loss": 0.3813604414463043,
"step": 88
},
{
"epoch": 0.08617768094892278,
"grad_norm": 0.1251516193151474,
"learning_rate": 9.237536656891495e-06,
"loss": 0.3027239441871643,
"step": 89
},
{
"epoch": 0.08714596949891068,
"grad_norm": 0.11938060075044632,
"learning_rate": 9.227761485826003e-06,
"loss": 0.37067025899887085,
"step": 90
},
{
"epoch": 0.08811425804889857,
"grad_norm": 0.13240274786949158,
"learning_rate": 9.217986314760508e-06,
"loss": 0.33599379658699036,
"step": 91
},
{
"epoch": 0.08908254659888647,
"grad_norm": 0.13307581841945648,
"learning_rate": 9.208211143695016e-06,
"loss": 0.37259358167648315,
"step": 92
},
{
"epoch": 0.09005083514887437,
"grad_norm": 0.12138372659683228,
"learning_rate": 9.198435972629521e-06,
"loss": 0.3587302565574646,
"step": 93
},
{
"epoch": 0.09101912369886227,
"grad_norm": 0.1167801097035408,
"learning_rate": 9.188660801564028e-06,
"loss": 0.33125776052474976,
"step": 94
},
{
"epoch": 0.09198741224885015,
"grad_norm": 0.1275295913219452,
"learning_rate": 9.178885630498534e-06,
"loss": 0.30975601077079773,
"step": 95
},
{
"epoch": 0.09295570079883805,
"grad_norm": 0.13747365772724152,
"learning_rate": 9.16911045943304e-06,
"loss": 0.3368357717990875,
"step": 96
},
{
"epoch": 0.09392398934882595,
"grad_norm": 0.12222792208194733,
"learning_rate": 9.159335288367547e-06,
"loss": 0.31197813153266907,
"step": 97
},
{
"epoch": 0.09489227789881384,
"grad_norm": 0.1364426612854004,
"learning_rate": 9.149560117302052e-06,
"loss": 0.32897326350212097,
"step": 98
},
{
"epoch": 0.09586056644880174,
"grad_norm": 0.14532364904880524,
"learning_rate": 9.13978494623656e-06,
"loss": 0.3669801652431488,
"step": 99
},
{
"epoch": 0.09682885499878964,
"grad_norm": 0.13844749331474304,
"learning_rate": 9.130009775171065e-06,
"loss": 0.36969247460365295,
"step": 100
},
{
"epoch": 0.09779714354877754,
"grad_norm": 0.12275300920009613,
"learning_rate": 9.120234604105573e-06,
"loss": 0.3295097053050995,
"step": 101
},
{
"epoch": 0.09876543209876543,
"grad_norm": 0.13520191609859467,
"learning_rate": 9.110459433040078e-06,
"loss": 0.3737986087799072,
"step": 102
},
{
"epoch": 0.09973372064875333,
"grad_norm": 0.13066619634628296,
"learning_rate": 9.100684261974585e-06,
"loss": 0.3033255934715271,
"step": 103
},
{
"epoch": 0.10070200919874123,
"grad_norm": 0.11890687793493271,
"learning_rate": 9.090909090909091e-06,
"loss": 0.34380683302879333,
"step": 104
},
{
"epoch": 0.10167029774872913,
"grad_norm": 0.13914473354816437,
"learning_rate": 9.081133919843598e-06,
"loss": 0.35355979204177856,
"step": 105
},
{
"epoch": 0.10263858629871701,
"grad_norm": 0.120316281914711,
"learning_rate": 9.071358748778104e-06,
"loss": 0.3205440044403076,
"step": 106
},
{
"epoch": 0.10360687484870491,
"grad_norm": 0.16602486371994019,
"learning_rate": 9.061583577712611e-06,
"loss": 0.3168993890285492,
"step": 107
},
{
"epoch": 0.10457516339869281,
"grad_norm": 0.12726294994354248,
"learning_rate": 9.051808406647117e-06,
"loss": 0.3384619653224945,
"step": 108
},
{
"epoch": 0.10554345194868071,
"grad_norm": 0.1322595477104187,
"learning_rate": 9.042033235581622e-06,
"loss": 0.3112761974334717,
"step": 109
},
{
"epoch": 0.1065117404986686,
"grad_norm": 0.1453908234834671,
"learning_rate": 9.03225806451613e-06,
"loss": 0.2989104390144348,
"step": 110
},
{
"epoch": 0.1074800290486565,
"grad_norm": 0.12833762168884277,
"learning_rate": 9.022482893450635e-06,
"loss": 0.3298214077949524,
"step": 111
},
{
"epoch": 0.1084483175986444,
"grad_norm": 0.12525658309459686,
"learning_rate": 9.012707722385142e-06,
"loss": 0.32383644580841064,
"step": 112
},
{
"epoch": 0.10941660614863229,
"grad_norm": 0.1307019740343094,
"learning_rate": 9.002932551319648e-06,
"loss": 0.3317619264125824,
"step": 113
},
{
"epoch": 0.11038489469862019,
"grad_norm": 0.13214020431041718,
"learning_rate": 8.993157380254155e-06,
"loss": 0.2884703576564789,
"step": 114
},
{
"epoch": 0.11135318324860809,
"grad_norm": 0.12866291403770447,
"learning_rate": 8.983382209188661e-06,
"loss": 0.351254940032959,
"step": 115
},
{
"epoch": 0.11232147179859599,
"grad_norm": 0.13350999355316162,
"learning_rate": 8.973607038123168e-06,
"loss": 0.30035918951034546,
"step": 116
},
{
"epoch": 0.11328976034858387,
"grad_norm": 0.11388203501701355,
"learning_rate": 8.963831867057674e-06,
"loss": 0.2958531081676483,
"step": 117
},
{
"epoch": 0.11425804889857177,
"grad_norm": 0.14289627969264984,
"learning_rate": 8.954056695992181e-06,
"loss": 0.32053035497665405,
"step": 118
},
{
"epoch": 0.11522633744855967,
"grad_norm": 0.13894328474998474,
"learning_rate": 8.944281524926687e-06,
"loss": 0.30120429396629333,
"step": 119
},
{
"epoch": 0.11619462599854757,
"grad_norm": 0.12141028046607971,
"learning_rate": 8.934506353861194e-06,
"loss": 0.31479382514953613,
"step": 120
},
{
"epoch": 0.11716291454853546,
"grad_norm": 0.12900526821613312,
"learning_rate": 8.9247311827957e-06,
"loss": 0.36297452449798584,
"step": 121
},
{
"epoch": 0.11813120309852336,
"grad_norm": 0.12149893492460251,
"learning_rate": 8.914956011730205e-06,
"loss": 0.2906142473220825,
"step": 122
},
{
"epoch": 0.11909949164851126,
"grad_norm": 0.16426807641983032,
"learning_rate": 8.905180840664712e-06,
"loss": 0.31878572702407837,
"step": 123
},
{
"epoch": 0.12006778019849915,
"grad_norm": 0.11868342757225037,
"learning_rate": 8.895405669599218e-06,
"loss": 0.3231990933418274,
"step": 124
},
{
"epoch": 0.12103606874848705,
"grad_norm": 0.13657772541046143,
"learning_rate": 8.885630498533725e-06,
"loss": 0.29259440302848816,
"step": 125
},
{
"epoch": 0.12200435729847495,
"grad_norm": 0.1253119558095932,
"learning_rate": 8.87585532746823e-06,
"loss": 0.2838287353515625,
"step": 126
},
{
"epoch": 0.12297264584846285,
"grad_norm": 0.1284995973110199,
"learning_rate": 8.866080156402738e-06,
"loss": 0.3066769242286682,
"step": 127
},
{
"epoch": 0.12394093439845073,
"grad_norm": 0.11573974788188934,
"learning_rate": 8.856304985337244e-06,
"loss": 0.3010478615760803,
"step": 128
},
{
"epoch": 0.12490922294843863,
"grad_norm": 0.12995308637619019,
"learning_rate": 8.846529814271751e-06,
"loss": 0.3350738286972046,
"step": 129
},
{
"epoch": 0.12587751149842652,
"grad_norm": 0.13642707467079163,
"learning_rate": 8.836754643206256e-06,
"loss": 0.31808528304100037,
"step": 130
},
{
"epoch": 0.12684580004841442,
"grad_norm": 0.1310724914073944,
"learning_rate": 8.826979472140764e-06,
"loss": 0.3332287669181824,
"step": 131
},
{
"epoch": 0.12781408859840232,
"grad_norm": 0.12466035038232803,
"learning_rate": 8.81720430107527e-06,
"loss": 0.3196363151073456,
"step": 132
},
{
"epoch": 0.12878237714839022,
"grad_norm": 0.13588100671768188,
"learning_rate": 8.807429130009777e-06,
"loss": 0.31376099586486816,
"step": 133
},
{
"epoch": 0.12975066569837812,
"grad_norm": 0.13263723254203796,
"learning_rate": 8.797653958944282e-06,
"loss": 0.3171752393245697,
"step": 134
},
{
"epoch": 0.13071895424836602,
"grad_norm": 0.13374009728431702,
"learning_rate": 8.787878787878788e-06,
"loss": 0.2953280210494995,
"step": 135
},
{
"epoch": 0.13168724279835392,
"grad_norm": 0.13743482530117035,
"learning_rate": 8.778103616813295e-06,
"loss": 0.3488181531429291,
"step": 136
},
{
"epoch": 0.1326555313483418,
"grad_norm": 0.13955242931842804,
"learning_rate": 8.7683284457478e-06,
"loss": 0.331007182598114,
"step": 137
},
{
"epoch": 0.1336238198983297,
"grad_norm": 0.14186261594295502,
"learning_rate": 8.758553274682308e-06,
"loss": 0.36398252844810486,
"step": 138
},
{
"epoch": 0.1345921084483176,
"grad_norm": 0.1471295952796936,
"learning_rate": 8.748778103616813e-06,
"loss": 0.336472749710083,
"step": 139
},
{
"epoch": 0.1355603969983055,
"grad_norm": 0.11482029408216476,
"learning_rate": 8.73900293255132e-06,
"loss": 0.3006575405597687,
"step": 140
},
{
"epoch": 0.1365286855482934,
"grad_norm": 0.13504621386528015,
"learning_rate": 8.729227761485826e-06,
"loss": 0.3569592535495758,
"step": 141
},
{
"epoch": 0.1374969740982813,
"grad_norm": 0.15188293159008026,
"learning_rate": 8.719452590420334e-06,
"loss": 0.3333485424518585,
"step": 142
},
{
"epoch": 0.1384652626482692,
"grad_norm": 0.13065899908542633,
"learning_rate": 8.70967741935484e-06,
"loss": 0.3319074511528015,
"step": 143
},
{
"epoch": 0.1394335511982571,
"grad_norm": 0.1272367686033249,
"learning_rate": 8.699902248289346e-06,
"loss": 0.3033870756626129,
"step": 144
},
{
"epoch": 0.14040183974824497,
"grad_norm": 0.1433865875005722,
"learning_rate": 8.690127077223852e-06,
"loss": 0.30503055453300476,
"step": 145
},
{
"epoch": 0.14137012829823287,
"grad_norm": 0.12748195230960846,
"learning_rate": 8.68035190615836e-06,
"loss": 0.3041837811470032,
"step": 146
},
{
"epoch": 0.14233841684822077,
"grad_norm": 0.13291986286640167,
"learning_rate": 8.670576735092865e-06,
"loss": 0.3430430591106415,
"step": 147
},
{
"epoch": 0.14330670539820867,
"grad_norm": 0.12126651406288147,
"learning_rate": 8.66080156402737e-06,
"loss": 0.33859869837760925,
"step": 148
},
{
"epoch": 0.14427499394819657,
"grad_norm": 0.15293890237808228,
"learning_rate": 8.651026392961878e-06,
"loss": 0.33767563104629517,
"step": 149
},
{
"epoch": 0.14524328249818447,
"grad_norm": 0.13613349199295044,
"learning_rate": 8.641251221896383e-06,
"loss": 0.29907119274139404,
"step": 150
},
{
"epoch": 0.14621157104817237,
"grad_norm": 0.12802888453006744,
"learning_rate": 8.63147605083089e-06,
"loss": 0.3123582601547241,
"step": 151
},
{
"epoch": 0.14717985959816024,
"grad_norm": 0.16021937131881714,
"learning_rate": 8.621700879765396e-06,
"loss": 0.37841248512268066,
"step": 152
},
{
"epoch": 0.14814814814814814,
"grad_norm": 0.12790994346141815,
"learning_rate": 8.611925708699903e-06,
"loss": 0.27514874935150146,
"step": 153
},
{
"epoch": 0.14911643669813604,
"grad_norm": 0.13345623016357422,
"learning_rate": 8.602150537634409e-06,
"loss": 0.3330199718475342,
"step": 154
},
{
"epoch": 0.15008472524812394,
"grad_norm": 0.13262543082237244,
"learning_rate": 8.592375366568916e-06,
"loss": 0.2858338952064514,
"step": 155
},
{
"epoch": 0.15105301379811184,
"grad_norm": 0.16613167524337769,
"learning_rate": 8.582600195503422e-06,
"loss": 0.3251619040966034,
"step": 156
},
{
"epoch": 0.15202130234809974,
"grad_norm": 0.14091891050338745,
"learning_rate": 8.57282502443793e-06,
"loss": 0.3126198649406433,
"step": 157
},
{
"epoch": 0.15298959089808764,
"grad_norm": 0.12945963442325592,
"learning_rate": 8.563049853372435e-06,
"loss": 0.3045946955680847,
"step": 158
},
{
"epoch": 0.1539578794480755,
"grad_norm": 0.13346253335475922,
"learning_rate": 8.553274682306942e-06,
"loss": 0.3187895119190216,
"step": 159
},
{
"epoch": 0.1549261679980634,
"grad_norm": 0.1281236708164215,
"learning_rate": 8.543499511241448e-06,
"loss": 0.2990340292453766,
"step": 160
},
{
"epoch": 0.1558944565480513,
"grad_norm": 0.13074296712875366,
"learning_rate": 8.533724340175953e-06,
"loss": 0.3452467620372772,
"step": 161
},
{
"epoch": 0.1568627450980392,
"grad_norm": 0.13953045010566711,
"learning_rate": 8.52394916911046e-06,
"loss": 0.32909831404685974,
"step": 162
},
{
"epoch": 0.1578310336480271,
"grad_norm": 0.14059635996818542,
"learning_rate": 8.514173998044966e-06,
"loss": 0.27773308753967285,
"step": 163
},
{
"epoch": 0.158799322198015,
"grad_norm": 0.1397535353899002,
"learning_rate": 8.504398826979473e-06,
"loss": 0.3393952250480652,
"step": 164
},
{
"epoch": 0.15976761074800291,
"grad_norm": 0.13574957847595215,
"learning_rate": 8.494623655913979e-06,
"loss": 0.32174286246299744,
"step": 165
},
{
"epoch": 0.16073589929799081,
"grad_norm": 0.13975924253463745,
"learning_rate": 8.484848484848486e-06,
"loss": 0.30419760942459106,
"step": 166
},
{
"epoch": 0.1617041878479787,
"grad_norm": 0.1427648961544037,
"learning_rate": 8.475073313782992e-06,
"loss": 0.3033597469329834,
"step": 167
},
{
"epoch": 0.1626724763979666,
"grad_norm": 0.14715762436389923,
"learning_rate": 8.465298142717499e-06,
"loss": 0.32338106632232666,
"step": 168
},
{
"epoch": 0.1636407649479545,
"grad_norm": 0.1452789306640625,
"learning_rate": 8.455522971652005e-06,
"loss": 0.345528781414032,
"step": 169
},
{
"epoch": 0.1646090534979424,
"grad_norm": 0.13867947459220886,
"learning_rate": 8.445747800586512e-06,
"loss": 0.32734549045562744,
"step": 170
},
{
"epoch": 0.1655773420479303,
"grad_norm": 0.137126162648201,
"learning_rate": 8.435972629521018e-06,
"loss": 0.32425397634506226,
"step": 171
},
{
"epoch": 0.1665456305979182,
"grad_norm": 0.15507617592811584,
"learning_rate": 8.426197458455525e-06,
"loss": 0.3226757049560547,
"step": 172
},
{
"epoch": 0.1675139191479061,
"grad_norm": 0.12315394729375839,
"learning_rate": 8.41642228739003e-06,
"loss": 0.3322482407093048,
"step": 173
},
{
"epoch": 0.16848220769789396,
"grad_norm": 0.14539486169815063,
"learning_rate": 8.406647116324536e-06,
"loss": 0.3431966304779053,
"step": 174
},
{
"epoch": 0.16945049624788186,
"grad_norm": 0.1458021104335785,
"learning_rate": 8.396871945259043e-06,
"loss": 0.3117983937263489,
"step": 175
},
{
"epoch": 0.17041878479786976,
"grad_norm": 0.126032292842865,
"learning_rate": 8.387096774193549e-06,
"loss": 0.304436057806015,
"step": 176
},
{
"epoch": 0.17138707334785766,
"grad_norm": 0.15044239163398743,
"learning_rate": 8.377321603128056e-06,
"loss": 0.3327201306819916,
"step": 177
},
{
"epoch": 0.17235536189784556,
"grad_norm": 0.12567083537578583,
"learning_rate": 8.367546432062562e-06,
"loss": 0.32488536834716797,
"step": 178
},
{
"epoch": 0.17332365044783346,
"grad_norm": 0.1399868130683899,
"learning_rate": 8.357771260997069e-06,
"loss": 0.28393518924713135,
"step": 179
},
{
"epoch": 0.17429193899782136,
"grad_norm": 0.12733778357505798,
"learning_rate": 8.347996089931575e-06,
"loss": 0.28177058696746826,
"step": 180
},
{
"epoch": 0.17526022754780926,
"grad_norm": 0.15660876035690308,
"learning_rate": 8.338220918866082e-06,
"loss": 0.31686797738075256,
"step": 181
},
{
"epoch": 0.17622851609779713,
"grad_norm": 0.1510598510503769,
"learning_rate": 8.328445747800587e-06,
"loss": 0.2910916209220886,
"step": 182
},
{
"epoch": 0.17719680464778503,
"grad_norm": 0.13863040506839752,
"learning_rate": 8.318670576735095e-06,
"loss": 0.3391288220882416,
"step": 183
},
{
"epoch": 0.17816509319777293,
"grad_norm": 0.1262752115726471,
"learning_rate": 8.3088954056696e-06,
"loss": 0.26286113262176514,
"step": 184
},
{
"epoch": 0.17913338174776083,
"grad_norm": 0.1450256109237671,
"learning_rate": 8.299120234604106e-06,
"loss": 0.33761149644851685,
"step": 185
},
{
"epoch": 0.18010167029774873,
"grad_norm": 0.145137757062912,
"learning_rate": 8.289345063538613e-06,
"loss": 0.299782931804657,
"step": 186
},
{
"epoch": 0.18106995884773663,
"grad_norm": 0.14223727583885193,
"learning_rate": 8.279569892473119e-06,
"loss": 0.28673055768013,
"step": 187
},
{
"epoch": 0.18203824739772453,
"grad_norm": 0.14671868085861206,
"learning_rate": 8.269794721407626e-06,
"loss": 0.3334650993347168,
"step": 188
},
{
"epoch": 0.1830065359477124,
"grad_norm": 0.14561879634857178,
"learning_rate": 8.260019550342132e-06,
"loss": 0.3131367564201355,
"step": 189
},
{
"epoch": 0.1839748244977003,
"grad_norm": 0.13915206491947174,
"learning_rate": 8.250244379276639e-06,
"loss": 0.285634845495224,
"step": 190
},
{
"epoch": 0.1849431130476882,
"grad_norm": 0.13024187088012695,
"learning_rate": 8.240469208211144e-06,
"loss": 0.3532557487487793,
"step": 191
},
{
"epoch": 0.1859114015976761,
"grad_norm": 0.1433749794960022,
"learning_rate": 8.230694037145652e-06,
"loss": 0.3235865831375122,
"step": 192
},
{
"epoch": 0.186879690147664,
"grad_norm": 0.1517333984375,
"learning_rate": 8.220918866080157e-06,
"loss": 0.2964053153991699,
"step": 193
},
{
"epoch": 0.1878479786976519,
"grad_norm": 0.139493927359581,
"learning_rate": 8.211143695014665e-06,
"loss": 0.2810608148574829,
"step": 194
},
{
"epoch": 0.1888162672476398,
"grad_norm": 0.12849940359592438,
"learning_rate": 8.20136852394917e-06,
"loss": 0.2949499189853668,
"step": 195
},
{
"epoch": 0.18978455579762768,
"grad_norm": 0.14101386070251465,
"learning_rate": 8.191593352883677e-06,
"loss": 0.28787606954574585,
"step": 196
},
{
"epoch": 0.19075284434761558,
"grad_norm": 0.13321508467197418,
"learning_rate": 8.181818181818183e-06,
"loss": 0.31819286942481995,
"step": 197
},
{
"epoch": 0.19172113289760348,
"grad_norm": 0.1368619203567505,
"learning_rate": 8.172043010752689e-06,
"loss": 0.2770519256591797,
"step": 198
},
{
"epoch": 0.19268942144759138,
"grad_norm": 0.14590312540531158,
"learning_rate": 8.162267839687196e-06,
"loss": 0.331787645816803,
"step": 199
},
{
"epoch": 0.19365770999757928,
"grad_norm": 0.14525046944618225,
"learning_rate": 8.152492668621701e-06,
"loss": 0.3185243308544159,
"step": 200
},
{
"epoch": 0.19462599854756718,
"grad_norm": 0.14318214356899261,
"learning_rate": 8.142717497556209e-06,
"loss": 0.2779344320297241,
"step": 201
},
{
"epoch": 0.19559428709755508,
"grad_norm": 0.13709904253482819,
"learning_rate": 8.132942326490714e-06,
"loss": 0.2518289387226105,
"step": 202
},
{
"epoch": 0.19656257564754298,
"grad_norm": 0.1377800703048706,
"learning_rate": 8.12316715542522e-06,
"loss": 0.2550484836101532,
"step": 203
},
{
"epoch": 0.19753086419753085,
"grad_norm": 0.15116380155086517,
"learning_rate": 8.113391984359727e-06,
"loss": 0.3201026916503906,
"step": 204
},
{
"epoch": 0.19849915274751875,
"grad_norm": 0.13895870745182037,
"learning_rate": 8.103616813294233e-06,
"loss": 0.314879834651947,
"step": 205
},
{
"epoch": 0.19946744129750665,
"grad_norm": 0.1607581377029419,
"learning_rate": 8.09384164222874e-06,
"loss": 0.35479456186294556,
"step": 206
},
{
"epoch": 0.20043572984749455,
"grad_norm": 0.14690084755420685,
"learning_rate": 8.084066471163246e-06,
"loss": 0.35220852494239807,
"step": 207
},
{
"epoch": 0.20140401839748245,
"grad_norm": 0.14206227660179138,
"learning_rate": 8.074291300097751e-06,
"loss": 0.30387187004089355,
"step": 208
},
{
"epoch": 0.20237230694747035,
"grad_norm": 0.15204882621765137,
"learning_rate": 8.064516129032258e-06,
"loss": 0.2756717801094055,
"step": 209
},
{
"epoch": 0.20334059549745825,
"grad_norm": 0.1398657113313675,
"learning_rate": 8.054740957966764e-06,
"loss": 0.2777653932571411,
"step": 210
},
{
"epoch": 0.20430888404744613,
"grad_norm": 0.13735412061214447,
"learning_rate": 8.044965786901271e-06,
"loss": 0.28360527753829956,
"step": 211
},
{
"epoch": 0.20527717259743403,
"grad_norm": 0.15420980751514435,
"learning_rate": 8.035190615835777e-06,
"loss": 0.32784318923950195,
"step": 212
},
{
"epoch": 0.20624546114742193,
"grad_norm": 0.14892657101154327,
"learning_rate": 8.025415444770284e-06,
"loss": 0.33523041009902954,
"step": 213
},
{
"epoch": 0.20721374969740983,
"grad_norm": 0.13076002895832062,
"learning_rate": 8.01564027370479e-06,
"loss": 0.2862524092197418,
"step": 214
},
{
"epoch": 0.20818203824739773,
"grad_norm": 0.1415518820285797,
"learning_rate": 8.005865102639297e-06,
"loss": 0.33531394600868225,
"step": 215
},
{
"epoch": 0.20915032679738563,
"grad_norm": 0.1702524572610855,
"learning_rate": 7.996089931573803e-06,
"loss": 0.32485491037368774,
"step": 216
},
{
"epoch": 0.21011861534737353,
"grad_norm": 0.1344050168991089,
"learning_rate": 7.98631476050831e-06,
"loss": 0.3258602023124695,
"step": 217
},
{
"epoch": 0.21108690389736143,
"grad_norm": 0.1570902317762375,
"learning_rate": 7.976539589442815e-06,
"loss": 0.32586684823036194,
"step": 218
},
{
"epoch": 0.2120551924473493,
"grad_norm": 0.15897458791732788,
"learning_rate": 7.966764418377323e-06,
"loss": 0.2798767685890198,
"step": 219
},
{
"epoch": 0.2130234809973372,
"grad_norm": 0.15497955679893494,
"learning_rate": 7.956989247311828e-06,
"loss": 0.3338768482208252,
"step": 220
},
{
"epoch": 0.2139917695473251,
"grad_norm": 0.14507335424423218,
"learning_rate": 7.947214076246334e-06,
"loss": 0.2613910138607025,
"step": 221
},
{
"epoch": 0.214960058097313,
"grad_norm": 0.1506527066230774,
"learning_rate": 7.937438905180841e-06,
"loss": 0.2877991795539856,
"step": 222
},
{
"epoch": 0.2159283466473009,
"grad_norm": 0.14218902587890625,
"learning_rate": 7.927663734115347e-06,
"loss": 0.28079351782798767,
"step": 223
},
{
"epoch": 0.2168966351972888,
"grad_norm": 0.14527355134487152,
"learning_rate": 7.917888563049854e-06,
"loss": 0.29178351163864136,
"step": 224
},
{
"epoch": 0.2178649237472767,
"grad_norm": 0.1565907746553421,
"learning_rate": 7.90811339198436e-06,
"loss": 0.29092147946357727,
"step": 225
},
{
"epoch": 0.21883321229726457,
"grad_norm": 0.16128268837928772,
"learning_rate": 7.898338220918867e-06,
"loss": 0.30649298429489136,
"step": 226
},
{
"epoch": 0.21980150084725247,
"grad_norm": 0.13981635868549347,
"learning_rate": 7.888563049853372e-06,
"loss": 0.2914465069770813,
"step": 227
},
{
"epoch": 0.22076978939724037,
"grad_norm": 0.13276293873786926,
"learning_rate": 7.87878787878788e-06,
"loss": 0.29372796416282654,
"step": 228
},
{
"epoch": 0.22173807794722827,
"grad_norm": 0.14917345345020294,
"learning_rate": 7.869012707722385e-06,
"loss": 0.30806928873062134,
"step": 229
},
{
"epoch": 0.22270636649721617,
"grad_norm": 0.15436047315597534,
"learning_rate": 7.859237536656893e-06,
"loss": 0.32170775532722473,
"step": 230
},
{
"epoch": 0.22367465504720407,
"grad_norm": 0.14901861548423767,
"learning_rate": 7.849462365591398e-06,
"loss": 0.2876305878162384,
"step": 231
},
{
"epoch": 0.22464294359719197,
"grad_norm": 0.13269929587841034,
"learning_rate": 7.839687194525904e-06,
"loss": 0.34557828307151794,
"step": 232
},
{
"epoch": 0.22561123214717985,
"grad_norm": 0.14736007153987885,
"learning_rate": 7.829912023460411e-06,
"loss": 0.3261890411376953,
"step": 233
},
{
"epoch": 0.22657952069716775,
"grad_norm": 0.157369464635849,
"learning_rate": 7.820136852394917e-06,
"loss": 0.33243319392204285,
"step": 234
},
{
"epoch": 0.22754780924715565,
"grad_norm": 0.15422044694423676,
"learning_rate": 7.810361681329424e-06,
"loss": 0.32125651836395264,
"step": 235
},
{
"epoch": 0.22851609779714355,
"grad_norm": 0.15290172398090363,
"learning_rate": 7.80058651026393e-06,
"loss": 0.3197525143623352,
"step": 236
},
{
"epoch": 0.22948438634713145,
"grad_norm": 0.13229885697364807,
"learning_rate": 7.790811339198437e-06,
"loss": 0.3051709532737732,
"step": 237
},
{
"epoch": 0.23045267489711935,
"grad_norm": 0.15573133528232574,
"learning_rate": 7.781036168132942e-06,
"loss": 0.3190789520740509,
"step": 238
},
{
"epoch": 0.23142096344710725,
"grad_norm": 0.1598438322544098,
"learning_rate": 7.77126099706745e-06,
"loss": 0.32250896096229553,
"step": 239
},
{
"epoch": 0.23238925199709515,
"grad_norm": 0.13183802366256714,
"learning_rate": 7.761485826001955e-06,
"loss": 0.31600791215896606,
"step": 240
},
{
"epoch": 0.23335754054708302,
"grad_norm": 0.14657722413539886,
"learning_rate": 7.751710654936462e-06,
"loss": 0.2864221930503845,
"step": 241
},
{
"epoch": 0.23432582909707092,
"grad_norm": 0.1653253585100174,
"learning_rate": 7.741935483870968e-06,
"loss": 0.30967211723327637,
"step": 242
},
{
"epoch": 0.23529411764705882,
"grad_norm": 0.15613378584384918,
"learning_rate": 7.732160312805475e-06,
"loss": 0.30234426259994507,
"step": 243
},
{
"epoch": 0.23626240619704672,
"grad_norm": 0.1411314755678177,
"learning_rate": 7.722385141739981e-06,
"loss": 0.28815943002700806,
"step": 244
},
{
"epoch": 0.23723069474703462,
"grad_norm": 0.14803080260753632,
"learning_rate": 7.712609970674486e-06,
"loss": 0.2975384294986725,
"step": 245
},
{
"epoch": 0.23819898329702252,
"grad_norm": 0.1584216207265854,
"learning_rate": 7.702834799608994e-06,
"loss": 0.2974746525287628,
"step": 246
},
{
"epoch": 0.23916727184701042,
"grad_norm": 0.16107121109962463,
"learning_rate": 7.6930596285435e-06,
"loss": 0.31581875681877136,
"step": 247
},
{
"epoch": 0.2401355603969983,
"grad_norm": 0.17307540774345398,
"learning_rate": 7.683284457478007e-06,
"loss": 0.29687726497650146,
"step": 248
},
{
"epoch": 0.2411038489469862,
"grad_norm": 0.16493360698223114,
"learning_rate": 7.673509286412512e-06,
"loss": 0.35423439741134644,
"step": 249
},
{
"epoch": 0.2420721374969741,
"grad_norm": 0.14273418486118317,
"learning_rate": 7.66373411534702e-06,
"loss": 0.29487237334251404,
"step": 250
},
{
"epoch": 0.243040426046962,
"grad_norm": 0.164155974984169,
"learning_rate": 7.653958944281525e-06,
"loss": 0.32345789670944214,
"step": 251
},
{
"epoch": 0.2440087145969499,
"grad_norm": 0.15766294300556183,
"learning_rate": 7.644183773216032e-06,
"loss": 0.319865882396698,
"step": 252
},
{
"epoch": 0.2449770031469378,
"grad_norm": 0.15514512360095978,
"learning_rate": 7.634408602150538e-06,
"loss": 0.3162165880203247,
"step": 253
},
{
"epoch": 0.2459452916969257,
"grad_norm": 0.15435358881950378,
"learning_rate": 7.624633431085044e-06,
"loss": 0.28262361884117126,
"step": 254
},
{
"epoch": 0.24691358024691357,
"grad_norm": 0.14835764467716217,
"learning_rate": 7.614858260019551e-06,
"loss": 0.28154870867729187,
"step": 255
},
{
"epoch": 0.24788186879690147,
"grad_norm": 0.15369164943695068,
"learning_rate": 7.605083088954057e-06,
"loss": 0.3451993465423584,
"step": 256
},
{
"epoch": 0.24885015734688937,
"grad_norm": 0.13362520933151245,
"learning_rate": 7.5953079178885636e-06,
"loss": 0.2882372736930847,
"step": 257
},
{
"epoch": 0.24981844589687727,
"grad_norm": 0.14538030326366425,
"learning_rate": 7.58553274682307e-06,
"loss": 0.30620044469833374,
"step": 258
},
{
"epoch": 0.25078673444686517,
"grad_norm": 0.1679297834634781,
"learning_rate": 7.5757575757575764e-06,
"loss": 0.3071752190589905,
"step": 259
},
{
"epoch": 0.25175502299685304,
"grad_norm": 0.1505117118358612,
"learning_rate": 7.565982404692083e-06,
"loss": 0.2810661792755127,
"step": 260
},
{
"epoch": 0.25272331154684097,
"grad_norm": 0.13865773379802704,
"learning_rate": 7.556207233626589e-06,
"loss": 0.2779511511325836,
"step": 261
},
{
"epoch": 0.25369160009682884,
"grad_norm": 0.14810754358768463,
"learning_rate": 7.546432062561096e-06,
"loss": 0.3234580457210541,
"step": 262
},
{
"epoch": 0.25465988864681677,
"grad_norm": 0.15836334228515625,
"learning_rate": 7.536656891495602e-06,
"loss": 0.3174368739128113,
"step": 263
},
{
"epoch": 0.25562817719680464,
"grad_norm": 0.15845665335655212,
"learning_rate": 7.526881720430108e-06,
"loss": 0.292019784450531,
"step": 264
},
{
"epoch": 0.25659646574679257,
"grad_norm": 0.1532326340675354,
"learning_rate": 7.517106549364614e-06,
"loss": 0.3122391700744629,
"step": 265
},
{
"epoch": 0.25756475429678044,
"grad_norm": 0.16675175726413727,
"learning_rate": 7.507331378299121e-06,
"loss": 0.33715298771858215,
"step": 266
},
{
"epoch": 0.2585330428467683,
"grad_norm": 0.1525373011827469,
"learning_rate": 7.497556207233627e-06,
"loss": 0.32337823510169983,
"step": 267
},
{
"epoch": 0.25950133139675624,
"grad_norm": 0.15557681024074554,
"learning_rate": 7.4877810361681334e-06,
"loss": 0.33496809005737305,
"step": 268
},
{
"epoch": 0.2604696199467441,
"grad_norm": 0.1552857607603073,
"learning_rate": 7.47800586510264e-06,
"loss": 0.29575905203819275,
"step": 269
},
{
"epoch": 0.26143790849673204,
"grad_norm": 0.17039579153060913,
"learning_rate": 7.468230694037146e-06,
"loss": 0.3094739317893982,
"step": 270
},
{
"epoch": 0.2624061970467199,
"grad_norm": 0.15926915407180786,
"learning_rate": 7.458455522971653e-06,
"loss": 0.30116045475006104,
"step": 271
},
{
"epoch": 0.26337448559670784,
"grad_norm": 0.15641555190086365,
"learning_rate": 7.448680351906159e-06,
"loss": 0.33569908142089844,
"step": 272
},
{
"epoch": 0.2643427741466957,
"grad_norm": 0.14819961786270142,
"learning_rate": 7.438905180840666e-06,
"loss": 0.3124736249446869,
"step": 273
},
{
"epoch": 0.2653110626966836,
"grad_norm": 0.15690119564533234,
"learning_rate": 7.429130009775172e-06,
"loss": 0.2757438123226166,
"step": 274
},
{
"epoch": 0.2662793512466715,
"grad_norm": 0.1666852980852127,
"learning_rate": 7.4193548387096784e-06,
"loss": 0.3246593177318573,
"step": 275
},
{
"epoch": 0.2672476397966594,
"grad_norm": 0.16269199550151825,
"learning_rate": 7.409579667644185e-06,
"loss": 0.2908874750137329,
"step": 276
},
{
"epoch": 0.2682159283466473,
"grad_norm": 0.1640820950269699,
"learning_rate": 7.3998044965786904e-06,
"loss": 0.3241088390350342,
"step": 277
},
{
"epoch": 0.2691842168966352,
"grad_norm": 0.15672433376312256,
"learning_rate": 7.390029325513197e-06,
"loss": 0.2804832458496094,
"step": 278
},
{
"epoch": 0.2701525054466231,
"grad_norm": 0.14417491853237152,
"learning_rate": 7.380254154447703e-06,
"loss": 0.29370468854904175,
"step": 279
},
{
"epoch": 0.271120793996611,
"grad_norm": 0.14853185415267944,
"learning_rate": 7.37047898338221e-06,
"loss": 0.278653621673584,
"step": 280
},
{
"epoch": 0.27208908254659886,
"grad_norm": 0.1435244232416153,
"learning_rate": 7.360703812316716e-06,
"loss": 0.2943509817123413,
"step": 281
},
{
"epoch": 0.2730573710965868,
"grad_norm": 0.16490313410758972,
"learning_rate": 7.350928641251223e-06,
"loss": 0.32479339838027954,
"step": 282
},
{
"epoch": 0.27402565964657466,
"grad_norm": 0.14916065335273743,
"learning_rate": 7.341153470185729e-06,
"loss": 0.3356713652610779,
"step": 283
},
{
"epoch": 0.2749939481965626,
"grad_norm": 0.158106729388237,
"learning_rate": 7.3313782991202354e-06,
"loss": 0.3492435812950134,
"step": 284
},
{
"epoch": 0.27596223674655046,
"grad_norm": 0.15584231913089752,
"learning_rate": 7.321603128054742e-06,
"loss": 0.30297964811325073,
"step": 285
},
{
"epoch": 0.2769305252965384,
"grad_norm": 0.15248778462409973,
"learning_rate": 7.311827956989248e-06,
"loss": 0.28723299503326416,
"step": 286
},
{
"epoch": 0.27789881384652626,
"grad_norm": 0.14938265085220337,
"learning_rate": 7.302052785923755e-06,
"loss": 0.2916105389595032,
"step": 287
},
{
"epoch": 0.2788671023965142,
"grad_norm": 0.15402287244796753,
"learning_rate": 7.292277614858261e-06,
"loss": 0.29120731353759766,
"step": 288
},
{
"epoch": 0.27983539094650206,
"grad_norm": 0.15380002558231354,
"learning_rate": 7.282502443792767e-06,
"loss": 0.34908658266067505,
"step": 289
},
{
"epoch": 0.28080367949648993,
"grad_norm": 0.15176504850387573,
"learning_rate": 7.272727272727273e-06,
"loss": 0.346072793006897,
"step": 290
},
{
"epoch": 0.28177196804647786,
"grad_norm": 0.1672578603029251,
"learning_rate": 7.26295210166178e-06,
"loss": 0.31465622782707214,
"step": 291
},
{
"epoch": 0.28274025659646573,
"grad_norm": 0.14658679068088531,
"learning_rate": 7.253176930596286e-06,
"loss": 0.28865766525268555,
"step": 292
},
{
"epoch": 0.28370854514645366,
"grad_norm": 0.1515662670135498,
"learning_rate": 7.2434017595307925e-06,
"loss": 0.30215001106262207,
"step": 293
},
{
"epoch": 0.28467683369644153,
"grad_norm": 0.18654093146324158,
"learning_rate": 7.233626588465299e-06,
"loss": 0.3126724064350128,
"step": 294
},
{
"epoch": 0.28564512224642946,
"grad_norm": 0.1485200673341751,
"learning_rate": 7.223851417399805e-06,
"loss": 0.2654643654823303,
"step": 295
},
{
"epoch": 0.28661341079641733,
"grad_norm": 0.1476060450077057,
"learning_rate": 7.214076246334312e-06,
"loss": 0.2976668179035187,
"step": 296
},
{
"epoch": 0.2875816993464052,
"grad_norm": 0.14893072843551636,
"learning_rate": 7.204301075268818e-06,
"loss": 0.3222036361694336,
"step": 297
},
{
"epoch": 0.28854998789639313,
"grad_norm": 0.15406127274036407,
"learning_rate": 7.194525904203325e-06,
"loss": 0.33120018243789673,
"step": 298
},
{
"epoch": 0.289518276446381,
"grad_norm": 0.17568016052246094,
"learning_rate": 7.184750733137831e-06,
"loss": 0.3500976860523224,
"step": 299
},
{
"epoch": 0.29048656499636893,
"grad_norm": 0.15244793891906738,
"learning_rate": 7.1749755620723375e-06,
"loss": 0.29483211040496826,
"step": 300
},
{
"epoch": 0.2914548535463568,
"grad_norm": 0.14039957523345947,
"learning_rate": 7.165200391006844e-06,
"loss": 0.30703267455101013,
"step": 301
},
{
"epoch": 0.29242314209634473,
"grad_norm": 0.16928645968437195,
"learning_rate": 7.1554252199413495e-06,
"loss": 0.285969078540802,
"step": 302
},
{
"epoch": 0.2933914306463326,
"grad_norm": 0.15592513978481293,
"learning_rate": 7.145650048875856e-06,
"loss": 0.2873114347457886,
"step": 303
},
{
"epoch": 0.2943597191963205,
"grad_norm": 0.1448688954114914,
"learning_rate": 7.135874877810362e-06,
"loss": 0.256163626909256,
"step": 304
},
{
"epoch": 0.2953280077463084,
"grad_norm": 0.1630263477563858,
"learning_rate": 7.126099706744869e-06,
"loss": 0.2792564630508423,
"step": 305
},
{
"epoch": 0.2962962962962963,
"grad_norm": 0.17319388687610626,
"learning_rate": 7.116324535679375e-06,
"loss": 0.2806742489337921,
"step": 306
},
{
"epoch": 0.2972645848462842,
"grad_norm": 0.15814098715782166,
"learning_rate": 7.106549364613882e-06,
"loss": 0.2914005517959595,
"step": 307
},
{
"epoch": 0.2982328733962721,
"grad_norm": 0.16322992742061615,
"learning_rate": 7.096774193548388e-06,
"loss": 0.293082594871521,
"step": 308
},
{
"epoch": 0.29920116194626,
"grad_norm": 0.1633518785238266,
"learning_rate": 7.0869990224828945e-06,
"loss": 0.2866649925708771,
"step": 309
},
{
"epoch": 0.3001694504962479,
"grad_norm": 0.16669867932796478,
"learning_rate": 7.077223851417401e-06,
"loss": 0.28914347290992737,
"step": 310
},
{
"epoch": 0.30113773904623575,
"grad_norm": 0.15272612869739532,
"learning_rate": 7.067448680351907e-06,
"loss": 0.3084270656108856,
"step": 311
},
{
"epoch": 0.3021060275962237,
"grad_norm": 0.15269719064235687,
"learning_rate": 7.057673509286414e-06,
"loss": 0.30022960901260376,
"step": 312
},
{
"epoch": 0.30307431614621155,
"grad_norm": 0.17254814505577087,
"learning_rate": 7.04789833822092e-06,
"loss": 0.32925137877464294,
"step": 313
},
{
"epoch": 0.3040426046961995,
"grad_norm": 0.1512719690799713,
"learning_rate": 7.038123167155427e-06,
"loss": 0.302681028842926,
"step": 314
},
{
"epoch": 0.30501089324618735,
"grad_norm": 0.14636491239070892,
"learning_rate": 7.028347996089932e-06,
"loss": 0.28604596853256226,
"step": 315
},
{
"epoch": 0.3059791817961753,
"grad_norm": 0.17897070944309235,
"learning_rate": 7.018572825024439e-06,
"loss": 0.3204849362373352,
"step": 316
},
{
"epoch": 0.30694747034616315,
"grad_norm": 0.1784772127866745,
"learning_rate": 7.008797653958945e-06,
"loss": 0.26697760820388794,
"step": 317
},
{
"epoch": 0.307915758896151,
"grad_norm": 0.15881852805614471,
"learning_rate": 6.9990224828934515e-06,
"loss": 0.2558300495147705,
"step": 318
},
{
"epoch": 0.30888404744613895,
"grad_norm": 0.15870684385299683,
"learning_rate": 6.989247311827958e-06,
"loss": 0.3025914132595062,
"step": 319
},
{
"epoch": 0.3098523359961268,
"grad_norm": 0.1527319699525833,
"learning_rate": 6.979472140762464e-06,
"loss": 0.3081514835357666,
"step": 320
},
{
"epoch": 0.31082062454611475,
"grad_norm": 0.1624557226896286,
"learning_rate": 6.969696969696971e-06,
"loss": 0.30399930477142334,
"step": 321
},
{
"epoch": 0.3117889130961026,
"grad_norm": 0.1645076870918274,
"learning_rate": 6.959921798631477e-06,
"loss": 0.294676810503006,
"step": 322
},
{
"epoch": 0.31275720164609055,
"grad_norm": 0.14686374366283417,
"learning_rate": 6.950146627565984e-06,
"loss": 0.2830040752887726,
"step": 323
},
{
"epoch": 0.3137254901960784,
"grad_norm": 0.15746936202049255,
"learning_rate": 6.94037145650049e-06,
"loss": 0.3081029951572418,
"step": 324
},
{
"epoch": 0.31469377874606635,
"grad_norm": 0.14588280022144318,
"learning_rate": 6.9305962854349965e-06,
"loss": 0.2718695104122162,
"step": 325
},
{
"epoch": 0.3156620672960542,
"grad_norm": 0.16646429896354675,
"learning_rate": 6.920821114369503e-06,
"loss": 0.31487900018692017,
"step": 326
},
{
"epoch": 0.3166303558460421,
"grad_norm": 0.1697472333908081,
"learning_rate": 6.911045943304009e-06,
"loss": 0.29448121786117554,
"step": 327
},
{
"epoch": 0.31759864439603,
"grad_norm": 0.16235950589179993,
"learning_rate": 6.901270772238515e-06,
"loss": 0.2658935487270355,
"step": 328
},
{
"epoch": 0.3185669329460179,
"grad_norm": 0.167832151055336,
"learning_rate": 6.891495601173021e-06,
"loss": 0.28193116188049316,
"step": 329
},
{
"epoch": 0.31953522149600583,
"grad_norm": 0.15145322680473328,
"learning_rate": 6.881720430107528e-06,
"loss": 0.2826996445655823,
"step": 330
},
{
"epoch": 0.3205035100459937,
"grad_norm": 0.19785556197166443,
"learning_rate": 6.871945259042033e-06,
"loss": 0.31135326623916626,
"step": 331
},
{
"epoch": 0.32147179859598163,
"grad_norm": 0.1549469530582428,
"learning_rate": 6.86217008797654e-06,
"loss": 0.3094058930873871,
"step": 332
},
{
"epoch": 0.3224400871459695,
"grad_norm": 0.15144820511341095,
"learning_rate": 6.852394916911046e-06,
"loss": 0.26982536911964417,
"step": 333
},
{
"epoch": 0.3234083756959574,
"grad_norm": 0.15371711552143097,
"learning_rate": 6.842619745845553e-06,
"loss": 0.29452741146087646,
"step": 334
},
{
"epoch": 0.3243766642459453,
"grad_norm": 0.15917186439037323,
"learning_rate": 6.832844574780059e-06,
"loss": 0.29580235481262207,
"step": 335
},
{
"epoch": 0.3253449527959332,
"grad_norm": 0.1550978273153305,
"learning_rate": 6.823069403714565e-06,
"loss": 0.36159706115722656,
"step": 336
},
{
"epoch": 0.3263132413459211,
"grad_norm": 0.14809750020503998,
"learning_rate": 6.813294232649071e-06,
"loss": 0.2756076753139496,
"step": 337
},
{
"epoch": 0.327281529895909,
"grad_norm": 0.15484212338924408,
"learning_rate": 6.8035190615835775e-06,
"loss": 0.26915088295936584,
"step": 338
},
{
"epoch": 0.3282498184458969,
"grad_norm": 0.16279636323451996,
"learning_rate": 6.793743890518084e-06,
"loss": 0.3175829350948334,
"step": 339
},
{
"epoch": 0.3292181069958848,
"grad_norm": 0.16756120324134827,
"learning_rate": 6.78396871945259e-06,
"loss": 0.3034948706626892,
"step": 340
},
{
"epoch": 0.33018639554587265,
"grad_norm": 0.1840161681175232,
"learning_rate": 6.774193548387097e-06,
"loss": 0.32814380526542664,
"step": 341
},
{
"epoch": 0.3311546840958606,
"grad_norm": 0.17000938951969147,
"learning_rate": 6.764418377321603e-06,
"loss": 0.2874530553817749,
"step": 342
},
{
"epoch": 0.33212297264584845,
"grad_norm": 0.16385532915592194,
"learning_rate": 6.75464320625611e-06,
"loss": 0.3138440251350403,
"step": 343
},
{
"epoch": 0.3330912611958364,
"grad_norm": 0.18285442888736725,
"learning_rate": 6.744868035190616e-06,
"loss": 0.30819112062454224,
"step": 344
},
{
"epoch": 0.33405954974582425,
"grad_norm": 0.16300350427627563,
"learning_rate": 6.7350928641251225e-06,
"loss": 0.291953444480896,
"step": 345
},
{
"epoch": 0.3350278382958122,
"grad_norm": 0.18186615407466888,
"learning_rate": 6.725317693059629e-06,
"loss": 0.27950209379196167,
"step": 346
},
{
"epoch": 0.33599612684580005,
"grad_norm": 0.16226314008235931,
"learning_rate": 6.715542521994135e-06,
"loss": 0.29765087366104126,
"step": 347
},
{
"epoch": 0.3369644153957879,
"grad_norm": 0.18730367720127106,
"learning_rate": 6.705767350928642e-06,
"loss": 0.3281936049461365,
"step": 348
},
{
"epoch": 0.33793270394577585,
"grad_norm": 0.16875147819519043,
"learning_rate": 6.695992179863147e-06,
"loss": 0.3038584291934967,
"step": 349
},
{
"epoch": 0.3389009924957637,
"grad_norm": 0.17776557803153992,
"learning_rate": 6.686217008797654e-06,
"loss": 0.2937045395374298,
"step": 350
},
{
"epoch": 0.33986928104575165,
"grad_norm": 0.14246642589569092,
"learning_rate": 6.67644183773216e-06,
"loss": 0.3092482388019562,
"step": 351
},
{
"epoch": 0.3408375695957395,
"grad_norm": 0.15377755463123322,
"learning_rate": 6.666666666666667e-06,
"loss": 0.2588135898113251,
"step": 352
},
{
"epoch": 0.34180585814572745,
"grad_norm": 0.1557725965976715,
"learning_rate": 6.656891495601173e-06,
"loss": 0.2831732928752899,
"step": 353
},
{
"epoch": 0.3427741466957153,
"grad_norm": 0.15106302499771118,
"learning_rate": 6.6471163245356795e-06,
"loss": 0.26685526967048645,
"step": 354
},
{
"epoch": 0.3437424352457032,
"grad_norm": 0.17710185050964355,
"learning_rate": 6.637341153470186e-06,
"loss": 0.3084600567817688,
"step": 355
},
{
"epoch": 0.3447107237956911,
"grad_norm": 0.14838555455207825,
"learning_rate": 6.627565982404692e-06,
"loss": 0.2909257113933563,
"step": 356
},
{
"epoch": 0.345679012345679,
"grad_norm": 0.1700345277786255,
"learning_rate": 6.617790811339199e-06,
"loss": 0.3314460217952728,
"step": 357
},
{
"epoch": 0.3466473008956669,
"grad_norm": 0.17465804517269135,
"learning_rate": 6.608015640273705e-06,
"loss": 0.30570876598358154,
"step": 358
},
{
"epoch": 0.3476155894456548,
"grad_norm": 0.1821223646402359,
"learning_rate": 6.598240469208212e-06,
"loss": 0.3378984034061432,
"step": 359
},
{
"epoch": 0.3485838779956427,
"grad_norm": 0.15884800255298615,
"learning_rate": 6.588465298142718e-06,
"loss": 0.2953569293022156,
"step": 360
},
{
"epoch": 0.3495521665456306,
"grad_norm": 0.15600639581680298,
"learning_rate": 6.5786901270772245e-06,
"loss": 0.36609965562820435,
"step": 361
},
{
"epoch": 0.3505204550956185,
"grad_norm": 0.15438097715377808,
"learning_rate": 6.56891495601173e-06,
"loss": 0.33403828740119934,
"step": 362
},
{
"epoch": 0.3514887436456064,
"grad_norm": 0.1604045182466507,
"learning_rate": 6.5591397849462365e-06,
"loss": 0.2991517186164856,
"step": 363
},
{
"epoch": 0.35245703219559427,
"grad_norm": 0.17102058231830597,
"learning_rate": 6.549364613880743e-06,
"loss": 0.28116142749786377,
"step": 364
},
{
"epoch": 0.3534253207455822,
"grad_norm": 0.15797586739063263,
"learning_rate": 6.539589442815249e-06,
"loss": 0.30658936500549316,
"step": 365
},
{
"epoch": 0.35439360929557007,
"grad_norm": 0.1578051745891571,
"learning_rate": 6.529814271749756e-06,
"loss": 0.3066115379333496,
"step": 366
},
{
"epoch": 0.355361897845558,
"grad_norm": 0.15831097960472107,
"learning_rate": 6.520039100684262e-06,
"loss": 0.30893969535827637,
"step": 367
},
{
"epoch": 0.35633018639554587,
"grad_norm": 0.16711507737636566,
"learning_rate": 6.510263929618769e-06,
"loss": 0.29801586270332336,
"step": 368
},
{
"epoch": 0.3572984749455338,
"grad_norm": 0.1717497706413269,
"learning_rate": 6.500488758553275e-06,
"loss": 0.3415631949901581,
"step": 369
},
{
"epoch": 0.35826676349552167,
"grad_norm": 0.1789737045764923,
"learning_rate": 6.4907135874877815e-06,
"loss": 0.3342001140117645,
"step": 370
},
{
"epoch": 0.35923505204550954,
"grad_norm": 0.16474243998527527,
"learning_rate": 6.480938416422288e-06,
"loss": 0.29570648074150085,
"step": 371
},
{
"epoch": 0.36020334059549747,
"grad_norm": 0.17131595313549042,
"learning_rate": 6.471163245356794e-06,
"loss": 0.33989042043685913,
"step": 372
},
{
"epoch": 0.36117162914548534,
"grad_norm": 0.1660817712545395,
"learning_rate": 6.461388074291301e-06,
"loss": 0.28438785672187805,
"step": 373
},
{
"epoch": 0.36213991769547327,
"grad_norm": 0.16126903891563416,
"learning_rate": 6.451612903225806e-06,
"loss": 0.30980294942855835,
"step": 374
},
{
"epoch": 0.36310820624546114,
"grad_norm": 0.1648305058479309,
"learning_rate": 6.441837732160313e-06,
"loss": 0.325278103351593,
"step": 375
},
{
"epoch": 0.36407649479544907,
"grad_norm": 0.17030373215675354,
"learning_rate": 6.432062561094819e-06,
"loss": 0.3066975176334381,
"step": 376
},
{
"epoch": 0.36504478334543694,
"grad_norm": 0.17431139945983887,
"learning_rate": 6.422287390029326e-06,
"loss": 0.28780531883239746,
"step": 377
},
{
"epoch": 0.3660130718954248,
"grad_norm": 0.1605546921491623,
"learning_rate": 6.412512218963832e-06,
"loss": 0.2759549915790558,
"step": 378
},
{
"epoch": 0.36698136044541274,
"grad_norm": 0.16246621310710907,
"learning_rate": 6.4027370478983385e-06,
"loss": 0.2838786840438843,
"step": 379
},
{
"epoch": 0.3679496489954006,
"grad_norm": 0.173859640955925,
"learning_rate": 6.392961876832845e-06,
"loss": 0.28778478503227234,
"step": 380
},
{
"epoch": 0.36891793754538854,
"grad_norm": 0.17580649256706238,
"learning_rate": 6.383186705767351e-06,
"loss": 0.29734641313552856,
"step": 381
},
{
"epoch": 0.3698862260953764,
"grad_norm": 0.21752490103244781,
"learning_rate": 6.373411534701858e-06,
"loss": 0.331564724445343,
"step": 382
},
{
"epoch": 0.37085451464536434,
"grad_norm": 0.1802123785018921,
"learning_rate": 6.363636363636364e-06,
"loss": 0.2878391742706299,
"step": 383
},
{
"epoch": 0.3718228031953522,
"grad_norm": 0.16118982434272766,
"learning_rate": 6.353861192570871e-06,
"loss": 0.29216498136520386,
"step": 384
},
{
"epoch": 0.3727910917453401,
"grad_norm": 0.18390141427516937,
"learning_rate": 6.344086021505377e-06,
"loss": 0.3013034164905548,
"step": 385
},
{
"epoch": 0.373759380295328,
"grad_norm": 0.17186126112937927,
"learning_rate": 6.3343108504398835e-06,
"loss": 0.2939417362213135,
"step": 386
},
{
"epoch": 0.3747276688453159,
"grad_norm": 0.1863613873720169,
"learning_rate": 6.324535679374389e-06,
"loss": 0.3011291027069092,
"step": 387
},
{
"epoch": 0.3756959573953038,
"grad_norm": 0.16492682695388794,
"learning_rate": 6.3147605083088955e-06,
"loss": 0.29598119854927063,
"step": 388
},
{
"epoch": 0.3766642459452917,
"grad_norm": 0.1751633882522583,
"learning_rate": 6.304985337243402e-06,
"loss": 0.3110932409763336,
"step": 389
},
{
"epoch": 0.3776325344952796,
"grad_norm": 0.1898571252822876,
"learning_rate": 6.295210166177908e-06,
"loss": 0.30633416771888733,
"step": 390
},
{
"epoch": 0.3786008230452675,
"grad_norm": 0.1563596874475479,
"learning_rate": 6.285434995112415e-06,
"loss": 0.2938535809516907,
"step": 391
},
{
"epoch": 0.37956911159525536,
"grad_norm": 0.18046635389328003,
"learning_rate": 6.275659824046921e-06,
"loss": 0.2856330871582031,
"step": 392
},
{
"epoch": 0.3805374001452433,
"grad_norm": 0.15708908438682556,
"learning_rate": 6.265884652981428e-06,
"loss": 0.2904341518878937,
"step": 393
},
{
"epoch": 0.38150568869523116,
"grad_norm": 0.17719998955726624,
"learning_rate": 6.256109481915934e-06,
"loss": 0.28807759284973145,
"step": 394
},
{
"epoch": 0.3824739772452191,
"grad_norm": 0.15774236619472504,
"learning_rate": 6.2463343108504405e-06,
"loss": 0.2675943076610565,
"step": 395
},
{
"epoch": 0.38344226579520696,
"grad_norm": 0.15558338165283203,
"learning_rate": 6.236559139784947e-06,
"loss": 0.2567376494407654,
"step": 396
},
{
"epoch": 0.3844105543451949,
"grad_norm": 0.1891474723815918,
"learning_rate": 6.226783968719453e-06,
"loss": 0.2961275279521942,
"step": 397
},
{
"epoch": 0.38537884289518276,
"grad_norm": 0.17778225243091583,
"learning_rate": 6.21700879765396e-06,
"loss": 0.3132587671279907,
"step": 398
},
{
"epoch": 0.3863471314451707,
"grad_norm": 0.1902502477169037,
"learning_rate": 6.207233626588466e-06,
"loss": 0.31374305486679077,
"step": 399
},
{
"epoch": 0.38731541999515856,
"grad_norm": 0.1710149198770523,
"learning_rate": 6.197458455522972e-06,
"loss": 0.34003812074661255,
"step": 400
},
{
"epoch": 0.38828370854514643,
"grad_norm": 0.16460557281970978,
"learning_rate": 6.187683284457478e-06,
"loss": 0.2728930115699768,
"step": 401
},
{
"epoch": 0.38925199709513436,
"grad_norm": 0.17229019105434418,
"learning_rate": 6.177908113391985e-06,
"loss": 0.2700308561325073,
"step": 402
},
{
"epoch": 0.39022028564512223,
"grad_norm": 0.18431095778942108,
"learning_rate": 6.168132942326491e-06,
"loss": 0.2867494821548462,
"step": 403
},
{
"epoch": 0.39118857419511016,
"grad_norm": 0.17898224294185638,
"learning_rate": 6.1583577712609975e-06,
"loss": 0.26027926802635193,
"step": 404
},
{
"epoch": 0.39215686274509803,
"grad_norm": 0.1536150425672531,
"learning_rate": 6.148582600195504e-06,
"loss": 0.2740130126476288,
"step": 405
},
{
"epoch": 0.39312515129508596,
"grad_norm": 0.46492230892181396,
"learning_rate": 6.13880742913001e-06,
"loss": 0.2832326889038086,
"step": 406
},
{
"epoch": 0.39409343984507383,
"grad_norm": 0.18063224852085114,
"learning_rate": 6.129032258064517e-06,
"loss": 0.2683679163455963,
"step": 407
},
{
"epoch": 0.3950617283950617,
"grad_norm": 0.18479417264461517,
"learning_rate": 6.119257086999023e-06,
"loss": 0.2960650324821472,
"step": 408
},
{
"epoch": 0.39603001694504963,
"grad_norm": 0.16542568802833557,
"learning_rate": 6.10948191593353e-06,
"loss": 0.28208404779434204,
"step": 409
},
{
"epoch": 0.3969983054950375,
"grad_norm": 0.1611918956041336,
"learning_rate": 6.099706744868036e-06,
"loss": 0.2653481364250183,
"step": 410
},
{
"epoch": 0.39796659404502543,
"grad_norm": 0.17886482179164886,
"learning_rate": 6.0899315738025425e-06,
"loss": 0.33219113945961,
"step": 411
},
{
"epoch": 0.3989348825950133,
"grad_norm": 0.16463807225227356,
"learning_rate": 6.080156402737049e-06,
"loss": 0.28929123282432556,
"step": 412
},
{
"epoch": 0.39990317114500123,
"grad_norm": 0.18031014502048492,
"learning_rate": 6.0703812316715545e-06,
"loss": 0.27609509229660034,
"step": 413
},
{
"epoch": 0.4008714596949891,
"grad_norm": 0.18028417229652405,
"learning_rate": 6.060606060606061e-06,
"loss": 0.29474079608917236,
"step": 414
},
{
"epoch": 0.401839748244977,
"grad_norm": 0.16485083103179932,
"learning_rate": 6.050830889540567e-06,
"loss": 0.33132994174957275,
"step": 415
},
{
"epoch": 0.4028080367949649,
"grad_norm": 0.17660938203334808,
"learning_rate": 6.041055718475074e-06,
"loss": 0.2553951144218445,
"step": 416
},
{
"epoch": 0.4037763253449528,
"grad_norm": 0.18007521331310272,
"learning_rate": 6.03128054740958e-06,
"loss": 0.2640475034713745,
"step": 417
},
{
"epoch": 0.4047446138949407,
"grad_norm": 0.16710299253463745,
"learning_rate": 6.021505376344087e-06,
"loss": 0.26302963495254517,
"step": 418
},
{
"epoch": 0.4057129024449286,
"grad_norm": 0.1827956736087799,
"learning_rate": 6.011730205278593e-06,
"loss": 0.3405194878578186,
"step": 419
},
{
"epoch": 0.4066811909949165,
"grad_norm": 0.1711130291223526,
"learning_rate": 6.0019550342130995e-06,
"loss": 0.280174195766449,
"step": 420
},
{
"epoch": 0.4076494795449044,
"grad_norm": 0.16884659230709076,
"learning_rate": 5.992179863147606e-06,
"loss": 0.26946425437927246,
"step": 421
},
{
"epoch": 0.40861776809489225,
"grad_norm": 0.17745757102966309,
"learning_rate": 5.982404692082112e-06,
"loss": 0.3392980396747589,
"step": 422
},
{
"epoch": 0.4095860566448802,
"grad_norm": 0.1780301034450531,
"learning_rate": 5.972629521016619e-06,
"loss": 0.30674225091934204,
"step": 423
},
{
"epoch": 0.41055434519486805,
"grad_norm": 0.17808158695697784,
"learning_rate": 5.962854349951125e-06,
"loss": 0.3345290720462799,
"step": 424
},
{
"epoch": 0.411522633744856,
"grad_norm": 0.16129203140735626,
"learning_rate": 5.953079178885631e-06,
"loss": 0.2831481695175171,
"step": 425
},
{
"epoch": 0.41249092229484385,
"grad_norm": 0.18456275761127472,
"learning_rate": 5.943304007820137e-06,
"loss": 0.3257300853729248,
"step": 426
},
{
"epoch": 0.4134592108448318,
"grad_norm": 0.18435759842395782,
"learning_rate": 5.933528836754644e-06,
"loss": 0.26924797892570496,
"step": 427
},
{
"epoch": 0.41442749939481965,
"grad_norm": 0.1941821128129959,
"learning_rate": 5.92375366568915e-06,
"loss": 0.3252018392086029,
"step": 428
},
{
"epoch": 0.4153957879448075,
"grad_norm": 0.17482848465442657,
"learning_rate": 5.9139784946236566e-06,
"loss": 0.33910396695137024,
"step": 429
},
{
"epoch": 0.41636407649479545,
"grad_norm": 0.18026143312454224,
"learning_rate": 5.904203323558163e-06,
"loss": 0.2899131178855896,
"step": 430
},
{
"epoch": 0.4173323650447833,
"grad_norm": 0.18868599832057953,
"learning_rate": 5.894428152492669e-06,
"loss": 0.26209527254104614,
"step": 431
},
{
"epoch": 0.41830065359477125,
"grad_norm": 0.172159805893898,
"learning_rate": 5.884652981427176e-06,
"loss": 0.2784045338630676,
"step": 432
},
{
"epoch": 0.4192689421447591,
"grad_norm": 0.19189684092998505,
"learning_rate": 5.874877810361682e-06,
"loss": 0.3449173867702484,
"step": 433
},
{
"epoch": 0.42023723069474705,
"grad_norm": 0.18038828670978546,
"learning_rate": 5.865102639296189e-06,
"loss": 0.260070264339447,
"step": 434
},
{
"epoch": 0.4212055192447349,
"grad_norm": 0.17879043519496918,
"learning_rate": 5.855327468230695e-06,
"loss": 0.2970094382762909,
"step": 435
},
{
"epoch": 0.42217380779472286,
"grad_norm": 0.19369956851005554,
"learning_rate": 5.8455522971652016e-06,
"loss": 0.262788325548172,
"step": 436
},
{
"epoch": 0.4231420963447107,
"grad_norm": 0.1980774849653244,
"learning_rate": 5.835777126099708e-06,
"loss": 0.3415115475654602,
"step": 437
},
{
"epoch": 0.4241103848946986,
"grad_norm": 0.1517505943775177,
"learning_rate": 5.8260019550342136e-06,
"loss": 0.2550700902938843,
"step": 438
},
{
"epoch": 0.42507867344468653,
"grad_norm": 0.16468308866024017,
"learning_rate": 5.81622678396872e-06,
"loss": 0.3277415633201599,
"step": 439
},
{
"epoch": 0.4260469619946744,
"grad_norm": 0.1632845550775528,
"learning_rate": 5.806451612903226e-06,
"loss": 0.2696504294872284,
"step": 440
},
{
"epoch": 0.42701525054466233,
"grad_norm": 0.17740678787231445,
"learning_rate": 5.796676441837733e-06,
"loss": 0.3146612048149109,
"step": 441
},
{
"epoch": 0.4279835390946502,
"grad_norm": 0.1720811426639557,
"learning_rate": 5.786901270772239e-06,
"loss": 0.293180376291275,
"step": 442
},
{
"epoch": 0.42895182764463813,
"grad_norm": 0.16457650065422058,
"learning_rate": 5.777126099706746e-06,
"loss": 0.25529271364212036,
"step": 443
},
{
"epoch": 0.429920116194626,
"grad_norm": 0.18886499106884003,
"learning_rate": 5.767350928641252e-06,
"loss": 0.2667441964149475,
"step": 444
},
{
"epoch": 0.4308884047446139,
"grad_norm": 0.16837763786315918,
"learning_rate": 5.7575757575757586e-06,
"loss": 0.2874595820903778,
"step": 445
},
{
"epoch": 0.4318566932946018,
"grad_norm": 0.19567479193210602,
"learning_rate": 5.747800586510265e-06,
"loss": 0.2736223042011261,
"step": 446
},
{
"epoch": 0.4328249818445897,
"grad_norm": 0.18101078271865845,
"learning_rate": 5.738025415444771e-06,
"loss": 0.3007189631462097,
"step": 447
},
{
"epoch": 0.4337932703945776,
"grad_norm": 0.17572757601737976,
"learning_rate": 5.728250244379278e-06,
"loss": 0.3632327914237976,
"step": 448
},
{
"epoch": 0.4347615589445655,
"grad_norm": 0.17773869633674622,
"learning_rate": 5.718475073313784e-06,
"loss": 0.3204823434352875,
"step": 449
},
{
"epoch": 0.4357298474945534,
"grad_norm": 0.1703418791294098,
"learning_rate": 5.708699902248291e-06,
"loss": 0.31934505701065063,
"step": 450
},
{
"epoch": 0.4366981360445413,
"grad_norm": 0.16851919889450073,
"learning_rate": 5.698924731182796e-06,
"loss": 0.33900323510169983,
"step": 451
},
{
"epoch": 0.43766642459452915,
"grad_norm": 0.16920781135559082,
"learning_rate": 5.689149560117303e-06,
"loss": 0.2747448980808258,
"step": 452
},
{
"epoch": 0.4386347131445171,
"grad_norm": 0.20053993165493011,
"learning_rate": 5.679374389051809e-06,
"loss": 0.28275251388549805,
"step": 453
},
{
"epoch": 0.43960300169450495,
"grad_norm": 0.17686837911605835,
"learning_rate": 5.6695992179863156e-06,
"loss": 0.26753419637680054,
"step": 454
},
{
"epoch": 0.4405712902444929,
"grad_norm": 0.20442141592502594,
"learning_rate": 5.659824046920822e-06,
"loss": 0.32636407017707825,
"step": 455
},
{
"epoch": 0.44153957879448075,
"grad_norm": 0.1751495897769928,
"learning_rate": 5.6500488758553284e-06,
"loss": 0.29740267992019653,
"step": 456
},
{
"epoch": 0.4425078673444687,
"grad_norm": 0.17008022964000702,
"learning_rate": 5.640273704789835e-06,
"loss": 0.2965855002403259,
"step": 457
},
{
"epoch": 0.44347615589445655,
"grad_norm": 0.1770244538784027,
"learning_rate": 5.630498533724341e-06,
"loss": 0.39362120628356934,
"step": 458
},
{
"epoch": 0.4444444444444444,
"grad_norm": 0.17790165543556213,
"learning_rate": 5.620723362658846e-06,
"loss": 0.2864190340042114,
"step": 459
},
{
"epoch": 0.44541273299443235,
"grad_norm": 0.17405082285404205,
"learning_rate": 5.6109481915933524e-06,
"loss": 0.2946798801422119,
"step": 460
},
{
"epoch": 0.4463810215444202,
"grad_norm": 0.16010600328445435,
"learning_rate": 5.601173020527859e-06,
"loss": 0.32160502672195435,
"step": 461
},
{
"epoch": 0.44734931009440815,
"grad_norm": 0.1997617781162262,
"learning_rate": 5.591397849462365e-06,
"loss": 0.32814455032348633,
"step": 462
},
{
"epoch": 0.448317598644396,
"grad_norm": 0.17624011635780334,
"learning_rate": 5.581622678396872e-06,
"loss": 0.2808952331542969,
"step": 463
},
{
"epoch": 0.44928588719438395,
"grad_norm": 0.16722382605075836,
"learning_rate": 5.571847507331378e-06,
"loss": 0.26833376288414,
"step": 464
},
{
"epoch": 0.4502541757443718,
"grad_norm": 0.16350014507770538,
"learning_rate": 5.562072336265885e-06,
"loss": 0.2904164791107178,
"step": 465
},
{
"epoch": 0.4512224642943597,
"grad_norm": 0.15504086017608643,
"learning_rate": 5.552297165200391e-06,
"loss": 0.3124706745147705,
"step": 466
},
{
"epoch": 0.4521907528443476,
"grad_norm": 0.17865699529647827,
"learning_rate": 5.5425219941348974e-06,
"loss": 0.30932655930519104,
"step": 467
},
{
"epoch": 0.4531590413943355,
"grad_norm": 0.179380863904953,
"learning_rate": 5.532746823069404e-06,
"loss": 0.3099682033061981,
"step": 468
},
{
"epoch": 0.4541273299443234,
"grad_norm": 0.1848987489938736,
"learning_rate": 5.52297165200391e-06,
"loss": 0.310943603515625,
"step": 469
},
{
"epoch": 0.4550956184943113,
"grad_norm": 0.17355690896511078,
"learning_rate": 5.513196480938417e-06,
"loss": 0.27683690190315247,
"step": 470
},
{
"epoch": 0.4560639070442992,
"grad_norm": 0.18208661675453186,
"learning_rate": 5.503421309872923e-06,
"loss": 0.26567360758781433,
"step": 471
},
{
"epoch": 0.4570321955942871,
"grad_norm": 0.17654170095920563,
"learning_rate": 5.493646138807429e-06,
"loss": 0.29490426182746887,
"step": 472
},
{
"epoch": 0.45800048414427497,
"grad_norm": 0.1757243424654007,
"learning_rate": 5.483870967741935e-06,
"loss": 0.30711159110069275,
"step": 473
},
{
"epoch": 0.4589687726942629,
"grad_norm": 0.17413422465324402,
"learning_rate": 5.474095796676442e-06,
"loss": 0.28973209857940674,
"step": 474
},
{
"epoch": 0.45993706124425077,
"grad_norm": 0.20302073657512665,
"learning_rate": 5.464320625610948e-06,
"loss": 0.3249307870864868,
"step": 475
},
{
"epoch": 0.4609053497942387,
"grad_norm": 0.17959873378276825,
"learning_rate": 5.4545454545454545e-06,
"loss": 0.29579484462738037,
"step": 476
},
{
"epoch": 0.46187363834422657,
"grad_norm": 0.17562335729599,
"learning_rate": 5.444770283479961e-06,
"loss": 0.3038690984249115,
"step": 477
},
{
"epoch": 0.4628419268942145,
"grad_norm": 0.16495366394519806,
"learning_rate": 5.434995112414467e-06,
"loss": 0.281146377325058,
"step": 478
},
{
"epoch": 0.46381021544420237,
"grad_norm": 0.17205455899238586,
"learning_rate": 5.425219941348974e-06,
"loss": 0.2786451280117035,
"step": 479
},
{
"epoch": 0.4647785039941903,
"grad_norm": 0.19133879244327545,
"learning_rate": 5.41544477028348e-06,
"loss": 0.3336411416530609,
"step": 480
},
{
"epoch": 0.46574679254417817,
"grad_norm": 0.18153399229049683,
"learning_rate": 5.405669599217987e-06,
"loss": 0.28267285227775574,
"step": 481
},
{
"epoch": 0.46671508109416604,
"grad_norm": 0.16732986271381378,
"learning_rate": 5.395894428152493e-06,
"loss": 0.2745664119720459,
"step": 482
},
{
"epoch": 0.46768336964415397,
"grad_norm": 0.19961762428283691,
"learning_rate": 5.3861192570869995e-06,
"loss": 0.2916579246520996,
"step": 483
},
{
"epoch": 0.46865165819414184,
"grad_norm": 0.18672992289066315,
"learning_rate": 5.376344086021506e-06,
"loss": 0.2882307767868042,
"step": 484
},
{
"epoch": 0.46961994674412977,
"grad_norm": 0.16605433821678162,
"learning_rate": 5.3665689149560115e-06,
"loss": 0.32832133769989014,
"step": 485
},
{
"epoch": 0.47058823529411764,
"grad_norm": 0.1809573769569397,
"learning_rate": 5.356793743890518e-06,
"loss": 0.28796786069869995,
"step": 486
},
{
"epoch": 0.47155652384410557,
"grad_norm": 0.15820080041885376,
"learning_rate": 5.347018572825024e-06,
"loss": 0.24655906856060028,
"step": 487
},
{
"epoch": 0.47252481239409344,
"grad_norm": 0.183393657207489,
"learning_rate": 5.337243401759531e-06,
"loss": 0.3693656027317047,
"step": 488
},
{
"epoch": 0.4734931009440813,
"grad_norm": 0.17333702743053436,
"learning_rate": 5.327468230694037e-06,
"loss": 0.2813875079154968,
"step": 489
},
{
"epoch": 0.47446138949406924,
"grad_norm": 0.18470393121242523,
"learning_rate": 5.317693059628544e-06,
"loss": 0.32118356227874756,
"step": 490
},
{
"epoch": 0.4754296780440571,
"grad_norm": 0.17366191744804382,
"learning_rate": 5.30791788856305e-06,
"loss": 0.27578046917915344,
"step": 491
},
{
"epoch": 0.47639796659404504,
"grad_norm": 0.16945011913776398,
"learning_rate": 5.2981427174975565e-06,
"loss": 0.3115886151790619,
"step": 492
},
{
"epoch": 0.4773662551440329,
"grad_norm": 0.20388440787792206,
"learning_rate": 5.288367546432063e-06,
"loss": 0.309696227312088,
"step": 493
},
{
"epoch": 0.47833454369402084,
"grad_norm": 0.156901016831398,
"learning_rate": 5.278592375366569e-06,
"loss": 0.27146872878074646,
"step": 494
},
{
"epoch": 0.4793028322440087,
"grad_norm": 0.20242440700531006,
"learning_rate": 5.268817204301076e-06,
"loss": 0.33286309242248535,
"step": 495
},
{
"epoch": 0.4802711207939966,
"grad_norm": 0.20036989450454712,
"learning_rate": 5.259042033235582e-06,
"loss": 0.285398006439209,
"step": 496
},
{
"epoch": 0.4812394093439845,
"grad_norm": 0.16521663963794708,
"learning_rate": 5.249266862170089e-06,
"loss": 0.27880388498306274,
"step": 497
},
{
"epoch": 0.4822076978939724,
"grad_norm": 0.16702234745025635,
"learning_rate": 5.239491691104594e-06,
"loss": 0.29399362206459045,
"step": 498
},
{
"epoch": 0.4831759864439603,
"grad_norm": 0.18302516639232635,
"learning_rate": 5.229716520039101e-06,
"loss": 0.2757553160190582,
"step": 499
},
{
"epoch": 0.4841442749939482,
"grad_norm": 0.17423763871192932,
"learning_rate": 5.219941348973607e-06,
"loss": 0.2870354950428009,
"step": 500
},
{
"epoch": 0.4851125635439361,
"grad_norm": 0.19603262841701508,
"learning_rate": 5.2101661779081135e-06,
"loss": 0.2726498246192932,
"step": 501
},
{
"epoch": 0.486080852093924,
"grad_norm": 0.1614205241203308,
"learning_rate": 5.20039100684262e-06,
"loss": 0.25111639499664307,
"step": 502
},
{
"epoch": 0.48704914064391186,
"grad_norm": 0.17319105565547943,
"learning_rate": 5.190615835777126e-06,
"loss": 0.27468031644821167,
"step": 503
},
{
"epoch": 0.4880174291938998,
"grad_norm": 0.16882063448429108,
"learning_rate": 5.180840664711633e-06,
"loss": 0.27068573236465454,
"step": 504
},
{
"epoch": 0.48898571774388766,
"grad_norm": 0.18153499066829681,
"learning_rate": 5.171065493646139e-06,
"loss": 0.28188517689704895,
"step": 505
},
{
"epoch": 0.4899540062938756,
"grad_norm": 0.1816774159669876,
"learning_rate": 5.161290322580646e-06,
"loss": 0.32222485542297363,
"step": 506
},
{
"epoch": 0.49092229484386346,
"grad_norm": 0.16442593932151794,
"learning_rate": 5.151515151515152e-06,
"loss": 0.30542707443237305,
"step": 507
},
{
"epoch": 0.4918905833938514,
"grad_norm": 0.1821308732032776,
"learning_rate": 5.1417399804496585e-06,
"loss": 0.293884813785553,
"step": 508
},
{
"epoch": 0.49285887194383926,
"grad_norm": 0.1683465987443924,
"learning_rate": 5.131964809384165e-06,
"loss": 0.26638439297676086,
"step": 509
},
{
"epoch": 0.49382716049382713,
"grad_norm": 0.17483524978160858,
"learning_rate": 5.1221896383186705e-06,
"loss": 0.30652916431427,
"step": 510
},
{
"epoch": 0.49479544904381506,
"grad_norm": 0.1842867136001587,
"learning_rate": 5.112414467253177e-06,
"loss": 0.364931583404541,
"step": 511
},
{
"epoch": 0.49576373759380293,
"grad_norm": 0.19743406772613525,
"learning_rate": 5.102639296187683e-06,
"loss": 0.2590721547603607,
"step": 512
},
{
"epoch": 0.49673202614379086,
"grad_norm": 0.18802092969417572,
"learning_rate": 5.09286412512219e-06,
"loss": 0.31060951948165894,
"step": 513
},
{
"epoch": 0.49770031469377873,
"grad_norm": 0.16384844481945038,
"learning_rate": 5.083088954056696e-06,
"loss": 0.27959296107292175,
"step": 514
},
{
"epoch": 0.49866860324376666,
"grad_norm": 0.2127850353717804,
"learning_rate": 5.073313782991203e-06,
"loss": 0.3346613049507141,
"step": 515
},
{
"epoch": 0.49963689179375453,
"grad_norm": 0.17491693794727325,
"learning_rate": 5.063538611925709e-06,
"loss": 0.2960091531276703,
"step": 516
},
{
"epoch": 0.5006051803437425,
"grad_norm": 0.1880018264055252,
"learning_rate": 5.0537634408602155e-06,
"loss": 0.2997010350227356,
"step": 517
},
{
"epoch": 0.5015734688937303,
"grad_norm": 0.1748742163181305,
"learning_rate": 5.043988269794722e-06,
"loss": 0.2931768596172333,
"step": 518
},
{
"epoch": 0.5025417574437182,
"grad_norm": 0.15878638625144958,
"learning_rate": 5.034213098729228e-06,
"loss": 0.254057914018631,
"step": 519
},
{
"epoch": 0.5035100459937061,
"grad_norm": 0.2069050818681717,
"learning_rate": 5.024437927663735e-06,
"loss": 0.2735084295272827,
"step": 520
},
{
"epoch": 0.5044783345436941,
"grad_norm": 0.16623827815055847,
"learning_rate": 5.014662756598241e-06,
"loss": 0.25306957960128784,
"step": 521
},
{
"epoch": 0.5054466230936819,
"grad_norm": 0.1891428381204605,
"learning_rate": 5.004887585532748e-06,
"loss": 0.2810228765010834,
"step": 522
},
{
"epoch": 0.5064149116436698,
"grad_norm": 0.2315511256456375,
"learning_rate": 4.995112414467253e-06,
"loss": 0.2733577489852905,
"step": 523
},
{
"epoch": 0.5073832001936577,
"grad_norm": 0.16957992315292358,
"learning_rate": 4.98533724340176e-06,
"loss": 0.27292630076408386,
"step": 524
},
{
"epoch": 0.5083514887436456,
"grad_norm": 0.17816272377967834,
"learning_rate": 4.975562072336266e-06,
"loss": 0.27049022912979126,
"step": 525
},
{
"epoch": 0.5093197772936335,
"grad_norm": 0.17525239288806915,
"learning_rate": 4.9657869012707725e-06,
"loss": 0.2759566903114319,
"step": 526
},
{
"epoch": 0.5102880658436214,
"grad_norm": 0.18764440715312958,
"learning_rate": 4.956011730205279e-06,
"loss": 0.27127569913864136,
"step": 527
},
{
"epoch": 0.5112563543936093,
"grad_norm": 0.18698008358478546,
"learning_rate": 4.946236559139785e-06,
"loss": 0.2902853786945343,
"step": 528
},
{
"epoch": 0.5122246429435972,
"grad_norm": 0.17745737731456757,
"learning_rate": 4.936461388074292e-06,
"loss": 0.32079097628593445,
"step": 529
},
{
"epoch": 0.5131929314935851,
"grad_norm": 0.17994803190231323,
"learning_rate": 4.926686217008798e-06,
"loss": 0.27671536803245544,
"step": 530
},
{
"epoch": 0.514161220043573,
"grad_norm": 0.1736883968114853,
"learning_rate": 4.916911045943305e-06,
"loss": 0.29842981696128845,
"step": 531
},
{
"epoch": 0.5151295085935609,
"grad_norm": 0.17682136595249176,
"learning_rate": 4.907135874877811e-06,
"loss": 0.28436222672462463,
"step": 532
},
{
"epoch": 0.5160977971435488,
"grad_norm": 0.18292061984539032,
"learning_rate": 4.8973607038123175e-06,
"loss": 0.2722223401069641,
"step": 533
},
{
"epoch": 0.5170660856935366,
"grad_norm": 0.1844838410615921,
"learning_rate": 4.887585532746824e-06,
"loss": 0.26570263504981995,
"step": 534
},
{
"epoch": 0.5180343742435246,
"grad_norm": 0.18923698365688324,
"learning_rate": 4.87781036168133e-06,
"loss": 0.3637017607688904,
"step": 535
},
{
"epoch": 0.5190026627935125,
"grad_norm": 0.16404788196086884,
"learning_rate": 4.868035190615836e-06,
"loss": 0.28690028190612793,
"step": 536
},
{
"epoch": 0.5199709513435004,
"grad_norm": 0.1970244199037552,
"learning_rate": 4.858260019550342e-06,
"loss": 0.2881229519844055,
"step": 537
},
{
"epoch": 0.5209392398934882,
"grad_norm": 0.1616058647632599,
"learning_rate": 4.848484848484849e-06,
"loss": 0.2817743122577667,
"step": 538
},
{
"epoch": 0.5219075284434762,
"grad_norm": 0.18213775753974915,
"learning_rate": 4.838709677419355e-06,
"loss": 0.2646360695362091,
"step": 539
},
{
"epoch": 0.5228758169934641,
"grad_norm": 0.1883658468723297,
"learning_rate": 4.828934506353862e-06,
"loss": 0.32929307222366333,
"step": 540
},
{
"epoch": 0.523844105543452,
"grad_norm": 0.1898542195558548,
"learning_rate": 4.819159335288368e-06,
"loss": 0.27511003613471985,
"step": 541
},
{
"epoch": 0.5248123940934398,
"grad_norm": 0.1817118525505066,
"learning_rate": 4.8093841642228745e-06,
"loss": 0.27474260330200195,
"step": 542
},
{
"epoch": 0.5257806826434277,
"grad_norm": 0.19033664464950562,
"learning_rate": 4.799608993157381e-06,
"loss": 0.32937076687812805,
"step": 543
},
{
"epoch": 0.5267489711934157,
"grad_norm": 0.18128858506679535,
"learning_rate": 4.789833822091887e-06,
"loss": 0.3000837564468384,
"step": 544
},
{
"epoch": 0.5277172597434036,
"grad_norm": 0.18828479945659637,
"learning_rate": 4.780058651026394e-06,
"loss": 0.3411107063293457,
"step": 545
},
{
"epoch": 0.5286855482933914,
"grad_norm": 0.21484431624412537,
"learning_rate": 4.7702834799609e-06,
"loss": 0.32155299186706543,
"step": 546
},
{
"epoch": 0.5296538368433793,
"grad_norm": 0.19658254086971283,
"learning_rate": 4.760508308895407e-06,
"loss": 0.2874881327152252,
"step": 547
},
{
"epoch": 0.5306221253933672,
"grad_norm": 0.19206486642360687,
"learning_rate": 4.750733137829912e-06,
"loss": 0.31940093636512756,
"step": 548
},
{
"epoch": 0.5315904139433552,
"grad_norm": 0.2160305678844452,
"learning_rate": 4.740957966764419e-06,
"loss": 0.3076990246772766,
"step": 549
},
{
"epoch": 0.532558702493343,
"grad_norm": 0.18269337713718414,
"learning_rate": 4.731182795698925e-06,
"loss": 0.27614516019821167,
"step": 550
},
{
"epoch": 0.5335269910433309,
"grad_norm": 0.18244397640228271,
"learning_rate": 4.7214076246334315e-06,
"loss": 0.28050702810287476,
"step": 551
},
{
"epoch": 0.5344952795933188,
"grad_norm": 0.16129615902900696,
"learning_rate": 4.711632453567938e-06,
"loss": 0.28339844942092896,
"step": 552
},
{
"epoch": 0.5354635681433068,
"grad_norm": 0.1605842411518097,
"learning_rate": 4.701857282502444e-06,
"loss": 0.28020599484443665,
"step": 553
},
{
"epoch": 0.5364318566932946,
"grad_norm": 0.17767396569252014,
"learning_rate": 4.692082111436951e-06,
"loss": 0.26483970880508423,
"step": 554
},
{
"epoch": 0.5374001452432825,
"grad_norm": 0.17699919641017914,
"learning_rate": 4.682306940371456e-06,
"loss": 0.27966004610061646,
"step": 555
},
{
"epoch": 0.5383684337932704,
"grad_norm": 0.19072790443897247,
"learning_rate": 4.672531769305963e-06,
"loss": 0.282270222902298,
"step": 556
},
{
"epoch": 0.5393367223432582,
"grad_norm": 0.1869659274816513,
"learning_rate": 4.662756598240469e-06,
"loss": 0.3432008624076843,
"step": 557
},
{
"epoch": 0.5403050108932462,
"grad_norm": 0.18851327896118164,
"learning_rate": 4.652981427174976e-06,
"loss": 0.2940416932106018,
"step": 558
},
{
"epoch": 0.5412732994432341,
"grad_norm": 0.20195099711418152,
"learning_rate": 4.643206256109482e-06,
"loss": 0.30535370111465454,
"step": 559
},
{
"epoch": 0.542241587993222,
"grad_norm": 0.17963868379592896,
"learning_rate": 4.6334310850439885e-06,
"loss": 0.3085969388484955,
"step": 560
},
{
"epoch": 0.5432098765432098,
"grad_norm": 0.170511856675148,
"learning_rate": 4.623655913978495e-06,
"loss": 0.3072543442249298,
"step": 561
},
{
"epoch": 0.5441781650931977,
"grad_norm": 0.18112339079380035,
"learning_rate": 4.613880742913001e-06,
"loss": 0.3005993366241455,
"step": 562
},
{
"epoch": 0.5451464536431857,
"grad_norm": 0.18734918534755707,
"learning_rate": 4.604105571847508e-06,
"loss": 0.2741018533706665,
"step": 563
},
{
"epoch": 0.5461147421931736,
"grad_norm": 0.18844076991081238,
"learning_rate": 4.594330400782014e-06,
"loss": 0.27082327008247375,
"step": 564
},
{
"epoch": 0.5470830307431614,
"grad_norm": 0.18848098814487457,
"learning_rate": 4.58455522971652e-06,
"loss": 0.2900712490081787,
"step": 565
},
{
"epoch": 0.5480513192931493,
"grad_norm": 0.18217670917510986,
"learning_rate": 4.574780058651026e-06,
"loss": 0.2818305492401123,
"step": 566
},
{
"epoch": 0.5490196078431373,
"grad_norm": 0.1847630739212036,
"learning_rate": 4.565004887585533e-06,
"loss": 0.3052092492580414,
"step": 567
},
{
"epoch": 0.5499878963931252,
"grad_norm": 0.17965678870677948,
"learning_rate": 4.555229716520039e-06,
"loss": 0.37061765789985657,
"step": 568
},
{
"epoch": 0.550956184943113,
"grad_norm": 0.182081401348114,
"learning_rate": 4.5454545454545455e-06,
"loss": 0.2812265157699585,
"step": 569
},
{
"epoch": 0.5519244734931009,
"grad_norm": 0.1826234757900238,
"learning_rate": 4.535679374389052e-06,
"loss": 0.33616483211517334,
"step": 570
},
{
"epoch": 0.5528927620430888,
"grad_norm": 0.18337081372737885,
"learning_rate": 4.525904203323558e-06,
"loss": 0.26936668157577515,
"step": 571
},
{
"epoch": 0.5538610505930768,
"grad_norm": 0.19079728424549103,
"learning_rate": 4.516129032258065e-06,
"loss": 0.31582286953926086,
"step": 572
},
{
"epoch": 0.5548293391430646,
"grad_norm": 0.19277691841125488,
"learning_rate": 4.506353861192571e-06,
"loss": 0.26570555567741394,
"step": 573
},
{
"epoch": 0.5557976276930525,
"grad_norm": 0.1885417103767395,
"learning_rate": 4.496578690127078e-06,
"loss": 0.283278226852417,
"step": 574
},
{
"epoch": 0.5567659162430404,
"grad_norm": 0.1837887167930603,
"learning_rate": 4.486803519061584e-06,
"loss": 0.2855049967765808,
"step": 575
},
{
"epoch": 0.5577342047930284,
"grad_norm": 0.1967337280511856,
"learning_rate": 4.4770283479960905e-06,
"loss": 0.2932886481285095,
"step": 576
},
{
"epoch": 0.5587024933430162,
"grad_norm": 0.17725642025470734,
"learning_rate": 4.467253176930597e-06,
"loss": 0.27526989579200745,
"step": 577
},
{
"epoch": 0.5596707818930041,
"grad_norm": 0.17137347161769867,
"learning_rate": 4.4574780058651025e-06,
"loss": 0.3213641047477722,
"step": 578
},
{
"epoch": 0.560639070442992,
"grad_norm": 0.21623080968856812,
"learning_rate": 4.447702834799609e-06,
"loss": 0.30579251050949097,
"step": 579
},
{
"epoch": 0.5616073589929799,
"grad_norm": 0.17714564502239227,
"learning_rate": 4.437927663734115e-06,
"loss": 0.27001863718032837,
"step": 580
},
{
"epoch": 0.5625756475429678,
"grad_norm": 0.19795329868793488,
"learning_rate": 4.428152492668622e-06,
"loss": 0.3162938356399536,
"step": 581
},
{
"epoch": 0.5635439360929557,
"grad_norm": 0.16567392647266388,
"learning_rate": 4.418377321603128e-06,
"loss": 0.27828705310821533,
"step": 582
},
{
"epoch": 0.5645122246429436,
"grad_norm": 0.19157780706882477,
"learning_rate": 4.408602150537635e-06,
"loss": 0.26456013321876526,
"step": 583
},
{
"epoch": 0.5654805131929315,
"grad_norm": 0.18285039067268372,
"learning_rate": 4.398826979472141e-06,
"loss": 0.27962782979011536,
"step": 584
},
{
"epoch": 0.5664488017429193,
"grad_norm": 0.18198364973068237,
"learning_rate": 4.3890518084066475e-06,
"loss": 0.32034292817115784,
"step": 585
},
{
"epoch": 0.5674170902929073,
"grad_norm": 0.189778670668602,
"learning_rate": 4.379276637341154e-06,
"loss": 0.27116918563842773,
"step": 586
},
{
"epoch": 0.5683853788428952,
"grad_norm": 0.19017699360847473,
"learning_rate": 4.36950146627566e-06,
"loss": 0.28804683685302734,
"step": 587
},
{
"epoch": 0.5693536673928831,
"grad_norm": 0.1705840528011322,
"learning_rate": 4.359726295210167e-06,
"loss": 0.3060193657875061,
"step": 588
},
{
"epoch": 0.5703219559428709,
"grad_norm": 0.22186465561389923,
"learning_rate": 4.349951124144673e-06,
"loss": 0.26255226135253906,
"step": 589
},
{
"epoch": 0.5712902444928589,
"grad_norm": 0.16935674846172333,
"learning_rate": 4.34017595307918e-06,
"loss": 0.25682443380355835,
"step": 590
},
{
"epoch": 0.5722585330428468,
"grad_norm": 0.2110513299703598,
"learning_rate": 4.330400782013685e-06,
"loss": 0.3172002136707306,
"step": 591
},
{
"epoch": 0.5732268215928347,
"grad_norm": 0.17660263180732727,
"learning_rate": 4.320625610948192e-06,
"loss": 0.2504763603210449,
"step": 592
},
{
"epoch": 0.5741951101428225,
"grad_norm": 0.1752292513847351,
"learning_rate": 4.310850439882698e-06,
"loss": 0.28053516149520874,
"step": 593
},
{
"epoch": 0.5751633986928104,
"grad_norm": 0.17443427443504333,
"learning_rate": 4.3010752688172045e-06,
"loss": 0.27481114864349365,
"step": 594
},
{
"epoch": 0.5761316872427984,
"grad_norm": 0.20570909976959229,
"learning_rate": 4.291300097751711e-06,
"loss": 0.32052427530288696,
"step": 595
},
{
"epoch": 0.5770999757927863,
"grad_norm": 0.17960628867149353,
"learning_rate": 4.281524926686217e-06,
"loss": 0.30593350529670715,
"step": 596
},
{
"epoch": 0.5780682643427741,
"grad_norm": 0.20899339020252228,
"learning_rate": 4.271749755620724e-06,
"loss": 0.3231653571128845,
"step": 597
},
{
"epoch": 0.579036552892762,
"grad_norm": 0.17927585542201996,
"learning_rate": 4.26197458455523e-06,
"loss": 0.23228108882904053,
"step": 598
},
{
"epoch": 0.5800048414427499,
"grad_norm": 0.19766579568386078,
"learning_rate": 4.252199413489737e-06,
"loss": 0.3512587547302246,
"step": 599
},
{
"epoch": 0.5809731299927379,
"grad_norm": 0.2258554995059967,
"learning_rate": 4.242424242424243e-06,
"loss": 0.29843974113464355,
"step": 600
},
{
"epoch": 0.5819414185427257,
"grad_norm": 0.19223785400390625,
"learning_rate": 4.2326490713587495e-06,
"loss": 0.27962884306907654,
"step": 601
},
{
"epoch": 0.5829097070927136,
"grad_norm": 0.17844106256961823,
"learning_rate": 4.222873900293256e-06,
"loss": 0.27644073963165283,
"step": 602
},
{
"epoch": 0.5838779956427015,
"grad_norm": 0.1867385059595108,
"learning_rate": 4.213098729227762e-06,
"loss": 0.27366524934768677,
"step": 603
},
{
"epoch": 0.5848462841926895,
"grad_norm": 0.17379915714263916,
"learning_rate": 4.203323558162268e-06,
"loss": 0.30100804567337036,
"step": 604
},
{
"epoch": 0.5858145727426773,
"grad_norm": 0.1838119775056839,
"learning_rate": 4.193548387096774e-06,
"loss": 0.3351133167743683,
"step": 605
},
{
"epoch": 0.5867828612926652,
"grad_norm": 0.19593499600887299,
"learning_rate": 4.183773216031281e-06,
"loss": 0.28100982308387756,
"step": 606
},
{
"epoch": 0.5877511498426531,
"grad_norm": 0.16322395205497742,
"learning_rate": 4.173998044965787e-06,
"loss": 0.26457294821739197,
"step": 607
},
{
"epoch": 0.588719438392641,
"grad_norm": 0.1786675602197647,
"learning_rate": 4.164222873900294e-06,
"loss": 0.2559005618095398,
"step": 608
},
{
"epoch": 0.5896877269426289,
"grad_norm": 0.19520226120948792,
"learning_rate": 4.1544477028348e-06,
"loss": 0.2999897003173828,
"step": 609
},
{
"epoch": 0.5906560154926168,
"grad_norm": 0.17103256285190582,
"learning_rate": 4.1446725317693065e-06,
"loss": 0.30779922008514404,
"step": 610
},
{
"epoch": 0.5916243040426047,
"grad_norm": 0.17526350915431976,
"learning_rate": 4.134897360703813e-06,
"loss": 0.29173529148101807,
"step": 611
},
{
"epoch": 0.5925925925925926,
"grad_norm": 0.18206097185611725,
"learning_rate": 4.125122189638319e-06,
"loss": 0.29199522733688354,
"step": 612
},
{
"epoch": 0.5935608811425805,
"grad_norm": 0.1679670661687851,
"learning_rate": 4.115347018572826e-06,
"loss": 0.25542762875556946,
"step": 613
},
{
"epoch": 0.5945291696925684,
"grad_norm": 0.19803665578365326,
"learning_rate": 4.105571847507332e-06,
"loss": 0.2858905792236328,
"step": 614
},
{
"epoch": 0.5954974582425563,
"grad_norm": 0.17995841801166534,
"learning_rate": 4.095796676441839e-06,
"loss": 0.27671483159065247,
"step": 615
},
{
"epoch": 0.5964657467925442,
"grad_norm": 0.18616031110286713,
"learning_rate": 4.086021505376344e-06,
"loss": 0.2712816596031189,
"step": 616
},
{
"epoch": 0.597434035342532,
"grad_norm": 0.19008490443229675,
"learning_rate": 4.076246334310851e-06,
"loss": 0.2625333368778229,
"step": 617
},
{
"epoch": 0.59840232389252,
"grad_norm": 0.1998487263917923,
"learning_rate": 4.066471163245357e-06,
"loss": 0.28343838453292847,
"step": 618
},
{
"epoch": 0.5993706124425079,
"grad_norm": 0.17429369688034058,
"learning_rate": 4.0566959921798636e-06,
"loss": 0.2731628715991974,
"step": 619
},
{
"epoch": 0.6003389009924958,
"grad_norm": 0.19498169422149658,
"learning_rate": 4.04692082111437e-06,
"loss": 0.29789942502975464,
"step": 620
},
{
"epoch": 0.6013071895424836,
"grad_norm": 0.178371399641037,
"learning_rate": 4.0371456500488756e-06,
"loss": 0.28699758648872375,
"step": 621
},
{
"epoch": 0.6022754780924715,
"grad_norm": 0.1959543526172638,
"learning_rate": 4.027370478983382e-06,
"loss": 0.32473817467689514,
"step": 622
},
{
"epoch": 0.6032437666424595,
"grad_norm": 0.18459352850914001,
"learning_rate": 4.017595307917888e-06,
"loss": 0.2685423493385315,
"step": 623
},
{
"epoch": 0.6042120551924474,
"grad_norm": 0.18294654786586761,
"learning_rate": 4.007820136852395e-06,
"loss": 0.28354576230049133,
"step": 624
},
{
"epoch": 0.6051803437424352,
"grad_norm": 0.19509679079055786,
"learning_rate": 3.998044965786901e-06,
"loss": 0.30655306577682495,
"step": 625
},
{
"epoch": 0.6061486322924231,
"grad_norm": 0.18222194910049438,
"learning_rate": 3.988269794721408e-06,
"loss": 0.26319050788879395,
"step": 626
},
{
"epoch": 0.6071169208424111,
"grad_norm": 0.21766740083694458,
"learning_rate": 3.978494623655914e-06,
"loss": 0.29476338624954224,
"step": 627
},
{
"epoch": 0.608085209392399,
"grad_norm": 0.1838199496269226,
"learning_rate": 3.9687194525904206e-06,
"loss": 0.313698947429657,
"step": 628
},
{
"epoch": 0.6090534979423868,
"grad_norm": 0.18570809066295624,
"learning_rate": 3.958944281524927e-06,
"loss": 0.3509555160999298,
"step": 629
},
{
"epoch": 0.6100217864923747,
"grad_norm": 0.19644515216350555,
"learning_rate": 3.949169110459433e-06,
"loss": 0.2718711197376251,
"step": 630
},
{
"epoch": 0.6109900750423626,
"grad_norm": 0.1909233182668686,
"learning_rate": 3.93939393939394e-06,
"loss": 0.3205246925354004,
"step": 631
},
{
"epoch": 0.6119583635923506,
"grad_norm": 0.18373022973537445,
"learning_rate": 3.929618768328446e-06,
"loss": 0.295777827501297,
"step": 632
},
{
"epoch": 0.6129266521423384,
"grad_norm": 0.18277910351753235,
"learning_rate": 3.919843597262952e-06,
"loss": 0.3180069625377655,
"step": 633
},
{
"epoch": 0.6138949406923263,
"grad_norm": 0.19421808421611786,
"learning_rate": 3.910068426197458e-06,
"loss": 0.2791898250579834,
"step": 634
},
{
"epoch": 0.6148632292423142,
"grad_norm": 0.17601901292800903,
"learning_rate": 3.900293255131965e-06,
"loss": 0.26764553785324097,
"step": 635
},
{
"epoch": 0.615831517792302,
"grad_norm": 0.1744976043701172,
"learning_rate": 3.890518084066471e-06,
"loss": 0.307162344455719,
"step": 636
},
{
"epoch": 0.61679980634229,
"grad_norm": 0.1944838911294937,
"learning_rate": 3.8807429130009776e-06,
"loss": 0.2940749228000641,
"step": 637
},
{
"epoch": 0.6177680948922779,
"grad_norm": 0.29076093435287476,
"learning_rate": 3.870967741935484e-06,
"loss": 0.32644060254096985,
"step": 638
},
{
"epoch": 0.6187363834422658,
"grad_norm": 0.18829455971717834,
"learning_rate": 3.8611925708699904e-06,
"loss": 0.28472450375556946,
"step": 639
},
{
"epoch": 0.6197046719922537,
"grad_norm": 0.1949450969696045,
"learning_rate": 3.851417399804497e-06,
"loss": 0.2577253580093384,
"step": 640
},
{
"epoch": 0.6206729605422416,
"grad_norm": 0.1973968893289566,
"learning_rate": 3.841642228739003e-06,
"loss": 0.28368428349494934,
"step": 641
},
{
"epoch": 0.6216412490922295,
"grad_norm": 0.1733219027519226,
"learning_rate": 3.83186705767351e-06,
"loss": 0.26086172461509705,
"step": 642
},
{
"epoch": 0.6226095376422174,
"grad_norm": 0.20539860427379608,
"learning_rate": 3.822091886608016e-06,
"loss": 0.3593149483203888,
"step": 643
},
{
"epoch": 0.6235778261922053,
"grad_norm": 0.18563023209571838,
"learning_rate": 3.812316715542522e-06,
"loss": 0.3003098964691162,
"step": 644
},
{
"epoch": 0.6245461147421931,
"grad_norm": 0.19810666143894196,
"learning_rate": 3.8025415444770286e-06,
"loss": 0.2925172448158264,
"step": 645
},
{
"epoch": 0.6255144032921811,
"grad_norm": 0.2321307510137558,
"learning_rate": 3.792766373411535e-06,
"loss": 0.25980299711227417,
"step": 646
},
{
"epoch": 0.626482691842169,
"grad_norm": 0.16675977408885956,
"learning_rate": 3.7829912023460414e-06,
"loss": 0.258143812417984,
"step": 647
},
{
"epoch": 0.6274509803921569,
"grad_norm": 0.18522602319717407,
"learning_rate": 3.773216031280548e-06,
"loss": 0.3249315619468689,
"step": 648
},
{
"epoch": 0.6284192689421447,
"grad_norm": 0.17373818159103394,
"learning_rate": 3.763440860215054e-06,
"loss": 0.289806991815567,
"step": 649
},
{
"epoch": 0.6293875574921327,
"grad_norm": 0.18944744765758514,
"learning_rate": 3.7536656891495603e-06,
"loss": 0.30416756868362427,
"step": 650
},
{
"epoch": 0.6303558460421206,
"grad_norm": 0.19680985808372498,
"learning_rate": 3.7438905180840667e-06,
"loss": 0.28972989320755005,
"step": 651
},
{
"epoch": 0.6313241345921085,
"grad_norm": 0.2205217033624649,
"learning_rate": 3.734115347018573e-06,
"loss": 0.28554368019104004,
"step": 652
},
{
"epoch": 0.6322924231420963,
"grad_norm": 0.172973170876503,
"learning_rate": 3.7243401759530796e-06,
"loss": 0.331814169883728,
"step": 653
},
{
"epoch": 0.6332607116920842,
"grad_norm": 0.1913972645998001,
"learning_rate": 3.714565004887586e-06,
"loss": 0.27782005071640015,
"step": 654
},
{
"epoch": 0.6342290002420722,
"grad_norm": 0.19561362266540527,
"learning_rate": 3.7047898338220924e-06,
"loss": 0.3030650019645691,
"step": 655
},
{
"epoch": 0.63519728879206,
"grad_norm": 0.19253604114055634,
"learning_rate": 3.6950146627565984e-06,
"loss": 0.29422512650489807,
"step": 656
},
{
"epoch": 0.6361655773420479,
"grad_norm": 0.19124586880207062,
"learning_rate": 3.685239491691105e-06,
"loss": 0.26767367124557495,
"step": 657
},
{
"epoch": 0.6371338658920358,
"grad_norm": 0.2221280336380005,
"learning_rate": 3.6754643206256113e-06,
"loss": 0.3479483723640442,
"step": 658
},
{
"epoch": 0.6381021544420237,
"grad_norm": 0.20241160690784454,
"learning_rate": 3.6656891495601177e-06,
"loss": 0.2787402868270874,
"step": 659
},
{
"epoch": 0.6390704429920117,
"grad_norm": 0.19073940813541412,
"learning_rate": 3.655913978494624e-06,
"loss": 0.29317712783813477,
"step": 660
},
{
"epoch": 0.6400387315419995,
"grad_norm": 0.20870280265808105,
"learning_rate": 3.6461388074291306e-06,
"loss": 0.3079635202884674,
"step": 661
},
{
"epoch": 0.6410070200919874,
"grad_norm": 0.18194538354873657,
"learning_rate": 3.6363636363636366e-06,
"loss": 0.26036518812179565,
"step": 662
},
{
"epoch": 0.6419753086419753,
"grad_norm": 0.19380781054496765,
"learning_rate": 3.626588465298143e-06,
"loss": 0.32105469703674316,
"step": 663
},
{
"epoch": 0.6429435971919633,
"grad_norm": 0.18779927492141724,
"learning_rate": 3.6168132942326494e-06,
"loss": 0.23958516120910645,
"step": 664
},
{
"epoch": 0.6439118857419511,
"grad_norm": 0.16800741851329803,
"learning_rate": 3.607038123167156e-06,
"loss": 0.3183926045894623,
"step": 665
},
{
"epoch": 0.644880174291939,
"grad_norm": 0.18218325078487396,
"learning_rate": 3.5972629521016623e-06,
"loss": 0.36072227358818054,
"step": 666
},
{
"epoch": 0.6458484628419269,
"grad_norm": 0.1973208338022232,
"learning_rate": 3.5874877810361687e-06,
"loss": 0.31081509590148926,
"step": 667
},
{
"epoch": 0.6468167513919147,
"grad_norm": 0.17719313502311707,
"learning_rate": 3.5777126099706747e-06,
"loss": 0.3088850677013397,
"step": 668
},
{
"epoch": 0.6477850399419027,
"grad_norm": 0.22201496362686157,
"learning_rate": 3.567937438905181e-06,
"loss": 0.2832217812538147,
"step": 669
},
{
"epoch": 0.6487533284918906,
"grad_norm": 0.2052207589149475,
"learning_rate": 3.5581622678396876e-06,
"loss": 0.2777295708656311,
"step": 670
},
{
"epoch": 0.6497216170418785,
"grad_norm": 0.17530739307403564,
"learning_rate": 3.548387096774194e-06,
"loss": 0.3057093620300293,
"step": 671
},
{
"epoch": 0.6506899055918663,
"grad_norm": 0.20253078639507294,
"learning_rate": 3.5386119257087004e-06,
"loss": 0.2525123059749603,
"step": 672
},
{
"epoch": 0.6516581941418542,
"grad_norm": 0.19099098443984985,
"learning_rate": 3.528836754643207e-06,
"loss": 0.26486071944236755,
"step": 673
},
{
"epoch": 0.6526264826918422,
"grad_norm": 0.19429947435855865,
"learning_rate": 3.5190615835777133e-06,
"loss": 0.27915486693382263,
"step": 674
},
{
"epoch": 0.6535947712418301,
"grad_norm": 0.19641940295696259,
"learning_rate": 3.5092864125122193e-06,
"loss": 0.2952028214931488,
"step": 675
},
{
"epoch": 0.654563059791818,
"grad_norm": 0.18606482446193695,
"learning_rate": 3.4995112414467257e-06,
"loss": 0.26710712909698486,
"step": 676
},
{
"epoch": 0.6555313483418058,
"grad_norm": 0.18616363406181335,
"learning_rate": 3.489736070381232e-06,
"loss": 0.2896000146865845,
"step": 677
},
{
"epoch": 0.6564996368917938,
"grad_norm": 0.18305549025535583,
"learning_rate": 3.4799608993157386e-06,
"loss": 0.27804529666900635,
"step": 678
},
{
"epoch": 0.6574679254417817,
"grad_norm": 0.19162502884864807,
"learning_rate": 3.470185728250245e-06,
"loss": 0.3180793821811676,
"step": 679
},
{
"epoch": 0.6584362139917695,
"grad_norm": 0.17288638651371002,
"learning_rate": 3.4604105571847514e-06,
"loss": 0.27254006266593933,
"step": 680
},
{
"epoch": 0.6594045025417574,
"grad_norm": 0.20115594565868378,
"learning_rate": 3.4506353861192575e-06,
"loss": 0.326080858707428,
"step": 681
},
{
"epoch": 0.6603727910917453,
"grad_norm": 0.20309938490390778,
"learning_rate": 3.440860215053764e-06,
"loss": 0.29796141386032104,
"step": 682
},
{
"epoch": 0.6613410796417333,
"grad_norm": 0.20176127552986145,
"learning_rate": 3.43108504398827e-06,
"loss": 0.2814856469631195,
"step": 683
},
{
"epoch": 0.6623093681917211,
"grad_norm": 0.21620069444179535,
"learning_rate": 3.4213098729227763e-06,
"loss": 0.36335426568984985,
"step": 684
},
{
"epoch": 0.663277656741709,
"grad_norm": 0.20982684195041656,
"learning_rate": 3.4115347018572823e-06,
"loss": 0.2819657027721405,
"step": 685
},
{
"epoch": 0.6642459452916969,
"grad_norm": 0.18432947993278503,
"learning_rate": 3.4017595307917887e-06,
"loss": 0.32050448656082153,
"step": 686
},
{
"epoch": 0.6652142338416849,
"grad_norm": 0.17828144133090973,
"learning_rate": 3.391984359726295e-06,
"loss": 0.3236311376094818,
"step": 687
},
{
"epoch": 0.6661825223916727,
"grad_norm": 0.1964399665594101,
"learning_rate": 3.3822091886608016e-06,
"loss": 0.32314908504486084,
"step": 688
},
{
"epoch": 0.6671508109416606,
"grad_norm": 0.19078870117664337,
"learning_rate": 3.372434017595308e-06,
"loss": 0.27393656969070435,
"step": 689
},
{
"epoch": 0.6681190994916485,
"grad_norm": 0.19160780310630798,
"learning_rate": 3.3626588465298145e-06,
"loss": 0.3088667690753937,
"step": 690
},
{
"epoch": 0.6690873880416364,
"grad_norm": 0.18718208372592926,
"learning_rate": 3.352883675464321e-06,
"loss": 0.28694334626197815,
"step": 691
},
{
"epoch": 0.6700556765916243,
"grad_norm": 0.19036638736724854,
"learning_rate": 3.343108504398827e-06,
"loss": 0.2764681279659271,
"step": 692
},
{
"epoch": 0.6710239651416122,
"grad_norm": 0.17227678000926971,
"learning_rate": 3.3333333333333333e-06,
"loss": 0.2784879207611084,
"step": 693
},
{
"epoch": 0.6719922536916001,
"grad_norm": 0.20473547279834747,
"learning_rate": 3.3235581622678398e-06,
"loss": 0.2824912667274475,
"step": 694
},
{
"epoch": 0.672960542241588,
"grad_norm": 0.1921864002943039,
"learning_rate": 3.313782991202346e-06,
"loss": 0.2795690894126892,
"step": 695
},
{
"epoch": 0.6739288307915758,
"grad_norm": 0.2057105302810669,
"learning_rate": 3.3040078201368526e-06,
"loss": 0.27492284774780273,
"step": 696
},
{
"epoch": 0.6748971193415638,
"grad_norm": 0.2041766345500946,
"learning_rate": 3.294232649071359e-06,
"loss": 0.30277037620544434,
"step": 697
},
{
"epoch": 0.6758654078915517,
"grad_norm": 0.19042398035526276,
"learning_rate": 3.284457478005865e-06,
"loss": 0.31011852622032166,
"step": 698
},
{
"epoch": 0.6768336964415396,
"grad_norm": 0.18352696299552917,
"learning_rate": 3.2746823069403715e-06,
"loss": 0.28382012248039246,
"step": 699
},
{
"epoch": 0.6778019849915274,
"grad_norm": 0.2007741928100586,
"learning_rate": 3.264907135874878e-06,
"loss": 0.28195974230766296,
"step": 700
},
{
"epoch": 0.6787702735415154,
"grad_norm": 0.20310088992118835,
"learning_rate": 3.2551319648093843e-06,
"loss": 0.2988584637641907,
"step": 701
},
{
"epoch": 0.6797385620915033,
"grad_norm": 0.20353393256664276,
"learning_rate": 3.2453567937438908e-06,
"loss": 0.24649690091609955,
"step": 702
},
{
"epoch": 0.6807068506414912,
"grad_norm": 0.1926201432943344,
"learning_rate": 3.235581622678397e-06,
"loss": 0.2895974814891815,
"step": 703
},
{
"epoch": 0.681675139191479,
"grad_norm": 0.19565631449222565,
"learning_rate": 3.225806451612903e-06,
"loss": 0.2735288441181183,
"step": 704
},
{
"epoch": 0.6826434277414669,
"grad_norm": 0.20555929839611053,
"learning_rate": 3.2160312805474096e-06,
"loss": 0.2749082148075104,
"step": 705
},
{
"epoch": 0.6836117162914549,
"grad_norm": 0.19519391655921936,
"learning_rate": 3.206256109481916e-06,
"loss": 0.35463032126426697,
"step": 706
},
{
"epoch": 0.6845800048414428,
"grad_norm": 0.19124329090118408,
"learning_rate": 3.1964809384164225e-06,
"loss": 0.2960769832134247,
"step": 707
},
{
"epoch": 0.6855482933914306,
"grad_norm": 0.19353725016117096,
"learning_rate": 3.186705767350929e-06,
"loss": 0.29588258266448975,
"step": 708
},
{
"epoch": 0.6865165819414185,
"grad_norm": 0.1908576339483261,
"learning_rate": 3.1769305962854353e-06,
"loss": 0.32410839200019836,
"step": 709
},
{
"epoch": 0.6874848704914064,
"grad_norm": 0.19978390634059906,
"learning_rate": 3.1671554252199418e-06,
"loss": 0.26154428720474243,
"step": 710
},
{
"epoch": 0.6884531590413944,
"grad_norm": 0.17735745012760162,
"learning_rate": 3.1573802541544478e-06,
"loss": 0.2741011083126068,
"step": 711
},
{
"epoch": 0.6894214475913822,
"grad_norm": 0.19261346757411957,
"learning_rate": 3.147605083088954e-06,
"loss": 0.29346680641174316,
"step": 712
},
{
"epoch": 0.6903897361413701,
"grad_norm": 0.18815375864505768,
"learning_rate": 3.1378299120234606e-06,
"loss": 0.317450612783432,
"step": 713
},
{
"epoch": 0.691358024691358,
"grad_norm": 0.1747797578573227,
"learning_rate": 3.128054740957967e-06,
"loss": 0.26710936427116394,
"step": 714
},
{
"epoch": 0.692326313241346,
"grad_norm": 0.1850060522556305,
"learning_rate": 3.1182795698924735e-06,
"loss": 0.3440788984298706,
"step": 715
},
{
"epoch": 0.6932946017913338,
"grad_norm": 0.19904842972755432,
"learning_rate": 3.10850439882698e-06,
"loss": 0.27237698435783386,
"step": 716
},
{
"epoch": 0.6942628903413217,
"grad_norm": 0.19219987094402313,
"learning_rate": 3.098729227761486e-06,
"loss": 0.2665986716747284,
"step": 717
},
{
"epoch": 0.6952311788913096,
"grad_norm": 0.1957559734582901,
"learning_rate": 3.0889540566959923e-06,
"loss": 0.28654614090919495,
"step": 718
},
{
"epoch": 0.6961994674412975,
"grad_norm": 0.2007106989622116,
"learning_rate": 3.0791788856304988e-06,
"loss": 0.30569547414779663,
"step": 719
},
{
"epoch": 0.6971677559912854,
"grad_norm": 0.21884313225746155,
"learning_rate": 3.069403714565005e-06,
"loss": 0.2851307690143585,
"step": 720
},
{
"epoch": 0.6981360445412733,
"grad_norm": 0.18904490768909454,
"learning_rate": 3.0596285434995116e-06,
"loss": 0.32544124126434326,
"step": 721
},
{
"epoch": 0.6991043330912612,
"grad_norm": 0.22827713191509247,
"learning_rate": 3.049853372434018e-06,
"loss": 0.2876453101634979,
"step": 722
},
{
"epoch": 0.7000726216412491,
"grad_norm": 0.18982501327991486,
"learning_rate": 3.0400782013685245e-06,
"loss": 0.28896069526672363,
"step": 723
},
{
"epoch": 0.701040910191237,
"grad_norm": 0.208974227309227,
"learning_rate": 3.0303030303030305e-06,
"loss": 0.26989954710006714,
"step": 724
},
{
"epoch": 0.7020091987412249,
"grad_norm": 0.19682757556438446,
"learning_rate": 3.020527859237537e-06,
"loss": 0.316387414932251,
"step": 725
},
{
"epoch": 0.7029774872912128,
"grad_norm": 0.1741049438714981,
"learning_rate": 3.0107526881720433e-06,
"loss": 0.26443612575531006,
"step": 726
},
{
"epoch": 0.7039457758412007,
"grad_norm": 0.2087400257587433,
"learning_rate": 3.0009775171065498e-06,
"loss": 0.2930486500263214,
"step": 727
},
{
"epoch": 0.7049140643911885,
"grad_norm": 0.19682444632053375,
"learning_rate": 2.991202346041056e-06,
"loss": 0.2777274250984192,
"step": 728
},
{
"epoch": 0.7058823529411765,
"grad_norm": 0.18029047548770905,
"learning_rate": 2.9814271749755626e-06,
"loss": 0.30682748556137085,
"step": 729
},
{
"epoch": 0.7068506414911644,
"grad_norm": 0.21413344144821167,
"learning_rate": 2.9716520039100686e-06,
"loss": 0.2852901220321655,
"step": 730
},
{
"epoch": 0.7078189300411523,
"grad_norm": 0.20641835033893585,
"learning_rate": 2.961876832844575e-06,
"loss": 0.30264589190483093,
"step": 731
},
{
"epoch": 0.7087872185911401,
"grad_norm": 0.20583511888980865,
"learning_rate": 2.9521016617790815e-06,
"loss": 0.31246519088745117,
"step": 732
},
{
"epoch": 0.709755507141128,
"grad_norm": 0.19352665543556213,
"learning_rate": 2.942326490713588e-06,
"loss": 0.25201672315597534,
"step": 733
},
{
"epoch": 0.710723795691116,
"grad_norm": 0.19948013126850128,
"learning_rate": 2.9325513196480943e-06,
"loss": 0.2469996213912964,
"step": 734
},
{
"epoch": 0.7116920842411039,
"grad_norm": 0.20024363696575165,
"learning_rate": 2.9227761485826008e-06,
"loss": 0.28980398178100586,
"step": 735
},
{
"epoch": 0.7126603727910917,
"grad_norm": 0.19101053476333618,
"learning_rate": 2.9130009775171068e-06,
"loss": 0.27129659056663513,
"step": 736
},
{
"epoch": 0.7136286613410796,
"grad_norm": 0.19807986915111542,
"learning_rate": 2.903225806451613e-06,
"loss": 0.2989445924758911,
"step": 737
},
{
"epoch": 0.7145969498910676,
"grad_norm": 0.2047462910413742,
"learning_rate": 2.8934506353861196e-06,
"loss": 0.29249265789985657,
"step": 738
},
{
"epoch": 0.7155652384410555,
"grad_norm": 0.21451207995414734,
"learning_rate": 2.883675464320626e-06,
"loss": 0.308368980884552,
"step": 739
},
{
"epoch": 0.7165335269910433,
"grad_norm": 0.18969380855560303,
"learning_rate": 2.8739002932551325e-06,
"loss": 0.30544131994247437,
"step": 740
},
{
"epoch": 0.7175018155410312,
"grad_norm": 0.21949923038482666,
"learning_rate": 2.864125122189639e-06,
"loss": 0.2871190011501312,
"step": 741
},
{
"epoch": 0.7184701040910191,
"grad_norm": 0.18441982567310333,
"learning_rate": 2.8543499511241454e-06,
"loss": 0.34001511335372925,
"step": 742
},
{
"epoch": 0.7194383926410071,
"grad_norm": 0.20495833456516266,
"learning_rate": 2.8445747800586514e-06,
"loss": 0.31153956055641174,
"step": 743
},
{
"epoch": 0.7204066811909949,
"grad_norm": 0.17847374081611633,
"learning_rate": 2.8347996089931578e-06,
"loss": 0.2785325348377228,
"step": 744
},
{
"epoch": 0.7213749697409828,
"grad_norm": 0.20845407247543335,
"learning_rate": 2.8250244379276642e-06,
"loss": 0.28710830211639404,
"step": 745
},
{
"epoch": 0.7223432582909707,
"grad_norm": 0.20801788568496704,
"learning_rate": 2.8152492668621706e-06,
"loss": 0.2709939181804657,
"step": 746
},
{
"epoch": 0.7233115468409586,
"grad_norm": 0.17509667575359344,
"learning_rate": 2.8054740957966762e-06,
"loss": 0.24158413708209991,
"step": 747
},
{
"epoch": 0.7242798353909465,
"grad_norm": 0.2237170934677124,
"learning_rate": 2.7956989247311827e-06,
"loss": 0.26651033759117126,
"step": 748
},
{
"epoch": 0.7252481239409344,
"grad_norm": 0.1964648962020874,
"learning_rate": 2.785923753665689e-06,
"loss": 0.26544153690338135,
"step": 749
},
{
"epoch": 0.7262164124909223,
"grad_norm": 0.1828320175409317,
"learning_rate": 2.7761485826001955e-06,
"loss": 0.24963009357452393,
"step": 750
},
{
"epoch": 0.7271847010409102,
"grad_norm": 0.17765893042087555,
"learning_rate": 2.766373411534702e-06,
"loss": 0.2530496418476105,
"step": 751
},
{
"epoch": 0.7281529895908981,
"grad_norm": 0.17918957769870758,
"learning_rate": 2.7565982404692084e-06,
"loss": 0.2385520339012146,
"step": 752
},
{
"epoch": 0.729121278140886,
"grad_norm": 0.1830013394355774,
"learning_rate": 2.7468230694037144e-06,
"loss": 0.26376885175704956,
"step": 753
},
{
"epoch": 0.7300895666908739,
"grad_norm": 0.20502547919750214,
"learning_rate": 2.737047898338221e-06,
"loss": 0.2629661560058594,
"step": 754
},
{
"epoch": 0.7310578552408618,
"grad_norm": 0.19126304984092712,
"learning_rate": 2.7272727272727272e-06,
"loss": 0.27432548999786377,
"step": 755
},
{
"epoch": 0.7320261437908496,
"grad_norm": 0.1837206333875656,
"learning_rate": 2.7174975562072337e-06,
"loss": 0.2646147906780243,
"step": 756
},
{
"epoch": 0.7329944323408376,
"grad_norm": 0.22238245606422424,
"learning_rate": 2.70772238514174e-06,
"loss": 0.29708367586135864,
"step": 757
},
{
"epoch": 0.7339627208908255,
"grad_norm": 0.19030597805976868,
"learning_rate": 2.6979472140762465e-06,
"loss": 0.3007453680038452,
"step": 758
},
{
"epoch": 0.7349310094408134,
"grad_norm": 0.18150079250335693,
"learning_rate": 2.688172043010753e-06,
"loss": 0.28624916076660156,
"step": 759
},
{
"epoch": 0.7358992979908012,
"grad_norm": 0.21237732470035553,
"learning_rate": 2.678396871945259e-06,
"loss": 0.31297317147254944,
"step": 760
},
{
"epoch": 0.7368675865407892,
"grad_norm": 0.2071557343006134,
"learning_rate": 2.6686217008797654e-06,
"loss": 0.25083449482917786,
"step": 761
},
{
"epoch": 0.7378358750907771,
"grad_norm": 0.18313196301460266,
"learning_rate": 2.658846529814272e-06,
"loss": 0.28581753373146057,
"step": 762
},
{
"epoch": 0.738804163640765,
"grad_norm": 0.20016784965991974,
"learning_rate": 2.6490713587487782e-06,
"loss": 0.28767916560173035,
"step": 763
},
{
"epoch": 0.7397724521907528,
"grad_norm": 0.1874615103006363,
"learning_rate": 2.6392961876832847e-06,
"loss": 0.28244420886039734,
"step": 764
},
{
"epoch": 0.7407407407407407,
"grad_norm": 0.21257996559143066,
"learning_rate": 2.629521016617791e-06,
"loss": 0.2639189064502716,
"step": 765
},
{
"epoch": 0.7417090292907287,
"grad_norm": 0.21034327149391174,
"learning_rate": 2.619745845552297e-06,
"loss": 0.2773539125919342,
"step": 766
},
{
"epoch": 0.7426773178407166,
"grad_norm": 0.21635524928569794,
"learning_rate": 2.6099706744868035e-06,
"loss": 0.283179372549057,
"step": 767
},
{
"epoch": 0.7436456063907044,
"grad_norm": 0.19200022518634796,
"learning_rate": 2.60019550342131e-06,
"loss": 0.2603984475135803,
"step": 768
},
{
"epoch": 0.7446138949406923,
"grad_norm": 0.20428141951560974,
"learning_rate": 2.5904203323558164e-06,
"loss": 0.3322230577468872,
"step": 769
},
{
"epoch": 0.7455821834906802,
"grad_norm": 0.17995081841945648,
"learning_rate": 2.580645161290323e-06,
"loss": 0.26364511251449585,
"step": 770
},
{
"epoch": 0.7465504720406682,
"grad_norm": 0.19678199291229248,
"learning_rate": 2.5708699902248292e-06,
"loss": 0.3625681698322296,
"step": 771
},
{
"epoch": 0.747518760590656,
"grad_norm": 0.183084636926651,
"learning_rate": 2.5610948191593352e-06,
"loss": 0.2772168517112732,
"step": 772
},
{
"epoch": 0.7484870491406439,
"grad_norm": 0.2048066258430481,
"learning_rate": 2.5513196480938417e-06,
"loss": 0.30713188648223877,
"step": 773
},
{
"epoch": 0.7494553376906318,
"grad_norm": 0.21669703722000122,
"learning_rate": 2.541544477028348e-06,
"loss": 0.3376876413822174,
"step": 774
},
{
"epoch": 0.7504236262406198,
"grad_norm": 0.16890452802181244,
"learning_rate": 2.5317693059628545e-06,
"loss": 0.28936320543289185,
"step": 775
},
{
"epoch": 0.7513919147906076,
"grad_norm": 0.2113950401544571,
"learning_rate": 2.521994134897361e-06,
"loss": 0.3068625330924988,
"step": 776
},
{
"epoch": 0.7523602033405955,
"grad_norm": 0.19548510015010834,
"learning_rate": 2.5122189638318674e-06,
"loss": 0.2764047384262085,
"step": 777
},
{
"epoch": 0.7533284918905834,
"grad_norm": 0.19676341116428375,
"learning_rate": 2.502443792766374e-06,
"loss": 0.32238852977752686,
"step": 778
},
{
"epoch": 0.7542967804405712,
"grad_norm": 0.20870518684387207,
"learning_rate": 2.49266862170088e-06,
"loss": 0.2966168224811554,
"step": 779
},
{
"epoch": 0.7552650689905592,
"grad_norm": 0.19091863930225372,
"learning_rate": 2.4828934506353862e-06,
"loss": 0.260260671377182,
"step": 780
},
{
"epoch": 0.7562333575405471,
"grad_norm": 0.18716365098953247,
"learning_rate": 2.4731182795698927e-06,
"loss": 0.2716587781906128,
"step": 781
},
{
"epoch": 0.757201646090535,
"grad_norm": 0.19777894020080566,
"learning_rate": 2.463343108504399e-06,
"loss": 0.2737089693546295,
"step": 782
},
{
"epoch": 0.7581699346405228,
"grad_norm": 0.1986621618270874,
"learning_rate": 2.4535679374389055e-06,
"loss": 0.27934715151786804,
"step": 783
},
{
"epoch": 0.7591382231905107,
"grad_norm": 0.2001214176416397,
"learning_rate": 2.443792766373412e-06,
"loss": 0.29675161838531494,
"step": 784
},
{
"epoch": 0.7601065117404987,
"grad_norm": 0.17941324412822723,
"learning_rate": 2.434017595307918e-06,
"loss": 0.2796166241168976,
"step": 785
},
{
"epoch": 0.7610748002904866,
"grad_norm": 0.18563294410705566,
"learning_rate": 2.4242424242424244e-06,
"loss": 0.2594640851020813,
"step": 786
},
{
"epoch": 0.7620430888404744,
"grad_norm": 0.1819997876882553,
"learning_rate": 2.414467253176931e-06,
"loss": 0.28631582856178284,
"step": 787
},
{
"epoch": 0.7630113773904623,
"grad_norm": 0.2092135101556778,
"learning_rate": 2.4046920821114372e-06,
"loss": 0.29993587732315063,
"step": 788
},
{
"epoch": 0.7639796659404503,
"grad_norm": 0.20817267894744873,
"learning_rate": 2.3949169110459437e-06,
"loss": 0.2964945435523987,
"step": 789
},
{
"epoch": 0.7649479544904382,
"grad_norm": 0.18305228650569916,
"learning_rate": 2.38514173998045e-06,
"loss": 0.2470388114452362,
"step": 790
},
{
"epoch": 0.765916243040426,
"grad_norm": 0.18974260985851288,
"learning_rate": 2.375366568914956e-06,
"loss": 0.26321178674697876,
"step": 791
},
{
"epoch": 0.7668845315904139,
"grad_norm": 0.22661836445331573,
"learning_rate": 2.3655913978494625e-06,
"loss": 0.28920090198516846,
"step": 792
},
{
"epoch": 0.7678528201404018,
"grad_norm": 0.21956227719783783,
"learning_rate": 2.355816226783969e-06,
"loss": 0.2883264422416687,
"step": 793
},
{
"epoch": 0.7688211086903898,
"grad_norm": 0.21458660066127777,
"learning_rate": 2.3460410557184754e-06,
"loss": 0.3575912117958069,
"step": 794
},
{
"epoch": 0.7697893972403776,
"grad_norm": 0.19066624343395233,
"learning_rate": 2.3362658846529814e-06,
"loss": 0.25565528869628906,
"step": 795
},
{
"epoch": 0.7707576857903655,
"grad_norm": 0.19037111103534698,
"learning_rate": 2.326490713587488e-06,
"loss": 0.26588374376296997,
"step": 796
},
{
"epoch": 0.7717259743403534,
"grad_norm": 0.1706329733133316,
"learning_rate": 2.3167155425219943e-06,
"loss": 0.2640436887741089,
"step": 797
},
{
"epoch": 0.7726942628903414,
"grad_norm": 0.203688383102417,
"learning_rate": 2.3069403714565007e-06,
"loss": 0.272479772567749,
"step": 798
},
{
"epoch": 0.7736625514403292,
"grad_norm": 0.21687336266040802,
"learning_rate": 2.297165200391007e-06,
"loss": 0.2606707811355591,
"step": 799
},
{
"epoch": 0.7746308399903171,
"grad_norm": 0.18459083139896393,
"learning_rate": 2.287390029325513e-06,
"loss": 0.2953495979309082,
"step": 800
},
{
"epoch": 0.775599128540305,
"grad_norm": 0.2097976803779602,
"learning_rate": 2.2776148582600195e-06,
"loss": 0.29703575372695923,
"step": 801
},
{
"epoch": 0.7765674170902929,
"grad_norm": 0.20715487003326416,
"learning_rate": 2.267839687194526e-06,
"loss": 0.2804234027862549,
"step": 802
},
{
"epoch": 0.7775357056402808,
"grad_norm": 0.21985439956188202,
"learning_rate": 2.2580645161290324e-06,
"loss": 0.29094335436820984,
"step": 803
},
{
"epoch": 0.7785039941902687,
"grad_norm": 0.17857959866523743,
"learning_rate": 2.248289345063539e-06,
"loss": 0.2993057668209076,
"step": 804
},
{
"epoch": 0.7794722827402566,
"grad_norm": 0.20267243683338165,
"learning_rate": 2.2385141739980453e-06,
"loss": 0.28471803665161133,
"step": 805
},
{
"epoch": 0.7804405712902445,
"grad_norm": 0.18737877905368805,
"learning_rate": 2.2287390029325513e-06,
"loss": 0.27943700551986694,
"step": 806
},
{
"epoch": 0.7814088598402323,
"grad_norm": 0.17687441408634186,
"learning_rate": 2.2189638318670577e-06,
"loss": 0.2751350402832031,
"step": 807
},
{
"epoch": 0.7823771483902203,
"grad_norm": 0.20583491027355194,
"learning_rate": 2.209188660801564e-06,
"loss": 0.28236058354377747,
"step": 808
},
{
"epoch": 0.7833454369402082,
"grad_norm": 0.22925525903701782,
"learning_rate": 2.1994134897360705e-06,
"loss": 0.2999430000782013,
"step": 809
},
{
"epoch": 0.7843137254901961,
"grad_norm": 0.1996539980173111,
"learning_rate": 2.189638318670577e-06,
"loss": 0.29116010665893555,
"step": 810
},
{
"epoch": 0.7852820140401839,
"grad_norm": 0.19890666007995605,
"learning_rate": 2.1798631476050834e-06,
"loss": 0.2903507947921753,
"step": 811
},
{
"epoch": 0.7862503025901719,
"grad_norm": 0.1992999017238617,
"learning_rate": 2.17008797653959e-06,
"loss": 0.2690543532371521,
"step": 812
},
{
"epoch": 0.7872185911401598,
"grad_norm": 0.1835276484489441,
"learning_rate": 2.160312805474096e-06,
"loss": 0.28388747572898865,
"step": 813
},
{
"epoch": 0.7881868796901477,
"grad_norm": 0.236952006816864,
"learning_rate": 2.1505376344086023e-06,
"loss": 0.2714405953884125,
"step": 814
},
{
"epoch": 0.7891551682401355,
"grad_norm": 0.19345760345458984,
"learning_rate": 2.1407624633431087e-06,
"loss": 0.2626250982284546,
"step": 815
},
{
"epoch": 0.7901234567901234,
"grad_norm": 0.20259200036525726,
"learning_rate": 2.130987292277615e-06,
"loss": 0.29853078722953796,
"step": 816
},
{
"epoch": 0.7910917453401114,
"grad_norm": 0.1846383810043335,
"learning_rate": 2.1212121212121216e-06,
"loss": 0.27077630162239075,
"step": 817
},
{
"epoch": 0.7920600338900993,
"grad_norm": 0.21752354502677917,
"learning_rate": 2.111436950146628e-06,
"loss": 0.28987622261047363,
"step": 818
},
{
"epoch": 0.7930283224400871,
"grad_norm": 0.18915565311908722,
"learning_rate": 2.101661779081134e-06,
"loss": 0.2888622581958771,
"step": 819
},
{
"epoch": 0.793996610990075,
"grad_norm": 0.2110828459262848,
"learning_rate": 2.0918866080156404e-06,
"loss": 0.24480582773685455,
"step": 820
},
{
"epoch": 0.7949648995400629,
"grad_norm": 0.19739995896816254,
"learning_rate": 2.082111436950147e-06,
"loss": 0.26558613777160645,
"step": 821
},
{
"epoch": 0.7959331880900509,
"grad_norm": 0.17837020754814148,
"learning_rate": 2.0723362658846533e-06,
"loss": 0.2380271553993225,
"step": 822
},
{
"epoch": 0.7969014766400387,
"grad_norm": 0.2132730782032013,
"learning_rate": 2.0625610948191597e-06,
"loss": 0.2731876075267792,
"step": 823
},
{
"epoch": 0.7978697651900266,
"grad_norm": 0.18625319004058838,
"learning_rate": 2.052785923753666e-06,
"loss": 0.2940404415130615,
"step": 824
},
{
"epoch": 0.7988380537400145,
"grad_norm": 0.18981625139713287,
"learning_rate": 2.043010752688172e-06,
"loss": 0.25833550095558167,
"step": 825
},
{
"epoch": 0.7998063422900025,
"grad_norm": 0.19009682536125183,
"learning_rate": 2.0332355816226786e-06,
"loss": 0.26862984895706177,
"step": 826
},
{
"epoch": 0.8007746308399903,
"grad_norm": 0.17396694421768188,
"learning_rate": 2.023460410557185e-06,
"loss": 0.2869129180908203,
"step": 827
},
{
"epoch": 0.8017429193899782,
"grad_norm": 0.19141492247581482,
"learning_rate": 2.013685239491691e-06,
"loss": 0.32933974266052246,
"step": 828
},
{
"epoch": 0.8027112079399661,
"grad_norm": 0.22585217654705048,
"learning_rate": 2.0039100684261974e-06,
"loss": 0.25727906823158264,
"step": 829
},
{
"epoch": 0.803679496489954,
"grad_norm": 0.20204074680805206,
"learning_rate": 1.994134897360704e-06,
"loss": 0.28683584928512573,
"step": 830
},
{
"epoch": 0.8046477850399419,
"grad_norm": 0.1816793978214264,
"learning_rate": 1.9843597262952103e-06,
"loss": 0.2783251702785492,
"step": 831
},
{
"epoch": 0.8056160735899298,
"grad_norm": 0.19098123908042908,
"learning_rate": 1.9745845552297167e-06,
"loss": 0.28205838799476624,
"step": 832
},
{
"epoch": 0.8065843621399177,
"grad_norm": 0.2102154642343521,
"learning_rate": 1.964809384164223e-06,
"loss": 0.32708585262298584,
"step": 833
},
{
"epoch": 0.8075526506899056,
"grad_norm": 0.2377101480960846,
"learning_rate": 1.955034213098729e-06,
"loss": 0.3074392080307007,
"step": 834
},
{
"epoch": 0.8085209392398935,
"grad_norm": 0.21340312063694,
"learning_rate": 1.9452590420332356e-06,
"loss": 0.28936126828193665,
"step": 835
},
{
"epoch": 0.8094892277898814,
"grad_norm": 0.19761207699775696,
"learning_rate": 1.935483870967742e-06,
"loss": 0.30385932326316833,
"step": 836
},
{
"epoch": 0.8104575163398693,
"grad_norm": 0.17896802723407745,
"learning_rate": 1.9257086999022484e-06,
"loss": 0.2657051682472229,
"step": 837
},
{
"epoch": 0.8114258048898572,
"grad_norm": 0.19170638918876648,
"learning_rate": 1.915933528836755e-06,
"loss": 0.3132804036140442,
"step": 838
},
{
"epoch": 0.812394093439845,
"grad_norm": 0.18938247859477997,
"learning_rate": 1.906158357771261e-06,
"loss": 0.260288804769516,
"step": 839
},
{
"epoch": 0.813362381989833,
"grad_norm": 0.18173451721668243,
"learning_rate": 1.8963831867057675e-06,
"loss": 0.2886829078197479,
"step": 840
},
{
"epoch": 0.8143306705398209,
"grad_norm": 0.1915765106678009,
"learning_rate": 1.886608015640274e-06,
"loss": 0.30934807658195496,
"step": 841
},
{
"epoch": 0.8152989590898088,
"grad_norm": 0.2193581461906433,
"learning_rate": 1.8768328445747801e-06,
"loss": 0.29573243856430054,
"step": 842
},
{
"epoch": 0.8162672476397966,
"grad_norm": 0.1817786544561386,
"learning_rate": 1.8670576735092866e-06,
"loss": 0.2668893337249756,
"step": 843
},
{
"epoch": 0.8172355361897845,
"grad_norm": 0.19725021719932556,
"learning_rate": 1.857282502443793e-06,
"loss": 0.3286668062210083,
"step": 844
},
{
"epoch": 0.8182038247397725,
"grad_norm": 0.20280499756336212,
"learning_rate": 1.8475073313782992e-06,
"loss": 0.26897329092025757,
"step": 845
},
{
"epoch": 0.8191721132897604,
"grad_norm": 0.19977053999900818,
"learning_rate": 1.8377321603128056e-06,
"loss": 0.27279871702194214,
"step": 846
},
{
"epoch": 0.8201404018397482,
"grad_norm": 0.19068841636180878,
"learning_rate": 1.827956989247312e-06,
"loss": 0.254513144493103,
"step": 847
},
{
"epoch": 0.8211086903897361,
"grad_norm": 0.2015547901391983,
"learning_rate": 1.8181818181818183e-06,
"loss": 0.29649272561073303,
"step": 848
},
{
"epoch": 0.8220769789397241,
"grad_norm": 0.18814009428024292,
"learning_rate": 1.8084066471163247e-06,
"loss": 0.2868715524673462,
"step": 849
},
{
"epoch": 0.823045267489712,
"grad_norm": 0.19368094205856323,
"learning_rate": 1.7986314760508311e-06,
"loss": 0.2806050777435303,
"step": 850
},
{
"epoch": 0.8240135560396998,
"grad_norm": 0.20298543572425842,
"learning_rate": 1.7888563049853374e-06,
"loss": 0.26234903931617737,
"step": 851
},
{
"epoch": 0.8249818445896877,
"grad_norm": 0.1959095001220703,
"learning_rate": 1.7790811339198438e-06,
"loss": 0.28573155403137207,
"step": 852
},
{
"epoch": 0.8259501331396756,
"grad_norm": 0.20691703259944916,
"learning_rate": 1.7693059628543502e-06,
"loss": 0.2719816565513611,
"step": 853
},
{
"epoch": 0.8269184216896636,
"grad_norm": 0.21501125395298004,
"learning_rate": 1.7595307917888567e-06,
"loss": 0.29406917095184326,
"step": 854
},
{
"epoch": 0.8278867102396514,
"grad_norm": 0.17245161533355713,
"learning_rate": 1.7497556207233629e-06,
"loss": 0.2694648206233978,
"step": 855
},
{
"epoch": 0.8288549987896393,
"grad_norm": 0.18521907925605774,
"learning_rate": 1.7399804496578693e-06,
"loss": 0.2755904793739319,
"step": 856
},
{
"epoch": 0.8298232873396272,
"grad_norm": 0.20708146691322327,
"learning_rate": 1.7302052785923757e-06,
"loss": 0.2739972472190857,
"step": 857
},
{
"epoch": 0.830791575889615,
"grad_norm": 0.2165932059288025,
"learning_rate": 1.720430107526882e-06,
"loss": 0.30347809195518494,
"step": 858
},
{
"epoch": 0.831759864439603,
"grad_norm": 0.2044944018125534,
"learning_rate": 1.7106549364613882e-06,
"loss": 0.30577352643013,
"step": 859
},
{
"epoch": 0.8327281529895909,
"grad_norm": 0.23014850914478302,
"learning_rate": 1.7008797653958944e-06,
"loss": 0.2837938070297241,
"step": 860
},
{
"epoch": 0.8336964415395788,
"grad_norm": 0.170841246843338,
"learning_rate": 1.6911045943304008e-06,
"loss": 0.27039510011672974,
"step": 861
},
{
"epoch": 0.8346647300895667,
"grad_norm": 0.2066902071237564,
"learning_rate": 1.6813294232649072e-06,
"loss": 0.3122199773788452,
"step": 862
},
{
"epoch": 0.8356330186395546,
"grad_norm": 0.21400435268878937,
"learning_rate": 1.6715542521994134e-06,
"loss": 0.2904992997646332,
"step": 863
},
{
"epoch": 0.8366013071895425,
"grad_norm": 0.23855531215667725,
"learning_rate": 1.6617790811339199e-06,
"loss": 0.2858680486679077,
"step": 864
},
{
"epoch": 0.8375695957395304,
"grad_norm": 0.20174764096736908,
"learning_rate": 1.6520039100684263e-06,
"loss": 0.2764103412628174,
"step": 865
},
{
"epoch": 0.8385378842895183,
"grad_norm": 0.1859450787305832,
"learning_rate": 1.6422287390029325e-06,
"loss": 0.2620023488998413,
"step": 866
},
{
"epoch": 0.8395061728395061,
"grad_norm": 0.18559077382087708,
"learning_rate": 1.632453567937439e-06,
"loss": 0.2956124544143677,
"step": 867
},
{
"epoch": 0.8404744613894941,
"grad_norm": 0.1958460807800293,
"learning_rate": 1.6226783968719454e-06,
"loss": 0.24393334984779358,
"step": 868
},
{
"epoch": 0.841442749939482,
"grad_norm": 0.20028391480445862,
"learning_rate": 1.6129032258064516e-06,
"loss": 0.2675096392631531,
"step": 869
},
{
"epoch": 0.8424110384894699,
"grad_norm": 0.18042640388011932,
"learning_rate": 1.603128054740958e-06,
"loss": 0.2402784675359726,
"step": 870
},
{
"epoch": 0.8433793270394577,
"grad_norm": 0.21275922656059265,
"learning_rate": 1.5933528836754645e-06,
"loss": 0.2840040922164917,
"step": 871
},
{
"epoch": 0.8443476155894457,
"grad_norm": 0.19365417957305908,
"learning_rate": 1.5835777126099709e-06,
"loss": 0.28499388694763184,
"step": 872
},
{
"epoch": 0.8453159041394336,
"grad_norm": 0.1794516146183014,
"learning_rate": 1.573802541544477e-06,
"loss": 0.24146252870559692,
"step": 873
},
{
"epoch": 0.8462841926894215,
"grad_norm": 0.2163521647453308,
"learning_rate": 1.5640273704789835e-06,
"loss": 0.3129892349243164,
"step": 874
},
{
"epoch": 0.8472524812394093,
"grad_norm": 0.1975439339876175,
"learning_rate": 1.55425219941349e-06,
"loss": 0.2796524167060852,
"step": 875
},
{
"epoch": 0.8482207697893972,
"grad_norm": 0.2034914195537567,
"learning_rate": 1.5444770283479962e-06,
"loss": 0.279870867729187,
"step": 876
},
{
"epoch": 0.8491890583393852,
"grad_norm": 0.19650639593601227,
"learning_rate": 1.5347018572825026e-06,
"loss": 0.2665901184082031,
"step": 877
},
{
"epoch": 0.8501573468893731,
"grad_norm": 0.2097690999507904,
"learning_rate": 1.524926686217009e-06,
"loss": 0.27686014771461487,
"step": 878
},
{
"epoch": 0.8511256354393609,
"grad_norm": 0.2037818878889084,
"learning_rate": 1.5151515151515152e-06,
"loss": 0.3026971220970154,
"step": 879
},
{
"epoch": 0.8520939239893488,
"grad_norm": 0.20769764482975006,
"learning_rate": 1.5053763440860217e-06,
"loss": 0.27736592292785645,
"step": 880
},
{
"epoch": 0.8530622125393367,
"grad_norm": 0.1871424913406372,
"learning_rate": 1.495601173020528e-06,
"loss": 0.2646699547767639,
"step": 881
},
{
"epoch": 0.8540305010893247,
"grad_norm": 0.17274564504623413,
"learning_rate": 1.4858260019550343e-06,
"loss": 0.2835018038749695,
"step": 882
},
{
"epoch": 0.8549987896393125,
"grad_norm": 0.19041228294372559,
"learning_rate": 1.4760508308895407e-06,
"loss": 0.2666222155094147,
"step": 883
},
{
"epoch": 0.8559670781893004,
"grad_norm": 0.2032071202993393,
"learning_rate": 1.4662756598240472e-06,
"loss": 0.2845078706741333,
"step": 884
},
{
"epoch": 0.8569353667392883,
"grad_norm": 0.19567905366420746,
"learning_rate": 1.4565004887585534e-06,
"loss": 0.2767939567565918,
"step": 885
},
{
"epoch": 0.8579036552892763,
"grad_norm": 0.21539276838302612,
"learning_rate": 1.4467253176930598e-06,
"loss": 0.28917932510375977,
"step": 886
},
{
"epoch": 0.8588719438392641,
"grad_norm": 0.19841663539409637,
"learning_rate": 1.4369501466275662e-06,
"loss": 0.2754652202129364,
"step": 887
},
{
"epoch": 0.859840232389252,
"grad_norm": 0.1980779618024826,
"learning_rate": 1.4271749755620727e-06,
"loss": 0.24697673320770264,
"step": 888
},
{
"epoch": 0.8608085209392399,
"grad_norm": 0.21110616624355316,
"learning_rate": 1.4173998044965789e-06,
"loss": 0.2851879894733429,
"step": 889
},
{
"epoch": 0.8617768094892277,
"grad_norm": 0.19414329528808594,
"learning_rate": 1.4076246334310853e-06,
"loss": 0.25482916831970215,
"step": 890
},
{
"epoch": 0.8627450980392157,
"grad_norm": 0.21367470920085907,
"learning_rate": 1.3978494623655913e-06,
"loss": 0.2666151821613312,
"step": 891
},
{
"epoch": 0.8637133865892036,
"grad_norm": 0.1971525102853775,
"learning_rate": 1.3880742913000978e-06,
"loss": 0.28804174065589905,
"step": 892
},
{
"epoch": 0.8646816751391915,
"grad_norm": 0.196051225066185,
"learning_rate": 1.3782991202346042e-06,
"loss": 0.2798953354358673,
"step": 893
},
{
"epoch": 0.8656499636891793,
"grad_norm": 0.19818323850631714,
"learning_rate": 1.3685239491691104e-06,
"loss": 0.251752108335495,
"step": 894
},
{
"epoch": 0.8666182522391672,
"grad_norm": 0.19199031591415405,
"learning_rate": 1.3587487781036168e-06,
"loss": 0.27647408843040466,
"step": 895
},
{
"epoch": 0.8675865407891552,
"grad_norm": 0.22557084262371063,
"learning_rate": 1.3489736070381233e-06,
"loss": 0.3452335000038147,
"step": 896
},
{
"epoch": 0.8685548293391431,
"grad_norm": 0.21375709772109985,
"learning_rate": 1.3391984359726295e-06,
"loss": 0.31028902530670166,
"step": 897
},
{
"epoch": 0.869523117889131,
"grad_norm": 0.2083037942647934,
"learning_rate": 1.329423264907136e-06,
"loss": 0.27808475494384766,
"step": 898
},
{
"epoch": 0.8704914064391188,
"grad_norm": 0.19114944338798523,
"learning_rate": 1.3196480938416423e-06,
"loss": 0.2660242021083832,
"step": 899
},
{
"epoch": 0.8714596949891068,
"grad_norm": 0.2077726423740387,
"learning_rate": 1.3098729227761485e-06,
"loss": 0.3196616470813751,
"step": 900
},
{
"epoch": 0.8724279835390947,
"grad_norm": 0.19043967127799988,
"learning_rate": 1.300097751710655e-06,
"loss": 0.2752097547054291,
"step": 901
},
{
"epoch": 0.8733962720890825,
"grad_norm": 0.1956516057252884,
"learning_rate": 1.2903225806451614e-06,
"loss": 0.2782442271709442,
"step": 902
},
{
"epoch": 0.8743645606390704,
"grad_norm": 0.21374346315860748,
"learning_rate": 1.2805474095796676e-06,
"loss": 0.3336328864097595,
"step": 903
},
{
"epoch": 0.8753328491890583,
"grad_norm": 0.17390403151512146,
"learning_rate": 1.270772238514174e-06,
"loss": 0.28889116644859314,
"step": 904
},
{
"epoch": 0.8763011377390463,
"grad_norm": 0.1946377009153366,
"learning_rate": 1.2609970674486805e-06,
"loss": 0.26131391525268555,
"step": 905
},
{
"epoch": 0.8772694262890341,
"grad_norm": 0.19059988856315613,
"learning_rate": 1.251221896383187e-06,
"loss": 0.27641037106513977,
"step": 906
},
{
"epoch": 0.878237714839022,
"grad_norm": 0.21638603508472443,
"learning_rate": 1.2414467253176931e-06,
"loss": 0.2549016773700714,
"step": 907
},
{
"epoch": 0.8792060033890099,
"grad_norm": 0.18561683595180511,
"learning_rate": 1.2316715542521995e-06,
"loss": 0.2516704797744751,
"step": 908
},
{
"epoch": 0.8801742919389978,
"grad_norm": 0.18754595518112183,
"learning_rate": 1.221896383186706e-06,
"loss": 0.24743372201919556,
"step": 909
},
{
"epoch": 0.8811425804889857,
"grad_norm": 0.17627929151058197,
"learning_rate": 1.2121212121212122e-06,
"loss": 0.2443106323480606,
"step": 910
},
{
"epoch": 0.8821108690389736,
"grad_norm": 0.21671797335147858,
"learning_rate": 1.2023460410557186e-06,
"loss": 0.2789687514305115,
"step": 911
},
{
"epoch": 0.8830791575889615,
"grad_norm": 0.18491996824741364,
"learning_rate": 1.192570869990225e-06,
"loss": 0.280285507440567,
"step": 912
},
{
"epoch": 0.8840474461389494,
"grad_norm": 0.19200359284877777,
"learning_rate": 1.1827956989247313e-06,
"loss": 0.3697912096977234,
"step": 913
},
{
"epoch": 0.8850157346889373,
"grad_norm": 0.19149360060691833,
"learning_rate": 1.1730205278592377e-06,
"loss": 0.2533896565437317,
"step": 914
},
{
"epoch": 0.8859840232389252,
"grad_norm": 0.1858339011669159,
"learning_rate": 1.163245356793744e-06,
"loss": 0.2724184989929199,
"step": 915
},
{
"epoch": 0.8869523117889131,
"grad_norm": 0.18043696880340576,
"learning_rate": 1.1534701857282503e-06,
"loss": 0.3179680109024048,
"step": 916
},
{
"epoch": 0.887920600338901,
"grad_norm": 0.2031916230916977,
"learning_rate": 1.1436950146627566e-06,
"loss": 0.2644922733306885,
"step": 917
},
{
"epoch": 0.8888888888888888,
"grad_norm": 0.2100847363471985,
"learning_rate": 1.133919843597263e-06,
"loss": 0.2834533154964447,
"step": 918
},
{
"epoch": 0.8898571774388768,
"grad_norm": 0.20932041108608246,
"learning_rate": 1.1241446725317694e-06,
"loss": 0.2602953314781189,
"step": 919
},
{
"epoch": 0.8908254659888647,
"grad_norm": 0.1940714567899704,
"learning_rate": 1.1143695014662756e-06,
"loss": 0.29519274830818176,
"step": 920
},
{
"epoch": 0.8917937545388526,
"grad_norm": 0.20699529349803925,
"learning_rate": 1.104594330400782e-06,
"loss": 0.2826448678970337,
"step": 921
},
{
"epoch": 0.8927620430888404,
"grad_norm": 0.18003135919570923,
"learning_rate": 1.0948191593352885e-06,
"loss": 0.3036431670188904,
"step": 922
},
{
"epoch": 0.8937303316388284,
"grad_norm": 0.2626630961894989,
"learning_rate": 1.085043988269795e-06,
"loss": 0.2694006860256195,
"step": 923
},
{
"epoch": 0.8946986201888163,
"grad_norm": 0.21386921405792236,
"learning_rate": 1.0752688172043011e-06,
"loss": 0.2830575704574585,
"step": 924
},
{
"epoch": 0.8956669087388042,
"grad_norm": 0.20465651154518127,
"learning_rate": 1.0654936461388076e-06,
"loss": 0.2928478419780731,
"step": 925
},
{
"epoch": 0.896635197288792,
"grad_norm": 0.218974307179451,
"learning_rate": 1.055718475073314e-06,
"loss": 0.265733003616333,
"step": 926
},
{
"epoch": 0.8976034858387799,
"grad_norm": 0.18097904324531555,
"learning_rate": 1.0459433040078202e-06,
"loss": 0.297993540763855,
"step": 927
},
{
"epoch": 0.8985717743887679,
"grad_norm": 0.18121756613254547,
"learning_rate": 1.0361681329423266e-06,
"loss": 0.3206055760383606,
"step": 928
},
{
"epoch": 0.8995400629387558,
"grad_norm": 0.18943090736865997,
"learning_rate": 1.026392961876833e-06,
"loss": 0.3015185594558716,
"step": 929
},
{
"epoch": 0.9005083514887436,
"grad_norm": 0.19779494404792786,
"learning_rate": 1.0166177908113393e-06,
"loss": 0.27546051144599915,
"step": 930
},
{
"epoch": 0.9014766400387315,
"grad_norm": 0.1858789324760437,
"learning_rate": 1.0068426197458455e-06,
"loss": 0.2784835994243622,
"step": 931
},
{
"epoch": 0.9024449285887194,
"grad_norm": 0.19459734857082367,
"learning_rate": 9.97067448680352e-07,
"loss": 0.28571465611457825,
"step": 932
},
{
"epoch": 0.9034132171387074,
"grad_norm": 0.18275073170661926,
"learning_rate": 9.872922776148584e-07,
"loss": 0.2614639401435852,
"step": 933
},
{
"epoch": 0.9043815056886952,
"grad_norm": 0.19755122065544128,
"learning_rate": 9.775171065493646e-07,
"loss": 0.3016014099121094,
"step": 934
},
{
"epoch": 0.9053497942386831,
"grad_norm": 0.21569618582725525,
"learning_rate": 9.67741935483871e-07,
"loss": 0.29818177223205566,
"step": 935
},
{
"epoch": 0.906318082788671,
"grad_norm": 0.18675316870212555,
"learning_rate": 9.579667644183774e-07,
"loss": 0.3368891477584839,
"step": 936
},
{
"epoch": 0.907286371338659,
"grad_norm": 0.19871239364147186,
"learning_rate": 9.481915933528838e-07,
"loss": 0.3153863549232483,
"step": 937
},
{
"epoch": 0.9082546598886468,
"grad_norm": 0.22014066576957703,
"learning_rate": 9.384164222873901e-07,
"loss": 0.2810421884059906,
"step": 938
},
{
"epoch": 0.9092229484386347,
"grad_norm": 0.19278523325920105,
"learning_rate": 9.286412512218965e-07,
"loss": 0.2553982138633728,
"step": 939
},
{
"epoch": 0.9101912369886226,
"grad_norm": 0.20471501350402832,
"learning_rate": 9.188660801564028e-07,
"loss": 0.3324427902698517,
"step": 940
},
{
"epoch": 0.9111595255386105,
"grad_norm": 0.19074149429798126,
"learning_rate": 9.090909090909091e-07,
"loss": 0.2935166656970978,
"step": 941
},
{
"epoch": 0.9121278140885984,
"grad_norm": 0.19555461406707764,
"learning_rate": 8.993157380254156e-07,
"loss": 0.27848702669143677,
"step": 942
},
{
"epoch": 0.9130961026385863,
"grad_norm": 0.1958128958940506,
"learning_rate": 8.895405669599219e-07,
"loss": 0.3214573860168457,
"step": 943
},
{
"epoch": 0.9140643911885742,
"grad_norm": 0.20188724994659424,
"learning_rate": 8.797653958944283e-07,
"loss": 0.29266253113746643,
"step": 944
},
{
"epoch": 0.9150326797385621,
"grad_norm": 0.2061896175146103,
"learning_rate": 8.699902248289346e-07,
"loss": 0.26876091957092285,
"step": 945
},
{
"epoch": 0.9160009682885499,
"grad_norm": 0.18365229666233063,
"learning_rate": 8.60215053763441e-07,
"loss": 0.24429546296596527,
"step": 946
},
{
"epoch": 0.9169692568385379,
"grad_norm": 0.2009628713130951,
"learning_rate": 8.504398826979472e-07,
"loss": 0.2813577651977539,
"step": 947
},
{
"epoch": 0.9179375453885258,
"grad_norm": 0.21519120037555695,
"learning_rate": 8.406647116324536e-07,
"loss": 0.29421091079711914,
"step": 948
},
{
"epoch": 0.9189058339385137,
"grad_norm": 0.19519393146038055,
"learning_rate": 8.308895405669599e-07,
"loss": 0.27097785472869873,
"step": 949
},
{
"epoch": 0.9198741224885015,
"grad_norm": 0.18344323337078094,
"learning_rate": 8.211143695014663e-07,
"loss": 0.27933016419410706,
"step": 950
},
{
"epoch": 0.9208424110384895,
"grad_norm": 0.18683570623397827,
"learning_rate": 8.113391984359727e-07,
"loss": 0.28024059534072876,
"step": 951
},
{
"epoch": 0.9218106995884774,
"grad_norm": 0.2764555513858795,
"learning_rate": 8.01564027370479e-07,
"loss": 0.2519608438014984,
"step": 952
},
{
"epoch": 0.9227789881384653,
"grad_norm": 0.20227362215518951,
"learning_rate": 7.917888563049854e-07,
"loss": 0.2634407877922058,
"step": 953
},
{
"epoch": 0.9237472766884531,
"grad_norm": 0.20687641203403473,
"learning_rate": 7.820136852394918e-07,
"loss": 0.2730368375778198,
"step": 954
},
{
"epoch": 0.924715565238441,
"grad_norm": 0.18547162413597107,
"learning_rate": 7.722385141739981e-07,
"loss": 0.26113927364349365,
"step": 955
},
{
"epoch": 0.925683853788429,
"grad_norm": 0.1972709447145462,
"learning_rate": 7.624633431085045e-07,
"loss": 0.3210276663303375,
"step": 956
},
{
"epoch": 0.9266521423384169,
"grad_norm": 0.22296936810016632,
"learning_rate": 7.526881720430108e-07,
"loss": 0.2896474301815033,
"step": 957
},
{
"epoch": 0.9276204308884047,
"grad_norm": 0.1758430004119873,
"learning_rate": 7.429130009775172e-07,
"loss": 0.25095510482788086,
"step": 958
},
{
"epoch": 0.9285887194383926,
"grad_norm": 0.20484335720539093,
"learning_rate": 7.331378299120236e-07,
"loss": 0.27182087302207947,
"step": 959
},
{
"epoch": 0.9295570079883806,
"grad_norm": 0.18908201158046722,
"learning_rate": 7.233626588465299e-07,
"loss": 0.2869470417499542,
"step": 960
},
{
"epoch": 0.9305252965383685,
"grad_norm": 0.20601920783519745,
"learning_rate": 7.135874877810363e-07,
"loss": 0.31839150190353394,
"step": 961
},
{
"epoch": 0.9314935850883563,
"grad_norm": 0.2003796547651291,
"learning_rate": 7.038123167155427e-07,
"loss": 0.28072643280029297,
"step": 962
},
{
"epoch": 0.9324618736383442,
"grad_norm": 0.21452200412750244,
"learning_rate": 6.940371456500489e-07,
"loss": 0.3070773780345917,
"step": 963
},
{
"epoch": 0.9334301621883321,
"grad_norm": 0.20407654345035553,
"learning_rate": 6.842619745845552e-07,
"loss": 0.28470179438591003,
"step": 964
},
{
"epoch": 0.9343984507383201,
"grad_norm": 0.21125538647174835,
"learning_rate": 6.744868035190616e-07,
"loss": 0.29014891386032104,
"step": 965
},
{
"epoch": 0.9353667392883079,
"grad_norm": 0.18405841290950775,
"learning_rate": 6.64711632453568e-07,
"loss": 0.2623524069786072,
"step": 966
},
{
"epoch": 0.9363350278382958,
"grad_norm": 0.2125682681798935,
"learning_rate": 6.549364613880743e-07,
"loss": 0.3087378144264221,
"step": 967
},
{
"epoch": 0.9373033163882837,
"grad_norm": 0.20235757529735565,
"learning_rate": 6.451612903225807e-07,
"loss": 0.2936643660068512,
"step": 968
},
{
"epoch": 0.9382716049382716,
"grad_norm": 0.1939656287431717,
"learning_rate": 6.35386119257087e-07,
"loss": 0.2780473828315735,
"step": 969
},
{
"epoch": 0.9392398934882595,
"grad_norm": 0.20643159747123718,
"learning_rate": 6.256109481915935e-07,
"loss": 0.2650626003742218,
"step": 970
},
{
"epoch": 0.9402081820382474,
"grad_norm": 0.1930253654718399,
"learning_rate": 6.158357771260998e-07,
"loss": 0.305324912071228,
"step": 971
},
{
"epoch": 0.9411764705882353,
"grad_norm": 0.19949081540107727,
"learning_rate": 6.060606060606061e-07,
"loss": 0.27924615144729614,
"step": 972
},
{
"epoch": 0.9421447591382232,
"grad_norm": 0.1923617720603943,
"learning_rate": 5.962854349951125e-07,
"loss": 0.33369550108909607,
"step": 973
},
{
"epoch": 0.9431130476882111,
"grad_norm": 0.1924324929714203,
"learning_rate": 5.865102639296188e-07,
"loss": 0.2702648937702179,
"step": 974
},
{
"epoch": 0.944081336238199,
"grad_norm": 0.189810648560524,
"learning_rate": 5.767350928641252e-07,
"loss": 0.2990330457687378,
"step": 975
},
{
"epoch": 0.9450496247881869,
"grad_norm": 0.2015506476163864,
"learning_rate": 5.669599217986315e-07,
"loss": 0.30142831802368164,
"step": 976
},
{
"epoch": 0.9460179133381748,
"grad_norm": 0.21465028822422028,
"learning_rate": 5.571847507331378e-07,
"loss": 0.2767145037651062,
"step": 977
},
{
"epoch": 0.9469862018881626,
"grad_norm": 0.19279153645038605,
"learning_rate": 5.474095796676442e-07,
"loss": 0.24644437432289124,
"step": 978
},
{
"epoch": 0.9479544904381506,
"grad_norm": 0.20867611467838287,
"learning_rate": 5.376344086021506e-07,
"loss": 0.27333688735961914,
"step": 979
},
{
"epoch": 0.9489227789881385,
"grad_norm": 0.19024871289730072,
"learning_rate": 5.27859237536657e-07,
"loss": 0.2586132884025574,
"step": 980
},
{
"epoch": 0.9498910675381264,
"grad_norm": 0.18523293733596802,
"learning_rate": 5.180840664711633e-07,
"loss": 0.2814341187477112,
"step": 981
},
{
"epoch": 0.9508593560881142,
"grad_norm": 0.19874310493469238,
"learning_rate": 5.083088954056696e-07,
"loss": 0.27490949630737305,
"step": 982
},
{
"epoch": 0.9518276446381021,
"grad_norm": 0.21202170848846436,
"learning_rate": 4.98533724340176e-07,
"loss": 0.2904297411441803,
"step": 983
},
{
"epoch": 0.9527959331880901,
"grad_norm": 0.2094363272190094,
"learning_rate": 4.887585532746823e-07,
"loss": 0.27371150255203247,
"step": 984
},
{
"epoch": 0.953764221738078,
"grad_norm": 0.18295787274837494,
"learning_rate": 4.789833822091887e-07,
"loss": 0.2708626985549927,
"step": 985
},
{
"epoch": 0.9547325102880658,
"grad_norm": 0.2100997418165207,
"learning_rate": 4.6920821114369504e-07,
"loss": 0.26008886098861694,
"step": 986
},
{
"epoch": 0.9557007988380537,
"grad_norm": 0.20343877375125885,
"learning_rate": 4.594330400782014e-07,
"loss": 0.2885707914829254,
"step": 987
},
{
"epoch": 0.9566690873880417,
"grad_norm": 0.2062508761882782,
"learning_rate": 4.496578690127078e-07,
"loss": 0.2915845215320587,
"step": 988
},
{
"epoch": 0.9576373759380296,
"grad_norm": 0.21393194794654846,
"learning_rate": 4.3988269794721416e-07,
"loss": 0.3045470714569092,
"step": 989
},
{
"epoch": 0.9586056644880174,
"grad_norm": 0.20916247367858887,
"learning_rate": 4.301075268817205e-07,
"loss": 0.3206391930580139,
"step": 990
},
{
"epoch": 0.9595739530380053,
"grad_norm": 0.20829743146896362,
"learning_rate": 4.203323558162268e-07,
"loss": 0.281288743019104,
"step": 991
},
{
"epoch": 0.9605422415879932,
"grad_norm": 0.21254244446754456,
"learning_rate": 4.1055718475073313e-07,
"loss": 0.29028719663619995,
"step": 992
},
{
"epoch": 0.9615105301379812,
"grad_norm": 0.20817913115024567,
"learning_rate": 4.007820136852395e-07,
"loss": 0.3223232626914978,
"step": 993
},
{
"epoch": 0.962478818687969,
"grad_norm": 0.19397568702697754,
"learning_rate": 3.910068426197459e-07,
"loss": 0.2968447208404541,
"step": 994
},
{
"epoch": 0.9634471072379569,
"grad_norm": 0.1994376927614212,
"learning_rate": 3.8123167155425226e-07,
"loss": 0.2874579131603241,
"step": 995
},
{
"epoch": 0.9644153957879448,
"grad_norm": 0.20042456686496735,
"learning_rate": 3.714565004887586e-07,
"loss": 0.25470271706581116,
"step": 996
},
{
"epoch": 0.9653836843379328,
"grad_norm": 0.21064911782741547,
"learning_rate": 3.6168132942326495e-07,
"loss": 0.25948402285575867,
"step": 997
},
{
"epoch": 0.9663519728879206,
"grad_norm": 0.19920513033866882,
"learning_rate": 3.5190615835777133e-07,
"loss": 0.2682594358921051,
"step": 998
},
{
"epoch": 0.9673202614379085,
"grad_norm": 0.1974617838859558,
"learning_rate": 3.421309872922776e-07,
"loss": 0.2706855833530426,
"step": 999
},
{
"epoch": 0.9682885499878964,
"grad_norm": 0.19910918176174164,
"learning_rate": 3.32355816226784e-07,
"loss": 0.2881166338920593,
"step": 1000
},
{
"epoch": 0.9692568385378842,
"grad_norm": 0.19066068530082703,
"learning_rate": 3.2258064516129035e-07,
"loss": 0.2593529224395752,
"step": 1001
},
{
"epoch": 0.9702251270878722,
"grad_norm": 0.21316994726657867,
"learning_rate": 3.128054740957967e-07,
"loss": 0.2673231363296509,
"step": 1002
},
{
"epoch": 0.9711934156378601,
"grad_norm": 0.1907181590795517,
"learning_rate": 3.0303030303030305e-07,
"loss": 0.2969304323196411,
"step": 1003
},
{
"epoch": 0.972161704187848,
"grad_norm": 0.2059427797794342,
"learning_rate": 2.932551319648094e-07,
"loss": 0.2977202832698822,
"step": 1004
},
{
"epoch": 0.9731299927378358,
"grad_norm": 0.19578853249549866,
"learning_rate": 2.8347996089931575e-07,
"loss": 0.2898738980293274,
"step": 1005
},
{
"epoch": 0.9740982812878237,
"grad_norm": 0.19571205973625183,
"learning_rate": 2.737047898338221e-07,
"loss": 0.2661632299423218,
"step": 1006
},
{
"epoch": 0.9750665698378117,
"grad_norm": 0.21246828138828278,
"learning_rate": 2.639296187683285e-07,
"loss": 0.26930439472198486,
"step": 1007
},
{
"epoch": 0.9760348583877996,
"grad_norm": 0.20583873987197876,
"learning_rate": 2.541544477028348e-07,
"loss": 0.2842969298362732,
"step": 1008
},
{
"epoch": 0.9770031469377874,
"grad_norm": 0.18972201645374298,
"learning_rate": 2.4437927663734114e-07,
"loss": 0.28072866797447205,
"step": 1009
},
{
"epoch": 0.9779714354877753,
"grad_norm": 0.2141742706298828,
"learning_rate": 2.3460410557184752e-07,
"loss": 0.28579071164131165,
"step": 1010
},
{
"epoch": 0.9789397240377633,
"grad_norm": 0.2145223468542099,
"learning_rate": 2.248289345063539e-07,
"loss": 0.29237863421440125,
"step": 1011
},
{
"epoch": 0.9799080125877512,
"grad_norm": 0.18137916922569275,
"learning_rate": 2.1505376344086024e-07,
"loss": 0.27613335847854614,
"step": 1012
},
{
"epoch": 0.980876301137739,
"grad_norm": 0.18069401383399963,
"learning_rate": 2.0527859237536657e-07,
"loss": 0.255997896194458,
"step": 1013
},
{
"epoch": 0.9818445896877269,
"grad_norm": 0.1869657039642334,
"learning_rate": 1.9550342130987294e-07,
"loss": 0.25478553771972656,
"step": 1014
},
{
"epoch": 0.9828128782377148,
"grad_norm": 0.22846192121505737,
"learning_rate": 1.857282502443793e-07,
"loss": 0.2954884171485901,
"step": 1015
},
{
"epoch": 0.9837811667877028,
"grad_norm": 0.20486541092395782,
"learning_rate": 1.7595307917888567e-07,
"loss": 0.2752358317375183,
"step": 1016
},
{
"epoch": 0.9847494553376906,
"grad_norm": 0.20248091220855713,
"learning_rate": 1.66177908113392e-07,
"loss": 0.27697792649269104,
"step": 1017
},
{
"epoch": 0.9857177438876785,
"grad_norm": 0.2098854035139084,
"learning_rate": 1.5640273704789836e-07,
"loss": 0.30580762028694153,
"step": 1018
},
{
"epoch": 0.9866860324376664,
"grad_norm": 0.19671432673931122,
"learning_rate": 1.466275659824047e-07,
"loss": 0.2934240698814392,
"step": 1019
},
{
"epoch": 0.9876543209876543,
"grad_norm": 0.2045270800590515,
"learning_rate": 1.3685239491691106e-07,
"loss": 0.26624009013175964,
"step": 1020
},
{
"epoch": 0.9886226095376422,
"grad_norm": 0.18321022391319275,
"learning_rate": 1.270772238514174e-07,
"loss": 0.22881919145584106,
"step": 1021
},
{
"epoch": 0.9895908980876301,
"grad_norm": 0.1937808096408844,
"learning_rate": 1.1730205278592376e-07,
"loss": 0.27172714471817017,
"step": 1022
},
{
"epoch": 0.990559186637618,
"grad_norm": 0.22623102366924286,
"learning_rate": 1.0752688172043012e-07,
"loss": 0.3045693039894104,
"step": 1023
},
{
"epoch": 0.9915274751876059,
"grad_norm": 0.2050536870956421,
"learning_rate": 9.775171065493647e-08,
"loss": 0.26483532786369324,
"step": 1024
},
{
"epoch": 0.9924957637375939,
"grad_norm": 0.19890232384204865,
"learning_rate": 8.797653958944283e-08,
"loss": 0.25764352083206177,
"step": 1025
},
{
"epoch": 0.9934640522875817,
"grad_norm": 0.198257714509964,
"learning_rate": 7.820136852394918e-08,
"loss": 0.27279871702194214,
"step": 1026
},
{
"epoch": 0.9944323408375696,
"grad_norm": 0.19575795531272888,
"learning_rate": 6.842619745845553e-08,
"loss": 0.2848638594150543,
"step": 1027
},
{
"epoch": 0.9954006293875575,
"grad_norm": 0.19270561635494232,
"learning_rate": 5.865102639296188e-08,
"loss": 0.26724010705947876,
"step": 1028
},
{
"epoch": 0.9963689179375453,
"grad_norm": 0.2059524953365326,
"learning_rate": 4.8875855327468235e-08,
"loss": 0.28259921073913574,
"step": 1029
},
{
"epoch": 0.9973372064875333,
"grad_norm": 0.22036604583263397,
"learning_rate": 3.910068426197459e-08,
"loss": 0.2710026502609253,
"step": 1030
},
{
"epoch": 0.9983054950375212,
"grad_norm": 0.19827115535736084,
"learning_rate": 2.932551319648094e-08,
"loss": 0.2782309949398041,
"step": 1031
},
{
"epoch": 0.9992737835875091,
"grad_norm": 0.19505129754543304,
"learning_rate": 1.9550342130987295e-08,
"loss": 0.2857624292373657,
"step": 1032
},
{
"epoch": 1.0,
"grad_norm": 0.22344279289245605,
"learning_rate": 9.775171065493648e-09,
"loss": 0.25447842478752136,
"step": 1033
}
],
"logging_steps": 1,
"max_steps": 1033,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 3.907215148242811e+18,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}