taf-agent / data /checkpoint_eval /eleutherai_benchmarks_pythia70m.json
karlexmarin's picture
feat: ship paper artefacts + CLI diagnostic alongside browser tool
535348a
raw
history blame
7.16 kB
[
{
"step": 0,
"lambada_ppl": 3684074.4512471026,
"lambada_acc": 0.0,
"piqa": 0.5255712731229597,
"sciq": 0.194,
"arc_easy": 0.2718855218855219,
"arc_challenge": 0.24658703071672355,
"winogrande": 0.4988161010260458
},
{
"step": 1,
"lambada_ppl": 3684074.4512471026,
"lambada_acc": 0.0,
"piqa": 0.5255712731229597,
"sciq": 0.194,
"arc_easy": 0.2718855218855219,
"arc_challenge": 0.24658703071672355,
"winogrande": 0.4988161010260458
},
{
"step": 2,
"lambada_ppl": 3683856.625077963,
"lambada_acc": 0.0,
"piqa": 0.5228509249183896,
"sciq": 0.194,
"arc_easy": 0.27104377104377103,
"arc_challenge": 0.24658703071672355,
"winogrande": 0.4980268350434096
},
{
"step": 4,
"lambada_ppl": 3681578.6002349453,
"lambada_acc": 0.0,
"piqa": 0.5261153427638737,
"sciq": 0.193,
"arc_easy": 0.27230639730639733,
"arc_challenge": 0.24573378839590443,
"winogrande": 0.49329123914759276
},
{
"step": 8,
"lambada_ppl": 3646061.8456566,
"lambada_acc": 0.0,
"piqa": 0.5239390642002176,
"sciq": 0.201,
"arc_easy": 0.26641414141414144,
"arc_challenge": 0.24488054607508533,
"winogrande": 0.489344909234412
},
{
"step": 16,
"lambada_ppl": 3526086.6936456356,
"lambada_acc": 0.0,
"piqa": 0.5266594124047879,
"sciq": 0.205,
"arc_easy": 0.26262626262626265,
"arc_challenge": 0.24829351535836178,
"winogrande": 0.48224151539068666
},
{
"step": 32,
"lambada_ppl": 3288862.4386760374,
"lambada_acc": 0.0,
"piqa": 0.5272034820457019,
"sciq": 0.223,
"arc_easy": 0.2668350168350168,
"arc_challenge": 0.2440273037542662,
"winogrande": 0.4940805051302289
},
{
"step": 64,
"lambada_ppl": 2347965.083490206,
"lambada_acc": 0.0,
"piqa": 0.5402611534276387,
"sciq": 0.199,
"arc_easy": 0.2706228956228956,
"arc_challenge": 0.24488054607508533,
"winogrande": 0.4972375690607735
},
{
"step": 128,
"lambada_ppl": 1665636.981895382,
"lambada_acc": 0.0,
"piqa": 0.5261153427638737,
"sciq": 0.219,
"arc_easy": 0.2676767676767677,
"arc_challenge": 0.23890784982935154,
"winogrande": 0.49171270718232046
},
{
"step": 256,
"lambada_ppl": 705314.6370389248,
"lambada_acc": 0.0,
"piqa": 0.5179542981501633,
"sciq": 0.228,
"arc_easy": 0.27441077441077444,
"arc_challenge": 0.24146757679180889,
"winogrande": 0.4925019731649566
},
{
"step": 512,
"lambada_ppl": 116756.33428953367,
"lambada_acc": 0.0,
"piqa": 0.5386289445048966,
"sciq": 0.264,
"arc_easy": 0.2984006734006734,
"arc_challenge": 0.2175767918088737,
"winogrande": 0.5098658247829518
},
{
"step": 1000,
"lambada_ppl": 4465.3093044480365,
"lambada_acc": 0.050067921599068504,
"piqa": 0.5603917301414582,
"sciq": 0.452,
"arc_easy": 0.3085016835016835,
"arc_challenge": 0.21160409556313994,
"winogrande": 0.5114443567482242
},
{
"step": 3000,
"lambada_ppl": 411.658325603736,
"lambada_acc": 0.12128856976518533,
"piqa": 0.5794341675734495,
"sciq": 0.592,
"arc_easy": 0.35058922558922556,
"arc_challenge": 0.21245733788395904,
"winogrande": 0.5082872928176796
},
{
"step": 13000,
"lambada_ppl": 136.24789967804702,
"lambada_acc": 0.21036289540073744,
"piqa": 0.5865070729053319,
"sciq": 0.65,
"arc_easy": 0.3952020202020202,
"arc_challenge": 0.22184300341296928,
"winogrande": 0.5027624309392266
},
{
"step": 23000,
"lambada_ppl": 121.29880202709046,
"lambada_acc": 0.2225887832330681,
"piqa": 0.6077257889009793,
"sciq": 0.653,
"arc_easy": 0.3952020202020202,
"arc_challenge": 0.20477815699658702,
"winogrande": 0.48539857932123126
},
{
"step": 33000,
"lambada_ppl": 118.09596009074914,
"lambada_acc": 0.2233650300795653,
"piqa": 0.5984766050054406,
"sciq": 0.664,
"arc_easy": 0.4010942760942761,
"arc_challenge": 0.21416382252559726,
"winogrande": 0.4972375690607735
},
{
"step": 43000,
"lambada_ppl": 112.36318862751271,
"lambada_acc": 0.22821657287017272,
"piqa": 0.5903155603917302,
"sciq": 0.663,
"arc_easy": 0.39941077441077444,
"arc_challenge": 0.20477815699658702,
"winogrande": 0.5027624309392266
},
{
"step": 53000,
"lambada_ppl": 94.31955728859376,
"lambada_acc": 0.25344459538133124,
"piqa": 0.5919477693144722,
"sciq": 0.664,
"arc_easy": 0.39225589225589225,
"arc_challenge": 0.21331058020477817,
"winogrande": 0.494869771112865
},
{
"step": 63000,
"lambada_ppl": 101.68439461161867,
"lambada_acc": 0.23287405394915583,
"piqa": 0.5930359085963003,
"sciq": 0.681,
"arc_easy": 0.4090909090909091,
"arc_challenge": 0.20733788395904437,
"winogrande": 0.4996053670086819
},
{
"step": 73000,
"lambada_ppl": 117.09850923121336,
"lambada_acc": 0.22627595575392975,
"piqa": 0.5984766050054406,
"sciq": 0.696,
"arc_easy": 0.4090909090909091,
"arc_challenge": 0.22098976109215018,
"winogrande": 0.5146014206787688
},
{
"step": 83000,
"lambada_ppl": 124.26962204175287,
"lambada_acc": 0.22627595575392975,
"piqa": 0.5973884657236126,
"sciq": 0.633,
"arc_easy": 0.37415824915824913,
"arc_challenge": 0.22440273037542663,
"winogrande": 0.5193370165745856
},
{
"step": 93000,
"lambada_ppl": 140.52328755411287,
"lambada_acc": 0.21405006792159906,
"piqa": 0.5979325353645266,
"sciq": 0.642,
"arc_easy": 0.359006734006734,
"arc_challenge": 0.22013651877133106,
"winogrande": 0.5082872928176796
},
{
"step": 103000,
"lambada_ppl": 125.56792285427366,
"lambada_acc": 0.22608189404230544,
"piqa": 0.6022850924918389,
"sciq": 0.628,
"arc_easy": 0.3707912457912458,
"arc_challenge": 0.2226962457337884,
"winogrande": 0.5098658247829518
},
{
"step": 113000,
"lambada_ppl": 120.54804436871059,
"lambada_acc": 0.21618474674946633,
"piqa": 0.5914036996735582,
"sciq": 0.617,
"arc_easy": 0.3720538720538721,
"arc_challenge": 0.21928327645051193,
"winogrande": 0.5185477505919495
},
{
"step": 123000,
"lambada_ppl": 133.11058088169239,
"lambada_acc": 0.20415292062875995,
"piqa": 0.5859630032644179,
"sciq": 0.614,
"arc_easy": 0.36574074074074076,
"arc_challenge": 0.23037542662116042,
"winogrande": 0.5027624309392266
},
{
"step": 133000,
"lambada_ppl": 148.4586759416483,
"lambada_acc": 0.19347952648942363,
"piqa": 0.5984766050054406,
"sciq": 0.617,
"arc_easy": 0.3707912457912458,
"arc_challenge": 0.22013651877133106,
"winogrande": 0.5232833464877664
},
{
"step": 143000,
"lambada_ppl": 142.42891015470678,
"lambada_acc": 0.18455268775470599,
"piqa": 0.5946681175190425,
"sciq": 0.601,
"arc_easy": 0.37373737373737376,
"arc_challenge": 0.22098976109215018,
"winogrande": 0.5280189423835833
}
]