taf-agent / data /exp_prune /EleutherAI--pythia-2.8b_prune_v2.json
karlexmarin's picture
feat: ship paper artefacts + CLI diagnostic alongside browser tool
535348a
raw
history blame
1.93 kB
{
"model": "EleutherAI/pythia-2.8b",
"n_layers": 32,
"n_heads": 32,
"d_head": 80,
"n_pairs": 40,
"k_dead": 29,
"theta": 10000.0,
"T_train": 2048,
"L_crit_pred": 21,
"alpha": 0.6700912468018047,
"n_eval_chunks": 100,
"chunk_size": 1024,
"n_lambada": 200,
"baseline": {
"ppl": 11.497348191010678,
"nll": 2.4421164166927336,
"std": 0.2581671149222198,
"lambada_acc": 0.615
},
"sweeps": [
{
"L_prune": 17,
"n_pruned_entries": 54067200,
"frac_qk_pruned": 0.12890625,
"ppl": 14.078933084106032,
"nll": 2.6446795725822447,
"std": 0.2594730613347292,
"delta_ppl": 2.5815848930953536,
"delta_nll": 0.20256315588951113,
"lambada_acc": 0.425,
"delta_lambada": -0.19,
"verdict": "DEGRADED"
},
{
"L_prune": 19,
"n_pruned_entries": 46858240,
"frac_qk_pruned": 0.11171875,
"ppl": 13.294295816582924,
"nll": 2.587335057258606,
"std": 0.2611973881683509,
"delta_ppl": 1.7969476255722459,
"delta_nll": 0.1452186405658722,
"lambada_acc": 0.485,
"delta_lambada": -0.13,
"verdict": "DEGRADED"
},
{
"L_prune": 21,
"n_pruned_entries": 39649280,
"frac_qk_pruned": 0.09453125,
"ppl": 12.887260668117273,
"nll": 2.556239278316498,
"std": 0.26105946628430654,
"delta_ppl": 1.389912477106595,
"delta_nll": 0.1141228616237644,
"lambada_acc": 0.475,
"delta_lambada": -0.14,
"verdict": "DEGRADED"
},
{
"L_prune": 23,
"n_pruned_entries": 32440320,
"frac_qk_pruned": 0.07734375,
"ppl": 12.418053153651558,
"nll": 2.519151313304901,
"std": 0.2594559840569815,
"delta_ppl": 0.9207049626408796,
"delta_nll": 0.07703489661216745,
"lambada_acc": 0.53,
"delta_lambada": -0.08499999999999996,
"verdict": "DEGRADED"
}
]
}