taf-agent / data /exp_prune /EleutherAI--pythia-1b_prune_v2.json
karlexmarin's picture
feat: ship paper artefacts + CLI diagnostic alongside browser tool
535348a
raw
history blame
1.9 kB
{
"model": "EleutherAI/pythia-1b",
"n_layers": 16,
"n_heads": 8,
"d_head": 256,
"n_pairs": 128,
"k_dead": 91,
"theta": 10000.0,
"T_train": 2048,
"L_crit_pred": 15,
"alpha": 0.9374717358310781,
"n_eval_chunks": 100,
"chunk_size": 1024,
"n_lambada": 200,
"baseline": {
"ppl": 14.973609132474365,
"nll": 2.7062892603874205,
"std": 0.2595318035858822,
"lambada_acc": 0.57
},
"sweeps": [
{
"L_prune": 11,
"n_pruned_entries": 12124160,
"frac_qk_pruned": 0.09033203125,
"ppl": 16.53646806228249,
"nll": 2.805568127632141,
"std": 0.2573620626824769,
"delta_ppl": 1.562858929808126,
"delta_nll": 0.09927886724472046,
"lambada_acc": 0.405,
"delta_lambada": -0.16499999999999992,
"verdict": "DEGRADED"
},
{
"L_prune": 13,
"n_pruned_entries": 7274496,
"frac_qk_pruned": 0.05419921875,
"ppl": 15.548956277713987,
"nll": 2.743993515968323,
"std": 0.25814710014606357,
"delta_ppl": 0.5753471452396219,
"delta_nll": 0.0377042555809024,
"lambada_acc": 0.55,
"delta_lambada": -0.019999999999999907,
"verdict": "DEGRADED"
},
{
"L_prune": 15,
"n_pruned_entries": 2424832,
"frac_qk_pruned": 0.01806640625,
"ppl": 15.16368330360712,
"nll": 2.7189033126831053,
"std": 0.2584823516819064,
"delta_ppl": 0.19007417113275515,
"delta_nll": 0.012614052295684797,
"lambada_acc": 0.54,
"delta_lambada": -0.029999999999999916,
"verdict": "OK"
},
{
"L_prune": 16,
"n_pruned_entries": 0,
"frac_qk_pruned": 0.0,
"ppl": 14.973609132474365,
"nll": 2.7062892603874205,
"std": 0.2595318035858822,
"delta_ppl": 0.0,
"delta_nll": 0.0,
"lambada_acc": 0.57,
"delta_lambada": 0.0,
"verdict": "OK"
}
]
}