Spaces:
Running
Running
| { | |
| "model": "EleutherAI/pythia-1b", | |
| "n_layers": 16, | |
| "n_heads": 8, | |
| "d_head": 256, | |
| "n_pairs": 128, | |
| "k_dead": 91, | |
| "theta": 10000.0, | |
| "T_train": 2048, | |
| "L_crit_pred": 15, | |
| "alpha": 0.9374717358310781, | |
| "n_eval_chunks": 100, | |
| "chunk_size": 1024, | |
| "n_lambada": 200, | |
| "baseline": { | |
| "ppl": 14.973609132474365, | |
| "nll": 2.7062892603874205, | |
| "std": 0.2595318035858822, | |
| "lambada_acc": 0.57 | |
| }, | |
| "sweeps": [ | |
| { | |
| "L_prune": 11, | |
| "n_pruned_entries": 12124160, | |
| "frac_qk_pruned": 0.09033203125, | |
| "ppl": 16.53646806228249, | |
| "nll": 2.805568127632141, | |
| "std": 0.2573620626824769, | |
| "delta_ppl": 1.562858929808126, | |
| "delta_nll": 0.09927886724472046, | |
| "lambada_acc": 0.405, | |
| "delta_lambada": -0.16499999999999992, | |
| "verdict": "DEGRADED" | |
| }, | |
| { | |
| "L_prune": 13, | |
| "n_pruned_entries": 7274496, | |
| "frac_qk_pruned": 0.05419921875, | |
| "ppl": 15.548956277713987, | |
| "nll": 2.743993515968323, | |
| "std": 0.25814710014606357, | |
| "delta_ppl": 0.5753471452396219, | |
| "delta_nll": 0.0377042555809024, | |
| "lambada_acc": 0.55, | |
| "delta_lambada": -0.019999999999999907, | |
| "verdict": "DEGRADED" | |
| }, | |
| { | |
| "L_prune": 15, | |
| "n_pruned_entries": 2424832, | |
| "frac_qk_pruned": 0.01806640625, | |
| "ppl": 15.16368330360712, | |
| "nll": 2.7189033126831053, | |
| "std": 0.2584823516819064, | |
| "delta_ppl": 0.19007417113275515, | |
| "delta_nll": 0.012614052295684797, | |
| "lambada_acc": 0.54, | |
| "delta_lambada": -0.029999999999999916, | |
| "verdict": "OK" | |
| }, | |
| { | |
| "L_prune": 16, | |
| "n_pruned_entries": 0, | |
| "frac_qk_pruned": 0.0, | |
| "ppl": 14.973609132474365, | |
| "nll": 2.7062892603874205, | |
| "std": 0.2595318035858822, | |
| "delta_ppl": 0.0, | |
| "delta_nll": 0.0, | |
| "lambada_acc": 0.57, | |
| "delta_lambada": 0.0, | |
| "verdict": "OK" | |
| } | |
| ] | |
| } |