{ "model": "EleutherAI/pythia-1b", "seq_len": 4096, "n_chunks": 30, "theta": 10000.0, "T_train": 2048, "gamma_used": 0.931, "gamma_pade_at_T_train": 0.7470064429851826, "gamma_pade_at_seq_len": 0.5508312818257147, "d_horizon": 505.3378343021414, "D_f_f0_9": 2145, "f_retain": 0.9, "modes": { "baseline": { "ppl": 79.08715824739956, "nll": 4.370550513267517, "std": 0.25347707687263066, "delta_ppl": 0.0 }, "hard_cutoff": { "ppl": 25.261271320317242, "nll": 3.229272445042928, "std": 0.20028306722355718, "delta_ppl": -53.825886927082315 }, "soft_decay": { "ppl": 26.720916594771683, "nll": 3.2854466517766316, "std": 0.2015519103709852, "delta_ppl": -52.36624165262788 }, "hard_df": { "ppl": 19.954902748726713, "nll": 2.9934748649597167, "std": 0.22525463122726258, "delta_ppl": -59.13225549867285 } }, "verdict": "SOFT_DECAY_LOSES \u2014 idea 4 v25 refuted" }