{ "model": "EleutherAI/pythia-31m", "theta": 10000, "gamma_pred": 0.99999561666838, "C_theory": 9.2103, "distances": [ 10, 20, 30, 50, 100, 200, 500, 1000, 2000 ], "distances_fit": [ 30, 50, 100, 200, 500, 1000, 2000 ], "d_min_fit": 30, "n_prompts_per_distance": 150, "seeds": [ 42, 123, 7 ], "attn_by_distance": { "10": { "mean_across_seeds": 0.01301000003495978, "std_across_seeds": 0.00046688234450363445, "per_seed_means": [ 0.013054606715838114, 0.01241719133220613, 0.013558202056835096 ] }, "20": { "mean_across_seeds": 0.00693967725809974, "std_across_seeds": 0.0003111505620949703, "per_seed_means": [ 0.006937875649115691, 0.006559501201457654, 0.007321654923725873 ] }, "30": { "mean_across_seeds": 0.004015894493398567, "std_across_seeds": 0.00011863265749955042, "per_seed_means": [ 0.0040281298368548355, 0.0038648689771071075, 0.004154684666233758 ] }, "50": { "mean_across_seeds": 0.0036222275638202617, "std_across_seeds": 0.0001889761810972585, "per_seed_means": [ 0.003356990779672439, 0.003726471992752825, 0.0037832199190355217 ] }, "100": { "mean_across_seeds": 0.0010882257377185548, "std_across_seeds": 0.00012946031918837256, "per_seed_means": [ 0.0009092199558411569, 0.0012110101386497262, 0.0011444471186647812 ] }, "200": { "mean_across_seeds": 0.0005025619622918183, "std_across_seeds": 6.674565621401741e-05, "per_seed_means": [ 0.0004423195596852262, 0.0005956165699171834, 0.00046974975727304504 ] }, "500": { "mean_across_seeds": 0.00019388230073268966, "std_across_seeds": 9.190553129509277e-06, "per_seed_means": [ 0.0001877195072908459, 0.00018705338045644262, 0.00020687401445078043 ] }, "1000": { "mean_across_seeds": 2.1299731392648228e-05, "std_across_seeds": 7.105662382177325e-06, "per_seed_means": [ 2.9809543584254546e-05, 2.167332172462011e-05, 1.2416328869070034e-05 ] }, "2000": { "mean_across_seeds": 7.186259580025636e-06, "std_across_seeds": 8.041626579142191e-07, "per_seed_means": [ 6.1229779024548255e-06, 7.368486014153556e-06, 8.067314823468526e-06 ] } }, "fit_power_law": { "gamma": 1.5398244746231446, "log_A": 0.24504650180534032, "R2": 0.964259, "n_points": 7 }, "fit_exponential": { "lambda": 0.003151060152234357, "log_A": -6.354208754917211, "R2": 0.870219, "n_points": 7 }, "delta_R2_power_minus_exp": 0.094, "gamma_CI_95": { "lo": 1.1197173993826866, "hi": 1.769418762310705 }, "decision": "ANOMALY: long-context training effect" }