{ "model": "EleutherAI/pythia-31m", "theta": 10000, "gamma_pred": 0.99999561666838, "C_theory": 9.2103, "distances": [ 10, 20, 30, 50, 100, 200, 500, 1000, 2000 ], "distances_fit": [ 30, 50, 100, 200, 500, 1000, 2000 ], "d_min_fit": 30, "n_prompts_per_distance": 150, "seeds": [ 42, 123, 7 ], "attn_by_distance": { "10": { "mean_across_seeds": 0.015075270254164932, "std_across_seeds": 0.0016076363765655506, "per_seed_means": [ 0.015652700106923777, 0.01288217271057268, 0.016690937944998343 ] }, "20": { "mean_across_seeds": 0.007979718983923603, "std_across_seeds": 0.0008011809172800115, "per_seed_means": [ 0.007325330031647657, 0.007505871877074241, 0.009107955043048909 ] }, "30": { "mean_across_seeds": 0.004943025353131816, "std_across_seeds": 0.0008214856215946895, "per_seed_means": [ 0.004706252690715095, 0.004076418298839902, 0.0060464050698404515 ] }, "50": { "mean_across_seeds": 0.003909375242526746, "std_across_seeds": 0.0004103009057058668, "per_seed_means": [ 0.004045853732774655, 0.0033527197137785455, 0.004329552281027039 ] }, "100": { "mean_across_seeds": 0.001335802334417369, "std_across_seeds": 0.00014613574339847555, "per_seed_means": [ 0.0012400429498666198, 0.0012250753167124156, 0.0015422887366730719 ] }, "200": { "mean_across_seeds": 0.0006274800656319712, "std_across_seeds": 8.133098864096775e-05, "per_seed_means": [ 0.000617688476195326, 0.0005331277462513148, 0.0007316239744492729 ] }, "500": { "mean_across_seeds": 0.0003229697247034993, "std_across_seeds": 0.00010411438760957841, "per_seed_means": [ 0.0002789432165203228, 0.0002233032413945087, 0.00046666271619566637 ] }, "1000": { "mean_across_seeds": 0.00010229767169578944, "std_across_seeds": 6.545994570499704e-05, "per_seed_means": [ 0.00014693857560094632, 9.742619926242924e-06, 0.00015021181956017908 ] }, "2000": { "mean_across_seeds": 2.207666448479115e-05, "std_across_seeds": 9.890072727231223e-06, "per_seed_means": [ 3.0325915145184013e-05, 8.170285452860545e-06, 2.7733792856328893e-05 ] } }, "fit_power_law": { "gamma": 1.2350013988825523, "log_A": -0.8481173688844952, "R2": 0.973742, "n_points": 7 }, "fit_exponential": { "lambda": 0.0025245756131658517, "log_A": -6.1424841272489665, "R2": 0.876899, "n_points": 7 }, "delta_R2_power_minus_exp": 0.0968, "gamma_CI_95": { "lo": 1.0142831942090689, "hi": 1.3931415910718121 }, "decision": "ANOMALY: long-context training effect" }