{ "model": "EleutherAI/pythia-70m", "theta": 10000, "gamma_pred": 0.99999561666838, "C_theory": 9.2103, "distances": [ 10, 20, 30, 50, 100, 200, 500, 1000, 2000 ], "distances_fit": [ 30, 50, 100, 200, 500, 1000, 2000 ], "d_min_fit": 30, "n_prompts_per_distance": 150, "seeds": [ 42, 123, 7 ], "attn_by_distance": { "10": { "mean_across_seeds": 0.014303823270731502, "std_across_seeds": 0.0005389411970747021, "per_seed_means": [ 0.013846093355678022, 0.01400491089404871, 0.015060465562467774 ] }, "20": { "mean_across_seeds": 0.011754385376763014, "std_across_seeds": 0.0003750825584104145, "per_seed_means": [ 0.012279952564276754, 0.011553768791879217, 0.011429434774133067 ] }, "30": { "mean_across_seeds": 0.004743960113911372, "std_across_seeds": 0.0004785557585293483, "per_seed_means": [ 0.004255784423633789, 0.004582108563821142, 0.005393987354279185 ] }, "50": { "mean_across_seeds": 0.0035309766319632113, "std_across_seeds": 0.00030331972886044056, "per_seed_means": [ 0.003315780726649488, 0.003317214461664359, 0.003959934707575788 ] }, "100": { "mean_across_seeds": 0.0014786803761186699, "std_across_seeds": 4.0198997401330876e-05, "per_seed_means": [ 0.0015339826285101783, 0.0014396193967938112, 0.00146243910305202 ] }, "200": { "mean_across_seeds": 0.0007715668188918952, "std_across_seeds": 6.92475008380853e-05, "per_seed_means": [ 0.0008646439060976263, 0.0007513971288184015, 0.0006986594217596576 ] }, "500": { "mean_across_seeds": 0.0002215023043552517, "std_across_seeds": 2.714460980421038e-05, "per_seed_means": [ 0.00024024882055527995, 0.00024114061049961797, 0.0001831174820108572 ] }, "1000": { "mean_across_seeds": 8.175638578702798e-05, "std_across_seeds": 1.2207321420922821e-05, "per_seed_means": [ 8.717483436763965e-05, 9.324252537529295e-05, 6.485179761815137e-05 ] }, "2000": { "mean_across_seeds": 4.288268759245915e-05, "std_across_seeds": 9.477341124989812e-06, "per_seed_means": [ 3.2012329573566e-05, 5.510804182828603e-05, 4.152769137552544e-05 ] } }, "fit_power_law": { "gamma": 1.1705141984482668, "log_A": -1.1650521267460994, "R2": 0.994076, "n_points": 7 }, "fit_exponential": { "lambda": 0.00226006648331684, "log_A": -6.2565121323123964, "R2": 0.79868, "n_points": 7 }, "delta_R2_power_minus_exp": 0.1954, "gamma_CI_95": { "lo": 1.065560635977702, "hi": 1.268916819874023 }, "decision": "ANOMALY: long-context training effect" }