Spaces:
Running
Running
File size: 3,047 Bytes
535348a | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 | {
"model": "EleutherAI/pythia-70m",
"theta": 10000,
"gamma_pred": 0.99999561666838,
"C_theory": 9.2103,
"distances": [
10,
20,
30,
50,
100,
200,
500,
1000,
2000
],
"distances_fit": [
30,
50,
100,
200,
500,
1000,
2000
],
"d_min_fit": 30,
"n_prompts_per_distance": 150,
"seeds": [
42,
123,
7
],
"attn_by_distance": {
"10": {
"mean_across_seeds": 0.014303823270731502,
"std_across_seeds": 0.0005389411970747021,
"per_seed_means": [
0.013846093355678022,
0.01400491089404871,
0.015060465562467774
]
},
"20": {
"mean_across_seeds": 0.011754385376763014,
"std_across_seeds": 0.0003750825584104145,
"per_seed_means": [
0.012279952564276754,
0.011553768791879217,
0.011429434774133067
]
},
"30": {
"mean_across_seeds": 0.004743960113911372,
"std_across_seeds": 0.0004785557585293483,
"per_seed_means": [
0.004255784423633789,
0.004582108563821142,
0.005393987354279185
]
},
"50": {
"mean_across_seeds": 0.0035309766319632113,
"std_across_seeds": 0.00030331972886044056,
"per_seed_means": [
0.003315780726649488,
0.003317214461664359,
0.003959934707575788
]
},
"100": {
"mean_across_seeds": 0.0014786803761186699,
"std_across_seeds": 4.0198997401330876e-05,
"per_seed_means": [
0.0015339826285101783,
0.0014396193967938112,
0.00146243910305202
]
},
"200": {
"mean_across_seeds": 0.0007715668188918952,
"std_across_seeds": 6.92475008380853e-05,
"per_seed_means": [
0.0008646439060976263,
0.0007513971288184015,
0.0006986594217596576
]
},
"500": {
"mean_across_seeds": 0.0002215023043552517,
"std_across_seeds": 2.714460980421038e-05,
"per_seed_means": [
0.00024024882055527995,
0.00024114061049961797,
0.0001831174820108572
]
},
"1000": {
"mean_across_seeds": 8.175638578702798e-05,
"std_across_seeds": 1.2207321420922821e-05,
"per_seed_means": [
8.717483436763965e-05,
9.324252537529295e-05,
6.485179761815137e-05
]
},
"2000": {
"mean_across_seeds": 4.288268759245915e-05,
"std_across_seeds": 9.477341124989812e-06,
"per_seed_means": [
3.2012329573566e-05,
5.510804182828603e-05,
4.152769137552544e-05
]
}
},
"fit_power_law": {
"gamma": 1.1705141984482668,
"log_A": -1.1650521267460994,
"R2": 0.994076,
"n_points": 7
},
"fit_exponential": {
"lambda": 0.00226006648331684,
"log_A": -6.2565121323123964,
"R2": 0.79868,
"n_points": 7
},
"delta_R2_power_minus_exp": 0.1954,
"gamma_CI_95": {
"lo": 1.065560635977702,
"hi": 1.268916819874023
},
"decision": "ANOMALY: long-context training effect"
} |