File size: 4,036 Bytes
535348a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
{
  "experiment": "EXP-G4",
  "formula": "gamma = 1 - T_eval*sqrt(2)/theta",
  "date": "2026-04-18",
  "results": [
    {
      "prefix": "EleutherAI--pythia-70m",
      "name": "pythia-70m",
      "pe_type": "RoPE",
      "notes": "",
      "theta": 10000,
      "d_head": 64,
      "T_train": 2048,
      "T_eval_max": 2000.0,
      "n_points": 9,
      "gamma_obs": 0.7476017873166874,
      "gamma_new_pred": 0.717157287525381,
      "gamma_old_pred": 0.99999561666838,
      "err_new_pct": 4.245163553501351,
      "err_old_pct": -25.239493568239496,
      "R2": 0.9893049417040555
    },
    {
      "prefix": "meta-llama--Meta-Llama-3-8B",
      "name": "Meta-Llama-3-8B",
      "pe_type": "RoPE",
      "notes": "",
      "theta": 500000,
      "d_head": 128,
      "T_train": 8192,
      "T_eval_max": 2000.0,
      "n_points": 9,
      "gamma_obs": 1.0454762537473639,
      "gamma_new_pred": 0.9943431457505076,
      "gamma_old_pred": 0.7018781400200674,
      "err_new_pct": 5.1424006104313325,
      "err_old_pct": 48.95409817400391,
      "R2": 0.996718622313285
    },
    {
      "prefix": "Qwen--Qwen2.5-7B",
      "name": "Qwen2.5-7B",
      "pe_type": "RoPE",
      "notes": "",
      "theta": 1000000,
      "d_head": 128,
      "T_train": 8192,
      "T_eval_max": 2000.0,
      "n_points": 9,
      "gamma_obs": 0.9966953735480816,
      "gamma_new_pred": 0.9971715728752538,
      "gamma_old_pred": 0.6666637444455867,
      "err_new_pct": -0.047755004266634545,
      "err_old_pct": 49.5049613620367,
      "R2": 0.9963935840252253
    },
    {
      "prefix": "meta-llama--Llama-2-7b-hf",
      "name": "Llama-2-7b-hf",
      "pe_type": "RoPE",
      "notes": "artifact",
      "theta": 10000,
      "d_head": 128,
      "T_train": 4096,
      "T_eval_max": 2000.0,
      "n_points": 9,
      "gamma_obs": 0.2870574377368437,
      "gamma_new_pred": 0.717157287525381,
      "gamma_old_pred": 0.99999561666838,
      "err_new_pct": -59.97287586278841,
      "err_old_pct": -71.29413039896974,
      "R2": 0.881768027724978
    },
    {
      "prefix": "mistralai--Mistral-7B-v0.1",
      "name": "Mistral-7B-v0.1",
      "pe_type": "RoPE",
      "notes": "only_2pts",
      "theta": 10000,
      "d_head": 128,
      "T_train": 8192,
      "T_eval_max": 50.0,
      "n_points": 2,
      "gamma_obs": 1.213076772373504,
      "gamma_new_pred": 0.9929289321881345,
      "gamma_old_pred": 0.99999561666838,
      "err_new_pct": 22.17156062722697,
      "err_old_pct": 21.30820897145856,
      "R2": 1.0
    },
    {
      "prefix": "mistralai--Mistral-Nemo-Instruct-2407",
      "name": "Mistral-Nemo-Instruct-2407",
      "pe_type": "RoPE",
      "notes": "partial_7pts",
      "theta": 1000000,
      "d_head": 128,
      "T_train": 131072,
      "T_eval_max": 500.0,
      "n_points": 7,
      "gamma_obs": 0.5407084190220748,
      "gamma_new_pred": 0.9992928932188134,
      "gamma_old_pred": 0.6666637444455867,
      "err_new_pct": -45.8908971842676,
      "err_old_pct": -18.893381629483898,
      "R2": 0.9671649502959694
    },
    {
      "prefix": "google--gemma-2-9b-it",
      "name": "gemma-2-9b-it",
      "pe_type": "RoPE",
      "notes": "partial",
      "theta": 10000,
      "d_head": 256,
      "T_train": 8192,
      "T_eval_max": 1000.0,
      "n_points": 8,
      "gamma_obs": 0.6586407289285032,
      "gamma_new_pred": 0.8585786437626906,
      "gamma_old_pred": 0.99999561666838,
      "err_new_pct": -23.287082236050793,
      "err_old_pct": -34.13563840181085,
      "R2": 0.97730793078849
    },
    {
      "prefix": "tiiuae--falcon-7b",
      "name": "falcon-7b",
      "pe_type": "ALiBi",
      "notes": "no_rope",
      "theta": 10000,
      "d_head": 64,
      "T_train": 2048,
      "T_eval_max": 1000.0,
      "n_points": 8,
      "gamma_obs": 0.8928207115404576,
      "gamma_new_pred": 0.8585786437626906,
      "gamma_old_pred": 0.99999561666838,
      "err_new_pct": 3.9882272901294638,
      "err_old_pct": -10.717537491313212,
      "R2": 0.9927613215692025
    }
  ]
}