File size: 361,757 Bytes
137cb0a
 
 
 
 
 
 
 
 
 
 
 
 
 
81fc8a0
 
 
 
 
137cb0a
 
 
 
 
c11b76c
137cb0a
 
b4f7029
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
137cb0a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
449213a
6e2328e
 
449213a
6e2328e
 
449213a
6e2328e
81fc8a0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fbec820
81fc8a0
 
 
 
 
 
 
fbec820
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f09cd1d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d61ea0e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7bc5d7c
 
edb4038
 
 
ab15c91
 
 
d61ea0e
 
 
 
 
 
 
7bc5d7c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
edb4038
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ab15c91
 
 
 
7c80934
 
fbf3edc
 
ab15c91
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7c80934
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fbf3edc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b4eccb8
 
 
 
 
 
 
 
 
 
 
 
 
 
6378efa
 
 
 
 
449213a
 
 
137cb0a
0195057
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
137cb0a
28ac122
 
137cb0a
28ac122
 
 
 
 
 
 
 
 
137cb0a
 
 
ed3d534
137cb0a
 
 
 
 
ed3d534
 
0637db4
 
 
 
ed3d534
2dbc41f
 
 
 
 
 
 
28ac122
 
 
 
2dbc41f
28ac122
 
 
2dbc41f
28ac122
2dbc41f
28ac122
2dbc41f
 
 
 
ab15c91
2dbc41f
 
 
 
 
 
 
c969a03
2dbc41f
 
c969a03
2dbc41f
 
c969a03
 
abea671
31b1415
 
 
 
 
 
 
 
 
abea671
 
 
31b1415
abea671
 
 
 
d5c0dac
 
abea671
 
 
 
31b1415
 
 
 
 
 
2dbc41f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
959e23c
2dbc41f
137cb0a
5d885d7
 
 
c969a03
5d885d7
 
 
 
 
 
 
 
94766b3
 
 
 
 
 
 
 
 
 
 
 
03f7bfe
 
 
 
 
b54babf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77f164d
 
 
 
28ac122
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
137cb0a
 
5d885d7
 
 
137cb0a
5d885d7
 
c969a03
5d885d7
 
 
 
 
 
 
 
 
94766b3
 
 
 
 
 
 
 
 
 
 
 
03f7bfe
 
 
 
 
b54babf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77f164d
 
 
 
28ac122
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
137cb0a
81fc8a0
 
 
 
 
137cb0a
 
 
 
 
c11b76c
137cb0a
 
b4f7029
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
137cb0a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
449213a
6e2328e
 
449213a
6e2328e
 
449213a
6e2328e
81fc8a0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fbec820
81fc8a0
 
 
 
 
 
 
fbec820
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f09cd1d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d61ea0e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7bc5d7c
 
edb4038
 
 
ab15c91
 
 
d61ea0e
 
 
 
 
 
 
7bc5d7c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
edb4038
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ab15c91
 
 
 
7c80934
 
fbf3edc
 
ab15c91
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7c80934
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fbf3edc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b4eccb8
 
 
 
 
 
 
 
 
 
 
 
 
 
6378efa
 
 
 
 
449213a
 
 
137cb0a
0195057
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
137cb0a
28ac122
 
137cb0a
28ac122
 
 
 
 
 
 
 
 
137cb0a
 
 
ed3d534
137cb0a
 
 
 
 
ed3d534
 
0637db4
 
 
 
ed3d534
2dbc41f
 
 
 
 
 
 
28ac122
 
 
 
2dbc41f
28ac122
 
 
2dbc41f
28ac122
2dbc41f
28ac122
2dbc41f
 
 
 
ab15c91
2dbc41f
 
 
 
 
 
 
c969a03
2dbc41f
 
c969a03
2dbc41f
 
c969a03
 
abea671
31b1415
 
 
 
 
 
 
 
 
abea671
 
 
 
 
 
 
d5c0dac
 
abea671
 
 
 
31b1415
 
 
 
 
 
 
2dbc41f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
959e23c
2dbc41f
137cb0a
 
 
5d885d7
 
 
137cb0a
5d885d7
 
c969a03
5d885d7
 
 
 
 
 
 
 
 
94766b3
 
 
 
 
 
 
 
 
 
 
 
03f7bfe
 
 
 
 
b54babf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77f164d
 
 
 
28ac122
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
137cb0a
81fc8a0
 
 
 
 
137cb0a
 
 
 
 
d5c0dac
137cb0a
 
b4f7029
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
137cb0a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
449213a
6e2328e
 
449213a
6e2328e
 
449213a
6e2328e
81fc8a0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fbec820
81fc8a0
 
 
 
 
 
 
fbec820
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f09cd1d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d61ea0e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7bc5d7c
 
edb4038
 
 
ab15c91
 
 
d61ea0e
 
 
 
 
 
 
7bc5d7c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
edb4038
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ab15c91
 
 
 
7c80934
 
fbf3edc
 
ab15c91
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7c80934
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fbf3edc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b4eccb8
 
 
 
 
 
 
 
 
 
 
 
 
 
6378efa
 
 
 
 
449213a
 
 
137cb0a
0195057
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
137cb0a
28ac122
 
137cb0a
28ac122
 
 
 
 
 
 
 
 
137cb0a
 
 
ed3d534
137cb0a
 
 
 
 
ed3d534
 
0637db4
 
 
 
ed3d534
2dbc41f
 
 
 
 
 
 
28ac122
 
 
 
2dbc41f
28ac122
 
 
2dbc41f
28ac122
2dbc41f
28ac122
2dbc41f
 
 
 
ab15c91
2dbc41f
 
 
 
 
 
 
c969a03
2dbc41f
 
c969a03
2dbc41f
 
c969a03
 
abea671
31b1415
 
 
 
 
 
 
 
 
abea671
 
 
31b1415
abea671
 
 
 
d5c0dac
 
abea671
 
 
 
31b1415
 
 
 
 
 
2dbc41f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
959e23c
2dbc41f
137cb0a
 
 
5d885d7
 
 
137cb0a
5d885d7
 
c969a03
5d885d7
 
 
 
 
 
 
 
 
94766b3
 
 
 
 
 
 
 
 
 
 
 
03f7bfe
 
 
 
 
b54babf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77f164d
 
 
 
28ac122
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
137cb0a
81fc8a0
 
 
 
 
137cb0a
 
 
 
 
d5c0dac
137cb0a
 
b4f7029
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
137cb0a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
449213a
6e2328e
 
449213a
6e2328e
 
449213a
6e2328e
81fc8a0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fbec820
81fc8a0
 
 
 
 
 
 
fbec820
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f09cd1d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d61ea0e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7bc5d7c
 
edb4038
 
 
ab15c91
 
 
d61ea0e
 
 
 
 
 
 
7bc5d7c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
edb4038
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ab15c91
 
 
 
7c80934
 
fbf3edc
 
ab15c91
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7c80934
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fbf3edc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b4eccb8
 
 
 
 
 
 
 
 
 
 
 
 
 
6378efa
 
 
 
 
449213a
 
 
137cb0a
0195057
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
137cb0a
28ac122
 
137cb0a
28ac122
 
 
 
 
 
 
 
 
137cb0a
 
 
ed3d534
137cb0a
 
 
 
 
ed3d534
 
0637db4
 
 
 
ed3d534
2dbc41f
 
 
 
 
 
 
28ac122
 
 
 
2dbc41f
28ac122
 
 
2dbc41f
28ac122
2dbc41f
28ac122
2dbc41f
 
 
 
ab15c91
2dbc41f
 
 
 
 
 
 
c969a03
2dbc41f
 
c969a03
2dbc41f
 
c969a03
 
abea671
31b1415
 
 
 
 
 
 
 
 
abea671
 
 
31b1415
abea671
 
 
 
d5c0dac
 
abea671
 
 
 
31b1415
 
 
 
 
 
2dbc41f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
959e23c
2dbc41f
137cb0a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
449213a
 
 
edb4038
 
 
449213a
 
137cb0a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987
1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
2100
2101
2102
2103
2104
2105
2106
2107
2108
2109
2110
2111
2112
2113
2114
2115
2116
2117
2118
2119
2120
2121
2122
2123
2124
2125
2126
2127
2128
2129
2130
2131
2132
2133
2134
2135
2136
2137
2138
2139
2140
2141
2142
2143
2144
2145
2146
2147
2148
2149
2150
2151
2152
2153
2154
2155
2156
2157
2158
2159
2160
2161
2162
2163
2164
2165
2166
2167
2168
2169
2170
2171
2172
2173
2174
2175
2176
2177
2178
2179
2180
2181
2182
2183
2184
2185
2186
2187
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197
2198
2199
2200
2201
2202
2203
2204
2205
2206
2207
2208
2209
2210
2211
2212
2213
2214
2215
2216
2217
2218
2219
2220
2221
2222
2223
2224
2225
2226
2227
2228
2229
2230
2231
2232
2233
2234
2235
2236
2237
2238
2239
2240
2241
2242
2243
2244
2245
2246
2247
2248
2249
2250
2251
2252
2253
2254
2255
2256
2257
2258
2259
2260
2261
2262
2263
2264
2265
2266
2267
2268
2269
2270
2271
2272
2273
2274
2275
2276
2277
2278
2279
2280
2281
2282
2283
2284
2285
2286
2287
2288
2289
2290
2291
2292
2293
2294
2295
2296
2297
2298
2299
2300
2301
2302
2303
2304
2305
2306
2307
2308
2309
2310
2311
2312
2313
2314
2315
2316
2317
2318
2319
2320
2321
2322
2323
2324
2325
2326
2327
2328
2329
2330
2331
2332
2333
2334
2335
2336
2337
2338
2339
2340
2341
2342
2343
2344
2345
2346
2347
2348
2349
2350
2351
2352
2353
2354
2355
2356
2357
2358
2359
2360
2361
2362
2363
2364
2365
2366
2367
2368
2369
2370
2371
2372
2373
2374
2375
2376
2377
2378
2379
2380
2381
2382
2383
2384
2385
2386
2387
2388
2389
2390
2391
2392
2393
2394
2395
2396
2397
2398
2399
2400
2401
2402
2403
2404
2405
2406
2407
2408
2409
2410
2411
2412
2413
2414
2415
2416
2417
2418
2419
2420
2421
2422
2423
2424
2425
2426
2427
2428
2429
2430
2431
2432
2433
2434
2435
2436
2437
2438
2439
2440
2441
2442
2443
2444
2445
2446
2447
2448
2449
2450
2451
2452
2453
2454
2455
2456
2457
2458
2459
2460
2461
2462
2463
2464
2465
2466
2467
2468
2469
2470
2471
2472
2473
2474
2475
2476
2477
2478
2479
2480
2481
2482
2483
2484
2485
2486
2487
2488
2489
2490
2491
2492
2493
2494
2495
2496
2497
2498
2499
2500
2501
2502
2503
2504
2505
2506
2507
2508
2509
2510
2511
2512
2513
2514
2515
2516
2517
2518
2519
2520
2521
2522
2523
2524
2525
2526
2527
2528
2529
2530
2531
2532
2533
2534
2535
2536
2537
2538
2539
2540
2541
2542
2543
2544
2545
2546
2547
2548
2549
2550
2551
2552
2553
2554
2555
2556
2557
2558
2559
2560
2561
2562
2563
2564
2565
2566
2567
2568
2569
2570
2571
2572
2573
2574
2575
2576
2577
2578
2579
2580
2581
2582
2583
2584
2585
2586
2587
2588
2589
2590
2591
2592
2593
2594
2595
2596
2597
2598
2599
2600
2601
2602
2603
2604
2605
2606
2607
2608
2609
2610
2611
2612
2613
2614
2615
2616
2617
2618
2619
2620
2621
2622
2623
2624
2625
2626
2627
2628
2629
2630
2631
2632
2633
2634
2635
2636
2637
2638
2639
2640
2641
2642
2643
2644
2645
2646
2647
2648
2649
2650
2651
2652
2653
2654
2655
2656
2657
2658
2659
2660
2661
2662
2663
2664
2665
2666
2667
2668
2669
2670
2671
2672
2673
2674
2675
2676
2677
2678
2679
2680
2681
2682
2683
2684
2685
2686
2687
2688
2689
2690
2691
2692
2693
2694
2695
2696
2697
2698
2699
2700
2701
2702
2703
2704
2705
2706
2707
2708
2709
2710
2711
2712
2713
2714
2715
2716
2717
2718
2719
2720
2721
2722
2723
2724
2725
2726
2727
2728
2729
2730
2731
2732
2733
2734
2735
2736
2737
2738
2739
2740
2741
2742
2743
2744
2745
2746
2747
2748
2749
2750
2751
2752
2753
2754
2755
2756
2757
2758
2759
2760
2761
2762
2763
2764
2765
2766
2767
2768
2769
2770
2771
2772
2773
2774
2775
2776
2777
2778
2779
2780
2781
2782
2783
2784
2785
2786
2787
2788
2789
2790
2791
2792
2793
2794
2795
2796
2797
2798
2799
2800
2801
2802
2803
2804
2805
2806
2807
2808
2809
2810
2811
2812
2813
2814
2815
2816
2817
2818
2819
2820
2821
2822
2823
2824
2825
2826
2827
2828
2829
2830
2831
2832
2833
2834
2835
2836
2837
2838
2839
2840
2841
2842
2843
2844
2845
2846
2847
2848
2849
2850
2851
2852
2853
2854
2855
2856
2857
2858
2859
2860
2861
2862
2863
2864
2865
2866
2867
2868
2869
2870
2871
2872
2873
2874
2875
2876
2877
2878
2879
2880
2881
2882
2883
2884
2885
2886
2887
2888
2889
2890
2891
2892
2893
2894
2895
2896
2897
2898
2899
2900
2901
2902
2903
2904
2905
2906
2907
2908
2909
2910
2911
2912
2913
2914
2915
2916
2917
2918
2919
2920
2921
2922
2923
2924
2925
2926
2927
2928
2929
2930
2931
2932
2933
2934
2935
2936
2937
2938
2939
2940
2941
2942
2943
2944
2945
2946
2947
2948
2949
2950
2951
2952
2953
2954
2955
2956
2957
2958
2959
2960
2961
2962
2963
2964
2965
2966
2967
2968
2969
2970
2971
2972
2973
2974
2975
2976
2977
2978
2979
2980
2981
2982
2983
2984
2985
2986
2987
2988
2989
2990
2991
2992
2993
2994
2995
2996
2997
2998
2999
3000
3001
3002
3003
3004
3005
3006
3007
3008
3009
3010
3011
3012
3013
3014
3015
3016
3017
3018
3019
3020
3021
3022
3023
3024
3025
3026
3027
3028
3029
3030
3031
3032
3033
3034
3035
3036
3037
3038
3039
3040
3041
3042
3043
3044
3045
3046
3047
3048
3049
3050
3051
3052
3053
3054
3055
3056
3057
3058
3059
3060
3061
3062
3063
3064
3065
3066
3067
3068
3069
3070
3071
3072
3073
3074
3075
3076
3077
3078
3079
3080
3081
3082
3083
3084
3085
3086
3087
3088
3089
3090
3091
3092
3093
3094
3095
3096
3097
3098
3099
3100
3101
3102
3103
3104
3105
3106
3107
3108
3109
3110
3111
3112
3113
3114
3115
3116
3117
3118
3119
3120
3121
3122
3123
3124
3125
3126
3127
3128
3129
3130
3131
3132
3133
3134
3135
3136
3137
3138
3139
3140
3141
3142
3143
3144
3145
3146
3147
3148
3149
3150
3151
3152
3153
3154
3155
3156
3157
3158
3159
3160
3161
3162
3163
3164
3165
3166
3167
3168
3169
3170
3171
3172
3173
3174
3175
3176
3177
3178
3179
3180
3181
3182
3183
3184
3185
3186
3187
3188
3189
3190
3191
3192
3193
3194
3195
3196
3197
3198
3199
3200
3201
3202
3203
3204
3205
3206
3207
3208
3209
3210
3211
3212
3213
3214
3215
3216
3217
3218
3219
3220
3221
3222
3223
3224
3225
3226
3227
3228
3229
3230
3231
3232
3233
3234
3235
3236
3237
3238
3239
3240
3241
3242
3243
3244
3245
3246
3247
3248
3249
3250
3251
3252
3253
3254
3255
3256
3257
3258
3259
3260
3261
3262
3263
3264
3265
3266
3267
3268
3269
3270
3271
3272
3273
3274
3275
3276
3277
3278
3279
3280
3281
3282
3283
3284
3285
3286
3287
3288
3289
3290
3291
3292
3293
3294
3295
3296
3297
3298
3299
3300
3301
3302
3303
3304
3305
3306
3307
3308
3309
3310
3311
3312
3313
3314
3315
3316
3317
3318
3319
3320
3321
3322
3323
3324
3325
3326
3327
3328
3329
3330
3331
3332
3333
3334
3335
3336
3337
3338
3339
3340
3341
3342
3343
3344
3345
3346
3347
3348
3349
3350
3351
3352
3353
3354
3355
3356
3357
3358
3359
3360
3361
3362
3363
3364
3365
3366
3367
3368
3369
3370
3371
3372
// TAF Agent i18n — minimal translation system.
// Add languages by extending TRANSLATIONS. Set data-i18n="key" on any element.
// Persist user choice in localStorage.

// Selectable languages. Each entry carries a short language `code`, a flag
// emoji, and the language's own name as `label`.
// NOTE(review): `code` presumably matches a top-level key of TRANSLATIONS
// (e.g. "en") — confirm against the lookup code.
export const LANGUAGES = [
  ["en", "🇬🇧", "English"],
  ["es", "🇪🇸", "Español"],
  ["fr", "🇫🇷", "Français"],
  ["zh", "🇨🇳", "中文"],
].map(([code, flag, label]) => ({ code, flag, label }));

export const TRANSLATIONS = {
  en: {
    "hero.title":     "🔬 TAF Agent",
    "hero.tagline":   "Diagnose any transformer LLM in 30 seconds. Free. No GPU. No signup.",
    "hero.subtitle":  "Predicts whether a model will work for your use case <em>before</em> you spend money or time. Everything runs in your browser &mdash; your inputs never leave this tab.",
    "hero.help":      "📘 Manual &amp; examples",
    "hero.quickstart_btn": "⚡ Quick start",
    "hero.inventory_btn":  "🧰 What it gives you",
    "hero.about":     "Built by an independent researcher. Open source. Not affiliated with any model vendor.",

    "modes.title":    "🎯 Mode",
    "modes.profile":  "📇 Profile a model",
    "modes.compare":  "🆚 Compare models",
    "modes.inspector": "🔍 Inspect config",
    "modes.ask":      "💬 Ask plain English",
    "modes.recipe":   "📋 Pick recipe",
    "modes.diagnose": "🩺 Diagnose CLI",
    "diagnose.title": "🩺 Diagnose CLI Command Builder",
    "diagnose.tip":   "Browser predicts γ from config; the CLI measures γ_obs on real weights. Builder produces the exact command to run locally.",
    "diagnose.desc":  "Pick options and copy-paste the generated command on your local machine (Python + transformers + numpy). Fast mode ≈5 min CPU; full ≈20–60 min GPU.",
    "diagnose.model_label": "HF model id:",
    "diagnose.theta_label": "θ (auto if blank):",
    "diagnose.n_label": "Context N:",
    "diagnose.options_label": "Options:",
    "diagnose.opt_fast": "--fast (CPU, ~5 min)",
    "diagnose.opt_cpu": "--cpu (force CPU)",
    "diagnose.opt_4bit": "--load_in_4bit (≥7B models)",
    "diagnose.local_label": "--local path (optional):",
    "diagnose.build_btn": "📋 Build command",
    "diagnose.cmd_title": "Generated command:",
    "diagnose.copy_btn": "📋 Copy to clipboard",
    "diagnose.next_steps": "Next steps: (1) git clone https://github.com/karlesmarin/tafagent (2) cd tafagent && pip install torch transformers numpy (3) Run the command (4) Result JSON → upload via Inspect mode for full TAF analysis.",
    "modes.phase":    "📊 Phase diagram",
    "phase.title":    "📊 Phase diagram (γ × θ)",
    "phase.tip":      "Each dot is one model from the paper's empirical panel. x-axis log θ; y-axis γ. Hagedorn line γ=1 separates Phase A from Phase B. Hover for details, click to load into the recipe form.",
    "phase.desc":     "23 models in the panel; Padé curve at T=2000.",
    "modes.desc":     "<strong>Quickest start</strong>: paste any HuggingFace model id (e.g. <code>meta-llama/Meta-Llama-3-8B</code>), click Profile. See all 5 recipes scored in seconds.",

    "profile.title":           "📇 Profile a model",
    "profile.desc":            "<strong>For technicians</strong>: when you need a complete viability snapshot of a candidate model. One-click runs all 5 recipes and produces a unified TAF Card.",
    "profile.preset_label":    "Preset:",
    "profile.preset_default":  "— or pick from list —",
    "profile.hf_label":        "HF model id:",
    "profile.fetch_btn":       "📥 Fetch",
    "profile.btn":             "🚀 Generate full profile",
    "profile.quickstart":      "💡 Quick start: pick any preset → click Generate. Or paste a model id from <a href='https://huggingface.co/models?library=transformers&sort=trending' target='_blank'>HF Hub trending</a> → 📥 Fetch → Generate.",

    "compare.title":           "🆚 Compare models side-by-side",
    "compare.desc":            "<strong>For technicians</strong>: when choosing between 2-3 candidate models for a specific deployment scenario. Same recipe, multiple models, side-by-side verdicts.",
    "compare.recipe_label":    "Recipe:",
    "compare.T_eval_label":    "T_eval (target context):",
    "compare.models_title":    "Models to compare (add up to 3)",
    "compare.btn":             "🚀 Compare",
    "compare.example":         "💡 Try: paste 3 popular 7-8B models (Meta-Llama-3-8B, Mistral-7B-v0.1, Qwen/Qwen2.5-7B), pick recipe X-2, T_eval=16000. See which best handles long context.",

    "ask.title":               "❓ Your question",
    "ask.placeholder":         "e.g. Will Mistral-7B handle 16K NIAH retrieval? Or: I have $5,000, what model can I train? Or: Cheapest GPU to serve Llama-70B at 100M tokens/day?",
    "ask.btn":                 "🚀 Analyze",
    "ask.example_btn":         "💡 Try an example",

    "recipe.title":            "📋 Recipe",
    "recipe.default":          "— select a recipe —",
    "recipe.input_title":      "🎯 Inputs",

    "verdict.title":           "📊 Verdict",
    "chain.title":             "🔍 Computation Chain",
    "chain.desc":              "Every number below is deterministic Python. Click a step to expand.",
    "answer.title":            "💬 Plain-English Answer",
    "share.btn":               "🔗 Copy share link",
    "share.copied":            "✅ Copied to clipboard!",
    "share.download":          "💾 Download JSON",
    "share.download_md":       "📝 Markdown",
    "share.download_tex":      "📜 LaTeX",
    "share.submit":            "📤 Submit to registry",
    "share.submit_clip_ok":    "↗ Opened GitHub. Body copied to clipboard — paste it into the issue body.",
    "share.submit_clip_fail":  "↗ Opened GitHub. Clipboard blocked — body logged in browser console (F12).",
    "share.import_title":      "📂 Import a shared TAF result",
    "a11y.skip":               "Skip to main content",

    // v0.6.2 — landing rework: quick-start + inventory + arch tooltips
    "qs.title":                    "⚡ Quick start",
    "qs.step1":                    "Paste a HuggingFace model ID (e.g. <code>meta-llama/Meta-Llama-3-8B</code>)",
    "qs.step2":                    "Click <strong>📇 Profile a model</strong>",
    "qs.step3":                    "Read your TAF Card — verdict per use case + key numbers + math verified by Lean+Mathlib",
    "qs.cta":                      "↓ Start now",
    "inv.title":                   "🧰 What this tool gives you",
    "inv.recipes.title":           "🎯 8 recipes — does this model fit your use case?",
    "inv.recipes.x1.title":        "Custom train vs API",
    "inv.recipes.x1.body":         "which is cheaper for your traffic?",
    "inv.recipes.x2.title":        "Long context",
    "inv.recipes.x2.body":         "will it handle 32k / 128k tokens reliably?",
    "inv.recipes.x3.title":        "Budget",
    "inv.recipes.x3.body":         "with $X, what model can you train from scratch?",
    "inv.recipes.x5.title":        "Hardware",
    "inv.recipes.x5.body":         "which GPU to serve N tokens/day?",
    "inv.recipes.x19.title":       "KV cache",
    "inv.recipes.x19.body":        "how to compress without breaking quality?",
    "inv.recipes.x21.title":       "Imprint purity",
    "inv.recipes.x21.body":        "how clean is the model's positional encoding?",
    "inv.recipes.x22.title":       "Compute-context",
    "inv.recipes.x22.body":        "does the model fit the empirical band?",
    "inv.recipes.x23.title":       "IH-phase",
    "inv.recipes.x23.body":        "pre- or post-induction-head?",
    "inv.diag.title":              "🔬 Diagnostics",
    "inv.diag.gamma":              "<strong>γ predicted vs observed</strong> — auto-classifies the model into 5 regimes (normal · fraud / inflated context · compressed · over-Padé · sliding-window)",
    "inv.diag.cardy":              "<strong>Cardy ΔH</strong> — entropy shift between observed and nominal context",
    "inv.diag.fals":               "<strong>Falsification dashboard</strong> — checks 23 specific predictions (F1–F23)",
    "inv.diag.alg":                "<strong>Algebraic consistency</strong> — 8 mathematical identities the model must satisfy",
    "inv.verify.title":            "✓ Formally verified math",
    "inv.verify.count":            "<strong>37 theorems</strong> machine-proven in Lean 4 + Mathlib4",
    "inv.verify.click":            "Click any badge → opens the source line on GitHub",
    "inv.verify.reverify":         "Verify yourself: <code>lake build</code> (≈5 s after cache fetch)",
    "inv.export.title":            "📤 Export &amp; share",
    "inv.export.formats":          "<strong>JSON · Markdown · LaTeX</strong> (paper-ready)",
    "inv.export.share":            "Reproducible share link (state encoded in URL)",
    "inv.export.registry":         "Submit to community registry on GitHub",
    "arch.summary":                "Architectures supported",
    "arch.anyhf":                  "✓ Any HuggingFace public model",
    "tooltip.mha":                 "Multi-Head Attention: each token position attends through several parallel heads at once.",
    "tooltip.gqa":                 "Grouped Query Attention: queries share fewer keys/values than heads (saves memory but pushes γ toward Hagedorn).",
    "tooltip.alibi":               "Attention with Linear Biases: position info is a learned slope added to attention scores, no rotation.",
    "tooltip.abspe":               "Absolute Position Embeddings: each position has a fixed learned vector added to the token embedding.",
    "tooltip.swa":                 "Sliding Window Attention: each token only attends within a fixed local window (Mistral, gemma-2 use this).",
    "tooltip.ssm":                 "State Space Model: a sequence layer that maintains internal state instead of attention (Mamba, Jamba use this).",

    // v0.7.0 — anti-bullshit pack #1: SWA / RoPE-scaling unmasker
    "modes.unmask":                "🪟 Unmask",
    "unmask.title":                "🪟 Context Unmasker",
    "unmask.tip":                  "Paste a HuggingFace model id (or raw config.json). The tool checks for sliding-window attention, RoPE scaling (YaRN/linear/dynamic NTK), and GQA — anything that makes <code>max_position_embeddings</code> larger than the practical effective context. Mistral-7B-v0.1 is the canonical example: declared 32k, attends within ~4-8k.",
    "unmask.desc":                 "<strong>Are you about to spend money on a model that won't actually attend that far?</strong> Paste an id and find out in 1 second. No GPU, no inference — just config.json arithmetic.",
    "unmask.id_label":             "HF model id:",
    "unmask.fetch_btn":            "🔍 Unmask",
    "unmask.paste_summary":        "Or paste raw config.json (private / in-dev models)",
    "unmask.paste_btn":            "🔍 Unmask pasted config",
    "unmask.label.declared":       "Declared context",
    "unmask.label.effective":      "Effective (estimate)",
    "unmask.label.ratio":          "Ratio",
    "unmask.section.flags":        "Architecture flags",
    "unmask.section.warnings":     "Warnings",
    "unmask.section.reco":         "Recommendation",
    "unmask.flag.swa":             "SWA",
    "unmask.flag.rope":            "RoPE scaling",
    "unmask.flag.gqa":             "GQA",
    "unmask.flag.layers":          "Layers",
    "unmask.flag.dhead":           "d_head",
    "unmask.flag.theta":           "RoPE θ",
    "unmask.flag.yes":             "yes",
    "unmask.flag.no":              "no",
    "unmask.flag.full_mha":        "no (full MHA, {n} heads)",
    "unmask.verdict.honest":            "✅ HONEST",
    "unmask.verdict.inflated":          "⚠ INFLATED",
    "unmask.verdict.severely_inflated": "❌ SEVERELY INFLATED",
    "unmask.verdict.yarn_extended":     "⚠ YARN-EXTENDED",
    "unmask.verdict.unknown":           "❓ UNKNOWN",
    "unmask.warn.swa_window":      "SWA window: {window} tokens — each layer only attends within this window.",
    "unmask.warn.multihop":        "Multi-hop estimate: ~{multiHop} tokens (conservative: window × {factor}).",
    "unmask.warn.yarn":            "RoPE scaling ({type}) extends context {factor}× from ~{original} to {declared} tokens.",
    "unmask.warn.yarn_advice":     "RoPE-extended context — verify γ behavior at the full claimed length with the γ_check diagnostic.",
    "unmask.warn.gqa_small_dhead": "Small head dim ({d_head}) + GQA: KV cache compression at long context is likely (γ pushed toward Hagedorn).",
    "unmask.reco.honest":              "Standard full-attention model. Effective context matches declared ({declared} tokens).",
    "unmask.reco.inflated":            "Effective ~{effective} tokens via SWA. Use γ_check to verify behavior at your target evaluation length.",
    "unmask.reco.severely_inflated":   "Treat as a ~{effective}-token context model in practice. The {declared}-token claim only applies via cross-layer attention chains, which empirically degrade past ~2× the SWA window.",
    "unmask.reco.yarn_extended":       "RoPE-extended context. Run a long-context benchmark (NIAH at 8k / 16k / 32k / full) to confirm the extension holds. Use γ_check with T_eval = {declared}.",
    "unmask.reco.unknown":             "Could not parse config. Verify the URL is a valid HF model with public config.json.",
    "unmask.status.empty_id":      "⚠ Enter a model id (e.g. mistralai/Mistral-7B-v0.1).",
    "unmask.status.fetching":      "⏳ Fetching config.json for {modelId}...",
    "unmask.status.success":       "✅ Analyzed {modelId} (verdict: {verdict})",
    "unmask.status.empty_paste":   "⚠ Paste a config.json first.",
    "unmask.status.invalid_json":  "❌ Not valid JSON: {error}",
    "unmask.status.success_paste": "✅ Analyzed pasted config (verdict: {verdict})",
    "unmask.pasted_label":         "(pasted config)",
    "mode_desc.ask":               "Type a free-form question. The in-browser LLM picks the right recipe and runs it.",
    "mode_desc.recipe":            "Pick a recipe directly and fill the form. Full manual control.",
    "mode_desc.profile":           "Quickest start: paste any HuggingFace model id, click Profile. See all 5 recipes scored in seconds.",
    "mode_desc.compare":           "Pick 2-3 candidate models + one recipe. See verdicts side-by-side in a comparison table.",
    "mode_desc.inspector":         "Paste a config.json directly. Useful for private/in-development models not on HF Hub.",
    "mode_desc.diagnose":          "Build the diagnose_model.py CLI command to MEASURE γ_obs on real GPU. Browser predicts; CLI measures.",
    "mode_desc.phase":             "γ × θ scatter of the paper's empirical panel. Hover a dot for details, click to load into Diagnose / Recipe forms.",
    "mode_desc.unmask":            "Detects whether max_position_embeddings is misleading (SWA / YaRN / RoPE-scaling). Paste a model id, get a 1-line verdict.",
    "profile.preset_loaded":       "✅ Loaded preset for <strong>{id}</strong>. Form pre-filled. (Click 📥 Fetch to override with the latest config from HF Hub.)",

    // v0.7.1 — anti-bullshit pack #2: Chat-template Sniffer
    "modes.template":              "📜 Chat-template",
    "mode_desc.template":          "Detects which chat-template family a model uses (Llama-3 / ChatML / Mistral / Gemma / Phi-3 / Alpaca / DeepSeek). Gives the exact CLI flag for lm-eval / vLLM / transformers.",
    "template.title":              "📜 Chat-template Sniffer",
    "template.tip":                "Paste an HF model id (or raw tokenizer_config.json). Detects the chat-template family and gives you the exact framework command to use it correctly. lm-eval-harness silently halves accuracy if you forget to apply it (issue #1841).",
    "template.desc":               "<strong>Did you forget <code>--apply_chat_template</code>?</strong> Most multi-turn evals fail by ~50% because the chat template wasn't applied. Paste a model id, get the exact CLI flag for your stack.",
    "template.id_label":           "HF model id:",
    "template.fetch_btn":          "📜 Sniff",
    "template.paste_summary":      "Or paste raw tokenizer_config.json (private models)",
    "template.paste_btn":          "📜 Sniff pasted config",
    "template.label.family":       "Detected family",
    "template.label.markers":      "Matched markers",
    "template.label.tpl_len":      "Template length",
    "template.section.warnings":   "Warnings",
    "template.section.commands":   "Commands by framework",
    "template.section.raw":        "Raw template (preview)",
    "template.family.custom":      "custom (unknown family)",
    "template.family.none":        "(no chat_template)",
    "template.verdict.ok":         "✅ TEMPLATE DETECTED",
    "template.verdict.custom":     "⚠ CUSTOM TEMPLATE",
    "template.verdict.missing":    "❌ NO CHAT TEMPLATE",
    "template.verdict.base_model": "ℹ BASE MODEL (no chat)",
    "template.verdict.unknown":    "❓ UNKNOWN",
    "template.warn.no_chat_template": "No <code>chat_template</code> field in tokenizer_config.json. This is typical for base / pretrained-only models. If you intended an instruct-tuned model, the wrong file may be loaded.",
    "template.warn.custom_template":  "Template is non-standard ({length} chars). The tool could not match it against known families. Inspect the raw preview below and verify your eval framework supports it.",
    "template.warn.lm_eval_apply":    "<strong>lm-eval-harness:</strong> add <code>--apply_chat_template</code> or your accuracy will silently drop ~50% on multi-turn evals (issue #1841).",
    "template.warn.vllm_apply":       "<strong>vLLM serve:</strong> verify <code>--chat-template</code> is set (auto-detection sometimes fails for fine-tuned variants). Suggested: <code>{name}</code>.",
    "template.status.empty_id":    "⚠ Enter a model id (e.g. mistralai/Mistral-7B-Instruct-v0.3).",
    "template.status.fetching":    "⏳ Fetching tokenizer_config.json for {modelId}...",
    "template.status.success":     "✅ Sniffed {modelId} (verdict: {verdict})",
    "template.status.empty_paste": "⚠ Paste a tokenizer_config.json first.",
    "template.status.invalid_json":"❌ Not valid JSON: {error}",
    "template.status.success_paste":"✅ Sniffed pasted config (verdict: {verdict})",
    "template.pasted_label":       "(pasted tokenizer_config)",

    // v0.7.2 — anti-bullshit pack #3: Arena-Elo CI reconstructor
    "modes.arena":                 "🎯 Arena CI",
    "mode_desc.arena":             "Recovers confidence intervals from raw pairwise vote data (Bradley-Terry MLE + bootstrap). Detects statistically tied pairs that the public Arena leaderboard hides.",
    "arena.title":                 "🎯 Arena-Elo CI Reconstructor",
    "arena.tip":                   "Chatbot Arena strips confidence intervals from the public leaderboard. A 5-Elo gap can be statistically meaningless. Paste raw vote data (model_a, model_b, winner) — the tool computes Bradley-Terry MLE + bootstrap CIs and lists statistical ties (CI overlap).",
    "arena.desc":                  "<strong>Is GPT-4 actually better than Claude — or are they tied?</strong> Paste pairwise vote CSV (or click <em>Load sample</em>). Bradley-Terry MLE + 200-iteration bootstrap → ranked Elos with 95% CIs and statistical-tie detection. All in browser.",
    "arena.sample_btn":            "📊 Load sample data",
    "arena.run_btn":                "🎯 Compute CIs",
    "arena.clear_btn":             "🗑️ Clear",
    "arena.csv_summary":           "Vote CSV (header: <code>model_a,model_b,winner</code>; winner ∈ a/b/tie)",
    "arena.section.ranked":        "Ranked Elos with 95% CIs",
    "arena.section.ties":          "Statistical ties (CI overlap)",
    "arena.section.summary":       "Summary",
    "arena.col.rank":              "#",
    "arena.col.model":             "Model",
    "arena.col.elo":               "Elo",
    "arena.col.ci":                "95% CI",
    "arena.col.ci_width":          "± half-width",
    "arena.col.matches":           "Matches",
    "arena.col.wins":              "W / L / T",
    "arena.col.tie_pair":          "Pair",
    "arena.col.tie_diff":          "Elo gap",
    "arena.col.tie_overlap":       "CI overlap",
    "arena.no_ties":               "No statistical ties — all pairs distinguishable at 95% CI.",
    "arena.summary.votes":         "Total votes",
    "arena.summary.models":        "Models",
    "arena.summary.ties":          "Statistical ties",
    "arena.summary.bootstrap":     "Bootstrap iters",
    "arena.summary.ci_level":      "CI level",
    "arena.status.empty":          "⚠ Paste vote CSV or click Load sample.",
    "arena.status.too_few":        "⚠ Only {n} valid votes — need at least 10 to fit Bradley-Terry reliably.",
    "arena.status.computing":      "⏳ Computing Bradley-Terry MLE + bootstrap on {n} votes...",
    "arena.status.done":           "✅ {n} votes · {models} models · {ties} statistical ties · {ms} ms",
    "arena.status.sample_loaded":  "✅ Sample loaded (synthetic 6-model Arena data). Click Compute CIs.",

    // v0.7.3 — anti-bullshit pack #4: Contamination Prior
    "modes.contam":                "🧪 Contamination",
    "mode_desc.contam":            "Bayesian-ish prior on whether a benchmark score is contaminated. Enter your model's training cutoff → rates 20+ popular benchmarks (MMLU, GSM8K, HumanEval, MMLU-Pro…).",
    "contam.title":                "🧪 Contamination Prior",
    "contam.tip":                  "Computes a Bayesian-ish prior on whether a benchmark score is contaminated, based on (model training cutoff date) × (benchmark release date) × (known corpus inclusion + leak history). Open LLM Leaderboard v1 was killed in 2024 after MMLU/HellaSwag scores became contaminated.",
    "contam.desc":                 "<strong>Should you trust your model's MMLU score?</strong> Enter the model's training cutoff date — the tool rates 20+ popular benchmarks (MMLU, HellaSwag, GSM8K, HumanEval, IFEval, MMLU-Pro, GPQA…) and tells you which scores are likely contaminated.",
    "contam.cutoff_label":         "Training cutoff:",
    "contam.run_btn":              "🧪 Rate all benchmarks",
    "contam.section.ranked":       "Benchmark contamination priors",
    "contam.section.high":         "🔴 High-risk benchmarks (treat scores as unreliable)",
    "contam.section.medium":       "🟡 Medium-risk (verify with alternates)",
    "contam.section.low":          "🟢 Low-risk (likely clean)",
    "contam.col.benchmark":        "Benchmark",
    "contam.col.released":         "Released",
    "contam.col.gap":              "Gap (months)",
    "contam.col.prior":            "P(contam)",
    "contam.col.level":            "Level",
    "contam.col.corpora":          "In corpora",
    "contam.col.category":         "Category",
    "contam.label.high":           "High risk",
    "contam.label.medium":         "Medium",
    "contam.label.low":            "Low",
    "contam.no_entries":           "(none in this category)",
    "contam.advice.high":          "Treat these scores as unreliable. Replace with newer / private-test alternates (MMLU-Pro, GPQA, MUSR, MATH-500).",
    "contam.advice.medium":        "Take with caution. Look for replication on a held-out subset or community reproductions.",
    "contam.advice.low":           "Score likely uncontaminated, but absence of leak is not proof — still cross-check with alternate test.",
    "contam.summary.headline":     "Cutoff <code>{cutoff}</code> · {n} benchmarks rated",
    "contam.status.empty":         "⚠ Enter a model training cutoff date (e.g. 2023-12).",
    "contam.status.bad_date":      "⚠ Bad date format. Use YYYY-MM or YYYY-MM-DD.",
    "contam.status.done":          "✅ Cutoff {cutoff} · {n} benchmarks rated · {high} high-risk",

    // v0.7 — Help modal section
    "help.v07.title":              "🆕 v0.7 — Anti-bullshit pack (4 new modes)",
    "help.v07.intro":              "<em>v0.7 (2026-05-06): four new modes that solve concrete pain points reported by the HuggingFace community. Each one runs in your browser with no inference — pure metadata + math.</em>",
    "help.v07.unmask.title":       "🪟 Context Unmasker",
    "help.v07.unmask.body":        "Detects when <code>max_position_embeddings</code> is misleading. Mistral-7B-v0.1 declares 32k but attends within ~4-8k via SWA. Paste an HF model id → 1-second verdict (HONEST / INFLATED / SEVERELY INFLATED / YARN-EXTENDED). Catches SWA, RoPE-scaling (YaRN/linear/dynamic NTK), small-d_head + GQA. <em>Use case</em>: before paying GPU for 32k context, verify the model actually attends that far.",
    "help.v07.template.title":     "📜 Chat-template Sniffer",
    "help.v07.template.body":      "Detects which chat-template family a model uses (Llama-3 / ChatML / Mistral / Gemma / Phi-3 / Alpaca / DeepSeek / custom / none) and gives you the exact CLI flag for lm-evaluation-harness, vLLM, and transformers. Solves issue #1841 in lm-eval-harness: forgetting <code>--apply_chat_template</code> silently halves multi-turn accuracy. <em>Use case</em>: before reporting a benchmark score, confirm you applied the template correctly.",
    "help.v07.arena.title":        "🎯 Arena-Elo CI Reconstructor",
    "help.v07.arena.body":         "Chatbot Arena strips confidence intervals from its public leaderboard — a 5-Elo gap can be statistically meaningless. Paste raw pairwise vote data (model_a, model_b, winner) → Bradley-Terry MLE + 200-iteration bootstrap → ranked Elos with 95% CIs and a \"statistical ties\" panel listing pairs whose CIs overlap. Try the Load sample button. <em>Use case</em>: before declaring \"model A beats model B\", verify their CIs don't overlap.",
    "help.v07.contam.title":       "🧪 Contamination Prior",
    "help.v07.contam.body":        "Bayesian-ish prior on whether a benchmark score is contaminated. Enter your model's training cutoff date → tool rates 20+ popular benchmarks (MMLU, HellaSwag, GSM8K, HumanEval, IFEval, MMLU-Pro, GPQA, AIME, MATH-500, BBH, MUSR…) by P(contamination) based on time gap, corpus inclusion, and known leak history. Open LLM Leaderboard v1 was killed in 2024 after MMLU/HellaSwag scores became contaminated. <em>Use case</em>: decide which scores to trust when comparing two models.",
    "help.v07.quant.title":        "⚖️ Quant-regime Classifier",
    "help.v07.quant.body":         "Predicts γ-shift and ΔPPL for any (model × quant scheme: NF4, AWQ, GPTQ, GGUF Q4_K_M / Q5_K_M / Q8_0, int8, FP8, …). Architecture-aware: small d_head + aggressive GQA → more sensitive; calibrated schemes (AWQ) absorb shift better than uncalibrated (NF4). Recommends safer alternatives if a cliff is detected. <em>Use case</em>: before quantizing, predict whether your specific architecture × scheme combo will keep PPL acceptable, with a concrete switch-to suggestion otherwise.",
    "help.v07.drift.title":        "🔀 Cross-framework Drift Bound",
    "help.v07.drift.body":         "Same model, different scores on different setups. Tool predicts the maximum drift admissible from numerical noise alone (dtype, framework, batch). If the observed gap exceeds it → real bug, typically chat-template mismatch (lm-eval-harness issue #1841) or KV-cache layout. Try the &quot;Load sample&quot; button for the canonical chat-template bug. <em>Use case</em>: before reporting a regression or claiming reproducibility, verify whether the gap between two evals is bigger than what numerical noise can explain.",
    "inv.v07.drift":               "<strong>🔀 Drift</strong> — bug or noise? Predict max admissible gap between two evals",
    "help.v07.niah.title":         "🔍 NIAH → Reasoning Gap",
    "help.v07.niah.body":          "RULER paper (NVIDIA 2024) shows that long-context models often pass NIAH (needle retrieval) but fail multi-hop reasoning at the same context. Tool predicts both pass rates from architecture (γ_Padé + d_horizon + arch pressure: small d_head, GQA, SWA), reports the gap, and finds your model's \"safe reasoning context\" where reasoning stays ≥65%. Sweep mode shows the curve across 1k/4k/16k/64k/T_train. <em>Use case</em>: before deploying at the claimed context, find out whether the model will actually reason there or just retrieve.",
    "inv.v07.niah":                "<strong>🔍 NIAH→Reason</strong> — does your \"128k context\" actually reason there, or just retrieve?",

    // v0.7 — Inventory modal 5th card
    "inv.v07.title":               "🆕 v0.7 anti-bullshit pack",
    "inv.v07.unmask":              "<strong>🪟 Unmask</strong> — config.json claims 32k? See if it actually attends that far",
    "inv.v07.template":            "<strong>📜 Chat-template</strong> — exact CLI flag so lm-eval doesn't silently halve your accuracy",
    "inv.v07.arena":               "<strong>🎯 Arena CI</strong> — recover the confidence intervals Chatbot Arena hides",
    "inv.v07.contam":              "<strong>🧪 Contamination</strong> — rate 20+ benchmarks for contamination probability",
    "inv.v07.quant":               "<strong>⚖️ Quant</strong> — predict γ shift + ΔPPL for any (model × quant scheme) combo",

    // v0.7.3 — anti-bullshit pack #5: Quant-regime classifier
    "modes.quant":                 "⚖️ Quant",
    "mode_desc.quant":             "Predicts γ-shift and ΔPPL for any (model × quant scheme). Architecture-aware: small d_head + GQA → more sensitive. Recommends safer alternatives if a cliff is detected.",
    "quant.title":                 "⚖️ Quant-regime Classifier",
    "quant.tip":                   "Predicts γ-shift (and downstream ΔPPL) for a given (model × quant scheme). Generic claims like 'AWQ ~95% retention' are too vague — TAF uses d_head, GQA ratio, SWA flag, and model size to give an architecture-specific verdict. Solves: HF community widely reports unpredictable quant cliffs (NF4 -2 PPL on Phi-3 but fine on Llama-3-8B).",
    "quant.desc":                  "<strong>Will quantizing your model break it?</strong> Paste an HF model id, pick a quant scheme — get predicted γ-shift, expected ΔPPL band, and a recommended alternative if it's a cliff. Browser-only, no GPU, no calibration set required.",
    "quant.id_label":              "HF model id:",
    "quant.fetch_btn":             "📥 Fetch config",
    "quant.scheme_label":          "Quant scheme:",
    "quant.run_btn":                "⚖️ Predict",
    "quant.all_btn":               "📊 Compare all schemes",
    "quant.regime.safe":           "✅ SAFE",
    "quant.regime.mild":           "✅ MILD COMPRESSION",
    "quant.regime.significant":    "⚠ SIGNIFICANT DEGRADATION",
    "quant.regime.cliff":          "❌ HEAVY CLIFF",
    "quant.label.gamma_shift":     "γ shift",
    "quant.label.delta_ppl":       "ΔPPL (est.)",
    "quant.label.arch_mult":       "Arch multiplier",
    "quant.section.breakdown":     "Breakdown",
    "quant.section.reco":          "Recommendation",
    "quant.section.compare":       "All schemes (sorted by safety)",
    "quant.field.scheme":          "Scheme",
    "quant.field.calibrated":      "calibrated",
    "quant.field.uncalibrated":    "uncalibrated",
    "quant.field.base_penalty":    "Base penalty",
    "quant.field.arch_mult_full":  "Architecture multiplier",
    "quant.field.gamma_shift":     "Predicted γ shift",
    "quant.field.ppl_band":        "ΔPPL band (est.)",
    "quant.field.params":          "Parameters",
    "quant.col.scheme":            "Scheme",
    "quant.col.bits":              "Bits",
    "quant.col.gamma_shift":       "γ shift",
    "quant.col.ppl_band":          "ΔPPL band",
    "quant.col.regime":            "Regime",
    "quant.reco.switch_to_awq":    "<strong>Switch to {scheme}</strong> — calibrated 4-bit handles small d_head + GQA much better than NF4. Expected ΔPPL drops ~2-3×.",
    "quant.reco.switch_to_q5_km":  "<strong>Switch to {scheme}</strong> — Q5 keeps more head dimensions intact at low cost (only ~25% bigger file).",
    "quant.reco.switch_to_q4_km":  "<strong>Switch to {scheme}</strong> — Q3/Q2 are too aggressive for this architecture.",
    "quant.reco.consider_awq":     "<strong>Consider {scheme}</strong> — calibration meaningfully reduces γ-shift on this architecture.",
    "quant.reco.use_higher_bits":  "<strong>Use higher-bit alternative</strong> — this architecture cannot absorb 4-bit cleanly. Try 5- or 8-bit.",
    "quant.reco.verify_with_eval": "<strong>Verify with a real eval</strong> — predicted shift is borderline. Run NIAH at your target context before deploying.",
    "quant.reco.no_action":        "No action needed — quantization is safe for this architecture.",
    "quant.summary.headline_all":  "All schemes for <code>{modelId}</code>",
    "quant.status.empty_id":       "⚠ Enter a model id (e.g. meta-llama/Llama-3.2-1B).",
    "quant.status.fetching":       "⏳ Fetching config.json for {modelId}...",
    "quant.status.fetched":        "✅ Config fetched for {modelId}. Pick a scheme and click Predict (or Compare all schemes).",
    "quant.status.no_scheme":      "⚠ Pick a quant scheme from the dropdown.",
    "quant.status.done":           "✅ Predicted regime: {regime}",
    "quant.status.done_all":       "✅ Compared {n} schemes — sorted by safety.",

    // v0.7.4 — HF Hub autocomplete: privacy, rate-limit, and gated-model notices
    "hf_auto.privacy":             "🔒 Queries sent to huggingface.co/api · cached locally 5 min",
    "hf_auto.rate_limited":        "⚠ HuggingFace rate limit — try again in a moment, or type the full model id manually",
    "hf_auto.gated_msg":           "is gated. Accept the license here:",

    // v0.7.5 — anti-bullshit pack #6: Cross-framework drift bound
    "modes.drift":                 "🔀 Drift",
    "mode_desc.drift":             "Predicts max-allowable drift between two benchmark scores given (framework, dtype, batch, chat-template). Flags real bugs vs numerical noise.",
    "drift.title":                 "🔀 Cross-framework Drift Bound",
    "drift.tip":                   "Same model, different scores on different setups. Is the gap noise or a real bug? Enter two scores with their (framework, dtype, batch, chat-template) — tool predicts the maximum allowable drift from numerical noise alone. If observed gap exceeds it → real bug, usually chat-template mismatch (lm-eval issue #1841) or KV-cache layout.",
    "drift.desc":                  "<strong>Your model gives 67.2 on lm-eval-hf and 65.1 on vLLM-served. Bug or noise?</strong> Enter both scores with (framework, dtype, batch, chat-template applied?). Tool predicts the noise band and flags real bugs. arxiv 2506.09501 documents this as a major eval reproducibility problem.",
    "drift.setup_a":               "Setup A",
    "drift.setup_b":               "Setup B",
    "drift.score":                 "Score",
    "drift.framework":             "Framework",
    "drift.dtype":                 "Dtype",
    "drift.batch":                 "Batch",
    "drift.template":              "Chat-template",
    "drift.template.applied":      "applied",
    "drift.template.not_applied":  "not applied",
    "drift.template.unknown":      "unknown",
    "drift.run_btn":               "🔀 Compute drift bound",
    "drift.sample_btn":            "📊 Load sample (chat-template bug)",
    "drift.label.observed":        "Observed gap",
    "drift.label.band":            "Numerical band",
    "drift.label.ratio":           "Gap / band",
    "drift.section.setups":        "Setups",
    "drift.section.breakdown":     "Drift contributors (numerical band)",
    "drift.section.verdict":       "Verdict & recommendation",
    "drift.contrib.dtype":         "Dtype mismatch",
    "drift.contrib.framework":     "Framework",
    "drift.contrib.batch":         "Batch difference",
    "drift.contrib.template":      "Chat-template MISMATCH",
    "drift.dominant_cause":        "Dominant cause",
    "drift.cause.dtype":           "dtype precision difference",
    "drift.cause.framework":       "framework / kernel difference",
    "drift.cause.batch":           "batch normalization paths",
    "drift.cause.template_mismatch": "chat-template applied on one side but not the other (lm-eval-harness #1841 pattern — typical -50% drop on multi-turn)",
    "drift.verdict.noise":         "✅ NUMERICAL NOISE",
    "drift.verdict.suspicious":    "⚠ SUSPICIOUS — verify",
    "drift.verdict.bug":           "❌ REAL BUG — investigate",
    "drift.verdict.bug_template":  "❌ CHAT-TEMPLATE BUG",
    "drift.reco.noise":            "Gap fits within the expected numerical-noise band. No action needed; the difference is consistent with framework/dtype/batch variation alone.",
    "drift.reco.suspicious":       "Gap is 1–2× the predicted noise band. Borderline — possibly a real bug. Try aligning the dominant contributor (e.g. match framework or dtype) and re-test.",
    "drift.reco.bug":              "Gap is &gt; 2× the predicted noise band. This is a real bug. Inspect the dominant contributor — most likely tokenizer / chat-template / KV-cache layout difference. Run lm-eval-harness with <code>--apply_chat_template</code> and confirm.",
    "drift.reco.bug_template":     "Chat-template mismatch detected. This is the most common cause of large eval discrepancies (lm-eval-harness issue #1841). Re-run the &quot;not applied&quot; side with <code>--apply_chat_template</code> (or set vLLM <code>--chat-template &lt;name&gt;</code>) and re-test.",
    "drift.status.empty_scores":   "⚠ Enter both scores.",
    "drift.status.done":           "✅ Verdict: {verdict}",
    "drift.status.sample_loaded":  "✅ Sample loaded (canonical chat-template bug). Click Compute drift bound.",

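    // v0.7.6 — anti-bullshit pack #7: NIAH → reasoning gap predictor.
    // This run of keys also holds the Saturation detector (saturation.*, help.v08.*, inv.v08.*)
    // and Solutions Hub (hub.*, help.v081.*, inv.v081.*) strings, interleaved with the niah.* entries.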
    "modes.niah":                  "🔍 NIAH→Reason",
    "mode_desc.niah":              "Predicts NIAH (retrieval) and multi-hop reasoning pass rates at any context. Solves: long-context models often pass NIAH but fail reasoning at the same context (RULER paper).",
    "modes.saturation":            "📈 Saturation",
    "mode_desc.saturation":        "Tells you whether a benchmark still discriminates frontier models or has saturated (e.g. MMLU 88-94% top, AIME 2025 already 96-100%). Returns top-3 + verdict + recommended replacements.",
    "modes.hub":                   "🧭 Solutions",
    "mode_desc.hub":               "Map of every documented LLM-eval pain → tafagent mode (if covered) + curated external tools. Find the right solution without rebuilding it. 30+ pains, 7 categories.",
    "niah.title":                  "🔍 NIAH → Reasoning Gap",
    "niah.tip":                    "NIAH (Needle in a Haystack) tests retrieval: 'find this fact in long text'. Multi-hop reasoning tests inference: 'combine facts X+Y at the start with fact Z at the end'. RULER paper (NVIDIA 2024) shows long-context models often pass NIAH but fail reasoning at the same context. This tool predicts both pass rates from architecture alone.",
    "niah.desc":                   "<strong>Your model claims 128k context. Will it actually reason at 64k, or just retrieve?</strong> Paste an HF model id and a target eval context — tool predicts NIAH and multi-hop reasoning pass rates, the gap, and a 'safe context' where reasoning stays ≥65%.",
    "niah.id_label":               "HF model id:",
    "niah.fetch_btn":              "📥 Fetch config",
    "niah.teval_label":            "Target context (T_eval):",
    "niah.run_btn":                "🔍 Predict",
    "niah.sweep_btn":              "📊 Sweep contexts",
    "niah.label.niah":             "NIAH pass rate",
    "niah.label.reasoning":        "Reasoning pass rate",
    "niah.label.gap":              "Gap",
    "niah.label.safe_ctx":         "Safe reasoning context",
    "niah.section.breakdown":      "Architecture breakdown",
    "niah.section.reco":           "Recommendation",
    "niah.section.sweep":          "Pass rate sweep across context lengths",
    "niah.field.dhorizon":         "d_horizon (effective)",
    "niah.field.ratio":            "T_eval / d_horizon",
    "niah.field.arch_pressure":    "Arch pressure (small d_head + GQA + SWA)",
    "niah.field.theta":            "RoPE θ",
    "niah.field.t_train":          "T_train (claimed)",
    "niah.col.context":            "T_eval",
    "niah.col.niah":               "NIAH",
    "niah.col.reasoning":          "Reasoning",
    "niah.col.gap":                "Gap",
    "niah.col.verdict":            "Verdict",
    "niah.verdict.robust":         "✅ ROBUST",
    "niah.verdict.marginal":       "⚠ MARGINAL",
    "niah.verdict.degraded":       "⚠ DEGRADED",
    "niah.verdict.retrieval_only": "❌ RETRIEVAL-ONLY",
    "niah.verdict.broken":         "❌ BROKEN",
    "niah.reco.robust":            "Both retrieval and reasoning hold up at this context. Safe to deploy for both lookup and inference tasks.",
    "niah.reco.marginal":          "Borderline. Retrieval works but reasoning is shaky. Use for fact-lookup, not multi-step inference.",
    "niah.reco.degraded":          "Significant reasoning drop. The model can find facts but struggles to combine them. Avoid multi-hop tasks at this length.",
    "niah.reco.retrieval_only":    "Canonical RULER finding: model passes NIAH but fails reasoning. Useful for retrieval-augmented setups (where the LLM only locates facts) but NOT for chained inference. Cut your context to the 'safe' value below.",
    "niah.reco.broken":            "Model fails even basic retrieval at this context. Treat as out-of-distribution — re-test at a shorter context.",
    "niah.safe_context":           "≤ {ctx} tokens (reasoning ≥ 65%)",
    "niah.safe_context_none":      "No safe context found below your target — model fails reasoning even at small contexts.",
    "niah.summary.sweep":          "<code>{modelId}</code> — pass rates by context",
    "niah.status.empty_id":        "⚠ Enter a model id (e.g. meta-llama/Llama-3.1-8B-Instruct).",
    "niah.status.bad_teval":       "⚠ Enter a target context (≥ 512 tokens).",
    "niah.status.fetching":        "⏳ Fetching config.json for {modelId}...",
    "niah.status.fetched":        "✅ Config fetched for {modelId}. Set T_eval and click Predict (or Sweep contexts).",
    "niah.status.done":            "✅ {verdict} — NIAH {niah}% · reasoning {reasoning}%",
    "niah.status.sweep_done":      "✅ Swept {n} context lengths.",
    "saturation.title":            "📈 Benchmark Saturation Detector",
    "saturation.tip":              "MMLU is saturated (88-94% all frontier models). Reporting '92% on MMLU' is now meaningless. This tool tells you which benchmarks still discriminate frontier models, which are saturated, and what to use instead. Data: DemandSphere AI Frontier Tracker (CC BY-NC 4.0) refreshed 2026-05.",
    "saturation.desc":             "<strong>Is your benchmark still useful?</strong> Pick a benchmark to see top-3 frontier scores, spread, and a verdict (saturated / near-saturated / discriminative) plus recommended replacements.",
    "saturation.select_label":     "Benchmark:",
    "saturation.select.all":       "— show all benchmarks —",
    "saturation.run_btn":          "📈 Classify",
    "saturation.all_btn":          "📊 Show all",
    "saturation.col.spread":       "Top-3 spread",
    "saturation.col.mean":         "Top-3 mean",
    "saturation.col.n":            "Models",
    "saturation.col.bench":        "Benchmark",
    "saturation.col.verdict":      "Verdict",
    "saturation.col.reco":         "Top reco",
    "saturation.col.model":        "Model",
    "saturation.col.score":        "Score",
    "saturation.section.top3":     "Top-3 frontier scores",
    "saturation.section.recommendations": "Recommended alternatives",
    "saturation.section.note":     "Notes",
    "saturation.section.all":      "All tracked benchmarks",
    "saturation.verdict.saturated":      "🚨 SATURATED",
    "saturation.verdict.near_saturated": "⚠ NEAR SATURATED",
    "saturation.verdict.discriminative": "✅ DISCRIMINATIVE",
    "saturation.verdict.sparse_data":    "ℹ SPARSE DATA",
    "saturation.borderline":       "Borderline — within ±1pp of a threshold cutoff. Treat verdict as 'check carefully'.",
    "saturation.unknown":          "Unknown benchmark.",
    "saturation.attribution":      "Data: DemandSphere AI Frontier Model Tracker (CC BY-NC 4.0) · HF Open LLM Leaderboard v3 (open-weight historical) · last fetch 2026-05-05.",
    "saturation.status.live":      "✅ Live data loaded — {count} models.",
    "saturation.status.baked":     "ℹ Using baked snapshot (live fetch unavailable).",
    "saturation.status.kb_fail":   "⚠ Could not load saturation KB.",
    "saturation.status.done":      "✅ {name} — {verdict}",
    "saturation.status.all_done":  "✅ Classified {n} benchmarks.",
    "help.v08.saturation.title":   "📈 Benchmark Saturation Detector",
    "help.v08.saturation.body":    "MMLU is saturated (88-94% top), AIME 2025 saturated within months of release, HumanEval near-saturated. Pick any benchmark and the tool returns top-3 frontier scores, spread, mean, and a verdict — saturated / near-saturated / discriminative — plus a recommended replacement (e.g. MMLU → MMLU-Pro / GPQA / HLE). Live fetch from DemandSphere AI Frontier Tracker (CC BY-NC 4.0) when reachable; baked 2026-05-05 snapshot when not. <em>Use case</em>: before you cite '92% on MMLU' or design an eval, check whether the benchmark still discriminates anything.",
    "inv.v08.saturation":          "<strong>📈 Saturation</strong> — is your benchmark still useful, or are all frontier models tied at the top?",
    "inv.v081.hub":                "<strong>🧭 Solutions Hub</strong> — every documented pain mapped to a tafagent mode or curated external tool. Don't reinvent — find.",
    "help.v081.hub.title":         "🧭 Solutions Hub",
    "help.v081.hub.body":          "tafagent as integrator, not silo. 30+ pains across 7 categories (eval reliability · diagnostics · setup · training · retrieval · multimodal · observability), each mapped to (a) the tafagent mode that addresses it, if any, and (b) the best-of-breed external tools the community already trusts (RAGAS, MTEB, HELM, MCP Schema Validator, llm-stats, llguidance, GlitchMiner, etc.). Search box matches across pain, scenario, and tool name. <em>Use case</em>: 'I have problem X — does tafagent solve it, and if not, who does?'",
    "hub.title":                   "🧭 Solutions Hub",
    "hub.tip":                     "Map of every documented LLM-eval pain we know about: which tafagent mode addresses it (if any), and the best-of-breed external tools the community already trusts. Goal: full coverage. If a canonical tool exists elsewhere, we link rather than rebuild.",
    "hub.desc":                    "<strong>Don't reinvent — find.</strong> 30+ pains mapped to tafagent modes + curated external tools. Browse by category, search by keyword, or see the gaps where new modes would help most.",
    "hub.clear_btn":               "✕ Clear",
    "hub.no_mode":                 "external",
    "hub.planned":                 "planned:",
    "hub.best_for":                "Best for",
    "hub.not_for":                 "Not for",
    "hub.tools":                   "External tools",
    "hub.status.loaded":           "✅ Loaded {total} pains across {categories} categories — {covered} covered by tafagent modes, {externalLinks} external links curated. Compiled {compiled}.",
    "hub.status.fail":             "⚠ Could not load Solutions Hub.",
    "hub.search.empty":            "No matches for '{query}'. Try broader terms (e.g. 'eval', 'rag', 'tokenizer').",
    "hub.search.results":          "Found {n} match(es) for '{query}'.",

    // v0.7.7 — Task tiles (UX restructure: 14 modes grouped by user intent)
    "tiles.title":                 "🎯 What do you want to do?",
    "tiles.subtitle":              "Pick a task. Each one opens the right tool below. Or scroll down for the full list of 14 modes.",
    "tile.diagnose.title":         "🔬 Diagnose a model",
    "tile.diagnose.desc":          "Will this specific model work for my use case?",
    "tile.trust.title":            "✓ Trust a benchmark score",
    "tile.trust.desc":             "Should I believe this number? Bug or noise?",
    "tile.eval.title":              "⚙️ Set up an eval correctly",
    "tile.eval.desc":              "Get the exact CLI flag for lm-eval / vLLM / transformers.",
    "tile.compare.title":          "🆚 Compare models",
    "tile.compare.desc":           "Side-by-side, or browse the empirical model landscape.",
    "tile.manual.title":           "📋 Manual / free-form",
    "tile.manual.desc":            "Pick a specific recipe by hand, or ask in plain English.",
    "tile.diagnose.tip":           "Start here when you have a specific model id and want a full diagnostic: <strong>Profile</strong> runs all 5 recipes at once. <strong>Unmask</strong> checks if max_position_embeddings is honest. <strong>NIAH→Reason</strong> predicts retrieval-vs-reasoning gap. <strong>Quant</strong> predicts whether quantizing will break it. <strong>Inspect</strong> lets you paste raw config.json for private/in-dev models.",
    "tile.trust.tip":              "When you see a score and want to know if it's real. <strong>Contamination</strong> rates 20+ benchmarks for likelihood the model saw them during training. <strong>Drift</strong> tells you if a gap between two evals is numerical noise or a real bug (chat-template mismatch, KV-cache layout, etc.). <strong>Arena CI</strong> reconstructs the confidence intervals Chatbot Arena hides — many top-Elo &quot;wins&quot; are statistically tied.",
    "tile.eval.tip":               "Before you run lm-eval-harness or vLLM serve, get the right CLI flag. <strong>Chat-template Sniffer</strong> detects the template family (Llama-3 / ChatML / Mistral / Phi-3 / DeepSeek / Alpaca / custom / none) and emits the exact <code>--apply_chat_template</code> / <code>--chat-template</code> invocation. Solves issue #1841 in lm-eval-harness (silent ÷2 accuracy). <strong>Diagnose CLI</strong> generates the Python command to measure γ_obs on your local GPU.",
    "tile.compare.tip":            "<strong>Compare</strong>: pick 2-3 candidate models + one recipe, see verdicts in a side-by-side table (e.g. Llama-3-8B vs Mistral-7B at 32k context). <strong>Phase diagram</strong>: scatter of 23 empirical models on the (log θ, γ) plane, with the Padé curve overlaid. Hover dots for details, click to load that model into the Recipe form.",
    "tile.manual.tip":             "<strong>Recipe</strong>: pick a specific X-N recipe (X-1 custom-vs-API, X-2 long context, X-3 budget, X-5 hardware, X-19 KV compression, X-21 imprint, X-22 compute-context invariant, X-23 IH-phase) and fill the form by hand for full control. <strong>Ask</strong>: type a free-form question; an in-browser 0.5B LLM (Qwen2.5) picks the right recipe and runs it. Best for &quot;what would happen if...&quot; exploration.",
    "share.import_desc":       "Got a JSON file from someone else's TAF analysis? Load it here to see the verdict + chain locally. Same view as if you'd run it yourself.",
    "share.import_btn":        "📂 Load shared JSON",
    "synthesis.system":        "You are a precise transformer LLM diagnostic assistant. Given pre-computed TAF formula results, write a clear plain-English summary in 4-6 sentences. Cite the section number (§X.Y) for each number you mention. Always give a concrete recommendation. Do NOT invent numbers.",

    // INSPECTOR mode
    "inspector.title":         "🔍 Architecture Inspector",
    "inspector.desc":          "Paste the raw <code>config.json</code> contents. The tool extracts the architectural parameters and runs the full 5-recipe Profile.",
    "inspector.tip":           "<strong>Paste any config.json directly</strong>. Tool parses it and runs the full Profile. Useful for: private models, in-development configs, models not yet on HuggingFace, or comparing what your custom architecture would do.",
    "inspector.quickstart":    "💡 Use case: you have a private model not on HF Hub, or a config you're designing. Paste the raw JSON below and get a full TAF profile.",
    "inspector.placeholder":   "{\n  \"model_type\": \"llama\",\n  \"rope_theta\": 500000,\n  \"max_position_embeddings\": 8192,\n  \"num_attention_heads\": 32,\n  \"num_key_value_heads\": 8,\n  \"hidden_size\": 4096,\n  \"num_hidden_layers\": 32\n}",
    "inspector.T_eval":        "T_eval (your target context):",
    "inspector.btn":           "🚀 Inspect & profile",

    // WHAT-IF slider
    "whatif.title":            "🎚 What-if: drag T_eval to see γ change live",
    "whatif.desc":             "Pure JS recompute (no Pyodide call). Shows the geometric γ_Padé and d_horizon as you slide. The full chain re-runs on click.",
    "whatif.T_eval":           "<strong>T_eval</strong>",
    "whatif.gamma_pade":       "<strong>γ_Padé</strong>",
    "whatif.d_horizon":        "<strong>d_horizon</strong>",
    "whatif.l_niah":           "<strong>L_NIAH ceiling</strong>",
    "whatif.predicted":        "<strong>Predicted geometric verdict</strong>",
    "whatif.rerun":            "↻ Recompute full chain at this T_eval",

    // COMMUNITY feed
    "community.title":         "🌐 Recent community submissions",
    "community.desc":          "Live feed from the public registry. Click any submission to view full analysis.",
    "community.browse_all":    "Browse all →",
    "community.loading":       "Loading...",
    "community.no_repo":       "The registry repo isn't created yet. Once it exists with submissions, they'll appear here live.",
    "community.no_submissions": "No submissions yet. Be the first — generate a Profile and click 📤 Submit to registry.",

    // FALSIFICATION dashboard
    "falsification.title":     "🔬 Paper predictions — falsification status",
    "falsification.desc":      "The TAF framework rests on falsifiable predictions (F1-F23). Each is empirically tested. Here's the live status of every prediction in the paper.",
    "falsification.summary":   "{confirmed} confirmed · {partial} partial · {refuted} refuted · {untested} untested (out of {total} total predictions)",
    "falsification.col.id":    "ID",
    "falsification.col.claim": "Claim",
    "falsification.col.status": "Status",
    "falsification.col.evidence": "Evidence",

    "tafcard.title":           "📇 TAF Card — full model profile",
    "tafcard.recipes_title":   "📋 Recipes — verdict per dimension",
    "tafcard.recipes_count_label": "dimensions",
    "tafcard.numbers_title":   "🔢 Key numbers (paper §26)",
    "tafcard.fals_title":      "🔬 Falsification status (F1-F23)",
    "tafcard.fals_none":       "No falsifications applicable.",
    "tafcard.diag_title":      "🔬 Diagnostics — numbers · γ check · what-if",
    "tafcard.verify_title":    "✓ Verification — Lean + Sage + falsification",
    "tafcard.share_title":     "📂 Provenance & share",
    "tafcard.whatif_title":    "🎚️ What-if explorer",
    "verdict.go":              "GO",
    "verdict.no":              "NO",
    "verdict.degraded":        "DEGRADED",

    "compare.title_out":       "🆚 Comparison Table",

    "status.loading_pyodide":  "⏳ Loading Python runtime (~10MB, first time only)...",
    "status.loading_taf":      "⏳ Loading TAF formulas + recipes...",
    "status.ready":            "✅ Ready. Pick a model and click Profile to start.",
    "status.computing":        "🧮 Computing TAF chain...",
    "status.done":             "✅ Done.",

    "profile.hf_placeholder":  "e.g. meta-llama/Meta-Llama-3-8B or Qwen/Qwen2.5-7B",
    "compare.hf_placeholder":  "HF model id (e.g. meta-llama/Meta-Llama-3-8B)",
    "compare.slot1_placeholder": "HF model id (e.g. meta-llama/Meta-Llama-3-8B)",
    "compare.slot2_placeholder": "HF model id #2",
    "compare.slot3_placeholder": "HF model id #3 (optional)",
    "compare.preset_default": "— or preset —",

    // Form parameters
    "param.theta":         "θ (rope_theta)",
    "param.theta.tip":     "<strong>RoPE base frequency</strong> from <code>config.rope_theta</code>. Higher = more long-range capacity.",
    "param.T_train":       "T_train",
    "param.T_train.tip":   "<strong>Max training context</strong>. From <code>max_position_embeddings</code>. Beyond this is extrapolation.",
    "param.T_eval":        "T_eval (your target)",
    "param.T_eval.tip":    "<strong>Your target inference context</strong>. The whole question is: will the model behave well at THIS length?",
    "param.n_attn":        "n_attention_heads",
    "param.n_attn.tip":    "<strong>Number of attention heads</strong> per layer. From <code>num_attention_heads</code>.",
    "param.n_kv":          "n_kv_heads",
    "param.n_kv.tip":      "<strong>KV heads</strong>. If &lt; n_attention_heads → GQA (Grouped Query Attention). Reduces KV memory but pushes γ toward Hagedorn.",
    "param.d_head":        "head_dim",
    "param.d_head.tip":    "<strong>Per-head dimension</strong>. Typical 64, 96, 128. From <code>head_dim</code> or <code>hidden_size / num_attention_heads</code>.",
    "param.n_layers":      "n_layers",
    "param.n_layers.tip":  "<strong>Number of transformer blocks</strong>. From <code>num_hidden_layers</code>.",
    "param.n_params":      "n_params (e.g. 8e9)",
    "param.n_params.tip":  "<strong>Total parameter count</strong>. Threshold ~400M for induction-head emergence. Affects KV memory and budget recipes.",
    "param.has_swa":       "Has SWA?",
    "param.has_swa.tip":   "<strong>Sliding Window Attention</strong>. <code>true</code> for Mistral, gemma-2, phi-3. v0.5.3 calibration audit disabled the historical δ_SWA correction (n=1 fit).",
    "common.yes":          "Yes",
    "common.no":           "No",

    // Mode tooltips
    "modes.tip":           "<strong>Fourteen ways to use the tool</strong>.<br><strong>📇 Profile</strong>: paste a model id → 5-recipe TAF Card.<br><strong>🆚 Compare</strong>: 2-3 models side-by-side on one recipe.<br><strong>🔍 Inspect config</strong>: paste raw config.json → full Profile.<br><strong>💬 Ask</strong>: free-form question, browser LLM picks the recipe.<br><strong>📋 Recipe</strong>: manual selection with full form control.<br><strong>🩺 Diagnose CLI</strong>: generate Python command for local γ measurement.<br><strong>📊 Phase diagram</strong>: 23-model panel on (log θ, γ) plane.<br><strong>🪟 Unmask</strong>: detect misleading max_position_embeddings (SWA / YaRN / RoPE-scaling).<br><strong>📜 Chat-template</strong>: detect family + give exact CLI flag for lm-eval / vLLM / transformers.<br><strong>🎯 Arena CI</strong>: reconstruct confidence intervals from raw pairwise vote data; detect statistical ties Arena hides.<br><strong>🧪 Contamination</strong>: rate 20+ benchmarks for contamination probability based on training cutoff vs release date.<br><strong>⚖️ Quant</strong>: predict γ-shift and ΔPPL for any (model × quant scheme); recommend safer alternative on cliff.<br><strong>🔀 Drift</strong>: same model, different scores on two setups — bug or noise? Predict numerical-noise band and flag real bugs.<br><strong>🔍 NIAH→Reason</strong>: predict NIAH and multi-hop reasoning pass rates from architecture; find your model's safe reasoning context.",
    "profile.tip":         "<strong>One-click full diagnosis</strong>. Paste any HF model id (or pick preset). Tool runs all 5 recipes (long-context, KV-compression, custom-vs-API, budget, hardware) and produces a single <strong>TAF Card</strong> with verdict per dimension + key numbers + architecture classification.<br><br><strong>Use case</strong>: \"I'm evaluating Qwen2.5-32B for production — what's its full viability profile?\" → paste id → Profile → done.",
    "compare.tip":         "<strong>Same recipe, multiple models</strong>. Pick 2-3 candidate models and one recipe. See verdicts in a single comparison table.<br><br><strong>Use case</strong>: \"I need long-context retrieval at 16K — which is best: Llama-3-8B, Mistral-7B, or Qwen-7B?\" → pick 3 + X-2 + 16K → see winner.",

    // Help modal
    "help.title":               "📘 TAF Agent — User Manual",
    "help.what.title":          "What does it do?",
    "help.what.body":           "Predicts <strong>practical viability</strong> of any transformer LLM <em>before you spend GPU/$</em>. Answers questions like \"will this model work at L=32K?\" or \"should I train custom or use API?\" using deterministic Python formulas (TAF — Thermodynamic Attention Framework).",
    "help.modes.title":         "How to use — 7 modes",
    "help.modes.profile":       "<strong>📇 Profile</strong>: paste model id → all recipes at once = TAF Card. <strong>Best starting point</strong>.",
    "help.modes.compare":       "<strong>🆚 Compare</strong>: 2-3 models side-by-side on same recipe. Best when choosing between candidates.",
    "help.modes.inspector":     "<strong>🔍 Inspect config</strong>: paste raw <code>config.json</code> → tool parses + runs full Profile. For private models, in-development configs, or models not yet on HF Hub.",
    "help.modes.ask":           "<strong>💬 Ask plain English</strong>: free-form question, in-browser LLM picks the recipe. Best for casual exploration.",
    "help.modes.recipe":        "<strong>📋 Recipe + form</strong>: manual selection, full parameter control. Best when you want exact control.",
    "help.modes.diagnose":      "<strong>🩺 Diagnose CLI</strong>: generate Python command to measure γ on your local machine (transformers + numpy). Fast ≈5 min CPU; full ≈20–60 min GPU. Output JSON re-uploadable via Inspect.",
    "help.modes.phase":         "<strong>📊 Phase diagram</strong>: scatter plot of 23 panel models on (log θ, γ) plane. Hagedorn line γ=1 separates Phase A from Phase B. Click a dot to load that model into Recipe form.",
    "help.recipes.title":       "The 8 recipes available",
    "help.recipe.x1.title":     "<strong>X-1 Custom training vs API</strong> — compares cost of training your own model vs paying for API access.",
    "help.recipe.x1.example":   "Try: <em>\"Should I train an 8B custom model or use GPT-4o for 50M tokens/month?\"</em><br>Answer types: YES (custom) / NO (API) with break-even months.",
    "help.recipe.x2.title":     "<strong>X-2 Long Context Viability</strong> — predicts if a model serves a target context length reliably.",
    "help.recipe.x2.example":   "Try: <em>\"Will Meta-Llama-3-8B handle 32000 tokens for retrieval?\"</em><br>Chains: γ_Padé → decomposition → d_horizon → NIAH ceiling → hallucination → KV memory.<br>Verdict: YES / DEGRADED / NO with mitigation if needed.",
    "help.recipe.x3.title":     "<strong>X-3 Budget pre-flight</strong> — given $ budget, what model is feasible to train?",
    "help.recipe.x3.example":   "Try: <em>\"I have $5000, what model can I train?\"</em><br>Answer: GO / TINY-MODEL / MEMORY-LIMITED with concrete N (params) and D (tokens).",
    "help.recipe.x5.title":     "<strong>X-5 Hardware selection</strong> — which GPU should I use to serve at target throughput?",
    "help.recipe.x5.example":   "Try: <em>\"Cheapest hardware to serve Llama-3-8B at 10M tokens/day\"</em><br>Answer: best GPU + $/Mtok + capacity vs target.",
    "help.recipe.x19.title":    "<strong>X-19 KV Compression decision</strong> — should I use soft decay, hard cutoff, or literature methods?",
    "help.recipe.x21.title":    "<strong>X-21 Imprint Purity Diagnostic</strong> — predicts γ on RANDOM tokens via ν=−1/(2π); how clean is the model's RoPE prediction?",
    "help.recipe.x22.title":    "<strong>X-22 Compute-Context Invariant</strong> — does γ × log(N²·D) lie in panel band 51.2 ± 16.8? Detects scaling/training anomalies.",
    "help.recipe.x23.title":    "<strong>X-23 IH-Phase Detector</strong> — pre- or post-induction-head? Cheap probe via sign(γ_text − γ_random).",
    "help.recipe.x19.example":  "Try: <em>\"How to compress KV cache for Qwen2.5-7B at 32K?\"</em><br>Answer: USE SOFT DECAY / USE D_f CUTOFF / USE LITERATURE METHODS / USE HARD T_train.",
    "help.recipe.x21.example":  "Try: <em>\"How clean is the RoPE prediction on Llama-3-8B?\"</em><br>Answer: predicted γ_random + purity diagnostic (CLEAN / OVER-IMPRINTED / UNDER-IMPRINTED).",
    "help.recipe.x22.example":  "Try: <em>\"Does Mistral-7B fit the compute-context invariant?\"</em><br>Answer: K = γ·log(N²·D), z-score, IN-BAND or OUTLIER.",
    "help.recipe.x23.example":  "Try: <em>\"Is Qwen2.5-7B post-induction-head?\"</em><br>Answer: CONFIRMED PRE-IH / CONFIRMED POST-IH / ANOMALY (with size-vs-Δγ consistency check).",
    "help.section.v04":         "<strong>What's new in v0.4</strong> (sesión 29 findings 2026-04-28): three diagnostic recipes derived from cross-model panel analysis (n=22 LLMs).",
    "help.divider.v04_s29":     "— v0.4 (sesión 29 findings) —",
    "footer.tech_stack":        "Computation: Pyodide · Synthesis: WebLLM (Qwen2.5-0.5B local) · Hosting: GitHub Pages · Cost: $0",
    "help.v04.imprint":         "<strong>Learned-imprint slope ν = −1/(2π)</strong>: RoPE rotation period 2π drives a positional bias on weights, proportional to log(N_params). Even random tokens show this scaling. ν is DERIVED — not fitted (empirical err 0.3%).",
    "help.v04.invariant":       "<strong>Chinchilla-attention invariant K</strong>: γ × log(N²·D) ≈ 51.2 ± 16.8 (CV=0.329). Connects compute scaling and attention exponent into a single dimensionless number.",
    "help.v04.ih_probe":        "<strong>Δγ as IH probe</strong>: sign(γ_text − γ_random) > 0 ⟺ post-induction-head. Cheaper than running an in-context-learning benchmark.",
    "help.v04.constants":       "<strong>γ-cluster on famous constants</strong> (intriguing, n=4): CodeLlama-13b γ=0.382 ≈ 1−1/φ (golden conjugate, err 0.0003); pythia-1.4b γ=0.705 ≈ 1/√2; Llama-2-7b γ=0.287 ≈ 1−1/√2; Mistral-Nemo γ=0.428 ≈ log_10(e). Caveat: could be coincidence.",
    "help.param.theta":         "<strong>θ (rope_theta)</strong>: RoPE base frequency. Higher = more long-range capacity. Typical: 10000 (early), 500000 (Llama-3), 1000000 (Qwen2.5).",
    "help.param.T_train":       "<strong>T_train</strong>: max context the model was trained on. From <code>max_position_embeddings</code>.",
    "help.param.T_eval":        "<strong>T_eval</strong>: <em>your target</em> inference context length. The key knob.",
    "help.param.gqa":           "<strong>n_kv_heads &lt; n_attention_heads</strong>: model uses GQA (Grouped Query Attention). Reduces KV memory but pushes γ toward Hagedorn.",
    "help.param.swa":           "<strong>has_SWA</strong>: model uses Sliding Window Attention (Mistral, gemma-2).",
    "help.param.nparams":       "<strong>n_params</strong>: total parameter count. Threshold ~400M for induction-head emergence.",
    "help.add_models.title":    "Adding new models (3 ways)",
    "help.add_models.preset":   "<strong>Preset list</strong>: 11 popular models curated. Just select from dropdown.",
    "help.add_models.hf":       "<strong>HF Hub fetch</strong>: paste any model id (e.g. <code>Qwen/Qwen2.5-32B-Instruct</code>), click 📥 Fetch. Browser downloads <code>config.json</code> directly from HuggingFace, fills the form. Works for any public model.",
    "help.add_models.manual":   "<strong>Manual</strong>: fill the form fields directly with values from the model card.",
    "help.audit.title":         "The audit chain",
    "help.audit.body":          "Every result shows the full <strong>Computation Chain</strong> — each formula step with its inputs, output, and interpretation. Click any step to expand. Cite section numbers (§26.1, §19.1, etc.) refer to the underlying paper for derivation.",
    "help.synthesis.title":     "The plain-English answer",
    "help.synthesis.body":      "After the deterministic chain runs, an in-browser LLM (Qwen2.5-0.5B, ~350MB cached after first load) synthesizes a plain-English summary. The numbers above are <em>always correct</em> (deterministic Python); the synthesis is LLM-generated — verify against the chain if in doubt.",
    "help.params.title":        "Common parameters explained",
    "help.verdicts.title":      "What to look for in verdicts",
    "help.verdict.yes":         "<strong style=\"color:#3fb950;\">YES / GO</strong> — proceed with confidence; numbers support the choice.",
    "help.verdict.deg":         "<strong style=\"color:#d29922;\">DEGRADED / TINY-MODEL</strong> — works but with caveats; read the action.",
    "help.verdict.no":          "<strong style=\"color:#f85149;\">NO / MEMORY-LIMITED</strong> — don't proceed as-is; mitigation provided.",
    "help.privacy.title":       "Privacy",
    "help.privacy.body":        "Everything runs in your browser. No telemetry, no analytics, no data sent anywhere. Even the LLM model runs locally via WebGPU/WebAssembly. Your model_ids and questions never leave this page.",
    "help.source.title":        "Source & paper",
    "help.source.body":         "Source code: <a href=\"https://github.com/karlesmarin/tafagent\" target=\"_blank\">github.com/karlesmarin/tafagent</a><br>Paper: <em>Marin 2026 — Predicting How Transformers Attend</em> (<a href=\"https://zenodo.org/records/19826343\" target=\"_blank\">Zenodo</a>; arXiv forthcoming)<br>Dataset: <a href=\"https://huggingface.co/datasets/karlexmarin/taf-attention-decay\" target=\"_blank\">taf-attention-decay</a> — 58 γ-measurements across 32 models (CC-BY-4.0)",

    "footer.text":             "© 2026 Carles Marin · Apache-2.0 · independent research · the tool that closes the loop of the paper.",

    // §33 v0.4 (sesion 31, 2026-04-30) — new diagnostic functions
    "v04.title":                  "🆕 v0.4 — New diagnostics (sesion 31)",
    "v04.section.intro":          "Four new diagnostic functions derived sesion 31 (2026-04-30) from cross-of-crosses formula games + Sócratic interrogation. Available in <code>taf_browser.py</code> §33.",
    "v04.arch.label":             "Architectural Concentration",
    "v04.arch.desc":              "γ_text ≈ γ_Padé − 0.012·n_kv. Cross-panel correlational law (R²=0.30). Caveat: not per-model predictor.",
    "v04.pdi.label":              "PDI — Padé Deviation Index",
    "v04.pdi.desc":               "PDI = d_horizon_obs/T_eval. Traffic light: green (≈1), orange (>>1), yellow (<<1), red (Phase B negative).",
    "v04.4bit.label":             "4-bit Shift Predictor",
    "v04.4bit.desc":              "MHA: R²(bf16)<0.9 → γ rises; R²>0.99 → γ drops. GQA: precision-robust regardless.",
    "v04.crit.label":             "Critical Exponents Bundle",
    "v04.crit.desc":              "ν_c, β_c, η_c (=γ−1, CORRECTED), α_C, γ_susc with AM-GM minimum at γ=1−1/√2≈0.293.",

    // §34 v0.5 (sesion 32, 2026-05-01) — Machine-verified framework consistency
    "v05.title":                  "🔬 v0.5 — Machine-verified consistency (sesion 32)",
    "v05.section.intro":          "Sage Groebner basis + Lean Mathlib4 dual-tool verification of <strong>15 algebraic identities</strong> of TAF critical exponents. First transformer-attention framework with formal machine-proof backing.",
    "v05.verify.label":           "Algebraic Consistency Check",
    "v05.verify.desc":            "Given measured γ, verifies 12 D-SAGE identities (D-SAGE-1: 2η²+η·γ_χ+1=0, β·χ=−1, α+χ=2, etc.). All passing = framework intact. Failures indicate bf16 outliers / quantization artifacts.",
    "v05.dsage1.label":           "D-SAGE-1 (★★ core)",
    "v05.dsage1.desc":             "Quadratic identity 2η² + η·γ_χ + 1 = 0 (Sage Groebner-discovered, Lean-verified). Replaces incorrect 'triple closure' claim. Refutes paper 1's η=2γ algebraically.",
    "v05.erratum.label":          "Paper 1 erratum — η correction",
    "v05.erratum.desc":            "Paper 1 originally claimed η = 2γ. Sage Groebner + Lean Mathlib4 proved this fails (residual (-4γ³+5γ+1)/(1-γ) > 0 ∀γ ∈ Phase A). Correct value: η = γ−1, satisfying D-SAGE-1.",
    "v05.repro.label":            "Reproducibility",
    "v05.repro.desc":              "All 15 theorems machine-proof in Lean Mathlib4 (1973 jobs build success). Sage script: <code>analysis/sage_recursive_sweep_2026-04-30.sage</code>. Lean code: <code>lean_taf/taf/Taf/Identities.lean</code>.",

    // v0.5.1 — TAF Card consistency check button
    "v05.consistency.title":      "🔬 Algebraic consistency check (Sage + Lean v0.5)",
    "v05.consistency.desc":       "Verifies 12 D-SAGE algebraic identities of TAF critical exponents (machine-proof Sage Groebner basis + Lean Mathlib4). Pass = framework intact. Fail = bf16 outlier / quantization artifact.",
    "v05.consistency.btn":        "🔬 Verify algebraic consistency",

    // v0.5.2 — Anti-Ising universality class badge
    "v05.antiising.badge":        "🧲 Anti-Ising class (β=γ−1&lt;0, machine-verified)",

    // v0.5.2 — Per-identity tooltips (plain English explanations)
    "v05.tooltip.D_SAGE_1":       "Quadratic algebraic identity connecting anomalous dimension η and susceptibility γ_χ. The CORE identity discovered by Sage Groebner basis (machine-proof). Replaces earlier wrong claim of triple closure.",
    "v05.tooltip.D_SAGE_2":       "In Phase A, β = γ−1 is negative (anti-Ising). Multiplied by χ = 1/(1−γ) gives exactly −1. Signature of TAF's negative-β regime.",
    "v05.tooltip.D_SAGE_4":       "The specific heat exponent α and susceptibility χ sum to exactly 2 in TAF. Algebraic consequence of Josephson hyperscaling.",
    "v05.tooltip.D_SAGE_5":       "Linear sum identity: α + γ_χ = 2(2−γ). Means as γ approaches 1 (Hagedorn), the sum approaches 2; at γ=0 it's 4.",
    "v05.tooltip.D_SAGE_6":       "Order parameter exponent times susceptibility exponent equals a specific quadratic in γ. Factored algebraic relation.",
    "v05.tooltip.Rushbrooke_tautology": "Standard Rushbrooke hyperscaling 2β + γ_χ = ν·d at d=1. In TAF this is a TAUTOLOGY — γ_χ is defined exactly so this holds. Confirmed by Sage Groebner basis.",
    "v05.tooltip.Josephson_tautology": "Standard Josephson hyperscaling 2 − α = ν·d at d=1. In TAF this is a TAUTOLOGY — α is defined exactly so this holds.",
    "v05.tooltip.Fisher_independent": "Fisher relation γ_χ = (2−η)·ν. In TAF this is INDEPENDENT (does NOT close as identity, contrary to triple-closure claim). Residual is γ(2γ−3)/(1−γ).",
    "v05.tooltip.eta_2gamma_REFUTED": "Paper 1 originally claimed η=2γ. This identity refutes it: residual is positive throughout Phase A. Lean Mathlib4 machine-proof refutation.",
    "v05.tooltip.D_14_nu_imprint": "The learned imprint slope ν = −1/(2π) times 2π equals −1. Trivial dimensional check from paper 1.",
    "v05.tooltip.D_SAGE_7":       "The central charge c=3 times |ν_imprint| times 2π equals 3. Dimensional closure connecting CFT and training imprint.",
    "v05.tooltip.nu_beta_id":     "Correlation length exponent ν times order parameter exponent β equals −1 in Phase A. Variant of D-SAGE-2.",

    "v053.calibration.title":     "🔬 v0.5.3 — Calibration audit (2026-05-02)",
    "v053.calibration.note":      "<strong>SWA correction disabled</strong> — original δ_SWA = -0.21 was fit on n=1 model (insufficient data; group-mean +0.355 with single yes-case). <strong>post_IH correction marked exploratory</strong> — group-mean ≈ 0 in re-audit (n=22 panel) does not replicate the OLS fit. <strong>GQA correction replicates</strong> (panel +0.115 vs hardcoded +0.11). <strong>D_f formula corrected for Phase B (γ&gt;1)</strong> — uses discrete cumulative sum instead of continuum approximation. LLaMA-3, Mistral, Gemma now report correct compression values.",
    "v053.release.banner":        "🔧 v0.5.3 — Audit-driven fixes: KV compression D_f now uses discrete sum (correct for all γ); δ_SWA disabled (n=1 calibration); paper §5.2 C_V coefficient erratum (1/4 → 1/12).",

    // §35 v0.6 — γ predicted-vs-observed diagnostic
    "gamma_check.title":           "🔍 γ predicted vs observed",
    "gamma_check.desc":            "Enter your empirically measured γ. Tool detects regime: fraud (θ inflated) / compressed / over-Padé / SWA-random / normal.",
    "gamma_check.gobs_label":      "γ_observed",
    "gamma_check.gobs_tip":        "Empirically measured γ from your model's attention scores. Use the Diagnose CLI to obtain this from real weights.",
    "gamma_check.random_label":    "Random corpus?",
    "gamma_check.random_tip":      "Tick if γ_observed was measured on random/unstructured tokens. Distinguishes SWA signature (γ_obs > 1) from anomaly.",
    "gamma_check.regime":          "Regime",
    "gamma_check.regime.normal":         "Normal",
    "gamma_check.regime.fraud":          "Fraud (θ inflated)",
    "gamma_check.regime.compressed":     "Compressed context",
    "gamma_check.regime.overpade":       "Over-Padé",
    "gamma_check.regime.swa":            "SWA random-corpus signature",
    "gamma_check.regime.unknown":        "Unknown",
    "gamma_check.regime.normal.desc":    "η ∈ [0.85, 1.15]: model uses its full nominal context, no anomaly.",
    "gamma_check.regime.fraud.desc":     "η < 0.01: nominal θ inflated. Model behaves as if θ ≪ advertised. Likely YaRN/marketing inflation without true context extension.",
    "gamma_check.regime.compressed.desc":"η ∈ [0.01, 0.5): context is compressed (model attends less far than nominal θ predicts). Common in instruction-tuned / RLHF models.",
    "gamma_check.regime.overpade.desc":  "η > 1.5: model attends farther than Padé predicts. Possible Lerch-corrected regime or undertrained early-checkpoint.",
    "gamma_check.regime.swa.desc":       "γ_obs > 1.05 on random corpus = sliding-window attention signature (Mistral / Gemma family).",
    "gamma_check.regime.unknown.desc":   "Inputs out of range or γ_obs > 1 without random-corpus flag. Verify measurement.",
    "gamma_check.glossary.title":        "ⓘ Glossary — what these variables mean",
    "gamma_check.glossary.gamma_pade":   "<strong>γ_Padé</strong>: closed-form prediction (2−z)/(2+z), z = T√2/θ. Paper §sec:gamma_decomposition.",
    "gamma_check.glossary.gamma_obs":    "<strong>γ_observed</strong>: empirically measured from your model's attention scores (run the Diagnose CLI on real weights).",
    "gamma_check.glossary.theta_eff_obs":"<strong>θ_eff (observed)</strong>: inverted from γ_obs via T√2 / (1 − γ_obs). Effective θ implied by your measurement.",
    "gamma_check.glossary.theta_eff_pade":"<strong>θ_eff (Padé)</strong>: θ + T/√2. Effective θ predicted by closed-form.",
    "gamma_check.glossary.efficiency":   "<strong>η</strong>: ratio θ_eff_obs / θ_eff_Padé. ≈1 = normal · &lt;0.01 = fraud · &lt;0.5 = compressed · &gt;1.5 = over-Padé.",
    "gamma_check.glossary.delta_h":      "<strong>ΔH_Cardy</strong>: log(θ_eff_obs / θ_nominal). Cardy entropy shift. Negative = compression entropy. ~0 = nominal match.",
    "gamma_check.glossary.regime":       "<strong>Regime</strong>: automatic classifier from η + γ_obs + random_corpus flag.",

    // §36 v0.6 — Tooltips for inline ⓘ icons (per-variable explanations)
    "tooltip.gamma_pade":          "<strong>γ_Padé(T_eval)</strong>: closed-form prediction (2−z)/(2+z), z = T√2/θ. Paper §sec:gamma_decomposition.",
    "tooltip.gamma_decomposed":    "<strong>γ_decomposed</strong>: γ from full architectural decomposition. Padé baseline + GQA shift + post-IH shift (calibrated audit-replicated subset).",
    "tooltip.d_horizon":           "<strong>d_horizon</strong>: effective attention horizon. Beyond this position, scores fall below noise floor (paper §26).",
    "tooltip.L_NIAH":              "<strong>L_NIAH ceiling</strong>: predicted ceiling for needle-in-a-haystack retrieval reliability at current d_horizon.",
    "tooltip.chi":                 "<strong>χ susceptibility</strong>: χ = 1/(1−γ). Diverges at the Hagedorn line γ=1.",
    "tooltip.kv_memory":           "<strong>KV memory @ T_eval (BF16)</strong>: per-request KV cache = 2 · n_layers · n_kv_heads · d_head · T_eval bytes.",
    "tooltip.theta_eff_obs":       "<strong>θ_eff (observed)</strong>: effective θ implied by your γ_observed: T√2 / (1 − γ_obs).",
    "tooltip.theta_eff_pade":      "<strong>θ_eff (Padé)</strong>: effective θ predicted by closed-form: θ + T/√2.",
    "tooltip.efficiency":          "<strong>η = θ_eff_obs / θ_eff_Padé</strong>: efficiency ratio. ≈1 = normal · &lt;0.01 = fraud · &lt;0.5 = compressed · &gt;1.5 = over-Padé.",
    "tooltip.delta_h_cardy":       "<strong>ΔH_Cardy</strong>: log(θ_eff_obs / θ_nominal). Cardy entropy shift. Negative = compression entropy. ~0 = nominal match.",
    "tooltip.verdict_aggregate":   "<strong>Verdict</strong>: worst-of across all recipes. ✅ GO = all green · ⚠ DEGRADED = ≥1 yellow · ❌ NO = ≥1 red.",
    "tooltip.verdict_breakdown":   "<strong>Per-recipe breakdown</strong>: each recipe tests an <em>independent</em> decision axis (long-context · budget · hardware · custom-vs-API · KV-compression). A ❌ on X-1 means \"use the API for your volume\" not \"the model fails\" — open the Recipes section for per-axis context.",
    "tooltip.gamma_pill":          "<strong>γ headline</strong>: γ_decomposed (or γ_Padé fallback). Range (0,1) = Phase A (anti-Ising). γ ≥ 1 = Hagedorn / Phase B.",
    "tooltip.anti_ising":          "<strong>Anti-Ising class</strong>: Phase A → β = γ−1 &lt; 0. Machine-verified (Sage + Lean Mathlib4). See §35 v0.5.",

    // §37 v0.6 — Lean+Mathlib theorem table
    "lean.table.title":            "📑 Lean+Mathlib theorem table",
    "lean.table.desc":             "Every entry below is machine-proven against Lean 4 + Mathlib4. Click any L# link to jump to the source line on GitHub. Grouped by topic — click a header to expand.",
    "lean.table.theorem":          "Theorem",
    "lean.table.claim":            "Claim",
    "lean.table.tactic":           "Tactic",
    "lean.table.source":           "Source",
    "lean.table.lean":             "Lean",
    "lean.findings.title":         "🔎 Substantive findings",
    "lean.findings.detected_by":   "Detected by",
    "lean.findings.fixed_by":      "Fixed by",
    "lean.findings.recommendation":"Recommendation",
    "lean.meta.repo":              "Repo",
    "lean.meta.build":             "Build",
    "lean.meta.theorems":          "Theorems",
    "lean.meta.verified":          "verified",
    "lean.meta.rejected":          "rejected",
    "lean.meta.sorry":             "sorry",
    "lean.meta.findings":          "substantive findings",
    "lean.manifest.loading":       "Loading Lean manifest…",
    "lean.manifest.error":         "Lean manifest unavailable",

    // Help modal — v0.6 section
    "help.v06.title":              "🆕 v0.6 — γ predicted-vs-observed + Cardy ΔH + Lean badges",
    "help.v06.intro":              "<em>v0.6 (2026-05-06): three new diagnostics live in the TAF Card under <strong>🔬 Diagnostics</strong>. All run in your browser; γ_observed comes from the Diagnose CLI on real weights.</em>",
    "help.v06.layout.title":       "TAF Card layout (new in v0.6)",
    "help.v06.layout.body":        "After clicking <strong>🚀 Generate full profile</strong> the card shows: a <strong>hero strip</strong> on top (architecture class + meta + 3 pills: aggregate verdict ✅/⚠/❌, γ headline, 🧲 Anti-Ising if Phase A) and four <strong>expandable sections</strong>: <strong>📋 Recipes</strong> (open by default — verdict per dimension), <strong>🔬 Diagnostics</strong> (key numbers, γ predicted vs observed, what-if explorer), <strong>✓ Verification</strong> (Sage+Lean algebraic consistency, falsification F1-F23), <strong>📂 Provenance &amp; share</strong> (calibration audit + JSON download / share link / registry submit). Click any header to expand. Every variable has an inline <strong>ⓘ</strong> tooltip.",
    "help.v06.gamma_check.title":  "γ predicted vs observed",
    "help.v06.gamma_check.body":   "Enter the empirically-measured γ from your model and the tool computes <strong>η = θ_eff_obs / θ_eff_Padé</strong> and classifies into one of 5 regimes:",
    "help.v06.case.normal":        "<strong>Normal</strong> (η ∈ [0.85, 1.15]) — model uses its full nominal context. <em>Use case</em>: validate a new release before adopting it.",
    "help.v06.case.fraud":         "<strong>Fraud</strong> (η &lt; 0.01) — nominal θ inflated; model behaves as if θ ≪ advertised. <em>Use case</em>: detect YaRN/marketing inflation (CodeLlama / Mistral-Nemo pattern).",
    "help.v06.case.compressed":    "<strong>Compressed</strong> (η &lt; 0.5) — context compressed; model attends shorter than nominal θ. <em>Use case</em>: spot RLHF/instruction-tuning compression (LLaMA-2 pattern).",
    "help.v06.case.overpade":      "<strong>Over-Padé</strong> (η &gt; 1.5) — model attends farther than Padé predicts. <em>Use case</em>: identify Lerch-corrected regime or undertrained early checkpoints (pythia-1b pattern).",
    "help.v06.case.swa":           "<strong>SWA random-corpus</strong> (γ_obs &gt; 1.05 with random_corpus=Yes) — sliding-window attention signature. <em>Use case</em>: confirm Mistral / Gemma SWA on random tokens.",
    "help.v06.cardy.title":        "Cardy ΔH diagnostic",
    "help.v06.cardy.body":         "<strong>ΔH_Cardy = log(θ_eff_obs / θ_nominal)</strong>. Entropy shift between observed effective θ and nominal θ. Strong negative = compression entropy; near zero = nominal match. Complements η for borderline cases.",
    "help.v06.lean.title":         "Lean + Mathlib verification badges",
    "help.v06.lean.body":          "TAF identities are formally machine-proven in Lean Mathlib4: <strong>37 theorems</strong> in 7 groups (Padé, RG flow, Cayley, D-SAGE, audit findings, erratum CV, misc) + <strong>1 substantive finding</strong> (V-derivative factor-2, theorem <code>V_derivative_ne_RG_beta</code>). Source: <a href=\"https://github.com/karlesmarin/lean-taf\" target=\"_blank\">github.com/karlesmarin/lean-taf</a> (commit 25c77fd). Re-verify locally: <code>git clone --depth=1 https://github.com/karlesmarin/lean-taf &amp;&amp; cd lean-taf &amp;&amp; lake exe cache get &amp;&amp; lake env lean Taf/Identities.lean</code>. The 🧲 Anti-Ising pill in the hero strip and the Verification accordion link to specific source lines.",
    "help.v06.glossary.title":     "Variable glossary (also embedded in TAF Card)",
    "help.v06.glossary.body":      "Every variable in the TAF Card has an inline ⓘ tooltip. The complete list: γ, γ_Padé, γ_decomposed, γ_observed, θ, θ_eff_obs, θ_eff_Padé, η, ΔH_Cardy, χ, d_horizon, L_NIAH, KV memory, regime. Hover any ⓘ for the definition + paper section.",
  },

  // ────────────────────────────────────────────────────────────────────────
  // ES — Español
  // ────────────────────────────────────────────────────────────────────────
  es: {
    // §33 v0.4 (sesion 31, 2026-04-30) — nuevas funciones diagnósticas
    "v04.title":                  "🆕 v0.4 — Nuevos diagnósticos (sesion 31)",
    "v04.section.intro":          "Cuatro nuevas funciones diagnósticas derivadas en sesión 31 (2026-04-30) desde juegos de fórmulas cross-of-crosses + interrogación socrática. Disponibles en <code>taf_browser.py</code> §33.",
    "v04.arch.label":             "Concentración Arquitectural",
    "v04.arch.desc":              "γ_text ≈ γ_Padé − 0.012·n_kv. Ley correlacional cross-panel (R²=0.30). Caveat: no es predictor per-model.",
    "v04.pdi.label":              "PDI — Índice de Desviación de Padé",
    "v04.pdi.desc":               "PDI = d_horizon_obs/T_eval. Semáforo: verde (≈1), naranja (>>1), amarillo (<<1), rojo (Phase B negativo).",
    "v04.4bit.label":             "Predictor de Shift 4-bit",
    "v04.4bit.desc":              "MHA: R²(bf16)<0.9 → γ sube; R²>0.99 → γ baja. GQA: precision-robusto.",
    "v04.crit.label":             "Bundle de Exponentes Críticos",
    "v04.crit.desc":              "ν_c, β_c, η_c (=γ−1, CORREGIDO), α_C, γ_susc con mínimo AM-GM en γ=1−1/√2≈0.293.",

    // §34 v0.5 (sesion 32, 2026-05-01) — Consistencia algebraica verificada por máquina
    "v05.title":                  "🔬 v0.5 — Consistencia verificada por máquina (sesion 32)",
    "v05.section.intro":          "Verificación dual con Sage Groebner basis + Lean Mathlib4 de <strong>15 identidades algebraicas</strong> de los exponentes críticos TAF. Primer framework transformer-attention con respaldo formal machine-proof.",
    "v05.verify.label":           "Comprobación de Consistencia Algebraica",
    "v05.verify.desc":            "Dado γ medido, verifica 12 identidades D-SAGE (D-SAGE-1: 2η²+η·γ_χ+1=0, β·χ=−1, α+χ=2, etc.). Todas pasando = framework intacto. Fallos indican bf16 outliers / artefactos de cuantización.",
    "v05.dsage1.label":           "D-SAGE-1 (★★ core)",
    "v05.dsage1.desc":             "Identidad cuadrática 2η² + η·γ_χ + 1 = 0 (descubierta por Sage Groebner, verificada Lean). Reemplaza claim incorrecto de 'cierre triple'. Refuta η=2γ del paper 1 algebraicamente.",
    "v05.erratum.label":          "Erratum paper 1 — corrección η",
    "v05.erratum.desc":            "Paper 1 afirmaba η = 2γ. Sage Groebner + Lean Mathlib4 demostraron que falla (residual (-4γ³+5γ+1)/(1-γ) > 0 ∀γ ∈ Fase A). Valor correcto: η = γ−1, satisface D-SAGE-1.",
    "v05.repro.label":            "Reproducibilidad",
    "v05.repro.desc":              "Los 15 teoremas son machine-proof en Lean Mathlib4 (build exitoso 1973 jobs). Script Sage: <code>analysis/sage_recursive_sweep_2026-04-30.sage</code>. Código Lean: <code>lean_taf/taf/Taf/Identities.lean</code>.",

    // v0.5.1 — TAF Card consistency check button
    "v05.consistency.title":      "🔬 Comprobación de consistencia algebraica (Sage + Lean v0.5)",
    "v05.consistency.desc":       "Verifica 12 identidades algebraicas D-SAGE de los exponentes críticos TAF (machine-proof Sage Groebner basis + Lean Mathlib4). Pasa = framework intacto. Falla = bf16 outlier / artefacto de cuantización.",
    "v05.consistency.btn":        "🔬 Verificar consistencia algebraica",

    // v0.5.2 — Anti-Ising universality class badge
    "v05.antiising.badge":        "🧲 Clase Anti-Ising (β=γ−1&lt;0, verificado por máquina)",

    // v0.5.2 — Per-identity tooltips (explicaciones en lenguaje claro)
    "v05.tooltip.D_SAGE_1":       "Identidad algebraica cuadrática que conecta dimensión anómala η con susceptibilidad γ_χ. Identidad CORE descubierta por Sage Groebner basis (machine-proof). Reemplaza claim incorrecto de triple closure.",
    "v05.tooltip.D_SAGE_2":       "En Fase A, β = γ−1 es negativo (anti-Ising). Multiplicado por χ = 1/(1−γ) da exactamente −1. Signature del régimen negativo-β de TAF.",
    "v05.tooltip.D_SAGE_4":       "El exponente de calor específico α y la susceptibilidad χ suman exactamente 2 en TAF. Consecuencia algebraica del hiperescalado de Josephson.",
    "v05.tooltip.D_SAGE_5":       "Identidad lineal: α + γ_χ = 2(2−γ). Significa que cuando γ se acerca a 1 (Hagedorn), la suma se acerca a 2; en γ=0 vale 4.",
    "v05.tooltip.D_SAGE_6":       "Exponente de parámetro de orden multiplicado por exponente de susceptibilidad da una cuadrática específica en γ. Relación algebraica factorizada.",
    "v05.tooltip.Rushbrooke_tautology": "Hiperescalado de Rushbrooke estándar 2β + γ_χ = ν·d en d=1. En TAF es TAUTOLOGÍA — γ_χ se define exactamente para que se cumpla. Confirmado por Sage Groebner basis.",
    "v05.tooltip.Josephson_tautology": "Hiperescalado de Josephson estándar 2 − α = ν·d en d=1. En TAF es TAUTOLOGÍA — α se define exactamente para que se cumpla.",
    "v05.tooltip.Fisher_independent": "Relación de Fisher γ_χ = (2−η)·ν. En TAF es INDEPENDIENTE (NO cierra como identidad, contrario al claim de triple closure). El residuo es γ(2γ−3)/(1−γ).",
    "v05.tooltip.eta_2gamma_REFUTED": "Paper 1 afirmaba η=2γ. Esta identidad lo refuta: el residuo es positivo en toda Fase A. Refutación machine-proof por Lean Mathlib4.",
    "v05.tooltip.D_14_nu_imprint": "La pendiente de imprint aprendido ν = −1/(2π) multiplicada por 2π da −1. Verificación dimensional trivial del paper 1.",
    "v05.tooltip.D_SAGE_7":       "La carga central c=3 multiplicada por |ν_imprint| multiplicada por 2π da 3. Cierre dimensional conectando CFT con imprint de entrenamiento.",
    "v05.tooltip.nu_beta_id":     "Exponente de longitud de correlación ν multiplicado por exponente de parámetro de orden β da −1 en Fase A. Variante de D-SAGE-2.",

    "v053.calibration.title":     "🔬 v0.5.3 — Auditoría de calibración (2026-05-02)",
    "v053.calibration.note":      "<strong>Corrección SWA desactivada</strong> — δ_SWA = -0.21 original se ajustó con n=1 modelo (datos insuficientes; promedio del único caso +0.355). <strong>Corrección post_IH marcada exploratoria</strong> — promedio de grupo ≈ 0 en re-auditoría (panel n=22) no replica el ajuste OLS. <strong>Corrección GQA replica</strong> (panel +0.115 vs hardcoded +0.11). <strong>Fórmula D_f corregida para Fase B (γ&gt;1)</strong> — usa suma cumulativa discreta en lugar de aproximación continua. LLaMA-3, Mistral, Gemma ahora reportan valores correctos de compresión.",
    "v053.release.banner":        "🔧 v0.5.3 — Correcciones por audit: D_f de compresión KV ahora usa suma discreta (correcto para todo γ); δ_SWA desactivado (calibración n=1); erratum coeficiente C_V paper §5.2 (1/4 → 1/12).",

    // §35 v0.6 — Diagnóstico γ predicho vs observado
    "gamma_check.title":           "🔍 γ predicho vs observado",
    "gamma_check.desc":            "Introduce tu γ medido empíricamente. La herramienta detecta el régimen: fraude (θ inflado) / comprimido / sobre-Padé / SWA-aleatorio / normal.",
    "gamma_check.gobs_label":      "γ_observado",
    "gamma_check.gobs_tip":        "γ medido empíricamente desde los attention scores de tu modelo. Usa la CLI de Diagnose para obtenerlo desde pesos reales.",
    "gamma_check.random_label":    "¿Corpus aleatorio?",
    "gamma_check.random_tip":      "Marca sí si γ_observado se midió sobre tokens aleatorios/no estructurados. Distingue la firma SWA (γ_obs > 1) de una anomalía.",
    "gamma_check.regime":          "Régimen",
    "gamma_check.regime.normal":         "Normal",
    "gamma_check.regime.fraud":          "Fraude (θ inflado)",
    "gamma_check.regime.compressed":     "Contexto comprimido",
    "gamma_check.regime.overpade":       "Sobre-Padé",
    "gamma_check.regime.swa":            "Firma SWA (corpus aleatorio)",
    "gamma_check.regime.unknown":        "Desconocido",
    "gamma_check.regime.normal.desc":    "η ∈ [0.85, 1.15]: el modelo usa su contexto nominal completo, sin anomalías.",
    "gamma_check.regime.fraud.desc":     "η < 0.01: θ nominal inflado. El modelo se comporta como si θ ≪ del anunciado. Probable inflación tipo YaRN/marketing sin extensión real de contexto.",
    "gamma_check.regime.compressed.desc":"η ∈ [0.01, 0.5): contexto comprimido (el modelo atiende menos lejos de lo que predice θ nominal). Común en modelos instruction-tuned / RLHF.",
    "gamma_check.regime.overpade.desc":  "η > 1.5: el modelo atiende más lejos de lo que Padé predice. Posible régimen Lerch-corregido o checkpoint temprano sub-entrenado.",
    "gamma_check.regime.swa.desc":       "γ_obs > 1.05 sobre corpus aleatorio = firma de sliding-window attention (familias Mistral / Gemma).",
    "gamma_check.regime.unknown.desc":   "Entradas fuera de rango o γ_obs > 1 sin flag de corpus aleatorio. Verifica la medición.",
    "gamma_check.glossary.title":        "ⓘ Glosario — significado de las variables",
    "gamma_check.glossary.gamma_pade":   "<strong>γ_Padé</strong>: predicción cerrada (2−z)/(2+z), z = T√2/θ. Paper §sec:gamma_decomposition.",
    "gamma_check.glossary.gamma_obs":    "<strong>γ_observado</strong>: medido empíricamente desde los attention scores (ejecuta Diagnose CLI sobre pesos reales).",
    "gamma_check.glossary.theta_eff_obs":"<strong>θ_eff (observado)</strong>: invertido desde γ_obs vía T√2 / (1 − γ_obs). θ efectivo implicado por tu medición.",
    "gamma_check.glossary.theta_eff_pade":"<strong>θ_eff (Padé)</strong>: θ + T/√2. θ efectivo predicho por la fórmula cerrada.",
    "gamma_check.glossary.efficiency":   "<strong>η</strong>: ratio θ_eff_obs / θ_eff_Padé. ≈1 = normal · &lt;0.01 = fraude · &lt;0.5 = comprimido · &gt;1.5 = sobre-Padé.",
    "gamma_check.glossary.delta_h":      "<strong>ΔH_Cardy</strong>: log(θ_eff_obs / θ_nominal). Cambio de entropía de Cardy. Negativo = entropía de compresión. ~0 = coincide con nominal.",
    "gamma_check.glossary.regime":       "<strong>Régimen</strong>: clasificador automático a partir de η + γ_obs + flag corpus_aleatorio.",

    // §36 v0.6 — Tooltips para iconos ⓘ inline
    "tooltip.gamma_pade":          "<strong>γ_Padé(T_eval)</strong>: predicción cerrada (2−z)/(2+z), z = T√2/θ. Paper §sec:gamma_decomposition.",
    "tooltip.gamma_decomposed":    "<strong>γ_descompuesto</strong>: γ desde descomposición arquitectural completa. Línea base Padé + shift GQA + shift post-IH (subconjunto replicado en audit calibrado).",
    "tooltip.d_horizon":           "<strong>d_horizon</strong>: horizonte efectivo de atención. Más allá los scores caen bajo el suelo de ruido (paper §26).",
    "tooltip.L_NIAH":              "<strong>Techo L_NIAH</strong>: techo predicho de fiabilidad needle-in-a-haystack al d_horizon actual.",
    "tooltip.chi":                 "<strong>χ susceptibilidad</strong>: χ = 1/(1−γ). Diverge en la línea Hagedorn γ=1.",
    "tooltip.kv_memory":           "<strong>Memoria KV @ T_eval (BF16)</strong>: caché KV por petición = 2 · n_layers · n_kv_heads · d_head · T_eval bytes.",
    "tooltip.theta_eff_obs":       "<strong>θ_eff (observado)</strong>: θ efectivo implicado por tu γ_observado: T√2 / (1 − γ_obs).",
    "tooltip.theta_eff_pade":      "<strong>θ_eff (Padé)</strong>: θ efectivo predicho por la fórmula cerrada: θ + T/√2.",
    "tooltip.efficiency":          "<strong>η = θ_eff_obs / θ_eff_Padé</strong>: ratio de eficiencia. ≈1 = normal · &lt;0.01 = fraude · &lt;0.5 = comprimido · &gt;1.5 = sobre-Padé.",
    "tooltip.delta_h_cardy":       "<strong>ΔH_Cardy</strong>: log(θ_eff_obs / θ_nominal). Cambio de entropía de Cardy. Negativo = entropía de compresión. ~0 = coincide con nominal.",
    "tooltip.verdict_aggregate":   "<strong>Veredicto</strong>: peor-de entre todas las recipes. ✅ ADELANTE = todo verde · ⚠ DEGRADADO = ≥1 amarillo · ❌ NO = ≥1 rojo.",
    "tooltip.verdict_breakdown":   "<strong>Desglose por recipe</strong>: cada recipe evalúa un eje de decisión <em>independiente</em> (contexto-largo · presupuesto · hardware · custom-vs-API · compresión-KV). Un ❌ en X-1 significa «usa la API para tu volumen» no «el modelo falla» — abre la sección Recipes para contexto por eje.",
    "tooltip.gamma_pill":          "<strong>γ titular</strong>: γ_descompuesto (o γ_Padé como fallback). Rango (0,1) = Fase A (anti-Ising). γ ≥ 1 = Hagedorn / Fase B.",
    "tooltip.anti_ising":          "<strong>Clase Anti-Ising</strong>: Fase A → β = γ−1 &lt; 0. Machine-verified (Sage + Lean Mathlib4). Ver §35 v0.5.",

    // §37 v0.6 — Tabla de teoremas Lean+Mathlib
    "lean.table.title":            "📑 Tabla de teoremas Lean+Mathlib",
    "lean.table.desc":             "Cada entrada está machine-proven contra Lean 4 + Mathlib4. Click en cualquier link L# para saltar a la línea fuente en GitHub. Agrupado por tema — click en cabecera para expandir.",
    "lean.table.theorem":          "Teorema",
    "lean.table.claim":            "Afirmación",
    "lean.table.tactic":           "Táctica",
    "lean.table.source":           "Fuente",
    "lean.table.lean":             "Lean",
    "lean.findings.title":         "🔎 Findings sustantivos",
    "lean.findings.detected_by":   "Detectado por",
    "lean.findings.fixed_by":      "Arreglado por",
    "lean.findings.recommendation":"Recomendación",
    "lean.meta.repo":              "Repo",
    "lean.meta.build":             "Build",
    "lean.meta.theorems":          "Teoremas",
    "lean.meta.verified":          "verificados",
    "lean.meta.rejected":          "rechazados",
    "lean.meta.sorry":             "sorry",
    "lean.meta.findings":          "findings sustantivos",
    "lean.manifest.loading":       "Cargando manifest Lean…",
    "lean.manifest.error":         "Manifest Lean no disponible",

    // Help modal — sección v0.6
    "help.v06.title":              "🆕 v0.6 — γ predicho-vs-observado + Cardy ΔH + badges Lean",
    "help.v06.intro":              "<em>v0.6 (2026-05-06): tres diagnósticos nuevos viven en el TAF Card bajo <strong>🔬 Diagnósticos</strong>. Todo corre en tu navegador; γ_observado lo obtienes con la Diagnose CLI sobre pesos reales.</em>",
    "help.v06.layout.title":       "Layout del TAF Card (nuevo en v0.6)",
    "help.v06.layout.body":        "Tras click en <strong>🚀 Generar perfil completo</strong> la tarjeta muestra: una <strong>tira hero</strong> arriba (clase de arquitectura + meta + 3 pills: veredicto agregado ✅/⚠/❌, γ titular, 🧲 Anti-Ising si Fase A) y cuatro <strong>secciones plegables</strong>: <strong>📋 Recipes</strong> (abierto por defecto — veredicto por dimensión), <strong>🔬 Diagnósticos</strong> (números clave, γ predicho vs observado, explorador what-if), <strong>✓ Verificación</strong> (consistencia algebraica Sage+Lean, falsificación F1-F23), <strong>📂 Procedencia y compartir</strong> (auditoría de calibración + descarga JSON / enlace / submit al registro). Click en cualquier cabecera para expandir. Cada variable tiene tooltip <strong>ⓘ</strong> inline.",
    "help.v06.gamma_check.title":  "γ predicho vs observado",
    "help.v06.gamma_check.body":   "Introduces el γ medido empíricamente y la herramienta calcula <strong>η = θ_eff_obs / θ_eff_Padé</strong> y clasifica en uno de 5 regímenes:",
    "help.v06.case.normal":        "<strong>Normal</strong> (η ∈ [0.85, 1.15]) — el modelo usa su contexto nominal completo. <em>Caso de uso</em>: validar un release nuevo antes de adoptarlo.",
    "help.v06.case.fraud":         "<strong>Fraude</strong> (η &lt; 0.01) — θ nominal inflado; el modelo se comporta como si θ ≪ del anunciado. <em>Caso de uso</em>: detectar inflación YaRN/marketing (patrón CodeLlama / Mistral-Nemo).",
    "help.v06.case.compressed":    "<strong>Comprimido</strong> (η &lt; 0.5) — contexto comprimido; el modelo atiende menos lejos que θ nominal. <em>Caso de uso</em>: detectar compresión por RLHF/instruction-tuning (patrón LLaMA-2).",
    "help.v06.case.overpade":      "<strong>Sobre-Padé</strong> (η &gt; 1.5) — el modelo atiende más lejos que Padé predice. <em>Caso de uso</em>: identificar régimen Lerch-corregido o checkpoints tempranos sub-entrenados (patrón pythia-1b).",
    "help.v06.case.swa":           "<strong>SWA corpus aleatorio</strong> (γ_obs &gt; 1.05 con corpus_aleatorio=Sí) — firma de sliding-window attention. <em>Caso de uso</em>: confirmar SWA en Mistral / Gemma sobre tokens random.",
    "help.v06.cardy.title":        "Diagnóstico Cardy ΔH",
    "help.v06.cardy.body":         "<strong>ΔH_Cardy = log(θ_eff_obs / θ_nominal)</strong>. Cambio de entropía entre el θ efectivo observado y el θ nominal. Negativo fuerte = entropía de compresión; cerca de cero = coincide con nominal. Complementa a η para casos borderline.",
    "help.v06.lean.title":         "Badges de verificación Lean + Mathlib",
    "help.v06.lean.body":          "Las identidades TAF están formalmente machine-proven en Lean Mathlib4: <strong>37 teoremas</strong> en 7 grupos (Padé, flujo RG, Cayley, D-SAGE, hallazgos de auditoría, erratum CV, misc) + <strong>1 hallazgo sustantivo</strong> (factor 2 en derivada V, teorema <code>V_derivative_ne_RG_beta</code>). Fuente: <a href=\"https://github.com/karlesmarin/lean-taf\" target=\"_blank\">github.com/karlesmarin/lean-taf</a> (commit 25c77fd). Re-verifica localmente: <code>git clone --depth=1 https://github.com/karlesmarin/lean-taf &amp;&amp; cd lean-taf &amp;&amp; lake exe cache get &amp;&amp; lake env lean Taf/Identities.lean</code>. La pill 🧲 Anti-Ising del hero y la sección Verificación enlazan a líneas específicas del código fuente.",
    "help.v06.glossary.title":     "Glosario de variables (también embebido en TAF Card)",
    "help.v06.glossary.body":      "Cada variable del TAF Card tiene un tooltip ⓘ inline. Lista completa: γ, γ_Padé, γ_descompuesto, γ_observado, θ, θ_eff_obs, θ_eff_Padé, η, ΔH_Cardy, χ, d_horizon, L_NIAH, memoria KV, régimen. Pasa el ratón sobre cualquier ⓘ para la definición + sección del paper.",

    "hero.title":     "🔬 TAF Agent",
    "hero.tagline":   "Diagnostica cualquier LLM transformer en 30 segundos. Gratis. Sin GPU. Sin registro.",
    "hero.subtitle":  "Predice si un modelo te servirá para tu caso de uso <em>antes</em> de gastar dinero o tiempo. Todo corre en tu navegador &mdash; tus datos nunca salen de esta pestaña.",
    "hero.help":      "📘 Manual y ejemplos",
    "hero.quickstart_btn": "⚡ Inicio rápido",
    "hero.inventory_btn":  "🧰 Qué te ofrece",
    "hero.about":     "Construido por un investigador independiente. Código abierto. Sin afiliación con ningún proveedor de modelos.",

    "modes.title":    "🎯 Modo",
    "modes.profile":  "📇 Perfilar un modelo",
    "modes.compare":  "🆚 Comparar modelos",
    "modes.inspector": "🔍 Inspeccionar config",
    "modes.ask":      "💬 Pregunta libre",
    "modes.recipe":   "📋 Elegir receta",
    "modes.diagnose": "🩺 Diagnóstico CLI",
    "diagnose.title": "🩺 Generador del comando Diagnose CLI",
    "diagnose.tip":   "El navegador predice γ desde config; el CLI mide γ_obs sobre los pesos reales. Este generador produce el comando exacto para ejecutar localmente.",
    "diagnose.desc":  "Elige opciones y copia-pega el comando generado en tu máquina local (Python + transformers + numpy). Modo rápido ≈5 min CPU; completo ≈20–60 min GPU.",
    "diagnose.model_label": "ID del modelo HF:",
    "diagnose.theta_label": "θ (auto si vacío):",
    "diagnose.n_label": "Contexto N:",
    "diagnose.options_label": "Opciones:",
    "diagnose.opt_fast": "--fast (CPU, ~5 min)",
    "diagnose.opt_cpu": "--cpu (forzar CPU)",
    "diagnose.opt_4bit": "--load_in_4bit (modelos ≥7B)",
    "diagnose.local_label": "--local path (opcional):",
    "diagnose.build_btn": "📋 Generar comando",
    "diagnose.cmd_title": "Comando generado:",
    "diagnose.copy_btn": "📋 Copiar al portapapeles",
    "diagnose.next_steps": "Siguientes pasos: (1) git clone https://github.com/karlesmarin/tafagent (2) cd tafagent && pip install torch transformers numpy (3) Ejecuta el comando (4) JSON resultado → subir vía modo Inspect para análisis TAF completo.",
    "modes.phase":    "📊 Diagrama de fase",
    "phase.title":    "📊 Diagrama de fase (γ × θ)",
    "phase.tip":      "Cada punto es un modelo del panel empírico del paper. x: log θ; y: γ. La línea Hagedorn γ=1 separa Fase A de Fase B. Hover para detalles, click para cargar en el formulario.",
    "phase.desc":     "23 modelos en el panel; curva Padé a T=2000.",
    "modes.desc":     "<strong>Inicio rápido</strong>: pega cualquier id de modelo HuggingFace (ej. <code>meta-llama/Meta-Llama-3-8B</code>), click Perfilar. Verás las 5 recetas evaluadas en segundos.",

    "profile.title":           "📇 Perfilar un modelo",
    "profile.desc":            "<strong>Para técnicos</strong>: cuando necesitas una foto completa de viabilidad de un modelo candidato. Un click ejecuta las 5 recetas y produce una TAF Card unificada.",
    "profile.preset_label":    "Preset:",
    "profile.preset_default":  "— o elige de la lista —",
    "profile.hf_label":        "ID modelo HF:",
    "profile.fetch_btn":       "📥 Cargar",
    "profile.btn":             "🚀 Generar perfil completo",
    "profile.quickstart":      "💡 Inicio rápido: elige cualquier preset → click Generar. O pega un id desde <a href='https://huggingface.co/models?library=transformers&sort=trending' target='_blank'>HF Hub trending</a> → 📥 Cargar → Generar.",

    "compare.title":           "🆚 Comparar modelos lado a lado",
    "compare.desc":            "<strong>Para técnicos</strong>: cuando eliges entre 2-3 modelos candidatos para un escenario de despliegue específico. Misma receta, múltiples modelos, veredictos lado a lado.",
    "compare.recipe_label":    "Receta:",
    "compare.T_eval_label":    "T_eval (contexto objetivo):",
    "compare.models_title":    "Modelos a comparar (hasta 3)",
    "compare.btn":             "🚀 Comparar",
    "compare.example":         "💡 Prueba: pega 3 modelos populares de 7-8B (Meta-Llama-3-8B, Mistral-7B-v0.1, Qwen/Qwen2.5-7B), receta X-2, T_eval=16000. Mira cuál maneja mejor contexto largo.",

    "ask.title":               "❓ Tu pregunta",
    "ask.placeholder":         "ej. ¿Mistral-7B aguanta 16K NIAH retrieval? O: Tengo 5,000$, ¿qué modelo puedo entrenar? O: ¿GPU más barato para servir Llama-70B a 100M tokens/día?",
    "ask.btn":                 "🚀 Analizar",
    "ask.example_btn":         "💡 Probar ejemplo",

    "recipe.title":            "📋 Receta",
    "recipe.default":          "— elige una receta —",
    "recipe.input_title":      "🎯 Entradas",

    "verdict.title":           "📊 Veredicto",
    "chain.title":             "🔍 Cadena de cálculo",
    "chain.desc":              "Cada número de abajo es Python determinista. Click en un paso para expandir.",
    "answer.title":            "💬 Respuesta en lenguaje natural",
    "share.btn":               "🔗 Copiar link",
    "share.copied":            "✅ ¡Copiado al portapapeles!",
    "share.download":          "💾 Descargar JSON",
    "share.download_md":       "📝 Markdown",
    "share.download_tex":      "📜 LaTeX",
    "share.submit":            "📤 Enviar al registry",
    "share.submit_clip_ok":    "↗ GitHub abierto. Cuerpo copiado al portapapeles — pégalo en el cuerpo del issue.",
    "share.submit_clip_fail":  "↗ GitHub abierto. Portapapeles bloqueado — cuerpo volcado en la consola del navegador (F12).",
    "share.import_title":      "📂 Importar un resultado TAF compartido",
    "a11y.skip":               "Saltar al contenido principal",

    // v0.6.2 — rework de landing: inicio rápido + inventario + tooltips de arquitectura
    "qs.title":                    "⚡ Inicio rápido",
    "qs.step1":                    "Pega un model ID de HuggingFace (ej. <code>meta-llama/Meta-Llama-3-8B</code>)",
    "qs.step2":                    "Click en <strong>📇 Profile a model</strong>",
    "qs.step3":                    "Lee tu TAF Card — veredicto por caso de uso + números clave + matemáticas verificadas con Lean+Mathlib",
    "qs.cta":                      "↓ Empezar ahora",
    "inv.title":                   "🧰 Qué te ofrece esta herramienta",
    "inv.recipes.title":           "🎯 8 recetas — ¿sirve este modelo para tu caso?",
    "inv.recipes.x1.title":        "Entrenar propio vs API",
    "inv.recipes.x1.body":         "¿cuál sale más barato para tu tráfico?",
    "inv.recipes.x2.title":        "Contexto largo",
    "inv.recipes.x2.body":         "¿aguanta 32k / 128k tokens de forma fiable?",
    "inv.recipes.x3.title":        "Presupuesto",
    "inv.recipes.x3.body":         "con $X, ¿qué modelo puedes entrenar desde cero?",
    "inv.recipes.x5.title":        "Hardware",
    "inv.recipes.x5.body":         "¿qué GPU para servir N tokens/día?",
    "inv.recipes.x19.title":       "KV cache",
    "inv.recipes.x19.body":        "¿cómo comprimir sin romper la calidad?",
    "inv.recipes.x21.title":       "Pureza de imprint",
    "inv.recipes.x21.body":        "¿cómo de limpia es la codificación posicional del modelo?",
    "inv.recipes.x22.title":       "Compute-context",
    "inv.recipes.x22.body":        "¿el modelo entra en la banda empírica?",
    "inv.recipes.x23.title":       "Fase IH",
    "inv.recipes.x23.body":        "¿pre- o post-induction-head?",
    "inv.diag.title":              "🔬 Diagnósticos",
    "inv.diag.gamma":              "<strong>γ predicho vs observado</strong> — auto-clasifica el modelo en 5 regímenes (normal · fraude / contexto inflado · comprimido · over-Padé · sliding-window)",
    "inv.diag.cardy":              "<strong>Cardy ΔH</strong> — desplazamiento de entropía entre contexto observado y nominal",
    "inv.diag.fals":               "<strong>Tabla de falsabilidad</strong> — comprueba 23 predicciones específicas (F1–F23)",
    "inv.diag.alg":                "<strong>Consistencia algebraica</strong> — 8 identidades matemáticas que el modelo debe cumplir",
    "inv.verify.title":            "✓ Matemáticas formalmente verificadas",
    "inv.verify.count":            "<strong>37 teoremas</strong> machine-proven en Lean 4 + Mathlib4",
    "inv.verify.click":            "Click en cualquier badge → abre la línea fuente en GitHub",
    "inv.verify.reverify":         "Verifícalo tú: <code>lake build</code> (≈5 s tras cache)",
    "inv.export.title":            "📤 Exportar y compartir",
    "inv.export.formats":          "<strong>JSON · Markdown · LaTeX</strong> (listo para paper)",
    "inv.export.share":            "Link reproducible (estado codificado en URL)",
    "inv.export.registry":         "Envía al registro comunitario en GitHub",
    "arch.summary":                "Arquitecturas soportadas",
    "arch.anyhf":                  "✓ Cualquier modelo público de HuggingFace",
    "tooltip.mha":                 "Multi-Head Attention: cada posición atiende mediante varios heads paralelos a la vez.",
    "tooltip.gqa":                 "Grouped Query Attention: las queries comparten menos keys/values que heads (ahorra memoria pero empuja γ hacia Hagedorn).",
    "tooltip.alibi":               "Attention with Linear Biases: la info de posición es una pendiente aprendida añadida a los scores, sin rotación.",
    "tooltip.abspe":               "Absolute Position Embeddings: cada posición tiene un vector fijo aprendido sumado al embedding del token.",
    "tooltip.swa":                 "Sliding Window Attention: cada token solo atiende dentro de una ventana local fija (Mistral, gemma-2 lo usan).",
    "tooltip.ssm":                 "State Space Model: capa de secuencia que mantiene estado interno en lugar de atención (Mamba, Jamba lo usan).",

    // v0.7.0 — anti-bullshit pack #1: SWA / RoPE-scaling unmasker
    "modes.unmask":                "🪟 Desenmascarar",
    "unmask.title":                "🪟 Desenmascarador de contexto",
    "unmask.tip":                  "Pega un id de modelo HuggingFace (o config.json crudo). La herramienta detecta sliding-window attention, RoPE scaling (YaRN/linear/dynamic NTK), y GQA — todo lo que hace que <code>max_position_embeddings</code> sea mayor que el contexto efectivo real. Mistral-7B-v0.1 es el ejemplo canónico: declara 32k, atiende dentro de ~4-8k.",
    "unmask.desc":                 "<strong>¿Estás a punto de gastar dinero en un modelo que en realidad no atiende tan lejos?</strong> Pega un id y descúbrelo en 1 segundo. Sin GPU, sin inferencia — solo aritmética sobre config.json.",
    "unmask.id_label":             "ID modelo HF:",
    "unmask.fetch_btn":            "🔍 Desenmascarar",
    "unmask.paste_summary":        "O pega config.json crudo (modelos privados / en desarrollo)",
    "unmask.paste_btn":            "🔍 Desenmascarar config pegado",
    "unmask.label.declared":       "Contexto declarado",
    "unmask.label.effective":      "Efectivo (estimado)",
    "unmask.label.ratio":          "Ratio",
    "unmask.section.flags":        "Banderas de arquitectura",
    "unmask.section.warnings":     "Avisos",
    "unmask.section.reco":         "Recomendación",
    "unmask.flag.swa":             "SWA",
    "unmask.flag.rope":            "RoPE scaling",
    "unmask.flag.gqa":             "GQA",
    "unmask.flag.layers":          "Capas",
    "unmask.flag.dhead":           "d_head",
    "unmask.flag.theta":           "RoPE θ",
    "unmask.flag.yes":             "sí",
    "unmask.flag.no":              "no",
    "unmask.flag.full_mha":        "no (MHA completo, {n} heads)",
    "unmask.verdict.honest":            "✅ HONESTO",
    "unmask.verdict.inflated":          "⚠ INFLADO",
    "unmask.verdict.severely_inflated": "❌ GRAVEMENTE INFLADO",
    "unmask.verdict.yarn_extended":     "⚠ YARN-EXTENDIDO",
    "unmask.verdict.unknown":           "❓ DESCONOCIDO",
    "unmask.warn.swa_window":      "Ventana SWA: {window} tokens — cada capa solo atiende dentro de esta ventana.",
    "unmask.warn.multihop":        "Estimación multi-hop: ~{multiHop} tokens (conservador: ventana × {factor}).",
    "unmask.warn.yarn":            "RoPE scaling ({type}) extiende contexto {factor}× desde ~{original} hasta {declared} tokens.",
    "unmask.warn.yarn_advice":     "Contexto RoPE-extendido — verifica el comportamiento de γ a la longitud declarada con el diagnóstico γ_check.",
    "unmask.warn.gqa_small_dhead": "head dim pequeño ({d_head}) + GQA: probable compresión de KV cache a contexto largo (γ empujado hacia Hagedorn).",
    "unmask.reco.honest":              "Modelo de atención completa estándar. Contexto efectivo coincide con declarado ({declared} tokens).",
    "unmask.reco.inflated":            "Efectivo ~{effective} tokens vía SWA. Usa γ_check para verificar el comportamiento a tu longitud objetivo.",
    "unmask.reco.severely_inflated":   "Trátalo como un modelo de ~{effective} tokens en la práctica. El claim de {declared} tokens solo aplica vía cadenas de atención cross-layer, que empíricamente degradan más allá de ~2× la ventana SWA.",
    "unmask.reco.yarn_extended":       "Contexto RoPE-extendido. Corre un benchmark long-context (NIAH a 8k / 16k / 32k / full) para confirmar que la extensión se sostiene. Usa γ_check con T_eval = {declared}.",
    "unmask.reco.unknown":             "No se pudo parsear el config. Verifica que la URL sea un modelo HF válido con config.json público.",
    "unmask.status.empty_id":      "⚠ Introduce un model id (ej. mistralai/Mistral-7B-v0.1).",
    "unmask.status.fetching":      "⏳ Obteniendo config.json para {modelId}...",
    "unmask.status.success":       "✅ Analizado {modelId} (veredicto: {verdict})",
    "unmask.status.empty_paste":   "⚠ Pega un config.json primero.",
    "unmask.status.invalid_json":  "❌ JSON inválido: {error}",
    "unmask.status.success_paste": "✅ Config pegado analizado (veredicto: {verdict})",
    "unmask.pasted_label":         "(config pegado)",
    "mode_desc.ask":               "Escribe una pregunta libre. El LLM en el navegador elige la receta correcta y la ejecuta.",
    "mode_desc.recipe":            "Selecciona una receta directamente y rellena el formulario. Control manual completo.",
    "mode_desc.profile":           "Inicio más rápido: pega cualquier model id de HuggingFace, click Profile. Mira las 5 recetas en segundos.",
    "mode_desc.compare":           "Elige 2-3 modelos candidatos + una receta. Ve veredictos lado a lado en tabla.",
    "mode_desc.inspector":         "Pega un config.json directamente. Útil para modelos privados / en desarrollo no en HF Hub.",
    "mode_desc.diagnose":          "Construye el comando CLI diagnose_model.py para MEDIR γ_obs en GPU real. El navegador predice; el CLI mide.",
    "mode_desc.phase":             "Scatter γ × θ del panel empírico del paper. Hover sobre puntos para detalles, click para cargar en Diagnose / Recipe.",
    "mode_desc.unmask":            "Detecta si max_position_embeddings es engañoso (SWA / YaRN / RoPE-scaling). Pega un model id, obtén un veredicto en 1 línea.",
    "profile.preset_loaded":       "✅ Preset cargado para <strong>{id}</strong>. Formulario pre-rellenado. (Click 📥 Fetch para sobreescribir con el último config de HF Hub.)",

    // v0.7.1 — anti-bullshit pack #2: Chat-template Sniffer
    "modes.template":              "📜 Chat-template",
    "mode_desc.template":          "Detecta qué familia de chat-template usa un modelo (Llama-3 / ChatML / Mistral / Gemma / Phi-3 / Alpaca / DeepSeek). Da el flag CLI exacto para lm-eval / vLLM / transformers.",
    "template.title":              "📜 Detector de Chat-template",
    "template.tip":                "Pega un model id de HF (o tokenizer_config.json crudo). Detecta la familia del chat-template y te da el comando exacto para usarlo bien. lm-eval-harness divide la accuracy entre 2 silenciosamente si te olvidas de aplicarlo (issue #1841).",
    "template.desc":               "<strong>¿Olvidaste <code>--apply_chat_template</code>?</strong> La mayoría de evals multi-turn fallan ~50% porque el chat template no se aplicó. Pega un model id, obtén el flag CLI exacto para tu stack.",
    "template.id_label":           "ID modelo HF:",
    "template.fetch_btn":          "📜 Detectar",
    "template.paste_summary":      "O pega tokenizer_config.json crudo (modelos privados)",
    "template.paste_btn":          "📜 Detectar config pegado",
    "template.label.family":       "Familia detectada",
    "template.label.markers":      "Marcadores coincidentes",
    "template.label.tpl_len":      "Longitud template",
    "template.section.warnings":   "Avisos",
    "template.section.commands":   "Comandos por framework",
    "template.section.raw":        "Template crudo (preview)",
    "template.family.custom":      "custom (familia desconocida)",
    "template.family.none":        "(sin chat_template)",
    "template.verdict.ok":         "✅ TEMPLATE DETECTADO",
    "template.verdict.custom":     "⚠ TEMPLATE CUSTOM",
    "template.verdict.missing":    "❌ SIN CHAT TEMPLATE",
    "template.verdict.base_model": "ℹ MODELO BASE (sin chat)",
    "template.verdict.unknown":    "❓ DESCONOCIDO",
    "template.warn.no_chat_template": "Sin campo <code>chat_template</code> en tokenizer_config.json. Típico de modelos base / pretrained. Si esperabas un modelo instruct-tuned, puede que el archivo cargado sea incorrecto.",
    "template.warn.custom_template":  "Template no estándar ({length} chars). La herramienta no lo encajó en familias conocidas. Revisa el preview y verifica que tu framework de eval lo soporta.",
    "template.warn.lm_eval_apply":    "<strong>lm-eval-harness:</strong> añade <code>--apply_chat_template</code> o tu accuracy bajará ~50% silenciosamente en evals multi-turn (issue #1841).",
    "template.warn.vllm_apply":       "<strong>vLLM serve:</strong> verifica que <code>--chat-template</code> esté puesto (la auto-detección a veces falla en variantes fine-tuned). Sugerido: <code>{name}</code>.",
    "template.status.empty_id":    "⚠ Introduce un model id (ej. mistralai/Mistral-7B-Instruct-v0.3).",
    "template.status.fetching":    "⏳ Obteniendo tokenizer_config.json para {modelId}...",
    "template.status.success":     "✅ Detectado {modelId} (veredicto: {verdict})",
    "template.status.empty_paste": "⚠ Pega un tokenizer_config.json primero.",
    "template.status.invalid_json":"❌ JSON inválido: {error}",
    "template.status.success_paste":"✅ Config pegado detectado (veredicto: {verdict})",
    "template.pasted_label":       "(tokenizer_config pegado)",

    // v0.7.2 — anti-bullshit pack #3: Arena-Elo CI reconstructor
    "modes.arena":                 "🎯 Arena CI",
    "mode_desc.arena":             "Recupera intervalos de confianza desde datos crudos de votos pairwise (MLE Bradley-Terry + bootstrap). Detecta pares estadísticamente empatados que el leaderboard público de Arena oculta.",
    "arena.title":                 "🎯 Reconstructor Arena-Elo CI",
    "arena.tip":                   "Chatbot Arena oculta los intervalos de confianza en el leaderboard público. Una diferencia de 5 Elo puede ser estadísticamente irrelevante. Pega datos crudos de votos (model_a, model_b, winner) — la herramienta calcula MLE Bradley-Terry + bootstrap CIs y lista los empates estadísticos (overlap de CI).",
    "arena.desc":                  "<strong>¿GPT-4 es realmente mejor que Claude — o están empatados?</strong> Pega CSV de votos pairwise (o click <em>Cargar sample</em>). MLE Bradley-Terry + 200 iteraciones de bootstrap → Elos ranked con CIs 95% y detección de empates estadísticos. Todo en el navegador.",
    "arena.sample_btn":            "📊 Cargar datos sample",
    "arena.run_btn":                "🎯 Calcular CIs",
    "arena.clear_btn":             "🗑️ Limpiar",
    "arena.csv_summary":           "CSV de votos (header: <code>model_a,model_b,winner</code>; winner ∈ a/b/tie)",
    "arena.section.ranked":        "Elos ranked con CIs 95%",
    "arena.section.ties":          "Empates estadísticos (overlap CI)",
    "arena.section.summary":       "Resumen",
    "arena.col.rank":              "#",
    "arena.col.model":             "Modelo",
    "arena.col.elo":               "Elo",
    "arena.col.ci":                "CI 95%",
    "arena.col.ci_width":          "± semi-anchura",
    "arena.col.matches":           "Partidas",
    "arena.col.wins":              "V / D / E",
    "arena.col.tie_pair":          "Par",
    "arena.col.tie_diff":          "Brecha Elo",
    "arena.col.tie_overlap":       "Overlap CI",
    "arena.no_ties":               "Sin empates estadísticos — todos los pares distinguibles al CI 95%.",
    "arena.summary.votes":         "Votos totales",
    "arena.summary.models":        "Modelos",
    "arena.summary.ties":          "Empates estadísticos",
    "arena.summary.bootstrap":     "Iteraciones bootstrap",
    "arena.summary.ci_level":      "Nivel CI",
    "arena.status.empty":          "⚠ Pega un CSV de votos o click en Cargar sample.",
    "arena.status.too_few":        "⚠ Solo {n} votos válidos — se necesitan al menos 10 para ajustar Bradley-Terry de forma fiable.",
    "arena.status.computing":      "⏳ Calculando MLE Bradley-Terry + bootstrap sobre {n} votos...",
    "arena.status.done":           "✅ {n} votos · {models} modelos · {ties} empates estadísticos · {ms} ms",
    "arena.status.sample_loaded":  "✅ Sample cargado (datos sintéticos Arena de 6 modelos). Click en Calcular CIs.",

    // v0.7.3 — anti-bullshit pack #4: Contamination Prior
    "modes.contam":                "🧪 Contaminación",
    "mode_desc.contam":            "Prior bayesiano-ish sobre si un score de benchmark está contaminado. Introduce la fecha de cutoff de entrenamiento → puntúa 20+ benchmarks populares (MMLU, GSM8K, HumanEval, MMLU-Pro…).",
    "contam.title":                "🧪 Prior de Contaminación",
    "contam.tip":                  "Calcula un prior bayesiano-ish sobre si un score de benchmark está contaminado, basado en (fecha de cutoff de entrenamiento) × (fecha de release del benchmark) × (inclusión conocida en corpus + historial de leaks). Open LLM Leaderboard v1 fue cancelado en 2024 tras la contaminación de MMLU/HellaSwag.",
    "contam.desc":                 "<strong>¿Deberías confiar en el MMLU de tu modelo?</strong> Introduce la fecha cutoff de entrenamiento — la herramienta puntúa 20+ benchmarks populares (MMLU, HellaSwag, GSM8K, HumanEval, IFEval, MMLU-Pro, GPQA…) y te dice qué scores son probablemente contaminados.",
    "contam.cutoff_label":         "Cutoff entrenamiento:",
    "contam.run_btn":              "🧪 Puntuar todos los benchmarks",
    "contam.section.ranked":       "Priors de contaminación por benchmark",
    "contam.section.high":         "🔴 Benchmarks de alto riesgo (trata los scores como no fiables)",
    "contam.section.medium":       "🟡 Riesgo medio (verifica con alternativas)",
    "contam.section.low":          "🟢 Bajo riesgo (probablemente limpios)",
    "contam.col.benchmark":        "Benchmark",
    "contam.col.released":         "Release",
    "contam.col.gap":              "Gap (meses)",
    "contam.col.prior":            "P(contam)",
    "contam.col.level":            "Nivel",
    "contam.col.corpora":          "En corpus",
    "contam.col.category":         "Categoría",
    "contam.label.high":           "Alto riesgo",
    "contam.label.medium":         "Medio",
    "contam.label.low":            "Bajo",
    "contam.no_entries":           "(ninguno en esta categoría)",
    "contam.advice.high":          "Trata estos scores como no fiables. Sustituye por alternativas más recientes / con test privado (MMLU-Pro, GPQA, MUSR, MATH-500).",
    "contam.advice.medium":        "Toma con cautela. Busca replicación sobre subset held-out o reproducciones comunitarias.",
    "contam.advice.low":           "Score probablemente no contaminado, pero ausencia de leak no es prueba — verifica también con test alternativo.",
    "contam.summary.headline":     "Cutoff <code>{cutoff}</code> · {n} benchmarks puntuados",
    "contam.status.empty":         "⚠ Introduce una fecha cutoff de entrenamiento (ej. 2023-12).",
    "contam.status.bad_date":      "⚠ Formato de fecha incorrecto. Usa YYYY-MM o YYYY-MM-DD.",
    "contam.status.done":          "✅ Cutoff {cutoff} · {n} benchmarks puntuados · {high} de alto riesgo",

    // v0.7 — Sección Help modal
    "help.v07.title":              "🆕 v0.7 — Pack anti-bullshit (4 modos nuevos)",
    "help.v07.intro":              "<em>v0.7 (2026-05-06): cuatro modos nuevos que resuelven problemas concretos reportados por la comunidad HuggingFace. Cada uno corre en tu navegador sin inferencia — pura metadata + matemáticas.</em>",
    "help.v07.unmask.title":       "🪟 Desenmascarador de Contexto",
    "help.v07.unmask.body":        "Detecta cuándo <code>max_position_embeddings</code> es engañoso. Mistral-7B-v0.1 declara 32k pero atiende dentro de ~4-8k vía SWA. Pega un id HF → veredicto en 1 segundo (HONESTO / INFLADO / GRAVEMENTE INFLADO / YARN-EXTENDIDO). Pilla SWA, RoPE-scaling (YaRN/linear/dynamic NTK), d_head pequeño + GQA. <em>Caso de uso</em>: antes de pagar GPU para 32k de contexto, verifica que el modelo realmente atiende tan lejos.",
    "help.v07.template.title":     "📜 Detector de Chat-template",
    "help.v07.template.body":      "Detecta qué familia de chat-template usa un modelo (Llama-3 / ChatML / Mistral / Gemma / Phi-3 / Alpaca / DeepSeek / custom / none) y te da el flag CLI exacto para lm-evaluation-harness, vLLM, y transformers. Resuelve el issue #1841 de lm-eval-harness: olvidar <code>--apply_chat_template</code> divide la accuracy multi-turn por 2 silenciosamente. <em>Caso de uso</em>: antes de reportar un score, confirma que aplicaste el template correctamente.",
    "help.v07.arena.title":        "🎯 Reconstructor Arena-Elo CI",
    "help.v07.arena.body":         "Chatbot Arena oculta los intervalos de confianza en su leaderboard público — una diferencia de 5 Elo puede ser estadísticamente irrelevante. Pega datos crudos de votos pairwise (model_a, model_b, winner) → MLE Bradley-Terry + bootstrap de 200 iteraciones → Elos ranked con CIs 95% y un panel de \"empates estadísticos\" listando pares cuyos CIs se solapan. Prueba el botón Cargar sample. <em>Caso de uso</em>: antes de afirmar \"modelo A vence a modelo B\", verifica que sus CIs no se solapen.",
    "help.v07.contam.title":       "🧪 Prior de Contaminación",
    "help.v07.contam.body":        "Prior bayesiano-ish sobre si un score de benchmark está contaminado. Introduce la fecha cutoff de entrenamiento de tu modelo → la herramienta puntúa 20+ benchmarks populares (MMLU, HellaSwag, GSM8K, HumanEval, IFEval, MMLU-Pro, GPQA, AIME, MATH-500, BBH, MUSR…) por P(contaminación) según gap temporal, inclusión en corpus y historial de leaks conocidos. Open LLM Leaderboard v1 fue cancelado en 2024 tras la contaminación de MMLU/HellaSwag. <em>Caso de uso</em>: decide qué scores te puedes creer al comparar dos modelos.",
    "help.v07.quant.title":        "⚖️ Clasificador de régimen de cuantización",
    "help.v07.quant.body":         "Predice γ-shift y ΔPPL para cualquier (modelo × esquema de cuantización: NF4, AWQ, GPTQ, GGUF Q4_K_M / Q5_K_M / Q8_0, int8, FP8…). Arch-aware: d_head pequeño + GQA agresivo → más sensible; los esquemas calibrados (AWQ) absorben mejor el shift que los no calibrados (NF4). Recomienda alternativas más seguras si detecta cliff. <em>Caso de uso</em>: antes de cuantizar, predice si tu combo arquitectura × esquema mantendrá la PPL aceptable, con sugerencia concreta de switch si no.",
    "help.v07.drift.title":        "🔀 Cota de drift entre frameworks",
    "help.v07.drift.body":         "Mismo modelo, scores distintos en setups distintos. La herramienta predice el drift máximo admisible solo por ruido numérico (dtype, framework, batch). Si el gap observado lo excede → bug real, normalmente chat-template mismatch (issue #1841 de lm-eval-harness) o layout de KV-cache. Prueba el botón &quot;Cargar sample&quot; para el bug canónico de chat-template. <em>Caso de uso</em>: antes de reportar una regresión o reclamar reproducibilidad, verifica si el gap entre dos evals es mayor de lo que el ruido numérico puede explicar.",
    "inv.v07.drift":               "<strong>🔀 Drift</strong> — ¿bug o ruido? Predice el gap máximo admisible entre dos evals",
    "help.v07.niah.title":         "🔍 Gap NIAH → Reasoning",
    "help.v07.niah.body":          "El paper RULER (NVIDIA 2024) muestra que modelos long-context a menudo pasan NIAH (retrieval de needle) pero fallan reasoning multi-hop al mismo contexto. La herramienta predice ambas tasas de pass desde la arquitectura (γ_Padé + d_horizon + presión arq: d_head pequeño, GQA, SWA), reporta el gap, y encuentra el \"contexto seguro de reasoning\" donde reasoning se mantiene ≥65%. Modo barrido muestra la curva a 1k/4k/16k/64k/T_train. <em>Caso de uso</em>: antes de desplegar al contexto declarado, descubre si el modelo realmente razonará ahí o solo encontrará.",
    "inv.v07.niah":                "<strong>🔍 NIAH→Reason</strong> — ¿tu \"128k\" realmente razona ahí, o solo encuentra?",

    // v0.7 — Inventory modal, 5th card (note: inv.v07.drift and inv.v07.niah are defined above, next to their help.v07.* entries)
    "inv.v07.title":               "🆕 Pack anti-bullshit v0.7",
    "inv.v07.unmask":              "<strong>🪟 Unmask</strong> — ¿config.json declara 32k? Mira si de verdad atiende tan lejos",
    "inv.v07.template":            "<strong>📜 Chat-template</strong> — flag CLI exacto para que lm-eval no divida tu accuracy entre 2 silenciosamente",
    "inv.v07.arena":               "<strong>🎯 Arena CI</strong> — recupera los intervalos de confianza que Chatbot Arena oculta",
    "inv.v07.contam":              "<strong>🧪 Contaminación</strong> — puntúa 20+ benchmarks por probabilidad de contaminación",
    "inv.v07.quant":               "<strong>⚖️ Quant</strong> — predice γ-shift + ΔPPL para cualquier combo (modelo × esquema de cuantización)",

    // v0.7.3 — anti-bullshit pack #5: Quant-regime classifier
    "modes.quant":                 "⚖️ Quant",
    "mode_desc.quant":             "Predice γ-shift y ΔPPL para cualquier (modelo × esquema de cuantización). Arch-aware: d_head pequeño + GQA → más sensible. Recomienda alternativas más seguras si detecta cliff.",
    "quant.title":                 "⚖️ Clasificador de régimen de cuantización",
    "quant.tip":                   "Predice γ-shift (y la ΔPPL resultante) para un par (modelo × esquema). Claims genéricos como 'AWQ ~95% retención' son demasiado vagos — TAF usa d_head, ratio GQA, flag SWA y tamaño del modelo para dar veredicto arquitectura-específico. Resuelve: la comunidad HF reporta cliffs de cuantización impredecibles (NF4 -2 PPL en Phi-3 pero bien en Llama-3-8B).",
    "quant.desc":                  "<strong>¿Cuantizar romperá tu modelo?</strong> Pega un id HF, elige esquema de cuantización — obtén γ-shift predicho, banda ΔPPL esperada y alternativa recomendada si es un cliff. Solo navegador, sin GPU, sin set de calibración.",
    "quant.id_label":              "ID modelo HF:",
    "quant.fetch_btn":             "📥 Fetch config",
    "quant.scheme_label":          "Esquema cuant:",
    "quant.run_btn":                "⚖️ Predecir",
    "quant.all_btn":               "📊 Comparar todos los esquemas",
    "quant.regime.safe":           "✅ SEGURO",
    "quant.regime.mild":           "✅ COMPRESIÓN LEVE",
    "quant.regime.significant":    "⚠ DEGRADACIÓN SIGNIFICATIVA",
    "quant.regime.cliff":          "❌ CLIFF FUERTE",
    "quant.label.gamma_shift":     "γ shift",
    "quant.label.delta_ppl":       "ΔPPL (est.)",
    "quant.label.arch_mult":       "Multiplicador arch",
    "quant.section.breakdown":     "Desglose",
    "quant.section.reco":          "Recomendación",
    "quant.section.compare":       "Todos los esquemas (ordenados por seguridad)",
    "quant.field.scheme":          "Esquema",
    "quant.field.calibrated":      "calibrado",
    "quant.field.uncalibrated":    "no calibrado",
    "quant.field.base_penalty":    "Penalización base",
    "quant.field.arch_mult_full":  "Multiplicador arquitectónico",
    "quant.field.gamma_shift":     "γ shift predicho",
    "quant.field.ppl_band":        "Banda ΔPPL (est.)",
    "quant.field.params":          "Parámetros",
    "quant.col.scheme":            "Esquema",
    "quant.col.bits":              "Bits",
    "quant.col.gamma_shift":       "γ shift",
    "quant.col.ppl_band":          "Banda ΔPPL",
    "quant.col.regime":            "Régimen",
    "quant.reco.switch_to_awq":    "<strong>Cambia a {scheme}</strong> — el 4-bit calibrado maneja d_head pequeño + GQA mucho mejor que NF4. ΔPPL esperada cae ~2-3×.",
    "quant.reco.switch_to_q5_km":  "<strong>Cambia a {scheme}</strong> — Q5 mantiene más dimensiones de head intactas a bajo coste (solo ~25% más grande).",
    "quant.reco.switch_to_q4_km":  "<strong>Cambia a {scheme}</strong> — Q3/Q2 son demasiado agresivos para esta arquitectura.",
    "quant.reco.consider_awq":     "<strong>Considera {scheme}</strong> — la calibración reduce γ-shift significativamente en esta arquitectura.",
    "quant.reco.use_higher_bits":  "<strong>Usa alternativa de mayor bit</strong> — esta arquitectura no absorbe 4-bit limpiamente. Prueba 5 u 8-bit.",
    "quant.reco.verify_with_eval": "<strong>Verifica con eval real</strong> — el shift predicho está en el límite. Corre NIAH a tu contexto objetivo antes de desplegar.",
    "quant.reco.no_action":        "No requiere acción — la cuantización es segura para esta arquitectura.",
    "quant.summary.headline_all":  "Todos los esquemas para <code>{modelId}</code>",
    "quant.status.empty_id":       "⚠ Introduce un model id (ej. meta-llama/Llama-3.2-1B).",
    "quant.status.fetching":       "⏳ Obteniendo config.json para {modelId}...",
    "quant.status.fetched":        "✅ Config obtenido para {modelId}. Elige un esquema y click Predecir (o Comparar todos).",
    "quant.status.no_scheme":      "⚠ Elige un esquema de cuantización del dropdown.",
    "quant.status.done":           "✅ Régimen predicho: {regime}",
    "quant.status.done_all":       "✅ Comparados {n} esquemas — ordenados por seguridad.",

    // v0.7.4 — autocomplete HF Hub: privacy + rate-limit
    "hf_auto.privacy":             "🔒 Queries enviadas a huggingface.co/api · caché local 5 min",
    "hf_auto.rate_limited":        "⚠ Rate limit de HuggingFace — espera un momento, o teclea el id completo manualmente",
    "hf_auto.gated_msg":           "es gated. Acepta la licencia aquí:",

    // v0.7.5 — anti-bullshit pack #6: Cross-framework drift bound
    "modes.drift":                 "🔀 Drift",
    "mode_desc.drift":             "Predice el drift máximo permitido entre dos scores de benchmark dados (framework, dtype, batch, chat-template). Distingue bugs reales de ruido numérico.",
    "drift.title":                 "🔀 Cota de drift entre frameworks",
    "drift.tip":                   "Mismo modelo, scores distintos en setups distintos. ¿La diferencia es ruido o un bug real? Introduce dos scores con su (framework, dtype, batch, chat-template) — la herramienta predice el drift máximo permitido por ruido numérico solo. Si el gap observado lo excede → bug real, normalmente chat-template mismatch (issue #1841 de lm-eval) o layout de KV-cache.",
    "drift.desc":                  "<strong>Tu modelo da 67.2 en lm-eval-hf y 65.1 en vLLM-served. ¿Bug o ruido?</strong> Introduce ambos scores con (framework, dtype, batch, ¿chat-template aplicado?). La herramienta predice la banda de ruido y flagea bugs reales. arxiv 2506.09501 documenta esto como problema mayor de reproducibilidad de evals.",
    "drift.setup_a":               "Setup A",
    "drift.setup_b":               "Setup B",
    "drift.score":                 "Score",
    "drift.framework":             "Framework",
    "drift.dtype":                 "Dtype",
    "drift.batch":                 "Batch",
    "drift.template":              "Chat-template",
    "drift.template.applied":      "aplicado",
    "drift.template.not_applied":  "no aplicado",
    "drift.template.unknown":      "desconocido",
    "drift.run_btn":               "🔀 Calcular cota de drift",
    "drift.sample_btn":            "📊 Cargar sample (bug de chat-template)",
    "drift.label.observed":        "Gap observado",
    "drift.label.band":            "Banda numérica",
    "drift.label.ratio":           "Gap / banda",
    "drift.section.setups":        "Setups",
    "drift.section.breakdown":     "Contribuyentes al drift (banda numérica)",
    "drift.section.verdict":       "Veredicto y recomendación",
    "drift.contrib.dtype":         "Mismatch de dtype",
    "drift.contrib.framework":     "Framework",
    "drift.contrib.batch":         "Diferencia de batch",
    "drift.contrib.template":      "MISMATCH de chat-template",
    "drift.dominant_cause":        "Causa dominante",
    "drift.cause.dtype":           "diferencia de precisión dtype",
    "drift.cause.framework":       "diferencia de framework / kernel",
    "drift.cause.batch":           "paths de normalización por batch",
    "drift.cause.template_mismatch": "chat-template aplicado en un lado pero no en el otro (patrón #1841 de lm-eval-harness — típico -50% en multi-turn)",
    "drift.verdict.noise":         "✅ RUIDO NUMÉRICO",
    "drift.verdict.suspicious":    "⚠ SOSPECHOSO — verifica",
    "drift.verdict.bug":           "❌ BUG REAL — investiga",
    "drift.verdict.bug_template":  "❌ BUG DE CHAT-TEMPLATE",
    "drift.reco.noise":            "El gap encaja en la banda esperada de ruido numérico. No requiere acción; la diferencia es consistente con variación de framework/dtype/batch sola.",
    "drift.reco.suspicious":       "El gap es 1–2× la banda predicha. Borderline — posible bug real. Intenta alinear el contribuyente dominante (ej. iguala framework o dtype) y re-testea.",
    "drift.reco.bug":              "El gap es &gt; 2× la banda predicha. Es un bug real. Inspecciona el contribuyente dominante — probablemente diferencia de tokenizer / chat-template / layout de KV-cache. Corre lm-eval-harness con <code>--apply_chat_template</code> y confirma.",
    "drift.reco.bug_template":     "Mismatch de chat-template detectado. Es la causa más común de gaps grandes en evals (issue #1841 de lm-eval-harness). Re-corre el lado &quot;no aplicado&quot; con <code>--apply_chat_template</code> (o pon vLLM <code>--chat-template &lt;name&gt;</code>) y re-testea.",
    "drift.status.empty_scores":   "⚠ Introduce ambos scores.",
    "drift.status.done":           "✅ Veredicto: {verdict}",
    "drift.status.sample_loaded":  "✅ Sample cargado (bug canónico de chat-template). Click en Calcular cota de drift.",

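    // v0.7.6 — anti-bullshit pack #7: NIAH → reasoning-gap predictor
    // (this section also holds the saturation and Solutions Hub strings, keyed saturation.* / hub.* / help.v08.* / inv.v081.*)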
    "modes.niah":                  "🔍 NIAH→Reason",
    "mode_desc.niah":              "Predice tasas de pass de NIAH (retrieval) y reasoning multi-hop a cualquier contexto. Resuelve: modelos long-context pasan NIAH pero fallan reasoning al mismo contexto (paper RULER).",
    "modes.saturation":            "📈 Saturación",
    "mode_desc.saturation":        "Te dice si un benchmark sigue discriminando frontier models o ya está saturado (ej. MMLU 88-94% top, AIME 2025 ya 96-100%). Devuelve top-3 + veredicto + reemplazos recomendados.",
    "modes.hub":                   "🧭 Soluciones",
    "mode_desc.hub":               "Mapa de cada problema documentado de LLM-eval → mode tafagent (si cubierto) + herramientas externas curadas. Encuentra la solución sin reinventarla. 30+ pains, 7 categorías.",
    "niah.title":                  "🔍 Gap NIAH → Reasoning",
    "niah.tip":                    "NIAH (Needle in a Haystack) testea retrieval: 'encuentra este hecho en texto largo'. Reasoning multi-hop testea inferencia: 'combina hechos X+Y del principio con hecho Z del final'. El paper RULER (NVIDIA 2024) muestra que modelos long-context a menudo pasan NIAH pero fallan reasoning al mismo contexto. Esta herramienta predice ambas tasas desde la arquitectura sola.",
    "niah.desc":                   "<strong>Tu modelo dice 128k de contexto. ¿Razonará realmente a 64k, o solo encontrará?</strong> Pega un model id HF y un contexto objetivo — la herramienta predice tasas de pass NIAH y reasoning multi-hop, el gap, y un 'contexto seguro' donde reasoning se mantiene ≥65%.",
    "niah.id_label":               "ID modelo HF:",
    "niah.fetch_btn":              "📥 Fetch config",
    "niah.teval_label":            "Contexto objetivo (T_eval):",
    "niah.run_btn":                "🔍 Predecir",
    "niah.sweep_btn":              "📊 Barrer contextos",
    "niah.label.niah":             "Tasa pass NIAH",
    "niah.label.reasoning":        "Tasa pass Reasoning",
    "niah.label.gap":              "Gap",
    "niah.label.safe_ctx":         "Contexto seguro de reasoning",
    "niah.section.breakdown":      "Desglose arquitectónico",
    "niah.section.reco":           "Recomendación",
    "niah.section.sweep":          "Barrido de tasas pass por longitud de contexto",
    "niah.field.dhorizon":         "d_horizon (efectivo)",
    "niah.field.ratio":            "T_eval / d_horizon",
    "niah.field.arch_pressure":    "Presión arq (d_head pequeño + GQA + SWA)",
    "niah.field.theta":            "RoPE θ",
    "niah.field.t_train":          "T_train (declarado)",
    "niah.col.context":            "T_eval",
    "niah.col.niah":               "NIAH",
    "niah.col.reasoning":          "Reasoning",
    "niah.col.gap":                "Gap",
    "niah.col.verdict":            "Veredicto",
    "niah.verdict.robust":         "✅ ROBUSTO",
    "niah.verdict.marginal":       "⚠ MARGINAL",
    "niah.verdict.degraded":       "⚠ DEGRADADO",
    "niah.verdict.retrieval_only": "❌ SOLO RETRIEVAL",
    "niah.verdict.broken":         "❌ ROTO",
    "niah.reco.robust":            "Tanto retrieval como reasoning aguantan a este contexto. Seguro para desplegar tareas de lookup e inferencia.",
    "niah.reco.marginal":          "Borderline. Retrieval funciona pero reasoning está flojo. Úsalo para lookup, no para inferencia multi-paso.",
    "niah.reco.degraded":          "Caída significativa de reasoning. El modelo encuentra hechos pero le cuesta combinarlos. Evita tareas multi-hop a esta longitud.",
    "niah.reco.retrieval_only":    "Hallazgo canónico de RULER: el modelo pasa NIAH pero falla reasoning. Útil para setups RAG (donde el LLM solo localiza hechos) pero NO para inferencia encadenada. Reduce tu contexto al valor 'seguro' de abajo.",
    "niah.reco.broken":            "El modelo falla incluso retrieval básico a este contexto. Trátalo como out-of-distribution — re-testea a contexto más corto.",
    "niah.safe_context":           "≤ {ctx} tokens (reasoning ≥ 65%)",
    "niah.safe_context_none":      "No se encontró contexto seguro bajo tu objetivo — el modelo falla reasoning incluso a contextos pequeños.",
    "niah.summary.sweep":          "<code>{modelId}</code> — tasas pass por contexto",
    "niah.status.empty_id":        "⚠ Introduce un model id (ej. meta-llama/Llama-3.1-8B-Instruct).",
    "niah.status.bad_teval":       "⚠ Introduce un contexto objetivo (≥ 512 tokens).",
    "niah.status.fetching":        "⏳ Obteniendo config.json para {modelId}...",
    "niah.status.fetched":        "✅ Config obtenido para {modelId}. Pon T_eval y click Predecir (o Barrer contextos).",
    "niah.status.done":            "✅ {verdict} — NIAH {niah}% · reasoning {reasoning}%",
    "niah.status.sweep_done":      "✅ Barridos {n} largos de contexto.",
    "saturation.title":            "📈 Detector de saturación de benchmarks",
    "saturation.tip":              "MMLU está saturado (88-94% en todos los frontier). Reportar '92% en MMLU' ya no significa nada. Esta herramienta te dice qué benchmarks aún discriminan frontier models, cuáles están saturados, y qué usar en su lugar. Datos: DemandSphere AI Frontier Tracker (CC BY-NC 4.0) refrescado 2026-05.",
    "saturation.desc":             "<strong>¿Sigue siendo útil tu benchmark?</strong> Elige un benchmark para ver top-3 frontier scores, spread, y un veredicto (saturated / near-saturated / discriminative) + reemplazos recomendados.",
    "saturation.select_label":     "Benchmark:",
    "saturation.select.all":       "— mostrar todos los benchmarks —",
    "saturation.run_btn":          "📈 Clasificar",
    "saturation.all_btn":          "📊 Mostrar todos",
    "saturation.col.spread":       "Spread top-3",
    "saturation.col.mean":         "Media top-3",
    "saturation.col.n":            "Modelos",
    "saturation.col.bench":        "Benchmark",
    "saturation.col.verdict":      "Veredicto",
    "saturation.col.reco":         "Mejor reco",
    "saturation.col.model":        "Modelo",
    "saturation.col.score":        "Score",
    "saturation.section.top3":     "Top-3 frontier scores",
    "saturation.section.recommendations": "Alternativas recomendadas",
    "saturation.section.note":     "Notas",
    "saturation.section.all":      "Todos los benchmarks rastreados",
    "saturation.verdict.saturated":      "🚨 SATURADO",
    "saturation.verdict.near_saturated": "⚠ CASI SATURADO",
    "saturation.verdict.discriminative": "✅ DISCRIMINATIVO",
    "saturation.verdict.sparse_data":    "ℹ DATOS ESCASOS",
    "saturation.borderline":       "Borderline — dentro de ±1pp de un umbral. Trata el veredicto como 'verifica con cuidado'.",
    "saturation.unknown":          "Benchmark desconocido.",
    "saturation.attribution":      "Datos: DemandSphere AI Frontier Model Tracker (CC BY-NC 4.0) · HF Open LLM Leaderboard v3 (histórico open-weight) · último fetch 2026-05-05.",
    "saturation.status.live":      "✅ Datos en vivo cargados — {count} modelos.",
    "saturation.status.baked":     "ℹ Usando snapshot baked (fetch en vivo no disponible).",
    "saturation.status.kb_fail":   "⚠ No se pudo cargar el KB de saturación.",
    "saturation.status.done":      "✅ {name} — {verdict}",
    "saturation.status.all_done":  "✅ Clasificados {n} benchmarks.",
    "help.v08.saturation.title":   "📈 Detector de saturación de benchmarks",
    "help.v08.saturation.body":    "MMLU está saturado (top 88-94%), AIME 2025 saturó a los pocos meses de salir, HumanEval near-saturated. Elige cualquier benchmark y la herramienta retorna top-3 frontier scores, spread, media, y un veredicto — saturated / near-saturated / discriminative — más un reemplazo recomendado (ej. MMLU → MMLU-Pro / GPQA / HLE). Fetch en vivo desde DemandSphere AI Frontier Tracker (CC BY-NC 4.0) cuando llega; snapshot baked 2026-05-05 cuando no. <em>Caso de uso</em>: antes de citar '92% en MMLU' o diseñar una eval, verifica si el benchmark aún discrimina algo.",
    "inv.v08.saturation":          "<strong>📈 Saturation</strong> — ¿sigue siendo útil tu benchmark, o están todos los frontiers empatados arriba?",
    "inv.v081.hub":                "<strong>🧭 Solutions Hub</strong> — cada pain documentado mapeado a un mode tafagent o herramienta externa curada. No reinventes — encuentra.",
    "help.v081.hub.title":         "🧭 Solutions Hub",
    "help.v081.hub.body":          "tafagent como integrador, no silo. 30+ pains en 7 categorías (eval reliability · diagnósticos · setup · training · retrieval · multimodal · observability), cada uno mapeado a (a) el mode tafagent que lo resuelve, si existe, y (b) las herramientas externas best-of-breed que la comunidad ya usa (RAGAS, MTEB, HELM, MCP Schema Validator, llm-stats, llguidance, GlitchMiner, etc.). Caja de búsqueda matchea pain, scenario, y nombre de herramienta. <em>Caso de uso</em>: 'tengo problema X — ¿lo resuelve tafagent, y si no, quién?'",
    "hub.title":                   "🧭 Solutions Hub",
    "hub.tip":                     "Mapa de cada pain de LLM-eval documentado: qué mode tafagent lo resuelve (si alguno), y las herramientas externas best-of-breed que la comunidad ya usa. Objetivo: cobertura total. Si la herramienta canónica existe en otra parte, enlazamos en vez de rebuildear.",
    "hub.desc":                    "<strong>No reinventes — encuentra.</strong> 30+ pains mapeados a modes tafagent + herramientas externas curadas. Navega por categoría, busca por keyword, o ve los huecos donde nuevos modes ayudarían más.",
    "hub.clear_btn":               "✕ Limpiar",
    "hub.no_mode":                 "externo",
    "hub.planned":                 "planeado:",
    "hub.best_for":                "Mejor para",
    "hub.not_for":                 "No para",
    "hub.tools":                   "Herramientas externas",
    "hub.status.loaded":           "✅ Cargados {total} pains en {categories} categorías — {covered} cubiertos por modes tafagent, {externalLinks} enlaces externos curados. Compilado {compiled}.",
    "hub.status.fail":             "⚠ No se pudo cargar Solutions Hub.",
    "hub.search.empty":            "Sin coincidencias para '{query}'. Prueba términos más amplios (ej. 'eval', 'rag', 'tokenizer').",
    "hub.search.results":          "Encontradas {n} coincidencia(s) para '{query}'.",

    // v0.7.7 — Tiles de tareas (UX restructure: 14 modos agrupados por intención)
    "tiles.title":                 "🎯 ¿Qué quieres hacer?",
    "tiles.subtitle":              "Elige una tarea. Cada una abre la herramienta adecuada debajo. O baja para la lista completa de 14 modos.",
    "tile.diagnose.title":         "🔬 Diagnosticar un modelo",
    "tile.diagnose.desc":          "¿Servirá este modelo concreto para mi caso de uso?",
    "tile.trust.title":            "✓ Confiar en un score de benchmark",
    "tile.trust.desc":             "¿Me creo este número? ¿Es bug o ruido?",
    "tile.eval.title":              "⚙️ Configurar bien una eval",
    "tile.eval.desc":              "Obtén el flag CLI exacto para lm-eval / vLLM / transformers.",
    "tile.compare.title":          "🆚 Comparar modelos",
    "tile.compare.desc":           "Lado a lado, o explora el panel empírico de modelos.",
    "tile.manual.title":           "📋 Manual / libre",
    "tile.manual.desc":            "Elige una receta concreta a mano, o pregunta en inglés llano.",
    "tile.diagnose.tip":           "Empieza aquí cuando tengas un id de modelo concreto y quieras diagnóstico completo: <strong>Profile</strong> corre las 5 recetas a la vez. <strong>Unmask</strong> comprueba si max_position_embeddings es honesto. <strong>NIAH→Reason</strong> predice el gap retrieval-vs-reasoning. <strong>Quant</strong> predice si cuantizar lo romperá. <strong>Inspect</strong> permite pegar config.json crudo para modelos privados / en desarrollo.",
    "tile.trust.tip":              "Cuando ves un score y quieres saber si es real. <strong>Contamination</strong> puntúa 20+ benchmarks por probabilidad de que el modelo los viera en entrenamiento. <strong>Drift</strong> te dice si el gap entre dos evals es ruido numérico o bug real (chat-template mismatch, layout KV-cache, etc.). <strong>Arena CI</strong> reconstruye los intervalos de confianza que Chatbot Arena oculta — muchas &quot;victorias&quot; top-Elo están estadísticamente empatadas.",
    "tile.eval.tip":               "Antes de correr lm-eval-harness o vLLM serve, obtén el flag CLI correcto. <strong>Chat-template Sniffer</strong> detecta la familia de template (Llama-3 / ChatML / Mistral / Phi-3 / DeepSeek / Alpaca / custom / none) y emite la invocación exacta <code>--apply_chat_template</code> / <code>--chat-template</code>. Resuelve el issue #1841 de lm-eval-harness (÷2 accuracy silencioso). <strong>Diagnose CLI</strong> genera el comando Python para medir γ_obs en tu GPU local.",
    "tile.compare.tip":            "<strong>Compare</strong>: elige 2-3 modelos candidatos + una receta, ve veredictos en tabla lado a lado (ej. Llama-3-8B vs Mistral-7B a 32k). <strong>Phase diagram</strong>: scatter de 23 modelos empíricos en el plano (log θ, γ), con la curva Padé superpuesta. Hover puntos para detalles, click para cargar ese modelo en la Recipe form.",
    "tile.manual.tip":             "<strong>Recipe</strong>: elige una receta X-N específica (X-1 custom-vs-API, X-2 long context, X-3 budget, X-5 hardware, X-19 compresión KV, X-21 imprint, X-22 compute-context invariant, X-23 IH-phase) y rellena la form a mano para control total. <strong>Ask</strong>: escribe una pregunta libre; un LLM 0.5B (Qwen2.5) en tu navegador elige la receta correcta y la ejecuta. Ideal para exploración &quot;qué pasaría si...&quot;.",
    "share.import_desc":       "¿Tienes un fichero JSON del análisis TAF de alguien? Cárgalo aquí para ver el veredicto + cadena localmente. La misma vista que si lo hubieras ejecutado tú.",
    "share.import_btn":        "📂 Cargar JSON compartido",
    "synthesis.system":        "Eres un asistente de diagnóstico preciso para LLMs transformer. Dados resultados de fórmulas TAF pre-calculados, escribe un resumen claro en español de 4-6 frases. Cita el número de sección (§X.Y) para cada número que menciones. Da siempre una recomendación concreta. NO inventes números.",

    // INSPECTOR mode
    "inspector.title":         "🔍 Inspector de Arquitectura",
    "inspector.desc":          "Pega el contenido crudo de <code>config.json</code>. La herramienta extrae los parámetros arquitectónicos y ejecuta el Profile completo de 5 recetas.",
    "inspector.tip":           "<strong>Pega cualquier config.json directamente</strong>. La herramienta lo parsea y ejecuta el Profile completo. Útil para: modelos privados, configs en desarrollo, modelos aún no en HuggingFace, o comparar qué haría tu arquitectura custom.",
    "inspector.quickstart":    "💡 Caso de uso: tienes un modelo privado no en HF Hub, o una config que estás diseñando. Pega el JSON crudo abajo y obtén un perfil TAF completo.",
    "inspector.placeholder":   "{\n  \"model_type\": \"llama\",\n  \"rope_theta\": 500000,\n  \"max_position_embeddings\": 8192,\n  \"num_attention_heads\": 32,\n  \"num_key_value_heads\": 8,\n  \"hidden_size\": 4096,\n  \"num_hidden_layers\": 32\n}",
    "inspector.T_eval":        "T_eval (tu contexto objetivo):",
    "inspector.btn":           "🚀 Inspeccionar y perfilar",

    // WHAT-IF slider
    "whatif.title":            "🎚 What-if: arrastra T_eval para ver γ cambiar en vivo",
    "whatif.desc":             "Recálculo puro JS (sin llamada Pyodide). Muestra γ_Padé y d_horizon geométricos mientras deslizas. Click en el botón para re-ejecutar la cadena completa.",
    "whatif.T_eval":           "<strong>T_eval</strong>",
    "whatif.gamma_pade":       "<strong>γ_Padé</strong>",
    "whatif.d_horizon":        "<strong>d_horizon</strong>",
    "whatif.l_niah":           "<strong>Techo L_NIAH</strong>",
    "whatif.predicted":        "<strong>Veredicto geométrico predicho</strong>",
    "whatif.rerun":            "↻ Re-calcular cadena completa con este T_eval",

    // COMMUNITY feed
    "community.title":         "🌐 Envíos recientes de la comunidad",
    "community.desc":          "Feed en vivo del registry público. Click en cualquier envío para ver análisis completo.",
    "community.browse_all":    "Ver todo →",
    "community.loading":       "Cargando...",
    "community.no_repo":       "El repo del registry aún no está creado. Cuando exista con envíos, aparecerán aquí en vivo.",
    "community.no_submissions": "Sin envíos aún. Sé el primero — genera un Profile y click 📤 Enviar al registry.",

    // FALSIFICATION dashboard
    "falsification.title":     "🔬 Predicciones del paper — estado de falsificación",
    "falsification.desc":      "El framework TAF se basa en predicciones falsificables (F1-F23). Cada una está empíricamente testada. Aquí está el estado en vivo de cada predicción del paper.",
    "falsification.summary":   "{confirmed} confirmadas · {partial} parciales · {refuted} refutadas · {untested} sin testear (de {total} predicciones totales)",
    "falsification.col.id":    "ID",
    "falsification.col.claim": "Claim",
    "falsification.col.status": "Estado",
    "falsification.col.evidence": "Evidencia",

    "tafcard.title":           "📇 TAF Card — perfil completo del modelo",
    "tafcard.recipes_title":   "📋 Recetas — veredicto por dimensión",
    "tafcard.recipes_count_label": "dimensiones",
    "tafcard.numbers_title":   "🔢 Números clave (paper §26)",
    "tafcard.fals_title":      "🔬 Estado de falsificación (F1-F23)",
    "tafcard.fals_none":       "Sin falsificaciones aplicables.",
    "tafcard.diag_title":      "🔬 Diagnósticos — números · γ check · what-if",
    "tafcard.verify_title":    "✓ Verificación — Lean + Sage + falsificación",
    "tafcard.share_title":     "📂 Procedencia y compartir",
    "tafcard.whatif_title":    "🎚️ Explorador what-if",
    "verdict.go":              "ADELANTE",
    "verdict.no":              "NO",
    "verdict.degraded":        "DEGRADADO",

    "compare.title_out":       "🆚 Tabla comparativa",

    "status.loading_pyodide":  "⏳ Cargando runtime Python (~10MB, solo primera vez)...",
    "status.loading_taf":      "⏳ Cargando fórmulas TAF + recetas...",
    "status.ready":            "✅ Listo. Elige un modelo y click Perfilar para empezar.",
    "status.computing":        "🧮 Calculando cadena TAF...",
    "status.done":             "✅ Hecho.",

    "profile.hf_placeholder":  "ej. meta-llama/Meta-Llama-3-8B o Qwen/Qwen2.5-7B",
    "compare.hf_placeholder":  "ID modelo HF (ej. meta-llama/Meta-Llama-3-8B)",
    "compare.slot1_placeholder": "ID modelo HF (ej. meta-llama/Meta-Llama-3-8B)",
    "compare.slot2_placeholder": "ID modelo HF #2",
    "compare.slot3_placeholder": "ID modelo HF #3 (opcional)",
    "compare.preset_default": "— o preset —",

    // Parámetros del formulario
    "param.theta":         "θ (rope_theta)",
    "param.theta.tip":     "<strong>Frecuencia base RoPE</strong> de <code>config.rope_theta</code>. Mayor = más capacidad de largo alcance.",
    "param.T_train":       "T_train",
    "param.T_train.tip":   "<strong>Contexto máximo de entrenamiento</strong>. De <code>max_position_embeddings</code>. Más allá es extrapolación.",
    "param.T_eval":        "T_eval (tu objetivo)",
    "param.T_eval.tip":    "<strong>Tu contexto de inferencia objetivo</strong>. La pregunta clave: ¿se comportará bien el modelo a ESTA longitud?",
    "param.n_attn":        "n_attention_heads",
    "param.n_attn.tip":    "<strong>Número de attention heads</strong> por capa. De <code>num_attention_heads</code>.",
    "param.n_kv":          "n_kv_heads",
    "param.n_kv.tip":      "<strong>KV heads</strong>. Si &lt; n_attention_heads → GQA (Grouped Query Attention). Reduce memoria KV pero empuja γ hacia Hagedorn.",
    "param.d_head":        "head_dim",
    "param.d_head.tip":    "<strong>Dimensión por head</strong>. Típico 64, 96, 128. De <code>head_dim</code> o <code>hidden_size / num_attention_heads</code>.",
    "param.n_layers":      "n_layers",
    "param.n_layers.tip":  "<strong>Número de bloques transformer</strong>. De <code>num_hidden_layers</code>.",
    "param.n_params":      "n_params (ej. 8e9)",
    "param.n_params.tip":  "<strong>Número total de parámetros</strong>. Umbral ~400M para emergencia de induction heads. Afecta memoria KV y recipes de presupuesto.",
    "param.has_swa":       "¿Tiene SWA?",
    "param.has_swa.tip":   "<strong>Sliding Window Attention</strong>. <code>true</code> para Mistral, gemma-2, phi-3. El audit de calibración v0.5.3 desactivó la corrección histórica δ_SWA (ajuste n=1).",
    "common.yes":          "Sí",
    "common.no":           "No",

    // Tooltips de modos
    "modes.tip":           "<strong>Catorce formas de usar la herramienta</strong>.<br><strong>📇 Perfil</strong>: pega un id → TAF Card de 5 recetas.<br><strong>🆚 Comparar</strong>: 2-3 modelos lado a lado en una receta.<br><strong>🔍 Inspeccionar config</strong>: pega config.json crudo → Perfil completo.<br><strong>💬 Pregunta</strong>: pregunta libre, el LLM del navegador elige la receta.<br><strong>📋 Receta</strong>: selección manual con control total del formulario.<br><strong>🩺 Diagnóstico CLI</strong>: genera comando Python para medir γ localmente.<br><strong>📊 Diagrama de fase</strong>: panel de 23 modelos en plano (log θ, γ).<br><strong>🪟 Desenmascarar</strong>: detecta max_position_embeddings engañoso (SWA / YaRN / RoPE-scaling).<br><strong>📜 Chat-template</strong>: detecta familia + da el flag CLI exacto para lm-eval / vLLM / transformers.<br><strong>🎯 Arena CI</strong>: reconstruye intervalos de confianza desde votos pairwise crudos; detecta empates estadísticos que Arena oculta.<br><strong>🧪 Contaminación</strong>: puntúa 20+ benchmarks por probabilidad de contaminación según cutoff de entrenamiento vs fecha de release.<br><strong>⚖️ Quant</strong>: predice γ-shift y ΔPPL para cualquier (modelo × esquema de cuantización); recomienda alternativa segura si hay cliff.<br><strong>🔀 Drift</strong>: mismo modelo, scores distintos en dos setups — ¿bug o ruido? Predice banda de ruido numérico y flagea bugs reales.<br><strong>🔍 NIAH→Reason</strong>: predice tasas pass NIAH y reasoning multi-hop desde arquitectura; encuentra el contexto seguro de reasoning.",
    "profile.tip":         "<strong>Diagnóstico completo en un click</strong>. Pega cualquier id de modelo HF (o elige preset). La herramienta ejecuta las 5 recetas (contexto largo, compresión KV, custom vs API, presupuesto, hardware) y produce una única <strong>TAF Card</strong> con veredicto por dimensión + números clave + clasificación arquitectónica.<br><br><strong>Caso de uso</strong>: \"Estoy evaluando Qwen2.5-32B para producción — ¿cuál es su perfil completo de viabilidad?\" → pega id → Perfilar → listo.",
    "compare.tip":         "<strong>Misma receta, múltiples modelos</strong>. Elige 2-3 modelos candidatos y una receta. Ve los veredictos en una única tabla comparativa.<br><br><strong>Caso de uso</strong>: \"Necesito recuperación de contexto largo a 16K — ¿cuál es mejor: Llama-3-8B, Mistral-7B o Qwen-7B?\" → elige 3 + X-2 + 16K → ve el ganador.",

    // Modal de ayuda
    "help.title":               "📘 TAF Agent — Manual de Usuario",
    "help.what.title":          "¿Qué hace?",
    "help.what.body":           "Predice la <strong>viabilidad práctica</strong> de cualquier LLM transformer <em>antes de gastar GPU/€</em>. Responde preguntas como \"¿funcionará este modelo a L=32K?\" o \"¿debería entrenar custom o usar API?\" usando fórmulas Python deterministas (TAF — Thermodynamic Attention Framework).",
    "help.modes.title":         "Cómo usar — 7 modos",
    "help.modes.profile":       "<strong>📇 Perfilar</strong>: pega id de modelo → todas las recetas a la vez = TAF Card. <strong>Mejor punto de inicio</strong>.",
    "help.modes.compare":       "<strong>🆚 Comparar</strong>: 2-3 modelos lado a lado en la misma receta. Mejor al elegir entre candidatos.",
    "help.modes.inspector":     "<strong>🔍 Inspeccionar config</strong>: pega <code>config.json</code> crudo → la herramienta lo parsea y ejecuta el Perfil completo. Para modelos privados, configs en desarrollo, o modelos aún no en HF Hub.",
    "help.modes.ask":           "<strong>💬 Pregunta libre</strong>: pregunta en lenguaje natural, el LLM del navegador elige la receta. Mejor para exploración casual.",
    "help.modes.recipe":        "<strong>📋 Receta + formulario</strong>: selección manual, control total de parámetros. Mejor cuando quieres control exacto.",
    "help.modes.diagnose":      "<strong>🩺 Diagnóstico CLI</strong>: genera comando Python para medir γ en tu máquina local (transformers + numpy). Rápido ≈5 min CPU; completo ≈20–60 min GPU. JSON resultado re-subible por Inspect.",
    "help.modes.phase":         "<strong>📊 Diagrama de fase</strong>: scatter de 23 modelos del panel en plano (log θ, γ). Línea Hagedorn γ=1 separa Fase A de Fase B. Click en un punto para cargar ese modelo en el formulario de Receta.",
    "help.recipes.title":       "Las 8 recetas disponibles",
    "help.recipe.x1.title":     "<strong>X-1 Entrenamiento custom vs API</strong> — compara coste de entrenar tu propio modelo vs pagar API.",
    "help.recipe.x1.example":   "Prueba: <em>\"¿Entrenar 8B custom o usar GPT-4o para 50M tokens/mes?\"</em><br>Respuestas: SÍ (custom) / NO (API) con meses para break-even.",
    "help.recipe.x2.title":     "<strong>X-2 Viabilidad contexto largo</strong> — predice si un modelo sirve longitud objetivo de manera fiable.",
    "help.recipe.x2.example":   "Prueba: <em>\"¿Meta-Llama-3-8B maneja 32000 tokens para retrieval?\"</em><br>Cadena: γ_Padé → descomposición → d_horizon → techo NIAH → alucinación → memoria KV.<br>Veredicto: SÍ / DEGRADADO / NO con mitigación si hace falta.",
    "help.recipe.x3.title":     "<strong>X-3 Pre-flight presupuesto</strong> — dado un presupuesto $, ¿qué modelo es viable entrenar?",
    "help.recipe.x3.example":   "Prueba: <em>\"Tengo $5000, ¿qué modelo puedo entrenar?\"</em><br>Respuesta: GO / TINY-MODEL / MEMORY-LIMITED con N (params) y D (tokens) concretos.",
    "help.recipe.x5.title":     "<strong>X-5 Selección hardware</strong> — ¿qué GPU usar para servir al throughput objetivo?",
    "help.recipe.x5.example":   "Prueba: <em>\"Hardware más barato para servir Llama-3-8B a 10M tokens/día\"</em><br>Respuesta: mejor GPU + $/Mtok + capacidad vs objetivo.",
    "help.recipe.x19.title":    "<strong>X-19 Decisión compresión KV</strong> — ¿usar soft decay, hard cutoff, o métodos de literatura?",
    "help.recipe.x21.title":    "<strong>X-21 Diagnóstico Pureza Imprint</strong> — predice γ sobre tokens RANDOM via ν=−1/(2π); ¿cuán limpia es la predicción RoPE del modelo?",
    "help.recipe.x22.title":    "<strong>X-22 Invariante Compute-Context</strong> — ¿γ × log(N²·D) está en banda 51.2 ± 16.8? Detecta anomalías de scaling/training.",
    "help.recipe.x23.title":    "<strong>X-23 Detector Fase IH</strong> — ¿pre- o post-induction-head? Probe barato via sign(γ_text − γ_random).",
    "help.recipe.x21.example":  "Prueba: <em>«¿Cuán limpia es la predicción RoPE en Llama-3-8B?»</em><br>Respuesta: γ_random predicho + diagnóstico (CLEAN / OVER-IMPRINTED / UNDER-IMPRINTED).",
    "help.recipe.x22.example":  "Prueba: <em>«¿Mistral-7B entra en el invariante compute-context?»</em><br>Respuesta: K = γ·log(N²·D), z-score, IN-BAND u OUTLIER.",
    "help.recipe.x23.example":  "Prueba: <em>«¿Qwen2.5-7B es post-induction-head?»</em><br>Respuesta: CONFIRMED PRE-IH / CONFIRMED POST-IH / ANOMALY (chequeo consistencia tamaño vs Δγ).",
    "help.section.v04":         "<strong>Novedades v0.4</strong> (hallazgos sesión 29 del 2026-04-28): tres recipes diagnósticas derivadas del análisis panel cross-model (n=22 LLMs).",
    "help.divider.v04_s29":     "— v0.4 (hallazgos sesión 29) —",
    "footer.tech_stack":        "Cómputo: Pyodide · Síntesis: WebLLM (Qwen2.5-0.5B local) · Hosting: GitHub Pages · Coste: $0",
    "help.v04.imprint":         "<strong>Slope imprint aprendido ν = −1/(2π)</strong>: el periodo de rotación RoPE 2π provoca un sesgo posicional en los pesos, proporcional a log(N_params). Incluso tokens random muestran este scaling. ν es DERIVADO — no ajustado (err empírico 0.3%).",
    "help.v04.invariant":       "<strong>Invariante Chinchilla-atención K</strong>: γ × log(N²·D) ≈ 51.2 ± 16.8 (CV=0.329). Conecta compute scaling y exponente de atención en un solo número adimensional.",
    "help.v04.ih_probe":        "<strong>Δγ como probe IH</strong>: sign(γ_text − γ_random) > 0 ⟺ post-induction-head. Más barato que correr un benchmark in-context-learning.",
    "help.v04.constants":       "<strong>γ-cluster en constantes famosas</strong> (intrigante, n=4): CodeLlama-13b γ=0.382 ≈ 1−1/φ (conjugado áureo, err 0.0003); pythia-1.4b γ=0.705 ≈ 1/√2; Llama-2-7b γ=0.287 ≈ 1−1/√2; Mistral-Nemo γ=0.428 ≈ log_10(e). Caveat: podría ser coincidencia.",
    "help.recipe.x19.example":  "Prueba: <em>\"¿Cómo comprimir caché KV para Qwen2.5-7B a 32K?\"</em><br>Respuesta: USE SOFT DECAY / USE D_f CUTOFF / USE LITERATURE METHODS / USE HARD T_train.",
    "help.param.theta":         "<strong>θ (rope_theta)</strong>: frecuencia base RoPE. Mayor = más capacidad de largo alcance. Típico: 10000 (modelos antiguos), 500000 (Llama-3), 1000000 (Qwen2.5).",
    "help.param.T_train":       "<strong>T_train</strong>: contexto máximo que vio el modelo durante entrenamiento. De <code>max_position_embeddings</code>.",
    "help.param.T_eval":        "<strong>T_eval</strong>: <em>tu</em> longitud de contexto objetivo en inferencia. La perilla clave.",
    "help.param.gqa":           "<strong>n_kv_heads &lt; n_attention_heads</strong>: el modelo usa GQA (Grouped Query Attention). Reduce memoria KV pero empuja γ hacia Hagedorn.",
    "help.param.swa":           "<strong>has_SWA</strong>: el modelo usa Sliding Window Attention (Mistral, gemma-2).",
    "help.param.nparams":       "<strong>n_params</strong>: número total de parámetros. Umbral ~400M para emergencia de induction heads.",
    "help.add_models.title":    "Añadir nuevos modelos (3 maneras)",
    "help.add_models.preset":   "<strong>Lista de presets</strong>: 11 modelos populares curados. Selecciona del dropdown.",
    "help.add_models.hf":       "<strong>HF Hub fetch</strong>: pega cualquier id (ej. <code>Qwen/Qwen2.5-32B-Instruct</code>), click 📥 Cargar. El navegador descarga <code>config.json</code> directamente de HuggingFace, llena el formulario. Funciona con cualquier modelo público.",
    "help.add_models.manual":   "<strong>Manual</strong>: rellena los campos directamente con valores de la model card.",
    "help.audit.title":         "La cadena auditable",
    "help.audit.body":          "Cada resultado muestra la <strong>Cadena de Cálculo</strong> completa — cada paso de fórmula con sus entradas, salida e interpretación. Click en cualquier paso para expandir. Las referencias de sección (§26.1, §19.1, etc.) apuntan al paper para la derivación.",
    "help.synthesis.title":     "La respuesta en lenguaje natural",
    "help.synthesis.body":      "Tras ejecutar la cadena determinista, un LLM en el navegador (Qwen2.5-0.5B, ~350MB cacheado tras primera carga) sintetiza un resumen en lenguaje natural. Los números arriba son <em>siempre correctos</em> (Python determinista); la síntesis la genera el LLM — verifica contra la cadena si dudas.",
    "help.params.title":        "Parámetros comunes explicados",
    "help.verdicts.title":      "Qué mirar en los veredictos",
    "help.verdict.yes":         "<strong style=\"color:#3fb950;\">SÍ / GO</strong> — procede con confianza; los números apoyan la elección.",
    "help.verdict.deg":         "<strong style=\"color:#d29922;\">DEGRADADO / TINY-MODEL</strong> — funciona con caveats; lee la acción.",
    "help.verdict.no":          "<strong style=\"color:#f85149;\">NO / MEMORY-LIMITED</strong> — no procedas tal cual; se da mitigación.",
    "help.privacy.title":       "Privacidad",
    "help.privacy.body":        "Todo corre en tu navegador. Sin telemetría, sin analytics, sin datos enviados a ningún sitio. Incluso el modelo LLM corre localmente vía WebGPU/WebAssembly. Tus model_ids y preguntas nunca abandonan esta página.",
    "help.source.title":        "Código fuente y paper",
    "help.source.body":         "Código: <a href=\"https://github.com/karlesmarin/tafagent\" target=\"_blank\">github.com/karlesmarin/tafagent</a><br>Paper: <em>Marin 2026 — Predicting How Transformers Attend</em> (<a href=\"https://zenodo.org/records/19826343\" target=\"_blank\">Zenodo</a>; arXiv próximamente)<br>Dataset: <a href=\"https://huggingface.co/datasets/karlexmarin/taf-attention-decay\" target=\"_blank\">taf-attention-decay</a> — 58 mediciones γ sobre 32 modelos (CC-BY-4.0)",

    "footer.text":             "© 2026 Carles Marin · Apache-2.0 · investigación independiente · la herramienta que cierra el círculo del paper.",
  },

  // ────────────────────────────────────────────────────────────────────────
  // FR — Français
  // ────────────────────────────────────────────────────────────────────────
  fr: {
    // §33 v0.4 (session 31, 2026-04-30) — nouvelles fonctions de diagnostic
    "v04.title":                  "🆕 v0.4 — Nouveaux diagnostics (session 31)",
    "v04.section.intro":          "Quatre nouvelles fonctions diagnostiques dérivées en session 31 (2026-04-30) depuis jeux de formules cross-of-crosses + interrogation socratique. Disponibles dans <code>taf_browser.py</code> §33.",
    "v04.arch.label":             "Concentration Architecturale",
    "v04.arch.desc":              "γ_text ≈ γ_Padé − 0.012·n_kv. Loi corrélationnelle cross-panel (R²=0.30). Caveat : pas un prédicteur par-modèle.",
    "v04.pdi.label":              "PDI — Indice de Déviation de Padé",
    "v04.pdi.desc":               "PDI = d_horizon_obs/T_eval. Feu : vert (≈1), orange (>>1), jaune (<<1), rouge (Phase B négatif).",
    "v04.4bit.label":             "Prédicteur de Décalage 4-bit",
    "v04.4bit.desc":              "MHA : R²(bf16)<0.9 → γ monte ; R²>0.99 → γ descend. GQA : précision-robuste.",
    "v04.crit.label":             "Ensemble d'Exposants Critiques",
    "v04.crit.desc":              "ν_c, β_c, η_c (=γ−1, CORRIGÉ), α_C, γ_susc avec minimum AM-GM à γ=1−1/√2≈0.293.",

    // §34 v0.5 (session 32, 2026-05-01) — Cohérence algébrique vérifiée par machine
    "v05.title":                  "🔬 v0.5 — Cohérence vérifiée par machine (session 32)",
    "v05.section.intro":          "Vérification duale par Sage Groebner basis + Lean Mathlib4 de <strong>15 identités algébriques</strong> des exposants critiques TAF. Premier framework transformer-attention avec preuve formelle machine.",
    "v05.verify.label":           "Vérification de Cohérence Algébrique",
    "v05.verify.desc":            "Étant donné γ mesuré, vérifie 12 identités D-SAGE (D-SAGE-1 : 2η²+η·γ_χ+1=0, β·χ=−1, α+χ=2, etc.). Toutes passantes = framework intact. Échecs = outliers bf16 / artefacts de quantification.",
    "v05.dsage1.label":           "D-SAGE-1 (★★ core)",
    "v05.dsage1.desc":             "Identité quadratique 2η² + η·γ_χ + 1 = 0 (découverte par Sage Groebner, vérifiée Lean). Remplace l'affirmation incorrecte de 'fermeture triple'. Réfute η=2γ du paper 1 algébriquement.",
    "v05.erratum.label":          "Erratum paper 1 — correction η",
    "v05.erratum.desc":            "Paper 1 affirmait η = 2γ. Sage Groebner + Lean Mathlib4 ont prouvé l'échec (résidu (-4γ³+5γ+1)/(1-γ) > 0 ∀γ ∈ Phase A). Valeur correcte : η = γ−1, satisfaisant D-SAGE-1.",
    "v05.repro.label":            "Reproductibilité",
    "v05.repro.desc":              "Les 15 théorèmes sont machine-proof en Lean Mathlib4 (build réussi 1973 jobs). Script Sage : <code>analysis/sage_recursive_sweep_2026-04-30.sage</code>. Code Lean : <code>lean_taf/taf/Taf/Identities.lean</code>.",

    // v0.5.1 — TAF Card consistency check button
    "v05.consistency.title":      "🔬 Vérification de cohérence algébrique (Sage + Lean v0.5)",
    "v05.consistency.desc":       "Vérifie 12 identités algébriques D-SAGE des exposants critiques TAF (machine-proof Sage Groebner basis + Lean Mathlib4). Passe = framework intact. Échec = outlier bf16 / artefact de quantification.",
    "v05.consistency.btn":        "🔬 Vérifier cohérence algébrique",

    // v0.5.2 — Anti-Ising universality class badge
    "v05.antiising.badge":        "🧲 Classe Anti-Ising (β=γ−1&lt;0, vérifié par machine)",

    // v0.5.2 — Tooltips par identité (explications en langage clair)
    "v05.tooltip.D_SAGE_1":       "Identité algébrique quadratique reliant la dimension anormale η et la susceptibilité γ_χ. Identité CENTRALE découverte par Sage Groebner basis (machine-proof). Remplace l'ancienne affirmation incorrecte de triple closure.",
    "v05.tooltip.D_SAGE_2":       "En Phase A, β = γ−1 est négatif (anti-Ising). Multiplié par χ = 1/(1−γ) donne exactement −1. Signature du régime négatif-β de TAF.",
    "v05.tooltip.D_SAGE_4":       "L'exposant de chaleur spécifique α et la susceptibilité χ se somment exactement à 2 en TAF. Conséquence algébrique de l'hyperscaling de Josephson.",
    "v05.tooltip.D_SAGE_5":       "Identité linéaire : α + γ_χ = 2(2−γ). Signifie que quand γ s'approche de 1 (Hagedorn), la somme s'approche de 2 ; à γ=0 elle vaut 4.",
    "v05.tooltip.D_SAGE_6":       "Exposant de paramètre d'ordre multiplié par exposant de susceptibilité donne une quadratique spécifique en γ. Relation algébrique factorisée.",
    "v05.tooltip.Rushbrooke_tautology": "Hyperscaling de Rushbrooke standard 2β + γ_χ = ν·d à d=1. En TAF c'est une TAUTOLOGIE — γ_χ est défini exactement pour que cela soit vrai. Confirmé par Sage Groebner basis.",
    "v05.tooltip.Josephson_tautology": "Hyperscaling de Josephson standard 2 − α = ν·d à d=1. En TAF c'est une TAUTOLOGIE — α est défini exactement pour que cela soit vrai.",
    "v05.tooltip.Fisher_independent": "Relation de Fisher γ_χ = (2−η)·ν. En TAF elle est INDÉPENDANTE (ne ferme PAS comme identité, contrairement à l'affirmation de triple closure). Le résidu est γ(2γ−3)/(1−γ).",
    "v05.tooltip.eta_2gamma_REFUTED": "Paper 1 affirmait η=2γ. Cette identité le réfute : le résidu est positif dans toute la Phase A. Réfutation machine-proof par Lean Mathlib4.",
    "v05.tooltip.D_14_nu_imprint": "La pente d'empreinte apprise ν = −1/(2π) multipliée par 2π donne −1. Vérification dimensionnelle triviale du paper 1.",
    "v05.tooltip.D_SAGE_7":       "La charge centrale c=3 multipliée par |ν_imprint| multipliée par 2π donne 3. Fermeture dimensionnelle reliant CFT à l'empreinte d'entraînement.",
    "v05.tooltip.nu_beta_id":     "Exposant de longueur de corrélation ν multiplié par exposant de paramètre d'ordre β donne −1 en Phase A. Variante de D-SAGE-2.",

    "v053.calibration.title":     "🔬 v0.5.3 — Audit de calibrage (2026-05-02)",
    "v053.calibration.note":      "<strong>Correction SWA désactivée</strong> — δ_SWA = -0.21 d'origine était calibrée sur n=1 modèle (données insuffisantes ; moyenne du cas unique +0.355). <strong>Correction post_IH marquée exploratoire</strong> — moyenne de groupe ≈ 0 en ré-audit (panel n=22) ne réplique pas l'ajustement OLS. <strong>Correction GQA réplique</strong> (panel +0.115 vs hardcoded +0.11). <strong>Formule D_f corrigée pour Phase B (γ&gt;1)</strong> — utilise une somme cumulative discrète au lieu d'une approximation continue. LLaMA-3, Mistral, Gemma rapportent maintenant des valeurs de compression correctes.",
    "v053.release.banner":        "🔧 v0.5.3 — Corrections issues d'audit : D_f de compression KV utilise maintenant la somme discrète (correct pour tout γ) ; δ_SWA désactivé (calibrage n=1) ; erratum du coefficient C_V paper §5.2 (1/4 → 1/12).",

    // §35 v0.6 — Diagnostic γ prédit vs observé
    "gamma_check.title":           "🔍 γ prédit vs observé",
    "gamma_check.desc":            "Saisissez votre γ mesuré empiriquement. L'outil détecte le régime : fraude (θ gonflé) / comprimé / sur-Padé / SWA-aléatoire / normal.",
    "gamma_check.gobs_label":      "γ_observé",
    "gamma_check.gobs_tip":        "γ mesuré empiriquement à partir des attention scores de votre modèle. Utilisez la CLI Diagnose pour l'obtenir depuis les poids réels.",
    "gamma_check.random_label":    "Corpus aléatoire ?",
    "gamma_check.random_tip":      "Cochez si γ_observé a été mesuré sur des tokens aléatoires/non structurés. Distingue la signature SWA (γ_obs > 1) d'une anomalie.",
    "gamma_check.regime":          "Régime",
    "gamma_check.regime.normal":         "Normal",
    "gamma_check.regime.fraud":          "Fraude (θ gonflé)",
    "gamma_check.regime.compressed":     "Contexte comprimé",
    "gamma_check.regime.overpade":       "Sur-Padé",
    "gamma_check.regime.swa":            "Signature SWA (corpus aléatoire)",
    "gamma_check.regime.unknown":        "Inconnu",
    "gamma_check.regime.normal.desc":    "η ∈ [0.85, 1.15] : le modèle utilise son contexte nominal complet, sans anomalie.",
    "gamma_check.regime.fraud.desc":     "η < 0.01 : θ nominal gonflé. Le modèle se comporte comme si θ ≪ annoncé. Probable inflation YaRN/marketing sans vraie extension de contexte.",
    "gamma_check.regime.compressed.desc":"η ∈ [0.01, 0.5) : contexte comprimé (le modèle attend moins loin que ne le prédit θ nominal). Fréquent en instruction-tuned / RLHF.",
    "gamma_check.regime.overpade.desc":  "η > 1.5 : le modèle attend plus loin que Padé ne le prédit. Régime Lerch-corrigé possible ou checkpoint précoce sous-entraîné.",
    "gamma_check.regime.swa.desc":       "γ_obs > 1.05 sur corpus aléatoire = signature de sliding-window attention (familles Mistral / Gemma).",
    "gamma_check.regime.unknown.desc":   "Entrées hors plage ou γ_obs > 1 sans flag corpus_aléatoire. Vérifiez la mesure.",
    "gamma_check.glossary.title":        "ⓘ Glossaire — signification des variables",
    "gamma_check.glossary.gamma_pade":   "<strong>γ_Padé</strong> : prédiction fermée (2−z)/(2+z), z = T√2/θ. Paper §sec:gamma_decomposition.",
    "gamma_check.glossary.gamma_obs":    "<strong>γ_observé</strong> : mesuré empiriquement à partir des attention scores (exécutez Diagnose CLI sur poids réels).",
    "gamma_check.glossary.theta_eff_obs":"<strong>θ_eff (observé)</strong> : inversé depuis γ_obs via T√2 / (1 − γ_obs). θ effectif impliqué par votre mesure.",
    "gamma_check.glossary.theta_eff_pade":"<strong>θ_eff (Padé)</strong> : θ + T/√2. θ effectif prédit par la formule fermée.",
    "gamma_check.glossary.efficiency":   "<strong>η</strong> : rapport θ_eff_obs / θ_eff_Padé. ≈1 = normal · &lt;0.01 = fraude · &lt;0.5 = comprimé · &gt;1.5 = sur-Padé.",
    "gamma_check.glossary.delta_h":      "<strong>ΔH_Cardy</strong> : log(θ_eff_obs / θ_nominal). Variation d'entropie de Cardy. Négatif = entropie de compression. ~0 = correspondance nominale.",
    "gamma_check.glossary.regime":       "<strong>Régime</strong> : classifieur automatique à partir de η + γ_obs + flag corpus_aléatoire.",

    // §36 v0.6 — Tooltips pour icônes ⓘ inline
    "tooltip.gamma_pade":          "<strong>γ_Padé(T_eval)</strong> : prédiction fermée (2−z)/(2+z), z = T√2/θ. Paper §sec:gamma_decomposition.",
    "tooltip.gamma_decomposed":    "<strong>γ_décomposé</strong> : γ depuis la décomposition architecturale complète. Ligne de base Padé + shift GQA + shift post-IH (sous-ensemble répliqué dans audit calibré).",
    "tooltip.d_horizon":           "<strong>d_horizon</strong> : horizon d'attention effectif. Au-delà, les scores tombent sous le plancher de bruit (paper §26).",
    "tooltip.L_NIAH":              "<strong>Plafond L_NIAH</strong> : plafond prédit de fiabilité needle-in-a-haystack au d_horizon courant.",
    "tooltip.chi":                 "<strong>χ susceptibilité</strong> : χ = 1/(1−γ). Diverge à la ligne Hagedorn γ=1.",
    "tooltip.kv_memory":           "<strong>Mémoire KV @ T_eval (BF16)</strong> : cache KV par requête = 2 · n_layers · n_kv_heads · d_head · T_eval octets.",
    "tooltip.theta_eff_obs":       "<strong>θ_eff (observé)</strong> : θ effectif impliqué par votre γ_observé : T√2 / (1 − γ_obs).",
    "tooltip.theta_eff_pade":      "<strong>θ_eff (Padé)</strong> : θ effectif prédit par la formule fermée : θ + T/√2.",
    "tooltip.efficiency":          "<strong>η = θ_eff_obs / θ_eff_Padé</strong> : ratio d'efficacité. ≈1 = normal · &lt;0.01 = fraude · &lt;0.5 = comprimé · &gt;1.5 = sur-Padé.",
    "tooltip.delta_h_cardy":       "<strong>ΔH_Cardy</strong> : log(θ_eff_obs / θ_nominal). Variation d'entropie de Cardy. Négatif = entropie de compression. ~0 = correspondance nominale.",
    "tooltip.verdict_aggregate":   "<strong>Verdict</strong> : pire-de toutes les recettes. ✅ GO = tout vert · ⚠ DÉGRADÉ = ≥1 jaune · ❌ NON = ≥1 rouge.",
    "tooltip.verdict_breakdown":   "<strong>Décomposition par recette</strong> : chaque recette teste un axe de décision <em>indépendant</em> (contexte-long · budget · matériel · custom-vs-API · compression-KV). Un ❌ en X-1 signifie « utilisez l'API pour votre volume » et non « le modèle échoue » — ouvrez la section Recettes pour le contexte par axe.",
    "tooltip.gamma_pill":          "<strong>γ vedette</strong> : γ_décomposé (ou γ_Padé en fallback). Plage (0,1) = Phase A (anti-Ising). γ ≥ 1 = Hagedorn / Phase B.",
    "tooltip.anti_ising":          "<strong>Classe Anti-Ising</strong> : Phase A → β = γ−1 &lt; 0. Machine-verified (Sage + Lean Mathlib4). Voir §35 v0.5.",

    // §37 v0.6 — Table des théorèmes Lean+Mathlib
    "lean.table.title":            "📑 Table des théorèmes Lean+Mathlib",
    "lean.table.desc":             "Chaque entrée ci-dessous est machine-proven contre Lean 4 + Mathlib4. Cliquez sur un lien L# pour aller à la ligne source sur GitHub. Groupé par thème — cliquez sur un en-tête pour déplier.",
    "lean.table.theorem":          "Théorème",
    "lean.table.claim":            "Énoncé",
    "lean.table.tactic":           "Tactique",
    "lean.table.source":           "Source",
    "lean.table.lean":             "Lean",
    "lean.findings.title":         "🔎 Findings substantiels",
    "lean.findings.detected_by":   "Détecté par",
    "lean.findings.fixed_by":      "Corrigé par",
    "lean.findings.recommendation":"Recommandation",
    "lean.meta.repo":              "Repo",
    "lean.meta.build":             "Build",
    "lean.meta.theorems":          "Théorèmes",
    "lean.meta.verified":          "vérifiés",
    "lean.meta.rejected":          "rejetés",
    "lean.meta.sorry":             "sorry",
    "lean.meta.findings":          "findings substantiels",
    "lean.manifest.loading":       "Chargement du manifeste Lean…",
    "lean.manifest.error":         "Manifeste Lean indisponible",

    // Help modal — section v0.6
    "help.v06.title":              "🆕 v0.6 — γ prédit-vs-observé + Cardy ΔH + badges Lean",
    "help.v06.intro":              "<em>v0.6 (2026-05-06) : trois nouveaux diagnostics vivent dans la TAF Card sous <strong>🔬 Diagnostics</strong>. Tout tourne dans votre navigateur ; γ_observé provient de la Diagnose CLI sur poids réels.</em>",
    "help.v06.layout.title":       "Disposition de la TAF Card (nouveau en v0.6)",
    "help.v06.layout.body":        "Après avoir cliqué <strong>🚀 Générer profil complet</strong>, la carte affiche : une <strong>bande hero</strong> en haut (classe d'architecture + méta + 3 pills : verdict agrégé ✅/⚠/❌, γ vedette, 🧲 Anti-Ising si Phase A) et quatre <strong>sections pliables</strong> : <strong>📋 Recettes</strong> (ouverte par défaut — verdict par dimension), <strong>🔬 Diagnostics</strong> (nombres clés, γ prédit vs observé, explorateur what-if), <strong>✓ Vérification</strong> (cohérence algébrique Sage+Lean, falsification F1-F23), <strong>📂 Provenance &amp; partage</strong> (audit de calibration + téléchargement JSON / lien / soumission registre). Cliquez sur n'importe quel en-tête pour déplier. Chaque variable a un tooltip <strong>ⓘ</strong> inline.",
    "help.v06.gamma_check.title":  "γ prédit vs observé",
    "help.v06.gamma_check.body":   "Saisissez le γ mesuré empiriquement et l'outil calcule <strong>η = θ_eff_obs / θ_eff_Padé</strong> et classe en l'un de 5 régimes :",
    "help.v06.case.normal":        "<strong>Normal</strong> (η ∈ [0.85, 1.15]) — le modèle utilise son contexte nominal complet. <em>Cas d'usage</em> : valider une nouvelle release avant adoption.",
    "help.v06.case.fraud":         "<strong>Fraude</strong> (η &lt; 0.01) — θ nominal gonflé ; le modèle se comporte comme si θ ≪ annoncé. <em>Cas d'usage</em> : détecter inflation YaRN/marketing (motif CodeLlama / Mistral-Nemo).",
    "help.v06.case.compressed":    "<strong>Comprimé</strong> (η &lt; 0.5) — contexte comprimé ; le modèle attend moins loin que θ nominal. <em>Cas d'usage</em> : repérer compression par RLHF/instruction-tuning (motif LLaMA-2).",
    "help.v06.case.overpade":      "<strong>Sur-Padé</strong> (η &gt; 1.5) — le modèle attend plus loin que Padé ne le prédit. <em>Cas d'usage</em> : identifier régime Lerch-corrigé ou checkpoints précoces sous-entraînés (motif pythia-1b).",
    "help.v06.case.swa":           "<strong>SWA corpus aléatoire</strong> (γ_obs &gt; 1.05 avec corpus_aléatoire=Oui) — signature de sliding-window attention. <em>Cas d'usage</em> : confirmer SWA Mistral / Gemma sur tokens aléatoires.",
    "help.v06.cardy.title":        "Diagnostic Cardy ΔH",
    "help.v06.cardy.body":         "<strong>ΔH_Cardy = log(θ_eff_obs / θ_nominal)</strong>. Variation d'entropie entre le θ effectif observé et le θ nominal. Fortement négatif = entropie de compression ; proche de zéro = correspondance nominale. Complète η pour les cas borderline.",
    "help.v06.lean.title":         "Badges de vérification Lean + Mathlib",
    "help.v06.lean.body":          "Les identités TAF sont formellement machine-proven en Lean Mathlib4 : <strong>37 théorèmes</strong> en 7 groupes (Padé, flot RG, Cayley, D-SAGE, résultats d'audit, erratum CV, divers) + <strong>1 résultat substantiel</strong> (facteur 2 dans la dérivée V, théorème <code>V_derivative_ne_RG_beta</code>). Source : <a href=\"https://github.com/karlesmarin/lean-taf\" target=\"_blank\">github.com/karlesmarin/lean-taf</a> (commit 25c77fd). Re-vérifier localement : <code>git clone --depth=1 https://github.com/karlesmarin/lean-taf &amp;&amp; cd lean-taf &amp;&amp; lake exe cache get &amp;&amp; lake env lean Taf/Identities.lean</code>. La pill 🧲 Anti-Ising du hero et la section Vérification renvoient à des lignes sources spécifiques.",
    "help.v06.glossary.title":     "Glossaire des variables (également intégré dans la TAF Card)",
    "help.v06.glossary.body":      "Chaque variable de la TAF Card a un tooltip ⓘ inline. Liste complète : γ, γ_Padé, γ_décomposé, γ_observé, θ, θ_eff_obs, θ_eff_Padé, η, ΔH_Cardy, χ, d_horizon, L_NIAH, mémoire KV, régime. Survolez n'importe quel ⓘ pour la définition + section du paper.",

    "hero.title":     "🔬 TAF Agent",
    "hero.tagline":   "Diagnostiquez n'importe quel LLM transformer en 30 secondes. Gratuit. Sans GPU. Sans inscription.",
    "hero.subtitle":  "Prédit si un modèle conviendra à votre cas d'usage <em>avant</em> que vous ne dépensiez argent ou temps. Tout tourne dans votre navigateur &mdash; vos données ne quittent jamais cet onglet.",
    "hero.help":      "📘 Manuel et exemples",
    "hero.quickstart_btn": "⚡ Démarrage rapide",
    "hero.inventory_btn":  "🧰 Ce que ça offre",
    "hero.about":     "Conçu par un chercheur indépendant. Open source. Non affilié à un fournisseur de modèles.",

    "modes.title":    "🎯 Mode",
    "modes.profile":  "📇 Profiler un modèle",
    "modes.compare":  "🆚 Comparer des modèles",
    "modes.inspector": "🔍 Inspecter config",
    "modes.ask":      "💬 Question libre",
    "modes.recipe":   "📋 Choisir une recette",
    "modes.diagnose": "🩺 Diagnose CLI",
    "diagnose.title": "🩺 Générateur de commande Diagnose CLI",
    "diagnose.tip":   "Le navigateur prédit γ à partir de la config; le CLI mesure γ_obs sur les poids réels. Ce générateur produit la commande exacte à exécuter localement.",
    "diagnose.desc":  "Choisissez les options et copiez-collez la commande générée sur votre machine locale (Python + transformers + numpy). Mode rapide ≈5 min CPU; complet ≈20–60 min GPU.",
    "diagnose.model_label": "ID du modèle HF:",
    "diagnose.theta_label": "θ (auto si vide):",
    "diagnose.n_label": "Contexte N:",
    "diagnose.options_label": "Options:",
    "diagnose.opt_fast": "--fast (CPU, ~5 min)",
    "diagnose.opt_cpu": "--cpu (forcer CPU)",
    "diagnose.opt_4bit": "--load_in_4bit (modèles ≥7B)",
    "diagnose.local_label": "--local path (optionnel):",
    "diagnose.build_btn": "📋 Générer la commande",
    "diagnose.cmd_title": "Commande générée :",
    "diagnose.copy_btn": "📋 Copier dans le presse-papiers",
    "diagnose.next_steps": "Prochaines étapes: (1) git clone https://github.com/karlesmarin/tafagent (2) cd tafagent && pip install torch transformers numpy (3) Exécutez la commande (4) JSON résultat → upload via mode Inspect pour analyse TAF complète.",
    "modes.phase":    "📊 Diagramme de phase",
    "phase.title":    "📊 Diagramme de phase (γ × θ)",
    "phase.tip":      "Chaque point est un modèle du panel empirique du paper. x: log θ; y: γ. La ligne Hagedorn γ=1 sépare Phase A de Phase B. Survolez pour les détails, cliquez pour charger dans le formulaire.",
    "phase.desc":     "23 modèles dans le panel; courbe Padé à T=2000.",
    "modes.desc":     "<strong>Démarrage rapide</strong>: collez n'importe quel id de modèle HuggingFace (ex. <code>meta-llama/Meta-Llama-3-8B</code>), cliquez Profiler. Voyez les 5 recettes évaluées en quelques secondes.",

    "profile.title":           "📇 Profiler un modèle",
    "profile.desc":            "<strong>Pour techniciens</strong>: quand vous avez besoin d'un instantané complet de viabilité d'un modèle candidat. Un clic exécute les 5 recettes et produit une TAF Card unifiée.",
    "profile.preset_label":    "Préréglage:",
    "profile.preset_default":  "— ou choisir dans la liste —",
    "profile.hf_label":        "ID modèle HF:",
    "profile.fetch_btn":       "📥 Charger",
    "profile.btn":             "🚀 Générer profil complet",
    "profile.quickstart":      "💡 Démarrage rapide: choisissez un préréglage → cliquez Générer. Ou collez un id depuis <a href='https://huggingface.co/models?library=transformers&sort=trending' target='_blank'>HF Hub tendances</a> → 📥 Charger → Générer.",

    "compare.title":           "🆚 Comparer côte à côte",
    "compare.desc":            "<strong>Pour techniciens</strong>: quand vous choisissez entre 2-3 modèles candidats pour un scénario de déploiement spécifique. Même recette, plusieurs modèles, verdicts côte à côte.",
    "compare.recipe_label":    "Recette:",
    "compare.T_eval_label":    "T_eval (contexte cible):",
    "compare.models_title":    "Modèles à comparer (jusqu'à 3)",
    "compare.btn":             "🚀 Comparer",
    "compare.example":         "💡 Essayez: collez 3 modèles populaires de 7-8B (Meta-Llama-3-8B, Mistral-7B-v0.1, Qwen/Qwen2.5-7B), recette X-2, T_eval=16000. Voyez lequel gère le mieux le contexte long.",

    "ask.title":               "❓ Votre question",
    "ask.placeholder":         "ex. Mistral-7B gérera-t-il 16K NIAH? Ou: J'ai 5,000$, quel modèle puis-je entraîner? Ou: GPU le moins cher pour servir Llama-70B à 100M tokens/jour?",
    "ask.btn":                 "🚀 Analyser",
    "ask.example_btn":         "💡 Essayer un exemple",

    "recipe.title":            "📋 Recette",
    "recipe.default":          "— choisir une recette —",
    "recipe.input_title":      "🎯 Entrées",

    "verdict.title":           "📊 Verdict",
    "chain.title":             "🔍 Chaîne de calcul",
    "chain.desc":              "Chaque nombre ci-dessous est du Python déterministe. Cliquez sur une étape pour développer.",
    "answer.title":            "💬 Réponse en langage naturel",
    "share.btn":               "🔗 Copier le lien",
    "share.copied":            "✅ Copié dans le presse-papiers!",
    "share.download":          "💾 Télécharger JSON",
    "share.download_md":       "📝 Markdown",
    "share.download_tex":      "📜 LaTeX",
    "share.submit":            "📤 Soumettre au registry",
    "share.submit_clip_ok":    "↗ GitHub ouvert. Corps copié dans le presse-papiers — collez-le dans le corps de l'issue.",
    "share.submit_clip_fail":  "↗ GitHub ouvert. Presse-papiers bloqué — corps dans la console du navigateur (F12).",
    "share.import_title":      "📂 Importer un résultat TAF partagé",
    "a11y.skip":               "Aller au contenu principal",

    // v0.6.2 — landing page redesign: quick start + inventory + architecture tooltips
    "qs.title":                    "⚡ Démarrage rapide",
    "qs.step1":                    "Collez un model ID HuggingFace (ex. <code>meta-llama/Meta-Llama-3-8B</code>)",
    "qs.step2":                    "Cliquez sur <strong>📇 Profiler un modèle</strong>",
    "qs.step3":                    "Lisez votre TAF Card — verdict par cas d'usage + chiffres clés + maths vérifiées par Lean+Mathlib",
    "qs.cta":                      "↓ Commencer",
    "inv.title":                   "🧰 Ce que cet outil vous offre",
    "inv.recipes.title":           "🎯 8 recettes — ce modèle convient-il à votre usage ?",
    "inv.recipes.x1.title":        "Entraînement propre vs API",
    "inv.recipes.x1.body":         "lequel coûte moins cher pour votre trafic ?",
    "inv.recipes.x2.title":        "Contexte long",
    "inv.recipes.x2.body":         "tient-il 32k / 128k tokens de manière fiable ?",
    "inv.recipes.x3.title":        "Budget",
    "inv.recipes.x3.body":         "avec $X, quel modèle pouvez-vous entraîner ?",
    "inv.recipes.x5.title":        "Matériel",
    "inv.recipes.x5.body":         "quel GPU pour servir N tokens/jour ?",
    "inv.recipes.x19.title":       "KV cache",
    "inv.recipes.x19.body":        "comment compresser sans casser la qualité ?",
    "inv.recipes.x21.title":       "Pureté d'imprint",
    "inv.recipes.x21.body":        "à quel point l'encodage positionnel est-il propre ?",
    "inv.recipes.x22.title":       "Compute-contexte",
    "inv.recipes.x22.body":        "le modèle entre-t-il dans la bande empirique ?",
    "inv.recipes.x23.title":       "Phase IH",
    "inv.recipes.x23.body":        "pré- ou post-induction-head ?",
    "inv.diag.title":              "🔬 Diagnostics",
    "inv.diag.gamma":              "<strong>γ prédit vs observé</strong> — auto-classe le modèle en 5 régimes (normal · fraude / contexte gonflé · compressé · over-Padé · sliding-window)",
    "inv.diag.cardy":              "<strong>Cardy ΔH</strong> — décalage d'entropie entre contexte observé et nominal",
    "inv.diag.fals":               "<strong>Tableau de falsifiabilité</strong> — vérifie 23 prédictions spécifiques (F1–F23)",
    "inv.diag.alg":                "<strong>Cohérence algébrique</strong> — 8 identités mathématiques que le modèle doit satisfaire",
    "inv.verify.title":            "✓ Maths formellement vérifiées",
    "inv.verify.count":            "<strong>37 théorèmes</strong> machine-proven en Lean 4 + Mathlib4",
    "inv.verify.click":            "Cliquez sur un badge → ouvre la ligne source sur GitHub",
    "inv.verify.reverify":         "Vérifiez vous-même : <code>lake build</code> (≈5 s après cache)",
    "inv.export.title":            "📤 Export et partage",
    "inv.export.formats":          "<strong>JSON · Markdown · LaTeX</strong> (prêt pour papier)",
    "inv.export.share":            "Lien reproductible (état encodé dans l'URL)",
    "inv.export.registry":         "Soumettre au registre communautaire sur GitHub",
    "arch.summary":                "Architectures prises en charge",
    "arch.anyhf":                  "✓ Tout modèle public HuggingFace",
    "tooltip.mha":                 "Multi-Head Attention : chaque position attend via plusieurs têtes parallèles à la fois.",
    "tooltip.gqa":                 "Grouped Query Attention : les queries partagent moins de keys/values que de heads (économise mémoire mais pousse γ vers Hagedorn).",
    "tooltip.alibi":               "Attention with Linear Biases : l'info de position est une pente apprise ajoutée aux scores, sans rotation.",
    "tooltip.abspe":               "Absolute Position Embeddings : chaque position a un vecteur fixe appris ajouté au token.",
    "tooltip.swa":                 "Sliding Window Attention : chaque token n'attend que dans une fenêtre locale fixe (Mistral, gemma-2 l'utilisent).",
    "tooltip.ssm":                 "State Space Model : couche de séquence qui maintient un état interne au lieu d'attention (Mamba, Jamba l'utilisent).",

    // v0.7.0 — anti-bullshit pack #1: SWA / RoPE-scaling unmasker
    "modes.unmask":                "🪟 Démasquer",
    "unmask.title":                "🪟 Démasqueur de contexte",
    "unmask.tip":                  "Collez un id de modèle HuggingFace (ou config.json brut). L'outil détecte sliding-window attention, RoPE scaling (YaRN/linear/dynamic NTK), et GQA — tout ce qui rend <code>max_position_embeddings</code> plus grand que le contexte effectif réel. Mistral-7B-v0.1 est l'exemple canonique : déclare 32k, attend dans ~4-8k.",
    "unmask.desc":                 "<strong>Êtes-vous sur le point de dépenser de l'argent sur un modèle qui n'attend pas vraiment aussi loin ?</strong> Collez un id et découvrez-le en 1 seconde. Sans GPU, sans inférence — juste de l'arithmétique sur config.json.",
    "unmask.id_label":             "ID modèle HF :",
    "unmask.fetch_btn":            "🔍 Démasquer",
    "unmask.paste_summary":        "Ou collez config.json brut (modèles privés / en dev)",
    "unmask.paste_btn":            "🔍 Démasquer config collé",
    "unmask.label.declared":       "Contexte déclaré",
    "unmask.label.effective":      "Effectif (estimé)",
    "unmask.label.ratio":          "Ratio",
    "unmask.section.flags":        "Drapeaux d'architecture",
    "unmask.section.warnings":     "Avertissements",
    "unmask.section.reco":         "Recommandation",
    "unmask.flag.swa":             "SWA",
    "unmask.flag.rope":            "RoPE scaling",
    "unmask.flag.gqa":             "GQA",
    "unmask.flag.layers":          "Couches",
    "unmask.flag.dhead":           "d_head",
    "unmask.flag.theta":           "RoPE θ",
    "unmask.flag.yes":             "oui",
    "unmask.flag.no":              "non",
    "unmask.flag.full_mha":        "non (MHA complet, {n} heads)",
    "unmask.verdict.honest":            "✅ HONNÊTE",
    "unmask.verdict.inflated":          "⚠ GONFLÉ",
    "unmask.verdict.severely_inflated": "❌ GRAVEMENT GONFLÉ",
    "unmask.verdict.yarn_extended":     "⚠ YARN-ÉTENDU",
    "unmask.verdict.unknown":           "❓ INCONNU",
    "unmask.warn.swa_window":      "Fenêtre SWA : {window} tokens — chaque couche n'attend que dans cette fenêtre.",
    "unmask.warn.multihop":        "Estimation multi-hop : ~{multiHop} tokens (conservateur : fenêtre × {factor}).",
    "unmask.warn.yarn":            "RoPE scaling ({type}) étend le contexte {factor}× de ~{original} à {declared} tokens.",
    "unmask.warn.yarn_advice":     "Contexte RoPE-étendu — vérifiez le comportement de γ à la longueur déclarée avec le diagnostic γ_check.",
    "unmask.warn.gqa_small_dhead": "Petite head dim ({d_head}) + GQA : compression de KV cache probable en contexte long (γ poussé vers Hagedorn).",
    "unmask.reco.honest":              "Modèle d'attention complète standard. Contexte effectif correspond au déclaré ({declared} tokens).",
    "unmask.reco.inflated":            "Effectif ~{effective} tokens via SWA. Utilisez γ_check pour vérifier le comportement à votre longueur cible.",
    "unmask.reco.severely_inflated":   "Traitez-le comme un modèle de ~{effective} tokens en pratique. Le claim de {declared} tokens ne s'applique que via des chaînes d'attention cross-layer, qui dégradent empiriquement au-delà de ~2× la fenêtre SWA.",
    "unmask.reco.yarn_extended":       "Contexte RoPE-étendu. Lancez un benchmark long-context (NIAH à 8k / 16k / 32k / full) pour confirmer que l'extension tient. Utilisez γ_check avec T_eval = {declared}.",
    "unmask.reco.unknown":             "Impossible de parser le config. Vérifiez que l'URL est un modèle HF valide avec config.json public.",
    "unmask.status.empty_id":      "⚠ Saisissez un model id (ex. mistralai/Mistral-7B-v0.1).",
    "unmask.status.fetching":      "⏳ Récupération config.json pour {modelId}...",
    "unmask.status.success":       "✅ {modelId} analysé (verdict : {verdict})",
    "unmask.status.empty_paste":   "⚠ Collez d'abord un config.json.",
    "unmask.status.invalid_json":  "❌ JSON invalide : {error}",
    "unmask.status.success_paste": "✅ Config collé analysé (verdict : {verdict})",
    "unmask.pasted_label":         "(config collé)",
    "mode_desc.ask":               "Tapez une question libre. Le LLM dans le navigateur choisit la recette et l'exécute.",
    "mode_desc.recipe":            "Sélectionnez une recette directement et remplissez le formulaire. Contrôle manuel complet.",
    "mode_desc.profile":           "Démarrage le plus rapide : collez n'importe quel model id HuggingFace, cliquez Profiler. Voyez les 5 recettes en quelques secondes.",
    "mode_desc.compare":           "Choisissez 2-3 modèles candidats + une recette. Verdicts côte à côte dans un tableau.",
    "mode_desc.inspector":         "Collez un config.json directement. Utile pour modèles privés / en dev non publiés sur HF Hub.",
    "mode_desc.diagnose":          "Construit la commande CLI diagnose_model.py pour MESURER γ_obs sur GPU réel. Le navigateur prédit ; le CLI mesure.",
    "mode_desc.phase":             "Scatter γ × θ du panel empirique du papier. Survolez les points pour détails, cliquez pour charger dans Diagnose / Recipe.",
    "mode_desc.unmask":            "Détecte si max_position_embeddings est trompeur (SWA / YaRN / RoPE-scaling). Collez un model id, obtenez un verdict en 1 ligne.",
    "profile.preset_loaded":       "✅ Préréglage chargé pour <strong>{id}</strong>. Formulaire pré-rempli. (Cliquez 📥 Charger pour écraser avec le dernier config depuis HF Hub.)",

    // v0.7.1 — anti-bullshit pack #2: Chat-template Sniffer
    "modes.template":              "📜 Chat-template",
    "mode_desc.template":          "Détecte la famille de chat-template d'un modèle (Llama-3 / ChatML / Mistral / Gemma / Phi-3 / Alpaca / DeepSeek). Donne le flag CLI exact pour lm-eval / vLLM / transformers.",
    "template.title":              "📜 Détecteur de Chat-template",
    "template.tip":                "Collez un model id HF (ou tokenizer_config.json brut). Détecte la famille du chat-template et donne la commande exacte pour l'utiliser correctement. lm-eval-harness divise l'accuracy par 2 silencieusement si vous oubliez de l'appliquer (issue #1841).",
    "template.desc":               "<strong>Avez-vous oublié <code>--apply_chat_template</code> ?</strong> La plupart des évals multi-tours échouent à ~50% parce que le chat template n'a pas été appliqué. Collez un model id, obtenez le flag CLI exact pour votre stack.",
    "template.id_label":           "ID modèle HF :",
    "template.fetch_btn":          "📜 Détecter",
    "template.paste_summary":      "Ou collez tokenizer_config.json brut (modèles privés)",
    "template.paste_btn":          "📜 Détecter config collé",
    "template.label.family":       "Famille détectée",
    "template.label.markers":      "Marqueurs correspondants",
    "template.label.tpl_len":      "Longueur du template",
    "template.section.warnings":   "Avertissements",
    "template.section.commands":   "Commandes par framework",
    "template.section.raw":        "Template brut (preview)",
    "template.family.custom":      "custom (famille inconnue)",
    "template.family.none":        "(pas de chat_template)",
    "template.verdict.ok":         "✅ TEMPLATE DÉTECTÉ",
    "template.verdict.custom":     "⚠ TEMPLATE CUSTOM",
    "template.verdict.missing":    "❌ PAS DE CHAT TEMPLATE",
    "template.verdict.base_model": "ℹ MODÈLE DE BASE (sans chat)",
    "template.verdict.unknown":    "❓ INCONNU",
    "template.warn.no_chat_template": "Pas de champ <code>chat_template</code> dans tokenizer_config.json. Typique des modèles base / pré-entraînés. Si vous attendiez un modèle instruct-tuned, le mauvais fichier peut être chargé.",
    "template.warn.custom_template":  "Template non standard ({length} chars). L'outil n'a pas pu le faire correspondre aux familles connues. Inspectez le preview et vérifiez que votre framework d'éval le supporte.",
    "template.warn.lm_eval_apply":    "<strong>lm-eval-harness :</strong> ajoutez <code>--apply_chat_template</code> ou votre accuracy chutera silencieusement de ~50% sur les évals multi-tours (issue #1841).",
    "template.warn.vllm_apply":       "<strong>vLLM serve :</strong> vérifiez que <code>--chat-template</code> est défini (l'auto-détection échoue parfois sur les variantes fine-tunées). Suggéré : <code>{name}</code>.",
    "template.status.empty_id":    "⚠ Saisissez un model id (ex. mistralai/Mistral-7B-Instruct-v0.3).",
    "template.status.fetching":    "⏳ Récupération tokenizer_config.json pour {modelId}...",
    "template.status.success":     "✅ {modelId} détecté (verdict : {verdict})",
    "template.status.empty_paste": "⚠ Collez d'abord un tokenizer_config.json.",
    "template.status.invalid_json":"❌ JSON invalide : {error}",
    "template.status.success_paste":"✅ Config collé détecté (verdict : {verdict})",
    "template.pasted_label":       "(tokenizer_config collé)",

    // v0.7.2 — anti-bullshit pack #3: Arena-Elo CI reconstructor
    "modes.arena":                 "🎯 Arena CI",
    "mode_desc.arena":             "Récupère les intervalles de confiance à partir des données brutes de votes pairwise (MLE Bradley-Terry + bootstrap). Détecte les paires statistiquement à égalité que le leaderboard public d'Arena cache.",
    "arena.title":                 "🎯 Reconstructeur Arena-Elo CI",
    "arena.tip":                   "Chatbot Arena masque les intervalles de confiance dans le leaderboard public. Un écart de 5 Elo peut être statistiquement insignifiant. Collez les données brutes de votes (model_a, model_b, winner) — l'outil calcule le MLE Bradley-Terry + bootstrap CIs et liste les égalités statistiques (overlap CI).",
    "arena.desc":                  "<strong>GPT-4 est-il vraiment meilleur que Claude — ou sont-ils à égalité ?</strong> Collez le CSV de votes pairwise (ou cliquez <em>Charger un échantillon</em>). MLE Bradley-Terry + 200 itérations de bootstrap → Elos classés avec CIs 95% et détection d'égalités statistiques. Tout dans le navigateur.",
    "arena.sample_btn":            "📊 Charger échantillon",
    "arena.run_btn":                "🎯 Calculer CIs",
    "arena.clear_btn":             "🗑️ Effacer",
    "arena.csv_summary":           "CSV de votes (header : <code>model_a,model_b,winner</code> ; winner ∈ a/b/tie)",
    "arena.section.ranked":        "Elos classés avec CIs 95%",
    "arena.section.ties":          "Égalités statistiques (overlap CI)",
    "arena.section.summary":       "Résumé",
    "arena.col.rank":              "#",
    "arena.col.model":             "Modèle",
    "arena.col.elo":               "Elo",
    "arena.col.ci":                "CI 95%",
    "arena.col.ci_width":          "± demi-largeur",
    "arena.col.matches":           "Matchs",
    "arena.col.wins":              "V / D / E",
    "arena.col.tie_pair":          "Paire",
    "arena.col.tie_diff":          "Écart Elo",
    "arena.col.tie_overlap":       "Overlap CI",
    "arena.no_ties":               "Aucune égalité statistique — toutes les paires sont distinguables à 95% CI.",
    "arena.summary.votes":         "Total des votes",
    "arena.summary.models":        "Modèles",
    "arena.summary.ties":          "Égalités statistiques",
    "arena.summary.bootstrap":     "Itérations bootstrap",
    "arena.summary.ci_level":      "Niveau CI",
    "arena.status.empty":          "⚠ Collez un CSV de votes ou cliquez sur Charger échantillon.",
    "arena.status.too_few":        "⚠ Seulement {n} votes valides — il en faut au moins 10 pour ajuster Bradley-Terry de manière fiable.",
    "arena.status.computing":      "⏳ Calcul MLE Bradley-Terry + bootstrap sur {n} votes...",
    "arena.status.done":           "✅ {n} votes · {models} modèles · {ties} égalités statistiques · {ms} ms",
    "arena.status.sample_loaded":  "✅ Échantillon chargé (données Arena synthétiques 6 modèles). Cliquez sur Calculer CIs.",

    // v0.7.3 — anti-bullshit pack #4: Contamination Prior
    "modes.contam":                "🧪 Contamination",
    "mode_desc.contam":            "Prior bayésien-ish sur la contamination d'un score de benchmark. Saisissez le cutoff d'entraînement → note 20+ benchmarks populaires (MMLU, GSM8K, HumanEval, MMLU-Pro…).",
    "contam.title":                "🧪 Prior de Contamination",
    "contam.tip":                  "Calcule un prior bayésien-ish indiquant si un score de benchmark est contaminé, basé sur (date de cutoff d'entraînement) × (date de sortie du benchmark) × (inclusion connue dans corpus + historique de leaks). Open LLM Leaderboard v1 a été tué en 2024 après la contamination de MMLU/HellaSwag.",
    "contam.desc":                 "<strong>Devez-vous faire confiance au score MMLU de votre modèle ?</strong> Saisissez la date de cutoff d'entraînement — l'outil note 20+ benchmarks populaires (MMLU, HellaSwag, GSM8K, HumanEval, IFEval, MMLU-Pro, GPQA…) et vous dit quels scores sont probablement contaminés.",
    "contam.cutoff_label":         "Cutoff entraînement :",
    "contam.run_btn":              "🧪 Noter tous les benchmarks",
    "contam.section.ranked":       "Priors de contamination par benchmark",
    "contam.section.high":         "🔴 Benchmarks à haut risque (traitez les scores comme non fiables)",
    "contam.section.medium":       "🟡 Risque moyen (vérifiez avec des alternatives)",
    "contam.section.low":          "🟢 Faible risque (probablement propres)",
    "contam.col.benchmark":        "Benchmark",
    "contam.col.released":         "Sorti",
    "contam.col.gap":              "Écart (mois)",
    "contam.col.prior":            "P(contam)",
    "contam.col.level":            "Niveau",
    "contam.col.corpora":          "Dans corpus",
    "contam.col.category":         "Catégorie",
    "contam.label.high":           "Haut risque",
    "contam.label.medium":         "Moyen",
    "contam.label.low":            "Faible",
    "contam.no_entries":           "(aucun dans cette catégorie)",
    "contam.advice.high":          "Traitez ces scores comme non fiables. Remplacez par des alternatives plus récentes / à test privé (MMLU-Pro, GPQA, MUSR, MATH-500).",
    "contam.advice.medium":        "À prendre avec précaution. Cherchez une réplication sur un subset held-out ou des reproductions communautaires.",
    "contam.advice.low":           "Score probablement non contaminé, mais absence de leak n'est pas une preuve — vérifiez avec un test alternatif.",
    "contam.summary.headline":     "Cutoff <code>{cutoff}</code> · {n} benchmarks notés",
    "contam.status.empty":         "⚠ Saisissez une date de cutoff d'entraînement (ex. 2023-12).",
    "contam.status.bad_date":      "⚠ Format de date incorrect. Utilisez YYYY-MM ou YYYY-MM-DD.",
    "contam.status.done":          "✅ Cutoff {cutoff} · {n} benchmarks notés · {high} à haut risque",

    // v0.7 — Help modal section
    "help.v07.title":              "🆕 v0.7 — Pack anti-bullshit (4 nouveaux modes)",
    "help.v07.intro":              "<em>v0.7 (2026-05-06) : quatre nouveaux modes qui résolvent des problèmes concrets remontés par la communauté HuggingFace. Chacun tourne dans votre navigateur sans inférence — pure métadonnée + maths.</em>",
    "help.v07.unmask.title":       "🪟 Démasqueur de Contexte",
    "help.v07.unmask.body":        "Détecte quand <code>max_position_embeddings</code> est trompeur. Mistral-7B-v0.1 déclare 32k mais attend dans ~4-8k via SWA. Collez un id HF → verdict en 1 seconde (HONNÊTE / GONFLÉ / GRAVEMENT GONFLÉ / YARN-ÉTENDU). Détecte SWA, RoPE-scaling (YaRN/linear/dynamic NTK), petit d_head + GQA. <em>Cas d'usage</em> : avant de payer un GPU pour 32k de contexte, vérifiez que le modèle attend vraiment aussi loin.",
    "help.v07.template.title":     "📜 Détecteur de Chat-template",
    "help.v07.template.body":      "Détecte la famille de chat-template d'un modèle (Llama-3 / ChatML / Mistral / Gemma / Phi-3 / Alpaca / DeepSeek / custom / none) et donne le flag CLI exact pour lm-evaluation-harness, vLLM, et transformers. Résout l'issue #1841 de lm-eval-harness : oublier <code>--apply_chat_template</code> divise l'accuracy multi-tours par 2 silencieusement. <em>Cas d'usage</em> : avant de reporter un score, confirmez avoir appliqué le template correctement.",
    "help.v07.arena.title":        "🎯 Reconstructeur Arena-Elo CI",
    "help.v07.arena.body":         "Chatbot Arena masque les intervalles de confiance de son leaderboard public — un écart de 5 Elo peut être statistiquement insignifiant. Collez des données brutes de votes pairwise (model_a, model_b, winner) → MLE Bradley-Terry + bootstrap 200 itérations → Elos classés avec CIs 95% et un panneau \"égalités statistiques\" listant les paires dont les CIs se chevauchent. Essayez le bouton Charger échantillon. <em>Cas d'usage</em> : avant de déclarer \"modèle A bat modèle B\", vérifiez que leurs CIs ne se chevauchent pas.",
    "help.v07.contam.title":       "🧪 Prior de Contamination",
    "help.v07.contam.body":        "Prior bayésien-ish sur la contamination d'un score de benchmark. Saisissez la date de cutoff d'entraînement de votre modèle → l'outil note 20+ benchmarks populaires (MMLU, HellaSwag, GSM8K, HumanEval, IFEval, MMLU-Pro, GPQA, AIME, MATH-500, BBH, MUSR…) par P(contamination) selon l'écart temporel, l'inclusion dans corpus et l'historique de leaks connus. Open LLM Leaderboard v1 a été tué en 2024 après la contamination de MMLU/HellaSwag. <em>Cas d'usage</em> : décidez quels scores croire en comparant deux modèles.",
    "help.v07.quant.title":        "⚖️ Classificateur de régime de quantification",
    "help.v07.quant.body":         "Prédit le γ-shift et ΔPPL pour tout (modèle × schéma de quantification : NF4, AWQ, GPTQ, GGUF Q4_K_M / Q5_K_M / Q8_0, int8, FP8…). Arch-aware : petit d_head + GQA agressif → plus sensible ; les schémas calibrés (AWQ) absorbent mieux le shift que les non calibrés (NF4). Recommande des alternatives plus sûres si un cliff est détecté. <em>Cas d'usage</em> : avant de quantifier, prédisez si votre combo architecture × schéma maintiendra la PPL acceptable, avec une suggestion concrète de switch sinon.",
    "help.v07.drift.title":        "🔀 Borne de drift inter-frameworks",
    "help.v07.drift.body":         "Même modèle, scores différents sur setups différents. L'outil prédit le drift max admissible dû au seul bruit numérique (dtype, framework, batch). Si l'écart observé le dépasse → vrai bug, généralement chat-template mismatch (issue #1841 lm-eval-harness) ou layout KV-cache. Essayez le bouton &quot;Charger échantillon&quot; pour le bug chat-template canonique. <em>Cas d'usage</em> : avant de reporter une régression ou de revendiquer la reproductibilité, vérifiez si l'écart entre deux évals est plus grand que ce que le bruit numérique peut expliquer.",
    "inv.v07.drift":               "<strong>🔀 Drift</strong> — bug ou bruit ? Prédit l'écart max admissible entre deux évals",
    "help.v07.niah.title":         "🔍 Gap NIAH → Reasoning",
    "help.v07.niah.body":          "Le paper RULER (NVIDIA 2024) montre que les modèles long-context passent souvent NIAH (retrieval de needle) mais échouent au reasoning multi-hop au même contexte. L'outil prédit les deux taux de réussite à partir de l'architecture (γ_Padé + d_horizon + pression arch : petit d_head, GQA, SWA), reporte le gap, et trouve le \"contexte sûr pour reasoning\" où le reasoning reste ≥65%. Mode balayage montre la courbe à 1k/4k/16k/64k/T_train. <em>Cas d'usage</em> : avant de déployer au contexte revendiqué, découvrez si le modèle va vraiment raisonner là ou seulement retrouver.",
    "inv.v07.niah":                "<strong>🔍 NIAH→Reason</strong> — votre \"128k\" raisonne-t-il vraiment là, ou seulement retrouve ?",

    // v0.7 — Inventory modal, 5th card
    "inv.v07.title":               "🆕 Pack anti-bullshit v0.7",
    "inv.v07.unmask":              "<strong>🪟 Unmask</strong> — config.json annonce 32k ? Voyez s'il attend vraiment aussi loin",
    "inv.v07.template":            "<strong>📜 Chat-template</strong> — flag CLI exact pour que lm-eval ne divise pas votre accuracy par 2 en silence",
    "inv.v07.arena":               "<strong>🎯 Arena CI</strong> — récupère les intervalles de confiance que Chatbot Arena cache",
    "inv.v07.contam":              "<strong>🧪 Contamination</strong> — note 20+ benchmarks par probabilité de contamination",
    "inv.v07.quant":               "<strong>⚖️ Quant</strong> — prédit le γ-shift + ΔPPL pour tout combo (modèle × schéma de quantification)",

    // v0.7.3 — anti-bullshit pack #5: Quant-regime classifier
    "modes.quant":                 "⚖️ Quant",
    "mode_desc.quant":             "Prédit le γ-shift et ΔPPL pour tout (modèle × schéma de quantification). Arch-aware : petit d_head + GQA → plus sensible. Recommande des alternatives plus sûres si un cliff est détecté.",
    "quant.title":                 "⚖️ Classificateur de régime de quantification",
    "quant.tip":                   "Prédit le γ-shift (et la ΔPPL résultante) pour une paire (modèle × schéma). Les claims génériques comme 'AWQ ~95% retention' sont trop vagues — TAF utilise d_head, ratio GQA, flag SWA et taille du modèle pour donner un verdict arch-spécifique. Résout : la communauté HF rapporte des cliffs de quantification imprédictibles (NF4 -2 PPL sur Phi-3 mais OK sur Llama-3-8B).",
    "quant.desc":                  "<strong>La quantification cassera-t-elle votre modèle ?</strong> Collez un id HF, choisissez un schéma — obtenez le γ-shift prédit, la bande ΔPPL attendue et une alternative recommandée si c'est un cliff. Navigateur uniquement, sans GPU, sans set de calibration.",
    "quant.id_label":              "ID modèle HF :",
    "quant.fetch_btn":             "📥 Récupérer config",
    "quant.scheme_label":          "Schéma quant :",
    "quant.run_btn":                "⚖️ Prédire",
    "quant.all_btn":               "📊 Comparer tous les schémas",
    "quant.regime.safe":           "✅ SÛR",
    "quant.regime.mild":           "✅ COMPRESSION LÉGÈRE",
    "quant.regime.significant":    "⚠ DÉGRADATION SIGNIFICATIVE",
    "quant.regime.cliff":          "❌ CLIFF SÉVÈRE",
    "quant.label.gamma_shift":     "γ shift",
    "quant.label.delta_ppl":       "ΔPPL (est.)",
    "quant.label.arch_mult":       "Multiplicateur arch",
    "quant.section.breakdown":     "Détail",
    "quant.section.reco":          "Recommandation",
    "quant.section.compare":       "Tous les schémas (triés par sécurité)",
    "quant.field.scheme":          "Schéma",
    "quant.field.calibrated":      "calibré",
    "quant.field.uncalibrated":    "non calibré",
    "quant.field.base_penalty":    "Pénalité de base",
    "quant.field.arch_mult_full":  "Multiplicateur architectural",
    "quant.field.gamma_shift":     "γ shift prédit",
    "quant.field.ppl_band":        "Bande ΔPPL (est.)",
    "quant.field.params":          "Paramètres",
    "quant.col.scheme":            "Schéma",
    "quant.col.bits":              "Bits",
    "quant.col.gamma_shift":       "γ shift",
    "quant.col.ppl_band":          "Bande ΔPPL",
    "quant.col.regime":            "Régime",
    "quant.reco.switch_to_awq":    "<strong>Passez à {scheme}</strong> — le 4-bit calibré gère bien mieux les petits d_head + GQA que NF4. ΔPPL attendue chute ~2-3×.",
    "quant.reco.switch_to_q5_km":  "<strong>Passez à {scheme}</strong> — Q5 garde plus de dimensions de head intactes à faible coût (~25% plus grand seulement).",
    "quant.reco.switch_to_q4_km":  "<strong>Passez à {scheme}</strong> — Q3/Q2 sont trop agressifs pour cette architecture.",
    "quant.reco.consider_awq":     "<strong>Considérez {scheme}</strong> — la calibration réduit significativement le γ-shift sur cette architecture.",
    "quant.reco.use_higher_bits":  "<strong>Utilisez une alternative à plus de bits</strong> — cette architecture n'absorbe pas le 4-bit proprement. Essayez 5 ou 8-bit.",
    "quant.reco.verify_with_eval": "<strong>Vérifiez avec une vraie éval</strong> — le shift prédit est borderline. Lancez NIAH à votre contexte cible avant de déployer.",
    "quant.reco.no_action":        "Pas d'action requise — la quantification est sûre pour cette architecture.",
    "quant.summary.headline_all":  "Tous les schémas pour <code>{modelId}</code>",
    "quant.status.empty_id":       "⚠ Saisissez un model id (ex. meta-llama/Llama-3.2-1B).",
    "quant.status.fetching":       "⏳ Récupération config.json pour {modelId}...",
    "quant.status.fetched":        "✅ Config récupéré pour {modelId}. Choisissez un schéma et cliquez Prédire (ou Comparer tous).",
    "quant.status.no_scheme":      "⚠ Choisissez un schéma de quantification dans le dropdown.",
    "quant.status.done":           "✅ Régime prédit : {regime}",
    "quant.status.done_all":       "✅ Comparé {n} schémas — triés par sécurité.",

    // v0.7.4 — HF Hub autocomplete: privacy notice + rate-limit messages
    "hf_auto.privacy":             "🔒 Requêtes envoyées à huggingface.co/api · cache local 5 min",
    "hf_auto.rate_limited":        "⚠ Rate limit HuggingFace — réessayez dans un moment, ou tapez l'id complet manuellement",
    "hf_auto.gated_msg":           "est gated. Acceptez la licence ici :",

    // v0.7.5 — anti-bullshit pack #6: Cross-framework drift bound
    "modes.drift":                 "🔀 Drift",
    "mode_desc.drift":             "Prédit le drift max admissible entre deux scores de benchmark donnés (framework, dtype, batch, chat-template). Distingue les vrais bugs du bruit numérique.",
    "drift.title":                 "🔀 Borne de drift inter-frameworks",
    "drift.tip":                   "Même modèle, scores différents sur des setups différents. L'écart est-il du bruit ou un vrai bug ? Saisissez deux scores avec leur (framework, dtype, batch, chat-template) — l'outil prédit le drift max admissible dû au seul bruit numérique. Si l'écart observé le dépasse → vrai bug, généralement chat-template mismatch (issue #1841 lm-eval) ou layout KV-cache.",
    "drift.desc":                  "<strong>Votre modèle donne 67.2 sur lm-eval-hf et 65.1 sur vLLM-served. Bug ou bruit ?</strong> Saisissez les deux scores avec (framework, dtype, batch, chat-template appliqué ?). L'outil prédit la bande de bruit et signale les vrais bugs. arxiv 2506.09501 documente cela comme un problème majeur de reproductibilité d'évals.",
    "drift.setup_a":               "Setup A",
    "drift.setup_b":               "Setup B",
    "drift.score":                 "Score",
    "drift.framework":             "Framework",
    "drift.dtype":                 "Dtype",
    "drift.batch":                 "Batch",
    "drift.template":              "Chat-template",
    "drift.template.applied":      "appliqué",
    "drift.template.not_applied":  "non appliqué",
    "drift.template.unknown":      "inconnu",
    "drift.run_btn":               "🔀 Calculer la borne de drift",
    "drift.sample_btn":            "📊 Charger échantillon (bug chat-template)",
    "drift.label.observed":        "Écart observé",
    "drift.label.band":            "Bande numérique",
    "drift.label.ratio":           "Écart / bande",
    "drift.section.setups":        "Setups",
    "drift.section.breakdown":     "Contributeurs au drift (bande numérique)",
    "drift.section.verdict":       "Verdict et recommandation",
    "drift.contrib.dtype":         "Mismatch de dtype",
    "drift.contrib.framework":     "Framework",
    "drift.contrib.batch":         "Différence de batch",
    "drift.contrib.template":      "MISMATCH de chat-template",
    "drift.dominant_cause":        "Cause dominante",
    "drift.cause.dtype":           "différence de précision dtype",
    "drift.cause.framework":       "différence de framework / kernel",
    "drift.cause.batch":           "chemins de normalisation par batch",
    "drift.cause.template_mismatch": "chat-template appliqué d'un côté mais pas de l'autre (motif #1841 lm-eval-harness — typiquement -50% sur multi-tours)",
    "drift.verdict.noise":         "✅ BRUIT NUMÉRIQUE",
    "drift.verdict.suspicious":    "⚠ SUSPECT — vérifiez",
    "drift.verdict.bug":           "❌ VRAI BUG — investiguez",
    "drift.verdict.bug_template":  "❌ BUG DE CHAT-TEMPLATE",
    "drift.reco.noise":            "L'écart entre dans la bande de bruit numérique attendue. Pas d'action requise ; la différence est cohérente avec la seule variation framework/dtype/batch.",
    "drift.reco.suspicious":       "L'écart est 1–2× la bande prédite. Borderline — possible vrai bug. Essayez d'aligner le contributeur dominant (ex. égalisez framework ou dtype) et re-testez.",
    "drift.reco.bug":              "L'écart est &gt; 2× la bande prédite. C'est un vrai bug. Inspectez le contributeur dominant — probablement une différence de tokenizer / chat-template / layout KV-cache. Lancez lm-eval-harness avec <code>--apply_chat_template</code> et confirmez.",
    "drift.reco.bug_template":     "Mismatch de chat-template détecté. C'est la cause la plus commune des grands écarts d'évals (issue #1841 lm-eval-harness). Relancez le côté &quot;non appliqué&quot; avec <code>--apply_chat_template</code> (ou réglez vLLM <code>--chat-template &lt;name&gt;</code>) et re-testez.",
    "drift.status.empty_scores":   "⚠ Saisissez les deux scores.",
    "drift.status.done":           "✅ Verdict : {verdict}",
    "drift.status.sample_loaded":  "✅ Échantillon chargé (bug chat-template canonique). Cliquez sur Calculer la borne de drift.",

    // v0.7.6 — anti-bullshit pack #7: NIAH → reasoning gap predictor
    // (the saturation-detector and Solutions Hub strings are interleaved in this section)
    "modes.niah":                  "🔍 NIAH→Reason",
    "mode_desc.niah":              "Prédit les taux de réussite NIAH (retrieval) et reasoning multi-hop à n'importe quel contexte. Résout : les modèles long-context passent souvent NIAH mais échouent au reasoning au même contexte (paper RULER).",
    "modes.saturation":            "📈 Saturation",
    "mode_desc.saturation":        "Indique si un benchmark discrimine encore les frontier models ou s'il est saturé (ex. MMLU 88-94% top, AIME 2025 déjà 96-100%). Retourne top-3 + verdict + remplacements recommandés.",
    "modes.hub":                   "🧭 Solutions",
    "mode_desc.hub":               "Carte de chaque problème documenté de LLM-eval → mode tafagent (si couvert) + outils externes curés. Trouvez la solution sans la réinventer. 30+ pains, 7 catégories.",
    "niah.title":                  "🔍 Gap NIAH → Reasoning",
    "niah.tip":                    "NIAH (Needle in a Haystack) teste le retrieval : 'trouve ce fait dans un long texte'. Le reasoning multi-hop teste l'inférence : 'combine les faits X+Y au début avec le fait Z à la fin'. Le paper RULER (NVIDIA 2024) montre que les modèles long-context passent souvent NIAH mais échouent au reasoning au même contexte. Cet outil prédit les deux taux à partir de la seule architecture.",
    "niah.desc":                   "<strong>Votre modèle revendique 128k de contexte. Va-t-il vraiment raisonner à 64k, ou seulement retrouver ?</strong> Collez un model id HF et un contexte cible — l'outil prédit les taux de réussite NIAH et reasoning multi-hop, le gap, et un 'contexte sûr' où le reasoning reste ≥65%.",
    "niah.id_label":               "ID modèle HF :",
    "niah.fetch_btn":              "📥 Récupérer config",
    "niah.teval_label":            "Contexte cible (T_eval) :",
    "niah.run_btn":                "🔍 Prédire",
    "niah.sweep_btn":              "📊 Balayer les contextes",
    "niah.label.niah":             "Taux NIAH",
    "niah.label.reasoning":        "Taux Reasoning",
    "niah.label.gap":              "Gap",
    "niah.label.safe_ctx":         "Contexte sûr pour reasoning",
    "niah.section.breakdown":      "Détail architectural",
    "niah.section.reco":           "Recommandation",
    "niah.section.sweep":          "Balayage des taux par longueur de contexte",
    "niah.field.dhorizon":         "d_horizon (effectif)",
    "niah.field.ratio":            "T_eval / d_horizon",
    "niah.field.arch_pressure":    "Pression arch (petit d_head + GQA + SWA)",
    "niah.field.theta":            "RoPE θ",
    "niah.field.t_train":          "T_train (revendiqué)",
    "niah.col.context":            "T_eval",
    "niah.col.niah":               "NIAH",
    "niah.col.reasoning":          "Reasoning",
    "niah.col.gap":                "Gap",
    "niah.col.verdict":            "Verdict",
    "niah.verdict.robust":         "✅ ROBUSTE",
    "niah.verdict.marginal":       "⚠ MARGINAL",
    "niah.verdict.degraded":       "⚠ DÉGRADÉ",
    "niah.verdict.retrieval_only": "❌ RETRIEVAL UNIQUEMENT",
    "niah.verdict.broken":         "❌ CASSÉ",
    "niah.reco.robust":            "Retrieval et reasoning tiennent tous deux à ce contexte. Sûr de déployer pour les tâches de lookup et d'inférence.",
    "niah.reco.marginal":          "Borderline. Le retrieval fonctionne mais le reasoning est fragile. À utiliser pour le lookup, pas pour l'inférence multi-étapes.",
    "niah.reco.degraded":          "Chute significative du reasoning. Le modèle trouve des faits mais peine à les combiner. Évitez les tâches multi-hop à cette longueur.",
    "niah.reco.retrieval_only":    "Constat canonique de RULER : le modèle passe NIAH mais échoue au reasoning. Utile pour les setups RAG (où le LLM ne fait que localiser les faits) mais PAS pour l'inférence chaînée. Réduisez votre contexte à la valeur 'sûre' ci-dessous.",
    "niah.reco.broken":            "Le modèle échoue même au retrieval basique à ce contexte. Traitez-le comme hors-distribution — re-testez à un contexte plus court.",
    "niah.safe_context":           "≤ {ctx} tokens (reasoning ≥ 65%)",
    "niah.safe_context_none":      "Aucun contexte sûr trouvé sous votre cible — le modèle échoue au reasoning même à de petits contextes.",
    "niah.summary.sweep":          "<code>{modelId}</code> — taux par contexte",
    "niah.status.empty_id":        "⚠ Saisissez un model id (ex. meta-llama/Llama-3.1-8B-Instruct).",
    "niah.status.bad_teval":       "⚠ Saisissez un contexte cible (≥ 512 tokens).",
    "niah.status.fetching":        "⏳ Récupération config.json pour {modelId}...",
    "niah.status.fetched":        "✅ Config récupéré pour {modelId}. Réglez T_eval et cliquez Prédire (ou Balayer les contextes).",
    "niah.status.done":            "✅ {verdict} — NIAH {niah}% · reasoning {reasoning}%",
    "niah.status.sweep_done":      "✅ Balayé {n} longueurs de contexte.",
    "saturation.title":            "📈 Détecteur de saturation des benchmarks",
    "saturation.tip":              "MMLU est saturé (88-94% sur tous les frontier models). Annoncer '92% sur MMLU' n'a plus de sens. Cet outil vous dit quels benchmarks discriminent encore les frontier models, lesquels sont saturés, et quoi utiliser à la place. Données : DemandSphere AI Frontier Tracker (CC BY-NC 4.0) rafraîchi 2026-05.",
    "saturation.desc":             "<strong>Votre benchmark est-il encore utile ?</strong> Choisissez un benchmark pour voir top-3 frontier scores, spread, et un verdict (saturated / near-saturated / discriminative) + remplacements recommandés.",
    "saturation.select_label":     "Benchmark :",
    "saturation.select.all":       "— afficher tous les benchmarks —",
    "saturation.run_btn":          "📈 Classer",
    "saturation.all_btn":          "📊 Afficher tout",
    "saturation.col.spread":       "Écart top-3",
    "saturation.col.mean":         "Moyenne top-3",
    "saturation.col.n":            "Modèles",
    "saturation.col.bench":        "Benchmark",
    "saturation.col.verdict":      "Verdict",
    "saturation.col.reco":         "Reco principale",
    "saturation.col.model":        "Modèle",
    "saturation.col.score":        "Score",
    "saturation.section.top3":     "Top-3 frontier scores",
    "saturation.section.recommendations": "Alternatives recommandées",
    "saturation.section.note":     "Notes",
    "saturation.section.all":      "Tous les benchmarks suivis",
    "saturation.verdict.saturated":      "🚨 SATURÉ",
    "saturation.verdict.near_saturated": "⚠ PRESQUE SATURÉ",
    "saturation.verdict.discriminative": "✅ DISCRIMINATIF",
    "saturation.verdict.sparse_data":    "ℹ DONNÉES RARES",
    "saturation.borderline":       "Borderline — à ±1pp d'un seuil de coupure. Traitez le verdict comme 'à vérifier soigneusement'.",
    "saturation.unknown":          "Benchmark inconnu.",
    "saturation.attribution":      "Données : DemandSphere AI Frontier Model Tracker (CC BY-NC 4.0) · HF Open LLM Leaderboard v3 (historique open-weight) · dernier fetch 2026-05-05.",
    "saturation.status.live":      "✅ Données en direct chargées — {count} modèles.",
    "saturation.status.baked":     "ℹ Utilisation du snapshot baked (fetch en direct indisponible).",
    "saturation.status.kb_fail":   "⚠ Impossible de charger le KB de saturation.",
    "saturation.status.done":      "✅ {name} — {verdict}",
    "saturation.status.all_done":  "✅ {n} benchmarks classés.",
    "help.v08.saturation.title":   "📈 Détecteur de saturation des benchmarks",
    "help.v08.saturation.body":    "MMLU est saturé (top 88-94%), AIME 2025 saturé en quelques mois après sa sortie, HumanEval presque saturé. Choisissez un benchmark et l'outil retourne top-3 frontier scores, spread, moyenne, et un verdict — saturated / near-saturated / discriminative — plus un remplacement recommandé (ex. MMLU → MMLU-Pro / GPQA / HLE). Fetch en direct depuis DemandSphere AI Frontier Tracker (CC BY-NC 4.0) si accessible ; snapshot baked 2026-05-05 sinon. <em>Cas d'usage</em> : avant de citer '92% sur MMLU' ou de concevoir une eval, vérifiez si le benchmark discrimine encore quelque chose.",
    "inv.v08.saturation":          "<strong>📈 Saturation</strong> — votre benchmark est-il encore utile, ou tous les frontiers sont-ils à égalité au sommet ?",
    "inv.v081.hub":                "<strong>🧭 Solutions Hub</strong> — chaque pain documenté mappé à un mode tafagent ou outil externe curé. Ne réinventez pas — trouvez.",
    "help.v081.hub.title":         "🧭 Solutions Hub",
    "help.v081.hub.body":          "tafagent comme intégrateur, pas silo. 30+ pains à travers 7 catégories (eval reliability · diagnostics · setup · training · retrieval · multimodal · observability), chacun mappé à (a) le mode tafagent qui le résout, s'il existe, et (b) les outils externes best-of-breed que la communauté utilise déjà (RAGAS, MTEB, HELM, MCP Schema Validator, llm-stats, llguidance, GlitchMiner, etc.). La barre de recherche matche pain, scénario, et nom d'outil. <em>Cas d'usage</em> : 'j'ai le problème X — tafagent le résout-il, et sinon, qui ?'",
    "hub.title":                   "🧭 Solutions Hub",
    "hub.tip":                     "Carte de chaque pain de LLM-eval documenté : quel mode tafagent l'adresse (si applicable), et les outils externes best-of-breed que la communauté utilise déjà. Objectif : couverture totale. Si l'outil canonique existe ailleurs, nous lions plutôt que de reconstruire.",
    "hub.desc":                    "<strong>Ne réinventez pas — trouvez.</strong> 30+ pains mappés à des modes tafagent + outils externes curés. Naviguez par catégorie, recherchez par mot-clé, ou voyez les lacunes où de nouveaux modes aideraient le plus.",
    "hub.clear_btn":               "✕ Effacer",
    "hub.no_mode":                 "externe",
    "hub.planned":                 "prévu :",
    "hub.best_for":                "Idéal pour",
    "hub.not_for":                 "Pas pour",
    "hub.tools":                   "Outils externes",
    "hub.status.loaded":           "✅ Chargés {total} pains dans {categories} catégories — {covered} couverts par des modes tafagent, {externalLinks} liens externes curés. Compilé {compiled}.",
    "hub.status.fail":             "⚠ Impossible de charger Solutions Hub.",
    "hub.search.empty":            "Aucune correspondance pour '{query}'. Essayez des termes plus larges (ex. 'eval', 'rag', 'tokenizer').",
    "hub.search.results":          "{n} correspondance(s) trouvée(s) pour '{query}'.",

    // v0.7.7 — Task tiles (UX redesign: 14 modes grouped by intent)
    "tiles.title":                 "🎯 Que voulez-vous faire ?",
    "tiles.subtitle":              "Choisissez une tâche. Chacune ouvre l'outil adéquat ci-dessous. Ou faites défiler pour la liste complète des 14 modes.",
    "tile.diagnose.title":         "🔬 Diagnostiquer un modèle",
    "tile.diagnose.desc":          "Ce modèle conviendra-t-il à mon cas d'usage ?",
    "tile.trust.title":            "✓ Faire confiance à un score",
    "tile.trust.desc":             "Dois-je croire ce nombre ? Bug ou bruit ?",
    "tile.eval.title":              "⚙️ Configurer une éval correctement",
    "tile.eval.desc":              "Obtenez le flag CLI exact pour lm-eval / vLLM / transformers.",
    "tile.compare.title":          "🆚 Comparer des modèles",
    "tile.compare.desc":           "Côte à côte, ou explorez le panel empirique de modèles.",
    "tile.manual.title":           "📋 Manuel / libre",
    "tile.manual.desc":            "Choisissez une recette à la main, ou demandez en langage naturel.",
    "tile.diagnose.tip":           "Commencez ici quand vous avez un id de modèle spécifique et voulez un diagnostic complet : <strong>Profile</strong> lance les 5 recettes d'un coup. <strong>Unmask</strong> vérifie si max_position_embeddings est honnête. <strong>NIAH→Reason</strong> prédit le gap retrieval-vs-reasoning. <strong>Quant</strong> prédit si quantifier va le casser. <strong>Inspect</strong> permet de coller un config.json brut pour modèles privés / en dev.",
    "tile.trust.tip":              "Quand vous voyez un score et voulez savoir s'il est réel. <strong>Contamination</strong> note 20+ benchmarks selon la probabilité que le modèle les ait vus en entraînement. <strong>Drift</strong> vous dit si l'écart entre deux évals est du bruit numérique ou un vrai bug (chat-template mismatch, layout KV-cache, etc.). <strong>Arena CI</strong> reconstruit les intervalles de confiance que Chatbot Arena cache — beaucoup de &quot;victoires&quot; top-Elo sont statistiquement à égalité.",
    "tile.eval.tip":               "Avant de lancer lm-eval-harness ou vLLM serve, obtenez le bon flag CLI. <strong>Chat-template Sniffer</strong> détecte la famille de template (Llama-3 / ChatML / Mistral / Phi-3 / DeepSeek / Alpaca / custom / none) et émet l'invocation exacte <code>--apply_chat_template</code> / <code>--chat-template</code>. Résout l'issue #1841 de lm-eval-harness (÷2 accuracy silencieux). <strong>Diagnose CLI</strong> génère la commande Python pour mesurer γ_obs sur votre GPU local.",
    "tile.compare.tip":            "<strong>Compare</strong> : choisissez 2-3 modèles candidats + une recette, voyez les verdicts dans un tableau côte à côte (ex. Llama-3-8B vs Mistral-7B à 32k). <strong>Phase diagram</strong> : nuage de 23 modèles empiriques dans le plan (log θ, γ), avec la courbe Padé superposée. Survolez les points pour détails, cliquez pour charger ce modèle dans le formulaire Recipe.",
    "tile.manual.tip":             "<strong>Recipe</strong> : choisissez une recette X-N spécifique (X-1 custom-vs-API, X-2 long context, X-3 budget, X-5 hardware, X-19 compression KV, X-21 imprint, X-22 compute-context invariant, X-23 IH-phase) et remplissez le formulaire à la main pour contrôle total. <strong>Ask</strong> : tapez une question libre ; un LLM 0.5B (Qwen2.5) dans votre navigateur choisit la bonne recette et la lance. Idéal pour explorer &quot;que se passerait-il si...&quot;.",
    "share.import_desc":       "Vous avez un fichier JSON de l'analyse TAF de quelqu'un ? Chargez-le ici pour voir le verdict + la chaîne localement. La même vue que si vous l'aviez exécuté vous-même.",
    "share.import_btn":        "📂 Charger JSON partagé",
    "synthesis.system":        "Vous êtes un assistant de diagnostic précis pour LLMs transformer. Étant donné des résultats de formules TAF pré-calculés, écrivez un résumé clair en français de 4-6 phrases. Citez le numéro de section (§X.Y) pour chaque nombre mentionné. Donnez toujours une recommandation concrète. N'INVENTEZ PAS de nombres.",

    // INSPECTOR mode
    "inspector.title":         "🔍 Inspecteur d'Architecture",
    "inspector.desc":          "Collez le contenu brut de <code>config.json</code>. L'outil extrait les paramètres architecturaux et exécute le Profil complet à 5 recettes.",
    "inspector.tip":           "<strong>Collez n'importe quel config.json directement</strong>. L'outil le parse et exécute le Profil complet. Utile pour : modèles privés, configs en développement, modèles pas encore sur HuggingFace, ou comparer ce que ferait votre architecture custom.",
    "inspector.quickstart":    "💡 Cas d'usage : vous avez un modèle privé pas sur HF Hub, ou une config que vous concevez. Collez le JSON brut ci-dessous et obtenez un profil TAF complet.",
    "inspector.placeholder":   "{\n  \"model_type\": \"llama\",\n  \"rope_theta\": 500000,\n  \"max_position_embeddings\": 8192,\n  \"num_attention_heads\": 32,\n  \"num_key_value_heads\": 8,\n  \"hidden_size\": 4096,\n  \"num_hidden_layers\": 32\n}",
    "inspector.T_eval":        "T_eval (votre contexte cible) :",
    "inspector.btn":           "🚀 Inspecter et profiler",

    // WHAT-IF slider
    "whatif.title":            "🎚 What-if : faites glisser T_eval pour voir γ changer en direct",
    "whatif.desc":             "Recalcul pur JS (sans appel Pyodide). Montre γ_Padé et d_horizon géométriques pendant que vous glissez. Cliquez pour ré-exécuter la chaîne complète.",
    "whatif.T_eval":           "<strong>T_eval</strong>",
    "whatif.gamma_pade":       "<strong>γ_Padé</strong>",
    "whatif.d_horizon":        "<strong>d_horizon</strong>",
    "whatif.l_niah":           "<strong>Plafond L_NIAH</strong>",
    "whatif.predicted":        "<strong>Verdict géométrique prédit</strong>",
    "whatif.rerun":            "↻ Recalculer la chaîne complète à ce T_eval",

    // COMMUNITY feed
    "community.title":         "🌐 Soumissions récentes de la communauté",
    "community.desc":          "Flux en direct du registre public. Cliquez sur n'importe quelle soumission pour voir l'analyse complète.",
    "community.browse_all":    "Voir tout →",
    "community.loading":       "Chargement...",
    "community.no_repo":       "Le repo du registre n'est pas encore créé. Une fois qu'il existe avec des soumissions, elles apparaîtront ici en direct.",
    "community.no_submissions": "Aucune soumission. Soyez le premier — générez un Profil et cliquez 📤 Soumettre au registry.",

    // FALSIFICATION dashboard
    "falsification.title":     "🔬 Prédictions du paper — statut de falsification",
    "falsification.desc":      "Le framework TAF repose sur des prédictions falsifiables (F1-F23). Chacune est empiriquement testée. Voici le statut en direct de chaque prédiction du paper.",
    "falsification.summary":   "{confirmed} confirmées · {partial} partielles · {refuted} réfutées · {untested} non testées (sur {total} prédictions au total)",
    "falsification.col.id":    "ID",
    "falsification.col.claim": "Claim",
    "falsification.col.status": "Statut",
    "falsification.col.evidence": "Preuve",

    "tafcard.title":           "📇 TAF Card — profil complet du modèle",
    "tafcard.recipes_title":   "📋 Recettes — verdict par dimension",
    "tafcard.recipes_count_label": "dimensions",
    "tafcard.numbers_title":   "🔢 Nombres clés (paper §26)",
    "tafcard.fals_title":      "🔬 État de falsification (F1-F23)",
    "tafcard.fals_none":       "Aucune falsification applicable.",
    "tafcard.diag_title":      "🔬 Diagnostics — nombres · contrôle γ · what-if",
    "tafcard.verify_title":    "✓ Vérification — Lean + Sage + falsification",
    "tafcard.share_title":     "📂 Provenance & partage",
    "tafcard.whatif_title":    "🎚️ Explorateur what-if",
    "verdict.go":              "GO",
    "verdict.no":              "NON",
    "verdict.degraded":        "DÉGRADÉ",

    "compare.title_out":       "🆚 Tableau comparatif",

    "status.loading_pyodide":  "⏳ Chargement du runtime Python (~10MB, première fois)...",
    "status.loading_taf":      "⏳ Chargement des formules TAF + recettes...",
    "status.ready":            "✅ Prêt. Choisissez un modèle et cliquez Profiler pour commencer.",
    "status.computing":        "🧮 Calcul de la chaîne TAF...",
    "status.done":             "✅ Terminé.",

    "profile.hf_placeholder":  "ex. meta-llama/Meta-Llama-3-8B ou Qwen/Qwen2.5-7B",
    "compare.hf_placeholder":  "ID modèle HF (ex. meta-llama/Meta-Llama-3-8B)",
    "compare.slot1_placeholder": "ID modèle HF (ex. meta-llama/Meta-Llama-3-8B)",
    "compare.slot2_placeholder": "ID modèle HF #2",
    "compare.slot3_placeholder": "ID modèle HF #3 (optionnel)",
    "compare.preset_default": "— ou préréglage —",

    // Form parameters
    "param.theta":         "θ (rope_theta)",
    "param.theta.tip":     "<strong>Fréquence de base RoPE</strong> de <code>config.rope_theta</code>. Plus haut = plus de capacité longue portée.",
    "param.T_train":       "T_train",
    "param.T_train.tip":   "<strong>Contexte max d'entraînement</strong>. De <code>max_position_embeddings</code>. Au-delà c'est de l'extrapolation.",
    "param.T_eval":        "T_eval (votre cible)",
    "param.T_eval.tip":    "<strong>Votre contexte d'inférence cible</strong>. La question clé : le modèle se comportera-t-il bien à CETTE longueur ?",
    "param.n_attn":        "n_attention_heads",
    "param.n_attn.tip":    "<strong>Nombre d'attention heads</strong> par couche. De <code>num_attention_heads</code>.",
    "param.n_kv":          "n_kv_heads",
    "param.n_kv.tip":      "<strong>KV heads</strong>. Si &lt; n_attention_heads → GQA (Grouped Query Attention). Réduit la mémoire KV mais pousse γ vers Hagedorn.",
    "param.d_head":        "head_dim",
    "param.d_head.tip":    "<strong>Dimension par head</strong>. Typique 64, 96, 128. De <code>head_dim</code> ou <code>hidden_size / num_attention_heads</code>.",
    "param.n_layers":      "n_layers",
    "param.n_layers.tip":  "<strong>Nombre de blocs transformer</strong>. De <code>num_hidden_layers</code>.",
    "param.n_params":      "n_params (ex. 8e9)",
    "param.n_params.tip":  "<strong>Nombre total de paramètres</strong>. Seuil ~400M pour l'émergence d'induction heads. Affecte la mémoire KV et les recettes de budget.",
    "param.has_swa":       "A SWA ?",
    "param.has_swa.tip":   "<strong>Sliding Window Attention</strong>. <code>true</code> pour Mistral, gemma-2, phi-3. L'audit de calibration v0.5.3 a désactivé la correction historique δ_SWA (ajustement n=1).",
    "common.yes":          "Oui",
    "common.no":           "Non",

    // Mode tooltips
    "modes.tip":           "<strong>Quatorze façons d'utiliser l'outil</strong>.<br><strong>📇 Profil</strong>: collez un id → TAF Card avec 5 recettes.<br><strong>🆚 Comparer</strong>: 2-3 modèles côte à côte sur une recette.<br><strong>🔍 Inspecter config</strong>: collez config.json brut → Profil complet.<br><strong>💬 Question</strong>: question libre, le LLM du navigateur choisit la recette.<br><strong>📋 Recette</strong>: sélection manuelle avec contrôle total du formulaire.<br><strong>🩺 Diagnostic CLI</strong>: génère commande Python pour mesurer γ localement.<br><strong>📊 Diagramme de phase</strong>: panel de 23 modèles dans le plan (log θ, γ).<br><strong>🪟 Démasquer</strong>: détecte un max_position_embeddings trompeur (SWA / YaRN / RoPE-scaling).<br><strong>📜 Chat-template</strong>: détecte la famille + donne le flag CLI exact pour lm-eval / vLLM / transformers.<br><strong>🎯 Arena CI</strong>: reconstruit les intervalles de confiance depuis les votes pairwise bruts ; détecte les égalités statistiques qu'Arena cache.<br><strong>🧪 Contamination</strong>: note 20+ benchmarks pour leur probabilité de contamination selon le cutoff d'entraînement vs la date de sortie.<br><strong>⚖️ Quant</strong>: prédit γ-shift et ΔPPL pour tout (modèle × schéma de quantification) ; recommande une alternative sûre en cas de cliff.<br><strong>🔀 Drift</strong>: même modèle, scores différents sur deux setups — bug ou bruit ? Prédit la bande de bruit numérique et signale les vrais bugs.<br><strong>🔍 NIAH→Reason</strong>: prédit les taux NIAH et reasoning multi-hop depuis l'architecture ; trouve le contexte sûr pour reasoning.",
    "profile.tip":         "<strong>Diagnostic complet en un clic</strong>. Collez n'importe quel id de modèle HF (ou choisissez préréglage). L'outil exécute les 5 recettes (contexte long, compression KV, custom vs API, budget, hardware) et produit une <strong>TAF Card</strong> unique avec verdict par dimension + nombres clés + classification architecturale.<br><br><strong>Cas d'usage</strong>: « J'évalue Qwen2.5-32B pour la production — quel est son profil complet de viabilité ? » → collez id → Profiler → fait.",
    "compare.tip":         "<strong>Même recette, plusieurs modèles</strong>. Choisissez 2-3 modèles candidats et une recette. Voyez les verdicts dans un seul tableau comparatif.<br><br><strong>Cas d'usage</strong>: « J'ai besoin de récupération longue contexte à 16K — quel est le meilleur : Llama-3-8B, Mistral-7B ou Qwen-7B ? » → choisissez 3 + X-2 + 16K → voyez le gagnant.",

    // Help modal
    "help.title":               "📘 TAF Agent — Manuel d'utilisation",
    "help.what.title":          "Que fait-il ?",
    "help.what.body":           "Prédit la <strong>viabilité pratique</strong> de tout LLM transformer <em>avant de dépenser du GPU/€</em>. Répond à des questions comme « ce modèle fonctionnera-t-il à L=32K ? » ou « dois-je entraîner sur mesure ou utiliser une API ? » via des formules Python déterministes (TAF — Thermodynamic Attention Framework).",
    "help.modes.title":         "Comment l'utiliser — 7 modes",
    "help.modes.profile":       "<strong>📇 Profiler</strong>: collez id de modèle → toutes les recettes à la fois = TAF Card. <strong>Meilleur point de départ</strong>.",
    "help.modes.compare":       "<strong>🆚 Comparer</strong>: 2-3 modèles côte à côte sur la même recette. Mieux pour choisir entre candidats.",
    "help.modes.inspector":     "<strong>🔍 Inspecter config</strong>: collez <code>config.json</code> brut → l'outil le parse et lance le Profil complet. Pour modèles privés, configs en développement, ou modèles pas encore sur HF Hub.",
    "help.modes.ask":           "<strong>💬 Question libre</strong>: question en langage naturel, le LLM du navigateur choisit la recette. Mieux pour exploration casuelle.",
    "help.modes.recipe":        "<strong>📋 Recette + formulaire</strong>: sélection manuelle, contrôle total des paramètres. Mieux quand vous voulez un contrôle exact.",
    "help.modes.diagnose":      "<strong>🩺 Diagnostic CLI</strong>: génère commande Python pour mesurer γ sur votre machine locale (transformers + numpy). Rapide ≈5 min CPU; complet ≈20–60 min GPU. JSON résultat ré-uploadable via Inspect.",
    "help.modes.phase":         "<strong>📊 Diagramme de phase</strong>: nuage de 23 modèles du panel dans le plan (log θ, γ). Ligne Hagedorn γ=1 sépare Phase A de Phase B. Cliquer un point pour charger ce modèle dans le formulaire Recette.",
    "help.recipes.title":       "Les 8 recettes disponibles",
    "help.recipe.x1.title":     "<strong>X-1 Entraînement custom vs API</strong> — compare le coût d'entraîner votre propre modèle vs payer l'accès API.",
    "help.recipe.x1.example":   "Essayez: <em>« Dois-je entraîner un 8B custom ou utiliser GPT-4o pour 50M tokens/mois ? »</em><br>Réponses: OUI (custom) / NON (API) avec mois pour break-even.",
    "help.recipe.x2.title":     "<strong>X-2 Viabilité contexte long</strong> — prédit si un modèle sert une longueur cible de manière fiable.",
    "help.recipe.x2.example":   "Essayez: <em>« Meta-Llama-3-8B gérera-t-il 32000 tokens pour récupération ? »</em><br>Chaîne: γ_Padé → décomposition → d_horizon → plafond NIAH → hallucination → mémoire KV.<br>Verdict: OUI / DÉGRADÉ / NON avec mitigation si nécessaire.",
    "help.recipe.x3.title":     "<strong>X-3 Pre-flight budget</strong> — étant donné un budget $, quel modèle est faisable à entraîner ?",
    "help.recipe.x3.example":   "Essayez: <em>« J'ai $5000, quel modèle puis-je entraîner ? »</em><br>Réponse: GO / TINY-MODEL / MEMORY-LIMITED avec N (params) et D (tokens) concrets.",
    "help.recipe.x5.title":     "<strong>X-5 Sélection hardware</strong> — quel GPU utiliser pour servir au throughput cible ?",
    "help.recipe.x5.example":   "Essayez: <em>« Hardware le moins cher pour servir Llama-3-8B à 10M tokens/jour »</em><br>Réponse: meilleur GPU + $/Mtok + capacité vs cible.",
    "help.recipe.x19.title":    "<strong>X-19 Décision compression KV</strong> — utiliser soft decay, hard cutoff, ou méthodes de littérature ?",
    "help.recipe.x21.title":    "<strong>X-21 Diagnostic Pureté Imprint</strong> — prédit γ sur tokens RANDOM via ν=−1/(2π); à quel point la prédiction RoPE du modèle est-elle propre ?",
    "help.recipe.x22.title":    "<strong>X-22 Invariant Compute-Context</strong> — γ × log(N²·D) est-il dans la bande 51.2 ± 16.8 ? Détecte anomalies de scaling/training.",
    "help.recipe.x23.title":    "<strong>X-23 Détecteur Phase IH</strong> — pré- ou post-induction-head ? Probe peu coûteux via sign(γ_text − γ_random).",
    "help.recipe.x19.example":  "Essayez: <em>« Comment compresser le cache KV pour Qwen2.5-7B à 32K ? »</em><br>Réponse: USE SOFT DECAY / USE D_f CUTOFF / USE LITERATURE METHODS / USE HARD T_train.",
    "help.recipe.x21.example":  "Essayez: <em>« Quelle est la pureté de la prédiction RoPE sur Llama-3-8B ? »</em><br>Réponse: γ_random prédit + diagnostic (CLEAN / OVER-IMPRINTED / UNDER-IMPRINTED).",
    "help.recipe.x22.example":  "Essayez: <em>« Mistral-7B entre-t-il dans l'invariant compute-context ? »</em><br>Réponse: K = γ·log(N²·D), z-score, IN-BAND ou OUTLIER.",
    "help.recipe.x23.example":  "Essayez: <em>« Qwen2.5-7B est-il post-induction-head ? »</em><br>Réponse: CONFIRMED PRE-IH / CONFIRMED POST-IH / ANOMALY.",
    "help.section.v04":         "<strong>Nouveautés v0.4</strong> (résultats session 29, 2026-04-28) : trois recettes de diagnostic dérivées de l'analyse panel cross-model (n=22 LLMs).",
    "help.divider.v04_s29":     "— v0.4 (résultats session 29) —",
    "footer.tech_stack":        "Calcul : Pyodide · Synthèse : WebLLM (Qwen2.5-0.5B local) · Hébergement : GitHub Pages · Coût : 0 $",
    "help.v04.imprint":         "<strong>Pente d'imprint apprise ν = −1/(2π)</strong> : la période de rotation RoPE 2π entraîne un biais positionnel dans les poids, proportionnel à log(N_params). Même les tokens aléatoires montrent ce scaling. ν est DÉRIVÉ — non ajusté (erreur empirique 0,3 %).",
    "help.v04.invariant":       "<strong>Invariant Chinchilla-attention K</strong> : γ × log(N²·D) ≈ 51.2 ± 16.8 (CV=0.329). Connecte le scaling de compute et l'exposant d'attention en un seul nombre sans dimension.",
    "help.v04.ih_probe":        "<strong>Δγ comme probe IH</strong> : sign(γ_text − γ_random) > 0 ⟺ post-induction-head. Moins coûteux que de lancer un benchmark in-context-learning.",
    "help.v04.constants":       "<strong>γ-cluster sur constantes célèbres</strong> (intriguant, n=4) : CodeLlama-13b γ=0.382 ≈ 1−1/φ (conjugué doré, err 0,0003) ; pythia-1.4b γ=0.705 ≈ 1/√2 ; Llama-2-7b γ=0.287 ≈ 1−1/√2 ; Mistral-Nemo γ=0.428 ≈ log_10(e). Caveat : peut être coïncidence.",
    "help.param.theta":         "<strong>θ (rope_theta)</strong>: fréquence de base RoPE. Plus haut = plus de capacité longue portée. Typique: 10000 (anciens), 500000 (Llama-3), 1000000 (Qwen2.5).",
    "help.param.T_train":       "<strong>T_train</strong>: contexte max vu par le modèle pendant l'entraînement. De <code>max_position_embeddings</code>.",
    "help.param.T_eval":        "<strong>T_eval</strong>: <em>votre</em> longueur de contexte cible en inférence. Le bouton clé.",
    "help.param.gqa":           "<strong>n_kv_heads &lt; n_attention_heads</strong>: le modèle utilise GQA (Grouped Query Attention). Réduit la mémoire KV mais pousse γ vers Hagedorn.",
    "help.param.swa":           "<strong>has_SWA</strong>: le modèle utilise Sliding Window Attention (Mistral, gemma-2).",
    "help.param.nparams":       "<strong>n_params</strong>: nombre total de paramètres. Seuil ~400M pour l'émergence des induction heads.",
    "help.add_models.title":    "Ajouter de nouveaux modèles (3 façons)",
    "help.add_models.preset":   "<strong>Liste de préréglages</strong>: 11 modèles populaires curés. Sélectionnez dans le dropdown.",
    "help.add_models.hf":       "<strong>HF Hub fetch</strong>: collez n'importe quel id (ex. <code>Qwen/Qwen2.5-32B-Instruct</code>), cliquez 📥 Charger. Le navigateur télécharge <code>config.json</code> directement de HuggingFace, remplit le formulaire. Fonctionne avec tout modèle public.",
    "help.add_models.manual":   "<strong>Manuel</strong>: remplissez les champs directement avec les valeurs de la model card.",
    "help.audit.title":         "La chaîne auditable",
    "help.audit.body":          "Chaque résultat montre la <strong>Chaîne de Calcul</strong> complète — chaque étape de formule avec ses entrées, sortie et interprétation. Cliquez sur n'importe quelle étape pour développer. Les références de section (§26.1, §19.1, etc.) renvoient au paper pour la dérivation.",
    "help.synthesis.title":     "La réponse en langage naturel",
    "help.synthesis.body":      "Après exécution de la chaîne déterministe, un LLM dans le navigateur (Qwen2.5-0.5B, ~350MB cachés après premier chargement) synthétise un résumé en langage naturel. Les nombres ci-dessus sont <em>toujours corrects</em> (Python déterministe) ; la synthèse est générée par LLM — vérifiez contre la chaîne en cas de doute.",
    "help.params.title":        "Paramètres communs expliqués",
    "help.verdicts.title":      "Quoi regarder dans les verdicts",
    "help.verdict.yes":         "<strong style=\"color:#3fb950;\">OUI / GO</strong> — procédez avec confiance ; les nombres soutiennent le choix.",
    "help.verdict.deg":         "<strong style=\"color:#d29922;\">DÉGRADÉ / TINY-MODEL</strong> — fonctionne avec caveats ; lisez l'action.",
    "help.verdict.no":          "<strong style=\"color:#f85149;\">NON / MEMORY-LIMITED</strong> — ne procédez pas tel quel ; mitigation fournie.",
    "help.privacy.title":       "Confidentialité",
    "help.privacy.body":        "Tout s'exécute dans votre navigateur. Pas de télémétrie, pas d'analytique, pas de données envoyées ailleurs. Même le modèle LLM s'exécute localement via WebGPU/WebAssembly. Vos model_ids et questions ne quittent jamais cette page.",
    "help.source.title":        "Code source et paper",
    "help.source.body":         "Code : <a href=\"https://github.com/karlesmarin/tafagent\" target=\"_blank\">github.com/karlesmarin/tafagent</a><br>Paper : <em>Marin 2026 — Predicting How Transformers Attend</em> (<a href=\"https://zenodo.org/records/19826343\" target=\"_blank\">Zenodo</a> ; arXiv à venir)<br>Dataset : <a href=\"https://huggingface.co/datasets/karlexmarin/taf-attention-decay\" target=\"_blank\">taf-attention-decay</a> — 58 mesures γ sur 32 modèles (CC-BY-4.0)",

    "footer.text":             "© 2026 Carles Marin · Apache-2.0 · recherche indépendante · l'outil qui ferme la boucle du paper.",
  },

  // ────────────────────────────────────────────────────────────────────────
  // ZH — 中文
  // ────────────────────────────────────────────────────────────────────────
  zh: {
    // §33 v0.4 (会话 31, 2026-04-30) — 新诊断功能
    "v04.title":                  "🆕 v0.4 — 新诊断 (会话 31)",
    "v04.section.intro":          "会话 31 (2026-04-30) 从公式 cross-of-crosses 游戏 + 苏格拉底质询中得出的四个新诊断函数。在 <code>taf_browser.py</code> §33 中可用。",
    "v04.arch.label":             "架构集中度",
    "v04.arch.desc":              "γ_text ≈ γ_Padé − 0.012·n_kv。跨面板相关性定律(R²=0.30)。警告:不是逐模型预测器。",
    "v04.pdi.label":              "PDI — Padé 偏差指数",
    "v04.pdi.desc":               "PDI = d_horizon_obs/T_eval。交通灯:绿色(≈1)、橙色(>>1)、黄色(<<1)、红色(B 阶段负值)。",
    "v04.4bit.label":             "4 位精度移位预测器",
    "v04.4bit.desc":              "MHA: R²(bf16)<0.9 → γ 上升;R²>0.99 → γ 下降。GQA: 精度稳健。",
    "v04.crit.label":             "临界指数捆绑",
    "v04.crit.desc":              "ν_c、β_c、η_c (=γ−1, 已修正)、α_C、γ_susc,AM-GM 最小值在 γ=1−1/√2≈0.293。",

    // §34 v0.5 (会话 32, 2026-05-01) — 机器验证的代数一致性
    "v05.title":                  "🔬 v0.5 — 机器验证一致性 (会话 32)",
    "v05.section.intro":          "Sage Groebner basis + Lean Mathlib4 双工具验证 TAF 临界指数的<strong>15 个代数恒等式</strong>。首个具有形式化机器证明支持的 transformer-attention 框架。",
    "v05.verify.label":           "代数一致性检查",
    "v05.verify.desc":            "给定测得的 γ,验证 12 个 D-SAGE 恒等式(D-SAGE-1:2η²+η·γ_χ+1=0、β·χ=−1、α+χ=2 等)。全部通过 = 框架完整。失败表明 bf16 异常值 / 量化伪影。",
    "v05.dsage1.label":           "D-SAGE-1 (★★ 核心)",
    "v05.dsage1.desc":             "二次恒等式 2η² + η·γ_χ + 1 = 0(Sage Groebner 发现, Lean 验证)。取代错误的 '三重闭合' 主张。从代数上反驳 paper 1 的 η=2γ。",
    "v05.erratum.label":          "Paper 1 勘误 — η 修正",
    "v05.erratum.desc":            "Paper 1 原本声明 η = 2γ。Sage Groebner + Lean Mathlib4 证明此为失败(残差 (-4γ³+5γ+1)/(1-γ) > 0 ∀γ ∈ A 相)。正确值:η = γ−1,满足 D-SAGE-1。",
    "v05.repro.label":            "可重现性",
    "v05.repro.desc":              "全部 15 个定理在 Lean Mathlib4 中机器证明(build 成功 1973 jobs)。Sage 脚本:<code>analysis/sage_recursive_sweep_2026-04-30.sage</code>。Lean 代码:<code>lean_taf/taf/Taf/Identities.lean</code>。",

    // v0.5.1 — TAF Card consistency check button
    "v05.consistency.title":      "🔬 代数一致性检查 (Sage + Lean v0.5)",
    "v05.consistency.desc":       "验证 TAF 临界指数的 12 个 D-SAGE 代数恒等式(Sage Groebner basis + Lean Mathlib4 机器证明)。通过 = 框架完整。失败 = bf16 异常值 / 量化伪影。",
    "v05.consistency.btn":        "🔬 验证代数一致性",

    // v0.5.2 — Anti-Ising universality class badge
    "v05.antiising.badge":        "🧲 反 Ising 类 (β=γ−1&lt;0,机器验证)",

    // v0.5.2 — 每个恒等式的工具提示(通俗解释)
    "v05.tooltip.D_SAGE_1":       "二次代数恒等式,连接异常维度 η 和磁化率 γ_χ。Sage Groebner basis 发现的核心恒等式(机器证明)。取代了之前关于三重闭合的错误声明。",
    "v05.tooltip.D_SAGE_2":       "在 A 相中,β = γ−1 为负(反 Ising)。乘以 χ = 1/(1−γ) 恰好等于 −1。TAF 负 β 体制的标志。",
    "v05.tooltip.D_SAGE_4":       "比热指数 α 和磁化率 χ 在 TAF 中精确加和为 2。Josephson 超标度的代数推论。",
    "v05.tooltip.D_SAGE_5":       "线性恒等式:α + γ_χ = 2(2−γ)。意味着当 γ 接近 1(Hagedorn)时,总和接近 2;在 γ=0 时为 4。",
    "v05.tooltip.D_SAGE_6":       "序参量指数乘以磁化率指数等于 γ 的特定二次式。因式分解的代数关系。",
    "v05.tooltip.Rushbrooke_tautology": "标准 Rushbrooke 超标度 2β + γ_χ = ν·d 在 d=1。在 TAF 中这是一个重言式 — γ_χ 的定义就是为了使其成立。Sage Groebner basis 确认。",
    "v05.tooltip.Josephson_tautology": "标准 Josephson 超标度 2 − α = ν·d 在 d=1。在 TAF 中这是一个重言式 — α 的定义就是为了使其成立。",
    "v05.tooltip.Fisher_independent": "Fisher 关系 γ_χ = (2−η)·ν。在 TAF 中是独立的(不作为恒等式闭合,与三重闭合声明相反)。残差为 γ(2γ−3)/(1−γ)。",
    "v05.tooltip.eta_2gamma_REFUTED": "Paper 1 声称 η=2γ。这个恒等式驳斥了它:残差在整个 A 相中为正。Lean Mathlib4 的机器证明驳斥。",
    "v05.tooltip.D_14_nu_imprint": "学习到的印记斜率 ν = −1/(2π) 乘以 2π 得 −1。来自 paper 1 的简单维度检查。",
    "v05.tooltip.D_SAGE_7":       "中心电荷 c=3 乘以 |ν_imprint| 乘以 2π 得 3。连接 CFT 和训练印记的维度闭合。",
    "v05.tooltip.nu_beta_id":     "关联长度指数 ν 乘以序参量指数 β 在 A 相中得 −1。D-SAGE-2 的变体。",

    "v053.calibration.title":     "🔬 v0.5.3 — 校准审计 (2026-05-02)",
    "v053.calibration.note":      "<strong>SWA 修正已禁用</strong> — 原 δ_SWA = -0.21 基于 n=1 模型拟合(数据不足;唯一案例的均值为 +0.355)。<strong>post_IH 修正标记为探索性</strong> — 重审中组均值 ≈ 0(n=22 面板)未能复现 OLS 拟合。<strong>GQA 修正可复现</strong>(面板 +0.115 vs 硬编码 +0.11)。<strong>D_f 公式修正 Phase B (γ&gt;1)</strong> — 使用离散累积和代替连续近似。LLaMA-3、Mistral、Gemma 现在报告正确的压缩值。",
    "v053.release.banner":        "🔧 v0.5.3 — 审计驱动的修复:KV 压缩 D_f 现使用离散和(适用于所有 γ);δ_SWA 禁用(n=1 校准);论文 §5.2 C_V 系数勘误 (1/4 → 1/12)。",

    // §35 v0.6 — γ 预测 vs 观测 诊断
    "gamma_check.title":           "🔍 γ 预测 vs 观测",
    "gamma_check.desc":            "输入你经验测量的 γ。工具自动检测体制:欺诈 (θ 虚高) / 压缩 / 超 Padé / SWA-随机 / 正常。",
    "gamma_check.gobs_label":      "γ_观测",
    "gamma_check.gobs_tip":        "从模型注意力分数经验测量的 γ。使用 Diagnose CLI 从真实权重获取。",
    "gamma_check.random_label":    "随机语料?",
    "gamma_check.random_tip":      "若 γ_观测在随机/无结构 token 上测得请勾选。区分 SWA 签名 (γ_obs > 1) 与异常。",
    "gamma_check.regime":          "体制",
    "gamma_check.regime.normal":         "正常",
    "gamma_check.regime.fraud":          "欺诈 (θ 虚高)",
    "gamma_check.regime.compressed":     "上下文压缩",
    "gamma_check.regime.overpade":       "超 Padé",
    "gamma_check.regime.swa":            "SWA 签名 (随机语料)",
    "gamma_check.regime.unknown":        "未知",
    "gamma_check.regime.normal.desc":    "η ∈ [0.85, 1.15]:模型完全利用名义上下文,无异常。",
    "gamma_check.regime.fraud.desc":     "η < 0.01:名义 θ 虚高。模型表现如同 θ 远小于宣称值。可能是 YaRN/营销虚标,无真实上下文扩展。",
    "gamma_check.regime.compressed.desc":"η ∈ [0.01, 0.5):上下文压缩 (模型注意距离比名义 θ 预测更短)。常见于 instruction-tuned / RLHF 模型。",
    "gamma_check.regime.overpade.desc":  "η > 1.5:模型注意距离超过 Padé 预测。可能是 Lerch 修正体制或欠训练早期 checkpoint。",
    "gamma_check.regime.swa.desc":       "随机语料上 γ_obs > 1.05 = 滑动窗口注意力签名 (Mistral / Gemma 系列)。",
    "gamma_check.regime.unknown.desc":   "输入超范围或 γ_obs > 1 但未标记随机语料。请核验测量。",
    "gamma_check.glossary.title":        "ⓘ 词汇表 — 变量含义",
    "gamma_check.glossary.gamma_pade":   "<strong>γ_Padé</strong>:闭式预测 (2−z)/(2+z), z = T√2/θ。论文 §sec:gamma_decomposition。",
    "gamma_check.glossary.gamma_obs":    "<strong>γ_观测</strong>:从注意力分数经验测得 (在真实权重上运行 Diagnose CLI)。",
    "gamma_check.glossary.theta_eff_obs":"<strong>θ_eff (观测)</strong>:由 γ_obs 反演 T√2 / (1 − γ_obs)。测量隐含的有效 θ。",
    "gamma_check.glossary.theta_eff_pade":"<strong>θ_eff (Padé)</strong>:θ + T/√2。闭式公式预测的有效 θ。",
    "gamma_check.glossary.efficiency":   "<strong>η</strong>:θ_eff_obs / θ_eff_Padé 比值。≈1 = 正常 · &lt;0.01 = 欺诈 · &lt;0.5 = 压缩 · &gt;1.5 = 超 Padé。",
    "gamma_check.glossary.delta_h":      "<strong>ΔH_Cardy</strong>:log(θ_eff_obs / θ_nominal)。Cardy 熵变。负值 = 压缩熵。~0 = 与名义匹配。",
    "gamma_check.glossary.regime":       "<strong>体制</strong>:基于 η + γ_obs + 随机语料标志的自动分类器。",

    // §36 v0.6 — 内联 ⓘ 图标提示
    "tooltip.gamma_pade":          "<strong>γ_Padé(T_eval)</strong>:闭式预测 (2−z)/(2+z), z = T√2/θ。论文 §sec:gamma_decomposition。",
    "tooltip.gamma_decomposed":    "<strong>γ_分解</strong>:基于完整架构分解的 γ。Padé 基线 + GQA 偏移 + post-IH 偏移 (校准审计已复制子集)。",
    "tooltip.d_horizon":           "<strong>d_horizon</strong>:有效注意力视野。超过此位置分数低于噪声底 (论文 §26)。",
    "tooltip.L_NIAH":              "<strong>L_NIAH 上限</strong>:当前 d_horizon 下针-在-干草堆检索可靠性的预测上限。",
    "tooltip.chi":                 "<strong>χ 易感性</strong>:χ = 1/(1−γ)。在 Hagedorn 线 γ=1 处发散。",
    "tooltip.kv_memory":           "<strong>KV 内存 @ T_eval (BF16)</strong>:每请求 KV 缓存 = 2 · n_layers · n_kv_heads · d_head · T_eval 字节。",
    "tooltip.theta_eff_obs":       "<strong>θ_eff (观测)</strong>:由 γ_观测 隐含的有效 θ:T√2 / (1 − γ_obs)。",
    "tooltip.theta_eff_pade":      "<strong>θ_eff (Padé)</strong>:闭式公式预测的有效 θ:θ + T/√2。",
    "tooltip.efficiency":          "<strong>η = θ_eff_obs / θ_eff_Padé</strong>:效率比。≈1 = 正常 · &lt;0.01 = 欺诈 · &lt;0.5 = 压缩 · &gt;1.5 = 超 Padé。",
    "tooltip.delta_h_cardy":       "<strong>ΔH_Cardy</strong>:log(θ_eff_obs / θ_nominal)。Cardy 熵变。负值 = 压缩熵。~0 = 与名义匹配。",
    "tooltip.verdict_aggregate":   "<strong>判定</strong>:所有配方中最差。✅ 通过 = 全绿 · ⚠ 降级 = ≥1 黄 · ❌ 否 = ≥1 红。",
    "tooltip.verdict_breakdown":   "<strong>各配方分解</strong>:每个配方测试一个<em>独立</em>的决策轴 (长上下文 · 预算 · 硬件 · 自训 vs API · KV 压缩)。X-1 上的 ❌ 表示「按你的量级用 API」而非「模型失败」——展开 Recipes 节查看各轴上下文。",
    "tooltip.gamma_pill":          "<strong>γ 头条</strong>:γ_分解 (或 γ_Padé 回退)。范围 (0,1) = 相位 A (反伊辛)。γ ≥ 1 = Hagedorn / 相位 B。",
    "tooltip.anti_ising":          "<strong>反伊辛类</strong>:相位 A → β = γ−1 &lt; 0。机器证明 (Sage + Lean Mathlib4)。见 §35 v0.5。",

    // §37 v0.6 — Lean+Mathlib 定理表
    "lean.table.title":            "📑 Lean+Mathlib 定理表",
    "lean.table.desc":             "下方每一项都已机器证明对 Lean 4 + Mathlib4。点击任意 L# 链接跳转到 GitHub 源码行。按主题分组——点击标题展开。",
    "lean.table.theorem":          "定理",
    "lean.table.claim":            "陈述",
    "lean.table.tactic":           "策略",
    "lean.table.source":           "出处",
    "lean.table.lean":             "Lean",
    "lean.findings.title":         "🔎 实质性发现",
    "lean.findings.detected_by":   "检测于",
    "lean.findings.fixed_by":      "修正于",
    "lean.findings.recommendation":"建议",
    "lean.meta.repo":              "仓库",
    "lean.meta.build":             "构建",
    "lean.meta.theorems":          "定理",
    "lean.meta.verified":          "已验证",
    "lean.meta.rejected":          "已拒绝",
    "lean.meta.sorry":             "sorry",
    "lean.meta.findings":          "项实质性发现",
    "lean.manifest.loading":       "正在加载 Lean 清单…",
    "lean.manifest.error":         "Lean 清单不可用",

    // 帮助弹窗 — v0.6 节
    "help.v06.title":              "🆕 v0.6 — γ 预测-vs-观测 + Cardy ΔH + Lean 徽章",
    "help.v06.intro":              "<em>v0.6 (2026-05-06):三个新诊断位于 TAF 卡的 <strong>🔬 诊断</strong> 下。全部在浏览器运行;γ_观测来自在真实权重上运行 Diagnose CLI。</em>",
    "help.v06.layout.title":       "TAF 卡布局 (v0.6 新增)",
    "help.v06.layout.body":        "点击 <strong>🚀 生成完整画像</strong> 后,卡片展示:顶部一条 <strong>hero 条</strong> (架构类 + 元信息 + 3 个 pill:聚合判定 ✅/⚠/❌、γ 头条、🧲 反伊辛若处于相位 A) 和四个 <strong>可展开节</strong>:<strong>📋 配方</strong> (默认展开 — 各维度判定)、<strong>🔬 诊断</strong> (关键数字、γ 预测 vs 观测、what-if 浏览器)、<strong>✓ 验证</strong> (Sage+Lean 代数一致性、可证伪 F1-F23)、<strong>📂 来源与分享</strong> (校准审计 + JSON 下载 / 链接 / 注册表提交)。点击任意标题展开。每个变量都有内联 <strong>ⓘ</strong> 提示。",
    "help.v06.gamma_check.title":  "γ 预测 vs 观测",
    "help.v06.gamma_check.body":   "输入经验测量的 γ,工具计算 <strong>η = θ_eff_obs / θ_eff_Padé</strong> 并分类到 5 种体制之一:",
    "help.v06.case.normal":        "<strong>正常</strong> (η ∈ [0.85, 1.15]) — 模型完整使用名义上下文。<em>用例</em>:在采用前验证新发布。",
    "help.v06.case.fraud":         "<strong>欺诈</strong> (η &lt; 0.01) — 名义 θ 虚高;模型表现如同 θ ≪ 宣称值。<em>用例</em>:检测 YaRN/营销虚标 (CodeLlama / Mistral-Nemo 模式)。",
    "help.v06.case.compressed":    "<strong>压缩</strong> (η &lt; 0.5) — 上下文压缩;模型注意距离比名义 θ 短。<em>用例</em>:识别 RLHF/指令调优引起的压缩 (LLaMA-2 模式)。",
    "help.v06.case.overpade":      "<strong>超 Padé</strong> (η &gt; 1.5) — 模型注意距离超过 Padé 预测。<em>用例</em>:识别 Lerch 修正体制或欠训练早期 checkpoint (pythia-1b 模式)。",
    "help.v06.case.swa":           "<strong>SWA 随机语料</strong> (γ_obs &gt; 1.05 且 随机语料=是) — 滑动窗口注意力签名。<em>用例</em>:在随机 token 上确认 Mistral / Gemma SWA。",
    "help.v06.cardy.title":        "Cardy ΔH 诊断",
    "help.v06.cardy.body":         "<strong>ΔH_Cardy = log(θ_eff_obs / θ_nominal)</strong>。观测有效 θ 与名义 θ 之间的熵变。强负值 = 压缩熵;接近零 = 与名义匹配。在边界情况下补充 η。",
    "help.v06.lean.title":         "Lean + Mathlib 验证徽章",
    "help.v06.lean.body":          "TAF 恒等式在 Lean Mathlib4 中形式化机器证明:<strong>37 个定理</strong>分布于 7 组(Padé、RG 流、Cayley、D-SAGE、审计发现、CV 勘误、杂项)+ <strong>1 项实质性发现</strong>(V 导数 2 倍因子,定理 <code>V_derivative_ne_RG_beta</code>)。源:<a href=\"https://github.com/karlesmarin/lean-taf\" target=\"_blank\">github.com/karlesmarin/lean-taf</a>(commit 25c77fd)。本地重新验证:<code>git clone --depth=1 https://github.com/karlesmarin/lean-taf &amp;&amp; cd lean-taf &amp;&amp; lake exe cache get &amp;&amp; lake env lean Taf/Identities.lean</code>。Hero 中的 🧲 反伊辛 pill 与验证手风琴链接到具体源码行。",
    "help.v06.glossary.title":     "变量词汇表 (亦嵌入 TAF 卡)",
    "help.v06.glossary.body":      "TAF 卡中每个变量都有内联 ⓘ 提示。完整列表:γ、γ_Padé、γ_分解、γ_观测、θ、θ_eff_obs、θ_eff_Padé、η、ΔH_Cardy、χ、d_horizon、L_NIAH、KV 内存、体制。鼠标悬停任意 ⓘ 查看定义 + 论文章节。",

    "hero.title":     "🔬 TAF Agent",
    "hero.tagline":   "30 秒诊断任意 transformer LLM。免费。无需 GPU。无需注册。",
    "hero.subtitle":  "在你花钱或花时间<em>之前</em>,预测某个模型是否适合你的用例。所有计算在浏览器本地运行 &mdash; 你的输入永远不会离开此标签页。",
    "hero.help":      "📘 手册与示例",
    "hero.quickstart_btn": "⚡ 快速开始",
    "hero.inventory_btn":  "🧰 它能给你什么",
    "hero.about":     "由独立研究员构建。开源。不隶属于任何模型供应商。",

    "modes.title":    "🎯 模式",
    "modes.profile":  "📇 模型画像",
    "modes.compare":  "🆚 比较模型",
    "modes.inspector": "🔍 检查 config",
    "modes.ask":      "💬 自由提问",
    "modes.recipe":   "📋 选择配方",
    "modes.diagnose": "🩺 诊断 CLI",
    "diagnose.title": "🩺 诊断 CLI 命令生成器",
    "diagnose.tip":   "浏览器从 config 预测 γ;CLI 在真实权重上测量 γ_obs。此生成器产生在本地运行的精确命令。",
    "diagnose.desc":  "选择选项并将生成的命令复制粘贴到本地机器(Python + transformers + numpy)。快速模式 ≈5 分钟 CPU;完整 ≈20–60 分钟 GPU。",
    "diagnose.model_label": "HF 模型 id:",
    "diagnose.theta_label": "θ(留空自动):",
    "diagnose.n_label": "上下文 N:",
    "diagnose.options_label": "选项:",
    "diagnose.opt_fast": "--fast(CPU,≈5 分钟)",
    "diagnose.opt_cpu": "--cpu(强制 CPU)",
    "diagnose.opt_4bit": "--load_in_4bit(≥7B 模型)",
    "diagnose.local_label": "--local 路径(可选):",
    "diagnose.build_btn": "📋 生成命令",
    "diagnose.cmd_title": "生成的命令:",
    "diagnose.copy_btn": "📋 复制到剪贴板",
    "diagnose.next_steps": "下一步: (1) git clone https://github.com/karlesmarin/tafagent (2) cd tafagent && pip install torch transformers numpy (3) 运行命令 (4) JSON 结果 → 通过 Inspect 模式上传以进行完整 TAF 分析。",
    "modes.phase":    "📊 相图",
    "phase.title":    "📊 相图(γ × θ)",
    "phase.tip":      "每个点是论文经验数据集中的一个模型。x 轴: log θ; y 轴: γ。Hagedorn 线 γ=1 分隔 A 相和 B 相。悬停查看详情,点击加载到表单。",
    "phase.desc":     "数据集中 23 个模型;Padé 曲线在 T=2000。",
    "modes.desc":     "<strong>最快开始</strong>: 粘贴任意 HuggingFace 模型 id (例如 <code>meta-llama/Meta-Llama-3-8B</code>),点击 画像。秒内看到所有 5 个配方的评分。",

    "profile.title":           "📇 模型画像",
    "profile.desc":            "<strong>面向技术人员</strong>: 当您需要候选模型的完整可行性快照时。一键运行所有 5 个配方,生成统一的 TAF 卡。",
    "profile.preset_label":    "预设:",
    "profile.preset_default":  "— 或从列表中选择 —",
    "profile.hf_label":        "HF 模型 id:",
    "profile.fetch_btn":       "📥 获取",
    "profile.btn":             "🚀 生成完整画像",
    "profile.quickstart":      "💡 快速开始: 选择任意预设 → 点击生成。或从 <a href='https://huggingface.co/models?library=transformers&sort=trending' target='_blank'>HF Hub 热门</a> 粘贴一个 id → 📥 获取 → 生成。",

    "compare.title":           "🆚 模型并排比较",
    "compare.desc":            "<strong>面向技术人员</strong>: 当为特定部署场景在 2-3 个候选模型之间选择时。同一配方,多个模型,并排判定。",
    "compare.recipe_label":    "配方:",
    "compare.T_eval_label":    "T_eval (目标上下文):",
    "compare.models_title":    "要比较的模型(最多 3 个)",
    "compare.btn":             "🚀 比较",
    "compare.example":         "💡 尝试: 粘贴 3 个流行的 7-8B 模型 (Meta-Llama-3-8B, Mistral-7B-v0.1, Qwen/Qwen2.5-7B),配方 X-2, T_eval=16000。查看哪个最适合长上下文。",

    "ask.title":               "❓ 您的问题",
    "ask.placeholder":         "例如: Mistral-7B 能处理 16K NIAH 检索吗?或: 我有 5,000 美元,可以训练什么模型?或: 以每天 1 亿 tokens 提供 Llama-70B 的最便宜 GPU?",
    "ask.btn":                 "🚀 分析",
    "ask.example_btn":         "💡 尝试示例",

    "recipe.title":            "📋 配方",
    "recipe.default":          "— 选择一个配方 —",
    "recipe.input_title":      "🎯 输入",

    "verdict.title":           "📊 判定",
    "chain.title":             "🔍 计算链",
    "chain.desc":              "下面每个数字都是确定性 Python。点击步骤展开。",
    "answer.title":            "💬 自然语言回答",
    "share.btn":               "🔗 复制分享链接",
    "share.copied":            "✅ 已复制到剪贴板!",
    "share.download":          "💾 下载 JSON",
    "share.download_md":       "📝 Markdown",
    "share.download_tex":      "📜 LaTeX",
    "share.submit":            "📤 提交到 registry",
    "share.submit_clip_ok":    "↗ 已打开 GitHub。正文已复制到剪贴板——粘贴到 issue 正文。",
    "share.submit_clip_fail":  "↗ 已打开 GitHub。剪贴板被阻止——正文已写入浏览器控制台 (F12)。",
    "share.import_title":      "📂 导入共享的 TAF 结果",
    "a11y.skip":               "跳到主要内容",

    // v0.6.2 — landing 重构:快速开始 + 功能清单 + 架构提示
    "qs.title":                    "⚡ 快速开始",
    "qs.step1":                    "粘贴 HuggingFace 模型 ID(例如 <code>meta-llama/Meta-Llama-3-8B</code>)",
    "qs.step2":                    "点击 <strong>📇 Profile a model</strong>",
    "qs.step3":                    "查看你的 TAF Card — 各用例的判定 + 关键数值 + 经 Lean+Mathlib 验证的数学",
    "qs.cta":                      "↓ 立即开始",
    "inv.title":                   "🧰 这个工具能给你什么",
    "inv.recipes.title":           "🎯 8 个 recipe — 这个模型符合你的用例吗?",
    "inv.recipes.x1.title":        "自训练 vs API",
    "inv.recipes.x1.body":         "对你的流量哪个更便宜?",
    "inv.recipes.x2.title":        "长上下文",
    "inv.recipes.x2.body":         "能可靠处理 32k / 128k tokens 吗?",
    "inv.recipes.x3.title":        "预算",
    "inv.recipes.x3.body":         "用 $X,你能从零训练什么模型?",
    "inv.recipes.x5.title":        "硬件",
    "inv.recipes.x5.body":         "用什么 GPU 服务 N tokens/天?",
    "inv.recipes.x19.title":       "KV 缓存",
    "inv.recipes.x19.body":        "如何压缩而不破坏质量?",
    "inv.recipes.x21.title":       "Imprint 纯度",
    "inv.recipes.x21.body":        "模型的位置编码有多干净?",
    "inv.recipes.x22.title":       "Compute-context",
    "inv.recipes.x22.body":        "模型是否落入经验带?",
    "inv.recipes.x23.title":       "IH 相位",
    "inv.recipes.x23.body":        "induction-head 之前还是之后?",
    "inv.diag.title":              "🔬 诊断",
    "inv.diag.gamma":              "<strong>γ 预测 vs 观测</strong> — 自动分入 5 种状态(正常 · 欺诈/夸大上下文 · 压缩 · over-Padé · sliding-window)",
    "inv.diag.cardy":              "<strong>Cardy ΔH</strong> — 观测上下文与名义上下文之间的熵偏移",
    "inv.diag.fals":               "<strong>可证伪面板</strong> — 检查 23 个具体预测(F1–F23)",
    "inv.diag.alg":                "<strong>代数一致性</strong> — 模型必须满足的 8 条数学恒等式",
    "inv.verify.title":            "✓ 形式化验证的数学",
    "inv.verify.count":            "<strong>37 个定理</strong>已在 Lean 4 + Mathlib4 机器证明",
    "inv.verify.click":            "点击任意徽章 → 在 GitHub 打开源码行",
    "inv.verify.reverify":         "自行验证:<code>lake build</code>(缓存后 ≈5 秒)",
    "inv.export.title":            "📤 导出与分享",
    "inv.export.formats":          "<strong>JSON · Markdown · LaTeX</strong>(论文级)",
    "inv.export.share":            "可复现的分享链接(状态编入 URL)",
    "inv.export.registry":         "提交到 GitHub 上的社区登记",
    "arch.summary":                "支持的架构",
    "arch.anyhf":                  "✓ 任意 HuggingFace 公开模型",
    "tooltip.mha":                 "Multi-Head Attention:每个 token 位置同时通过多个并行 head 进行注意力计算。",
    "tooltip.gqa":                 "Grouped Query Attention:queries 共享比 heads 更少的 keys/values(节省内存但把 γ 推向 Hagedorn)。",
    "tooltip.alibi":               "Attention with Linear Biases:位置信息以学习斜率加到注意力分数,无旋转。",
    "tooltip.abspe":               "Absolute Position Embeddings:每个位置有一个固定的学习向量加到 token embedding。",
    "tooltip.swa":                 "Sliding Window Attention:每个 token 仅在固定局部窗口内做注意力(Mistral、gemma-2 使用此机制)。",
    "tooltip.ssm":                 "State Space Model:维护内部状态的序列层(取代注意力,Mamba、Jamba 使用此机制)。",

    // v0.7.0 — anti-bullshit pack #1: SWA / RoPE-scaling 揭示器
    "modes.unmask":                "🪟 揭示",
    "unmask.title":                "🪟 上下文揭示器",
    "unmask.tip":                  "粘贴 HuggingFace 模型 id(或原始 config.json)。工具检测 sliding-window attention、RoPE 缩放(YaRN/linear/dynamic NTK)和 GQA — 所有使 <code>max_position_embeddings</code> 大于实际有效上下文的因素。Mistral-7B-v0.1 是经典例子:声称 32k,实际只在 ~4-8k 范围内做注意力。",
    "unmask.desc":                 "<strong>你即将为一个实际上注意力不到那么远的模型花钱吗?</strong> 粘贴 id,1 秒内得知。无需 GPU,无需推理 — 只是对 config.json 做算术。",
    "unmask.id_label":             "HF 模型 id:",
    "unmask.fetch_btn":            "🔍 揭示",
    "unmask.paste_summary":        "或粘贴原始 config.json(私有 / 在研模型)",
    "unmask.paste_btn":            "🔍 揭示已粘贴的 config",
    "unmask.label.declared":       "声明上下文",
    "unmask.label.effective":      "有效(估计)",
    "unmask.label.ratio":          "比率",
    "unmask.section.flags":        "架构标志",
    "unmask.section.warnings":     "警告",
    "unmask.section.reco":         "建议",
    "unmask.flag.swa":             "SWA",
    "unmask.flag.rope":            "RoPE 缩放",
    "unmask.flag.gqa":             "GQA",
    "unmask.flag.layers":          "层数",
    "unmask.flag.dhead":           "d_head",
    "unmask.flag.theta":           "RoPE θ",
    "unmask.flag.yes":             "是",
    "unmask.flag.no":              "否",
    "unmask.flag.full_mha":        "否(完整 MHA,{n} heads)",
    "unmask.verdict.honest":            "✅ 诚实",
    "unmask.verdict.inflated":          "⚠ 夸大",
    "unmask.verdict.severely_inflated": "❌ 严重夸大",
    "unmask.verdict.yarn_extended":     "⚠ YARN 扩展",
    "unmask.verdict.unknown":           "❓ 未知",
    "unmask.warn.swa_window":      "SWA 窗口:{window} tokens — 每层仅在此窗口内做注意力。",
    "unmask.warn.multihop":        "多跳估计:~{multiHop} tokens(保守:窗口 × {factor})。",
    "unmask.warn.yarn":            "RoPE 缩放({type})将上下文从 ~{original} 扩展 {factor}× 到 {declared} tokens。",
    "unmask.warn.yarn_advice":     "RoPE 扩展的上下文 — 用 γ_check 诊断在声称的全长度验证 γ 行为。",
    "unmask.warn.gqa_small_dhead": "小 head dim({d_head})+ GQA:长上下文下 KV 缓存压缩很可能(γ 推向 Hagedorn)。",
    "unmask.reco.honest":              "标准全注意力模型。有效上下文与声明一致({declared} tokens)。",
    "unmask.reco.inflated":            "通过 SWA 有效 ~{effective} tokens。用 γ_check 验证你目标长度的行为。",
    "unmask.reco.severely_inflated":   "实际把它当作 ~{effective} tokens 上下文模型。{declared} tokens 的声明仅通过跨层注意力链生效,经验上超过 ~2× SWA 窗口后会退化。",
    "unmask.reco.yarn_extended":       "RoPE 扩展上下文。运行长上下文 benchmark(NIAH 在 8k / 16k / 32k / 全长度)以确认扩展是否成立。用 γ_check 设 T_eval = {declared}。",
    "unmask.reco.unknown":             "无法解析 config。验证 URL 是带公开 config.json 的有效 HF 模型。",
    "unmask.status.empty_id":      "⚠ 输入一个 model id(例如 mistralai/Mistral-7B-v0.1)。",
    "unmask.status.fetching":      "⏳ 正在获取 {modelId} 的 config.json...",
    "unmask.status.success":       "✅ 已分析 {modelId}(判定:{verdict})",
    "unmask.status.empty_paste":   "⚠ 请先粘贴 config.json。",
    "unmask.status.invalid_json":  "❌ JSON 无效:{error}",
    "unmask.status.success_paste": "✅ 已分析粘贴的 config(判定:{verdict})",
    "unmask.pasted_label":         "(已粘贴 config)",
    "mode_desc.ask":               "输入自由问题。浏览器内的 LLM 选择正确的 recipe 并运行。",
    "mode_desc.recipe":            "直接选择一个 recipe 并填表。完整手动控制。",
    "mode_desc.profile":           "最快开始:粘贴任意 HuggingFace model id,点击 Profile。几秒内看到 5 个 recipe。",
    "mode_desc.compare":           "选择 2-3 个候选模型 + 一个 recipe。在表格中并排查看判定。",
    "mode_desc.inspector":         "直接粘贴 config.json。适用于未发布 HF Hub 的私有 / 在研模型。",
    "mode_desc.diagnose":          "构建 diagnose_model.py 的 CLI 命令,在真实 GPU 上测量 γ_obs。浏览器预测;CLI 测量。",
    "mode_desc.phase":             "论文经验面板的 γ × θ 散点图。悬停点查看详情,点击加载到 Diagnose / Recipe 表单。",
    "mode_desc.unmask":            "检测 max_position_embeddings 是否误导(SWA / YaRN / RoPE 缩放)。粘贴 model id,1 行判定。",
    "profile.preset_loaded":       "✅ 已为 <strong>{id}</strong> 加载预设。表单已预填。(点击 📥 Fetch 用 HF Hub 最新 config 覆盖。)",

    // v0.7.1 — anti-bullshit pack #2: Chat-template Sniffer
    "modes.template":              "📜 Chat-template",
    "mode_desc.template":          "检测模型使用的 chat-template 系列(Llama-3 / ChatML / Mistral / Gemma / Phi-3 / Alpaca / DeepSeek)。给出 lm-eval / vLLM / transformers 的精确 CLI flag。",
    "template.title":              "📜 Chat-template 检测器",
    "template.tip":                "粘贴 HF 模型 id(或原始 tokenizer_config.json)。检测 chat-template 系列并给出正确使用的精确框架命令。如果忘记应用,lm-eval-harness 会让 accuracy 静默对半(issue #1841)。",
    "template.desc":               "<strong>忘了 <code>--apply_chat_template</code> 吗?</strong> 大多数 multi-turn eval 因为 chat template 未应用而失败 ~50%。粘贴 model id,获取你 stack 的精确 CLI flag。",
    "template.id_label":           "HF 模型 id:",
    "template.fetch_btn":          "📜 检测",
    "template.paste_summary":      "或粘贴原始 tokenizer_config.json(私有模型)",
    "template.paste_btn":          "📜 检测已粘贴 config",
    "template.label.family":       "检测到的系列",
    "template.label.markers":      "匹配的标记",
    "template.label.tpl_len":      "Template 长度",
    "template.section.warnings":   "警告",
    "template.section.commands":   "各框架命令",
    "template.section.raw":        "原始 template(预览)",
    "template.family.custom":      "自定义(未知系列)",
    "template.family.none":        "(无 chat_template)",
    "template.verdict.ok":         "✅ 已检测到 TEMPLATE",
    "template.verdict.custom":     "⚠ 自定义 TEMPLATE",
    "template.verdict.missing":    "❌ 无 CHAT TEMPLATE",
    "template.verdict.base_model": "ℹ 基础模型(无 chat)",
    "template.verdict.unknown":    "❓ 未知",
    "template.warn.no_chat_template": "tokenizer_config.json 中无 <code>chat_template</code> 字段。基础 / 仅预训练模型的典型情况。如果你期待 instruct-tuned 模型,可能加载了错误的文件。",
    "template.warn.custom_template":  "非标准 template({length} 字符)。工具无法将其匹配到已知系列。检查下方预览并验证你的 eval 框架是否支持。",
    "template.warn.lm_eval_apply":    "<strong>lm-eval-harness:</strong>添加 <code>--apply_chat_template</code>,否则 multi-turn eval 上 accuracy 会静默下降 ~50%(issue #1841)。",
    "template.warn.vllm_apply":       "<strong>vLLM serve:</strong>验证 <code>--chat-template</code> 已设置(fine-tuned 变体的自动检测有时失败)。建议:<code>{name}</code>。",
    "template.status.empty_id":    "⚠ 输入 model id(例如 mistralai/Mistral-7B-Instruct-v0.3)。",
    "template.status.fetching":    "⏳ 正在获取 {modelId} 的 tokenizer_config.json...",
    "template.status.success":     "✅ 已检测 {modelId}(判定:{verdict})",
    "template.status.empty_paste": "⚠ 请先粘贴 tokenizer_config.json。",
    "template.status.invalid_json":"❌ JSON 无效:{error}",
    "template.status.success_paste":"✅ 已检测粘贴的 config(判定:{verdict})",
    "template.pasted_label":       "(已粘贴 tokenizer_config)",

    // v0.7.2 — anti-bullshit pack #3: Arena-Elo CI reconstructor
    "modes.arena":                 "🎯 Arena CI",
    "mode_desc.arena":             "从原始 pairwise 投票数据中恢复置信区间(Bradley-Terry MLE + bootstrap)。检测公开 Arena 排行榜隐藏的统计上并列对。",
    "arena.title":                 "🎯 Arena-Elo CI 重建器",
    "arena.tip":                   "Chatbot Arena 在公开排行榜中删除了置信区间。5 Elo 的差距在统计上可能毫无意义。粘贴原始投票数据(model_a, model_b, winner) — 工具计算 Bradley-Terry MLE + bootstrap CI 并列出统计上的并列(CI 重叠)。",
    "arena.desc":                  "<strong>GPT-4 真的比 Claude 强吗 — 还是它们打平?</strong> 粘贴 pairwise 投票 CSV(或点击 <em>加载样本</em>)。Bradley-Terry MLE + 200 次 bootstrap → 排序 Elo + 95% CI + 统计并列检测。全部在浏览器中。",
    "arena.sample_btn":            "📊 加载样本数据",
    "arena.run_btn":                "🎯 计算 CIs",
    "arena.clear_btn":             "🗑️ 清空",
    "arena.csv_summary":           "投票 CSV(header:<code>model_a,model_b,winner</code>;winner ∈ a/b/tie)",
    "arena.section.ranked":        "排序 Elo 与 95% CI",
    "arena.section.ties":          "统计并列(CI 重叠)",
    "arena.section.summary":       "摘要",
    "arena.col.rank":              "#",
    "arena.col.model":             "模型",
    "arena.col.elo":               "Elo",
    "arena.col.ci":                "95% CI",
    "arena.col.ci_width":          "± 半宽",
    "arena.col.matches":           "对局",
    "arena.col.wins":              "胜 / 负 / 平",
    "arena.col.tie_pair":          "配对",
    "arena.col.tie_diff":          "Elo 差距",
    "arena.col.tie_overlap":       "CI 重叠",
    "arena.no_ties":               "无统计并列 — 所有配对在 95% CI 下可区分。",
    "arena.summary.votes":         "总投票数",
    "arena.summary.models":        "模型数",
    "arena.summary.ties":          "统计并列",
    "arena.summary.bootstrap":     "Bootstrap 迭代",
    "arena.summary.ci_level":      "CI 水平",
    "arena.status.empty":          "⚠ 粘贴投票 CSV 或点击加载样本。",
    "arena.status.too_few":        "⚠ 仅 {n} 个有效投票 — 需要至少 10 个才能可靠拟合 Bradley-Terry。",
    "arena.status.computing":      "⏳ 在 {n} 个投票上计算 Bradley-Terry MLE + bootstrap...",
    "arena.status.done":           "✅ {n} 投票 · {models} 模型 · {ties} 统计并列 · {ms} ms",
    "arena.status.sample_loaded":  "✅ 样本已加载(合成 6 模型 Arena 数据)。点击计算 CIs。",

    // v0.7.3 — anti-bullshit pack #4: Contamination Prior
    "modes.contam":                "🧪 污染",
    "mode_desc.contam":            "对 benchmark 分数是否被污染做贝叶斯式的先验估计。输入模型训练 cutoff → 评估 20+ 主流 benchmark(MMLU、GSM8K、HumanEval、MMLU-Pro…)。",
    "contam.title":                "🧪 污染先验",
    "contam.tip":                  "基于 (模型训练 cutoff 日期) × (benchmark 发布日期) × (已知语料库纳入 + 泄漏历史),对 benchmark 分数是否被污染做贝叶斯式的先验估计。Open LLM Leaderboard v1 在 2024 年因 MMLU/HellaSwag 分数被污染而停用。",
    "contam.desc":                 "<strong>你应该相信你模型的 MMLU 分数吗?</strong> 输入模型训练 cutoff 日期 — 工具评估 20+ 主流 benchmark(MMLU、HellaSwag、GSM8K、HumanEval、IFEval、MMLU-Pro、GPQA…)并告诉你哪些分数可能被污染。",
    "contam.cutoff_label":         "训练 cutoff:",
    "contam.run_btn":              "🧪 评估所有 benchmark",
    "contam.section.ranked":       "Benchmark 污染先验",
    "contam.section.high":         "🔴 高风险 benchmark(视分数为不可信)",
    "contam.section.medium":       "🟡 中等风险(用替代品验证)",
    "contam.section.low":          "🟢 低风险(可能干净)",
    "contam.col.benchmark":        "Benchmark",
    "contam.col.released":         "发布",
    "contam.col.gap":              "差距(月)",
    "contam.col.prior":            "P(污染)",
    "contam.col.level":            "等级",
    "contam.col.corpora":          "在语料库",
    "contam.col.category":         "类别",
    "contam.label.high":           "高风险",
    "contam.label.medium":         "中",
    "contam.label.low":            "低",
    "contam.no_entries":           "(此类别中无)",
    "contam.advice.high":          "视这些分数为不可信。用更新 / 私有测试的替代品替换(MMLU-Pro、GPQA、MUSR、MATH-500)。",
    "contam.advice.medium":        "谨慎对待。在 held-out 子集或社区复现上寻找复制。",
    "contam.advice.low":           "分数可能未被污染,但没有泄漏不等于证明 — 仍要用替代测试交叉验证。",
    "contam.summary.headline":     "Cutoff <code>{cutoff}</code> · {n} 个 benchmark 已评估",
    "contam.status.empty":         "⚠ 输入模型训练 cutoff 日期(例如 2023-12)。",
    "contam.status.bad_date":      "⚠ 日期格式错误。使用 YYYY-MM 或 YYYY-MM-DD。",
    "contam.status.done":          "✅ Cutoff {cutoff} · {n} benchmarks 已评估 · {high} 个高风险",

    // v0.7 — Help 模态部分
    "help.v07.title":              "🆕 v0.7 — Anti-bullshit 套件(4 个新模式)",
    "help.v07.intro":              "<em>v0.7(2026-05-06):四个新模式,解决 HuggingFace 社区报告的具体痛点。每个都在浏览器中运行,无推理 — 纯元数据 + 数学。</em>",
    "help.v07.unmask.title":       "🪟 上下文揭示器",
    "help.v07.unmask.body":        "检测 <code>max_position_embeddings</code> 何时具有误导性。Mistral-7B-v0.1 声称 32k 但通过 SWA 实际只在 ~4-8k 内做注意力。粘贴 HF 模型 id → 1 秒判定(诚实 / 夸大 / 严重夸大 / YARN 扩展)。捕获 SWA、RoPE-scaling(YaRN/linear/dynamic NTK)、小 d_head + GQA。<em>用例</em>:在为 32k 上下文付 GPU 钱之前,验证模型是否真的注意那么远。",
    "help.v07.template.title":     "📜 Chat-template 检测器",
    "help.v07.template.body":      "检测模型使用的 chat-template 系列(Llama-3 / ChatML / Mistral / Gemma / Phi-3 / Alpaca / DeepSeek / 自定义 / 无)并给出 lm-evaluation-harness、vLLM、transformers 的精确 CLI flag。解决 lm-eval-harness 的 issue #1841:忘记 <code>--apply_chat_template</code> 会让 multi-turn accuracy 静默对半。<em>用例</em>:报告 benchmark 分数前,确认你正确应用了 template。",
    "help.v07.arena.title":        "🎯 Arena-Elo CI 重建器",
    "help.v07.arena.body":         "Chatbot Arena 在公开排行榜中删除了置信区间 — 5 Elo 的差距在统计上可能毫无意义。粘贴原始 pairwise 投票数据(model_a, model_b, winner)→ Bradley-Terry MLE + 200 次 bootstrap → 排序 Elo + 95% CI + \"统计并列\" 面板,列出 CI 重叠的配对。尝试加载样本按钮。<em>用例</em>:宣称 \"模型 A 胜过模型 B\" 之前,验证它们的 CI 不重叠。",
    "help.v07.contam.title":       "🧪 污染先验",
    "help.v07.contam.body":        "对 benchmark 分数是否被污染做贝叶斯式的先验估计。输入模型训练 cutoff 日期 → 工具按 P(污染) 评估 20+ 主流 benchmark(MMLU、HellaSwag、GSM8K、HumanEval、IFEval、MMLU-Pro、GPQA、AIME、MATH-500、BBH、MUSR…),基于时间差距、语料库纳入和已知泄漏历史。Open LLM Leaderboard v1 在 2024 年因 MMLU/HellaSwag 分数被污染而停用。<em>用例</em>:比较两个模型时决定相信哪些分数。",
    "help.v07.quant.title":        "⚖️ 量化机制分类器",
    "help.v07.quant.body":         "预测任意(模型 × 量化方案:NF4、AWQ、GPTQ、GGUF Q4_K_M / Q5_K_M / Q8_0、int8、FP8…)的 γ-shift 与 ΔPPL。架构感知:小 d_head + 激进 GQA → 更敏感;校准方案(AWQ)比未校准方案(NF4)更好地吸收偏移。检测到 cliff 时推荐更安全的替代方案。<em>用例</em>:量化之前,预测你的特定架构 × 方案组合是否能保持 PPL 可接受,否则给出具体的切换建议。",
    "help.v07.drift.title":        "🔀 跨框架 Drift 界",
    "help.v07.drift.body":         "同一模型,不同 setup 下分数不同。工具预测仅由数值噪声(dtype、framework、batch)允许的最大 drift。若观测差距超过它 → 真实 bug,通常是 chat-template mismatch(lm-eval-harness issue #1841)或 KV-cache 布局。试试 &quot;加载样本&quot; 按钮看典型的 chat-template bug。<em>用例</em>:在报告回归或声称可复现性之前,验证两个评估之间的差距是否大于数值噪声能解释的范围。",
    "inv.v07.drift":               "<strong>🔀 Drift</strong> — bug 还是噪声?预测两个评估间的最大可允许差距",
    "help.v07.niah.title":         "🔍 NIAH → Reasoning Gap",
    "help.v07.niah.body":          "RULER 论文(NVIDIA 2024)显示长上下文模型经常通过 NIAH(needle 检索)但在相同上下文上多跳 reasoning 失败。工具仅根据架构(γ_Padé + d_horizon + 架构压力:小 d_head、GQA、SWA)预测两种通过率,报告 gap,并找到模型 reasoning 保持 ≥65% 的\"安全 reasoning 上下文\"。扫描模式显示在 1k/4k/16k/64k/T_train 的曲线。<em>用例</em>:在声称的上下文部署之前,搞清楚模型是真的能在那里 reasoning 还是只能检索。",
    "inv.v07.niah":                "<strong>🔍 NIAH→Reason</strong> — 你的\"128k 上下文\"真的能在那里 reasoning,还是只能检索?",

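    // v0.7 — Inventory modal, card 5.
    // NOTE: "inv.v07.drift" and "inv.v07.niah" belong to this card too, but are declared
    // earlier, next to their matching help.v07.* entries.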
    "inv.v07.title":               "🆕 v0.7 anti-bullshit 套件",
    "inv.v07.unmask":              "<strong>🪟 Unmask</strong> — config.json 声称 32k?看它是否真的注意那么远",
    "inv.v07.template":            "<strong>📜 Chat-template</strong> — 精确 CLI flag,让 lm-eval 不会静默对半你的 accuracy",
    "inv.v07.arena":               "<strong>🎯 Arena CI</strong> — 恢复 Chatbot Arena 隐藏的置信区间",
    "inv.v07.contam":              "<strong>🧪 污染</strong> — 按污染概率对 20+ benchmark 评级",
    "inv.v07.quant":               "<strong>⚖️ Quant</strong> — 预测任意(模型 × 量化方案)组合的 γ-shift + ΔPPL",

    // v0.7.3 — anti-bullshit pack #5: Quant-regime classifier
    "modes.quant":                 "⚖️ Quant",
    "mode_desc.quant":             "预测任意(模型 × 量化方案)的 γ-shift 与 ΔPPL。架构感知:小 d_head + GQA → 更敏感。检测到 cliff 时推荐更安全的替代方案。",
    "quant.title":                 "⚖️ 量化机制分类器",
    "quant.tip":                   "预测给定(模型 × 量化方案)的 γ-shift(及由此产生的 ΔPPL)。\"AWQ 保留 ~95%\" 这类通用说法太模糊 — TAF 利用 d_head、GQA 比、SWA 标志和模型大小给出特定于架构的判定。解决:HF 社区普遍报告不可预测的量化 cliff(NF4 在 Phi-3 上 -2 PPL,但在 Llama-3-8B 上没问题)。",
    "quant.desc":                  "<strong>量化会破坏你的模型吗?</strong>粘贴 HF 模型 id,选择量化方案 — 获取预测的 γ-shift、预期 ΔPPL 区间,以及在 cliff 情况下的推荐替代方案。仅浏览器,无 GPU,无需校准集。",
    "quant.id_label":              "HF 模型 id:",
    "quant.fetch_btn":             "📥 获取 config",
    "quant.scheme_label":          "量化方案:",
    "quant.run_btn":                "⚖️ 预测",
    "quant.all_btn":               "📊 比较所有方案",
    "quant.regime.safe":           "✅ 安全",
    "quant.regime.mild":           "✅ 轻度压缩",
    "quant.regime.significant":    "⚠ 显著退化",
    "quant.regime.cliff":          "❌ 重大 CLIFF",
    "quant.label.gamma_shift":     "γ 偏移",
    "quant.label.delta_ppl":       "ΔPPL(估)",
    "quant.label.arch_mult":       "架构乘数",
    "quant.section.breakdown":     "细节分解",
    "quant.section.reco":          "建议",
    "quant.section.compare":       "所有方案(按安全性排序)",
    "quant.field.scheme":          "方案",
    "quant.field.calibrated":      "已校准",
    "quant.field.uncalibrated":    "未校准",
    "quant.field.base_penalty":    "基础惩罚",
    "quant.field.arch_mult_full":  "架构乘数",
    "quant.field.gamma_shift":     "预测 γ 偏移",
    "quant.field.ppl_band":        "ΔPPL 区间(估)",
    "quant.field.params":          "参数量",
    "quant.col.scheme":            "方案",
    "quant.col.bits":              "比特",
    "quant.col.gamma_shift":       "γ 偏移",
    "quant.col.ppl_band":          "ΔPPL 区间",
    "quant.col.regime":            "机制",
    "quant.reco.switch_to_awq":    "<strong>切换到 {scheme}</strong> — 校准的 4-bit 处理小 d_head + GQA 比 NF4 好得多。预期 ΔPPL 下降 ~2-3 倍。",
    "quant.reco.switch_to_q5_km":  "<strong>切换到 {scheme}</strong> — Q5 以低成本保留更多 head 维度(仅大约 25% 文件更大)。",
    "quant.reco.switch_to_q4_km":  "<strong>切换到 {scheme}</strong> — Q3/Q2 对此架构过于激进。",
    "quant.reco.consider_awq":     "<strong>考虑 {scheme}</strong> — 在此架构上校准能显著降低 γ-shift。",
    "quant.reco.use_higher_bits":  "<strong>使用更高比特的替代</strong> — 此架构无法干净吸收 4-bit。尝试 5 或 8-bit。",
    "quant.reco.verify_with_eval": "<strong>用真实 eval 验证</strong> — 预测偏移在边缘。部署前在目标上下文运行 NIAH。",
    "quant.reco.no_action":        "无需操作 — 此架构下量化是安全的。",
    "quant.summary.headline_all":  "<code>{modelId}</code> 的所有方案",
    "quant.status.empty_id":       "⚠ 输入 model id(例如 meta-llama/Llama-3.2-1B)。",
    "quant.status.fetching":       "⏳ 正在获取 {modelId} 的 config.json...",
    "quant.status.fetched":        "✅ 已获取 {modelId} 的 config。选择方案并点击预测(或比较所有)。",
    "quant.status.no_scheme":      "⚠ 从下拉中选择一个量化方案。",
    "quant.status.done":           "✅ 预测机制:{regime}",
    "quant.status.done_all":       "✅ 已比较 {n} 个方案 — 按安全性排序。",

    // v0.7.4 — HF Hub 自动完成:隐私 + rate-limit
    "hf_auto.privacy":             "🔒 查询发送到 huggingface.co/api · 本地缓存 5 分钟",
    "hf_auto.rate_limited":        "⚠ HuggingFace 速率限制 — 稍后再试,或手动键入完整 model id",
    "hf_auto.gated_msg":           "是 gated 模型。在此接受许可证:",

    // v0.7.5 — anti-bullshit pack #6: 跨框架 drift 界
    "modes.drift":                 "🔀 Drift",
    "mode_desc.drift":             "在给定(framework、dtype、batch、chat-template)下预测两个 benchmark 分数之间的最大允许 drift。区分真实 bug 与数值噪声。",
    "drift.title":                 "🔀 跨框架 Drift 界",
    "drift.tip":                   "同一模型,不同 setup 下分数不同。差距是噪声还是真实 bug?输入两个分数及其(framework、dtype、batch、chat-template)— 工具预测仅由数值噪声允许的最大 drift。若观测差距超过它 → 真实 bug,通常是 chat-template mismatch(lm-eval issue #1841)或 KV-cache 布局。",
    "drift.desc":                  "<strong>你的模型在 lm-eval-hf 给 67.2,在 vLLM-served 给 65.1。Bug 还是噪声?</strong> 输入两个分数及(framework、dtype、batch、是否应用 chat-template)。工具预测噪声区间并标记真实 bug。arxiv 2506.09501 将此记录为评估再现性的主要问题。",
    "drift.setup_a":               "Setup A",
    "drift.setup_b":               "Setup B",
    "drift.score":                 "分数",
    "drift.framework":             "框架",
    "drift.dtype":                 "Dtype",
    "drift.batch":                 "Batch",
    "drift.template":              "Chat-template",
    "drift.template.applied":      "已应用",
    "drift.template.not_applied":  "未应用",
    "drift.template.unknown":      "未知",
    "drift.run_btn":               "🔀 计算 drift 界",
    "drift.sample_btn":            "📊 加载样本(chat-template bug)",
    "drift.label.observed":        "观测差距",
    "drift.label.band":            "数值区间",
    "drift.label.ratio":           "差距 / 区间",
    "drift.section.setups":        "Setups",
    "drift.section.breakdown":     "Drift 贡献者(数值区间)",
    "drift.section.verdict":       "判定与建议",
    "drift.contrib.dtype":         "Dtype 不匹配",
    "drift.contrib.framework":     "框架",
    "drift.contrib.batch":         "Batch 差异",
    "drift.contrib.template":      "Chat-template 不匹配",
    "drift.dominant_cause":        "主导原因",
    "drift.cause.dtype":           "dtype 精度差异",
    "drift.cause.framework":       "框架 / 内核差异",
    "drift.cause.batch":           "按 batch 的归一化路径",
    "drift.cause.template_mismatch": "一侧应用了 chat-template 而另一侧没有(lm-eval-harness #1841 模式 — 多轮通常 -50%)",
    "drift.verdict.noise":         "✅ 数值噪声",
    "drift.verdict.suspicious":    "⚠ 可疑 — 验证",
    "drift.verdict.bug":           "❌ 真实 BUG — 调查",
    "drift.verdict.bug_template":  "❌ CHAT-TEMPLATE BUG",
    "drift.reco.noise":            "差距落在预期的数值噪声区间内。无需操作;差异与单独的 framework/dtype/batch 变化一致。",
    "drift.reco.suspicious":       "差距是预测区间的 1–2×。边缘——可能是真实 bug。尝试对齐主导贡献者(例如匹配框架或 dtype)并重新测试。",
    "drift.reco.bug":              "差距 &gt; 预测区间的 2×。这是真实 bug。检查主导贡献者 — 很可能是 tokenizer / chat-template / KV-cache 布局差异。用 <code>--apply_chat_template</code> 运行 lm-eval-harness 并确认。",
    "drift.reco.bug_template":     "检测到 chat-template 不匹配。这是评估差距大的最常见原因(lm-eval-harness issue #1841)。用 <code>--apply_chat_template</code> 重跑 &quot;未应用&quot; 一侧(或设置 vLLM <code>--chat-template &lt;name&gt;</code>)并重测。",
    "drift.status.empty_scores":   "⚠ 输入两个分数。",
    "drift.status.done":           "✅ 判定:{verdict}",
    "drift.status.sample_loaded":  "✅ 样本已加载(典型 chat-template bug)。点击计算 drift 界。",

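    // v0.7.6 — anti-bullshit pack #7: NIAH → reasoning gap predictor.
    // NOTE: this group also contains the later saturation.* keys (help.v08.*) and
    // hub.* keys (help.v081.*), interleaved with the niah.* entries.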
    "modes.niah":                  "🔍 NIAH→Reason",
    "mode_desc.niah":              "在任意上下文下预测 NIAH(检索)与多跳 reasoning 通过率。解决:长上下文模型常常通过 NIAH 但在同一上下文上 reasoning 失败(RULER 论文)。",
    "modes.saturation":            "📈 饱和度",
    "mode_desc.saturation":        "告诉你某个 benchmark 是否仍能区分 frontier 模型,或者已经饱和(例如 MMLU 88-94% 顶部,AIME 2025 已经 96-100%)。返回 top-3 + 判定 + 推荐替代品。",
    "modes.hub":                   "🧭 方案",
    "mode_desc.hub":               "每个 LLM-eval 问题的地图 → tafagent 模式(若覆盖)+ 精选外部工具。找到方案而非重新发明。30+ 问题,7 类别。",
    "niah.title":                  "🔍 NIAH → Reasoning Gap",
    "niah.tip":                    "NIAH(Needle in a Haystack)测试检索:\"在长文本中找到这个事实\"。多跳 reasoning 测试推理:\"把开头的事实 X+Y 与结尾的事实 Z 结合\"。RULER 论文(NVIDIA 2024)显示长上下文模型经常通过 NIAH 但在相同上下文上 reasoning 失败。本工具仅根据架构预测两种通过率。",
    "niah.desc":                   "<strong>你的模型声称 128k 上下文。它在 64k 是真的能 reasoning,还是只能检索?</strong>粘贴 HF 模型 id 和目标 eval 上下文 — 工具预测 NIAH 与多跳 reasoning 通过率、gap,以及 reasoning 保持 ≥65% 的 \"安全上下文\"。",
    "niah.id_label":               "HF 模型 id:",
    "niah.fetch_btn":              "📥 获取 config",
    "niah.teval_label":            "目标上下文 (T_eval):",
    "niah.run_btn":                "🔍 预测",
    "niah.sweep_btn":              "📊 扫描上下文",
    "niah.label.niah":             "NIAH 通过率",
    "niah.label.reasoning":        "Reasoning 通过率",
    "niah.label.gap":              "Gap",
    "niah.label.safe_ctx":         "Reasoning 安全上下文",
    "niah.section.breakdown":      "架构细节",
    "niah.section.reco":           "建议",
    "niah.section.sweep":          "按上下文长度扫描通过率",
    "niah.field.dhorizon":         "d_horizon(有效)",
    "niah.field.ratio":            "T_eval / d_horizon",
    "niah.field.arch_pressure":    "架构压力(小 d_head + GQA + SWA)",
    "niah.field.theta":            "RoPE θ",
    "niah.field.t_train":          "T_train(声称)",
    "niah.col.context":            "T_eval",
    "niah.col.niah":               "NIAH",
    "niah.col.reasoning":          "Reasoning",
    "niah.col.gap":                "Gap",
    "niah.col.verdict":            "判定",
    "niah.verdict.robust":         "✅ 稳健",
    "niah.verdict.marginal":       "⚠ 边缘",
    "niah.verdict.degraded":       "⚠ 退化",
    "niah.verdict.retrieval_only": "❌ 仅检索",
    "niah.verdict.broken":         "❌ 失效",
    "niah.reco.robust":            "在此上下文下检索与 reasoning 都稳定。可安全部署用于查询和推理任务。",
    "niah.reco.marginal":          "边缘。检索可用但 reasoning 不稳。用于事实查询,不要用于多步推理。",
    "niah.reco.degraded":          "Reasoning 显著下降。模型能找到事实但难以组合它们。在此长度下避免多跳任务。",
    "niah.reco.retrieval_only":    "RULER 的典型发现:模型通过 NIAH 但 reasoning 失败。适用于 RAG 设置(LLM 仅定位事实),不适用于链式推理。把上下文降到下方的 \"安全\" 值。",
    "niah.reco.broken":            "在此上下文下模型连基本检索都失败。视为 out-of-distribution — 在更短上下文重测。",
    "niah.safe_context":           "≤ {ctx} tokens(reasoning ≥ 65%)",
    "niah.safe_context_none":      "在你的目标以下没找到安全上下文 — 模型即使在小上下文也 reasoning 失败。",
    "niah.summary.sweep":          "<code>{modelId}</code> — 按上下文的通过率",
    "niah.status.empty_id":        "⚠ 输入 model id(例如 meta-llama/Llama-3.1-8B-Instruct)。",
    "niah.status.bad_teval":       "⚠ 输入目标上下文(≥ 512 tokens)。",
    "niah.status.fetching":        "⏳ 正在获取 {modelId} 的 config.json...",
    "niah.status.fetched":        "✅ 已获取 {modelId} 的 config。设置 T_eval 并点击预测(或扫描上下文)。",
    "niah.status.done":            "✅ {verdict} — NIAH {niah}% · reasoning {reasoning}%",
    "niah.status.sweep_done":      "✅ 已扫描 {n} 个上下文长度。",
    "saturation.title":            "📈 Benchmark 饱和度检测器",
    "saturation.tip":              "MMLU 已饱和(所有 frontier 模型 88-94%)。报告\"92% on MMLU\"现在毫无意义。本工具告诉你哪些 benchmark 仍能区分 frontier 模型,哪些已饱和,以及替代方案。数据:DemandSphere AI Frontier Tracker(CC BY-NC 4.0),2026-05 刷新。",
    "saturation.desc":             "<strong>你的 benchmark 还有用吗?</strong>选一个 benchmark 查看 top-3 frontier 分数、spread 与判定(saturated / near-saturated / discriminative),并给出推荐替代品。",
    "saturation.select_label":     "Benchmark:",
    "saturation.select.all":       "— 显示所有 benchmark —",
    "saturation.run_btn":          "📈 分类",
    "saturation.all_btn":          "📊 显示全部",
    "saturation.col.spread":       "Top-3 spread",
    "saturation.col.mean":         "Top-3 平均",
    "saturation.col.n":            "模型数",
    "saturation.col.bench":        "Benchmark",
    "saturation.col.verdict":      "判定",
    "saturation.col.reco":         "首选替代",
    "saturation.col.model":        "模型",
    "saturation.col.score":        "分数",
    "saturation.section.top3":     "Top-3 frontier 分数",
    "saturation.section.recommendations": "推荐替代品",
    "saturation.section.note":     "备注",
    "saturation.section.all":      "所有跟踪的 benchmark",
    "saturation.verdict.saturated":      "🚨 已饱和",
    "saturation.verdict.near_saturated": "⚠ 接近饱和",
    "saturation.verdict.discriminative": "✅ 仍可区分",
    "saturation.verdict.sparse_data":    "ℹ 数据稀疏",
    "saturation.borderline":       "边缘 — 在阈值切点的 ±1pp 内。判定视为\"需仔细核对\"。",
    "saturation.unknown":          "未知 benchmark。",
    "saturation.attribution":      "数据:DemandSphere AI Frontier Model Tracker(CC BY-NC 4.0)· HF Open LLM Leaderboard v3(开源权重历史)· 最近一次 fetch 2026-05-05。",
    "saturation.status.live":      "✅ 实时数据已加载 — {count} 个模型。",
    "saturation.status.baked":     "ℹ 使用 baked 快照(实时 fetch 不可用)。",
    "saturation.status.kb_fail":   "⚠ 无法加载饱和度 KB。",
    "saturation.status.done":      "✅ {name} — {verdict}",
    "saturation.status.all_done":  "✅ 已分类 {n} 个 benchmark。",
    "help.v08.saturation.title":   "📈 Benchmark 饱和度检测器",
    "help.v08.saturation.body":    "MMLU 已饱和(top 88-94%),AIME 2025 上线几个月就饱和,HumanEval 接近饱和。选任何 benchmark,工具返回 top-3 frontier 分数、spread、平均,以及判定 — saturated / near-saturated / discriminative — 加上推荐替代品(例如 MMLU → MMLU-Pro / GPQA / HLE)。可达时从 DemandSphere AI Frontier Tracker(CC BY-NC 4.0)实时 fetch;不可达时使用 2026-05-05 的 baked 快照。<em>用例</em>:在引用\"92% on MMLU\"或设计 eval 之前,检查 benchmark 是否仍能区分任何东西。",
    "inv.v08.saturation":          "<strong>📈 Saturation</strong> — 你的 benchmark 还有用吗,还是所有 frontier 都在顶部并列?",
    "inv.v081.hub":                "<strong>🧭 Solutions Hub</strong> — 每个文档化的问题都映射到一个 tafagent 模式或精选外部工具。别重复发明 — 去找。",
    "help.v081.hub.title":         "🧭 Solutions Hub",
    "help.v081.hub.body":          "tafagent 作为集成者而非孤岛。30+ 问题跨 7 类别(评估可靠性 · 诊断 · 设置 · 训练 · 检索 · 多模态 · 可观测性),每个映射到(a)解决它的 tafagent 模式(若存在),以及(b)社区已信任的最佳外部工具(RAGAS、MTEB、HELM、MCP Schema Validator、llm-stats、llguidance、GlitchMiner 等)。搜索框匹配 pain、场景和工具名称。<em>用例</em>:'我有问题 X — tafagent 解决它吗,如果不,谁解决?'",
    "hub.title":                   "🧭 Solutions Hub",
    "hub.tip":                     "我们已知的每个 LLM-eval 问题的地图:哪个 tafagent 模式能解决它(若有),以及社区已信任的最佳外部工具。目标:全覆盖。如果规范工具已在别处,我们链接而非重建。",
    "hub.desc":                    "<strong>别重新发明 — 去找。</strong>30+ 问题映射到 tafagent 模式 + 精选外部工具。按类别浏览、按关键字搜索,或查看新模式最有帮助的空缺。",
    "hub.clear_btn":               "✕ 清空",
    "hub.no_mode":                 "外部",
    "hub.planned":                 "计划:",
    "hub.best_for":                "适合",
    "hub.not_for":                 "不适合",
    "hub.tools":                   "外部工具",
    "hub.status.loaded":           "✅ 已加载 {total} 个问题,跨 {categories} 类别 — {covered} 个由 tafagent 模式覆盖,精选 {externalLinks} 个外部链接。编译于 {compiled}。",
    "hub.status.fail":             "⚠ 无法加载 Solutions Hub。",
    "hub.search.empty":            "无 '{query}' 的匹配。尝试更宽泛的词(如 'eval'、'rag'、'tokenizer')。",
    "hub.search.results":          "为 '{query}' 找到 {n} 个匹配。",

    // v0.7.7 — 任务卡片(UX 重构:按用户意图分组的 14 个模式)
    "tiles.title":                 "🎯 你想做什么?",
    "tiles.subtitle":              "选择一项任务。每一项会打开下方对应的工具。或往下滚动查看完整的 14 个模式列表。",
    "tile.diagnose.title":         "🔬 诊断一个模型",
    "tile.diagnose.desc":          "这个具体模型符合我的用例吗?",
    "tile.trust.title":            "✓ 相信 benchmark 分数",
    "tile.trust.desc":             "我该相信这个数字吗?是 bug 还是噪声?",
    "tile.eval.title":              "⚙️ 正确设置 eval",
    "tile.eval.desc":              "获取 lm-eval / vLLM / transformers 的精确 CLI flag。",
    "tile.compare.title":          "🆚 比较模型",
    "tile.compare.desc":           "并排,或浏览经验模型面板。",
    "tile.manual.title":           "📋 手动 / 自由",
    "tile.manual.desc":            "手动挑一个具体 recipe,或用自然语言提问。",
    "tile.diagnose.tip":           "当你有具体的 model id 并想要完整诊断时从这里开始:<strong>Profile</strong> 一次运行所有 5 个 recipe。<strong>Unmask</strong> 检查 max_position_embeddings 是否诚实。<strong>NIAH→Reason</strong> 预测 retrieval-vs-reasoning 的 gap。<strong>Quant</strong> 预测量化是否会破坏它。<strong>Inspect</strong> 允许粘贴原始 config.json,适用于私有 / 在研模型。",
    "tile.trust.tip":              "当你看到一个分数想知道它是否可靠。<strong>Contamination</strong> 按模型在训练时看到 benchmark 的可能性给 20+ 个 benchmark 评级。<strong>Drift</strong> 告诉你两个 eval 之间的 gap 是数值噪声还是真实 bug(chat-template 不匹配、KV-cache 布局等)。<strong>Arena CI</strong> 重建 Chatbot Arena 隐藏的置信区间——很多 top-Elo 的 &quot;胜利&quot; 在统计上是并列。",
    "tile.eval.tip":               "在运行 lm-eval-harness 或 vLLM serve 之前,获取正确的 CLI flag。<strong>Chat-template Sniffer</strong> 检测 template 系列(Llama-3 / ChatML / Mistral / Phi-3 / DeepSeek / Alpaca / custom / none)并输出精确的 <code>--apply_chat_template</code> / <code>--chat-template</code> 调用。解决 lm-eval-harness 的 issue #1841(accuracy 静默对半)。<strong>Diagnose CLI</strong> 生成 Python 命令在你的本地 GPU 上测量 γ_obs。",
    "tile.compare.tip":            "<strong>Compare</strong>:选择 2-3 个候选模型 + 一个 recipe,在并排表格中看判定(例如 Llama-3-8B vs Mistral-7B 在 32k 上下文)。<strong>Phase diagram</strong>:23 个经验模型在 (log θ, γ) 平面上的散点图,叠加 Padé 曲线。悬停点查看详情,点击将该模型加载到 Recipe 表单。",
    "tile.manual.tip":             "<strong>Recipe</strong>:挑选具体的 X-N recipe(X-1 自训 vs API、X-2 长上下文、X-3 预算、X-5 硬件、X-19 KV 压缩、X-21 imprint、X-22 compute-context 不变量、X-23 IH 相位)并手动填表,完全控制。<strong>Ask</strong>:输入自由问题;浏览器内的 0.5B LLM(Qwen2.5)选择合适的 recipe 并运行。最适合 &quot;如果……会怎样&quot; 的探索。",
    "share.import_desc":       "有他人 TAF 分析的 JSON 文件? 在这里加载以本地查看判定 + 链。与您自己运行的视图相同。",
    "share.import_btn":        "📂 加载共享的 JSON",
    "synthesis.system":        "您是 transformer LLM 的精确诊断助手。给定预先计算的 TAF 公式结果,用 4-6 句中文写出清晰的摘要。为每个提到的数字引用章节号 (§X.Y)。始终给出具体建议。不要编造数字。",

    // INSPECTOR 模式
    "inspector.title":         "🔍 架构检查器",
    "inspector.desc":          "粘贴 <code>config.json</code> 的原始内容。工具提取架构参数并运行完整的 5 配方 Profile。",
    "inspector.tip":           "<strong>直接粘贴任意 config.json</strong>。工具解析它并运行完整 Profile。适用于:私有模型、开发中的 configs、尚未在 HuggingFace 的模型,或比较自定义架构的行为。",
    "inspector.quickstart":    "💡 用例:您有未在 HF Hub 上的私有模型,或正在设计的 config。粘贴下面的原始 JSON,获取完整 TAF 画像。",
    "inspector.placeholder":   "{\n  \"model_type\": \"llama\",\n  \"rope_theta\": 500000,\n  \"max_position_embeddings\": 8192,\n  \"num_attention_heads\": 32,\n  \"num_key_value_heads\": 8,\n  \"hidden_size\": 4096,\n  \"num_hidden_layers\": 32\n}",
    "inspector.T_eval":        "T_eval (您的目标上下文):",
    "inspector.btn":           "🚀 检查并画像",

    // WHAT-IF 滑块
    "whatif.title":            "🎚 What-if: 拖动 T_eval 实时查看 γ 变化",
    "whatif.desc":             "纯 JS 重新计算 (不调用 Pyodide)。滑动时显示几何 γ_Padé 和 d_horizon。点击按钮重新运行完整链。",
    "whatif.T_eval":           "<strong>T_eval</strong>",
    "whatif.gamma_pade":       "<strong>γ_Padé</strong>",
    "whatif.d_horizon":        "<strong>d_horizon</strong>",
    "whatif.l_niah":           "<strong>L_NIAH 上限</strong>",
    "whatif.predicted":        "<strong>预测几何判定</strong>",
    "whatif.rerun":            "↻ 在此 T_eval 重新计算完整链",

    // COMMUNITY 反馈
    "community.title":         "🌐 社区最近提交",
    "community.desc":          "公共 registry 的实时反馈。点击任意提交查看完整分析。",
    "community.browse_all":    "浏览全部 →",
    "community.loading":       "加载中...",
    "community.no_repo":       "Registry 仓库尚未创建。一旦它存在并有提交,它们将在此处实时显示。",
    "community.no_submissions": "暂无提交。成为第一个 — 生成一个 Profile 并点击 📤 提交到 registry。",

    // FALSIFICATION 仪表板
    "falsification.title":     "🔬 论文预测 — 可证伪状态",
    "falsification.desc":      "TAF 框架基于可证伪的预测 (F1-F23)。每一个都经过经验测试。这是论文中每个预测的实时状态。",
    "falsification.summary":   "{confirmed} 已确认 · {partial} 部分 · {refuted} 已反驳 · {untested} 未测试 (共 {total} 个预测)",
    "falsification.col.id":    "ID",
    "falsification.col.claim": "Claim",
    "falsification.col.status": "状态",
    "falsification.col.evidence": "证据",

    "tafcard.title":           "📇 TAF 卡 — 完整模型画像",
    "tafcard.recipes_title":   "📋 配方 — 各维度判定",
    "tafcard.recipes_count_label": "维度",
    "tafcard.numbers_title":   "🔢 关键数字 (paper §26)",
    "tafcard.fals_title":      "🔬 可证伪状态 (F1-F23)",
    "tafcard.fals_none":       "无适用的可证伪。",
    "tafcard.diag_title":      "🔬 诊断 — 数字 · γ 检验 · what-if",
    "tafcard.verify_title":    "✓ 验证 — Lean + Sage + 可证伪",
    "tafcard.share_title":     "📂 来源与分享",
    "tafcard.whatif_title":    "🎚️ What-if 浏览器",
    "verdict.go":              "通过",
    "verdict.no":              "否",
    "verdict.degraded":        "降级",

    "compare.title_out":       "🆚 比较表",

    "status.loading_pyodide":  "⏳ 加载 Python 运行时 (~10MB,首次加载)...",
    "status.loading_taf":      "⏳ 加载 TAF 公式 + 配方...",
    "status.ready":            "✅ 就绪。选择一个模型并点击画像开始。",
    "status.computing":        "🧮 计算 TAF 链...",
    "status.done":             "✅ 完成。",

    "profile.hf_placeholder":  "例如: meta-llama/Meta-Llama-3-8B 或 Qwen/Qwen2.5-7B",
    "compare.hf_placeholder":  "HF 模型 id (例如: meta-llama/Meta-Llama-3-8B)",
    "compare.slot1_placeholder": "HF 模型 id (例如: meta-llama/Meta-Llama-3-8B)",
    "compare.slot2_placeholder": "HF 模型 id #2",
    "compare.slot3_placeholder": "HF 模型 id #3 (可选)",
    "compare.preset_default": "— 或预设 —",

    // 表单参数
    "param.theta":         "θ (rope_theta)",
    "param.theta.tip":     "<strong>RoPE 基础频率</strong> 来自 <code>config.rope_theta</code>。越高 = 长程能力越强。",
    "param.T_train":       "T_train",
    "param.T_train.tip":   "<strong>训练最大上下文</strong>。来自 <code>max_position_embeddings</code>。超出此范围属于外推。",
    "param.T_eval":        "T_eval (您的目标)",
    "param.T_eval.tip":    "<strong>您的目标推理上下文</strong>。关键问题: 模型在 <em>这个</em> 长度下表现是否良好?",
    "param.n_attn":        "n_attention_heads",
    "param.n_attn.tip":    "<strong>每层 attention heads 数</strong>。来自 <code>num_attention_heads</code>。",
    "param.n_kv":          "n_kv_heads",
    "param.n_kv.tip":      "<strong>KV heads</strong>。若 &lt; n_attention_heads → GQA (Grouped Query Attention)。降低 KV 内存但将 γ 推向 Hagedorn。",
    "param.d_head":        "head_dim",
    "param.d_head.tip":    "<strong>每 head 维度</strong>。典型 64、96、128。来自 <code>head_dim</code> 或 <code>hidden_size / num_attention_heads</code>。",
    "param.n_layers":      "n_layers",
    "param.n_layers.tip":  "<strong>Transformer 块数</strong>。来自 <code>num_hidden_layers</code>。",
    "param.n_params":      "n_params (例如 8e9)",
    "param.n_params.tip":  "<strong>总参数量</strong>。约 400M 阈值出现 induction heads。影响 KV 内存和预算配方。",
    "param.has_swa":       "有 SWA 吗?",
    "param.has_swa.tip":   "<strong>Sliding Window Attention</strong>。Mistral、gemma-2、phi-3 为 <code>true</code>。v0.5.3 校准审计禁用了历史 δ_SWA 校正 (n=1 拟合)。",
    "common.yes":          "是",
    "common.no":           "否",

    // 模式提示
    "modes.tip":           "<strong>十四种使用方式</strong>。<br><strong>📇 画像</strong>: 粘贴模型 id → 5 个配方的 TAF 卡。<br><strong>🆚 比较</strong>: 2-3 个模型在一个配方上并排比较。<br><strong>🔍 检查 config</strong>: 粘贴原始 config.json → 完整画像。<br><strong>💬 提问</strong>: 自由形式问题,浏览器 LLM 选择配方。<br><strong>📋 配方</strong>: 手动选择,完全控制表单。<br><strong>🩺 CLI 诊断</strong>: 生成 Python 命令在本地测量 γ。<br><strong>📊 相图</strong>: 23 个面板模型在 (log θ, γ) 平面上。<br><strong>🪟 揭示</strong>: 检测误导的 max_position_embeddings(SWA / YaRN / RoPE 缩放)。<br><strong>📜 Chat-template</strong>: 检测系列 + 给出 lm-eval / vLLM / transformers 的精确 CLI flag。<br><strong>🎯 Arena CI</strong>: 从原始 pairwise 投票数据重建置信区间;检测 Arena 隐藏的统计并列。<br><strong>🧪 污染</strong>: 根据训练 cutoff 与发布日期,对 20+ benchmark 进行污染概率评估。<br><strong>⚖️ Quant</strong>: 预测任意(模型 × 量化方案)的 γ-shift 与 ΔPPL;cliff 时推荐更安全替代方案。<br><strong>🔀 Drift</strong>: 同一模型,两 setup 下分数不同 — bug 还是噪声?预测数值噪声区间并标记真实 bug。<br><strong>🔍 NIAH→Reason</strong>: 从架构预测 NIAH 与多跳 reasoning 通过率;找到模型的安全 reasoning 上下文。",
    "profile.tip":         "<strong>一键完整诊断</strong>。粘贴任意 HF 模型 id (或选择预设)。工具运行所有 5 个配方 (长上下文、KV 压缩、自定义 vs API、预算、硬件),生成单个 <strong>TAF 卡</strong>,显示每个维度的判定 + 关键数字 + 架构分类。<br><br><strong>用例</strong>: \"我正在为生产评估 Qwen2.5-32B — 它的完整可行性概况是什么?\" → 粘贴 id → 画像 → 完成。",
    "compare.tip":         "<strong>同一配方,多个模型</strong>。选择 2-3 个候选模型和一个配方。在单个比较表中查看判定。<br><br><strong>用例</strong>: \"我需要在 16K 进行长上下文检索 — 哪个最好: Llama-3-8B、Mistral-7B 或 Qwen-7B?\" → 选择 3 个 + X-2 + 16K → 看赢家。",

    // 帮助模态框
    "help.title":               "📘 TAF Agent — 用户手册",
    "help.what.title":          "它做什么?",
    "help.what.body":           "在<em>花费 GPU/$ 之前</em>,预测任意 transformer LLM 的<strong>实际可行性</strong>。回答诸如 \"这个模型能在 L=32K 工作吗?\" 或 \"我应该自定义训练还是使用 API?\" 等问题,使用确定性 Python 公式 (TAF — Thermodynamic Attention Framework)。",
    "help.modes.title":         "如何使用 — 7 种模式",
    "help.modes.profile":       "<strong>📇 画像</strong>: 粘贴模型 id → 同时运行所有配方 = TAF 卡。<strong>最佳起点</strong>。",
    "help.modes.compare":       "<strong>🆚 比较</strong>: 2-3 个模型在同一配方上并排。最适合在候选者之间选择。",
    "help.modes.inspector":     "<strong>🔍 检查 config</strong>: 粘贴原始 <code>config.json</code> → 工具解析并运行完整画像。适用于私有模型、开发中的配置、或尚未在 HF Hub 上的模型。",
    "help.modes.ask":           "<strong>💬 自由提问</strong>: 自然语言问题,浏览器 LLM 选择配方。最适合随意探索。",
    "help.modes.recipe":        "<strong>📋 配方 + 表单</strong>: 手动选择,完全控制参数。最适合需要精确控制时。",
    "help.modes.diagnose":      "<strong>🩺 CLI 诊断</strong>: 生成 Python 命令在你的本地机器上测量 γ (transformers + numpy)。快速 ≈5 分钟 CPU;完整 ≈20–60 分钟 GPU。结果 JSON 可通过 Inspect 重新上传。",
    "help.modes.phase":         "<strong>📊 相图</strong>: 23 个面板模型在 (log θ, γ) 平面上的散点图。Hagedorn 线 γ=1 分隔 A 相和 B 相。点击点将该模型加载到配方表单。",
    "help.recipes.title":       "可用的 8 个配方",
    "help.recipe.x1.title":     "<strong>X-1 自定义训练 vs API</strong> — 比较训练自己模型的成本与付费使用 API 的成本。",
    "help.recipe.x1.example":   "尝试: <em>\"我应该训练 8B 自定义模型还是使用 GPT-4o 处理每月 50M tokens?\"</em><br>答案: 是 (自定义) / 否 (API),含损益平衡月数。",
    "help.recipe.x2.title":     "<strong>X-2 长上下文可行性</strong> — 预测模型是否能可靠地服务目标上下文长度。",
    "help.recipe.x2.example":   "尝试: <em>\"Meta-Llama-3-8B 能处理 32000 tokens 检索吗?\"</em><br>链: γ_Padé → 分解 → d_horizon → NIAH 上限 → 幻觉 → KV 内存。<br>判定: 是 / 降级 / 否,如需则提供缓解措施。",
    "help.recipe.x3.title":     "<strong>X-3 预算预飞行</strong> — 给定 $ 预算,可行训练什么模型?",
    "help.recipe.x3.example":   "尝试: <em>\"我有 $5000,可以训练什么模型?\"</em><br>答案: GO / TINY-MODEL / MEMORY-LIMITED 含具体的 N (参数) 和 D (tokens)。",
    "help.recipe.x5.title":     "<strong>X-5 硬件选择</strong> — 应该使用哪个 GPU 以达到目标吞吐量?",
    "help.recipe.x5.example":   "尝试: <em>\"以每天 1000 万 tokens 提供 Llama-3-8B 的最便宜硬件\"</em><br>答案: 最佳 GPU + $/Mtok + 容量 vs 目标。",
    "help.recipe.x19.title":    "<strong>X-19 KV 压缩决策</strong> — 应该使用 soft decay、hard cutoff 还是文献方法?",
    "help.recipe.x21.title":    "<strong>X-21 Imprint 纯度诊断</strong> — 通过 ν=−1/(2π) 预测 RANDOM token 上的 γ;模型的 RoPE 预测有多干净?",
    "help.recipe.x22.title":    "<strong>X-22 Compute-Context 不变量</strong> — γ × log(N²·D) 是否落在 51.2 ± 16.8 区间内?检测 scaling/training 异常。",
    "help.recipe.x23.title":    "<strong>X-23 IH-Phase 检测器</strong> — 前- 还是后-induction-head?通过 sign(γ_text − γ_random) 进行廉价探测。",
    "help.recipe.x19.example":  "尝试: <em>\"如何为 Qwen2.5-7B 在 32K 压缩 KV 缓存?\"</em><br>答案: USE SOFT DECAY / USE D_f CUTOFF / USE LITERATURE METHODS / USE HARD T_train.",
    "help.recipe.x21.example":  "尝试: <em>\"Llama-3-8B 上的 RoPE 预测有多干净?\"</em><br>答案: 预测的 γ_random + 诊断 (CLEAN / OVER-IMPRINTED / UNDER-IMPRINTED)。",
    "help.recipe.x22.example":  "尝试: <em>\"Mistral-7B 是否符合 compute-context 不变量?\"</em><br>答案: K = γ·log(N²·D)、z-score、IN-BAND 或 OUTLIER。",
    "help.recipe.x23.example":  "尝试: <em>\"Qwen2.5-7B 是后-induction-head 吗?\"</em><br>答案: CONFIRMED PRE-IH / CONFIRMED POST-IH / ANOMALY。",
    "help.section.v04":         "<strong>v0.4 新增</strong> (第 29 次研究会话, 2026-04-28): 来自 cross-model panel 分析 (n=22 LLMs) 的三个诊断 recipes。",
    "help.divider.v04_s29":     "— v0.4 (第 29 次会话发现) —",
    "footer.tech_stack":        "计算:Pyodide · 综合:WebLLM (Qwen2.5-0.5B 本地) · 托管:GitHub Pages · 成本:$0",
    "help.v04.imprint":         "<strong>学习印记斜率 ν = −1/(2π)</strong>: RoPE 旋转周期 2π 在权重上引发位置偏置, 与 log(N_params) 成正比。即使 random token 也显示此 scaling。ν 是 DERIVED — 非拟合 (经验误差 0.3%)。",
    "help.v04.invariant":       "<strong>Chinchilla-attention 不变量 K</strong>: γ × log(N²·D) ≈ 51.2 ± 16.8 (CV=0.329)。将 compute scaling 和 attention 指数连接为单一无量纲数。",
    "help.v04.ih_probe":        "<strong>Δγ 作为 IH 探测</strong>: sign(γ_text − γ_random) > 0 ⟺ post-induction-head。比运行 in-context-learning 基准更便宜。",
    "help.v04.constants":       "<strong>γ 簇落在著名常数上</strong> (有趣, n=4): CodeLlama-13b γ=0.382 ≈ 1−1/φ (黄金共轭, err 0.0003); pythia-1.4b γ=0.705 ≈ 1/√2; Llama-2-7b γ=0.287 ≈ 1−1/√2; Mistral-Nemo γ=0.428 ≈ log_10(e)。Caveat: 可能是巧合。",
    "help.param.theta":         "<strong>θ (rope_theta)</strong>: RoPE 基础频率。越高 = 长程能力越强。典型: 10000 (早期),500000 (Llama-3),1000000 (Qwen2.5)。",
    "help.param.T_train":       "<strong>T_train</strong>: 模型训练时的最大上下文。来自 <code>max_position_embeddings</code>。",
    "help.param.T_eval":        "<strong>T_eval</strong>: <em>您的</em> 目标推理上下文长度。关键旋钮。",
    "help.param.gqa":           "<strong>n_kv_heads &lt; n_attention_heads</strong>: 模型使用 GQA (Grouped Query Attention)。减少 KV 内存但将 γ 推向 Hagedorn。",
    "help.param.swa":           "<strong>has_SWA</strong>: 模型使用 Sliding Window Attention (Mistral、gemma-2)。",
    "help.param.nparams":       "<strong>n_params</strong>: 总参数数量。诱导头出现的阈值约 400M。",
    "help.add_models.title":    "添加新模型 (3 种方式)",
    "help.add_models.preset":   "<strong>预设列表</strong>: 11 个流行模型已策划。从下拉菜单选择。",
    "help.add_models.hf":       "<strong>HF Hub 获取</strong>: 粘贴任意 id (例如 <code>Qwen/Qwen2.5-32B-Instruct</code>),点击 📥 获取。浏览器直接从 HuggingFace 下载 <code>config.json</code>,填充表单。适用于任何公共模型。",
    "help.add_models.manual":   "<strong>手动</strong>: 用模型卡的值直接填充表单字段。",
    "help.audit.title":         "可审计链",
    "help.audit.body":          "每个结果都显示完整的<strong>计算链</strong> — 每个公式步骤及其输入、输出和解释。点击任意步骤展开。引用的章节号 (§26.1、§19.1 等) 指向论文中的推导。",
    "help.synthesis.title":     "自然语言回答",
    "help.synthesis.body":      "在确定性链运行后,浏览器中的 LLM (Qwen2.5-0.5B,首次加载后约 350MB 缓存) 综合自然语言摘要。上面的数字<em>始终正确</em> (确定性 Python);综合由 LLM 生成 — 如有疑问,请对照链验证。",
    "help.params.title":        "常见参数解释",
    "help.verdicts.title":      "判定中要看什么",
    "help.verdict.yes":         "<strong style=\"color:#3fb950;\">是 / GO</strong> — 自信地继续;数字支持选择。",
    "help.verdict.deg":         "<strong style=\"color:#d29922;\">降级 / TINY-MODEL</strong> — 有警告地工作;阅读操作。",
    "help.verdict.no":          "<strong style=\"color:#f85149;\">否 / MEMORY-LIMITED</strong> — 不要按原样进行;提供缓解措施。",
    "help.privacy.title":       "隐私",
    "help.privacy.body":        "一切都在您的浏览器中运行。无遥测,无分析,无数据发送到任何地方。即使是 LLM 模型也通过 WebGPU/WebAssembly 在本地运行。您的 model_ids 和问题永不离开此页面。",
    "help.source.title":        "源代码和论文",
    "help.source.body":         "源代码: <a href=\"https://github.com/karlesmarin/tafagent\" target=\"_blank\">github.com/karlesmarin/tafagent</a><br>论文: <em>Marin 2026 — Predicting How Transformers Attend</em> (<a href=\"https://zenodo.org/records/19826343\" target=\"_blank\">Zenodo</a>; arXiv 即将)<br>数据集: <a href=\"https://huggingface.co/datasets/karlexmarin/taf-attention-decay\" target=\"_blank\">taf-attention-decay</a> — 32个模型上的58次γ测量 (CC-BY-4.0)",

    "footer.text":             "© 2026 Carles Marin · Apache-2.0 · 独立研究 · 闭合论文回路的工具。",
  },
};

// Code of the language currently in use. Starts as English and is reassigned
// by setLang() and initI18n().
let currentLang = "en";

/**
 * Return the code of the currently active language.
 *
 * @returns {string} The current value of the module-level `currentLang`.
 */
export function getLang() {
  return currentLang;
}

/**
 * Switch the active language, remember the choice and refresh the page text.
 *
 * Codes that do not name a translation table are ignored, so the active
 * language is never left pointing at something `t()` cannot look up.
 *
 * @param {string} code - Language code; must be an own key of TRANSLATIONS.
 * @returns {void}
 */
export function setLang(code) {
  // Own-property check: a bare `TRANSLATIONS[code]` truthiness test would also
  // accept inherited names such as "constructor", "toString" or "__proto__".
  const isKnown =
    Object.prototype.hasOwnProperty.call(TRANSLATIONS, code) &&
    Boolean(TRANSLATIONS[code]);
  if (!isKnown) return;

  currentLang = code;
  try {
    localStorage.setItem("tafagent_lang", code);
  } catch (e) {
    // Deliberate best effort: if storage is unavailable or throws, the
    // language still switches for this page view, it just is not remembered.
  }
  applyTranslations();

  // Highlight active flag
  document.querySelectorAll("[data-lang]").forEach((el) => {
    el.classList.toggle("lang-active", el.dataset.lang === code);
  });
}

/**
 * Look up a translation for `key`.
 *
 * Resolution order: the active language's table, then the English table,
 * then the key itself. Only own properties of each table count, so keys that
 * collide with inherited object members (e.g. "constructor") are treated as
 * missing instead of returning a function.
 *
 * @param {string} key - Translation key, e.g. "help.title".
 * @returns {string} The translated text, or `key` when no table defines it.
 */
export function t(key) {
  const own = (obj, prop) =>
    obj != null && Object.prototype.hasOwnProperty.call(obj, prop);

  // Tolerate an active language with no table: skip straight to English
  // rather than throwing on `undefined[key]`.
  const activeTable = own(TRANSLATIONS, currentLang)
    ? TRANSLATIONS[currentLang]
    : undefined;

  for (const table of [activeTable, TRANSLATIONS.en]) {
    const value = own(table, key) ? table[key] : undefined;
    // null/undefined entries fall through, matching `??` semantics.
    if (value !== undefined && value !== null) return value;
  }
  return key;
}

/**
 * Re-render every translatable element in the document for the active
 * language.
 *
 * Elements carrying `data-i18n` get their `innerHTML` replaced with the
 * translation of that key; elements carrying `data-i18n-placeholder` get
 * their `placeholder` replaced the same way.
 *
 * @returns {void}
 */
export function applyTranslations() {
  // Allow HTML in translations (we control them)
  for (const node of document.querySelectorAll("[data-i18n]")) {
    node.innerHTML = t(node.dataset.i18n);
  }

  for (const field of document.querySelectorAll("[data-i18n-placeholder]")) {
    const placeholderKey = field.dataset.i18nPlaceholder;
    field.placeholder = t(placeholderKey);
  }
}

// Publish the i18n entry points on `window` when one exists:
//  - __taf_applyTranslations lets dynamically-inserted DOM (renderProfile,
//    renderCompare) re-apply translations after it is added to the page;
//  - __taf_t gives non-import-based modules (e.g. hf_autocomplete, which runs
//    outside main.js context) the lookup without a circular import.
if (typeof window !== "undefined") {
  Object.assign(window, {
    __taf_applyTranslations: applyTranslations,
    __taf_t: t,
  });
}

/**
 * Choose the initial language, translate the page and mark the active flag.
 *
 * Selection order:
 *   1. the code stored under "tafagent_lang" in localStorage, if it names a
 *      translation table;
 *   2. otherwise the first browser-preferred language (`navigator.language`,
 *      then each entry of `navigator.languages`) whose two-letter prefix
 *      names a translation table;
 *   3. otherwise `currentLang` is left unchanged.
 *
 * @returns {void}
 */
export function initI18n() {
  // Own-property check so inherited names ("constructor", "__proto__", ...)
  // coming from storage are never accepted as a language.
  const isSupported = (code) =>
    typeof code === "string" &&
    Object.prototype.hasOwnProperty.call(TRANSLATIONS, code) &&
    Boolean(TRANSLATIONS[code]);

  let stored = null;
  try {
    stored = localStorage.getItem("tafagent_lang");
  } catch (e) {
    // Best effort: if storage is unavailable or throws, fall back to
    // browser-language detection below.
  }

  if (isSupported(stored)) {
    currentLang = stored;
  } else {
    const nav = typeof navigator !== "undefined" ? navigator : {};
    const extraTags = Array.isArray(nav.languages) ? nav.languages : [];
    // Primary language first, then the rest of the preference list.
    const detected = [nav.language || "en", ...extraTags]
      .filter((tag) => typeof tag === "string")
      .map((tag) => tag.slice(0, 2).toLowerCase())
      .find(isSupported);
    if (detected !== undefined) currentLang = detected;
  }

  applyTranslations();

  // Mark active flag
  document.querySelectorAll("[data-lang]").forEach((el) => {
    el.classList.toggle("lang-active", el.dataset.lang === currentLang);
  });
}