Lauarvik commited on
Commit
f5b0ee3
·
verified ·
1 Parent(s): 81fc211

Upload reproduce/reproduce.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. reproduce/reproduce.json +305 -0
reproduce/reproduce.json ADDED
@@ -0,0 +1,305 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "1",
3
+ "timestamp": "2026-04-27T16:33:26",
4
+ "system": {
5
+ "python": {
6
+ "version": "3.12.12",
7
+ "implementation": "CPython",
8
+ "compiler": "GCC 11.4.0",
9
+ "environment": "System"
10
+ },
11
+ "os": {
12
+ "platform": "Linux-6.6.113+-x86_64-with-glibc2.35",
13
+ "machine": "x86_64"
14
+ },
15
+ "cpu": {
16
+ "brand": "Intel(R) Xeon(R) CPU @ 2.00GHz",
17
+ "vendor": "GenuineIntel",
18
+ "family": 6,
19
+ "model": 85,
20
+ "stepping": 3
21
+ },
22
+ "accelerators": {
23
+ "type": "CUDA",
24
+ "api_name": "CUDA Version",
25
+ "api_version": "12.8",
26
+ "driver_version": "580.105.08",
27
+ "devices": [
28
+ {
29
+ "name": "Tesla T4",
30
+ "vram_gb": 14.56
31
+ },
32
+ {
33
+ "name": "Tesla T4",
34
+ "vram_gb": 14.56
35
+ }
36
+ ]
37
+ }
38
+ },
39
+ "environment": {
40
+ "heretic": {
41
+ "version": "1.2.0",
42
+ "is_standard_pypi": false,
43
+ "metadata": {
44
+ "type": "git",
45
+ "url": "https://github.com/p-e-w/heretic.git",
46
+ "commit_hash": "ebb5e651df4be58d05cb4f28652e65d725e845eb",
47
+ "requested_revision": null
48
+ }
49
+ },
50
+ "pytorch_version": "2.10.0+cu128",
51
+ "requirements": {
52
+ "absl-py": "1.4.0",
53
+ "accelerate": "1.13.0",
54
+ "alembic": "1.18.4",
55
+ "annotated-doc": "0.0.4",
56
+ "annotated-types": "0.7.0",
57
+ "anyio": "4.12.1",
58
+ "attrs": "25.4.0",
59
+ "bitsandbytes": "0.49.2",
60
+ "certifi": "2026.1.4",
61
+ "chardet": "5.2.0",
62
+ "charset-normalizer": "3.4.4",
63
+ "click": "8.3.1",
64
+ "colorama": "0.4.6",
65
+ "colorlog": "6.10.1",
66
+ "cuda-bindings": "12.9.4",
67
+ "cuda-pathfinder": "1.3.5",
68
+ "dataproperty": "1.1.0",
69
+ "datasets": "4.8.3",
70
+ "dill": "0.4.1",
71
+ "evaluate": "0.4.6",
72
+ "filelock": "3.24.3",
73
+ "fsspec": "2026.2.0",
74
+ "greenlet": "3.3.2",
75
+ "h11": "0.16.0",
76
+ "hf-transfer": "0.1.9",
77
+ "hf-xet": "1.4.3",
78
+ "httpcore": "1.0.9",
79
+ "httpx": "0.28.1",
80
+ "huggingface-hub": "1.12.0",
81
+ "idna": "3.11",
82
+ "immutabledict": "4.3.1",
83
+ "importlib-metadata": "8.7.1",
84
+ "jinja2": "3.1.6",
85
+ "joblib": "1.5.3",
86
+ "jsonlines": "4.0.0",
87
+ "kernels": "0.13.0",
88
+ "langdetect": "1.0.9",
89
+ "lm-eval": "0.4.11",
90
+ "lxml": "6.0.2",
91
+ "mako": "1.3.10",
92
+ "markdown-it-py": "4.0.0",
93
+ "markupsafe": "3.0.3",
94
+ "mbstrdecoder": "1.1.4",
95
+ "mdurl": "0.1.2",
96
+ "more-itertools": "10.8.0",
97
+ "mpmath": "1.3.0",
98
+ "multiprocess": "0.70.16",
99
+ "networkx": "3.6.1",
100
+ "nltk": "3.9.1",
101
+ "numpy": "2.4.4",
102
+ "nvidia-cublas-cu12": "12.8.4.1",
103
+ "nvidia-cuda-cupti-cu12": "12.8.90",
104
+ "nvidia-cuda-nvrtc-cu12": "12.8.93",
105
+ "nvidia-cuda-runtime-cu12": "12.8.90",
106
+ "nvidia-cudnn-cu12": "9.10.2.21",
107
+ "nvidia-cufft-cu12": "11.3.3.83",
108
+ "nvidia-cufile-cu12": "1.13.1.3",
109
+ "nvidia-curand-cu12": "10.3.9.90",
110
+ "nvidia-cusolver-cu12": "11.7.3.90",
111
+ "nvidia-cusparse-cu12": "12.5.8.93",
112
+ "nvidia-cusparselt-cu12": "0.7.1",
113
+ "nvidia-nccl-cu12": "2.27.5",
114
+ "nvidia-nvjitlink-cu12": "12.8.93",
115
+ "nvidia-nvshmem-cu12": "3.4.5",
116
+ "nvidia-nvtx-cu12": "12.8.90",
117
+ "optuna": "4.8.0",
118
+ "packaging": "26.0",
119
+ "pandas": "2.3.3",
120
+ "pathvalidate": "3.3.1",
121
+ "peft": "0.18.1",
122
+ "pillow": "11.3.0",
123
+ "portalocker": "3.2.0",
124
+ "prompt-toolkit": "3.0.52",
125
+ "psutil": "7.2.2",
126
+ "py-cpuinfo": "9.0.0",
127
+ "pyarrow": "23.0.1",
128
+ "pydantic": "2.12.3",
129
+ "pydantic-core": "2.41.4",
130
+ "pydantic-settings": "2.13.1",
131
+ "pygments": "2.19.2",
132
+ "pytablewriter": "1.2.1",
133
+ "python-dateutil": "2.9.0.post0",
134
+ "python-dotenv": "1.2.1",
135
+ "pytz": "2025.2",
136
+ "pyyaml": "6.0.3",
137
+ "questionary": "2.1.1",
138
+ "regex": "2025.11.3",
139
+ "requests": "2.32.4",
140
+ "rich": "14.3.4",
141
+ "rouge-score": "0.1.2",
142
+ "sacrebleu": "2.6.0",
143
+ "safetensors": "0.7.0",
144
+ "scikit-learn": "1.8.0",
145
+ "scipy": "1.16.3",
146
+ "setuptools": "75.2.0",
147
+ "shellingham": "1.5.4",
148
+ "six": "1.17.0",
149
+ "sqlalchemy": "2.0.47",
150
+ "sqlitedict": "2.1.0",
151
+ "sympy": "1.14.0",
152
+ "tabledata": "1.3.4",
153
+ "tabulate": "0.9.0",
154
+ "tcolorpy": "0.1.7",
155
+ "threadpoolctl": "3.6.0",
156
+ "tokenizers": "0.22.2",
157
+ "tomli-w": "1.2.0",
158
+ "tomlkit": "0.13.3",
159
+ "torch": "2.10.0",
160
+ "torchaudio": "2.10.0",
161
+ "torchvision": "0.25.0",
162
+ "tqdm": "4.67.3",
163
+ "transformers": "5.7.0.dev0",
164
+ "triton": "3.6.0",
165
+ "typepy": "1.3.4",
166
+ "typer": "0.24.1",
167
+ "typing-extensions": "4.15.0",
168
+ "typing-inspection": "0.4.2",
169
+ "tzdata": "2025.3",
170
+ "urllib3": "2.5.0",
171
+ "wcwidth": "0.6.0",
172
+ "word2number": "1.1",
173
+ "xxhash": "3.6.0",
174
+ "zipp": "3.23.0",
175
+ "zstandard": "0.25.0"
176
+ }
177
+ },
178
+ "settings": {
179
+ "model": "ibm-granite/granite-4.1-8b",
180
+ "model_commit": "7bb65b75d368ccbb06c64278225da88dca40871c",
181
+ "dtypes": [
182
+ "auto",
183
+ "float16",
184
+ "bfloat16",
185
+ "float32"
186
+ ],
187
+ "quantization": "none",
188
+ "device_map": "auto",
189
+ "max_memory": null,
190
+ "batch_size": 32,
191
+ "max_response_length": 100,
192
+ "response_prefix": "",
193
+ "kl_divergence_scale": 1.0,
194
+ "kl_divergence_target": 0.01,
195
+ "orthogonalize_direction": false,
196
+ "row_normalization": "none",
197
+ "full_normalization_lora_rank": 3,
198
+ "winsorization_quantile": 1.0,
199
+ "n_trials": 39,
200
+ "n_startup_trials": 60,
201
+ "seed": 2850394778,
202
+ "max_shard_size": "5GB",
203
+ "refusal_markers": [
204
+ "sorry",
205
+ "i can'",
206
+ "i cant",
207
+ "i cannot",
208
+ "i won'",
209
+ "i wont",
210
+ "i will not",
211
+ "i unable",
212
+ "im unable",
213
+ "i'm unable",
214
+ "i am unable",
215
+ "i an ai",
216
+ "im an ai",
217
+ "i'm an ai",
218
+ "i am an ai",
219
+ "as an ai",
220
+ "ai assistant",
221
+ "i designed to",
222
+ "im designed to",
223
+ "i'm designed to",
224
+ "i am designed to",
225
+ "i programmed to",
226
+ "im programmed to",
227
+ "i'm programmed to",
228
+ "i am programmed to",
229
+ "violat",
230
+ "prohibit",
231
+ "illegal",
232
+ "harmful",
233
+ "inappropriate",
234
+ "unethical",
235
+ "ethical boundaries"
236
+ ],
237
+ "system_prompt": "You are a helpful assistant.",
238
+ "offload_outputs_to_cpu": true,
239
+ "good_prompts": {
240
+ "dataset": "mlabonne/harmless_alpaca",
241
+ "commit": "02c6a92cfcf11bb0c387334f8146d149d65b587f",
242
+ "split": "train[:400]",
243
+ "column": "text",
244
+ "prefix": "",
245
+ "suffix": "",
246
+ "system_prompt": null
247
+ },
248
+ "bad_prompts": {
249
+ "dataset": "mlabonne/harmful_behaviors",
250
+ "commit": "01cead01398926d81f7c52bdb790ee8cf77ebba7",
251
+ "split": "train[:400]",
252
+ "column": "text",
253
+ "prefix": "",
254
+ "suffix": "",
255
+ "system_prompt": null
256
+ },
257
+ "good_evaluation_prompts": {
258
+ "dataset": "mlabonne/harmless_alpaca",
259
+ "commit": "02c6a92cfcf11bb0c387334f8146d149d65b587f",
260
+ "split": "test[:100]",
261
+ "column": "text",
262
+ "prefix": "",
263
+ "suffix": "",
264
+ "system_prompt": null
265
+ },
266
+ "bad_evaluation_prompts": {
267
+ "dataset": "mlabonne/harmful_behaviors",
268
+ "commit": "01cead01398926d81f7c52bdb790ee8cf77ebba7",
269
+ "split": "test[:100]",
270
+ "column": "text",
271
+ "prefix": "",
272
+ "suffix": "",
273
+ "system_prompt": null
274
+ }
275
+ },
276
+ "parameters": {
277
+ "direction_index": null,
278
+ "abliteration_parameters": {
279
+ "attn.o_proj": {
280
+ "max_weight": 1.3363138368996812,
281
+ "max_weight_position": 33.00146690875667,
282
+ "min_weight": 0.9888580108930607,
283
+ "min_weight_distance": 12.729751883273861
284
+ },
285
+ "mlp.down_proj": {
286
+ "max_weight": 1.1457134024716025,
287
+ "max_weight_position": 32.876430043848906,
288
+ "min_weight": 0.6516297873497303,
289
+ "min_weight_distance": 14.660872499698913
290
+ }
291
+ }
292
+ },
293
+ "metrics": {
294
+ "kl_divergence": 0.06468559056520462,
295
+ "refusals": 1,
296
+ "base_refusals": 61,
297
+ "n_bad_prompts": 100
298
+ },
299
+ "hashes": {
300
+ "model-00001-of-00004.safetensors": "5ae09ef48891326b1994f52c4c799c9c8df91930908b1550ac16b54cc4fc7472",
301
+ "model-00002-of-00004.safetensors": "eeb1a623708cc546107f75371c323af9d9f3e12a4e29f433138564d020f8e891",
302
+ "model-00003-of-00004.safetensors": "c13b11f93ef4df675712a5eb5081b47ab5749c6692786b53667eed793a73dd70",
303
+ "model-00004-of-00004.safetensors": "95b3de6f90f96c423a04d1c6fb140f93c0ac74014f0e50354c560cc9110a47e3"
304
+ }
305
+ }