prodigyhuh committed on
Commit
c6ce9e9
·
verified ·
1 Parent(s): 3b42eef

Upload promoted hard recall micro boost adapter

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: Qwen/Qwen3-1.7B
3
+ license: mit
4
+ library_name: peft
5
+ pipeline_tag: text-generation
6
+ ---
7
+
8
+ # AtomicVision Hard Recall Micro Boost LoRA
9
+
10
+ This adapter materializes the promoted `checkpoint-1` checkpoint from the
11
+ targeted hard-recall micro-repair continuation.
12
+
13
+ ## Parent
14
+ - Base adapter: [prodigyhuh/atomicvision-medium-fidelity-boost-lora](https://huggingface.co/prodigyhuh/atomicvision-medium-fidelity-boost-lora)
15
+ - Source HF job: [69ed269fd70108f37acdef6d](https://huggingface.co/jobs/prodigyhuh/69ed269fd70108f37acdef6d)
16
+ - Source commit: `3838f9048bce4c6bc81e57f5c0dab00980c7fa08`
17
+
18
+ ## Held-out strict eval (`seed_start=10000`, `episodes=32`)
19
+
20
+ | Difficulty | Reward | F1 | MAE | Strict | Normalized | Done | Submit |
21
+ | --- | ---: | ---: | ---: | ---: | ---: | ---: | ---: |
22
+ | medium | 4.5065 | 0.7891 | 0.02712 | 1.00 | 1.00 | 1.00 | 1.00 |
23
+ | hard | 4.7148 | 0.8207 | 0.02552 | 1.00 | 1.00 | 1.00 | 1.00 |
24
+
25
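+ This run preserves perfect strict execution and slightly improves the hard slice
26
+ over the previous best published adapter without regressing medium (per `promotion_summary.json`, vs. the base adapter: hard F1 0.8162 → 0.8207, hard reward 4.6917 → 4.7148; medium F1 and reward unchanged).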
adapter_config.json ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alora_invocation_tokens": null,
3
+ "alpha_pattern": {},
4
+ "arrow_config": null,
5
+ "auto_mapping": null,
6
+ "base_model_name_or_path": "Qwen/Qwen3-1.7B",
7
+ "bias": "none",
8
+ "corda_config": null,
9
+ "ensure_weight_tying": false,
10
+ "eva_config": null,
11
+ "exclude_modules": null,
12
+ "fan_in_fan_out": false,
13
+ "inference_mode": true,
14
+ "init_lora_weights": true,
15
+ "layer_replication": null,
16
+ "layers_pattern": null,
17
+ "layers_to_transform": null,
18
+ "loftq_config": {},
19
+ "lora_alpha": 32,
20
+ "lora_bias": false,
21
+ "lora_dropout": 0.05,
22
+ "lora_ga_config": null,
23
+ "megatron_config": null,
24
+ "megatron_core": "megatron.core",
25
+ "modules_to_save": null,
26
+ "peft_type": "LORA",
27
+ "peft_version": "0.19.1",
28
+ "qalora_group_size": 16,
29
+ "r": 16,
30
+ "rank_pattern": {},
31
+ "revision": null,
32
+ "target_modules": [
33
+ "k_proj",
34
+ "down_proj",
35
+ "gate_proj",
36
+ "o_proj",
37
+ "q_proj",
38
+ "up_proj",
39
+ "v_proj"
40
+ ],
41
+ "target_parameters": null,
42
+ "task_type": "CAUSAL_LM",
43
+ "trainable_token_indices": null,
44
+ "use_bdlora": null,
45
+ "use_dora": false,
46
+ "use_qalora": false,
47
+ "use_rslora": false
48
+ }
adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:745af6dec82baa429af53c2feba7c5832ef2f990bcc8d97f680822e5ea33f110
3
+ size 69782384
chat_template.jinja ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {#- Training variant of the Qwen3 chat template (see qwen3.jinja for the original).
2
+ Modifications vs the original:
3
+ - {%- if '</think>' in content %} → {%- if '<think>' in content and '</think>' in content %}
4
+ Always check for both tags to avoid edge cases where the model generates only one tag.
5
+ - Removed the loop.index0 > ns.last_query_index conditional; always include thinking block.
6
+ This makes the template prefix-preserving for the [user, assistant] → [user, assistant, tool] transition.
7
+ - Added {% generation %} / {% endgeneration %} around assistant message output to support
8
+ assistant-only loss masking in SFT training.
9
+ -#}
10
+ {%- if tools %}
11
+ {{- '<|im_start|>system\n' }}
12
+ {%- if messages[0].role == 'system' %}
13
+ {{- messages[0].content + '\n\n' }}
14
+ {%- endif %}
15
+ {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
16
+ {%- for tool in tools %}
17
+ {{- "\n" }}
18
+ {{- tool | tojson }}
19
+ {%- endfor %}
20
+ {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
21
+ {%- else %}
22
+ {%- if messages[0].role == 'system' %}
23
+ {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
24
+ {%- endif %}
25
+ {%- endif %}
26
+ {%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
27
+ {%- for message in messages[::-1] %}
28
+ {%- set index = (messages|length - 1) - loop.index0 %}
29
+ {%- if ns.multi_step_tool and message.role == "user" and message.content is string and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
30
+ {%- set ns.multi_step_tool = false %}
31
+ {%- set ns.last_query_index = index %}
32
+ {%- endif %}
33
+ {%- endfor %}
34
+ {%- for message in messages %}
35
+ {%- if message.content is string %}
36
+ {%- set content = message.content %}
37
+ {%- else %}
38
+ {%- set content = '' %}
39
+ {%- endif %}
40
+ {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
41
+ {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
42
+ {%- elif message.role == "assistant" %}
43
+ {%- set reasoning_content = '' %}
44
+ {%- if message.reasoning_content is string %}
45
+ {%- set reasoning_content = message.reasoning_content %}
46
+ {%- else %}
47
+ {%- if '<think>' in content and '</think>' in content %}
48
+ {%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
49
+ {%- set content = content.split('</think>')[-1].lstrip('\n') %}
50
+ {%- endif %}
51
+ {%- endif %}
52
+ {{- '<|im_start|>' + message.role + '\n' }}
53
+ {%- generation %}
54
+ {{- '<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
55
+ {%- if message.tool_calls %}
56
+ {%- for tool_call in message.tool_calls %}
57
+ {%- if (loop.first and content) or (not loop.first) %}
58
+ {{- '\n' }}
59
+ {%- endif %}
60
+ {%- if tool_call.function %}
61
+ {%- set tool_call = tool_call.function %}
62
+ {%- endif %}
63
+ {{- '<tool_call>\n{"name": "' }}
64
+ {{- tool_call.name }}
65
+ {{- '", "arguments": ' }}
66
+ {%- if tool_call.arguments is string %}
67
+ {{- tool_call.arguments }}
68
+ {%- else %}
69
+ {{- tool_call.arguments | tojson }}
70
+ {%- endif %}
71
+ {{- '}\n</tool_call>' }}
72
+ {%- endfor %}
73
+ {%- endif %}
74
+ {{- '<|im_end|>\n' }}
75
+ {%- endgeneration %}
76
+ {%- elif message.role == "tool" %}
77
+ {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
78
+ {{- '<|im_start|>user' }}
79
+ {%- endif %}
80
+ {{- '\n<tool_response>\n' }}
81
+ {{- content }}
82
+ {{- '\n</tool_response>' }}
83
+ {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
84
+ {{- '<|im_end|>\n' }}
85
+ {%- endif %}
86
+ {%- endif %}
87
+ {%- endfor %}
88
+ {%- if add_generation_prompt %}
89
+ {{- '<|im_start|>assistant\n' }}
90
+ {%- if enable_thinking is defined and enable_thinking is false %}
91
+ {{- '<think>\n\n</think>\n\n' }}
92
+ {%- endif %}
93
+ {%- endif %}
heldout_eval.json ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "base_model": "Qwen/Qwen3-1.7B",
3
+ "adapter": "/tmp/atomicvision_publish_runner/output/train/checkpoint-1",
4
+ "episodes_per_difficulty": 32,
5
+ "seed_start": 10000,
6
+ "seed_policy": {
7
+ "sft_train": {
8
+ "start": 1000,
9
+ "stop": 4000
10
+ },
11
+ "grpo_train": {
12
+ "start": 4000,
13
+ "stop": 8000
14
+ },
15
+ "heldout_eval": {
16
+ "start": 10000,
17
+ "stop": 11000
18
+ }
19
+ },
20
+ "heldout_seed_enforced": true,
21
+ "max_tool_steps": 3,
22
+ "max_new_tokens": 180,
23
+ "modes": [
24
+ "strict"
25
+ ],
26
+ "results": {
27
+ "medium": {
28
+ "baseline_prior_submit": {
29
+ "episodes": 32,
30
+ "mean_reward": 4.65651721875,
31
+ "mean_f1": 0.80580365625,
32
+ "mean_mae": 0.0244615625,
33
+ "mean_steps": 2.0,
34
+ "mean_scan_cost": 1.5,
35
+ "done_rate": 1.0,
36
+ "tool_failure_rate": 0.0,
37
+ "mean_repeated_tool_calls": 0.0,
38
+ "strict_tool_call_pass_rate": 1.0,
39
+ "normalized_tool_call_pass_rate": 1.0,
40
+ "normalized_tool_call_repair_rate": 0.0,
41
+ "first_action_valid_rate": 1.0,
42
+ "first_action_ask_prior_rate": 1.0,
43
+ "submit_action_rate": 1.0,
44
+ "mean_identity_reward": 3.2232142812499998,
45
+ "mean_concentration_reward": 2.23155896875,
46
+ "mean_confidence_reward": 0.270494,
47
+ "mean_false_positive_penalty": -0.1875,
48
+ "mean_missed_defect_penalty": -0.28125,
49
+ "mean_timeout_penalty": 0.0,
50
+ "mean_outcome_reward_total": 5.72526725,
51
+ "mean_penalty_total": -1.06875
52
+ },
53
+ "strict_adapter": {
54
+ "episodes": 32,
55
+ "mean_reward": 4.50648265625,
56
+ "mean_f1": 0.789137,
57
+ "mean_mae": 0.027124218749999998,
58
+ "mean_steps": 2.0,
59
+ "mean_scan_cost": 1.5,
60
+ "done_rate": 1.0,
61
+ "tool_failure_rate": 0.0,
62
+ "mean_repeated_tool_calls": 0.0,
63
+ "strict_tool_call_pass_rate": 1.0,
64
+ "normalized_tool_call_pass_rate": 1.0,
65
+ "normalized_tool_call_repair_rate": 0.0,
66
+ "first_action_valid_rate": 1.0,
67
+ "first_action_ask_prior_rate": 1.0,
68
+ "submit_action_rate": 1.0,
69
+ "mean_identity_reward": 3.156547625,
70
+ "mean_concentration_reward": 2.16269634375,
71
+ "mean_confidence_reward": 0.29348875,
72
+ "mean_false_positive_penalty": -0.1875,
73
+ "mean_missed_defect_penalty": -0.31875,
74
+ "mean_timeout_penalty": 0.0,
75
+ "mean_outcome_reward_total": 5.61273271875,
76
+ "mean_penalty_total": -1.10625
77
+ },
78
+ "strict_failures": []
79
+ },
80
+ "hard": {
81
+ "baseline_prior_submit": {
82
+ "episodes": 32,
83
+ "mean_reward": 5.01651990625,
84
+ "mean_f1": 0.85153328125,
85
+ "mean_mae": 0.02220903125,
86
+ "mean_steps": 2.0,
87
+ "mean_scan_cost": 1.5,
88
+ "done_rate": 1.0,
89
+ "tool_failure_rate": 0.0,
90
+ "mean_repeated_tool_calls": 0.0,
91
+ "strict_tool_call_pass_rate": 1.0,
92
+ "normalized_tool_call_pass_rate": 1.0,
93
+ "normalized_tool_call_repair_rate": 0.0,
94
+ "first_action_valid_rate": 1.0,
95
+ "first_action_ask_prior_rate": 1.0,
96
+ "submit_action_rate": 1.0,
97
+ "mean_identity_reward": 3.40613278125,
98
+ "mean_concentration_reward": 2.3444645625,
99
+ "mean_confidence_reward": 0.53779759375,
100
+ "mean_false_positive_penalty": -0.109375,
101
+ "mean_missed_defect_penalty": -0.5625,
102
+ "mean_timeout_penalty": 0.0,
103
+ "mean_outcome_reward_total": 6.2883949375,
104
+ "mean_penalty_total": -1.2718749999999999
105
+ },
106
+ "strict_adapter": {
107
+ "episodes": 32,
108
+ "mean_reward": 4.714775875,
109
+ "mean_f1": 0.8206800937500001,
110
+ "mean_mae": 0.02552296875,
111
+ "mean_steps": 2.0,
112
+ "mean_scan_cost": 1.5,
113
+ "done_rate": 1.0,
114
+ "tool_failure_rate": 0.0,
115
+ "mean_repeated_tool_calls": 0.0,
116
+ "strict_tool_call_pass_rate": 1.0,
117
+ "normalized_tool_call_pass_rate": 1.0,
118
+ "normalized_tool_call_repair_rate": 0.0,
119
+ "first_action_valid_rate": 1.0,
120
+ "first_action_ask_prior_rate": 1.0,
121
+ "submit_action_rate": 1.0,
122
+ "mean_identity_reward": 3.282720125,
123
+ "mean_concentration_reward": 2.243760375,
124
+ "mean_confidence_reward": 0.5257955,
125
+ "mean_false_positive_penalty": -0.09375,
126
+ "mean_missed_defect_penalty": -0.64375,
127
+ "mean_timeout_penalty": 0.0,
128
+ "mean_outcome_reward_total": 6.052276,
129
+ "mean_penalty_total": -1.3375
130
+ },
131
+ "strict_failures": []
132
+ }
133
+ }
134
+ }
promotion_summary.json ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "base_adapter": "/tmp/atomicvision_publish_runner/output/base_adapter",
3
+ "candidates": {
4
+ "base": {
5
+ "hard": {
6
+ "done": 1.0,
7
+ "f1": 0.81621578125,
8
+ "fail": 0.0,
9
+ "mae": 0.02591959375,
10
+ "normalized": 1.0,
11
+ "reward": 4.69171921875,
12
+ "strict": 1.0,
13
+ "submit": 1.0
14
+ },
15
+ "hard_f1_delta_vs_base": 0.0,
16
+ "hard_reward_delta_vs_base": 0.0,
17
+ "label": "base",
18
+ "medium": {
19
+ "done": 1.0,
20
+ "f1": 0.789137,
21
+ "fail": 0.0,
22
+ "mae": 0.027124218749999998,
23
+ "normalized": 1.0,
24
+ "reward": 4.50648265625,
25
+ "strict": 1.0,
26
+ "submit": 1.0
27
+ },
28
+ "medium_f1_delta_vs_base": 0.0,
29
+ "medium_reward_delta_vs_base": 0.0
30
+ },
31
+ "checkpoint-1": {
32
+ "hard": {
33
+ "done": 1.0,
34
+ "f1": 0.8206800937500001,
35
+ "fail": 0.0,
36
+ "mae": 0.02552296875,
37
+ "normalized": 1.0,
38
+ "reward": 4.714775875,
39
+ "strict": 1.0,
40
+ "submit": 1.0
41
+ },
42
+ "hard_f1_delta_vs_base": 0.004464312500000012,
43
+ "hard_reward_delta_vs_base": 0.02305665624999964,
44
+ "label": "checkpoint-1",
45
+ "medium": {
46
+ "done": 1.0,
47
+ "f1": 0.789137,
48
+ "fail": 0.0,
49
+ "mae": 0.027124218749999998,
50
+ "normalized": 1.0,
51
+ "reward": 4.50648265625,
52
+ "strict": 1.0,
53
+ "submit": 1.0
54
+ },
55
+ "medium_f1_delta_vs_base": 0.0,
56
+ "medium_reward_delta_vs_base": 0.0
57
+ }
58
+ },
59
+ "checkpoint_steps": [
60
+ 1
61
+ ],
62
+ "dataset_counts": {
63
+ "submit_after_reference": 12,
64
+ "submit_prior": 4
65
+ },
66
+ "episodes_per_difficulty": 16,
67
+ "eval_difficulties": [
68
+ "medium",
69
+ "hard"
70
+ ],
71
+ "eval_episodes": 32,
72
+ "eval_seed_start": 10000,
73
+ "learning_rate": 1e-06,
74
+ "max_updates": 1,
75
+ "profile": "hard_recall_micro_repair",
76
+ "promotion_candidate": "checkpoint-1",
77
+ "seed_start": 3600,
78
+ "train_difficulties": [
79
+ "hard"
80
+ ]
81
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be75606093db2094d7cd20f3c2f385c212750648bd6ea4fb2bf507a6a4c55506
3
+ size 11422650
tokenizer_config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "backend": "tokenizers",
4
+ "bos_token": null,
5
+ "clean_up_tokenization_spaces": false,
6
+ "eos_token": "<|im_end|>",
7
+ "errors": "replace",
8
+ "extra_special_tokens": [
9
+ "<|im_start|>",
10
+ "<|im_end|>",
11
+ "<|object_ref_start|>",
12
+ "<|object_ref_end|>",
13
+ "<|box_start|>",
14
+ "<|box_end|>",
15
+ "<|quad_start|>",
16
+ "<|quad_end|>",
17
+ "<|vision_start|>",
18
+ "<|vision_end|>",
19
+ "<|vision_pad|>",
20
+ "<|image_pad|>",
21
+ "<|video_pad|>"
22
+ ],
23
+ "is_local": false,
24
+ "local_files_only": false,
25
+ "model_max_length": 131072,
26
+ "pad_token": "<|endoftext|>",
27
+ "split_special_tokens": false,
28
+ "tokenizer_class": "Qwen2Tokenizer",
29
+ "unk_token": null
30
+ }
training_dataset.jsonl ADDED
The diff for this file is too large to render. See raw diff