{ "base_model": "Qwen/Qwen3-1.7B", "adapter": "/tmp/atomicvision_publish_runner/output/train/checkpoint-1", "episodes_per_difficulty": 32, "seed_start": 10000, "seed_policy": { "sft_train": { "start": 1000, "stop": 4000 }, "grpo_train": { "start": 4000, "stop": 8000 }, "heldout_eval": { "start": 10000, "stop": 11000 } }, "heldout_seed_enforced": true, "max_tool_steps": 3, "max_new_tokens": 180, "modes": [ "strict" ], "results": { "medium": { "baseline_prior_submit": { "episodes": 32, "mean_reward": 4.65651721875, "mean_f1": 0.80580365625, "mean_mae": 0.0244615625, "mean_steps": 2.0, "mean_scan_cost": 1.5, "done_rate": 1.0, "tool_failure_rate": 0.0, "mean_repeated_tool_calls": 0.0, "strict_tool_call_pass_rate": 1.0, "normalized_tool_call_pass_rate": 1.0, "normalized_tool_call_repair_rate": 0.0, "first_action_valid_rate": 1.0, "first_action_ask_prior_rate": 1.0, "submit_action_rate": 1.0, "mean_identity_reward": 3.2232142812499998, "mean_concentration_reward": 2.23155896875, "mean_confidence_reward": 0.270494, "mean_false_positive_penalty": -0.1875, "mean_missed_defect_penalty": -0.28125, "mean_timeout_penalty": 0.0, "mean_outcome_reward_total": 5.72526725, "mean_penalty_total": -1.06875 }, "strict_adapter": { "episodes": 32, "mean_reward": 4.50648265625, "mean_f1": 0.789137, "mean_mae": 0.027124218749999998, "mean_steps": 2.0, "mean_scan_cost": 1.5, "done_rate": 1.0, "tool_failure_rate": 0.0, "mean_repeated_tool_calls": 0.0, "strict_tool_call_pass_rate": 1.0, "normalized_tool_call_pass_rate": 1.0, "normalized_tool_call_repair_rate": 0.0, "first_action_valid_rate": 1.0, "first_action_ask_prior_rate": 1.0, "submit_action_rate": 1.0, "mean_identity_reward": 3.156547625, "mean_concentration_reward": 2.16269634375, "mean_confidence_reward": 0.29348875, "mean_false_positive_penalty": -0.1875, "mean_missed_defect_penalty": -0.31875, "mean_timeout_penalty": 0.0, "mean_outcome_reward_total": 5.61273271875, "mean_penalty_total": -1.10625 }, "strict_failures": [] }, "hard": { "baseline_prior_submit": { "episodes": 32, "mean_reward": 5.01651990625, "mean_f1": 0.85153328125, "mean_mae": 0.02220903125, "mean_steps": 2.0, "mean_scan_cost": 1.5, "done_rate": 1.0, "tool_failure_rate": 0.0, "mean_repeated_tool_calls": 0.0, "strict_tool_call_pass_rate": 1.0, "normalized_tool_call_pass_rate": 1.0, "normalized_tool_call_repair_rate": 0.0, "first_action_valid_rate": 1.0, "first_action_ask_prior_rate": 1.0, "submit_action_rate": 1.0, "mean_identity_reward": 3.40613278125, "mean_concentration_reward": 2.3444645625, "mean_confidence_reward": 0.53779759375, "mean_false_positive_penalty": -0.109375, "mean_missed_defect_penalty": -0.5625, "mean_timeout_penalty": 0.0, "mean_outcome_reward_total": 6.2883949375, "mean_penalty_total": -1.2718749999999999 }, "strict_adapter": { "episodes": 32, "mean_reward": 4.714775875, "mean_f1": 0.8206800937500001, "mean_mae": 0.02552296875, "mean_steps": 2.0, "mean_scan_cost": 1.5, "done_rate": 1.0, "tool_failure_rate": 0.0, "mean_repeated_tool_calls": 0.0, "strict_tool_call_pass_rate": 1.0, "normalized_tool_call_pass_rate": 1.0, "normalized_tool_call_repair_rate": 0.0, "first_action_valid_rate": 1.0, "first_action_ask_prior_rate": 1.0, "submit_action_rate": 1.0, "mean_identity_reward": 3.282720125, "mean_concentration_reward": 2.243760375, "mean_confidence_reward": 0.5257955, "mean_false_positive_penalty": -0.09375, "mean_missed_defect_penalty": -0.64375, "mean_timeout_penalty": 0.0, "mean_outcome_reward_total": 6.052276, "mean_penalty_total": -1.3375 }, "strict_failures": [] } } }