{ "model_name": "Qwen/Qwen3-8B", "lora_r": 32, "lora_alpha": 64, "lora_dropout": 0.0, "max_seq_length": 4096, "lr": 1e-05, "warmup_ratio": 0.05, "max_grad_norm": 1.0, "max_steps": 300, "batch_problems": 8, "group_size": 8, "temperature": 1.2, "top_p": 1.0, "think_temperature": 1.3, "out_temperature": 0.9, "cot_dump": true, "forbid_words": false, "max_think": 1024, "max_out": 64, "penalty": true, "lambda_penalty": 0.5, "normalize_advantages": true, "seed": 42, "n_checkpoints": 25, "env": "coin", "save_dir": "/workspace-vast/jbauer/exp/models/feedback_spillover", "wandb_project": "cot-oracle", "wandb_entity": "MATS10-CS-JB", "wandb_group": "cr2", "run_name": "coin_pen_cr2", "console_every": 5, "table_every": 25, "judge_model": "", "use_vllm": true, "gpu_mem_util": 0.5, "resume_from": "", "save_every": 0, "adaptive_lambda": true, "lambda_target": 0.9, "lambda_lr": 0.05, "lambda_ema_alpha": 0.9, "lambda_init": 0.3, "lambda_max": 1.2, "monitor_sees_commands": false, "monitor_count": false, "neutral_prompt": false, "penalize_cot": true, "n_flips_min": 16, "n_flips_max": 24 }