#!/usr/bin/env python3
# /// script
# requires-python = ">=3.10,<3.13"
# dependencies = [
#     "huggingface_hub>=0.26.0",
#     "torch>=2.7.0,<2.8.0",
#     "torchvision",
#     "trl>=0.17.0,<0.18.0",
#     "transformers>=4.51.0",
#     "datasets>=3.1.0",
#     "accelerate>=1.0.0,<1.7.0",
#     "peft>=0.15.0",
#     "bitsandbytes>=0.45.0",
#     "unsloth>=2025.10,<2026.0",
#     "matplotlib>=3.9.0",
#     "pyyaml",
#     "fastapi",
#     "uvicorn",
#     "pydantic>=2.6.0",
#     "openenv-core[core]>=0.2.2",
#     "openai>=1.20.0",
#     "requests>=2.31.0",
# ]
# ///
"""HF Job entrypoint: download repo from HF Space, run SFT + GRPO, upload artifacts.

Run with:
    hf jobs uv run --flavor a10g-large --secrets HF_TOKEN training/hf_job_train.py

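Env vars (all optional):
    SOURCE_REPO        (default: pixxel-phantom/orbital-thruster-env)
    SOURCE_REPO_TYPE   (default: space)
    OUTPUT_REPO        (default: pixxel-phantom/orbital-thruster-grpo)
    ORBITAL_BASE_MODEL (default: Qwen/Qwen3-4B-Instruct-2507)
    ORBITAL_VANILLA    (default: 0; set to 1 to run training/local_train.py
                       instead of the qwen3 SFT + GRPO scripts)
    ORBITAL_SFT_STEPS  (default: 40; only read when ORBITAL_VANILLA=1)
    ORBITAL_GRPO_STEPS (default: 80; only read when ORBITAL_VANILLA=1)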
"""
from __future__ import annotations

import os
import shutil
import subprocess
import sys
from pathlib import Path

from huggingface_hub import HfApi, snapshot_download

# Job configuration, read once from the environment at import time. Each
# setting falls back to the project default when its variable is unset.
SOURCE_REPO = os.getenv("SOURCE_REPO", "pixxel-phantom/orbital-thruster-env")
SOURCE_REPO_TYPE = os.getenv("SOURCE_REPO_TYPE", "space")
OUTPUT_REPO = os.getenv("OUTPUT_REPO", "pixxel-phantom/orbital-thruster-grpo")
BASE_MODEL = os.getenv("ORBITAL_BASE_MODEL", "Qwen/Qwen3-4B-Instruct-2507")

# Scratch directory for the job; created eagerly so later steps can write
# into it without checking for its existence.
WORK = Path("/tmp/orbital_work")
os.makedirs(WORK, exist_ok=True)


def _source_url(repo_id: str, repo_type: str) -> str:
    """Return the Hub web URL for *repo_id* given its *repo_type*."""
    # Spaces and datasets live under a type prefix on the Hub; any other
    # type (i.e. models) is served from the root.
    prefix = {"space": "spaces/", "dataset": "datasets/"}.get(repo_type, "")
    return f"https://huggingface.co/{prefix}{repo_id}"


def _stage_artifacts(repo: Path, staging: Path, subdirs: list[str]) -> list[str]:
    """Copy each existing entry of *subdirs* from *repo* into a fresh *staging* tree.

    The staging directory is wiped first so files left by an earlier run in
    the same scratch area are never uploaded by accident.

    Returns:
        The relative sub-paths that were actually copied.
    """
    if staging.exists():
        shutil.rmtree(staging)
    staging.mkdir(parents=True)

    copied: list[str] = []
    for sub in subdirs:
        src = repo / sub
        if not src.is_dir():
            # Best-effort: a missing directory is reported, not fatal.
            print(f"[hf_job] warning: artifact dir not found, skipping: {src}")
            continue
        dst = staging / sub
        dst.parent.mkdir(parents=True, exist_ok=True)
        shutil.copytree(src, dst)
        copied.append(sub)
    return copied


def main() -> None:
    """Run the whole job: fetch the repo, train, evaluate, upload artifacts.

    Steps, in order:
      1. Snapshot ``SOURCE_REPO`` into ``WORK / "repo"``.
      2. Generate seed trajectories if the seed file is absent.
      3. Train: ``training/local_train.py`` when ``ORBITAL_VANILLA=1``,
         otherwise the qwen3 SFT script followed by the qwen3 GRPO script.
      4. Run ``training/eval_trained_model.py``.
      5. Stage the artifact directories plus a README and upload them to
         ``OUTPUT_REPO`` (created as a model repo if needed).

    Raises:
        subprocess.CalledProcessError: if any child script exits non-zero.
        ValueError: if ``ORBITAL_SFT_STEPS`` / ``ORBITAL_GRPO_STEPS`` is not
            an integer (only parsed in vanilla mode).
    """
    print(f"[hf_job] downloading {SOURCE_REPO} ({SOURCE_REPO_TYPE})")
    repo_dir = snapshot_download(
        repo_id=SOURCE_REPO,
        repo_type=SOURCE_REPO_TYPE,
        local_dir=str(WORK / "repo"),
    )
    repo = Path(repo_dir)
    print(f"[hf_job] repo at {repo}")

    # Export the resolved base model so child scripts see the same value
    # even when the variable was not set by the caller.
    os.environ["ORBITAL_BASE_MODEL"] = BASE_MODEL

    # Put the repo first on PYTHONPATH but keep whatever the caller had,
    # instead of discarding an inherited search path.
    inherited = os.environ.get("PYTHONPATH")
    pythonpath = os.pathsep.join([str(repo), inherited]) if inherited else str(repo)
    env = {**os.environ, "PYTHONPATH": pythonpath}

    def run(cmd: list[str], cwd: Path) -> None:
        """Echo *cmd* and run it in *cwd*; a non-zero exit raises."""
        print(f"[hf_job] $ {' '.join(cmd)}  (cwd={cwd})")
        subprocess.run(cmd, cwd=str(cwd), env=env, check=True)

    seed_path = repo / "training" / "data" / "seed_trajectories.jsonl"
    if not seed_path.exists():
        run([sys.executable, "training/generate_seed_trajectories.py"], cwd=repo)

    use_vanilla = os.environ.get("ORBITAL_VANILLA", "0") == "1"
    if use_vanilla:
        # Round-trip through int() so a malformed value fails here rather
        # than inside the training script.
        sft_steps = str(int(os.environ.get("ORBITAL_SFT_STEPS", "40")))
        grpo_steps = str(int(os.environ.get("ORBITAL_GRPO_STEPS", "80")))
        run([sys.executable, "training/local_train.py", "--phase", "all",
             "--sft-steps", sft_steps, "--grpo-steps", grpo_steps], cwd=repo)
    else:
        run([sys.executable, "training/qwen3_smoke_sft.py"], cwd=repo)
        run([sys.executable, "training/qwen3_grpo_train.py"], cwd=repo)
    run([sys.executable, "training/eval_trained_model.py"], cwd=repo)

    api = HfApi()
    api.create_repo(repo_id=OUTPUT_REPO, repo_type="model", exist_ok=True)
    print(f"[hf_job] uploading artifacts to {OUTPUT_REPO}")

    artifact_root = WORK / "upload"
    staged = _stage_artifacts(
        repo,
        artifact_root,
        ["trainer_output/qwen_grpo", "trainer_output/qwen_sft", "outputs"],
    )
    if not staged:
        print("[hf_job] warning: no artifact directories found; uploading README only")

    readme = artifact_root / "README.md"
    # The text contains non-ASCII dashes, so pin the encoding instead of
    # relying on the locale default.
    readme.write_text(
        "# OrbitalThrusterEnv — GRPO LoRA adapter\n\n"
        f"Base model: `{BASE_MODEL}`\n\n"
        f"Source env: {_source_url(SOURCE_REPO, SOURCE_REPO_TYPE)}\n\n"
        "Trained via TRL `GRPOTrainer` + Unsloth on OpenEnv `OrbitalThrusterEnv` flagship task `mission_ops_long_horizon`.\n"
        "5 independent reward funcs (format, env-step, mode-match, anti-spam, fuel-discipline) for anti-reward-hacking.\n\n"
        "## Artifacts\n"
        "- `trainer_output/qwen_grpo/` — final LoRA adapter\n"
        "- `trainer_output/qwen_sft/` — SFT warm-start adapter\n"
        "- `outputs/training/grpo_metrics.png` — reward + loss curves\n"
        "- `outputs/eval_trained/trained_vs_baseline.png` — trained vs baselines on 4 tasks\n",
        encoding="utf-8",
    )
    api.upload_folder(folder_path=str(artifact_root), repo_id=OUTPUT_REPO, repo_type="model")
    print(f"[hf_job] done. https://huggingface.co/{OUTPUT_REPO}")


# Script entry point: run the job only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()