""" Cloud training runner. Called inside HF Space container. Uses ai-toolkit substrate (same as local) but on cloud GPU. """ import os import subprocess import sys from pathlib import Path REPO_ROOT = Path("/app") CONFIG_PATH = REPO_ROOT / "train_config.yaml" OUTPUT_DIR = REPO_ROOT / "output" HF_HUB_TOKEN = os.environ.get("HF_HUB_TOKEN", "") def main(): OUTPUT_DIR.mkdir(parents=True, exist_ok=True) # Symlink the HF-cached model to the path expected by config model_src = Path("/app/hf_cache/hub/models--Niansuh--FLUX.1-schnell/snapshots") if model_src.exists(): # Link the actual snapshot dir snapshots = list(model_src.iterdir()) if snapshots: target = snapshots[0] link_path = Path("/app/hf_cache/hub/models--Niansuh--FLUX.1-schnell/snapshots/e69accbbbb51bd4722bc6ec9a3a2fa620e7ce7e5") if not link_path.exists(): link_path.symlink_to(target) # Adjust config for cloud paths cfg_text = CONFIG_PATH.read_text() cfg_text = cfg_text.replace("D:/AI_Training/", "/app/") cfg_text = cfg_text.replace("ostris/FLUX.1-schnell-training-adapter", "ostris/FLUX.1-schnell-training-adapter") cloud_cfg = REPO_ROOT / "train_config_cloud.yaml" cloud_cfg.write_text(cfg_text) # Run training print("[cloud_train] starting ai-toolkit sd_trainer") subprocess.run([ sys.executable, "-m", "ai_toolkit.scripts.train", str(cloud_cfg), ], check=True) # Push adapter to HF Hub print("[cloud_train] pushing adapter to HF Hub") adapter_path = OUTPUT_DIR / "spock_lora_v1" if adapter_path.exists(): from huggingface_hub import HfApi api = HfApi(token=HF_HUB_TOKEN) api.upload_folder( folder_path=str(adapter_path), repo_id=os.environ.get("HF_REPO", "dandydonunhinged/spock-body-lora"), repo_type="model", ) print(f"[cloud_train] pushed to {os.environ.get('HF_REPO')}") print("[cloud_train] done") if __name__ == "__main__": main()