Spaces:

huanx
/

daniya-gptsovits-trainer

Paused

App Files Files Community

Codex committed 28 days ago

Commit

7dab114

1 Parent(s): 3987df7

Fix Daniya GPT-SoVITS training space

Browse files

Files changed (3) hide show

Dockerfile +1 -2
app.py +523 -140
requirements.txt +21 -9

Dockerfile CHANGED Viewed

@@ -9,11 +9,10 @@ WORKDIR /code
 COPY --chown=user requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
-RUN pip install --no-cache-dir "gradio>=5.0.0"
 COPY --chown=user . .
 USER user
 ENV HOME=/home/user PATH=/home/user/.local/bin:$PATH
-CMD ["python", "app.py"]

 COPY --chown=user requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
 COPY --chown=user . .
 USER user
 ENV HOME=/home/user PATH=/home/user/.local/bin:$PATH
+CMD ["python", "app.py"]

app.py CHANGED Viewed

@@ -1,183 +1,566 @@
 #!/usr/bin/env python3
 """
-GPT-SoVITS 训练器 — 达妮娅语音
-HuggingFace Space (CPU)
 """
-import os, sys, shutil, subprocess, logging
 from pathlib import Path
 import gradio as gr
-# ── 配置
 HF_TOKEN = os.environ.get("HF_TOKEN", "")
 DATASET_REPO = "huanx/daniya-voice-gptsovits"
 GPT_SOVITS_REPO = "https://github.com/RVC-Boss/GPT-SoVITS.git"
-GPT_SOVITS_DIR = Path.home() / "GPT-SoVITS"
-WORK_DIR = Path("/tmp/gptsovits_workspace")
-RAW_AUDIO = WORK_DIR / "raw_audio"
-OUTPUT_DIR = WORK_DIR / "output"
 logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
 log = logging.getLogger(__name__)
 def ensure_dirs():
-    for d in [WORK_DIR, RAW_AUDIO, OUTPUT_DIR]:
-        d.mkdir(parents=True, exist_ok=True)
-# ── 步骤 1：环境检查 + 安装 GPT-SoVITS
 def check_environment():
     try:
-        ensure_dirs()
-        # 强制清理旧目录再重新克隆
-        log.info("清理旧环境...")
-        subprocess.run(["rm", "-rf", str(GPT_SOVITS_DIR)], check=False)
-        # 重新克隆
-        log.info("克隆 GPT-SoVITS...")
-        clone = subprocess.run(
-            ["git", "clone", "--depth", "1", GPT_SOVITS_REPO, str(GPT_SOVITS_DIR)],
-            capture_output=True, text=True, timeout=600
-        )
-        if clone.returncode != 0:
-            return f"克隆失败 (exit={clone.returncode}) STDERR: {clone.stderr[:300]}"
-        # 装依赖（跳过 Web UI 包避免降级 Gradio）
-        req = GPT_SOVITS_DIR / "requirements.txt"
-        if req.exists():
-            import re
-            lines_req = [l for l in req.read_text().splitlines()
-                     if not re.match(r'^\s*(gradio|streamlit|fastapi|uvicorn|huggingface.hub|protobuf)', l) and l.strip()]
-            if lines_req:
-                log.info("安装 %d 个依赖...", len(lines_req))
-                pip_result = subprocess.run(
-                    [sys.executable, "-m", "pip", "install", "--no-cache-dir"] + lines_req,
-                    capture_output=True, text=True, timeout=1200
-                )
-                if pip_result.returncode != 0:
-                    log.warning("pip 部分失败: %s", pip_result.stderr[-200:])
-            log.info("GPT-SoVITS 安装完成")
-        train_py = GPT_SOVITS_DIR / "train.py"
-        if not train_py.exists():
-            items = list(GPT_SOVITS_DIR.iterdir())[:15] if GPT_SOVITS_DIR.exists() else []
-            return f"克隆后仍缺 train.py 目录: {GPT_SOVITS_DIR} 内容: {[i.name for i in items]}"
-        return f"环境就绪 GPT-SoVITS: {GPT_SOVITS_DIR} 工作目录: {WORK_DIR}"
-    except Exception as e:
         log.exception("check_environment")
-        return f"环境安装失败: {e}"
-# ── 步骤 2：下载数据集
 def download_dataset():
     try:
-        ensure_dirs()
-        from huggingface_hub import snapshot_download
-        kwargs = {"repo_id": DATASET_REPO, "repo_type": "dataset",
-                  "local_dir": str(WORK_DIR / "dataset"), "ignore_patterns": ["*.md", "*.txt"]}
-        if HF_TOKEN:
-            kwargs["token"] = HF_TOKEN
-        snapshot_download(**kwargs)
-        audio_src = WORK_DIR / "dataset" / "audio"
-        if audio_src.exists():
-            for f in audio_src.glob("*.wav"):
-                shutil.copy2(f, RAW_AUDIO)
-        count = len(list(RAW_AUDIO.glob("*.wav")))
-        return f"✅ 下载完成！音频文件: {count} 个"
-    except Exception as e:
         log.exception("download_dataset")
-        return f"❌ 下载失败: {e}"
-# ── 步骤 3：准备训练数据
 def prepare_data():
     try:
-        ensure_dirs()
-        import pandas as pd
-        target_audio = GPT_SOVITS_DIR / "raw_audio" / "daniya"
-        target_audio.mkdir(parents=True, exist_ok=True)
-        for wav in RAW_AUDIO.glob("*.wav"):
-            shutil.copy2(wav, target_audio)
-        metadata = WORK_DIR / "dataset" / "metadata.csv"
-        if not metadata.exists():
-            for f in (WORK_DIR / "dataset").rglob("*.csv"):
-                metadata = f; break
-        fl_dir = GPT_SOVITS_DIR / "filelist"
-        fl_dir.mkdir(exist_ok=True)
-        if metadata.exists():
-            df = pd.read_csv(metadata)
-            with open(fl_dir / "daniya.list", "w", encoding="utf-8") as f:
-                for _, row in df.iterrows():
-                    f.write(f"raw_audio/daniya/{row['file']}|{row['text']}|daniya\n")
-            return f"✅ 准备完成！训练样本: {len(df)} 条"
-        else:
-            wavs = list(target_audio.glob("*.wav"))
-            with open(fl_dir / "daniya.list", "w", encoding="utf-8") as f:
-                for w in wavs:
-                    f.write(f"raw_audio/daniya/{w.name}|{w.stem}|daniya\n")
-            return f"⚠️ 未找到 metadata.csv，用文件名当文本。样本: {len(wavs)} 条"
-    except Exception as e:
         log.exception("prepare_data")
-        return f"❌ 准备失败: {e}"
-# ── 步骤 4：训练
-def start_training(epochs=100, batch_size=4, save_steps=500, lr=0.0001):
     try:
-        ensure_dirs()
-        out_dir = OUTPUT_DIR / "daniya"
-        out_dir.mkdir(parents=True, exist_ok=True)
-        cmd = [sys.executable, "train.py", "--model", "sovits",
-               "--output_dir", str(out_dir), "--epochs", str(epochs),
-               "--batch_size", str(batch_size), "--save_steps", str(save_steps),
-               "--learning_rate", str(lr), "--device", "cpu"]
-        log.info("训练: %s", " ".join(cmd))
-        proc = subprocess.Popen(cmd, cwd=GPT_SOVITS_DIR,
-                                stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
-                                text=True, bufsize=1)
-        lines = []
-        for line in proc.stdout:
-            lines.append(line.rstrip())
-            if len(lines) > 200: lines = lines[-200:]
-            if len(lines) % 10 == 0:
-                yield "\n".join(lines)
-        proc.wait()
-        yield "\n".join(lines[-30:]) + f"\n\n{'✅ 训练完成' if proc.returncode==0 else f'❌ 训练失败 (exit={proc.returncode})'}"
-    except Exception as e:
         log.exception("start_training")
-        yield f"❌ 训练出错: {e}"
-# ── UI
 def create_ui():
-    with gr.Blocks(title="GPT-SoVITS 训练 — 达妮娅", theme=gr.themes.Soft()) as demo:
-        gr.Markdown("# 🎤 GPT-SoVITS 训练器 — 达妮娅语音\nCPU 训练 SoVITS 模型，按顺序点击按钮。")
         with gr.Row():
             with gr.Column(scale=1):
-                for label, btn_text, fn, out in [
-                    ("📦 环境", "1. 安装环境", check_environment, gr.Textbox(label="状态", lines=5, interactive=False)),
-                    ("📥 数据", "2. 下载数据集", download_dataset, gr.Textbox(label="状态", lines=3, interactive=False)),
-                    ("🛠️ 预处理", "3. 准备训练数据", prepare_data, gr.Textbox(label="状态", lines=4, interactive=False)),
-                ]:
-                    gr.Markdown(f"### {label}")
-                    b = gr.Button(btn_text, variant="primary" if "环境" not in label else "secondary")
-                    b.click(fn, outputs=out)
             with gr.Column(scale=1):
-                gr.Markdown("### ⚙️ 参数")
-                epochs = gr.Slider(10, 500, value=100, step=10, label="训练轮数")
-                batch = gr.Slider(1, 16, value=4, step=1, label="批次大小")
-                save_steps = gr.Slider(100, 2000, value=500, step=100, label="保存间隔")
-                lr = gr.Slider(1e-5, 0.01, value=1e-4, step=1e-5, label="学习率")
-                gr.Markdown("### 🚀 训练")
-                btn_train = gr.Button("4. 开始训练", variant="primary", size="lg")
-                out_train = gr.Textbox(label="训练日志", lines=18, interactive=False, autoscroll=True)
-                btn_train.click(start_training, inputs=[epochs, batch, save_steps, lr], outputs=out_train)
     return demo
 if __name__ == "__main__":
     demo = create_ui()
-    demo.launch(server_name="0.0.0.0", server_port=7860)# rebuild trigger 1779588386

 #!/usr/bin/env python3
 """
+GPT-SoVITS Daniya trainer for Hugging Face Spaces.
+This Space is CPU-oriented. It prepares the dataset with the current
+GPT-SoVITS pipeline and can export fresh SoVITS and GPT checkpoints
+through the Gradio UI.
 """
+import csv
+import json
+import logging
+import os
+import shutil
+import subprocess
+import sys
 from pathlib import Path
 import gradio as gr
+import yaml
+from huggingface_hub import hf_hub_download, snapshot_download
+# --- Remote sources -------------------------------------------------------
+# Optional access token; when empty, hub calls are made without a token
+# (see hf_kwargs()).
 HF_TOKEN = os.environ.get("HF_TOKEN", "")
 DATASET_REPO = "huanx/daniya-voice-gptsovits"
 GPT_SOVITS_REPO = "https://github.com/RVC-Boss/GPT-SoVITS.git"
+# --- Local layout: everything lives under WORK_DIR -------------------------
+WORK_DIR = Path("/tmp/daniya_trainer")
+HF_HOME = WORK_DIR / "hf_home"
+GPT_SOVITS_DIR = WORK_DIR / "GPT-SoVITS"
+DATASET_DIR = WORK_DIR / "dataset"
+AUDIO_DIR = DATASET_DIR / "audio"
+EXP_NAME = "daniya"
+EXP_ROOT = WORK_DIR / "logs"
+EXP_DIR = EXP_ROOT / EXP_NAME
+OUTPUT_ROOT = WORK_DIR / "trained_models"
+SOVITS_OUTPUT_DIR = OUTPUT_ROOT / "SoVITS_weights_v2"
+GPT_OUTPUT_DIR = OUTPUT_ROOT / "GPT_weights_v2"
+# Training manifest written by build_manifest() and the merged preprocessing
+# outputs written by prepare_data_steps().
+INPUT_LIST = WORK_DIR / "daniya.list"
+TEXT_PATH = EXP_DIR / "2-name2text.txt"
+SEMANTIC_PATH = EXP_DIR / "6-name2semantic.tsv"
+# --- Pretrained assets, stored inside the cloned upstream repository -------
+PRETRAINED_DIR = GPT_SOVITS_DIR / "GPT_SoVITS" / "pretrained_models"
+BERT_DIR = PRETRAINED_DIR / "chinese-roberta-wwm-ext-large"
+CNHUBERT_DIR = PRETRAINED_DIR / "chinese-hubert-base"
+PRETRAINED_REPO = "lj1995/GPT-SoVITS"
+PRETRAINED_S1_REL = "gsv-v2final-pretrained/s1bert25hz-5kh-longer-epoch=12-step=369668.ckpt"
+PRETRAINED_S2G_REL = "gsv-v2final-pretrained/s2G2333k.pth"
+PRETRAINED_S2D_REL = "gsv-v2final-pretrained/s2D2333k.pth"
+PRETRAINED_S1 = PRETRAINED_DIR / PRETRAINED_S1_REL
+PRETRAINED_S2G = PRETRAINED_DIR / PRETRAINED_S2G_REL
+PRETRAINED_S2D = PRETRAINED_DIR / PRETRAINED_S2D_REL
+BERT_REPO = "hfl/chinese-roberta-wwm-ext-large"
+CNHUBERT_REPO = "TencentGameMate/chinese-hubert-base"
+# --- Fixed training identity written into the manifest and configs ---------
+VERSION = "v2"
+LANGUAGE = "zh"
+SPEAKER = "daniya"
+# File patterns passed as allow_patterns when downloading the BERT and
+# CN-HuBERT repositories (see ensure_base_assets()).
+MODEL_PATTERNS = [
+    "*.json",
+    "*.txt",
+    "*.bin",
+    "*.safetensors",
+    "*.model",
+]
 logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
 log = logging.getLogger(__name__)
+# setdefault keeps any value already provided by the hosting environment.
+# NOTE(review): these defaults are applied after `huggingface_hub` has been
+# imported above; if the library resolves HF_HOME at import time they only
+# influence child processes (which inherit os.environ) - confirm.
+os.environ.setdefault("HF_HOME", str(HF_HOME))
+os.environ.setdefault("TRANSFORMERS_CACHE", str(HF_HOME / "transformers"))
+os.environ.setdefault("HF_HUB_DISABLE_TELEMETRY", "1")
 def ensure_dirs():
+    """Create every directory the trainer writes into; safe to call repeatedly."""
+    required = (
+        WORK_DIR,
+        HF_HOME,
+        DATASET_DIR,
+        EXP_ROOT,
+        EXP_DIR,
+        OUTPUT_ROOT,
+        SOVITS_OUTPUT_DIR,
+        GPT_OUTPUT_DIR,
+    )
+    for directory in required:
+        directory.mkdir(parents=True, exist_ok=True)
+def hf_kwargs():
+    """Return the keyword arguments that authenticate hub calls.
+
+    Yields ``{"token": HF_TOKEN}`` when a token is configured and an empty
+    dict otherwise, so callers can always splat the result with ``**``.
+    """
+    if not HF_TOKEN:
+        return {}
+    return {"token": HF_TOKEN}
+def push(logs, message):
+    """Append *message* to *logs* and return the text to display.
+
+    Args:
+        logs: Mutable list of log lines owned by the calling handler.
+        message: Line to append.
+
+    Returns:
+        The most recent 200 log lines joined with newlines.
+
+    Only the last 200 entries are ever rendered, so older entries are removed
+    from *logs* in place. Without this, a long training run would keep every
+    output line in memory although none of it is shown again.
+    """
+    logs.append(message)
+    # No-op while the list holds 200 entries or fewer.
+    del logs[:-200]
+    return "\n".join(logs)
+def run_cmd(command, cwd=None, env=None):
+    """Run *command* and yield its combined stdout/stderr line by line.
+
+    Args:
+        command: Argument list passed to ``subprocess.Popen`` (no shell).
+        cwd: Optional working directory for the child process.
+        env: Optional environment mapping for the child process.
+
+    Yields:
+        First a ``$ <command>`` echo line, then every non-empty output line
+        with trailing whitespace removed.
+
+    Raises:
+        RuntimeError: If the command exits with a non-zero status.
+
+    If the consumer stops iterating early (for example the generator is
+    closed) or an error interrupts the read loop, the child is killed and its
+    pipe closed so no orphaned process is left behind.
+    """
+    proc = subprocess.Popen(
+        command,
+        cwd=str(cwd) if cwd else None,
+        env=env,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.STDOUT,
+        text=True,
+        bufsize=1,
+    )
+    try:
+        yield f"$ {' '.join(command)}"
+        for raw in proc.stdout:
+            line = raw.rstrip()
+            if line:
+                yield line
+        code = proc.wait()
+    finally:
+        # Still running here means we were interrupted before the child ended.
+        if proc.poll() is None:
+            proc.kill()
+            proc.wait()
+        proc.stdout.close()
+    if code != 0:
+        raise RuntimeError(f"命令失败 (exit={code}): {' '.join(command)}")
+def has_transformers_model(path: Path):
+    """Tell whether *path* holds a downloaded model.
+
+    The directory must exist, contain ``config.json`` and hold at least one
+    ``*.bin`` or ``*.safetensors`` weight file.
+    """
+    if not path.exists():
+        return False
+    if not (path / "config.json").exists():
+        return False
+    has_bin = any(path.glob("*.bin"))
+    return has_bin or any(path.glob("*.safetensors"))
+def metadata_rows():
+    """Load ``metadata.csv`` from the dataset directory as a list of dicts.
+
+    Returns:
+        One dict per CSV row keyed by the header names, or an empty list when
+        the file does not exist.
+
+    The file is decoded as ``utf-8-sig`` so that a leading byte-order mark
+    (common in spreadsheet exports) is stripped. With plain ``utf-8`` the
+    first header would become ``"\ufefffile"`` and lookups of the ``file``
+    column would fail for every row. Files without a BOM decode identically.
+    """
+    metadata = DATASET_DIR / "metadata.csv"
+    if not metadata.exists():
+        return []
+    with metadata.open("r", encoding="utf-8-sig", newline="") as handle:
+        return list(csv.DictReader(handle))
+def latest_file(directory: Path, suffix: str):
+    """Return the most recently modified file in *directory* ending in *suffix*.
+
+    Returns the path as a string, or ``None`` when nothing matches.
+    """
+    candidates = list(directory.glob(f"*{suffix}"))
+    if not candidates:
+        return None
+    # Oldest first, so the newest file ends up last.
+    candidates.sort(key=lambda entry: entry.stat().st_mtime)
+    return str(candidates[-1])
+def artifacts_summary():
+    """Describe the newest exported checkpoints.
+
+    Returns:
+        A ``(summary_text, sovits_path, gpt_path)`` tuple. Each path is a
+        string, or ``None`` when no checkpoint of that kind exists yet.
+    """
+    sovits = latest_file(SOVITS_OUTPUT_DIR, ".pth")
+    gpt = latest_file(GPT_OUTPUT_DIR, ".ckpt")
+    sovits_line = f"SoVITS: {sovits or '暂无'}"
+    gpt_line = f"GPT: {gpt or '暂无'}"
+    return f"{sovits_line}\n{gpt_line}", sovits, gpt
+def dataset_prepared():
+    """Return True when every preprocessing artifact is present on disk."""
+    required = (
+        TEXT_PATH,
+        SEMANTIC_PATH,
+        EXP_DIR / "3-bert",
+        EXP_DIR / "4-cnhubert",
+        EXP_DIR / "5-wav32k",
+    )
+    return all(item.exists() for item in required)
+def build_process_env():
+    """Build the environment mapping handed to GPT-SoVITS subprocesses.
+
+    Starts from a copy of the current process environment and overlays the
+    variables below. Note that ``PYTHONPATH`` is replaced, not extended.
+
+    NOTE(review): the variable names are presumably the ones the upstream
+    prepare_datasets / training scripts read from ``os.environ`` - confirm
+    against the cloned GPT-SoVITS revision.
+
+    Returns:
+        A new dict; ``os.environ`` itself is not modified.
+    """
+    env = os.environ.copy()
+    env.update(
+        {
+            "PYTHONPATH": str(GPT_SOVITS_DIR),
+            "inp_text": str(INPUT_LIST),
+            "inp_wav_dir": str(AUDIO_DIR),
+            "exp_name": EXP_NAME,
+            "opt_dir": str(EXP_DIR),
+            # Single-shard run: part 0 of 1. This matches the "-0" suffix of
+            # the part files handled in prepare_data_steps().
+            "i_part": "0",
+            "all_parts": "1",
+            "_CUDA_VISIBLE_DEVICES": "0",
+            "is_half": "False",
+            "version": VERSION,
+            "hz": "25hz",
+            "bert_pretrained_dir": str(BERT_DIR),
+            "bert_path": str(BERT_DIR),
+            "cnhubert_base_dir": str(CNHUBERT_DIR),
+            "pretrained_s2G": str(PRETRAINED_S2G),
+            # Relative path; the subprocesses are started with
+            # cwd=GPT_SOVITS_DIR by the callers in this file.
+            "s2config_path": "GPT_SoVITS/configs/s2.json",
+        }
+    )
+    return env
+def ensure_upstream_repo():
+    """Make sure a usable GPT-SoVITS checkout exists at ``GPT_SOVITS_DIR``.
+
+    The presence of ``webui.py`` marks a complete checkout. Anything else
+    found at that path is deleted and replaced by a fresh shallow clone.
+    ``subprocess.run`` raises if git fails or exceeds the 900 s timeout.
+    """
+    already_cloned = (GPT_SOVITS_DIR / "webui.py").exists()
+    if not already_cloned:
+        # Drop a partial or broken checkout before cloning again.
+        if GPT_SOVITS_DIR.exists():
+            shutil.rmtree(GPT_SOVITS_DIR)
+        clone_cmd = ["git", "clone", "--depth", "1", GPT_SOVITS_REPO, str(GPT_SOVITS_DIR)]
+        subprocess.run(
+            clone_cmd,
+            check=True,
+            capture_output=True,
+            text=True,
+            timeout=900,
+        )
+def patch_upstream_repo():
+    """Apply the one-time source patch that turns G2PW off in ``chinese2.py``.
+
+    A marker file inside the checkout records that the step already ran, so
+    later calls return immediately.
+
+    If the expected source line is not present (for example because upstream
+    changed it), nothing is rewritten. That case is now logged as a warning
+    instead of passing silently, because the caller reports the patch as
+    applied either way. The marker is still written: retrying against the
+    same checkout could not succeed.
+
+    Raises:
+        OSError: If ``chinese2.py`` cannot be read or written.
+    """
+    patch_marker = GPT_SOVITS_DIR / ".hf_space_patch_applied"
+    if patch_marker.exists():
+        return
+    chinese2 = GPT_SOVITS_DIR / "GPT_SoVITS" / "text" / "chinese2.py"
+    content = chinese2.read_text(encoding="utf-8")
+    old = "is_g2pw = True  # True if is_g2pw_str.lower() == 'true' else False"
+    new = "is_g2pw = False  # patched for CPU Space training; avoids extra G2PW asset"
+    if old in content:
+        content = content.replace(old, new, 1)
+        chinese2.write_text(content, encoding="utf-8")
+    else:
+        log.warning("G2PW patch target not found in %s; file left unchanged", chinese2)
+    patch_marker.write_text("ok\n", encoding="utf-8")
+def ensure_base_assets():
+    """Download any pretrained asset that is not on disk yet.
+
+    Covers the BERT and CN-HuBERT model directories plus the v2 base
+    checkpoints (S1, S2G and S2D). Assets already present are skipped.
+
+    Failures for the required assets propagate to the caller. The S2D
+    discriminator is treated as optional: a failure is logged together with
+    the underlying exception. Previously the log always said "not found",
+    which hid network and authentication errors.
+    """
+    feature_models = ((BERT_DIR, BERT_REPO), (CNHUBERT_DIR, CNHUBERT_REPO))
+    for model_dir, repo_id in feature_models:
+        if not has_transformers_model(model_dir):
+            snapshot_download(
+                repo_id=repo_id,
+                local_dir=str(model_dir),
+                allow_patterns=MODEL_PATTERNS,
+                **hf_kwargs(),
+            )
+    required_ckpts = ((PRETRAINED_S1, PRETRAINED_S1_REL), (PRETRAINED_S2G, PRETRAINED_S2G_REL))
+    for target, rel_path in required_ckpts:
+        if not target.exists():
+            hf_hub_download(
+                repo_id=PRETRAINED_REPO,
+                filename=rel_path,
+                local_dir=str(PRETRAINED_DIR),
+                **hf_kwargs(),
+            )
+    if not PRETRAINED_S2D.exists():
+        try:
+            hf_hub_download(
+                repo_id=PRETRAINED_REPO,
+                filename=PRETRAINED_S2D_REL,
+                local_dir=str(PRETRAINED_DIR),
+                **hf_kwargs(),
+            )
+        except Exception as exc:
+            # Best effort: create_sovits_config() falls back to an empty
+            # discriminator path when this file is missing.
+            log.warning(
+                "Optional pretrained discriminator unavailable (%s): %s",
+                PRETRAINED_S2D_REL,
+                exc,
+            )
+def reset_preprocess_outputs():
+    """Remove the manifest and all earlier preprocessing outputs.
+
+    Deletes the merged and per-part text/semantic files and the three feature
+    directories, so the next preprocessing run starts from a clean state.
+    """
+    stale_files = (
+        INPUT_LIST,
+        TEXT_PATH,
+        SEMANTIC_PATH,
+        EXP_DIR / "2-name2text-0.txt",
+        EXP_DIR / "6-name2semantic-0.tsv",
+    )
+    stale_dirs = tuple(EXP_DIR / name for name in ("3-bert", "4-cnhubert", "5-wav32k"))
+    for stale in stale_files:
+        if stale.exists():
+            stale.unlink()
+    for stale in stale_dirs:
+        if stale.exists():
+            shutil.rmtree(stale)
+def build_manifest():
+    """Write the ``wav|speaker|language|text`` training list to ``INPUT_LIST``.
+
+    A metadata row is kept only when it names a ``file`` that exists in
+    ``AUDIO_DIR`` and has non-empty ``text``.
+
+    Returns:
+        ``(listed_count, audio_count, unlisted)``, where *unlisted* is the
+        sorted list of wav names that no usable metadata row refers to.
+
+    Raises:
+        RuntimeError: If no usable row remains.
+
+    The manifest holds one sample per line, but a quoted CSV field may span
+    several lines. Line breaks inside the text are therefore collapsed to
+    single spaces so that a sample can never spill onto a second manifest
+    line. Single-line text is written exactly as before.
+    """
+    rows = metadata_rows()
+    audio_files = {item.name for item in AUDIO_DIR.glob("*.wav")}
+    listed = set()
+    output = []
+    for row in rows:
+        wav_name = (row.get("file") or "").strip()
+        raw_text = row.get("text") or ""
+        pieces = (piece.strip() for piece in raw_text.splitlines())
+        text = " ".join(piece for piece in pieces if piece)
+        if not wav_name or not text or wav_name not in audio_files:
+            continue
+        listed.add(wav_name)
+        output.append(f"{wav_name}|{SPEAKER}|{LANGUAGE}|{text}")
+    if not output:
+        raise RuntimeError("metadata.csv 里没有可用训练样本")
+    INPUT_LIST.write_text("\n".join(output) + "\n", encoding="utf-8")
+    unlisted = sorted(audio_files - listed)
+    return len(output), len(audio_files), unlisted
+def create_sovits_config(epochs, batch_size, save_every_epoch, learning_rate):
+    """Derive a SoVITS training config from the upstream ``s2.json`` template.
+
+    Args:
+        epochs: Number of training epochs (coerced to int).
+        batch_size: Training batch size (coerced to int).
+        save_every_epoch: Export interval in epochs (coerced to int).
+        learning_rate: Learning rate (coerced to float).
+
+    Returns:
+        Path of the generated ``tmp_s2.json`` inside ``WORK_DIR``. The file
+        is overwritten on every call.
+    """
+    config_path = GPT_SOVITS_DIR / "GPT_SoVITS" / "configs" / "s2.json"
+    with config_path.open("r", encoding="utf-8") as handle:
+        data = json.load(handle)
+    data["train"]["fp16_run"] = False
+    data["train"]["batch_size"] = int(batch_size)
+    data["train"]["epochs"] = int(epochs)
+    data["train"]["learning_rate"] = float(learning_rate)
+    data["train"]["pretrained_s2G"] = str(PRETRAINED_S2G)
+    # The discriminator download is optional; use an empty path when absent.
+    data["train"]["pretrained_s2D"] = str(PRETRAINED_S2D) if PRETRAINED_S2D.exists() else ""
+    data["train"]["if_save_latest"] = False
+    data["train"]["if_save_every_weights"] = True
+    data["train"]["save_every_epoch"] = int(save_every_epoch)
+    data["train"]["gpu_numbers"] = "0"
+    data["train"]["grad_ckpt"] = False
+    data["data"]["exp_dir"] = str(EXP_DIR)
+    data["s2_ckpt_dir"] = str(EXP_DIR)
+    # Exported weights land here; start_training() looks for *.pth in it.
+    data["save_weight_dir"] = str(SOVITS_OUTPUT_DIR)
+    data["name"] = EXP_NAME
+    data["version"] = VERSION
+    data["model"]["version"] = VERSION
+    tmp_config = WORK_DIR / "tmp_s2.json"
+    tmp_config.write_text(json.dumps(data), encoding="utf-8")
+    return tmp_config
+def create_gpt_config(epochs, batch_size, save_every_epoch):
+    """Derive a GPT training config from the upstream ``s1longer-v2.yaml``.
+
+    Args:
+        epochs: Number of training epochs (coerced to int).
+        batch_size: Training batch size (coerced to int).
+        save_every_epoch: Export interval in epochs (coerced to int).
+
+    Returns:
+        Path of the generated ``tmp_s1.yaml`` inside ``WORK_DIR``. The file
+        is overwritten on every call.
+    """
+    config_path = GPT_SOVITS_DIR / "GPT_SoVITS" / "configs" / "s1longer-v2.yaml"
+    with config_path.open("r", encoding="utf-8") as handle:
+        data = yaml.safe_load(handle)
+    data["train"]["batch_size"] = int(batch_size)
+    data["train"]["epochs"] = int(epochs)
+    data["train"]["precision"] = "32"
+    data["train"]["save_every_n_epoch"] = int(save_every_epoch)
+    data["train"]["if_save_every_weights"] = True
+    data["train"]["if_save_latest"] = False
+    data["train"]["if_dpo"] = False
+    data["train"]["exp_name"] = EXP_NAME
+    # Exported weights land here; start_gpt_training() looks for *.ckpt in it.
+    data["train"]["half_weights_save_dir"] = str(GPT_OUTPUT_DIR)
+    data["data"]["num_workers"] = 0
+    data["pretrained_s1"] = str(PRETRAINED_S1)
+    # Inputs are the merged files produced by prepare_data_steps().
+    data["train_semantic_path"] = str(SEMANTIC_PATH)
+    data["train_phoneme_path"] = str(TEXT_PATH)
+    data["output_dir"] = str(EXP_DIR / "logs_s1_v2")
+    tmp_config = WORK_DIR / "tmp_s1.yaml"
+    tmp_config.write_text(yaml.safe_dump(data, allow_unicode=True, sort_keys=False), encoding="utf-8")
+    return tmp_config
+def setup_environment_steps(logs):
+    """Prepare the upstream checkout and pretrained assets, reporting progress.
+
+    Generator: each yielded value is the accumulated log text returned by
+    ``push``. It has no return value.
+
+    Args:
+        logs: Mutable list of log lines shared with the calling handler.
+
+    Errors raised by the clone, patch or download steps propagate.
+    """
+    ensure_dirs()
+    if (GPT_SOVITS_DIR / "webui.py").exists():
+        yield push(logs, "GPT-SoVITS 仓库已存在，跳过克隆。")
+    else:
+        yield push(logs, "克隆 GPT-SoVITS 仓库...")
+        ensure_upstream_repo()
+        yield push(logs, "✅ GPT-SoVITS 仓库已就绪。")
+    patch_upstream_repo()
+    yield push(logs, "✅ 已应用 Space 兼容补丁。")
+    # The three messages below only announce pending downloads; the actual
+    # work happens in the single ensure_base_assets() call that follows.
+    if not has_transformers_model(BERT_DIR):
+        yield push(logs, "下载中文 BERT 特征模型...")
+    if not has_transformers_model(CNHUBERT_DIR):
+        yield push(logs, "下载 CN-HuBERT 特征模型...")
+    if not PRETRAINED_S1.exists() or not PRETRAINED_S2G.exists():
+        yield push(logs, "下载 GPT-SoVITS v2 底模...")
+    ensure_base_assets()
+    yield push(
+        logs,
+        "✅ 环境就绪：GPT-SoVITS 仓库、中文特征模型和 v2 底模均已准备完成。",
+    )
+def download_dataset_steps(logs):
+    """Run the environment setup, then download the dataset snapshot.
+
+    Generator: yields accumulated log text after each step and has no return
+    value.
+
+    Args:
+        logs: Mutable list of log lines shared with the calling handler.
+    """
+    ensure_dirs()
+    # The environment steps are always replayed first; they skip work that is
+    # already done.
+    yield from setup_environment_steps(logs)
+    yield push(logs, "下载 Daniya 数据集...")
+    snapshot_download(
+        repo_id=DATASET_REPO,
+        repo_type="dataset",
+        local_dir=str(DATASET_DIR),
+        **hf_kwargs(),
+    )
+    rows = metadata_rows()
+    audio_count = len(list(AUDIO_DIR.glob("*.wav")))
+    yield push(
+        logs,
+        f"✅ 数据集已下载：音频 {audio_count} 个，metadata {len(rows)} 条。",
+    )
+def prepare_data_steps(logs):
+    """Run the full preprocessing pipeline, reporting progress.
+
+    Order: environment and dataset download, reset of earlier outputs,
+    manifest generation, then the three upstream ``prepare_datasets`` scripts
+    (text, hubert/wav32k, semantic).
+
+    Generator: yields accumulated log text after each step. Its return value
+    (available through ``yield from``) is a final summary string.
+
+    Args:
+        logs: Mutable list of log lines shared with the calling handler.
+
+    Raises:
+        RuntimeError: If a script fails or an expected part file is missing.
+    """
+    yield from download_dataset_steps(logs)
+    reset_preprocess_outputs()
+    sample_count, audio_count, unlisted = build_manifest()
+    yield push(
+        logs,
+        f"训练清单已生成：metadata 可用样本 {sample_count} 条，音频总数 {audio_count} 个，未标注音频 {len(unlisted)} 个。",
+    )
+    env = build_process_env()
+    for line in run_cmd([sys.executable, "-s", "GPT_SoVITS/prepare_datasets/1-get-text.py"], cwd=GPT_SOVITS_DIR, env=env):
+        yield push(logs, line)
+    # The script is expected to write a per-part file; with a single part it
+    # is simply renamed to the merged name.
+    part_text = EXP_DIR / "2-name2text-0.txt"
+    if not part_text.exists():
+        raise RuntimeError("文本特征提取完成后未生成 2-name2text-0.txt")
+    part_text.replace(TEXT_PATH)
+    yield push(logs, "✅ 文本分词与 BERT 特征提取完成。")
+    for line in run_cmd(
+        [sys.executable, "-s", "GPT_SoVITS/prepare_datasets/2-get-hubert-wav32k.py"],
+        cwd=GPT_SOVITS_DIR,
+        env=env,
+    ):
+        yield push(logs, line)
+    yield push(logs, "✅ CN-HuBERT 特征与 32k wav 已生成。")
+    for line in run_cmd([sys.executable, "-s", "GPT_SoVITS/prepare_datasets/3-get-semantic.py"], cwd=GPT_SOVITS_DIR, env=env):
+        yield push(logs, line)
+    part_semantic = EXP_DIR / "6-name2semantic-0.tsv"
+    if not part_semantic.exists():
+        raise RuntimeError("语义 token 提取完成后未生成 6-name2semantic-0.tsv")
+    # The merged TSV gets a header row prepended to the part file's rows.
+    semantic_rows = part_semantic.read_text(encoding="utf-8").strip()
+    SEMANTIC_PATH.write_text(
+        "item_name\tsemantic_audio\n" + semantic_rows + ("\n" if semantic_rows else ""),
+        encoding="utf-8",
+    )
+    part_semantic.unlink()
+    yield push(logs, "✅ 语义 token 提取完成。")
+    return f"✅ 预处理完成，可用于训练的样本 {sample_count} 条。"
 def check_environment():
+    """UI handler: prepare the environment and stream progress text.
+
+    Yields the accumulated log text after each step. Any exception is logged
+    and reported as a final failure line instead of propagating to the UI.
+
+    ``setup_environment_steps`` has no return value, so the previous
+    ``final = yield from ...`` / ``if final`` branch could never run and has
+    been removed.
+    """
+    logs = []
     try:
+        yield from setup_environment_steps(logs)
+    except Exception as exc:
         log.exception("check_environment")
+        yield push(logs, f"❌ 环境准备失败: {exc}")
 def download_dataset():
+    """UI handler: download the dataset and stream progress text.
+
+    Yields the accumulated log text after each step. Any exception is logged
+    and reported as a final failure line instead of propagating to the UI.
+
+    ``download_dataset_steps`` has no return value, so the previous
+    ``final = yield from ...`` / ``if final`` branch could never run and has
+    been removed.
+    """
+    logs = []
     try:
+        yield from download_dataset_steps(logs)
+    except Exception as exc:
         log.exception("download_dataset")
+        yield push(logs, f"❌ 数据集下载失败: {exc}")
 def prepare_data():
+    """UI handler: run preprocessing and stream progress text.
+
+    Yields the accumulated log text after each step, then one more update
+    carrying the summary string returned by ``prepare_data_steps``. Any
+    exception is logged and reported as a final failure line.
+    """
+    logs = []
     try:
+        # `yield from` forwards every progress update and captures the
+        # generator's return value (the final summary).
+        final = yield from prepare_data_steps(logs)
+        yield push(logs, final)
+    except Exception as exc:
         log.exception("prepare_data")
+        yield push(logs, f"❌ 预处理失败: {exc}")
+def start_training(epochs=2, batch_size=1, save_every_epoch=1, lr=0.0001):
+    """UI handler: train SoVITS and stream ``(log_text, weight_path)`` pairs.
+
+    Args:
+        epochs: Number of training epochs.
+        batch_size: Training batch size.
+        save_every_epoch: Export interval in epochs.
+        lr: Learning rate.
+
+    Yields:
+        Tuples of the accumulated log text and a file path. The path is
+        ``None`` until training has finished and an exported ``.pth`` exists.
+
+    Any exception is logged and reported as a final failure line.
+    """
+    logs = []
     try:
+        # Run the preprocessing pipeline on demand when its outputs are missing.
+        if not dataset_prepared():
+            yield push(logs, "训练前缺少预处理产物，开始自动补齐..."), None
+            for update in prepare_data_steps(logs):
+                yield update, None
+        config_path = create_sovits_config(epochs, batch_size, save_every_epoch, lr)
+        env = build_process_env()
+        yield push(logs, "开始 SoVITS 训练..."), None
+        for line in run_cmd(
+            [sys.executable, "-s", "GPT_SoVITS/s2_train.py", "--config", str(config_path)],
+            cwd=GPT_SOVITS_DIR,
+            env=env,
+        ):
+            yield push(logs, line), None
+        # Picks the newest .pth by modification time in the output directory.
+        latest = latest_file(SOVITS_OUTPUT_DIR, ".pth")
+        if not latest:
+            raise RuntimeError("训练结束后没有找到导出的 SoVITS 权重文件")
+        yield push(logs, f"✅ SoVITS 训练完成，最新权重：{latest}"), latest
+    except Exception as exc:
         log.exception("start_training")
+        yield push(logs, f"❌ SoVITS 训练失败: {exc}"), None
+def start_gpt_training(epochs=1, batch_size=1, save_every_epoch=1):
+    """UI handler: train GPT and stream ``(log_text, weight_path)`` pairs.
+
+    Args:
+        epochs: Number of training epochs.
+        batch_size: Training batch size.
+        save_every_epoch: Export interval in epochs.
+
+    Yields:
+        Tuples of the accumulated log text and a file path. The path is
+        ``None`` until training has finished and an exported ``.ckpt`` exists.
+
+    Any exception is logged and reported as a final failure line.
+    """
+    logs = []
+    try:
+        # Run the preprocessing pipeline on demand when its outputs are missing.
+        if not dataset_prepared():
+            yield push(logs, "训练前缺少预处理产物，开始自动补齐..."), None
+            for update in prepare_data_steps(logs):
+                yield update, None
+        config_path = create_gpt_config(epochs, batch_size, save_every_epoch)
+        env = build_process_env()
+        yield push(logs, "开始 GPT 训练..."), None
+        for line in run_cmd(
+            [sys.executable, "-s", "GPT_SoVITS/s1_train.py", "--config_file", str(config_path)],
+            cwd=GPT_SOVITS_DIR,
+            env=env,
+        ):
+            yield push(logs, line), None
+        # Picks the newest .ckpt by modification time in the output directory.
+        latest = latest_file(GPT_OUTPUT_DIR, ".ckpt")
+        if not latest:
+            raise RuntimeError("训练结束后没有找到导出的 GPT 权重文件")
+        yield push(logs, f"✅ GPT 训练完成，最新权重：{latest}"), latest
+    except Exception as exc:
+        log.exception("start_gpt_training")
+        yield push(logs, f"❌ GPT 训练失败: {exc}"), None
+def refresh_outputs():
+    """UI handler: report the newest exported checkpoints.
+
+    Returns the ``(summary_text, sovits_path, gpt_path)`` tuple produced by
+    ``artifacts_summary``.
+    """
+    summary, sovits_path, gpt_path = artifacts_summary()
+    return summary, sovits_path, gpt_path
 def create_ui():
+    """Build the Gradio Blocks app and wire each button to its handler.
+
+    Layout: the left column holds environment, dataset and preprocessing
+    controls; the right column holds SoVITS training, optional GPT training
+    and an output refresh panel.
+
+    Returns:
+        The constructed ``gr.Blocks`` instance (not yet launched).
+    """
+    # NOTE(review): requirements pin gradio>=6,<7 - confirm that `theme=` is
+    # still accepted by gr.Blocks in that major version; it may need to be
+    # passed to launch() instead.
+    with gr.Blocks(title="GPT-SoVITS 训练器 — 达妮娅", theme=gr.themes.Soft()) as demo:
+        gr.Markdown(
+            "# 🎤 GPT-SoVITS 训练器 — 达妮娅语音\n"
+            "这个 Space 按当前 GPT-SoVITS 训练链路执行。先拿到 SoVITS 权重，再按需继续训练 GPT。"
+        )
         with gr.Row():
             with gr.Column(scale=1):
+                gr.Markdown("### 1. 环境")
+                env_btn = gr.Button("准备环境", variant="secondary")
+                env_out = gr.Textbox(label="环境状态", lines=8, interactive=False, autoscroll=True)
+                gr.Markdown("### 2. 数据集")
+                dataset_btn = gr.Button("下载数据集", variant="secondary")
+                dataset_out = gr.Textbox(label="数据状态", lines=8, interactive=False, autoscroll=True)
+                gr.Markdown("### 3. 预处理")
+                prep_btn = gr.Button("生成训练特征", variant="primary")
+                prep_out = gr.Textbox(label="预处理日志", lines=16, interactive=False, autoscroll=True)
             with gr.Column(scale=1):
+                gr.Markdown("### 4. SoVITS 训练")
+                sovits_epochs = gr.Slider(1, 20, value=2, step=1, label="训练轮数")
+                sovits_batch = gr.Slider(1, 4, value=1, step=1, label="批次大小")
+                sovits_save_every = gr.Slider(1, 5, value=1, step=1, label="每隔多少轮导出")
+                sovits_lr = gr.Slider(1e-5, 5e-4, value=1e-4, step=1e-5, label="学习率")
+                sovits_btn = gr.Button("开始 SoVITS 训练", variant="primary", size="lg")
+                sovits_log = gr.Textbox(label="SoVITS 训练日志", lines=18, interactive=False, autoscroll=True)
+                sovits_file = gr.File(label="最新 SoVITS 权重", interactive=False)
+                gr.Markdown("### 5. GPT 训练（可选）")
+                gpt_epochs = gr.Slider(1, 10, value=1, step=1, label="训练轮数")
+                gpt_batch = gr.Slider(1, 4, value=1, step=1, label="批次大小")
+                gpt_save_every = gr.Slider(1, 5, value=1, step=1, label="每隔多少轮导出")
+                gpt_btn = gr.Button("开始 GPT 训练", variant="secondary")
+                gpt_log = gr.Textbox(label="GPT 训练日志", lines=14, interactive=False, autoscroll=True)
+                gpt_file = gr.File(label="最新 GPT 权重", interactive=False)
+                gr.Markdown("### 6. 当前输出")
+                refresh_btn = gr.Button("刷新最新权重")
+                refresh_text = gr.Textbox(label="输出摘要", lines=3, interactive=False)
+                refresh_sovits = gr.File(label="SoVITS 输出", interactive=False)
+                refresh_gpt = gr.File(label="GPT 输出", interactive=False)
+        # The first three handlers are generators yielding one string each.
+        env_btn.click(check_environment, outputs=env_out)
+        dataset_btn.click(download_dataset, outputs=dataset_out)
+        prep_btn.click(prepare_data, outputs=prep_out)
+        # The training handlers yield (log_text, file_path) pairs, matching
+        # the two output components.
+        sovits_btn.click(
+            start_training,
+            inputs=[sovits_epochs, sovits_batch, sovits_save_every, sovits_lr],
+            outputs=[sovits_log, sovits_file],
+        )
+        gpt_btn.click(
+            start_gpt_training,
+            inputs=[gpt_epochs, gpt_batch, gpt_save_every],
+            outputs=[gpt_log, gpt_file],
+        )
+        refresh_btn.click(refresh_outputs, outputs=[refresh_text, refresh_sovits, refresh_gpt])
     return demo
 if __name__ == "__main__":
+    # Create the working directories up front so the output panels can be
+    # refreshed before any pipeline step has run.
+    ensure_dirs()
     demo = create_ui()
+    # Listen on all interfaces on port 7860.
+    demo.launch(server_name="0.0.0.0", server_port=7860)

requirements.txt CHANGED Viewed

@@ -1,14 +1,26 @@
 huggingface_hub>=0.25.0
-pandas>=2.0.0
 soundfile>=0.12.0
-librosa>=0.10.0
 torch>=2.0.0
 torchaudio>=2.0.0
-numpy>=1.24.0
-scipy>=1.10.0
-matplotlib>=3.7.0
-tqdm>=4.65.0
-pyyaml>=6.0
-transformers>=4.30.0
 accelerate>=0.20.0
-sentencepiece>=0.1.99

+gradio>=6,<7
 huggingface_hub>=0.25.0
+numpy<2.0
+pandas>=2.0.0,<3
+scipy>=1.10.0
+librosa==0.10.2
 soundfile>=0.12.0
 torch>=2.0.0
 torchaudio>=2.0.0
+pytorch-lightning>=2.4
+torchmetrics<=1.5
+tensorboard
+transformers>=4.43,<=4.50
+sentencepiece>=0.1.99
 accelerate>=0.20.0
+ffmpeg-python
+cn2an
+pypinyin
+jieba_fast
+PyYAML>=6.0
+psutil
+numba
+chardet
+einops
+typeguard<3
+tqdm>=4.65.0