Codex committed on
Commit ·
7dab114
1
Parent(s): 3987df7
Fix Daniya GPT-SoVITS training space
Browse files- Dockerfile +1 -2
- app.py +523 -140
- requirements.txt +21 -9
Dockerfile
CHANGED
|
@@ -9,11 +9,10 @@ WORKDIR /code
|
|
| 9 |
|
| 10 |
COPY --chown=user requirements.txt .
|
| 11 |
RUN pip install --no-cache-dir -r requirements.txt
|
| 12 |
-
RUN pip install --no-cache-dir "gradio>=5.0.0"
|
| 13 |
|
| 14 |
COPY --chown=user . .
|
| 15 |
|
| 16 |
USER user
|
| 17 |
ENV HOME=/home/user PATH=/home/user/.local/bin:$PATH
|
| 18 |
|
| 19 |
-
CMD ["python", "app.py"]
|
|
|
|
| 9 |
|
| 10 |
COPY --chown=user requirements.txt .
|
| 11 |
RUN pip install --no-cache-dir -r requirements.txt
|
|
|
|
| 12 |
|
| 13 |
COPY --chown=user . .
|
| 14 |
|
| 15 |
USER user
|
| 16 |
ENV HOME=/home/user PATH=/home/user/.local/bin:$PATH
|
| 17 |
|
| 18 |
+
CMD ["python", "app.py"]
|
app.py
CHANGED
|
@@ -1,183 +1,566 @@
|
|
| 1 |
#!/usr/bin/env python3
|
| 2 |
"""
|
| 3 |
-
GPT-SoVITS
|
| 4 |
-
|
|
|
|
|
|
|
|
|
|
| 5 |
"""
|
| 6 |
|
| 7 |
-
import
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
from pathlib import Path
|
|
|
|
| 9 |
import gradio as gr
|
|
|
|
|
|
|
|
|
|
| 10 |
|
| 11 |
-
# ── 配置
|
| 12 |
HF_TOKEN = os.environ.get("HF_TOKEN", "")
|
|
|
|
| 13 |
DATASET_REPO = "huanx/daniya-voice-gptsovits"
|
| 14 |
GPT_SOVITS_REPO = "https://github.com/RVC-Boss/GPT-SoVITS.git"
|
| 15 |
-
|
| 16 |
-
WORK_DIR = Path("/tmp/
|
| 17 |
-
|
| 18 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
|
| 20 |
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
|
| 21 |
log = logging.getLogger(__name__)
|
| 22 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
def ensure_dirs():
|
| 24 |
-
for
|
| 25 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
|
| 27 |
-
# ── 步骤 1:环境检查 + 安装 GPT-SoVITS
|
| 28 |
def check_environment():
|
|
|
|
| 29 |
try:
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
log.info("克隆 GPT-SoVITS...")
|
| 36 |
-
clone = subprocess.run(
|
| 37 |
-
["git", "clone", "--depth", "1", GPT_SOVITS_REPO, str(GPT_SOVITS_DIR)],
|
| 38 |
-
capture_output=True, text=True, timeout=600
|
| 39 |
-
)
|
| 40 |
-
if clone.returncode != 0:
|
| 41 |
-
return f"克隆失败 (exit={clone.returncode}) STDERR: {clone.stderr[:300]}"
|
| 42 |
-
# 装依��(跳过 Web UI 包避免降级 Gradio)
|
| 43 |
-
req = GPT_SOVITS_DIR / "requirements.txt"
|
| 44 |
-
if req.exists():
|
| 45 |
-
import re
|
| 46 |
-
lines_req = [l for l in req.read_text().splitlines()
|
| 47 |
-
if not re.match(r'^\s*(gradio|streamlit|fastapi|uvicorn|huggingface.hub|protobuf)', l) and l.strip()]
|
| 48 |
-
if lines_req:
|
| 49 |
-
log.info("安装 %d 个依赖...", len(lines_req))
|
| 50 |
-
pip_result = subprocess.run(
|
| 51 |
-
[sys.executable, "-m", "pip", "install", "--no-cache-dir"] + lines_req,
|
| 52 |
-
capture_output=True, text=True, timeout=1200
|
| 53 |
-
)
|
| 54 |
-
if pip_result.returncode != 0:
|
| 55 |
-
log.warning("pip 部分失败: %s", pip_result.stderr[-200:])
|
| 56 |
-
log.info("GPT-SoVITS 安装完成")
|
| 57 |
-
|
| 58 |
-
train_py = GPT_SOVITS_DIR / "train.py"
|
| 59 |
-
if not train_py.exists():
|
| 60 |
-
items = list(GPT_SOVITS_DIR.iterdir())[:15] if GPT_SOVITS_DIR.exists() else []
|
| 61 |
-
return f"克隆后仍缺 train.py 目录: {GPT_SOVITS_DIR} 内容: {[i.name for i in items]}"
|
| 62 |
-
return f"环境就绪 GPT-SoVITS: {GPT_SOVITS_DIR} 工作目录: {WORK_DIR}"
|
| 63 |
-
except Exception as e:
|
| 64 |
log.exception("check_environment")
|
| 65 |
-
|
| 66 |
|
| 67 |
|
| 68 |
-
# ── 步骤 2:下载数据集
|
| 69 |
def download_dataset():
|
|
|
|
| 70 |
try:
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
kwargs["token"] = HF_TOKEN
|
| 77 |
-
snapshot_download(**kwargs)
|
| 78 |
-
|
| 79 |
-
audio_src = WORK_DIR / "dataset" / "audio"
|
| 80 |
-
if audio_src.exists():
|
| 81 |
-
for f in audio_src.glob("*.wav"):
|
| 82 |
-
shutil.copy2(f, RAW_AUDIO)
|
| 83 |
-
count = len(list(RAW_AUDIO.glob("*.wav")))
|
| 84 |
-
return f"✅ 下载完成!音频文件: {count} 个"
|
| 85 |
-
except Exception as e:
|
| 86 |
log.exception("download_dataset")
|
| 87 |
-
|
|
|
|
| 88 |
|
| 89 |
-
# ── 步骤 3:准备训练数据
|
| 90 |
def prepare_data():
|
|
|
|
| 91 |
try:
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
target_audio = GPT_SOVITS_DIR / "raw_audio" / "daniya"
|
| 96 |
-
target_audio.mkdir(parents=True, exist_ok=True)
|
| 97 |
-
for wav in RAW_AUDIO.glob("*.wav"):
|
| 98 |
-
shutil.copy2(wav, target_audio)
|
| 99 |
-
|
| 100 |
-
metadata = WORK_DIR / "dataset" / "metadata.csv"
|
| 101 |
-
if not metadata.exists():
|
| 102 |
-
for f in (WORK_DIR / "dataset").rglob("*.csv"):
|
| 103 |
-
metadata = f; break
|
| 104 |
-
|
| 105 |
-
fl_dir = GPT_SOVITS_DIR / "filelist"
|
| 106 |
-
fl_dir.mkdir(exist_ok=True)
|
| 107 |
-
|
| 108 |
-
if metadata.exists():
|
| 109 |
-
df = pd.read_csv(metadata)
|
| 110 |
-
with open(fl_dir / "daniya.list", "w", encoding="utf-8") as f:
|
| 111 |
-
for _, row in df.iterrows():
|
| 112 |
-
f.write(f"raw_audio/daniya/{row['file']}|{row['text']}|daniya\n")
|
| 113 |
-
return f"✅ 准备完成!训练样本: {len(df)} 条"
|
| 114 |
-
else:
|
| 115 |
-
wavs = list(target_audio.glob("*.wav"))
|
| 116 |
-
with open(fl_dir / "daniya.list", "w", encoding="utf-8") as f:
|
| 117 |
-
for w in wavs:
|
| 118 |
-
f.write(f"raw_audio/daniya/{w.name}|{w.stem}|daniya\n")
|
| 119 |
-
return f"⚠️ 未找到 metadata.csv,用文件名当文本。样本: {len(wavs)} 条"
|
| 120 |
-
except Exception as e:
|
| 121 |
log.exception("prepare_data")
|
| 122 |
-
|
|
|
|
| 123 |
|
| 124 |
-
|
| 125 |
-
|
| 126 |
try:
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
yield "\n".join(lines)
|
| 146 |
-
proc.wait()
|
| 147 |
-
yield "\n".join(lines[-30:]) + f"\n\n{'✅ 训练完成' if proc.returncode==0 else f'❌ 训练失败 (exit={proc.returncode})'}"
|
| 148 |
-
except Exception as e:
|
| 149 |
log.exception("start_training")
|
| 150 |
-
yield f"❌ 训练
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 151 |
|
| 152 |
-
# ── UI
|
| 153 |
def create_ui():
|
| 154 |
-
with gr.Blocks(title="GPT-SoVITS 训练 — 达妮娅", theme=gr.themes.Soft()) as demo:
|
| 155 |
-
gr.Markdown(
|
|
|
|
|
|
|
|
|
|
| 156 |
|
| 157 |
with gr.Row():
|
| 158 |
with gr.Column(scale=1):
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
|
|
|
|
|
|
|
|
|
| 167 |
|
| 168 |
with gr.Column(scale=1):
|
| 169 |
-
gr.Markdown("###
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
gr.
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 178 |
|
| 179 |
return demo
|
| 180 |
|
|
|
|
| 181 |
if __name__ == "__main__":
|
|
|
|
| 182 |
demo = create_ui()
|
| 183 |
-
demo.launch(server_name="0.0.0.0", server_port=7860)
|
|
|
|
| 1 |
#!/usr/bin/env python3
|
| 2 |
"""
|
| 3 |
+
GPT-SoVITS Daniya trainer for Hugging Face Spaces.
|
| 4 |
+
|
| 5 |
+
This Space is CPU-oriented. It prepares the dataset with the current
|
| 6 |
+
GPT-SoVITS pipeline and can export fresh SoVITS and GPT checkpoints
|
| 7 |
+
through the Gradio UI.
|
| 8 |
"""
|
| 9 |
|
| 10 |
+
import csv
|
| 11 |
+
import json
|
| 12 |
+
import logging
|
| 13 |
+
import os
|
| 14 |
+
import shutil
|
| 15 |
+
import subprocess
|
| 16 |
+
import sys
|
| 17 |
from pathlib import Path
|
| 18 |
+
|
| 19 |
import gradio as gr
|
| 20 |
+
import yaml
|
| 21 |
+
from huggingface_hub import hf_hub_download, snapshot_download
|
| 22 |
+
|
| 23 |
|
|
|
|
| 24 |
# --- Credentials and upstream sources ---------------------------------------
# Optional Hub token; an empty string means "anonymous access".
HF_TOKEN = os.environ.get("HF_TOKEN", "")

DATASET_REPO = "huanx/daniya-voice-gptsovits"
GPT_SOVITS_REPO = "https://github.com/RVC-Boss/GPT-SoVITS.git"

# --- Working tree: everything lives under one scratch directory -------------
WORK_DIR = Path("/tmp/daniya_trainer")
HF_HOME = WORK_DIR.joinpath("hf_home")
GPT_SOVITS_DIR = WORK_DIR.joinpath("GPT-SoVITS")
DATASET_DIR = WORK_DIR.joinpath("dataset")
AUDIO_DIR = DATASET_DIR.joinpath("audio")

# --- Experiment layout and exported weights ---------------------------------
EXP_NAME = "daniya"
EXP_ROOT = WORK_DIR.joinpath("logs")
EXP_DIR = EXP_ROOT.joinpath(EXP_NAME)
OUTPUT_ROOT = WORK_DIR.joinpath("trained_models")
SOVITS_OUTPUT_DIR = OUTPUT_ROOT.joinpath("SoVITS_weights_v2")
GPT_OUTPUT_DIR = OUTPUT_ROOT.joinpath("GPT_weights_v2")

# --- Preprocessing inputs / merged outputs ----------------------------------
INPUT_LIST = WORK_DIR.joinpath("daniya.list")
TEXT_PATH = EXP_DIR.joinpath("2-name2text.txt")
SEMANTIC_PATH = EXP_DIR.joinpath("6-name2semantic.tsv")

# --- Pretrained assets placed inside the upstream checkout ------------------
PRETRAINED_DIR = GPT_SOVITS_DIR.joinpath("GPT_SoVITS", "pretrained_models")
BERT_DIR = PRETRAINED_DIR.joinpath("chinese-roberta-wwm-ext-large")
CNHUBERT_DIR = PRETRAINED_DIR.joinpath("chinese-hubert-base")

PRETRAINED_REPO = "lj1995/GPT-SoVITS"
PRETRAINED_S1_REL = "gsv-v2final-pretrained/s1bert25hz-5kh-longer-epoch=12-step=369668.ckpt"
PRETRAINED_S2G_REL = "gsv-v2final-pretrained/s2G2333k.pth"
PRETRAINED_S2D_REL = "gsv-v2final-pretrained/s2D2333k.pth"
PRETRAINED_S1 = PRETRAINED_DIR.joinpath(PRETRAINED_S1_REL)
PRETRAINED_S2G = PRETRAINED_DIR.joinpath(PRETRAINED_S2G_REL)
PRETRAINED_S2D = PRETRAINED_DIR.joinpath(PRETRAINED_S2D_REL)

BERT_REPO = "hfl/chinese-roberta-wwm-ext-large"
CNHUBERT_REPO = "TencentGameMate/chinese-hubert-base"

# --- Fixed training identity ------------------------------------------------
VERSION = "v2"
LANGUAGE = "zh"
SPEAKER = "daniya"

# File patterns fetched when snapshotting the feature-extractor model repos.
MODEL_PATTERNS = ["*.json", "*.txt", "*.bin", "*.safetensors", "*.model"]

logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
log = logging.getLogger(__name__)

# setdefault keeps any value already provided by the hosting environment.
# NOTE(review): these run after `huggingface_hub` has been imported above, and
# that library appears to read HF_HOME at import time, so the value may only
# reach child processes and later-imported libraries. Confirm and, if needed,
# move these assignments above the third-party imports.
for _env_name, _env_value in (
    ("HF_HOME", str(HF_HOME)),
    ("TRANSFORMERS_CACHE", str(HF_HOME / "transformers")),
    ("HF_HUB_DISABLE_TELEMETRY", "1"),
):
    os.environ.setdefault(_env_name, _env_value)
|
| 78 |
+
|
| 79 |
+
|
| 80 |
def ensure_dirs():
    """Create every working directory the trainer writes into.

    Safe to call repeatedly: existing directories are left untouched and
    missing parents are created.
    """
    required = (
        WORK_DIR,
        HF_HOME,
        DATASET_DIR,
        EXP_ROOT,
        EXP_DIR,
        OUTPUT_ROOT,
        SOVITS_OUTPUT_DIR,
        GPT_OUTPUT_DIR,
    )
    for directory in required:
        directory.mkdir(parents=True, exist_ok=True)
|
| 92 |
+
|
| 93 |
+
|
| 94 |
+
def hf_kwargs():
    """Return the keyword arguments carrying the Hub token.

    Returns:
        ``{"token": HF_TOKEN}`` when a token is configured, otherwise an
        empty dict so the download helpers are called without a token.
    """
    if not HF_TOKEN:
        return {}
    return {"token": HF_TOKEN}
|
| 96 |
+
|
| 97 |
+
|
| 98 |
+
def push(logs, message):
    """Record *message* and render the visible tail of the log.

    Args:
        logs: Mutable list of log lines; *message* is appended in place.
        message: The line to record.

    Returns:
        The most recent 200 entries of *logs* joined with newlines.
    """
    logs.append(message)
    visible_tail = logs[-200:]
    return "\n".join(visible_tail)
|
| 101 |
+
|
| 102 |
+
|
| 103 |
+
def run_cmd(command, cwd=None, env=None):
    """Run *command* and stream its combined stdout/stderr line by line.

    This is a generator: the process is started on the first ``next()``.
    The first item yielded is an echo of the command (``"$ ..."``); after
    that every non-empty output line is yielded with trailing whitespace
    removed.

    Args:
        command: Argument list passed to ``subprocess.Popen`` (no shell).
        cwd: Optional working directory for the child process.
        env: Optional environment mapping for the child process.

    Raises:
        RuntimeError: If the process exits with a non-zero status.
    """
    proc = subprocess.Popen(
        command,
        cwd=str(cwd) if cwd else None,
        env=env,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,  # interleave stderr so errors show up in the UI log
        text=True,
        bufsize=1,  # line-buffered so output is streamed as it is produced
    )
    try:
        yield f"$ {' '.join(command)}"
        for raw in proc.stdout:
            line = raw.rstrip()
            if line:
                yield line
        code = proc.wait()
    finally:
        # Reached on normal completion, on an exception, and when the consumer
        # closes the generator early. Never leave a child process running or
        # its pipe open behind us.
        if proc.poll() is None:
            proc.kill()
            proc.wait()
        if proc.stdout is not None:
            proc.stdout.close()
    if code != 0:
        raise RuntimeError(f"命令失败 (exit={code}): {' '.join(command)}")
|
| 121 |
+
|
| 122 |
+
|
| 123 |
+
def has_transformers_model(path: Path):
    """Tell whether *path* looks like a downloaded transformers model folder.

    The folder must exist, contain ``config.json``, and hold at least one
    ``*.bin`` or ``*.safetensors`` weight file directly inside it.
    """
    if not path.exists():
        return False
    if not (path / "config.json").exists():
        return False
    has_bin_weights = any(path.glob("*.bin"))
    return has_bin_weights or any(path.glob("*.safetensors"))
|
| 127 |
+
|
| 128 |
+
|
| 129 |
+
def metadata_rows():
    """Load the dataset's ``metadata.csv`` as a list of row dicts.

    Returns:
        One dict per CSV row keyed by the header names, or an empty list
        when ``metadata.csv`` is not present in the dataset directory.
    """
    metadata = DATASET_DIR / "metadata.csv"
    if not metadata.exists():
        return []
    # "utf-8-sig" reads plain UTF-8 unchanged but also strips a leading BOM.
    # With plain "utf-8" a BOM ends up glued to the first header name, so
    # lookups of that column would silently return None for every row.
    with metadata.open("r", encoding="utf-8-sig", newline="") as handle:
        return list(csv.DictReader(handle))
|
| 135 |
+
|
| 136 |
+
|
| 137 |
+
def latest_file(directory: Path, suffix: str):
    """Return the most recently modified file in *directory* ending in *suffix*.

    Args:
        directory: Folder to search (non-recursive).
        suffix: File-name ending to match, e.g. ``".pth"``.

    Returns:
        The path as a string, or ``None`` when nothing matches.
    """
    candidates = list(directory.glob(f"*{suffix}"))
    if not candidates:
        return None
    # Stable sort by modification time; the newest ends up last.
    candidates.sort(key=lambda entry: entry.stat().st_mtime)
    return str(candidates[-1])
|
| 140 |
+
|
| 141 |
+
|
| 142 |
+
def artifacts_summary():
    """Describe the newest exported SoVITS and GPT weights.

    Returns:
        A 3-tuple ``(summary_text, sovits_path, gpt_path)``. Each path is a
        string or ``None`` when no file of that kind has been exported yet;
        the summary shows a placeholder for missing entries.
    """
    sovits = latest_file(SOVITS_OUTPUT_DIR, ".pth")
    gpt = latest_file(GPT_OUTPUT_DIR, ".ckpt")
    summary = f"SoVITS: {sovits or '暂无'}\nGPT: {gpt or '暂无'}"
    return summary, sovits, gpt
|
| 150 |
+
|
| 151 |
+
|
| 152 |
+
def dataset_prepared():
    """Return True when every preprocessing artifact needed for training exists.

    Checks the merged text file, the merged semantic file, and the three
    feature directories inside the experiment directory.
    """
    required = (
        TEXT_PATH,
        SEMANTIC_PATH,
        EXP_DIR / "3-bert",
        EXP_DIR / "4-cnhubert",
        EXP_DIR / "5-wav32k",
    )
    return all(artifact.exists() for artifact in required)
|
| 160 |
+
|
| 161 |
+
|
| 162 |
+
def build_process_env():
    """Build the environment passed to the upstream GPT-SoVITS scripts.

    Returns:
        A new dict: a copy of the current process environment overlaid with
        the variables below. The overlay wins on any name clash.
    """
    overrides = {
        # Make the upstream checkout importable from the child process.
        "PYTHONPATH": str(GPT_SOVITS_DIR),
        "inp_text": str(INPUT_LIST),
        "inp_wav_dir": str(AUDIO_DIR),
        "exp_name": EXP_NAME,
        "opt_dir": str(EXP_DIR),
        # Single shard: part 0 of 1. prepare_data_steps relies on the
        # resulting "-0" suffix of the per-part output files.
        "i_part": "0",
        "all_parts": "1",
        "_CUDA_VISIBLE_DEVICES": "0",
        # Presumably full precision because the Space is CPU-oriented
        # (see module docstring) — confirm against the upstream scripts.
        "is_half": "False",
        "version": VERSION,
        "hz": "25hz",
        "bert_pretrained_dir": str(BERT_DIR),
        "bert_path": str(BERT_DIR),
        "cnhubert_base_dir": str(CNHUBERT_DIR),
        "pretrained_s2G": str(PRETRAINED_S2G),
        # Relative path: resolved against the child's working directory.
        "s2config_path": "GPT_SoVITS/configs/s2.json",
    }
    return {**os.environ, **overrides}
|
| 185 |
+
|
| 186 |
+
|
| 187 |
+
def ensure_upstream_repo():
    """Make sure a usable GPT-SoVITS checkout exists, cloning it if needed.

    A checkout counts as usable when ``webui.py`` is present. Any other
    content at the target path (e.g. a half-finished clone) is deleted
    before a fresh shallow clone is made.

    Raises:
        subprocess.CalledProcessError: If ``git clone`` exits non-zero.
        subprocess.TimeoutExpired: If the clone takes longer than 900 s.
    """
    checkout_ready = (GPT_SOVITS_DIR / "webui.py").exists()
    if checkout_ready:
        return

    if GPT_SOVITS_DIR.exists():
        shutil.rmtree(GPT_SOVITS_DIR)

    clone_command = ["git", "clone", "--depth", "1", GPT_SOVITS_REPO, str(GPT_SOVITS_DIR)]
    subprocess.run(clone_command, check=True, capture_output=True, text=True, timeout=900)
|
| 199 |
+
|
| 200 |
+
|
| 201 |
+
def patch_upstream_repo():
    """Apply the one-time source patch to the upstream checkout.

    Rewrites the ``is_g2pw`` assignment in ``GPT_SoVITS/text/chinese2.py``
    from ``True`` to ``False`` and then drops a marker file so the patch is
    attempted only once per checkout.

    If the exact line to replace is not found (and the patched line is not
    already present), the file is left unmodified and a warning is logged
    instead of silently pretending the patch was applied.

    Raises:
        FileNotFoundError: If ``chinese2.py`` is missing from the checkout.
    """
    patch_marker = GPT_SOVITS_DIR / ".hf_space_patch_applied"
    if patch_marker.exists():
        return
    chinese2 = GPT_SOVITS_DIR / "GPT_SoVITS" / "text" / "chinese2.py"
    content = chinese2.read_text(encoding="utf-8")
    old = "is_g2pw = True  # True if is_g2pw_str.lower() == 'true' else False"
    new = "is_g2pw = False  # patched for CPU Space training; avoids extra G2PW asset"
    if old in content:
        # Replace only the first occurrence.
        chinese2.write_text(content.replace(old, new, 1), encoding="utf-8")
    elif new not in content:
        log.warning(
            "Patch target not found in %s; the file was left unmodified "
            "(the upstream source may have changed).",
            chinese2,
        )
    patch_marker.write_text("ok\n", encoding="utf-8")
|
| 213 |
+
|
| 214 |
+
|
| 215 |
+
def ensure_base_assets():
    """Download whichever pretrained assets are still missing.

    Order: the two feature-extractor model folders, then the S1 and S2G
    base checkpoints, and finally the S2D discriminator checkpoint. The
    discriminator is optional: a failed download is logged as a warning
    and otherwise ignored. Failures of the other downloads propagate.
    """
    feature_models = (
        (BERT_REPO, BERT_DIR),
        (CNHUBERT_REPO, CNHUBERT_DIR),
    )
    for repo_id, target_dir in feature_models:
        if has_transformers_model(target_dir):
            continue
        snapshot_download(
            repo_id=repo_id,
            local_dir=str(target_dir),
            allow_patterns=MODEL_PATTERNS,
            **hf_kwargs(),
        )

    required_checkpoints = (
        (PRETRAINED_S1, PRETRAINED_S1_REL),
        (PRETRAINED_S2G, PRETRAINED_S2G_REL),
    )
    for checkpoint, relative_name in required_checkpoints:
        if checkpoint.exists():
            continue
        hf_hub_download(
            repo_id=PRETRAINED_REPO,
            filename=relative_name,
            local_dir=str(PRETRAINED_DIR),
            **hf_kwargs(),
        )

    if PRETRAINED_S2D.exists():
        return
    try:
        hf_hub_download(
            repo_id=PRETRAINED_REPO,
            filename=PRETRAINED_S2D_REL,
            local_dir=str(PRETRAINED_DIR),
            **hf_kwargs(),
        )
    except Exception:
        # Deliberately best-effort: create_sovits_config falls back to an
        # empty discriminator path when this file is absent.
        log.warning("Optional pretrained discriminator not found: %s", PRETRAINED_S2D_REL)
|
| 254 |
+
|
| 255 |
+
|
| 256 |
+
def reset_preprocess_outputs():
    """Delete the outputs of a previous preprocessing run, if any.

    Removes the training list, the merged and per-part text/semantic
    files, and the three feature directories. Missing items are skipped.
    """
    stale_files = (
        INPUT_LIST,
        TEXT_PATH,
        SEMANTIC_PATH,
        EXP_DIR / "2-name2text-0.txt",
        EXP_DIR / "6-name2semantic-0.tsv",
    )
    for stale in stale_files:
        if stale.exists():
            stale.unlink()

    for folder_name in ("3-bert", "4-cnhubert", "5-wav32k"):
        feature_dir = EXP_DIR / folder_name
        if feature_dir.exists():
            shutil.rmtree(feature_dir)
|
| 263 |
+
|
| 264 |
+
|
| 265 |
+
def build_manifest():
    """Write the training list from ``metadata.csv`` and the downloaded audio.

    A metadata row is used only when both its ``file`` and ``text`` fields
    are non-empty (after stripping) and ``file`` names a ``.wav`` present in
    the audio directory. Each used row becomes one
    ``name|speaker|language|text`` line.

    Returns:
        ``(sample_count, audio_count, unlisted)`` where *unlisted* is the
        sorted list of audio file names no used metadata row refers to.

    Raises:
        RuntimeError: If no metadata row is usable.
    """
    records = metadata_rows()
    available = {wav.name for wav in AUDIO_DIR.glob("*.wav")}
    used = set()
    entries = []
    for record in records:
        wav_name = (record.get("file") or "").strip()
        text = (record.get("text") or "").strip()
        if wav_name and text and wav_name in available:
            used.add(wav_name)
            entries.append("|".join((wav_name, SPEAKER, LANGUAGE, text)))
    if not entries:
        raise RuntimeError("metadata.csv 里没有可用训练样本")
    INPUT_LIST.write_text("\n".join(entries) + "\n", encoding="utf-8")
    return len(entries), len(available), sorted(available - used)
|
| 282 |
+
|
| 283 |
+
|
| 284 |
+
def create_sovits_config(epochs, batch_size, save_every_epoch, learning_rate):
    """Derive a SoVITS training config from the upstream ``s2.json`` template.

    Args:
        epochs: Number of training epochs (converted with ``int``).
        batch_size: Batch size (converted with ``int``).
        save_every_epoch: Weight-export interval in epochs (``int``).
        learning_rate: Learning rate (converted with ``float``).

    Returns:
        Path of the generated JSON config inside the working directory.
    """
    template = GPT_SOVITS_DIR / "GPT_SoVITS" / "configs" / "s2.json"
    data = json.loads(template.read_text(encoding="utf-8"))

    data["train"].update(
        {
            "fp16_run": False,
            "batch_size": int(batch_size),
            "epochs": int(epochs),
            "learning_rate": float(learning_rate),
            "pretrained_s2G": str(PRETRAINED_S2G),
            # The discriminator is optional; an empty string is written
            # when the file was not downloaded.
            "pretrained_s2D": str(PRETRAINED_S2D) if PRETRAINED_S2D.exists() else "",
            "if_save_latest": False,
            "if_save_every_weights": True,
            "save_every_epoch": int(save_every_epoch),
            "gpu_numbers": "0",
            "grad_ckpt": False,
        }
    )
    data["data"]["exp_dir"] = str(EXP_DIR)
    data.update(
        {
            "s2_ckpt_dir": str(EXP_DIR),
            "save_weight_dir": str(SOVITS_OUTPUT_DIR),
            "name": EXP_NAME,
            "version": VERSION,
        }
    )
    data["model"]["version"] = VERSION

    generated = WORK_DIR / "tmp_s2.json"
    generated.write_text(json.dumps(data), encoding="utf-8")
    return generated
|
| 308 |
+
|
| 309 |
+
|
| 310 |
+
def create_gpt_config(epochs, batch_size, save_every_epoch):
    """Derive a GPT training config from the upstream ``s1longer-v2.yaml``.

    Args:
        epochs: Number of training epochs (converted with ``int``).
        batch_size: Batch size (converted with ``int``).
        save_every_epoch: Weight-export interval in epochs (``int``).

    Returns:
        Path of the generated YAML config inside the working directory.
    """
    template = GPT_SOVITS_DIR / "GPT_SoVITS" / "configs" / "s1longer-v2.yaml"
    data = yaml.safe_load(template.read_text(encoding="utf-8"))

    data["train"].update(
        {
            "batch_size": int(batch_size),
            "epochs": int(epochs),
            "precision": "32",
            "save_every_n_epoch": int(save_every_epoch),
            "if_save_every_weights": True,
            "if_save_latest": False,
            "if_dpo": False,
            "exp_name": EXP_NAME,
            "half_weights_save_dir": str(GPT_OUTPUT_DIR),
        }
    )
    data["data"]["num_workers"] = 0
    data.update(
        {
            "pretrained_s1": str(PRETRAINED_S1),
            "train_semantic_path": str(SEMANTIC_PATH),
            "train_phoneme_path": str(TEXT_PATH),
            "output_dir": str(EXP_DIR / "logs_s1_v2"),
        }
    )

    generated = WORK_DIR / "tmp_s1.yaml"
    rendered = yaml.safe_dump(data, allow_unicode=True, sort_keys=False)
    generated.write_text(rendered, encoding="utf-8")
    return generated
|
| 331 |
+
|
| 332 |
+
|
| 333 |
+
def setup_environment_steps(logs):
    """Prepare the checkout and pretrained assets, yielding progress text.

    Generator. Each yielded value is the rendered log (see ``push``) after
    one more status line was appended to *logs*. Steps: create directories,
    clone the upstream repo if needed, patch it, announce which downloads
    are pending, then fetch the missing assets.

    Args:
        logs: Mutable list collecting the status lines.
    """
    ensure_dirs()

    already_cloned = (GPT_SOVITS_DIR / "webui.py").exists()
    if not already_cloned:
        yield push(logs, "克隆 GPT-SoVITS 仓库...")
        ensure_upstream_repo()
        yield push(logs, "✅ GPT-SoVITS 仓库已就绪。")
    else:
        yield push(logs, "GPT-SoVITS 仓库已存在,跳过克隆。")

    patch_upstream_repo()
    yield push(logs, "✅ 已应用 Space 兼容补丁。")

    # Announce pending downloads first; the actual fetching happens in one
    # go inside ensure_base_assets() below.
    if not has_transformers_model(BERT_DIR):
        yield push(logs, "下载中文 BERT 特征模型...")
    if not has_transformers_model(CNHUBERT_DIR):
        yield push(logs, "下载 CN-HuBERT 特征模型...")
    if not (PRETRAINED_S1.exists() and PRETRAINED_S2G.exists()):
        yield push(logs, "下载 GPT-SoVITS v2 底模...")

    ensure_base_assets()
    ready_message = "✅ 环境就绪:GPT-SoVITS 仓库、中文特征模型和 v2 底模均已准备完成。"
    yield push(logs, ready_message)
|
| 354 |
+
|
| 355 |
+
|
| 356 |
+
def download_dataset_steps(logs):
    """Set up the environment, then download the dataset, yielding progress.

    Generator. Runs every step of ``setup_environment_steps`` first and
    then snapshots the dataset repo into the dataset directory. The final
    status line reports how many ``.wav`` files and metadata rows arrived.

    Args:
        logs: Mutable list collecting the status lines.
    """
    ensure_dirs()
    yield from setup_environment_steps(logs)

    yield push(logs, "下载 Daniya 数据集...")
    snapshot_download(
        repo_id=DATASET_REPO,
        repo_type="dataset",
        local_dir=str(DATASET_DIR),
        **hf_kwargs(),
    )

    rows = metadata_rows()
    audio_count = sum(1 for _ in AUDIO_DIR.glob("*.wav"))
    summary = f"✅ 数据集已下载:音频 {audio_count} 个,metadata {len(rows)} 条。"
    yield push(logs, summary)
|
| 372 |
+
|
| 373 |
+
|
| 374 |
+
def prepare_data_steps(logs):
    """Run the full preprocessing pipeline, yielding progress text.

    Generator. After environment setup and dataset download it wipes old
    preprocessing outputs, writes the training list, and runs the three
    upstream ``prepare_datasets`` scripts in order. The per-part text and
    semantic outputs (suffix ``-0``) are turned into the merged files the
    training configs point at.

    Args:
        logs: Mutable list collecting the status lines.

    Returns:
        (Generator return value) a final summary line with the number of
        usable samples.

    Raises:
        RuntimeError: If a script fails or an expected output is missing.
    """

    def run_stage(script_name):
        # Stream one upstream preprocessing script into the log.
        command = [sys.executable, "-s", f"GPT_SoVITS/prepare_datasets/{script_name}"]
        for output_line in run_cmd(command, cwd=GPT_SOVITS_DIR, env=env):
            yield push(logs, output_line)

    yield from download_dataset_steps(logs)
    reset_preprocess_outputs()

    sample_count, audio_count, unlisted = build_manifest()
    manifest_note = (
        f"训练清单已生成:metadata 可用样本 {sample_count} 条,"
        f"音频总数 {audio_count} 个,未标注音频 {len(unlisted)} 个。"
    )
    yield push(logs, manifest_note)

    env = build_process_env()

    # Stage 1: text features.
    yield from run_stage("1-get-text.py")
    part_text = EXP_DIR / "2-name2text-0.txt"
    if not part_text.exists():
        raise RuntimeError("文本特征提取完成后未生成 2-name2text-0.txt")
    part_text.replace(TEXT_PATH)  # single part, so a rename is the "merge"
    yield push(logs, "✅ 文本分词与 BERT 特征提取完成。")

    # Stage 2: audio features.
    yield from run_stage("2-get-hubert-wav32k.py")
    yield push(logs, "✅ CN-HuBERT 特征与 32k wav 已生成。")

    # Stage 3: semantic tokens.
    yield from run_stage("3-get-semantic.py")
    part_semantic = EXP_DIR / "6-name2semantic-0.tsv"
    if not part_semantic.exists():
        raise RuntimeError("语义 token 提取完成后未生成 6-name2semantic-0.tsv")
    semantic_rows = part_semantic.read_text(encoding="utf-8").strip()
    # The merged file gets a header row in front of the per-part rows.
    body = f"{semantic_rows}\n" if semantic_rows else ""
    SEMANTIC_PATH.write_text("item_name\tsemantic_audio\n" + body, encoding="utf-8")
    part_semantic.unlink()
    yield push(logs, "✅ 语义 token 提取完成。")

    return f"✅ 预处理完成,可用于训练的样本 {sample_count} 条。"
|
| 410 |
+
|
| 411 |
|
|
|
|
| 412 |
def check_environment():
    """UI handler: prepare the environment and stream status text.

    Generator yielding the rendered log after each step. Any exception is
    logged with its traceback and reported as a final failure line instead
    of being raised to the UI.
    """
    history = []
    try:
        closing_line = yield from setup_environment_steps(history)
        if closing_line:
            yield closing_line
    except Exception as error:
        log.exception("check_environment")
        yield push(history, f"❌ 环境准备失败: {error}")
|
| 422 |
|
| 423 |
|
|
|
|
| 424 |
def download_dataset():
    """UI handler: download the dataset and stream status text.

    Generator yielding the rendered log after each step. Any exception is
    logged with its traceback and reported as a final failure line instead
    of being raised to the UI.
    """
    history = []
    try:
        closing_line = yield from download_dataset_steps(history)
        if closing_line:
            yield closing_line
    except Exception as error:
        log.exception("download_dataset")
        yield push(history, f"❌ 数据集下载失败: {error}")
|
| 434 |
+
|
| 435 |
|
|
|
|
| 436 |
def prepare_data():
    """UI handler: run preprocessing and stream status text.

    Generator yielding the rendered log after each step, ending with the
    summary line returned by ``prepare_data_steps``. Any exception is
    logged with its traceback and reported as a final failure line.
    """
    history = []
    try:
        summary_line = yield from prepare_data_steps(history)
        yield push(history, summary_line)
    except Exception as error:
        log.exception("prepare_data")
        yield push(history, f"❌ 预处理失败: {error}")
|
| 444 |
+
|
| 445 |
|
| 446 |
+
def start_training(epochs=2, batch_size=1, save_every_epoch=1, lr=0.0001):
    """UI handler: train SoVITS and stream ``(log_text, weight_path)`` pairs.

    Generator. The second tuple element stays ``None`` until training has
    finished, when it becomes the path of the newest exported ``.pth``
    file. Preprocessing is run first if its outputs are missing. Errors
    are logged and reported as a final failure line.

    Args:
        epochs: Number of training epochs.
        batch_size: Training batch size.
        save_every_epoch: Weight-export interval in epochs.
        lr: Learning rate.
    """
    history = []
    try:
        if not dataset_prepared():
            yield push(history, "训练前缺少预处理产物,开始自动补齐..."), None
            for progress in prepare_data_steps(history):
                yield progress, None

        config_path = create_sovits_config(epochs, batch_size, save_every_epoch, lr)
        env = build_process_env()
        yield push(history, "开始 SoVITS 训练..."), None

        train_command = [sys.executable, "-s", "GPT_SoVITS/s2_train.py", "--config", str(config_path)]
        for output_line in run_cmd(train_command, cwd=GPT_SOVITS_DIR, env=env):
            yield push(history, output_line), None

        latest = latest_file(SOVITS_OUTPUT_DIR, ".pth")
        if latest is None:
            raise RuntimeError("训练结束后没有找到导出的 SoVITS 权重文件")
        yield push(history, f"✅ SoVITS 训练完成,最新权重:{latest}"), latest
    except Exception as error:
        log.exception("start_training")
        yield push(history, f"❌ SoVITS 训练失败: {error}"), None
|
| 469 |
+
|
| 470 |
+
|
| 471 |
+
def start_gpt_training(epochs=1, batch_size=1, save_every_epoch=1):
    """UI handler: train GPT and stream ``(log_text, weight_path)`` pairs.

    Generator. The second tuple element stays ``None`` until training has
    finished, when it becomes the path of the newest exported ``.ckpt``
    file. Preprocessing is run first if its outputs are missing. Errors
    are logged and reported as a final failure line.

    Args:
        epochs: Number of training epochs.
        batch_size: Training batch size.
        save_every_epoch: Weight-export interval in epochs.
    """
    history = []
    try:
        if not dataset_prepared():
            yield push(history, "训练前缺少预处理产物,开始自动补齐..."), None
            for progress in prepare_data_steps(history):
                yield progress, None

        config_path = create_gpt_config(epochs, batch_size, save_every_epoch)
        env = build_process_env()
        yield push(history, "开始 GPT 训练..."), None

        train_command = [sys.executable, "-s", "GPT_SoVITS/s1_train.py", "--config_file", str(config_path)]
        for output_line in run_cmd(train_command, cwd=GPT_SOVITS_DIR, env=env):
            yield push(history, output_line), None

        latest = latest_file(GPT_OUTPUT_DIR, ".ckpt")
        if latest is None:
            raise RuntimeError("训练结束后没有找到导出的 GPT 权重文件")
        yield push(history, f"✅ GPT 训练完成,最新权重:{latest}"), latest
    except Exception as error:
        log.exception("start_gpt_training")
        yield push(history, f"❌ GPT 训练失败: {error}"), None
|
| 494 |
+
|
| 495 |
+
|
| 496 |
+
def refresh_outputs():
    """UI handler: return the current ``(summary, sovits_path, gpt_path)``."""
    summary, sovits_path, gpt_path = artifacts_summary()
    return summary, sovits_path, gpt_path
|
| 498 |
+
|
| 499 |
|
|
|
|
| 500 |
def create_ui():
    """Build the Gradio interface and wire each button to its handler.

    Layout: a two-column row. The left column holds environment, dataset
    and preprocessing controls; the right column holds SoVITS training,
    GPT training and the output-refresh section.

    Returns:
        The assembled ``gr.Blocks`` app (not yet launched).
    """

    def log_box(label, lines):
        # Read-only, auto-scrolling textbox shared by all streaming log panes.
        return gr.Textbox(label=label, lines=lines, interactive=False, autoscroll=True)

    def step_slider(low, high, start, label):
        # Integer-stepped slider.
        return gr.Slider(low, high, value=start, step=1, label=label)

    def weight_file(label):
        # Display-only file slot used to hand a checkpoint to the user.
        return gr.File(label=label, interactive=False)

    # NOTE(review): requirements.txt pins gradio>=6; Gradio 6 reportedly moved
    # `theme` from Blocks() to launch() — confirm this call is still accepted.
    with gr.Blocks(title="GPT-SoVITS 训练器 — 达妮娅", theme=gr.themes.Soft()) as demo:
        gr.Markdown(
            "# 🎤 GPT-SoVITS 训练器 — 达妮娅语音\n"
            "这个 Space 按当前 GPT-SoVITS 训练链路执行。先拿到 SoVITS 权重,再按需继续训练 GPT。"
        )

        with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown("### 1. 环境")
                env_btn = gr.Button("准备环境", variant="secondary")
                env_out = log_box("环境状态", 8)

                gr.Markdown("### 2. 数据集")
                dataset_btn = gr.Button("下载数据集", variant="secondary")
                dataset_out = log_box("数据状态", 8)

                gr.Markdown("### 3. 预处理")
                prep_btn = gr.Button("生成训练特征", variant="primary")
                prep_out = log_box("预处理日志", 16)

            with gr.Column(scale=1):
                gr.Markdown("### 4. SoVITS 训练")
                sovits_epochs = step_slider(1, 20, 2, "训练轮数")
                sovits_batch = step_slider(1, 4, 1, "批次大小")
                sovits_save_every = step_slider(1, 5, 1, "每隔多少轮导出")
                sovits_lr = gr.Slider(1e-5, 5e-4, value=1e-4, step=1e-5, label="学习率")
                sovits_btn = gr.Button("开始 SoVITS 训练", variant="primary", size="lg")
                sovits_log = log_box("SoVITS 训练日志", 18)
                sovits_file = weight_file("最新 SoVITS 权重")

                gr.Markdown("### 5. GPT 训练(可选)")
                gpt_epochs = step_slider(1, 10, 1, "训练轮数")
                gpt_batch = step_slider(1, 4, 1, "批次大小")
                gpt_save_every = step_slider(1, 5, 1, "每隔多少轮导出")
                gpt_btn = gr.Button("开始 GPT 训练", variant="secondary")
                gpt_log = log_box("GPT 训练日志", 14)
                gpt_file = weight_file("最新 GPT 权重")

                gr.Markdown("### 6. 当前输出")
                refresh_btn = gr.Button("刷新最新权重")
                refresh_text = gr.Textbox(label="输出摘要", lines=3, interactive=False)
                refresh_sovits = weight_file("SoVITS 输出")
                refresh_gpt = weight_file("GPT 输出")

        # Event wiring. The generator handlers stream their yields into the
        # listed outputs.
        env_btn.click(check_environment, outputs=env_out)
        dataset_btn.click(download_dataset, outputs=dataset_out)
        prep_btn.click(prepare_data, outputs=prep_out)
        sovits_btn.click(
            start_training,
            inputs=[sovits_epochs, sovits_batch, sovits_save_every, sovits_lr],
            outputs=[sovits_log, sovits_file],
        )
        gpt_btn.click(
            start_gpt_training,
            inputs=[gpt_epochs, gpt_batch, gpt_save_every],
            outputs=[gpt_log, gpt_file],
        )
        refresh_btn.click(refresh_outputs, outputs=[refresh_text, refresh_sovits, refresh_gpt])

    return demo
|
| 561 |
|
| 562 |
+
|
| 563 |
if __name__ == "__main__":
    # Create the working directories before any handler can run.
    ensure_dirs()
    demo = create_ui()
    # Listen on all interfaces, port 7860.
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
    )
|
requirements.txt
CHANGED
|
@@ -1,14 +1,26 @@
|
|
|
|
|
| 1 |
huggingface_hub>=0.25.0
|
| 2 |
-
|
|
|
|
|
|
|
|
|
|
| 3 |
soundfile>=0.12.0
|
| 4 |
-
librosa>=0.10.0
|
| 5 |
torch>=2.0.0
|
| 6 |
torchaudio>=2.0.0
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
transformers>=4.30.0
|
| 13 |
accelerate>=0.20.0
|
| 14 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio>=6,<7
|
| 2 |
huggingface_hub>=0.25.0
|
| 3 |
+
numpy<2.0
|
| 4 |
+
pandas>=2.0.0,<3
|
| 5 |
+
scipy>=1.10.0
|
| 6 |
+
librosa==0.10.2
|
| 7 |
soundfile>=0.12.0
|
|
|
|
| 8 |
torch>=2.0.0
|
| 9 |
torchaudio>=2.0.0
|
| 10 |
+
pytorch-lightning>=2.4
|
| 11 |
+
torchmetrics<=1.5
|
| 12 |
+
tensorboard
|
| 13 |
+
transformers>=4.43,<=4.50
|
| 14 |
+
sentencepiece>=0.1.99
|
|
|
|
| 15 |
accelerate>=0.20.0
|
| 16 |
+
ffmpeg-python
|
| 17 |
+
cn2an
|
| 18 |
+
pypinyin
|
| 19 |
+
jieba_fast
|
| 20 |
+
PyYAML>=6.0
|
| 21 |
+
psutil
|
| 22 |
+
numba
|
| 23 |
+
chardet
|
| 24 |
+
einops
|
| 25 |
+
typeguard<3
|
| 26 |
+
tqdm>=4.65.0
|