Codex committed on
Commit
7dab114
·
1 Parent(s): 3987df7

Fix Daniya GPT-SoVITS training space

Browse files
Files changed (3) hide show
  1. Dockerfile +1 -2
  2. app.py +523 -140
  3. requirements.txt +21 -9
Dockerfile CHANGED
@@ -9,11 +9,10 @@ WORKDIR /code
9
 
10
  COPY --chown=user requirements.txt .
11
  RUN pip install --no-cache-dir -r requirements.txt
12
- RUN pip install --no-cache-dir "gradio>=5.0.0"
13
 
14
  COPY --chown=user . .
15
 
16
  USER user
17
  ENV HOME=/home/user PATH=/home/user/.local/bin:$PATH
18
 
19
- CMD ["python", "app.py"]
 
9
 
10
  COPY --chown=user requirements.txt .
11
  RUN pip install --no-cache-dir -r requirements.txt
 
12
 
13
  COPY --chown=user . .
14
 
15
  USER user
16
  ENV HOME=/home/user PATH=/home/user/.local/bin:$PATH
17
 
18
+ CMD ["python", "app.py"]
app.py CHANGED
@@ -1,183 +1,566 @@
1
  #!/usr/bin/env python3
2
  """
3
- GPT-SoVITS 训练器 达妮娅语音
4
- HuggingFace Space (CPU)
 
 
 
5
  """
6
 
7
- import os, sys, shutil, subprocess, logging
 
 
 
 
 
 
8
  from pathlib import Path
 
9
  import gradio as gr
 
 
 
10
 
11
- # ── 配置
12
  HF_TOKEN = os.environ.get("HF_TOKEN", "")
 
13
  DATASET_REPO = "huanx/daniya-voice-gptsovits"
14
  GPT_SOVITS_REPO = "https://github.com/RVC-Boss/GPT-SoVITS.git"
15
- GPT_SOVITS_DIR = Path.home() / "GPT-SoVITS"
16
- WORK_DIR = Path("/tmp/gptsovits_workspace")
17
- RAW_AUDIO = WORK_DIR / "raw_audio"
18
- OUTPUT_DIR = WORK_DIR / "output"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
 
20
  logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
21
  log = logging.getLogger(__name__)
22
 
 
 
 
 
 
23
  def ensure_dirs():
24
- for d in [WORK_DIR, RAW_AUDIO, OUTPUT_DIR]:
25
- d.mkdir(parents=True, exist_ok=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
 
27
- # ── 步骤 1:环境检查 + 安装 GPT-SoVITS
28
  def check_environment():
 
29
  try:
30
- ensure_dirs()
31
- # 强制清理旧目录再重新克隆
32
- log.info("清理旧环境...")
33
- subprocess.run(["rm", "-rf", str(GPT_SOVITS_DIR)], check=False)
34
- # 重新克隆
35
- log.info("克隆 GPT-SoVITS...")
36
- clone = subprocess.run(
37
- ["git", "clone", "--depth", "1", GPT_SOVITS_REPO, str(GPT_SOVITS_DIR)],
38
- capture_output=True, text=True, timeout=600
39
- )
40
- if clone.returncode != 0:
41
- return f"克隆失败 (exit={clone.returncode}) STDERR: {clone.stderr[:300]}"
42
- # 装依赖(跳过 Web UI 包避免降级 Gradio)
43
- req = GPT_SOVITS_DIR / "requirements.txt"
44
- if req.exists():
45
- import re
46
- lines_req = [l for l in req.read_text().splitlines()
47
- if not re.match(r'^\s*(gradio|streamlit|fastapi|uvicorn|huggingface.hub|protobuf)', l) and l.strip()]
48
- if lines_req:
49
- log.info("安装 %d 个依赖...", len(lines_req))
50
- pip_result = subprocess.run(
51
- [sys.executable, "-m", "pip", "install", "--no-cache-dir"] + lines_req,
52
- capture_output=True, text=True, timeout=1200
53
- )
54
- if pip_result.returncode != 0:
55
- log.warning("pip 部分失败: %s", pip_result.stderr[-200:])
56
- log.info("GPT-SoVITS 安装完成")
57
-
58
- train_py = GPT_SOVITS_DIR / "train.py"
59
- if not train_py.exists():
60
- items = list(GPT_SOVITS_DIR.iterdir())[:15] if GPT_SOVITS_DIR.exists() else []
61
- return f"克隆后仍缺 train.py 目录: {GPT_SOVITS_DIR} 内容: {[i.name for i in items]}"
62
- return f"环境就绪 GPT-SoVITS: {GPT_SOVITS_DIR} 工作目录: {WORK_DIR}"
63
- except Exception as e:
64
  log.exception("check_environment")
65
- return f"环境安装失败: {e}"
66
 
67
 
68
- # ── 步骤 2:下载数据集
69
  def download_dataset():
 
70
  try:
71
- ensure_dirs()
72
- from huggingface_hub import snapshot_download
73
- kwargs = {"repo_id": DATASET_REPO, "repo_type": "dataset",
74
- "local_dir": str(WORK_DIR / "dataset"), "ignore_patterns": ["*.md", "*.txt"]}
75
- if HF_TOKEN:
76
- kwargs["token"] = HF_TOKEN
77
- snapshot_download(**kwargs)
78
-
79
- audio_src = WORK_DIR / "dataset" / "audio"
80
- if audio_src.exists():
81
- for f in audio_src.glob("*.wav"):
82
- shutil.copy2(f, RAW_AUDIO)
83
- count = len(list(RAW_AUDIO.glob("*.wav")))
84
- return f"✅ 下载完成!音频文件: {count} 个"
85
- except Exception as e:
86
  log.exception("download_dataset")
87
- return f"❌ 下载失败: {e}"
 
88
 
89
- # ── 步骤 3:准备训练数据
90
  def prepare_data():
 
91
  try:
92
- ensure_dirs()
93
- import pandas as pd
94
-
95
- target_audio = GPT_SOVITS_DIR / "raw_audio" / "daniya"
96
- target_audio.mkdir(parents=True, exist_ok=True)
97
- for wav in RAW_AUDIO.glob("*.wav"):
98
- shutil.copy2(wav, target_audio)
99
-
100
- metadata = WORK_DIR / "dataset" / "metadata.csv"
101
- if not metadata.exists():
102
- for f in (WORK_DIR / "dataset").rglob("*.csv"):
103
- metadata = f; break
104
-
105
- fl_dir = GPT_SOVITS_DIR / "filelist"
106
- fl_dir.mkdir(exist_ok=True)
107
-
108
- if metadata.exists():
109
- df = pd.read_csv(metadata)
110
- with open(fl_dir / "daniya.list", "w", encoding="utf-8") as f:
111
- for _, row in df.iterrows():
112
- f.write(f"raw_audio/daniya/{row['file']}|{row['text']}|daniya\n")
113
- return f"✅ 准备完成!训练样本: {len(df)} 条"
114
- else:
115
- wavs = list(target_audio.glob("*.wav"))
116
- with open(fl_dir / "daniya.list", "w", encoding="utf-8") as f:
117
- for w in wavs:
118
- f.write(f"raw_audio/daniya/{w.name}|{w.stem}|daniya\n")
119
- return f"⚠️ 未找到 metadata.csv,用文件名当文本。样本: {len(wavs)} 条"
120
- except Exception as e:
121
  log.exception("prepare_data")
122
- return f"❌ 准备失败: {e}"
 
123
 
124
- # ── 步骤 4:训练
125
- def start_training(epochs=100, batch_size=4, save_steps=500, lr=0.0001):
126
  try:
127
- ensure_dirs()
128
- out_dir = OUTPUT_DIR / "daniya"
129
- out_dir.mkdir(parents=True, exist_ok=True)
130
-
131
- cmd = [sys.executable, "train.py", "--model", "sovits",
132
- "--output_dir", str(out_dir), "--epochs", str(epochs),
133
- "--batch_size", str(batch_size), "--save_steps", str(save_steps),
134
- "--learning_rate", str(lr), "--device", "cpu"]
135
-
136
- log.info("训练: %s", " ".join(cmd))
137
- proc = subprocess.Popen(cmd, cwd=GPT_SOVITS_DIR,
138
- stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
139
- text=True, bufsize=1)
140
- lines = []
141
- for line in proc.stdout:
142
- lines.append(line.rstrip())
143
- if len(lines) > 200: lines = lines[-200:]
144
- if len(lines) % 10 == 0:
145
- yield "\n".join(lines)
146
- proc.wait()
147
- yield "\n".join(lines[-30:]) + f"\n\n{'✅ 训练完成' if proc.returncode==0 else f'❌ 训练失败 (exit={proc.returncode})'}"
148
- except Exception as e:
149
  log.exception("start_training")
150
- yield f"❌ 训练出错: {e}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
151
 
152
- # ── UI
153
  def create_ui():
154
- with gr.Blocks(title="GPT-SoVITS 训练 — 达妮娅", theme=gr.themes.Soft()) as demo:
155
- gr.Markdown("# 🎤 GPT-SoVITS 训练器 — 达妮娅语音\nCPU 训练 SoVITS 模型,按顺序点击按钮。")
 
 
 
156
 
157
  with gr.Row():
158
  with gr.Column(scale=1):
159
- for label, btn_text, fn, out in [
160
- ("📦 环境", "1. 安装环境", check_environment, gr.Textbox(label="状态", lines=5, interactive=False)),
161
- ("📥 数据", "2. 下载数据集", download_dataset, gr.Textbox(label="状态", lines=3, interactive=False)),
162
- ("🛠️ 预处理", "3. 准备训练数据", prepare_data, gr.Textbox(label="状态", lines=4, interactive=False)),
163
- ]:
164
- gr.Markdown(f"### {label}")
165
- b = gr.Button(btn_text, variant="primary" if "环境" not in label else "secondary")
166
- b.click(fn, outputs=out)
 
 
 
167
 
168
  with gr.Column(scale=1):
169
- gr.Markdown("### ⚙️ 参数")
170
- epochs = gr.Slider(10, 500, value=100, step=10, label="训练轮数")
171
- batch = gr.Slider(1, 16, value=4, step=1, label="批次大小")
172
- save_steps = gr.Slider(100, 2000, value=500, step=100, label="保存间隔")
173
- lr = gr.Slider(1e-5, 0.01, value=1e-4, step=1e-5, label="学习率")
174
- gr.Markdown("### 🚀 训练")
175
- btn_train = gr.Button("4. 开始训练", variant="primary", size="lg")
176
- out_train = gr.Textbox(label="训练日志", lines=18, interactive=False, autoscroll=True)
177
- btn_train.click(start_training, inputs=[epochs, batch, save_steps, lr], outputs=out_train)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
178
 
179
  return demo
180
 
 
181
  if __name__ == "__main__":
 
182
  demo = create_ui()
183
- demo.launch(server_name="0.0.0.0", server_port=7860)# rebuild trigger 1779588386
 
1
  #!/usr/bin/env python3
2
  """
3
+ GPT-SoVITS Daniya trainer for Hugging Face Spaces.
4
+
5
+ This Space is CPU-oriented. It prepares the dataset with the current
6
+ GPT-SoVITS pipeline and can export fresh SoVITS and GPT checkpoints
7
+ through the Gradio UI.
8
  """
9
 
10
+ import csv
11
+ import json
12
+ import logging
13
+ import os
14
+ import shutil
15
+ import subprocess
16
+ import sys
17
  from pathlib import Path
18
+
19
  import gradio as gr
20
+ import yaml
21
+ from huggingface_hub import hf_hub_download, snapshot_download
22
+
23
 
 
24
# Optional Hugging Face access token; an empty string when the variable is unset.
HF_TOKEN = os.environ.get("HF_TOKEN", "")

# Remote sources: the voice dataset repo and the upstream GPT-SoVITS code.
DATASET_REPO = "huanx/daniya-voice-gptsovits"
GPT_SOVITS_REPO = "https://github.com/RVC-Boss/GPT-SoVITS.git"

# Every path the trainer reads or writes hangs off one scratch root.
WORK_DIR = Path("/tmp/daniya_trainer")
HF_HOME = WORK_DIR.joinpath("hf_home")
GPT_SOVITS_DIR = WORK_DIR.joinpath("GPT-SoVITS")
DATASET_DIR = WORK_DIR.joinpath("dataset")
AUDIO_DIR = DATASET_DIR.joinpath("audio")
EXP_NAME = "daniya"
EXP_ROOT = WORK_DIR.joinpath("logs")
EXP_DIR = EXP_ROOT.joinpath(EXP_NAME)
OUTPUT_ROOT = WORK_DIR.joinpath("trained_models")
SOVITS_OUTPUT_DIR = OUTPUT_ROOT.joinpath("SoVITS_weights_v2")
GPT_OUTPUT_DIR = OUTPUT_ROOT.joinpath("GPT_weights_v2")

# Preprocessing inputs/outputs: the training manifest and the merged feature files.
INPUT_LIST = WORK_DIR.joinpath("daniya.list")
TEXT_PATH = EXP_DIR.joinpath("2-name2text.txt")
SEMANTIC_PATH = EXP_DIR.joinpath("6-name2semantic.tsv")

# Feature-extractor model directories inside the cloned upstream checkout.
PRETRAINED_DIR = GPT_SOVITS_DIR.joinpath("GPT_SoVITS", "pretrained_models")
BERT_DIR = PRETRAINED_DIR.joinpath("chinese-roberta-wwm-ext-large")
CNHUBERT_DIR = PRETRAINED_DIR.joinpath("chinese-hubert-base")

# Base checkpoints: Hub repo, repo-relative file names, and their local paths.
PRETRAINED_REPO = "lj1995/GPT-SoVITS"
PRETRAINED_S1_REL = "gsv-v2final-pretrained/s1bert25hz-5kh-longer-epoch=12-step=369668.ckpt"
PRETRAINED_S2G_REL = "gsv-v2final-pretrained/s2G2333k.pth"
PRETRAINED_S2D_REL = "gsv-v2final-pretrained/s2D2333k.pth"
PRETRAINED_S1 = PRETRAINED_DIR / PRETRAINED_S1_REL
PRETRAINED_S2G = PRETRAINED_DIR / PRETRAINED_S2G_REL
PRETRAINED_S2D = PRETRAINED_DIR / PRETRAINED_S2D_REL

BERT_REPO = "hfl/chinese-roberta-wwm-ext-large"
CNHUBERT_REPO = "TencentGameMate/chinese-hubert-base"

# Values written into the manifest and the training configs.
VERSION = "v2"
LANGUAGE = "zh"
SPEAKER = "daniya"

# File patterns requested when downloading the feature-extractor models.
MODEL_PATTERNS = ["*.json", "*.txt", "*.bin", "*.safetensors", "*.model"]

logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
log = logging.getLogger(__name__)

# Defaults only: values already present in the environment are kept.
# NOTE(review): huggingface_hub is imported before these defaults are set;
# confirm the in-process library still honours HF_HOME at that point.
os.environ.setdefault("HF_HOME", str(HF_HOME))
os.environ.setdefault("TRANSFORMERS_CACHE", str(HF_HOME / "transformers"))
os.environ.setdefault("HF_HUB_DISABLE_TELEMETRY", "1")
78
+
79
+
80
def ensure_dirs():
    """Create every working directory the trainer uses, including parents.

    Directories that already exist are left as they are.
    """
    required = (
        WORK_DIR,
        HF_HOME,
        DATASET_DIR,
        EXP_ROOT,
        EXP_DIR,
        OUTPUT_ROOT,
        SOVITS_OUTPUT_DIR,
        GPT_OUTPUT_DIR,
    )
    for directory in required:
        directory.mkdir(parents=True, exist_ok=True)
92
+
93
+
94
def hf_kwargs():
    """Return keyword arguments carrying the Hub token.

    Returns:
        ``{"token": HF_TOKEN}`` when a token is configured, otherwise an
        empty dict so callers can always splat the result.
    """
    if not HF_TOKEN:
        return {}
    return {"token": HF_TOKEN}
96
+
97
+
98
def push(logs, message, max_lines=200):
    """Append ``message`` to ``logs`` and return the recent log as one string.

    Only the newest ``max_lines`` entries are ever rendered, so older entries
    are dropped from ``logs`` in place; this keeps the buffer bounded during
    long-running training jobs that emit many lines.

    Args:
        logs: Mutable list of log lines shared by the caller across calls.
        message: The line to append.
        max_lines: Maximum number of lines kept and returned.

    Returns:
        The retained lines joined with newlines.
    """
    logs.append(message)
    overflow = len(logs) - max_lines
    if overflow > 0:
        # Trim in place so the caller's list object is the one that shrinks.
        del logs[:overflow]
    return "\n".join(logs)
101
+
102
+
103
def run_cmd(command, cwd=None, env=None):
    """Run ``command`` and yield its combined stdout/stderr line by line.

    The first yielded item is an echo of the command (``$ ...``). Blank
    output lines are skipped and trailing whitespace is stripped.

    Args:
        command: Argument list passed to ``subprocess.Popen``.
        cwd: Optional working directory for the child process.
        env: Optional environment mapping for the child process.

    Yields:
        The command echo, then each non-empty output line.

    Raises:
        RuntimeError: If the process exits with a non-zero status.
    """
    printable = " ".join(str(part) for part in command)
    proc = subprocess.Popen(
        command,
        cwd=str(cwd) if cwd else None,
        env=env,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
        bufsize=1,
    )
    try:
        yield f"$ {printable}"
        for raw in proc.stdout:
            line = raw.rstrip()
            if line:
                yield line
        code = proc.wait()
    finally:
        # If the consumer stops iterating early (generator closed or an error
        # thrown in), do not leave the child running in the background.
        if proc.poll() is None:
            proc.terminate()
            try:
                proc.wait(timeout=10)
            except subprocess.TimeoutExpired:
                proc.kill()
                proc.wait()
        if proc.stdout is not None:
            proc.stdout.close()
    if code != 0:
        raise RuntimeError(f"命令失败 (exit={code}): {printable}")
121
+
122
+
123
def has_transformers_model(path: Path):
    """Report whether ``path`` holds a config plus at least one weight file.

    The directory qualifies when it exists, contains ``config.json`` and has
    at least one ``*.bin`` or ``*.safetensors`` file directly inside it.
    """
    if not path.exists():
        return False
    if not (path / "config.json").exists():
        return False
    for pattern in ("*.bin", "*.safetensors"):
        if any(path.glob(pattern)):
            return True
    return False
127
+
128
+
129
def metadata_rows():
    """Load ``metadata.csv`` from the dataset directory as a list of dicts.

    Returns:
        One dict per CSV row keyed by the header names, or an empty list when
        the file does not exist.
    """
    metadata = DATASET_DIR / "metadata.csv"
    if not metadata.exists():
        return []
    # "utf-8-sig" reads plain UTF-8 and also strips a leading BOM; with plain
    # "utf-8" a BOM would become part of the first header name and lookups of
    # that column would then fail for every row.
    with metadata.open("r", encoding="utf-8-sig", newline="") as handle:
        return list(csv.DictReader(handle))
135
+
136
+
137
def latest_file(directory: Path, suffix: str):
    """Return the most recently modified file in ``directory`` ending in ``suffix``.

    Args:
        directory: Directory to scan (non-recursive).
        suffix: File-name ending to match, e.g. ``".pth"``.

    Returns:
        The path as a string, or ``None`` when nothing matches.
    """
    newest = None
    newest_mtime = None
    for candidate in directory.glob(f"*{suffix}"):
        stamp = candidate.stat().st_mtime
        # ">=" keeps the later-listed file on equal timestamps, matching a
        # stable ascending sort followed by taking the last element.
        if newest_mtime is None or stamp >= newest_mtime:
            newest, newest_mtime = candidate, stamp
    if newest is None:
        return None
    return str(newest)
140
+
141
+
142
def artifacts_summary():
    """Describe the newest exported SoVITS and GPT weights.

    Returns:
        A tuple ``(summary_text, sovits_path, gpt_path)``; each path is
        ``None`` when no matching file exists yet.
    """
    sovits = latest_file(SOVITS_OUTPUT_DIR, ".pth")
    gpt = latest_file(GPT_OUTPUT_DIR, ".ckpt")
    sovits_label = sovits if sovits else "暂无"
    gpt_label = gpt if gpt else "暂无"
    summary = f"SoVITS: {sovits_label}" + "\n" + f"GPT: {gpt_label}"
    return summary, sovits, gpt
150
+
151
+
152
def dataset_prepared():
    """Return True when every preprocessing artifact is present on disk.

    Checks the merged text and semantic files plus the three feature
    directories inside the experiment directory.
    """
    required = (
        TEXT_PATH,
        SEMANTIC_PATH,
        EXP_DIR / "3-bert",
        EXP_DIR / "4-cnhubert",
        EXP_DIR / "5-wav32k",
    )
    # all() stops at the first missing artifact, like the chained "and".
    return all(artifact.exists() for artifact in required)
160
+
161
+
162
def build_process_env():
    """Build the environment mapping handed to the upstream scripts.

    Starts from a copy of the current process environment and overrides the
    variables listed below; the current environment itself is not modified.

    Returns:
        A new dict suitable for the ``env`` argument of a subprocess call.
    """
    overrides = {
        "PYTHONPATH": str(GPT_SOVITS_DIR),
        "inp_text": str(INPUT_LIST),
        "inp_wav_dir": str(AUDIO_DIR),
        "exp_name": EXP_NAME,
        "opt_dir": str(EXP_DIR),
        "i_part": "0",
        "all_parts": "1",
        "_CUDA_VISIBLE_DEVICES": "0",
        "is_half": "False",
        "version": VERSION,
        "hz": "25hz",
        "bert_pretrained_dir": str(BERT_DIR),
        "bert_path": str(BERT_DIR),
        "cnhubert_base_dir": str(CNHUBERT_DIR),
        "pretrained_s2G": str(PRETRAINED_S2G),
        "s2config_path": "GPT_SoVITS/configs/s2.json",
    }
    child_env = os.environ.copy()
    for key, value in overrides.items():
        child_env[key] = value
    return child_env
185
+
186
+
187
def ensure_upstream_repo():
    """Make sure a usable GPT-SoVITS checkout exists in ``GPT_SOVITS_DIR``.

    A checkout counts as usable when it contains ``webui.py``. Anything else
    found at that path is removed and the repository is cloned again with a
    shallow clone.

    Raises:
        RuntimeError: If ``git clone`` exits with a non-zero status; the
            message carries the tail of git's stderr.
        subprocess.TimeoutExpired: If the clone exceeds the timeout.
    """
    if (GPT_SOVITS_DIR / "webui.py").exists():
        return
    if GPT_SOVITS_DIR.exists():
        # Leftover from an interrupted or partial clone; start clean.
        shutil.rmtree(GPT_SOVITS_DIR)
    try:
        subprocess.run(
            ["git", "clone", "--depth", "1", GPT_SOVITS_REPO, str(GPT_SOVITS_DIR)],
            check=True,
            capture_output=True,
            text=True,
            timeout=900,
        )
    except subprocess.CalledProcessError as exc:
        # The default CalledProcessError text omits the captured stderr, so
        # include its tail to make the failure diagnosable from the UI.
        detail = (exc.stderr or "").strip()[-500:]
        raise RuntimeError(f"克隆失败 (exit={exc.returncode}): {detail}") from exc
199
+
200
+
201
def patch_upstream_repo():
    """Switch off G2PW in the upstream ``chinese2.py`` (best effort).

    Replaces the first occurrence of the upstream ``is_g2pw = True`` line
    with ``is_g2pw = False``. A marker file records a successful patch so
    later calls return immediately. When the target file or line cannot be
    found, a warning is logged and no marker is written, so the state is
    re-checked on the next call instead of being reported as patched.
    """
    patch_marker = GPT_SOVITS_DIR / ".hf_space_patch_applied"
    if patch_marker.exists():
        return
    chinese2 = GPT_SOVITS_DIR / "GPT_SoVITS" / "text" / "chinese2.py"
    old = "is_g2pw = True # True if is_g2pw_str.lower() == 'true' else False"
    new = "is_g2pw = False # patched for CPU Space training; avoids extra G2PW asset"
    try:
        content = chinese2.read_text(encoding="utf-8")
    except FileNotFoundError:
        log.warning("Patch target not found, skipping G2PW patch: %s", chinese2)
        return
    if old in content:
        chinese2.write_text(content.replace(old, new, 1), encoding="utf-8")
    elif new not in content:
        # Neither the original nor the patched line is present.
        log.warning("G2PW toggle line not found in %s; file left unchanged", chinese2)
        return
    patch_marker.write_text("ok\n", encoding="utf-8")
213
+
214
+
215
def ensure_base_assets():
    """Download any missing feature models and base checkpoints.

    Order: the BERT and CN-HuBERT model directories, then the S1 and S2G
    checkpoints, then the S2D discriminator. Assets already present on disk
    are skipped. Failures for the first four propagate to the caller; the
    discriminator is treated as optional and only logged.
    """
    feature_models = ((BERT_REPO, BERT_DIR), (CNHUBERT_REPO, CNHUBERT_DIR))
    for repo_id, target_dir in feature_models:
        if not has_transformers_model(target_dir):
            snapshot_download(
                repo_id=repo_id,
                local_dir=str(target_dir),
                allow_patterns=MODEL_PATTERNS,
                **hf_kwargs(),
            )

    required_checkpoints = (
        (PRETRAINED_S1, PRETRAINED_S1_REL),
        (PRETRAINED_S2G, PRETRAINED_S2G_REL),
    )
    for local_path, rel_name in required_checkpoints:
        if not local_path.exists():
            hf_hub_download(
                repo_id=PRETRAINED_REPO,
                filename=rel_name,
                local_dir=str(PRETRAINED_DIR),
                **hf_kwargs(),
            )

    if not PRETRAINED_S2D.exists():
        try:
            hf_hub_download(
                repo_id=PRETRAINED_REPO,
                filename=PRETRAINED_S2D_REL,
                local_dir=str(PRETRAINED_DIR),
                **hf_kwargs(),
            )
        except Exception as exc:
            # Deliberately best effort, but record the real cause: this
            # branch also catches network and authentication errors, not
            # only a missing file.
            log.warning(
                "Optional pretrained discriminator unavailable (%s): %s",
                PRETRAINED_S2D_REL,
                exc,
            )
254
+
255
+
256
def reset_preprocess_outputs():
    """Delete the outputs of a previous preprocessing run.

    Removes the manifest, the merged and per-part text/semantic files, and
    the three feature directories, skipping anything that does not exist.
    """
    stale_files = (
        INPUT_LIST,
        TEXT_PATH,
        SEMANTIC_PATH,
        EXP_DIR / "2-name2text-0.txt",
        EXP_DIR / "6-name2semantic-0.tsv",
    )
    stale_dirs = (
        EXP_DIR / "3-bert",
        EXP_DIR / "4-cnhubert",
        EXP_DIR / "5-wav32k",
    )
    for stale in stale_files:
        if not stale.exists():
            continue
        stale.unlink()
    for stale in stale_dirs:
        if not stale.exists():
            continue
        shutil.rmtree(stale)
263
+
264
+
265
def build_manifest():
    """Write the training manifest from ``metadata.csv`` and the audio folder.

    Each usable row becomes one line ``wav_name|speaker|language|text``. A
    row is usable when it has a non-empty ``file`` and ``text``, the file
    exists in the audio directory, and the file has not been listed already.

    Returns:
        A tuple ``(sample_count, audio_count, unlisted)`` where ``unlisted``
        is the sorted list of audio file names with no usable row.

    Raises:
        RuntimeError: If no usable row is found.
    """
    rows = metadata_rows()
    audio_files = {item.name for item in AUDIO_DIR.glob("*.wav")}
    listed = set()
    output = []
    for row in rows:
        wav_name = (row.get("file") or "").strip()
        # The manifest is one record per line with "|" as the separator, so a
        # transcript must not contain line breaks or the separator itself.
        # Both are turned into spaces and whitespace runs are collapsed.
        raw_text = row.get("text") or ""
        text = " ".join(raw_text.replace("|", " ").split())
        if not wav_name or not text or wav_name not in audio_files:
            continue
        if wav_name in listed:
            # Keep only the first row for a file so it is not emitted twice.
            continue
        listed.add(wav_name)
        output.append(f"{wav_name}|{SPEAKER}|{LANGUAGE}|{text}")
    if not output:
        raise RuntimeError("metadata.csv 里没有可用训练样本")
    INPUT_LIST.write_text("\n".join(output) + "\n", encoding="utf-8")
    unlisted = sorted(audio_files - listed)
    return len(output), len(audio_files), unlisted
282
+
283
+
284
def create_sovits_config(epochs, batch_size, save_every_epoch, learning_rate):
    """Write a SoVITS training config derived from the upstream ``s2.json``.

    Args:
        epochs: Number of training epochs (converted with ``int``).
        batch_size: Batch size (converted with ``int``).
        save_every_epoch: Export interval in epochs (converted with ``int``).
        learning_rate: Learning rate (converted with ``float``).

    Returns:
        Path of the generated ``tmp_s2.json`` inside the working directory.
    """
    template = GPT_SOVITS_DIR / "GPT_SoVITS" / "configs" / "s2.json"
    data = json.loads(template.read_text(encoding="utf-8"))

    data["train"].update(
        {
            "fp16_run": False,
            "batch_size": int(batch_size),
            "epochs": int(epochs),
            "learning_rate": float(learning_rate),
            "pretrained_s2G": str(PRETRAINED_S2G),
            # The discriminator is optional; an empty string is written when
            # it has not been downloaded.
            "pretrained_s2D": str(PRETRAINED_S2D) if PRETRAINED_S2D.exists() else "",
            "if_save_latest": False,
            "if_save_every_weights": True,
            "save_every_epoch": int(save_every_epoch),
            "gpu_numbers": "0",
            "grad_ckpt": False,
        }
    )
    data["data"]["exp_dir"] = str(EXP_DIR)
    data.update(
        {
            "s2_ckpt_dir": str(EXP_DIR),
            "save_weight_dir": str(SOVITS_OUTPUT_DIR),
            "name": EXP_NAME,
            "version": VERSION,
        }
    )
    data["model"]["version"] = VERSION

    generated = WORK_DIR / "tmp_s2.json"
    generated.write_text(json.dumps(data), encoding="utf-8")
    return generated
308
+
309
+
310
def create_gpt_config(epochs, batch_size, save_every_epoch):
    """Write a GPT training config derived from the upstream ``s1longer-v2.yaml``.

    Args:
        epochs: Number of training epochs (converted with ``int``).
        batch_size: Batch size (converted with ``int``).
        save_every_epoch: Export interval in epochs (converted with ``int``).

    Returns:
        Path of the generated ``tmp_s1.yaml`` inside the working directory.
    """
    template = GPT_SOVITS_DIR / "GPT_SoVITS" / "configs" / "s1longer-v2.yaml"
    data = yaml.safe_load(template.read_text(encoding="utf-8"))

    data["train"].update(
        {
            "batch_size": int(batch_size),
            "epochs": int(epochs),
            "precision": "32",
            "save_every_n_epoch": int(save_every_epoch),
            "if_save_every_weights": True,
            "if_save_latest": False,
            "if_dpo": False,
            "exp_name": EXP_NAME,
            "half_weights_save_dir": str(GPT_OUTPUT_DIR),
        }
    )
    data["data"]["num_workers"] = 0
    data.update(
        {
            "pretrained_s1": str(PRETRAINED_S1),
            "train_semantic_path": str(SEMANTIC_PATH),
            "train_phoneme_path": str(TEXT_PATH),
            "output_dir": str(EXP_DIR / "logs_s1_v2"),
        }
    )

    generated = WORK_DIR / "tmp_s1.yaml"
    rendered = yaml.safe_dump(data, allow_unicode=True, sort_keys=False)
    generated.write_text(rendered, encoding="utf-8")
    return generated
331
+
332
+
333
def setup_environment_steps(logs):
    """Prepare the upstream checkout and base assets, yielding progress text.

    Args:
        logs: Shared list of log lines; each progress message is appended to
            it through ``push``.

    Yields:
        The accumulated log text after every step.
    """
    ensure_dirs()

    repo_present = (GPT_SOVITS_DIR / "webui.py").exists()
    if not repo_present:
        yield push(logs, "克隆 GPT-SoVITS 仓库...")
        ensure_upstream_repo()
        yield push(logs, "✅ GPT-SoVITS 仓库已就绪。")
    else:
        yield push(logs, "GPT-SoVITS 仓库已存在,跳过克隆。")

    patch_upstream_repo()
    yield push(logs, "✅ 已应用 Space 兼容补丁。")

    # Announce each pending download before the single call that performs them.
    if not has_transformers_model(BERT_DIR):
        yield push(logs, "下载中文 BERT 特征模型...")
    if not has_transformers_model(CNHUBERT_DIR):
        yield push(logs, "下载 CN-HuBERT 特征模型...")
    if not (PRETRAINED_S1.exists() and PRETRAINED_S2G.exists()):
        yield push(logs, "下载 GPT-SoVITS v2 底模...")
    ensure_base_assets()

    done_message = "✅ 环境就绪:GPT-SoVITS 仓库、中文特征模型和 v2 底模均已准备完成。"
    yield push(logs, done_message)
354
+
355
+
356
def download_dataset_steps(logs):
    """Set up the environment, then download the dataset, yielding progress.

    Args:
        logs: Shared list of log lines updated through ``push``.

    Yields:
        The accumulated log text after every step, ending with the number of
        wav files and metadata rows found.
    """
    ensure_dirs()
    yield from setup_environment_steps(logs)

    yield push(logs, "下载 Daniya 数据集...")
    download_options = {
        "repo_id": DATASET_REPO,
        "repo_type": "dataset",
        "local_dir": str(DATASET_DIR),
    }
    snapshot_download(**download_options, **hf_kwargs())

    metadata = metadata_rows()
    wav_total = sum(1 for _ in AUDIO_DIR.glob("*.wav"))
    yield push(
        logs,
        f"✅ 数据集已下载:音频 {wav_total} 个,metadata {len(metadata)} 条。",
    )
372
+
373
+
374
def prepare_data_steps(logs):
    """Run the full preprocessing pipeline, yielding progress text.

    Downloads the dataset, clears earlier outputs, writes the manifest and
    runs the three upstream ``prepare_datasets`` scripts in order, merging
    their single-part outputs into the final text and semantic files.

    Args:
        logs: Shared list of log lines updated through ``push``.

    Yields:
        The accumulated log text after every step and every script line.

    Returns:
        The final success message (delivered as the generator's return value).

    Raises:
        RuntimeError: If a script fails or an expected part file is missing.
    """
    yield from download_dataset_steps(logs)
    reset_preprocess_outputs()

    sample_count, audio_count, unlisted = build_manifest()
    yield push(
        logs,
        f"训练清单已生成:metadata 可用样本 {sample_count} 条,音频总数 {audio_count} 个,未标注音频 {len(unlisted)} 个。",
    )

    env = build_process_env()

    def run_stage(script):
        """Run one upstream script and relay its output through the log."""
        for output_line in run_cmd([sys.executable, "-s", script], cwd=GPT_SOVITS_DIR, env=env):
            yield push(logs, output_line)

    # Stage 1: text cleaning and BERT features.
    yield from run_stage("GPT_SoVITS/prepare_datasets/1-get-text.py")
    part_text = EXP_DIR / "2-name2text-0.txt"
    if not part_text.exists():
        raise RuntimeError("文本特征提取完成后未生成 2-name2text-0.txt")
    # Only one part is produced (all_parts is "1"), so a rename is enough.
    part_text.replace(TEXT_PATH)
    yield push(logs, "✅ 文本分词与 BERT 特征提取完成。")

    # Stage 2: CN-HuBERT features and 32k wav files.
    yield from run_stage("GPT_SoVITS/prepare_datasets/2-get-hubert-wav32k.py")
    yield push(logs, "✅ CN-HuBERT 特征与 32k wav 已生成。")

    # Stage 3: semantic tokens.
    yield from run_stage("GPT_SoVITS/prepare_datasets/3-get-semantic.py")
    part_semantic = EXP_DIR / "6-name2semantic-0.tsv"
    if not part_semantic.exists():
        raise RuntimeError("语义 token 提取完成后未生成 6-name2semantic-0.tsv")
    semantic_rows = part_semantic.read_text(encoding="utf-8").strip()
    # The merged file gets a header line followed by the part's rows.
    trailer = "\n" if semantic_rows else ""
    SEMANTIC_PATH.write_text(
        "item_name\tsemantic_audio\n" + semantic_rows + trailer,
        encoding="utf-8",
    )
    part_semantic.unlink()
    yield push(logs, "✅ 语义 token 提取完成。")

    return f"✅ 预处理完成,可用于训练的样本 {sample_count} 条。"
410
+
411
 
 
412
def check_environment():
    """UI handler: prepare the environment and stream the log text.

    Yields:
        The accumulated log text after each setup step; on failure, the log
        with an error line appended.
    """
    logs = []
    try:
        outcome = yield from setup_environment_steps(logs)
        # Relay a final message only when the step generator returned one.
        if outcome:
            yield outcome
    except Exception as exc:
        log.exception("check_environment")
        yield push(logs, f"环境准备失败: {exc}")
422
 
423
 
 
424
def download_dataset():
    """UI handler: download the dataset and stream the log text.

    Yields:
        The accumulated log text after each step; on failure, the log with
        an error line appended.
    """
    logs = []
    try:
        outcome = yield from download_dataset_steps(logs)
        # Relay a final message only when the step generator returned one.
        if outcome:
            yield outcome
    except Exception as exc:
        log.exception("download_dataset")
        yield push(logs, f"❌ 数据集下载失败: {exc}")
434
+
435
 
 
436
def prepare_data():
    """UI handler: run preprocessing and stream the log text.

    Yields:
        The accumulated log text after each step, then the log with the
        pipeline's final message appended; on failure, the log with an error
        line appended.
    """
    logs = []
    try:
        closing_message = yield from prepare_data_steps(logs)
        yield push(logs, closing_message)
    except Exception as exc:
        log.exception("prepare_data")
        yield push(logs, f"❌ 预处理失败: {exc}")
444
+
445
 
446
def start_training(epochs=2, batch_size=1, save_every_epoch=1, lr=0.0001):
    """UI handler: train SoVITS and stream ``(log_text, weight_path)`` pairs.

    Preprocessing is run first when its outputs are missing.

    Args:
        epochs: Number of training epochs.
        batch_size: Training batch size.
        save_every_epoch: Export interval in epochs.
        lr: Learning rate.

    Yields:
        ``(log_text, None)`` while running, then ``(log_text, path)`` with the
        newest exported ``.pth`` on success, or ``(log_text, None)`` with an
        error line on failure.
    """
    logs = []
    try:
        if not dataset_prepared():
            yield push(logs, "训练前缺少预处理产物,开始自动补齐..."), None
            for progress in prepare_data_steps(logs):
                yield progress, None

        config_path = create_sovits_config(epochs, batch_size, save_every_epoch, lr)
        env = build_process_env()
        train_command = [sys.executable, "-s", "GPT_SoVITS/s2_train.py", "--config", str(config_path)]

        yield push(logs, "开始 SoVITS 训练..."), None
        for output_line in run_cmd(train_command, cwd=GPT_SOVITS_DIR, env=env):
            yield push(logs, output_line), None

        latest = latest_file(SOVITS_OUTPUT_DIR, ".pth")
        if not latest:
            raise RuntimeError("训练结束后没有找到导出的 SoVITS 权重文件")
        yield push(logs, f"✅ SoVITS 训练完成,最新权重:{latest}"), latest
    except Exception as exc:
        log.exception("start_training")
        yield push(logs, f"❌ SoVITS 训练失败: {exc}"), None
469
+
470
+
471
def start_gpt_training(epochs=1, batch_size=1, save_every_epoch=1):
    """UI handler: train the GPT stage and stream ``(log_text, weight_path)`` pairs.

    Preprocessing is run first when its outputs are missing.

    Args:
        epochs: Number of training epochs.
        batch_size: Training batch size.
        save_every_epoch: Export interval in epochs.

    Yields:
        ``(log_text, None)`` while running, then ``(log_text, path)`` with the
        newest exported ``.ckpt`` on success, or ``(log_text, None)`` with an
        error line on failure.
    """
    logs = []
    try:
        if not dataset_prepared():
            yield push(logs, "训练前缺少预处理产物,开始自动补齐..."), None
            for progress in prepare_data_steps(logs):
                yield progress, None

        config_path = create_gpt_config(epochs, batch_size, save_every_epoch)
        env = build_process_env()
        train_command = [sys.executable, "-s", "GPT_SoVITS/s1_train.py", "--config_file", str(config_path)]

        yield push(logs, "开始 GPT 训练..."), None
        for output_line in run_cmd(train_command, cwd=GPT_SOVITS_DIR, env=env):
            yield push(logs, output_line), None

        latest = latest_file(GPT_OUTPUT_DIR, ".ckpt")
        if not latest:
            raise RuntimeError("训练结束后没有找到导出的 GPT 权重文件")
        yield push(logs, f"✅ GPT 训练完成,最新权重:{latest}"), latest
    except Exception as exc:
        log.exception("start_gpt_training")
        yield push(logs, f"❌ GPT 训练失败: {exc}"), None
494
+
495
+
496
def refresh_outputs():
    """UI handler: return the current summary text and newest weight paths."""
    summary, sovits_path, gpt_path = artifacts_summary()
    return summary, sovits_path, gpt_path
498
+
499
 
 
500
def create_ui():
    """Build the Gradio interface and wire each button to its handler.

    Returns:
        The ``gr.Blocks`` application object.
    """
    # NOTE(review): the nesting below (which components sit in which column,
    # and where the event wiring lives) was reconstructed from a flattened
    # diff view; confirm it against the real file.
    # NOTE(review): requirements pin gradio>=6; confirm `theme` is still
    # accepted by gr.Blocks in that major version.
    with gr.Blocks(title="GPT-SoVITS 训练 — 达妮娅", theme=gr.themes.Soft()) as demo:
        gr.Markdown(
            "# 🎤 GPT-SoVITS 训练器 — 达妮娅语音\n"
            "这个 Space 按当前 GPT-SoVITS 训练链路执行。先拿到 SoVITS 权重,再按需继续训练 GPT。"
        )

        with gr.Row():
            # Left column: environment, dataset and preprocessing steps.
            with gr.Column(scale=1):
                gr.Markdown("### 1. 环境")
                env_button = gr.Button("准备环境", variant="secondary")
                env_status = gr.Textbox(label="环境状态", lines=8, interactive=False, autoscroll=True)

                gr.Markdown("### 2. 数据集")
                dataset_button = gr.Button("下载数据集", variant="secondary")
                dataset_status = gr.Textbox(label="数据状态", lines=8, interactive=False, autoscroll=True)

                gr.Markdown("### 3. 预处理")
                prep_button = gr.Button("生成训练特征", variant="primary")
                prep_status = gr.Textbox(label="预处理日志", lines=16, interactive=False, autoscroll=True)

            # Right column: both training stages and the output overview.
            with gr.Column(scale=1):
                gr.Markdown("### 4. SoVITS 训练")
                sovits_epochs = gr.Slider(1, 20, value=2, step=1, label="训练轮数")
                sovits_batch = gr.Slider(1, 4, value=1, step=1, label="批次大小")
                sovits_save_every = gr.Slider(1, 5, value=1, step=1, label="多少轮导出")
                sovits_lr = gr.Slider(1e-5, 5e-4, value=1e-4, step=1e-5, label="学习率")
                sovits_button = gr.Button("开始 SoVITS 训练", variant="primary", size="lg")
                sovits_log = gr.Textbox(label="SoVITS 训练日志", lines=18, interactive=False, autoscroll=True)
                sovits_file = gr.File(label="最新 SoVITS 权重", interactive=False)

                gr.Markdown("### 5. GPT 训练(可选)")
                gpt_epochs = gr.Slider(1, 10, value=1, step=1, label="训练轮数")
                gpt_batch = gr.Slider(1, 4, value=1, step=1, label="批次大小")
                gpt_save_every = gr.Slider(1, 5, value=1, step=1, label="每隔多少轮导出")
                gpt_button = gr.Button("开始 GPT 训练", variant="secondary")
                gpt_log = gr.Textbox(label="GPT 训练日志", lines=14, interactive=False, autoscroll=True)
                gpt_file = gr.File(label="最新 GPT 权重", interactive=False)

                gr.Markdown("### 6. 当前输出")
                refresh_button = gr.Button("刷新最新权重")
                refresh_text = gr.Textbox(label="输出摘要", lines=3, interactive=False)
                refresh_sovits = gr.File(label="SoVITS 输出", interactive=False)
                refresh_gpt = gr.File(label="GPT 输出", interactive=False)

        # Event wiring: every handler streams or returns into the listed outputs.
        env_button.click(check_environment, outputs=env_status)
        dataset_button.click(download_dataset, outputs=dataset_status)
        prep_button.click(prepare_data, outputs=prep_status)
        sovits_button.click(
            start_training,
            inputs=[sovits_epochs, sovits_batch, sovits_save_every, sovits_lr],
            outputs=[sovits_log, sovits_file],
        )
        gpt_button.click(
            start_gpt_training,
            inputs=[gpt_epochs, gpt_batch, gpt_save_every],
            outputs=[gpt_log, gpt_file],
        )
        refresh_button.click(refresh_outputs, outputs=[refresh_text, refresh_sovits, refresh_gpt])

    return demo
561
 
562
+
563
if __name__ == "__main__":
    # Create the working directories before the UI starts serving requests.
    ensure_dirs()
    demo = create_ui()
    # Listen on all interfaces on port 7860.
    launch_options = {"server_name": "0.0.0.0", "server_port": 7860}
    demo.launch(**launch_options)
requirements.txt CHANGED
@@ -1,14 +1,26 @@
 
1
  huggingface_hub>=0.25.0
2
- pandas>=2.0.0
 
 
 
3
  soundfile>=0.12.0
4
- librosa>=0.10.0
5
  torch>=2.0.0
6
  torchaudio>=2.0.0
7
- numpy>=1.24.0
8
- scipy>=1.10.0
9
- matplotlib>=3.7.0
10
- tqdm>=4.65.0
11
- pyyaml>=6.0
12
- transformers>=4.30.0
13
  accelerate>=0.20.0
14
- sentencepiece>=0.1.99
 
 
 
 
 
 
 
 
 
 
 
1
+ gradio>=6,<7
2
  huggingface_hub>=0.25.0
3
+ numpy<2.0
4
+ pandas>=2.0.0,<3
5
+ scipy>=1.10.0
6
+ librosa==0.10.2
7
  soundfile>=0.12.0
 
8
  torch>=2.0.0
9
  torchaudio>=2.0.0
10
+ pytorch-lightning>=2.4
11
+ torchmetrics<=1.5
12
+ tensorboard
13
+ transformers>=4.43,<=4.50
14
+ sentencepiece>=0.1.99
 
15
  accelerate>=0.20.0
16
+ ffmpeg-python
17
+ cn2an
18
+ pypinyin
19
+ jieba_fast
20
+ PyYAML>=6.0
21
+ psutil
22
+ numba
23
+ chardet
24
+ einops
25
+ typeguard<3
26
+ tqdm>=4.65.0