| from pathlib import Path |
| import torch |
| import time |
| import re |
| from pydub import AudioSegment |
|
|
| from modules.tts_engine import load_optimized_model |
| from modules.file_manager import ensure_voice_sample_compatibility, list_voice_samples |
| from modules.audio_processor import apply_smart_fade_memory, smart_audio_validation_memory, process_audio_with_trimming_and_silence |
| from config.config import * |
|
|
def get_original_voice_from_log(book_name):
    """Return the voice name recorded in a book's ``run.log``, if any.

    Reads ``<AUDIOBOOK_ROOT>/<book_name>/run.log`` line by line and returns
    the text following the first ``"Voice: "`` or ``"Voice used: "`` prefix.

    Args:
        book_name: Name of the book's folder under ``AUDIOBOOK_ROOT``.

    Returns:
        The stripped voice name from the first matching line, or ``None``
        when the log is missing, has no matching line, or cannot be read.
    """
    run_log = Path(AUDIOBOOK_ROOT) / book_name / "run.log"
    if not run_log.exists():
        return None

    voice_prefixes = ("Voice: ", "Voice used: ")
    try:
        with run_log.open('r', encoding='utf-8') as handle:
            for raw_line in handle:
                entry = raw_line.strip()
                if not entry.startswith(voice_prefixes):
                    continue
                # Everything after the first ": " is the voice name.
                voice_name = entry.partition(": ")[2].strip()
                print(f"π Found original voice in log: {voice_name}")
                return voice_name
    except Exception as e:
        # Best effort: an unreadable log is reported and treated as "no voice".
        print(f"β οΈ Error reading run log: {e}")

    return None
|
|
def get_original_voice_from_filename(book_name):
    """Return the voice name embedded in an existing audiobook file name.

    Scans ``<AUDIOBOOK_ROOT>/<book_name>`` for ``*.wav`` files first and
    ``*.m4b`` files second, looking for a name that ends in
    ``[<voice>].<ext>``.

    Args:
        book_name: Name of the book's folder under ``AUDIOBOOK_ROOT``.

    Returns:
        The text between the square brackets of the first matching file, or
        ``None`` when no file name carries a bracketed voice.
    """
    book_dir = Path(AUDIOBOOK_ROOT) / book_name

    def first_bracketed_voice(glob_pattern, name_regex):
        # First "[...]" suffix found among files matching glob_pattern.
        for candidate in book_dir.glob(glob_pattern):
            found = re.search(name_regex, candidate.name)
            if found:
                return found.group(1)
        return None

    # WAV output takes priority over M4B, as in the original scan order.
    voice_name = first_bracketed_voice("*.wav", r'\[([^\]]+)\]\.wav$')
    if voice_name is not None:
        print(f"π Found original voice in filename: {voice_name}")
        return voice_name

    voice_name = first_bracketed_voice("*.m4b", r'\[([^\]]+)\]\.m4b$')
    if voice_name is not None:
        print(f"π Found original voice in M4B filename: {voice_name}")
        return voice_name

    return None
|
|
def find_voice_file_by_name(voice_name):
    """Find the voice sample whose file stem best matches ``voice_name``.

    Candidates come from ``list_voice_samples()``. Matching is attempted from
    strictest to loosest, and the first hit at the strictest level wins:

    1. stem equals ``voice_name`` exactly;
    2. stem equals ``voice_name`` ignoring case;
    3. ``voice_name`` (ignoring case) is a substring of the stem.

    Args:
        voice_name: Voice name to look up.

    Returns:
        The matching entry from ``list_voice_samples()``, or ``None`` when
        ``voice_name`` is empty or nothing matches.
    """
    # An empty name is a substring of every stem; without this guard the
    # substring pass would "match" whichever sample happens to be listed first.
    if not voice_name:
        return None

    # Materialize once so several passes work even if a one-shot iterable
    # is returned.
    voice_files = list(list_voice_samples())
    wanted = voice_name.lower()

    exact = next((f for f in voice_files if f.stem == voice_name), None)
    if exact is None:
        # Prefer "Bob" over "Bobby" when the caller asked for "bob".
        exact = next((f for f in voice_files if f.stem.lower() == wanted), None)
    if exact is not None:
        print(f"✅ Found exact voice match: {exact.name}")
        return exact

    partial = next((f for f in voice_files if wanted in f.stem.lower()), None)
    if partial is not None:
        print(f"✅ Found partial voice match: {partial.name}")
        return partial

    return None
|
|
def get_tts_params_for_chunk(chunk):
    """Return the TTS parameters to use when synthesizing ``chunk``.

    If the chunk carries a ``'tts_params'`` mapping, a copy of it is returned
    with any missing (or ``None``) ``'exaggeration'``, ``'cfg_weight'`` or
    ``'temperature'`` entry filled from ``DEFAULT_EXAGGERATION``,
    ``DEFAULT_CFG_WEIGHT`` and ``DEFAULT_TEMPERATURE``. Callers index those
    three keys directly, so a partial mapping must not be handed back as-is.

    Otherwise the user is prompted on stdin for each value; an empty answer
    accepts the default and a non-numeric answer is asked again.

    Args:
        chunk: Chunk mapping; only the ``'tts_params'`` key is read here.

    Returns:
        A dict containing at least ``'exaggeration'``, ``'cfg_weight'`` and
        ``'temperature'``. Extra keys of a stored mapping are preserved.
    """
    defaults = {
        'exaggeration': DEFAULT_EXAGGERATION,
        'cfg_weight': DEFAULT_CFG_WEIGHT,
        'temperature': DEFAULT_TEMPERATURE,
    }

    stored = chunk.get('tts_params')
    if isinstance(stored, dict):
        # Copy so filling in defaults never mutates the caller's chunk data.
        tts_params = dict(stored)
        for key, fallback in defaults.items():
            if tts_params.get(key) is None:
                tts_params[key] = fallback
        print(f"π Using stored TTS params: exag={tts_params['exaggeration']}, cfg={tts_params['cfg_weight']}, temp={tts_params['temperature']}")
        return tts_params

    print("\nβοΈ TTS Parameters for chunk synthesis:")

    def get_float_input(prompt, default):
        # Keep asking until the answer is empty (use default) or parses as float.
        while True:
            value = input(f"{prompt} [{default}]: ").strip()
            if not value:
                return default
            try:
                return float(value)
            except ValueError:
                print("β Invalid input. Please enter a valid number.")

    # Dict displays evaluate in order, so the prompts appear in this order.
    return {
        'exaggeration': get_float_input("Exaggeration", defaults['exaggeration']),
        'cfg_weight': get_float_input("CFG Weight", defaults['cfg_weight']),
        'temperature': get_float_input("Temperature", defaults['temperature']),
    }
|
|
def synthesize_chunk(chunk, index, book_name, audio_dir, revision=False, chunks_json_path=None, override_voice_name=None):
    """Generate audio for one chunk and write it to ``audio_dir`` as a WAV file.

    The voice is ``override_voice_name`` when given, otherwise whatever
    ``detect_voice_for_book`` reports. If neither resolves to a file, the
    first entry of ``list_voice_samples()`` is used. TTS parameters come from
    ``get_tts_params_for_chunk``.

    Args:
        chunk: Chunk mapping. ``'text'`` and the optional ``'boundary_type'``
            are read here; ``get_tts_params_for_chunk`` reads the rest.
        index: Zero-based chunk index; the file is numbered ``index + 1``.
        book_name: Book identifier passed to voice detection.
        audio_dir: Directory that receives the WAV file.
        revision: When true, the file name gets a ``_rev`` suffix.
        chunks_json_path: Forwarded to ``detect_voice_for_book``.
        override_voice_name: Voice name to use instead of detection.

    Returns:
        The output path as a string, or ``None`` when the chunk has no text,
        no voice sample is available, or any step raises (the error and its
        traceback are printed).
    """
    # NOTE(review): most log prefixes below look like mis-decoded emoji; they
    # are kept as found - confirm the file's encoding before normalizing them.
    filename = f"chunk_{index+1:05d}_rev.wav" if revision else f"chunk_{index+1:05d}.wav"
    out_path = Path(audio_dir) / filename

    model = None
    try:
        # Reject empty chunks before the expensive model load and before the
        # user can be prompted for TTS parameters.
        chunk_text = chunk.get('text', '')
        if not chunk_text:
            print("β No text found in chunk")
            return None

        device = "cuda" if torch.cuda.is_available() else "cpu"

        print("π€ Loading TTS model for chunk synthesis...")
        model = load_optimized_model(device)

        if override_voice_name:
            print(f"π€ Using explicitly selected voice: {override_voice_name}")
            voice_path = find_voice_file_by_name(override_voice_name)
            voice_name = override_voice_name
            detection_method = "user_selected"
        else:
            print(f"π Detecting original voice for book: {book_name}")
            # Kept as a function-scope import, exactly as the original had it.
            from modules.voice_detector import detect_voice_for_book

            voice_name, voice_path, detection_method = detect_voice_for_book(book_name, chunks_json_path)

        if not voice_path:
            print("β οΈ Voice not found, using first available voice")
            voice_files = list_voice_samples()
            if not voice_files:
                print("β No voice samples found")
                return None
            voice_path = voice_files[0]
            voice_name = voice_path.stem
            detection_method = "fallback_first_available"

        print(f"π€ Using voice: {voice_name} (method: {detection_method})")
        compatible_voice = ensure_voice_sample_compatibility(voice_path)

        tts_params = get_tts_params_for_chunk(chunk)

        model.prepare_conditionals(compatible_voice)

        print(f"π€ Synthesizing: {chunk_text[:50]}...")
        print(f"π TTS params: exag={tts_params['exaggeration']}, cfg={tts_params['cfg_weight']}, temp={tts_params['temperature']}")

        with torch.no_grad():
            wav = model.generate(
                chunk_text,
                exaggeration=tts_params['exaggeration'],
                cfg_weight=tts_params['cfg_weight'],
                temperature=tts_params['temperature'],
            ).detach().cpu()

        import io
        import soundfile as sf

        # squeeze() drops every size-1 dimension, so the original's
        # unsqueeze(0) for 1-D output was a no-op and has been removed.
        wav_np = wav.squeeze().numpy()
        # Round-trip through an in-memory WAV to obtain a pydub AudioSegment.
        with io.BytesIO() as wav_buffer:
            sf.write(wav_buffer, wav_np, model.sr, format='wav')
            wav_buffer.seek(0)
            audio_segment = AudioSegment.from_wav(wav_buffer)

        audio_segment = apply_smart_fade_memory(audio_segment)
        # NOTE(review): the second return value (named is_quarantined in the
        # original) is not acted on here - confirm that is intentional.
        audio_segment, _is_quarantined = smart_audio_validation_memory(audio_segment, model.sr)

        boundary_type = chunk.get('boundary_type', 'none')
        if boundary_type and boundary_type != "none":
            audio_segment = process_audio_with_trimming_and_silence(audio_segment, boundary_type)
        elif ENABLE_AUDIO_TRIMMING:
            from modules.audio_processor import trim_audio_endpoint
            audio_segment = trim_audio_endpoint(audio_segment)

        audio_segment.export(out_path, format="wav")
        print(f"✅ Saved synthesized chunk: {out_path.name}")

        return str(out_path)

    except Exception as e:
        print(f"β Failed to synthesize chunk: {e}")
        import traceback
        traceback.print_exc()
        return None

    finally:
        # Release the model and cached CUDA memory on every exit path, not
        # only after a successful export.
        if model is not None:
            model = None
            torch.cuda.empty_cache()
|
|