"""
Direct Audio Generation from JSON Tool

This script allows for generating audiobook chunks directly from a pre-existing
`chunks_info.json` file. It is intended for debugging and testing purposes,
allowing a user to manually edit the TTS parameters in the JSON file and
hear the results without the VADER analysis step.
"""

import sys
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import timedelta
from pathlib import Path

import torch

# Put the directory containing this script on the import path *before* the
# project imports below, so they resolve when the script is run directly.
project_root = Path(__file__).parent
sys.path.append(str(project_root))

# NOTE(review): the wildcard import presumably supplies the constants used by
# main() (colour codes, DEFAULT_* TTS values, root directories) -- confirm
# against config/config.py before narrowing it to explicit names.
from config.config import *
from modules.tts_engine import load_optimized_model, process_one_chunk
from modules.file_manager import setup_book_directories, list_voice_samples, ensure_voice_sample_compatibility
from wrapper.chunk_loader import load_chunks
from chatterbox.tts import punc_norm
from modules.progress_tracker import log_chunk_progress, log_run


def _select_voice(voice_files):
    """Prompt until the user picks a valid entry and return that voice file.

    Args:
        voice_files: Non-empty sequence of voice sample paths. Entries are
            listed by their ``stem`` and numbered starting at 1.

    Returns:
        The element of ``voice_files`` chosen by the user.
    """
    print("\nAvailable voices:")
    for number, voice_file in enumerate(voice_files, 1):
        print(f" [{number}] {voice_file.stem}")

    while True:
        choice = input("Select voice number: ").strip()
        try:
            idx = int(choice) - 1
        except ValueError:
            print("Invalid selection.")
            continue
        # Explicit range check: a bare index would let 0 or negative numbers
        # silently wrap around to the end of the list.
        if 0 <= idx < len(voice_files):
            return voice_files[idx]
        print("Invalid selection.")


def _pick_device():
    """Return "cuda" if available, otherwise "mps" if available, else "cpu"."""
    if torch.cuda.is_available():
        return "cuda"
    if torch.backends.mps.is_available():
        return "mps"
    return "cpu"


def _chunk_tts_params(chunk_data):
    """Build the TTS parameter dict for one chunk.

    Each value comes from the chunk's ``tts_params`` entry when present and
    falls back to the configured default otherwise, so hand-edited values in
    the JSON actually take effect.

    NOTE(review): assumes the JSON stores ``cfg_weight`` and ``temperature``
    under the same keys as the dict handed to ``process_one_chunk`` (as it
    does for ``exaggeration``) -- confirm against the chunk writer.
    """
    # `or {}` also covers an explicit null for "tts_params" in the JSON.
    overrides = chunk_data.get("tts_params") or {}
    return {
        "exaggeration": overrides.get("exaggeration", DEFAULT_EXAGGERATION),
        "cfg_weight": overrides.get("cfg_weight", DEFAULT_CFG_WEIGHT),
        "temperature": overrides.get("temperature", DEFAULT_TEMPERATURE),
    }


def main():
    """Interactively regenerate a book's audio chunks from its chunks_info.json.

    Steps: ask for the book name, load the chunk list from
    ``AUDIOBOOK_ROOT/<book>/TTS/text_chunks/chunks_info.json``, let the user
    choose a voice sample, load the model on the best available device,
    delete the existing ``*.wav`` files in the audio chunk directory, and
    submit every chunk to ``process_one_chunk`` on a two-worker thread pool.

    Returns early (after printing a message) when the book name is empty,
    the JSON file is missing, the JSON holds no chunks, or no voice samples
    are found. Errors raised while processing individual chunks are printed
    and counted, not propagated.
    """
    print(f"{BOLD}{CYAN}--- Direct Audio Generation from JSON Tool ---{RESET}")

    book_name = input("Enter the book name (e.g., 'london'): ").strip()
    if not book_name:
        print("❌ Book name cannot be empty.")
        return

    book_audio_dir = AUDIOBOOK_ROOT / book_name
    json_path = book_audio_dir / "TTS" / "text_chunks" / "chunks_info.json"

    if not json_path.exists():
        print(f"❌ Error: JSON file not found at {json_path}")
        print("Please ensure you have run the 'Prepare text file' option for this book first.")
        return

    print(f"📖 Loading chunks from: {json_path}")
    all_chunks = load_chunks(str(json_path))
    if not all_chunks:
        # Bail out before loading the model or deleting any existing audio.
        print("❌ No chunks found in the JSON file; nothing to generate.")
        return
    print(f"✅ Found {len(all_chunks)} chunks.")

    voice_files = list_voice_samples()
    if not voice_files:
        print(f"❌ No voice samples found in {VOICE_SAMPLES_DIR}")
        return

    voice_path = ensure_voice_sample_compatibility(_select_voice(voice_files))

    device = _pick_device()
    print(f"\n🚀 Using device: {device}")
    print(f"🎤 Using voice: {Path(voice_path).name}")

    model = load_optimized_model(device)

    print(f"🎤 Preparing voice conditionals with: {Path(voice_path).name}")
    model.prepare_conditionals(voice_path)

    output_root, _tts_dir, text_chunks_dir, audio_chunks_dir = setup_book_directories(
        Path(TEXT_INPUT_ROOT) / book_name
    )

    print("🧹 Clearing old audio chunks...")
    for wav_file in audio_chunks_dir.glob("*.wav"):
        wav_file.unlink()

    start_time = time.time()
    total_chunks = len(all_chunks)
    log_path = output_root / "debug_generation.log"
    failed_chunks = 0

    print(f"\n🔄 Generating {total_chunks} chunks...")

    with ThreadPoolExecutor(max_workers=2) as executor:
        futures = [
            executor.submit(
                process_one_chunk,
                i, chunk_data['text'], text_chunks_dir, audio_chunks_dir,
                voice_path, _chunk_tts_params(chunk_data), start_time, total_chunks,
                punc_norm, book_name, log_run, log_path, device,
                model, None, chunk_data['is_paragraph_end'], all_chunks, chunk_data['boundary_type'],
            )
            for i, chunk_data in enumerate(all_chunks)
        ]

        for future in as_completed(futures):
            try:
                result = future.result()
            except Exception as e:
                # Best effort: one bad chunk must not abort the whole run.
                failed_chunks += 1
                print(f"\n❌ An error occurred while processing a chunk: {e}")
                continue
            if result:
                idx, _ = result
                log_chunk_progress(idx, total_chunks, start_time, 0)

    elapsed_time = time.time() - start_time
    print(f"\n{GREEN}✅ Generation Complete!{RESET}")
    if failed_chunks:
        print(f"⚠️ {failed_chunks} of {total_chunks} chunks failed; see the errors above.")
    print(f"⏱️ Total time: {timedelta(seconds=int(elapsed_time))}")
    print(f"📁 Audio chunks are in: {audio_chunks_dir}")
    print("You can now use Option 3 from the main menu to combine them.")


if __name__ == "__main__":
    # Start the interactive tool only when executed as a script, not on import.
    main()