danneauxs committed on
Commit
346d87a
·
1 Parent(s): d0851e9

Deploy complete ChatterboxTTS system with utils, tools, and wrapper modules

Browse files
.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ __pycache__/
2
+ *.pyc
modules/asr_manager.py ADDED
@@ -0,0 +1,233 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ ASR Manager Module
3
+ Centralized ASR model loading with adaptive GPU/CPU fallback and real-time VRAM monitoring
4
+ """
5
+
6
+ import torch
7
+ import logging
8
+ from pathlib import Path
9
+ from config.config import DEFAULT_ASR_MODEL, ASR_MODEL_VRAM_MB, ASR_MODEL_RAM_MB
10
+
11
def get_real_time_vram_status():
    """Report memory figures for the first CUDA device, in whole megabytes.

    Returns:
        dict: ``total_mb``, ``allocated_mb``, ``reserved_mb``,
        ``available_mb`` (total minus allocated) and ``has_gpu``. When CUDA
        is unavailable, no device is reported, or the query raises, every
        figure is 0 and ``has_gpu`` is False.
    """
    try:
        if torch.cuda.is_available() and torch.cuda.device_count() > 0:
            first_gpu = 0  # only device 0 is ever inspected
            total_bytes = torch.cuda.get_device_properties(first_gpu).total_memory
            allocated_bytes = torch.cuda.memory_allocated(first_gpu)
            reserved_bytes = torch.cuda.memory_reserved(first_gpu)
            free_bytes = total_bytes - allocated_bytes

            status = {'has_gpu': True}
            for key, amount in (
                ('total_mb', total_bytes),
                ('allocated_mb', allocated_bytes),
                ('reserved_mb', reserved_bytes),
                ('available_mb', free_bytes),
            ):
                status[key] = amount // 1024 // 1024
            return status
    except Exception as e:
        # Best effort: a failed query is reported and treated as "no GPU".
        logging.warning(f"Failed to get real-time VRAM status: {e}")

    return dict(total_mb=0, allocated_mb=0, reserved_mb=0, available_mb=0, has_gpu=False)
40
+
41
def calculate_available_vram_for_asr(safety_buffer_mb=500):
    """Return the VRAM (MB) ASR may use after holding back a safety margin.

    Args:
        safety_buffer_mb: Megabytes subtracted from the currently available
            VRAM before reporting it.

    Returns:
        int: 0 when no GPU is reported; otherwise available VRAM minus the
        buffer, never below 0.
    """
    status = get_real_time_vram_status()
    if status['has_gpu']:
        # Clamp so a buffer larger than the free VRAM never yields a negative.
        return max(0, status['available_mb'] - safety_buffer_mb)
    return 0
52
+
53
def can_model_fit_gpu(model_name, available_vram_mb):
    """Tell whether ``model_name`` fits in ``available_vram_mb`` megabytes.

    The requirement is looked up in ``ASR_MODEL_VRAM_MB``; a model missing
    from that table is treated as needing 0 MB, so it always "fits".
    """
    needed_mb = ASR_MODEL_VRAM_MB.get(model_name, 0)
    return needed_mb <= available_vram_mb
57
+
58
def try_load_model_with_fallback(model_name, primary_device, fallback_device="cpu"):
    """Load a Whisper model on ``primary_device``, retrying on ``fallback_device``.

    Device names are case-insensitive; ``"gpu"`` is translated to ``"cuda"``
    before being handed to ``whisper.load_model``.

    Args:
        model_name: Whisper model identifier passed to ``whisper.load_model``.
        primary_device: Device to try first (e.g. ``"gpu"`` or ``"cpu"``).
        fallback_device: Device to try if the primary load raises. It is
            skipped when it resolves to the same device as the primary.

    Returns:
        tuple: ``(model, device)`` where ``device`` is the caller-supplied
        name of the device that succeeded.

    Raises:
        RuntimeError: If every attempted device failed. The underlying
            loader error is attached as ``__cause__``.
    """
    import whisper

    def convert_device_name(device):
        # Callers say "gpu"; whisper/torch expect "cuda".
        lowered = device.lower()
        return "cuda" if lowered == "gpu" else lowered

    primary_device_whisper = convert_device_name(primary_device)
    fallback_device_whisper = convert_device_name(fallback_device)

    try:
        print(f"🎯 Attempting to load {model_name} on {primary_device.upper()}")
        model = whisper.load_model(model_name, device=primary_device_whisper)
        print(f"βœ… Successfully loaded {model_name} on {primary_device.upper()}")
        return model, primary_device
    except Exception as e:
        print(f"⚠️ {model_name} failed on {primary_device} ({str(e)[:50]}...)")
        primary_error = e  # keep a reference; `e` is unbound after this block

    if fallback_device_whisper == primary_device_whisper:
        # Retrying the same device would just repeat the failure.
        raise RuntimeError(
            f"Model {model_name} failed on {primary_device}"
        ) from primary_error

    try:
        print(f"πŸ”„ Trying {model_name} on {fallback_device.upper()}")
        model = whisper.load_model(model_name, device=fallback_device_whisper)
        print(f"βœ… Successfully loaded {model_name} on {fallback_device.upper()}")
        return model, fallback_device
    except Exception as fallback_e:
        print(f"❌ {model_name} also failed on {fallback_device} ({str(fallback_e)[:50]}...)")
        raise RuntimeError(
            f"Model {model_name} failed on both {primary_device} and {fallback_device}"
        ) from fallback_e
92
+
93
def load_asr_model_adaptive(asr_config=None):
    """
    Adaptive ASR model loading with real-time VRAM checking and intelligent fallback

    When ``asr_config`` is enabled and names a primary model, the configured
    primary and then fallback model are tried, each demoted to CPU if no GPU
    is reported or the model does not fit in the available VRAM. If that
    fails, or no usable config is given, ``DEFAULT_ASR_MODEL`` is loaded on
    GPU when it fits and otherwise on CPU, with one final CPU retry after a
    GPU failure.

    Args:
        asr_config: ASR configuration dict from interfaces (None for GUI fallback).
            Read keys: ``enabled``, ``primary_model``, ``primary_device``,
            ``fallback_model``, ``fallback_device``.

    Returns:
        tuple: (asr_model, actual_device_used) or (None, None) if all loading fails
    """
    print(f"πŸ” Starting adaptive ASR model loading...")

    # Snapshot VRAM once; both configured models are judged against it.
    vram_status = get_real_time_vram_status()
    available_vram = calculate_available_vram_for_asr()

    print(f"πŸ–₯️ Real-time VRAM status:")
    print(f" Total: {vram_status['total_mb']:,}MB")
    print(f" Allocated: {vram_status['allocated_mb']:,}MB")
    print(f" Available for ASR: {available_vram:,}MB (with 500MB safety buffer)")

    if asr_config and asr_config.get('enabled') and 'primary_model' in asr_config:
        # Intelligent selection from CLI/Gradio
        primary_model = asr_config['primary_model']
        primary_device = asr_config['primary_device']
        fallback_model = asr_config['fallback_model']
        fallback_device = asr_config['fallback_device']

        print(f"🧠 Using intelligent ASR config:")
        print(f" Primary: {primary_model} on {primary_device.upper()}")
        print(f" Fallback: {fallback_model} on {fallback_device.upper()}")

        # Real-time VRAM check for primary model
        if primary_device.lower() == 'gpu':
            if not vram_status['has_gpu']:
                print(f"⚠️ No GPU available, forcing CPU mode")
                primary_device = 'cpu'
            elif not can_model_fit_gpu(primary_model, available_vram):
                required = ASR_MODEL_VRAM_MB.get(primary_model, 0)
                print(f"⚠️ Insufficient VRAM for {primary_model} (need {required}MB, have {available_vram}MB)")
                print(f"πŸ”„ Switching primary to CPU")
                primary_device = 'cpu'

        # Primary is tried on a single device only (fallback == primary);
        # the configured fallback model is handled below.
        try:
            return try_load_model_with_fallback(primary_model, primary_device, primary_device)
        except Exception:
            # Narrowed from a bare ``except`` so Ctrl-C / SystemExit are not
            # swallowed in the middle of a long model load.
            print(f"πŸ”„ Primary model failed, trying fallback configuration...")

        # Real-time VRAM check for fallback model
        if fallback_device.lower() == 'gpu':
            if not vram_status['has_gpu']:
                print(f"⚠️ No GPU available for fallback, using CPU")
                fallback_device = 'cpu'
            elif not can_model_fit_gpu(fallback_model, available_vram):
                required = ASR_MODEL_VRAM_MB.get(fallback_model, 0)
                print(f"⚠️ Insufficient VRAM for fallback {fallback_model} (need {required}MB, have {available_vram}MB)")
                fallback_device = 'cpu'

        try:
            return try_load_model_with_fallback(fallback_model, fallback_device, 'cpu')
        except Exception:
            print(f"❌ Both configured models failed!")

    else:
        # Fallback mode for GUI or missing config
        print(f"πŸ”§ Using fallback mode: {DEFAULT_ASR_MODEL}")

    # Last resort: try default model with adaptive device selection
    print(f"πŸ†˜ Last resort: trying {DEFAULT_ASR_MODEL} with adaptive device selection")

    # Choose device based on real-time VRAM availability
    if vram_status['has_gpu'] and can_model_fit_gpu(DEFAULT_ASR_MODEL, available_vram):
        device = 'cuda'  # Use cuda directly for whisper
        device_display = 'GPU'
        print(f"βœ… Using GPU for {DEFAULT_ASR_MODEL}")
    else:
        device = 'cpu'
        device_display = 'CPU'
        print(f"πŸ”„ Using CPU for {DEFAULT_ASR_MODEL}")

    try:
        import whisper
        model = whisper.load_model(DEFAULT_ASR_MODEL, device=device)
        print(f"βœ… Successfully loaded {DEFAULT_ASR_MODEL} on {device_display}")
        return model, device_display.lower()
    except Exception as e:
        print(f"❌ Critical failure: Could not load {DEFAULT_ASR_MODEL} on {device}: {e}")

        # Ultimate fallback to CPU if GPU failed
        if device == 'cuda':
            try:
                print(f"πŸ†˜ Ultimate fallback: {DEFAULT_ASR_MODEL} on CPU")
                model = whisper.load_model(DEFAULT_ASR_MODEL, device='cpu')
                print(f"βœ… Successfully loaded {DEFAULT_ASR_MODEL} on CPU")
                return model, 'cpu'
            except Exception as cpu_e:
                print(f"πŸ’€ Complete failure: {cpu_e}")

    return None, None
195
+
196
def cleanup_asr_model(asr_model):
    """Drop this function's reference to ``asr_model`` and empty the CUDA cache.

    Does nothing when ``asr_model`` is None. Any error is logged as a
    warning rather than raised.

    NOTE(review): ``del`` only removes the local name; memory can be freed
    only once the caller releases its own reference as well.
    """
    if asr_model is None:
        return

    try:
        del asr_model
        cuda_present = torch.cuda.is_available()
        if cuda_present:
            torch.cuda.empty_cache()
        print(f"🧹 ASR model cleaned up")
    except Exception as e:
        logging.warning(f"Failed to cleanup ASR model: {e}")
206
+
207
def get_asr_memory_info():
    """Collect VRAM figures useful when debugging ASR loading.

    Returns:
        dict: ``vram_total_mb``, ``vram_allocated_mb``,
        ``vram_available_for_asr_mb`` (after the default safety buffer) and
        ``has_gpu``.
    """
    status = get_real_time_vram_status()
    usable_mb = calculate_available_vram_for_asr()

    return {
        'vram_total_mb': status['total_mb'],
        'vram_allocated_mb': status['allocated_mb'],
        'vram_available_for_asr_mb': usable_mb,
        'has_gpu': status['has_gpu'],
    }
220
+
221
if __name__ == "__main__":
    # Manual smoke test: report memory, then attempt one adaptive load.
    print("Testing ASR Manager...")
    info = get_asr_memory_info()
    print(f"Memory info: {info}")

    model, device = load_asr_model_adaptive()
    if not model:
        print("Test failed: No model loaded")
    else:
        print(f"Test successful: Model loaded on {device}")
        cleanup_asr_model(model)
modules/system_detector.py ADDED
@@ -0,0 +1,231 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ System Resource Detection Module
3
+ Detects VRAM, RAM, CPU cores and recommends appropriate ASR models
4
+ """
5
+
6
+ import psutil
7
+ import torch
8
+ import os
9
+ import sys
10
+ from pathlib import Path
11
+
12
+ # Add project root to path for imports
13
+ if __name__ == "__main__":
14
+ sys.path.insert(0, str(Path(__file__).parent.parent))
15
+
16
+ from config.config import ASR_MODEL_VRAM_MB, ASR_MODEL_RAM_MB
17
+
18
def get_gpu_memory():
    """Get total and available GPU memory in MB.

    Only the first CUDA device is inspected. ``available_mb`` is the
    device total minus ``torch.cuda.memory_allocated(0)``.

    Returns:
        dict: ``total_mb``, ``available_mb`` and ``allocated_mb``; all 0
        when CUDA is unavailable, no device is reported, or the query fails.
    """
    try:
        if torch.cuda.is_available() and torch.cuda.device_count() > 0:
            # Use first GPU
            total_vram = torch.cuda.get_device_properties(0).total_memory
            allocated_vram = torch.cuda.memory_allocated(0)
            available_vram = total_vram - allocated_vram

            return {
                'total_mb': total_vram // 1024 // 1024,
                'available_mb': available_vram // 1024 // 1024,
                'allocated_mb': allocated_vram // 1024 // 1024
            }
    except Exception:
        # Deliberate best effort: a broken driver/runtime is treated as
        # "no GPU". Narrowed from a bare ``except`` so KeyboardInterrupt
        # and SystemExit still propagate.
        pass

    return {'total_mb': 0, 'available_mb': 0, 'allocated_mb': 0}
38
+
39
def get_system_memory():
    """Get total and available system RAM in MB.

    Returns:
        dict: ``total_mb``, ``available_mb`` and ``used_mb`` taken from
        ``psutil.virtual_memory()``; all 0 if the query fails.
    """
    try:
        memory = psutil.virtual_memory()
        return {
            'total_mb': memory.total // 1024 // 1024,
            'available_mb': memory.available // 1024 // 1024,
            'used_mb': memory.used // 1024 // 1024
        }
    except Exception:
        # Best effort; narrowed from a bare ``except`` so KeyboardInterrupt
        # and SystemExit are not swallowed.
        return {'total_mb': 0, 'available_mb': 0, 'used_mb': 0}
50
+
51
def get_cpu_cores():
    """Get number of CPU cores.

    Prefers the physical core count and falls back to the logical count.

    Returns:
        int: Core count, or 1 when psutil cannot determine it or raises.
    """
    try:
        # psutil may return None for either count; never hand None to callers
        # that compare the result numerically.
        return psutil.cpu_count(logical=False) or psutil.cpu_count() or 1
    except Exception:
        # Narrowed from a bare ``except`` so KeyboardInterrupt/SystemExit propagate.
        return 1
57
+
58
def estimate_tts_vram_usage():
    """Return the fixed VRAM estimate (in MB) reserved for ChatterboxTTS."""
    # 5.5GB in MB: lowered from the earlier 7GB figure, based on roughly
    # 3.5GB of observed usage plus a buffer.
    tts_vram_mb = 5500
    return tts_vram_mb
61
+
62
def get_system_profile():
    """Assemble a snapshot of GPU, RAM and CPU resources.

    Returns:
        dict: ``gpu`` and ``ram`` (the dicts from ``get_gpu_memory`` and
        ``get_system_memory``), ``cpu_cores``, ``available_vram_after_tts``
        (available VRAM minus the TTS estimate, floored at 0) and
        ``has_gpu`` (True when total VRAM is above 0).
    """
    gpu = get_gpu_memory()
    ram = get_system_memory()
    cores = get_cpu_cores()

    # VRAM expected to remain once the TTS model has been loaded.
    vram_left = max(0, gpu['available_mb'] - estimate_tts_vram_usage())

    profile = {'gpu': gpu, 'ram': ram, 'cpu_cores': cores}
    profile['available_vram_after_tts'] = vram_left
    profile['has_gpu'] = gpu['total_mb'] > 0
    return profile
79
+
80
def categorize_system(profile):
    """Bucket VRAM, RAM and CPU core count into "low" / "medium" / "high".

    Thresholds (MB for memory): VRAM below 4000 is low, up to 12000 medium;
    RAM below 16000 is low, up to 64000 medium; fewer than 6 cores is low,
    up to 16 medium. Anything above the medium bound is high.

    Args:
        profile: Dict providing ``gpu['total_mb']``, ``ram['total_mb']`` and
            ``cpu_cores``.

    Returns:
        dict: ``vram``, ``ram`` and ``cpu`` category strings.
    """
    def tier(value, low_below, medium_up_to):
        # low: value < low_below; medium: value <= medium_up_to; else high
        if value < low_below:
            return "low"
        return "medium" if value <= medium_up_to else "high"

    vram_mb = profile['gpu']['total_mb']
    ram_mb = profile['ram']['total_mb']
    cores = profile['cpu_cores']

    return {
        'vram': tier(vram_mb, 4000, 12000),
        'ram': tier(ram_mb, 16000, 64000),
        'cpu': tier(cores, 6, 16),
    }
115
+
116
def get_safe_asr_models(profile):
    """List ASR models whose VRAM requirement fits ``available_vram_after_tts``.

    Models are returned in ``ASR_MODEL_VRAM_MB`` iteration order.
    """
    vram_budget = profile['available_vram_after_tts']
    return [
        name
        for name, needed_mb in ASR_MODEL_VRAM_MB.items()
        if needed_mb <= vram_budget
    ]
126
+
127
def get_safe_cpu_models(profile):
    """List ASR models whose RAM requirement fits the currently available RAM.

    Models are returned in ``ASR_MODEL_RAM_MB`` iteration order.
    """
    ram_budget = profile['ram']['available_mb']
    return [
        name
        for name, needed_mb in ASR_MODEL_RAM_MB.items()
        if needed_mb <= ram_budget
    ]
137
+
138
def recommend_asr_models(profile):
    """Build "safe", "moderate" and "insane" ASR model recommendations.

    Each tier maps to ``{'primary': {'model', 'device'}, 'fallback':
    {'model', 'device'}}``. The primary uses the tier's GPU pick when one
    exists, otherwise the tier's CPU pick on ``'cpu'``; the fallback is
    always the tier's CPU pick on ``'cpu'``. A model entry is None when
    nothing fits.

    Tiers:
        safe: smallest model that fits.
        moderate: first fitting model when scanning from two priority steps
            above the safe GPU pick towards smaller models.
        insane: largest model that fits.
    """
    categories = categorize_system(profile)  # computed as in the original; result unused
    gpu_ok = get_safe_asr_models(profile)
    cpu_ok = get_safe_cpu_models(profile)

    # Model priority order (best to worst)
    model_priority = ["large-v3", "large", "large-v2", "medium", "small", "base", "tiny"]

    def first_fitting(candidates, allowed):
        # First candidate (in the given order) that is in the allowed list.
        return next((name for name in candidates if name in allowed), None)

    # Safe: scan from the smallest model upwards.
    smallest_first = list(reversed(model_priority))
    safe_gpu = first_fitting(smallest_first, gpu_ok)
    safe_cpu = first_fitting(smallest_first, cpu_ok)

    # Moderate: start up to two steps above the safe GPU pick; with no GPU
    # pick the start position is taken from the end of the list.
    if safe_gpu:
        safe_idx = model_priority.index(safe_gpu)
    else:
        safe_idx = len(model_priority)
    moderate_window = model_priority[max(0, safe_idx - 2):]
    moderate_gpu = first_fitting(moderate_window, gpu_ok)
    moderate_cpu = first_fitting(moderate_window, cpu_ok)

    # Insane: scan from the best model downwards.
    insane_gpu = first_fitting(model_priority, gpu_ok)
    insane_cpu = first_fitting(model_priority, cpu_ok)

    recommendations = {}
    for tier_name, gpu_pick, cpu_pick in (
        ('safe', safe_gpu, safe_cpu),
        ('moderate', moderate_gpu, moderate_cpu),
        ('insane', insane_gpu, insane_cpu),
    ):
        recommendations[tier_name] = {
            'primary': {'model': gpu_pick or cpu_pick, 'device': 'gpu' if gpu_pick else 'cpu'},
            'fallback': {'model': cpu_pick, 'device': 'cpu'},
        }

    return recommendations
208
+
209
def print_system_summary(profile):
    """Print VRAM, RAM and CPU figures with their capability class.

    Adds a warning line when ``profile['has_gpu']`` is falsy.
    """
    categories = categorize_system(profile)

    summary_lines = (
        lambda: f"πŸ–₯️ System Profile:",
        lambda: f" VRAM: {profile['gpu']['total_mb']:,}MB total, {profile['available_vram_after_tts']:,}MB available after TTS ({categories['vram']} class)",
        lambda: f" RAM: {profile['ram']['total_mb']:,}MB total, {profile['ram']['available_mb']:,}MB available ({categories['ram']} class)",
        lambda: f" CPU: {profile['cpu_cores']} cores ({categories['cpu']} class)",
    )
    # Each line is formatted immediately before it is printed.
    for build_line in summary_lines:
        print(build_line())

    if not profile['has_gpu']:
        print(f" ⚠️ No CUDA GPU detected - ASR will run on CPU only")
220
+
221
if __name__ == "__main__":
    # Manual check: print the detected profile and every recommendation tier.
    profile = get_system_profile()
    print_system_summary(profile)

    recommendations = recommend_asr_models(profile)
    print(f"\nASR Model Recommendations:")
    for level in recommendations:
        config = recommendations[level]
        primary, fallback = config['primary'], config['fallback']
        print(f"🟒 {level.upper()}: {primary['model']} ({primary['device']}) + {fallback['model']} (cpu fallback)")
modules/voice_detector.py ADDED
@@ -0,0 +1,240 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Voice Detection Module
3
+ Handles voice detection from multiple sources: JSON metadata, log files, filenames
4
+ """
5
+
6
+ import re
7
+ import json
8
+ from pathlib import Path
9
+ from config.config import AUDIOBOOK_ROOT
10
+ from modules.file_manager import list_voice_samples
11
+
12
+
13
def get_likely_voices_for_book(book_name, chunks_json_path=None):
    """
    Collect candidate voices for a book from three sources, in this order:
    1. JSON metadata/comments (only when ``chunks_json_path`` is given)
    2. The book's run.log file
    3. Generated audiobook filenames (may contribute several)

    A candidate is kept only if a matching voice file is found, and a voice
    name already collected is not added again.

    Returns: list of (voice_name, voice_path, detection_method) tuples
    """
    print(f"πŸ” Finding likely voices for book: {book_name}")
    likely_voices = []

    def not_yet_listed(name):
        # True when no collected candidate already carries this voice name.
        return all(entry[0] != name for entry in likely_voices)

    # Source 1: JSON metadata and comments
    voice_from_json = get_voice_from_json(chunks_json_path) if chunks_json_path else None
    if voice_from_json:
        voice_path = find_voice_file_by_name(voice_from_json)
        if voice_path:
            likely_voices.append((voice_from_json, voice_path, "json_metadata"))
            print(f"βœ… Voice found in JSON: {voice_from_json}")

    # Source 2: run.log
    voice_from_log = get_voice_from_log(book_name)
    if voice_from_log:
        voice_path = find_voice_file_by_name(voice_from_log)
        if voice_path and not_yet_listed(voice_from_log):
            likely_voices.append((voice_from_log, voice_path, "run_log"))
            print(f"βœ… Voice found in run.log: {voice_from_log}")

    # Source 3: generated filename patterns (may find multiple)
    for voice_name in get_voices_from_filenames(book_name):
        voice_path = find_voice_file_by_name(voice_name)
        if voice_path and not_yet_listed(voice_name):
            likely_voices.append((voice_name, voice_path, "filename_pattern"))
            print(f"βœ… Voice found in filename: {voice_name}")

    if likely_voices:
        print(f"πŸ“‹ Found {len(likely_voices)} likely voice candidates")
    else:
        print(f"⚠️ No likely voices detected for {book_name}")

    return likely_voices
60
+
61
def detect_voice_for_book(book_name, chunks_json_path=None):
    """
    Return the first voice candidate for a book.

    Kept for backwards compatibility with existing callers. Yields a
    (voice_name, voice_path, detection_method) tuple, or
    (None, None, "not_found") when no candidate exists.
    """
    candidates = get_likely_voices_for_book(book_name, chunks_json_path)
    if not candidates:
        return None, None, "not_found"
    return candidates[0]  # first entry is the most likely candidate
70
+
71
+
72
def get_voice_from_json(json_path):
    """Extract voice information from a chunks JSON file.

    Looks first for a ``voice_used`` entry, either at the top level of a
    dict or in the first element of a list, and then for a
    ``// voice: <name>`` comment line anywhere in the file.

    A file carrying such a comment is not valid JSON. A parse failure
    therefore no longer aborts the lookup; the comment is still consulted.

    Args:
        json_path: Path of the file to inspect.

    Returns:
        The voice name, or None when the file cannot be read or holds no
        voice information.
    """
    try:
        with open(json_path, 'r', encoding='utf-8') as f:
            content = f.read()
    except (OSError, UnicodeError, TypeError) as e:
        print(f"⚠️ Error reading JSON for voice info: {e}")
        return None

    # Cheap substring test first so files without metadata are never parsed.
    if '"voice_used":' in content:
        try:
            data = json.loads(content)
        except ValueError:
            # json.JSONDecodeError is a ValueError; typically caused by a
            # leading "// voice:" comment. Fall through to the comment scan.
            data = None

        if isinstance(data, dict) and 'voice_used' in data:
            return data['voice_used']
        if (isinstance(data, list) and data
                and isinstance(data[0], dict) and 'voice_used' in data[0]):
            return data[0]['voice_used']

    # Check for voice as comment in JSON (fallback option)
    voice_comment_match = re.search(r'//\s*voice:\s*([^\n]+)', content, re.IGNORECASE)
    if voice_comment_match:
        return voice_comment_match.group(1).strip()

    return None
95
+
96
+
97
def get_voice_from_log(book_name):
    """Read the voice name recorded in ``<AUDIOBOOK_ROOT>/<book_name>/run.log``.

    The first line starting with "Voice: " or "Voice used: " wins; the text
    after the first ": " is returned stripped. Returns None when the log is
    missing, has no such line, or cannot be read.
    """
    log_file = Path(AUDIOBOOK_ROOT) / book_name / "run.log"
    if not log_file.exists():
        return None

    try:
        with open(log_file, 'r', encoding='utf-8') as f:
            for raw_line in f:
                entry = raw_line.strip()
                if entry.startswith(("Voice: ", "Voice used: ")):
                    _, voice_name = entry.split(": ", 1)
                    return voice_name.strip()
    except Exception as e:
        print(f"⚠️ Error reading run log: {e}")

    return None
114
+
115
+
116
def get_voices_from_filenames(book_name):
    """Collect voice names embedded in a book's output filenames.

    Scans ``<AUDIOBOOK_ROOT>/<book_name>`` for ``*.wav`` and then ``*.m4b``
    files whose name ends in ``[VoiceName].<ext>``. Each voice is reported
    once, in discovery order. Returns an empty list when the book directory
    does not exist.
    """
    book_dir = Path(AUDIOBOOK_ROOT) / book_name
    if not book_dir.exists():
        return []

    # WAV files are scanned before M4B files.
    scans = (
        ("*.wav", r'\[([^\]]+)\]\.wav$'),
        ("*.m4b", r'\[([^\]]+)\]\.m4b$'),
    )

    found_voices = []
    for glob_pattern, name_regex in scans:
        for audio_file in book_dir.glob(glob_pattern):
            match = re.search(name_regex, audio_file.name)
            if not match:
                continue
            voice_name = match.group(1)
            if voice_name not in found_voices:
                found_voices.append(voice_name)

    return found_voices
143
+
144
def get_voice_from_filename(book_name):
    """Return the first voice found in the book's filenames, or None.

    Backwards-compatible single-result form of ``get_voices_from_filenames``.
    """
    voices = get_voices_from_filenames(book_name)
    if voices:
        return voices[0]
    return None
148
+
149
+
150
def find_voice_file_by_name(voice_name):
    """Locate a voice sample whose file stem matches ``voice_name``.

    An exact stem match is preferred; otherwise the first sample whose stem
    contains ``voice_name`` (case-insensitively) is returned. Returns None
    when nothing matches. Candidates come from ``list_voice_samples()``.
    """
    samples = list_voice_samples()

    # Pass 1: exact stem match
    for sample in samples:
        if sample.stem == voice_name:
            return sample

    # Pass 2: case-insensitive substring match
    needle = voice_name.lower()
    for sample in samples:
        if needle in sample.stem.lower():
            return sample

    return None
166
+
167
+
168
+
169
+
170
def add_voice_to_json(json_path, voice_name, method="metadata"):
    """
    Add voice information to JSON file

    method options:
    - "metadata": Store ``voice_used`` in the JSON data. For a dict it is a
      top-level key. For a list it goes into the first element when that
      element is a dict with no key starting with "text"; otherwise a new
      ``{"voice_used": ..., "_metadata": True}`` element is inserted first.
    - "comment": Prepend a ``// voice: <name>`` line unless the file already
      starts with one.

    Args:
        json_path: File to update; a ``str`` or ``Path`` is accepted.
        voice_name: Voice name to record.
        method: "metadata" or "comment".

    Returns:
        bool: True on success; False if the method is unknown or reading,
        parsing or writing fails (the error is printed, not raised).
    """
    try:
        if method not in ("metadata", "comment"):
            # Previously an unknown method wrote nothing yet reported success.
            raise ValueError(f"unknown method '{method}'")

        with open(json_path, 'r', encoding='utf-8') as f:
            content = f.read()

        if method == "metadata":
            # Add voice as metadata to JSON structure
            data = json.loads(content)

            if isinstance(data, list):
                first_is_metadata = (
                    bool(data)
                    and isinstance(data[0], dict)
                    and not any(key.startswith('text') for key in data[0].keys())
                )
                if first_is_metadata:
                    # First element is already metadata
                    data[0]['voice_used'] = voice_name
                else:
                    # Insert metadata as first element
                    data.insert(0, {"voice_used": voice_name, "_metadata": True})
            elif isinstance(data, dict):
                # For dict format, add to top level
                data['voice_used'] = voice_name

            # Save updated JSON
            with open(json_path, 'w', encoding='utf-8') as f:
                json.dump(data, f, indent=2, ensure_ascii=False)

        else:
            # Add voice as comment at the top of file
            voice_comment = f"// voice: {voice_name}\n"

            if not content.startswith("// voice:"):
                with open(json_path, 'w', encoding='utf-8') as f:
                    f.write(voice_comment + content)

        # Path() lets plain string paths work; ``json_path.name`` used to
        # raise here after the file had already been written.
        print(f"βœ… Added voice '{voice_name}' to {Path(json_path).name} using {method} method")
        return True

    except Exception as e:
        print(f"❌ Error adding voice to JSON: {e}")
        return False
218
+
219
+
220
def remove_voice_comment_from_json(json_path):
    """Strip ``// voice:`` comment lines from a JSON file.

    The file is rewritten only when at least one such line was present.

    Returns:
        bool: True if the file was rewritten; False if nothing needed
        removing or an error occurred (the error is printed, not raised).
    """
    try:
        with open(json_path, 'r', encoding='utf-8') as f:
            all_lines = f.read().split('\n')

        kept_lines = []
        for text_line in all_lines:
            if text_line.strip().startswith('// voice:'):
                continue
            kept_lines.append(text_line)

        if len(kept_lines) != len(all_lines):
            # At least one comment line was dropped; persist the cleaned text.
            with open(json_path, 'w', encoding='utf-8') as f:
                f.write('\n'.join(kept_lines))
            return True

    except Exception as e:
        print(f"⚠️ Error cleaning JSON comments: {e}")

    return False
requirements.txt ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ChatterboxTTS HuggingFace Spaces Requirements
2
+ # Optimized for HF Spaces environment with flexible versions
3
+
4
+ # Core ML and TTS - Essential (flexible versions for HF compatibility)
5
+ torch>=2.0.0
6
+ torchaudio>=2.0.0
7
+ transformers>=4.20.0
8
+ huggingface_hub>=0.15.0
9
+ safetensors>=0.3.0
10
+
11
+ # Audio processing - Required
12
+ soundfile>=0.12.0
13
+ librosa>=0.9.0
14
+ pydub>=0.25.0
15
+ audioread>=3.0.0
16
+
17
+ # ASR System - Intelligent ASR with fallback
18
+ openai-whisper>=20231117
19
+
20
+ # System monitoring and resource detection
21
+ psutil>=5.8.0
22
+ pynvml>=11.0.0
23
+
24
+ # Core scientific computing (flexible for HF environment)
25
+ numpy>=1.21.0
26
+ scipy>=1.7.0
27
+
28
+ # Text processing
29
+ regex>=2023.0.0
30
+ vaderSentiment>=3.3.0
31
+
32
+ # Web interface - Gradio (let HF manage version)
33
+ gradio>=4.0.0
34
+
35
+ # Progress and logging
36
+ tqdm>=4.60.0
37
+
38
+ # File handling
39
+ pathlib2>=2.3.0
40
+
41
+ # Configuration and utilities
42
+ python-dotenv>=1.0.0
43
+
44
+ # Optional utilities
45
+ requests>=2.25.0
46
+ packaging>=21.0
47
+
48
+ # Core ChatterboxTTS model dependencies
49
+ resemble-perth>=1.0.1
50
+ omegaconf>=2.3.0
51
+ einops>=0.6.0
52
+ diffusers>=0.21.0
53
+ tokenizers>=0.13.0
54
+ conformer>=0.3.0
55
+ s3tokenizer==0.2.0
tools/combine_only.py ADDED
@@ -0,0 +1,396 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Combine Only Tool
3
+ Standalone tool for combining existing audio chunks into final audiobook
4
+ """
5
+
6
+ import re
7
+ import time
8
+ import logging
9
+ from datetime import timedelta
10
+ from pathlib import Path
11
+
12
+ from config.config import *
13
+ from modules.file_manager import (
14
+ get_audio_files_in_directory, combine_audio_chunks,
15
+ convert_to_m4b, add_metadata_to_m4b, find_book_files
16
+ )
17
+ from modules.audio_processor import get_wav_duration
18
+ from modules.progress_tracker import log_console, log_run
19
+ import subprocess
20
+ import shutil
21
+
22
def combine_audio_for_book(book_path_str, voice_name=None):
    """Combine a book's generated audio chunks (GUI-friendly entry point).

    Looks for chunks in ``<book_path>/TTS/audio_chunks``, reports gaps in the
    chunk sequence without aborting, prints duration statistics, and then
    delegates to ``_perform_combine_operation``.

    Args:
        book_path_str: Path of the book's output directory.
        voice_name: Optional voice name forwarded to the combine step.

    Returns:
        False when the chunk folder is missing or no chunks are found;
        otherwise whatever ``_perform_combine_operation`` returns.
    """
    from pathlib import Path
    book_path = Path(book_path_str)

    print(f"\n{CYAN}πŸ”— Combining Audio Chunks for: {book_path.name}{RESET}")
    print("=" * 60)

    # Chunks are expected under <book>/TTS/audio_chunks
    audio_chunks_dir = book_path / "TTS" / "audio_chunks"
    if not audio_chunks_dir.exists():
        print(f"{RED}❌ No audio_chunks folder found in {book_path}{RESET}")
        print(f"πŸ’‘ Make sure this book has been processed with TTS generation first.")
        return False

    chunk_paths = get_audio_files_in_directory(audio_chunks_dir)
    if not chunk_paths:
        print(f"{RED}❌ No chunk_*.wav files found in {audio_chunks_dir}{RESET}")
        print(f"πŸ’‘ Expected files like: chunk_00001.wav, chunk_00002.wav, etc.")
        return False

    print(f"\nπŸ“¦ Found {GREEN}{len(chunk_paths)}{RESET} audio chunks")

    # Gaps in the sequence are reported but do not stop the GUI workflow.
    missing_chunks = verify_chunk_sequence(chunk_paths)
    if missing_chunks:
        print(f"\n⚠️ {YELLOW}Warning: Missing chunks detected:{RESET}")
        preview = missing_chunks[:10]  # list at most the first 10 gaps
        for chunk_num in preview:
            print(f" Missing: chunk_{chunk_num:05}.wav")
        if len(missing_chunks) > 10:
            print(f" ... and {len(missing_chunks) - 10} more")
        print(f"{YELLOW}πŸ”„ Continuing with available chunks for GUI operation...{RESET}")

    # Summed duration of all chunks, shown as H:MM:SS
    total_duration = sum(get_wav_duration(chunk_path) for chunk_path in chunk_paths)
    duration_str = str(timedelta(seconds=int(total_duration)))

    print(f"\nπŸ“Š Chunk Analysis:")
    print(f" Total Chunks: {GREEN}{len(chunk_paths)}{RESET}")
    print(f" Total Duration: {GREEN}{duration_str}{RESET}")
    print(f" Average Chunk: {GREEN}{total_duration/len(chunk_paths):.1f}s{RESET}")

    return _perform_combine_operation(book_path, chunk_paths, total_duration, voice_name)
70
+
71
def _perform_combine_operation(book_path, chunk_paths, total_duration, voice_name=None):
    """Combine chunk WAVs into one WAV, convert it to M4B and report statistics.

    Args:
        book_path: Audiobook folder (a Path); outputs are written directly into it.
        chunk_paths: Audio chunk files handed to combine_audio_chunks.
        total_duration: Total chunk duration in seconds, used only for reporting.
        voice_name: Optional voice label; when given, outputs are named
            "<book> [<voice>]", otherwise "<book>_combined".

    Returns:
        True when the final M4B exists afterwards, False on any failure.
    """
    import time
    from datetime import timedelta

    basename = book_path.name

    # Output naming depends on whether a voice label was supplied.
    file_suffix = f" [{voice_name}]" if voice_name else "_combined"

    start_time = time.time()

    # --- Step 1: concatenate the chunks into a single WAV ---
    print(f"\nπŸ”— Combining audio chunks...")
    combined_wav_path = book_path / f"{basename}{file_suffix}.wav"
    try:
        combine_audio_chunks(chunk_paths, combined_wav_path)
        print(f"βœ… Combined WAV created: {combined_wav_path.name}")
    except Exception as exc:
        print(f"{RED}❌ Failed to combine chunks: {exc}{RESET}")
        return False

    # --- Step 2: look up optional cover/metadata next to the source text ---
    text_book_dir = TEXT_INPUT_ROOT / basename
    book_files = find_book_files(text_book_dir)
    _text_files = book_files['text']  # looked up as in the original; not used further
    cover_file = book_files['cover']
    nfo_file = book_files['nfo']

    if cover_file:
        print(f"πŸ“Έ Using cover: {cover_file.name}")
    else:
        print(f"⚠️ {YELLOW}No cover image found in {text_book_dir}{RESET}")

    if nfo_file:
        print(f"πŸ“ Using metadata: {nfo_file.name}")
    else:
        print(f"⚠️ {YELLOW}No book.nfo metadata found in {text_book_dir}{RESET}")

    # --- Step 3: encode to M4B via a temporary file, then attach metadata ---
    print(f"\nπŸ“± Converting to M4B audiobook...")
    temp_m4b_path = book_path / "temp_output.m4b"
    final_m4b_path = book_path / f"{basename}{file_suffix}.m4b"
    try:
        convert_to_m4b(combined_wav_path, temp_m4b_path)
        add_metadata_to_m4b(temp_m4b_path, final_m4b_path, cover_file, nfo_file)
        print(f"βœ… M4B audiobook created: {final_m4b_path.name}")
    except Exception as exc:
        print(f"{RED}❌ Failed to create M4B: {exc}{RESET}")
        return False

    elapsed_total = time.time() - start_time
    elapsed_td = timedelta(seconds=int(elapsed_total))

    # --- Step 4: verify the result and report ---
    if not final_m4b_path.exists():
        print(f"{RED}❌ Final M4B file was not created successfully{RESET}")
        return False

    final_size = final_m4b_path.stat().st_size / (1024 * 1024)  # MB
    print(f"πŸ“¦ Final file size: {GREEN}{final_size:.1f} MB{RESET}")

    # Audio seconds produced per wall-clock second (0 when no time elapsed).
    realtime_factor = total_duration / elapsed_total if elapsed_total > 0 else 0
    duration_str = str(timedelta(seconds=int(total_duration)))

    print(f"\nπŸŽ‰ {GREEN}Combine completed successfully!{RESET}")
    print(f"πŸ“Š Final Statistics:")
    print(f" Audio Duration: {GREEN}{duration_str}{RESET}")
    print(f" Processing Time: {GREEN}{elapsed_td}{RESET}")
    print(f" Realtime Factor: {GREEN}{realtime_factor:.2f}x{RESET}")
    print(f" Output Location: {GREEN}{final_m4b_path}{RESET}")

    # Best-effort removal of the intermediate M4B; a failure here is only reported.
    try:
        if temp_m4b_path.exists():
            temp_m4b_path.unlink()
            print(f"🧹 Cleaned up temporary file: {temp_m4b_path.name}")
    except Exception as exc:
        print(f"⚠️ Could not clean up temp file: {exc}")

    return True
158
+
159
def run_combine_only_mode():
    """Interactively pick an audiobook folder and combine its existing chunks (CLI).

    Lists the folders under AUDIOBOOK_ROOT, asks the user for an index, checks
    the chunk sequence (asking for confirmation when chunks are missing) and
    then delegates to _perform_combine_operation without a voice name.

    Returns:
        Path of the "<book>_combined.m4b" file on success, or None when the
        operation failed or was cancelled by the user.
    """
    print(f"\n{CYAN}πŸ”— Combine-Only Mode: Assembling Existing Audio Chunks{RESET}")
    print("=" * 60)

    # A missing root is treated like an empty one; iterdir() would raise otherwise.
    if AUDIOBOOK_ROOT.exists():
        books = sorted(d for d in AUDIOBOOK_ROOT.iterdir() if d.is_dir())
    else:
        books = []
    if not books:
        print(f"{RED}❌ No folders found in Audiobook/ directory.{RESET}")
        print(f"πŸ’‘ Make sure you have processed books with audio chunks to combine.")
        return None

    print(f"{CYAN}Available audiobooks to combine:{RESET}")
    for i, book in enumerate(books):
        # Show how many chunk files each candidate currently has.
        audio_chunks_dir = book / "TTS" / "audio_chunks"
        if audio_chunks_dir.exists():
            chunk_count = len(list(audio_chunks_dir.glob('chunk_*.wav')))
            status = f"({chunk_count} chunks)" if chunk_count > 0 else "(no chunks)"
            print(f" [{i}] {book.name} {status}")
        else:
            print(f" [{i}] {book.name} (no TTS folder)")

    # Book selection: re-prompt on bad numbers, but let Ctrl+C / EOF leave the loop.
    while True:
        try:
            idx = int(input(f"\n{YELLOW}Select audiobook index: {RESET}"))
        except ValueError:
            print(f"{RED}Invalid selection. Please try again.{RESET}")
            continue
        except KeyboardInterrupt:
            # Previously swallowed and re-prompted, which trapped the user in the loop.
            print(f"\n{RED}πŸ›‘ Combine operation cancelled.{RESET}")
            return None
        except EOFError:
            print(f"\n{RED}❌ Input error - unable to read selection.{RESET}")
            return None
        except Exception as e:
            print(f"{RED}❌ Unexpected error: {e}{RESET}")
            return None

        if 0 <= idx < len(books):
            break
        print(f"{RED}Invalid selection. Please enter a number between 0 and {len(books)-1}.{RESET}")

    selected_book = books[idx]
    basename = selected_book.name

    print(f"\n🎯 Selected: {BOLD}{basename}{RESET}")

    # Setup paths
    tts_dir = selected_book / "TTS"
    audio_chunks_dir = tts_dir / "audio_chunks"

    if not audio_chunks_dir.exists():
        print(f"{RED}❌ No audio_chunks folder found in {selected_book}{RESET}")
        print(f"πŸ’‘ Make sure this book has been processed with TTS generation first.")
        return None

    # Find audio chunks
    chunk_paths = get_audio_files_in_directory(audio_chunks_dir)

    if not chunk_paths:
        print(f"{RED}❌ No chunk_*.wav files found in {audio_chunks_dir}{RESET}")
        print(f"πŸ’‘ Expected files like: chunk_00001.wav, chunk_00002.wav, etc.")
        return None

    print(f"\nπŸ“¦ Found {GREEN}{len(chunk_paths)}{RESET} audio chunks")

    # Verify chunk sequence; gaps need explicit confirmation in CLI mode.
    missing_chunks = verify_chunk_sequence(chunk_paths)
    if missing_chunks:
        print(f"\n⚠️ {YELLOW}Warning: Missing chunks detected:{RESET}")
        for chunk_num in missing_chunks[:10]:  # Show first 10 missing
            print(f" Missing: chunk_{chunk_num:05}.wav")
        if len(missing_chunks) > 10:
            print(f" ... and {len(missing_chunks) - 10} more")

        try:
            continue_anyway = input(f"\n{YELLOW}Continue with incomplete chunks? [y/N]: {RESET}").strip().lower()
            if continue_anyway != 'y':
                print("πŸ›‘ Combine operation cancelled.")
                return None
        except (EOFError, KeyboardInterrupt):
            print(f"\n{RED}πŸ›‘ Combine operation cancelled.{RESET}")
            return None

    # Display chunk info
    total_duration = sum(get_wav_duration(chunk_path) for chunk_path in chunk_paths)
    duration_str = str(timedelta(seconds=int(total_duration)))

    print(f"\nπŸ“Š Chunk Analysis:")
    print(f" Total Chunks: {GREEN}{len(chunk_paths)}{RESET}")
    print(f" Total Duration: {GREEN}{duration_str}{RESET}")
    print(f" Average Chunk: {GREEN}{total_duration/len(chunk_paths):.1f}s{RESET}")

    # Use the shared combine operation (CLI doesn't pass voice name)
    success = _perform_combine_operation(selected_book, chunk_paths, total_duration)

    if success:
        # Matches the "_combined" suffix used when no voice name is given.
        return selected_book / f"{basename}_combined.m4b"
    return None
257
+
258
def verify_chunk_sequence(chunk_paths):
    """Return the chunk numbers missing from a chunk_<N>.wav sequence.

    Args:
        chunk_paths: Iterable of path objects; only names matching
            "chunk_<digits>.wav" are considered, everything else is ignored.

    Returns:
        Ascending list of integers in 1..max(found) that have no matching
        file. Empty when nothing is missing or no chunk file was recognised.
    """
    present = set()
    for chunk_path in chunk_paths:
        match = re.match(r"chunk_(\d+)\.wav", chunk_path.name)
        if match:
            present.add(int(match.group(1)))

    if not present:
        return []

    # Set membership keeps this linear even for books with many thousands of chunks.
    return [num for num in range(1, max(present) + 1) if num not in present]
275
+
276
def list_available_books_for_combine():
    """List books under AUDIOBOOK_ROOT that have audio chunks ready to combine.

    Returns:
        A list of dicts with keys "name", "path", "chunk_count" and
        "duration" (an H:MM:SS string, or "Unknown" when the duration could
        not be computed). Empty when AUDIOBOOK_ROOT does not exist.
    """
    books_info = []

    if not AUDIOBOOK_ROOT.exists():
        return books_info

    for book_dir in AUDIOBOOK_ROOT.iterdir():
        if not book_dir.is_dir():
            continue

        audio_chunks_dir = book_dir / "TTS" / "audio_chunks"
        if not audio_chunks_dir.exists():
            continue

        chunk_paths = get_audio_files_in_directory(audio_chunks_dir)
        if not chunk_paths:
            continue

        # Duration is informational only: fall back to "Unknown" on errors, but do
        # not swallow KeyboardInterrupt/SystemExit the way a bare except would.
        try:
            total_duration = sum(get_wav_duration(chunk_path) for chunk_path in chunk_paths)
            duration_str = str(timedelta(seconds=int(total_duration)))
        except Exception:
            duration_str = "Unknown"

        books_info.append({
            "name": book_dir.name,
            "path": book_dir,
            "chunk_count": len(chunk_paths),
            "duration": duration_str
        })

    return books_info
310
+
311
def quick_combine(book_name):
    """Combine a named book's chunks into an M4B without prompts or metadata.

    Args:
        book_name: Folder name of the book under AUDIOBOOK_ROOT.

    Returns:
        Path of "<book>_quick_combined.m4b" on success, or None when the book
        folder or its audio chunks are missing. Errors raised by the
        combine/convert helpers propagate to the caller.
    """
    book_path = AUDIOBOOK_ROOT / book_name

    if not book_path.exists():
        print(f"{RED}❌ Book '{book_name}' not found in Audiobook directory{RESET}")
        return None

    audio_chunks_dir = book_path / "TTS" / "audio_chunks"
    # A missing chunk folder is reported the same way as an empty one.
    chunk_paths = get_audio_files_in_directory(audio_chunks_dir) if audio_chunks_dir.exists() else []

    if not chunk_paths:
        print(f"{RED}❌ No audio chunks found for '{book_name}'{RESET}")
        return None

    print(f"πŸ”— Quick combining {len(chunk_paths)} chunks for '{book_name}'...")

    # Use same logic as main function but without interactive prompts
    combined_wav_path = book_path / f"{book_name}_quick_combined.wav"
    final_m4b_path = book_path / f"{book_name}_quick_combined.m4b"

    combine_audio_chunks(chunk_paths, combined_wav_path)

    temp_m4b_path = book_path / "temp_quick.m4b"
    convert_to_m4b(combined_wav_path, temp_m4b_path)

    # Simple M4B without metadata for quick operation.
    # replace() overwrites an existing result on every platform, whereas
    # rename() raises FileExistsError on Windows when re-running for a book.
    temp_m4b_path.replace(final_m4b_path)

    print(f"βœ… Quick combine complete: {final_m4b_path}")
    return final_m4b_path
342
+
343
def _build_atempo_chain(tempo):
    """Return an ffmpeg filter string reaching *tempo* with atempo stages each within [0.5, 2.0]."""
    stages = []
    while tempo > 2.0:
        stages.append("atempo=2.0")
        tempo /= 2.0
    while tempo < 0.5:
        stages.append("atempo=0.5")
        tempo /= 0.5
    stages.append(f"atempo={tempo}")
    return ",".join(stages)


def apply_playback_speed_to_m4b(input_m4b_path, output_m4b_path, speed_factor):
    """Apply a playback speed adjustment to an M4B file using ffmpeg.

    The audio is re-encoded to AAC at 64k with one or more chained ``atempo``
    stages, so factors outside 0.5-2.0 are handled as well.

    Args:
        input_m4b_path: Source M4B file.
        output_m4b_path: Destination file (overwritten if present, via ``-y``).
        speed_factor: Positive, finite tempo multiplier (e.g. 1.25).

    Returns:
        True when ffmpeg exits with status 0; False for an invalid speed
        factor, a missing ffmpeg binary, a non-zero exit status, a timeout
        (5 minutes) or any other error.
    """
    import math

    try:
        print(f"πŸ”„ Applying {speed_factor}x speed to {Path(input_m4b_path).name}")

        # Reject values ffmpeg cannot use (and that would never terminate the
        # stage-splitting loop, such as infinity) before doing any work.
        try:
            tempo = float(speed_factor)
        except (TypeError, ValueError):
            tempo = float("nan")
        if not (math.isfinite(tempo) and tempo > 0):
            print(f"❌ Invalid speed factor: {speed_factor!r} (must be a positive number)")
            return False

        # Check if ffmpeg is available
        if not shutil.which('ffmpeg'):
            print("❌ ffmpeg not found - required for M4B speed adjustment")
            return False

        # Build ffmpeg command for speed adjustment
        cmd = [
            'ffmpeg', '-y',  # -y to overwrite output file
            '-i', str(input_m4b_path),
            '-filter:a', _build_atempo_chain(tempo),  # Audio speed adjustment
            '-c:a', 'aac',  # Re-encode to AAC for M4B compatibility
            '-b:a', '64k',  # Audio bitrate
            str(output_m4b_path)
        ]

        print(f"Running: {' '.join(cmd)}")

        # Execute ffmpeg command (argument list, no shell involved)
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            timeout=300  # 5 minute timeout
        )

        if result.returncode == 0:
            print(f"βœ… Successfully created speed-adjusted M4B: {Path(output_m4b_path).name}")
            return True

        print(f"❌ ffmpeg failed: {result.stderr}")
        return False

    except subprocess.TimeoutExpired:
        print("❌ M4B speed adjustment timed out")
        return False
    except Exception as e:
        print(f"❌ Error adjusting M4B speed: {e}")
        return False
386
+
387
if __name__ == "__main__":
    import sys

    cli_args = sys.argv[1:]
    if cli_args:
        # CLI usage: python combine_only.py "Book Name"
        quick_combine(cli_args[0])
    else:
        # No arguments: fall back to the interactive menu.
        run_combine_only_mode()
utils/abbreviations.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Dr. -> Doctor
2
+ Mr. -> Mister
3
+ Mrs. -> Missus
4
+ Ms. -> Miss
5
+ U.S. -> US
6
+ U.K. -> UK
7
+ etc. -> et cetera
8
+ vs. -> versus
9
+ 1st -> first
10
+ 2nd -> second
11
+ 3rd -> third
utils/abbreviations.txt~ ADDED
File without changes
utils/chunk_manager.TXT ADDED
@@ -0,0 +1,153 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # chunk_manager.py
2
+
3
+ import re
4
+ from pathlib import Path
5
+ from text_cleaner import smart_punctuate # Assuming you've extracted this already
6
+
7
+ import json
8
+
9
def save_chunks_to_json(chunks, output_path):
    """Write the chunk list to *output_path* as indented UTF-8 JSON and report it."""
    with open(output_path, mode='w', encoding='utf-8') as sink:
        # ensure_ascii=False keeps non-ASCII text readable in the output file.
        json.dump(chunks, sink, ensure_ascii=False, indent=2)
    print(f"βœ… Saved {len(chunks)} chunks to: {output_path}")
14
+
15
+
16
def break_long_sentence(sentence, max_words):
    """Split an over-long sentence into pieces of at most *max_words* words.

    Break points are tried in priority order (", and ", ", but ", comma,
    semicolon, dash, period-before-quote). The first match of the
    highest-priority pattern whose prefix fits within *max_words* is used.
    When no pattern yields a fitting prefix, the first *max_words* words are
    cut off and a trailing comma is appended.

    Args:
        sentence: Text to split.
        max_words: Maximum number of whitespace-separated words per piece;
            must be at least 1.

    Returns:
        List of stripped text pieces in original order (empty for blank input).

    Raises:
        ValueError: If *max_words* is smaller than 1 (the forced split could
            never make progress and the loop would not terminate).
    """
    if max_words < 1:
        raise ValueError(f"max_words must be >= 1, got {max_words!r}")

    # Compile once per call instead of once per loop iteration.
    break_patterns = [
        re.compile(pattern) for pattern in (
            r'(,\s+and\s+)', r'(,\s+but\s+)', r'(,\s+)', r'(;\s*)', r'β€”', r'(\.\s*")',
        )
    ]
    chunks = []
    remaining_text = sentence.strip()

    while remaining_text:
        words = remaining_text.split()
        if len(words) <= max_words:
            chunks.append(remaining_text.strip())
            break

        # First fitting match, honouring pattern priority before position.
        break_pos = next(
            (
                match.end()
                for pattern in break_patterns
                for match in pattern.finditer(remaining_text)
                if len(remaining_text[:match.end()].split()) <= max_words
            ),
            None,
        )

        if break_pos is not None:
            chunks.append(remaining_text[:break_pos].strip())
            remaining_text = remaining_text[break_pos:].strip()
        else:
            # No natural break fits: force a cut after max_words words.
            chunks.append(" ".join(words[:max_words]) + ",")
            remaining_text = " ".join(words[max_words:]).strip()

    return chunks
46
+
47
def fix_short_sentences(chunk_text):
    """Merge runs of very short sentences by turning ". " into ", ".

    The text is rewritten only when at least two capitalised words of two to
    four letters are each followed by a period and whitespace; the result is
    guaranteed to end with a period. Otherwise the text is returned unchanged.
    """
    short_hits = re.findall(r'\b[A-Z][a-z]{1,3}\.\s+', chunk_text)
    if len(short_hits) < 2:
        return chunk_text

    joined = ", ".join(chunk_text.split(". "))
    return joined if joined.endswith(".") else joined + "."
55
+
56
def detect_content_boundary(chunk_text):
    """Classify a chunk as a chapter start, section break or paragraph end.

    Checks run in priority order and the first hit wins; returns None when
    none of them applies.
    """
    checks = (
        ("chapter_start", lambda t: re.match(r'^\s*(Chapter \d+|CHAPTER \d+)', t, re.IGNORECASE)),
        ("section_break", lambda t: re.search(r'\*\*\*|---|###', t)),
        ("paragraph_end", lambda t: t.endswith(('\n\n', '\n'))),
    )
    return next((label for label, hit in checks if hit(chunk_text)), None)
64
+
65
def sentence_chunk_text(text, max_words=30, min_words=4):
    """Split text into sentence-based chunks, paragraph by paragraph.

    Paragraphs are separated by blank lines. Within a paragraph, sentences
    longer than *max_words* are split with break_long_sentence, sentences
    shorter than *min_words* are collected and joined (", ") with the next
    regular sentence, and every resulting chunk is passed through
    fix_short_sentences.

    Args:
        text: Full input text.
        max_words: Word count above which a sentence is split further.
        min_words: Word count below which a sentence is merged with neighbours.

    Returns:
        List of (chunk_text, is_paragraph_end) tuples in reading order;
        is_paragraph_end is True for the last chunk of each paragraph.
    """
    sentence_end_re = re.compile(r'([.!?][\"\')]*\s+)')
    final_chunks = []

    def split_sentences(lines_in_para):
        """Cut each line at sentence-ending punctuation followed by whitespace."""
        raw_sentences = []
        for line in lines_in_para:
            start = 0
            for match in sentence_end_re.finditer(line):
                end = match.end()
                sentence = line[start:end].strip()
                if sentence:
                    raw_sentences.append(sentence)
                start = end
            # Whatever follows the last terminator is a sentence of its own.
            if start < len(line):
                sentence = line[start:].strip()
                if sentence:
                    raw_sentences.append(sentence)
        return raw_sentences

    def flush_paragraph(lines_in_para):
        """Group one paragraph's sentences into chunks and append them."""
        temp_chunks = []
        short_group = []

        for sentence in split_sentences(lines_in_para):
            wc = len(sentence.split())

            if wc > max_words:
                # Emit pending short sentences first; otherwise they would end
                # up after this long sentence and the text would be reordered.
                if short_group:
                    temp_chunks.append(", ".join(short_group).strip())
                    short_group = []
                temp_chunks.extend(
                    piece.strip() for piece in break_long_sentence(sentence, max_words)
                )
            elif wc < min_words:
                short_group.append(sentence)
            else:
                # Regular sentence: absorbs any short sentences waiting before it.
                temp_chunks.append(", ".join(short_group + [sentence]).strip())
                short_group = []

        if short_group:
            temp_chunks.append(", ".join(short_group).strip())

        # Tag the final chunk of the paragraph as the paragraph end.
        last_index = len(temp_chunks) - 1
        for i, chunk in enumerate(temp_chunks):
            final_chunks.append((fix_short_sentences(chunk), i == last_index))

    paragraph_buffer = []
    for line in text.splitlines():
        stripped = line.strip()
        if not stripped:
            # A blank line closes the current paragraph.
            flush_paragraph(paragraph_buffer)
            paragraph_buffer = []
        else:
            paragraph_buffer.append(stripped)

    # Flush any remaining paragraph
    if paragraph_buffer:
        flush_paragraph(paragraph_buffer)

    return final_chunks
132
+
133
+
134
+
135
def prechunk_text_file(path, max_words=30, min_words=4):
    """Read a UTF-8 text file and return its chunks as enriched dictionaries.

    The text is passed through smart_punctuate, split with
    sentence_chunk_text, and each chunk is described by its index, stripped
    text, word count, boundary type ("none" when no boundary is detected) and
    paragraph-end flag.
    """
    source_text = smart_punctuate(Path(path).read_text(encoding='utf-8'))
    sentence_chunks = sentence_chunk_text(source_text, max_words=max_words, min_words=min_words)

    return [
        {
            "index": position,
            "text": body.strip(),
            "word_count": len(body.strip().split()),
            "boundary_type": detect_content_boundary(body) or "none",
            "is_paragraph_end": ends_paragraph,
        }
        for position, (body, ends_paragraph) in enumerate(sentence_chunks)
    ]
utils/dirlist.TXT ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
def list_directory_recursively(start_path):
    """
    Print every folder below start_path together with the files it contains.

    Args:
        start_path (str): The path to the directory to start listing from.
    """
    if not os.path.isdir(start_path):
        print(f"Error: '{start_path}' is not a valid directory.")
        return

    print(f"Listing contents of: {start_path}\n")

    # os.walk yields (dirpath, dirnames, filenames) for each directory it visits.
    for current_dir, _subdirs, filenames in os.walk(start_path):
        # The starting directory is shown by its own name, all others by
        # their path relative to start_path.
        relative_dir = os.path.relpath(current_dir, start_path)
        label = os.path.basename(current_dir) if relative_dir == '.' else relative_dir
        print(f"Folder: {label}/")

        # Sorted so the output is stable between runs.
        for filename in sorted(filenames):
            print(f" File: {filename}")

        print()  # blank line between folders
40
+
41
if __name__ == "__main__":
    # Ask for the directory interactively, then print its tree.
    directory_to_list = input(
        "Enter the path to the Linux directory you want to list (e.g., /home/user/documents): "
    )
    list_directory_recursively(directory_to_list)
47
+
48
+ # You can also hardcode a path for testing:
49
+ # list_directory_recursively("/path/to/your/test_directory")
utils/generate_from_json (copy).py ADDED
@@ -0,0 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Direct Audio Generation from JSON Tool
4
+
5
+ This script allows for generating audiobook chunks directly from a pre-existing
6
+ `chunks_info.json` file. It is intended for debugging and testing purposes,
7
+ allowing a user to manually edit the TTS parameters in the JSON file and
8
+ hear the results without the VADER analysis step.
9
+ """
10
+
11
+ import torch
12
+ from pathlib import Path
13
+ import sys
14
+ from concurrent.futures import ThreadPoolExecutor, as_completed
15
+ import time
16
+ from datetime import timedelta
17
+
18
+ # Add project root to path to allow module imports
19
+ project_root = Path(__file__).parent
20
+ sys.path.append(str(project_root))
21
+
22
+ from config.config import *
23
+ from modules.tts_engine import load_optimized_model, process_one_chunk
24
+ from modules.file_manager import setup_book_directories, list_voice_samples, ensure_voice_sample_compatibility
25
+ from wrapper.chunk_loader import load_chunks
26
+ from chatterbox.tts import punc_norm
27
+ from modules.progress_tracker import log_chunk_progress, log_run
28
+
29
def main():
    """Generate audio chunks for one book directly from its chunks_info.json.

    Prompts for the book name and a voice sample, picks cuda/mps/cpu, loads
    the model, deletes the book's existing *.wav chunks and submits every
    chunk to process_one_chunk. Only "exaggeration" is taken from each
    chunk's "tts_params"; cfg_weight and temperature use the defaults.
    Returns None; problems are reported on stdout.
    """
    # The banner previously ended in "\{RESET}", an invalid escape sequence
    # that printed a stray backslash before the reset code.
    print(f"{BOLD}{CYAN}--- Direct Audio Generation from JSON Tool ---{RESET}")

    # 1. Get Book Name
    book_name = input("Enter the book name (e.g., 'london'): ").strip()
    if not book_name:
        print("❌ Book name cannot be empty.")
        return

    # 2. Locate and Load JSON
    book_audio_dir = AUDIOBOOK_ROOT / book_name
    json_path = book_audio_dir / "TTS" / "text_chunks" / "chunks_info.json"

    if not json_path.exists():
        print(f"❌ Error: JSON file not found at {json_path}")
        print("Please ensure you have run the 'Prepare text file' option for this book first.")
        return

    print(f"πŸ“– Loading chunks from: {json_path}")
    all_chunks = load_chunks(str(json_path))
    print(f"βœ… Found {len(all_chunks)} chunks.")

    # 3. Select Voice (menu is 1-based, list index is 0-based)
    voice_files = list_voice_samples()
    if not voice_files:
        print(f"❌ No voice samples found in {VOICE_SAMPLES_DIR}")
        return

    print("\nAvailable voices:")
    for i, voice_file in enumerate(voice_files, 1):
        print(f" [{i}] {voice_file.stem}")

    while True:
        try:
            choice = input("Select voice number: ").strip()
            idx = int(choice) - 1
            if 0 <= idx < len(voice_files):
                voice_path = voice_files[idx]
                break
            print("Invalid selection.")
        except (ValueError, IndexError):
            print("Invalid selection.")

    # Ensure voice compatibility
    voice_path = ensure_voice_sample_compatibility(voice_path)

    # 4. Setup Environment: prefer CUDA, then Apple MPS, then CPU
    if torch.cuda.is_available():
        device = "cuda"
    elif torch.backends.mps.is_available():
        device = "mps"
    else:
        device = "cpu"

    print(f"\nπŸš€ Using device: {device}")
    print(f"🎀 Using voice: {Path(voice_path).name}")

    # 5. Load Model
    model = load_optimized_model(device)

    # 6. Prepare voice conditionals before any chunk is generated
    print(f"🎀 Preparing voice conditionals with: {Path(voice_path).name}")
    model.prepare_conditionals(voice_path)

    # 7. Process Chunks
    output_root, _tts_dir, text_chunks_dir, audio_chunks_dir = setup_book_directories(
        Path(TEXT_INPUT_ROOT) / book_name
    )

    # Clean existing audio chunks so stale files cannot mix with the new run
    print("🧹 Clearing old audio chunks...")
    for wav_file in audio_chunks_dir.glob("*.wav"):
        wav_file.unlink()

    start_time = time.time()
    total_chunks = len(all_chunks)
    log_path = output_root / "debug_generation.log"

    print(f"\nπŸ”„ Generating {total_chunks} chunks...")

    with ThreadPoolExecutor(max_workers=1) as executor:  # Force sequential processing
        futures = []
        for i, chunk_data in enumerate(all_chunks):
            # Extract exaggeration from JSON, force others to default
            chunk_tts_params = {
                "exaggeration": chunk_data.get("tts_params", {}).get("exaggeration", DEFAULT_EXAGGERATION),
                "cfg_weight": DEFAULT_CFG_WEIGHT,
                "temperature": DEFAULT_TEMPERATURE
            }

            # NOTE(review): the positional order must match process_one_chunk's
            # signature, which is defined in modules.tts_engine - confirm there.
            future = executor.submit(
                process_one_chunk,
                i, chunk_data['text'], text_chunks_dir, audio_chunks_dir,
                voice_path, chunk_tts_params, start_time, total_chunks,
                punc_norm, book_name, log_run, log_path, device,
                model, None, chunk_data['is_paragraph_end'], all_chunks, chunk_data['boundary_type']
            )
            futures.append(future)

        for future in as_completed(futures):
            try:
                result = future.result()
                if result:
                    idx, _ = result
                    log_chunk_progress(idx, total_chunks, start_time, 0)
            except Exception as e:
                # One failed chunk is reported; the remaining chunks continue.
                print(f"\n❌ An error occurred while processing a chunk: {e}")

    elapsed_time = time.time() - start_time
    print(f"\n{GREEN}βœ… Generation Complete!{RESET}")
    print(f"⏱️ Total time: {timedelta(seconds=int(elapsed_time))}")
    print(f"πŸ”Š Audio chunks are in: {audio_chunks_dir}")
    print("You can now use Option 3 from the main menu to combine them.")


if __name__ == "__main__":
    main()
utils/generate_from_json.py ADDED
@@ -0,0 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Direct Audio Generation from JSON Tool
4
+
5
+ This script allows for generating audiobook chunks directly from a pre-existing
6
+ `chunks_info.json` file. It is intended for debugging and testing purposes,
7
+ allowing a user to manually edit the TTS parameters in the JSON file and
8
+ hear the results without the VADER analysis step.
9
+ """
10
+
11
+ import torch
12
+ from pathlib import Path
13
+ import sys
14
+ from concurrent.futures import ThreadPoolExecutor, as_completed
15
+ import time
16
+ from datetime import timedelta
17
+
18
+ # Add project root to path to allow module imports
19
+ project_root = Path(__file__).parent
20
+ sys.path.append(str(project_root))
21
+
22
+ from config.config import *
23
+ from modules.tts_engine import load_optimized_model, process_one_chunk
24
+ from modules.file_manager import setup_book_directories, list_voice_samples, ensure_voice_sample_compatibility
25
+ from wrapper.chunk_loader import load_chunks
26
+ from chatterbox.tts import punc_norm
27
+ from modules.progress_tracker import log_chunk_progress, log_run
28
+
29
def main():
    """Generate audio chunks for one book directly from its chunks_info.json.

    Prompts for the book name and a voice sample, picks cuda/mps/cpu, loads
    the model, deletes the book's existing *.wav chunks and submits every
    chunk to process_one_chunk on a two-worker thread pool. Only
    "exaggeration" is taken from each chunk's "tts_params"; cfg_weight and
    temperature use the defaults. Returns None; problems are reported on stdout.
    """
    # The banner previously ended in "\{RESET}", an invalid escape sequence
    # that printed a stray backslash before the reset code.
    print(f"{BOLD}{CYAN}--- Direct Audio Generation from JSON Tool ---{RESET}")

    # 1. Get Book Name
    book_name = input("Enter the book name (e.g., 'london'): ").strip()
    if not book_name:
        print("❌ Book name cannot be empty.")
        return

    # 2. Locate and Load JSON
    book_audio_dir = AUDIOBOOK_ROOT / book_name
    json_path = book_audio_dir / "TTS" / "text_chunks" / "chunks_info.json"

    if not json_path.exists():
        print(f"❌ Error: JSON file not found at {json_path}")
        print("Please ensure you have run the 'Prepare text file' option for this book first.")
        return

    print(f"πŸ“– Loading chunks from: {json_path}")
    all_chunks = load_chunks(str(json_path))
    print(f"βœ… Found {len(all_chunks)} chunks.")

    # 3. Select Voice (menu is 1-based, list index is 0-based)
    voice_files = list_voice_samples()
    if not voice_files:
        print(f"❌ No voice samples found in {VOICE_SAMPLES_DIR}")
        return

    print("\nAvailable voices:")
    for i, voice_file in enumerate(voice_files, 1):
        print(f" [{i}] {voice_file.stem}")

    while True:
        try:
            choice = input("Select voice number: ").strip()
            idx = int(choice) - 1
            if 0 <= idx < len(voice_files):
                voice_path = voice_files[idx]
                break
            print("Invalid selection.")
        except (ValueError, IndexError):
            print("Invalid selection.")

    # Ensure voice compatibility
    voice_path = ensure_voice_sample_compatibility(voice_path)

    # 4. Setup Environment: prefer CUDA, then Apple MPS, then CPU
    if torch.cuda.is_available():
        device = "cuda"
    elif torch.backends.mps.is_available():
        device = "mps"
    else:
        device = "cpu"

    print(f"\nπŸš€ Using device: {device}")
    print(f"🎀 Using voice: {Path(voice_path).name}")

    # 5. Load Model
    model = load_optimized_model(device)

    # 6. Prepare voice conditionals before any chunk is generated
    print(f"🎀 Preparing voice conditionals with: {Path(voice_path).name}")
    model.prepare_conditionals(voice_path)

    # 7. Process Chunks
    output_root, _tts_dir, text_chunks_dir, audio_chunks_dir = setup_book_directories(
        Path(TEXT_INPUT_ROOT) / book_name
    )

    # Clean existing audio chunks so stale files cannot mix with the new run
    print("🧹 Clearing old audio chunks...")
    for wav_file in audio_chunks_dir.glob("*.wav"):
        wav_file.unlink()

    start_time = time.time()
    total_chunks = len(all_chunks)
    log_path = output_root / "debug_generation.log"

    print(f"\nπŸ”„ Generating {total_chunks} chunks...")

    # NOTE(review): both workers share the single model instance - confirm that
    # process_one_chunk is safe to run concurrently on one model.
    with ThreadPoolExecutor(max_workers=2) as executor:  # Test parallel processing
        futures = []
        for i, chunk_data in enumerate(all_chunks):
            # Extract exaggeration from JSON, force others to default
            chunk_tts_params = {
                "exaggeration": chunk_data.get("tts_params", {}).get("exaggeration", DEFAULT_EXAGGERATION),
                "cfg_weight": DEFAULT_CFG_WEIGHT,
                "temperature": DEFAULT_TEMPERATURE
            }

            # NOTE(review): sibling copies of this script pass
            # chunk_data['is_paragraph_end'] between None and all_chunks; this
            # call does not. Confirm the order against process_one_chunk's
            # signature in modules.tts_engine.
            future = executor.submit(
                process_one_chunk,
                i, chunk_data['text'], text_chunks_dir, audio_chunks_dir,
                voice_path, chunk_tts_params, start_time, total_chunks,
                punc_norm, book_name, log_run, log_path, device,
                model, None, all_chunks, chunk_data['boundary_type']
            )
            futures.append(future)

        for future in as_completed(futures):
            try:
                result = future.result()
                if result:
                    idx, _ = result
                    log_chunk_progress(idx, total_chunks, start_time, 0)
            except Exception as e:
                # One failed chunk is reported; the remaining chunks continue.
                print(f"\n❌ An error occurred while processing a chunk: {e}")

    elapsed_time = time.time() - start_time
    print(f"\n{GREEN}βœ… Generation Complete!{RESET}")
    print(f"⏱️ Total time: {timedelta(seconds=int(elapsed_time))}")
    print(f"πŸ”Š Audio chunks are in: {audio_chunks_dir}")
    print("You can now use Option 3 from the main menu to combine them.")


if __name__ == "__main__":
    main()
utils/generate_from_json.py.bak ADDED
@@ -0,0 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Direct Audio Generation from JSON Tool
4
+
5
+ This script allows for generating audiobook chunks directly from a pre-existing
6
+ `chunks_info.json` file. It is intended for debugging and testing purposes,
7
+ allowing a user to manually edit the TTS parameters in the JSON file and
8
+ hear the results without the VADER analysis step.
9
+ """
10
+
11
+ import torch
12
+ from pathlib import Path
13
+ import sys
14
+ from concurrent.futures import ThreadPoolExecutor, as_completed
15
+ import time
16
+ from datetime import timedelta
17
+
18
+ # Add project root to path to allow module imports
19
+ project_root = Path(__file__).parent
20
+ sys.path.append(str(project_root))
21
+
22
+ from config.config import *
23
+ from modules.tts_engine import load_optimized_model, process_one_chunk
24
+ from modules.file_manager import setup_book_directories, list_voice_samples, ensure_voice_sample_compatibility
25
+ from wrapper.chunk_loader import load_chunks
26
+ from chatterbox.tts import punc_norm
27
+ from modules.progress_tracker import log_chunk_progress, log_run
28
+
29
def main():
    """Generate audio chunks for one book directly from its chunks_info.json.

    Prompts for the book name and a voice sample, picks cuda/mps/cpu, loads
    the model, deletes the book's existing *.wav chunks and submits every
    chunk to process_one_chunk on a two-worker thread pool. Only
    "exaggeration" is taken from each chunk's "tts_params"; cfg_weight and
    temperature use the defaults. Returns None; problems are reported on stdout.
    """
    # The banner previously ended in "\{RESET}", an invalid escape sequence
    # that printed a stray backslash before the reset code.
    print(f"{BOLD}{CYAN}--- Direct Audio Generation from JSON Tool ---{RESET}")

    # 1. Get Book Name
    book_name = input("Enter the book name (e.g., 'london'): ").strip()
    if not book_name:
        print("❌ Book name cannot be empty.")
        return

    # 2. Locate and Load JSON
    book_audio_dir = AUDIOBOOK_ROOT / book_name
    json_path = book_audio_dir / "TTS" / "text_chunks" / "chunks_info.json"

    if not json_path.exists():
        print(f"❌ Error: JSON file not found at {json_path}")
        print("Please ensure you have run the 'Prepare text file' option for this book first.")
        return

    print(f"πŸ“– Loading chunks from: {json_path}")
    all_chunks = load_chunks(str(json_path))
    print(f"βœ… Found {len(all_chunks)} chunks.")

    # 3. Select Voice (menu is 1-based, list index is 0-based)
    voice_files = list_voice_samples()
    if not voice_files:
        print(f"❌ No voice samples found in {VOICE_SAMPLES_DIR}")
        return

    print("\nAvailable voices:")
    for i, voice_file in enumerate(voice_files, 1):
        print(f" [{i}] {voice_file.stem}")

    while True:
        try:
            choice = input("Select voice number: ").strip()
            idx = int(choice) - 1
            if 0 <= idx < len(voice_files):
                voice_path = voice_files[idx]
                break
            print("Invalid selection.")
        except (ValueError, IndexError):
            print("Invalid selection.")

    # Ensure voice compatibility
    voice_path = ensure_voice_sample_compatibility(voice_path)

    # 4. Setup Environment: prefer CUDA, then Apple MPS, then CPU
    if torch.cuda.is_available():
        device = "cuda"
    elif torch.backends.mps.is_available():
        device = "mps"
    else:
        device = "cpu"

    print(f"\nπŸš€ Using device: {device}")
    print(f"🎀 Using voice: {Path(voice_path).name}")

    # 5. Load Model
    model = load_optimized_model(device)

    # 6. Prepare voice conditionals before any chunk is generated
    print(f"🎀 Preparing voice conditionals with: {Path(voice_path).name}")
    model.prepare_conditionals(voice_path)

    # 7. Process Chunks
    output_root, _tts_dir, text_chunks_dir, audio_chunks_dir = setup_book_directories(
        Path(TEXT_INPUT_ROOT) / book_name
    )

    # Clean existing audio chunks so stale files cannot mix with the new run
    print("🧹 Clearing old audio chunks...")
    for wav_file in audio_chunks_dir.glob("*.wav"):
        wav_file.unlink()

    start_time = time.time()
    total_chunks = len(all_chunks)
    log_path = output_root / "debug_generation.log"

    print(f"\nπŸ”„ Generating {total_chunks} chunks...")

    # NOTE(review): both workers share the single model instance - confirm that
    # process_one_chunk is safe to run concurrently on one model.
    with ThreadPoolExecutor(max_workers=2) as executor:  # Test parallel processing
        futures = []
        for i, chunk_data in enumerate(all_chunks):
            # Extract exaggeration from JSON, force others to default
            chunk_tts_params = {
                "exaggeration": chunk_data.get("tts_params", {}).get("exaggeration", DEFAULT_EXAGGERATION),
                "cfg_weight": DEFAULT_CFG_WEIGHT,
                "temperature": DEFAULT_TEMPERATURE
            }

            # NOTE(review): the positional order must match process_one_chunk's
            # signature, which is defined in modules.tts_engine - confirm there.
            future = executor.submit(
                process_one_chunk,
                i, chunk_data['text'], text_chunks_dir, audio_chunks_dir,
                voice_path, chunk_tts_params, start_time, total_chunks,
                punc_norm, book_name, log_run, log_path, device,
                model, None, chunk_data['is_paragraph_end'], all_chunks, chunk_data['boundary_type']
            )
            futures.append(future)

        for future in as_completed(futures):
            try:
                result = future.result()
                if result:
                    idx, _ = result
                    log_chunk_progress(idx, total_chunks, start_time, 0)
            except Exception as e:
                # One failed chunk is reported; the remaining chunks continue.
                print(f"\n❌ An error occurred while processing a chunk: {e}")

    elapsed_time = time.time() - start_time
    print(f"\n{GREEN}βœ… Generation Complete!{RESET}")
    print(f"⏱️ Total time: {timedelta(seconds=int(elapsed_time))}")
    print(f"πŸ”Š Audio chunks are in: {audio_chunks_dir}")
    print("You can now use Option 3 from the main menu to combine them.")


if __name__ == "__main__":
    main()
utils/prechunktest.TXT ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ from chunk_manager import prechunk_text_file, save_chunks_to_json
2
+
3
# Ad-hoc smoke test: pre-chunk one text file and dump the chunks to JSON.
# NOTE(review): the input path is absolute and machine-specific while the
# output path is relative to the current working directory.
source_text_path = "/home/danno/MyApps/chatterbox/Text_Input/test.txt"
chunks_json_path = "Text_Input/my_book_chunks.json"

chunks = prechunk_text_file(source_text_path, max_words=30, min_words=4)
save_chunks_to_json(chunks, chunks_json_path)
utils/resume_handler.TXT ADDED
@@ -0,0 +1,525 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Resume Handler Module
3
+ Handles resume functionality for interrupted processing
4
+ """
5
+
6
+ import torch
7
+ import time
8
+ import logging
9
+ from datetime import timedelta
10
+ from pathlib import Path
11
+
12
+ from config import *
13
+ from modules.text_processor import smart_punctuate, sentence_chunk_text
14
+ from modules.file_manager import (
15
+ setup_book_directories, find_book_files, list_voice_samples,
16
+ ensure_voice_sample_compatibility, get_audio_files_in_directory,
17
+ combine_audio_chunks, convert_to_m4b, add_metadata_to_m4b
18
+ )
19
+ from modules.audio_processor import get_chunk_audio_duration, pause_for_chunk_review
20
+ from modules.progress_tracker import setup_logging, log_chunk_progress, log_run
21
+
22
def analyze_existing_chunks(audio_chunks_dir):
    """Inspect a chunk directory and report how far generation got.

    Args:
        audio_chunks_dir: Path-like object (must support ``.exists()``) that
            is expected to hold files named ``chunk_<number>.wav``.

    Returns:
        A ``(last_chunk_number, missing_chunks)`` tuple. ``last_chunk_number``
        is the highest chunk number found (0 when the directory is missing or
        holds no recognisable chunks) and ``missing_chunks`` is the ascending
        list of numbers in ``1..last_chunk_number`` with no file.
    """
    # Imported locally because this module does not import `re` at file level.
    import re

    if not audio_chunks_dir.exists():
        return 0, []

    chunk_paths = get_audio_files_in_directory(audio_chunks_dir)
    if not chunk_paths:
        return 0, []

    # Compile once instead of re-importing/re-matching per file.
    chunk_name_re = re.compile(r"chunk_(\d+)\.wav")
    chunk_numbers = sorted(
        int(found.group(1))
        for found in (chunk_name_re.match(path.name) for path in chunk_paths)
        if found
    )
    if not chunk_numbers:
        return 0, []

    last_chunk_number = chunk_numbers[-1]

    # Set membership keeps the gap scan linear; the original tested against
    # the list, which is quadratic for books with thousands of chunks.
    present = set(chunk_numbers)
    missing_chunks = [n for n in range(1, last_chunk_number + 1) if n not in present]

    print(f"πŸ“Š Existing chunks analysis:")
    print(f" Total chunks found: {GREEN}{len(chunk_numbers)}{RESET}")
    print(f" Highest chunk number: {GREEN}{last_chunk_number}{RESET}")
    if missing_chunks:
        print(f" Missing chunks: {YELLOW}{len(missing_chunks)}{RESET}")
        if len(missing_chunks) <= 10:
            print(f" Missing: {missing_chunks}")
        else:
            print(f" Missing: {missing_chunks[:10]}... (+{len(missing_chunks)-10} more)")

    return last_chunk_number, missing_chunks
63
+
64
def suggest_resume_point(last_chunk, missing_chunks):
    """Pick the chunk number to resume from.

    With no gaps the answer is simply the chunk after ``last_chunk``.
    When gaps exist, both options are printed and the lowest missing
    chunk number is returned so the gaps get filled first.
    """
    next_chunk = last_chunk + 1
    if missing_chunks:
        earliest_gap = min(missing_chunks)

        print(f"\nπŸ’‘ Resume suggestions:")
        print(f" Resume from chunk {GREEN}{next_chunk}{RESET} (continue from last)")
        print(f" Resume from chunk {YELLOW}{earliest_gap}{RESET} (fill gaps first)")

        return earliest_gap

    # Contiguous sequence: carry on right after the last finished chunk.
    return next_chunk
78
+
79
def validate_resume_point(start_chunk, total_expected_chunks):
    """Return True when ``start_chunk`` lies in ``1..total_expected_chunks``.

    An out-of-range value prints a red error message and yields False.
    """
    complaint = None
    if start_chunk < 1:
        complaint = f"{RED}❌ Invalid resume point: {start_chunk}. Must be >= 1{RESET}"
    elif start_chunk > total_expected_chunks:
        complaint = f"{RED}❌ Resume point {start_chunk} exceeds expected total chunks {total_expected_chunks}{RESET}"

    if complaint is None:
        return True

    print(complaint)
    return False
90
+
91
def process_book_folder_resume(book_dir, voice_path, tts_params, device, start_chunk=1):
    """Generate an audiobook for ``book_dir``, optionally resuming mid-way.

    The book text is re-chunked from scratch, chunks before ``start_chunk``
    are assumed to exist on disk already, and the remaining chunks are
    synthesised in batches of ``BATCH_SIZE`` with a freshly loaded model per
    batch. Afterwards every chunk file found on disk is combined into one
    WAV, converted to M4B and tagged.

    Args:
        book_dir: Path of the book's input folder (its ``.name`` is used for
            output naming).
        voice_path: Path of the voice sample.
        tts_params: Mapping with ``exaggeration``, ``cfg_weight`` and
            ``temperature`` keys.
        device: Device string handed to the TTS model loader.
        start_chunk: 1-based chunk number to resume from; ``1`` means a fresh
            run and wipes the existing text/audio chunk directories.

    Returns:
        ``(final_m4b_path, combined_wav_path, run_log_lines)`` on success, or
        ``(None, None, [])`` when there is no input text, the resume point is
        invalid, or no audio chunk exists at the end.
    """
    import gc
    import shutil
    from modules.tts_engine import process_one_chunk, load_optimized_model, get_optimal_workers
    from chatterbox.tts import punc_norm
    from concurrent.futures import ThreadPoolExecutor, as_completed

    # Setup directories
    output_root, tts_dir, text_chunks_dir, audio_chunks_dir = setup_book_directories(book_dir)

    # Find book files
    text_files, cover_file, nfo_file = find_book_files(book_dir)

    if not text_files:
        logging.info(f"[{book_dir.name}] ERROR: No .txt files found in the book folder.")
        return None, None, []

    # Only a fresh start wipes earlier output; a resume must keep it.
    if start_chunk == 1:
        for d in (text_chunks_dir, audio_chunks_dir):
            if d.exists() and d.is_dir():
                shutil.rmtree(d)

    for d in (output_root, tts_dir, text_chunks_dir, audio_chunks_dir):
        d.mkdir(parents=True, exist_ok=True)

    setup_logging(output_root)

    # Re-chunk the whole book so chunk numbering matches the original run.
    all_chunks = []
    for tf in text_files:
        with open(tf, 'r', encoding='utf-8') as f:
            raw = f.read()
        smart = smart_punctuate(raw)
        chunks = sentence_chunk_text(smart, max_words=MAX_CHUNK_WORDS, min_words=MIN_CHUNK_WORDS)
        for chunk_text, is_para_end in chunks:
            all_chunks.append({
                "text": chunk_text,
                "is_paragraph_end": is_para_end
            })

    # Validate resume point
    if not validate_resume_point(start_chunk, len(all_chunks)):
        return None, None, []

    # Resume logic: everything before start_chunk is skipped.
    chunk_offset = start_chunk - 1
    chunks_to_process = all_chunks[chunk_offset:]
    existing_chunk_paths = []

    if start_chunk > 1:
        print(f"πŸ”„ Resuming from chunk {start_chunk}")
        print(f"πŸ“Š Skipping chunks 1-{start_chunk-1} (already completed)")

        # Check which of the skipped chunks are actually on disk.
        for number in range(1, start_chunk):
            chunk_path = audio_chunks_dir / f"chunk_{number:05}.wav"
            if chunk_path.exists():
                existing_chunk_paths.append(chunk_path)

        print(f"βœ… Found {len(existing_chunk_paths)} existing chunks")

    run_log_lines = [
        f"\n===== RESUME Processing: {book_dir.name} =====",
        f"Voice: {voice_path.name}",
        f"Started: {time.strftime('%Y-%m-%d %H:%M:%S')}",
        f"Resume from chunk: {start_chunk}",
        f"Text files processed: {len(text_files)}",
        f"Total chunks generated: {len(all_chunks)}",
        f"Chunks to process: {len(chunks_to_process)}"
    ]

    # Write initial run info immediately so an interrupted run leaves a trace.
    initial_log = run_log_lines + [
        f"--- Generation Settings ---",
        f"Batch Processing: Enabled ({BATCH_SIZE} chunks per batch)",
        f"ASR Enabled: {ENABLE_ASR}",
        f"Hum Detection: {ENABLE_HUM_DETECTION}",
        f"Dynamic Workers: {USE_DYNAMIC_WORKERS}",
        f"Voice used: {voice_path.name}",
        f"Exaggeration: {tts_params['exaggeration']}",
        f"CFG weight: {tts_params['cfg_weight']}",
        f"Temperature: {tts_params['temperature']}",
        f"Processing Status: IN PROGRESS...",
        "=" * 50
    ]

    log_run("\n".join(initial_log), output_root / "run.log")
    print(f"πŸ“ Initial run info written to: {output_root / 'run.log'}")

    start_time = time.time()
    total_chunks = len(all_chunks)
    remaining_chunks = len(chunks_to_process)
    log_path = output_root / "chunk_validation.log"

    # Seed the running duration with the audio that already exists so the
    # progress display stays accurate on resume.
    total_audio_duration = 0.0
    if start_chunk > 1:
        print("πŸ“Š Calculating existing audio duration...")
        for chunk_path in existing_chunk_paths:
            total_audio_duration += get_chunk_audio_duration(chunk_path)
        print(f"πŸ“Š Existing audio: {timedelta(seconds=int(total_audio_duration))}")

    # Batch processing for remaining chunks
    print(f"πŸ“Š Processing {remaining_chunks} remaining chunks in batches of {BATCH_SIZE}")

    all_results = []

    # Built once and shared by every submitted task; the original rebuilt
    # this full list for every single chunk.
    all_chunk_texts = [cd["text"] for cd in all_chunks]

    # Whisper previously always loaded on "cuda", which fails on CPU-only
    # hosts. Any non-CUDA device falls back to CPU for ASR.
    asr_device = "cuda" if str(device).startswith("cuda") else "cpu"

    for batch_start in range(0, remaining_chunks, BATCH_SIZE):
        batch_end = min(batch_start + BATCH_SIZE, remaining_chunks)
        batch_chunks = chunks_to_process[batch_start:batch_end]

        actual_start_chunk = chunk_offset + batch_start + 1
        actual_end_chunk = chunk_offset + batch_end

        print(f"\nπŸ”„ Processing batch: chunks {actual_start_chunk}-{actual_end_chunk}")

        # Fresh model for each batch
        model = load_optimized_model(device)
        compatible_voice = ensure_voice_sample_compatibility(voice_path, output_dir=tts_dir)
        model.prepare_conditionals(compatible_voice, exaggeration=tts_params['exaggeration'])

        # Load ASR model once per batch if needed
        asr_model = None
        if ENABLE_ASR:
            import whisper
            print(f"🎀 Loading Whisper ASR model for batch...")
            asr_model = whisper.load_model("base", device=asr_device)

        futures = []
        batch_results = []
        stop_requested = False

        # Dynamic worker allocation
        optimal_workers = get_optimal_workers()
        print(f"πŸ”§ Using {optimal_workers} workers for batch {actual_start_chunk}-{actual_end_chunk}")

        with ThreadPoolExecutor(max_workers=optimal_workers) as executor:
            for i, chunk_data in enumerate(batch_chunks):
                global_chunk_index = chunk_offset + batch_start + i

                # NOTE(review): `shutdown_requested` is not defined in this
                # block; presumably it arrives via `from config import *`.
                # If so it is an import-time snapshot and may never change
                # here - confirm how the flag is set.
                if shutdown_requested:
                    print(f"\n⏹️ {YELLOW}Stopping submission of new chunks...{RESET}")
                    stop_requested = True
                    break

                chunk = chunk_data["text"]
                is_paragraph_end = chunk_data.get("is_paragraph_end", False)

                futures.append(executor.submit(
                    process_one_chunk,
                    global_chunk_index, chunk, text_chunks_dir, audio_chunks_dir,
                    voice_path, tts_params, start_time, total_chunks,
                    punc_norm, book_dir.name, log_run, log_path, device,
                    model, asr_model, is_paragraph_end, all_chunk_texts
                ))

            # Wait for batch to complete
            print(f"πŸ”„ {CYAN}Waiting for batch {actual_start_chunk}-{actual_end_chunk} to complete...{RESET}")
            completed_count = 0

            for fut in as_completed(futures):
                try:
                    idx, wav_path = fut.result()
                    if wav_path and wav_path.exists():
                        # Measure actual audio duration for this chunk
                        chunk_duration = get_chunk_audio_duration(wav_path)
                        total_audio_duration += chunk_duration
                        batch_results.append((idx, wav_path))

                        # Update progress every 10 chunks within batch
                        completed_count += 1
                        if completed_count % 10 == 0:
                            current_chunk = chunk_offset + batch_start + completed_count
                            log_chunk_progress(current_chunk - 1, total_chunks, start_time, total_audio_duration)

                except Exception as e:
                    # One failed chunk must not abort the batch; it will be
                    # reported as missing during final assembly.
                    logging.error(f"Future failed in batch: {e}")

        # Release model memory before the next batch loads a new one.
        print(f"🧹 Cleaning up after batch {actual_start_chunk}-{actual_end_chunk}")
        del model
        if asr_model is not None:
            del asr_model
        torch.cuda.empty_cache()
        gc.collect()
        time.sleep(2)

        all_results.extend(batch_results)
        print(f"βœ… Batch {actual_start_chunk}-{actual_end_chunk} completed ({len(batch_results)} chunks)")

        # Without this, every remaining batch would still reload the model
        # only to stop again at its first chunk.
        if stop_requested:
            break

    # Final processing - combine ALL chunks (existing + new)
    quarantine_dir = audio_chunks_dir / "quarantine"
    pause_for_chunk_review(quarantine_dir)

    # Collect ALL chunk paths (both existing and newly created)
    chunk_paths = []
    for i in range(total_chunks):
        chunk_path = audio_chunks_dir / f"chunk_{i+1:05}.wav"
        if chunk_path.exists():
            chunk_paths.append(chunk_path)
        else:
            logging.warning(f"Missing chunk file: chunk_{i+1:05}.wav")

    if not chunk_paths:
        logging.info(f"{RED}❌ No valid audio chunks found. Skipping concatenation and conversion.{RESET}")
        return None, None, []

    print(f"πŸ“Š Found {len(chunk_paths)} total chunks for final audiobook")

    # Calculate timing
    elapsed_total = time.time() - start_time
    elapsed_td = timedelta(seconds=int(elapsed_total))

    # Get total audio duration from ALL chunks
    total_audio_duration_final = sum(get_chunk_audio_duration(chunk_path) for chunk_path in chunk_paths)
    audio_duration_td = timedelta(seconds=int(total_audio_duration_final))
    realtime_factor = total_audio_duration_final / elapsed_total if elapsed_total > 0 else 0.0

    print(f"\n⏱️ Resume Processing Complete:")
    print(f" Elapsed Time: {CYAN}{str(elapsed_td)}{RESET}")
    print(f" Audio Duration: {GREEN}{str(audio_duration_td)}{RESET}")
    print(f" Realtime Factor: {YELLOW}{realtime_factor:.2f}x{RESET}")

    # Combine audio
    combined_wav_path = output_root / f"{book_dir.name} [{voice_path.stem}].wav"
    print("\nπŸ’Ύ Saving WAV file...")
    combine_audio_chunks(chunk_paths, combined_wav_path)

    # M4B conversion
    temp_m4b_path = output_root / "output.m4b"
    final_m4b_path = output_root / f"{book_dir.name}[{voice_path.stem}].m4b"
    convert_to_m4b(combined_wav_path, temp_m4b_path)
    add_metadata_to_m4b(temp_m4b_path, final_m4b_path, cover_file, nfo_file)

    logging.info(f"Audiobook created: {final_m4b_path}")

    # Append final completion info
    completion_log = [
        f"\n--- Resume Processing Complete ---",
        f"Completed: {time.strftime('%Y-%m-%d %H:%M:%S')}",
        f"Processing Time: {str(elapsed_td)}",
        f"Audio Duration: {str(audio_duration_td)}",
        f"Realtime Factor: {realtime_factor:.2f}x",
        f"Total Chunks: {len(chunk_paths)}",
        f"Combined WAV: {combined_wav_path}",
        f"Final M4B: {final_m4b_path}"
    ]

    # Append to existing log
    log_run("\n".join(completion_log), output_root / "run.log")
    print(f"πŸ“ Final completion info appended to: {output_root / 'run.log'}")

    return final_m4b_path, combined_wav_path, run_log_lines
357
+
358
def _prompt_for_index(prompt, count):
    """Ask on stdin until the user enters an integer in ``range(count)``."""
    while True:
        # Only a non-numeric entry is retried. EOFError (closed stdin) is
        # deliberately NOT caught: swallowing it made this loop spin forever.
        try:
            choice = int(input(prompt))
        except ValueError:
            choice = -1
        if 0 <= choice < count:
            return choice
        print("Invalid selection. Try again.")


def resume_book_from_chunk(start_chunk):
    """Interactively pick a book, voice and TTS settings, then resume it.

    Args:
        start_chunk: Default chunk number to resume from. When the selected
            book already has an audio chunk directory, the user is shown a
            suggested resume point and the value they confirm or type
            replaces this argument.

    Returns:
        Whatever ``process_book_folder_resume`` returns, or ``None`` when no
        book folders or no voice samples are available.
    """
    print(f"\nπŸ”„ Resume Book Processing from Chunk {start_chunk}")
    print("=" * 50)

    # Show available books
    book_dirs = sorted([d for d in TEXT_INPUT_ROOT.iterdir() if d.is_dir()])
    if not book_dirs:
        print(f"{RED}No folders found in Text_Input/.{RESET}")
        return None

    print("Available books:")
    for i, book in enumerate(book_dirs):
        # Check if book has existing processing
        audiobook_dir = AUDIOBOOK_ROOT / book.name
        if audiobook_dir.exists():
            audio_chunks_dir = audiobook_dir / "TTS" / "audio_chunks"
            if audio_chunks_dir.exists():
                last_chunk, missing = analyze_existing_chunks(audio_chunks_dir)
                status = f"(last chunk: {last_chunk})"
            else:
                status = "(no existing chunks)"
        else:
            status = "(not started)"

        print(f" [{i}] {book.name} {status}")

    book_dir = book_dirs[_prompt_for_index("Select book index: ", len(book_dirs))]

    # Analyze existing chunks for selected book
    audiobook_dir = AUDIOBOOK_ROOT / book_dir.name
    if audiobook_dir.exists():
        audio_chunks_dir = audiobook_dir / "TTS" / "audio_chunks"
        if audio_chunks_dir.exists():
            last_chunk, missing = analyze_existing_chunks(audio_chunks_dir)
            suggested_resume = suggest_resume_point(last_chunk, missing)

            print(f"\nSuggested resume point: {GREEN}{suggested_resume}{RESET}")

            # Allow user to override; empty input accepts the suggestion.
            user_input = input(f"Resume from chunk [{suggested_resume}]: ").strip()
            if user_input:
                try:
                    start_chunk = int(user_input)
                except ValueError:
                    print(f"Invalid input, using suggested: {suggested_resume}")
                    start_chunk = suggested_resume
            else:
                start_chunk = suggested_resume

    # Show available voices
    voice_files = list_voice_samples()
    if not voice_files:
        print(f"{RED}No voice samples found.{RESET}")
        return None

    print("\nAvailable voices:")
    for i, voice in enumerate(voice_files):
        print(f" [{i}] {voice.name}")

    voice_path = voice_files[_prompt_for_index("Select voice index: ", len(voice_files))]

    # Get TTS parameters
    def prompt_float(prompt, default):
        val = input(f"{prompt} [{default}]: ").strip()
        if not val:
            return default
        try:
            return float(val)
        except ValueError:
            # Mirror the resume-point prompt: bad input falls back to the
            # default instead of crashing the whole session.
            print(f"Invalid input, using default: {default}")
            return default

    exaggeration = prompt_float("Enter exaggeration (emotion intensity)", 0.5)
    cfg_weight = prompt_float("Enter cfg_weight (faithfulness to text)", 0.2)
    temperature = prompt_float("Enter temperature (randomness)", 0.2)

    tts_params = dict(exaggeration=exaggeration, cfg_weight=cfg_weight, temperature=temperature)

    # Determine device. `torch.backends.mps` is looked up defensively because
    # the attribute does not exist on older PyTorch builds.
    mps_backend = getattr(torch.backends, "mps", None)
    if torch.cuda.is_available():
        device = "cuda"
    elif mps_backend is not None and mps_backend.is_available():
        device = "mps"
    else:
        device = "cpu"

    print(f"\nπŸš€ Resuming {book_dir.name} from chunk {start_chunk}")
    print(f"🎀 Voice: {voice_path.name}")
    print(f"βš™οΈ Parameters: {tts_params}")

    # Process with resume
    return process_book_folder_resume(book_dir, voice_path, tts_params, device, start_chunk)
461
+
462
def find_incomplete_books():
    """List books that have audio chunks on disk but no finished output.

    A book counts as incomplete when its audiobook folder contains a
    ``TTS/audio_chunks`` directory with at least one numbered chunk, yet no
    ``*.m4b`` or ``*.wav`` file sits directly in the audiobook folder.

    Returns:
        A list of dicts with ``name``, ``last_chunk``, ``missing_chunks``
        (a count) and ``path`` keys, in directory-iteration order.
    """
    unfinished = []

    for source_dir in TEXT_INPUT_ROOT.iterdir():
        output_dir = AUDIOBOOK_ROOT / source_dir.name
        chunks_dir = output_dir / "TTS" / "audio_chunks"

        # Skip anything that is not a started book.
        if not (source_dir.is_dir() and output_dir.exists() and chunks_dir.exists()):
            continue

        # A final M4B or combined WAV means the book was finished.
        has_final_output = any(output_dir.glob("*.m4b")) or any(output_dir.glob("*.wav"))
        if has_final_output:
            continue

        highest, gaps = analyze_existing_chunks(chunks_dir)
        if highest > 0:
            unfinished.append({
                "name": source_dir.name,
                "last_chunk": highest,
                "missing_chunks": len(gaps),
                "path": source_dir
            })

    return unfinished
494
+
495
def auto_resume_incomplete():
    """List incomplete books and start an interactive resume for one of them.

    Returns:
        The result of ``resume_book_from_chunk`` for a valid selection,
        otherwise ``None`` (nothing to resume, user quit, or bad selection).
    """
    incomplete = find_incomplete_books()

    if not incomplete:
        print(f"{GREEN}βœ… No incomplete books found!{RESET}")
        return None

    print(f"{YELLOW}πŸ“‹ Found {len(incomplete)} incomplete books:{RESET}")
    for i, book in enumerate(incomplete):
        print(f" [{i}] {book['name']} (last chunk: {book['last_chunk']}, missing: {book['missing_chunks']})")

    choice = input(f"\nSelect book to resume [0-{len(incomplete)-1}] or 'q' to quit: ").strip()

    if choice.lower() == 'q':
        return None

    # Keep the try body to the parse only. The original wrapped the whole
    # resume run, so any ValueError raised deep inside processing was
    # swallowed and misreported as "Invalid selection.".
    try:
        idx = int(choice)
    except ValueError:
        print("Invalid selection.")
        return None

    if not 0 <= idx < len(incomplete):
        # Previously an out-of-range index returned silently.
        print("Invalid selection.")
        return None

    selected_book = incomplete[idx]
    suggested_resume = selected_book['last_chunk'] + 1

    print(f"\n🎯 Selected: {selected_book['name']}")
    print(f"πŸ’‘ Suggested resume point: chunk {suggested_resume}")

    # NOTE(review): the selected book itself is not passed on, so
    # resume_book_from_chunk asks the user to pick a book again.
    return resume_book_from_chunk(suggested_resume)
utils/text_cleaner.TXT ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # text_cleaner.py
2
+
3
+ import re
4
+
5
def smart_punctuate(text):
    """Normalize line endings, quotes and light markup for TTS input.

    Each non-empty line is stripped and given a trailing period unless it
    already ends in ``.``, ``!`` or ``?``. Blank lines are kept as paragraph
    breaks. Curly quotes become ASCII quotes, ``**bold**`` markers are
    removed, and runs of two or more underscores are deleted.

    Args:
        text: Raw input text.

    Returns:
        The cleaned text with lines joined by ``\\n``.
    """
    out = []
    for line in text.splitlines():
        stripped = line.strip()

        if not stripped:
            out.append("")  # preserve empty lines (paragraph breaks)
        elif re.search(r'[.!?]$', stripped):
            out.append(stripped)
        else:
            out.append(stripped + ".")

    result = "\n".join(out)

    # Curly quotes are written as Unicode escapes so the replacements cannot
    # be silently broken by a file-encoding round trip, which is what
    # happened to the literal characters used here before.
    result = result.replace('\u201c', '"').replace('\u201d', '"')
    result = result.replace('\u2018', "'").replace('\u2019', "'")

    result = re.sub(r'\*\*([^*]+)\*\*', r'\1', result)  # remove markdown bold
    result = re.sub(r'_{2,}', '', result)  # remove underlines

    return result
utils/text_processor.TXT ADDED
@@ -0,0 +1,449 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Text Processing Module
3
+ Handles text chunking, abbreviations, and preprocessing for TTS
4
+ """
5
+
6
+ import re
7
+ import logging
8
+ from pathlib import Path
9
+ from config import *
10
+
11
+ # ============================================================================
12
+ # ABBREVIATION REPLACEMENT SYSTEM
13
+ # ============================================================================
14
+
15
def load_abbreviations(file_path="abbreviations.txt"):
    """Read ``abbrev -> replacement`` rules from a text file into a dict.

    Blank lines and lines starting with ``#`` are ignored; any other line
    lacking the `` -> `` separator is reported and skipped. If the file does
    not exist, a sample file is created at that path and an empty dict is
    returned for this call. Read errors are printed, not raised.
    """
    mapping = {}
    source = Path(file_path)

    if not source.exists():
        print(f"⚠️ {YELLOW}Abbreviations file not found: {file_path}{RESET}")
        print(f"πŸ“ Creating sample file...")
        create_sample_abbreviations_file(source)
        return mapping

    try:
        with source.open('r', encoding='utf-8') as handle:
            for number, raw_line in enumerate(handle, start=1):
                entry = raw_line.strip()

                # Comments and blank lines carry no rule.
                if entry == "" or entry.startswith('#'):
                    continue

                # Split on the first arrow only, so a replacement may itself
                # contain " -> ".
                short_form, arrow, long_form = entry.partition(' -> ')
                if arrow:
                    mapping[short_form.strip()] = long_form.strip()
                else:
                    print(f"⚠️ Invalid format on line {number}: {entry}")

        print(f"βœ… Loaded {len(mapping)} abbreviation replacements from {file_path}")

    except Exception as exc:
        print(f"❌ Error loading abbreviations: {exc}")

    return mapping
48
+
49
def create_sample_abbreviations_file(file_path):
    """Write a starter abbreviations file to ``file_path``.

    The file uses the ``abbreviation -> replacement`` line format with ``#``
    comment lines. Any error while writing is printed rather than raised.
    """
    starter_rules = """# Abbreviation Replacements for TTS
# Format: abbreviation -> replacement
# Lines starting with # are comments

# Common titles and abbreviations
Dr. -> Doctor
Mr. -> Mister
Mrs. -> Missus
Ms. -> Miss
Prof. -> Professor
Rev. -> Reverend
Lt. -> Lieutenant
Capt. -> Captain
Gen. -> General
Col. -> Colonel
Jr. -> Junior
Sr. -> Senior

# Political and organizations
M.P. -> MP
U.S. -> US
U.K. -> UK
U.N. -> UN
F.B.I. -> FBI
C.I.A. -> CIA
N.A.S.A. -> NASA

# Common abbreviations
etc. -> et cetera
vs. -> versus
e.g. -> for example
i.e. -> that is
Inc. -> Incorporated
Corp. -> Corporation
Ltd. -> Limited
Co. -> Company

# Numbers and ordinals
1st -> first
2nd -> second
3rd -> third
4th -> fourth
5th -> fifth
10th -> tenth
20th -> twentieth
21st -> twenty-first
30th -> thirtieth
40th -> fortieth
50th -> fiftieth
60th -> sixtieth
70th -> seventieth
80th -> eightieth
90th -> ninetieth
100th -> one hundredth

# Time abbreviations
a.m. -> AM
p.m. -> PM
A.M. -> AM
P.M. -> PM
"""

    try:
        Path(file_path).write_text(starter_rules, encoding='utf-8')
        print(f"πŸ“ Created sample abbreviations file: {file_path}")
        print(f"πŸ’‘ Edit this file to add your own replacements!")
    except Exception as exc:
        print(f"❌ Error creating sample file: {exc}")
120
+
121
def preprocess_abbreviations(text, replacements):
    """Replace abbreviations in ``text`` with their TTS-friendly expansions.

    Rules are applied in the mapping's iteration order. An abbreviation is
    only replaced where it stands as its own token: if it begins (or ends)
    with a letter or digit, it must not be directly preceded (or followed)
    by a word character. This stops a short rule from corrupting a longer
    token, e.g. ``1st`` inside ``21st``.

    Args:
        text: Input text.
        replacements: Mapping of abbreviation -> replacement. May be empty.

    Returns:
        The text with all applicable rules applied.
    """
    if not replacements:
        return text

    replacements_made = 0

    for abbrev, replacement in replacements.items():
        if not abbrev:
            continue  # an empty key would match between every character

        leading_guard = r'(?<!\w)' if abbrev[0].isalnum() else ''
        trailing_guard = r'(?!\w)' if abbrev[-1].isalnum() else ''
        pattern = leading_guard + re.escape(abbrev) + trailing_guard

        # A callable replacement keeps backslashes in the expansion literal.
        text, hits = re.subn(pattern, lambda _match, _r=replacement: _r, text)
        if hits:
            replacements_made += 1  # counts rules that fired, not occurrences

    if replacements_made > 0:
        logging.info(f"πŸ“ Applied {replacements_made} abbreviation replacements")

    return text
139
+
140
+ # ============================================================================
141
+ # TEXT PREPROCESSING AND CHUNKING
142
+ # ============================================================================
143
+
144
def smart_punctuate(text):
    """Expand abbreviations, then normalize punctuation and markup for TTS.

    Steps, in order:
      1. Load the abbreviation rules and apply them to ``text``.
      2. Strip each line; give every non-empty line a trailing period unless
         it already ends in ``.``, ``!`` or ``?``. Blank lines are kept as
         paragraph breaks.
      3. Convert curly quotes to ASCII quotes.
      4. Remove ``**bold**`` markers and runs of two or more underscores.

    Args:
        text: Raw input text.

    Returns:
        The cleaned text with lines joined by ``\\n``.
    """
    # Load abbreviations and apply them
    abbreviation_replacements = load_abbreviations()
    text = preprocess_abbreviations(text, abbreviation_replacements)

    out = []
    for line in text.splitlines():
        stripped = line.strip()

        if not stripped:
            out.append("")  # keep the blank line (paragraph break)
        elif re.search(r'[.!?]$', stripped):
            out.append(stripped)
        else:
            out.append(stripped + ".")

    result = "\n".join(out)

    # Curly quotes are spelled as Unicode escapes. The previous literals had
    # degraded to plain ASCII quotes, which turned these replacements into
    # no-ops (and made ''' parse as a triple-quoted string).
    result = result.replace('\u201c', '"').replace('\u201d', '"')  # smart double quotes
    result = result.replace('\u2018', "'").replace('\u2019', "'")  # smart single quotes

    # Remove problematic formatting
    result = re.sub(r'\*\*([^*]+)\*\*', r'\1', result)  # remove bold markdown
    result = re.sub(r'_{2,}', '', result)  # remove underlines

    return result
178
+
179
def fix_short_sentence_artifacts(chunk_text):
    """Soften runs of very short sentences that tend to trip up TTS.

    Rules, applied in order:
      * A chunk that is one word ending in a period has the period(s)
        replaced by a comma: ``"Right."`` -> ``"Right,"``.
      * If two or more sentences have three words or fewer, all sentences
        are joined with commas and closed with a single period:
        ``"Yes. No. Maybe."`` -> ``"Yes, No, Maybe."``.
      * If only the first sentence is a single word ending in a period, it
        is attached to the next sentence with a comma:
        ``"Well. I think so too now."`` -> ``"Well, I think so too now."``.
      * Otherwise the chunk is returned unchanged.

    Text after the last sentence terminator (for example a closing quote)
    is kept in every case.

    Args:
        chunk_text: One text chunk.

    Returns:
        The adjusted chunk text.
    """
    stripped = chunk_text.strip()

    # Whole chunk is a single short "sentence".
    if len(stripped.split()) == 1 and stripped.endswith('.'):
        core = stripped.rstrip('.')  # also handles "Wait..." -> "Wait,"
        if core:
            return core + ','

    # re.split with a capture group yields text, punct, text, punct, ..., tail
    parts = re.split(r'([.!?])', stripped)
    if len(parts) < 2:
        return chunk_text  # nothing to fix

    # Reconstruct (sentence, punctuation, word_count) triples.
    sentences = []
    for i in range(0, len(parts) - 1, 2):
        sentence = parts[i].strip()
        if sentence:
            sentences.append((sentence, parts[i + 1], len(sentence.split())))

    # Whatever follows the final terminator; previously this was dropped.
    tail = parts[-1].strip()

    def _with_tail(body):
        if not tail:
            return body
        # Words need a separating space; closing quotes/brackets do not.
        return body + (" " if tail[0].isalnum() else "") + tail

    short_count = sum(1 for _, _, wc in sentences if wc <= 3)

    # Multiple short sentences: merge everything into one comma list.
    if short_count >= 2 and len(sentences) >= 2:
        return _with_tail(", ".join(s for s, _, _ in sentences) + ".")

    # Single-word opener: glue it to what follows, keeping the punctuation
    # of the following sentences (it used to be stripped).
    if len(sentences) >= 2 and sentences[0][2] == 1 and sentences[0][1] == ".":
        opener = sentences[0][0]
        following = " ".join(s + p for s, p, _ in sentences[1:])
        return _with_tail(f"{opener}, {following}")

    return chunk_text
221
+
222
def sentence_chunk_text(text, max_words=MAX_CHUNK_WORDS, min_words=MIN_CHUNK_WORDS):
    """Split ``text`` into TTS-sized chunks along sentence boundaries.

    Pipeline:
      1. Split into sentences at ``.``/``!``/``?`` (plus any closing quotes
         or brackets) followed by whitespace.
      2. Hand sentences longer than ``max_words`` to ``break_long_sentence``.
      3. Merge consecutive chunks shorter than ``min_words`` into a
         comma-joined chunk.
      4. Run ``fix_short_sentence_artifacts`` over every chunk.

    Args:
        text: Pre-processed text.
        max_words: Maximum words per chunk before a sentence is broken up.
        min_words: Chunks below this word count are grouped with neighbours.

    Returns:
        A list of ``(chunk_text, is_paragraph_end)`` tuples.
    """
    # First, split into sentences. The paragraph flag is taken from the
    # whitespace that terminated the sentence, BEFORE stripping. The old
    # code tested `endswith("\n")` on the stripped sentence, which can never
    # be true, so every chunk was flagged False.
    # NOTE(review): any newline after a sentence is treated as a paragraph
    # end, mirroring the original `endswith("\n")` test - confirm that a
    # single newline (not only a blank line) should count.
    sentence_end_re = re.compile(r'([.!?][\"\'\)]*\s+)')
    sentences = []
    start_index = 0

    for match in sentence_end_re.finditer(text):
        end_index = match.end()
        sentence = text[start_index:end_index].strip()
        if sentence:
            sentences.append((sentence, "\n" in match.group()))
        start_index = end_index

    remainder = text[start_index:].strip()
    if remainder:
        sentences.append((remainder, False))

    # Process each sentence and break it if too long.
    processed_chunks = []

    for sentence, is_para_end in sentences:
        if len(sentence.split()) <= max_words:
            # Sentence is fine as-is
            processed_chunks.append((sentence, is_para_end))
        else:
            # Too long - break at natural pause points. Only the final
            # piece can close the paragraph.
            broken_chunks = list(break_long_sentence(sentence, max_words))
            if is_para_end and broken_chunks:
                last_text, _ = broken_chunks[-1]
                broken_chunks[-1] = (last_text, True)
            processed_chunks.extend(broken_chunks)

    # Now group short chunks together.
    final_chunks = []
    short_group = []

    for chunk_text, is_para_end in processed_chunks:
        word_count = len(chunk_text.split())

        if word_count < min_words and not is_para_end:
            # Collect short chunks for grouping (terminal punctuation
            # removed so they can be comma-joined).
            short_group.append(re.sub(r'[.!?]+$', '', chunk_text.strip()))
        elif short_group:
            if word_count < min_words:
                # Short paragraph-ending chunk: it closes the pending group.
                short_group.append(re.sub(r'[.!?]+$', '', chunk_text.strip()))
                merged = ", ".join(short_group) + "."
            else:
                # Prepend the pending short chunks to this normal chunk.
                merged = ", ".join(short_group) + ", " + chunk_text
            final_chunks.append((merged.strip(), is_para_end))
            short_group = []
        else:
            # Normal chunk
            final_chunks.append((chunk_text, is_para_end))

    # Handle any remaining short group
    if short_group:
        merged = ", ".join(short_group) + "."
        final_chunks.append((merged.strip(), False))

    # Apply short sentence cleanup
    return [
        (fix_short_sentence_artifacts(chunk_text), is_para_end)
        for chunk_text, is_para_end in final_chunks
    ]
302
+
303
def break_long_sentence(sentence, max_words):
    """Break a long sentence at natural pause points.

    The text is consumed left to right. For each piece, the break patterns are
    tried in order of preference and the first match that yields a chunk of
    between ``min(6, max_words // 3)`` and ``max_words`` words is used. When no
    pattern qualifies, the text is cut after ``max_words`` words and a comma is
    appended if the cut does not already end in punctuation.

    Args:
        sentence: Text to split. A trailing newline marks a paragraph end.
        max_words: Maximum number of whitespace-separated words per chunk.

    Returns:
        List of ``(chunk_text, is_para_end)`` tuples. Only the final chunk can
        carry ``is_para_end=True``.

    Raises:
        ValueError: If ``max_words`` is smaller than 1 (the forced break could
            never make progress).
    """
    if max_words < 1:
        raise ValueError("max_words must be at least 1")

    # Define break points in order of preference
    break_patterns = [
        r'(,\s+and\s+)',     # ", and "
        r'(,\s+but\s+)',     # ", but "
        r'(,\s+yet\s+)',     # ", yet "
        r'(,\s+or\s+)',      # ", or "
        r'(,\s+so\s+)',      # ", so "
        r'(;\s*)',           # "; "
        r'(—\s*)',           # "— " (em dash)
        r'(\s+—\s+)',        # " — " (spaced em dash)
        r'(\.\s*")',         # '." ' (end quote)
        r'("\s*)',           # '" ' (start quote)
        r'(,\s+which\s+)',   # ", which "
        r'(,\s+when\s+)',    # ", when "
        r'(,\s+where\s+)',   # ", where "
        r'(,\s+while\s+)',   # ", while "
        r'(,\s+though\s+)',  # ", though "
        r'(,\s+)',           # ", " (any comma - last resort)
    ]

    chunks = []
    remaining_text = sentence.strip()
    is_para_end = sentence.endswith("\n")
    # Smallest chunk worth emitting at a natural break
    min_chunk_words = min(6, max_words // 3)

    while remaining_text:
        words = remaining_text.split()

        if len(words) <= max_words:
            # Remaining text fits in one chunk; this is always the final chunk,
            # so it inherits the paragraph-end flag of the whole sentence.
            chunks.append((remaining_text, is_para_end))
            break

        # Find the best break point within the word limit
        best_break_pos = None
        best_break_text = ""

        for pattern in break_patterns:
            for match in re.finditer(pattern, remaining_text):
                break_pos = match.end()
                potential_chunk = remaining_text[:break_pos].strip()
                chunk_word_count = len(potential_chunk.split())

                if min_chunk_words <= chunk_word_count <= max_words:
                    best_break_pos = break_pos
                    best_break_text = potential_chunk
                    break

            if best_break_pos is not None:
                break

        if best_break_pos is not None:
            # Found a good break point
            chunks.append((best_break_text, False))
            remaining_text = remaining_text[best_break_pos:].strip()
        else:
            # No good break point found - force break at word limit
            force_break_text = " ".join(words[:max_words])

            # Try to end at a reasonable point
            if not force_break_text.endswith(('.', '!', '?', ',', ';')):
                force_break_text += ","

            chunks.append((force_break_text, False))
            remaining_text = " ".join(words[max_words:]).strip()

    return chunks
377
+
378
+ # ============================================================================
379
+ # CONTENT BOUNDARY DETECTION
380
+ # ============================================================================
381
+
382
def detect_content_boundaries(chunk_text, chunk_index, all_chunks):
    """Detect chapter breaks and paragraph endings for appropriate silence.

    Checks are applied in this order, and later ones override earlier ones
    (except the paragraph check, which only applies when nothing else matched):

    1. ``"chapter_start"`` - a line of the current chunk looks like a chapter heading.
    2. ``"chapter_end"`` - the next chunk starts with a chapter heading.
    3. ``"section_break"`` - the chunk contains three or more ``*``, ``#`` or em dashes in a row.
    4. ``"paragraph_end"`` - the chunk ends with a newline.

    Args:
        chunk_text: Text of the current chunk.
        chunk_index: Position of the current chunk in ``all_chunks``.
        all_chunks: Sequence of chunk strings, used to look one chunk ahead.

    Returns:
        One of the boundary names above, or ``None`` when no boundary is detected.
    """
    boundary_type = None

    # Chapter detection (flexible patterns)
    chapter_patterns = [
        r'^(Chapter \d+|CHAPTER \d+)',
        r'^(Ch\. \d+|CH\. \d+)',
        r'^\d+\.',      # Simple "1." numbering
        r'^[IVX]+\.',   # Roman numerals "I.", "II.", etc.
    ]

    # MULTILINE: a heading on any line of the current chunk counts
    stripped_text = chunk_text.strip()
    if any(re.search(pattern, stripped_text, re.MULTILINE) for pattern in chapter_patterns):
        boundary_type = "chapter_start"

    # Look ahead for chapter start (current chunk ends chapter)
    if chunk_index + 1 < len(all_chunks):
        next_text = all_chunks[chunk_index + 1].strip()
        if any(re.search(pattern, next_text) for pattern in chapter_patterns):
            boundary_type = "chapter_end"

    # Section breaks (asterisks, hashes, em dashes)
    if re.search(r'\*{3,}|\#{3,}|—{3,}', chunk_text):
        boundary_type = "section_break"

    # Paragraph ending (already detected in chunking); lowest priority
    if boundary_type is None and chunk_text.endswith('\n'):
        boundary_type = "paragraph_end"

    return boundary_type
417
+
418
+ # ============================================================================
419
+ # UTILITY FUNCTIONS
420
+ # ============================================================================
421
+
422
def reload_abbreviations():
    """Re-read the abbreviations by delegating to ``load_abbreviations``.

    Handy for picking up edits to the abbreviations file while testing.
    """
    refreshed = load_abbreviations()
    return refreshed
425
+
426
def test_abbreviations(test_text="Dr. Smith met with the M.P. at 3:30 p.m. on the 21st."):
    """Run the abbreviation preprocessing on sample text and show the result.

    Loads the current abbreviation replacements, prints the input and the
    processed output, and returns the processed string.
    """
    replacements = load_abbreviations()
    print(f"Original: {test_text}")
    result = preprocess_abbreviations(test_text, replacements)
    print(f"Processed: {result}")
    return result
433
+
434
def test_chunking(test_text=None, max_words=20, min_words=4):
    """Test the enhanced chunking with sample or custom text.

    Args:
        test_text: Text to chunk; a built-in two-sentence sample is used when ``None``.
        max_words: Upper word limit passed to ``sentence_chunk_text``.
        min_words: Lower word limit passed to ``sentence_chunk_text``.

    Returns:
        The list of ``(chunk_text, is_para_end)`` tuples produced by
        ``sentence_chunk_text``. Each chunk is also printed with its word
        count, with a warning for any chunk still above ``max_words``.
    """
    if test_text is None:
        test_text = '''Though perfectly worldly-wise, and able, as she expressed it, to take care of herself, there was yet something curiously ingenuous in her single-minded attitude towards life, and her whole-hearted determination to "make good." This glimpse of a world unknown to me was not without its charm, and I enjoyed seeing her vivid little face light up as she talked.'''

    chunks = sentence_chunk_text(test_text, max_words=max_words, min_words=min_words)

    print("Enhanced Chunking Results:")
    for number, (chunk, _is_para) in enumerate(chunks, start=1):
        word_count = len(chunk.split())
        print(f"Chunk {number} ({word_count} words): {chunk}")
        if word_count > max_words:
            print(" ⚠️ WARNING: Still over limit!")
        print()

    return chunks
wrapper/chunk_editor.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
def update_chunk(chunk, boundary_type=None, pause_duration=None, sentiment_score=None):
    """Overwrite selected fields of ``chunk`` in place and return it.

    Each keyword argument maps to the key of the same name; arguments left as
    ``None`` leave the corresponding key untouched.
    """
    overrides = (
        ('boundary_type', boundary_type),
        ('pause_duration', pause_duration),
        ('sentiment_score', sentiment_score),
    )
    for field, value in overrides:
        if value is not None:
            chunk[field] = value
    return chunk
wrapper/chunk_editor.py.bak ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
def update_chunk(chunk, boundary_type=None, pause_duration=None, sentiment_score=None):
    """Overwrite selected fields of ``chunk`` in place and return it.

    Each keyword argument maps to the key of the same name; arguments left as
    ``None`` leave the corresponding key untouched.
    """
    overrides = (
        ('boundary_type', boundary_type),
        ('pause_duration', pause_duration),
        ('sentiment_score', sentiment_score),
    )
    for field, value in overrides:
        if value is not None:
            chunk[field] = value
    return chunk
wrapper/chunk_loader.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+
3
def load_chunks(path):
    """Load chunk data from a UTF-8 JSON file.

    When the file holds a list, dict entries whose ``_metadata`` value is
    truthy are dropped and the remaining entries are returned. Any other
    top-level JSON value is returned unchanged.
    """
    with open(path, 'r', encoding='utf-8') as handle:
        payload = json.load(handle)

    if not isinstance(payload, list):
        return payload

    def _is_metadata(entry):
        # Metadata entries are dicts flagged with a truthy '_metadata' key
        return isinstance(entry, dict) and entry.get('_metadata', False)

    return [entry for entry in payload if not _is_metadata(entry)]
13
+
14
def load_metadata(path):
    """Extract the metadata record from a chunk JSON file.

    Only the first element of a top-level list is inspected; it is returned
    when it is a dict with a truthy ``_metadata`` value.

    Args:
        path: Path of the JSON file to read (UTF-8).

    Returns:
        The metadata dict, or ``None`` when the file has no metadata entry or
        cannot be read or parsed (a warning is printed in that case).
    """
    try:
        with open(path, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except Exception as e:
        # Deliberately best-effort: missing metadata must not abort the caller
        print(f"⚠️ Error loading metadata from {path}: {e}")
        return None

    if isinstance(data, list) and data:
        # Look for metadata in first element
        first_item = data[0]
        if isinstance(first_item, dict) and first_item.get('_metadata', False):
            return first_item

    return None
30
+
31
def save_chunks(path, chunks):
    """Clean chunk texts and write the chunk list to a UTF-8 JSON file.

    For every dict chunk with a string ``text``, escaped quotes (``\\"`` and
    ``\\'``) are unescaped. Texts that contain a double quote together with
    "replied" or "said" additionally get the stray ``", ".`` and trailing
    ``", "`` patterns collapsed. The input chunks are not modified; cleaned
    copies are written instead. All other items are written unchanged.

    Args:
        path: Destination file path.
        chunks: Iterable of chunk dicts (other JSON-serializable items allowed).
    """
    import re

    # Compiled once, outside the per-chunk loop
    stray_quote_period = re.compile(r'(["\'])\s*,\s*(["\'])\s*\.')  # Fix ", ". pattern
    stray_quote_end = re.compile(r'(["\'])\s*,\s*(["\'])\s*$')      # Fix trailing ", "

    cleaned_chunks = []
    for chunk in chunks:
        # Non-dict items and chunks without a string text are passed through
        if not (isinstance(chunk, dict) and isinstance(chunk.get('text'), str)):
            cleaned_chunks.append(chunk)
            continue

        original_text = chunk['text']
        # Clean up any quote corruption
        cleaned_text = original_text.replace('\\"', '"').replace("\\'", "'")

        # Check for dialogue corruption patterns
        if ('replied' in cleaned_text or 'said' in cleaned_text) and '"' in cleaned_text:
            cleaned_text = stray_quote_period.sub(r'\1.', cleaned_text)
            cleaned_text = stray_quote_end.sub(r'\1.', cleaned_text)

            if cleaned_text != original_text:
                print("🔧 FIXED dialogue corruption:")
                print(f" Before: {original_text}")
                print(f" After: {cleaned_text}")

        # dict.copy() keeps key order and the mapping type (OrderedDict included)
        chunk_copy = chunk.copy()
        chunk_copy['text'] = cleaned_text
        cleaned_chunks.append(chunk_copy)

    with open(path, 'w', encoding='utf-8') as f:
        json.dump(cleaned_chunks, f, indent=2, ensure_ascii=False)
wrapper/chunk_loader.py.bak ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+
3
def load_chunks(path):
    """Read the UTF-8 JSON file at ``path`` and return its parsed content."""
    with open(path, 'r', encoding='utf-8') as handle:
        payload = json.load(handle)
    return payload
6
+
7
def save_chunks(path, chunks):
    """Write ``chunks`` to ``path`` as UTF-8 JSON indented by two spaces."""
    with open(path, 'w', encoding='utf-8') as handle:
        json.dump(chunks, handle, indent=2)
wrapper/chunk_player.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import subprocess
2
+ import os
3
+
4
def play_chunk_audio(path):
    """Play an audio file through ``ffplay`` and wait for playback to finish.

    ``ffplay`` runs without a display window, exits when the file ends, and
    has its output discarded. Nothing is played when ``path`` does not exist.

    Args:
        path: Path of the audio file to play.

    Returns:
        None. Problems (missing file, failure to launch ``ffplay``) are
        reported on stdout instead of being raised.
    """
    if not os.path.exists(path):
        print(f"❌ Audio file not found: {path}")
        return
    try:
        subprocess.run(
            ["ffplay", "-nodisp", "-autoexit", path],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
    except Exception as e:
        # Best-effort playback: e.g. ffplay not installed
        print(f"Error playing audio: {e}")
12
+
wrapper/chunk_player.py.bak ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import subprocess
2
+ import os
3
+
4
def play_chunk_audio(path):
    """Play an audio file through ``ffplay`` and wait for playback to finish.

    ``ffplay`` runs without a display window, exits when the file ends, and
    has its output discarded. Nothing is played when ``path`` does not exist.

    Args:
        path: Path of the audio file to play.

    Returns:
        None. Problems (missing file, failure to launch ``ffplay``) are
        reported on stdout instead of being raised.
    """
    if not os.path.exists(path):
        print(f"❌ Audio file not found: {path}")
        return
    try:
        subprocess.run(
            ["ffplay", "-nodisp", "-autoexit", path],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
    except Exception as e:
        # Best-effort playback: e.g. ffplay not installed
        print(f"Error playing audio: {e}")
12
+
wrapper/chunk_revisions.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import shutil
3
+ from pathlib import Path
4
+ from config.config import AUDIOBOOK_ROOT
5
+ base = AUDIOBOOK_ROOT
6
+
7
+
8
def accept_revision(index, audio_dir):
    """Archive the original chunk audio and replace it with the revised version.

    The revised file is expected at ``chunk_XXXXX_rev.wav`` inside
    ``audio_dir`` (1-based, 5-digit numbering). The original, if present, is
    moved to ``<audio_dir>/../../Audio_Revisions/chunk_XXXXX_orig.wav`` before
    the revised file takes over the main filename.

    Args:
        index: Zero-based chunk index.
        audio_dir: Directory holding the chunk WAV files.

    Returns:
        None. Progress and problems are reported on stdout.
    """
    base = Path(audio_dir)
    # Use 1-based indexing and 5-digit format
    chunk_stem = f"chunk_{index+1:05d}"
    original = base / f"{chunk_stem}.wav"
    revised = base / f"{chunk_stem}_rev.wav"

    if not revised.exists():
        print("❌ No revised file found. Cannot accept.")
        return

    # Created only once there is something to accept
    archive_dir = base.parent.parent / "Audio_Revisions"
    archive_dir.mkdir(parents=True, exist_ok=True)

    # Archive original if exists
    if original.exists():
        archived = archive_dir / f"{chunk_stem}_orig.wav"
        shutil.move(str(original), str(archived))
        print(f"📦 Original chunk archived to {archived.name}")
    else:
        print("⚠️ Original chunk missing — no archive created.")

    # Move revised chunk to main filename
    shutil.move(str(revised), str(original))
    print(f"✅ Revised chunk accepted as {original.name}")
wrapper/chunk_revisions.py.bak ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import shutil
3
+ from pathlib import Path
4
+ from config.config import AUDIOBOOK_ROOT
5
+ base = AUDIOBOOK_ROOT
6
+
7
+
8
def accept_revision(index, audio_dir):
    """Archive the original chunk audio and replace it with the revised version.

    The revised file is expected at ``chunk_XXXXX_rev.wav`` inside
    ``audio_dir`` (1-based, 5-digit numbering). The original, if present, is
    moved to ``<audio_dir>/../../Audio_Revisions/chunk_XXXXX_orig.wav`` before
    the revised file takes over the main filename.

    Args:
        index: Zero-based chunk index.
        audio_dir: Directory holding the chunk WAV files.

    Returns:
        None. Progress and problems are reported on stdout.
    """
    base = Path(audio_dir)
    # Use 1-based indexing and 5-digit format
    chunk_stem = f"chunk_{index+1:05d}"
    original = base / f"{chunk_stem}.wav"
    revised = base / f"{chunk_stem}_rev.wav"

    if not revised.exists():
        print("❌ No revised file found. Cannot accept.")
        return

    # Created only once there is something to accept
    archive_dir = base.parent.parent / "Audio_Revisions"
    archive_dir.mkdir(parents=True, exist_ok=True)

    # Archive original if exists
    if original.exists():
        archived = archive_dir / f"{chunk_stem}_orig.wav"
        shutil.move(str(original), str(archived))
        print(f"📦 Original chunk archived to {archived.name}")
    else:
        print("⚠️ Original chunk missing — no archive created.")

    # Move revised chunk to main filename
    shutil.move(str(revised), str(original))
    print(f"✅ Revised chunk accepted as {original.name}")
wrapper/chunk_revisions.py~ ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import shutil
3
+ from pathlib import Path
4
+ from config.config import AUDIOBOOK_ROOT
5
+ base = AUDIOBOOK_ROOT
6
+
7
+
8
def accept_revision(index):
    """Archive the original chunk audio and replace it with the revised version.

    Assumes the revised version is saved as ``chunk_XXX_rev.wav`` under
    ``AUDIOBOOK_ROOT``. The original, if present, is moved into the
    ``Audio_Revisions`` subdirectory as ``chunk_XXX_orig.wav``.

    Args:
        index: Chunk number used verbatim in the 3-digit filename.

    Returns:
        None. Progress and problems are reported on stdout.
    """
    # NOTE(review): the previous code read an undefined AUDIO_OUTPUT_DIR; the
    # imported AUDIOBOOK_ROOT is assumed to be the intended directory - confirm.
    base = Path(AUDIOBOOK_ROOT)
    original = base / f"chunk_{index:03}.wav"
    revised = base / f"chunk_{index:03}_rev.wav"

    if not revised.exists():
        print("❌ No revised file found. Cannot accept.")
        return

    # Created only once there is something to accept
    archive_dir = base / "Audio_Revisions"
    archive_dir.mkdir(parents=True, exist_ok=True)

    # Archive original if exists
    if original.exists():
        archived = archive_dir / f"chunk_{index:03}_orig.wav"
        # str() keeps shutil.move working on Python versions older than 3.9
        shutil.move(str(original), str(archived))
        print(f"📦 Original chunk archived to {archived.name}")
    else:
        print("⚠️ Original chunk missing — no archive created.")

    # Move revised chunk to main filename
    shutil.move(str(revised), str(original))
    print(f"✅ Revised chunk accepted as {original.name}")
wrapper/chunk_search.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
def search_chunks(chunks, query):
    """Return the chunks whose ``text`` contains ``query``, ignoring case.

    Matches keep the order in which they appear in ``chunks``.
    """
    needle = query.lower()
    return [entry for entry in chunks if needle in entry['text'].lower()]
wrapper/chunk_search.py.bak ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
def search_chunks(chunks, query):
    """Return the chunks whose ``text`` contains ``query``, ignoring case.

    Matches keep the order in which they appear in ``chunks``.
    """
    needle = query.lower()
    return [entry for entry in chunks if needle in entry['text'].lower()]
wrapper/chunk_synthesizer.py ADDED
@@ -0,0 +1,208 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+ import torch
3
+ import time
4
+ import re
5
+ from pydub import AudioSegment
6
+
7
+ from modules.tts_engine import load_optimized_model
8
+ from modules.file_manager import ensure_voice_sample_compatibility, list_voice_samples
9
+ from modules.audio_processor import apply_smart_fade_memory, smart_audio_validation_memory, process_audio_with_trimming_and_silence
10
+ from config.config import *
11
+
12
def get_original_voice_from_log(book_name):
    """Extract the original voice name from a book's run log.

    Reads ``<AUDIOBOOK_ROOT>/<book_name>/run.log`` and returns the value of the
    first line starting with ``"Voice: "`` or ``"Voice used: "``.

    Args:
        book_name: Name of the book directory under ``AUDIOBOOK_ROOT``.

    Returns:
        The voice name, or ``None`` when the log is missing, unreadable, or
        has no voice line.
    """
    log_file = Path(AUDIOBOOK_ROOT) / book_name / "run.log"
    if not log_file.exists():
        return None

    try:
        with open(log_file, 'r', encoding='utf-8') as f:
            for line in f:
                line = line.strip()
                if line.startswith(("Voice: ", "Voice used: ")):
                    voice_name = line.split(": ", 1)[1].strip()
                    print(f"📄 Found original voice in log: {voice_name}")
                    return voice_name
    except (OSError, UnicodeError) as e:
        # Best-effort lookup: an unreadable log just means "no voice found"
        print(f"⚠️ Error reading run log: {e}")

    return None
30
+
31
def get_original_voice_from_filename(book_name):
    """Extract the voice name from an existing audiobook filename.

    Looks in ``<AUDIOBOOK_ROOT>/<book_name>`` for files named like
    ``BookName [VoiceName].wav`` first, then ``BookName[VoiceName].m4b``, and
    returns the bracketed part of the first file that matches.

    Args:
        book_name: Name of the book directory under ``AUDIOBOOK_ROOT``.

    Returns:
        The voice name, or ``None`` when no file carries a ``[VoiceName]`` tag.
    """
    book_dir = Path(AUDIOBOOK_ROOT) / book_name

    # (file extension, wording used in the progress message); WAV has priority
    for extension, label in (("wav", "filename"), ("m4b", "M4B filename")):
        tag_pattern = re.compile(r'\[([^\]]+)\]\.' + extension + r'$')
        for audio_file in book_dir.glob(f"*.{extension}"):
            match = tag_pattern.search(audio_file.name)
            if match:
                voice_name = match.group(1)
                print(f"📁 Found original voice in {label}: {voice_name}")
                return voice_name

    return None
53
+
54
def find_voice_file_by_name(voice_name):
    """Find a voice sample file by name among ``list_voice_samples()``.

    An exact match on the file stem wins; otherwise the first sample whose
    stem contains ``voice_name`` (case-insensitive) is returned.

    Args:
        voice_name: Voice name to look for.

    Returns:
        The matching voice file, or ``None`` when nothing matches or
        ``voice_name`` is empty.
    """
    # An empty name would "partially match" every sample; treat it as no match
    if not voice_name:
        return None

    voice_files = list_voice_samples()

    # Exact match first
    for voice_file in voice_files:
        if voice_file.stem == voice_name:
            print(f"✅ Found exact voice match: {voice_file.name}")
            return voice_file

    # Partial match (case insensitive)
    voice_name_lower = voice_name.lower()
    for voice_file in voice_files:
        if voice_name_lower in voice_file.stem.lower():
            print(f"✅ Found partial voice match: {voice_file.name}")
            return voice_file

    return None
72
+
73
def get_tts_params_for_chunk(chunk):
    """Return the TTS parameters for a chunk, prompting the user if none are stored.

    When the chunk has a ``tts_params`` entry it is used; any of the three
    required keys it lacks are filled from the ``DEFAULT_*`` config values so
    that callers can index the result directly. Otherwise each parameter is
    requested on stdin, with the config default offered for empty input.

    Args:
        chunk: Chunk dict, optionally holding a ``tts_params`` dict.

    Returns:
        Dict with at least ``exaggeration``, ``cfg_weight`` and
        ``temperature``. A complete stored dict is returned as-is; an
        incomplete one is returned as a completed copy.
    """
    required_keys = ('exaggeration', 'cfg_weight', 'temperature')

    # Check if chunk has TTS params stored
    if 'tts_params' in chunk:
        tts_params = chunk['tts_params']
        missing = [key for key in required_keys if key not in (tts_params or {})]
        if missing:
            defaults = {
                'exaggeration': DEFAULT_EXAGGERATION,
                'cfg_weight': DEFAULT_CFG_WEIGHT,
                'temperature': DEFAULT_TEMPERATURE,
            }
            # Complete a copy so the stored chunk data is left untouched
            tts_params = dict(tts_params or {})
            for key in missing:
                tts_params[key] = defaults[key]
        print(f"📊 Using stored TTS params: exag={tts_params['exaggeration']}, cfg={tts_params['cfg_weight']}, temp={tts_params['temperature']}")
        return tts_params

    # Prompt user for TTS parameters
    print("\n⚙️ TTS Parameters for chunk synthesis:")

    def get_float_input(prompt, default):
        # Re-ask until the input is empty (use default) or parses as a float
        while True:
            try:
                value = input(f"{prompt} [{default}]: ").strip()
                if not value:
                    return default
                return float(value)
            except ValueError:
                print("❌ Invalid input. Please enter a valid number.")

    exaggeration = get_float_input("Exaggeration", DEFAULT_EXAGGERATION)
    cfg_weight = get_float_input("CFG Weight", DEFAULT_CFG_WEIGHT)
    temperature = get_float_input("Temperature", DEFAULT_TEMPERATURE)

    return {
        'exaggeration': exaggeration,
        'cfg_weight': cfg_weight,
        'temperature': temperature
    }
103
+
104
def synthesize_chunk(chunk, index, book_name, audio_dir, revision=False, chunks_json_path=None, override_voice_name=None):
    """Generate audio for a single chunk using a specified or detected voice.

    Args:
        chunk: Chunk dict; ``text`` is synthesized, ``tts_params`` and
            ``boundary_type`` are honoured when present.
        index: Zero-based chunk index (files are numbered 1-based, 5 digits).
        book_name: Book name used for voice detection.
        audio_dir: Directory the WAV file is written to.
        revision: When true, write ``chunk_XXXXX_rev.wav`` instead of
            ``chunk_XXXXX.wav``.
        chunks_json_path: Passed through to ``detect_voice_for_book``.
        override_voice_name: Voice to use instead of running voice detection.

    Returns:
        Path of the written file as a string, or ``None`` when the chunk has
        no text, no voice sample is available, or synthesis fails (the error
        and traceback are printed).
    """
    filename = f"chunk_{index+1:05d}_rev.wav" if revision else f"chunk_{index+1:05d}.wav"
    out_path = Path(audio_dir) / filename

    model = None
    try:
        # Validate the text before paying for a model load
        chunk_text = chunk.get('text', '')
        if not chunk_text:
            print("❌ No text found in chunk")
            return None

        # Get device
        device = "cuda" if torch.cuda.is_available() else "cpu"

        # Load TTS model
        print("🤖 Loading TTS model for chunk synthesis...")
        model = load_optimized_model(device)

        # Determine voice to use
        if override_voice_name:
            # Use explicitly provided voice
            print(f"🎤 Using explicitly selected voice: {override_voice_name}")
            voice_path = find_voice_file_by_name(override_voice_name)
            voice_name = override_voice_name
            detection_method = "user_selected"
        else:
            # Use enhanced voice detection
            print(f"🔍 Detecting original voice for book: {book_name}")
            from modules.voice_detector import detect_voice_for_book

            voice_name, voice_path, detection_method = detect_voice_for_book(book_name, chunks_json_path)

        # Fallback to first available voice if detection failed
        if not voice_path:
            print("⚠️ Voice not found, using first available voice")
            voice_files = list_voice_samples()
            if not voice_files:
                print("❌ No voice samples found")
                return None
            voice_path = voice_files[0]
            voice_name = voice_path.stem
            detection_method = "fallback_first_available"

        print(f"🎤 Using voice: {voice_name} (method: {detection_method})")
        compatible_voice = ensure_voice_sample_compatibility(voice_path)

        # Get TTS parameters for this chunk
        tts_params = get_tts_params_for_chunk(chunk)

        # Prepare model with voice
        model.prepare_conditionals(compatible_voice)

        print(f"🎤 Synthesizing: {chunk_text[:50]}...")
        print(f"📊 TTS params: exag={tts_params['exaggeration']}, cfg={tts_params['cfg_weight']}, temp={tts_params['temperature']}")

        # Generate audio with specified parameters
        with torch.no_grad():
            wav = model.generate(chunk_text,
                                 exaggeration=tts_params['exaggeration'],
                                 cfg_weight=tts_params['cfg_weight'],
                                 temperature=tts_params['temperature']).detach().cpu()

        # Convert tensor to AudioSegment for processing
        import io
        import soundfile as sf

        wav_np = wav.squeeze().numpy()
        with io.BytesIO() as wav_buffer:
            sf.write(wav_buffer, wav_np, model.sr, format='wav')
            wav_buffer.seek(0)
            audio_segment = AudioSegment.from_wav(wav_buffer)

        # Apply audio processing (the quarantine flag is not used here)
        audio_segment = apply_smart_fade_memory(audio_segment)
        audio_segment, _is_quarantined = smart_audio_validation_memory(audio_segment, model.sr)

        # Apply trimming and contextual silence based on boundary type
        boundary_type = chunk.get('boundary_type', 'none')
        if boundary_type and boundary_type != "none":
            audio_segment = process_audio_with_trimming_and_silence(audio_segment, boundary_type)
        elif ENABLE_AUDIO_TRIMMING:
            # Apply trimming even without boundary type if enabled
            from modules.audio_processor import trim_audio_endpoint
            audio_segment = trim_audio_endpoint(audio_segment)

        # Save final audio
        audio_segment.export(out_path, format="wav")
        print(f"✅ Saved synthesized chunk: {out_path.name}")

        return str(out_path)

    except Exception as e:
        print(f"❌ Failed to synthesize chunk: {e}")
        import traceback
        traceback.print_exc()
        return None

    finally:
        # Clean up model on every exit path, not only after a successful run
        del model
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
wrapper/chunk_synthesizer.py.bak ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+ import torch
3
+ import time
4
+ from pydub import AudioSegment
5
+
6
+ from modules.tts_engine import load_optimized_model
7
+ from modules.file_manager import ensure_voice_sample_compatibility, list_voice_samples
8
+ from modules.audio_processor import apply_smart_fade_memory, smart_audio_validation_memory, add_contextual_silence_memory
9
+ from config.config import *
10
+
11
def synthesize_chunk(chunk, index, book_name, audio_dir, revision=False):
    """Generate audio for a single chunk using a simplified TTS process.

    The first available voice sample is used, with fixed generation settings
    (exaggeration 1.0, cfg_weight 0.7, temperature 0.7).

    Args:
        chunk: Chunk dict; ``text`` is synthesized, ``boundary_type`` and
            ``is_paragraph_end`` control the appended silence.
        index: Zero-based chunk index (files are numbered 1-based, 5 digits).
        book_name: Unused by this implementation.
        audio_dir: Directory the WAV file is written to.
        revision: When true, write ``chunk_XXXXX_rev.wav`` instead of
            ``chunk_XXXXX.wav``.

    Returns:
        Path of the written file as a string, or ``None`` when the chunk has
        no text, no voice sample exists, or synthesis fails (the error and
        traceback are printed).
    """
    filename = f"chunk_{index+1:05d}_rev.wav" if revision else f"chunk_{index+1:05d}.wav"
    out_path = Path(audio_dir) / filename

    model = None
    try:
        # Validate the text before paying for a model load
        chunk_text = chunk.get('text', '')
        if not chunk_text:
            print("❌ No text found in chunk")
            return None

        # Get device
        device = "cuda" if torch.cuda.is_available() else "cpu"

        # Load TTS model
        print("🤖 Loading TTS model for chunk synthesis...")
        model = load_optimized_model(device)

        # Get voice sample - use first available voice for now
        voice_files = list_voice_samples()
        if not voice_files:
            print("❌ No voice samples found")
            return None

        voice_path = voice_files[0]  # Use first available voice
        compatible_voice = ensure_voice_sample_compatibility(voice_path)

        # Prepare model with voice
        model.prepare_conditionals(compatible_voice, exaggeration=1.0)

        print(f"🎤 Synthesizing: {chunk_text[:50]}...")

        # Generate audio
        with torch.no_grad():
            wav = model.generate(chunk_text,
                                 exaggeration=1.0,
                                 cfg_weight=0.7,
                                 temperature=0.7).detach().cpu()

        # Convert tensor to AudioSegment for processing
        import io
        import soundfile as sf

        wav_np = wav.squeeze().numpy()
        with io.BytesIO() as wav_buffer:
            sf.write(wav_buffer, wav_np, model.sr, format='wav')
            wav_buffer.seek(0)
            audio_segment = AudioSegment.from_wav(wav_buffer)

        # Apply audio processing (the quarantine flag is not used here)
        audio_segment = apply_smart_fade_memory(audio_segment)
        audio_segment, _is_quarantined = smart_audio_validation_memory(audio_segment, model.sr)

        # Add contextual silence if specified
        boundary_type = chunk.get('boundary_type', 'none')
        if boundary_type and boundary_type != "none":
            audio_segment = add_contextual_silence_memory(audio_segment, boundary_type)
        elif chunk.get('is_paragraph_end', False):
            silence = AudioSegment.silent(duration=SILENCE_PARAGRAPH_FALLBACK)
            audio_segment = audio_segment + silence

        # Save final audio
        audio_segment.export(out_path, format="wav")
        print(f"✅ Saved synthesized chunk: {out_path.name}")

        return str(out_path)

    except Exception as e:
        print(f"❌ Failed to synthesize chunk: {e}")
        import traceback
        traceback.print_exc()
        return None

    finally:
        # Clean up model on every exit path, not only after a successful run
        del model
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
wrapper/chunk_synthesizer.py~ ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from modules.tts_engine import process_one_chunk
2
+ from config.config import AUDIOBOOK_ROOT
3
+
4
+
5
def synthesize_chunk(chunk, index, revision=False):
    """Generate audio for a single chunk.

    Args:
        chunk: Chunk dict handed to ``process_one_chunk``.
        index: Chunk number used verbatim in the 3-digit filename.
        revision: When true, write ``chunk_XXX_rev.wav`` instead of
            ``chunk_XXX.wav``.

    Returns:
        Output path as a string, or ``None`` when synthesis fails.
    """
    from pathlib import Path

    filename = f"chunk_{index:03}_rev.wav" if revision else f"chunk_{index:03}.wav"
    # NOTE(review): the previous code read an undefined AUDIO_OUTPUT_DIR; the
    # imported AUDIOBOOK_ROOT is assumed to be the intended directory - confirm.
    out_path = Path(AUDIOBOOK_ROOT) / filename

    try:
        process_one_chunk(chunk, out_path)  # Must accept (chunk_dict, output_path)
        return str(out_path)
    except Exception as e:
        print(f"❌ Failed to synthesize chunk: {e}")
        return None
wrapper/chunk_tool.py ADDED
@@ -0,0 +1,249 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from wrapper.chunk_loader import load_chunks, save_chunks
2
+ from wrapper.chunk_search import search_chunks
3
+ from wrapper.chunk_editor import update_chunk
4
+ from wrapper.chunk_player import play_chunk_audio
5
+ from wrapper.chunk_synthesizer import synthesize_chunk
6
+ from wrapper.chunk_revisions import accept_revision
7
+ import os
8
+ from config.config import AUDIOBOOK_ROOT
9
+ AUDIO_DIR = AUDIOBOOK_ROOT
10
+
11
def select_book_for_repair():
    """Let the user select which book to repair.

    Books are collected from two places: ``<AUDIOBOOK_ROOT>/<book>/TTS/
    text_chunks/chunks_info.json`` and, as a fallback for books not found
    there, ``Text_Input/<book>_chunks.json`` relative to the working
    directory. The list is printed and the user is asked for an index until
    a valid one is entered.

    Returns:
        ``(book_name, json_path)`` for the chosen book, or ``(None, None)``
        when no chunk files exist or the prompt is cancelled (EOF / Ctrl-C).
    """
    from pathlib import Path

    # Look for books in both locations: TTS processing dirs and Text_Input
    available_books = []

    # First check TTS processing directories (sorted for stable indices)
    audiobook_root = Path(AUDIOBOOK_ROOT)
    if audiobook_root.exists():
        for book_dir in sorted(audiobook_root.iterdir()):
            if book_dir.is_dir():
                json_path = book_dir / "TTS" / "text_chunks" / "chunks_info.json"
                if json_path.exists():
                    available_books.append((book_dir.name, json_path, "TTS"))

    # Then check Text_Input directory for fallback
    text_input_dir = Path("Text_Input")
    if text_input_dir.exists():
        known_names = {book[0] for book in available_books}
        suffix = "_chunks"
        for chunk_file in sorted(text_input_dir.glob("*_chunks.json")):
            # Strip only the trailing "_chunks"; the name itself may contain it
            book_name = chunk_file.stem[:-len(suffix)]
            # Only add if not already found in TTS directories
            if book_name not in known_names:
                available_books.append((book_name, chunk_file, "Text_Input"))
                known_names.add(book_name)

    if not available_books:
        print("❌ No chunk files found in TTS processing directories or Text_Input/")
        return None, None

    print("\n📚 Available books for repair:")
    for i, (book_name, json_path, source) in enumerate(available_books):
        print(f" [{i}] {book_name} ({source}: {json_path.name})")

    last_index = len(available_books) - 1
    while True:
        try:
            choice = input(f"\nSelect book index [0-{last_index}]: ").strip()
        except (EOFError, KeyboardInterrupt):
            print("❌ Invalid selection or cancelled")
            return None, None

        try:
            idx = int(choice)
        except ValueError:
            # A typo re-prompts, just like an out-of-range number does
            idx = -1

        if 0 <= idx <= last_index:
            book_name, json_path, _source = available_books[idx]
            return book_name, json_path
        print(f"❌ Please enter a number between 0 and {last_index}")
57
+
58
class _InputCancelled(Exception):
    """Signal that the user aborted a prompt (Ctrl-C or end of input)."""


def _ask(prompt):
    """Show ``prompt`` on stdin and return the stripped reply.

    Raises:
        _InputCancelled: if the prompt is interrupted or stdin is closed.
    """
    try:
        return input(prompt).strip()
    except (EOFError, KeyboardInterrupt):
        raise _InputCancelled from None


def _prompt_float(param_name, current_val, min_val=None, max_val=None):
    """Ask for a float until a valid one is given; empty input keeps ``current_val``.

    Args:
        param_name: Label shown in the prompt and in error messages.
        current_val: Value returned unchanged when the user just presses Enter.
        min_val: Inclusive lower bound, or None for no lower bound.
        max_val: Inclusive upper bound, or None for no upper bound.

    Returns:
        The accepted float, or ``current_val`` when the reply was empty.
    """
    while True:
        value = _ask(f"New {param_name} [{current_val}]: ")
        if not value:
            return current_val
        try:
            new_val = float(value)
        except ValueError:
            print("❌ Invalid input. Please enter a valid number.")
            continue
        # float() parses "nan", and NaN slips through both range comparisons
        # below, so reject it explicitly (NaN is the only value != itself).
        if new_val != new_val:
            print("❌ Invalid input. Please enter a valid number.")
            continue
        if min_val is not None and new_val < min_val:
            print(f"❌ {param_name} must be >= {min_val}")
            continue
        if max_val is not None and new_val > max_val:
            print(f"❌ {param_name} must be <= {max_val}")
            continue
        return new_val


def _edit_chunk_metadata(chunk):
    """Prompt for a new boundary type and sentiment score and apply them to ``chunk``.

    Returns:
        True when at least one field of ``chunk`` was actually changed.
    """
    print("\n✏️ Edit Chunk Metadata:")
    print(f"Current boundary type: {chunk.get('boundary_type', 'N/A')}")
    boundary = _ask("New boundary type (none/paragraph_end/chapter_start/chapter_end/section_break) [Enter to skip]: ")

    current_sentiment = chunk.get('sentiment_compound', chunk.get('sentiment_score', 'N/A'))
    print(f"Current sentiment score: {current_sentiment}")
    sentiment = _ask("New sentiment compound score (-1.0 to 1.0) [Enter to skip]: ")

    changed = False
    if boundary:
        chunk['boundary_type'] = boundary
        print(f"βœ… Updated boundary type to: {boundary}")
        changed = True

    if sentiment:
        try:
            sentiment_val = float(sentiment)
        except ValueError as e:
            print(f"❌ Invalid input: {e}")
        else:
            if -1.0 <= sentiment_val <= 1.0:
                chunk['sentiment_compound'] = sentiment_val
                # Also update old key for compatibility
                chunk['sentiment_score'] = sentiment_val
                print(f"βœ… Updated sentiment score to: {sentiment_val}")
                changed = True
            else:
                print("❌ Sentiment score must be between -1.0 and 1.0")
    return changed


def _edit_tts_params(chunk):
    """Prompt for exaggeration, cfg_weight and temperature and store them on ``chunk``.

    Returns:
        The tuple ``(exaggeration, cfg_weight, temperature)`` that was stored.
    """
    print("\nπŸŽ›οΈ Edit TTS Parameters:")
    current_tts_params = chunk.get('tts_params', {})

    print("Current TTS parameters:")
    current_exag = current_tts_params.get('exaggeration', 1.0)
    current_cfg = current_tts_params.get('cfg_weight', 0.7)
    current_temp = current_tts_params.get('temperature', 0.7)

    print(f" Exaggeration: {current_exag}")
    print(f" CFG Weight: {current_cfg}")
    print(f" Temperature: {current_temp}")

    new_exag = _prompt_float("exaggeration", current_exag, 0.0, 3.0)
    new_cfg = _prompt_float("CFG weight", current_cfg, 0.0, 2.0)
    new_temp = _prompt_float("temperature", current_temp, 0.0, 2.0)

    # Only touch the chunk once all three prompts have completed.
    params = chunk.setdefault('tts_params', {})
    params['exaggeration'] = new_exag
    params['cfg_weight'] = new_cfg
    params['temperature'] = new_temp
    return new_exag, new_cfg, new_temp


def _revise_chunk(chunk, chunks, chunk_path, book_name, book_audio_dir):
    """Run the option menu for one selected chunk.

    Args:
        chunk: The chunk dict being revised (an element of ``chunks``).
        chunks: The full chunk list, written back by ``save_chunks`` after edits.
        chunk_path: Path of the chunk JSON file.
        book_name: Name of the selected book, forwarded to ``synthesize_chunk``.
        book_audio_dir: Directory (a ``Path``) holding the chunk WAV files.

    Returns:
        True to go back to the search prompt, False when the user asked to
        quit the whole tool.
    """
    index = chunk['index']
    # Audio files use 1-based, 5-digit numbering while 'index' is used 0-based.
    chunk_audio_path = book_audio_dir / f"chunk_{index+1:05d}.wav"

    while True:
        print(f"\nπŸ“ Chunk: \"{chunk['text']}\"")

        # Display current chunk metadata
        sentiment_compound = chunk.get('sentiment_compound', chunk.get('sentiment_score', 'N/A'))
        tts_params = chunk.get('tts_params', {})

        print(f" πŸ“ Index: {index}, Boundary: {chunk.get('boundary_type', 'N/A')}")
        print(f" 😊 Sentiment: {sentiment_compound}")
        print(f" πŸŽ›οΈ TTS Params: exag={tts_params.get('exaggeration', 'N/A')}, cfg={tts_params.get('cfg_weight', 'N/A')}, temp={tts_params.get('temperature', 'N/A')}")
        print(f" πŸ“ Audio file: chunk_{index+1:05d}.wav")
        print("\nOptions:")
        print(" 1. Play original audio")
        print(" 2. Edit text content")
        print(" 3. Edit chunk metadata (boundary, sentiment)")
        print(" 4. Edit TTS parameters (exaggeration, cfg_weight, temperature)")
        print(" 5. Resynthesize audio with current settings")
        print(" 6. Play revised audio")
        print(" 7. Accept revision (replace original with revised)")
        print(" 8. Back to search")

        choice = _ask("\nπŸ’‘ Enter option number [1-8]: ")
        if choice == "1":
            print(f"\nπŸ”Š Playing original audio: {chunk_audio_path.name}")
            play_chunk_audio(str(chunk_audio_path))
        elif choice == "2":
            print("\n✏️ Edit Text Content:")
            print(f"Current text: \"{chunk['text']}\"")
            print("πŸ’‘ Enter new text (or Enter to cancel):")
            new_text = _ask(">>> ")

            if new_text:
                chunk['text'] = new_text
                chunk['word_count'] = len(new_text.split())
                save_chunks(str(chunk_path), chunks)
                print("βœ… Text content updated successfully")
                print(f"πŸ“Š New word count: {chunk['word_count']}")
            else:
                print("❌ No changes made")
        elif choice == "3":
            # Persist and report success only when something really changed.
            if _edit_chunk_metadata(chunk):
                try:
                    save_chunks(str(chunk_path), chunks)
                    print("βœ… Chunk metadata updated successfully")
                except Exception as e:
                    print(f"❌ Error updating chunk: {e}")
            else:
                print("❌ No changes made")
        elif choice == "4":
            new_exag, new_cfg, new_temp = _edit_tts_params(chunk)
            save_chunks(str(chunk_path), chunks)
            print(f"βœ… TTS parameters updated: exag={new_exag}, cfg={new_cfg}, temp={new_temp}")
        elif choice == "5":
            print(f"\n🎀 Resynthesizing chunk {index+1:05d}...")
            revised_path = synthesize_chunk(chunk, index, book_name, book_audio_dir, revision=True)
            if revised_path:
                print(f"βœ… Chunk resynthesized: {revised_path}")
            else:
                print("❌ Failed to resynthesize chunk")
        elif choice == "6":
            rev_path = book_audio_dir / f"chunk_{index+1:05d}_rev.wav"
            print(f"\nπŸ”Š Playing revised audio: {rev_path.name}")
            play_chunk_audio(str(rev_path))
        elif choice == "7":
            print(f"\nπŸ“¦ Accepting revision for chunk {index+1:05d}...")
            accept_revision(index, book_audio_dir)
            print("βœ… Revision accepted successfully")
            return True
        elif choice == "8":
            print("πŸ”™ Returning to search...")
            return True
        elif choice.lower() == 'q':
            print("πŸšͺ Exiting chunk repair tool...")
            return False
        else:
            print(f"❌ Invalid option '{choice}'. Please enter a number 1-8 (or 'q' to quit).")


def _run_repair_session():
    """Select a book, then loop over search -> pick chunk -> revise until the user quits."""
    print("\nπŸ› οΈ Chunk Repair & Revision Tool")

    # Ask user to select book
    book_name, chunk_path = select_book_for_repair()
    if not chunk_path:
        return

    print(f"\nπŸ“– Loading chunks from: {chunk_path.name}")
    chunks = load_chunks(str(chunk_path))

    # Determine audio directory path based on book structure
    from pathlib import Path
    book_audio_dir = Path(AUDIOBOOK_ROOT) / book_name / "TTS" / "audio_chunks"

    if not book_audio_dir.exists():
        print(f"❌ Audio directory not found: {book_audio_dir}")
        print(f"πŸ“ Looked for: {book_audio_dir}")
        return

    print(f"πŸ“ Using audio directory: {book_audio_dir}")

    while True:
        query = _ask("\nSearch for text fragment (or 'Q' to quit): ")
        if query.lower() == "q":
            print("Exiting revision tool.")
            return

        results = search_chunks(chunks, query)
        if not results:
            print("❌ No matching chunks found.")
            continue

        print(f"\nπŸ” Found {len(results)} match(es):")
        for i, match in enumerate(results):
            print(f"[{i}] \"{match['text'][:60]}...\" | Index: {match['index']}")

        sel = _ask("Select chunk index to revise: ")
        # isdecimal() (not isdigit()) so that everything accepted here is also
        # accepted by int(); isdigit() is True for e.g. superscript digits.
        if not sel.isdecimal() or int(sel) >= len(results):
            print("Invalid selection.")
            continue

        if not _revise_chunk(results[int(sel)], chunks, chunk_path, book_name, book_audio_dir):
            return


def run_chunk_repair_tool():
    """Interactive console tool to search, edit, resynthesize and accept chunk revisions.

    The user picks a book, searches its chunks by text fragment, selects one
    match and then works through a menu: play audio, edit text / metadata /
    TTS parameters, resynthesize, play the revision, or accept it. Edits are
    written back to the chunk JSON file via ``save_chunks``.

    Aborting any prompt with Ctrl-C or end-of-input prints a cancellation
    message and returns to the caller instead of raising.
    """
    try:
        _run_repair_session()
    except _InputCancelled:
        print("\n❌ Input cancelled")
wrapper/chunk_tool.py.bak ADDED
@@ -0,0 +1,191 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from wrapper.chunk_loader import load_chunks, save_chunks
2
+ from wrapper.chunk_search import search_chunks
3
+ from wrapper.chunk_editor import update_chunk
4
+ from wrapper.chunk_player import play_chunk_audio
5
+ from wrapper.chunk_synthesizer import synthesize_chunk
6
+ from wrapper.chunk_revisions import accept_revision
7
+ import os
8
+ from config.config import AUDIOBOOK_ROOT
9
+ AUDIO_DIR = AUDIOBOOK_ROOT
10
+
11
def select_book_for_repair():
    """Ask the user which book to repair.

    Candidates are collected from two places: every directory under
    AUDIOBOOK_ROOT that contains ``TTS/text_chunks/chunks_info.json``, and
    every ``*_chunks.json`` file in ``Text_Input`` whose book name is not
    already listed.

    Returns:
        ``(book_name, json_path)`` for the chosen book, or ``(None, None)``
        when nothing is available or the selection is invalid or cancelled.
    """
    from pathlib import Path

    candidates = []

    # TTS processing directories are scanned first.
    root = Path(AUDIOBOOK_ROOT)
    if root.exists():
        for entry in root.iterdir():
            if not entry.is_dir():
                continue
            info_file = entry / "TTS" / "text_chunks" / "chunks_info.json"
            if info_file.exists():
                candidates.append((entry.name, info_file, "TTS"))

    # Text_Input is the fallback; names already collected are skipped.
    fallback_dir = Path("Text_Input")
    if fallback_dir.exists():
        for chunk_file in fallback_dir.glob("*_chunks.json"):
            name = chunk_file.stem.replace("_chunks", "")
            already_listed = {listed[0] for listed in candidates}
            if name not in already_listed:
                candidates.append((name, chunk_file, "Text_Input"))

    if not candidates:
        print("❌ No chunk files found in TTS processing directories or Text_Input/")
        return None, None

    print("\nπŸ“š Available books for repair:")
    for position, (name, path, origin) in enumerate(candidates):
        print(f" [{position}] {name} ({origin}: {path.name})")

    last = len(candidates) - 1
    while True:
        try:
            reply = input(f"\nSelect book index [0-{last}]: ").strip()
            picked = int(reply)
        except (ValueError, EOFError, KeyboardInterrupt):
            # Non-numeric input aborts the selection just like a cancel does.
            print("❌ Invalid selection or cancelled")
            return None, None

        if 0 <= picked <= last:
            name, path, _origin = candidates[picked]
            return name, path
        print(f"❌ Please enter a number between 0 and {last}")
57
+
58
class _InputCancelled(Exception):
    """Signal that the user aborted a prompt (Ctrl-C or end of input)."""


def _ask(prompt):
    """Show ``prompt`` on stdin and return the stripped reply.

    Raises:
        _InputCancelled: if the prompt is interrupted or stdin is closed.
    """
    try:
        return input(prompt).strip()
    except (EOFError, KeyboardInterrupt):
        raise _InputCancelled from None


def _edit_chunk_values(chunk, chunks, chunk_path):
    """Prompt for boundary, sentiment and pause, apply them via ``update_chunk`` and save."""
    print("\n✏️ Edit Chunk Values:")
    print(f"Current boundary type: {chunk.get('boundary_type', 'N/A')}")
    boundary = _ask("New boundary type (none/paragraph_end/chapter_start/chapter_end/section_break) [Enter to skip]: ")

    current_sentiment = chunk.get('sentiment_score', 'N/A')
    print(f"Current sentiment score: {current_sentiment}")
    sentiment = _ask("New sentiment score (-1.0 to 1.0) [Enter to skip]: ")

    current_pause = chunk.get('pause_duration', 'N/A')
    print(f"Current pause duration: {current_pause}")
    pause = _ask("New pause duration (milliseconds) [Enter to skip]: ")

    try:
        # float() raises ValueError on non-numeric replies; nothing is
        # applied or saved in that case.
        update_chunk(
            chunk,
            boundary_type=boundary if boundary else None,
            sentiment_score=float(sentiment) if sentiment else None,
            pause_duration=float(pause) if pause else None
        )
        save_chunks(str(chunk_path), chunks)
        print("βœ… Chunk values updated successfully")
    except ValueError as e:
        print(f"❌ Invalid input: {e}")
    except Exception as e:
        print(f"❌ Error updating chunk: {e}")


def _revise_chunk(chunk, chunks, chunk_path, book_name, book_audio_dir):
    """Run the option menu for one selected chunk.

    Returns:
        True to go back to the search prompt, False when the user asked to
        quit the whole tool.
    """
    index = chunk['index']
    # Use 5-digit chunk numbering and correct directory path
    chunk_audio_path = book_audio_dir / f"chunk_{index+1:05d}.wav"

    while True:
        print(f"\nπŸ“ Chunk: \"{chunk['text']}\"")
        print(f" Boundary: {chunk.get('boundary_type', 'N/A')}, Sentiment: {chunk.get('sentiment_score', 'N/A')}, Pause: {chunk.get('pause_duration', 'N/A')}")
        print(f" Audio file: chunk_{index+1:05d}.wav")
        print("\nOptions:")
        print(" 1. Play original audio")
        print(" 2. Edit text content")
        print(" 3. Edit chunk values (boundary, sentiment, pause)")
        print(" 4. Resynthesize audio with current settings")
        print(" 5. Play revised audio")
        print(" 6. Accept revision (replace original with revised)")
        print(" 7. Back to search")

        choice = _ask("\nπŸ’‘ Enter option number [1-7]: ")
        if choice == "1":
            print(f"\nπŸ”Š Playing original audio: {chunk_audio_path.name}")
            play_chunk_audio(str(chunk_audio_path))
        elif choice == "2":
            print("\n✏️ Edit Text Content:")
            print(f"Current text: \"{chunk['text']}\"")
            print("πŸ’‘ Enter new text (or Enter to cancel):")
            new_text = _ask(">>> ")

            if new_text:
                chunk['text'] = new_text
                chunk['word_count'] = len(new_text.split())
                save_chunks(str(chunk_path), chunks)
                print("βœ… Text content updated successfully")
                print(f"πŸ“Š New word count: {chunk['word_count']}")
            else:
                print("❌ No changes made")
        elif choice == "3":
            _edit_chunk_values(chunk, chunks, chunk_path)
        elif choice == "4":
            print(f"\n🎀 Resynthesizing chunk {index+1:05d}...")
            revised_path = synthesize_chunk(chunk, index, book_name, book_audio_dir, revision=True)
            if revised_path:
                print(f"βœ… Chunk resynthesized: {revised_path}")
            else:
                print("❌ Failed to resynthesize chunk")
        elif choice == "5":
            rev_path = book_audio_dir / f"chunk_{index+1:05d}_rev.wav"
            print(f"\nπŸ”Š Playing revised audio: {rev_path.name}")
            play_chunk_audio(str(rev_path))
        elif choice == "6":
            print(f"\nπŸ“¦ Accepting revision for chunk {index+1:05d}...")
            accept_revision(index, book_audio_dir)
            print("βœ… Revision accepted successfully")
            return True
        elif choice == "7":
            print("πŸ”™ Returning to search...")
            return True
        elif choice.lower() == 'q':
            print("πŸšͺ Exiting chunk repair tool...")
            return False
        else:
            print(f"❌ Invalid option '{choice}'. Please enter a number 1-7 (or 'q' to quit).")


def _run_repair_session():
    """Select a book, then loop over search -> pick chunk -> revise until the user quits."""
    print("\nπŸ› οΈ Chunk Repair & Revision Tool")

    # Ask user to select book
    book_name, chunk_path = select_book_for_repair()
    if not chunk_path:
        return

    print(f"\nπŸ“– Loading chunks from: {chunk_path.name}")
    chunks = load_chunks(str(chunk_path))

    # Determine audio directory path based on book structure
    from pathlib import Path
    book_audio_dir = Path(AUDIOBOOK_ROOT) / book_name / "TTS" / "audio_chunks"

    if not book_audio_dir.exists():
        print(f"❌ Audio directory not found: {book_audio_dir}")
        print(f"πŸ“ Looked for: {book_audio_dir}")
        return

    print(f"πŸ“ Using audio directory: {book_audio_dir}")

    while True:
        query = _ask("\nSearch for text fragment (or 'Q' to quit): ")
        if query.lower() == "q":
            print("Exiting revision tool.")
            return

        results = search_chunks(chunks, query)
        if not results:
            print("❌ No matching chunks found.")
            continue

        print(f"\nπŸ” Found {len(results)} match(es):")
        for i, match in enumerate(results):
            print(f"[{i}] \"{match['text'][:60]}...\" | Index: {match['index']}")

        sel = _ask("Select chunk index to revise: ")
        # isdecimal() (not isdigit()) so that everything accepted here is also
        # accepted by int(); isdigit() is True for e.g. superscript digits.
        if not sel.isdecimal() or int(sel) >= len(results):
            print("Invalid selection.")
            continue

        if not _revise_chunk(results[int(sel)], chunks, chunk_path, book_name, book_audio_dir):
            return


def run_chunk_repair_tool():
    """Interactive console tool to search, edit, resynthesize and accept chunk revisions.

    The user picks a book, searches its chunks by text fragment, selects one
    match and then works through a menu: play audio, edit text or chunk
    values, resynthesize, play the revision, or accept it. Edits are written
    back to the chunk JSON file via ``save_chunks``.

    Aborting any prompt with Ctrl-C or end-of-input prints a cancellation
    message and returns to the caller instead of raising.
    """
    try:
        _run_repair_session()
    except _InputCancelled:
        print("\n❌ Input cancelled")
wrapper/chunk_tool.py~ ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from wrapper.chunk_loader import load_chunks, save_chunks
2
+ from wrapper.chunk_search import search_chunks
3
+ from wrapper.chunk_editor import update_chunk
4
+ from wrapper.chunk_player import play_chunk_audio
5
+ from wrapper.chunk_synthesizer import synthesize_chunk
6
+ from wrapper.chunk_revisions import accept_revision
7
+ import os
8
+ from config.config import AUDIOBOOK_ROOT
9
# Directory in which the chunk WAV files are looked up. The original line read
# AUDIOBOOK_ROO, an undefined name that raised NameError on import.
AUDIO_DIR = AUDIOBOOK_ROOT

# Chunk metadata file that the repair tool loads and saves.
CHUNK_PATH = "Text_Input/my_book_chunks.json"
12
+
13
+
14
def run_chunk_repair_tool():
    """Interactive console tool to search chunks and revise a selected one.

    Loads the chunk list from CHUNK_PATH, lets the user search by text
    fragment and pick a match, then offers a menu to play the original audio,
    edit boundary / sentiment / pause values, resynthesize, play the revised
    audio, or accept the revision. Edited values are written back to
    CHUNK_PATH via ``save_chunks``.
    """
    print("\nπŸ› οΈ Chunk Repair & Revision Tool")
    chunks = load_chunks(CHUNK_PATH)

    while True:
        query = input("\nSearch for text fragment (or 'Q' to quit): ").strip()
        if query.lower() == "q":
            print("Exiting revision tool.")
            break

        results = search_chunks(chunks, query)
        if not results:
            print("❌ No matching chunks found.")
            continue

        print(f"\nπŸ” Found {len(results)} match(es):")
        for i, chunk in enumerate(results):
            print(f"[{i}] \"{chunk['text'][:60]}...\" | Index: {chunk['index']}")

        sel = input("Select chunk index to revise: ").strip()
        # isdecimal() (not isdigit()) so that everything accepted here is also
        # accepted by int(); isdigit() is True for e.g. superscript digits.
        if not sel.isdecimal() or int(sel) >= len(results):
            print("Invalid selection.")
            continue

        chunk = results[int(sel)]
        index = chunk['index']
        chunk_path = os.path.join(AUDIO_DIR, f"chunk_{index:03}.wav")

        while True:
            print(f"\nπŸ“ Chunk: \"{chunk['text']}\"")
            print(f" Boundary: {chunk['boundary_type']}, Sentiment: {chunk.get('sentiment_score', 'N/A')}, Pause: {chunk.get('pause_duration', 'N/A')}")
            print("\nOptions:")
            print(" 1. Play original")
            print(" 2. Edit values")
            print(" 3. Resynthesize")
            print(" 4. Play revised")
            print(" 5. Accept revision")
            print(" 6. Back to search")

            choice = input("Enter option number: ").strip()
            if choice == "1":
                play_chunk_audio(chunk_path)
            elif choice == "2":
                boundary = input("New boundary type (or Enter to skip): ").strip()
                sentiment = input("New sentiment score (or Enter to skip): ").strip()
                pause = input("New pause duration (or Enter to skip): ").strip()

                # Parse the numbers before touching the chunk so that a typo
                # re-shows the menu instead of crashing the tool.
                try:
                    new_sentiment = float(sentiment) if sentiment else None
                    new_pause = float(pause) if pause else None
                except ValueError as e:
                    print(f"❌ Invalid input: {e}")
                    continue

                update_chunk(
                    chunk,
                    boundary_type=boundary if boundary else None,
                    sentiment_score=new_sentiment,
                    pause_duration=new_pause
                )
                save_chunks(CHUNK_PATH, chunks)
            elif choice == "3":
                synthesize_chunk(chunk, index, revision=True)
            elif choice == "4":
                rev_path = os.path.join(AUDIO_DIR, f"chunk_{index:03}_rev.wav")
                play_chunk_audio(rev_path)
            elif choice == "5":
                accept_revision(index)
                break
            elif choice == "6":
                break
            else:
                print("Invalid input. Try again.")