danneauxs committed on
Commit ·
346d87a
1
Parent(s): d0851e9
Deploy complete ChatterboxTTS system with utils, tools, and wrapper modules
Browse files- .gitignore +2 -0
- modules/asr_manager.py +233 -0
- modules/system_detector.py +231 -0
- modules/voice_detector.py +240 -0
- requirements.txt +55 -0
- tools/combine_only.py +396 -0
- utils/abbreviations.txt +11 -0
- utils/abbreviations.txt~ +0 -0
- utils/chunk_manager.TXT +153 -0
- utils/dirlist.TXT +49 -0
- utils/generate_from_json (copy).py +143 -0
- utils/generate_from_json.py +143 -0
- utils/generate_from_json.py.bak +143 -0
- utils/prechunktest.TXT +4 -0
- utils/resume_handler.TXT +525 -0
- utils/text_cleaner.TXT +29 -0
- utils/text_processor.TXT +449 -0
- wrapper/chunk_editor.py +8 -0
- wrapper/chunk_editor.py.bak +8 -0
- wrapper/chunk_loader.py +72 -0
- wrapper/chunk_loader.py.bak +9 -0
- wrapper/chunk_player.py +12 -0
- wrapper/chunk_player.py.bak +12 -0
- wrapper/chunk_revisions.py +34 -0
- wrapper/chunk_revisions.py.bak +34 -0
- wrapper/chunk_revisions.py~ +33 -0
- wrapper/chunk_search.py +9 -0
- wrapper/chunk_search.py.bak +9 -0
- wrapper/chunk_synthesizer.py +208 -0
- wrapper/chunk_synthesizer.py.bak +90 -0
- wrapper/chunk_synthesizer.py~ +15 -0
- wrapper/chunk_tool.py +249 -0
- wrapper/chunk_tool.py.bak +191 -0
- wrapper/chunk_tool.py~ +79 -0
.gitignore
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
__pycache__/
|
| 2 |
+
*.pyc
|
modules/asr_manager.py
ADDED
|
@@ -0,0 +1,233 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
ASR Manager Module
|
| 3 |
+
Centralized ASR model loading with adaptive GPU/CPU fallback and real-time VRAM monitoring
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import torch
|
| 7 |
+
import logging
|
| 8 |
+
from pathlib import Path
|
| 9 |
+
from config.config import DEFAULT_ASR_MODEL, ASR_MODEL_VRAM_MB, ASR_MODEL_RAM_MB
|
| 10 |
+
|
| 11 |
+
def get_real_time_vram_status():
    """Return a snapshot of memory usage on the first CUDA device.

    Returns:
        dict: ``total_mb``, ``allocated_mb``, ``reserved_mb`` and
        ``available_mb`` (whole MiB, rounded down) plus ``has_gpu``. Every
        size is 0 and ``has_gpu`` is False when CUDA is unavailable, no
        device is present, or the query raises.
    """
    mib = 1024 * 1024
    try:
        if torch.cuda.is_available() and torch.cuda.device_count() > 0:
            # Only device 0 is inspected.
            total_vram = torch.cuda.get_device_properties(0).total_memory
            allocated_vram = torch.cuda.memory_allocated(0)
            reserved_vram = torch.cuda.memory_reserved(0)

            # Upper bound: everything this process has not allocated itself.
            available_vram = total_vram - allocated_vram
            try:
                # Driver-level free memory also reflects other processes.
                # Blocks cached by this process's allocator (reserved but not
                # allocated) are reusable here, so they are added back.
                free_vram, _ = torch.cuda.mem_get_info(0)
                available_vram = min(
                    available_vram,
                    free_vram + (reserved_vram - allocated_vram),
                )
            except (AttributeError, RuntimeError):
                # mem_get_info missing or failing: keep the upper bound.
                pass
            available_vram = max(0, available_vram)

            return {
                'total_mb': total_vram // mib,
                'allocated_mb': allocated_vram // mib,
                'reserved_mb': reserved_vram // mib,
                'available_mb': available_vram // mib,
                'has_gpu': True
            }
    except Exception as e:
        logging.warning(f"Failed to get real-time VRAM status: {e}")

    return {
        'total_mb': 0,
        'allocated_mb': 0,
        'reserved_mb': 0,
        'available_mb': 0,
        'has_gpu': False
    }
|
| 40 |
+
|
| 41 |
+
def calculate_available_vram_for_asr(safety_buffer_mb=500):
    """Return the VRAM (in MB) that ASR may use after reserving a buffer.

    Args:
        safety_buffer_mb: Headroom subtracted from the currently available
            VRAM before reporting it.

    Returns:
        int: Available VRAM minus the buffer, never below 0; 0 when no GPU
        is reported by ``get_real_time_vram_status``.
    """
    vram_status = get_real_time_vram_status()
    if not vram_status['has_gpu']:
        return 0
    return max(0, vram_status['available_mb'] - safety_buffer_mb)
|
| 52 |
+
|
| 53 |
+
def can_model_fit_gpu(model_name, available_vram_mb):
    """Check whether an ASR model's VRAM requirement fits in the given budget.

    Args:
        model_name: Key looked up in ``ASR_MODEL_VRAM_MB``.
        available_vram_mb: VRAM budget in MB.

    Returns:
        bool: True when the budget covers the requirement. A model missing
        from ``ASR_MODEL_VRAM_MB`` is treated as needing 0 MB, so it always
        "fits".
    """
    required_vram = ASR_MODEL_VRAM_MB.get(model_name, 0)
    return available_vram_mb >= required_vram
|
| 57 |
+
|
| 58 |
+
def try_load_model_with_fallback(model_name, primary_device, fallback_device="cpu"):
    """Load a Whisper model on the primary device, else on the fallback device.

    Args:
        model_name: Model name passed to ``whisper.load_model``.
        primary_device: Device tried first ("gpu" is translated to "cuda").
        fallback_device: Device tried when the primary load fails. It is
            skipped when it resolves to the same device as the primary.

    Returns:
        tuple: ``(model, device)`` where ``device`` is the caller-supplied
        name of the device that succeeded.

    Raises:
        RuntimeError: When every attempted device failed; chained from the
            last load error.
    """
    import whisper

    def convert_device_name(device):
        # This module says "gpu"; whisper.load_model is given "cuda" instead.
        name = device.lower()
        return "cuda" if name == "gpu" else name

    primary_device_whisper = convert_device_name(primary_device)
    fallback_device_whisper = convert_device_name(fallback_device)

    try:
        print(f"🎯 Attempting to load {model_name} on {primary_device.upper()}")
        model = whisper.load_model(model_name, device=primary_device_whisper)
        print(f"✅ Successfully loaded {model_name} on {primary_device.upper()}")
        return model, primary_device
    except Exception as e:
        print(f"⚠️ {model_name} failed on {primary_device} ({str(e)[:50]}...)")
        # The name bound by "except ... as" is cleared when the handler ends.
        last_error = e

    if fallback_device_whisper != primary_device_whisper:
        try:
            print(f"🔄 Trying {model_name} on {fallback_device.upper()}")
            model = whisper.load_model(model_name, device=fallback_device_whisper)
            print(f"✅ Successfully loaded {model_name} on {fallback_device.upper()}")
            return model, fallback_device
        except Exception as fallback_e:
            print(f"❌ {model_name} also failed on {fallback_device} ({str(fallback_e)[:50]}...)")
            last_error = fallback_e

    # Every attempted device failed.
    raise RuntimeError(
        f"Model {model_name} failed on both {primary_device} and {fallback_device}"
    ) from last_error
|
| 92 |
+
|
| 93 |
+
def load_asr_model_adaptive(asr_config=None):
    """
    Adaptive ASR model loading with real-time VRAM checking and intelligent fallback

    Order of attempts:
        1. The configured primary model (moved to CPU if the GPU is missing
           or too small).
        2. The configured fallback model (same GPU check, then CPU).
        3. ``DEFAULT_ASR_MODEL`` on GPU if it fits, otherwise on CPU, with a
           final CPU retry if the GPU load fails.

    Args:
        asr_config: ASR configuration dict from interfaces (None for GUI fallback).
            Steps 1-2 run only when it is truthy, has a truthy ``enabled``
            value and contains ``primary_model``; it must then also provide
            ``primary_device``, ``fallback_model`` and ``fallback_device``.

    Returns:
        tuple: (asr_model, actual_device_used) or (None, None) if all loading fails
    """
    print("🔍 Starting adaptive ASR model loading...")

    # VRAM is sampled once here; later checks reuse this snapshot.
    vram_status = get_real_time_vram_status()
    available_vram = calculate_available_vram_for_asr()

    print("🖥️ Real-time VRAM status:")
    print(f" Total: {vram_status['total_mb']:,}MB")
    print(f" Allocated: {vram_status['allocated_mb']:,}MB")
    print(f" Available for ASR: {available_vram:,}MB (with 500MB safety buffer)")

    if asr_config and asr_config.get('enabled') and 'primary_model' in asr_config:
        # Intelligent selection from CLI/Gradio
        primary_model = asr_config['primary_model']
        primary_device = asr_config['primary_device']
        fallback_model = asr_config['fallback_model']
        fallback_device = asr_config['fallback_device']

        print("🧠 Using intelligent ASR config:")
        print(f" Primary: {primary_model} on {primary_device.upper()}")
        print(f" Fallback: {fallback_model} on {fallback_device.upper()}")

        # Real-time VRAM check for primary model
        if primary_device.lower() == 'gpu':
            if not vram_status['has_gpu']:
                print("⚠️ No GPU available, forcing CPU mode")
                primary_device = 'cpu'
            elif not can_model_fit_gpu(primary_model, available_vram):
                required = ASR_MODEL_VRAM_MB.get(primary_model, 0)
                print(f"⚠️ Insufficient VRAM for {primary_model} (need {required}MB, have {available_vram}MB)")
                print("🔄 Switching primary to CPU")
                primary_device = 'cpu'

        try:
            # The same device is passed twice on purpose: a primary failure
            # moves on to the fallback *model* instead of retrying this one.
            return try_load_model_with_fallback(primary_model, primary_device, primary_device)
        except Exception:
            print("🔄 Primary model failed, trying fallback configuration...")

        # Real-time VRAM check for fallback model
        if fallback_device.lower() == 'gpu':
            if not vram_status['has_gpu']:
                print("⚠️ No GPU available for fallback, using CPU")
                fallback_device = 'cpu'
            elif not can_model_fit_gpu(fallback_model, available_vram):
                required = ASR_MODEL_VRAM_MB.get(fallback_model, 0)
                print(f"⚠️ Insufficient VRAM for fallback {fallback_model} (need {required}MB, have {available_vram}MB)")
                fallback_device = 'cpu'

        try:
            return try_load_model_with_fallback(fallback_model, fallback_device, 'cpu')
        except Exception:
            print("❌ Both configured models failed!")
    else:
        # Fallback mode for GUI or missing config
        print(f"🔧 Using fallback mode: {DEFAULT_ASR_MODEL}")

    # Last resort: try default model with adaptive device selection
    print(f"🔄 Last resort: trying {DEFAULT_ASR_MODEL} with adaptive device selection")

    # Choose device based on real-time VRAM availability
    if vram_status['has_gpu'] and can_model_fit_gpu(DEFAULT_ASR_MODEL, available_vram):
        device = 'cuda'  # Use cuda directly for whisper
        device_display = 'GPU'
        print(f"✅ Using GPU for {DEFAULT_ASR_MODEL}")
    else:
        device = 'cpu'
        device_display = 'CPU'
        print(f"🔄 Using CPU for {DEFAULT_ASR_MODEL}")

    try:
        import whisper
        model = whisper.load_model(DEFAULT_ASR_MODEL, device=device)
        print(f"✅ Successfully loaded {DEFAULT_ASR_MODEL} on {device_display}")
        return model, device_display.lower()
    except Exception as e:
        print(f"❌ Critical failure: Could not load {DEFAULT_ASR_MODEL} on {device}: {e}")

        # Ultimate fallback to CPU if GPU failed
        if device == 'cuda':
            try:
                print(f"🔄 Ultimate fallback: {DEFAULT_ASR_MODEL} on CPU")
                model = whisper.load_model(DEFAULT_ASR_MODEL, device='cpu')
                print(f"✅ Successfully loaded {DEFAULT_ASR_MODEL} on CPU")
                return model, 'cpu'
            except Exception as cpu_e:
                print(f"💀 Complete failure: {cpu_e}")

    return None, None
|
| 195 |
+
|
| 196 |
+
def cleanup_asr_model(asr_model):
    """Release memory held by an ASR model on a best-effort basis.

    Only this function's own reference is dropped; the caller must also
    discard its reference (e.g. ``model = None``) before the weights can
    actually be freed. Does nothing when ``asr_model`` is None.

    Args:
        asr_model: The model object to release, or None.
    """
    import gc

    if asr_model is None:
        return

    try:
        del asr_model
        # Collect reference cycles first so their tensors can be released
        # before the CUDA cache is emptied.
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        print("🧹 ASR model cleaned up")
    except Exception as e:
        logging.warning(f"Failed to cleanup ASR model: {e}")
|
| 206 |
+
|
| 207 |
+
def get_asr_memory_info():
    """Collect GPU memory figures for ASR debugging.

    Returns:
        dict: ``vram_total_mb``, ``vram_allocated_mb``,
        ``vram_available_for_asr_mb`` (after the default safety buffer) and
        ``has_gpu``.
    """
    vram_status = get_real_time_vram_status()
    return {
        'vram_total_mb': vram_status['total_mb'],
        'vram_allocated_mb': vram_status['allocated_mb'],
        'vram_available_for_asr_mb': calculate_available_vram_for_asr(),
        'has_gpu': vram_status['has_gpu']
    }
|
| 220 |
+
|
| 221 |
+
if __name__ == "__main__":
    # Manual smoke test: report memory, then try one adaptive load.
    print("Testing ASR Manager...")
    info = get_asr_memory_info()
    print(f"Memory info: {info}")

    # No config is passed, so only the default-model path is exercised.
    model, device = load_asr_model_adaptive()
    if model:
        print(f"Test successful: Model loaded on {device}")
        cleanup_asr_model(model)
    else:
        print("Test failed: No model loaded")
|
modules/system_detector.py
ADDED
|
@@ -0,0 +1,231 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
System Resource Detection Module
|
| 3 |
+
Detects VRAM, RAM, CPU cores and recommends appropriate ASR models
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import psutil
|
| 7 |
+
import torch
|
| 8 |
+
import os
|
| 9 |
+
import sys
|
| 10 |
+
from pathlib import Path
|
| 11 |
+
|
| 12 |
+
# Add project root to path for imports
|
| 13 |
+
if __name__ == "__main__":
|
| 14 |
+
sys.path.insert(0, str(Path(__file__).parent.parent))
|
| 15 |
+
|
| 16 |
+
from config.config import ASR_MODEL_VRAM_MB, ASR_MODEL_RAM_MB
|
| 17 |
+
|
| 18 |
+
def get_gpu_memory():
    """Get total, available and allocated memory of the first GPU in MB.

    "Available" is total device memory minus what this process has allocated
    through torch.

    Returns:
        dict: ``total_mb``, ``available_mb`` and ``allocated_mb``; all 0 when
        CUDA is unavailable, no device is present, or the query raises.
    """
    mib = 1024 * 1024
    try:
        if torch.cuda.is_available() and torch.cuda.device_count() > 0:
            # Use first GPU
            total_vram = torch.cuda.get_device_properties(0).total_memory
            allocated_vram = torch.cuda.memory_allocated(0)
            available_vram = total_vram - allocated_vram

            return {
                'total_mb': total_vram // mib,
                'available_mb': available_vram // mib,
                'allocated_mb': allocated_vram // mib
            }
    except Exception:
        # Best effort: a failed query is reported as "no GPU memory".
        pass

    return {'total_mb': 0, 'available_mb': 0, 'allocated_mb': 0}
|
| 38 |
+
|
| 39 |
+
def get_system_memory():
    """Get total, available and used system RAM in MB.

    Returns:
        dict: ``total_mb``, ``available_mb`` and ``used_mb`` taken from
        ``psutil.virtual_memory()``; all 0 if the query raises.
    """
    mib = 1024 * 1024
    try:
        memory = psutil.virtual_memory()
        return {
            'total_mb': memory.total // mib,
            'available_mb': memory.available // mib,
            'used_mb': memory.used // mib
        }
    except Exception:
        # Best effort: report zeros rather than failing system detection.
        return {'total_mb': 0, 'available_mb': 0, 'used_mb': 0}
|
| 50 |
+
|
| 51 |
+
def get_cpu_cores():
    """Get the number of CPU cores, preferring physical over logical.

    Returns:
        int: Physical core count, else logical core count, else 1.
        ``psutil.cpu_count`` may return None when the count cannot be
        determined; 1 is returned in that case so callers can always compare
        the result numerically.
    """
    try:
        return psutil.cpu_count(logical=False) or psutil.cpu_count() or 1
    except Exception:
        return 1
|
| 57 |
+
|
| 58 |
+
def estimate_tts_vram_usage():
    """Estimate VRAM usage by ChatterboxTTS in MB (updated based on real usage).

    Returns:
        int: A fixed estimate of 5500 MB.
    """
    # 5.5GB in MB (was 7GB, adjusted based on actual 3.5GB usage + buffer)
    return 5500
|
| 61 |
+
|
| 62 |
+
def get_system_profile():
    """Get complete system resource profile.

    Returns:
        dict: ``gpu`` and ``ram`` (memory dicts in MB), ``cpu_cores``,
        ``available_vram_after_tts`` (currently available VRAM minus the TTS
        estimate, never below 0) and ``has_gpu`` (True when the reported
        total VRAM is above 0).
    """
    gpu_info = get_gpu_memory()

    # The full TTS estimate is subtracted from whatever is available now.
    available_vram_after_tts = max(
        0, gpu_info['available_mb'] - estimate_tts_vram_usage()
    )

    return {
        'gpu': gpu_info,
        'ram': get_system_memory(),
        'cpu_cores': get_cpu_cores(),
        'available_vram_after_tts': available_vram_after_tts,
        'has_gpu': gpu_info['total_mb'] > 0
    }
|
| 79 |
+
|
| 80 |
+
def categorize_system(profile):
    """Categorize system capabilities as "low", "medium" or "high".

    Thresholds (value below the first is "low", up to and including the
    second is "medium", above it is "high"):
        VRAM: 4000 / 12000 MB total
        RAM: 16000 / 64000 MB total
        CPU: 6 / 16 cores

    Args:
        profile: Dict providing ``profile['gpu']['total_mb']``,
            ``profile['ram']['total_mb']`` and ``profile['cpu_cores']``.

    Returns:
        dict: ``vram``, ``ram`` and ``cpu`` category strings.
    """
    def _tier(value, low_below, medium_up_to):
        # One ladder shared by all three resources.
        if value < low_below:
            return "low"
        if value <= medium_up_to:
            return "medium"
        return "high"

    return {
        'vram': _tier(profile['gpu']['total_mb'], 4000, 12000),
        'ram': _tier(profile['ram']['total_mb'], 16000, 64000),
        'cpu': _tier(profile['cpu_cores'], 6, 16)
    }
|
| 115 |
+
|
| 116 |
+
def get_safe_asr_models(profile):
    """Get ASR models that can safely run on GPU with available VRAM.

    Args:
        profile: Dict providing ``available_vram_after_tts`` in MB.

    Returns:
        list: Model names from ``ASR_MODEL_VRAM_MB`` whose requirement does
        not exceed that budget, in the table's iteration order.
    """
    available_vram = profile['available_vram_after_tts']
    return [
        model
        for model, vram_req in ASR_MODEL_VRAM_MB.items()
        if vram_req <= available_vram
    ]
|
| 126 |
+
|
| 127 |
+
def get_safe_cpu_models(profile):
    """Get ASR models that can safely run on CPU with available RAM.

    Args:
        profile: Dict providing ``profile['ram']['available_mb']``.

    Returns:
        list: Model names from ``ASR_MODEL_RAM_MB`` whose requirement does
        not exceed the available RAM, in the table's iteration order.
    """
    available_ram = profile['ram']['available_mb']
    return [
        model
        for model, ram_req in ASR_MODEL_RAM_MB.items()
        if ram_req <= available_ram
    ]
|
| 137 |
+
|
| 138 |
+
def recommend_asr_models(profile):
    """Recommend Safe/Moderate/Insane ASR model configurations.

    For the GPU and the CPU independently, three models are picked from the
    models that fit:
        safe: the smallest fitting model.
        moderate: the largest fitting model at most two priority steps above
            the safe one.
        insane: the largest fitting model.

    Args:
        profile: System profile accepted by ``get_safe_asr_models`` and
            ``get_safe_cpu_models``.

    Returns:
        dict: Keys ``safe``, ``moderate`` and ``insane``. Each maps to
        ``{'primary': {'model', 'device'}, 'fallback': {'model', 'device'}}``.
        The primary uses the GPU pick when one exists, otherwise the CPU
        pick; the fallback is always the CPU pick. A model is None when
        nothing fits.
    """
    safe_gpu_models = set(get_safe_asr_models(profile))
    safe_cpu_models = set(get_safe_cpu_models(profile))

    # Model priority order (best to worst). Models absent from this list are
    # never recommended.
    model_priority = ["large-v3", "large", "large-v2", "medium", "small", "base", "tiny"]

    def _first_fit(candidates, allowed):
        # First candidate, in the given order, that is in the allowed set.
        return next((model for model in candidates if model in allowed), None)

    def _pick_levels(allowed):
        # (safe, moderate, insane) picks for one device's fitting models.
        safe = _first_fit(reversed(model_priority), allowed)
        insane = _first_fit(model_priority, allowed)
        # Anchor "moderate" to this device's own safe pick. Previously the
        # CPU pick reused the GPU's index, so CPU-only machines stepped up
        # by a different amount than machines with a GPU.
        safe_idx = model_priority.index(safe) if safe else len(model_priority)
        moderate = _first_fit(model_priority[max(0, safe_idx - 2):], allowed)
        return safe, moderate, insane

    def _entry(gpu_model, cpu_model):
        return {
            'primary': {'model': gpu_model or cpu_model, 'device': 'gpu' if gpu_model else 'cpu'},
            'fallback': {'model': cpu_model, 'device': 'cpu'}
        }

    gpu_picks = _pick_levels(safe_gpu_models)
    cpu_picks = _pick_levels(safe_cpu_models)

    return {
        level: _entry(gpu_model, cpu_model)
        for level, gpu_model, cpu_model in zip(
            ('safe', 'moderate', 'insane'), gpu_picks, cpu_picks
        )
    }
|
| 208 |
+
|
| 209 |
+
def print_system_summary(profile):
    """Print a human-readable system summary.

    Args:
        profile: System profile dict providing ``gpu``, ``ram``,
            ``cpu_cores``, ``available_vram_after_tts`` and ``has_gpu``.
    """
    categories = categorize_system(profile)
    gpu = profile['gpu']
    ram = profile['ram']

    print("🖥️ System Profile:")
    print(f" VRAM: {gpu['total_mb']:,}MB total, {profile['available_vram_after_tts']:,}MB available after TTS ({categories['vram']} class)")
    print(f" RAM: {ram['total_mb']:,}MB total, {ram['available_mb']:,}MB available ({categories['ram']} class)")
    print(f" CPU: {profile['cpu_cores']} cores ({categories['cpu']} class)")

    if not profile['has_gpu']:
        print(" ⚠️ No CUDA GPU detected - ASR will run on CPU only")
|
| 220 |
+
|
| 221 |
+
if __name__ == "__main__":
    # Manual smoke test: print the detected profile and the recommendations.
    profile = get_system_profile()
    print_system_summary(profile)

    recommendations = recommend_asr_models(profile)
    print("\nASR Model Recommendations:")
    for level, config in recommendations.items():
        primary = config['primary']
        fallback = config['fallback']
        print(f"🟢 {level.upper()}: {primary['model']} ({primary['device']}) + {fallback['model']} (cpu fallback)")
|
modules/voice_detector.py
ADDED
|
@@ -0,0 +1,240 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Voice Detection Module
|
| 3 |
+
Handles voice detection from multiple sources: JSON metadata, log files, filenames
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import re
|
| 7 |
+
import json
|
| 8 |
+
from pathlib import Path
|
| 9 |
+
from config.config import AUDIOBOOK_ROOT
|
| 10 |
+
from modules.file_manager import list_voice_samples
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
def get_likely_voices_for_book(book_name, chunks_json_path=None):
    """
    Get the most likely voice candidates for a book using the 3 detection methods:
    1. JSON metadata/comments (if available)
    2. run.log file
    3. Generated audiobook filenames (may return multiple)

    A detected name is kept only if a matching voice sample file is found,
    and each name is listed once, under the first method that found it.

    Args:
        book_name: Book name passed to the log and filename detectors.
        chunks_json_path: Optional path to the chunks JSON; method 1 is
            skipped when it is falsy.

    Returns: list of (voice_name, voice_path, detection_method) tuples
    """
    print(f"🔍 Finding likely voices for book: {book_name}")
    likely_voices = []

    def _add_candidate(voice_name, method, source_label):
        # Keep the voice only if a sample file exists and it is not listed yet.
        voice_path = find_voice_file_by_name(voice_name)
        if voice_path and not any(v[0] == voice_name for v in likely_voices):
            likely_voices.append((voice_name, voice_path, method))
            print(f"✅ Voice found in {source_label}: {voice_name}")

    # Method 1: Check JSON metadata and comments
    if chunks_json_path:
        voice_from_json = get_voice_from_json(chunks_json_path)
        if voice_from_json:
            _add_candidate(voice_from_json, "json_metadata", "JSON")

    # Method 2: Check run.log file
    voice_from_log = get_voice_from_log(book_name)
    if voice_from_log:
        _add_candidate(voice_from_log, "run_log", "run.log")

    # Method 3: Check generated filename patterns (may find multiple)
    for voice_name in get_voices_from_filenames(book_name):
        _add_candidate(voice_name, "filename_pattern", "filename")

    if not likely_voices:
        print(f"⚠️ No likely voices detected for {book_name}")
    else:
        print(f"📋 Found {len(likely_voices)} likely voice candidates")

    return likely_voices
|
| 60 |
+
|
| 61 |
+
def detect_voice_for_book(book_name, chunks_json_path=None):
    """
    Detect the most likely voice for a book (returns first candidate)
    For backwards compatibility with existing code

    Returns:
        tuple: ``(voice_name, voice_path, detection_method)`` for the first
        candidate, or ``(None, None, "not_found")`` when none was detected.
    """
    likely_voices = get_likely_voices_for_book(book_name, chunks_json_path)
    if not likely_voices:
        return None, None, "not_found"
    return likely_voices[0]  # Return the first (most likely) candidate
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
def get_voice_from_json(json_path):
    """Extract voice information from a chunks JSON file.

    Two sources are checked, in order:
        1. A ``voice_used`` key on the top-level dict, or on the first
           element when the top level is a list.
        2. A ``// voice: NAME`` comment anywhere in the file.

    ``//`` comment lines are ignored while parsing, so a file that carries a
    voice comment can still yield its metadata, and the comment is still
    consulted when the JSON itself cannot be parsed.

    Args:
        json_path: Path of the JSON file to inspect.

    Returns:
        The detected voice name, or None if the file cannot be read or holds
        no voice information.
    """
    try:
        with open(json_path, 'r', encoding='utf-8') as f:
            content = f.read()
    except (OSError, ValueError) as e:
        # ValueError covers undecodable bytes (UnicodeDecodeError).
        print(f"⚠️ Error reading JSON for voice info: {e}")
        return None

    # Cheap substring test first so large files without metadata are not parsed.
    if '"voice_used":' in content:
        # A JSON string cannot contain a raw newline, so a line starting with
        # "//" is never part of the data and can be dropped before parsing.
        json_text = "\n".join(
            line for line in content.splitlines()
            if not line.lstrip().startswith("//")
        )
        try:
            data = json.loads(json_text)
        except ValueError as e:
            print(f"⚠️ Error reading JSON for voice info: {e}")
        else:
            holder = data[0] if isinstance(data, list) and data else data
            if isinstance(holder, dict) and 'voice_used' in holder:
                return holder['voice_used']

    # Check for voice as comment in JSON (fallback option)
    voice_comment_match = re.search(r'//\s*voice:\s*([^\n]+)', content, re.IGNORECASE)
    if voice_comment_match:
        return voice_comment_match.group(1).strip()

    return None
|
| 95 |
+
|
| 96 |
+
|
| 97 |
+
def get_voice_from_log(book_name):
    """Extract voice information from a book's run.log file.

    The log is read from ``AUDIOBOOK_ROOT/<book_name>/run.log``. The first
    line that (after stripping) starts with ``"Voice: "`` or
    ``"Voice used: "`` supplies the name.

    Args:
        book_name: Name of the book's folder under ``AUDIOBOOK_ROOT``.

    Returns:
        The voice name, or None when the log is missing, unreadable or has
        no such line.
    """
    log_file = Path(AUDIOBOOK_ROOT) / book_name / "run.log"
    if not log_file.exists():
        return None

    try:
        with open(log_file, 'r', encoding='utf-8') as f:
            for line in f:
                line = line.strip()
                if line.startswith(("Voice: ", "Voice used: ")):
                    # Both prefixes contain ": ", so index 1 always exists.
                    return line.split(": ", 1)[1].strip()
    except Exception as e:
        print(f"⚠️ Error reading run log: {e}")

    return None
|
| 114 |
+
|
| 115 |
+
|
| 116 |
+
def get_voices_from_filenames(book_name):
    """Extract voice names from existing audiobook filename patterns (may return multiple).

    Looks in ``AUDIOBOOK_ROOT/<book_name>`` for ``*.wav`` and then ``*.m4b``
    files whose name ends in ``[VoiceName].<ext>``.

    Args:
        book_name: Name of the book's folder under ``AUDIOBOOK_ROOT``.

    Returns:
        list: Unique voice names, WAV matches first; within each extension
        files are visited in sorted order so the result is deterministic.
        Empty when the folder does not exist.
    """
    book_dir = Path(AUDIOBOOK_ROOT) / book_name
    if not book_dir.exists():
        return []

    # Matches e.g. "BookName [VoiceName].wav" or "BookName[VoiceName].m4b".
    voice_pattern = re.compile(r'\[([^\]]+)\]\.(?:wav|m4b)$')

    found_voices = []
    for glob_pattern in ("*.wav", "*.m4b"):
        # glob order is unspecified; sort so the first voice is stable.
        for audio_file in sorted(book_dir.glob(glob_pattern)):
            match = voice_pattern.search(audio_file.name)
            if match and match.group(1) not in found_voices:
                found_voices.append(match.group(1))

    return found_voices
|
| 143 |
+
|
| 144 |
+
def get_voice_from_filename(book_name):
    """Extract voice name from existing audiobook filename patterns (backwards compatibility).

    Returns:
        The first name reported by ``get_voices_from_filenames``, or None
        when it reports none.
    """
    voices = get_voices_from_filenames(book_name)
    if not voices:
        return None
    return voices[0]
|
| 148 |
+
|
| 149 |
+
|
| 150 |
+
def find_voice_file_by_name(voice_name):
    """Find a voice sample file by name among ``list_voice_samples()``.

    Matching order:
        1. Exact match against the file stem.
        2. Case-insensitive substring match against the file stem.

    Args:
        voice_name: Voice name to look for.

    Returns:
        The first matching entry from ``list_voice_samples()``, or None when
        nothing matches or ``voice_name`` is empty/None.
    """
    # An empty string is a substring of every stem and would match the first
    # sample; None has no .lower(). Neither identifies a voice.
    if not voice_name:
        return None

    voice_files = list_voice_samples()

    # Exact match first
    for voice_file in voice_files:
        if voice_file.stem == voice_name:
            return voice_file

    # Partial match (case insensitive)
    voice_name_lower = voice_name.lower()
    for voice_file in voice_files:
        if voice_name_lower in voice_file.stem.lower():
            return voice_file

    return None
|
| 166 |
+
|
| 167 |
+
|
| 168 |
+
|
| 169 |
+
|
| 170 |
+
def add_voice_to_json(json_path, voice_name, method="metadata"):
    """Add voice information to a JSON file, rewriting it in place.

    Args:
        json_path: Location of the JSON file (``str`` or ``pathlib.Path``).
        voice_name: Voice name to record.
        method: How to store the voice:
            - "metadata": store it under the ``voice_used`` key. For a
              top-level list, the first element is updated when it is a dict
              without any key starting with ``text``; otherwise a new
              ``{"voice_used": ..., "_metadata": True}`` element is inserted
              at the front. For a top-level dict the key is set directly.
            - "comment": put a ``// voice: <name>`` line at the top of the
              file, replacing any existing voice comment line.

    Returns:
        True when the voice was recorded, False for an unknown ``method``,
        an unsupported top-level JSON type, or any I/O / parse error.
    """
    voice_marker = '// voice:'
    try:
        # Accept plain strings: ``.name`` is needed for the success message.
        json_path = Path(json_path)

        with open(json_path, 'r', encoding='utf-8') as f:
            content = f.read()

        # Voice comment lines are not valid JSON; set them aside so that
        # either method works on a file the other method touched earlier.
        body_lines = [
            line for line in content.split('\n')
            if not line.strip().startswith(voice_marker)
        ]

        if method == "metadata":
            data = json.loads('\n'.join(body_lines))

            if isinstance(data, list):
                first = data[0] if data else None
                if isinstance(first, dict) and not any(key.startswith('text') for key in first):
                    # First element is already metadata
                    first['voice_used'] = voice_name
                else:
                    # Insert metadata as first element
                    data.insert(0, {"voice_used": voice_name, "_metadata": True})
            elif isinstance(data, dict):
                # For dict format, add to top level
                data['voice_used'] = voice_name
            else:
                # A bare scalar has nowhere to hold the voice; do not claim success.
                print(f"❌ Error adding voice to JSON: unsupported top-level JSON type {type(data).__name__}")
                return False

            # Save updated JSON
            with open(json_path, 'w', encoding='utf-8') as f:
                json.dump(data, f, indent=2, ensure_ascii=False)

        elif method == "comment":
            # Replace any previous voice comment rather than keeping a stale one.
            new_content = f"{voice_marker} {voice_name}\n" + '\n'.join(body_lines)
            if new_content != content:
                with open(json_path, 'w', encoding='utf-8') as f:
                    f.write(new_content)

        else:
            print(f"❌ Error adding voice to JSON: unknown method '{method}'")
            return False

        print(f"✅ Added voice '{voice_name}' to {json_path.name} using {method} method")
        return True

    except Exception as e:
        print(f"❌ Error adding voice to JSON: {e}")
        return False
|
| 218 |
+
|
| 219 |
+
|
| 220 |
+
def remove_voice_comment_from_json(json_path):
    """Strip ``// voice:`` comment lines from a JSON file so it parses cleanly.

    The file is rewritten only when at least one such line is present.

    Returns:
        True when comment lines were removed and the file was rewritten;
        False when there was nothing to remove or an error occurred.
    """
    try:
        with open(json_path, 'r', encoding='utf-8') as source:
            original_lines = source.read().split('\n')

        kept_lines = []
        removed_any = False
        for text_line in original_lines:
            if text_line.strip().startswith('// voice:'):
                removed_any = True
            else:
                kept_lines.append(text_line)

        if not removed_any:
            return False

        # Comments were removed, save cleaned version
        with open(json_path, 'w', encoding='utf-8') as target:
            target.write('\n'.join(kept_lines))
        return True

    except Exception as e:
        print(f"⚠️ Error cleaning JSON comments: {e}")
        return False
|
requirements.txt
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# ChatterboxTTS HuggingFace Spaces Requirements
|
| 2 |
+
# Optimized for HF Spaces environment with flexible versions
|
| 3 |
+
|
| 4 |
+
# Core ML and TTS - Essential (flexible versions for HF compatibility)
|
| 5 |
+
torch>=2.0.0
|
| 6 |
+
torchaudio>=2.0.0
|
| 7 |
+
transformers>=4.20.0
|
| 8 |
+
huggingface_hub>=0.15.0
|
| 9 |
+
safetensors>=0.3.0
|
| 10 |
+
|
| 11 |
+
# Audio processing - Required
|
| 12 |
+
soundfile>=0.12.0
|
| 13 |
+
librosa>=0.9.0
|
| 14 |
+
pydub>=0.25.0
|
| 15 |
+
audioread>=3.0.0
|
| 16 |
+
|
| 17 |
+
# ASR System - Intelligent ASR with fallback
|
| 18 |
+
openai-whisper>=20231117
|
| 19 |
+
|
| 20 |
+
# System monitoring and resource detection
|
| 21 |
+
psutil>=5.8.0
|
| 22 |
+
pynvml>=11.0.0
|
| 23 |
+
|
| 24 |
+
# Core scientific computing (flexible for HF environment)
|
| 25 |
+
numpy>=1.21.0
|
| 26 |
+
scipy>=1.7.0
|
| 27 |
+
|
| 28 |
+
# Text processing
|
| 29 |
+
regex>=2023.0.0
|
| 30 |
+
vaderSentiment>=3.3.0
|
| 31 |
+
|
| 32 |
+
# Web interface - Gradio (let HF manage version)
|
| 33 |
+
gradio>=4.0.0
|
| 34 |
+
|
| 35 |
+
# Progress and logging
|
| 36 |
+
tqdm>=4.60.0
|
| 37 |
+
|
| 38 |
+
# File handling
|
| 39 |
+
pathlib2>=2.3.0
|
| 40 |
+
|
| 41 |
+
# Configuration and utilities
|
| 42 |
+
python-dotenv>=1.0.0
|
| 43 |
+
|
| 44 |
+
# Optional utilities
|
| 45 |
+
requests>=2.25.0
|
| 46 |
+
packaging>=21.0
|
| 47 |
+
|
| 48 |
+
# Core ChatterboxTTS model dependencies
|
| 49 |
+
resemble-perth>=1.0.1
|
| 50 |
+
omegaconf>=2.3.0
|
| 51 |
+
einops>=0.6.0
|
| 52 |
+
diffusers>=0.21.0
|
| 53 |
+
tokenizers>=0.13.0
|
| 54 |
+
conformer>=0.3.0
|
| 55 |
+
s3tokenizer==0.2.0
|
tools/combine_only.py
ADDED
|
@@ -0,0 +1,396 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Combine Only Tool
|
| 3 |
+
Standalone tool for combining existing audio chunks into final audiobook
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import re
|
| 7 |
+
import time
|
| 8 |
+
import logging
|
| 9 |
+
from datetime import timedelta
|
| 10 |
+
from pathlib import Path
|
| 11 |
+
|
| 12 |
+
from config.config import *
|
| 13 |
+
from modules.file_manager import (
|
| 14 |
+
get_audio_files_in_directory, combine_audio_chunks,
|
| 15 |
+
convert_to_m4b, add_metadata_to_m4b, find_book_files
|
| 16 |
+
)
|
| 17 |
+
from modules.audio_processor import get_wav_duration
|
| 18 |
+
from modules.progress_tracker import log_console, log_run
|
| 19 |
+
import subprocess
|
| 20 |
+
import shutil
|
| 21 |
+
|
| 22 |
+
def combine_audio_for_book(book_path_str, voice_name=None):
    """Combine the audio chunks of one book without prompting (GUI-friendly).

    Looks for chunks under ``<book>/TTS/audio_chunks``, reports missing
    chunk numbers as a warning only, prints a duration summary and then
    delegates to ``_perform_combine_operation``.

    Args:
        book_path_str: Path of the book's audiobook folder.
        voice_name: Optional voice name forwarded to the combine step.

    Returns:
        False when the chunk folder or the chunks are missing; otherwise
        whatever ``_perform_combine_operation`` returns.
    """
    book_path = Path(book_path_str)

    print(f"\n{CYAN}🔗 Combining Audio Chunks for: {book_path.name}{RESET}")
    print("=" * 60)

    # Locate the chunk folder
    audio_chunks_dir = book_path / "TTS" / "audio_chunks"
    if not audio_chunks_dir.exists():
        print(f"{RED}❌ No audio_chunks folder found in {book_path}{RESET}")
        print(f"💡 Make sure this book has been processed with TTS generation first.")
        return False

    # Collect the chunk files
    wav_chunks = get_audio_files_in_directory(audio_chunks_dir)
    if not wav_chunks:
        print(f"{RED}❌ No chunk_*.wav files found in {audio_chunks_dir}{RESET}")
        print(f"💡 Expected files like: chunk_00001.wav, chunk_00002.wav, etc.")
        return False

    chunk_total = len(wav_chunks)
    print(f"\n📦 Found {GREEN}{chunk_total}{RESET} audio chunks")

    # Report gaps in the numbering, but carry on: there is nobody to ask here
    gaps = verify_chunk_sequence(wav_chunks)
    if gaps:
        print(f"\n⚠️ {YELLOW}Warning: Missing chunks detected:{RESET}")
        for missing_number in gaps[:10]:  # Show first 10 missing
            print(f" Missing: chunk_{missing_number:05}.wav")
        hidden = len(gaps) - 10
        if hidden > 0:
            print(f" ... and {hidden} more")
        print(f"{YELLOW}🔄 Continuing with available chunks for GUI operation...{RESET}")

    # Summarise what will be combined
    seconds_total = sum(get_wav_duration(wav) for wav in wav_chunks)
    pretty_total = str(timedelta(seconds=int(seconds_total)))

    print(f"\n📊 Chunk Analysis:")
    print(f" Total Chunks: {GREEN}{chunk_total}{RESET}")
    print(f" Total Duration: {GREEN}{pretty_total}{RESET}")
    print(f" Average Chunk: {GREEN}{seconds_total/chunk_total:.1f}s{RESET}")

    # Perform the actual combine operation
    return _perform_combine_operation(book_path, wav_chunks, seconds_total, voice_name)
|
| 70 |
+
|
| 71 |
+
def _perform_combine_operation(book_path, chunk_paths, total_duration, voice_name=None):
    """Combine chunks into one WAV, convert it to M4B and attach metadata.

    Output files are written into ``book_path`` and named
    ``<book> [<voice>]`` when ``voice_name`` is given, otherwise
    ``<book>_combined``. Cover and NFO files are looked up under
    ``TEXT_INPUT_ROOT/<book>`` via ``find_book_files``.

    Args:
        book_path: ``Path`` of the book's audiobook folder.
        chunk_paths: Chunk files handed to ``combine_audio_chunks``.
        total_duration: Total audio length in seconds (used for statistics).
        voice_name: Optional voice name used in the output file names.

    Returns:
        True when the final M4B exists afterwards, False on any failure.
    """
    basename = book_path.name

    # Determine file naming based on voice
    file_suffix = f" [{voice_name}]" if voice_name else "_combined"

    # Start timing
    start_time = time.time()

    # Create concat file and combine
    print(f"\n🔗 Combining audio chunks...")
    combined_wav_path = book_path / f"{basename}{file_suffix}.wav"

    try:
        combine_audio_chunks(chunk_paths, combined_wav_path)
        print(f"✅ Combined WAV created: {combined_wav_path.name}")
    except Exception as e:
        print(f"{RED}❌ Failed to combine chunks: {e}{RESET}")
        return False

    # Find metadata files. Path() keeps this working if the config constant
    # is a plain string, matching how voice_detector wraps AUDIOBOOK_ROOT.
    text_book_dir = Path(TEXT_INPUT_ROOT) / basename
    book_files = find_book_files(text_book_dir)
    cover_file, nfo_file = book_files['cover'], book_files['nfo']

    if not cover_file:
        print(f"⚠️ {YELLOW}No cover image found in {text_book_dir}{RESET}")
    else:
        print(f"📸 Using cover: {cover_file.name}")

    if not nfo_file:
        print(f"⚠️ {YELLOW}No book.nfo metadata found in {text_book_dir}{RESET}")
    else:
        print(f"📝 Using metadata: {nfo_file.name}")

    # M4B conversion
    print(f"\n📱 Converting to M4B audiobook...")
    temp_m4b_path = book_path / "temp_output.m4b"
    final_m4b_path = book_path / f"{basename}{file_suffix}.m4b"

    def remove_temp_m4b():
        """Best-effort removal of the intermediate M4B on every exit path."""
        try:
            if temp_m4b_path.exists():
                temp_m4b_path.unlink()
                print(f"🧹 Cleaned up temporary file: {temp_m4b_path.name}")
        except Exception as e:
            print(f"⚠️ Could not clean up temp file: {e}")

    try:
        convert_to_m4b(combined_wav_path, temp_m4b_path)
        add_metadata_to_m4b(temp_m4b_path, final_m4b_path, cover_file, nfo_file)
        print(f"✅ M4B audiobook created: {final_m4b_path.name}")
    except Exception as e:
        print(f"{RED}❌ Failed to create M4B: {e}{RESET}")
        # Do not leave a half-written temp file behind for the next run.
        remove_temp_m4b()
        return False

    # Calculate final timing
    elapsed_total = time.time() - start_time
    elapsed_td = timedelta(seconds=int(elapsed_total))

    # Verify final file
    if not final_m4b_path.exists():
        print(f"{RED}❌ Final M4B file was not created successfully{RESET}")
        remove_temp_m4b()
        return False

    final_size = final_m4b_path.stat().st_size / (1024 * 1024)  # MB
    print(f"📦 Final file size: {GREEN}{final_size:.1f} MB{RESET}")

    # Calculate efficiency
    realtime_factor = total_duration / elapsed_total if elapsed_total > 0 else 0
    duration_str = str(timedelta(seconds=int(total_duration)))

    print(f"\n🎉 {GREEN}Combine completed successfully!{RESET}")
    print(f"📊 Final Statistics:")
    print(f" Audio Duration: {GREEN}{duration_str}{RESET}")
    print(f" Processing Time: {GREEN}{elapsed_td}{RESET}")
    print(f" Realtime Factor: {GREEN}{realtime_factor:.2f}x{RESET}")
    print(f" Output Location: {GREEN}{final_m4b_path}{RESET}")

    # Clean up temp files
    remove_temp_m4b()

    return True
|
| 158 |
+
|
| 159 |
+
def run_combine_only_mode():
    """Interactively pick a processed book and combine its chunks (CLI version).

    Lists the folders under ``AUDIOBOOK_ROOT``, asks for an index, warns
    about missing chunk numbers (asking whether to continue) and then runs
    ``_perform_combine_operation`` without a voice name.

    Returns:
        Path of ``<book>/<book>_combined.m4b`` on success; None when there
        are no books, the user cancels, chunks are missing, or the combine
        step fails.
    """
    print(f"\n{CYAN}🔗 Combine-Only Mode: Assembling Existing Audio Chunks{RESET}")
    print("=" * 60)

    # Show available audiobooks. A missing root is treated like an empty one
    # instead of letting iterdir() raise.
    audiobook_root = Path(AUDIOBOOK_ROOT)
    books = []
    if audiobook_root.exists():
        books = sorted(d for d in audiobook_root.iterdir() if d.is_dir())
    if not books:
        print(f"{RED}❌ No folders found in Audiobook/ directory.{RESET}")
        print(f"💡 Make sure you have processed books with audio chunks to combine.")
        return None

    print(f"{CYAN}Available audiobooks to combine:{RESET}")
    for i, book in enumerate(books):
        # Check if it has audio chunks
        audio_chunks_dir = book / "TTS" / "audio_chunks"
        if audio_chunks_dir.exists():
            chunk_count = len(list(audio_chunks_dir.glob('chunk_*.wav')))
            status = f"({chunk_count} chunks)" if chunk_count > 0 else "(no chunks)"
            print(f" [{i}] {book.name} {status}")
        else:
            print(f" [{i}] {book.name} (no TTS folder)")

    # Book selection
    while True:
        try:
            idx = int(input(f"\n{YELLOW}Select audiobook index: {RESET}"))
        except ValueError:
            print(f"{RED}Invalid selection. Please try again.{RESET}")
            continue
        except KeyboardInterrupt:
            # Ctrl-C must leave the prompt; treating it as bad input would
            # trap the user in this loop.
            print(f"\n{RED}🛑 Combine operation cancelled.{RESET}")
            return None
        except EOFError:
            print(f"\n{RED}❌ Input error - unable to read selection.{RESET}")
            return None
        except Exception as e:
            print(f"{RED}❌ Unexpected error: {e}{RESET}")
            return None

        if 0 <= idx < len(books):
            break
        print(f"{RED}Invalid selection. Please enter a number between 0 and {len(books)-1}.{RESET}")

    selected_book = books[idx]
    basename = selected_book.name

    print(f"\n🎯 Selected: {BOLD}{basename}{RESET}")

    # Setup paths
    audio_chunks_dir = selected_book / "TTS" / "audio_chunks"

    if not audio_chunks_dir.exists():
        print(f"{RED}❌ No audio_chunks folder found in {selected_book}{RESET}")
        print(f"💡 Make sure this book has been processed with TTS generation first.")
        return None

    # Find audio chunks
    chunk_paths = get_audio_files_in_directory(audio_chunks_dir)

    if not chunk_paths:
        print(f"{RED}❌ No chunk_*.wav files found in {audio_chunks_dir}{RESET}")
        print(f"💡 Expected files like: chunk_00001.wav, chunk_00002.wav, etc.")
        return None

    print(f"\n📦 Found {GREEN}{len(chunk_paths)}{RESET} audio chunks")

    # Verify chunk sequence
    missing_chunks = verify_chunk_sequence(chunk_paths)
    if missing_chunks:
        print(f"\n⚠️ {YELLOW}Warning: Missing chunks detected:{RESET}")
        for chunk_num in missing_chunks[:10]:  # Show first 10 missing
            print(f" Missing: chunk_{chunk_num:05}.wav")
        if len(missing_chunks) > 10:
            print(f" ... and {len(missing_chunks) - 10} more")

        try:
            continue_anyway = input(f"\n{YELLOW}Continue with incomplete chunks? [y/N]: {RESET}").strip().lower()
            if continue_anyway != 'y':
                print("🛑 Combine operation cancelled.")
                return None
        except (EOFError, KeyboardInterrupt):
            print(f"\n{RED}🛑 Combine operation cancelled.{RESET}")
            return None

    # Display chunk info
    total_duration = sum(get_wav_duration(chunk_path) for chunk_path in chunk_paths)
    duration_str = str(timedelta(seconds=int(total_duration)))

    print(f"\n📊 Chunk Analysis:")
    print(f" Total Chunks: {GREEN}{len(chunk_paths)}{RESET}")
    print(f" Total Duration: {GREEN}{duration_str}{RESET}")
    print(f" Average Chunk: {GREEN}{total_duration/len(chunk_paths):.1f}s{RESET}")

    # Use the shared combine operation (CLI doesn't pass voice name)
    success = _perform_combine_operation(selected_book, chunk_paths, total_duration)

    if success:
        return selected_book / f"{basename}_combined.m4b"
    return None
|
| 257 |
+
|
| 258 |
+
def verify_chunk_sequence(chunk_paths):
    """Return the chunk numbers missing from a ``chunk_<n>.wav`` sequence.

    The expected sequence runs from 1 up to the highest number present.

    Args:
        chunk_paths: Iterable of objects with a ``.name`` attribute (e.g.
            ``pathlib.Path``). Names that are not exactly
            ``chunk_<digits>.wav`` are ignored.

    Returns:
        Ascending list of missing chunk numbers; empty when nothing is
        missing or no chunk file names were recognised.
    """
    present = set()
    for chunk_path in chunk_paths:
        # fullmatch: a stray "chunk_00001.wav.bak" must not count as chunk 1
        match = re.fullmatch(r"chunk_(\d+)\.wav", chunk_path.name)
        if match:
            present.add(int(match.group(1)))

    if not present:
        return []

    # Set membership keeps this linear even for books with many chunks.
    return [num for num in range(1, max(present) + 1) if num not in present]
|
| 275 |
+
|
| 276 |
+
def list_available_books_for_combine():
    """List books under ``AUDIOBOOK_ROOT`` that have audio chunks to combine.

    A book qualifies when ``<book>/TTS/audio_chunks`` exists and
    ``get_audio_files_in_directory`` returns at least one file for it.

    Returns:
        List of dicts, ordered by folder path, with keys ``name``, ``path``,
        ``chunk_count`` and ``duration`` (``H:MM:SS`` text, or "Unknown"
        when the duration could not be computed). Empty when the root
        folder does not exist.
    """
    books_info = []

    audiobook_root = Path(AUDIOBOOK_ROOT)
    if not audiobook_root.exists():
        return books_info

    # Sorted so callers see the same order as the CLI listing.
    for book_dir in sorted(audiobook_root.iterdir()):
        if not book_dir.is_dir():
            continue

        audio_chunks_dir = book_dir / "TTS" / "audio_chunks"
        if not audio_chunks_dir.exists():
            continue

        chunk_paths = get_audio_files_in_directory(audio_chunks_dir)
        if not chunk_paths:
            continue

        # Calculate total duration; an unreadable chunk should not hide the
        # book, but Ctrl-C / SystemExit must still propagate.
        try:
            total_duration = sum(get_wav_duration(chunk_path) for chunk_path in chunk_paths)
            duration_str = str(timedelta(seconds=int(total_duration)))
        except Exception:
            duration_str = "Unknown"

        books_info.append({
            "name": book_dir.name,
            "path": book_dir,
            "chunk_count": len(chunk_paths),
            "duration": duration_str,
        })

    return books_info
|
| 310 |
+
|
| 311 |
+
def quick_combine(book_name):
    """Combine one book's chunks into an M4B without prompts or metadata.

    Writes ``<book>_quick_combined.wav`` and ``<book>_quick_combined.m4b``
    into the book folder under ``AUDIOBOOK_ROOT``.

    Args:
        book_name: Name of the book folder.

    Returns:
        Path of the final M4B, or None when the book or its chunks are
        missing. Errors raised by the combine/convert helpers propagate.
    """
    book_path = Path(AUDIOBOOK_ROOT) / book_name

    if not book_path.exists():
        print(f"{RED}❌ Book '{book_name}' not found in Audiobook directory{RESET}")
        return None

    audio_chunks_dir = book_path / "TTS" / "audio_chunks"
    # Guard the missing-folder case here rather than relying on how the
    # helper treats a non-existent directory.
    chunk_paths = get_audio_files_in_directory(audio_chunks_dir) if audio_chunks_dir.exists() else []

    if not chunk_paths:
        print(f"{RED}❌ No audio chunks found for '{book_name}'{RESET}")
        return None

    print(f"🚀 Quick combining {len(chunk_paths)} chunks for '{book_name}'...")

    # Use same logic as main function but without interactive prompts
    combined_wav_path = book_path / f"{book_name}_quick_combined.wav"
    final_m4b_path = book_path / f"{book_name}_quick_combined.m4b"

    combine_audio_chunks(chunk_paths, combined_wav_path)

    temp_m4b_path = book_path / "temp_quick.m4b"
    try:
        convert_to_m4b(combined_wav_path, temp_m4b_path)

        # Simple M4B without metadata for quick operation.
        # replace() overwrites a previous result on every platform, whereas
        # rename() fails on Windows when the target already exists.
        temp_m4b_path.replace(final_m4b_path)
    finally:
        # Only still present if conversion or the move failed.
        if temp_m4b_path.exists():
            temp_m4b_path.unlink()

    print(f"✅ Quick combine complete: {final_m4b_path}")
    return final_m4b_path
|
| 342 |
+
|
| 343 |
+
def _atempo_filter_chain(speed_factor):
    """Build an ffmpeg audio filter expression for ``speed_factor``.

    The factor is split into chained ``atempo`` stages that each stay within
    [0.5, 2.0]; the product of the stages equals ``speed_factor``.
    """
    remaining = float(speed_factor)
    stages = []
    while remaining > 2.0:
        stages.append(2.0)
        remaining /= 2.0
    while remaining < 0.5:
        stages.append(0.5)
        remaining /= 0.5
    stages.append(remaining)
    return ",".join(f"atempo={stage:.6g}" for stage in stages)


def apply_playback_speed_to_m4b(input_m4b_path, output_m4b_path, speed_factor):
    """Apply playback speed adjustment to an M4B file using ffmpeg.

    The audio is re-encoded to AAC at 64k with an ``atempo`` filter chain.

    Args:
        input_m4b_path: Source M4B file.
        output_m4b_path: Destination file (overwritten if present).
        speed_factor: Positive, finite tempo multiplier (1.0 = unchanged).

    Returns:
        True when ffmpeg exits with code 0; False when the factor is
        invalid, ffmpeg is missing, fails, or exceeds the 5 minute timeout.
    """
    import math

    try:
        # Reject values ffmpeg cannot use before spawning a process.
        speed = float(speed_factor)
        if not math.isfinite(speed) or speed <= 0:
            print(f"❌ Error adjusting M4B speed: invalid speed factor {speed_factor!r}")
            return False

        print(f"🔄 Applying {speed_factor}x speed to {Path(input_m4b_path).name}")

        # Check if ffmpeg is available
        if not shutil.which('ffmpeg'):
            print("❌ ffmpeg not found - required for M4B speed adjustment")
            return False

        # Build ffmpeg command for speed adjustment
        cmd = [
            'ffmpeg', '-y',  # -y to overwrite output file
            '-i', str(input_m4b_path),
            '-filter:a', _atempo_filter_chain(speed),  # Audio speed adjustment
            '-c:a', 'aac',  # Re-encode to AAC for M4B compatibility
            '-b:a', '64k',  # Audio bitrate
            str(output_m4b_path)
        ]

        print(f"Running: {' '.join(cmd)}")

        # Execute ffmpeg command
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            timeout=300  # 5 minute timeout
        )

        if result.returncode == 0:
            print(f"✅ Successfully created speed-adjusted M4B: {Path(output_m4b_path).name}")
            return True

        print(f"❌ ffmpeg failed: {result.stderr}")
        return False

    except subprocess.TimeoutExpired:
        print("❌ M4B speed adjustment timed out")
        return False
    except Exception as e:
        print(f"❌ Error adjusting M4B speed: {e}")
        return False
|
| 386 |
+
|
| 387 |
+
if __name__ == "__main__":
    import sys

    # With an argument: python combine_only.py "Book Name" runs the
    # non-interactive quick combine; without one, the interactive menu.
    cli_args = sys.argv[1:]
    if cli_args:
        quick_combine(cli_args[0])
    else:
        run_combine_only_mode()
|
utils/abbreviations.txt
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Dr. -> Doctor
|
| 2 |
+
Mr. -> Mister
|
| 3 |
+
Mrs. -> Missus
|
| 4 |
+
Ms. -> Miss
|
| 5 |
+
U.S. -> US
|
| 6 |
+
U.K. -> UK
|
| 7 |
+
etc. -> et cetera
|
| 8 |
+
vs. -> versus
|
| 9 |
+
1st -> first
|
| 10 |
+
2nd -> second
|
| 11 |
+
3rd -> third
|
utils/abbreviations.txt~
ADDED
|
File without changes
|
utils/chunk_manager.TXT
ADDED
|
@@ -0,0 +1,153 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# chunk_manager.py
|
| 2 |
+
|
| 3 |
+
import re
|
| 4 |
+
from pathlib import Path
|
| 5 |
+
from text_cleaner import smart_punctuate # Assuming you've extracted this already
|
| 6 |
+
|
| 7 |
+
import json
|
| 8 |
+
|
| 9 |
+
def save_chunks_to_json(chunks, output_path):
    """Write ``chunks`` to ``output_path`` as indented UTF-8 JSON.

    Non-ASCII characters are written as-is, and a one-line confirmation
    with the chunk count is printed afterwards.
    """
    with open(output_path, 'w', encoding='utf-8') as handle:
        handle.write(json.dumps(chunks, indent=2, ensure_ascii=False))
    chunk_total = len(chunks)
    print(f"✅ Saved {chunk_total} chunks to: {output_path}")
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
def break_long_sentence(sentence, max_words):
    """Split ``sentence`` into pieces of at most ``max_words`` words.

    While the remaining text is too long, the break patterns are tried in
    priority order (", and", ", but", comma, semicolon, dash, period before
    a quote). The first match whose prefix fits within ``max_words`` is used
    as the split point. When no pattern yields a fitting prefix, the first
    ``max_words`` words are cut off and a comma is appended to that piece.

    Args:
        sentence: Text to split.
        max_words: Maximum number of whitespace-separated words per piece.

    Returns:
        List of pieces in original order; empty for blank input.

    Raises:
        ValueError: If ``max_words`` is less than 1 (the forced split could
            never consume any words and the loop would not terminate).
    """
    if max_words < 1:
        raise ValueError("max_words must be at least 1")

    break_patterns = [
        r'(,\s+and\s+)', r'(,\s+but\s+)', r'(,\s+)', r'(;\s*)', r'—', r'(\.\s*")',
    ]
    chunks = []
    remaining_text = sentence.strip()

    while remaining_text:
        words = remaining_text.split()
        if len(words) <= max_words:
            chunks.append(remaining_text.strip())
            break

        # Highest-priority pattern wins; within it, the earliest fitting match.
        break_pos = None
        for pattern in break_patterns:
            for match in re.finditer(pattern, remaining_text):
                if len(remaining_text[:match.end()].split()) <= max_words:
                    break_pos = match.end()
                    break
            if break_pos is not None:
                break

        if break_pos is not None:
            chunks.append(remaining_text[:break_pos].strip())
            remaining_text = remaining_text[break_pos:].strip()
        else:
            # No natural break fits: hard cut, with a comma as a pause hint.
            chunks.append(" ".join(words[:max_words]) + ",")
            remaining_text = " ".join(words[max_words:]).strip()

    return chunks
|
| 46 |
+
|
| 47 |
+
def fix_short_sentences(chunk_text):
    """Merge runs of very short sentences into one comma-separated sentence.

    When the chunk contains at least two capitalised words of 2-4 letters
    that are directly followed by a period and whitespace, every ". " in
    the chunk is replaced by ", " and a final period is ensured. Otherwise
    the chunk is returned unchanged.
    """
    short_hits = re.findall(r'\b[A-Z][a-z]{1,3}\.\s+', chunk_text)
    if len(short_hits) < 2:
        return chunk_text

    joined = ", ".join(chunk_text.split(". "))
    return joined if joined.endswith(".") else joined + "."
|
| 55 |
+
|
| 56 |
+
def detect_content_boundary(chunk_text):
    """Classify the structural boundary a chunk represents.

    Checks are made in this order:
        "chapter_start"  - text begins (after optional whitespace) with
                           "Chapter <digits>", in any letter case
        "section_break"  - text contains ``***``, ``---`` or ``###``
        "paragraph_end"  - text ends with a newline

    Returns:
        One of the labels above, or None when none applies.
    """
    chapter_heading = re.match(r'^\s*(Chapter \d+|CHAPTER \d+)', chunk_text, re.IGNORECASE)
    if chapter_heading is not None:
        return "chapter_start"

    divider = re.search(r'\*\*\*|---|###', chunk_text)
    if divider is not None:
        return "section_break"

    return "paragraph_end" if chunk_text.endswith(('\n\n', '\n')) else None
|
| 64 |
+
|
| 65 |
+
def sentence_chunk_text(text, max_words=30, min_words=4):
    """Split ``text`` into sentence-based chunks, paragraph by paragraph.

    Paragraphs are runs of non-blank lines. Within a paragraph each line is
    cut at sentence-ending punctuation, then:
        - sentences longer than ``max_words`` go through ``break_long_sentence``
        - sentences shorter than ``min_words`` are buffered and joined with
          ", " onto the next normal-length sentence
        - every resulting chunk is passed through ``fix_short_sentences``

    Args:
        text: Input text.
        max_words: Word limit above which a sentence is broken up.
        min_words: Word count below which a sentence is treated as short.

    Returns:
        List of ``(chunk_text, is_paragraph_end)`` tuples in reading order;
        the flag is True only for the last chunk of each paragraph.
    """
    sentence_end_re = re.compile(r'([.!?][\"\')]*\s+)')
    final_chunks = []

    def split_sentences(lines_in_para):
        """Cut each line at sentence ends, keeping any unterminated tail."""
        sentences = []
        for line in lines_in_para:
            start = 0
            for match in sentence_end_re.finditer(line):
                sentence = line[start:match.end()].strip()
                if sentence:
                    sentences.append(sentence)
                start = match.end()
            tail = line[start:].strip()
            if tail:
                sentences.append(tail)
        return sentences

    def flush_paragraph(lines_in_para):
        """Group one paragraph's sentences into chunks and record them."""
        temp_chunks = []
        short_group = []

        for sentence in split_sentences(lines_in_para):
            wc = len(sentence.split())

            if wc > max_words:
                # Emit buffered short sentences first; otherwise they would
                # surface after this long sentence and reorder the text.
                if short_group:
                    temp_chunks.append(", ".join(short_group).strip())
                    short_group = []
                temp_chunks.extend(
                    piece.strip() for piece in break_long_sentence(sentence, max_words)
                )
            elif wc < min_words:
                short_group.append(sentence)
            elif short_group:
                temp_chunks.append(", ".join(short_group + [sentence]).strip())
                short_group = []
            else:
                temp_chunks.append(sentence.strip())

        if short_group:
            temp_chunks.append(", ".join(short_group).strip())

        # Apply proper paragraph end tagging
        last_index = len(temp_chunks) - 1
        for i, chunk in enumerate(temp_chunks):
            final_chunks.append((fix_short_sentences(chunk), i == last_index))

    paragraph_buffer = []
    for line in text.splitlines():
        stripped = line.strip()
        if stripped:
            paragraph_buffer.append(stripped)
        else:
            # Blank line closes the current paragraph (a no-op when empty).
            flush_paragraph(paragraph_buffer)
            paragraph_buffer = []

    # Flush any remaining paragraph
    if paragraph_buffer:
        flush_paragraph(paragraph_buffer)

    return final_chunks
|
| 132 |
+
|
| 133 |
+
|
| 134 |
+
|
| 135 |
+
def prechunk_text_file(path, max_words=30, min_words=4):
    """Read a UTF-8 text file and return its chunks as enriched dicts.

    The file content is passed through ``smart_punctuate`` and then
    ``sentence_chunk_text``. Each resulting chunk becomes a dict with the
    keys ``index``, ``text`` (stripped), ``word_count``, ``boundary_type``
    (from ``detect_content_boundary``, "none" when it finds nothing) and
    ``is_paragraph_end``.
    """
    prepared_text = smart_punctuate(Path(path).read_text(encoding='utf-8'))
    chunk_pairs = sentence_chunk_text(prepared_text, max_words=max_words, min_words=min_words)

    def describe(position, body, ends_paragraph):
        """Build the record for one chunk."""
        boundary_kind = detect_content_boundary(body)
        trimmed = body.strip()
        return {
            "index": position,
            "text": trimmed,
            "word_count": len(trimmed.split()),
            "boundary_type": boundary_kind if boundary_kind else "none",
            "is_paragraph_end": ends_paragraph,
        }

    return [
        describe(position, body, ends_paragraph)
        for position, (body, ends_paragraph) in enumerate(chunk_pairs)
    ]
|
utils/dirlist.TXT
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
|
| 3 |
+
def list_directory_recursively(start_path):
    """
    Lists all folders and their files recursively starting from the given path.

    Output goes to stdout: one ``Folder:`` line per directory, followed by a
    ``File:`` line for each file in it and a blank separator line. Folders
    are visited, and files are printed, in alphabetical order.

    Args:
        start_path (str): The path to the directory to start listing from.

    Returns:
        None. If ``start_path`` is not a directory an error line is printed
        and nothing else happens.
    """
    if not os.path.isdir(start_path):
        print(f"Error: '{start_path}' is not a valid directory.")
        return

    print(f"Listing contents of: {start_path}\n")

    # normpath drops a trailing separator, so basename() does not collapse
    # to "" (and print "Folder: /") for input such as "documents/".
    top_name = os.path.basename(os.path.normpath(start_path))

    # os.walk yields a 3-tuple: (dirpath, dirnames, filenames)
    for root, dirs, files in os.walk(start_path):
        # Sorting dirnames in place makes os.walk descend alphabetically,
        # so the folder order is as stable as the sorted file order below.
        dirs.sort()

        # Show the path relative to start_path; the starting directory
        # itself is shown by its own name.
        relative_root = os.path.relpath(root, start_path)
        label = top_name if relative_root == '.' else relative_root
        print(f"Folder: {label}/")

        for file in sorted(files):  # Sort files for consistent output
            print(f" File: {file}")

        print()  # Add an empty line for readability between folders
|
| 40 |
+
|
| 41 |
+
if __name__ == "__main__":
    # Interactive entry point: ask for a directory and list it.
    # For a quick test, a fixed path can be passed to
    # list_directory_recursively() here instead of prompting.
    list_directory_recursively(
        input("Enter the path to the Linux directory you want to list (e.g., /home/user/documents): ")
    )
|
utils/generate_from_json (copy).py
ADDED
|
@@ -0,0 +1,143 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Direct Audio Generation from JSON Tool
|
| 4 |
+
|
| 5 |
+
This script allows for generating audiobook chunks directly from a pre-existing
|
| 6 |
+
`chunks_info.json` file. It is intended for debugging and testing purposes,
|
| 7 |
+
allowing a user to manually edit the TTS parameters in the JSON file and
|
| 8 |
+
hear the results without the VADER analysis step.
|
| 9 |
+
"""
|
| 10 |
+
|
| 11 |
+
import torch
|
| 12 |
+
from pathlib import Path
|
| 13 |
+
import sys
|
| 14 |
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
| 15 |
+
import time
|
| 16 |
+
from datetime import timedelta
|
| 17 |
+
|
| 18 |
+
# Add project root to path to allow module imports
|
| 19 |
+
project_root = Path(__file__).parent
|
| 20 |
+
sys.path.append(str(project_root))
|
| 21 |
+
|
| 22 |
+
from config.config import *
|
| 23 |
+
from modules.tts_engine import load_optimized_model, process_one_chunk
|
| 24 |
+
from modules.file_manager import setup_book_directories, list_voice_samples, ensure_voice_sample_compatibility
|
| 25 |
+
from wrapper.chunk_loader import load_chunks
|
| 26 |
+
from chatterbox.tts import punc_norm
|
| 27 |
+
from modules.progress_tracker import log_chunk_progress, log_run
|
| 28 |
+
|
| 29 |
+
def main():
    """Main function to drive the generation process.

    Interactive flow: prompt for a book name, load ``chunks_info.json`` from
    the book's ``TTS/text_chunks`` folder, let the user choose a voice
    sample, pick a torch device (cuda, then mps, then cpu), load the model,
    prepare the voice conditionals, delete existing ``*.wav`` chunks and
    submit every chunk to ``process_one_chunk``.

    Only ``exaggeration`` is taken from each chunk's ``tts_params``;
    ``cfg_weight`` and ``temperature`` always use the configured defaults.

    Returns:
        None. Returns early, after printing a message, when the book name is
        empty, the JSON file is missing, or no voice samples are found.

    NOTE(review): several status messages start with a glyph ("β", "π") that
    is a mis-decoded emoji; it is kept as found wherever the original code
    point could not be recovered.
    """
    # The banner used to end in "\{RESET}", an invalid escape sequence that
    # printed a literal backslash in front of the reset code.
    print(f"{BOLD}{CYAN}--- Direct Audio Generation from JSON Tool ---{RESET}")

    # 1. Get Book Name
    book_name = input("Enter the book name (e.g., 'london'): ").strip()
    if not book_name:
        print("β Book name cannot be empty.")
        return

    # 2. Locate and Load JSON
    book_audio_dir = AUDIOBOOK_ROOT / book_name
    json_path = book_audio_dir / "TTS" / "text_chunks" / "chunks_info.json"

    if not json_path.exists():
        print(f"β Error: JSON file not found at {json_path}")
        print("Please ensure you have run the 'Prepare text file' option for this book first.")
        return

    print(f"π Loading chunks from: {json_path}")
    all_chunks = load_chunks(str(json_path))
    print(f"✅ Found {len(all_chunks)} chunks.")

    # 3. Select Voice
    voice_files = list_voice_samples()
    if not voice_files:
        print(f"β No voice samples found in {VOICE_SAMPLES_DIR}")
        return

    print("\nAvailable voices:")
    for i, voice_file in enumerate(voice_files, 1):
        print(f" [{i}] {voice_file.stem}")

    # Keep asking until the user enters a number from the list above.
    while True:
        choice = input("Select voice number: ").strip()
        try:
            idx = int(choice) - 1
        except ValueError:
            print("Invalid selection.")
            continue
        if 0 <= idx < len(voice_files):
            voice_path = voice_files[idx]
            break
        print("Invalid selection.")

    # Ensure voice compatibility
    voice_path = ensure_voice_sample_compatibility(voice_path)

    # 4. Setup Environment
    if torch.cuda.is_available():
        device = "cuda"
    elif torch.backends.mps.is_available():
        device = "mps"
    else:
        device = "cpu"

    print(f"\nπ Using device: {device}")
    print(f"🎤 Using voice: {Path(voice_path).name}")

    # 5. Load Model
    model = load_optimized_model(device)

    # 6. Prepare voice conditionals
    print(f"🎤 Preparing voice conditionals with: {Path(voice_path).name}")
    model.prepare_conditionals(voice_path)

    # 7. Process Chunks
    output_root, tts_dir, text_chunks_dir, audio_chunks_dir = setup_book_directories(Path(TEXT_INPUT_ROOT) / book_name)

    # Clean existing audio chunks
    print("🧹 Clearing old audio chunks...")
    for wav_file in audio_chunks_dir.glob("*.wav"):
        wav_file.unlink()

    start_time = time.time()
    total_chunks = len(all_chunks)
    log_path = output_root / "debug_generation.log"

    print(f"\nπ Generating {total_chunks} chunks...")

    with ThreadPoolExecutor(max_workers=1) as executor:  # Force sequential processing
        futures = []
        for i, chunk_data in enumerate(all_chunks):
            # Extract exaggeration from JSON, force others to default
            chunk_tts_params = {
                "exaggeration": chunk_data.get("tts_params", {}).get("exaggeration", DEFAULT_EXAGGERATION),
                "cfg_weight": DEFAULT_CFG_WEIGHT,
                "temperature": DEFAULT_TEMPERATURE
            }

            future = executor.submit(
                process_one_chunk,
                i, chunk_data['text'], text_chunks_dir, audio_chunks_dir,
                voice_path, chunk_tts_params, start_time, total_chunks,
                punc_norm, book_name, log_run, log_path, device,
                model, None, chunk_data['is_paragraph_end'], all_chunks, chunk_data['boundary_type']
            )
            futures.append(future)

        for future in as_completed(futures):
            # A failing chunk is reported and the remaining chunks continue.
            try:
                result = future.result()
                if result:
                    idx, _ = result
                    log_chunk_progress(idx, total_chunks, start_time, 0)
            except Exception as e:
                print(f"\nβ An error occurred while processing a chunk: {e}")

    elapsed_time = time.time() - start_time
    print(f"\n{GREEN}✅ Generation Complete!{RESET}")
    print(f"⏱️ Total time: {timedelta(seconds=int(elapsed_time))}")
    print(f"π Audio chunks are in: {audio_chunks_dir}")
    print("You can now use Option 3 from the main menu to combine them.")


if __name__ == "__main__":
    main()
|
utils/generate_from_json.py
ADDED
|
@@ -0,0 +1,143 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Direct Audio Generation from JSON Tool
|
| 4 |
+
|
| 5 |
+
This script allows for generating audiobook chunks directly from a pre-existing
|
| 6 |
+
`chunks_info.json` file. It is intended for debugging and testing purposes,
|
| 7 |
+
allowing a user to manually edit the TTS parameters in the JSON file and
|
| 8 |
+
hear the results without the VADER analysis step.
|
| 9 |
+
"""
|
| 10 |
+
|
| 11 |
+
import torch
|
| 12 |
+
from pathlib import Path
|
| 13 |
+
import sys
|
| 14 |
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
| 15 |
+
import time
|
| 16 |
+
from datetime import timedelta
|
| 17 |
+
|
| 18 |
+
# Add project root to path to allow module imports
|
| 19 |
+
project_root = Path(__file__).parent
|
| 20 |
+
sys.path.append(str(project_root))
|
| 21 |
+
|
| 22 |
+
from config.config import *
|
| 23 |
+
from modules.tts_engine import load_optimized_model, process_one_chunk
|
| 24 |
+
from modules.file_manager import setup_book_directories, list_voice_samples, ensure_voice_sample_compatibility
|
| 25 |
+
from wrapper.chunk_loader import load_chunks
|
| 26 |
+
from chatterbox.tts import punc_norm
|
| 27 |
+
from modules.progress_tracker import log_chunk_progress, log_run
|
| 28 |
+
|
| 29 |
+
def main():
    """Main function to drive the generation process.

    Interactive flow: prompt for a book name, load ``chunks_info.json`` from
    the book's ``TTS/text_chunks`` folder, let the user choose a voice
    sample, pick a torch device (cuda, then mps, then cpu), load the model,
    prepare the voice conditionals, delete existing ``*.wav`` chunks and
    submit every chunk to ``process_one_chunk`` on a two-worker thread pool.

    Only ``exaggeration`` is taken from each chunk's ``tts_params``;
    ``cfg_weight`` and ``temperature`` always use the configured defaults.

    Returns:
        None. Returns early, after printing a message, when the book name is
        empty, the JSON file is missing, or no voice samples are found.

    NOTE(review): several status messages start with a glyph ("β", "π") that
    is a mis-decoded emoji; it is kept as found wherever the original code
    point could not be recovered.
    """
    # The banner used to end in "\{RESET}", an invalid escape sequence that
    # printed a literal backslash in front of the reset code.
    print(f"{BOLD}{CYAN}--- Direct Audio Generation from JSON Tool ---{RESET}")

    # 1. Get Book Name
    book_name = input("Enter the book name (e.g., 'london'): ").strip()
    if not book_name:
        print("β Book name cannot be empty.")
        return

    # 2. Locate and Load JSON
    book_audio_dir = AUDIOBOOK_ROOT / book_name
    json_path = book_audio_dir / "TTS" / "text_chunks" / "chunks_info.json"

    if not json_path.exists():
        print(f"β Error: JSON file not found at {json_path}")
        print("Please ensure you have run the 'Prepare text file' option for this book first.")
        return

    print(f"π Loading chunks from: {json_path}")
    all_chunks = load_chunks(str(json_path))
    print(f"✅ Found {len(all_chunks)} chunks.")

    # 3. Select Voice
    voice_files = list_voice_samples()
    if not voice_files:
        print(f"β No voice samples found in {VOICE_SAMPLES_DIR}")
        return

    print("\nAvailable voices:")
    for i, voice_file in enumerate(voice_files, 1):
        print(f" [{i}] {voice_file.stem}")

    # Keep asking until the user enters a number from the list above.
    while True:
        choice = input("Select voice number: ").strip()
        try:
            idx = int(choice) - 1
        except ValueError:
            print("Invalid selection.")
            continue
        if 0 <= idx < len(voice_files):
            voice_path = voice_files[idx]
            break
        print("Invalid selection.")

    # Ensure voice compatibility
    voice_path = ensure_voice_sample_compatibility(voice_path)

    # 4. Setup Environment
    if torch.cuda.is_available():
        device = "cuda"
    elif torch.backends.mps.is_available():
        device = "mps"
    else:
        device = "cpu"

    print(f"\nπ Using device: {device}")
    print(f"🎤 Using voice: {Path(voice_path).name}")

    # 5. Load Model
    model = load_optimized_model(device)

    # 6. Prepare voice conditionals
    print(f"🎤 Preparing voice conditionals with: {Path(voice_path).name}")
    model.prepare_conditionals(voice_path)

    # 7. Process Chunks
    output_root, tts_dir, text_chunks_dir, audio_chunks_dir = setup_book_directories(Path(TEXT_INPUT_ROOT) / book_name)

    # Clean existing audio chunks
    print("🧹 Clearing old audio chunks...")
    for wav_file in audio_chunks_dir.glob("*.wav"):
        wav_file.unlink()

    start_time = time.time()
    total_chunks = len(all_chunks)
    log_path = output_root / "debug_generation.log"

    print(f"\nπ Generating {total_chunks} chunks...")

    with ThreadPoolExecutor(max_workers=2) as executor:  # Test parallel processing
        futures = []
        for i, chunk_data in enumerate(all_chunks):
            # Extract exaggeration from JSON, force others to default
            chunk_tts_params = {
                "exaggeration": chunk_data.get("tts_params", {}).get("exaggeration", DEFAULT_EXAGGERATION),
                "cfg_weight": DEFAULT_CFG_WEIGHT,
                "temperature": DEFAULT_TEMPERATURE
            }

            # NOTE(review): the "(copy)" and ".bak" variants of this script
            # also pass chunk_data['is_paragraph_end'] right before
            # all_chunks; confirm which argument list matches the current
            # process_one_chunk signature.
            future = executor.submit(
                process_one_chunk,
                i, chunk_data['text'], text_chunks_dir, audio_chunks_dir,
                voice_path, chunk_tts_params, start_time, total_chunks,
                punc_norm, book_name, log_run, log_path, device,
                model, None, all_chunks, chunk_data['boundary_type']
            )
            futures.append(future)

        for future in as_completed(futures):
            # A failing chunk is reported and the remaining chunks continue.
            try:
                result = future.result()
                if result:
                    idx, _ = result
                    log_chunk_progress(idx, total_chunks, start_time, 0)
            except Exception as e:
                print(f"\nβ An error occurred while processing a chunk: {e}")

    elapsed_time = time.time() - start_time
    print(f"\n{GREEN}✅ Generation Complete!{RESET}")
    print(f"⏱️ Total time: {timedelta(seconds=int(elapsed_time))}")
    print(f"π Audio chunks are in: {audio_chunks_dir}")
    print("You can now use Option 3 from the main menu to combine them.")


if __name__ == "__main__":
    main()
|
utils/generate_from_json.py.bak
ADDED
|
@@ -0,0 +1,143 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Direct Audio Generation from JSON Tool
|
| 4 |
+
|
| 5 |
+
This script allows for generating audiobook chunks directly from a pre-existing
|
| 6 |
+
`chunks_info.json` file. It is intended for debugging and testing purposes,
|
| 7 |
+
allowing a user to manually edit the TTS parameters in the JSON file and
|
| 8 |
+
hear the results without the VADER analysis step.
|
| 9 |
+
"""
|
| 10 |
+
|
| 11 |
+
import torch
|
| 12 |
+
from pathlib import Path
|
| 13 |
+
import sys
|
| 14 |
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
| 15 |
+
import time
|
| 16 |
+
from datetime import timedelta
|
| 17 |
+
|
| 18 |
+
# Add project root to path to allow module imports
|
| 19 |
+
project_root = Path(__file__).parent
|
| 20 |
+
sys.path.append(str(project_root))
|
| 21 |
+
|
| 22 |
+
from config.config import *
|
| 23 |
+
from modules.tts_engine import load_optimized_model, process_one_chunk
|
| 24 |
+
from modules.file_manager import setup_book_directories, list_voice_samples, ensure_voice_sample_compatibility
|
| 25 |
+
from wrapper.chunk_loader import load_chunks
|
| 26 |
+
from chatterbox.tts import punc_norm
|
| 27 |
+
from modules.progress_tracker import log_chunk_progress, log_run
|
| 28 |
+
|
| 29 |
+
def main():
    """Main function to drive the generation process.

    Interactive flow: prompt for a book name, load ``chunks_info.json`` from
    the book's ``TTS/text_chunks`` folder, let the user choose a voice
    sample, pick a torch device (cuda, then mps, then cpu), load the model,
    prepare the voice conditionals, delete existing ``*.wav`` chunks and
    submit every chunk to ``process_one_chunk`` on a two-worker thread pool.

    Only ``exaggeration`` is taken from each chunk's ``tts_params``;
    ``cfg_weight`` and ``temperature`` always use the configured defaults.

    Returns:
        None. Returns early, after printing a message, when the book name is
        empty, the JSON file is missing, or no voice samples are found.

    NOTE(review): several status messages start with a glyph ("β", "π") that
    is a mis-decoded emoji; it is kept as found wherever the original code
    point could not be recovered.
    """
    # The banner used to end in "\{RESET}", an invalid escape sequence that
    # printed a literal backslash in front of the reset code.
    print(f"{BOLD}{CYAN}--- Direct Audio Generation from JSON Tool ---{RESET}")

    # 1. Get Book Name
    book_name = input("Enter the book name (e.g., 'london'): ").strip()
    if not book_name:
        print("β Book name cannot be empty.")
        return

    # 2. Locate and Load JSON
    book_audio_dir = AUDIOBOOK_ROOT / book_name
    json_path = book_audio_dir / "TTS" / "text_chunks" / "chunks_info.json"

    if not json_path.exists():
        print(f"β Error: JSON file not found at {json_path}")
        print("Please ensure you have run the 'Prepare text file' option for this book first.")
        return

    print(f"π Loading chunks from: {json_path}")
    all_chunks = load_chunks(str(json_path))
    print(f"✅ Found {len(all_chunks)} chunks.")

    # 3. Select Voice
    voice_files = list_voice_samples()
    if not voice_files:
        print(f"β No voice samples found in {VOICE_SAMPLES_DIR}")
        return

    print("\nAvailable voices:")
    for i, voice_file in enumerate(voice_files, 1):
        print(f" [{i}] {voice_file.stem}")

    # Keep asking until the user enters a number from the list above.
    while True:
        choice = input("Select voice number: ").strip()
        try:
            idx = int(choice) - 1
        except ValueError:
            print("Invalid selection.")
            continue
        if 0 <= idx < len(voice_files):
            voice_path = voice_files[idx]
            break
        print("Invalid selection.")

    # Ensure voice compatibility
    voice_path = ensure_voice_sample_compatibility(voice_path)

    # 4. Setup Environment
    if torch.cuda.is_available():
        device = "cuda"
    elif torch.backends.mps.is_available():
        device = "mps"
    else:
        device = "cpu"

    print(f"\nπ Using device: {device}")
    print(f"🎤 Using voice: {Path(voice_path).name}")

    # 5. Load Model
    model = load_optimized_model(device)

    # 6. Prepare voice conditionals
    print(f"🎤 Preparing voice conditionals with: {Path(voice_path).name}")
    model.prepare_conditionals(voice_path)

    # 7. Process Chunks
    output_root, tts_dir, text_chunks_dir, audio_chunks_dir = setup_book_directories(Path(TEXT_INPUT_ROOT) / book_name)

    # Clean existing audio chunks
    print("🧹 Clearing old audio chunks...")
    for wav_file in audio_chunks_dir.glob("*.wav"):
        wav_file.unlink()

    start_time = time.time()
    total_chunks = len(all_chunks)
    log_path = output_root / "debug_generation.log"

    print(f"\nπ Generating {total_chunks} chunks...")

    with ThreadPoolExecutor(max_workers=2) as executor:  # Test parallel processing
        futures = []
        for i, chunk_data in enumerate(all_chunks):
            # Extract exaggeration from JSON, force others to default
            chunk_tts_params = {
                "exaggeration": chunk_data.get("tts_params", {}).get("exaggeration", DEFAULT_EXAGGERATION),
                "cfg_weight": DEFAULT_CFG_WEIGHT,
                "temperature": DEFAULT_TEMPERATURE
            }

            future = executor.submit(
                process_one_chunk,
                i, chunk_data['text'], text_chunks_dir, audio_chunks_dir,
                voice_path, chunk_tts_params, start_time, total_chunks,
                punc_norm, book_name, log_run, log_path, device,
                model, None, chunk_data['is_paragraph_end'], all_chunks, chunk_data['boundary_type']
            )
            futures.append(future)

        for future in as_completed(futures):
            # A failing chunk is reported and the remaining chunks continue.
            try:
                result = future.result()
                if result:
                    idx, _ = result
                    log_chunk_progress(idx, total_chunks, start_time, 0)
            except Exception as e:
                print(f"\nβ An error occurred while processing a chunk: {e}")

    elapsed_time = time.time() - start_time
    print(f"\n{GREEN}✅ Generation Complete!{RESET}")
    print(f"⏱️ Total time: {timedelta(seconds=int(elapsed_time))}")
    print(f"π Audio chunks are in: {audio_chunks_dir}")
    print("You can now use Option 3 from the main menu to combine them.")


if __name__ == "__main__":
    main()
|
utils/prechunktest.TXT
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from chunk_manager import prechunk_text_file, save_chunks_to_json
|
| 2 |
+
|
| 3 |
+
# Manual smoke test: chunk one sample text file and write the result as JSON.
# Both paths are hard-coded for the author's machine.
chunks = prechunk_text_file(
    "/home/danno/MyApps/chatterbox/Text_Input/test.txt",
    max_words=30,
    min_words=4,
)
save_chunks_to_json(
    chunks,
    "Text_Input/my_book_chunks.json",
)
|
utils/resume_handler.TXT
ADDED
|
@@ -0,0 +1,525 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Resume Handler Module
|
| 3 |
+
Handles resume functionality for interrupted processing
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import torch
|
| 7 |
+
import time
|
| 8 |
+
import logging
|
| 9 |
+
from datetime import timedelta
|
| 10 |
+
from pathlib import Path
|
| 11 |
+
|
| 12 |
+
from config import *
|
| 13 |
+
from modules.text_processor import smart_punctuate, sentence_chunk_text
|
| 14 |
+
from modules.file_manager import (
|
| 15 |
+
setup_book_directories, find_book_files, list_voice_samples,
|
| 16 |
+
ensure_voice_sample_compatibility, get_audio_files_in_directory,
|
| 17 |
+
combine_audio_chunks, convert_to_m4b, add_metadata_to_m4b
|
| 18 |
+
)
|
| 19 |
+
from modules.audio_processor import get_chunk_audio_duration, pause_for_chunk_review
|
| 20 |
+
from modules.progress_tracker import setup_logging, log_chunk_progress, log_run
|
| 21 |
+
|
| 22 |
+
def analyze_existing_chunks(audio_chunks_dir):
    """Analyze existing chunks to determine resume point.

    Files returned by ``get_audio_files_in_directory`` whose name starts with
    ``chunk_<digits>.wav`` are counted; a summary is printed to stdout.

    Args:
        audio_chunks_dir: Directory holding the audio chunks; must provide
            ``exists()`` (e.g. a ``pathlib.Path``).

    Returns:
        tuple: ``(last_chunk_number, missing_chunks)`` where
        ``missing_chunks`` lists, in ascending order, every number from 1 to
        ``last_chunk_number`` that has no chunk file. ``(0, [])`` when the
        directory does not exist or holds no matching files.
    """
    import re

    if not audio_chunks_dir.exists():
        return 0, []

    chunk_paths = get_audio_files_in_directory(audio_chunks_dir)

    if not chunk_paths:
        return 0, []

    # Compile once instead of importing and matching from scratch per file.
    chunk_pattern = re.compile(r"chunk_(\d+)\.wav")

    # Collect the number embedded in each chunk file name
    chunk_numbers = []
    for chunk_path in chunk_paths:
        match = chunk_pattern.match(chunk_path.name)
        if match:
            chunk_numbers.append(int(match.group(1)))

    if not chunk_numbers:
        return 0, []

    last_chunk_number = max(chunk_numbers)

    # Check for gaps in sequence; a set keeps each membership test O(1),
    # which matters for books with many thousands of chunks.
    present = set(chunk_numbers)
    missing_chunks = [
        i for i in range(1, last_chunk_number + 1) if i not in present
    ]

    print(f"π Existing chunks analysis:")
    print(f" Total chunks found: {GREEN}{len(chunk_numbers)}{RESET}")
    print(f" Highest chunk number: {GREEN}{last_chunk_number}{RESET}")
    if missing_chunks:
        print(f" Missing chunks: {YELLOW}{len(missing_chunks)}{RESET}")
        # Long gap lists are truncated to the first ten entries.
        if len(missing_chunks) <= 10:
            print(f" Missing: {missing_chunks}")
        else:
            print(f" Missing: {missing_chunks[:10]}... (+{len(missing_chunks)-10} more)")

    return last_chunk_number, missing_chunks
|
| 63 |
+
|
| 64 |
+
def suggest_resume_point(last_chunk, missing_chunks):
    """Suggest optimal resume point based on existing chunks.

    Args:
        last_chunk: Highest chunk number that already exists.
        missing_chunks: Chunk numbers missing below ``last_chunk``.

    Returns:
        ``last_chunk + 1`` when nothing is missing. Otherwise the smallest
        missing chunk number; in that case both options (continue after the
        last chunk, or fill the gaps first) are printed for the user.
    """
    if not missing_chunks:
        # No gaps, can resume from next chunk
        return last_chunk + 1

    # If there are missing chunks, suggest resuming from first missing
    first_missing = min(missing_chunks)

    # The heading's emoji was mis-decoded ("π‘"); restored to U+1F4A1.
    print("\n💡 Resume suggestions:")
    print(f" Resume from chunk {GREEN}{last_chunk + 1}{RESET} (continue from last)")
    print(f" Resume from chunk {YELLOW}{first_missing}{RESET} (fill gaps first)")

    return first_missing
|
| 78 |
+
|
| 79 |
+
def validate_resume_point(start_chunk, total_expected_chunks):
    """Check that a resume point lies within ``1..total_expected_chunks``.

    Args:
        start_chunk: 1-based chunk number to resume from.
        total_expected_chunks: Number of chunks the book is expected to have.

    Returns:
        bool: ``True`` when the resume point is usable. ``False`` after
        printing the reason when it is below 1 or above the expected total.
    """
    problem = None
    if start_chunk < 1:
        problem = f"{RED}β Invalid resume point: {start_chunk}. Must be >= 1{RESET}"
    elif start_chunk > total_expected_chunks:
        problem = f"{RED}β Resume point {start_chunk} exceeds expected total chunks {total_expected_chunks}{RESET}"

    if problem is None:
        return True

    print(problem)
    return False
|
| 90 |
+
|
| 91 |
+
def process_book_folder_resume(book_dir, voice_path, tts_params, device, start_chunk=1):
    """Generate (or finish generating) the audiobook for one book folder.

    The book's text files are re-chunked, chunks from ``start_chunk`` onward
    are synthesised in batches, and every chunk WAV found on disk is then
    combined into one WAV and converted to M4B.

    Args:
        book_dir: Path of the folder holding the book's .txt files; its
            ``.name`` is used for log lines and output file names.
        voice_path: Path of the voice sample.
        tts_params: Mapping with ``exaggeration``, ``cfg_weight`` and
            ``temperature`` keys.
        device: Device string handed to the TTS model loader.
        start_chunk: 1-based chunk number to start from. ``1`` is treated as
            a fresh run and deletes the existing text/audio chunk folders.

    Returns:
        ``(final_m4b_path, combined_wav_path, run_log_lines)`` on success, or
        ``(None, None, [])`` when there are no text files, the resume point
        is invalid, or no chunk audio exists at the end.
    """
    from modules.tts_engine import process_one_chunk, load_optimized_model, get_optimal_workers
    from chatterbox.tts import punc_norm
    from concurrent.futures import ThreadPoolExecutor, as_completed
    import gc
    import shutil

    # Setup directories
    output_root, tts_dir, text_chunks_dir, audio_chunks_dir = setup_book_directories(book_dir)

    # Find book files
    text_files, cover_file, nfo_file = find_book_files(book_dir)

    if not text_files:
        logging.error(f"[{book_dir.name}] ERROR: No .txt files found in the book folder.")
        return None, None, []

    # Only a fresh start clears earlier output; a resume must keep it.
    if start_chunk == 1:
        for d in (text_chunks_dir, audio_chunks_dir):
            if d.exists() and d.is_dir():
                shutil.rmtree(d)

    for d in (output_root, tts_dir, text_chunks_dir, audio_chunks_dir):
        d.mkdir(parents=True, exist_ok=True)

    setup_logging(output_root)

    # Enhanced text processing. The text is re-chunked on every run, so chunk
    # numbers only line up with existing WAVs if chunking is deterministic.
    all_chunks = []
    for tf in text_files:
        with open(tf, 'r', encoding='utf-8') as f:
            raw = f.read()
        smart = smart_punctuate(raw)
        chunks = sentence_chunk_text(smart, max_words=MAX_CHUNK_WORDS, min_words=MIN_CHUNK_WORDS)
        all_chunks.extend(
            {"text": chunk_text, "is_paragraph_end": is_para_end}
            for chunk_text, is_para_end in chunks
        )

    # Validate resume point
    if not validate_resume_point(start_chunk, len(all_chunks)):
        return None, None, []

    # Filter chunks to process (resume logic)
    chunk_offset = start_chunk - 1
    chunks_to_process = all_chunks[chunk_offset:]
    if start_chunk > 1:
        print(f"π Resuming from chunk {start_chunk}")
        print(f"π Skipping chunks 1-{start_chunk-1} (already completed)")

        # Check which of the skipped chunks actually exist on disk
        existing_chunks = [
            n for n in range(1, start_chunk)
            if (audio_chunks_dir / f"chunk_{n:05}.wav").exists()
        ]
        print(f"✅ Found {len(existing_chunks)} existing chunks")

    run_log_lines = [
        f"\n===== RESUME Processing: {book_dir.name} =====",
        f"Voice: {voice_path.name}",
        f"Started: {time.strftime('%Y-%m-%d %H:%M:%S')}",
        f"Resume from chunk: {start_chunk}",
        f"Text files processed: {len(text_files)}",
        f"Total chunks generated: {len(all_chunks)}",
        f"Chunks to process: {len(chunks_to_process)}"
    ]

    # Write initial run info immediately
    initial_log = run_log_lines + [
        f"--- Generation Settings ---",
        f"Batch Processing: Enabled ({BATCH_SIZE} chunks per batch)",
        f"ASR Enabled: {ENABLE_ASR}",
        f"Hum Detection: {ENABLE_HUM_DETECTION}",
        f"Dynamic Workers: {USE_DYNAMIC_WORKERS}",
        f"Voice used: {voice_path.name}",
        f"Exaggeration: {tts_params['exaggeration']}",
        f"CFG weight: {tts_params['cfg_weight']}",
        f"Temperature: {tts_params['temperature']}",
        f"Processing Status: IN PROGRESS...",
        f"="*50
    ]

    log_run("\n".join(initial_log), output_root / "run.log")
    print(f"π Initial run info written to: {output_root / 'run.log'}")

    start_time = time.time()
    total_chunks = len(all_chunks)
    remaining_chunks = len(chunks_to_process)
    log_path = output_root / "chunk_validation.log"

    # Calculate existing audio duration for accurate progress
    total_audio_duration = 0.0
    if start_chunk > 1:
        print("π Calculating existing audio duration...")
        for n in range(1, start_chunk):
            chunk_path = audio_chunks_dir / f"chunk_{n:05}.wav"
            if chunk_path.exists():
                total_audio_duration += get_chunk_audio_duration(chunk_path)
        print(f"π Existing audio: {timedelta(seconds=int(total_audio_duration))}")

    # Batch processing for remaining chunks
    print(f"π Processing {remaining_chunks} remaining chunks in batches of {BATCH_SIZE}")

    all_results = []
    # Built once: the list is the same for every chunk of every batch.
    # NOTE(review): assumes process_one_chunk does not mutate this list.
    all_chunk_texts = [cd["text"] for cd in all_chunks]

    for batch_start in range(0, remaining_chunks, BATCH_SIZE):
        batch_end = min(batch_start + BATCH_SIZE, remaining_chunks)
        batch_chunks = chunks_to_process[batch_start:batch_end]

        actual_start_chunk = chunk_offset + batch_start + 1
        actual_end_chunk = chunk_offset + batch_end

        print(f"\nπ Processing batch: chunks {actual_start_chunk}-{actual_end_chunk}")

        # Fresh model for each batch
        model = load_optimized_model(device)
        compatible_voice = ensure_voice_sample_compatibility(voice_path, output_dir=tts_dir)
        model.prepare_conditionals(compatible_voice, exaggeration=tts_params['exaggeration'])

        # Load ASR model once per batch if needed
        asr_model = None
        if ENABLE_ASR:
            import whisper
            print(f"π€ Loading Whisper ASR model for batch...")
            # Follow the caller's device instead of assuming CUDA exists;
            # anything that is not CUDA (cpu, mps) runs Whisper on the CPU.
            asr_device = "cuda" if str(device).startswith("cuda") else "cpu"
            asr_model = whisper.load_model("base", device=asr_device)

        futures = []
        batch_results = []

        # Dynamic worker allocation
        optimal_workers = get_optimal_workers()
        print(f"π§ Using {optimal_workers} workers for batch {actual_start_chunk}-{actual_end_chunk}")

        with ThreadPoolExecutor(max_workers=optimal_workers) as executor:
            for i, chunk_data in enumerate(batch_chunks):
                global_chunk_index = chunk_offset + batch_start + i

                # Check for shutdown request
                if shutdown_requested:
                    print(f"\nβΉοΈ {YELLOW}Stopping submission of new chunks...{RESET}")
                    break

                chunk = chunk_data["text"]
                is_paragraph_end = chunk_data.get("is_paragraph_end", False)

                futures.append(executor.submit(
                    process_one_chunk,
                    global_chunk_index, chunk, text_chunks_dir, audio_chunks_dir,
                    voice_path, tts_params, start_time, total_chunks,
                    punc_norm, book_dir.name, log_run, log_path, device,
                    model, asr_model, is_paragraph_end, all_chunk_texts
                ))

            # Wait for batch to complete
            print(f"π {CYAN}Waiting for batch {actual_start_chunk}-{actual_end_chunk} to complete...{RESET}")
            completed_count = 0

            for fut in as_completed(futures):
                try:
                    idx, wav_path = fut.result()
                    if wav_path and wav_path.exists():
                        # Measure actual audio duration for this chunk
                        chunk_duration = get_chunk_audio_duration(wav_path)
                        total_audio_duration += chunk_duration
                        batch_results.append((idx, wav_path))

                    # Update progress every 10 chunks within batch
                    completed_count += 1
                    if completed_count % 10 == 0:
                        current_chunk = chunk_offset + batch_start + completed_count
                        log_chunk_progress(current_chunk - 1, total_chunks, start_time, total_audio_duration)

                except Exception as e:
                    # One failed chunk must not abort the whole batch.
                    logging.error(f"Future failed in batch: {e}")

        # Clean up model after batch
        print(f"π§Ή Cleaning up after batch {actual_start_chunk}-{actual_end_chunk}")
        del model
        del asr_model
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        gc.collect()
        time.sleep(2)

        all_results.extend(batch_results)
        print(f"✅ Batch {actual_start_chunk}-{actual_end_chunk} completed ({len(batch_results)} chunks)")

        # Without this, every remaining batch would still load a model only
        # to stop again at its first chunk.
        if shutdown_requested:
            logging.warning("Shutdown requested; skipping remaining batches")
            break

    # Final processing - combine ALL chunks (existing + new)
    quarantine_dir = audio_chunks_dir / "quarantine"
    pause_for_chunk_review(quarantine_dir)

    # Collect ALL chunk paths (both existing and newly created)
    chunk_paths = []
    for n in range(1, total_chunks + 1):
        chunk_path = audio_chunks_dir / f"chunk_{n:05}.wav"
        if chunk_path.exists():
            chunk_paths.append(chunk_path)
        else:
            logging.warning(f"Missing chunk file: chunk_{n:05}.wav")

    if not chunk_paths:
        logging.error(f"{RED}β No valid audio chunks found. Skipping concatenation and conversion.{RESET}")
        return None, None, []

    print(f"π Found {len(chunk_paths)} total chunks for final audiobook")

    # Calculate timing
    elapsed_total = time.time() - start_time
    elapsed_td = timedelta(seconds=int(elapsed_total))

    # Get total audio duration from ALL chunks
    total_audio_duration_final = sum(get_chunk_audio_duration(chunk_path) for chunk_path in chunk_paths)
    audio_duration_td = timedelta(seconds=int(total_audio_duration_final))
    realtime_factor = total_audio_duration_final / elapsed_total if elapsed_total > 0 else 0.0

    print(f"\nβ±οΈ Resume Processing Complete:")
    print(f" Elapsed Time: {CYAN}{str(elapsed_td)}{RESET}")
    print(f" Audio Duration: {GREEN}{str(audio_duration_td)}{RESET}")
    print(f" Realtime Factor: {YELLOW}{realtime_factor:.2f}x{RESET}")

    # Combine audio
    combined_wav_path = output_root / f"{book_dir.name} [{voice_path.stem}].wav"
    print("\nπΎ Saving WAV file...")
    combine_audio_chunks(chunk_paths, combined_wav_path)

    # M4B conversion
    temp_m4b_path = output_root / "output.m4b"
    final_m4b_path = output_root / f"{book_dir.name}[{voice_path.stem}].m4b"
    convert_to_m4b(combined_wav_path, temp_m4b_path)
    add_metadata_to_m4b(temp_m4b_path, final_m4b_path, cover_file, nfo_file)

    logging.info(f"Audiobook created: {final_m4b_path}")

    # Append final completion info
    completion_log = [
        f"\n--- Resume Processing Complete ---",
        f"Completed: {time.strftime('%Y-%m-%d %H:%M:%S')}",
        f"Processing Time: {str(elapsed_td)}",
        f"Audio Duration: {str(audio_duration_td)}",
        f"Realtime Factor: {realtime_factor:.2f}x",
        f"Total Chunks: {len(chunk_paths)}",
        f"Combined WAV: {combined_wav_path}",
        f"Final M4B: {final_m4b_path}"
    ]

    # Append to existing log
    log_run("\n".join(completion_log), output_root / "run.log")
    print(f"π Final completion info appended to: {output_root / 'run.log'}")

    return final_m4b_path, combined_wav_path, run_log_lines
|
| 357 |
+
|
| 358 |
+
def resume_book_from_chunk(start_chunk):
    """Interactively choose a book, resume point, voice and TTS settings, then resume.

    Args:
        start_chunk: Resume point used when the chosen book has no existing
            ``TTS/audio_chunks`` folder; otherwise the point suggested from
            the chunks on disk (or the user's override) replaces it.

    Returns:
        The result of ``process_book_folder_resume``, or ``None`` when there
        are no book folders or no voice samples.
    """
    def prompt_index(prompt, count):
        """Ask until the user enters an integer in ``range(count)``."""
        while True:
            # Only a malformed number is retried. EOFError from a closed
            # stdin propagates instead of spinning in this loop forever.
            try:
                choice = int(input(prompt))
            except ValueError:
                choice = -1
            if 0 <= choice < count:
                return choice
            print("Invalid selection. Try again.")

    def prompt_float(prompt, default):
        """Ask for a float; an empty answer selects ``default``."""
        while True:
            val = input(f"{prompt} [{default}]: ").strip()
            if not val:
                return default
            try:
                return float(val)
            except ValueError:
                print("Invalid number. Try again.")

    print(f"\nπ Resume Book Processing from Chunk {start_chunk}")
    print("=" * 50)

    # Show available books
    book_dirs = sorted([d for d in TEXT_INPUT_ROOT.iterdir() if d.is_dir()])
    if not book_dirs:
        print(f"{RED}No folders found in Text_Input/.{RESET}")
        return None

    print("Available books:")
    for i, book in enumerate(book_dirs):
        # Check if book has existing processing
        audiobook_dir = AUDIOBOOK_ROOT / book.name
        if audiobook_dir.exists():
            audio_chunks_dir = audiobook_dir / "TTS" / "audio_chunks"
            if audio_chunks_dir.exists():
                last_chunk, missing = analyze_existing_chunks(audio_chunks_dir)
                status = f"(last chunk: {last_chunk})"
            else:
                status = "(no existing chunks)"
        else:
            status = "(not started)"

        print(f" [{i}] {book.name} {status}")

    book_dir = book_dirs[prompt_index("Select book index: ", len(book_dirs))]

    # Analyze existing chunks for selected book
    audiobook_dir = AUDIOBOOK_ROOT / book_dir.name
    if audiobook_dir.exists():
        audio_chunks_dir = audiobook_dir / "TTS" / "audio_chunks"
        if audio_chunks_dir.exists():
            last_chunk, missing = analyze_existing_chunks(audio_chunks_dir)
            suggested_resume = suggest_resume_point(last_chunk, missing)

            print(f"\nSuggested resume point: {GREEN}{suggested_resume}{RESET}")

            # Allow user to override
            user_input = input(f"Resume from chunk [{suggested_resume}]: ").strip()
            if user_input:
                try:
                    start_chunk = int(user_input)
                except ValueError:
                    print(f"Invalid input, using suggested: {suggested_resume}")
                    start_chunk = suggested_resume
            else:
                start_chunk = suggested_resume

    # Show available voices
    voice_files = list_voice_samples()
    if not voice_files:
        print(f"{RED}No voice samples found.{RESET}")
        return None

    print("\nAvailable voices:")
    for i, voice in enumerate(voice_files):
        print(f" [{i}] {voice.name}")

    voice_path = voice_files[prompt_index("Select voice index: ", len(voice_files))]

    # Get TTS parameters
    exaggeration = prompt_float("Enter exaggeration (emotion intensity)", 0.5)
    cfg_weight = prompt_float("Enter cfg_weight (faithfulness to text)", 0.2)
    temperature = prompt_float("Enter temperature (randomness)", 0.2)

    tts_params = dict(exaggeration=exaggeration, cfg_weight=cfg_weight, temperature=temperature)

    # Determine device
    if torch.cuda.is_available():
        device = "cuda"
    elif torch.backends.mps.is_available():
        device = "mps"
    else:
        device = "cpu"

    print(f"\nπ Resuming {book_dir.name} from chunk {start_chunk}")
    print(f"π€ Voice: {voice_path.name}")
    print(f"βοΈ Parameters: {tts_params}")

    # Process with resume
    return process_book_folder_resume(book_dir, voice_path, tts_params, device, start_chunk)
|
| 461 |
+
|
| 462 |
+
def find_incomplete_books():
    """List books that have audio chunks on disk but no final output file.

    A book counts as incomplete when its audiobook folder contains a
    ``TTS/audio_chunks`` directory, holds no ``*.m4b`` or ``*.wav`` file at
    its top level, and ``analyze_existing_chunks`` reports a last chunk
    number greater than zero.

    Returns:
        A list of dicts with ``name``, ``last_chunk``, ``missing_chunks``
        (a count) and ``path`` keys, ordered by folder path.
    """
    incomplete_books = []

    # Sorted so menu indices are stable between runs and match the ordering
    # used by resume_book_from_chunk.
    for book_dir in sorted(TEXT_INPUT_ROOT.iterdir()):
        if not book_dir.is_dir():
            continue

        audiobook_dir = AUDIOBOOK_ROOT / book_dir.name
        audio_chunks_dir = audiobook_dir / "TTS" / "audio_chunks"
        if not audio_chunks_dir.exists():
            continue

        # A final M4B or combined WAV means the book was already finished.
        if any(audiobook_dir.glob("*.m4b")) or any(audiobook_dir.glob("*.wav")):
            continue

        last_chunk, missing = analyze_existing_chunks(audio_chunks_dir)
        if last_chunk > 0:
            incomplete_books.append({
                "name": book_dir.name,
                "last_chunk": last_chunk,
                "missing_chunks": len(missing),
                "path": book_dir
            })

    return incomplete_books
|
| 494 |
+
|
| 495 |
+
def auto_resume_incomplete():
    """List incomplete books and start the interactive resume flow for one.

    The chosen book only determines the suggested chunk number that is
    passed on; ``resume_book_from_chunk`` receives no book argument and asks
    for the book itself.

    Returns:
        Whatever ``resume_book_from_chunk`` returns, or ``None`` when there
        is nothing to resume, the user quits, or the selection is invalid.
    """
    incomplete = find_incomplete_books()

    if not incomplete:
        print(f"{GREEN}✅ No incomplete books found!{RESET}")
        return None

    print(f"{YELLOW}π Found {len(incomplete)} incomplete books:{RESET}")
    for i, book in enumerate(incomplete):
        print(f" [{i}] {book['name']} (last chunk: {book['last_chunk']}, missing: {book['missing_chunks']})")

    choice = input(f"\nSelect book to resume [0-{len(incomplete)-1}] or 'q' to quit: ").strip()

    if choice.lower() == 'q':
        return None

    try:
        idx = int(choice)
    except ValueError:
        idx = -1

    # Out-of-range numbers are reported the same way as non-numeric input.
    if not 0 <= idx < len(incomplete):
        print("Invalid selection.")
        return None

    selected_book = incomplete[idx]
    suggested_resume = selected_book['last_chunk'] + 1

    print(f"\nπ― Selected: {selected_book['name']}")
    print(f"π‘ Suggested resume point: chunk {suggested_resume}")

    return resume_book_from_chunk(suggested_resume)
|
utils/text_cleaner.TXT
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# text_cleaner.py
|
| 2 |
+
|
| 3 |
+
import re
|
| 4 |
+
|
| 5 |
+
def smart_punctuate(text):
    """Basic punctuation cleanup for sentence ends.

    Each non-empty line is stripped and given a trailing period unless it
    already ends in ``.``, ``!`` or ``?`` (optionally followed by closing
    quotes or brackets). Empty lines are kept as paragraph breaks. Curly
    quotes are then converted to straight quotes, and markdown bold markers
    and runs of underscores are removed.

    Args:
        text: Raw text, possibly spanning several lines.

    Returns:
        The cleaned text with lines joined by ``\\n``.
    """
    # Terminal punctuation may sit inside a closing quote/bracket: 'He said "Stop."'
    terminated = re.compile(r'[.!?]["\'\)\]\u201d\u2019]*$')

    out = []
    for line in text.splitlines():
        stripped = line.strip()

        # Preserve empty lines (paragraph breaks)
        if not stripped:
            out.append("")
        elif terminated.search(stripped):
            out.append(stripped)
        else:
            out.append(stripped + ".")

    result = "\n".join(out)

    # Normalize quotes; escapes keep the code points intact whatever the
    # encoding of this source file.
    for curly, plain in (("\u201c", '"'), ("\u201d", '"'), ("\u2018", "'"), ("\u2019", "'")):
        result = result.replace(curly, plain)

    result = re.sub(r'\*\*([^*]+)\*\*', r'\1', result)  # remove markdown bold
    result = re.sub(r'_{2,}', '', result)  # remove underlines

    return result
|
utils/text_processor.TXT
ADDED
|
@@ -0,0 +1,449 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Text Processing Module
|
| 3 |
+
Handles text chunking, abbreviations, and preprocessing for TTS
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import re
|
| 7 |
+
import logging
|
| 8 |
+
from pathlib import Path
|
| 9 |
+
from config import *
|
| 10 |
+
|
| 11 |
+
# ============================================================================
|
| 12 |
+
# ABBREVIATION REPLACEMENT SYSTEM
|
| 13 |
+
# ============================================================================
|
| 14 |
+
|
| 15 |
+
def load_abbreviations(file_path="abbreviations.txt"):
    """Load abbreviation replacements from an external file.

    The file holds one ``abbreviation -> replacement`` pair per line; blank
    lines and lines starting with ``#`` are ignored, and any other line is
    reported as invalid and skipped.

    Args:
        file_path: Location of the abbreviations file.

    Returns:
        Dict mapping abbreviation to replacement. It is empty when the file
        does not exist (a sample file is created in that case), and may be
        partial when reading fails part-way through.
    """
    replacements = {}
    abbrev_file = Path(file_path)

    if not abbrev_file.exists():
        print(f"β οΈ {YELLOW}Abbreviations file not found: {file_path}{RESET}")
        print(f"π Creating sample file...")
        create_sample_abbreviations_file(abbrev_file)
        return replacements

    try:
        # utf-8-sig also accepts plain UTF-8 and keeps a BOM written by some
        # editors from becoming part of the first abbreviation.
        with open(abbrev_file, 'r', encoding='utf-8-sig') as f:
            for line_num, line in enumerate(f, 1):
                line = line.strip()

                # Skip empty lines and comments
                if not line or line.startswith('#'):
                    continue

                # Parse "abbrev -> replacement" format
                if ' -> ' in line:
                    abbrev, replacement = line.split(' -> ', 1)
                    replacements[abbrev.strip()] = replacement.strip()
                else:
                    print(f"β οΈ Invalid format on line {line_num}: {line}")
    except (OSError, UnicodeDecodeError) as e:
        print(f"β Error loading abbreviations: {e}")
    else:
        print(f"✅ Loaded {len(replacements)} abbreviation replacements from {file_path}")

    return replacements
|
| 48 |
+
|
| 49 |
+
def create_sample_abbreviations_file(file_path):
    """Write a starter abbreviations file to ``file_path``.

    The file uses the ``abbreviation -> replacement`` line format with ``#``
    comment lines. Any failure while writing is reported on stdout rather
    than raised.
    """
    default_entries = """# Abbreviation Replacements for TTS
# Format: abbreviation -> replacement
# Lines starting with # are comments

# Common titles and abbreviations
Dr. -> Doctor
Mr. -> Mister
Mrs. -> Missus
Ms. -> Miss
Prof. -> Professor
Rev. -> Reverend
Lt. -> Lieutenant
Capt. -> Captain
Gen. -> General
Col. -> Colonel
Jr. -> Junior
Sr. -> Senior

# Political and organizations
M.P. -> MP
U.S. -> US
U.K. -> UK
U.N. -> UN
F.B.I. -> FBI
C.I.A. -> CIA
N.A.S.A. -> NASA

# Common abbreviations
etc. -> et cetera
vs. -> versus
e.g. -> for example
i.e. -> that is
Inc. -> Incorporated
Corp. -> Corporation
Ltd. -> Limited
Co. -> Company

# Numbers and ordinals
1st -> first
2nd -> second
3rd -> third
4th -> fourth
5th -> fifth
10th -> tenth
20th -> twentieth
21st -> twenty-first
30th -> thirtieth
40th -> fortieth
50th -> fiftieth
60th -> sixtieth
70th -> seventieth
80th -> eightieth
90th -> ninetieth
100th -> one hundredth

# Time abbreviations
a.m. -> AM
p.m. -> PM
A.M. -> AM
P.M. -> PM
"""

    try:
        Path(file_path).write_text(default_entries, encoding='utf-8')
    except Exception as e:
        print(f"β Error creating sample file: {e}")
    else:
        print(f"π Created sample abbreviations file: {file_path}")
        print(f"π‘ Edit this file to add your own replacements!")
|
| 120 |
+
|
| 121 |
+
def preprocess_abbreviations(text, replacements):
    """Replace abbreviations with TTS-friendly versions.

    All abbreviations are applied in a single pass, longest first, so a
    short entry can no longer corrupt a longer one ("1st" inside "21st").
    An abbreviation that starts or ends with a letter is not matched when
    directly attached to another letter ("vs." inside "revs."), and likewise
    for digits; letter/digit adjacency such as "10a.m." still matches.

    Args:
        text: Text to process.
        replacements: Mapping of abbreviation to replacement text.

    Returns:
        The text with every matching abbreviation replaced. Matching is
        case-sensitive.
    """
    if not replacements or not text:
        return text

    def bounded(abbrev):
        """Return a regex for ``abbrev`` guarded against same-class neighbours."""
        pattern = re.escape(abbrev)
        first, last = abbrev[0], abbrev[-1]
        if first.isdigit():
            pattern = r'(?<!\d)' + pattern
        elif first.isalpha():
            pattern = r'(?<![^\W\d_])' + pattern
        if last.isdigit():
            pattern += r'(?!\d)'
        elif last.isalpha():
            pattern += r'(?![^\W\d_])'
        return pattern

    # Longest first so the alternation prefers "21st" over "1st".
    keys = sorted((k for k in replacements if k), key=len, reverse=True)
    if not keys:
        return text

    matcher = re.compile("|".join(bounded(k) for k in keys))
    applied = set()

    def substitute(match):
        found = match.group(0)
        applied.add(found)
        return replacements[found]

    text = matcher.sub(substitute, text)

    # As before, the count is the number of distinct abbreviations applied.
    if applied:
        logging.info(f"π Applied {len(applied)} abbreviation replacements")

    return text
|
| 139 |
+
|
| 140 |
+
# ============================================================================
|
| 141 |
+
# TEXT PREPROCESSING AND CHUNKING
|
| 142 |
+
# ============================================================================
|
| 143 |
+
|
| 144 |
+
def smart_punctuate(text):
    """Enhanced punctuation normalization with abbreviation replacement.

    Abbreviations from the abbreviations file are expanded first. Each
    non-empty line is then stripped and given a trailing period unless it
    already ends in ``.``, ``!`` or ``?`` (optionally followed by closing
    quotes or brackets); empty lines are kept as paragraph breaks. Finally
    curly quotes become straight quotes, and markdown bold markers and runs
    of underscores are removed.

    Args:
        text: Raw text, possibly spanning several lines.

    Returns:
        The cleaned text with lines joined by ``\\n``.
    """
    # Load abbreviations and apply them
    abbreviation_replacements = load_abbreviations()
    text = preprocess_abbreviations(text, abbreviation_replacements)

    # Terminal punctuation may sit inside a closing quote/bracket: 'He said "Stop."'
    terminated = re.compile(r'[.!?]["\'\)\]\u201d\u2019]*$')

    out = []
    for line in text.splitlines():
        stripped = line.strip()

        # Preserve empty lines (paragraph breaks)
        if not stripped:
            out.append("")  # Keep the blank line
        elif terminated.search(stripped):
            out.append(stripped)
        else:
            out.append(stripped + ".")

    result = "\n".join(out)

    # Replace smart quotes. Written as escapes: with literal quote characters
    # flattened to ASCII, ''' opened a triple-quoted string and the single
    # quotes were never normalised.
    for curly, plain in (("\u201c", '"'), ("\u201d", '"'), ("\u2018", "'"), ("\u2019", "'")):
        result = result.replace(curly, plain)

    # Remove problematic formatting
    result = re.sub(r'\*\*([^*]+)\*\*', r'\1', result)  # Remove bold markdown
    result = re.sub(r'_{2,}', '', result)  # Remove underlines

    return result
|
| 178 |
+
|
| 179 |
+
def fix_short_sentence_artifacts(chunk_text):
    """Fix multiple short sentences that cause TTS errors.

    Examples:
        "Yes. No. Maybe."  ->  "Yes, No, Maybe."
        "Right."           ->  "Right,"  (single-word chunk)
        "Right. This is a longer sentence."
                           ->  "Right, This is a longer sentence."

    Text after the last sentence mark is kept: closing quotes/brackets are
    re-attached after the final punctuation, and an unterminated fragment is
    appended. A ``.``, ``!`` or ``?`` directly followed by a letter or digit
    (as in "2.0") is not treated as a sentence end.

    Args:
        chunk_text: Text of one chunk.

    Returns:
        The rewritten chunk, or ``chunk_text`` unchanged when no rule applies.
    """
    closing_marks = '"\')]\u201d\u2019'
    stripped = chunk_text.strip()

    # Handle full chunk that is just one short sentence
    if len(stripped.split()) == 1 and stripped.endswith('.'):
        return stripped[:-1] + ','  # Replace period with comma

    # re.split with a capture group yields text, mark, text, mark, ..., tail
    parts = re.split(r'([.!?])(?!\w)', stripped)
    if len(parts) < 2:
        return chunk_text  # nothing to fix

    # Reconstruct sentence-punctuation pairs (zip stops before the tail)
    sentences = []
    for body, punct in zip(parts[0::2], parts[1::2]):
        sentence = body.strip()
        if sentence:
            sentences.append((sentence, punct, len(sentence.split())))

    # Whatever follows the last mark: either closing quotes/brackets or an
    # unterminated fragment. Neither may be dropped.
    tail = parts[-1].strip()
    closers = tail if tail and all(ch in closing_marks for ch in tail) else ""
    fragment = "" if closers else tail

    # Handle multiple short sentences
    short_count = sum(1 for _, _, wc in sentences if wc <= 3)

    if short_count >= 2 and len(sentences) >= 2:
        pieces = [s for s, _, _ in sentences]
        if fragment:
            return ", ".join(pieces + [fragment])
        return ", ".join(pieces) + "." + closers

    # Handle case where first sentence is a single word
    if len(sentences) >= 2 and sentences[0][2] == 1 and sentences[0][1] == ".":
        # Only the first period becomes a comma; later sentences keep theirs.
        following = " ".join(s + p for s, p, _ in sentences[1:])
        new_text = f"{sentences[0][0]}, {following}{closers}"
        if fragment:
            new_text += " " + fragment
        return new_text

    return chunk_text
|
| 221 |
+
|
| 222 |
+
def sentence_chunk_text(text, max_words=MAX_CHUNK_WORDS, min_words=MIN_CHUNK_WORDS):
    """Enhanced sentence chunking with smart mid-sentence breaking for long sentences.

    The text is split into sentences, sentences longer than ``max_words``
    are broken up by ``break_long_sentence``, runs of chunks shorter than
    ``min_words`` are merged with commas, and each result is passed through
    ``fix_short_sentence_artifacts``.

    Args:
        text: Text to chunk.
        max_words: Longest sentence kept as a single chunk.
        min_words: Chunks below this length are grouped with neighbours.

    Returns:
        List of ``(chunk_text, is_para_end)`` tuples. ``is_para_end`` is True
        when the whitespace after the sentence contained a line break.
    """
    # First, split into sentences
    sentence_end_re = re.compile(r'([.!?][\"\'\)]*\s+)')
    sentences = []  # (stripped sentence, ends_paragraph)
    start_index = 0

    for match in sentence_end_re.finditer(text):
        end_index = match.end()
        sentence = text[start_index:end_index].strip()
        if sentence:
            # The flag must come from the raw match: once the sentence is
            # stripped its trailing newline is gone and the check is always
            # False.
            sentences.append((sentence, "\n" in match.group(1)))
        start_index = end_index

    remainder = text[start_index:].strip()
    if remainder:
        sentences.append((remainder, False))

    # Process each sentence and break if too long
    processed_chunks = []

    for sentence, ends_paragraph in sentences:
        if len(sentence.split()) <= max_words:
            # Sentence is fine as-is
            processed_chunks.append((sentence, ends_paragraph))
        else:
            # Sentence is too long - need to break it intelligently
            broken_chunks = list(break_long_sentence(sentence, max_words))
            if broken_chunks and ends_paragraph:
                # The paragraph ends with the last piece of the sentence.
                last_text, _ = broken_chunks[-1]
                broken_chunks[-1] = (last_text, True)
            processed_chunks.extend(broken_chunks)

    # Now group short chunks together (original grouping logic)
    final_chunks = []
    short_group = []

    for chunk_text, is_para_end in processed_chunks:
        word_count = len(chunk_text.split())

        if word_count < min_words and not is_para_end:
            # Collect short chunks for grouping
            short_group.append(re.sub(r'[.!?]+$', '', chunk_text.strip()))
        elif short_group:
            # Process any accumulated short chunks
            if word_count < min_words:
                # This chunk is also short, add it to the group
                short_group.append(re.sub(r'[.!?]+$', '', chunk_text.strip()))
                merged = ", ".join(short_group) + "."
            else:
                # Merge short group with current chunk
                merged = ", ".join(short_group) + ", " + chunk_text
            final_chunks.append((merged.strip(), is_para_end))
            short_group = []
        else:
            # Normal chunk
            final_chunks.append((chunk_text, is_para_end))

    # Handle any remaining short group
    if short_group:
        merged = ", ".join(short_group) + "."
        final_chunks.append((merged.strip(), False))

    # Apply short sentence cleanup
    return [
        (fix_short_sentence_artifacts(chunk_text), is_para_end)
        for chunk_text, is_para_end in final_chunks
    ]
|
| 302 |
+
|
| 303 |
+
def break_long_sentence(sentence, max_words):
    """Break a long sentence into chunks of at most ``max_words`` words.

    The sentence is split at the first natural pause (conjunction after a
    comma, semicolon, em dash, quote, relative clause, plain comma) that
    yields a chunk of acceptable size.  When no such pause exists the text is
    cut at exactly ``max_words`` words and a trailing comma is appended so the
    chunk still ends on a pause.

    Args:
        sentence: Text to split.  A trailing newline marks a paragraph end.
        max_words: Upper bound on words per chunk; values below 1 are
            treated as 1.

    Returns:
        A list of ``(chunk_text, is_paragraph_end)`` tuples.  Only the final
        chunk can carry ``True``, and only when ``sentence`` ended with a
        newline.  An empty or whitespace-only sentence yields ``[]``.
    """
    # Ordered from the most natural pause to the last-resort plain comma.
    # The split happens at the END of a match, so the delimiter stays with
    # the left-hand chunk.
    break_patterns = (
        r',\s+and\s+',      # ", and "
        r',\s+but\s+',      # ", but "
        r',\s+yet\s+',      # ", yet "
        r',\s+or\s+',       # ", or "
        r',\s+so\s+',       # ", so "
        r';\s*',            # "; "
        r'\u2014\s*',       # em dash
        r'\s+\u2014\s+',    # spaced em dash
        r'\.\s*"',          # '."' (end quote)
        r'"\s*',            # '" ' (start quote)
        r',\s+which\s+',    # ", which "
        r',\s+when\s+',     # ", when "
        r',\s+where\s+',    # ", where "
        r',\s+while\s+',    # ", while "
        r',\s+though\s+',   # ", though "
        r',\s+',            # any comma - last resort
    )

    # A non-positive limit would make the forced split below consume zero
    # words per pass and never terminate.
    max_words = max(1, max_words)
    # Reject break points that would leave a tiny leading fragment.
    min_chunk_words = min(6, max_words // 3)

    is_para_end = sentence.endswith("\n")
    remaining_text = sentence.strip()
    chunks = []

    while remaining_text:
        words = remaining_text.split()

        if len(words) <= max_words:
            # The tail of the sentence is where the paragraph actually ends,
            # so it inherits the paragraph flag.
            chunks.append((remaining_text, is_para_end))
            break

        best_break_pos = None
        best_break_text = ""
        for pattern in break_patterns:
            for match in re.finditer(pattern, remaining_text):
                candidate = remaining_text[:match.end()].strip()
                candidate_words = len(candidate.split())
                if candidate_words > max_words:
                    # Later matches only produce longer prefixes.
                    break
                if candidate_words >= min_chunk_words:
                    best_break_pos = match.end()
                    best_break_text = candidate
                    break
            if best_break_pos:
                break

        if best_break_pos:
            chunks.append((best_break_text, False))
            remaining_text = remaining_text[best_break_pos:].strip()
        else:
            # No usable pause: cut at the word limit and end on a comma.
            forced = " ".join(words[:max_words])
            if not forced.endswith(('.', '!', '?', ',', ';')):
                forced += ","
            chunks.append((forced, False))
            remaining_text = " ".join(words[max_words:])

    return chunks
|
| 377 |
+
|
| 378 |
+
# ============================================================================
|
| 379 |
+
# CONTENT BOUNDARY DETECTION
|
| 380 |
+
# ============================================================================
|
| 381 |
+
|
| 382 |
+
def detect_content_boundaries(chunk_text, chunk_index, all_chunks):
    """Classify the boundary a chunk sits on so a suitable pause can follow.

    Checks run in this order and later hits override earlier ones, except
    that ``"paragraph_end"`` is only used when nothing else matched:

    1. ``"chapter_start"`` - the chunk has a line starting with a chapter
       heading ("Chapter 3", "CH. 3", "3.", "IV.").
    2. ``"chapter_end"`` - the NEXT chunk starts with such a heading.
    3. ``"section_break"`` - the chunk contains a run of three or more
       ``*``, ``#`` or em dashes.
    4. ``"paragraph_end"`` - the chunk text ends with a newline.

    Args:
        chunk_text: Text of the current chunk.
        chunk_index: Position of the current chunk in ``all_chunks``.
        all_chunks: Sequence of chunks; items may be plain strings or
            ``(text, is_paragraph_end)`` tuples.

    Returns:
        One of the four labels above, or ``None`` when no boundary applies.
    """
    chapter_patterns = (
        r'^(Chapter \d+|CHAPTER \d+)',
        r'^(Ch\. \d+|CH\. \d+)',
        r'^\d+\.',       # Simple "1." numbering
        r'^[IVX]+\.',    # Roman numerals "I.", "II.", etc.
    )

    boundary_type = None

    current = chunk_text.strip()
    if any(re.search(pattern, current, re.MULTILINE) for pattern in chapter_patterns):
        boundary_type = "chapter_start"

    # Look ahead: if the next chunk opens a chapter, this one closes one.
    if chunk_index + 1 < len(all_chunks):
        next_chunk = all_chunks[chunk_index + 1]
        if isinstance(next_chunk, (tuple, list)):
            # Tolerate (text, is_paragraph_end) pairs as well as bare strings.
            next_chunk = next_chunk[0] if next_chunk else ""
        upcoming = next_chunk.strip()
        if any(re.search(pattern, upcoming) for pattern in chapter_patterns):
            boundary_type = "chapter_end"

    # Section breaks: "***", "###" or a run of em dashes.
    if re.search(r'\*{3,}|\#{3,}|\u2014{3,}', chunk_text):
        boundary_type = "section_break"

    # Paragraph ending is the weakest signal and never overrides the others.
    if boundary_type is None and chunk_text.endswith('\n'):
        boundary_type = "paragraph_end"

    return boundary_type
|
| 417 |
+
|
| 418 |
+
# ============================================================================
|
| 419 |
+
# UTILITY FUNCTIONS
|
| 420 |
+
# ============================================================================
|
| 421 |
+
|
| 422 |
+
def reload_abbreviations():
    """Load the abbreviation table again and hand it back.

    Thin convenience alias around ``load_abbreviations()``, intended for
    picking up edits to the abbreviations file while testing.
    """
    refreshed = load_abbreviations()
    return refreshed
|
| 425 |
+
|
| 426 |
+
def test_abbreviations(test_text="Dr. Smith met with the M.P. at 3:30 p.m. on the 21st."):
    """Run the abbreviation replacements over a sample and show the result.

    Prints the text before and after ``preprocess_abbreviations`` and
    returns the processed text.
    """
    replacements = load_abbreviations()
    print(f"Original: {test_text}")
    result = preprocess_abbreviations(test_text, replacements)
    print(f"Processed: {result}")
    return result
|
| 433 |
+
|
| 434 |
+
def test_chunking(test_text=None, max_words=20, min_words=4):
    """Chunk sample or custom text and print a per-chunk word-count report.

    Args:
        test_text: Text to chunk; a built-in two-sentence sample is used when
            ``None``.
        max_words: Word limit passed to ``sentence_chunk_text``; chunks above
            it are flagged in the report.
        min_words: Minimum chunk size passed to ``sentence_chunk_text``.

    Returns:
        The list of ``(chunk_text, is_paragraph_end)`` tuples produced by
        ``sentence_chunk_text``.
    """
    if test_text is None:
        test_text = '''Though perfectly worldly-wise, and able, as she expressed it, to take care of herself, there was yet something curiously ingenuous in her single-minded attitude towards life, and her whole-hearted determination to "make good." This glimpse of a world unknown to me was not without its charm, and I enjoyed seeing her vivid little face light up as she talked.'''

    chunks = sentence_chunk_text(test_text, max_words=max_words, min_words=min_words)

    print("Enhanced Chunking Results:")
    for number, (chunk, _is_para) in enumerate(chunks, start=1):
        word_count = len(chunk.split())
        print(f"Chunk {number} ({word_count} words): {chunk}")
        if word_count > max_words:
            print(" ⚠️ WARNING: Still over limit!")
        print()

    return chunks
|
wrapper/chunk_editor.py
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
def update_chunk(chunk, boundary_type=None, pause_duration=None, sentiment_score=None):
    """Overwrite selected fields of a chunk dict in place and return it.

    Only arguments that are not ``None`` are written; every other key of
    ``chunk`` is left untouched.
    """
    requested = (
        ('boundary_type', boundary_type),
        ('pause_duration', pause_duration),
        ('sentiment_score', sentiment_score),
    )
    for field, value in requested:
        if value is not None:
            chunk[field] = value
    return chunk
|
wrapper/chunk_editor.py.bak
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
def update_chunk(chunk, boundary_type=None, pause_duration=None, sentiment_score=None):
    """Overwrite selected fields of a chunk dict in place and return it.

    Only arguments that are not ``None`` are written; every other key of
    ``chunk`` is left untouched.
    """
    requested = (
        ('boundary_type', boundary_type),
        ('pause_duration', pause_duration),
        ('sentiment_score', sentiment_score),
    )
    for field, value in requested:
        if value is not None:
            chunk[field] = value
    return chunk
|
wrapper/chunk_loader.py
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
|
| 3 |
+
def load_chunks(path):
    """Read a chunk JSON file and return its chunks without metadata entries.

    When the file holds a list, dict items with a truthy ``'_metadata'`` value
    are dropped and the rest are returned in order.  Any other top-level JSON
    value is returned unchanged.
    """
    with open(path, 'r', encoding='utf-8') as handle:
        payload = json.load(handle)

    if not isinstance(payload, list):
        return payload

    def _is_metadata(entry):
        return isinstance(entry, dict) and entry.get('_metadata', False)

    return [entry for entry in payload if not _is_metadata(entry)]
|
| 13 |
+
|
| 14 |
+
def load_metadata(path):
    """Return the metadata entry stored at the head of a chunk JSON file.

    The metadata entry is recognised as a dict in FIRST position of a
    top-level list whose ``'_metadata'`` value is truthy.

    Args:
        path: Location of the JSON file.

    Returns:
        The metadata dict, or ``None`` when the file has no such entry or
        cannot be read or parsed (the failure is printed, not raised).
    """
    try:
        with open(path, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except Exception as e:
        # Best effort by design: callers get None instead of an exception.
        print(f"⚠️ Error loading metadata from {path}: {e}")
        return None

    if isinstance(data, list) and data:
        first_item = data[0]
        if isinstance(first_item, dict) and first_item.get('_metadata', False):
            return first_item

    return None
|
| 30 |
+
|
| 31 |
+
def save_chunks(path, chunks):
    """Write chunks to ``path`` as pretty-printed UTF-8 JSON, cleaning text.

    For every dict chunk with a string ``'text'`` value a cleaned COPY is
    written (the caller's objects are never modified):

    * escaped quotes (backslash + quote) are unescaped;
    * in text that contains a double quote and the word "replied" or "said",
      a stray ``", "`` pair before a period or at the end of the text is
      collapsed to ``".``.

    Each chunk whose text changed is reported on stdout.  All other items
    (metadata entries, non-dict values, chunks without string text) are
    written unchanged.

    Args:
        path: Destination file; overwritten if it exists.
        chunks: Iterable of chunk dicts and/or other JSON-serialisable items.
    """
    import re

    # Compiled once per call rather than looked up for every chunk.
    stray_pair_before_period = re.compile(r'(["\'])\s*,\s*(["\'])\s*\.')
    stray_pair_at_end = re.compile(r'(["\'])\s*,\s*(["\'])\s*$')

    cleaned_chunks = []
    for chunk in chunks:
        original_text = chunk.get('text') if isinstance(chunk, dict) else None
        if not isinstance(original_text, str):
            # Nothing to clean; pass the item through as-is.
            cleaned_chunks.append(chunk)
            continue

        cleaned_text = original_text.replace('\\"', '"').replace("\\'", "'")

        if ('replied' in cleaned_text or 'said' in cleaned_text) and '"' in cleaned_text:
            cleaned_text = stray_pair_before_period.sub(r'\1.', cleaned_text)
            cleaned_text = stray_pair_at_end.sub(r'\1.', cleaned_text)

        if cleaned_text != original_text:
            print("π§ FIXED dialogue corruption:")
            print(f" Before: {original_text}")
            print(f" After: {cleaned_text}")

        # .copy() keeps the mapping type and key order (dict and OrderedDict
        # alike); a shallow copy suffices because it is serialised right away.
        chunk_copy = chunk.copy()
        chunk_copy['text'] = cleaned_text
        cleaned_chunks.append(chunk_copy)

    with open(path, 'w', encoding='utf-8') as f:
        json.dump(cleaned_chunks, f, indent=2, ensure_ascii=False)
|
wrapper/chunk_loader.py.bak
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
|
| 3 |
+
def load_chunks(path):
    """Parse the UTF-8 JSON file at ``path`` and return its content as-is."""
    with open(path, 'r', encoding='utf-8') as handle:
        payload = json.load(handle)
    return payload
|
| 6 |
+
|
| 7 |
+
def save_chunks(path, chunks):
    """Serialise ``chunks`` to ``path`` as UTF-8 JSON with a 2-space indent."""
    with open(path, 'w', encoding='utf-8') as handle:
        json.dump(chunks, handle, indent=2)
|
wrapper/chunk_player.py
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import subprocess
|
| 2 |
+
import os
|
| 3 |
+
|
| 4 |
+
def play_chunk_audio(path):
    """Play an audio file through ``ffplay`` with no window and no output.

    Prints a message and returns when ``path`` does not exist.  Any exception
    raised while launching the player is printed rather than propagated.
    """
    if os.path.exists(path):
        command = ["ffplay", "-nodisp", "-autoexit", path]
        try:
            subprocess.run(command, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
        except Exception as e:
            print(f"Error playing audio: {e}")
    else:
        print(f"β Audio file not found: {path}")
|
| 12 |
+
|
wrapper/chunk_player.py.bak
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import subprocess
|
| 2 |
+
import os
|
| 3 |
+
|
| 4 |
+
def play_chunk_audio(path):
    """Play an audio file through ``ffplay`` with no window and no output.

    Prints a message and returns when ``path`` does not exist.  Any exception
    raised while launching the player is printed rather than propagated.
    """
    if os.path.exists(path):
        command = ["ffplay", "-nodisp", "-autoexit", path]
        try:
            subprocess.run(command, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
        except Exception as e:
            print(f"Error playing audio: {e}")
    else:
        print(f"β Audio file not found: {path}")
|
| 12 |
+
|
wrapper/chunk_revisions.py
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import shutil
|
| 3 |
+
from pathlib import Path
|
| 4 |
+
from config.config import AUDIOBOOK_ROOT
|
| 5 |
+
base = AUDIOBOOK_ROOT
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
def accept_revision(index, audio_dir):
    """Promote a revised chunk recording to be the main recording.

    File names use a 1-based, 5-digit chunk number, so ``index`` 0 refers to
    ``chunk_00001.wav`` and its revision ``chunk_00001_rev.wav``.  The current
    main file, if present, is first moved to
    ``<audio_dir>/../../Audio_Revisions/chunk_XXXXX_orig.wav``; the revision
    is then moved onto the main file name.

    Args:
        index: Zero-based chunk index.
        audio_dir: Directory holding the chunk ``.wav`` files.

    Returns:
        ``None``.  When no revision file exists a message is printed and
        nothing on disk is touched.
    """
    audio_root = Path(audio_dir)
    chunk_id = f"chunk_{index+1:05d}"
    original = audio_root / f"{chunk_id}.wav"
    revised = audio_root / f"{chunk_id}_rev.wav"

    if not revised.exists():
        print("β No revised file found. Cannot accept.")
        return

    # Created only once there is something to accept, so a failed call
    # leaves no empty directory behind.
    archive_dir = audio_root.parent.parent / "Audio_Revisions"
    archive_dir.mkdir(exist_ok=True)

    if original.exists():
        archived = archive_dir / f"{chunk_id}_orig.wav"
        shutil.move(str(original), str(archived))
        print(f"π¦ Original chunk archived to {archived.name}")
    else:
        print("⚠️ Original chunk missing — no archive created.")

    # The revision takes over the main file name.
    shutil.move(str(revised), str(original))
    print(f"✅ Revised chunk accepted as {original.name}")
|
wrapper/chunk_revisions.py.bak
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import shutil
|
| 3 |
+
from pathlib import Path
|
| 4 |
+
from config.config import AUDIOBOOK_ROOT
|
| 5 |
+
base = AUDIOBOOK_ROOT
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
def accept_revision(index, audio_dir):
    """Promote a revised chunk recording to be the main recording.

    File names use a 1-based, 5-digit chunk number, so ``index`` 0 refers to
    ``chunk_00001.wav`` and its revision ``chunk_00001_rev.wav``.  The current
    main file, if present, is first moved to
    ``<audio_dir>/../../Audio_Revisions/chunk_XXXXX_orig.wav``; the revision
    is then moved onto the main file name.

    Args:
        index: Zero-based chunk index.
        audio_dir: Directory holding the chunk ``.wav`` files.

    Returns:
        ``None``.  When no revision file exists a message is printed and
        nothing on disk is touched.
    """
    audio_root = Path(audio_dir)
    chunk_id = f"chunk_{index+1:05d}"
    original = audio_root / f"{chunk_id}.wav"
    revised = audio_root / f"{chunk_id}_rev.wav"

    if not revised.exists():
        print("β No revised file found. Cannot accept.")
        return

    # Created only once there is something to accept, so a failed call
    # leaves no empty directory behind.
    archive_dir = audio_root.parent.parent / "Audio_Revisions"
    archive_dir.mkdir(exist_ok=True)

    if original.exists():
        archived = archive_dir / f"{chunk_id}_orig.wav"
        shutil.move(str(original), str(archived))
        print(f"π¦ Original chunk archived to {archived.name}")
    else:
        print("⚠️ Original chunk missing — no archive created.")

    # The revision takes over the main file name.
    shutil.move(str(revised), str(original))
    print(f"✅ Revised chunk accepted as {original.name}")
|
wrapper/chunk_revisions.py~
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import shutil
|
| 3 |
+
from pathlib import Path
|
| 4 |
+
from config.config import AUDIOBOOK_ROOT
|
| 5 |
+
base = AUDIOBOOK_ROOT
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
def accept_revision(index, audio_dir=None):
    """Promote a revised chunk recording to be the main recording.

    File names use the index as given with 3-digit zero padding
    (``chunk_007.wav`` / ``chunk_007_rev.wav``).  The current main file, if
    present, is first moved to ``<dir>/Audio_Revisions/chunk_XXX_orig.wav``;
    the revision is then moved onto the main file name.

    Args:
        index: Chunk number used verbatim in the file names.
        audio_dir: Directory holding the chunk files.  Defaults to the
            module-level ``AUDIO_OUTPUT_DIR``.

    Returns:
        ``None``.  When no revision file exists a message is printed and
        nothing on disk is touched.
    """
    # NOTE(review): AUDIO_OUTPUT_DIR is not defined by the imports visible in
    # this file, so the default path raises NameError unless it is provided
    # elsewhere - confirm, or always pass ``audio_dir``.
    audio_root = Path(AUDIO_OUTPUT_DIR if audio_dir is None else audio_dir)
    chunk_id = f"chunk_{index:03}"
    original = audio_root / f"{chunk_id}.wav"
    revised = audio_root / f"{chunk_id}_rev.wav"

    if not revised.exists():
        print("β No revised file found. Cannot accept.")
        return

    # Created only once there is something to accept.
    archive_dir = audio_root / "Audio_Revisions"
    archive_dir.mkdir(exist_ok=True)

    if original.exists():
        archived = archive_dir / f"{chunk_id}_orig.wav"
        shutil.move(str(original), str(archived))
        print(f"π¦ Original chunk archived to {archived.name}")
    else:
        print("⚠️ Original chunk missing — no archive created.")

    # The revision takes over the main file name.
    shutil.move(str(revised), str(original))
    print(f"✅ Revised chunk accepted as {original.name}")
|
wrapper/chunk_search.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
def search_chunks(chunks, query):
    """Return the chunks whose ``'text'`` contains ``query``, ignoring case.

    Matching chunks are returned in their original order; the same dict
    objects are returned, not copies.
    """
    needle = query.lower()
    return [entry for entry in chunks if needle in entry['text'].lower()]
|
wrapper/chunk_search.py.bak
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
def search_chunks(chunks, query):
    """Return the chunks whose ``'text'`` contains ``query``, ignoring case.

    Matching chunks are returned in their original order; the same dict
    objects are returned, not copies.
    """
    needle = query.lower()
    return [entry for entry in chunks if needle in entry['text'].lower()]
|
wrapper/chunk_synthesizer.py
ADDED
|
@@ -0,0 +1,208 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pathlib import Path
|
| 2 |
+
import torch
|
| 3 |
+
import time
|
| 4 |
+
import re
|
| 5 |
+
from pydub import AudioSegment
|
| 6 |
+
|
| 7 |
+
from modules.tts_engine import load_optimized_model
|
| 8 |
+
from modules.file_manager import ensure_voice_sample_compatibility, list_voice_samples
|
| 9 |
+
from modules.audio_processor import apply_smart_fade_memory, smart_audio_validation_memory, process_audio_with_trimming_and_silence
|
| 10 |
+
from config.config import *
|
| 11 |
+
|
| 12 |
+
def get_original_voice_from_log(book_name):
    """Return the voice name recorded in a book's ``run.log``, or ``None``.

    Scans ``<AUDIOBOOK_ROOT>/<book_name>/run.log`` line by line and returns
    the text following the first ``"Voice: "`` or ``"Voice used: "`` prefix.

    Args:
        book_name: Name of the book's folder under ``AUDIOBOOK_ROOT``.

    Returns:
        The voice name, or ``None`` when the log is missing, holds no such
        line, or cannot be read (read errors are printed, not raised).
    """
    log_file = Path(AUDIOBOOK_ROOT) / book_name / "run.log"
    if not log_file.exists():
        return None

    try:
        with open(log_file, 'r', encoding='utf-8') as f:
            for line in f:
                line = line.strip()
                if line.startswith(("Voice: ", "Voice used: ")):
                    voice_name = line.split(": ", 1)[1].strip()
                    print(f"π Found original voice in log: {voice_name}")
                    return voice_name
    except Exception as e:
        # Best effort: an unreadable log just means "voice unknown".
        print(f"⚠️ Error reading run log: {e}")

    return None
|
| 30 |
+
|
| 31 |
+
def get_original_voice_from_filename(book_name):
    """Recover the voice name from an existing audiobook output file name.

    Looks in ``<AUDIOBOOK_ROOT>/<book_name>`` for files named like
    ``Title [VoiceName].wav`` and, failing that, ``Title[VoiceName].m4b``.
    Returns the bracketed name of the first match, or ``None``.
    """
    book_dir = Path(AUDIOBOOK_ROOT) / book_name

    # All WAV files are examined before any M4B file.
    for extension, label in (("wav", ""), ("m4b", "M4B ")):
        bracketed_name = re.compile(rf'\[([^\]]+)\]\.{extension}$')
        for candidate in book_dir.glob(f"*.{extension}"):
            found = bracketed_name.search(candidate.name)
            if found is None:
                continue
            voice_name = found.group(1)
            print(f"π Found original voice in {label}filename: {voice_name}")
            return voice_name

    return None
|
| 53 |
+
|
| 54 |
+
def find_voice_file_by_name(voice_name):
    """Find a voice sample whose file stem matches ``voice_name``.

    An exact stem match wins; otherwise the first sample whose stem contains
    ``voice_name`` case-insensitively is used.

    Args:
        voice_name: Voice name to look for.

    Returns:
        The matching entry from ``list_voice_samples()``, or ``None`` when
        nothing matches.
    """
    voice_files = list_voice_samples()

    # Exact match first
    for voice_file in voice_files:
        if voice_file.stem == voice_name:
            print(f"✅ Found exact voice match: {voice_file.name}")
            return voice_file

    # Partial match (case insensitive)
    wanted = voice_name.lower()
    for voice_file in voice_files:
        if wanted in voice_file.stem.lower():
            print(f"✅ Found partial voice match: {voice_file.name}")
            return voice_file

    return None
|
| 72 |
+
|
| 73 |
+
def get_tts_params_for_chunk(chunk):
    """Return the TTS parameters to use for a chunk.

    A ``'tts_params'`` entry stored on the chunk is returned as-is.  Without
    one, the user is asked on the console for exaggeration, CFG weight and
    temperature; an empty answer keeps the configured default and invalid
    numbers are asked again.
    """
    if 'tts_params' in chunk:
        stored = chunk['tts_params']
        print(f"π Using stored TTS params: exag={stored.get('exaggeration', 1.0)}, cfg={stored.get('cfg_weight', 0.7)}, temp={stored.get('temperature', 0.7)}")
        return stored

    print(f"\nβοΈ TTS Parameters for chunk synthesis:")

    def _ask_float(prompt, default):
        # Loop until the answer is empty (keep default) or a valid float.
        while True:
            answer = input(f"{prompt} [{default}]: ").strip()
            if not answer:
                return default
            try:
                return float(answer)
            except ValueError:
                print(f"β Invalid input. Please enter a valid number.")

    params = {}
    params['exaggeration'] = _ask_float("Exaggeration", DEFAULT_EXAGGERATION)
    params['cfg_weight'] = _ask_float("CFG Weight", DEFAULT_CFG_WEIGHT)
    params['temperature'] = _ask_float("Temperature", DEFAULT_TEMPERATURE)
    return params
|
| 103 |
+
|
| 104 |
+
def synthesize_chunk(chunk, index, book_name, audio_dir, revision=False, chunks_json_path=None, override_voice_name=None):
    """Generate and save the audio for a single chunk.

    The voice is ``override_voice_name`` when given, otherwise whatever
    ``detect_voice_for_book`` finds for the book; if neither resolves to a
    sample file, the first available voice sample is used.  TTS parameters
    come from ``get_tts_params_for_chunk``, which may prompt on the console.

    Args:
        chunk: Chunk dict; ``'text'`` is required, ``'boundary_type'`` and
            ``'tts_params'`` are optional.
        index: Zero-based chunk index; the file is numbered ``index + 1``.
        book_name: Book folder name, used for voice detection.
        audio_dir: Directory the ``.wav`` file is written to.
        revision: Write ``chunk_XXXXX_rev.wav`` instead of ``chunk_XXXXX.wav``.
        chunks_json_path: Passed through to voice detection.
        override_voice_name: Voice to use instead of the detected one.

    Returns:
        The output path as a string, or ``None`` when the chunk has no text,
        no voice sample exists, or synthesis fails (the error and traceback
        are printed, not raised).
    """
    suffix = "_rev" if revision else ""
    out_path = Path(audio_dir) / f"chunk_{index+1:05d}{suffix}.wav"

    model = None
    try:
        # Validate before the expensive model load and any console prompt.
        chunk_text = chunk.get('text', '')
        if not chunk_text:
            print("β No text found in chunk")
            return None

        device = "cuda" if torch.cuda.is_available() else "cpu"

        print(f"π€ Loading TTS model for chunk synthesis...")
        model = load_optimized_model(device)

        # Determine voice to use
        if override_voice_name:
            print(f"π€ Using explicitly selected voice: {override_voice_name}")
            voice_path = find_voice_file_by_name(override_voice_name)
            voice_name = override_voice_name
            detection_method = "user_selected"
        else:
            print(f"π Detecting original voice for book: {book_name}")
            from modules.voice_detector import detect_voice_for_book

            voice_name, voice_path, detection_method = detect_voice_for_book(book_name, chunks_json_path)

        # Fallback for both branches: an override name that matches no
        # sample would otherwise reach the TTS engine as ``None``.
        if not voice_path:
            print("⚠️ Voice not found, using first available voice")
            voice_files = list_voice_samples()
            if not voice_files:
                print("β No voice samples found")
                return None
            voice_path = voice_files[0]
            voice_name = voice_path.stem
            detection_method = "fallback_first_available"

        print(f"π€ Using voice: {voice_name} (method: {detection_method})")
        compatible_voice = ensure_voice_sample_compatibility(voice_path)

        tts_params = get_tts_params_for_chunk(chunk)
        model.prepare_conditionals(compatible_voice)

        print(f"π€ Synthesizing: {chunk_text[:50]}...")
        print(f"π TTS params: exag={tts_params['exaggeration']}, cfg={tts_params['cfg_weight']}, temp={tts_params['temperature']}")

        with torch.no_grad():
            wav = model.generate(chunk_text,
                                 exaggeration=tts_params['exaggeration'],
                                 cfg_weight=tts_params['cfg_weight'],
                                 temperature=tts_params['temperature']).detach().cpu()

        if wav.dim() == 1:
            wav = wav.unsqueeze(0)

        # Round-trip through an in-memory WAV to obtain a pydub AudioSegment.
        import io
        import soundfile as sf

        wav_np = wav.squeeze().numpy()
        with io.BytesIO() as wav_buffer:
            sf.write(wav_buffer, wav_np, model.sr, format='wav')
            wav_buffer.seek(0)
            audio_segment = AudioSegment.from_wav(wav_buffer)

        audio_segment = apply_smart_fade_memory(audio_segment)
        # The quarantine flag returned alongside the audio is not used here.
        audio_segment, _is_quarantined = smart_audio_validation_memory(audio_segment, model.sr)

        # Trimming plus contextual silence when a boundary is set; plain
        # trimming otherwise, if enabled in the config.
        boundary_type = chunk.get('boundary_type', 'none')
        if boundary_type and boundary_type != "none":
            audio_segment = process_audio_with_trimming_and_silence(audio_segment, boundary_type)
        elif ENABLE_AUDIO_TRIMMING:
            from modules.audio_processor import trim_audio_endpoint
            audio_segment = trim_audio_endpoint(audio_segment)

        audio_segment.export(out_path, format="wav")
        print(f"✅ Saved synthesized chunk: {out_path.name}")

        return str(out_path)

    except Exception as e:
        print(f"β Failed to synthesize chunk: {e}")
        import traceback
        traceback.print_exc()
        return None

    finally:
        # Release the model on EVERY exit path, not only after success.
        if model is not None:
            del model
            torch.cuda.empty_cache()
|
wrapper/chunk_synthesizer.py.bak
ADDED
|
@@ -0,0 +1,90 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pathlib import Path
|
| 2 |
+
import torch
|
| 3 |
+
import time
|
| 4 |
+
from pydub import AudioSegment
|
| 5 |
+
|
| 6 |
+
from modules.tts_engine import load_optimized_model
|
| 7 |
+
from modules.file_manager import ensure_voice_sample_compatibility, list_voice_samples
|
| 8 |
+
from modules.audio_processor import apply_smart_fade_memory, smart_audio_validation_memory, add_contextual_silence_memory
|
| 9 |
+
from config.config import *
|
| 10 |
+
|
| 11 |
+
def synthesize_chunk(chunk, index, book_name, audio_dir, revision=False):
    """Generate and save the audio for a single chunk with fixed settings.

    Uses the first available voice sample and fixed generation settings
    (exaggeration 1.0, CFG weight 0.7, temperature 0.7).

    Args:
        chunk: Chunk dict; ``'text'`` is required, ``'boundary_type'`` and
            ``'is_paragraph_end'`` are optional.
        index: Zero-based chunk index; the file is numbered ``index + 1``.
        book_name: Accepted for interface compatibility; not used here.
        audio_dir: Directory the ``.wav`` file is written to.
        revision: Write ``chunk_XXXXX_rev.wav`` instead of ``chunk_XXXXX.wav``.

    Returns:
        The output path as a string, or ``None`` when the chunk has no text,
        no voice sample exists, or synthesis fails (the error and traceback
        are printed, not raised).
    """
    suffix = "_rev" if revision else ""
    out_path = Path(audio_dir) / f"chunk_{index+1:05d}{suffix}.wav"

    model = None
    try:
        # Validate before the expensive model load.
        chunk_text = chunk.get('text', '')
        if not chunk_text:
            print("β No text found in chunk")
            return None

        device = "cuda" if torch.cuda.is_available() else "cpu"

        print(f"π€ Loading TTS model for chunk synthesis...")
        model = load_optimized_model(device)

        # Get voice sample - use first available voice for now
        voice_files = list_voice_samples()
        if not voice_files:
            print("β No voice samples found")
            return None

        voice_path = voice_files[0]
        compatible_voice = ensure_voice_sample_compatibility(voice_path)

        model.prepare_conditionals(compatible_voice, exaggeration=1.0)

        print(f"π€ Synthesizing: {chunk_text[:50]}...")

        with torch.no_grad():
            wav = model.generate(chunk_text,
                                 exaggeration=1.0,
                                 cfg_weight=0.7,
                                 temperature=0.7).detach().cpu()

        if wav.dim() == 1:
            wav = wav.unsqueeze(0)

        # Round-trip through an in-memory WAV to obtain a pydub AudioSegment.
        import io
        import soundfile as sf

        wav_np = wav.squeeze().numpy()
        with io.BytesIO() as wav_buffer:
            sf.write(wav_buffer, wav_np, model.sr, format='wav')
            wav_buffer.seek(0)
            audio_segment = AudioSegment.from_wav(wav_buffer)

        audio_segment = apply_smart_fade_memory(audio_segment)
        # The quarantine flag returned alongside the audio is not used here.
        audio_segment, _is_quarantined = smart_audio_validation_memory(audio_segment, model.sr)

        # Contextual silence for a known boundary, otherwise a fixed pause
        # after paragraph ends.
        boundary_type = chunk.get('boundary_type', 'none')
        if boundary_type and boundary_type != "none":
            audio_segment = add_contextual_silence_memory(audio_segment, boundary_type)
        elif chunk.get('is_paragraph_end', False):
            silence = AudioSegment.silent(duration=SILENCE_PARAGRAPH_FALLBACK)
            audio_segment = audio_segment + silence

        audio_segment.export(out_path, format="wav")
        print(f"✅ Saved synthesized chunk: {out_path.name}")

        return str(out_path)

    except Exception as e:
        print(f"β Failed to synthesize chunk: {e}")
        import traceback
        traceback.print_exc()
        return None

    finally:
        # Release the model on EVERY exit path, not only after success.
        if model is not None:
            del model
            torch.cuda.empty_cache()
|
wrapper/chunk_synthesizer.py~
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from modules.tts_engine import process_one_chunk
|
| 2 |
+
from config.config import AUDIOBOOK_ROOT
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
def synthesize_chunk(chunk, index, revision=False):
    """Generate audio for a single chunk.

    Args:
        chunk: Chunk dictionary, passed unchanged to ``process_one_chunk``.
        index: Integer chunk number; zero-padded to three digits in the
            output file name.
        revision: When true the file is named ``chunk_NNN_rev.wav`` instead
            of ``chunk_NNN.wav``.

    Returns:
        The output path as a string, or ``None`` if ``process_one_chunk``
        raised.
    """
    from pathlib import Path

    suffix = "_rev" if revision else ""
    # The previous code used AUDIO_OUTPUT_DIR, which this module never
    # defines or imports (NameError on every call). AUDIOBOOK_ROOT is the
    # location this module does import.
    out_path = Path(AUDIOBOOK_ROOT) / f"chunk_{index:03}{suffix}.wav"

    try:
        process_one_chunk(chunk, out_path)  # Must accept (chunk_dict, output_path)
    except Exception as e:
        # Best-effort: report the failure and let the caller decide.
        print(f"β Failed to synthesize chunk: {e}")
        return None
    return str(out_path)
|
wrapper/chunk_tool.py
ADDED
|
@@ -0,0 +1,249 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from wrapper.chunk_loader import load_chunks, save_chunks
|
| 2 |
+
from wrapper.chunk_search import search_chunks
|
| 3 |
+
from wrapper.chunk_editor import update_chunk
|
| 4 |
+
from wrapper.chunk_player import play_chunk_audio
|
| 5 |
+
from wrapper.chunk_synthesizer import synthesize_chunk
|
| 6 |
+
from wrapper.chunk_revisions import accept_revision
|
| 7 |
+
import os
|
| 8 |
+
from config.config import AUDIOBOOK_ROOT
|
| 9 |
+
AUDIO_DIR = AUDIOBOOK_ROOT
|
| 10 |
+
|
| 11 |
+
def select_book_for_repair():
    """Interactively pick the book whose chunks should be repaired.

    Candidates are collected from two places:

    * ``<AUDIOBOOK_ROOT>/<book>/TTS/text_chunks/chunks_info.json``
    * ``Text_Input/<book>_chunks.json`` (only for book names not already
      found under ``AUDIOBOOK_ROOT``)

    Returns:
        ``(book_name, json_path)`` for the selected book, or
        ``(None, None)`` when no chunk files exist or the prompt is
        cancelled (blank entry, Ctrl-C or EOF).
    """
    from pathlib import Path

    suffix = "_chunks"
    available_books = []

    # First check TTS processing directories
    audiobook_root = Path(AUDIOBOOK_ROOT)
    if audiobook_root.is_dir():
        # Sorted so the menu indices are stable from run to run.
        for book_dir in sorted(audiobook_root.iterdir()):
            if not book_dir.is_dir():
                continue
            json_path = book_dir / "TTS" / "text_chunks" / "chunks_info.json"
            if json_path.exists():
                available_books.append((book_dir.name, json_path, "TTS"))

    # Then check Text_Input directory for fallback
    text_input_dir = Path("Text_Input")
    if text_input_dir.is_dir():
        known_names = {name for name, _, _ in available_books}
        for chunk_file in sorted(text_input_dir.glob("*_chunks.json")):
            stem = chunk_file.stem
            # Strip only the trailing "_chunks"; str.replace would also eat
            # occurrences in the middle of the book name.
            book_name = stem[:-len(suffix)] if stem.endswith(suffix) else stem
            if book_name not in known_names:
                known_names.add(book_name)
                available_books.append((book_name, chunk_file, "Text_Input"))

    if not available_books:
        print("β No chunk files found in TTS processing directories or Text_Input/")
        return None, None

    print("\nπ Available books for repair:")
    for i, (book_name, json_path, source) in enumerate(available_books):
        print(f" [{i}] {book_name} ({source}: {json_path.name})")

    last = len(available_books) - 1
    while True:
        try:
            choice = input(f"\nSelect book index [0-{last}]: ").strip()
        except (EOFError, KeyboardInterrupt):
            choice = ""
        if not choice:
            # Blank entry keeps its old meaning: cancel the selection.
            print("β Invalid selection or cancelled")
            return None, None

        try:
            idx = int(choice)
        except ValueError:
            idx = -1  # a typo re-prompts, same as an out-of-range index

        if 0 <= idx <= last:
            book_name, json_path, _source = available_books[idx]
            return book_name, json_path
        print(f"β Please enter a number between 0 and {last}")
|
| 57 |
+
|
| 58 |
+
class _InputCancelled(Exception):
    """Raised by ``_ask`` when the user aborts a prompt (Ctrl-C or EOF)."""


def _ask(prompt):
    """Return the stripped reply to ``prompt``; raise ``_InputCancelled`` on Ctrl-C/EOF."""
    try:
        return input(prompt).strip()
    except (EOFError, KeyboardInterrupt):
        raise _InputCancelled from None


def _prompt_float(param_name, current_val, min_val=None, max_val=None):
    """Ask for a float until it parses and lies within the given bounds.

    A blank reply keeps ``current_val``.
    """
    while True:
        value = _ask(f"New {param_name} [{current_val}]: ")
        if not value:
            return current_val
        try:
            new_val = float(value)
        except ValueError:
            print("β Invalid input. Please enter a valid number.")
            continue
        if min_val is not None and new_val < min_val:
            print(f"β {param_name} must be >= {min_val}")
            continue
        if max_val is not None and new_val > max_val:
            print(f"β {param_name} must be <= {max_val}")
            continue
        return new_val


def _print_chunk_menu(chunk, index):
    """Print the chunk's text, metadata and the numbered option list."""
    sentiment_compound = chunk.get('sentiment_compound', chunk.get('sentiment_score', 'N/A'))
    tts_params = chunk.get('tts_params', {})

    print(f"\nπ Chunk: \"{chunk['text']}\"")
    # boundary_type is read with .get: chunks without the key must not crash the menu.
    print(f" π Index: {index}, Boundary: {chunk.get('boundary_type', 'none')}")
    print(f" π Sentiment: {sentiment_compound}")
    print(f" ποΈ TTS Params: exag={tts_params.get('exaggeration', 'N/A')}, cfg={tts_params.get('cfg_weight', 'N/A')}, temp={tts_params.get('temperature', 'N/A')}")
    print(f" π Audio file: chunk_{index+1:05d}.wav")
    print("\nOptions:")
    print(" 1. Play original audio")
    print(" 2. Edit text content")
    print(" 3. Edit chunk metadata (boundary, sentiment)")
    print(" 4. Edit TTS parameters (exaggeration, cfg_weight, temperature)")
    print(" 5. Resynthesize audio with current settings")
    print(" 6. Play revised audio")
    print(" 7. Accept revision (replace original with revised)")
    print(" 8. Back to search")


def _edit_chunk_text(chunk, chunks, chunk_path):
    """Replace the chunk's text and word count, then save all chunks."""
    print("\nβοΈ Edit Text Content:")
    print(f"Current text: \"{chunk['text']}\"")
    print("π‘ Enter new text (or Enter to cancel):")
    new_text = _ask(">>> ")

    if not new_text:
        print("β No changes made")
        return
    chunk['text'] = new_text
    chunk['word_count'] = len(new_text.split())
    save_chunks(str(chunk_path), chunks)
    print("β Text content updated successfully")
    print(f"π New word count: {chunk['word_count']}")


def _edit_chunk_metadata(chunk, chunks, chunk_path):
    """Update boundary type and/or sentiment; save only if something changed."""
    print("\nβοΈ Edit Chunk Metadata:")
    print(f"Current boundary type: {chunk.get('boundary_type', 'none')}")
    boundary = _ask("New boundary type (none/paragraph_end/chapter_start/chapter_end/section_break) [Enter to skip]: ")

    current_sentiment = chunk.get('sentiment_compound', chunk.get('sentiment_score', 'N/A'))
    print(f"Current sentiment score: {current_sentiment}")
    sentiment = _ask("New sentiment compound score (-1.0 to 1.0) [Enter to skip]: ")

    # Validate everything before touching the chunk, so a bad number cannot
    # leave the in-memory chunk different from what is on disk.
    sentiment_val = None
    if sentiment:
        try:
            sentiment_val = float(sentiment)
        except ValueError as e:
            print(f"β Invalid input: {e}")
            return
        if not -1.0 <= sentiment_val <= 1.0:
            print("β Sentiment score must be between -1.0 and 1.0")
            sentiment_val = None

    if not boundary and sentiment_val is None:
        print("β No changes made")
        return

    if boundary:
        chunk['boundary_type'] = boundary
        print(f"β Updated boundary type to: {boundary}")
    if sentiment_val is not None:
        chunk['sentiment_compound'] = sentiment_val
        # Also update old key for compatibility
        chunk['sentiment_score'] = sentiment_val
        print(f"β Updated sentiment score to: {sentiment_val}")

    try:
        save_chunks(str(chunk_path), chunks)
    except Exception as e:
        print(f"β Error updating chunk: {e}")
        return
    print("β Chunk metadata updated successfully")


def _edit_tts_params(chunk, chunks, chunk_path):
    """Prompt for exaggeration, cfg_weight and temperature, then save all chunks."""
    print("\nποΈ Edit TTS Parameters:")
    current_tts_params = chunk.get('tts_params', {})

    print("Current TTS parameters:")
    current_exag = current_tts_params.get('exaggeration', 1.0)
    current_cfg = current_tts_params.get('cfg_weight', 0.7)
    current_temp = current_tts_params.get('temperature', 0.7)

    print(f" Exaggeration: {current_exag}")
    print(f" CFG Weight: {current_cfg}")
    print(f" Temperature: {current_temp}")

    new_exag = _prompt_float("exaggeration", current_exag, 0.0, 3.0)
    new_cfg = _prompt_float("CFG weight", current_cfg, 0.0, 2.0)
    new_temp = _prompt_float("temperature", current_temp, 0.0, 2.0)

    chunk.setdefault('tts_params', {}).update(
        exaggeration=new_exag,
        cfg_weight=new_cfg,
        temperature=new_temp,
    )

    save_chunks(str(chunk_path), chunks)
    print(f"β TTS parameters updated: exag={new_exag}, cfg={new_cfg}, temp={new_temp}")


def _revise_chunk(chunk, chunks, chunk_path, book_name, book_audio_dir):
    """Run the per-chunk option menu.

    Returns:
        ``True`` to go back to the search prompt, ``False`` when the user
        entered 'q' to leave the tool.
    """
    index = chunk['index']
    # Audio files are numbered index+1, zero-padded to five digits.
    chunk_audio_path = book_audio_dir / f"chunk_{index+1:05d}.wav"
    rev_path = book_audio_dir / f"chunk_{index+1:05d}_rev.wav"

    while True:
        _print_chunk_menu(chunk, index)
        choice = _ask("\nπ‘ Enter option number [1-8]: ")

        if choice == "1":
            print(f"\nπ Playing original audio: {chunk_audio_path.name}")
            play_chunk_audio(str(chunk_audio_path))
        elif choice == "2":
            _edit_chunk_text(chunk, chunks, chunk_path)
        elif choice == "3":
            _edit_chunk_metadata(chunk, chunks, chunk_path)
        elif choice == "4":
            _edit_tts_params(chunk, chunks, chunk_path)
        elif choice == "5":
            print(f"\nπ€ Resynthesizing chunk {index+1:05d}...")
            revised_path = synthesize_chunk(chunk, index, book_name, book_audio_dir, revision=True)
            if revised_path:
                print(f"β Chunk resynthesized: {revised_path}")
            else:
                print("β Failed to resynthesize chunk")
        elif choice == "6":
            print(f"\nπ Playing revised audio: {rev_path.name}")
            play_chunk_audio(str(rev_path))
        elif choice == "7":
            print(f"\nπ¦ Accepting revision for chunk {index+1:05d}...")
            accept_revision(index, book_audio_dir)
            print("β Revision accepted successfully")
            return True
        elif choice == "8":
            print("π Returning to search...")
            return True
        elif choice.lower() == 'q':
            print("πͺ Exiting chunk repair tool...")
            return False
        else:
            print(f"β Invalid option '{choice}'. Please enter a number 1-8 (or 'q' to quit).")


def run_chunk_repair_tool():
    """Interactive console tool to search, edit, resynthesize and accept chunks.

    The user first selects a book, then repeatedly searches for a text
    fragment, picks one of the matches and works on it through a numbered
    menu. Ctrl-C or EOF at any prompt of this function ends the tool with a
    short message instead of a traceback.
    """
    from pathlib import Path

    print("\nπ οΈ Chunk Repair & Revision Tool")

    # Ask user to select book
    book_name, chunk_path = select_book_for_repair()
    if not chunk_path:
        return

    print(f"\nπ Loading chunks from: {chunk_path.name}")
    chunks = load_chunks(str(chunk_path))

    # Determine audio directory path based on book structure
    book_audio_dir = Path(AUDIOBOOK_ROOT) / book_name / "TTS" / "audio_chunks"
    if not book_audio_dir.exists():
        print(f"β Audio directory not found: {book_audio_dir}")
        print(f"π Looked for: {book_audio_dir}")
        return

    print(f"π Using audio directory: {book_audio_dir}")

    try:
        while True:
            query = _ask("\nSearch for text fragment (or 'Q' to quit): ")
            if query.lower() == "q":
                print("Exiting revision tool.")
                return

            results = search_chunks(chunks, query)
            if not results:
                print("β No matching chunks found.")
                continue

            print(f"\nπ Found {len(results)} match(es):")
            for i, chunk in enumerate(results):
                print(f"[{i}] \"{chunk['text'][:60]}...\" | Index: {chunk['index']}")

            sel = _ask("Select chunk index to revise: ")
            # isdecimal (not isdigit): every string it accepts is valid for int().
            if not sel.isdecimal() or int(sel) >= len(results):
                print("Invalid selection.")
                continue

            if not _revise_chunk(results[int(sel)], chunks, chunk_path, book_name, book_audio_dir):
                return
    except _InputCancelled:
        print("\nβ Input cancelled")
|
wrapper/chunk_tool.py.bak
ADDED
|
@@ -0,0 +1,191 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from wrapper.chunk_loader import load_chunks, save_chunks
|
| 2 |
+
from wrapper.chunk_search import search_chunks
|
| 3 |
+
from wrapper.chunk_editor import update_chunk
|
| 4 |
+
from wrapper.chunk_player import play_chunk_audio
|
| 5 |
+
from wrapper.chunk_synthesizer import synthesize_chunk
|
| 6 |
+
from wrapper.chunk_revisions import accept_revision
|
| 7 |
+
import os
|
| 8 |
+
from config.config import AUDIOBOOK_ROOT
|
| 9 |
+
AUDIO_DIR = AUDIOBOOK_ROOT
|
| 10 |
+
|
| 11 |
+
def select_book_for_repair():
    """Interactively pick the book whose chunks should be repaired.

    Candidates are collected from two places:

    * ``<AUDIOBOOK_ROOT>/<book>/TTS/text_chunks/chunks_info.json``
    * ``Text_Input/<book>_chunks.json`` (only for book names not already
      found under ``AUDIOBOOK_ROOT``)

    Returns:
        ``(book_name, json_path)`` for the selected book, or
        ``(None, None)`` when no chunk files exist or the prompt is
        cancelled (blank entry, Ctrl-C or EOF).
    """
    from pathlib import Path

    suffix = "_chunks"
    available_books = []

    # First check TTS processing directories
    audiobook_root = Path(AUDIOBOOK_ROOT)
    if audiobook_root.is_dir():
        # Sorted so the menu indices are stable from run to run.
        for book_dir in sorted(audiobook_root.iterdir()):
            if not book_dir.is_dir():
                continue
            json_path = book_dir / "TTS" / "text_chunks" / "chunks_info.json"
            if json_path.exists():
                available_books.append((book_dir.name, json_path, "TTS"))

    # Then check Text_Input directory for fallback
    text_input_dir = Path("Text_Input")
    if text_input_dir.is_dir():
        known_names = {name for name, _, _ in available_books}
        for chunk_file in sorted(text_input_dir.glob("*_chunks.json")):
            stem = chunk_file.stem
            # Strip only the trailing "_chunks"; str.replace would also eat
            # occurrences in the middle of the book name.
            book_name = stem[:-len(suffix)] if stem.endswith(suffix) else stem
            if book_name not in known_names:
                known_names.add(book_name)
                available_books.append((book_name, chunk_file, "Text_Input"))

    if not available_books:
        print("β No chunk files found in TTS processing directories or Text_Input/")
        return None, None

    print("\nπ Available books for repair:")
    for i, (book_name, json_path, source) in enumerate(available_books):
        print(f" [{i}] {book_name} ({source}: {json_path.name})")

    last = len(available_books) - 1
    while True:
        try:
            choice = input(f"\nSelect book index [0-{last}]: ").strip()
        except (EOFError, KeyboardInterrupt):
            choice = ""
        if not choice:
            # Blank entry keeps its old meaning: cancel the selection.
            print("β Invalid selection or cancelled")
            return None, None

        try:
            idx = int(choice)
        except ValueError:
            idx = -1  # a typo re-prompts, same as an out-of-range index

        if 0 <= idx <= last:
            book_name, json_path, _source = available_books[idx]
            return book_name, json_path
        print(f"β Please enter a number between 0 and {last}")
|
| 57 |
+
|
| 58 |
+
class _InputCancelled(Exception):
    """Raised by ``_ask`` when the user aborts a prompt (Ctrl-C or EOF)."""


def _ask(prompt):
    """Return the stripped reply to ``prompt``; raise ``_InputCancelled`` on Ctrl-C/EOF."""
    try:
        return input(prompt).strip()
    except (EOFError, KeyboardInterrupt):
        raise _InputCancelled from None


def _edit_chunk_values(chunk, chunks, chunk_path):
    """Prompt for boundary, sentiment and pause; apply via ``update_chunk`` and save."""
    print("\nβοΈ Edit Chunk Values:")
    print(f"Current boundary type: {chunk.get('boundary_type', 'none')}")
    boundary = _ask("New boundary type (none/paragraph_end/chapter_start/chapter_end/section_break) [Enter to skip]: ")

    current_sentiment = chunk.get('sentiment_score', 'N/A')
    print(f"Current sentiment score: {current_sentiment}")
    sentiment = _ask("New sentiment score (-1.0 to 1.0) [Enter to skip]: ")

    current_pause = chunk.get('pause_duration', 'N/A')
    print(f"Current pause duration: {current_pause}")
    pause = _ask("New pause duration (milliseconds) [Enter to skip]: ")

    if not (boundary or sentiment or pause):
        # Nothing entered: do not rewrite the file or claim an update.
        print("β No changes made")
        return

    try:
        update_chunk(
            chunk,
            boundary_type=boundary if boundary else None,
            sentiment_score=float(sentiment) if sentiment else None,
            pause_duration=float(pause) if pause else None,
        )
        save_chunks(str(chunk_path), chunks)
        print("β Chunk values updated successfully")
    except ValueError as e:
        print(f"β Invalid input: {e}")
    except Exception as e:
        print(f"β Error updating chunk: {e}")


def _revise_chunk(chunk, chunks, chunk_path, book_name, book_audio_dir):
    """Run the per-chunk option menu.

    Returns:
        ``True`` to go back to the search prompt, ``False`` when the user
        entered 'q' to leave the tool.
    """
    index = chunk['index']
    # Audio files are numbered index+1, zero-padded to five digits.
    chunk_audio_path = book_audio_dir / f"chunk_{index+1:05d}.wav"
    rev_path = book_audio_dir / f"chunk_{index+1:05d}_rev.wav"

    while True:
        print(f"\nπ Chunk: \"{chunk['text']}\"")
        # boundary_type is read with .get: chunks without the key must not crash the menu.
        print(f" Boundary: {chunk.get('boundary_type', 'none')}, Sentiment: {chunk.get('sentiment_score', 'N/A')}, Pause: {chunk.get('pause_duration', 'N/A')}")
        print(f" Audio file: chunk_{index+1:05d}.wav")
        print("\nOptions:")
        print(" 1. Play original audio")
        print(" 2. Edit text content")
        print(" 3. Edit chunk values (boundary, sentiment, pause)")
        print(" 4. Resynthesize audio with current settings")
        print(" 5. Play revised audio")
        print(" 6. Accept revision (replace original with revised)")
        print(" 7. Back to search")

        choice = _ask("\nπ‘ Enter option number [1-7]: ")

        if choice == "1":
            print(f"\nπ Playing original audio: {chunk_audio_path.name}")
            play_chunk_audio(str(chunk_audio_path))
        elif choice == "2":
            print("\nβοΈ Edit Text Content:")
            print(f"Current text: \"{chunk['text']}\"")
            print("π‘ Enter new text (or Enter to cancel):")
            new_text = _ask(">>> ")

            if new_text:
                chunk['text'] = new_text
                chunk['word_count'] = len(new_text.split())
                save_chunks(str(chunk_path), chunks)
                print("β Text content updated successfully")
                print(f"π New word count: {chunk['word_count']}")
            else:
                print("β No changes made")
        elif choice == "3":
            _edit_chunk_values(chunk, chunks, chunk_path)
        elif choice == "4":
            print(f"\nπ€ Resynthesizing chunk {index+1:05d}...")
            revised_path = synthesize_chunk(chunk, index, book_name, book_audio_dir, revision=True)
            if revised_path:
                print(f"β Chunk resynthesized: {revised_path}")
            else:
                print("β Failed to resynthesize chunk")
        elif choice == "5":
            print(f"\nπ Playing revised audio: {rev_path.name}")
            play_chunk_audio(str(rev_path))
        elif choice == "6":
            print(f"\nπ¦ Accepting revision for chunk {index+1:05d}...")
            accept_revision(index, book_audio_dir)
            print("β Revision accepted successfully")
            return True
        elif choice == "7":
            print("π Returning to search...")
            return True
        elif choice.lower() == 'q':
            print("πͺ Exiting chunk repair tool...")
            return False
        else:
            print(f"β Invalid option '{choice}'. Please enter a number 1-7 (or 'q' to quit).")


def run_chunk_repair_tool():
    """Interactive console tool to search, edit, resynthesize and accept chunks.

    The user first selects a book, then repeatedly searches for a text
    fragment, picks one of the matches and works on it through a numbered
    menu. Ctrl-C or EOF at any prompt of this function ends the tool with a
    short message instead of a traceback.
    """
    from pathlib import Path

    print("\nπ οΈ Chunk Repair & Revision Tool")

    # Ask user to select book
    book_name, chunk_path = select_book_for_repair()
    if not chunk_path:
        return

    print(f"\nπ Loading chunks from: {chunk_path.name}")
    chunks = load_chunks(str(chunk_path))

    # Determine audio directory path based on book structure
    book_audio_dir = Path(AUDIOBOOK_ROOT) / book_name / "TTS" / "audio_chunks"
    if not book_audio_dir.exists():
        print(f"β Audio directory not found: {book_audio_dir}")
        print(f"π Looked for: {book_audio_dir}")
        return

    print(f"π Using audio directory: {book_audio_dir}")

    try:
        while True:
            query = _ask("\nSearch for text fragment (or 'Q' to quit): ")
            if query.lower() == "q":
                print("Exiting revision tool.")
                return

            results = search_chunks(chunks, query)
            if not results:
                print("β No matching chunks found.")
                continue

            print(f"\nπ Found {len(results)} match(es):")
            for i, chunk in enumerate(results):
                print(f"[{i}] \"{chunk['text'][:60]}...\" | Index: {chunk['index']}")

            sel = _ask("Select chunk index to revise: ")
            # isdecimal (not isdigit): every string it accepts is valid for int().
            if not sel.isdecimal() or int(sel) >= len(results):
                print("Invalid selection.")
                continue

            if not _revise_chunk(results[int(sel)], chunks, chunk_path, book_name, book_audio_dir):
                return
    except _InputCancelled:
        print("\nβ Input cancelled")
|
wrapper/chunk_tool.py~
ADDED
|
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from wrapper.chunk_loader import load_chunks, save_chunks
|
| 2 |
+
from wrapper.chunk_search import search_chunks
|
| 3 |
+
from wrapper.chunk_editor import update_chunk
|
| 4 |
+
from wrapper.chunk_player import play_chunk_audio
|
| 5 |
+
from wrapper.chunk_synthesizer import synthesize_chunk
|
| 6 |
+
from wrapper.chunk_revisions import accept_revision
|
| 7 |
+
import os
|
| 8 |
+
from config.config import AUDIOBOOK_ROOT
|
| 9 |
+
# Directory holding the chunk WAV files. The imported name is AUDIOBOOK_ROOT;
# the truncated "AUDIOBOOK_ROO" raised NameError as soon as the module loaded.
AUDIO_DIR = AUDIOBOOK_ROOT

# Chunk metadata file that the repair tool loads and saves.
CHUNK_PATH = "Text_Input/my_book_chunks.json"
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def run_chunk_repair_tool():
    """Interactive console tool to search, edit, resynthesize and accept chunks.

    Chunks are loaded from ``CHUNK_PATH``; audio files are addressed as
    ``chunk_NNN.wav`` / ``chunk_NNN_rev.wav`` inside ``AUDIO_DIR``. The loop
    ends when the user answers 'Q' at the search prompt.
    """
    print("\nπ οΈ Chunk Repair & Revision Tool")
    chunks = load_chunks(CHUNK_PATH)

    while True:
        query = input("\nSearch for text fragment (or 'Q' to quit): ").strip()
        if query.lower() == "q":
            print("Exiting revision tool.")
            break

        results = search_chunks(chunks, query)
        if not results:
            print("β No matching chunks found.")
            continue

        print(f"\nπ Found {len(results)} match(es):")
        for i, chunk in enumerate(results):
            print(f"[{i}] \"{chunk['text'][:60]}...\" | Index: {chunk['index']}")

        sel = input("Select chunk index to revise: ").strip()
        # isdecimal (not isdigit): every string it accepts is valid for int().
        if not sel.isdecimal() or int(sel) >= len(results):
            print("Invalid selection.")
            continue

        chunk = results[int(sel)]
        index = chunk['index']
        chunk_path = os.path.join(AUDIO_DIR, f"chunk_{index:03}.wav")

        while True:
            print(f"\nπ Chunk: \"{chunk['text']}\"")
            # boundary_type is read with .get: chunks without the key must not crash the menu.
            print(f" Boundary: {chunk.get('boundary_type', 'none')}, Sentiment: {chunk.get('sentiment_score', 'N/A')}, Pause: {chunk.get('pause_duration', 'N/A')}")
            print("\nOptions:")
            print(" 1. Play original")
            print(" 2. Edit values")
            print(" 3. Resynthesize")
            print(" 4. Play revised")
            print(" 5. Accept revision")
            print(" 6. Back to search")

            choice = input("Enter option number: ").strip()
            if choice == "1":
                play_chunk_audio(chunk_path)
            elif choice == "2":
                boundary = input("New boundary type (or Enter to skip): ").strip()
                sentiment = input("New sentiment score (or Enter to skip): ").strip()
                pause = input("New pause duration (or Enter to skip): ").strip()

                # Convert before touching the chunk: a non-numeric reply used
                # to raise ValueError and terminate the whole tool.
                try:
                    sentiment_val = float(sentiment) if sentiment else None
                    pause_val = float(pause) if pause else None
                except ValueError as e:
                    print(f"β Invalid input: {e}")
                    continue

                update_chunk(
                    chunk,
                    boundary_type=boundary if boundary else None,
                    sentiment_score=sentiment_val,
                    pause_duration=pause_val,
                )
                save_chunks(CHUNK_PATH, chunks)
            elif choice == "3":
                synthesize_chunk(chunk, index, revision=True)
            elif choice == "4":
                rev_path = os.path.join(AUDIO_DIR, f"chunk_{index:03}_rev.wav")
                play_chunk_audio(rev_path)
            elif choice == "5":
                accept_revision(index)
                break
            elif choice == "6":
                break
            else:
                print("Invalid input. Try again.")
|