"""
Text processing utilities for Chatterbox TTS
"""
import re
from typing import List


def chunk_text(text: str, max_chars: int = 300) -> List[str]:
    """
    Break text into chunks of at most max_chars, preferring sentence boundaries.

    Text that already fits within max_chars is returned unchanged as a
    single-element list. Otherwise the text is split after '.', '!' or '?'
    followed by whitespace, and consecutive sentences are packed greedily
    (joined by a single space) into chunks. A sentence that is itself longer
    than max_chars is handed to _split_long_sentence, which breaks it at
    commas and then at spaces.

    Args:
        text: Input text to chunk
        max_chars: Maximum characters per chunk (default 300)

    Returns:
        List of text chunks. Chunks built here are stripped of surrounding
        whitespace.

    Examples:
        >>> chunk_text("Hello. World.", max_chars=10)
        ['Hello.', 'World.']

        >>> chunk_text("This is a test. Another sentence here.", max_chars=20)
        ['This is a test.', 'Another sentence', 'here.']
    """
    # Fast path: nothing to split, hand the input back untouched.
    if len(text) <= max_chars:
        return [text]

    result = []
    pending = ""

    # The lookbehind keeps the terminating punctuation attached to its
    # sentence; only the whitespace after it is consumed by the split.
    for piece in re.split(r'(?<=[.!?])\s+', text):
        if len(piece) > max_chars:
            # Flush whatever was accumulated so chunk order follows the text,
            # then break the oversized sentence into smaller fragments.
            if pending:
                result.append(pending.strip())
                pending = ""
            result.extend(_split_long_sentence(piece, max_chars))
            continue

        # The +1 accounts for the joining space between sentences.
        fits = len(pending) + len(piece) + 1 <= max_chars
        if fits and pending:
            pending = pending + " " + piece
        elif fits:
            pending = piece
        else:
            # Adding this sentence would overflow: close the current chunk
            # and start a new one with this sentence.
            if pending:
                result.append(pending.strip())
            pending = piece

    # Emit the trailing, still-open chunk.
    if pending:
        result.append(pending.strip())

    return result


def _split_long_sentence(sentence: str, max_chars: int) -> List[str]:
    """
    Split a long sentence at comma or space boundaries.
    
    Args:
        sentence: Long sentence to split
        max_chars: Maximum characters per chunk
        
    Returns:
        List of sentence fragments
    """
    # Try splitting at commas first
    if ',' in sentence:
        parts = re.split(r'(,\s*)', sentence)
        # Recombine with commas
        fragments = []
        current = ""
        for i in range(0, len(parts), 2):
            part = parts[i]
            comma = parts[i + 1] if i + 1 < len(parts) else ""
            
            if len(current) + len(part) + len(comma) > max_chars:
                if current:
                    fragments.append(current.strip())
                current = part + comma
            else:
                current += part + comma
        
        if current:
            fragments.append(current.strip())
        
        # Check if any fragment still exceeds limit
        final_fragments = []
        for frag in fragments:
            if len(frag) > max_chars:
                final_fragments.extend(_split_at_spaces(frag, max_chars))
            else:
                final_fragments.append(frag)
        
        return final_fragments
    else:
        # Fall back to splitting at spaces
        return _split_at_spaces(sentence, max_chars)


def _split_at_spaces(text: str, max_chars: int) -> List[str]:
    """
    Split text at space boundaries.
    
    Args:
        text: Text to split
        max_chars: Maximum characters per chunk
        
    Returns:
        List of text fragments
    """
    words = text.split()
    chunks = []
    current_chunk = ""
    
    for word in words:
        if len(current_chunk) + len(word) + 1 > max_chars:
            if current_chunk:
                chunks.append(current_chunk.strip())
            current_chunk = word
        else:
            if current_chunk:
                current_chunk += " " + word
            else:
                current_chunk = word
    
    if current_chunk:
        chunks.append(current_chunk.strip())
    
    return chunks