import logging
import os
import shutil
import tempfile
from pathlib import Path
from typing import Dict, List, Optional, Tuple

import cv2
import numpy as np
import torch
import torchvision.transforms as T
import whisper
from moviepy import *
from open_clip import create_model_and_transforms

# Module-level logger shared by every function in this file; it is bound to
# the "uvicorn.error" logger name rather than to this module's own name.
# NOTE(review): presumably chosen so messages land in uvicorn's server log — confirm.
logger = logging.getLogger("uvicorn.error")


def extract_audio(video_path: str, output_dir: str) -> str:
    """
    Extract the audio track of a video and save it as a WAV file.

    The output file is named ``<video stem>.wav`` and is written inside
    ``output_dir``, which is created if it does not exist yet.

    This function never raises: every failure is logged and reported through
    the return value.

    Args:
        video_path (str): Path to input video file.
        output_dir (str): Directory to save the audio file.

    Returns:
        str: Path to the saved audio file, or an empty string if the video
        has no audio track or the extraction failed.
    """
    video_name = Path(video_path).stem
    audio_path = Path(output_dir) / f"{video_name}.wav"

    try:
        # Writing the WAV fails if the target directory is missing.
        Path(output_dir).mkdir(parents=True, exist_ok=True)

        video = VideoFileClip(str(video_path))
        audio = None
        try:
            audio = video.audio
            if audio is None:
                # Fail explicitly: otherwise a stale <stem>.wav left over from
                # an earlier run would be returned as if freshly extracted.
                raise RuntimeError("Video has no audio track")

            audio.write_audiofile(str(audio_path), logger="bar")
        finally:
            # Release the underlying readers even when writing fails.
            if audio is not None:
                audio.close()
            video.close()

        if not audio_path.exists():
            raise RuntimeError("Audio file was not created")

        return str(audio_path)

    except Exception as e:
        # Best-effort contract: callers test for "" instead of catching.
        logger.error(f"Error extracting audio: {str(e)}")
        return ""


def transcribe_audio(audio_path: str, model_name: str = "base") -> str:
    """
    Transcribe audio using Whisper.

    Args:
        audio_path (str): Path to the audio file.
        model_name (str): Whisper model name.

    Returns:
        str: Transcription text with surrounding whitespace stripped; an
        empty string if the result carries no "text" entry.

    Raises:
        RuntimeError: If loading the model or transcribing fails. The
            original exception is attached as ``__cause__``.
    """
    try:
        # NOTE: the model is loaded on every call; nothing is cached here.
        model = whisper.load_model(model_name)
        result = model.transcribe(str(audio_path), fp16=False, verbose=False)
        return str(result.get("text", "")).strip()

    except Exception as e:
        # Chain explicitly so the root cause stays visible in tracebacks.
        raise RuntimeError(f"Error transcribing audio: {str(e)}") from e


def transcribe_video(
    video_path: str,
    output_dir: str = "g3/data/prompt_data/audio",
    model_name: str = "base",
) -> Optional[str]:
    """
    Transcribe video by extracting audio and then transcribing it.

    The extracted audio and a ``<video stem>_transcript.txt`` file are both
    written to ``output_dir``, which is created if it does not exist.

    Args:
        video_path (str): Path to the video file.
        output_dir (str): Directory to save the audio file.
        model_name (str): Whisper model name.

    Returns:
        Optional[str]: Path to the saved transcription text file, or None if
        audio extraction failed (the failure is logged).

    Raises:
        RuntimeError: Propagated from ``transcribe_audio`` when the
            transcription itself fails.
    """
    # Ensure the target directory exists so the function works when called
    # directly, not only through transcribe_video_directory.
    os.makedirs(output_dir, exist_ok=True)

    audio_path = extract_audio(video_path, output_dir)
    if not audio_path:
        logger.error("Audio extraction failed. No audio file created.")
        return None

    logger.info(f"Audio extracted to: {audio_path}")
    transcript_text = transcribe_audio(audio_path, model_name=model_name)

    transcript_path = Path(output_dir) / f"{Path(video_path).stem}_transcript.txt"
    with open(transcript_path, "w", encoding="utf-8") as f:
        f.write(transcript_text)
    logger.info(f"Transcript saved to: {transcript_path}")

    # Return the path the docstring has always promised.
    return str(transcript_path)


def transcribe_video_directory(
    video_dir: str,
    output_dir: str = "g3/data/prompt_data/audio",
    model_name: str = "base",
):
    """
    Run the transcription pipeline over each video found in a directory.

    Only regular files directly inside ``video_dir`` are considered, and only
    those whose suffix (case-insensitive) is .mp4, .avi, .mov or .mkv.

    Args:
        video_dir (str): Directory containing video files.
        output_dir (str): Directory to save the audio and transcript files;
            created if it does not exist.
        model_name (str): Whisper model name.

    Returns:
        None
    """
    os.makedirs(output_dir, exist_ok=True)
    supported_suffixes = (".mp4", ".avi", ".mov", ".mkv")

    # Collect matching files first so the "nothing found" case can be logged.
    candidates = []
    for entry in Path(video_dir).glob("*"):
        if not entry.is_file():
            continue
        if entry.suffix.lower() in supported_suffixes:
            candidates.append(entry)

    if len(candidates) == 0:
        logger.info(f"No video files found in directory: {video_dir}")

    for clip_path in candidates:
        logger.info(f"Processing video: {clip_path}")
        transcribe_video(str(clip_path), output_dir, model_name=model_name)