import gradio as gr
import torch
import numpy as np
from Bio import SeqIO
import tempfile
import os
import json
from pathlib import Path
import zipfile
import spaces
from utils.download_models import *
from utils.handle_files import parse_fasta_files
from utils.pipelines import generate_embeddings, full_embedding_pipeline

# Module-level setup: runs once at import time, before the UI is built.
print("Downloading ESM2 models...")

# Maps a model identifier to the human-readable label shown in the
# "Select Model" dropdown (the dropdown's choices are this dict's values,
# and its default is the first value, so insertion order matters).
MODELS = {
    "facebook/esm2_t6_8M_UR50D": "ESM2-8M",
    "facebook/esm2_t12_35M_UR50D": "ESM2-35M",
    # NOTE(review): the disabled entry below lacks the "facebook/" prefix used
    # by the entries above, and the upstream 650M checkpoint is, I believe,
    # named "esm2_t33_650M_UR50D" (t36 is the 3B model) -- confirm the
    # identifier before re-enabling it.
    #"esm2_t36_650M_UR50D": "ESM2-650M"
}

# `cache_all_models` / `load_all_models` are not defined in this file;
# presumably they come from the star import of `utils.download_models`.
# `cache_dirs` is not referenced again in the visible part of this file.
cache_dirs = cache_all_models(MODELS)
# Indexed below with a string key, and each entry is indexed with [0] (used as
# the model) and [1] (used as the tokenizer).
models_and_tokenizers = load_all_models(MODELS)


# Create Gradio interface
with gr.Blocks(title="ESM2 Protein Embeddings") as demo:
    # Intro / instructions shown at the top of the page.
    # NOTE(review): the text below advertises "1280-D ... ESM2-650M", but the
    # only models enabled in MODELS are the 8M and 35M variants. Confirm which
    # model `full_embedding_pipeline` actually uses and update the wording.
    gr.Markdown("""
    # ESM2 Protein Sequence Embeddings
    
    Generate embeddings for protein sequences using Meta's ESM2 language model.
    
    **Features:**
    - Process one or multiple FASTA files
    - Generate high-dimensional embeddings (1280-D) using ESM2-650M
    - Download embeddings in NumPy format or as JSON metadata
    - Supports batch processing for efficiency
    
    **Instructions:**
    1. Upload one or more FASTA files containing protein sequences
    2. Click "Generate Embeddings"
    3. Download the output files (embeddings.npz, metadata.json, summary.txt)
    
    **Output Files:**
    - `embeddings.npz`: Compressed NumPy file with all embeddings
    - `metadata.json`: JSON file with sequence IDs and metadata
    - `summary.txt`: Human-readable summary
    - `embeddings_[filename].npz`: Per-file embeddings
    """)
    
    # Top row: file upload + submit button on the left, status text on the right.
    with gr.Row():
        with gr.Column():
            input_files = gr.File(
                label="Upload FASTA files",
                file_count="multiple",
                file_types=[".fasta", ".fa", ".faa"]
            )
            submit_btn = gr.Button("Generate Embeddings", variant="primary", size="lg")
        
        with gr.Column():
            status_output = gr.Textbox(
                label="Processing Status",
                interactive=False,
                lines=6
            )
    
    # Files produced by the pipeline, offered for download.
    with gr.Row():
        download_output = gr.File(
            label="Download Output Files",
            file_count="multiple"
        )

    # Model selector; choices are the display labels from MODELS and the
    # default is the first label.
    with gr.Row():
        model_dropdown = gr.Dropdown(
            choices=list(MODELS.values()),
            value=list(MODELS.values())[0],
            label="Select Model"
        )

    def _lookup_model(model_name):
        """Resolve a dropdown label to its entry in ``models_and_tokenizers``.

        The original code indexed ``models_and_tokenizers`` with the display
        label when seeding the state and with the ``MODELS`` key inside
        ``pick_model``; at most one of those can match how the mapping is
        actually keyed. This helper is the single lookup path for both: it
        tries the ``MODELS`` key first and falls back to the label, so
        whichever call used to succeed still returns the same entry.

        Args:
            model_name: Display label as shown in the dropdown.

        Returns:
            ``(model_key, entry)`` where ``model_key`` is the ``MODELS`` key
            for the label (or the label itself if it is not in ``MODELS``)
            and ``entry`` is the matching ``models_and_tokenizers`` value.

        Raises:
            KeyError: If neither the key nor the label is present in
                ``models_and_tokenizers``.
        """
        model_key = next(
            (key for key, label in MODELS.items() if label == model_name),
            model_name,
        )
        try:
            entry = models_and_tokenizers[model_key]
        except KeyError:
            entry = models_and_tokenizers[model_name]
        return model_key, entry

    # Seed the per-session state with the default dropdown selection.
    # NOTE(review): per the Gradio docs, gr.State deep-copies its initial
    # value; here that value is a model object, which may be costly -- confirm
    # this is intended.
    _, _initial_entry = _lookup_model(model_dropdown.value)
    model_to_use = gr.State(value=_initial_entry[0])
    tokenizer_to_use = gr.State(value=_initial_entry[1])

    def pick_model(model_name):
        """Return the entry for the selected label to update both states.

        Args:
            model_name: Display label chosen in the dropdown.

        Returns:
            The ``models_and_tokenizers`` entry for that label; it is bound to
            the two outputs ``[model_to_use, tokenizer_to_use]``.
        """
        model_key, entry = _lookup_model(model_name)
        print(f"Selected model: {model_name} ({model_key})")
        return entry

    model_dropdown.change(
        fn=pick_model,
        inputs=model_dropdown,
        outputs=[model_to_use, tokenizer_to_use]
    )

    # NOTE(review): only the uploaded files are passed to the pipeline; the
    # `model_to_use` / `tokenizer_to_use` states are not among its inputs, so
    # the dropdown selection does not reach `full_embedding_pipeline` from
    # here. Confirm the pipeline's signature before wiring them in.
    submit_btn.click(
        fn=full_embedding_pipeline,
        inputs=[input_files],
        outputs=[download_output, status_output]
    )


    # Usage example rendered at the bottom of the page.
    gr.Markdown("""
    ### How to use the embeddings:
    
    ```python
    import numpy as np
    import json
    
    # Load embeddings
    embeddings = np.load('embeddings.npz')
    
    # Access a specific embedding
    embedding = embeddings['file_name_sequence_id']
    
    # Load metadata
    with open('metadata.json', 'r') as f:
        metadata = json.load(f)
    ```
    """)


# Start the Gradio server only when this file is executed as a script;
# importing the module still builds `demo` but does not launch it.
if __name__ == "__main__":
    demo.launch()