import gradio as gr
import torch
import gymnasium as gym
import imageio
from huggingface_hub import hf_hub_download

# IMPORTANT: Update this to your HF model repo ID
REPO_ID = "MoniGarr/spinning-up--on-ant-sac"

# Download the checkpoint file from the Hub (cached locally by huggingface_hub).
model_path = hf_hub_download(repo_id=REPO_ID, filename="model.pt")

# The checkpoint is used as a complete policy object (the app calls
# `model.act(...)`), not as a state_dict, so it must be fully unpickled:
#   * weights_only=False -- PyTorch >= 2.6 defaults to weights_only=True, which
#     refuses to unpickle arbitrary objects and would make this load fail.
#   * map_location="cpu" -- observations are turned into CPU tensors before
#     being passed to the policy, and a checkpoint saved on a GPU would
#     otherwise fail to load on a CPU-only host.
# SECURITY: full unpickling executes code embedded in the file. Only point
# REPO_ID at a repository you trust.
model = torch.load(model_path, map_location="cpu", weights_only=False)
print("Model loaded successfully!")

def run_agent(max_steps=500):
    """
    Run the loaded policy for one episode of Ant-v4 and save it as a video.

    Args:
        max_steps: Upper bound on the number of environment steps to simulate.
            The episode stops earlier if the environment reports that it was
            terminated or truncated.

    Returns:
        The path of the video file that was written ("ant_demo.mp4").
    """
    # "rgb_array" render mode makes env.render() return frames for the video.
    env = gym.make("Ant-v4", render_mode="rgb_array")
    frames = []

    # try/finally guarantees the environment is closed even when rendering,
    # the policy, or the simulation step raises.
    try:
        obs, _ = env.reset()

        for _ in range(max_steps):
            # Capture the frame BEFORE taking a step
            frames.append(env.render())

            # Get action from the loaded policy
            with torch.no_grad():
                action = model.act(torch.as_tensor(obs, dtype=torch.float32))

            # Take a step in the environment
            obs, _, terminated, truncated, _ = env.step(action)

            if terminated or truncated:
                break

        # Frames are grabbed before each step, so the state reached by the
        # last step would otherwise never be shown. This also guarantees at
        # least one frame when max_steps <= 0, so the video writer never
        # receives an empty sequence.
        frames.append(env.render())
    finally:
        env.close()

    # Save frames as a video
    video_path = "ant_demo.mp4"
    imageio.mimsave(video_path, frames, fps=30)

    return video_path

# Assemble the web UI: a title, a short description, one button that triggers
# a simulation run, and a video player that shows the resulting file.
with gr.Blocks() as demo:
    gr.Markdown(value="# OpenAI Spinning Up: SAC Agent on Ant-v4")
    gr.Markdown(
        value=(
            "This demo loads a pre-trained Soft Actor-Critic (SAC) agent "
            "and visualizes its performance in the `Ant-v4` environment. "
            "Click the button below to generate a new simulation video."
        )
    )

    run_button = gr.Button(value="Run Agent Simulation", variant="primary")
    video_output = gr.Video(label="Agent Performance")

    # The handler takes no UI inputs (run_agent uses its default max_steps);
    # the path it returns is displayed in the video component.
    run_button.click(fn=run_agent, outputs=video_output)

# Start the Gradio server.
demo.launch()