@echo off
:: Evrmind launcher: looks for a .gguf model and a llama-server build next to
:: this script, starts the server on 127.0.0.1:8080, waits until it answers,
:: and stops it again on a key press.
::
:: Delayed expansion is switched off on purpose. Nothing in this script uses
:: the exclamation-mark variable syntax, and while that mode is enabled cmd
:: removes exclamation marks from expanded text. That corrupts model or install
:: paths containing the character and strips it from echoed messages.
:: Command extensions are requested explicitly because the script relies on
:: them (path modifiers on the script argument, set /a, if/else, geq).
setlocal enableextensions disabledelayedexpansion
title Evrmind

:: Find model file: the first .gguf file located next to this script.
:: MODEL ends up holding its full path. The guard keeps the first match even
:: though the loop visits every candidate.
set "MODEL="
for %%G in ("%~dp0*.gguf") do if not defined MODEL set "MODEL=%%G"
if defined MODEL goto :found_model

:: No model found. The script directory is printed through a FOR variable
:: because FOR variables are substituted after cmd has parsed the line, so an
:: ampersand, pipe or angle bracket in the path is shown literally instead of
:: being treated as command syntax.
for %%P in ("%~dp0") do echo Error: No .gguf model file found in %%~P
echo Place your model file (e.g. evr-llama-3.1-8b-instruct.gguf) in this directory.
pause
exit /b 1

:found_model

:: Pick the llama-server build to run. Folders are probed in priority order
:: (CUDA first, then Vulkan); the first one containing llama-server.exe wins.
set "BIN_DIR="
for %%D in (windows-cuda windows-vulkan) do (
    if not defined BIN_DIR if exist "%~dp0%%D\llama-server.exe" set "BIN_DIR=%~dp0%%D"
)

if not defined BIN_DIR (
    rem Neither folder holds a server binary: explain how to get one and stop.
    echo Error: No llama-server.exe found.
    echo Expected in: windows-cuda\ or windows-vulkan\
    echo.
    echo Download from: https://github.com/evrmind-uk/evr-llama/releases/tag/v1.0.0
    echo Then extract into the matching folder name, e.g.:
    echo   Extract evrmind-windows-cuda.zip into a folder called windows-cuda
    pause
    exit /b 1
)

:: Put the chosen folder first on PATH for this script and the processes it
:: starts (presumably so libraries shipped beside the binary are found).
set "PATH=%BIN_DIR%;%PATH%"

:: Startup banner. "echo(" prints a blank line just like "echo." but does not
:: trigger the lookup for a file named "echo" that can make "echo." fail.
echo(
echo   ========================================
echo              E V R M I N D
echo   ========================================
echo(
:: The model path goes through a FOR variable so that an ampersand, pipe or
:: angle bracket in the path is printed literally rather than parsed by cmd.
for %%M in ("%MODEL%") do echo   Model:  %%~M
echo   Server: http://localhost:8080
echo(
echo   Starting server...

:: Launch the server in this console without waiting for it to exit. The
:: empty "" is the window-title argument that START expects before a quoted
:: program path. The .exe name is spelled out so the file launched is the same
:: llama-server.exe that the existence check and taskkill refer to.
:: Flags: -m model file, -ngl 99 GPU layer count, loopback host and port 8080,
:: --path pointing at the webui folder beside this script.
start /b "" "%BIN_DIR%\llama-server.exe" -m "%MODEL%" -ngl 99 --host 127.0.0.1 --port 8080 --path "%~dp0webui"

:: Poll the health endpoint once per second until curl reports success,
:: giving up after 60 failed attempts. Each attempt sleeps first, then probes.
set "WAIT_COUNT=0"

:wait_loop
timeout /t 1 /nobreak >nul
curl -sf http://127.0.0.1:8080/health >nul 2>&1 && goto :server_ready
set /a WAIT_COUNT+=1
if %WAIT_COUNT% lss 60 goto :wait_loop

:: Out of attempts: report the failure, force-terminate llama-server.exe by
:: image name, and exit with status 1 once the user acknowledges.
echo   Error: Server did not start within 60 seconds.
echo   Check that your GPU is supported and drivers are up to date.
taskkill /f /im llama-server.exe >nul 2>&1
pause
exit /b 1

:server_ready
:: Reached once the health probe succeeds: show the URL, then block until the
:: user presses a key.
echo   Server ready!
echo.
echo   Open in your browser:
echo     http://localhost:8080
echo.
echo   Press any key to stop the server...
>nul pause

:: Shut down. taskkill matches by image name, so this force-terminates every
:: running llama-server.exe; its output and errors are discarded.
>nul 2>&1 taskkill /im llama-server.exe /f