@echo off
:: ---------------------------------------------------------------------------
:: Evrmind launcher
::
:: 1. Picks the first *.gguf model file found next to this script.
:: 2. Locates llama-server.exe (windows-cuda\ preferred, windows-vulkan\ next).
:: 3. Starts the server on 127.0.0.1:8080, serving the web UI from webui\.
:: 4. Polls /health once per second (up to 60 tries) until the server answers.
:: 5. Waits for a key press, then stops the server.
::
:: Exit codes: 0 on normal shutdown, 1 on any startup error.
:: ---------------------------------------------------------------------------

:: Delayed expansion is deliberately OFF: nothing here uses !var! syntax, and
:: with it enabled any "!" in the script directory or model file name would be
:: stripped when %%f / %~dp0 are expanded.
setlocal enableextensions disabledelayedexpansion
title Evrmind

:: Find model file (first match wins; goto leaves the loop early)
set "MODEL="
for %%f in ("%~dp0*.gguf") do (
    set "MODEL=%%f"
    goto :found_model
)
echo Error: No .gguf model file found in %~dp0
echo Place your model file (e.g. evr-llama-3.1-8b-instruct.gguf) in this directory.
pause
exit /b 1

:found_model
:: Find server binary (prefer CUDA, fallback to Vulkan)
set "BIN_DIR="
if exist "%~dp0windows-cuda\llama-server.exe" set "BIN_DIR=%~dp0windows-cuda"
if not defined BIN_DIR if exist "%~dp0windows-vulkan\llama-server.exe" set "BIN_DIR=%~dp0windows-vulkan"
if not defined BIN_DIR (
    echo Error: No llama-server.exe found.
    echo Expected in: windows-cuda\ or windows-vulkan\
    echo.
    echo Download from: https://github.com/evrmind-uk/evr-llama/releases/tag/v1.0.0
    echo Then extract into the matching folder name, e.g.:
    echo Extract evrmind-windows-cuda.zip into a folder called windows-cuda
    pause
    exit /b 1
)

:: Put the binary folder first on PATH so files shipped alongside
:: llama-server.exe are found before anything else on the system.
set "PATH=%BIN_DIR%;%PATH%"

:: The readiness probe below relies on curl. Fail early with a clear message
:: instead of timing out after 60 seconds with a misleading GPU/driver hint.
where curl >nul 2>&1
if errorlevel 1 (
    echo Error: curl was not found on PATH.
    echo It is needed to check that the server has started.
    echo curl ships with Windows 10 version 1803 and later.
    pause
    exit /b 1
)

echo.
echo ========================================
echo E V R M I N D
echo ========================================
echo.
echo Model: %MODEL%
echo Server: http://localhost:8080
echo.
echo Starting server...

:: start /b runs the server in this console without opening a new window;
:: the empty "" is the window-title argument that start requires whenever
:: the command itself is quoted.
start /b "" "%BIN_DIR%\llama-server" -m "%MODEL%" -ngl 99 --host 127.0.0.1 --port 8080 --path "%~dp0webui"

:: Wait for server (timeout after 60 seconds)
set "WAIT_COUNT=0"

:wait_loop
timeout /t 1 /nobreak >nul
:: curl -f returns non-zero on HTTP errors, -s keeps it quiet.
curl -sf http://127.0.0.1:8080/health >nul 2>&1
if not errorlevel 1 goto :server_ready
set /a WAIT_COUNT+=1
if %WAIT_COUNT% geq 60 (
    echo Error: Server did not start within 60 seconds.
    echo Check that your GPU is supported and drivers are up to date.
    rem Matches by image name, so this ends every running llama-server.exe.
    taskkill /f /im llama-server.exe >nul 2>&1
    pause
    exit /b 1
)
goto :wait_loop

:server_ready
echo Server ready!
echo.
echo Open in your browser:
echo http://localhost:8080
echo.
echo Press any key to stop the server...
pause >nul

:: Kill server. NOTE: matches by image name, so any other llama-server.exe
:: instance running on this machine is terminated as well.
taskkill /f /im llama-server.exe >nul 2>&1

:: Report success explicitly rather than leaking taskkill's exit code.
exit /b 0