@echo off
rem ---------------------------------------------------------------------------
rem run_local: launch inference_serving.py from this script's own directory.
rem
rem Every setting below is an environment variable. A value already present in
rem the caller's environment is kept; otherwise the default shown here is used.
rem The variables live in this script's environment and are inherited by the
rem python child process. How inference_serving.py interprets them is not
rem visible from this file -- NOTE(review): confirm against that script.
rem
rem Exit code: 1 if the script directory cannot be entered, otherwise the
rem exit code returned by python.
rem ---------------------------------------------------------------------------

rem Delayed expansion is deliberately NOT enabled: nothing here uses !var!,
rem and enabling it would strip "!" characters from %~dp0 and echoed values.
setlocal EnableExtensions

rem Work from the directory containing this script so that "--repo-id ." and
rem the relative path to inference_serving.py resolve regardless of the
rem caller's current directory (/d also switches drive).
cd /d "%~dp0"
if errorlevel 1 (
    echo [run_local] ERROR: cannot change to the script directory 1>&2
    endlocal
    exit /b 1
)

rem --- Defaults (applied only when the variable is not already defined) ------
if not defined PYTHONUTF8                 set "PYTHONUTF8=1"
if not defined DEVICE_MAP                 set "DEVICE_MAP=cuda"
if not defined LOAD_IN_4BIT               set "LOAD_IN_4BIT=1"
if not defined MAX_NEW_TOKENS             set "MAX_NEW_TOKENS=384"
if not defined MIN_NEW_TOKENS_HARMFUL     set "MIN_NEW_TOKENS_HARMFUL=48"
if not defined MIN_NEW_TOKENS_HARMLESS    set "MIN_NEW_TOKENS_HARMLESS=0"
if not defined REPETITION_PENALTY         set "REPETITION_PENALTY=1.10"
if not defined NO_REPEAT_NGRAM_SIZE       set "NO_REPEAT_NGRAM_SIZE=4"
if not defined AUTO_CONTINUE_ON_CAP_HIT   set "AUTO_CONTINUE_ON_CAP_HIT=1"
if not defined AUTO_CONTINUE_TOKENS       set "AUTO_CONTINUE_TOKENS=96"
if not defined AUTO_CONTINUE_MAX_PASSES   set "AUTO_CONTINUE_MAX_PASSES=2"
if not defined TRIM_INCOMPLETE_ON_MAX_HIT set "TRIM_INCOMPLETE_ON_MAX_HIT=1"

rem --- Show the effective configuration before starting ----------------------
echo.
echo [run_local] DEVICE_MAP=%DEVICE_MAP%
echo [run_local] LOAD_IN_4BIT=%LOAD_IN_4BIT%
echo [run_local] MAX_NEW_TOKENS=%MAX_NEW_TOKENS%
echo [run_local] MIN_NEW_TOKENS_HARMFUL=%MIN_NEW_TOKENS_HARMFUL%
echo [run_local] MIN_NEW_TOKENS_HARMLESS=%MIN_NEW_TOKENS_HARMLESS%
echo [run_local] REPETITION_PENALTY=%REPETITION_PENALTY%, NO_REPEAT_NGRAM_SIZE=%NO_REPEAT_NGRAM_SIZE%
echo [run_local] AUTO_CONTINUE=%AUTO_CONTINUE_ON_CAP_HIT%/%AUTO_CONTINUE_TOKENS%x%AUTO_CONTINUE_MAX_PASSES%
echo.

python inference_serving.py --repo-id .

rem %ERRORLEVEL% is expanded when this line is parsed (after python returns),
rem so python's exit status survives the endlocal and is returned to the caller.
endlocal & exit /b %ERRORLEVEL%