File size: 2,764 Bytes
3be54c6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
# tau-rag production environment variables
# Copy to .env (gitignored) and fill in real values before `docker compose up`.

# ==================================================== CORE
# Preset: mock | no_llm | hebrew_legal | hebrew_legal_prod | hebrew_dense
TAU_RAG_PRESET=hebrew_legal_prod

# Bind address / port (inside container; map via compose)
TAU_RAG_HOST=0.0.0.0
TAU_RAG_PORT=8000

# Number of uvicorn worker processes (rule of thumb: 2 × CPU cores).
# Each worker is a separate process, so memory use scales with this value.
TAU_RAG_WORKERS=4

# Log level: DEBUG | INFO | WARNING | ERROR
TAU_RAG_LOG_LEVEL=INFO

# Request timeout in seconds — requests running longer than this are forcibly terminated
TAU_RAG_REQUEST_TIMEOUT_S=30

# ==================================================== AUTH
# Require API keys on all /v1/* endpoints. Highly recommended in prod.
TAU_RAG_AUTH_REQUIRED=true

# Seed an admin API key on first boot. After rotating the key in-app,
# remove this value. Leave empty to disable auto-seeding.
TAU_RAG_SEED_ADMIN_KEY=

# HMAC signing secret: at least 32 characters, randomly generated
# (e.g. `openssl rand -hex 32`). Required if HMAC signing is enabled.
TAU_RAG_HMAC_SECRET=

# ==================================================== DATA
# Root of the legal corpus. Mount as read-only volume.
LAWDBHEB_ROOT=/data/LawDBHeb

# Subfolder of LAWDBHEB_ROOT to load (e.g. legal_rag_index_hybrid)
LAWDBHEB_INDEX_SUBFOLDER=legal_rag_index_hybrid

# Persistent runtime directory (signals, snapshots, metrics). Mount as volume.
TAU_RAG_RUNTIME_DIR=/app/runtime

# ==================================================== LLM PROVIDERS
# Only set the one(s) you actually use.
ANTHROPIC_API_KEY=
OPENAI_API_KEY=

# Default generation model (override via preset)
TAU_RAG_LLM_MODEL=claude-sonnet-4-6
TAU_RAG_LLM_TEMPERATURE=0.2
TAU_RAG_LLM_MAX_TOKENS=800

# ==================================================== OBSERVABILITY
# Prometheus scrape endpoint exposed at /metrics when true
TAU_RAG_PROMETHEUS_ENABLED=true

# OpenTelemetry OTLP endpoint (empty = disabled)
OTEL_EXPORTER_OTLP_ENDPOINT=
OTEL_SERVICE_NAME=tau-rag

# Sentry DSN for error tracking (empty = disabled)
SENTRY_DSN=

# ==================================================== HUGGING FACE
# Cache dir for transformers/sentence-transformers (mount as named volume)
HF_HOME=/root/.cache/huggingface

# Optional HF token for gated models
HF_TOKEN=

# ==================================================== RATE LIMITING
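# Per-API-key rate limit in queries per second (0 = unlimited), plus the
# burst allowance (presumably the max requests allowed in a short spike — confirm)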
TAU_RAG_RATE_LIMIT_QPS=10
TAU_RAG_RATE_LIMIT_BURST=20

# ==================================================== FEATURE FLAGS
# Toggle entire middleware families (per v3.x module)
TAU_RAG_FEAT_SEMANTIC_CACHE=true
TAU_RAG_FEAT_DIVERSITY_RANKER=true
TAU_RAG_FEAT_ANSWER_GROUNDING=true
TAU_RAG_FEAT_ANSWER_HEDGING_DETECTOR=true
TAU_RAG_FEAT_ANSWER_NUMERIC_CONSISTENCY=true
TAU_RAG_FEAT_RANK_STABILITY=true
TAU_RAG_FEAT_QUERY_THROUGHPUT=true