{ "title": "tau-rag · Overview", "uid": "tau-rag-overview", "schemaVersion": 39, "version": 1, "refresh": "30s", "tags": ["tau-rag", "rag"], "time": {"from": "now-6h", "to": "now"}, "panels": [ { "type": "stat", "title": "Request Rate (qps)", "targets": [{ "expr": "sum(rate(tau_rag_requests_total[5m]))", "refId": "A" }], "gridPos": {"x": 0, "y": 0, "w": 6, "h": 4} }, { "type": "stat", "title": "5xx Error Rate", "targets": [{ "expr": "sum(rate(tau_rag_requests_total{status=~\"5..\"}[5m])) / sum(rate(tau_rag_requests_total[5m]))", "refId": "A" }], "fieldConfig": {"defaults": {"unit": "percentunit"}}, "gridPos": {"x": 6, "y": 0, "w": 6, "h": 4} }, { "type": "stat", "title": "p95 Latency (s)", "targets": [{ "expr": "histogram_quantile(0.95, sum(rate(tau_rag_request_duration_seconds_bucket[5m])) by (le))", "refId": "A" }], "fieldConfig": {"defaults": {"unit": "s"}}, "gridPos": {"x": 12, "y": 0, "w": 6, "h": 4} }, { "type": "stat", "title": "p99 Latency (s)", "targets": [{ "expr": "histogram_quantile(0.99, sum(rate(tau_rag_request_duration_seconds_bucket[5m])) by (le))", "refId": "A" }], "fieldConfig": {"defaults": {"unit": "s"}}, "gridPos": {"x": 18, "y": 0, "w": 6, "h": 4} }, { "type": "timeseries", "title": "Requests by endpoint", "targets": [{ "expr": "sum(rate(tau_rag_requests_total[1m])) by (endpoint)", "refId": "A", "legendFormat": "{{endpoint}}" }], "gridPos": {"x": 0, "y": 4, "w": 24, "h": 8} }, { "type": "timeseries", "title": "Retrieval latency (p50/p95/p99)", "targets": [ {"expr": "histogram_quantile(0.5, sum(rate(tau_rag_retrieval_duration_seconds_bucket[5m])) by (le))", "refId": "A", "legendFormat": "p50"}, {"expr": "histogram_quantile(0.95, sum(rate(tau_rag_retrieval_duration_seconds_bucket[5m])) by (le))", "refId": "B", "legendFormat": "p95"}, {"expr": "histogram_quantile(0.99, sum(rate(tau_rag_retrieval_duration_seconds_bucket[5m])) by (le))", "refId": "C", "legendFormat": "p99"} ], "gridPos": {"x": 0, "y": 12, "w": 12, "h": 8} }, { "type": 
"timeseries", "title": "Generation latency (p50/p95/p99)", "targets": [ {"expr": "histogram_quantile(0.5, sum(rate(tau_rag_generation_duration_seconds_bucket[5m])) by (le))", "refId": "A", "legendFormat": "p50"}, {"expr": "histogram_quantile(0.95, sum(rate(tau_rag_generation_duration_seconds_bucket[5m])) by (le))", "refId": "B", "legendFormat": "p95"}, {"expr": "histogram_quantile(0.99, sum(rate(tau_rag_generation_duration_seconds_bucket[5m])) by (le))", "refId": "C", "legendFormat": "p99"} ], "gridPos": {"x": 12, "y": 12, "w": 12, "h": 8} }, { "type": "timeseries", "title": "Answer quality signals", "targets": [ {"expr": "tau_rag_answer_grounding_ratio", "refId": "A", "legendFormat": "grounding"}, {"expr": "tau_rag_numeric_support_ratio", "refId": "B", "legendFormat": "numeric"}, {"expr": "tau_rag_coverage_ratio", "refId": "C", "legendFormat": "coverage"} ], "gridPos": {"x": 0, "y": 20, "w": 12, "h": 8} }, { "type": "stat", "title": "Flaky queries (v3.64)", "targets": [{"expr": "tau_rag_flaky_queries_count", "refId": "A"}], "gridPos": {"x": 12, "y": 20, "w": 6, "h": 4} }, { "type": "stat", "title": "ECE (v3.55)", "targets": [{"expr": "tau_rag_calibration_ece", "refId": "A"}], "gridPos": {"x": 18, "y": 20, "w": 6, "h": 4} } ] }