MDIIII committed on
Commit
2f5bdbc
·
1 Parent(s): 68025ee

perf: raise rate limit to 200/min for paid-tier models

Browse files
Files changed (3) hide show
  1. app.py +28 -7
  2. config.py +19 -6
  3. data/prices.py +273 -7
app.py CHANGED
@@ -11,7 +11,7 @@ from fastapi.middleware.cors import CORSMiddleware
11
  from pydantic import BaseModel, Field
12
  from typing import Optional
13
 
14
- from config import BENCHMARKS, FREE_MODELS, ASSETS
15
  from db.store import init_db, create_run, complete_run, fail_run, get_run, get_leaderboard, get_decisions
16
  from backtest.runner import run_backtest
17
 
@@ -25,13 +25,12 @@ logger = logging.getLogger(__name__)
25
  @asynccontextmanager
26
  async def lifespan(app: FastAPI):
27
  init_db()
28
- logger.info("CryptoAgentBench API started")
29
  yield
30
 
31
 
32
  app = FastAPI(
33
  title="CryptoAgentBench API",
34
- description="Benchmark open-source LLMs as crypto trading agents",
35
  version="1.0.0",
36
  lifespan=lifespan,
37
  )
@@ -49,7 +48,7 @@ app.add_middleware(
49
 
50
  class BacktestRequest(BaseModel):
51
  benchmark: str = Field(..., description="A, B, or C")
52
- model: str = Field(default="meta-llama/llama-3.3-70b-instruct:free")
53
  asset: str = Field(default="BTC/USDT")
54
  start_date: str = Field(default="2024-01-01", description="YYYY-MM-DD")
55
  end_date: str = Field(default="2024-06-30", description="YYYY-MM-DD")
@@ -91,11 +90,34 @@ def health():
91
  }
92
 
93
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
  @app.get("/models")
95
  def list_models():
96
  return {
97
- "models": FREE_MODELS,
98
- "note": "All models are free-tier via OpenRouter",
 
 
99
  }
100
 
101
 
@@ -147,7 +169,6 @@ def get_run_detail(run_id: str):
147
  run = get_run(run_id)
148
  if not run:
149
  raise HTTPException(status_code=404, detail="Run not found")
150
- # Don't embed all decisions in the detail view
151
  run_out = {k: v for k, v in run.items() if k not in ("equity_curve", "hodl_curve")}
152
  run_out["equity_curve"] = run.get("equity_curve", [])
153
  run_out["hodl_curve"] = run.get("hodl_curve", [])
 
11
  from pydantic import BaseModel, Field
12
  from typing import Optional
13
 
14
+ from config import BENCHMARKS, FREE_MODELS, AVAILABLE_MODELS, ASSETS, OPENROUTER_API_KEY
15
  from db.store import init_db, create_run, complete_run, fail_run, get_run, get_leaderboard, get_decisions
16
  from backtest.runner import run_backtest
17
 
 
25
  @asynccontextmanager
26
  async def lifespan(app: FastAPI):
27
  init_db()
28
+ logger.info("DB initialised")
29
  yield
30
 
31
 
32
  app = FastAPI(
33
  title="CryptoAgentBench API",
 
34
  version="1.0.0",
35
  lifespan=lifespan,
36
  )
 
48
 
49
  class BacktestRequest(BaseModel):
50
  benchmark: str = Field(..., description="A, B, or C")
51
+ model: str = Field(default="google/gemma-4-31b-it:free")
52
  asset: str = Field(default="BTC/USDT")
53
  start_date: str = Field(default="2024-01-01", description="YYYY-MM-DD")
54
  end_date: str = Field(default="2024-06-30", description="YYYY-MM-DD")
 
90
  }
91
 
92
 
93
+ @app.get("/health/llm")
94
+ def health_llm():
95
+ import requests as req
96
+ key = OPENROUTER_API_KEY
97
+ if not key:
98
+ return {"llm_ok": False, "error": "OPENROUTER_API_KEY not set", "key_prefix": None}
99
+ key_prefix = key[:6] + "..." if len(key) > 6 else "(short)"
100
+ try:
101
+ resp = req.post(
102
+ "https://openrouter.ai/api/v1/chat/completions",
103
+ headers={"Authorization": f"Bearer {key}", "Content-Type": "application/json"},
104
+ json={"model": FREE_MODELS[0], "messages": [{"role": "user", "content": "Reply OK"}], "max_tokens": 5},
105
+ timeout=20,
106
+ )
107
+ if resp.status_code == 200:
108
+ return {"llm_ok": True, "key_prefix": key_prefix, "status": 200}
109
+ return {"llm_ok": False, "key_prefix": key_prefix, "status": resp.status_code, "error": resp.text[:200]}
110
+ except Exception as e:
111
+ return {"llm_ok": False, "key_prefix": key_prefix, "error": str(e)[:200]}
112
+
113
+
114
  @app.get("/models")
115
  def list_models():
116
  return {
117
+ "free_models": FREE_MODELS,
118
+ "paid_models": AVAILABLE_MODELS[len(FREE_MODELS):],
119
+ "models": AVAILABLE_MODELS,
120
+ "note": "Free models via OpenRouter free tier; paid models are affordable open-source.",
121
  }
122
 
123
 
 
169
  run = get_run(run_id)
170
  if not run:
171
  raise HTTPException(status_code=404, detail="Run not found")
 
172
  run_out = {k: v for k, v in run.items() if k not in ("equity_curve", "hodl_curve")}
173
  run_out["equity_curve"] = run.get("equity_curve", [])
174
  run_out["hodl_curve"] = run.get("hodl_curve", [])
config.py CHANGED
@@ -4,13 +4,25 @@ import os
4
  OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY", "")
5
  OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1"
6
 
7
- # Free models on OpenRouter
8
  FREE_MODELS = [
9
- "deepseek/deepseek-r1:free",
 
 
10
  "meta-llama/llama-3.3-70b-instruct:free",
11
- "qwen/qwen3-coder:free",
12
  ]
13
- DEFAULT_MODEL = FREE_MODELS[1]
 
 
 
 
 
 
 
 
 
 
 
14
 
15
  # Supported assets
16
  ASSETS = ["BTC/USDT", "ETH/USDT"]
@@ -26,8 +38,9 @@ BENCHMARKS = ["A", "B", "C"]
26
  INITIAL_CAPITAL = 10_000.0
27
  TRADE_FEE = 0.001 # 0.1%
28
 
29
- # Rate limiting (OpenRouter free tier)
30
- MAX_REQUESTS_PER_MINUTE = 18 # conservative under 20
 
31
  LLM_TIMEOUT = 120
32
  LLM_MAX_RETRIES = 3
33
 
 
4
  OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY", "")
5
  OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1"
6
 
7
+ # Free models on OpenRouter (verified working 2026-06)
8
  FREE_MODELS = [
9
+ "google/gemma-4-31b-it:free",
10
+ "google/gemma-4-26b-a4b-it:free",
11
+ "nvidia/nemotron-3-super-120b-a12b:free",
12
  "meta-llama/llama-3.3-70b-instruct:free",
 
13
  ]
14
+ DEFAULT_MODEL = FREE_MODELS[0]
15
+
16
+ # Paid (affordable) open-source models via OpenRouter
17
+ PAID_MODELS = [
18
+ "meta-llama/llama-3.1-8b-instruct",
19
+ "google/gemma-4-26b-a4b-it",
20
+ "qwen/qwen3.5-9b",
21
+ "meta-llama/llama-3.3-70b-instruct",
22
+ ]
23
+
24
+ # All available models (free models first, then paid)
25
+ AVAILABLE_MODELS = FREE_MODELS + PAID_MODELS
26
 
27
  # Supported assets
28
  ASSETS = ["BTC/USDT", "ETH/USDT"]
 
38
  INITIAL_CAPITAL = 10_000.0
39
  TRADE_FEE = 0.001 # 0.1%
40
 
41
+ # Rate limiting: paid OpenRouter tier supports 200+ req/min
42
+ # 200/min shared across parallel runs → ~50/run when 4 run simultaneously
43
+ MAX_REQUESTS_PER_MINUTE = 200
44
  LLM_TIMEOUT = 120
45
  LLM_MAX_RETRIES = 3
46
 
data/prices.py CHANGED
@@ -1,17 +1,284 @@
1
  import logging
 
2
  from datetime import datetime, timedelta
3
  import pandas as pd
 
4
 
5
  logger = logging.getLogger(__name__)
6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
  def fetch_ohlcv(asset: str, start_date: str, end_date: str) -> pd.DataFrame:
9
- """Fetch OHLCV data. Tries ccxt first, falls back to yfinance."""
10
- try:
11
- return _fetch_ccxt(asset, start_date, end_date)
12
- except Exception as e:
13
- logger.warning(f"ccxt failed for {asset}: {e}, falling back to yfinance")
14
- return _fetch_yfinance(asset, start_date, end_date)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
 
17
  def _fetch_ccxt(asset: str, start_date: str, end_date: str) -> pd.DataFrame:
@@ -48,7 +315,6 @@ def _fetch_yfinance(asset: str, start_date: str, end_date: str) -> pd.DataFrame:
48
  from config import ASSET_YFINANCE_MAP
49
 
50
  ticker = ASSET_YFINANCE_MAP.get(asset, asset.replace("/", "-"))
51
- # Add one day buffer because yfinance end is exclusive
52
  end_dt = (datetime.strptime(end_date, "%Y-%m-%d") + timedelta(days=1)).strftime("%Y-%m-%d")
53
  data = yf.download(ticker, start=start_date, end=end_dt, progress=False, auto_adjust=True)
54
 
 
1
  import logging
2
+ import time
3
  from datetime import datetime, timedelta
4
  import pandas as pd
5
+ import requests
6
 
7
  logger = logging.getLogger(__name__)
8
 
9
+ ASSET_CRYPTOCOMPARE_MAP = {
10
+ "BTC/USDT": ("BTC", "USD"),
11
+ "ETH/USDT": ("ETH", "USD"),
12
+ }
13
+ ASSET_COINBASE_MAP = {
14
+ "BTC/USDT": "BTC-USD",
15
+ "ETH/USDT": "ETH-USD",
16
+ }
17
+ ASSET_KRAKEN_MAP = {
18
+ "BTC/USDT": "XXBTZUSD",
19
+ "ETH/USDT": "XETHZUSD",
20
+ }
21
+ ASSET_BINANCE_MAP = {
22
+ "BTC/USDT": "BTCUSDT",
23
+ "ETH/USDT": "ETHUSDT",
24
+ }
25
+
26
 
27
  def fetch_ohlcv(asset: str, start_date: str, end_date: str) -> pd.DataFrame:
28
+ """Fetch OHLCV data from multiple sources in order."""
29
+ errors = []
30
+ for name, fn in [
31
+ ("CryptoCompare", _fetch_cryptocompare),
32
+ ("Coinbase", _fetch_coinbase),
33
+ ("Kraken", _fetch_kraken),
34
+ ("Binance-REST", _fetch_binance),
35
+ ("ccxt", _fetch_ccxt),
36
+ ("yfinance", _fetch_yfinance),
37
+ ]:
38
+ try:
39
+ df = fn(asset, start_date, end_date)
40
+ if df is not None and not df.empty:
41
+ logger.info(f"Fetched {len(df)} candles for {asset} via {name}")
42
+ return df
43
+ except Exception as e:
44
+ errors.append(f"{name}: {e}")
45
+ logger.warning(f"{name} failed for {asset}: {e}")
46
+
47
+ raise ValueError(f"All data sources failed for {asset}: {'; '.join(errors)}")
48
+
49
+
50
+ def _fetch_cryptocompare(asset: str, start_date: str, end_date: str) -> pd.DataFrame:
51
+ """CryptoCompare free API — no auth required, works from any IP."""
52
+ mapping = ASSET_CRYPTOCOMPARE_MAP.get(asset)
53
+ if not mapping:
54
+ raise ValueError(f"No CryptoCompare mapping for {asset}")
55
+ fsym, tsym = mapping
56
+
57
+ start_dt = datetime.strptime(start_date, "%Y-%m-%d")
58
+ end_dt = datetime.strptime(end_date, "%Y-%m-%d")
59
+ days_total = (end_dt - start_dt).days + 1
60
+
61
+ all_rows = []
62
+ # CryptoCompare returns up to 2000 daily candles per call
63
+ batch_size = 2000
64
+ to_ts = int(end_dt.timestamp()) + 86400
65
+
66
+ while to_ts > int(start_dt.timestamp()):
67
+ limit = min(batch_size, days_total)
68
+ resp = requests.get(
69
+ "https://min-api.cryptocompare.com/data/v2/histoday",
70
+ params={
71
+ "fsym": fsym,
72
+ "tsym": tsym,
73
+ "limit": limit,
74
+ "toTs": to_ts,
75
+ },
76
+ timeout=30,
77
+ headers={"User-Agent": "CryptoAgentBench/1.0"},
78
+ )
79
+ resp.raise_for_status()
80
+ data = resp.json()
81
+ if data.get("Response") != "Success":
82
+ raise ValueError(f"CryptoCompare error: {data.get('Message', data)}")
83
+
84
+ candles = data["Data"]["Data"]
85
+ if not candles:
86
+ break
87
+
88
+ for c in candles:
89
+ date_str = datetime.utcfromtimestamp(c["time"]).strftime("%Y-%m-%d")
90
+ if date_str < start_date or date_str > end_date:
91
+ continue
92
+ if c["close"] == 0:
93
+ continue
94
+ all_rows.append({
95
+ "date": date_str,
96
+ "open": float(c["open"]),
97
+ "high": float(c["high"]),
98
+ "low": float(c["low"]),
99
+ "close": float(c["close"]),
100
+ "volume": float(c["volumefrom"]),
101
+ })
102
+
103
+ earliest = datetime.utcfromtimestamp(candles[0]["time"]).strftime("%Y-%m-%d")
104
+ if earliest <= start_date:
105
+ break
106
+ to_ts = int(candles[0]["time"]) - 1
107
+
108
+ if not all_rows:
109
+ raise ValueError(f"No CryptoCompare data for {fsym}/{tsym} in range {start_date}-{end_date}")
110
+
111
+ df = pd.DataFrame(all_rows)
112
+ df = df.drop_duplicates("date").sort_values("date").reset_index(drop=True)
113
+ return df
114
+
115
+
116
+ def _fetch_coinbase(asset: str, start_date: str, end_date: str) -> pd.DataFrame:
117
+ """Coinbase Exchange public API — no auth, US-IP friendly."""
118
+ product_id = ASSET_COINBASE_MAP.get(asset)
119
+ if not product_id:
120
+ raise ValueError(f"No Coinbase mapping for {asset}")
121
+
122
+ start_dt = datetime.strptime(start_date, "%Y-%m-%d")
123
+ end_dt = datetime.strptime(end_date, "%Y-%m-%d") + timedelta(days=1)
124
+
125
+ all_rows = []
126
+ # Coinbase returns max 300 candles per call for granularity=86400
127
+ chunk_days = 290
128
+ current = start_dt
129
+
130
+ while current < end_dt:
131
+ chunk_end = min(current + timedelta(days=chunk_days), end_dt)
132
+ resp = requests.get(
133
+ f"https://api.exchange.coinbase.com/products/{product_id}/candles",
134
+ params={
135
+ "granularity": 86400,
136
+ "start": current.isoformat(),
137
+ "end": chunk_end.isoformat(),
138
+ },
139
+ timeout=30,
140
+ headers={"User-Agent": "CryptoAgentBench/1.0"},
141
+ )
142
+ resp.raise_for_status()
143
+ candles = resp.json()
144
+ if isinstance(candles, dict) and "message" in candles:
145
+ raise ValueError(f"Coinbase error: {candles['message']}")
146
+
147
+ for c in candles:
148
+ # Format: [timestamp, low, high, open, close, volume]
149
+ ts, low, high, open_, close, vol = c[0], c[1], c[2], c[3], c[4], c[5]
150
+ date_str = datetime.utcfromtimestamp(ts).strftime("%Y-%m-%d")
151
+ if date_str < start_date or date_str > end_date:
152
+ continue
153
+ all_rows.append({
154
+ "date": date_str,
155
+ "open": float(open_),
156
+ "high": float(high),
157
+ "low": float(low),
158
+ "close": float(close),
159
+ "volume": float(vol),
160
+ })
161
+
162
+ current = chunk_end
163
+ time.sleep(0.2)
164
+
165
+ if not all_rows:
166
+ raise ValueError(f"No Coinbase data for {product_id} in range {start_date}-{end_date}")
167
+
168
+ df = pd.DataFrame(all_rows)
169
+ df = df.drop_duplicates("date").sort_values("date").reset_index(drop=True)
170
+ return df
171
+
172
+
173
+ def _fetch_kraken(asset: str, start_date: str, end_date: str) -> pd.DataFrame:
174
+ pair = ASSET_KRAKEN_MAP.get(asset)
175
+ if not pair:
176
+ raise ValueError(f"No Kraken pair for {asset}")
177
+
178
+ since = int(datetime.strptime(start_date, "%Y-%m-%d").timestamp())
179
+ end_ts = int(
180
+ (datetime.strptime(end_date, "%Y-%m-%d") + timedelta(days=1)).timestamp()
181
+ )
182
+
183
+ all_rows = []
184
+ current_since = since
185
+
186
+ for _ in range(10):
187
+ resp = requests.get(
188
+ "https://api.kraken.com/0/public/OHLC",
189
+ params={"pair": pair, "interval": 1440, "since": current_since},
190
+ timeout=30,
191
+ )
192
+ resp.raise_for_status()
193
+ data = resp.json()
194
+ if data.get("error"):
195
+ raise ValueError(f"Kraken error: {data['error']}")
196
+
197
+ # Result dict has pair key + "last" key
198
+ pair_keys = [k for k in data["result"] if k != "last"]
199
+ if not pair_keys:
200
+ break
201
+ candles = data["result"][pair_keys[0]]
202
+ last = data["result"].get("last", 0)
203
+
204
+ added = 0
205
+ for c in candles:
206
+ ts = int(c[0])
207
+ if ts >= end_ts:
208
+ break
209
+ date_str = datetime.utcfromtimestamp(ts).strftime("%Y-%m-%d")
210
+ if start_date <= date_str <= end_date:
211
+ all_rows.append({
212
+ "date": date_str,
213
+ "open": float(c[1]),
214
+ "high": float(c[2]),
215
+ "low": float(c[3]),
216
+ "close": float(c[4]),
217
+ "volume": float(c[6]),
218
+ })
219
+ added += 1
220
+
221
+ if last == 0 or last >= end_ts or len(candles) < 720:
222
+ break
223
+ current_since = last
224
+ time.sleep(0.5)
225
+
226
+ if not all_rows:
227
+ raise ValueError(f"No Kraken data for {pair} in range {start_date}-{end_date}")
228
+
229
+ df = pd.DataFrame(all_rows)
230
+ df = df.drop_duplicates("date").sort_values("date").reset_index(drop=True)
231
+ return df
232
+
233
+
234
+ def _fetch_binance(asset: str, start_date: str, end_date: str) -> pd.DataFrame:
235
+ symbol = ASSET_BINANCE_MAP.get(asset)
236
+ if not symbol:
237
+ raise ValueError(f"No Binance symbol for {asset}")
238
+
239
+ start_ms = int(datetime.strptime(start_date, "%Y-%m-%d").timestamp() * 1000)
240
+ end_ms = int(
241
+ (datetime.strptime(end_date, "%Y-%m-%d") + timedelta(days=1)).timestamp() * 1000
242
+ )
243
+
244
+ all_candles = []
245
+ current_start = start_ms
246
+
247
+ while current_start < end_ms:
248
+ resp = requests.get(
249
+ "https://api.binance.com/api/v3/klines",
250
+ params={
251
+ "symbol": symbol,
252
+ "interval": "1d",
253
+ "startTime": current_start,
254
+ "endTime": end_ms,
255
+ "limit": 1000,
256
+ },
257
+ timeout=30,
258
+ )
259
+ resp.raise_for_status()
260
+ candles = resp.json()
261
+ if not candles:
262
+ break
263
+ all_candles.extend(candles)
264
+ current_start = candles[-1][0] + 86400000
265
+ if len(candles) < 1000:
266
+ break
267
+
268
+ if not all_candles:
269
+ raise ValueError(f"No data from Binance for {symbol}")
270
+
271
+ df = pd.DataFrame(all_candles, columns=[
272
+ "timestamp", "open", "high", "low", "close", "volume",
273
+ "close_time", "quote_volume", "num_trades",
274
+ "taker_buy_base", "taker_buy_quote", "ignore",
275
+ ])
276
+ df["date"] = pd.to_datetime(df["timestamp"], unit="ms").dt.strftime("%Y-%m-%d")
277
+ df = df[(df["date"] >= start_date) & (df["date"] <= end_date)]
278
+ for col in ["open", "high", "low", "close", "volume"]:
279
+ df[col] = df[col].astype(float)
280
+ df = df.drop_duplicates("date").sort_values("date").reset_index(drop=True)
281
+ return df[["date", "open", "high", "low", "close", "volume"]]
282
 
283
 
284
  def _fetch_ccxt(asset: str, start_date: str, end_date: str) -> pd.DataFrame:
 
315
  from config import ASSET_YFINANCE_MAP
316
 
317
  ticker = ASSET_YFINANCE_MAP.get(asset, asset.replace("/", "-"))
 
318
  end_dt = (datetime.strptime(end_date, "%Y-%m-%d") + timedelta(days=1)).strftime("%Y-%m-%d")
319
  data = yf.download(ticker, start=start_date, end=end_dt, progress=False, auto_adjust=True)
320