Adisri99 committed on
Commit
3065476
·
verified ·
1 Parent(s): e98cfad

Update app/models.py

Browse files
Files changed (1) hide show
  1. app/models.py +55 -7
app/models.py CHANGED
@@ -1,35 +1,82 @@
1
  from typing import Dict, Tuple
 
2
  import pandas as pd
3
  from xgboost import XGBRegressor
4
  from .features import FEATURE_COLUMNS
5
 
 
 
 
 
 
 
 
 
6
def train_model(feature_df: pd.DataFrame) -> Tuple[XGBRegressor, pd.DataFrame]:
    """Fit the gradient-boosted regressor on the engineered feature table.

    Args:
        feature_df: Table holding every column listed in ``FEATURE_COLUMNS``
            plus the ``target_return_5d`` label column.

    Returns:
        A ``(model, feature_df)`` tuple: the fitted regressor and the input
        frame, passed through unchanged (unlabeled rows included).
    """
    # Rows without a label cannot be used for fitting (XGBoost rejects NaN
    # labels), so train on the labeled subset only. The full frame is still
    # returned so callers can score rows that have no target yet.
    labeled = feature_df[feature_df["target_return_5d"].notna()]
    X = labeled[FEATURE_COLUMNS]
    y = labeled["target_return_5d"]

    model = XGBRegressor(
        n_estimators=120,
        max_depth=4,
        learning_rate=0.05,
        subsample=0.9,
        colsample_bytree=0.9,
        objective="reg:squarederror",
        random_state=42,  # fixed seed for reproducible row/column subsampling
        n_jobs=2,
        enable_categorical=False,
    )
    model.fit(X, y)
    return model, feature_df
22
 
23
def latest_predictions(model: XGBRegressor, feature_df: pd.DataFrame) -> pd.DataFrame:
    """Score the last row of each ticker (by ``date``) with the fitted model.

    Args:
        model: Fitted regressor exposing ``predict``.
        feature_df: Table with ``date``, ``ticker`` and every column listed
            in ``FEATURE_COLUMNS``.

    Returns:
        One row per ticker, re-indexed from zero, with an added
        ``predicted_return`` column holding the model output.
    """
    chronological = feature_df.sort_values("date")
    # After the date sort, tail(1) keeps the final row of every ticker group.
    newest_rows = chronological.groupby("ticker", as_index=False).tail(1)
    latest = newest_rows.reset_index(drop=True)

    predictions = model.predict(latest[FEATURE_COLUMNS])
    latest["predicted_return"] = predictions
    return latest
27
 
 
28
  def top_feature_contributions(model: XGBRegressor, latest_df: pd.DataFrame, top_n: int = 5) -> Dict[str, list]:
29
  booster = model.get_booster()
30
  raw_gain = booster.get_score(importance_type="gain") or {}
31
- gain_map = {name: float(raw_gain.get(f"f{i}", 0.0)) for i, name in enumerate(FEATURE_COLUMNS)}
 
 
 
 
32
  ranked = sorted(gain_map.items(), key=lambda x: x[1], reverse=True)[:top_n]
 
33
  template = [{"feature": f, "contribution": v} for f, v in ranked]
34
  if not template:
35
  template = [
@@ -37,4 +84,5 @@ def top_feature_contributions(model: XGBRegressor, latest_df: pd.DataFrame, top_
37
  {"feature": "momentum_factor", "contribution": 0.0},
38
  {"feature": "market_return", "contribution": 0.0},
39
  ]
40
- return {ticker: template for ticker in latest_df["ticker"].tolist()}
 
 
1
  from typing import Dict, Tuple
2
+ import numpy as np
3
  import pandas as pd
4
  from xgboost import XGBRegressor
5
  from .features import FEATURE_COLUMNS
6
 
7
+
8
def _zscore(series: pd.Series) -> pd.Series:
    """Standardize *series* using its mean and population standard deviation.

    Args:
        series: Numeric values to standardize.

    Returns:
        ``(series - mean) / std`` with ``std`` computed at ``ddof=0``. When
        the standard deviation is below ``1e-12`` (effectively constant
        input) a zero-filled series on the same index is returned instead,
        avoiding a division by (near) zero.
    """
    sigma = float(series.std(ddof=0))
    is_flat = sigma < 1e-12
    if is_flat:
        zeros = np.zeros(len(series))
        return pd.Series(zeros, index=series.index)

    centered = series - float(series.mean())
    return centered / sigma
13
+
14
+
15
def train_model(feature_df: pd.DataFrame) -> Tuple[XGBRegressor, pd.DataFrame]:
    """Fit the gradient-boosted regressor on the engineered feature table.

    Args:
        feature_df: Table holding every column listed in ``FEATURE_COLUMNS``
            plus the ``target_return_5d`` label column.

    Returns:
        A ``(model, feature_df)`` tuple: the fitted regressor and the input
        frame, passed through unchanged (unlabeled rows included).
    """
    # Rows without a label cannot be used for fitting (XGBoost rejects NaN
    # labels), so train on the labeled subset only. The full frame is still
    # returned so callers can score rows that have no target yet.
    labeled = feature_df[feature_df["target_return_5d"].notna()]
    X = labeled[FEATURE_COLUMNS]
    y = labeled["target_return_5d"]

    model = XGBRegressor(
        n_estimators=200,
        max_depth=4,
        learning_rate=0.04,
        subsample=0.9,
        colsample_bytree=0.9,
        objective="reg:squarederror",
        random_state=42,  # fixed seed for reproducible row/column subsampling
        n_jobs=2,
        reg_alpha=0.1,  # L1 penalty on leaf weights
        reg_lambda=1.0,  # L2 penalty on leaf weights
        enable_categorical=False,
    )
    model.fit(X, y)
    return model, feature_df
34
 
35
+
36
def build_alpha_signals(model: XGBRegressor, feature_df: pd.DataFrame) -> pd.DataFrame:
    """Blend the model prediction with factor z-scores for each ticker's last row.

    The last row of every ticker (ordered by ``date``) is scored with
    ``model``. The prediction and four factor columns are z-scored across
    that set of rows and combined with fixed weights into ``alpha_score``,
    which is clipped to ``[-3, 3]`` and mapped linearly to
    ``expected_return``.

    Args:
        model: Fitted regressor exposing ``predict``.
        feature_df: Table with ``date``, ``ticker``, every column listed in
            ``FEATURE_COLUMNS``, and the factor columns ``ret_20d``,
            ``ma_ratio_10_50``, ``vol_20d`` and ``volume_z``.

    Returns:
        One row per ticker, re-indexed from zero, with the added columns
        ``model_pred``, ``pred_z``, ``mom_z``, ``trend_z``, ``low_vol_z``,
        ``volume_z2``, ``alpha_score`` and ``expected_return``.
    """
    chronological = feature_df.sort_values("date")
    # After the date sort, tail(1) keeps the final row of every ticker group.
    newest_rows = chronological.groupby("ticker", as_index=False).tail(1)
    latest = newest_rows.reset_index(drop=True).copy()

    latest["model_pred"] = model.predict(latest[FEATURE_COLUMNS])

    # Missing factor values are replaced with 0.0 before standardizing.
    momentum = latest["ret_20d"].fillna(0.0)
    trend = latest["ma_ratio_10_50"].fillna(0.0)
    volatility = latest["vol_20d"].fillna(0.0)
    volume = latest["volume_z"].fillna(0.0)

    latest["pred_z"] = _zscore(latest["model_pred"])
    latest["mom_z"] = _zscore(momentum)
    latest["trend_z"] = _zscore(trend)
    # Negated so that lower volatility yields a higher score.
    latest["low_vol_z"] = _zscore(-volatility)
    latest["volume_z2"] = _zscore(volume)

    # Fixed-weight blend; the model prediction carries the largest weight.
    blended = (
        latest["pred_z"] * 0.55
        + latest["mom_z"] * 0.20
        + latest["trend_z"] * 0.15
        + latest["low_vol_z"] * 0.05
        + latest["volume_z2"] * 0.05
    )
    latest["alpha_score"] = blended.clip(lower=-3.0, upper=3.0)

    # Linear map from score to return: intercept plus slope per score unit.
    # NOTE(review): the original local names (base_daily / spread_daily)
    # suggest these are daily figures -- confirm against the consumer.
    intercept = 0.0002
    slope = 0.0030
    latest["expected_return"] = intercept + slope * latest["alpha_score"]

    return latest
68
 
69
+
70
  def top_feature_contributions(model: XGBRegressor, latest_df: pd.DataFrame, top_n: int = 5) -> Dict[str, list]:
71
  booster = model.get_booster()
72
  raw_gain = booster.get_score(importance_type="gain") or {}
73
+
74
+ gain_map = {}
75
+ for i, name in enumerate(FEATURE_COLUMNS):
76
+ gain_map[name] = float(raw_gain.get(f"f{i}", 0.0))
77
+
78
  ranked = sorted(gain_map.items(), key=lambda x: x[1], reverse=True)[:top_n]
79
+
80
  template = [{"feature": f, "contribution": v} for f, v in ranked]
81
  if not template:
82
  template = [
 
84
  {"feature": "momentum_factor", "contribution": 0.0},
85
  {"feature": "market_return", "contribution": 0.0},
86
  ]
87
+
88
+ return {ticker: template for ticker in latest_df["ticker"].tolist()}