Spaces:
Sleeping
Sleeping
Update app/models.py
Browse files- app/models.py +55 -7
app/models.py
CHANGED
|
@@ -1,35 +1,82 @@
|
|
| 1 |
from typing import Dict, Tuple
|
|
|
|
| 2 |
import pandas as pd
|
| 3 |
from xgboost import XGBRegressor
|
| 4 |
from .features import FEATURE_COLUMNS
|
| 5 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
def train_model(feature_df: pd.DataFrame) -> Tuple[XGBRegressor, pd.DataFrame]:
|
| 7 |
X = feature_df[FEATURE_COLUMNS]
|
| 8 |
y = feature_df["target_return_5d"]
|
|
|
|
| 9 |
model = XGBRegressor(
|
| 10 |
-
n_estimators=
|
| 11 |
max_depth=4,
|
| 12 |
-
learning_rate=0.
|
| 13 |
subsample=0.9,
|
| 14 |
colsample_bytree=0.9,
|
| 15 |
objective="reg:squarederror",
|
| 16 |
random_state=42,
|
| 17 |
n_jobs=2,
|
|
|
|
|
|
|
| 18 |
enable_categorical=False,
|
| 19 |
)
|
| 20 |
model.fit(X, y)
|
| 21 |
return model, feature_df
|
| 22 |
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
latest
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
return latest
|
| 27 |
|
|
|
|
| 28 |
def top_feature_contributions(model: XGBRegressor, latest_df: pd.DataFrame, top_n: int = 5) -> Dict[str, list]:
|
| 29 |
booster = model.get_booster()
|
| 30 |
raw_gain = booster.get_score(importance_type="gain") or {}
|
| 31 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
ranked = sorted(gain_map.items(), key=lambda x: x[1], reverse=True)[:top_n]
|
|
|
|
| 33 |
template = [{"feature": f, "contribution": v} for f, v in ranked]
|
| 34 |
if not template:
|
| 35 |
template = [
|
|
@@ -37,4 +84,5 @@ def top_feature_contributions(model: XGBRegressor, latest_df: pd.DataFrame, top_
|
|
| 37 |
{"feature": "momentum_factor", "contribution": 0.0},
|
| 38 |
{"feature": "market_return", "contribution": 0.0},
|
| 39 |
]
|
| 40 |
-
|
|
|
|
|
|
| 1 |
from typing import Dict, Tuple
|
| 2 |
+
import numpy as np
|
| 3 |
import pandas as pd
|
| 4 |
from xgboost import XGBRegressor
|
| 5 |
from .features import FEATURE_COLUMNS
|
| 6 |
|
| 7 |
+
|
| 8 |
+
def _zscore(series: pd.Series) -> pd.Series:
|
| 9 |
+
std = float(series.std(ddof=0))
|
| 10 |
+
if std < 1e-12:
|
| 11 |
+
return pd.Series(np.zeros(len(series)), index=series.index)
|
| 12 |
+
return (series - float(series.mean())) / std
|
| 13 |
+
|
| 14 |
+
|
| 15 |
def train_model(feature_df: pd.DataFrame) -> Tuple[XGBRegressor, pd.DataFrame]:
    """Fit an XGBoost regressor on the feature frame.

    The columns listed in ``FEATURE_COLUMNS`` are the model inputs and the
    ``target_return_5d`` column is the regression target.

    Args:
        feature_df: Frame containing every ``FEATURE_COLUMNS`` column and
            ``target_return_5d``.

    Returns:
        A ``(model, feature_df)`` pair: the fitted regressor and the input
        frame itself, passed through without being copied or modified here.
    """
    inputs = feature_df[FEATURE_COLUMNS]
    # NOTE(review): no filtering happens here, so this presumably relies on
    # the target column being free of NaN — confirm against the caller.
    target = feature_df["target_return_5d"]

    hyperparams = {
        "n_estimators": 200,
        "max_depth": 4,
        "learning_rate": 0.04,
        "subsample": 0.9,
        "colsample_bytree": 0.9,
        "objective": "reg:squarederror",
        "random_state": 42,  # fixed seed
        "n_jobs": 2,
        "reg_alpha": 0.1,
        "reg_lambda": 1.0,
        "enable_categorical": False,
    }
    regressor = XGBRegressor(**hyperparams)
    regressor.fit(inputs, target)
    return regressor, feature_df
|
| 34 |
|
| 35 |
+
|
| 36 |
+
def build_alpha_signals(model: XGBRegressor, feature_df: pd.DataFrame) -> pd.DataFrame:
    """Score the most recent row of every ticker with a blended alpha signal.

    Steps:
      1. Keep the last row per ``ticker`` after sorting by ``date``.
      2. Predict with ``model`` on the ``FEATURE_COLUMNS`` of those rows.
      3. Z-score the prediction and four feature columns across tickers
         (missing feature values are filled with 0.0 first; ``vol_20d`` is
         negated so that lower values score higher).
      4. Blend the z-scores with fixed weights, clip to [-3, 3], and map the
         result linearly to ``expected_return``.

    Args:
        model: Fitted regressor exposing ``predict``.
        feature_df: Frame with ``date``, ``ticker``, the ``FEATURE_COLUMNS``
            columns, and ``ret_20d``, ``ma_ratio_10_50``, ``vol_20d``,
            ``volume_z``.

    Returns:
        A new frame with one row per ticker and the added columns
        ``model_pred``, ``pred_z``, ``mom_z``, ``trend_z``, ``low_vol_z``,
        ``volume_z2``, ``alpha_score`` and ``expected_return``.
    """
    chronological = feature_df.sort_values("date")
    last_rows = chronological.groupby("ticker", as_index=False).tail(1)
    latest = last_rows.reset_index(drop=True).copy()

    latest["model_pred"] = model.predict(latest[FEATURE_COLUMNS])

    def _filled(column: str) -> pd.Series:
        # Missing feature values are replaced by 0.0 before standardizing.
        return latest[column].fillna(0.0)

    latest["pred_z"] = _zscore(latest["model_pred"])
    latest["mom_z"] = _zscore(_filled("ret_20d"))
    latest["trend_z"] = _zscore(_filled("ma_ratio_10_50"))
    latest["low_vol_z"] = _zscore(-_filled("vol_20d"))
    latest["volume_z2"] = _zscore(_filled("volume_z"))

    # (z-score column, weight); accumulated left to right in this order.
    blend = (
        ("pred_z", 0.55),
        ("mom_z", 0.20),
        ("trend_z", 0.15),
        ("low_vol_z", 0.05),
        ("volume_z2", 0.05),
    )
    (lead_column, lead_weight), *remaining = blend
    raw_score = lead_weight * latest[lead_column]
    for column, weight in remaining:
        raw_score = raw_score + weight * latest[column]

    latest["alpha_score"] = raw_score.clip(-3.0, 3.0)

    # Linear map from score to return; the names suggest per-day units,
    # which is not verifiable from this function alone.
    base_daily = 0.0002
    spread_daily = 0.0030
    latest["expected_return"] = base_daily + spread_daily * latest["alpha_score"]

    return latest
|
| 68 |
|
| 69 |
+
|
| 70 |
def top_feature_contributions(model: XGBRegressor, latest_df: pd.DataFrame, top_n: int = 5) -> Dict[str, list]:
|
| 71 |
booster = model.get_booster()
|
| 72 |
raw_gain = booster.get_score(importance_type="gain") or {}
|
| 73 |
+
|
| 74 |
+
gain_map = {}
|
| 75 |
+
for i, name in enumerate(FEATURE_COLUMNS):
|
| 76 |
+
gain_map[name] = float(raw_gain.get(f"f{i}", 0.0))
|
| 77 |
+
|
| 78 |
ranked = sorted(gain_map.items(), key=lambda x: x[1], reverse=True)[:top_n]
|
| 79 |
+
|
| 80 |
template = [{"feature": f, "contribution": v} for f, v in ranked]
|
| 81 |
if not template:
|
| 82 |
template = [
|
|
|
|
| 84 |
{"feature": "momentum_factor", "contribution": 0.0},
|
| 85 |
{"feature": "market_return", "contribution": 0.0},
|
| 86 |
]
|
| 87 |
+
|
| 88 |
+
return {ticker: template for ticker in latest_df["ticker"].tolist()}
|