Adisri99's picture
Update app/models.py
3065476 verified
from typing import Dict, Tuple
import numpy as np
import pandas as pd
from xgboost import XGBRegressor
from .features import FEATURE_COLUMNS
def _zscore(series: pd.Series) -> pd.Series:
    """Standardize ``series`` with its population mean and standard deviation.

    When the population standard deviation (``ddof=0``) is below 1e-12 the
    values are treated as constant and an all-zero series on the same index
    is returned instead of dividing by a vanishing spread.
    """
    sigma = float(series.std(ddof=0))
    is_flat = sigma < 1e-12
    if is_flat:
        zeros = np.zeros(len(series))
        return pd.Series(zeros, index=series.index)
    centered = series - float(series.mean())
    return centered / sigma
def train_model(feature_df: pd.DataFrame) -> Tuple[XGBRegressor, pd.DataFrame]:
    """Fit an XGBoost regressor that predicts ``target_return_5d``.

    Rows whose target is missing or infinite are excluded from the fit,
    because XGBoost refuses NaN/inf labels. When every target is finite the
    training data is exactly the full frame.

    Args:
        feature_df: Frame holding every column listed in ``FEATURE_COLUMNS``
            plus the ``target_return_5d`` label column.

    Returns:
        A ``(model, feature_df)`` tuple: the fitted regressor and the input
        frame, returned as-is (rows skipped for training are still present).
    """
    target = feature_df["target_return_5d"]
    # Only the training slice is filtered; the caller still gets every row back.
    usable = target.notna() & ~target.isin([np.inf, -np.inf])
    X = feature_df.loc[usable, FEATURE_COLUMNS]
    y = target.loc[usable]
    model = XGBRegressor(
        n_estimators=200,
        max_depth=4,
        learning_rate=0.04,
        subsample=0.9,
        colsample_bytree=0.9,
        objective="reg:squarederror",
        random_state=42,
        n_jobs=2,
        reg_alpha=0.1,
        reg_lambda=1.0,
        enable_categorical=False,
    )
    model.fit(X, y)
    return model, feature_df
def build_alpha_signals(model: XGBRegressor, feature_df: pd.DataFrame) -> pd.DataFrame:
    """Score the most recent row of every ticker with a blended alpha signal.

    The last row per ticker (after sorting by ``date``) is kept, the model
    prediction and four raw factors are each z-scored across those rows, and
    the z-scores are combined with fixed weights into ``alpha_score``
    (clipped to [-3, 3]). ``expected_return`` is a linear map of that score.

    Args:
        model: Fitted regressor used to predict from ``FEATURE_COLUMNS``.
        feature_df: Frame with ``date``, ``ticker``, the feature columns and
            ``ret_20d``, ``ma_ratio_10_50``, ``vol_20d``, ``volume_z``.

    Returns:
        One row per ticker with the added columns ``model_pred``, ``pred_z``,
        ``mom_z``, ``trend_z``, ``low_vol_z``, ``volume_z2``, ``alpha_score``
        and ``expected_return``.
    """
    ordered = feature_df.sort_values("date")
    snapshot = ordered.groupby("ticker", as_index=False).tail(1)
    snapshot = snapshot.reset_index(drop=True).copy()

    snapshot["model_pred"] = model.predict(snapshot[FEATURE_COLUMNS])
    snapshot["pred_z"] = _zscore(snapshot["model_pred"])

    # (output column, source column, negate before z-scoring)
    # Missing source values are filled with 0.0 before standardizing.
    factor_specs = (
        ("mom_z", "ret_20d", False),
        ("trend_z", "ma_ratio_10_50", False),
        ("low_vol_z", "vol_20d", True),  # negated so lower vol scores higher
        ("volume_z2", "volume_z", False),
    )
    for out_col, src_col, negate in factor_specs:
        filled = snapshot[src_col].fillna(0.0)
        snapshot[out_col] = _zscore(-filled if negate else filled)

    # Accumulate left to right so the floating-point sum order is fixed.
    blend = (
        ("pred_z", 0.55),
        ("mom_z", 0.20),
        ("trend_z", 0.15),
        ("low_vol_z", 0.05),
        ("volume_z2", 0.05),
    )
    lead_col, lead_weight = blend[0]
    score = lead_weight * snapshot[lead_col]
    for col, weight in blend[1:]:
        score = score + weight * snapshot[col]
    snapshot["alpha_score"] = score.clip(-3.0, 3.0)

    intercept = 0.0002
    slope = 0.0030
    snapshot["expected_return"] = intercept + slope * snapshot["alpha_score"]
    return snapshot
def top_feature_contributions(model: XGBRegressor, latest_df: pd.DataFrame, top_n: int = 5) -> Dict[str, list]:
    """Map every ticker to the model's ``top_n`` highest-gain features.

    The ranking uses the booster's global "gain" importance, so each ticker
    receives the same feature list; nothing here is computed per row.

    Args:
        model: Fitted regressor whose booster supplies the importances.
        latest_df: Frame with a ``ticker`` column; one output key per value.
        top_n: How many features to keep for each ticker.

    Returns:
        ``{ticker: [{"feature": name, "contribution": gain}, ...]}`` ordered
        by descending gain. Each ticker owns an independent copy of the list,
        so editing one ticker's entries does not affect the others. If no
        feature is ranked, a fixed three-item zero-valued placeholder is used.
    """
    raw_gain = model.get_booster().get_score(importance_type="gain") or {}

    # A booster fitted on a DataFrame keys importances by column name, while
    # one fitted on a bare array uses positional "f<i>" keys; accept either.
    # Features missing from the mapping default to 0.0.
    gain_map = {
        name: float(raw_gain.get(name, raw_gain.get(f"f{i}", 0.0)))
        for i, name in enumerate(FEATURE_COLUMNS)
    }
    ranked = sorted(gain_map.items(), key=lambda item: item[1], reverse=True)[:top_n]
    template = [{"feature": feat, "contribution": gain} for feat, gain in ranked]
    if not template:
        template = [
            {"feature": "ret_20d", "contribution": 0.0},
            {"feature": "momentum_factor", "contribution": 0.0},
            {"feature": "market_return", "contribution": 0.0},
        ]
    # Copy per ticker so callers can mutate one entry without aliasing others.
    return {
        ticker: [dict(entry) for entry in template]
        for ticker in latest_df["ticker"].tolist()
    }