from typing import Dict, Tuple

import numpy as np
import pandas as pd
from xgboost import XGBRegressor

from .features import FEATURE_COLUMNS


def _zscore(series: pd.Series) -> pd.Series:
    """Standardise *series* to zero mean and unit (population) variance.

    Args:
        series: Numeric values to standardise.

    Returns:
        A series on the same index as *series*. When the population standard
        deviation is effectively zero (below 1e-12) every value is 0.0, which
        avoids dividing by zero for constant input.
    """
    std = float(series.std(ddof=0))
    if std < 1e-12:
        return pd.Series(np.zeros(len(series)), index=series.index)
    return (series - float(series.mean())) / std


def train_model(feature_df: pd.DataFrame) -> Tuple[XGBRegressor, pd.DataFrame]:
    """Fit an XGBoost regressor on the feature frame.

    The model is trained on the ``FEATURE_COLUMNS`` columns against the
    ``target_return_5d`` column, using a fixed hyper-parameter set and a fixed
    random seed.

    Args:
        feature_df: Frame containing every column in ``FEATURE_COLUMNS`` plus
            ``target_return_5d``.

    Returns:
        A ``(model, feature_df)`` tuple; the frame is handed back unchanged.
    """
    X = feature_df[FEATURE_COLUMNS]
    y = feature_df["target_return_5d"]
    model = XGBRegressor(
        n_estimators=200,
        max_depth=4,
        learning_rate=0.04,
        subsample=0.9,
        colsample_bytree=0.9,
        objective="reg:squarederror",
        random_state=42,
        n_jobs=2,
        reg_alpha=0.1,
        reg_lambda=1.0,
        enable_categorical=False,
    )
    model.fit(X, y)
    return model, feature_df


def build_alpha_signals(model: XGBRegressor, feature_df: pd.DataFrame) -> pd.DataFrame:
    """Score the most recent row of every ticker.

    For each ticker the row with the latest ``date`` is kept, the model
    prediction is computed, and the prediction is blended with four
    z-scored columns into a single ``alpha_score`` clipped to [-3, 3].
    ``expected_return`` is a linear function of that score.

    Args:
        model: A fitted regressor that accepts the ``FEATURE_COLUMNS`` columns.
        feature_df: Frame with ``date``, ``ticker``, ``ret_20d``,
            ``ma_ratio_10_50``, ``vol_20d``, ``volume_z`` and every column in
            ``FEATURE_COLUMNS``.

    Returns:
        One row per ticker with the added columns ``model_pred``, ``pred_z``,
        ``mom_z``, ``trend_z``, ``low_vol_z``, ``volume_z2``, ``alpha_score``
        and ``expected_return``.
    """
    latest = (
        feature_df.sort_values("date")
        .groupby("ticker", as_index=False)
        .tail(1)
        .reset_index(drop=True)
        .copy()
    )

    latest["model_pred"] = model.predict(latest[FEATURE_COLUMNS])

    # Every component is standardised across tickers so the blend weights
    # below are comparable; missing inputs are filled with 0.0 first.
    latest["pred_z"] = _zscore(latest["model_pred"])
    latest["mom_z"] = _zscore(latest["ret_20d"].fillna(0.0))
    latest["trend_z"] = _zscore(latest["ma_ratio_10_50"].fillna(0.0))
    # Negated so that a lower vol_20d yields a higher score.
    latest["low_vol_z"] = _zscore(-latest["vol_20d"].fillna(0.0))
    latest["volume_z2"] = _zscore(latest["volume_z"].fillna(0.0))

    latest["alpha_score"] = (
        0.55 * latest["pred_z"]
        + 0.20 * latest["mom_z"]
        + 0.15 * latest["trend_z"]
        + 0.05 * latest["low_vol_z"]
        + 0.05 * latest["volume_z2"]
    )
    latest["alpha_score"] = latest["alpha_score"].clip(-3.0, 3.0)

    base_daily = 0.0002
    spread_daily = 0.0030
    latest["expected_return"] = base_daily + spread_daily * latest["alpha_score"]
    return latest


def top_feature_contributions(
    model: XGBRegressor, latest_df: pd.DataFrame, top_n: int = 5
) -> Dict[str, list]:
    """Return the model's highest-gain features for every ticker.

    The ranking is the booster's global "gain" importance, so every ticker
    receives the same feature list; it is not a per-row attribution.

    Args:
        model: A fitted regressor.
        latest_df: Frame with a ``ticker`` column; one output entry is
            produced per value in that column.
        top_n: Maximum number of features to report.

    Returns:
        A dict mapping each ticker to a list of
        ``{"feature": name, "contribution": gain}`` dicts sorted by descending
        gain. Features never used in a split get a gain of 0.0. If the ranking
        is empty, a fixed three-entry placeholder list is used instead. Each
        ticker owns an independent copy of the list.
    """
    booster = model.get_booster()
    raw_gain = booster.get_score(importance_type="gain") or {}

    gain_map = {}
    for i, name in enumerate(FEATURE_COLUMNS):
        # A booster fitted on a DataFrame keys its importances by column
        # name; one fitted on a bare array uses positional "f0", "f1", ...
        # Try the real name first and fall back to the positional key so
        # both cases resolve to the actual gain instead of 0.0.
        gain_map[name] = float(raw_gain.get(name, raw_gain.get(f"f{i}", 0.0)))

    ranked = sorted(gain_map.items(), key=lambda x: x[1], reverse=True)[:top_n]
    template = [{"feature": f, "contribution": v} for f, v in ranked]
    if not template:
        template = [
            {"feature": "ret_20d", "contribution": 0.0},
            {"feature": "momentum_factor", "contribution": 0.0},
            {"feature": "market_return", "contribution": 0.0},
        ]

    # Copy per ticker so a caller mutating one ticker's entries does not
    # silently change every other ticker's entries.
    return {
        ticker: [dict(entry) for entry in template]
        for ticker in latest_df["ticker"].tolist()
    }