Spaces:
Sleeping
Sleeping
| from typing import Dict, Tuple | |
| import numpy as np | |
| import pandas as pd | |
| from xgboost import XGBRegressor | |
| from .features import FEATURE_COLUMNS | |
def _zscore(series: pd.Series) -> pd.Series:
    """Standardize ``series`` to zero mean and unit population variance.

    Args:
        series: Numeric values to standardize.

    Returns:
        A series on the same index. When the population standard deviation
        (``ddof=0``) is effectively zero or not finite, every entry is 0.0,
        because no meaningful cross-sectional ranking exists in that case.
        Otherwise ``(series - mean) / std`` is returned; NaN entries of a
        non-degenerate input stay NaN.
    """
    std = float(series.std(ddof=0))
    # An empty or all-NaN input yields std == NaN, and ``NaN < 1e-12`` is
    # False, so the non-finite case has to be tested explicitly; otherwise
    # NaN would leak into every downstream score.
    if not np.isfinite(std) or std < 1e-12:
        return pd.Series(np.zeros(len(series)), index=series.index)
    return (series - float(series.mean())) / std
def train_model(feature_df: pd.DataFrame) -> Tuple[XGBRegressor, pd.DataFrame]:
    """Fit the XGBoost regressor on the supplied feature table.

    Args:
        feature_df: Frame containing every column listed in
            ``FEATURE_COLUMNS`` plus the ``target_return_5d`` label column.

    Returns:
        A tuple of the fitted regressor and the same ``feature_df`` object
        that was passed in (it is returned unchanged).
    """
    hyperparams = {
        "n_estimators": 200,
        "max_depth": 4,
        "learning_rate": 0.04,
        "subsample": 0.9,
        "colsample_bytree": 0.9,
        "objective": "reg:squarederror",
        "random_state": 42,
        "n_jobs": 2,
        "reg_alpha": 0.1,
        "reg_lambda": 1.0,
        "enable_categorical": False,
    }
    regressor = XGBRegressor(**hyperparams)

    inputs = feature_df[FEATURE_COLUMNS]
    labels = feature_df["target_return_5d"]
    regressor.fit(inputs, labels)
    return regressor, feature_df
def build_alpha_signals(model: XGBRegressor, feature_df: pd.DataFrame) -> pd.DataFrame:
    """Score the last row of every ticker with a blended alpha signal.

    Args:
        model: Fitted regressor used to predict from ``FEATURE_COLUMNS``.
        feature_df: Frame with ``date``, ``ticker``, the model feature
            columns, and ``ret_20d``, ``ma_ratio_10_50``, ``vol_20d`` and
            ``volume_z``.

    Returns:
        A new frame with one row per ticker (the last row after sorting by
        ``date``) and the added columns ``model_pred``, ``pred_z``,
        ``mom_z``, ``trend_z``, ``low_vol_z``, ``volume_z2``,
        ``alpha_score`` (clipped to [-3, 3]) and ``expected_return``.
    """
    by_date = feature_df.sort_values("date")
    last_rows = by_date.groupby("ticker", as_index=False).tail(1)
    snapshot = last_rows.reset_index(drop=True).copy()

    snapshot["model_pred"] = model.predict(snapshot[FEATURE_COLUMNS])
    snapshot["pred_z"] = _zscore(snapshot["model_pred"])

    # Missing raw factor values are replaced by 0.0 before standardizing.
    momentum = snapshot["ret_20d"].fillna(0.0)
    snapshot["mom_z"] = _zscore(momentum)
    trend = snapshot["ma_ratio_10_50"].fillna(0.0)
    snapshot["trend_z"] = _zscore(trend)
    volatility = snapshot["vol_20d"].fillna(0.0)
    # Negated so that lower volatility produces a higher score.
    snapshot["low_vol_z"] = _zscore(-volatility)
    volume = snapshot["volume_z"].fillna(0.0)
    snapshot["volume_z2"] = _zscore(volume)

    # Weighted blend; terms are added left to right in this order.
    blend_weights = (
        ("pred_z", 0.55),
        ("mom_z", 0.20),
        ("trend_z", 0.15),
        ("low_vol_z", 0.05),
        ("volume_z2", 0.05),
    )
    weighted_terms = [weight * snapshot[column] for column, weight in blend_weights]
    blended = weighted_terms[0]
    for term in weighted_terms[1:]:
        blended = blended + term
    snapshot["alpha_score"] = blended.clip(-3.0, 3.0)

    base_daily = 0.0002
    spread_daily = 0.0030
    snapshot["expected_return"] = base_daily + spread_daily * snapshot["alpha_score"]
    return snapshot
def top_feature_contributions(model: XGBRegressor, latest_df: pd.DataFrame, top_n: int = 5) -> Dict[str, list]:
    """Return the model's top gain-ranked features, repeated for each ticker.

    The ranking is global: it comes from the booster's "gain" importance and
    is the same for every ticker. ``latest_df`` is used only for the values
    of its ``ticker`` column.

    Args:
        model: Fitted regressor whose booster supplies the gain scores.
        latest_df: Frame with a ``ticker`` column.
        top_n: Maximum number of features to keep, highest gain first.

    Returns:
        A dict mapping each ticker to its own list of
        ``{"feature": name, "contribution": gain}`` dicts. If the ranking is
        empty, a fixed three-entry placeholder with zero contributions is
        used instead.
    """
    raw_gain = model.get_booster().get_score(importance_type="gain") or {}

    # get_score keys are the booster's feature names: the real column names
    # when the model was fitted on a DataFrame, and "f0", "f1", ... when it
    # was fitted on unnamed arrays. Try the column name first, then the
    # positional name, so fitted gains are not silently reported as 0.0.
    gain_map = {
        name: float(raw_gain.get(name, raw_gain.get(f"f{i}", 0.0)))
        for i, name in enumerate(FEATURE_COLUMNS)
    }

    ranked = sorted(gain_map.items(), key=lambda item: item[1], reverse=True)[:top_n]
    template = [{"feature": feature, "contribution": gain} for feature, gain in ranked]
    if not template:
        template = [
            {"feature": "ret_20d", "contribution": 0.0},
            {"feature": "momentum_factor", "contribution": 0.0},
            {"feature": "market_return", "contribution": 0.0},
        ]

    # Give every ticker its own copy so that mutating one ticker's entries
    # cannot change the entries of the others.
    return {
        ticker: [dict(entry) for entry in template]
        for ticker in latest_df["ticker"].tolist()
    }