"""ML models and PyTorch MLP class (reused from notebook)."""
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.model_selection import cross_val_score

# xgboost is an optional dependency: record whether the import succeeded so
# the rest of the code can check XGBOOST_AVAILABLE before using XGBClassifier.
try:
    from xgboost import XGBClassifier
    XGBOOST_AVAILABLE = True
except ImportError:
    XGBOOST_AVAILABLE = False

# Seed passed as ``random_state`` to the estimators constructed in this module.
SEED = 42
# Device on which tensors and the MLP are placed.
DEVICE = torch.device('cpu')  # HF Spaces CPU tier


# ── TitanicMLP (same code as in the notebook) ──
class TitanicMLP(nn.Module):
    """Fully connected binary classifier that outputs a probability per sample.

    Every hidden layer is the stack ``Linear -> BatchNorm1d -> ReLU -> Dropout``;
    the head is ``Linear(prev_dim, 1)`` followed by ``Sigmoid``.

    Args:
        input_dim: Number of input features per sample.
        hidden_dims: Sizes of the hidden layers, in order. ``None`` selects
            the default ``[64, 32]``; an empty list yields a model with only
            the output head.
        dropout: Dropout probability applied after every hidden activation.
    """

    def __init__(self, input_dim, hidden_dims=None, dropout=0.3):
        super().__init__()
        if hidden_dims is None:
            hidden_dims = [64, 32]

        layers = []
        prev_dim = input_dim

        for hidden_dim in hidden_dims:
            layers.extend([
                nn.Linear(prev_dim, hidden_dim),
                nn.BatchNorm1d(hidden_dim),
                nn.ReLU(),
                nn.Dropout(dropout),
            ])
            prev_dim = hidden_dim

        layers.append(nn.Linear(prev_dim, 1))
        layers.append(nn.Sigmoid())

        self.network = nn.Sequential(*layers)

    def forward(self, x):
        """Return sigmoid outputs with the trailing size-1 axis removed.

        Only the last axis is squeezed. A bare ``squeeze()`` would also drop
        the batch axis for a single-sample batch, producing a 0-d tensor that
        no longer matches a ``(1,)`` target and cannot be iterated.
        """
        return self.network(x).squeeze(-1)


def make_dataloader(X_arr, y_arr, batch_size=32, shuffle=True):
    """Wrap features and labels in a ``DataLoader`` of float32 tensors.

    Args:
        X_arr: Array-like of features (anything ``np.asarray`` can convert to
            float32).
        y_arr: Array-like of labels, converted to float32 as well.
        batch_size: Number of samples per batch.
        shuffle: Whether the loader reshuffles the samples on each iteration.

    Returns:
        A ``DataLoader`` over a ``TensorDataset`` whose tensors live on
        ``DEVICE``.
    """
    # Coerce the dtype in NumPy first so the conversion does not depend on the
    # legacy type-specific tensor constructor; ``torch.tensor`` always copies,
    # so the loader never aliases the caller's arrays.
    features = torch.tensor(
        np.asarray(X_arr, dtype=np.float32), dtype=torch.float32, device=DEVICE
    )
    labels = torch.tensor(
        np.asarray(y_arr, dtype=np.float32), dtype=torch.float32, device=DEVICE
    )
    dataset = TensorDataset(features, labels)
    return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)


def build_sklearn_model(algo: str, params: dict):
    """Build an unfitted classifier from an algorithm name and hyperparameters.

    Args:
        algo: One of ``'Logistic Regression'``, ``'Decision Tree'``,
            ``'Random Forest'``, ``'SVM (RBF)'``, ``'KNN'``,
            ``'Gradient Boosting'``, ``'XGBoost'`` or ``'Naive Bayes'``.
        params: Hyperparameter overrides; missing keys fall back to the
            defaults written below. ``None`` is treated as an empty dict.

    Returns:
        The configured, unfitted estimator.

    Raises:
        ImportError: If ``'XGBoost'`` is requested but xgboost is not installed.
        ValueError: If ``algo`` is not a recognised algorithm name.
    """
    params = params or {}

    if algo == 'Logistic Regression':
        return LogisticRegression(
            C=params.get('C', 1.0),
            max_iter=1000,
            random_state=SEED,
        )
    if algo == 'Decision Tree':
        return DecisionTreeClassifier(
            max_depth=params.get('max_depth', 4),
            min_samples_leaf=params.get('min_samples_leaf', 1),
            random_state=SEED,
        )
    if algo == 'Random Forest':
        return RandomForestClassifier(
            n_estimators=params.get('n_estimators', 100),
            max_depth=params.get('max_depth', 5),
            random_state=SEED,
        )
    if algo == 'SVM (RBF)':
        return SVC(
            C=params.get('C', 1.0),
            gamma=params.get('gamma', 'scale'),
            kernel='rbf',
            probability=True,
            random_state=SEED,
        )
    if algo == 'KNN':
        return KNeighborsClassifier(
            n_neighbors=params.get('n_neighbors', 7),
            weights=params.get('weights', 'uniform'),
        )
    if algo == 'Gradient Boosting':
        return GradientBoostingClassifier(
            n_estimators=params.get('n_estimators', 100),
            learning_rate=params.get('learning_rate', 0.1),
            max_depth=params.get('max_depth', 3),
            random_state=SEED,
        )
    if algo == 'XGBoost':
        # XGBClassifier is only bound when the optional import succeeded;
        # fail with a clear message instead of an opaque NameError.
        if not XGBOOST_AVAILABLE:
            raise ImportError(
                "XGBoost was requested but the 'xgboost' package is not installed"
            )
        return XGBClassifier(
            n_estimators=params.get('n_estimators', 100),
            learning_rate=params.get('learning_rate', 0.1),
            max_depth=params.get('max_depth', 3),
            random_state=SEED,
            eval_metric='logloss',
            verbosity=0,
        )
    if algo == 'Naive Bayes':
        return GaussianNB()

    raise ValueError(f"Unknown algorithm: {algo}")


def train_sklearn_model(model, X_train, X_test, y_train, y_test, cv_folds=5):
    """Fit ``model`` on the training split and score it on the test split.

    Besides test accuracy and the confusion matrix, accuracy is cross-validated
    on the training data with ``cv_folds`` folds. Feature importances come from
    ``feature_importances_`` when the estimator exposes it, otherwise from the
    first row of ``coef_``, otherwise ``None``.

    Returns:
        dict with keys ``model``, ``accuracy``, ``y_pred``,
        ``confusion_matrix``, ``cv_mean``, ``cv_std`` and
        ``feature_importances``.
    """
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)

    test_accuracy = accuracy_score(y_test, predictions)
    conf_mat = confusion_matrix(y_test, predictions)
    fold_scores = cross_val_score(
        model, X_train, y_train, cv=cv_folds, scoring='accuracy',
    )

    # Tree ensembles expose feature_importances_; linear models expose coef_.
    if hasattr(model, 'feature_importances_'):
        importances = model.feature_importances_
    elif hasattr(model, 'coef_'):
        importances = model.coef_[0]
    else:
        importances = None

    metrics = {
        'model': model,
        'accuracy': test_accuracy,
        'y_pred': predictions,
        'confusion_matrix': conf_mat,
        'cv_mean': fold_scores.mean(),
        'cv_std': fold_scores.std(),
        'feature_importances': importances,
    }
    return metrics


def train_mlp(X_train_scaled, X_test_scaled, y_train, y_test,
              hidden_dims, epochs, lr, batch_size, dropout,
              progress_callback=None):
    """Train a TitanicMLP and return the training history plus test metrics.

    Args:
        X_train_scaled: 2-D array-like of training features.
        X_test_scaled: 2-D array-like of test features.
        y_train: Training labels (pandas Series, list or ndarray).
        y_test: Test labels (pandas Series, list or ndarray).
        hidden_dims: Hidden layer sizes forwarded to ``TitanicMLP``.
        epochs: Number of passes over the training data; must be >= 1.
        lr: Learning rate for Adam.
        batch_size: Mini-batch size for both loaders.
        dropout: Dropout probability forwarded to ``TitanicMLP``.
        progress_callback: Optional callable invoked after every epoch as
            ``callback(epoch, epochs, train_loss, train_acc, test_loss, test_acc)``.

    Returns:
        dict with the per-epoch lists ``train_losses``, ``test_losses``,
        ``train_accs`` and ``test_accs``, plus ``final_acc`` (last test
        accuracy), ``confusion_matrix``, ``y_pred`` and the trained ``model``.

    Raises:
        ValueError: If ``epochs`` is below 1, or if no training batch can be
            processed (BatchNorm1d needs at least two samples per batch).
    """
    if epochs < 1:
        raise ValueError(f"epochs must be >= 1, got {epochs}")

    # np.asarray accepts pandas Series as well as plain lists / ndarrays,
    # so callers are not forced to pass pandas objects.
    y_train_arr = np.asarray(y_train)
    y_test_arr = np.asarray(y_test)

    input_dim = np.asarray(X_train_scaled).shape[1]
    mlp = TitanicMLP(input_dim=input_dim, hidden_dims=hidden_dims, dropout=dropout).to(DEVICE)
    uses_batchnorm = any(isinstance(m, nn.BatchNorm1d) for m in mlp.modules())

    train_loader = make_dataloader(X_train_scaled, y_train_arr, batch_size, shuffle=True)
    test_loader = make_dataloader(X_test_scaled, y_test_arr, batch_size, shuffle=False)

    criterion = nn.BCELoss()
    optimizer = optim.Adam(mlp.parameters(), lr=lr, weight_decay=1e-4)
    # Halve the learning rate roughly three times over the run.
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=max(1, epochs // 3), gamma=0.5)

    train_losses, test_losses = [], []
    train_accs, test_accs = [], []

    for epoch in range(1, epochs + 1):
        # Train
        mlp.train()
        epoch_loss, correct, total, n_batches = 0.0, 0, 0, 0
        for X_batch, y_batch in train_loader:
            # BatchNorm1d cannot compute batch statistics from a single sample
            # in training mode, so a trailing one-sample batch is skipped.
            if uses_batchnorm and X_batch.size(0) < 2:
                continue
            optimizer.zero_grad()
            # reshape(-1) guarantees a 1-D output even if the model hands back
            # a 0-d tensor for a single-sample batch.
            output = mlp(X_batch).reshape(-1)
            target = y_batch.reshape(-1)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()
            pred = (output >= 0.5).float()
            correct += (pred == target).sum().item()
            total += len(target)
            n_batches += 1

        if n_batches == 0:
            raise ValueError(
                "no trainable batches: BatchNorm1d needs at least 2 samples "
                "per training batch"
            )

        train_losses.append(epoch_loss / n_batches)
        train_accs.append(correct / total)

        # Eval
        mlp.eval()
        with torch.no_grad():
            t_loss, t_correct, t_total = 0.0, 0, 0
            for X_batch, y_batch in test_loader:
                output = mlp(X_batch).reshape(-1)
                target = y_batch.reshape(-1)
                t_loss += criterion(output, target).item()
                pred = (output >= 0.5).float()
                t_correct += (pred == target).sum().item()
                t_total += len(target)

        test_losses.append(t_loss / len(test_loader))
        test_accs.append(t_correct / t_total)

        scheduler.step()

        if progress_callback is not None:
            progress_callback(epoch, epochs, train_losses[-1], train_accs[-1],
                              test_losses[-1], test_accs[-1])

    # Final predictions for confusion matrix
    mlp.eval()
    y_pred_list = []
    with torch.no_grad():
        for X_batch, _ in test_loader:
            output = mlp(X_batch).reshape(-1)
            y_pred_list.extend((output >= 0.5).cpu().numpy().astype(int))

    cm = confusion_matrix(y_test_arr, y_pred_list)

    return {
        'train_losses': train_losses,
        'test_losses': test_losses,
        'train_accs': train_accs,
        'test_accs': test_accs,
        'final_acc': test_accs[-1],
        'confusion_matrix': cm,
        'y_pred': y_pred_list,
        # Returned so callers can reuse the trained network, mirroring the
        # 'model' entry returned by train_sklearn_model.
        'model': mlp,
    }