"""
GeoDavidCollective Trainer
==============================================
Complete training system for ProjectiveHead-enhanced GeoDavidCollective:
- Proven data pipeline (StreamingSD15Extractor, SymbolicPromptDataset)
- Enhanced GeoDavidCollective with ProjectiveHead architecture
- Comprehensive logging and checkpointing
- HuggingFace Hub integration: currently NOT functional. The upload code was removed during an
  earlier revision and has not been restored; only local .safetensors export remains.

Author: AbstractPhil

License: MIT
"""

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torch.utils.tensorboard import SummaryWriter
from tqdm import tqdm
from pathlib import Path
from typing import Dict, List, Optional
import json
import numpy as np
from datetime import datetime

# Diffusers
from diffusers import StableDiffusionPipeline

# ENHANCED: Import GeoDavidCollective Enhanced
from geovocab2.train.model.core.geo_david_collective import GeoDavidCollective

# Symbolic synthesis
from geovocab2.data.prompt.symbolic_tree import SynthesisSystem

# HuggingFace
try:
    from huggingface_hub import HfApi, create_repo, upload_folder
    from safetensors.torch import save_file
    HF_AVAILABLE = True
except ImportError:
    HF_AVAILABLE = False


# ============================================================================
# PROMPT LOGGER
# ============================================================================

class PromptLogger:
    """Logs all prompts with metadata to JSONL, flushed per batch.

    The log file is truncated when the logger is constructed and appended to
    once per call to ``log_batch``; the file is closed (and therefore flushed)
    after every batch.
    """

    def __init__(self, output_path: str = "./prompts_all_epochs.jsonl"):
        """Create (or truncate) the JSONL file at ``output_path``.

        Args:
            output_path: Destination file; missing parent directories are created.
        """
        self.output_path = Path(output_path)
        self.output_path.parent.mkdir(parents=True, exist_ok=True)

        # Create/truncate file so entries from a previous run are discarded
        with open(self.output_path, 'w', encoding='utf-8') as f:
            f.write("")

        self.batch_count = 0
        # Exact number of entries written; batches may differ in size
        # (e.g. a short final batch), so this cannot be derived from batch_count.
        self.prompt_count = 0
        print(f"✓ PromptLogger initialized: {self.output_path}")

    def log_batch(
        self,
        prompts: List[str],
        timesteps: torch.Tensor,
        epoch: int,
        batch_idx: int,
        global_step: int
    ):
        """Log batch of prompts with immediate flush.

        Args:
            prompts: Prompt strings of the batch.
            timesteps: One timestep per prompt (tensor or any iterable of
                scalars convertible with ``int``).
            epoch: Epoch index stored with every entry.
            batch_idx: Batch index within the epoch.
            global_step: Global optimisation step.
        """
        lines = []
        for i, (prompt, t) in enumerate(zip(prompts, timesteps)):
            step = int(t)
            entry = {
                'timestamp': datetime.now().isoformat(),
                'epoch': epoch,
                'batch': batch_idx,
                'global_step': global_step,
                'sample_idx': i,
                'timestep': step,
                'timestep_bin': step // 10,
                'prompt': prompt
            }
            lines.append(json.dumps(entry) + '\n')

        # Single buffered write per batch; leaving the `with` block flushes it.
        with open(self.output_path, 'a', encoding='utf-8') as f:
            f.writelines(lines)

        self.batch_count += 1
        self.prompt_count += len(lines)
        if self.batch_count % 100 == 0:
            print(f"  📝 Logged {self.batch_count} batches ({self.prompt_count:,} prompts)")

    def get_stats(self) -> dict:
        """Get statistics about logged prompts.

        Returns:
            Dict with ``total`` (number of lines in the log file) and
            ``size_mb`` (file size in MiB). Both keys are always present; they
            are zero when the file does not exist.
        """
        if not self.output_path.exists():
            return {'total': 0, 'size_mb': 0.0}

        # Count lines by streaming instead of loading the whole log into memory
        with open(self.output_path, 'r', encoding='utf-8') as f:
            total = sum(1 for _ in f)

        return {
            'total': total,
            'size_mb': self.output_path.stat().st_size / 1024**2
        }


# ============================================================================
# SD1.5 FEATURE EXTRACTOR
# ============================================================================

class StreamingSD15Extractor:
    """
    Extract features from SD1.5 UNet blocks.
    Returns SPATIAL features [B, C, H, W], not pooled.

    Features are captured by forward hooks registered on the selected UNet
    blocks. Call ``remove_hooks()`` to detach them once the extractor is no
    longer needed.
    """

    def __init__(
        self,
        model_id: str = "runwayml/stable-diffusion-v1-5",
        device: str = "cuda",
        active_blocks: Optional[List[str]] = None
    ):
        """
        Load the pipeline in float16 and hook the requested UNet blocks.

        Args:
            model_id: Identifier passed to ``StableDiffusionPipeline.from_pretrained``
            device: Device the pipeline is moved to
            active_blocks: Block names to capture ('down_<i>', 'mid', 'up_<i>');
                defaults to ['down_0', 'down_1', 'mid', 'up_0']
        """
        self.device = device
        # Default blocks compatible with GeoDavidCollective
        self.active_blocks = active_blocks or ['down_0', 'down_1', 'mid', 'up_0']

        # Load pipeline
        self.pipe = StableDiffusionPipeline.from_pretrained(
            model_id,
            torch_dtype=torch.float16,
            safety_checker=None
        ).to(device)

        self.unet = self.pipe.unet
        self.unet.eval()

        # Setup hooks
        self.features = {}
        self._register_hooks()

        print(f"✓ StreamingSD15Extractor initialized")
        print(f"  Active blocks: {self.active_blocks}")

    def remove_hooks(self):
        """Detach every forward hook this extractor has registered."""
        for handle in getattr(self, '_hook_handles', []):
            handle.remove()
        self._hook_handles = []

    def _register_hooks(self):
        """Register forward hooks to capture block features."""
        # Drop hooks from any previous registration so they are never doubled.
        self.remove_hooks()

        def make_hook(name):
            def hook(module, input, output):
                # Some blocks return a tuple; the first element is stored.
                if isinstance(output, tuple):
                    output = output[0]
                self.features[name] = output.detach()
            return hook

        # Every block that can be hooked, keyed by the name used in active_blocks
        available = {f'down_{i}': block for i, block in enumerate(self.unet.down_blocks)}
        available['mid'] = self.unet.mid_block
        available.update(
            {f'up_{i}': block for i, block in enumerate(self.unet.up_blocks)}
        )

        wanted = set(self.active_blocks)
        for name, block in available.items():
            if name in wanted:
                self._hook_handles.append(
                    block.register_forward_hook(make_hook(name))
                )

        # Names with no matching block would otherwise be ignored silently and
        # only surface later as missing keys in the extracted features.
        unknown = sorted(wanted - set(available))
        if unknown:
            print(f"⚠️  StreamingSD15Extractor: no UNet block for {unknown}; "
                  f"available: {list(available)}")

    @torch.no_grad()
    def extract_features(
        self,
        prompts: List[str],
        timesteps: torch.Tensor
    ) -> Dict[str, torch.Tensor]:
        """
        Extract features for a batch of prompts at given timesteps.

        Args:
            prompts: Text prompts, one per sample
            timesteps: Timesteps passed to the UNet, one per prompt

        Returns:
            Dict mapping block names to spatial features [B, C, H, W] in float32
        """
        self.features = {}

        # Encode prompts
        text_inputs = self.pipe.tokenizer(
            prompts,
            padding="max_length",
            max_length=self.pipe.tokenizer.model_max_length,
            truncation=True,
            return_tensors="pt"
        )

        text_embeddings = self.pipe.text_encoder(
            text_inputs.input_ids.to(self.device)
        )[0]

        # Create noisy latents (float16 to match the dtype the pipeline was loaded with)
        latents = torch.randn(
            len(prompts), 4, 64, 64,
            device=self.device,
            dtype=torch.float16
        )

        # Keep every UNet input on the same device, whatever the caller passed
        timesteps = timesteps.to(self.device)

        # Forward pass through UNet (features captured by hooks)
        _ = self.unet(
            latents,
            timesteps,
            encoder_hidden_states=text_embeddings
        )

        # Convert features to float32 (collective expects float32)
        return {name: feat.float() for name, feat in self.features.items()}


# ============================================================================
# DATASET
# ============================================================================

class SymbolicPromptDataset(Dataset):
    """Generate prompts on-the-fly using synthesis system.

    The complexity level of every sample is drawn once, at construction time;
    the prompt text and the timestep are generated anew on every access.
    """

    def __init__(
        self,
        num_samples: int = 10000,
        complexity_distribution: Optional[Dict[int, float]] = None,
        bias_weights_path: Optional[str] = None,
        seed: Optional[int] = None,
        log_synthesis_stats: bool = False,
        num_timesteps: int = 1000
    ):
        """
        Args:
            num_samples: Number of samples the dataset reports via ``len``
            complexity_distribution: Mapping complexity level -> probability
                (must sum to 1); defaults to a distribution over levels 1-5
            bias_weights_path: Optional path handed to
                ``SynthesisSystem.load_bias_weights``
            seed: Seed for the synthesis system and for the complexity draw
            log_synthesis_stats: Stored as ``self.log_stats``
            num_timesteps: Timesteps are sampled uniformly from
                ``[0, num_timesteps)``
        """
        self.num_samples = num_samples
        self.log_stats = log_synthesis_stats
        self.num_timesteps = num_timesteps

        # Initialize synthesis system
        self.synthesizer = SynthesisSystem(seed=seed)

        # Load bias weights if provided
        if bias_weights_path:
            self.synthesizer.load_bias_weights(bias_weights_path)

        # Complexity distribution (1-5)
        self.complexity_dist = complexity_distribution or {
            1: 0.05,
            2: 0.15,
            3: 0.40,
            4: 0.30,
            5: 0.10
        }

        # Precompute complexity for each sample
        complexities = list(self.complexity_dist.keys())
        probs = [self.complexity_dist[c] for c in complexities]

        rng = np.random.RandomState(seed)
        self.complexities = rng.choice(
            complexities,
            size=num_samples,
            p=probs
        )

        print(f"✓ SymbolicPromptDataset: {num_samples:,} samples")
        print(f"  Complexity distribution: {self.complexity_dist}")

    def __len__(self):
        return self.num_samples

    def __getitem__(self, idx):
        """Return a dict with a fresh 'prompt', a random 'timestep' and the 'complexity'."""
        # Plain Python int rather than a NumPy scalar, so downstream code
        # (synthesis, logging, collation) never sees a NumPy type.
        complexity = int(self.complexities[idx])

        # Generate prompt
        result = self.synthesizer.synthesize(complexity=complexity)
        prompt = result['text']  # Extract text from synthesis result dict

        # Random timestep in [0, num_timesteps). Drawn from torch's global RNG,
        # which DataLoader seeds differently in every worker process; NumPy's
        # global RNG may be duplicated across workers.
        timestep = int(torch.randint(0, self.num_timesteps, (1,)).item())

        return {
            'prompt': prompt,
            'timestep': timestep,
            'complexity': complexity
        }


def collate_symbolic_batch(batch):
    """Merge per-sample dicts into one batch dict.

    Prompts stay a plain list of strings; timesteps and complexities are
    stacked into int64 tensors.
    """
    prompts = []
    timestep_values = []
    complexity_values = []
    for sample in batch:
        prompts.append(sample['prompt'])
        timestep_values.append(sample['timestep'])
        complexity_values.append(sample['complexity'])

    timesteps = torch.tensor(timestep_values, dtype=torch.long)
    complexities = torch.tensor(complexity_values, dtype=torch.long)
    return {
        'prompts': prompts,
        'timesteps': timesteps,
        'complexities': complexities
    }


# ============================================================================
# SPATIAL POOLING
# ============================================================================

def spatial_pool_features(
    features_dict: Dict[str, torch.Tensor],
    pool_mode: str = 'mean'
) -> Dict[str, torch.Tensor]:
    """
    Pool spatial dimensions [B, C, H, W] → [B, C].

    Tensors that are not 4-dimensional are passed through unchanged.

    Args:
        features_dict: Dict of spatial features
        pool_mode: 'mean', 'max', or 'adaptive' (0.7 * mean + 0.3 * max)

    Returns:
        Dict of pooled features [B, C]

    Raises:
        ValueError: If ``pool_mode`` is not one of the supported modes
    """
    valid_modes = ('mean', 'max', 'adaptive')
    if pool_mode not in valid_modes:
        # Without this check an unknown mode would silently drop every 4-D
        # feature from the result and fail much later with a missing key.
        raise ValueError(
            f"Unknown pool_mode {pool_mode!r}; expected one of {valid_modes}"
        )

    pooled = {}

    for name, feat in features_dict.items():
        if feat.dim() != 4:
            pooled[name] = feat
            continue

        if pool_mode == 'mean':
            pooled[name] = feat.mean(dim=[-2, -1])  # [B, C]
        elif pool_mode == 'max':
            pooled[name] = feat.flatten(2).max(dim=-1).values  # [B, C]
        else:  # 'adaptive': mix mean and max
            mean_pool = feat.mean(dim=[-2, -1])
            max_pool = feat.flatten(2).max(dim=-1).values
            pooled[name] = 0.7 * mean_pool + 0.3 * max_pool

    return pooled


# ============================================================================
# TRAINING FUNCTION
# ============================================================================

def _save_training_checkpoint(path, collective, optimizer, scheduler, epoch, global_step, history):
    """Write model, optimizer and scheduler state plus training history to ``path``."""
    torch.save({
        'epoch': epoch,
        'global_step': global_step,
        'model_state_dict': collective.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'scheduler_state_dict': scheduler.state_dict(),
        'history': history,
        'model_info': collective.get_model_info()
    }, path)


def train_geo_collective(
    collective: GeoDavidCollective,
    extractor: StreamingSD15Extractor,
    dataloader: DataLoader,
    num_epochs: int,
    device: str,
    learning_rate: float = 1e-4,
    weight_decay: float = 0.01,
    log_dir: str = "./runs/geo_collective",
    prompt_log_path: str = "./prompts_all_epochs.jsonl",
    checkpoint_interval: int = 5,
    checkpoint_dir: str = "./checkpoints",
    pool_mode: str = 'mean'
):
    """
    Train GeoDavidCollective with full data pipeline.

    Per batch: log the prompts, extract UNet features, pool them, feed a
    lightly noised copy to the collective, and optimise the loss returned by
    ``collective.compute_loss`` against the clean pooled features.

    Args:
        collective: GeoDavidCollective model (enhanced version)
        extractor: StreamingSD15Extractor
        dataloader: DataLoader with symbolic prompts
        num_epochs: Number of training epochs
        device: 'cuda' or 'cpu'
        learning_rate: Learning rate
        weight_decay: Weight decay for AdamW
        log_dir: TensorBoard log directory
        prompt_log_path: Path to save prompt logs
        checkpoint_interval: Save checkpoint every N epochs (values <= 0
            disable the periodic checkpoints; the final one is always written)
        checkpoint_dir: Checkpoint directory
        pool_mode: Spatial pooling mode ('mean', 'max', 'adaptive')

    Returns:
        Tuple ``(collective, history)`` where ``history`` maps each metric
        name to its list of per-epoch averages.

    Raises:
        ValueError: If ``dataloader`` yields no batches.
    """
    batches_per_epoch = len(dataloader)
    if batches_per_epoch == 0:
        # Fail before any file or log directory is created; the per-epoch
        # averages below would otherwise divide by zero.
        raise ValueError("dataloader is empty: at least one batch per epoch is required")

    # Setup
    collective = collective.to(device)
    collective.train()

    # Optimizer & Scheduler (scheduler is stepped once per batch)
    optimizer = torch.optim.AdamW(
        collective.parameters(),
        lr=learning_rate,
        weight_decay=weight_decay
    )

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer,
        T_max=num_epochs * batches_per_epoch
    )

    # Logging
    writer = SummaryWriter(log_dir=log_dir)
    prompt_logger = PromptLogger(output_path=prompt_log_path)

    # Checkpoint dir
    ckpt_dir = Path(checkpoint_dir)
    ckpt_dir.mkdir(parents=True, exist_ok=True)

    # history key -> (key in the metrics dict from compute_loss, TensorBoard tag)
    metric_map = {
        'total_loss': ('total_loss', 'Train/total_loss'),
        'avg_cayley': ('avg/cayley', 'Train/cayley'),
        'avg_timestep_acc': ('avg/timestep_acc', 'Train/timestep_acc'),
        'avg_pattern_acc': ('avg/pattern_acc', 'Train/pattern_acc'),
        'avg_full_acc': ('avg/full_acc', 'Train/full_acc'),
    }

    # Training history
    history = {name: [] for name in metric_map}

    global_step = 0

    print("\n" + "="*80)
    print("STARTING TRAINING")
    print("="*80)
    print(f"  Device: {device}")
    print(f"  Epochs: {num_epochs}")
    print(f"  Batches per epoch: {batches_per_epoch}")
    print(f"  Learning rate: {learning_rate}")
    print(f"  Spatial pooling: {pool_mode}")
    print("="*80 + "\n")

    try:
        for epoch in range(num_epochs):
            epoch_totals = {name: 0.0 for name in metric_map}
            batches_seen = 0

            pbar = tqdm(dataloader, desc=f"Epoch {epoch+1}/{num_epochs}")

            for batch_idx, batch in enumerate(pbar):
                prompts = batch['prompts']
                timesteps_cpu = batch['timesteps']
                timesteps = timesteps_cpu.to(device)

                # Log prompts from the tensor as delivered by the dataloader,
                # avoiding a device -> host copy
                prompt_logger.log_batch(
                    prompts,
                    timesteps_cpu,
                    epoch,
                    batch_idx,
                    global_step
                )

                # Extract SD1.5 features (spatial [B, C, H, W])
                with torch.no_grad():
                    teacher_features_spatial = extractor.extract_features(prompts, timesteps)

                # Pool to [B, C]
                teacher_features = spatial_pool_features(teacher_features_spatial, pool_mode)

                # The model sees a slightly noised copy; the loss is computed
                # against the clean teacher features.
                features_dict = {
                    name: feat + 0.01 * torch.randn_like(feat)
                    for name, feat in teacher_features.items()
                }

                # Forward pass
                outputs = collective(features_dict, timesteps.float())

                # Compute loss (now internal to model)
                loss, metrics = collective.compute_loss(
                    outputs,
                    teacher_features,
                    timesteps.float()
                )

                # Backward pass
                optimizer.zero_grad()
                loss.backward()

                # Gradient clipping
                grad_norm = torch.nn.utils.clip_grad_norm_(
                    collective.parameters(), max_norm=1.0
                )

                optimizer.step()
                scheduler.step()

                # Convert to plain floats so the running sums never keep a
                # tensor (or its autograd graph) alive across the epoch.
                batch_metrics = {
                    name: float(metrics[source])
                    for name, (source, _) in metric_map.items()
                }

                # Accumulate metrics + TensorBoard logging (every step)
                for name, value in batch_metrics.items():
                    epoch_totals[name] += value
                    writer.add_scalar(metric_map[name][1], value, global_step)
                batches_seen += 1

                writer.add_scalar('Train/grad_norm', float(grad_norm), global_step)
                writer.add_scalar('Train/lr', optimizer.param_groups[0]['lr'], global_step)

                # Update progress bar
                pbar.set_postfix({
                    'loss': f"{batch_metrics['total_loss']:.4f}",
                    'cayley': f"{batch_metrics['avg_cayley']:.4f}",
                    't_acc': f"{batch_metrics['avg_timestep_acc']:.1%}",
                    'p_acc': f"{batch_metrics['avg_pattern_acc']:.1%}",
                    'f_acc': f"{batch_metrics['avg_full_acc']:.1%}"
                })

                global_step += 1

                # Cleanup
                del teacher_features_spatial, teacher_features, features_dict, outputs, loss
                torch.cuda.empty_cache()

            # Epoch summary
            for name, total in epoch_totals.items():
                avg = total / batches_seen
                history[name].append(avg)
                writer.add_scalar(f'Epoch/{name}', avg, epoch)

            print(f"\nEpoch {epoch+1} Summary:")
            print(f"  Loss: {history['total_loss'][-1]:.4f}")
            print(f"  Cayley: {history['avg_cayley'][-1]:.4f}")
            print(f"  Timestep Acc: {history['avg_timestep_acc'][-1]:.2%}")
            print(f"  Pattern Acc: {history['avg_pattern_acc'][-1]:.2%}")
            print(f"  Full Acc: {history['avg_full_acc'][-1]:.2%}")

            # Get Cantor alphas
            alphas = collective.get_cantor_alphas()
            print(f"  Cantor Alphas: {', '.join(f'{k}={v:.3f}' for k, v in alphas.items())}")

            # Save checkpoint
            if checkpoint_interval > 0 and (epoch + 1) % checkpoint_interval == 0:
                checkpoint_path = ckpt_dir / f"checkpoint_epoch_{epoch+1:03d}.pt"
                _save_training_checkpoint(
                    checkpoint_path, collective, optimizer, scheduler,
                    epoch + 1, global_step, history
                )
                print(f"  ✓ Saved: {checkpoint_path}")

                # Convert to safetensors
                if HF_AVAILABLE:
                    safetensors_path = checkpoint_path.with_suffix('.safetensors')
                    save_file(collective.state_dict(), str(safetensors_path))
                    print(f"  ✓ Safetensors: {safetensors_path}")

        # Final checkpoint
        final_path = ckpt_dir / "final.pt"
        _save_training_checkpoint(
            final_path, collective, optimizer, scheduler,
            num_epochs, global_step, history
        )
        print(f"\n✅ Final checkpoint: {final_path}")

        # Prompt stats (size_mb may be absent when the log file is missing)
        prompt_stats = prompt_logger.get_stats()
        print(f"✅ Prompts logged: {prompt_stats['total']:,} "
              f"({prompt_stats.get('size_mb', 0.0):.2f} MB)")
    finally:
        # Flush and close the TensorBoard writer even if training is interrupted
        writer.close()

    return collective, history


# ============================================================================
# MAIN
# ============================================================================

def main():
    """
    Configure and run a full GeoDavidCollective training session.

    Builds the block / loss configuration, loads the SD1.5 feature extractor,
    creates the symbolic prompt dataset and the model, then trains.

    Returns:
        Tuple ``(collective, history)`` from ``train_geo_collective``, or
        ``(None, None)`` when no CUDA device is available and training is
        skipped. A 2-tuple is returned in both cases so callers can always
        unpack the result.
    """
    print("\n" + "="*80)
    print("GEODAVIDCOLLECTIVE TRAINER - ENHANCED VERSION")
    print("ProjectiveHead multi-expert architecture with proven data pipeline")
    print("="*80)

    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f"\nDevice: {device}")

    if device == "cpu":
        print("⚠️  WARNING: Training requires GPU!")
        return None, None

    # ========================================================================
    # CONFIGURATION - ENHANCED
    # ========================================================================

    # Block configurations with ProjectiveHead parameters
    # These use auto-configuration based on scale_dim, but you can override
    block_configs = {
        # Down blocks (4)
        'down_0': {
            'input_dim': 320,
            'scale_dim': 128,  # Compressed for efficiency
            'use_belly': True,
            'belly_expand': 2.0,
            # ProjectiveHead auto-configured (3 experts, 3 gates)
        },
        'down_1': {
            'input_dim': 640,
            'scale_dim': 192,
            'use_belly': True,
            'belly_expand': 2.0,
            # ProjectiveHead auto-configured (3 experts, 3 gates)
        },
        'down_2': {
            'input_dim': 1280,
            'scale_dim': 256,
            'use_belly': True,
            'belly_expand': 2.0,
            # ProjectiveHead auto-configured (3 experts, 3 gates)
        },
        'down_3': {
            'input_dim': 1280,
            'scale_dim': 256,
            'use_belly': True,
            'belly_expand': 2.0,
            # ProjectiveHead auto-configured (3 experts, 3 gates)
        },
        # Mid block (1) - Most important, use higher capacity
        'mid': {
            'input_dim': 1280,
            'scale_dim': 256,
            'use_belly': True,
            'belly_expand': 1.5,
            # Custom ProjectiveHead: more experts for mid block
            'num_experts': 4,
            'num_gate_heads': 4,
        },
        # Up blocks (4)
        'up_0': {
            'input_dim': 1280,
            'scale_dim': 256,
            'use_belly': True,
            'belly_expand': 2.0,
            # ProjectiveHead auto-configured
        },
        'up_1': {
            'input_dim': 1280,
            'scale_dim': 256,
            'use_belly': True,
            'belly_expand': 2.0,
            # ProjectiveHead auto-configured
        },
        'up_2': {
            'input_dim': 640,
            'scale_dim': 192,
            'use_belly': True,
            'belly_expand': 2.0,
            # ProjectiveHead auto-configured
        },
        'up_3': {
            'input_dim': 320,
            'scale_dim': 128,
            'use_belly': True,
            'belly_expand': 1.5,
            # ProjectiveHead auto-configured
        }
    }

    # Block importance weights (mid-block most important)
    block_weights = {
        'down_0': 0.8,
        'down_1': 1.0,
        'down_2': 1.2,
        'down_3': 1.3,
        'mid': 1.5,      # Highest importance
        'up_0': 1.3,
        'up_1': 1.2,
        'up_2': 1.0,
        'up_3': 0.8
    }

    # Geometric loss configuration - FIXED cayley_weight
    loss_config = {
        'feature_similarity_weight': 0.4,
        'rose_weight': 0.25,
        'ce_weight': 0.15,
        'pattern_diversity_weight': 0.05,
        'cayley_weight': 0.10,  # FIXED: Was 0.0001, now 0.10 for proper geometry
        'cantor_coherence_weight': 0.05,
        'use_soft_assignment': True,
        'temperature': 0.1,
        # Cayley loss parameters
        'cayley_volume_floor': 1e-4,
        'cayley_chaos_scale': 1.0,
        'cayley_edge_weight': 0.5,
        'cayley_gram_weight': 0.1,
    }

    print("\n✓ Configuration loaded (ENHANCED)")
    print(f"  Blocks: {len(block_configs)}")
    print(f"  ProjectiveHead: Auto-configured based on scale_dim")
    print(f"  Loss weights: feature={loss_config['feature_similarity_weight']:.2f}, "
          f"rose={loss_config['rose_weight']:.2f}, cayley={loss_config['cayley_weight']:.2f}")

    # ========================================================================
    # LOAD SD1.5
    # ========================================================================

    print(f"\n[1/4] Loading SD1.5...")
    extractor = StreamingSD15Extractor(
        model_id="runwayml/stable-diffusion-v1-5",
        device=device,
        active_blocks=list(block_configs.keys())
    )

    # ========================================================================
    # CREATE DATASET
    # ========================================================================

    print(f"\n[2/4] Creating symbolic dataset...")
    dataset = SymbolicPromptDataset(
        num_samples=10000,
        complexity_distribution={
            1: 0.05, 2: 0.15, 3: 0.40, 4: 0.25, 5: 0.15
        },
        seed=42
    )

    # Single source of truth for the batch size (used by the loader and the log line)
    batch_size = 16  # Adjusted for GPU memory

    dataloader = DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=2,
        pin_memory=True,
        collate_fn=collate_symbolic_batch
    )

    print(f"  ✓ Dataset: {len(dataset):,} samples")
    print(f"  ✓ Batch size: {batch_size}")

    # ========================================================================
    # INITIALIZE MODEL - ENHANCED
    # ========================================================================

    print(f"\n[3/4] Initializing GeoDavidCollective (ENHANCED)...")
    collective = GeoDavidCollective(
        block_configs=block_configs,
        num_timestep_bins=100,
        num_patterns_per_bin=10,
        block_weights=block_weights,
        loss_config=loss_config
    )

    model_info = collective.get_model_info()
    print(f"  ✓ Architecture: {model_info['architecture']}")
    print(f"  ✓ Blocks: {model_info['num_blocks']}")
    print(f"  ✓ Total parameters: {model_info['total_parameters']:,}")
    print(f"  ✓ Timestep bins: {model_info['num_timestep_bins']}")
    print(f"  ✓ Patterns per bin: {model_info['num_patterns_per_bin']}")

    # Show ProjectiveHead configs (first few blocks only)
    preview_count = 3
    companions = list(model_info['companions'].items())
    print(f"\n  ProjectiveHead Configurations:")
    for block_name, companion_info in companions[:preview_count]:
        print(f"    {block_name}:")
        print(f"      Timestep head: {companion_info['timestep_head']['num_experts']} experts, "
              f"{companion_info['timestep_head']['num_gate_heads']} gates")
    remaining = len(companions) - preview_count
    if remaining > 0:
        # Only mention further blocks when there are any (avoids a negative count)
        print(f"    ... and {remaining} more blocks")

    # ========================================================================
    # TRAIN
    # ========================================================================

    print(f"\n[4/4] Starting training...")
    collective, history = train_geo_collective(
        collective=collective,
        extractor=extractor,
        dataloader=dataloader,
        num_epochs=10,
        device=device,
        learning_rate=1e-3,
        weight_decay=0.001,
        log_dir="./runs/geo_collective_enhanced",
        prompt_log_path="./prompts_enhanced.jsonl",
        checkpoint_interval=2,
        checkpoint_dir="./checkpoints_enhanced",
        pool_mode='mean'
    )

    print("\n" + "="*80)
    print("TRAINING COMPLETE!")
    print("="*80)
    print(f"\n📊 Final Metrics:")
    print(f"  Loss: {history['total_loss'][-1]:.4f}")
    print(f"  Cayley: {history['avg_cayley'][-1]:.4f}")
    print(f"  Timestep Acc: {history['avg_timestep_acc'][-1]:.2%}")
    print(f"  Pattern Acc: {history['avg_pattern_acc'][-1]:.2%}")
    print(f"  Full Acc: {history['avg_full_acc'][-1]:.2%}")

    return collective, history


if __name__ == "__main__":
    # main() can bail out early (no GPU available) without producing a model;
    # tolerate a missing result instead of failing on tuple unpacking.
    _result = main()
    collective, history = _result if _result is not None else (None, None)