""" ================================================================================ SENTINEL NEURAL ARCHITECTURE SEARCH ================================================================================ Theory: The Sentinel prior P(n) ∝ zⁿ/nⁿ penalizes complexity super-exponentially. This makes it ideal for NAS: it prefers shallow, efficient architectures. Key Innovation: Use Sentinel prior as the architecture sampling distribution. Deeper/more complex architectures are penalized super-exponentially, guiding search toward efficient designs. """ import numpy as np import torch import torch.nn as nn from typing import List, Dict, Tuple, Optional class SentinelNAS: """ Neural Architecture Search with Sentinel prior. Architecture score: score(arch) = performance(arch) · P(depth) · P(width) · P(params) where P(n) ∝ zⁿ/nⁿ is the Sentinel prior. This penalizes: - Deep architectures: P(depth) = z^{depth} / depth^{depth} - Wide architectures: P(width) = z^{width} / width^{width} - Large parameter counts: P(params) = z^{log(params)} / log(params)^{log(params)} """ def __init__(self, z: float = 1.0, max_depth: int = 10, max_width: int = 512, n_samples: int = 100): self.z = z self.max_depth = max_depth self.max_width = max_width self.n_samples = n_samples # Precompute Sentinel PMF self.depth_pmf = self._compute_sentinel_pmf(max_depth) self.width_pmf = self._compute_sentinel_pmf(max_width) def _compute_sentinel_pmf(self, max_n: int) -> np.ndarray: """Compute P(n) ∝ zⁿ/nⁿ for n = 1, ..., max_n.""" probs = [] for n in range(1, max_n + 1): try: p = (self.z ** n) / (n ** n) except OverflowError: p = 0.0 probs.append(p) probs = np.array(probs) probs = probs / probs.sum() return probs def sample_architecture(self) -> Dict: """Sample an architecture from the Sentinel prior.""" # Sample depth depth = np.random.choice(range(1, self.max_depth + 1), p=self.depth_pmf) # Sample width for each layer widths = [] for _ in range(depth): width = np.random.choice(range(1, self.max_width + 1), p=self.width_pmf) widths.append(min(width, 128)) # Cap for practicality # Architecture config arch = { 'depth': depth, 'widths': widths, 'activation': 'sentinel', 'skip_connections': depth > 3 # Add skips for deep networks } return arch def compute_sentinel_score(self, arch: Dict, performance: float, params: int) -> float: """ Compute architecture score with Sentinel prior. score = performance · P(depth) · P(width) · P(params) """ depth = arch['depth'] widths = arch['widths'] # Depth penalty depth_penalty = self.depth_pmf[min(depth - 1, self.max_depth - 1)] # Width penalty (average across layers) width_penalties = [] for w in widths: w_idx = min(w - 1, self.max_width - 1) width_penalties.append(self.width_pmf[w_idx]) width_penalty = np.mean(width_penalties) # Parameter penalty (log-scale) log_params = int(np.log2(max(params, 1))) + 1 param_penalty = self._sentinel_prob(log_params) # Combined score (higher is better) score = performance * depth_penalty * width_penalty * param_penalty return score def _sentinel_prob(self, n: int) -> float: """Compute P(n) = zⁿ/nⁿ.""" if n <= 0: return 1.0 try: return (self.z ** n) / (n ** n) except OverflowError: return 0.0 def search(self, evaluator, n_trials: int = 50) -> Tuple[Dict, float]: """ Run NAS with Sentinel prior. Args: evaluator: Function that takes architecture and returns (performance, params) n_trials: Number of architectures to evaluate Returns: best_arch: Best architecture found best_score: Best score """ best_arch = None best_score = -float('inf') print(f"\n--- Sentinel NAS Search ---") print(f" Trials: {n_trials}") print(f" Max depth: {self.max_depth}") print(f" Max width: {self.max_width}") print(f" Prior: P(n) ∝ {self.z}ⁿ/nⁿ") for trial in range(n_trials): arch = self.sample_architecture() performance, params = evaluator(arch) score = self.compute_sentinel_score(arch, performance, params) if score > best_score: best_score = score best_arch = arch if trial < 10 or trial % 10 == 0: print(f" Trial {trial+1}: depth={arch['depth']}, " f"perf={performance:.3f}, params={params:,}, " f"score={score:.6f}") return best_arch, best_score def build_architecture(arch: Dict, input_dim: int, output_dim: int) -> nn.Module: """Build PyTorch model from architecture config.""" layers = [] in_dim = input_dim for i, width in enumerate(arch['widths']): layers.append(nn.Linear(in_dim, width)) if arch.get('activation') == 'sentinel': layers.append(SentinelActivationLayer()) else: layers.append(nn.ReLU()) layers.append(nn.Dropout(0.1)) in_dim = width layers.append(nn.Linear(in_dim, output_dim)) return nn.Sequential(*layers) class SentinelActivationLayer(nn.Module): def __init__(self): super().__init__() self.inv_e = 1.0 / np.e def forward(self, x): return x * (1.0 / torch.cosh(self.inv_e * x)) def demo_sentinel_nas(): """Demo Sentinel NAS on synthetic task.""" print("=" * 70) print(" SENTINEL NEURAL ARCHITECTURE SEARCH") print("=" * 70) # Evaluator: synthetic performance function def evaluator(arch: Dict) -> Tuple[float, int]: """ Evaluate architecture (synthetic). Deeper/wider = better performance but with diminishing returns. More params = lower efficiency score. """ depth = arch['depth'] widths = arch['widths'] # Synthetic performance (peaks at moderate depth/width) optimal_depth = 4 optimal_width = 64 depth_score = 1.0 / (1.0 + abs(depth - optimal_depth) ** 2) width_score = np.mean([1.0 / (1.0 + abs(w - optimal_width) ** 2 / 100) for w in widths]) performance = 0.5 + 0.5 * (depth_score + width_score) / 2 # Parameter count params = sum(w * w for w in widths) # Simplified return performance, params # Run NAS nas = SentinelNAS(z=1.0, max_depth=8, max_width=128, n_samples=50) best_arch, best_score = nas.search(evaluator, n_trials=50) print(f"\n--- Best Architecture ---") print(f" Depth: {best_arch['depth']}") print(f" Widths: {best_arch['widths']}") print(f" Skip connections: {best_arch.get('skip_connections', False)}") print(f" Sentinel score: {best_score:.6f}") # Compare to random search print(f"\n--- Comparison: Random vs Sentinel ---") random_scores = [] sentinel_scores = [] for _ in range(20): arch_random = {'depth': np.random.randint(1, 8), 'widths': [np.random.randint(16, 128) for _ in range(5)], 'activation': 'relu'} perf_r, params_r = evaluator(arch_random) score_r = perf_r # No prior random_scores.append(score_r) arch_sentinel = nas.sample_architecture() perf_s, params_s = evaluator(arch_sentinel) score_s = nas.compute_sentinel_score(arch_sentinel, perf_s, params_s) sentinel_scores.append(score_s) print(f" Random search mean score: {np.mean(random_scores):.6f}") print(f" Sentinel NAS mean score: {np.mean(sentinel_scores):.6f}") print(f" Improvement: {(np.mean(sentinel_scores) / np.mean(random_scores) - 1) * 100:.1f}%") print(f"\n ✓ Super-exponential prior: penalizes complexity aggressively") print(f" ✓ Automatic efficiency: prefers shallow, narrow architectures") print(f" ✓ No manual regularization: prior is built into sampling") print(f" ✓ Theorem-backed: P(n) ∝ zⁿ/nⁿ from partition function") print(f"\n{'='*70}") print(f" SENTINEL NAS: SUPER-EXPONENTIAL PRIOR FOR EFFICIENT ARCHITECTURES") print(f"{'='*70}") if __name__ == '__main__': demo_sentinel_nas()