AbstractPhil committed on
Commit
6a8a9d6
·
verified ·
1 Parent(s): e4bc421

Update config.json

Browse files
Files changed (1) hide show
  1. config.json +139 -21
config.json CHANGED
@@ -4,65 +4,183 @@
4
  "framework": "pytorch",
5
  "version": "1.0",
6
  "trained_epoch": 10,
7
- "training_date": "2025-10-28T03:18:14.663211",
8
  "num_blocks": 9,
9
  "total_parameters": 884327310,
10
  "num_timestep_bins": 100,
11
  "num_patterns_per_bin": 10,
12
- "blocks": {
13
  "down_0": {
14
  "input_dim": 320,
15
- "scale_dim": 128
 
 
 
 
 
16
  },
17
  "down_1": {
18
  "input_dim": 640,
19
- "scale_dim": 192
 
 
 
 
 
20
  },
21
  "down_2": {
22
  "input_dim": 1280,
23
- "scale_dim": 256
 
 
 
 
 
24
  },
25
  "down_3": {
26
  "input_dim": 1280,
27
- "scale_dim": 256
 
 
 
 
 
28
  },
29
  "mid": {
30
  "input_dim": 1280,
31
  "scale_dim": 256,
32
- "num_experts": 4
 
 
 
 
33
  },
34
  "up_0": {
35
  "input_dim": 1280,
36
- "scale_dim": 256
 
 
 
 
 
37
  },
38
  "up_1": {
39
  "input_dim": 1280,
40
- "scale_dim": 256
 
 
 
 
 
41
  },
42
  "up_2": {
43
  "input_dim": 640,
44
- "scale_dim": 192
 
 
 
 
 
45
  },
46
  "up_3": {
47
  "input_dim": 320,
48
- "scale_dim": 128
 
 
 
 
 
49
  }
50
  },
51
- "training": {
52
- "base_model": "runwayml/stable-diffusion-v1-5",
53
- "dataset_size": 10000,
54
- "batch_size": 16,
55
- "learning_rate": 0.001,
56
- "weight_decay": 0.001,
57
- "num_epochs": 10,
58
- "pool_mode": "mean"
 
 
59
  },
60
  "loss_config": {
61
  "feature_similarity_weight": 0.4,
62
  "rose_weight": 0.25,
63
  "ce_weight": 0.15,
64
- "cayley_weight": 0.1,
65
  "pattern_diversity_weight": 0.05,
66
- "cantor_coherence_weight": 0.05
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
  }
68
  }
 
4
  "framework": "pytorch",
5
  "version": "1.0",
6
  "trained_epoch": 10,
7
+ "training_date": "2025-10-28T03:34:06.000667",
8
  "num_blocks": 9,
9
  "total_parameters": 884327310,
10
  "num_timestep_bins": 100,
11
  "num_patterns_per_bin": 10,
12
+ "block_configs": {
13
  "down_0": {
14
  "input_dim": 320,
15
+ "scale_dim": 128,
16
+ "use_belly": true,
17
+ "belly_expand": 2.0,
18
+ "num_experts": 3,
19
+ "num_gate_heads": 3,
20
+ "projective_head": "auto"
21
  },
22
  "down_1": {
23
  "input_dim": 640,
24
+ "scale_dim": 192,
25
+ "use_belly": true,
26
+ "belly_expand": 2.0,
27
+ "num_experts": 3,
28
+ "num_gate_heads": 3,
29
+ "projective_head": "auto"
30
  },
31
  "down_2": {
32
  "input_dim": 1280,
33
+ "scale_dim": 256,
34
+ "use_belly": true,
35
+ "belly_expand": 2.0,
36
+ "num_experts": 3,
37
+ "num_gate_heads": 3,
38
+ "projective_head": "auto"
39
  },
40
  "down_3": {
41
  "input_dim": 1280,
42
+ "scale_dim": 256,
43
+ "use_belly": true,
44
+ "belly_expand": 2.0,
45
+ "num_experts": 3,
46
+ "num_gate_heads": 3,
47
+ "projective_head": "auto"
48
  },
49
  "mid": {
50
  "input_dim": 1280,
51
  "scale_dim": 256,
52
+ "use_belly": true,
53
+ "belly_expand": 1.5,
54
+ "num_experts": 4,
55
+ "num_gate_heads": 4,
56
+ "projective_head": "custom"
57
  },
58
  "up_0": {
59
  "input_dim": 1280,
60
+ "scale_dim": 256,
61
+ "use_belly": true,
62
+ "belly_expand": 2.0,
63
+ "num_experts": 3,
64
+ "num_gate_heads": 3,
65
+ "projective_head": "auto"
66
  },
67
  "up_1": {
68
  "input_dim": 1280,
69
+ "scale_dim": 256,
70
+ "use_belly": true,
71
+ "belly_expand": 2.0,
72
+ "num_experts": 3,
73
+ "num_gate_heads": 3,
74
+ "projective_head": "auto"
75
  },
76
  "up_2": {
77
  "input_dim": 640,
78
+ "scale_dim": 192,
79
+ "use_belly": true,
80
+ "belly_expand": 2.0,
81
+ "num_experts": 3,
82
+ "num_gate_heads": 3,
83
+ "projective_head": "auto"
84
  },
85
  "up_3": {
86
  "input_dim": 320,
87
+ "scale_dim": 128,
88
+ "use_belly": true,
89
+ "belly_expand": 1.5,
90
+ "num_experts": 3,
91
+ "num_gate_heads": 3,
92
+ "projective_head": "auto"
93
  }
94
  },
95
+ "block_weights": {
96
+ "down_0": 0.8,
97
+ "down_1": 1.0,
98
+ "down_2": 1.2,
99
+ "down_3": 1.3,
100
+ "mid": 1.5,
101
+ "up_0": 1.3,
102
+ "up_1": 1.2,
103
+ "up_2": 1.0,
104
+ "up_3": 0.8
105
  },
106
  "loss_config": {
107
  "feature_similarity_weight": 0.4,
108
  "rose_weight": 0.25,
109
  "ce_weight": 0.15,
 
110
  "pattern_diversity_weight": 0.05,
111
+ "cayley_weight": 0.1,
112
+ "cantor_coherence_weight": 0.05,
113
+ "use_soft_assignment": true,
114
+ "temperature": 0.1,
115
+ "cayley_volume_floor": 0.0001,
116
+ "cayley_chaos_scale": 1.0,
117
+ "cayley_edge_weight": 0.5,
118
+ "cayley_gram_weight": 0.1
119
+ },
120
+ "training": {
121
+ "base_model": "runwayml/stable-diffusion-v1-5",
122
+ "sd_blocks_used": [
123
+ "down_0",
124
+ "down_1",
125
+ "down_2",
126
+ "down_3",
127
+ "mid",
128
+ "up_0",
129
+ "up_1",
130
+ "up_2",
131
+ "up_3"
132
+ ],
133
+ "dataset": {
134
+ "type": "SymbolicPromptDataset",
135
+ "num_samples": 10000,
136
+ "complexity_distribution": {
137
+ "1": 0.05,
138
+ "2": 0.15,
139
+ "3": 0.4,
140
+ "4": 0.25,
141
+ "5": 0.15
142
+ },
143
+ "seed": 42
144
+ },
145
+ "batch_size": 16,
146
+ "num_epochs": 10,
147
+ "optimizer": {
148
+ "type": "AdamW",
149
+ "learning_rate": 0.001,
150
+ "weight_decay": 0.001
151
+ },
152
+ "pool_mode": "mean",
153
+ "checkpoint_interval": 2,
154
+ "num_workers": 2,
155
+ "pin_memory": true
156
+ },
157
+ "feature_extraction": {
158
+ "method": "SD1.5 UNet Hooks",
159
+ "spatial_features": true,
160
+ "pooling": "mean",
161
+ "dtype": "float32"
162
+ },
163
+ "capabilities": {
164
+ "timestep_classification": true,
165
+ "pattern_classification": true,
166
+ "joint_classification": true,
167
+ "num_classes": 1000,
168
+ "geometric_constraints": true,
169
+ "multi_expert_routing": true
170
+ },
171
+ "companions": {
172
+ "type": "GeoDavidCompanion",
173
+ "timestep_head": "ProjectiveHead",
174
+ "pattern_head": "ProjectiveHead",
175
+ "geometric_features": [
176
+ "cayley_menger_volume",
177
+ "edge_lengths",
178
+ "gram_matrix"
179
+ ],
180
+ "loss_functions": [
181
+ "rose",
182
+ "cayley",
183
+ "cantor"
184
+ ]
185
  }
186
  }