import torch
from safetensors.torch import save_file

weights = {}

# 4-bit / 4-bit Unsigned Integer Divider (Restoring Division)
# Inputs: a3,a2,a1,a0, b3,b2,b1,b0 (8 inputs)
# Outputs: q3,q2,q1,q0, r3,r2,r1,r0 (8 outputs)
#
# Algorithm: 4 stages of shift-compare-subtract
#
# Gate convention (the same one the eval_* reference functions below use):
# a gate outputs 1 when (weight . inputs + bias) >= 0, otherwise 0.


def add_xor(name):
    """Register the three threshold gates of a 2-input XOR under ``name``.

    The XOR is built as AND(OR(a, b), NAND(a, b)); each gate gets a
    ``.weight`` and ``.bias`` entry in the module-level ``weights`` dict.
    """
    # OR: a + b - 1 >= 0
    weights[f'{name}.or.weight'] = torch.tensor([[1.0, 1.0]], dtype=torch.float32)
    weights[f'{name}.or.bias'] = torch.tensor([-1.0], dtype=torch.float32)
    # NAND: -a - b + 1 >= 0
    weights[f'{name}.nand.weight'] = torch.tensor([[-1.0, -1.0]], dtype=torch.float32)
    weights[f'{name}.nand.bias'] = torch.tensor([1.0], dtype=torch.float32)
    # AND: a + b - 2 >= 0
    weights[f'{name}.and.weight'] = torch.tensor([[1.0, 1.0]], dtype=torch.float32)
    weights[f'{name}.and.bias'] = torch.tensor([-2.0], dtype=torch.float32)


def add_fa(name):
    """Register a full adder under ``name``: two XORs plus a carry gate."""
    add_xor(f'{name}.xor1')
    add_xor(f'{name}.sum')
    # Carry is a 3-input majority: a + b + cin - 2 >= 0
    weights[f'{name}.carry.weight'] = torch.tensor([[1.0, 1.0, 1.0]], dtype=torch.float32)
    weights[f'{name}.carry.bias'] = torch.tensor([-2.0], dtype=torch.float32)


def add_subtractor_4bit(name):
    """Register four full adders ``{name}.fs0`` .. ``{name}.fs3``.

    NOTE(review): the reference model (``sub_4bit``) feeds these adders the
    inverted subtrahend bits; no inverter gates are stored here.
    """
    for i in range(4):
        add_fa(f'{name}.fs{i}')


def add_mux(name):
    """Register a 2:1 multiplexer under ``name`` as OR(sel0, sel1).

    Presumably ``sel0`` sees (data0, select) and ``sel1`` sees
    (data1, select) -- the input wiring is not stored in this file.
    """
    # sel0 = x AND NOT sel: x - sel - 1 >= 0.
    # The bias must be -1. With -2 the pre-activation is negative for every
    # binary input, so the gate could never fire and the mux could never
    # pass its "select = 0" input.
    weights[f'{name}.sel0.weight'] = torch.tensor([[1.0, -1.0]], dtype=torch.float32)
    weights[f'{name}.sel0.bias'] = torch.tensor([-1.0], dtype=torch.float32)
    # sel1 = x AND sel: x + sel - 2 >= 0
    weights[f'{name}.sel1.weight'] = torch.tensor([[1.0, 1.0]], dtype=torch.float32)
    weights[f'{name}.sel1.bias'] = torch.tensor([-2.0], dtype=torch.float32)
    # Output OR: sel0 + sel1 - 1 >= 0
    weights[f'{name}.or.weight'] = torch.tensor([[1.0, 1.0]], dtype=torch.float32)
    weights[f'{name}.or.bias'] = torch.tensor([-1.0], dtype=torch.float32)


def add_mux4(name):
    """Register four 1-bit multiplexers ``{name}.m0`` .. ``{name}.m3``."""
    for i in range(4):
        add_mux(f'{name}.m{i}')


# 4 division stages
for stage in range(4):
    add_subtractor_4bit(f'stage{stage}.sub')
    add_mux4(f'stage{stage}.mux')

save_file(weights, 'model.safetensors')


# Reference model used for verification.
# NOTE: these functions hard-code the gate thresholds; they do not read the
# tensors stored in ``weights``.

def eval_xor(a, b):
    """Return a XOR b for bits ``a`` and ``b`` as AND(OR, NAND)."""
    or_out = int(a + b >= 1)
    nand_out = int(-a - b + 1 >= 0)
    return int(or_out + nand_out >= 2)


def eval_fa(a, b, cin):
    """Return ``(sum, carry)`` of a 1-bit full adder."""
    x1 = eval_xor(a, b)
    s = eval_xor(x1, cin)
    c = int(a + b + cin >= 2)
    return s, c


def sub_4bit(a, b):
    """Subtract ``b`` from ``a`` using only the low 4 bits of each.

    Computes a + (~b) + 1 through a chain of four full adders.

    Returns:
        ``(result, no_borrow)``: the 4-bit difference and the final
        carry-out, which is 1 when no borrow occurred.
    """
    a_bits = [(a >> i) & 1 for i in range(4)]
    b_bits = [(b >> i) & 1 for i in range(4)]
    borrow = 1  # initial carry-in of 1 supplies the "+1" of two's complement
    d_bits = []
    for i in range(4):
        b_inv = 1 - b_bits[i]
        d, borrow = eval_fa(a_bits[i], b_inv, borrow)
        d_bits.append(d)
    result = sum(d_bits[i] << i for i in range(4))
    no_borrow = borrow
    return result, no_borrow


def divide_4bit(dividend, divisor):
    """Divide two 4-bit unsigned integers by restoring division.

    Returns:
        ``(quotient, remainder)``. A zero divisor returns ``(15, 0)``.
    """
    if divisor == 0:
        return 15, 0
    a = [(dividend >> i) & 1 for i in range(4)]
    p = 0  # partial remainder
    q_bits = []
    # Process dividend bits from most significant to least significant.
    for i in range(3, -1, -1):
        p = (p << 1) | a[i]
        diff, ge = sub_4bit(p, divisor)
        if ge:
            # p >= divisor: keep the difference, quotient bit is 1.
            q_bits.append(1)
            p = diff
        else:
            # p < divisor: keep (restore) p, quotient bit is 0.
            q_bits.append(0)
    # Bits were collected MSB-first; flip so index i is bit i.
    q_bits.reverse()
    q = sum(q_bits[i] << i for i in range(4))
    return q, p


print("Verifying 4-bit Divider...")
errors = 0
for a in range(16):
    for b in range(1, 16):
        q, r = divide_4bit(a, b)
        expected_q = a // b
        expected_r = a % b
        if q != expected_q or r != expected_r:
            errors += 1
            # Only report the first few mismatches.
            if errors <= 5:
                print(f"ERROR: {a}/{b} = {q} r {r}, expected {expected_q} r {expected_r}")

if errors == 0:
    print("All 240 test cases passed! (16 dividends × 15 non-zero divisors)")
else:
    print(f"FAILED: {errors} errors")

# Sum of absolute values over every stored weight and bias.
mag = sum(t.abs().sum().item() for t in weights.values())
print(f"Magnitude: {mag:.0f}")
print(f"Parameters: {sum(t.numel() for t in weights.values())}")