{ "image_size": 448, "patch_size": 16, "num_channels": 3, "color_order": "RGB", "resize_mode": "letterbox", "pad_color_rgb": [ 114, 114, 114 ], "normalize_mean": [ 0.5, 0.5, 0.5 ], "normalize_std": [ 0.5, 0.5, 0.5 ], "input_dtype": "float32", "input_layout": "BCHW", "padding_mask": { "required_for_pytorch_forward": true, "shape": "(B, H, W)", "dtype": "bool", "convention": "True = padded pixel, False = valid pixel", "all_false_equivalent_to": "no masking" }, "output": { "name": "tag_logits", "shape": "(B, 19294)", "activation": "apply sigmoid for probabilities" }, "notes": [ "Letterbox resize keeps aspect ratio; pad with the RGB color above to reach 448x448.", "Normalize per-channel: (x/255 - mean) / std after letterboxing.", "Built-in recommended thresholds are in pr_thresholds.json (per-tag and global)." ] }