goodpixelltd commited on
Commit
ecdc352
·
verified ·
1 Parent(s): 8d77feb

Upload folder using huggingface_hub

Browse files
distilbert_mlm.mlmodelc/analytics/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:10f2c23b71cd59734511b6e10fb8a48aec95421790caefc308db5520ece6f501
3
+ size 243
distilbert_mlm.mlmodelc/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:748bc523def8a60f7cfc97b8fd49954b3f21920c0e36ee16d18e223b1e9c3fcd
3
+ size 481
distilbert_mlm.mlmodelc/metadata.json ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "shortDescription" : "DistilBERT MLM for grammar correction (multilingual, fast)",
4
+ "metadataOutputVersion" : "3.0",
5
+ "outputSchema" : [
6
+ {
7
+ "hasShapeFlexibility" : "0",
8
+ "isOptional" : "0",
9
+ "dataType" : "Float32",
10
+ "formattedType" : "MultiArray (Float32 1 × 128 × 119547)",
11
+ "shortDescription" : "",
12
+ "shape" : "[1, 128, 119547]",
13
+ "name" : "logits",
14
+ "type" : "MultiArray"
15
+ }
16
+ ],
17
+ "version" : "1.0",
18
+ "modelParameters" : [
19
+
20
+ ],
21
+ "author" : "Typo00",
22
+ "specificationVersion" : 6,
23
+ "storagePrecision" : "Float16",
24
+ "mlProgramOperationTypeHistogram" : {
25
+ "Linear" : 38,
26
+ "Select" : 6,
27
+ "LayerNorm" : 14,
28
+ "Transpose" : 24,
29
+ "Matmul" : 12,
30
+ "Sub" : 1,
31
+ "Gelu" : 7,
32
+ "Softmax" : 6,
33
+ "Mul" : 7,
34
+ "Cast" : 2,
35
+ "Equal" : 1,
36
+ "Add" : 13,
37
+ "ExpandDims" : 2,
38
+ "Reshape" : 24,
39
+ "Gather" : 1,
40
+ "Tile" : 1
41
+ },
42
+ "computePrecision" : "Mixed (Float16, Float32, Int32)",
43
+ "stateSchema" : [
44
+
45
+ ],
46
+ "isUpdatable" : "0",
47
+ "availability" : {
48
+ "macOS" : "12.0",
49
+ "tvOS" : "15.0",
50
+ "visionOS" : "1.0",
51
+ "watchOS" : "8.0",
52
+ "iOS" : "15.0",
53
+ "macCatalyst" : "15.0"
54
+ },
55
+ "modelType" : {
56
+ "name" : "MLModelType_mlProgram"
57
+ },
58
+ "inputSchema" : [
59
+ {
60
+ "hasShapeFlexibility" : "0",
61
+ "isOptional" : "0",
62
+ "dataType" : "Int32",
63
+ "formattedType" : "MultiArray (Int32 1 × 128)",
64
+ "shortDescription" : "",
65
+ "shape" : "[1, 128]",
66
+ "name" : "input_ids",
67
+ "type" : "MultiArray"
68
+ },
69
+ {
70
+ "hasShapeFlexibility" : "0",
71
+ "isOptional" : "0",
72
+ "dataType" : "Int32",
73
+ "formattedType" : "MultiArray (Int32 1 × 128)",
74
+ "shortDescription" : "",
75
+ "shape" : "[1, 128]",
76
+ "name" : "attention_mask",
77
+ "type" : "MultiArray"
78
+ }
79
+ ],
80
+ "userDefinedMetadata" : {
81
+ "com.github.apple.coremltools.conversion_date" : "2026-01-12",
82
+ "com.github.apple.coremltools.source" : "torch==2.7.0",
83
+ "com.github.apple.coremltools.version" : "9.0",
84
+ "com.github.apple.coremltools.source_dialect" : "TorchScript"
85
+ },
86
+ "generatedClassName" : "distilbert_mlm",
87
+ "method" : "predict"
88
+ }
89
+ ]
distilbert_mlm.mlmodelc/model.mil ADDED
@@ -0,0 +1,388 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ program(1.0)
2
+ [buildInfo = dict<tensor<string, []>, tensor<string, []>>({{"coremlc-component-MIL", "3510.2.1"}, {"coremlc-version", "3500.32.1"}, {"coremltools-component-torch", "2.7.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "9.0"}})]
3
+ {
4
+ func main<ios15>(tensor<int32, [1, 128]> attention_mask, tensor<int32, [1, 128]> input_ids) {
5
+ tensor<int32, []> input_embeds_axis_0 = const()[name = tensor<string, []>("input_embeds_axis_0"), val = tensor<int32, []>(0)];
6
+ tensor<fp16, [119547, 768]> embeddings_word_embeddings_weight_to_fp16 = const()[name = tensor<string, []>("embeddings_word_embeddings_weight_to_fp16"), val = tensor<fp16, [119547, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(64)))];
7
+ tensor<fp16, [1, 128, 768]> input_embeds_cast_fp16 = gather(axis = input_embeds_axis_0, indices = input_ids, x = embeddings_word_embeddings_weight_to_fp16)[name = tensor<string, []>("input_embeds_cast_fp16")];
8
+ tensor<fp16, [1, 128, 768]> position_embeddings_1_to_fp16 = const()[name = tensor<string, []>("position_embeddings_1_to_fp16"), val = tensor<fp16, [1, 128, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(183624320)))];
9
+ tensor<fp16, [1, 128, 768]> input_3_cast_fp16 = add(x = input_embeds_cast_fp16, y = position_embeddings_1_to_fp16)[name = tensor<string, []>("input_3_cast_fp16")];
10
+ tensor<int32, [1]> input_5_axes_0 = const()[name = tensor<string, []>("input_5_axes_0"), val = tensor<int32, [1]>([-1])];
11
+ tensor<fp16, [768]> embeddings_LayerNorm_weight_to_fp16 = const()[name = tensor<string, []>("embeddings_LayerNorm_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(183820992)))];
12
+ tensor<fp16, [768]> embeddings_LayerNorm_bias_to_fp16 = const()[name = tensor<string, []>("embeddings_LayerNorm_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(183822592)))];
13
+ tensor<fp16, []> var_13_to_fp16 = const()[name = tensor<string, []>("op_13_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
14
+ tensor<fp16, [1, 128, 768]> input_5_cast_fp16 = layer_norm(axes = input_5_axes_0, beta = embeddings_LayerNorm_bias_to_fp16, epsilon = var_13_to_fp16, gamma = embeddings_LayerNorm_weight_to_fp16, x = input_3_cast_fp16)[name = tensor<string, []>("input_5_cast_fp16")];
15
+ tensor<int32, [1]> var_38_axes_0 = const()[name = tensor<string, []>("op_38_axes_0"), val = tensor<int32, [1]>([1])];
16
+ tensor<int32, [1, 1, 128]> var_38 = expand_dims(axes = var_38_axes_0, x = attention_mask)[name = tensor<string, []>("op_38")];
17
+ tensor<int32, [1]> extended_attention_mask_axes_0 = const()[name = tensor<string, []>("extended_attention_mask_axes_0"), val = tensor<int32, [1]>([2])];
18
+ tensor<int32, [1, 1, 1, 128]> extended_attention_mask = expand_dims(axes = extended_attention_mask_axes_0, x = var_38)[name = tensor<string, []>("extended_attention_mask")];
19
+ tensor<fp16, []> var_46_to_fp16 = const()[name = tensor<string, []>("op_46_to_fp16"), val = tensor<fp16, []>(0x1p+0)];
20
+ tensor<string, []> var_45_to_fp16_dtype_0 = const()[name = tensor<string, []>("op_45_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
21
+ tensor<fp16, [1, 1, 1, 128]> extended_attention_mask_to_fp16 = cast(dtype = var_45_to_fp16_dtype_0, x = extended_attention_mask)[name = tensor<string, []>("cast_45")];
22
+ tensor<fp16, [1, 1, 1, 128]> var_48_cast_fp16 = sub(x = var_46_to_fp16, y = extended_attention_mask_to_fp16)[name = tensor<string, []>("op_48_cast_fp16")];
23
+ tensor<fp16, []> var_49_to_fp16 = const()[name = tensor<string, []>("op_49_to_fp16"), val = tensor<fp16, []>(-0x1.388p+13)];
24
+ tensor<fp16, [1, 1, 1, 128]> mask_1_cast_fp16 = mul(x = var_48_cast_fp16, y = var_49_to_fp16)[name = tensor<string, []>("mask_1_cast_fp16")];
25
+ tensor<int32, []> var_62 = const()[name = tensor<string, []>("op_62"), val = tensor<int32, []>(-1)];
26
+ tensor<fp16, [768, 768]> transformer_layer_0_attention_q_lin_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_0_attention_q_lin_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(183824192)))];
27
+ tensor<fp16, [768]> transformer_layer_0_attention_q_lin_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_0_attention_q_lin_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(185003904)))];
28
+ tensor<fp16, [1, 128, 768]> linear_0_cast_fp16 = linear(bias = transformer_layer_0_attention_q_lin_bias_to_fp16, weight = transformer_layer_0_attention_q_lin_weight_to_fp16, x = input_5_cast_fp16)[name = tensor<string, []>("linear_0_cast_fp16")];
29
+ tensor<int32, [4]> var_93 = const()[name = tensor<string, []>("op_93"), val = tensor<int32, [4]>([1, -1, 12, 64])];
30
+ tensor<fp16, [1, 128, 12, 64]> var_94_cast_fp16 = reshape(shape = var_93, x = linear_0_cast_fp16)[name = tensor<string, []>("op_94_cast_fp16")];
31
+ tensor<fp16, [768, 768]> transformer_layer_0_attention_k_lin_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_0_attention_k_lin_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(185005504)))];
32
+ tensor<fp16, [768]> transformer_layer_0_attention_k_lin_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_0_attention_k_lin_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(186185216)))];
33
+ tensor<fp16, [1, 128, 768]> linear_1_cast_fp16 = linear(bias = transformer_layer_0_attention_k_lin_bias_to_fp16, weight = transformer_layer_0_attention_k_lin_weight_to_fp16, x = input_5_cast_fp16)[name = tensor<string, []>("linear_1_cast_fp16")];
34
+ tensor<int32, [4]> var_99 = const()[name = tensor<string, []>("op_99"), val = tensor<int32, [4]>([1, -1, 12, 64])];
35
+ tensor<fp16, [1, 128, 12, 64]> var_100_cast_fp16 = reshape(shape = var_99, x = linear_1_cast_fp16)[name = tensor<string, []>("op_100_cast_fp16")];
36
+ tensor<fp16, [768, 768]> transformer_layer_0_attention_v_lin_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_0_attention_v_lin_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(186186816)))];
37
+ tensor<fp16, [768]> transformer_layer_0_attention_v_lin_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_0_attention_v_lin_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(187366528)))];
38
+ tensor<fp16, [1, 128, 768]> linear_2_cast_fp16 = linear(bias = transformer_layer_0_attention_v_lin_bias_to_fp16, weight = transformer_layer_0_attention_v_lin_weight_to_fp16, x = input_5_cast_fp16)[name = tensor<string, []>("linear_2_cast_fp16")];
39
+ tensor<int32, [4]> var_105 = const()[name = tensor<string, []>("op_105"), val = tensor<int32, [4]>([1, -1, 12, 64])];
40
+ tensor<fp16, [1, 128, 12, 64]> var_106_cast_fp16 = reshape(shape = var_105, x = linear_2_cast_fp16)[name = tensor<string, []>("op_106_cast_fp16")];
41
+ tensor<int32, [4]> v_1_perm_0 = const()[name = tensor<string, []>("v_1_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
42
+ tensor<fp16, []> _inversed_q_3_y_0_to_fp16 = const()[name = tensor<string, []>("_inversed_q_3_y_0_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
43
+ tensor<fp16, [1, 128, 12, 64]> _inversed_q_3_cast_fp16 = mul(x = var_94_cast_fp16, y = _inversed_q_3_y_0_to_fp16)[name = tensor<string, []>("_inversed_q_3_cast_fp16")];
44
+ tensor<bool, []> scores_1_transpose_x_0 = const()[name = tensor<string, []>("scores_1_transpose_x_0"), val = tensor<bool, []>(false)];
45
+ tensor<bool, []> scores_1_transpose_y_0 = const()[name = tensor<string, []>("scores_1_transpose_y_0"), val = tensor<bool, []>(false)];
46
+ tensor<int32, [4]> transpose_18_perm_0 = const()[name = tensor<string, []>("transpose_18_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
47
+ tensor<int32, [4]> transpose_19_perm_0 = const()[name = tensor<string, []>("transpose_19_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
48
+ tensor<fp16, [1, 12, 64, 128]> transpose_19 = transpose(perm = transpose_19_perm_0, x = var_100_cast_fp16)[name = tensor<string, []>("transpose_51")];
49
+ tensor<fp16, [1, 12, 128, 64]> transpose_18 = transpose(perm = transpose_18_perm_0, x = _inversed_q_3_cast_fp16)[name = tensor<string, []>("transpose_52")];
50
+ tensor<fp16, [1, 12, 128, 128]> scores_1_cast_fp16 = matmul(transpose_x = scores_1_transpose_x_0, transpose_y = scores_1_transpose_y_0, x = transpose_18, y = transpose_19)[name = tensor<string, []>("scores_1_cast_fp16")];
51
+ tensor<fp16, []> var_64_promoted_to_fp16 = const()[name = tensor<string, []>("op_64_promoted_to_fp16"), val = tensor<fp16, []>(0x0p+0)];
52
+ tensor<bool, [1, 1, 1, 128]> var_112_cast_fp16 = equal(x = mask_1_cast_fp16, y = var_64_promoted_to_fp16)[name = tensor<string, []>("op_112_cast_fp16")];
53
+ tensor<int32, [4]> mask_3_reps_0 = const()[name = tensor<string, []>("mask_3_reps_0"), val = tensor<int32, [4]>([1, 12, 128, 1])];
54
+ tensor<bool, [1, 12, 128, 128]> mask_3 = tile(reps = mask_3_reps_0, x = var_112_cast_fp16)[name = tensor<string, []>("mask_3")];
55
+ tensor<fp16, []> const_3_to_fp16 = const()[name = tensor<string, []>("const_3_to_fp16"), val = tensor<fp16, []>(-inf)];
56
+ tensor<fp16, [1, 12, 128, 128]> input_7_cast_fp16 = select(a = const_3_to_fp16, b = scores_1_cast_fp16, cond = mask_3)[name = tensor<string, []>("input_7_cast_fp16")];
57
+ tensor<fp16, [1, 12, 128, 128]> input_9_cast_fp16 = softmax(axis = var_62, x = input_7_cast_fp16)[name = tensor<string, []>("input_9_cast_fp16")];
58
+ tensor<bool, []> x_7_transpose_x_0 = const()[name = tensor<string, []>("x_7_transpose_x_0"), val = tensor<bool, []>(false)];
59
+ tensor<bool, []> x_7_transpose_y_0 = const()[name = tensor<string, []>("x_7_transpose_y_0"), val = tensor<bool, []>(false)];
60
+ tensor<fp16, [1, 12, 128, 64]> v_1_cast_fp16 = transpose(perm = v_1_perm_0, x = var_106_cast_fp16)[name = tensor<string, []>("transpose_53")];
61
+ tensor<fp16, [1, 12, 128, 64]> x_7_cast_fp16 = matmul(transpose_x = x_7_transpose_x_0, transpose_y = x_7_transpose_y_0, x = input_9_cast_fp16, y = v_1_cast_fp16)[name = tensor<string, []>("x_7_cast_fp16")];
62
+ tensor<int32, [4]> var_122_perm_0 = const()[name = tensor<string, []>("op_122_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
63
+ tensor<int32, [3]> var_124 = const()[name = tensor<string, []>("op_124"), val = tensor<int32, [3]>([1, -1, 768])];
64
+ tensor<fp16, [1, 128, 12, 64]> var_122_cast_fp16 = transpose(perm = var_122_perm_0, x = x_7_cast_fp16)[name = tensor<string, []>("transpose_50")];
65
+ tensor<fp16, [1, 128, 768]> input_11_cast_fp16 = reshape(shape = var_124, x = var_122_cast_fp16)[name = tensor<string, []>("input_11_cast_fp16")];
66
+ tensor<fp16, [768, 768]> transformer_layer_0_attention_out_lin_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_0_attention_out_lin_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(187368128)))];
67
+ tensor<fp16, [768]> transformer_layer_0_attention_out_lin_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_0_attention_out_lin_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(188547840)))];
68
+ tensor<fp16, [1, 128, 768]> linear_3_cast_fp16 = linear(bias = transformer_layer_0_attention_out_lin_bias_to_fp16, weight = transformer_layer_0_attention_out_lin_weight_to_fp16, x = input_11_cast_fp16)[name = tensor<string, []>("linear_3_cast_fp16")];
69
+ tensor<fp16, [1, 128, 768]> input_13_cast_fp16 = add(x = linear_3_cast_fp16, y = input_5_cast_fp16)[name = tensor<string, []>("input_13_cast_fp16")];
70
+ tensor<int32, [1]> input_15_axes_0 = const()[name = tensor<string, []>("input_15_axes_0"), val = tensor<int32, [1]>([-1])];
71
+ tensor<fp16, [768]> transformer_layer_0_sa_layer_norm_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_0_sa_layer_norm_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(188549440)))];
72
+ tensor<fp16, [768]> transformer_layer_0_sa_layer_norm_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_0_sa_layer_norm_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(188551040)))];
73
+ tensor<fp16, []> var_66_to_fp16 = const()[name = tensor<string, []>("op_66_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
74
+ tensor<fp16, [1, 128, 768]> input_15_cast_fp16 = layer_norm(axes = input_15_axes_0, beta = transformer_layer_0_sa_layer_norm_bias_to_fp16, epsilon = var_66_to_fp16, gamma = transformer_layer_0_sa_layer_norm_weight_to_fp16, x = input_13_cast_fp16)[name = tensor<string, []>("input_15_cast_fp16")];
75
+ tensor<fp16, [3072, 768]> transformer_layer_0_ffn_lin1_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_0_ffn_lin1_weight_to_fp16"), val = tensor<fp16, [3072, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(188552640)))];
76
+ tensor<fp16, [3072]> transformer_layer_0_ffn_lin1_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_0_ffn_lin1_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(193271296)))];
77
+ tensor<fp16, [1, 128, 3072]> linear_4_cast_fp16 = linear(bias = transformer_layer_0_ffn_lin1_bias_to_fp16, weight = transformer_layer_0_ffn_lin1_weight_to_fp16, x = input_15_cast_fp16)[name = tensor<string, []>("linear_4_cast_fp16")];
78
+ tensor<string, []> input_19_mode_0 = const()[name = tensor<string, []>("input_19_mode_0"), val = tensor<string, []>("EXACT")];
79
+ tensor<fp16, [1, 128, 3072]> input_19_cast_fp16 = gelu(mode = input_19_mode_0, x = linear_4_cast_fp16)[name = tensor<string, []>("input_19_cast_fp16")];
80
+ tensor<fp16, [768, 3072]> transformer_layer_0_ffn_lin2_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_0_ffn_lin2_weight_to_fp16"), val = tensor<fp16, [768, 3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(193277504)))];
81
+ tensor<fp16, [768]> transformer_layer_0_ffn_lin2_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_0_ffn_lin2_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(197996160)))];
82
+ tensor<fp16, [1, 128, 768]> linear_5_cast_fp16 = linear(bias = transformer_layer_0_ffn_lin2_bias_to_fp16, weight = transformer_layer_0_ffn_lin2_weight_to_fp16, x = input_19_cast_fp16)[name = tensor<string, []>("linear_5_cast_fp16")];
83
+ tensor<fp16, [1, 128, 768]> input_23_cast_fp16 = add(x = linear_5_cast_fp16, y = input_15_cast_fp16)[name = tensor<string, []>("input_23_cast_fp16")];
84
+ tensor<int32, [1]> query_3_axes_0 = const()[name = tensor<string, []>("query_3_axes_0"), val = tensor<int32, [1]>([-1])];
85
+ tensor<fp16, [768]> transformer_layer_0_output_layer_norm_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_0_output_layer_norm_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(197997760)))];
86
+ tensor<fp16, [768]> transformer_layer_0_output_layer_norm_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_0_output_layer_norm_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(197999360)))];
87
+ tensor<fp16, [1, 128, 768]> query_3_cast_fp16 = layer_norm(axes = query_3_axes_0, beta = transformer_layer_0_output_layer_norm_bias_to_fp16, epsilon = var_66_to_fp16, gamma = transformer_layer_0_output_layer_norm_weight_to_fp16, x = input_23_cast_fp16)[name = tensor<string, []>("query_3_cast_fp16")];
88
+ tensor<fp16, [768, 768]> transformer_layer_1_attention_q_lin_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_1_attention_q_lin_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(198000960)))];
89
+ tensor<fp16, [768]> transformer_layer_1_attention_q_lin_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_1_attention_q_lin_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(199180672)))];
90
+ tensor<fp16, [1, 128, 768]> linear_6_cast_fp16 = linear(bias = transformer_layer_1_attention_q_lin_bias_to_fp16, weight = transformer_layer_1_attention_q_lin_weight_to_fp16, x = query_3_cast_fp16)[name = tensor<string, []>("linear_6_cast_fp16")];
91
+ tensor<int32, [4]> var_162 = const()[name = tensor<string, []>("op_162"), val = tensor<int32, [4]>([1, -1, 12, 64])];
92
+ tensor<fp16, [1, 128, 12, 64]> var_163_cast_fp16 = reshape(shape = var_162, x = linear_6_cast_fp16)[name = tensor<string, []>("op_163_cast_fp16")];
93
+ tensor<fp16, [768, 768]> transformer_layer_1_attention_k_lin_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_1_attention_k_lin_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(199182272)))];
94
+ tensor<fp16, [768]> transformer_layer_1_attention_k_lin_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_1_attention_k_lin_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(200361984)))];
95
+ tensor<fp16, [1, 128, 768]> linear_7_cast_fp16 = linear(bias = transformer_layer_1_attention_k_lin_bias_to_fp16, weight = transformer_layer_1_attention_k_lin_weight_to_fp16, x = query_3_cast_fp16)[name = tensor<string, []>("linear_7_cast_fp16")];
96
+ tensor<int32, [4]> var_168 = const()[name = tensor<string, []>("op_168"), val = tensor<int32, [4]>([1, -1, 12, 64])];
97
+ tensor<fp16, [1, 128, 12, 64]> var_169_cast_fp16 = reshape(shape = var_168, x = linear_7_cast_fp16)[name = tensor<string, []>("op_169_cast_fp16")];
98
+ tensor<fp16, [768, 768]> transformer_layer_1_attention_v_lin_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_1_attention_v_lin_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(200363584)))];
99
+ tensor<fp16, [768]> transformer_layer_1_attention_v_lin_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_1_attention_v_lin_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(201543296)))];
100
+ tensor<fp16, [1, 128, 768]> linear_8_cast_fp16 = linear(bias = transformer_layer_1_attention_v_lin_bias_to_fp16, weight = transformer_layer_1_attention_v_lin_weight_to_fp16, x = query_3_cast_fp16)[name = tensor<string, []>("linear_8_cast_fp16")];
101
+ tensor<int32, [4]> var_174 = const()[name = tensor<string, []>("op_174"), val = tensor<int32, [4]>([1, -1, 12, 64])];
102
+ tensor<fp16, [1, 128, 12, 64]> var_175_cast_fp16 = reshape(shape = var_174, x = linear_8_cast_fp16)[name = tensor<string, []>("op_175_cast_fp16")];
103
+ tensor<int32, [4]> v_3_perm_0 = const()[name = tensor<string, []>("v_3_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
104
+ tensor<fp16, []> _inversed_q_7_y_0_to_fp16 = const()[name = tensor<string, []>("_inversed_q_7_y_0_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
105
+ tensor<fp16, [1, 128, 12, 64]> _inversed_q_7_cast_fp16 = mul(x = var_163_cast_fp16, y = _inversed_q_7_y_0_to_fp16)[name = tensor<string, []>("_inversed_q_7_cast_fp16")];
106
+ tensor<bool, []> scores_3_transpose_x_0 = const()[name = tensor<string, []>("scores_3_transpose_x_0"), val = tensor<bool, []>(false)];
107
+ tensor<bool, []> scores_3_transpose_y_0 = const()[name = tensor<string, []>("scores_3_transpose_y_0"), val = tensor<bool, []>(false)];
108
+ tensor<int32, [4]> transpose_20_perm_0 = const()[name = tensor<string, []>("transpose_20_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
109
+ tensor<int32, [4]> transpose_21_perm_0 = const()[name = tensor<string, []>("transpose_21_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
110
+ tensor<fp16, [1, 12, 64, 128]> transpose_21 = transpose(perm = transpose_21_perm_0, x = var_169_cast_fp16)[name = tensor<string, []>("transpose_47")];
111
+ tensor<fp16, [1, 12, 128, 64]> transpose_20 = transpose(perm = transpose_20_perm_0, x = _inversed_q_7_cast_fp16)[name = tensor<string, []>("transpose_48")];
112
+ tensor<fp16, [1, 12, 128, 128]> scores_3_cast_fp16 = matmul(transpose_x = scores_3_transpose_x_0, transpose_y = scores_3_transpose_y_0, x = transpose_20, y = transpose_21)[name = tensor<string, []>("scores_3_cast_fp16")];
113
+ tensor<fp16, []> const_6_to_fp16 = const()[name = tensor<string, []>("const_6_to_fp16"), val = tensor<fp16, []>(-inf)];
114
+ tensor<fp16, [1, 12, 128, 128]> input_25_cast_fp16 = select(a = const_6_to_fp16, b = scores_3_cast_fp16, cond = mask_3)[name = tensor<string, []>("input_25_cast_fp16")];
115
+ tensor<fp16, [1, 12, 128, 128]> input_27_cast_fp16 = softmax(axis = var_62, x = input_25_cast_fp16)[name = tensor<string, []>("input_27_cast_fp16")];
116
+ tensor<bool, []> x_15_transpose_x_0 = const()[name = tensor<string, []>("x_15_transpose_x_0"), val = tensor<bool, []>(false)];
117
+ tensor<bool, []> x_15_transpose_y_0 = const()[name = tensor<string, []>("x_15_transpose_y_0"), val = tensor<bool, []>(false)];
118
+ tensor<fp16, [1, 12, 128, 64]> v_3_cast_fp16 = transpose(perm = v_3_perm_0, x = var_175_cast_fp16)[name = tensor<string, []>("transpose_49")];
119
+ tensor<fp16, [1, 12, 128, 64]> x_15_cast_fp16 = matmul(transpose_x = x_15_transpose_x_0, transpose_y = x_15_transpose_y_0, x = input_27_cast_fp16, y = v_3_cast_fp16)[name = tensor<string, []>("x_15_cast_fp16")];
120
+ tensor<int32, [4]> var_191_perm_0 = const()[name = tensor<string, []>("op_191_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
121
+ tensor<int32, [3]> var_193 = const()[name = tensor<string, []>("op_193"), val = tensor<int32, [3]>([1, -1, 768])];
122
+ tensor<fp16, [1, 128, 12, 64]> var_191_cast_fp16 = transpose(perm = var_191_perm_0, x = x_15_cast_fp16)[name = tensor<string, []>("transpose_46")];
123
+ tensor<fp16, [1, 128, 768]> input_29_cast_fp16 = reshape(shape = var_193, x = var_191_cast_fp16)[name = tensor<string, []>("input_29_cast_fp16")];
124
+ tensor<fp16, [768, 768]> transformer_layer_1_attention_out_lin_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_1_attention_out_lin_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(201544896)))];
125
+ tensor<fp16, [768]> transformer_layer_1_attention_out_lin_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_1_attention_out_lin_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(202724608)))];
126
+ tensor<fp16, [1, 128, 768]> linear_9_cast_fp16 = linear(bias = transformer_layer_1_attention_out_lin_bias_to_fp16, weight = transformer_layer_1_attention_out_lin_weight_to_fp16, x = input_29_cast_fp16)[name = tensor<string, []>("linear_9_cast_fp16")];
127
+ tensor<fp16, [1, 128, 768]> input_31_cast_fp16 = add(x = linear_9_cast_fp16, y = query_3_cast_fp16)[name = tensor<string, []>("input_31_cast_fp16")];
128
+ tensor<int32, [1]> input_33_axes_0 = const()[name = tensor<string, []>("input_33_axes_0"), val = tensor<int32, [1]>([-1])];
129
+ tensor<fp16, [768]> transformer_layer_1_sa_layer_norm_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_1_sa_layer_norm_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(202726208)))];
130
+ tensor<fp16, [768]> transformer_layer_1_sa_layer_norm_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_1_sa_layer_norm_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(202727808)))];
131
+ tensor<fp16, [1, 128, 768]> input_33_cast_fp16 = layer_norm(axes = input_33_axes_0, beta = transformer_layer_1_sa_layer_norm_bias_to_fp16, epsilon = var_66_to_fp16, gamma = transformer_layer_1_sa_layer_norm_weight_to_fp16, x = input_31_cast_fp16)[name = tensor<string, []>("input_33_cast_fp16")];
132
+ tensor<fp16, [3072, 768]> transformer_layer_1_ffn_lin1_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_1_ffn_lin1_weight_to_fp16"), val = tensor<fp16, [3072, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(202729408)))];
133
+ tensor<fp16, [3072]> transformer_layer_1_ffn_lin1_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_1_ffn_lin1_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(207448064)))];
134
+ tensor<fp16, [1, 128, 3072]> linear_10_cast_fp16 = linear(bias = transformer_layer_1_ffn_lin1_bias_to_fp16, weight = transformer_layer_1_ffn_lin1_weight_to_fp16, x = input_33_cast_fp16)[name = tensor<string, []>("linear_10_cast_fp16")];
135
+ tensor<string, []> input_37_mode_0 = const()[name = tensor<string, []>("input_37_mode_0"), val = tensor<string, []>("EXACT")];
136
+ tensor<fp16, [1, 128, 3072]> input_37_cast_fp16 = gelu(mode = input_37_mode_0, x = linear_10_cast_fp16)[name = tensor<string, []>("input_37_cast_fp16")];
137
+ tensor<fp16, [768, 3072]> transformer_layer_1_ffn_lin2_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_1_ffn_lin2_weight_to_fp16"), val = tensor<fp16, [768, 3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(207454272)))];
138
+ tensor<fp16, [768]> transformer_layer_1_ffn_lin2_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_1_ffn_lin2_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(212172928)))];
139
+ tensor<fp16, [1, 128, 768]> linear_11_cast_fp16 = linear(bias = transformer_layer_1_ffn_lin2_bias_to_fp16, weight = transformer_layer_1_ffn_lin2_weight_to_fp16, x = input_37_cast_fp16)[name = tensor<string, []>("linear_11_cast_fp16")];
140
+ tensor<fp16, [1, 128, 768]> input_41_cast_fp16 = add(x = linear_11_cast_fp16, y = input_33_cast_fp16)[name = tensor<string, []>("input_41_cast_fp16")];
141
+ tensor<int32, [1]> query_5_axes_0 = const()[name = tensor<string, []>("query_5_axes_0"), val = tensor<int32, [1]>([-1])];
142
+ tensor<fp16, [768]> transformer_layer_1_output_layer_norm_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_1_output_layer_norm_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(212174528)))];
143
+ tensor<fp16, [768]> transformer_layer_1_output_layer_norm_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_1_output_layer_norm_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(212176128)))];
144
+ tensor<fp16, [1, 128, 768]> query_5_cast_fp16 = layer_norm(axes = query_5_axes_0, beta = transformer_layer_1_output_layer_norm_bias_to_fp16, epsilon = var_66_to_fp16, gamma = transformer_layer_1_output_layer_norm_weight_to_fp16, x = input_41_cast_fp16)[name = tensor<string, []>("query_5_cast_fp16")];
145
+ tensor<fp16, [768, 768]> transformer_layer_2_attention_q_lin_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_2_attention_q_lin_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(212177728)))];
146
+ tensor<fp16, [768]> transformer_layer_2_attention_q_lin_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_2_attention_q_lin_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(213357440)))];
147
+ tensor<fp16, [1, 128, 768]> linear_12_cast_fp16 = linear(bias = transformer_layer_2_attention_q_lin_bias_to_fp16, weight = transformer_layer_2_attention_q_lin_weight_to_fp16, x = query_5_cast_fp16)[name = tensor<string, []>("linear_12_cast_fp16")];
148
+ tensor<int32, [4]> var_231 = const()[name = tensor<string, []>("op_231"), val = tensor<int32, [4]>([1, -1, 12, 64])];
149
+ tensor<fp16, [1, 128, 12, 64]> var_232_cast_fp16 = reshape(shape = var_231, x = linear_12_cast_fp16)[name = tensor<string, []>("op_232_cast_fp16")];
150
+ tensor<fp16, [768, 768]> transformer_layer_2_attention_k_lin_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_2_attention_k_lin_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(213359040)))];
151
+ tensor<fp16, [768]> transformer_layer_2_attention_k_lin_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_2_attention_k_lin_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(214538752)))];
152
+ tensor<fp16, [1, 128, 768]> linear_13_cast_fp16 = linear(bias = transformer_layer_2_attention_k_lin_bias_to_fp16, weight = transformer_layer_2_attention_k_lin_weight_to_fp16, x = query_5_cast_fp16)[name = tensor<string, []>("linear_13_cast_fp16")];
153
+ tensor<int32, [4]> var_237 = const()[name = tensor<string, []>("op_237"), val = tensor<int32, [4]>([1, -1, 12, 64])];
154
+ tensor<fp16, [1, 128, 12, 64]> var_238_cast_fp16 = reshape(shape = var_237, x = linear_13_cast_fp16)[name = tensor<string, []>("op_238_cast_fp16")];
155
+ tensor<fp16, [768, 768]> transformer_layer_2_attention_v_lin_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_2_attention_v_lin_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(214540352)))];
156
+ tensor<fp16, [768]> transformer_layer_2_attention_v_lin_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_2_attention_v_lin_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(215720064)))];
157
+ tensor<fp16, [1, 128, 768]> linear_14_cast_fp16 = linear(bias = transformer_layer_2_attention_v_lin_bias_to_fp16, weight = transformer_layer_2_attention_v_lin_weight_to_fp16, x = query_5_cast_fp16)[name = tensor<string, []>("linear_14_cast_fp16")];
158
+ tensor<int32, [4]> var_243 = const()[name = tensor<string, []>("op_243"), val = tensor<int32, [4]>([1, -1, 12, 64])];
159
+ tensor<fp16, [1, 128, 12, 64]> var_244_cast_fp16 = reshape(shape = var_243, x = linear_14_cast_fp16)[name = tensor<string, []>("op_244_cast_fp16")];
160
+ tensor<int32, [4]> v_5_perm_0 = const()[name = tensor<string, []>("v_5_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
161
+ tensor<fp16, []> _inversed_q_11_y_0_to_fp16 = const()[name = tensor<string, []>("_inversed_q_11_y_0_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
162
+ tensor<fp16, [1, 128, 12, 64]> _inversed_q_11_cast_fp16 = mul(x = var_232_cast_fp16, y = _inversed_q_11_y_0_to_fp16)[name = tensor<string, []>("_inversed_q_11_cast_fp16")];
163
+ tensor<bool, []> scores_5_transpose_x_0 = const()[name = tensor<string, []>("scores_5_transpose_x_0"), val = tensor<bool, []>(false)];
164
+ tensor<bool, []> scores_5_transpose_y_0 = const()[name = tensor<string, []>("scores_5_transpose_y_0"), val = tensor<bool, []>(false)];
165
+ tensor<int32, [4]> transpose_22_perm_0 = const()[name = tensor<string, []>("transpose_22_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
166
+ tensor<int32, [4]> transpose_23_perm_0 = const()[name = tensor<string, []>("transpose_23_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
167
+ tensor<fp16, [1, 12, 64, 128]> transpose_23 = transpose(perm = transpose_23_perm_0, x = var_238_cast_fp16)[name = tensor<string, []>("transpose_43")];
168
+ tensor<fp16, [1, 12, 128, 64]> transpose_22 = transpose(perm = transpose_22_perm_0, x = _inversed_q_11_cast_fp16)[name = tensor<string, []>("transpose_44")];
169
+ tensor<fp16, [1, 12, 128, 128]> scores_5_cast_fp16 = matmul(transpose_x = scores_5_transpose_x_0, transpose_y = scores_5_transpose_y_0, x = transpose_22, y = transpose_23)[name = tensor<string, []>("scores_5_cast_fp16")];
170
+ tensor<fp16, []> const_9_to_fp16 = const()[name = tensor<string, []>("const_9_to_fp16"), val = tensor<fp16, []>(-inf)];
171
+ tensor<fp16, [1, 12, 128, 128]> input_43_cast_fp16 = select(a = const_9_to_fp16, b = scores_5_cast_fp16, cond = mask_3)[name = tensor<string, []>("input_43_cast_fp16")];
172
+ tensor<fp16, [1, 12, 128, 128]> input_45_cast_fp16 = softmax(axis = var_62, x = input_43_cast_fp16)[name = tensor<string, []>("input_45_cast_fp16")];
173
+ tensor<bool, []> x_23_transpose_x_0 = const()[name = tensor<string, []>("x_23_transpose_x_0"), val = tensor<bool, []>(false)];
174
+ tensor<bool, []> x_23_transpose_y_0 = const()[name = tensor<string, []>("x_23_transpose_y_0"), val = tensor<bool, []>(false)];
175
+ tensor<fp16, [1, 12, 128, 64]> v_5_cast_fp16 = transpose(perm = v_5_perm_0, x = var_244_cast_fp16)[name = tensor<string, []>("transpose_45")];
176
+ tensor<fp16, [1, 12, 128, 64]> x_23_cast_fp16 = matmul(transpose_x = x_23_transpose_x_0, transpose_y = x_23_transpose_y_0, x = input_45_cast_fp16, y = v_5_cast_fp16)[name = tensor<string, []>("x_23_cast_fp16")];
177
+ tensor<int32, [4]> var_260_perm_0 = const()[name = tensor<string, []>("op_260_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
178
+ tensor<int32, [3]> var_262 = const()[name = tensor<string, []>("op_262"), val = tensor<int32, [3]>([1, -1, 768])];
179
+ tensor<fp16, [1, 128, 12, 64]> var_260_cast_fp16 = transpose(perm = var_260_perm_0, x = x_23_cast_fp16)[name = tensor<string, []>("transpose_42")];
180
+ tensor<fp16, [1, 128, 768]> input_47_cast_fp16 = reshape(shape = var_262, x = var_260_cast_fp16)[name = tensor<string, []>("input_47_cast_fp16")];
181
+ tensor<fp16, [768, 768]> transformer_layer_2_attention_out_lin_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_2_attention_out_lin_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(215721664)))];
182
+ tensor<fp16, [768]> transformer_layer_2_attention_out_lin_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_2_attention_out_lin_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(216901376)))];
183
+ tensor<fp16, [1, 128, 768]> linear_15_cast_fp16 = linear(bias = transformer_layer_2_attention_out_lin_bias_to_fp16, weight = transformer_layer_2_attention_out_lin_weight_to_fp16, x = input_47_cast_fp16)[name = tensor<string, []>("linear_15_cast_fp16")];
184
+ tensor<fp16, [1, 128, 768]> input_49_cast_fp16 = add(x = linear_15_cast_fp16, y = query_5_cast_fp16)[name = tensor<string, []>("input_49_cast_fp16")];
185
+ tensor<int32, [1]> input_51_axes_0 = const()[name = tensor<string, []>("input_51_axes_0"), val = tensor<int32, [1]>([-1])];
186
+ tensor<fp16, [768]> transformer_layer_2_sa_layer_norm_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_2_sa_layer_norm_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(216902976)))];
187
+ tensor<fp16, [768]> transformer_layer_2_sa_layer_norm_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_2_sa_layer_norm_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(216904576)))];
188
+ tensor<fp16, [1, 128, 768]> input_51_cast_fp16 = layer_norm(axes = input_51_axes_0, beta = transformer_layer_2_sa_layer_norm_bias_to_fp16, epsilon = var_66_to_fp16, gamma = transformer_layer_2_sa_layer_norm_weight_to_fp16, x = input_49_cast_fp16)[name = tensor<string, []>("input_51_cast_fp16")];
189
+ tensor<fp16, [3072, 768]> transformer_layer_2_ffn_lin1_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_2_ffn_lin1_weight_to_fp16"), val = tensor<fp16, [3072, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(216906176)))];
190
+ tensor<fp16, [3072]> transformer_layer_2_ffn_lin1_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_2_ffn_lin1_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(221624832)))];
191
+ tensor<fp16, [1, 128, 3072]> linear_16_cast_fp16 = linear(bias = transformer_layer_2_ffn_lin1_bias_to_fp16, weight = transformer_layer_2_ffn_lin1_weight_to_fp16, x = input_51_cast_fp16)[name = tensor<string, []>("linear_16_cast_fp16")];
192
+ tensor<string, []> input_55_mode_0 = const()[name = tensor<string, []>("input_55_mode_0"), val = tensor<string, []>("EXACT")];
193
+ tensor<fp16, [1, 128, 3072]> input_55_cast_fp16 = gelu(mode = input_55_mode_0, x = linear_16_cast_fp16)[name = tensor<string, []>("input_55_cast_fp16")];
194
+ tensor<fp16, [768, 3072]> transformer_layer_2_ffn_lin2_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_2_ffn_lin2_weight_to_fp16"), val = tensor<fp16, [768, 3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(221631040)))];
195
+ tensor<fp16, [768]> transformer_layer_2_ffn_lin2_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_2_ffn_lin2_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(226349696)))];
196
+ tensor<fp16, [1, 128, 768]> linear_17_cast_fp16 = linear(bias = transformer_layer_2_ffn_lin2_bias_to_fp16, weight = transformer_layer_2_ffn_lin2_weight_to_fp16, x = input_55_cast_fp16)[name = tensor<string, []>("linear_17_cast_fp16")];
197
+ tensor<fp16, [1, 128, 768]> input_59_cast_fp16 = add(x = linear_17_cast_fp16, y = input_51_cast_fp16)[name = tensor<string, []>("input_59_cast_fp16")];
198
+ tensor<int32, [1]> query_7_axes_0 = const()[name = tensor<string, []>("query_7_axes_0"), val = tensor<int32, [1]>([-1])];
199
+ tensor<fp16, [768]> transformer_layer_2_output_layer_norm_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_2_output_layer_norm_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(226351296)))];
200
+ tensor<fp16, [768]> transformer_layer_2_output_layer_norm_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_2_output_layer_norm_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(226352896)))];
201
+ tensor<fp16, [1, 128, 768]> query_7_cast_fp16 = layer_norm(axes = query_7_axes_0, beta = transformer_layer_2_output_layer_norm_bias_to_fp16, epsilon = var_66_to_fp16, gamma = transformer_layer_2_output_layer_norm_weight_to_fp16, x = input_59_cast_fp16)[name = tensor<string, []>("query_7_cast_fp16")];
202
+ tensor<fp16, [768, 768]> transformer_layer_3_attention_q_lin_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_3_attention_q_lin_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(226354496)))];
203
+ tensor<fp16, [768]> transformer_layer_3_attention_q_lin_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_3_attention_q_lin_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(227534208)))];
204
+ tensor<fp16, [1, 128, 768]> linear_18_cast_fp16 = linear(bias = transformer_layer_3_attention_q_lin_bias_to_fp16, weight = transformer_layer_3_attention_q_lin_weight_to_fp16, x = query_7_cast_fp16)[name = tensor<string, []>("linear_18_cast_fp16")];
205
+ tensor<int32, [4]> var_300 = const()[name = tensor<string, []>("op_300"), val = tensor<int32, [4]>([1, -1, 12, 64])];
206
+ tensor<fp16, [1, 128, 12, 64]> var_301_cast_fp16 = reshape(shape = var_300, x = linear_18_cast_fp16)[name = tensor<string, []>("op_301_cast_fp16")];
207
+ tensor<fp16, [768, 768]> transformer_layer_3_attention_k_lin_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_3_attention_k_lin_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(227535808)))];
208
+ tensor<fp16, [768]> transformer_layer_3_attention_k_lin_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_3_attention_k_lin_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(228715520)))];
209
+ tensor<fp16, [1, 128, 768]> linear_19_cast_fp16 = linear(bias = transformer_layer_3_attention_k_lin_bias_to_fp16, weight = transformer_layer_3_attention_k_lin_weight_to_fp16, x = query_7_cast_fp16)[name = tensor<string, []>("linear_19_cast_fp16")];
210
+ tensor<int32, [4]> var_306 = const()[name = tensor<string, []>("op_306"), val = tensor<int32, [4]>([1, -1, 12, 64])];
211
+ tensor<fp16, [1, 128, 12, 64]> var_307_cast_fp16 = reshape(shape = var_306, x = linear_19_cast_fp16)[name = tensor<string, []>("op_307_cast_fp16")];
212
+ tensor<fp16, [768, 768]> transformer_layer_3_attention_v_lin_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_3_attention_v_lin_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(228717120)))];
213
+ tensor<fp16, [768]> transformer_layer_3_attention_v_lin_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_3_attention_v_lin_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(229896832)))];
214
+ tensor<fp16, [1, 128, 768]> linear_20_cast_fp16 = linear(bias = transformer_layer_3_attention_v_lin_bias_to_fp16, weight = transformer_layer_3_attention_v_lin_weight_to_fp16, x = query_7_cast_fp16)[name = tensor<string, []>("linear_20_cast_fp16")];
215
+ tensor<int32, [4]> var_312 = const()[name = tensor<string, []>("op_312"), val = tensor<int32, [4]>([1, -1, 12, 64])];
216
+ tensor<fp16, [1, 128, 12, 64]> var_313_cast_fp16 = reshape(shape = var_312, x = linear_20_cast_fp16)[name = tensor<string, []>("op_313_cast_fp16")];
217
+ tensor<int32, [4]> v_7_perm_0 = const()[name = tensor<string, []>("v_7_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
218
+ tensor<fp16, []> _inversed_q_15_y_0_to_fp16 = const()[name = tensor<string, []>("_inversed_q_15_y_0_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
219
+ tensor<fp16, [1, 128, 12, 64]> _inversed_q_15_cast_fp16 = mul(x = var_301_cast_fp16, y = _inversed_q_15_y_0_to_fp16)[name = tensor<string, []>("_inversed_q_15_cast_fp16")];
220
+ tensor<bool, []> scores_7_transpose_x_0 = const()[name = tensor<string, []>("scores_7_transpose_x_0"), val = tensor<bool, []>(false)];
221
+ tensor<bool, []> scores_7_transpose_y_0 = const()[name = tensor<string, []>("scores_7_transpose_y_0"), val = tensor<bool, []>(false)];
222
+ tensor<int32, [4]> transpose_24_perm_0 = const()[name = tensor<string, []>("transpose_24_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
223
+ tensor<int32, [4]> transpose_25_perm_0 = const()[name = tensor<string, []>("transpose_25_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
224
+ tensor<fp16, [1, 12, 64, 128]> transpose_25 = transpose(perm = transpose_25_perm_0, x = var_307_cast_fp16)[name = tensor<string, []>("transpose_39")];
225
+ tensor<fp16, [1, 12, 128, 64]> transpose_24 = transpose(perm = transpose_24_perm_0, x = _inversed_q_15_cast_fp16)[name = tensor<string, []>("transpose_40")];
226
+ tensor<fp16, [1, 12, 128, 128]> scores_7_cast_fp16 = matmul(transpose_x = scores_7_transpose_x_0, transpose_y = scores_7_transpose_y_0, x = transpose_24, y = transpose_25)[name = tensor<string, []>("scores_7_cast_fp16")];
227
+ tensor<fp16, []> const_12_to_fp16 = const()[name = tensor<string, []>("const_12_to_fp16"), val = tensor<fp16, []>(-inf)];
228
+ tensor<fp16, [1, 12, 128, 128]> input_61_cast_fp16 = select(a = const_12_to_fp16, b = scores_7_cast_fp16, cond = mask_3)[name = tensor<string, []>("input_61_cast_fp16")];
229
+ tensor<fp16, [1, 12, 128, 128]> input_63_cast_fp16 = softmax(axis = var_62, x = input_61_cast_fp16)[name = tensor<string, []>("input_63_cast_fp16")];
230
+ tensor<bool, []> x_31_transpose_x_0 = const()[name = tensor<string, []>("x_31_transpose_x_0"), val = tensor<bool, []>(false)];
231
+ tensor<bool, []> x_31_transpose_y_0 = const()[name = tensor<string, []>("x_31_transpose_y_0"), val = tensor<bool, []>(false)];
232
+ tensor<fp16, [1, 12, 128, 64]> v_7_cast_fp16 = transpose(perm = v_7_perm_0, x = var_313_cast_fp16)[name = tensor<string, []>("transpose_41")];
233
+ tensor<fp16, [1, 12, 128, 64]> x_31_cast_fp16 = matmul(transpose_x = x_31_transpose_x_0, transpose_y = x_31_transpose_y_0, x = input_63_cast_fp16, y = v_7_cast_fp16)[name = tensor<string, []>("x_31_cast_fp16")];
234
+ tensor<int32, [4]> var_329_perm_0 = const()[name = tensor<string, []>("op_329_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
235
+ tensor<int32, [3]> var_331 = const()[name = tensor<string, []>("op_331"), val = tensor<int32, [3]>([1, -1, 768])];
236
+ tensor<fp16, [1, 128, 12, 64]> var_329_cast_fp16 = transpose(perm = var_329_perm_0, x = x_31_cast_fp16)[name = tensor<string, []>("transpose_38")];
237
+ tensor<fp16, [1, 128, 768]> input_65_cast_fp16 = reshape(shape = var_331, x = var_329_cast_fp16)[name = tensor<string, []>("input_65_cast_fp16")];
238
+ tensor<fp16, [768, 768]> transformer_layer_3_attention_out_lin_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_3_attention_out_lin_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(229898432)))];
239
+ tensor<fp16, [768]> transformer_layer_3_attention_out_lin_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_3_attention_out_lin_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(231078144)))];
240
+ tensor<fp16, [1, 128, 768]> linear_21_cast_fp16 = linear(bias = transformer_layer_3_attention_out_lin_bias_to_fp16, weight = transformer_layer_3_attention_out_lin_weight_to_fp16, x = input_65_cast_fp16)[name = tensor<string, []>("linear_21_cast_fp16")];
241
+ tensor<fp16, [1, 128, 768]> input_67_cast_fp16 = add(x = linear_21_cast_fp16, y = query_7_cast_fp16)[name = tensor<string, []>("input_67_cast_fp16")];
242
+ tensor<int32, [1]> input_69_axes_0 = const()[name = tensor<string, []>("input_69_axes_0"), val = tensor<int32, [1]>([-1])];
243
+ tensor<fp16, [768]> transformer_layer_3_sa_layer_norm_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_3_sa_layer_norm_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(231079744)))];
244
+ tensor<fp16, [768]> transformer_layer_3_sa_layer_norm_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_3_sa_layer_norm_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(231081344)))];
245
+ tensor<fp16, [1, 128, 768]> input_69_cast_fp16 = layer_norm(axes = input_69_axes_0, beta = transformer_layer_3_sa_layer_norm_bias_to_fp16, epsilon = var_66_to_fp16, gamma = transformer_layer_3_sa_layer_norm_weight_to_fp16, x = input_67_cast_fp16)[name = tensor<string, []>("input_69_cast_fp16")];
246
+ tensor<fp16, [3072, 768]> transformer_layer_3_ffn_lin1_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_3_ffn_lin1_weight_to_fp16"), val = tensor<fp16, [3072, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(231082944)))];
247
+ tensor<fp16, [3072]> transformer_layer_3_ffn_lin1_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_3_ffn_lin1_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(235801600)))];
248
+ tensor<fp16, [1, 128, 3072]> linear_22_cast_fp16 = linear(bias = transformer_layer_3_ffn_lin1_bias_to_fp16, weight = transformer_layer_3_ffn_lin1_weight_to_fp16, x = input_69_cast_fp16)[name = tensor<string, []>("linear_22_cast_fp16")];
249
+ tensor<string, []> input_73_mode_0 = const()[name = tensor<string, []>("input_73_mode_0"), val = tensor<string, []>("EXACT")];
250
+ tensor<fp16, [1, 128, 3072]> input_73_cast_fp16 = gelu(mode = input_73_mode_0, x = linear_22_cast_fp16)[name = tensor<string, []>("input_73_cast_fp16")];
251
+ tensor<fp16, [768, 3072]> transformer_layer_3_ffn_lin2_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_3_ffn_lin2_weight_to_fp16"), val = tensor<fp16, [768, 3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(235807808)))];
252
+ tensor<fp16, [768]> transformer_layer_3_ffn_lin2_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_3_ffn_lin2_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(240526464)))];
253
+ tensor<fp16, [1, 128, 768]> linear_23_cast_fp16 = linear(bias = transformer_layer_3_ffn_lin2_bias_to_fp16, weight = transformer_layer_3_ffn_lin2_weight_to_fp16, x = input_73_cast_fp16)[name = tensor<string, []>("linear_23_cast_fp16")];
254
+ tensor<fp16, [1, 128, 768]> input_77_cast_fp16 = add(x = linear_23_cast_fp16, y = input_69_cast_fp16)[name = tensor<string, []>("input_77_cast_fp16")];
255
+ tensor<int32, [1]> query_9_axes_0 = const()[name = tensor<string, []>("query_9_axes_0"), val = tensor<int32, [1]>([-1])];
256
+ tensor<fp16, [768]> transformer_layer_3_output_layer_norm_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_3_output_layer_norm_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(240528064)))];
257
+ tensor<fp16, [768]> transformer_layer_3_output_layer_norm_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_3_output_layer_norm_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(240529664)))];
258
+ tensor<fp16, [1, 128, 768]> query_9_cast_fp16 = layer_norm(axes = query_9_axes_0, beta = transformer_layer_3_output_layer_norm_bias_to_fp16, epsilon = var_66_to_fp16, gamma = transformer_layer_3_output_layer_norm_weight_to_fp16, x = input_77_cast_fp16)[name = tensor<string, []>("query_9_cast_fp16")];
259
+ tensor<fp16, [768, 768]> transformer_layer_4_attention_q_lin_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_4_attention_q_lin_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(240531264)))];
260
+ tensor<fp16, [768]> transformer_layer_4_attention_q_lin_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_4_attention_q_lin_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(241710976)))];
261
+ tensor<fp16, [1, 128, 768]> linear_24_cast_fp16 = linear(bias = transformer_layer_4_attention_q_lin_bias_to_fp16, weight = transformer_layer_4_attention_q_lin_weight_to_fp16, x = query_9_cast_fp16)[name = tensor<string, []>("linear_24_cast_fp16")];
262
+ tensor<int32, [4]> var_369 = const()[name = tensor<string, []>("op_369"), val = tensor<int32, [4]>([1, -1, 12, 64])];
263
+ tensor<fp16, [1, 128, 12, 64]> var_370_cast_fp16 = reshape(shape = var_369, x = linear_24_cast_fp16)[name = tensor<string, []>("op_370_cast_fp16")];
264
+ tensor<fp16, [768, 768]> transformer_layer_4_attention_k_lin_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_4_attention_k_lin_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(241712576)))];
265
+ tensor<fp16, [768]> transformer_layer_4_attention_k_lin_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_4_attention_k_lin_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(242892288)))];
266
+ tensor<fp16, [1, 128, 768]> linear_25_cast_fp16 = linear(bias = transformer_layer_4_attention_k_lin_bias_to_fp16, weight = transformer_layer_4_attention_k_lin_weight_to_fp16, x = query_9_cast_fp16)[name = tensor<string, []>("linear_25_cast_fp16")];
267
+ tensor<int32, [4]> var_375 = const()[name = tensor<string, []>("op_375"), val = tensor<int32, [4]>([1, -1, 12, 64])];
268
+ tensor<fp16, [1, 128, 12, 64]> var_376_cast_fp16 = reshape(shape = var_375, x = linear_25_cast_fp16)[name = tensor<string, []>("op_376_cast_fp16")];
269
+ tensor<fp16, [768, 768]> transformer_layer_4_attention_v_lin_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_4_attention_v_lin_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(242893888)))];
270
+ tensor<fp16, [768]> transformer_layer_4_attention_v_lin_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_4_attention_v_lin_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(244073600)))];
271
+ tensor<fp16, [1, 128, 768]> linear_26_cast_fp16 = linear(bias = transformer_layer_4_attention_v_lin_bias_to_fp16, weight = transformer_layer_4_attention_v_lin_weight_to_fp16, x = query_9_cast_fp16)[name = tensor<string, []>("linear_26_cast_fp16")];
272
+ tensor<int32, [4]> var_381 = const()[name = tensor<string, []>("op_381"), val = tensor<int32, [4]>([1, -1, 12, 64])];
273
+ tensor<fp16, [1, 128, 12, 64]> var_382_cast_fp16 = reshape(shape = var_381, x = linear_26_cast_fp16)[name = tensor<string, []>("op_382_cast_fp16")];
274
+ tensor<int32, [4]> v_9_perm_0 = const()[name = tensor<string, []>("v_9_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
275
+ tensor<fp16, []> _inversed_q_19_y_0_to_fp16 = const()[name = tensor<string, []>("_inversed_q_19_y_0_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
276
+ tensor<fp16, [1, 128, 12, 64]> _inversed_q_19_cast_fp16 = mul(x = var_370_cast_fp16, y = _inversed_q_19_y_0_to_fp16)[name = tensor<string, []>("_inversed_q_19_cast_fp16")];
277
+ tensor<bool, []> scores_9_transpose_x_0 = const()[name = tensor<string, []>("scores_9_transpose_x_0"), val = tensor<bool, []>(false)];
278
+ tensor<bool, []> scores_9_transpose_y_0 = const()[name = tensor<string, []>("scores_9_transpose_y_0"), val = tensor<bool, []>(false)];
279
+ tensor<int32, [4]> transpose_26_perm_0 = const()[name = tensor<string, []>("transpose_26_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
280
+ tensor<int32, [4]> transpose_27_perm_0 = const()[name = tensor<string, []>("transpose_27_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
281
+ tensor<fp16, [1, 12, 64, 128]> transpose_27 = transpose(perm = transpose_27_perm_0, x = var_376_cast_fp16)[name = tensor<string, []>("transpose_35")];
282
+ tensor<fp16, [1, 12, 128, 64]> transpose_26 = transpose(perm = transpose_26_perm_0, x = _inversed_q_19_cast_fp16)[name = tensor<string, []>("transpose_36")];
283
+ tensor<fp16, [1, 12, 128, 128]> scores_9_cast_fp16 = matmul(transpose_x = scores_9_transpose_x_0, transpose_y = scores_9_transpose_y_0, x = transpose_26, y = transpose_27)[name = tensor<string, []>("scores_9_cast_fp16")];
284
+ tensor<fp16, []> const_15_to_fp16 = const()[name = tensor<string, []>("const_15_to_fp16"), val = tensor<fp16, []>(-inf)];
285
+ tensor<fp16, [1, 12, 128, 128]> input_79_cast_fp16 = select(a = const_15_to_fp16, b = scores_9_cast_fp16, cond = mask_3)[name = tensor<string, []>("input_79_cast_fp16")];
286
+ tensor<fp16, [1, 12, 128, 128]> input_81_cast_fp16 = softmax(axis = var_62, x = input_79_cast_fp16)[name = tensor<string, []>("input_81_cast_fp16")];
287
+ tensor<bool, []> x_39_transpose_x_0 = const()[name = tensor<string, []>("x_39_transpose_x_0"), val = tensor<bool, []>(false)];
288
+ tensor<bool, []> x_39_transpose_y_0 = const()[name = tensor<string, []>("x_39_transpose_y_0"), val = tensor<bool, []>(false)];
289
+ tensor<fp16, [1, 12, 128, 64]> v_9_cast_fp16 = transpose(perm = v_9_perm_0, x = var_382_cast_fp16)[name = tensor<string, []>("transpose_37")];
290
+ tensor<fp16, [1, 12, 128, 64]> x_39_cast_fp16 = matmul(transpose_x = x_39_transpose_x_0, transpose_y = x_39_transpose_y_0, x = input_81_cast_fp16, y = v_9_cast_fp16)[name = tensor<string, []>("x_39_cast_fp16")];
291
+ tensor<int32, [4]> var_398_perm_0 = const()[name = tensor<string, []>("op_398_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
292
+ tensor<int32, [3]> var_400 = const()[name = tensor<string, []>("op_400"), val = tensor<int32, [3]>([1, -1, 768])];
293
+ tensor<fp16, [1, 128, 12, 64]> var_398_cast_fp16 = transpose(perm = var_398_perm_0, x = x_39_cast_fp16)[name = tensor<string, []>("transpose_34")];
294
+ tensor<fp16, [1, 128, 768]> input_83_cast_fp16 = reshape(shape = var_400, x = var_398_cast_fp16)[name = tensor<string, []>("input_83_cast_fp16")];
295
+ tensor<fp16, [768, 768]> transformer_layer_4_attention_out_lin_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_4_attention_out_lin_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(244075200)))];
296
+ tensor<fp16, [768]> transformer_layer_4_attention_out_lin_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_4_attention_out_lin_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(245254912)))];
297
+ tensor<fp16, [1, 128, 768]> linear_27_cast_fp16 = linear(bias = transformer_layer_4_attention_out_lin_bias_to_fp16, weight = transformer_layer_4_attention_out_lin_weight_to_fp16, x = input_83_cast_fp16)[name = tensor<string, []>("linear_27_cast_fp16")];
298
+ tensor<fp16, [1, 128, 768]> input_85_cast_fp16 = add(x = linear_27_cast_fp16, y = query_9_cast_fp16)[name = tensor<string, []>("input_85_cast_fp16")];
299
+ tensor<int32, [1]> input_87_axes_0 = const()[name = tensor<string, []>("input_87_axes_0"), val = tensor<int32, [1]>([-1])];
300
+ tensor<fp16, [768]> transformer_layer_4_sa_layer_norm_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_4_sa_layer_norm_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(245256512)))];
301
+ tensor<fp16, [768]> transformer_layer_4_sa_layer_norm_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_4_sa_layer_norm_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(245258112)))];
302
+ tensor<fp16, [1, 128, 768]> input_87_cast_fp16 = layer_norm(axes = input_87_axes_0, beta = transformer_layer_4_sa_layer_norm_bias_to_fp16, epsilon = var_66_to_fp16, gamma = transformer_layer_4_sa_layer_norm_weight_to_fp16, x = input_85_cast_fp16)[name = tensor<string, []>("input_87_cast_fp16")];
303
+ tensor<fp16, [3072, 768]> transformer_layer_4_ffn_lin1_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_4_ffn_lin1_weight_to_fp16"), val = tensor<fp16, [3072, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(245259712)))];
304
+ tensor<fp16, [3072]> transformer_layer_4_ffn_lin1_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_4_ffn_lin1_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(249978368)))];
305
+ tensor<fp16, [1, 128, 3072]> linear_28_cast_fp16 = linear(bias = transformer_layer_4_ffn_lin1_bias_to_fp16, weight = transformer_layer_4_ffn_lin1_weight_to_fp16, x = input_87_cast_fp16)[name = tensor<string, []>("linear_28_cast_fp16")];
306
+ tensor<string, []> input_91_mode_0 = const()[name = tensor<string, []>("input_91_mode_0"), val = tensor<string, []>("EXACT")];
307
+ tensor<fp16, [1, 128, 3072]> input_91_cast_fp16 = gelu(mode = input_91_mode_0, x = linear_28_cast_fp16)[name = tensor<string, []>("input_91_cast_fp16")];
308
+ tensor<fp16, [768, 3072]> transformer_layer_4_ffn_lin2_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_4_ffn_lin2_weight_to_fp16"), val = tensor<fp16, [768, 3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(249984576)))];
309
+ tensor<fp16, [768]> transformer_layer_4_ffn_lin2_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_4_ffn_lin2_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(254703232)))];
310
+ tensor<fp16, [1, 128, 768]> linear_29_cast_fp16 = linear(bias = transformer_layer_4_ffn_lin2_bias_to_fp16, weight = transformer_layer_4_ffn_lin2_weight_to_fp16, x = input_91_cast_fp16)[name = tensor<string, []>("linear_29_cast_fp16")];
311
+ tensor<fp16, [1, 128, 768]> input_95_cast_fp16 = add(x = linear_29_cast_fp16, y = input_87_cast_fp16)[name = tensor<string, []>("input_95_cast_fp16")];
312
+ tensor<int32, [1]> query_axes_0 = const()[name = tensor<string, []>("query_axes_0"), val = tensor<int32, [1]>([-1])];
313
+ tensor<fp16, [768]> transformer_layer_4_output_layer_norm_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_4_output_layer_norm_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(254704832)))];
314
+ tensor<fp16, [768]> transformer_layer_4_output_layer_norm_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_4_output_layer_norm_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(254706432)))];
315
+ tensor<fp16, [1, 128, 768]> query_cast_fp16 = layer_norm(axes = query_axes_0, beta = transformer_layer_4_output_layer_norm_bias_to_fp16, epsilon = var_66_to_fp16, gamma = transformer_layer_4_output_layer_norm_weight_to_fp16, x = input_95_cast_fp16)[name = tensor<string, []>("query_cast_fp16")];
316
+ tensor<fp16, [768, 768]> transformer_layer_5_attention_q_lin_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_5_attention_q_lin_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(254708032)))];
317
+ tensor<fp16, [768]> transformer_layer_5_attention_q_lin_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_5_attention_q_lin_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(255887744)))];
318
+ tensor<fp16, [1, 128, 768]> linear_30_cast_fp16 = linear(bias = transformer_layer_5_attention_q_lin_bias_to_fp16, weight = transformer_layer_5_attention_q_lin_weight_to_fp16, x = query_cast_fp16)[name = tensor<string, []>("linear_30_cast_fp16")];
319
+ tensor<int32, [4]> var_438 = const()[name = tensor<string, []>("op_438"), val = tensor<int32, [4]>([1, -1, 12, 64])];
320
+ tensor<fp16, [1, 128, 12, 64]> var_439_cast_fp16 = reshape(shape = var_438, x = linear_30_cast_fp16)[name = tensor<string, []>("op_439_cast_fp16")];
321
+ tensor<fp16, [768, 768]> transformer_layer_5_attention_k_lin_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_5_attention_k_lin_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(255889344)))];
322
+ tensor<fp16, [768]> transformer_layer_5_attention_k_lin_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_5_attention_k_lin_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(257069056)))];
323
+ tensor<fp16, [1, 128, 768]> linear_31_cast_fp16 = linear(bias = transformer_layer_5_attention_k_lin_bias_to_fp16, weight = transformer_layer_5_attention_k_lin_weight_to_fp16, x = query_cast_fp16)[name = tensor<string, []>("linear_31_cast_fp16")];
324
+ tensor<int32, [4]> var_444 = const()[name = tensor<string, []>("op_444"), val = tensor<int32, [4]>([1, -1, 12, 64])];
325
+ tensor<fp16, [1, 128, 12, 64]> var_445_cast_fp16 = reshape(shape = var_444, x = linear_31_cast_fp16)[name = tensor<string, []>("op_445_cast_fp16")];
326
+ tensor<fp16, [768, 768]> transformer_layer_5_attention_v_lin_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_5_attention_v_lin_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(257070656)))];
327
+ tensor<fp16, [768]> transformer_layer_5_attention_v_lin_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_5_attention_v_lin_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(258250368)))];
328
+ tensor<fp16, [1, 128, 768]> linear_32_cast_fp16 = linear(bias = transformer_layer_5_attention_v_lin_bias_to_fp16, weight = transformer_layer_5_attention_v_lin_weight_to_fp16, x = query_cast_fp16)[name = tensor<string, []>("linear_32_cast_fp16")];
329
+ tensor<int32, [4]> var_450 = const()[name = tensor<string, []>("op_450"), val = tensor<int32, [4]>([1, -1, 12, 64])];
330
+ tensor<fp16, [1, 128, 12, 64]> var_451_cast_fp16 = reshape(shape = var_450, x = linear_32_cast_fp16)[name = tensor<string, []>("op_451_cast_fp16")];
331
+ tensor<int32, [4]> v_perm_0 = const()[name = tensor<string, []>("v_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
332
+ tensor<fp16, []> _inversed_q_y_0_to_fp16 = const()[name = tensor<string, []>("_inversed_q_y_0_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
333
+ tensor<fp16, [1, 128, 12, 64]> _inversed_q_cast_fp16 = mul(x = var_439_cast_fp16, y = _inversed_q_y_0_to_fp16)[name = tensor<string, []>("_inversed_q_cast_fp16")];
334
+ tensor<bool, []> scores_transpose_x_0 = const()[name = tensor<string, []>("scores_transpose_x_0"), val = tensor<bool, []>(false)];
335
+ tensor<bool, []> scores_transpose_y_0 = const()[name = tensor<string, []>("scores_transpose_y_0"), val = tensor<bool, []>(false)];
336
+ tensor<int32, [4]> transpose_28_perm_0 = const()[name = tensor<string, []>("transpose_28_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
337
+ tensor<int32, [4]> transpose_29_perm_0 = const()[name = tensor<string, []>("transpose_29_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
338
+ tensor<fp16, [1, 12, 64, 128]> transpose_29 = transpose(perm = transpose_29_perm_0, x = var_445_cast_fp16)[name = tensor<string, []>("transpose_31")];
339
+ tensor<fp16, [1, 12, 128, 64]> transpose_28 = transpose(perm = transpose_28_perm_0, x = _inversed_q_cast_fp16)[name = tensor<string, []>("transpose_32")];
340
+ tensor<fp16, [1, 12, 128, 128]> scores_cast_fp16 = matmul(transpose_x = scores_transpose_x_0, transpose_y = scores_transpose_y_0, x = transpose_28, y = transpose_29)[name = tensor<string, []>("scores_cast_fp16")];
341
+ tensor<fp16, []> const_18_to_fp16 = const()[name = tensor<string, []>("const_18_to_fp16"), val = tensor<fp16, []>(-inf)];
342
+ tensor<fp16, [1, 12, 128, 128]> input_97_cast_fp16 = select(a = const_18_to_fp16, b = scores_cast_fp16, cond = mask_3)[name = tensor<string, []>("input_97_cast_fp16")];
343
+ tensor<fp16, [1, 12, 128, 128]> input_99_cast_fp16 = softmax(axis = var_62, x = input_97_cast_fp16)[name = tensor<string, []>("input_99_cast_fp16")];
344
+ tensor<bool, []> x_transpose_x_0 = const()[name = tensor<string, []>("x_transpose_x_0"), val = tensor<bool, []>(false)];
345
+ tensor<bool, []> x_transpose_y_0 = const()[name = tensor<string, []>("x_transpose_y_0"), val = tensor<bool, []>(false)];
346
+ tensor<fp16, [1, 12, 128, 64]> v_cast_fp16 = transpose(perm = v_perm_0, x = var_451_cast_fp16)[name = tensor<string, []>("transpose_33")];
347
+ tensor<fp16, [1, 12, 128, 64]> x_cast_fp16 = matmul(transpose_x = x_transpose_x_0, transpose_y = x_transpose_y_0, x = input_99_cast_fp16, y = v_cast_fp16)[name = tensor<string, []>("x_cast_fp16")];
348
+ tensor<int32, [4]> var_467_perm_0 = const()[name = tensor<string, []>("op_467_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
349
+ tensor<int32, [3]> var_469 = const()[name = tensor<string, []>("op_469"), val = tensor<int32, [3]>([1, -1, 768])];
350
+ tensor<fp16, [1, 128, 12, 64]> var_467_cast_fp16 = transpose(perm = var_467_perm_0, x = x_cast_fp16)[name = tensor<string, []>("transpose_30")];
351
+ tensor<fp16, [1, 128, 768]> input_101_cast_fp16 = reshape(shape = var_469, x = var_467_cast_fp16)[name = tensor<string, []>("input_101_cast_fp16")];
352
+ tensor<fp16, [768, 768]> transformer_layer_5_attention_out_lin_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_5_attention_out_lin_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(258251968)))];
353
+ tensor<fp16, [768]> transformer_layer_5_attention_out_lin_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_5_attention_out_lin_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(259431680)))];
354
+ tensor<fp16, [1, 128, 768]> linear_33_cast_fp16 = linear(bias = transformer_layer_5_attention_out_lin_bias_to_fp16, weight = transformer_layer_5_attention_out_lin_weight_to_fp16, x = input_101_cast_fp16)[name = tensor<string, []>("linear_33_cast_fp16")];
355
+ tensor<fp16, [1, 128, 768]> input_103_cast_fp16 = add(x = linear_33_cast_fp16, y = query_cast_fp16)[name = tensor<string, []>("input_103_cast_fp16")];
356
+ tensor<int32, [1]> input_105_axes_0 = const()[name = tensor<string, []>("input_105_axes_0"), val = tensor<int32, [1]>([-1])];
357
+ tensor<fp16, [768]> transformer_layer_5_sa_layer_norm_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_5_sa_layer_norm_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(259433280)))];
358
+ tensor<fp16, [768]> transformer_layer_5_sa_layer_norm_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_5_sa_layer_norm_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(259434880)))];
359
+ tensor<fp16, [1, 128, 768]> input_105_cast_fp16 = layer_norm(axes = input_105_axes_0, beta = transformer_layer_5_sa_layer_norm_bias_to_fp16, epsilon = var_66_to_fp16, gamma = transformer_layer_5_sa_layer_norm_weight_to_fp16, x = input_103_cast_fp16)[name = tensor<string, []>("input_105_cast_fp16")];
360
+ tensor<fp16, [3072, 768]> transformer_layer_5_ffn_lin1_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_5_ffn_lin1_weight_to_fp16"), val = tensor<fp16, [3072, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(259436480)))];
361
+ tensor<fp16, [3072]> transformer_layer_5_ffn_lin1_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_5_ffn_lin1_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(264155136)))];
362
+ tensor<fp16, [1, 128, 3072]> linear_34_cast_fp16 = linear(bias = transformer_layer_5_ffn_lin1_bias_to_fp16, weight = transformer_layer_5_ffn_lin1_weight_to_fp16, x = input_105_cast_fp16)[name = tensor<string, []>("linear_34_cast_fp16")];
363
+ tensor<string, []> input_109_mode_0 = const()[name = tensor<string, []>("input_109_mode_0"), val = tensor<string, []>("EXACT")];
364
+ tensor<fp16, [1, 128, 3072]> input_109_cast_fp16 = gelu(mode = input_109_mode_0, x = linear_34_cast_fp16)[name = tensor<string, []>("input_109_cast_fp16")];
365
+ tensor<fp16, [768, 3072]> transformer_layer_5_ffn_lin2_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_5_ffn_lin2_weight_to_fp16"), val = tensor<fp16, [768, 3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(264161344)))];
366
+ tensor<fp16, [768]> transformer_layer_5_ffn_lin2_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_5_ffn_lin2_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(268880000)))];
367
+ tensor<fp16, [1, 128, 768]> linear_35_cast_fp16 = linear(bias = transformer_layer_5_ffn_lin2_bias_to_fp16, weight = transformer_layer_5_ffn_lin2_weight_to_fp16, x = input_109_cast_fp16)[name = tensor<string, []>("linear_35_cast_fp16")];
368
+ tensor<fp16, [1, 128, 768]> input_113_cast_fp16 = add(x = linear_35_cast_fp16, y = input_105_cast_fp16)[name = tensor<string, []>("input_113_cast_fp16")];
369
+ tensor<int32, [1]> input_115_axes_0 = const()[name = tensor<string, []>("input_115_axes_0"), val = tensor<int32, [1]>([-1])];
370
+ tensor<fp16, [768]> transformer_layer_5_output_layer_norm_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_5_output_layer_norm_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(268881600)))];
371
+ tensor<fp16, [768]> transformer_layer_5_output_layer_norm_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_5_output_layer_norm_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(268883200)))];
372
+ tensor<fp16, [1, 128, 768]> input_115_cast_fp16 = layer_norm(axes = input_115_axes_0, beta = transformer_layer_5_output_layer_norm_bias_to_fp16, epsilon = var_66_to_fp16, gamma = transformer_layer_5_output_layer_norm_weight_to_fp16, x = input_113_cast_fp16)[name = tensor<string, []>("input_115_cast_fp16")];
373
+ tensor<fp16, [768, 768]> vocab_transform_weight_to_fp16 = const()[name = tensor<string, []>("vocab_transform_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(268884800)))];
374
+ tensor<fp16, [768]> vocab_transform_bias_to_fp16 = const()[name = tensor<string, []>("vocab_transform_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(270064512)))];
375
+ tensor<fp16, [1, 128, 768]> linear_36_cast_fp16 = linear(bias = vocab_transform_bias_to_fp16, weight = vocab_transform_weight_to_fp16, x = input_115_cast_fp16)[name = tensor<string, []>("linear_36_cast_fp16")];
376
+ tensor<string, []> input_117_mode_0 = const()[name = tensor<string, []>("input_117_mode_0"), val = tensor<string, []>("EXACT")];
377
+ tensor<fp16, [1, 128, 768]> input_117_cast_fp16 = gelu(mode = input_117_mode_0, x = linear_36_cast_fp16)[name = tensor<string, []>("input_117_cast_fp16")];
378
+ tensor<int32, [1]> input_axes_0 = const()[name = tensor<string, []>("input_axes_0"), val = tensor<int32, [1]>([-1])];
379
+ tensor<fp16, [768]> vocab_layer_norm_weight_to_fp16 = const()[name = tensor<string, []>("vocab_layer_norm_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(270066112)))];
380
+ tensor<fp16, [768]> vocab_layer_norm_bias_to_fp16 = const()[name = tensor<string, []>("vocab_layer_norm_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(270067712)))];
381
+ tensor<fp16, []> var_500_to_fp16 = const()[name = tensor<string, []>("op_500_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
382
+ tensor<fp16, [1, 128, 768]> input_cast_fp16 = layer_norm(axes = input_axes_0, beta = vocab_layer_norm_bias_to_fp16, epsilon = var_500_to_fp16, gamma = vocab_layer_norm_weight_to_fp16, x = input_117_cast_fp16)[name = tensor<string, []>("input_cast_fp16")];
383
+ tensor<fp16, [119547]> vocab_projector_bias_to_fp16 = const()[name = tensor<string, []>("vocab_projector_bias_to_fp16"), val = tensor<fp16, [119547]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(270069312)))];
384
+ tensor<fp16, [1, 128, 119547]> linear_37_cast_fp16 = linear(bias = vocab_projector_bias_to_fp16, weight = embeddings_word_embeddings_weight_to_fp16, x = input_cast_fp16)[name = tensor<string, []>("linear_37_cast_fp16")];
385
+ tensor<string, []> linear_37_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("linear_37_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")];
386
+ tensor<fp32, [1, 128, 119547]> logits = cast(dtype = linear_37_cast_fp16_to_fp32_dtype_0, x = linear_37_cast_fp16)[name = tensor<string, []>("cast_44")];
387
+ } -> (logits);
388
+ }
distilbert_mlm.mlmodelc/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e285ae0002b6f54b4b36de3de15f71d208c75199441c6edad5a51cef8a38ce81
3
+ size 270308470