Upload folder using huggingface_hub

Browse files

Files changed (5) hide show

distilbert_mlm.mlmodelc/analytics/coremldata.bin +3 -0
distilbert_mlm.mlmodelc/coremldata.bin +3 -0
distilbert_mlm.mlmodelc/metadata.json +89 -0
distilbert_mlm.mlmodelc/model.mil +388 -0
distilbert_mlm.mlmodelc/weights/weight.bin +3 -0

distilbert_mlm.mlmodelc/analytics/coremldata.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:10f2c23b71cd59734511b6e10fb8a48aec95421790caefc308db5520ece6f501
+size 243

distilbert_mlm.mlmodelc/coremldata.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:748bc523def8a60f7cfc97b8fd49954b3f21920c0e36ee16d18e223b1e9c3fcd
+size 481

distilbert_mlm.mlmodelc/metadata.json ADDED Viewed

	@@ -0,0 +1,89 @@

+[
+  {
+    "shortDescription" : "DistilBERT MLM for grammar correction (multilingual, fast)",
+    "metadataOutputVersion" : "3.0",
+    "outputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float32",
+        "formattedType" : "MultiArray (Float32 1 × 128 × 119547)",
+        "shortDescription" : "",
+        "shape" : "[1, 128, 119547]",
+        "name" : "logits",
+        "type" : "MultiArray"
+      }
+    ],
+    "version" : "1.0",
+    "modelParameters" : [
+    ],
+    "author" : "Typo00",
+    "specificationVersion" : 6,
+    "storagePrecision" : "Float16",
+    "mlProgramOperationTypeHistogram" : {
+      "Linear" : 38,
+      "Select" : 6,
+      "LayerNorm" : 14,
+      "Transpose" : 24,
+      "Matmul" : 12,
+      "Sub" : 1,
+      "Gelu" : 7,
+      "Softmax" : 6,
+      "Mul" : 7,
+      "Cast" : 2,
+      "Equal" : 1,
+      "Add" : 13,
+      "ExpandDims" : 2,
+      "Reshape" : 24,
+      "Gather" : 1,
+      "Tile" : 1
+    },
+    "computePrecision" : "Mixed (Float16, Float32, Int32)",
+    "stateSchema" : [
+    ],
+    "isUpdatable" : "0",
+    "availability" : {
+      "macOS" : "12.0",
+      "tvOS" : "15.0",
+      "visionOS" : "1.0",
+      "watchOS" : "8.0",
+      "iOS" : "15.0",
+      "macCatalyst" : "15.0"
+    },
+    "modelType" : {
+      "name" : "MLModelType_mlProgram"
+    },
+    "inputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Int32",
+        "formattedType" : "MultiArray (Int32 1 × 128)",
+        "shortDescription" : "",
+        "shape" : "[1, 128]",
+        "name" : "input_ids",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Int32",
+        "formattedType" : "MultiArray (Int32 1 × 128)",
+        "shortDescription" : "",
+        "shape" : "[1, 128]",
+        "name" : "attention_mask",
+        "type" : "MultiArray"
+      }
+    ],
+    "userDefinedMetadata" : {
+      "com.github.apple.coremltools.conversion_date" : "2026-01-12",
+      "com.github.apple.coremltools.source" : "torch==2.7.0",
+      "com.github.apple.coremltools.version" : "9.0",
+      "com.github.apple.coremltools.source_dialect" : "TorchScript"
+    },
+    "generatedClassName" : "distilbert_mlm",
+    "method" : "predict"
+  }
+]

distilbert_mlm.mlmodelc/model.mil ADDED Viewed

	@@ -0,0 +1,388 @@

+program(1.0)
+[buildInfo = dict<tensor<string, []>, tensor<string, []>>({{"coremlc-component-MIL", "3510.2.1"}, {"coremlc-version", "3500.32.1"}, {"coremltools-component-torch", "2.7.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "9.0"}})]
+{
+    func main<ios15>(tensor<int32, [1, 128]> attention_mask, tensor<int32, [1, 128]> input_ids) {
+            tensor<int32, []> input_embeds_axis_0 = const()[name = tensor<string, []>("input_embeds_axis_0"), val = tensor<int32, []>(0)];
+            tensor<fp16, [119547, 768]> embeddings_word_embeddings_weight_to_fp16 = const()[name = tensor<string, []>("embeddings_word_embeddings_weight_to_fp16"), val = tensor<fp16, [119547, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(64)))];
+            tensor<fp16, [1, 128, 768]> input_embeds_cast_fp16 = gather(axis = input_embeds_axis_0, indices = input_ids, x = embeddings_word_embeddings_weight_to_fp16)[name = tensor<string, []>("input_embeds_cast_fp16")];
+            tensor<fp16, [1, 128, 768]> position_embeddings_1_to_fp16 = const()[name = tensor<string, []>("position_embeddings_1_to_fp16"), val = tensor<fp16, [1, 128, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(183624320)))];
+            tensor<fp16, [1, 128, 768]> input_3_cast_fp16 = add(x = input_embeds_cast_fp16, y = position_embeddings_1_to_fp16)[name = tensor<string, []>("input_3_cast_fp16")];
+            tensor<int32, [1]> input_5_axes_0 = const()[name = tensor<string, []>("input_5_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [768]> embeddings_LayerNorm_weight_to_fp16 = const()[name = tensor<string, []>("embeddings_LayerNorm_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(183820992)))];
+            tensor<fp16, [768]> embeddings_LayerNorm_bias_to_fp16 = const()[name = tensor<string, []>("embeddings_LayerNorm_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(183822592)))];
+            tensor<fp16, []> var_13_to_fp16 = const()[name = tensor<string, []>("op_13_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 128, 768]> input_5_cast_fp16 = layer_norm(axes = input_5_axes_0, beta = embeddings_LayerNorm_bias_to_fp16, epsilon = var_13_to_fp16, gamma = embeddings_LayerNorm_weight_to_fp16, x = input_3_cast_fp16)[name = tensor<string, []>("input_5_cast_fp16")];
+            tensor<int32, [1]> var_38_axes_0 = const()[name = tensor<string, []>("op_38_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<int32, [1, 1, 128]> var_38 = expand_dims(axes = var_38_axes_0, x = attention_mask)[name = tensor<string, []>("op_38")];
+            tensor<int32, [1]> extended_attention_mask_axes_0 = const()[name = tensor<string, []>("extended_attention_mask_axes_0"), val = tensor<int32, [1]>([2])];
+            tensor<int32, [1, 1, 1, 128]> extended_attention_mask = expand_dims(axes = extended_attention_mask_axes_0, x = var_38)[name = tensor<string, []>("extended_attention_mask")];
+            tensor<fp16, []> var_46_to_fp16 = const()[name = tensor<string, []>("op_46_to_fp16"), val = tensor<fp16, []>(0x1p+0)];
+            tensor<string, []> var_45_to_fp16_dtype_0 = const()[name = tensor<string, []>("op_45_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
+            tensor<fp16, [1, 1, 1, 128]> extended_attention_mask_to_fp16 = cast(dtype = var_45_to_fp16_dtype_0, x = extended_attention_mask)[name = tensor<string, []>("cast_45")];
+            tensor<fp16, [1, 1, 1, 128]> var_48_cast_fp16 = sub(x = var_46_to_fp16, y = extended_attention_mask_to_fp16)[name = tensor<string, []>("op_48_cast_fp16")];
+            tensor<fp16, []> var_49_to_fp16 = const()[name = tensor<string, []>("op_49_to_fp16"), val = tensor<fp16, []>(-0x1.388p+13)];
+            tensor<fp16, [1, 1, 1, 128]> mask_1_cast_fp16 = mul(x = var_48_cast_fp16, y = var_49_to_fp16)[name = tensor<string, []>("mask_1_cast_fp16")];
+            tensor<int32, []> var_62 = const()[name = tensor<string, []>("op_62"), val = tensor<int32, []>(-1)];
+            tensor<fp16, [768, 768]> transformer_layer_0_attention_q_lin_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_0_attention_q_lin_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(183824192)))];
+            tensor<fp16, [768]> transformer_layer_0_attention_q_lin_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_0_attention_q_lin_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(185003904)))];
+            tensor<fp16, [1, 128, 768]> linear_0_cast_fp16 = linear(bias = transformer_layer_0_attention_q_lin_bias_to_fp16, weight = transformer_layer_0_attention_q_lin_weight_to_fp16, x = input_5_cast_fp16)[name = tensor<string, []>("linear_0_cast_fp16")];
+            tensor<int32, [4]> var_93 = const()[name = tensor<string, []>("op_93"), val = tensor<int32, [4]>([1, -1, 12, 64])];
+            tensor<fp16, [1, 128, 12, 64]> var_94_cast_fp16 = reshape(shape = var_93, x = linear_0_cast_fp16)[name = tensor<string, []>("op_94_cast_fp16")];
+            tensor<fp16, [768, 768]> transformer_layer_0_attention_k_lin_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_0_attention_k_lin_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(185005504)))];
+            tensor<fp16, [768]> transformer_layer_0_attention_k_lin_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_0_attention_k_lin_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(186185216)))];
+            tensor<fp16, [1, 128, 768]> linear_1_cast_fp16 = linear(bias = transformer_layer_0_attention_k_lin_bias_to_fp16, weight = transformer_layer_0_attention_k_lin_weight_to_fp16, x = input_5_cast_fp16)[name = tensor<string, []>("linear_1_cast_fp16")];
+            tensor<int32, [4]> var_99 = const()[name = tensor<string, []>("op_99"), val = tensor<int32, [4]>([1, -1, 12, 64])];
+            tensor<fp16, [1, 128, 12, 64]> var_100_cast_fp16 = reshape(shape = var_99, x = linear_1_cast_fp16)[name = tensor<string, []>("op_100_cast_fp16")];
+            tensor<fp16, [768, 768]> transformer_layer_0_attention_v_lin_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_0_attention_v_lin_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(186186816)))];
+            tensor<fp16, [768]> transformer_layer_0_attention_v_lin_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_0_attention_v_lin_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(187366528)))];
+            tensor<fp16, [1, 128, 768]> linear_2_cast_fp16 = linear(bias = transformer_layer_0_attention_v_lin_bias_to_fp16, weight = transformer_layer_0_attention_v_lin_weight_to_fp16, x = input_5_cast_fp16)[name = tensor<string, []>("linear_2_cast_fp16")];
+            tensor<int32, [4]> var_105 = const()[name = tensor<string, []>("op_105"), val = tensor<int32, [4]>([1, -1, 12, 64])];
+            tensor<fp16, [1, 128, 12, 64]> var_106_cast_fp16 = reshape(shape = var_105, x = linear_2_cast_fp16)[name = tensor<string, []>("op_106_cast_fp16")];
+            tensor<int32, [4]> v_1_perm_0 = const()[name = tensor<string, []>("v_1_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<fp16, []> _inversed_q_3_y_0_to_fp16 = const()[name = tensor<string, []>("_inversed_q_3_y_0_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 128, 12, 64]> _inversed_q_3_cast_fp16 = mul(x = var_94_cast_fp16, y = _inversed_q_3_y_0_to_fp16)[name = tensor<string, []>("_inversed_q_3_cast_fp16")];
+            tensor<bool, []> scores_1_transpose_x_0 = const()[name = tensor<string, []>("scores_1_transpose_x_0"), val = tensor<bool, []>(false)];
+            tensor<bool, []> scores_1_transpose_y_0 = const()[name = tensor<string, []>("scores_1_transpose_y_0"), val = tensor<bool, []>(false)];
+            tensor<int32, [4]> transpose_18_perm_0 = const()[name = tensor<string, []>("transpose_18_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_19_perm_0 = const()[name = tensor<string, []>("transpose_19_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 12, 64, 128]> transpose_19 = transpose(perm = transpose_19_perm_0, x = var_100_cast_fp16)[name = tensor<string, []>("transpose_51")];
+            tensor<fp16, [1, 12, 128, 64]> transpose_18 = transpose(perm = transpose_18_perm_0, x = _inversed_q_3_cast_fp16)[name = tensor<string, []>("transpose_52")];
+            tensor<fp16, [1, 12, 128, 128]> scores_1_cast_fp16 = matmul(transpose_x = scores_1_transpose_x_0, transpose_y = scores_1_transpose_y_0, x = transpose_18, y = transpose_19)[name = tensor<string, []>("scores_1_cast_fp16")];
+            tensor<fp16, []> var_64_promoted_to_fp16 = const()[name = tensor<string, []>("op_64_promoted_to_fp16"), val = tensor<fp16, []>(0x0p+0)];
+            tensor<bool, [1, 1, 1, 128]> var_112_cast_fp16 = equal(x = mask_1_cast_fp16, y = var_64_promoted_to_fp16)[name = tensor<string, []>("op_112_cast_fp16")];
+            tensor<int32, [4]> mask_3_reps_0 = const()[name = tensor<string, []>("mask_3_reps_0"), val = tensor<int32, [4]>([1, 12, 128, 1])];
+            tensor<bool, [1, 12, 128, 128]> mask_3 = tile(reps = mask_3_reps_0, x = var_112_cast_fp16)[name = tensor<string, []>("mask_3")];
+            tensor<fp16, []> const_3_to_fp16 = const()[name = tensor<string, []>("const_3_to_fp16"), val = tensor<fp16, []>(-inf)];
+            tensor<fp16, [1, 12, 128, 128]> input_7_cast_fp16 = select(a = const_3_to_fp16, b = scores_1_cast_fp16, cond = mask_3)[name = tensor<string, []>("input_7_cast_fp16")];
+            tensor<fp16, [1, 12, 128, 128]> input_9_cast_fp16 = softmax(axis = var_62, x = input_7_cast_fp16)[name = tensor<string, []>("input_9_cast_fp16")];
+            tensor<bool, []> x_7_transpose_x_0 = const()[name = tensor<string, []>("x_7_transpose_x_0"), val = tensor<bool, []>(false)];
+            tensor<bool, []> x_7_transpose_y_0 = const()[name = tensor<string, []>("x_7_transpose_y_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 12, 128, 64]> v_1_cast_fp16 = transpose(perm = v_1_perm_0, x = var_106_cast_fp16)[name = tensor<string, []>("transpose_53")];
+            tensor<fp16, [1, 12, 128, 64]> x_7_cast_fp16 = matmul(transpose_x = x_7_transpose_x_0, transpose_y = x_7_transpose_y_0, x = input_9_cast_fp16, y = v_1_cast_fp16)[name = tensor<string, []>("x_7_cast_fp16")];
+            tensor<int32, [4]> var_122_perm_0 = const()[name = tensor<string, []>("op_122_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> var_124 = const()[name = tensor<string, []>("op_124"), val = tensor<int32, [3]>([1, -1, 768])];
+            tensor<fp16, [1, 128, 12, 64]> var_122_cast_fp16 = transpose(perm = var_122_perm_0, x = x_7_cast_fp16)[name = tensor<string, []>("transpose_50")];
+            tensor<fp16, [1, 128, 768]> input_11_cast_fp16 = reshape(shape = var_124, x = var_122_cast_fp16)[name = tensor<string, []>("input_11_cast_fp16")];
+            tensor<fp16, [768, 768]> transformer_layer_0_attention_out_lin_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_0_attention_out_lin_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(187368128)))];
+            tensor<fp16, [768]> transformer_layer_0_attention_out_lin_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_0_attention_out_lin_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(188547840)))];
+            tensor<fp16, [1, 128, 768]> linear_3_cast_fp16 = linear(bias = transformer_layer_0_attention_out_lin_bias_to_fp16, weight = transformer_layer_0_attention_out_lin_weight_to_fp16, x = input_11_cast_fp16)[name = tensor<string, []>("linear_3_cast_fp16")];
+            tensor<fp16, [1, 128, 768]> input_13_cast_fp16 = add(x = linear_3_cast_fp16, y = input_5_cast_fp16)[name = tensor<string, []>("input_13_cast_fp16")];
+            tensor<int32, [1]> input_15_axes_0 = const()[name = tensor<string, []>("input_15_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [768]> transformer_layer_0_sa_layer_norm_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_0_sa_layer_norm_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(188549440)))];
+            tensor<fp16, [768]> transformer_layer_0_sa_layer_norm_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_0_sa_layer_norm_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(188551040)))];
+            tensor<fp16, []> var_66_to_fp16 = const()[name = tensor<string, []>("op_66_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 128, 768]> input_15_cast_fp16 = layer_norm(axes = input_15_axes_0, beta = transformer_layer_0_sa_layer_norm_bias_to_fp16, epsilon = var_66_to_fp16, gamma = transformer_layer_0_sa_layer_norm_weight_to_fp16, x = input_13_cast_fp16)[name = tensor<string, []>("input_15_cast_fp16")];
+            tensor<fp16, [3072, 768]> transformer_layer_0_ffn_lin1_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_0_ffn_lin1_weight_to_fp16"), val = tensor<fp16, [3072, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(188552640)))];
+            tensor<fp16, [3072]> transformer_layer_0_ffn_lin1_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_0_ffn_lin1_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(193271296)))];
+            tensor<fp16, [1, 128, 3072]> linear_4_cast_fp16 = linear(bias = transformer_layer_0_ffn_lin1_bias_to_fp16, weight = transformer_layer_0_ffn_lin1_weight_to_fp16, x = input_15_cast_fp16)[name = tensor<string, []>("linear_4_cast_fp16")];
+            tensor<string, []> input_19_mode_0 = const()[name = tensor<string, []>("input_19_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 128, 3072]> input_19_cast_fp16 = gelu(mode = input_19_mode_0, x = linear_4_cast_fp16)[name = tensor<string, []>("input_19_cast_fp16")];
+            tensor<fp16, [768, 3072]> transformer_layer_0_ffn_lin2_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_0_ffn_lin2_weight_to_fp16"), val = tensor<fp16, [768, 3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(193277504)))];
+            tensor<fp16, [768]> transformer_layer_0_ffn_lin2_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_0_ffn_lin2_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(197996160)))];
+            tensor<fp16, [1, 128, 768]> linear_5_cast_fp16 = linear(bias = transformer_layer_0_ffn_lin2_bias_to_fp16, weight = transformer_layer_0_ffn_lin2_weight_to_fp16, x = input_19_cast_fp16)[name = tensor<string, []>("linear_5_cast_fp16")];
+            tensor<fp16, [1, 128, 768]> input_23_cast_fp16 = add(x = linear_5_cast_fp16, y = input_15_cast_fp16)[name = tensor<string, []>("input_23_cast_fp16")];
+            tensor<int32, [1]> query_3_axes_0 = const()[name = tensor<string, []>("query_3_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [768]> transformer_layer_0_output_layer_norm_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_0_output_layer_norm_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(197997760)))];
+            tensor<fp16, [768]> transformer_layer_0_output_layer_norm_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_0_output_layer_norm_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(197999360)))];
+            tensor<fp16, [1, 128, 768]> query_3_cast_fp16 = layer_norm(axes = query_3_axes_0, beta = transformer_layer_0_output_layer_norm_bias_to_fp16, epsilon = var_66_to_fp16, gamma = transformer_layer_0_output_layer_norm_weight_to_fp16, x = input_23_cast_fp16)[name = tensor<string, []>("query_3_cast_fp16")];
+            tensor<fp16, [768, 768]> transformer_layer_1_attention_q_lin_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_1_attention_q_lin_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(198000960)))];
+            tensor<fp16, [768]> transformer_layer_1_attention_q_lin_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_1_attention_q_lin_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(199180672)))];
+            tensor<fp16, [1, 128, 768]> linear_6_cast_fp16 = linear(bias = transformer_layer_1_attention_q_lin_bias_to_fp16, weight = transformer_layer_1_attention_q_lin_weight_to_fp16, x = query_3_cast_fp16)[name = tensor<string, []>("linear_6_cast_fp16")];
+            tensor<int32, [4]> var_162 = const()[name = tensor<string, []>("op_162"), val = tensor<int32, [4]>([1, -1, 12, 64])];
+            tensor<fp16, [1, 128, 12, 64]> var_163_cast_fp16 = reshape(shape = var_162, x = linear_6_cast_fp16)[name = tensor<string, []>("op_163_cast_fp16")];
+            tensor<fp16, [768, 768]> transformer_layer_1_attention_k_lin_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_1_attention_k_lin_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(199182272)))];
+            tensor<fp16, [768]> transformer_layer_1_attention_k_lin_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_1_attention_k_lin_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(200361984)))];
+            tensor<fp16, [1, 128, 768]> linear_7_cast_fp16 = linear(bias = transformer_layer_1_attention_k_lin_bias_to_fp16, weight = transformer_layer_1_attention_k_lin_weight_to_fp16, x = query_3_cast_fp16)[name = tensor<string, []>("linear_7_cast_fp16")];
+            tensor<int32, [4]> var_168 = const()[name = tensor<string, []>("op_168"), val = tensor<int32, [4]>([1, -1, 12, 64])];
+            tensor<fp16, [1, 128, 12, 64]> var_169_cast_fp16 = reshape(shape = var_168, x = linear_7_cast_fp16)[name = tensor<string, []>("op_169_cast_fp16")];
+            tensor<fp16, [768, 768]> transformer_layer_1_attention_v_lin_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_1_attention_v_lin_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(200363584)))];
+            tensor<fp16, [768]> transformer_layer_1_attention_v_lin_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_1_attention_v_lin_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(201543296)))];
+            tensor<fp16, [1, 128, 768]> linear_8_cast_fp16 = linear(bias = transformer_layer_1_attention_v_lin_bias_to_fp16, weight = transformer_layer_1_attention_v_lin_weight_to_fp16, x = query_3_cast_fp16)[name = tensor<string, []>("linear_8_cast_fp16")];
+            tensor<int32, [4]> var_174 = const()[name = tensor<string, []>("op_174"), val = tensor<int32, [4]>([1, -1, 12, 64])];
+            tensor<fp16, [1, 128, 12, 64]> var_175_cast_fp16 = reshape(shape = var_174, x = linear_8_cast_fp16)[name = tensor<string, []>("op_175_cast_fp16")];
+            tensor<int32, [4]> v_3_perm_0 = const()[name = tensor<string, []>("v_3_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<fp16, []> _inversed_q_7_y_0_to_fp16 = const()[name = tensor<string, []>("_inversed_q_7_y_0_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 128, 12, 64]> _inversed_q_7_cast_fp16 = mul(x = var_163_cast_fp16, y = _inversed_q_7_y_0_to_fp16)[name = tensor<string, []>("_inversed_q_7_cast_fp16")];
+            tensor<bool, []> scores_3_transpose_x_0 = const()[name = tensor<string, []>("scores_3_transpose_x_0"), val = tensor<bool, []>(false)];
+            tensor<bool, []> scores_3_transpose_y_0 = const()[name = tensor<string, []>("scores_3_transpose_y_0"), val = tensor<bool, []>(false)];
+            tensor<int32, [4]> transpose_20_perm_0 = const()[name = tensor<string, []>("transpose_20_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_21_perm_0 = const()[name = tensor<string, []>("transpose_21_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 12, 64, 128]> transpose_21 = transpose(perm = transpose_21_perm_0, x = var_169_cast_fp16)[name = tensor<string, []>("transpose_47")];
+            tensor<fp16, [1, 12, 128, 64]> transpose_20 = transpose(perm = transpose_20_perm_0, x = _inversed_q_7_cast_fp16)[name = tensor<string, []>("transpose_48")];
+            tensor<fp16, [1, 12, 128, 128]> scores_3_cast_fp16 = matmul(transpose_x = scores_3_transpose_x_0, transpose_y = scores_3_transpose_y_0, x = transpose_20, y = transpose_21)[name = tensor<string, []>("scores_3_cast_fp16")];
+            tensor<fp16, []> const_6_to_fp16 = const()[name = tensor<string, []>("const_6_to_fp16"), val = tensor<fp16, []>(-inf)];
+            tensor<fp16, [1, 12, 128, 128]> input_25_cast_fp16 = select(a = const_6_to_fp16, b = scores_3_cast_fp16, cond = mask_3)[name = tensor<string, []>("input_25_cast_fp16")];
+            tensor<fp16, [1, 12, 128, 128]> input_27_cast_fp16 = softmax(axis = var_62, x = input_25_cast_fp16)[name = tensor<string, []>("input_27_cast_fp16")];
+            tensor<bool, []> x_15_transpose_x_0 = const()[name = tensor<string, []>("x_15_transpose_x_0"), val = tensor<bool, []>(false)];
+            tensor<bool, []> x_15_transpose_y_0 = const()[name = tensor<string, []>("x_15_transpose_y_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 12, 128, 64]> v_3_cast_fp16 = transpose(perm = v_3_perm_0, x = var_175_cast_fp16)[name = tensor<string, []>("transpose_49")];
+            tensor<fp16, [1, 12, 128, 64]> x_15_cast_fp16 = matmul(transpose_x = x_15_transpose_x_0, transpose_y = x_15_transpose_y_0, x = input_27_cast_fp16, y = v_3_cast_fp16)[name = tensor<string, []>("x_15_cast_fp16")];
+            tensor<int32, [4]> var_191_perm_0 = const()[name = tensor<string, []>("op_191_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> var_193 = const()[name = tensor<string, []>("op_193"), val = tensor<int32, [3]>([1, -1, 768])];
+            tensor<fp16, [1, 128, 12, 64]> var_191_cast_fp16 = transpose(perm = var_191_perm_0, x = x_15_cast_fp16)[name = tensor<string, []>("transpose_46")];
+            tensor<fp16, [1, 128, 768]> input_29_cast_fp16 = reshape(shape = var_193, x = var_191_cast_fp16)[name = tensor<string, []>("input_29_cast_fp16")];
+            tensor<fp16, [768, 768]> transformer_layer_1_attention_out_lin_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_1_attention_out_lin_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(201544896)))];
+            tensor<fp16, [768]> transformer_layer_1_attention_out_lin_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_1_attention_out_lin_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(202724608)))];
+            tensor<fp16, [1, 128, 768]> linear_9_cast_fp16 = linear(bias = transformer_layer_1_attention_out_lin_bias_to_fp16, weight = transformer_layer_1_attention_out_lin_weight_to_fp16, x = input_29_cast_fp16)[name = tensor<string, []>("linear_9_cast_fp16")];
+            tensor<fp16, [1, 128, 768]> input_31_cast_fp16 = add(x = linear_9_cast_fp16, y = query_3_cast_fp16)[name = tensor<string, []>("input_31_cast_fp16")];
+            tensor<int32, [1]> input_33_axes_0 = const()[name = tensor<string, []>("input_33_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [768]> transformer_layer_1_sa_layer_norm_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_1_sa_layer_norm_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(202726208)))];
+            tensor<fp16, [768]> transformer_layer_1_sa_layer_norm_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_1_sa_layer_norm_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(202727808)))];
+            tensor<fp16, [1, 128, 768]> input_33_cast_fp16 = layer_norm(axes = input_33_axes_0, beta = transformer_layer_1_sa_layer_norm_bias_to_fp16, epsilon = var_66_to_fp16, gamma = transformer_layer_1_sa_layer_norm_weight_to_fp16, x = input_31_cast_fp16)[name = tensor<string, []>("input_33_cast_fp16")];
+            tensor<fp16, [3072, 768]> transformer_layer_1_ffn_lin1_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_1_ffn_lin1_weight_to_fp16"), val = tensor<fp16, [3072, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(202729408)))];
+            tensor<fp16, [3072]> transformer_layer_1_ffn_lin1_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_1_ffn_lin1_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(207448064)))];
+            tensor<fp16, [1, 128, 3072]> linear_10_cast_fp16 = linear(bias = transformer_layer_1_ffn_lin1_bias_to_fp16, weight = transformer_layer_1_ffn_lin1_weight_to_fp16, x = input_33_cast_fp16)[name = tensor<string, []>("linear_10_cast_fp16")];
+            tensor<string, []> input_37_mode_0 = const()[name = tensor<string, []>("input_37_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 128, 3072]> input_37_cast_fp16 = gelu(mode = input_37_mode_0, x = linear_10_cast_fp16)[name = tensor<string, []>("input_37_cast_fp16")];
+            tensor<fp16, [768, 3072]> transformer_layer_1_ffn_lin2_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_1_ffn_lin2_weight_to_fp16"), val = tensor<fp16, [768, 3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(207454272)))];
+            tensor<fp16, [768]> transformer_layer_1_ffn_lin2_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_1_ffn_lin2_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(212172928)))];
+            tensor<fp16, [1, 128, 768]> linear_11_cast_fp16 = linear(bias = transformer_layer_1_ffn_lin2_bias_to_fp16, weight = transformer_layer_1_ffn_lin2_weight_to_fp16, x = input_37_cast_fp16)[name = tensor<string, []>("linear_11_cast_fp16")];
+            tensor<fp16, [1, 128, 768]> input_41_cast_fp16 = add(x = linear_11_cast_fp16, y = input_33_cast_fp16)[name = tensor<string, []>("input_41_cast_fp16")];
+            tensor<int32, [1]> query_5_axes_0 = const()[name = tensor<string, []>("query_5_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [768]> transformer_layer_1_output_layer_norm_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_1_output_layer_norm_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(212174528)))];
+            tensor<fp16, [768]> transformer_layer_1_output_layer_norm_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_1_output_layer_norm_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(212176128)))];
+            tensor<fp16, [1, 128, 768]> query_5_cast_fp16 = layer_norm(axes = query_5_axes_0, beta = transformer_layer_1_output_layer_norm_bias_to_fp16, epsilon = var_66_to_fp16, gamma = transformer_layer_1_output_layer_norm_weight_to_fp16, x = input_41_cast_fp16)[name = tensor<string, []>("query_5_cast_fp16")];
+            tensor<fp16, [768, 768]> transformer_layer_2_attention_q_lin_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_2_attention_q_lin_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(212177728)))];
+            tensor<fp16, [768]> transformer_layer_2_attention_q_lin_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_2_attention_q_lin_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(213357440)))];
+            tensor<fp16, [1, 128, 768]> linear_12_cast_fp16 = linear(bias = transformer_layer_2_attention_q_lin_bias_to_fp16, weight = transformer_layer_2_attention_q_lin_weight_to_fp16, x = query_5_cast_fp16)[name = tensor<string, []>("linear_12_cast_fp16")];
+            tensor<int32, [4]> var_231 = const()[name = tensor<string, []>("op_231"), val = tensor<int32, [4]>([1, -1, 12, 64])];
+            tensor<fp16, [1, 128, 12, 64]> var_232_cast_fp16 = reshape(shape = var_231, x = linear_12_cast_fp16)[name = tensor<string, []>("op_232_cast_fp16")];
+            tensor<fp16, [768, 768]> transformer_layer_2_attention_k_lin_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_2_attention_k_lin_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(213359040)))];
+            tensor<fp16, [768]> transformer_layer_2_attention_k_lin_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_2_attention_k_lin_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(214538752)))];
+            tensor<fp16, [1, 128, 768]> linear_13_cast_fp16 = linear(bias = transformer_layer_2_attention_k_lin_bias_to_fp16, weight = transformer_layer_2_attention_k_lin_weight_to_fp16, x = query_5_cast_fp16)[name = tensor<string, []>("linear_13_cast_fp16")];
+            tensor<int32, [4]> var_237 = const()[name = tensor<string, []>("op_237"), val = tensor<int32, [4]>([1, -1, 12, 64])];
+            tensor<fp16, [1, 128, 12, 64]> var_238_cast_fp16 = reshape(shape = var_237, x = linear_13_cast_fp16)[name = tensor<string, []>("op_238_cast_fp16")];
+            tensor<fp16, [768, 768]> transformer_layer_2_attention_v_lin_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_2_attention_v_lin_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(214540352)))];
+            tensor<fp16, [768]> transformer_layer_2_attention_v_lin_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_2_attention_v_lin_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(215720064)))];
+            tensor<fp16, [1, 128, 768]> linear_14_cast_fp16 = linear(bias = transformer_layer_2_attention_v_lin_bias_to_fp16, weight = transformer_layer_2_attention_v_lin_weight_to_fp16, x = query_5_cast_fp16)[name = tensor<string, []>("linear_14_cast_fp16")];
+            tensor<int32, [4]> var_243 = const()[name = tensor<string, []>("op_243"), val = tensor<int32, [4]>([1, -1, 12, 64])];
+            tensor<fp16, [1, 128, 12, 64]> var_244_cast_fp16 = reshape(shape = var_243, x = linear_14_cast_fp16)[name = tensor<string, []>("op_244_cast_fp16")];
+            tensor<int32, [4]> v_5_perm_0 = const()[name = tensor<string, []>("v_5_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<fp16, []> _inversed_q_11_y_0_to_fp16 = const()[name = tensor<string, []>("_inversed_q_11_y_0_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 128, 12, 64]> _inversed_q_11_cast_fp16 = mul(x = var_232_cast_fp16, y = _inversed_q_11_y_0_to_fp16)[name = tensor<string, []>("_inversed_q_11_cast_fp16")];
+            tensor<bool, []> scores_5_transpose_x_0 = const()[name = tensor<string, []>("scores_5_transpose_x_0"), val = tensor<bool, []>(false)];
+            tensor<bool, []> scores_5_transpose_y_0 = const()[name = tensor<string, []>("scores_5_transpose_y_0"), val = tensor<bool, []>(false)];
+            tensor<int32, [4]> transpose_22_perm_0 = const()[name = tensor<string, []>("transpose_22_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_23_perm_0 = const()[name = tensor<string, []>("transpose_23_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 12, 64, 128]> transpose_23 = transpose(perm = transpose_23_perm_0, x = var_238_cast_fp16)[name = tensor<string, []>("transpose_43")];
+            tensor<fp16, [1, 12, 128, 64]> transpose_22 = transpose(perm = transpose_22_perm_0, x = _inversed_q_11_cast_fp16)[name = tensor<string, []>("transpose_44")];
+            tensor<fp16, [1, 12, 128, 128]> scores_5_cast_fp16 = matmul(transpose_x = scores_5_transpose_x_0, transpose_y = scores_5_transpose_y_0, x = transpose_22, y = transpose_23)[name = tensor<string, []>("scores_5_cast_fp16")];
+            tensor<fp16, []> const_9_to_fp16 = const()[name = tensor<string, []>("const_9_to_fp16"), val = tensor<fp16, []>(-inf)];
+            tensor<fp16, [1, 12, 128, 128]> input_43_cast_fp16 = select(a = const_9_to_fp16, b = scores_5_cast_fp16, cond = mask_3)[name = tensor<string, []>("input_43_cast_fp16")];
+            tensor<fp16, [1, 12, 128, 128]> input_45_cast_fp16 = softmax(axis = var_62, x = input_43_cast_fp16)[name = tensor<string, []>("input_45_cast_fp16")];
+            tensor<bool, []> x_23_transpose_x_0 = const()[name = tensor<string, []>("x_23_transpose_x_0"), val = tensor<bool, []>(false)];
+            tensor<bool, []> x_23_transpose_y_0 = const()[name = tensor<string, []>("x_23_transpose_y_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 12, 128, 64]> v_5_cast_fp16 = transpose(perm = v_5_perm_0, x = var_244_cast_fp16)[name = tensor<string, []>("transpose_45")];
+            tensor<fp16, [1, 12, 128, 64]> x_23_cast_fp16 = matmul(transpose_x = x_23_transpose_x_0, transpose_y = x_23_transpose_y_0, x = input_45_cast_fp16, y = v_5_cast_fp16)[name = tensor<string, []>("x_23_cast_fp16")];
+            tensor<int32, [4]> var_260_perm_0 = const()[name = tensor<string, []>("op_260_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> var_262 = const()[name = tensor<string, []>("op_262"), val = tensor<int32, [3]>([1, -1, 768])];
+            tensor<fp16, [1, 128, 12, 64]> var_260_cast_fp16 = transpose(perm = var_260_perm_0, x = x_23_cast_fp16)[name = tensor<string, []>("transpose_42")];
+            tensor<fp16, [1, 128, 768]> input_47_cast_fp16 = reshape(shape = var_262, x = var_260_cast_fp16)[name = tensor<string, []>("input_47_cast_fp16")];
+            tensor<fp16, [768, 768]> transformer_layer_2_attention_out_lin_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_2_attention_out_lin_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(215721664)))];
+            tensor<fp16, [768]> transformer_layer_2_attention_out_lin_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_2_attention_out_lin_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(216901376)))];
+            tensor<fp16, [1, 128, 768]> linear_15_cast_fp16 = linear(bias = transformer_layer_2_attention_out_lin_bias_to_fp16, weight = transformer_layer_2_attention_out_lin_weight_to_fp16, x = input_47_cast_fp16)[name = tensor<string, []>("linear_15_cast_fp16")];
+            tensor<fp16, [1, 128, 768]> input_49_cast_fp16 = add(x = linear_15_cast_fp16, y = query_5_cast_fp16)[name = tensor<string, []>("input_49_cast_fp16")];
+            tensor<int32, [1]> input_51_axes_0 = const()[name = tensor<string, []>("input_51_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [768]> transformer_layer_2_sa_layer_norm_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_2_sa_layer_norm_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(216902976)))];
+            tensor<fp16, [768]> transformer_layer_2_sa_layer_norm_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_2_sa_layer_norm_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(216904576)))];
+            tensor<fp16, [1, 128, 768]> input_51_cast_fp16 = layer_norm(axes = input_51_axes_0, beta = transformer_layer_2_sa_layer_norm_bias_to_fp16, epsilon = var_66_to_fp16, gamma = transformer_layer_2_sa_layer_norm_weight_to_fp16, x = input_49_cast_fp16)[name = tensor<string, []>("input_51_cast_fp16")];
+            tensor<fp16, [3072, 768]> transformer_layer_2_ffn_lin1_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_2_ffn_lin1_weight_to_fp16"), val = tensor<fp16, [3072, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(216906176)))];
+            tensor<fp16, [3072]> transformer_layer_2_ffn_lin1_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_2_ffn_lin1_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(221624832)))];
+            tensor<fp16, [1, 128, 3072]> linear_16_cast_fp16 = linear(bias = transformer_layer_2_ffn_lin1_bias_to_fp16, weight = transformer_layer_2_ffn_lin1_weight_to_fp16, x = input_51_cast_fp16)[name = tensor<string, []>("linear_16_cast_fp16")];
+            tensor<string, []> input_55_mode_0 = const()[name = tensor<string, []>("input_55_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 128, 3072]> input_55_cast_fp16 = gelu(mode = input_55_mode_0, x = linear_16_cast_fp16)[name = tensor<string, []>("input_55_cast_fp16")];
+            tensor<fp16, [768, 3072]> transformer_layer_2_ffn_lin2_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_2_ffn_lin2_weight_to_fp16"), val = tensor<fp16, [768, 3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(221631040)))];
+            tensor<fp16, [768]> transformer_layer_2_ffn_lin2_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_2_ffn_lin2_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(226349696)))];
+            tensor<fp16, [1, 128, 768]> linear_17_cast_fp16 = linear(bias = transformer_layer_2_ffn_lin2_bias_to_fp16, weight = transformer_layer_2_ffn_lin2_weight_to_fp16, x = input_55_cast_fp16)[name = tensor<string, []>("linear_17_cast_fp16")];
+            tensor<fp16, [1, 128, 768]> input_59_cast_fp16 = add(x = linear_17_cast_fp16, y = input_51_cast_fp16)[name = tensor<string, []>("input_59_cast_fp16")];
+            tensor<int32, [1]> query_7_axes_0 = const()[name = tensor<string, []>("query_7_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [768]> transformer_layer_2_output_layer_norm_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_2_output_layer_norm_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(226351296)))];
+            tensor<fp16, [768]> transformer_layer_2_output_layer_norm_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_2_output_layer_norm_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(226352896)))];
+            tensor<fp16, [1, 128, 768]> query_7_cast_fp16 = layer_norm(axes = query_7_axes_0, beta = transformer_layer_2_output_layer_norm_bias_to_fp16, epsilon = var_66_to_fp16, gamma = transformer_layer_2_output_layer_norm_weight_to_fp16, x = input_59_cast_fp16)[name = tensor<string, []>("query_7_cast_fp16")];
+            tensor<fp16, [768, 768]> transformer_layer_3_attention_q_lin_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_3_attention_q_lin_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(226354496)))];
+            tensor<fp16, [768]> transformer_layer_3_attention_q_lin_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_3_attention_q_lin_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(227534208)))];
+            tensor<fp16, [1, 128, 768]> linear_18_cast_fp16 = linear(bias = transformer_layer_3_attention_q_lin_bias_to_fp16, weight = transformer_layer_3_attention_q_lin_weight_to_fp16, x = query_7_cast_fp16)[name = tensor<string, []>("linear_18_cast_fp16")];
+            tensor<int32, [4]> var_300 = const()[name = tensor<string, []>("op_300"), val = tensor<int32, [4]>([1, -1, 12, 64])];
+            tensor<fp16, [1, 128, 12, 64]> var_301_cast_fp16 = reshape(shape = var_300, x = linear_18_cast_fp16)[name = tensor<string, []>("op_301_cast_fp16")];
+            tensor<fp16, [768, 768]> transformer_layer_3_attention_k_lin_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_3_attention_k_lin_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(227535808)))];
+            tensor<fp16, [768]> transformer_layer_3_attention_k_lin_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_3_attention_k_lin_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(228715520)))];
+            tensor<fp16, [1, 128, 768]> linear_19_cast_fp16 = linear(bias = transformer_layer_3_attention_k_lin_bias_to_fp16, weight = transformer_layer_3_attention_k_lin_weight_to_fp16, x = query_7_cast_fp16)[name = tensor<string, []>("linear_19_cast_fp16")];
+            tensor<int32, [4]> var_306 = const()[name = tensor<string, []>("op_306"), val = tensor<int32, [4]>([1, -1, 12, 64])];
+            tensor<fp16, [1, 128, 12, 64]> var_307_cast_fp16 = reshape(shape = var_306, x = linear_19_cast_fp16)[name = tensor<string, []>("op_307_cast_fp16")];
+            tensor<fp16, [768, 768]> transformer_layer_3_attention_v_lin_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_3_attention_v_lin_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(228717120)))];
+            tensor<fp16, [768]> transformer_layer_3_attention_v_lin_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_3_attention_v_lin_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(229896832)))];
+            tensor<fp16, [1, 128, 768]> linear_20_cast_fp16 = linear(bias = transformer_layer_3_attention_v_lin_bias_to_fp16, weight = transformer_layer_3_attention_v_lin_weight_to_fp16, x = query_7_cast_fp16)[name = tensor<string, []>("linear_20_cast_fp16")];
+            tensor<int32, [4]> var_312 = const()[name = tensor<string, []>("op_312"), val = tensor<int32, [4]>([1, -1, 12, 64])];
+            tensor<fp16, [1, 128, 12, 64]> var_313_cast_fp16 = reshape(shape = var_312, x = linear_20_cast_fp16)[name = tensor<string, []>("op_313_cast_fp16")];
+            tensor<int32, [4]> v_7_perm_0 = const()[name = tensor<string, []>("v_7_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<fp16, []> _inversed_q_15_y_0_to_fp16 = const()[name = tensor<string, []>("_inversed_q_15_y_0_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 128, 12, 64]> _inversed_q_15_cast_fp16 = mul(x = var_301_cast_fp16, y = _inversed_q_15_y_0_to_fp16)[name = tensor<string, []>("_inversed_q_15_cast_fp16")];
+            tensor<bool, []> scores_7_transpose_x_0 = const()[name = tensor<string, []>("scores_7_transpose_x_0"), val = tensor<bool, []>(false)];
+            tensor<bool, []> scores_7_transpose_y_0 = const()[name = tensor<string, []>("scores_7_transpose_y_0"), val = tensor<bool, []>(false)];
+            tensor<int32, [4]> transpose_24_perm_0 = const()[name = tensor<string, []>("transpose_24_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_25_perm_0 = const()[name = tensor<string, []>("transpose_25_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 12, 64, 128]> transpose_25 = transpose(perm = transpose_25_perm_0, x = var_307_cast_fp16)[name = tensor<string, []>("transpose_39")];
+            tensor<fp16, [1, 12, 128, 64]> transpose_24 = transpose(perm = transpose_24_perm_0, x = _inversed_q_15_cast_fp16)[name = tensor<string, []>("transpose_40")];
+            tensor<fp16, [1, 12, 128, 128]> scores_7_cast_fp16 = matmul(transpose_x = scores_7_transpose_x_0, transpose_y = scores_7_transpose_y_0, x = transpose_24, y = transpose_25)[name = tensor<string, []>("scores_7_cast_fp16")];
+            tensor<fp16, []> const_12_to_fp16 = const()[name = tensor<string, []>("const_12_to_fp16"), val = tensor<fp16, []>(-inf)];
+            tensor<fp16, [1, 12, 128, 128]> input_61_cast_fp16 = select(a = const_12_to_fp16, b = scores_7_cast_fp16, cond = mask_3)[name = tensor<string, []>("input_61_cast_fp16")];
+            tensor<fp16, [1, 12, 128, 128]> input_63_cast_fp16 = softmax(axis = var_62, x = input_61_cast_fp16)[name = tensor<string, []>("input_63_cast_fp16")];
+            tensor<bool, []> x_31_transpose_x_0 = const()[name = tensor<string, []>("x_31_transpose_x_0"), val = tensor<bool, []>(false)];
+            tensor<bool, []> x_31_transpose_y_0 = const()[name = tensor<string, []>("x_31_transpose_y_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 12, 128, 64]> v_7_cast_fp16 = transpose(perm = v_7_perm_0, x = var_313_cast_fp16)[name = tensor<string, []>("transpose_41")];
+            tensor<fp16, [1, 12, 128, 64]> x_31_cast_fp16 = matmul(transpose_x = x_31_transpose_x_0, transpose_y = x_31_transpose_y_0, x = input_63_cast_fp16, y = v_7_cast_fp16)[name = tensor<string, []>("x_31_cast_fp16")];
+            tensor<int32, [4]> var_329_perm_0 = const()[name = tensor<string, []>("op_329_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> var_331 = const()[name = tensor<string, []>("op_331"), val = tensor<int32, [3]>([1, -1, 768])];
+            tensor<fp16, [1, 128, 12, 64]> var_329_cast_fp16 = transpose(perm = var_329_perm_0, x = x_31_cast_fp16)[name = tensor<string, []>("transpose_38")];
+            tensor<fp16, [1, 128, 768]> input_65_cast_fp16 = reshape(shape = var_331, x = var_329_cast_fp16)[name = tensor<string, []>("input_65_cast_fp16")];
+            tensor<fp16, [768, 768]> transformer_layer_3_attention_out_lin_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_3_attention_out_lin_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(229898432)))];
+            tensor<fp16, [768]> transformer_layer_3_attention_out_lin_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_3_attention_out_lin_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(231078144)))];
+            tensor<fp16, [1, 128, 768]> linear_21_cast_fp16 = linear(bias = transformer_layer_3_attention_out_lin_bias_to_fp16, weight = transformer_layer_3_attention_out_lin_weight_to_fp16, x = input_65_cast_fp16)[name = tensor<string, []>("linear_21_cast_fp16")];
+            tensor<fp16, [1, 128, 768]> input_67_cast_fp16 = add(x = linear_21_cast_fp16, y = query_7_cast_fp16)[name = tensor<string, []>("input_67_cast_fp16")];
+            tensor<int32, [1]> input_69_axes_0 = const()[name = tensor<string, []>("input_69_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [768]> transformer_layer_3_sa_layer_norm_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_3_sa_layer_norm_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(231079744)))];
+            tensor<fp16, [768]> transformer_layer_3_sa_layer_norm_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_3_sa_layer_norm_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(231081344)))];
+            tensor<fp16, [1, 128, 768]> input_69_cast_fp16 = layer_norm(axes = input_69_axes_0, beta = transformer_layer_3_sa_layer_norm_bias_to_fp16, epsilon = var_66_to_fp16, gamma = transformer_layer_3_sa_layer_norm_weight_to_fp16, x = input_67_cast_fp16)[name = tensor<string, []>("input_69_cast_fp16")];
+            tensor<fp16, [3072, 768]> transformer_layer_3_ffn_lin1_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_3_ffn_lin1_weight_to_fp16"), val = tensor<fp16, [3072, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(231082944)))];
+            tensor<fp16, [3072]> transformer_layer_3_ffn_lin1_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_3_ffn_lin1_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(235801600)))];
+            tensor<fp16, [1, 128, 3072]> linear_22_cast_fp16 = linear(bias = transformer_layer_3_ffn_lin1_bias_to_fp16, weight = transformer_layer_3_ffn_lin1_weight_to_fp16, x = input_69_cast_fp16)[name = tensor<string, []>("linear_22_cast_fp16")];
+            tensor<string, []> input_73_mode_0 = const()[name = tensor<string, []>("input_73_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 128, 3072]> input_73_cast_fp16 = gelu(mode = input_73_mode_0, x = linear_22_cast_fp16)[name = tensor<string, []>("input_73_cast_fp16")];
+            tensor<fp16, [768, 3072]> transformer_layer_3_ffn_lin2_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_3_ffn_lin2_weight_to_fp16"), val = tensor<fp16, [768, 3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(235807808)))];
+            tensor<fp16, [768]> transformer_layer_3_ffn_lin2_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_3_ffn_lin2_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(240526464)))];
+            tensor<fp16, [1, 128, 768]> linear_23_cast_fp16 = linear(bias = transformer_layer_3_ffn_lin2_bias_to_fp16, weight = transformer_layer_3_ffn_lin2_weight_to_fp16, x = input_73_cast_fp16)[name = tensor<string, []>("linear_23_cast_fp16")];
+            tensor<fp16, [1, 128, 768]> input_77_cast_fp16 = add(x = linear_23_cast_fp16, y = input_69_cast_fp16)[name = tensor<string, []>("input_77_cast_fp16")];
+            tensor<int32, [1]> query_9_axes_0 = const()[name = tensor<string, []>("query_9_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [768]> transformer_layer_3_output_layer_norm_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_3_output_layer_norm_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(240528064)))];
+            tensor<fp16, [768]> transformer_layer_3_output_layer_norm_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_3_output_layer_norm_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(240529664)))];
+            tensor<fp16, [1, 128, 768]> query_9_cast_fp16 = layer_norm(axes = query_9_axes_0, beta = transformer_layer_3_output_layer_norm_bias_to_fp16, epsilon = var_66_to_fp16, gamma = transformer_layer_3_output_layer_norm_weight_to_fp16, x = input_77_cast_fp16)[name = tensor<string, []>("query_9_cast_fp16")];
+            tensor<fp16, [768, 768]> transformer_layer_4_attention_q_lin_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_4_attention_q_lin_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(240531264)))];
+            tensor<fp16, [768]> transformer_layer_4_attention_q_lin_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_4_attention_q_lin_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(241710976)))];
+            tensor<fp16, [1, 128, 768]> linear_24_cast_fp16 = linear(bias = transformer_layer_4_attention_q_lin_bias_to_fp16, weight = transformer_layer_4_attention_q_lin_weight_to_fp16, x = query_9_cast_fp16)[name = tensor<string, []>("linear_24_cast_fp16")];
+            tensor<int32, [4]> var_369 = const()[name = tensor<string, []>("op_369"), val = tensor<int32, [4]>([1, -1, 12, 64])];
+            tensor<fp16, [1, 128, 12, 64]> var_370_cast_fp16 = reshape(shape = var_369, x = linear_24_cast_fp16)[name = tensor<string, []>("op_370_cast_fp16")];
+            tensor<fp16, [768, 768]> transformer_layer_4_attention_k_lin_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_4_attention_k_lin_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(241712576)))];
+            tensor<fp16, [768]> transformer_layer_4_attention_k_lin_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_4_attention_k_lin_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(242892288)))];
+            tensor<fp16, [1, 128, 768]> linear_25_cast_fp16 = linear(bias = transformer_layer_4_attention_k_lin_bias_to_fp16, weight = transformer_layer_4_attention_k_lin_weight_to_fp16, x = query_9_cast_fp16)[name = tensor<string, []>("linear_25_cast_fp16")];
+            tensor<int32, [4]> var_375 = const()[name = tensor<string, []>("op_375"), val = tensor<int32, [4]>([1, -1, 12, 64])];
+            tensor<fp16, [1, 128, 12, 64]> var_376_cast_fp16 = reshape(shape = var_375, x = linear_25_cast_fp16)[name = tensor<string, []>("op_376_cast_fp16")];
+            tensor<fp16, [768, 768]> transformer_layer_4_attention_v_lin_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_4_attention_v_lin_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(242893888)))];
+            tensor<fp16, [768]> transformer_layer_4_attention_v_lin_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_4_attention_v_lin_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(244073600)))];
+            tensor<fp16, [1, 128, 768]> linear_26_cast_fp16 = linear(bias = transformer_layer_4_attention_v_lin_bias_to_fp16, weight = transformer_layer_4_attention_v_lin_weight_to_fp16, x = query_9_cast_fp16)[name = tensor<string, []>("linear_26_cast_fp16")];
+            tensor<int32, [4]> var_381 = const()[name = tensor<string, []>("op_381"), val = tensor<int32, [4]>([1, -1, 12, 64])];
+            tensor<fp16, [1, 128, 12, 64]> var_382_cast_fp16 = reshape(shape = var_381, x = linear_26_cast_fp16)[name = tensor<string, []>("op_382_cast_fp16")];
+            tensor<int32, [4]> v_9_perm_0 = const()[name = tensor<string, []>("v_9_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<fp16, []> _inversed_q_19_y_0_to_fp16 = const()[name = tensor<string, []>("_inversed_q_19_y_0_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 128, 12, 64]> _inversed_q_19_cast_fp16 = mul(x = var_370_cast_fp16, y = _inversed_q_19_y_0_to_fp16)[name = tensor<string, []>("_inversed_q_19_cast_fp16")];
+            tensor<bool, []> scores_9_transpose_x_0 = const()[name = tensor<string, []>("scores_9_transpose_x_0"), val = tensor<bool, []>(false)];
+            tensor<bool, []> scores_9_transpose_y_0 = const()[name = tensor<string, []>("scores_9_transpose_y_0"), val = tensor<bool, []>(false)];
+            tensor<int32, [4]> transpose_26_perm_0 = const()[name = tensor<string, []>("transpose_26_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_27_perm_0 = const()[name = tensor<string, []>("transpose_27_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 12, 64, 128]> transpose_27 = transpose(perm = transpose_27_perm_0, x = var_376_cast_fp16)[name = tensor<string, []>("transpose_35")];
+            tensor<fp16, [1, 12, 128, 64]> transpose_26 = transpose(perm = transpose_26_perm_0, x = _inversed_q_19_cast_fp16)[name = tensor<string, []>("transpose_36")];
+            tensor<fp16, [1, 12, 128, 128]> scores_9_cast_fp16 = matmul(transpose_x = scores_9_transpose_x_0, transpose_y = scores_9_transpose_y_0, x = transpose_26, y = transpose_27)[name = tensor<string, []>("scores_9_cast_fp16")];
+            tensor<fp16, []> const_15_to_fp16 = const()[name = tensor<string, []>("const_15_to_fp16"), val = tensor<fp16, []>(-inf)];
+            tensor<fp16, [1, 12, 128, 128]> input_79_cast_fp16 = select(a = const_15_to_fp16, b = scores_9_cast_fp16, cond = mask_3)[name = tensor<string, []>("input_79_cast_fp16")];
+            tensor<fp16, [1, 12, 128, 128]> input_81_cast_fp16 = softmax(axis = var_62, x = input_79_cast_fp16)[name = tensor<string, []>("input_81_cast_fp16")];
+            tensor<bool, []> x_39_transpose_x_0 = const()[name = tensor<string, []>("x_39_transpose_x_0"), val = tensor<bool, []>(false)];
+            tensor<bool, []> x_39_transpose_y_0 = const()[name = tensor<string, []>("x_39_transpose_y_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 12, 128, 64]> v_9_cast_fp16 = transpose(perm = v_9_perm_0, x = var_382_cast_fp16)[name = tensor<string, []>("transpose_37")];
+            tensor<fp16, [1, 12, 128, 64]> x_39_cast_fp16 = matmul(transpose_x = x_39_transpose_x_0, transpose_y = x_39_transpose_y_0, x = input_81_cast_fp16, y = v_9_cast_fp16)[name = tensor<string, []>("x_39_cast_fp16")];
+            tensor<int32, [4]> var_398_perm_0 = const()[name = tensor<string, []>("op_398_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> var_400 = const()[name = tensor<string, []>("op_400"), val = tensor<int32, [3]>([1, -1, 768])];
+            tensor<fp16, [1, 128, 12, 64]> var_398_cast_fp16 = transpose(perm = var_398_perm_0, x = x_39_cast_fp16)[name = tensor<string, []>("transpose_34")];
+            tensor<fp16, [1, 128, 768]> input_83_cast_fp16 = reshape(shape = var_400, x = var_398_cast_fp16)[name = tensor<string, []>("input_83_cast_fp16")];
+            tensor<fp16, [768, 768]> transformer_layer_4_attention_out_lin_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_4_attention_out_lin_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(244075200)))];
+            tensor<fp16, [768]> transformer_layer_4_attention_out_lin_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_4_attention_out_lin_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(245254912)))];
+            tensor<fp16, [1, 128, 768]> linear_27_cast_fp16 = linear(bias = transformer_layer_4_attention_out_lin_bias_to_fp16, weight = transformer_layer_4_attention_out_lin_weight_to_fp16, x = input_83_cast_fp16)[name = tensor<string, []>("linear_27_cast_fp16")];
+            tensor<fp16, [1, 128, 768]> input_85_cast_fp16 = add(x = linear_27_cast_fp16, y = query_9_cast_fp16)[name = tensor<string, []>("input_85_cast_fp16")];
+            tensor<int32, [1]> input_87_axes_0 = const()[name = tensor<string, []>("input_87_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [768]> transformer_layer_4_sa_layer_norm_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_4_sa_layer_norm_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(245256512)))];
+            tensor<fp16, [768]> transformer_layer_4_sa_layer_norm_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_4_sa_layer_norm_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(245258112)))];
+            tensor<fp16, [1, 128, 768]> input_87_cast_fp16 = layer_norm(axes = input_87_axes_0, beta = transformer_layer_4_sa_layer_norm_bias_to_fp16, epsilon = var_66_to_fp16, gamma = transformer_layer_4_sa_layer_norm_weight_to_fp16, x = input_85_cast_fp16)[name = tensor<string, []>("input_87_cast_fp16")];
+            tensor<fp16, [3072, 768]> transformer_layer_4_ffn_lin1_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_4_ffn_lin1_weight_to_fp16"), val = tensor<fp16, [3072, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(245259712)))];
+            tensor<fp16, [3072]> transformer_layer_4_ffn_lin1_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_4_ffn_lin1_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(249978368)))];
+            tensor<fp16, [1, 128, 3072]> linear_28_cast_fp16 = linear(bias = transformer_layer_4_ffn_lin1_bias_to_fp16, weight = transformer_layer_4_ffn_lin1_weight_to_fp16, x = input_87_cast_fp16)[name = tensor<string, []>("linear_28_cast_fp16")];
+            tensor<string, []> input_91_mode_0 = const()[name = tensor<string, []>("input_91_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 128, 3072]> input_91_cast_fp16 = gelu(mode = input_91_mode_0, x = linear_28_cast_fp16)[name = tensor<string, []>("input_91_cast_fp16")];
+            tensor<fp16, [768, 3072]> transformer_layer_4_ffn_lin2_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_4_ffn_lin2_weight_to_fp16"), val = tensor<fp16, [768, 3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(249984576)))];
+            tensor<fp16, [768]> transformer_layer_4_ffn_lin2_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_4_ffn_lin2_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(254703232)))];
+            tensor<fp16, [1, 128, 768]> linear_29_cast_fp16 = linear(bias = transformer_layer_4_ffn_lin2_bias_to_fp16, weight = transformer_layer_4_ffn_lin2_weight_to_fp16, x = input_91_cast_fp16)[name = tensor<string, []>("linear_29_cast_fp16")];
+            tensor<fp16, [1, 128, 768]> input_95_cast_fp16 = add(x = linear_29_cast_fp16, y = input_87_cast_fp16)[name = tensor<string, []>("input_95_cast_fp16")];
+            tensor<int32, [1]> query_axes_0 = const()[name = tensor<string, []>("query_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [768]> transformer_layer_4_output_layer_norm_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_4_output_layer_norm_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(254704832)))];
+            tensor<fp16, [768]> transformer_layer_4_output_layer_norm_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_4_output_layer_norm_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(254706432)))];
+            tensor<fp16, [1, 128, 768]> query_cast_fp16 = layer_norm(axes = query_axes_0, beta = transformer_layer_4_output_layer_norm_bias_to_fp16, epsilon = var_66_to_fp16, gamma = transformer_layer_4_output_layer_norm_weight_to_fp16, x = input_95_cast_fp16)[name = tensor<string, []>("query_cast_fp16")];
+            tensor<fp16, [768, 768]> transformer_layer_5_attention_q_lin_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_5_attention_q_lin_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(254708032)))];
+            tensor<fp16, [768]> transformer_layer_5_attention_q_lin_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_5_attention_q_lin_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(255887744)))];
+            tensor<fp16, [1, 128, 768]> linear_30_cast_fp16 = linear(bias = transformer_layer_5_attention_q_lin_bias_to_fp16, weight = transformer_layer_5_attention_q_lin_weight_to_fp16, x = query_cast_fp16)[name = tensor<string, []>("linear_30_cast_fp16")];
+            tensor<int32, [4]> var_438 = const()[name = tensor<string, []>("op_438"), val = tensor<int32, [4]>([1, -1, 12, 64])];
+            tensor<fp16, [1, 128, 12, 64]> var_439_cast_fp16 = reshape(shape = var_438, x = linear_30_cast_fp16)[name = tensor<string, []>("op_439_cast_fp16")];
+            tensor<fp16, [768, 768]> transformer_layer_5_attention_k_lin_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_5_attention_k_lin_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(255889344)))];
+            tensor<fp16, [768]> transformer_layer_5_attention_k_lin_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_5_attention_k_lin_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(257069056)))];
+            tensor<fp16, [1, 128, 768]> linear_31_cast_fp16 = linear(bias = transformer_layer_5_attention_k_lin_bias_to_fp16, weight = transformer_layer_5_attention_k_lin_weight_to_fp16, x = query_cast_fp16)[name = tensor<string, []>("linear_31_cast_fp16")];
+            tensor<int32, [4]> var_444 = const()[name = tensor<string, []>("op_444"), val = tensor<int32, [4]>([1, -1, 12, 64])];
+            tensor<fp16, [1, 128, 12, 64]> var_445_cast_fp16 = reshape(shape = var_444, x = linear_31_cast_fp16)[name = tensor<string, []>("op_445_cast_fp16")];
+            tensor<fp16, [768, 768]> transformer_layer_5_attention_v_lin_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_5_attention_v_lin_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(257070656)))];
+            tensor<fp16, [768]> transformer_layer_5_attention_v_lin_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_5_attention_v_lin_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(258250368)))];
+            tensor<fp16, [1, 128, 768]> linear_32_cast_fp16 = linear(bias = transformer_layer_5_attention_v_lin_bias_to_fp16, weight = transformer_layer_5_attention_v_lin_weight_to_fp16, x = query_cast_fp16)[name = tensor<string, []>("linear_32_cast_fp16")];
+            tensor<int32, [4]> var_450 = const()[name = tensor<string, []>("op_450"), val = tensor<int32, [4]>([1, -1, 12, 64])];
+            tensor<fp16, [1, 128, 12, 64]> var_451_cast_fp16 = reshape(shape = var_450, x = linear_32_cast_fp16)[name = tensor<string, []>("op_451_cast_fp16")];
+            tensor<int32, [4]> v_perm_0 = const()[name = tensor<string, []>("v_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<fp16, []> _inversed_q_y_0_to_fp16 = const()[name = tensor<string, []>("_inversed_q_y_0_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
+            tensor<fp16, [1, 128, 12, 64]> _inversed_q_cast_fp16 = mul(x = var_439_cast_fp16, y = _inversed_q_y_0_to_fp16)[name = tensor<string, []>("_inversed_q_cast_fp16")];
+            tensor<bool, []> scores_transpose_x_0 = const()[name = tensor<string, []>("scores_transpose_x_0"), val = tensor<bool, []>(false)];
+            tensor<bool, []> scores_transpose_y_0 = const()[name = tensor<string, []>("scores_transpose_y_0"), val = tensor<bool, []>(false)];
+            tensor<int32, [4]> transpose_28_perm_0 = const()[name = tensor<string, []>("transpose_28_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
+            tensor<int32, [4]> transpose_29_perm_0 = const()[name = tensor<string, []>("transpose_29_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
+            tensor<fp16, [1, 12, 64, 128]> transpose_29 = transpose(perm = transpose_29_perm_0, x = var_445_cast_fp16)[name = tensor<string, []>("transpose_31")];
+            tensor<fp16, [1, 12, 128, 64]> transpose_28 = transpose(perm = transpose_28_perm_0, x = _inversed_q_cast_fp16)[name = tensor<string, []>("transpose_32")];
+            tensor<fp16, [1, 12, 128, 128]> scores_cast_fp16 = matmul(transpose_x = scores_transpose_x_0, transpose_y = scores_transpose_y_0, x = transpose_28, y = transpose_29)[name = tensor<string, []>("scores_cast_fp16")];
+            tensor<fp16, []> const_18_to_fp16 = const()[name = tensor<string, []>("const_18_to_fp16"), val = tensor<fp16, []>(-inf)];
+            tensor<fp16, [1, 12, 128, 128]> input_97_cast_fp16 = select(a = const_18_to_fp16, b = scores_cast_fp16, cond = mask_3)[name = tensor<string, []>("input_97_cast_fp16")];
+            tensor<fp16, [1, 12, 128, 128]> input_99_cast_fp16 = softmax(axis = var_62, x = input_97_cast_fp16)[name = tensor<string, []>("input_99_cast_fp16")];
+            tensor<bool, []> x_transpose_x_0 = const()[name = tensor<string, []>("x_transpose_x_0"), val = tensor<bool, []>(false)];
+            tensor<bool, []> x_transpose_y_0 = const()[name = tensor<string, []>("x_transpose_y_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 12, 128, 64]> v_cast_fp16 = transpose(perm = v_perm_0, x = var_451_cast_fp16)[name = tensor<string, []>("transpose_33")];
+            tensor<fp16, [1, 12, 128, 64]> x_cast_fp16 = matmul(transpose_x = x_transpose_x_0, transpose_y = x_transpose_y_0, x = input_99_cast_fp16, y = v_cast_fp16)[name = tensor<string, []>("x_cast_fp16")];
+            tensor<int32, [4]> var_467_perm_0 = const()[name = tensor<string, []>("op_467_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
+            tensor<int32, [3]> var_469 = const()[name = tensor<string, []>("op_469"), val = tensor<int32, [3]>([1, -1, 768])];
+            tensor<fp16, [1, 128, 12, 64]> var_467_cast_fp16 = transpose(perm = var_467_perm_0, x = x_cast_fp16)[name = tensor<string, []>("transpose_30")];
+            tensor<fp16, [1, 128, 768]> input_101_cast_fp16 = reshape(shape = var_469, x = var_467_cast_fp16)[name = tensor<string, []>("input_101_cast_fp16")];
+            tensor<fp16, [768, 768]> transformer_layer_5_attention_out_lin_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_5_attention_out_lin_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(258251968)))];
+            tensor<fp16, [768]> transformer_layer_5_attention_out_lin_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_5_attention_out_lin_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(259431680)))];
+            tensor<fp16, [1, 128, 768]> linear_33_cast_fp16 = linear(bias = transformer_layer_5_attention_out_lin_bias_to_fp16, weight = transformer_layer_5_attention_out_lin_weight_to_fp16, x = input_101_cast_fp16)[name = tensor<string, []>("linear_33_cast_fp16")];
+            tensor<fp16, [1, 128, 768]> input_103_cast_fp16 = add(x = linear_33_cast_fp16, y = query_cast_fp16)[name = tensor<string, []>("input_103_cast_fp16")];
+            tensor<int32, [1]> input_105_axes_0 = const()[name = tensor<string, []>("input_105_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [768]> transformer_layer_5_sa_layer_norm_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_5_sa_layer_norm_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(259433280)))];
+            tensor<fp16, [768]> transformer_layer_5_sa_layer_norm_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_5_sa_layer_norm_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(259434880)))];
+            tensor<fp16, [1, 128, 768]> input_105_cast_fp16 = layer_norm(axes = input_105_axes_0, beta = transformer_layer_5_sa_layer_norm_bias_to_fp16, epsilon = var_66_to_fp16, gamma = transformer_layer_5_sa_layer_norm_weight_to_fp16, x = input_103_cast_fp16)[name = tensor<string, []>("input_105_cast_fp16")];
+            tensor<fp16, [3072, 768]> transformer_layer_5_ffn_lin1_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_5_ffn_lin1_weight_to_fp16"), val = tensor<fp16, [3072, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(259436480)))];
+            tensor<fp16, [3072]> transformer_layer_5_ffn_lin1_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_5_ffn_lin1_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(264155136)))];
+            tensor<fp16, [1, 128, 3072]> linear_34_cast_fp16 = linear(bias = transformer_layer_5_ffn_lin1_bias_to_fp16, weight = transformer_layer_5_ffn_lin1_weight_to_fp16, x = input_105_cast_fp16)[name = tensor<string, []>("linear_34_cast_fp16")];
+            tensor<string, []> input_109_mode_0 = const()[name = tensor<string, []>("input_109_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 128, 3072]> input_109_cast_fp16 = gelu(mode = input_109_mode_0, x = linear_34_cast_fp16)[name = tensor<string, []>("input_109_cast_fp16")];
+            tensor<fp16, [768, 3072]> transformer_layer_5_ffn_lin2_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_5_ffn_lin2_weight_to_fp16"), val = tensor<fp16, [768, 3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(264161344)))];
+            tensor<fp16, [768]> transformer_layer_5_ffn_lin2_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_5_ffn_lin2_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(268880000)))];
+            tensor<fp16, [1, 128, 768]> linear_35_cast_fp16 = linear(bias = transformer_layer_5_ffn_lin2_bias_to_fp16, weight = transformer_layer_5_ffn_lin2_weight_to_fp16, x = input_109_cast_fp16)[name = tensor<string, []>("linear_35_cast_fp16")];
+            tensor<fp16, [1, 128, 768]> input_113_cast_fp16 = add(x = linear_35_cast_fp16, y = input_105_cast_fp16)[name = tensor<string, []>("input_113_cast_fp16")];
+            tensor<int32, [1]> input_115_axes_0 = const()[name = tensor<string, []>("input_115_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [768]> transformer_layer_5_output_layer_norm_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_5_output_layer_norm_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(268881600)))];
+            tensor<fp16, [768]> transformer_layer_5_output_layer_norm_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_5_output_layer_norm_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(268883200)))];
+            tensor<fp16, [1, 128, 768]> input_115_cast_fp16 = layer_norm(axes = input_115_axes_0, beta = transformer_layer_5_output_layer_norm_bias_to_fp16, epsilon = var_66_to_fp16, gamma = transformer_layer_5_output_layer_norm_weight_to_fp16, x = input_113_cast_fp16)[name = tensor<string, []>("input_115_cast_fp16")];
+            tensor<fp16, [768, 768]> vocab_transform_weight_to_fp16 = const()[name = tensor<string, []>("vocab_transform_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(268884800)))];
+            tensor<fp16, [768]> vocab_transform_bias_to_fp16 = const()[name = tensor<string, []>("vocab_transform_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(270064512)))];
+            tensor<fp16, [1, 128, 768]> linear_36_cast_fp16 = linear(bias = vocab_transform_bias_to_fp16, weight = vocab_transform_weight_to_fp16, x = input_115_cast_fp16)[name = tensor<string, []>("linear_36_cast_fp16")];
+            tensor<string, []> input_117_mode_0 = const()[name = tensor<string, []>("input_117_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 128, 768]> input_117_cast_fp16 = gelu(mode = input_117_mode_0, x = linear_36_cast_fp16)[name = tensor<string, []>("input_117_cast_fp16")];
+            tensor<int32, [1]> input_axes_0 = const()[name = tensor<string, []>("input_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<fp16, [768]> vocab_layer_norm_weight_to_fp16 = const()[name = tensor<string, []>("vocab_layer_norm_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(270066112)))];
+            tensor<fp16, [768]> vocab_layer_norm_bias_to_fp16 = const()[name = tensor<string, []>("vocab_layer_norm_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(270067712)))];
+            tensor<fp16, []> var_500_to_fp16 = const()[name = tensor<string, []>("op_500_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 128, 768]> input_cast_fp16 = layer_norm(axes = input_axes_0, beta = vocab_layer_norm_bias_to_fp16, epsilon = var_500_to_fp16, gamma = vocab_layer_norm_weight_to_fp16, x = input_117_cast_fp16)[name = tensor<string, []>("input_cast_fp16")];
+            tensor<fp16, [119547]> vocab_projector_bias_to_fp16 = const()[name = tensor<string, []>("vocab_projector_bias_to_fp16"), val = tensor<fp16, [119547]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(270069312)))];
+            tensor<fp16, [1, 128, 119547]> linear_37_cast_fp16 = linear(bias = vocab_projector_bias_to_fp16, weight = embeddings_word_embeddings_weight_to_fp16, x = input_cast_fp16)[name = tensor<string, []>("linear_37_cast_fp16")];
+            tensor<string, []> linear_37_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("linear_37_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")];
+            tensor<fp32, [1, 128, 119547]> logits = cast(dtype = linear_37_cast_fp16_to_fp32_dtype_0, x = linear_37_cast_fp16)[name = tensor<string, []>("cast_44")];
+        } -> (logits);
+}

distilbert_mlm.mlmodelc/weights/weight.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e285ae0002b6f54b4b36de3de15f71d208c75199441c6edad5a51cef8a38ce81
+size 270308470