Upload folder using huggingface_hub
Browse files
distilbert_mlm.mlmodelc/analytics/coremldata.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:10f2c23b71cd59734511b6e10fb8a48aec95421790caefc308db5520ece6f501
|
| 3 |
+
size 243
|
distilbert_mlm.mlmodelc/coremldata.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:748bc523def8a60f7cfc97b8fd49954b3f21920c0e36ee16d18e223b1e9c3fcd
|
| 3 |
+
size 481
|
distilbert_mlm.mlmodelc/metadata.json
ADDED
|
@@ -0,0 +1,89 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"shortDescription" : "DistilBERT MLM for grammar correction (multilingual, fast)",
|
| 4 |
+
"metadataOutputVersion" : "3.0",
|
| 5 |
+
"outputSchema" : [
|
| 6 |
+
{
|
| 7 |
+
"hasShapeFlexibility" : "0",
|
| 8 |
+
"isOptional" : "0",
|
| 9 |
+
"dataType" : "Float32",
|
| 10 |
+
"formattedType" : "MultiArray (Float32 1 × 128 × 119547)",
|
| 11 |
+
"shortDescription" : "",
|
| 12 |
+
"shape" : "[1, 128, 119547]",
|
| 13 |
+
"name" : "logits",
|
| 14 |
+
"type" : "MultiArray"
|
| 15 |
+
}
|
| 16 |
+
],
|
| 17 |
+
"version" : "1.0",
|
| 18 |
+
"modelParameters" : [
|
| 19 |
+
|
| 20 |
+
],
|
| 21 |
+
"author" : "Typo00",
|
| 22 |
+
"specificationVersion" : 6,
|
| 23 |
+
"storagePrecision" : "Float16",
|
| 24 |
+
"mlProgramOperationTypeHistogram" : {
|
| 25 |
+
"Linear" : 38,
|
| 26 |
+
"Select" : 6,
|
| 27 |
+
"LayerNorm" : 14,
|
| 28 |
+
"Transpose" : 24,
|
| 29 |
+
"Matmul" : 12,
|
| 30 |
+
"Sub" : 1,
|
| 31 |
+
"Gelu" : 7,
|
| 32 |
+
"Softmax" : 6,
|
| 33 |
+
"Mul" : 7,
|
| 34 |
+
"Cast" : 2,
|
| 35 |
+
"Equal" : 1,
|
| 36 |
+
"Add" : 13,
|
| 37 |
+
"ExpandDims" : 2,
|
| 38 |
+
"Reshape" : 24,
|
| 39 |
+
"Gather" : 1,
|
| 40 |
+
"Tile" : 1
|
| 41 |
+
},
|
| 42 |
+
"computePrecision" : "Mixed (Float16, Float32, Int32)",
|
| 43 |
+
"stateSchema" : [
|
| 44 |
+
|
| 45 |
+
],
|
| 46 |
+
"isUpdatable" : "0",
|
| 47 |
+
"availability" : {
|
| 48 |
+
"macOS" : "12.0",
|
| 49 |
+
"tvOS" : "15.0",
|
| 50 |
+
"visionOS" : "1.0",
|
| 51 |
+
"watchOS" : "8.0",
|
| 52 |
+
"iOS" : "15.0",
|
| 53 |
+
"macCatalyst" : "15.0"
|
| 54 |
+
},
|
| 55 |
+
"modelType" : {
|
| 56 |
+
"name" : "MLModelType_mlProgram"
|
| 57 |
+
},
|
| 58 |
+
"inputSchema" : [
|
| 59 |
+
{
|
| 60 |
+
"hasShapeFlexibility" : "0",
|
| 61 |
+
"isOptional" : "0",
|
| 62 |
+
"dataType" : "Int32",
|
| 63 |
+
"formattedType" : "MultiArray (Int32 1 × 128)",
|
| 64 |
+
"shortDescription" : "",
|
| 65 |
+
"shape" : "[1, 128]",
|
| 66 |
+
"name" : "input_ids",
|
| 67 |
+
"type" : "MultiArray"
|
| 68 |
+
},
|
| 69 |
+
{
|
| 70 |
+
"hasShapeFlexibility" : "0",
|
| 71 |
+
"isOptional" : "0",
|
| 72 |
+
"dataType" : "Int32",
|
| 73 |
+
"formattedType" : "MultiArray (Int32 1 × 128)",
|
| 74 |
+
"shortDescription" : "",
|
| 75 |
+
"shape" : "[1, 128]",
|
| 76 |
+
"name" : "attention_mask",
|
| 77 |
+
"type" : "MultiArray"
|
| 78 |
+
}
|
| 79 |
+
],
|
| 80 |
+
"userDefinedMetadata" : {
|
| 81 |
+
"com.github.apple.coremltools.conversion_date" : "2026-01-12",
|
| 82 |
+
"com.github.apple.coremltools.source" : "torch==2.7.0",
|
| 83 |
+
"com.github.apple.coremltools.version" : "9.0",
|
| 84 |
+
"com.github.apple.coremltools.source_dialect" : "TorchScript"
|
| 85 |
+
},
|
| 86 |
+
"generatedClassName" : "distilbert_mlm",
|
| 87 |
+
"method" : "predict"
|
| 88 |
+
}
|
| 89 |
+
]
|
distilbert_mlm.mlmodelc/model.mil
ADDED
|
@@ -0,0 +1,388 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
program(1.0)
|
| 2 |
+
[buildInfo = dict<tensor<string, []>, tensor<string, []>>({{"coremlc-component-MIL", "3510.2.1"}, {"coremlc-version", "3500.32.1"}, {"coremltools-component-torch", "2.7.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "9.0"}})]
|
| 3 |
+
{
|
| 4 |
+
func main<ios15>(tensor<int32, [1, 128]> attention_mask, tensor<int32, [1, 128]> input_ids) {
|
| 5 |
+
tensor<int32, []> input_embeds_axis_0 = const()[name = tensor<string, []>("input_embeds_axis_0"), val = tensor<int32, []>(0)];
|
| 6 |
+
tensor<fp16, [119547, 768]> embeddings_word_embeddings_weight_to_fp16 = const()[name = tensor<string, []>("embeddings_word_embeddings_weight_to_fp16"), val = tensor<fp16, [119547, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(64)))];
|
| 7 |
+
tensor<fp16, [1, 128, 768]> input_embeds_cast_fp16 = gather(axis = input_embeds_axis_0, indices = input_ids, x = embeddings_word_embeddings_weight_to_fp16)[name = tensor<string, []>("input_embeds_cast_fp16")];
|
| 8 |
+
tensor<fp16, [1, 128, 768]> position_embeddings_1_to_fp16 = const()[name = tensor<string, []>("position_embeddings_1_to_fp16"), val = tensor<fp16, [1, 128, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(183624320)))];
|
| 9 |
+
tensor<fp16, [1, 128, 768]> input_3_cast_fp16 = add(x = input_embeds_cast_fp16, y = position_embeddings_1_to_fp16)[name = tensor<string, []>("input_3_cast_fp16")];
|
| 10 |
+
tensor<int32, [1]> input_5_axes_0 = const()[name = tensor<string, []>("input_5_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 11 |
+
tensor<fp16, [768]> embeddings_LayerNorm_weight_to_fp16 = const()[name = tensor<string, []>("embeddings_LayerNorm_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(183820992)))];
|
| 12 |
+
tensor<fp16, [768]> embeddings_LayerNorm_bias_to_fp16 = const()[name = tensor<string, []>("embeddings_LayerNorm_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(183822592)))];
|
| 13 |
+
tensor<fp16, []> var_13_to_fp16 = const()[name = tensor<string, []>("op_13_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
|
| 14 |
+
tensor<fp16, [1, 128, 768]> input_5_cast_fp16 = layer_norm(axes = input_5_axes_0, beta = embeddings_LayerNorm_bias_to_fp16, epsilon = var_13_to_fp16, gamma = embeddings_LayerNorm_weight_to_fp16, x = input_3_cast_fp16)[name = tensor<string, []>("input_5_cast_fp16")];
|
| 15 |
+
tensor<int32, [1]> var_38_axes_0 = const()[name = tensor<string, []>("op_38_axes_0"), val = tensor<int32, [1]>([1])];
|
| 16 |
+
tensor<int32, [1, 1, 128]> var_38 = expand_dims(axes = var_38_axes_0, x = attention_mask)[name = tensor<string, []>("op_38")];
|
| 17 |
+
tensor<int32, [1]> extended_attention_mask_axes_0 = const()[name = tensor<string, []>("extended_attention_mask_axes_0"), val = tensor<int32, [1]>([2])];
|
| 18 |
+
tensor<int32, [1, 1, 1, 128]> extended_attention_mask = expand_dims(axes = extended_attention_mask_axes_0, x = var_38)[name = tensor<string, []>("extended_attention_mask")];
|
| 19 |
+
tensor<fp16, []> var_46_to_fp16 = const()[name = tensor<string, []>("op_46_to_fp16"), val = tensor<fp16, []>(0x1p+0)];
|
| 20 |
+
tensor<string, []> var_45_to_fp16_dtype_0 = const()[name = tensor<string, []>("op_45_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
|
| 21 |
+
tensor<fp16, [1, 1, 1, 128]> extended_attention_mask_to_fp16 = cast(dtype = var_45_to_fp16_dtype_0, x = extended_attention_mask)[name = tensor<string, []>("cast_45")];
|
| 22 |
+
tensor<fp16, [1, 1, 1, 128]> var_48_cast_fp16 = sub(x = var_46_to_fp16, y = extended_attention_mask_to_fp16)[name = tensor<string, []>("op_48_cast_fp16")];
|
| 23 |
+
tensor<fp16, []> var_49_to_fp16 = const()[name = tensor<string, []>("op_49_to_fp16"), val = tensor<fp16, []>(-0x1.388p+13)];
|
| 24 |
+
tensor<fp16, [1, 1, 1, 128]> mask_1_cast_fp16 = mul(x = var_48_cast_fp16, y = var_49_to_fp16)[name = tensor<string, []>("mask_1_cast_fp16")];
|
| 25 |
+
tensor<int32, []> var_62 = const()[name = tensor<string, []>("op_62"), val = tensor<int32, []>(-1)];
|
| 26 |
+
tensor<fp16, [768, 768]> transformer_layer_0_attention_q_lin_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_0_attention_q_lin_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(183824192)))];
|
| 27 |
+
tensor<fp16, [768]> transformer_layer_0_attention_q_lin_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_0_attention_q_lin_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(185003904)))];
|
| 28 |
+
tensor<fp16, [1, 128, 768]> linear_0_cast_fp16 = linear(bias = transformer_layer_0_attention_q_lin_bias_to_fp16, weight = transformer_layer_0_attention_q_lin_weight_to_fp16, x = input_5_cast_fp16)[name = tensor<string, []>("linear_0_cast_fp16")];
|
| 29 |
+
tensor<int32, [4]> var_93 = const()[name = tensor<string, []>("op_93"), val = tensor<int32, [4]>([1, -1, 12, 64])];
|
| 30 |
+
tensor<fp16, [1, 128, 12, 64]> var_94_cast_fp16 = reshape(shape = var_93, x = linear_0_cast_fp16)[name = tensor<string, []>("op_94_cast_fp16")];
|
| 31 |
+
tensor<fp16, [768, 768]> transformer_layer_0_attention_k_lin_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_0_attention_k_lin_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(185005504)))];
|
| 32 |
+
tensor<fp16, [768]> transformer_layer_0_attention_k_lin_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_0_attention_k_lin_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(186185216)))];
|
| 33 |
+
tensor<fp16, [1, 128, 768]> linear_1_cast_fp16 = linear(bias = transformer_layer_0_attention_k_lin_bias_to_fp16, weight = transformer_layer_0_attention_k_lin_weight_to_fp16, x = input_5_cast_fp16)[name = tensor<string, []>("linear_1_cast_fp16")];
|
| 34 |
+
tensor<int32, [4]> var_99 = const()[name = tensor<string, []>("op_99"), val = tensor<int32, [4]>([1, -1, 12, 64])];
|
| 35 |
+
tensor<fp16, [1, 128, 12, 64]> var_100_cast_fp16 = reshape(shape = var_99, x = linear_1_cast_fp16)[name = tensor<string, []>("op_100_cast_fp16")];
|
| 36 |
+
tensor<fp16, [768, 768]> transformer_layer_0_attention_v_lin_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_0_attention_v_lin_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(186186816)))];
|
| 37 |
+
tensor<fp16, [768]> transformer_layer_0_attention_v_lin_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_0_attention_v_lin_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(187366528)))];
|
| 38 |
+
tensor<fp16, [1, 128, 768]> linear_2_cast_fp16 = linear(bias = transformer_layer_0_attention_v_lin_bias_to_fp16, weight = transformer_layer_0_attention_v_lin_weight_to_fp16, x = input_5_cast_fp16)[name = tensor<string, []>("linear_2_cast_fp16")];
|
| 39 |
+
tensor<int32, [4]> var_105 = const()[name = tensor<string, []>("op_105"), val = tensor<int32, [4]>([1, -1, 12, 64])];
|
| 40 |
+
tensor<fp16, [1, 128, 12, 64]> var_106_cast_fp16 = reshape(shape = var_105, x = linear_2_cast_fp16)[name = tensor<string, []>("op_106_cast_fp16")];
|
| 41 |
+
tensor<int32, [4]> v_1_perm_0 = const()[name = tensor<string, []>("v_1_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 42 |
+
tensor<fp16, []> _inversed_q_3_y_0_to_fp16 = const()[name = tensor<string, []>("_inversed_q_3_y_0_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
|
| 43 |
+
tensor<fp16, [1, 128, 12, 64]> _inversed_q_3_cast_fp16 = mul(x = var_94_cast_fp16, y = _inversed_q_3_y_0_to_fp16)[name = tensor<string, []>("_inversed_q_3_cast_fp16")];
|
| 44 |
+
tensor<bool, []> scores_1_transpose_x_0 = const()[name = tensor<string, []>("scores_1_transpose_x_0"), val = tensor<bool, []>(false)];
|
| 45 |
+
tensor<bool, []> scores_1_transpose_y_0 = const()[name = tensor<string, []>("scores_1_transpose_y_0"), val = tensor<bool, []>(false)];
|
| 46 |
+
tensor<int32, [4]> transpose_18_perm_0 = const()[name = tensor<string, []>("transpose_18_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
|
| 47 |
+
tensor<int32, [4]> transpose_19_perm_0 = const()[name = tensor<string, []>("transpose_19_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
|
| 48 |
+
tensor<fp16, [1, 12, 64, 128]> transpose_19 = transpose(perm = transpose_19_perm_0, x = var_100_cast_fp16)[name = tensor<string, []>("transpose_51")];
|
| 49 |
+
tensor<fp16, [1, 12, 128, 64]> transpose_18 = transpose(perm = transpose_18_perm_0, x = _inversed_q_3_cast_fp16)[name = tensor<string, []>("transpose_52")];
|
| 50 |
+
tensor<fp16, [1, 12, 128, 128]> scores_1_cast_fp16 = matmul(transpose_x = scores_1_transpose_x_0, transpose_y = scores_1_transpose_y_0, x = transpose_18, y = transpose_19)[name = tensor<string, []>("scores_1_cast_fp16")];
|
| 51 |
+
tensor<fp16, []> var_64_promoted_to_fp16 = const()[name = tensor<string, []>("op_64_promoted_to_fp16"), val = tensor<fp16, []>(0x0p+0)];
|
| 52 |
+
tensor<bool, [1, 1, 1, 128]> var_112_cast_fp16 = equal(x = mask_1_cast_fp16, y = var_64_promoted_to_fp16)[name = tensor<string, []>("op_112_cast_fp16")];
|
| 53 |
+
tensor<int32, [4]> mask_3_reps_0 = const()[name = tensor<string, []>("mask_3_reps_0"), val = tensor<int32, [4]>([1, 12, 128, 1])];
|
| 54 |
+
tensor<bool, [1, 12, 128, 128]> mask_3 = tile(reps = mask_3_reps_0, x = var_112_cast_fp16)[name = tensor<string, []>("mask_3")];
|
| 55 |
+
tensor<fp16, []> const_3_to_fp16 = const()[name = tensor<string, []>("const_3_to_fp16"), val = tensor<fp16, []>(-inf)];
|
| 56 |
+
tensor<fp16, [1, 12, 128, 128]> input_7_cast_fp16 = select(a = const_3_to_fp16, b = scores_1_cast_fp16, cond = mask_3)[name = tensor<string, []>("input_7_cast_fp16")];
|
| 57 |
+
tensor<fp16, [1, 12, 128, 128]> input_9_cast_fp16 = softmax(axis = var_62, x = input_7_cast_fp16)[name = tensor<string, []>("input_9_cast_fp16")];
|
| 58 |
+
tensor<bool, []> x_7_transpose_x_0 = const()[name = tensor<string, []>("x_7_transpose_x_0"), val = tensor<bool, []>(false)];
|
| 59 |
+
tensor<bool, []> x_7_transpose_y_0 = const()[name = tensor<string, []>("x_7_transpose_y_0"), val = tensor<bool, []>(false)];
|
| 60 |
+
tensor<fp16, [1, 12, 128, 64]> v_1_cast_fp16 = transpose(perm = v_1_perm_0, x = var_106_cast_fp16)[name = tensor<string, []>("transpose_53")];
|
| 61 |
+
tensor<fp16, [1, 12, 128, 64]> x_7_cast_fp16 = matmul(transpose_x = x_7_transpose_x_0, transpose_y = x_7_transpose_y_0, x = input_9_cast_fp16, y = v_1_cast_fp16)[name = tensor<string, []>("x_7_cast_fp16")];
|
| 62 |
+
tensor<int32, [4]> var_122_perm_0 = const()[name = tensor<string, []>("op_122_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 63 |
+
tensor<int32, [3]> var_124 = const()[name = tensor<string, []>("op_124"), val = tensor<int32, [3]>([1, -1, 768])];
|
| 64 |
+
tensor<fp16, [1, 128, 12, 64]> var_122_cast_fp16 = transpose(perm = var_122_perm_0, x = x_7_cast_fp16)[name = tensor<string, []>("transpose_50")];
|
| 65 |
+
tensor<fp16, [1, 128, 768]> input_11_cast_fp16 = reshape(shape = var_124, x = var_122_cast_fp16)[name = tensor<string, []>("input_11_cast_fp16")];
|
| 66 |
+
tensor<fp16, [768, 768]> transformer_layer_0_attention_out_lin_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_0_attention_out_lin_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(187368128)))];
|
| 67 |
+
tensor<fp16, [768]> transformer_layer_0_attention_out_lin_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_0_attention_out_lin_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(188547840)))];
|
| 68 |
+
tensor<fp16, [1, 128, 768]> linear_3_cast_fp16 = linear(bias = transformer_layer_0_attention_out_lin_bias_to_fp16, weight = transformer_layer_0_attention_out_lin_weight_to_fp16, x = input_11_cast_fp16)[name = tensor<string, []>("linear_3_cast_fp16")];
|
| 69 |
+
tensor<fp16, [1, 128, 768]> input_13_cast_fp16 = add(x = linear_3_cast_fp16, y = input_5_cast_fp16)[name = tensor<string, []>("input_13_cast_fp16")];
|
| 70 |
+
tensor<int32, [1]> input_15_axes_0 = const()[name = tensor<string, []>("input_15_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 71 |
+
tensor<fp16, [768]> transformer_layer_0_sa_layer_norm_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_0_sa_layer_norm_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(188549440)))];
|
| 72 |
+
tensor<fp16, [768]> transformer_layer_0_sa_layer_norm_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_0_sa_layer_norm_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(188551040)))];
|
| 73 |
+
tensor<fp16, []> var_66_to_fp16 = const()[name = tensor<string, []>("op_66_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
|
| 74 |
+
tensor<fp16, [1, 128, 768]> input_15_cast_fp16 = layer_norm(axes = input_15_axes_0, beta = transformer_layer_0_sa_layer_norm_bias_to_fp16, epsilon = var_66_to_fp16, gamma = transformer_layer_0_sa_layer_norm_weight_to_fp16, x = input_13_cast_fp16)[name = tensor<string, []>("input_15_cast_fp16")];
|
| 75 |
+
tensor<fp16, [3072, 768]> transformer_layer_0_ffn_lin1_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_0_ffn_lin1_weight_to_fp16"), val = tensor<fp16, [3072, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(188552640)))];
|
| 76 |
+
tensor<fp16, [3072]> transformer_layer_0_ffn_lin1_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_0_ffn_lin1_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(193271296)))];
|
| 77 |
+
tensor<fp16, [1, 128, 3072]> linear_4_cast_fp16 = linear(bias = transformer_layer_0_ffn_lin1_bias_to_fp16, weight = transformer_layer_0_ffn_lin1_weight_to_fp16, x = input_15_cast_fp16)[name = tensor<string, []>("linear_4_cast_fp16")];
|
| 78 |
+
tensor<string, []> input_19_mode_0 = const()[name = tensor<string, []>("input_19_mode_0"), val = tensor<string, []>("EXACT")];
|
| 79 |
+
tensor<fp16, [1, 128, 3072]> input_19_cast_fp16 = gelu(mode = input_19_mode_0, x = linear_4_cast_fp16)[name = tensor<string, []>("input_19_cast_fp16")];
|
| 80 |
+
tensor<fp16, [768, 3072]> transformer_layer_0_ffn_lin2_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_0_ffn_lin2_weight_to_fp16"), val = tensor<fp16, [768, 3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(193277504)))];
|
| 81 |
+
tensor<fp16, [768]> transformer_layer_0_ffn_lin2_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_0_ffn_lin2_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(197996160)))];
|
| 82 |
+
tensor<fp16, [1, 128, 768]> linear_5_cast_fp16 = linear(bias = transformer_layer_0_ffn_lin2_bias_to_fp16, weight = transformer_layer_0_ffn_lin2_weight_to_fp16, x = input_19_cast_fp16)[name = tensor<string, []>("linear_5_cast_fp16")];
|
| 83 |
+
tensor<fp16, [1, 128, 768]> input_23_cast_fp16 = add(x = linear_5_cast_fp16, y = input_15_cast_fp16)[name = tensor<string, []>("input_23_cast_fp16")];
|
| 84 |
+
tensor<int32, [1]> query_3_axes_0 = const()[name = tensor<string, []>("query_3_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 85 |
+
tensor<fp16, [768]> transformer_layer_0_output_layer_norm_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_0_output_layer_norm_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(197997760)))];
|
| 86 |
+
tensor<fp16, [768]> transformer_layer_0_output_layer_norm_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_0_output_layer_norm_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(197999360)))];
|
| 87 |
+
tensor<fp16, [1, 128, 768]> query_3_cast_fp16 = layer_norm(axes = query_3_axes_0, beta = transformer_layer_0_output_layer_norm_bias_to_fp16, epsilon = var_66_to_fp16, gamma = transformer_layer_0_output_layer_norm_weight_to_fp16, x = input_23_cast_fp16)[name = tensor<string, []>("query_3_cast_fp16")];
|
| 88 |
+
tensor<fp16, [768, 768]> transformer_layer_1_attention_q_lin_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_1_attention_q_lin_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(198000960)))];
|
| 89 |
+
tensor<fp16, [768]> transformer_layer_1_attention_q_lin_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_1_attention_q_lin_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(199180672)))];
|
| 90 |
+
tensor<fp16, [1, 128, 768]> linear_6_cast_fp16 = linear(bias = transformer_layer_1_attention_q_lin_bias_to_fp16, weight = transformer_layer_1_attention_q_lin_weight_to_fp16, x = query_3_cast_fp16)[name = tensor<string, []>("linear_6_cast_fp16")];
|
| 91 |
+
tensor<int32, [4]> var_162 = const()[name = tensor<string, []>("op_162"), val = tensor<int32, [4]>([1, -1, 12, 64])];
|
| 92 |
+
tensor<fp16, [1, 128, 12, 64]> var_163_cast_fp16 = reshape(shape = var_162, x = linear_6_cast_fp16)[name = tensor<string, []>("op_163_cast_fp16")];
|
| 93 |
+
tensor<fp16, [768, 768]> transformer_layer_1_attention_k_lin_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_1_attention_k_lin_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(199182272)))];
|
| 94 |
+
tensor<fp16, [768]> transformer_layer_1_attention_k_lin_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_1_attention_k_lin_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(200361984)))];
|
| 95 |
+
tensor<fp16, [1, 128, 768]> linear_7_cast_fp16 = linear(bias = transformer_layer_1_attention_k_lin_bias_to_fp16, weight = transformer_layer_1_attention_k_lin_weight_to_fp16, x = query_3_cast_fp16)[name = tensor<string, []>("linear_7_cast_fp16")];
|
| 96 |
+
tensor<int32, [4]> var_168 = const()[name = tensor<string, []>("op_168"), val = tensor<int32, [4]>([1, -1, 12, 64])];
|
| 97 |
+
tensor<fp16, [1, 128, 12, 64]> var_169_cast_fp16 = reshape(shape = var_168, x = linear_7_cast_fp16)[name = tensor<string, []>("op_169_cast_fp16")];
|
| 98 |
+
tensor<fp16, [768, 768]> transformer_layer_1_attention_v_lin_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_1_attention_v_lin_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(200363584)))];
|
| 99 |
+
tensor<fp16, [768]> transformer_layer_1_attention_v_lin_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_1_attention_v_lin_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(201543296)))];
|
| 100 |
+
tensor<fp16, [1, 128, 768]> linear_8_cast_fp16 = linear(bias = transformer_layer_1_attention_v_lin_bias_to_fp16, weight = transformer_layer_1_attention_v_lin_weight_to_fp16, x = query_3_cast_fp16)[name = tensor<string, []>("linear_8_cast_fp16")];
|
| 101 |
+
tensor<int32, [4]> var_174 = const()[name = tensor<string, []>("op_174"), val = tensor<int32, [4]>([1, -1, 12, 64])];
|
| 102 |
+
tensor<fp16, [1, 128, 12, 64]> var_175_cast_fp16 = reshape(shape = var_174, x = linear_8_cast_fp16)[name = tensor<string, []>("op_175_cast_fp16")];
|
| 103 |
+
tensor<int32, [4]> v_3_perm_0 = const()[name = tensor<string, []>("v_3_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 104 |
+
tensor<fp16, []> _inversed_q_7_y_0_to_fp16 = const()[name = tensor<string, []>("_inversed_q_7_y_0_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
|
| 105 |
+
tensor<fp16, [1, 128, 12, 64]> _inversed_q_7_cast_fp16 = mul(x = var_163_cast_fp16, y = _inversed_q_7_y_0_to_fp16)[name = tensor<string, []>("_inversed_q_7_cast_fp16")];
|
| 106 |
+
tensor<bool, []> scores_3_transpose_x_0 = const()[name = tensor<string, []>("scores_3_transpose_x_0"), val = tensor<bool, []>(false)];
|
| 107 |
+
tensor<bool, []> scores_3_transpose_y_0 = const()[name = tensor<string, []>("scores_3_transpose_y_0"), val = tensor<bool, []>(false)];
|
| 108 |
+
tensor<int32, [4]> transpose_20_perm_0 = const()[name = tensor<string, []>("transpose_20_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
|
| 109 |
+
tensor<int32, [4]> transpose_21_perm_0 = const()[name = tensor<string, []>("transpose_21_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
|
| 110 |
+
tensor<fp16, [1, 12, 64, 128]> transpose_21 = transpose(perm = transpose_21_perm_0, x = var_169_cast_fp16)[name = tensor<string, []>("transpose_47")];
|
| 111 |
+
tensor<fp16, [1, 12, 128, 64]> transpose_20 = transpose(perm = transpose_20_perm_0, x = _inversed_q_7_cast_fp16)[name = tensor<string, []>("transpose_48")];
|
| 112 |
+
tensor<fp16, [1, 12, 128, 128]> scores_3_cast_fp16 = matmul(transpose_x = scores_3_transpose_x_0, transpose_y = scores_3_transpose_y_0, x = transpose_20, y = transpose_21)[name = tensor<string, []>("scores_3_cast_fp16")];
|
| 113 |
+
tensor<fp16, []> const_6_to_fp16 = const()[name = tensor<string, []>("const_6_to_fp16"), val = tensor<fp16, []>(-inf)];
|
| 114 |
+
tensor<fp16, [1, 12, 128, 128]> input_25_cast_fp16 = select(a = const_6_to_fp16, b = scores_3_cast_fp16, cond = mask_3)[name = tensor<string, []>("input_25_cast_fp16")];
|
| 115 |
+
tensor<fp16, [1, 12, 128, 128]> input_27_cast_fp16 = softmax(axis = var_62, x = input_25_cast_fp16)[name = tensor<string, []>("input_27_cast_fp16")];
|
| 116 |
+
tensor<bool, []> x_15_transpose_x_0 = const()[name = tensor<string, []>("x_15_transpose_x_0"), val = tensor<bool, []>(false)];
|
| 117 |
+
tensor<bool, []> x_15_transpose_y_0 = const()[name = tensor<string, []>("x_15_transpose_y_0"), val = tensor<bool, []>(false)];
|
| 118 |
+
tensor<fp16, [1, 12, 128, 64]> v_3_cast_fp16 = transpose(perm = v_3_perm_0, x = var_175_cast_fp16)[name = tensor<string, []>("transpose_49")];
|
| 119 |
+
tensor<fp16, [1, 12, 128, 64]> x_15_cast_fp16 = matmul(transpose_x = x_15_transpose_x_0, transpose_y = x_15_transpose_y_0, x = input_27_cast_fp16, y = v_3_cast_fp16)[name = tensor<string, []>("x_15_cast_fp16")];
|
| 120 |
+
tensor<int32, [4]> var_191_perm_0 = const()[name = tensor<string, []>("op_191_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 121 |
+
tensor<int32, [3]> var_193 = const()[name = tensor<string, []>("op_193"), val = tensor<int32, [3]>([1, -1, 768])];
|
| 122 |
+
tensor<fp16, [1, 128, 12, 64]> var_191_cast_fp16 = transpose(perm = var_191_perm_0, x = x_15_cast_fp16)[name = tensor<string, []>("transpose_46")];
|
| 123 |
+
tensor<fp16, [1, 128, 768]> input_29_cast_fp16 = reshape(shape = var_193, x = var_191_cast_fp16)[name = tensor<string, []>("input_29_cast_fp16")];
|
| 124 |
+
tensor<fp16, [768, 768]> transformer_layer_1_attention_out_lin_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_1_attention_out_lin_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(201544896)))];
|
| 125 |
+
tensor<fp16, [768]> transformer_layer_1_attention_out_lin_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_1_attention_out_lin_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(202724608)))];
|
| 126 |
+
tensor<fp16, [1, 128, 768]> linear_9_cast_fp16 = linear(bias = transformer_layer_1_attention_out_lin_bias_to_fp16, weight = transformer_layer_1_attention_out_lin_weight_to_fp16, x = input_29_cast_fp16)[name = tensor<string, []>("linear_9_cast_fp16")];
|
| 127 |
+
tensor<fp16, [1, 128, 768]> input_31_cast_fp16 = add(x = linear_9_cast_fp16, y = query_3_cast_fp16)[name = tensor<string, []>("input_31_cast_fp16")];
|
| 128 |
+
tensor<int32, [1]> input_33_axes_0 = const()[name = tensor<string, []>("input_33_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 129 |
+
tensor<fp16, [768]> transformer_layer_1_sa_layer_norm_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_1_sa_layer_norm_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(202726208)))];
|
| 130 |
+
tensor<fp16, [768]> transformer_layer_1_sa_layer_norm_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_1_sa_layer_norm_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(202727808)))];
|
| 131 |
+
tensor<fp16, [1, 128, 768]> input_33_cast_fp16 = layer_norm(axes = input_33_axes_0, beta = transformer_layer_1_sa_layer_norm_bias_to_fp16, epsilon = var_66_to_fp16, gamma = transformer_layer_1_sa_layer_norm_weight_to_fp16, x = input_31_cast_fp16)[name = tensor<string, []>("input_33_cast_fp16")];
|
| 132 |
+
tensor<fp16, [3072, 768]> transformer_layer_1_ffn_lin1_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_1_ffn_lin1_weight_to_fp16"), val = tensor<fp16, [3072, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(202729408)))];
|
| 133 |
+
tensor<fp16, [3072]> transformer_layer_1_ffn_lin1_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_1_ffn_lin1_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(207448064)))];
|
| 134 |
+
tensor<fp16, [1, 128, 3072]> linear_10_cast_fp16 = linear(bias = transformer_layer_1_ffn_lin1_bias_to_fp16, weight = transformer_layer_1_ffn_lin1_weight_to_fp16, x = input_33_cast_fp16)[name = tensor<string, []>("linear_10_cast_fp16")];
|
| 135 |
+
tensor<string, []> input_37_mode_0 = const()[name = tensor<string, []>("input_37_mode_0"), val = tensor<string, []>("EXACT")];
|
| 136 |
+
tensor<fp16, [1, 128, 3072]> input_37_cast_fp16 = gelu(mode = input_37_mode_0, x = linear_10_cast_fp16)[name = tensor<string, []>("input_37_cast_fp16")];
|
| 137 |
+
tensor<fp16, [768, 3072]> transformer_layer_1_ffn_lin2_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_1_ffn_lin2_weight_to_fp16"), val = tensor<fp16, [768, 3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(207454272)))];
|
| 138 |
+
tensor<fp16, [768]> transformer_layer_1_ffn_lin2_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_1_ffn_lin2_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(212172928)))];
|
| 139 |
+
tensor<fp16, [1, 128, 768]> linear_11_cast_fp16 = linear(bias = transformer_layer_1_ffn_lin2_bias_to_fp16, weight = transformer_layer_1_ffn_lin2_weight_to_fp16, x = input_37_cast_fp16)[name = tensor<string, []>("linear_11_cast_fp16")];
|
| 140 |
+
tensor<fp16, [1, 128, 768]> input_41_cast_fp16 = add(x = linear_11_cast_fp16, y = input_33_cast_fp16)[name = tensor<string, []>("input_41_cast_fp16")];
|
| 141 |
+
tensor<int32, [1]> query_5_axes_0 = const()[name = tensor<string, []>("query_5_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 142 |
+
tensor<fp16, [768]> transformer_layer_1_output_layer_norm_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_1_output_layer_norm_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(212174528)))];
|
| 143 |
+
tensor<fp16, [768]> transformer_layer_1_output_layer_norm_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_1_output_layer_norm_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(212176128)))];
|
| 144 |
+
tensor<fp16, [1, 128, 768]> query_5_cast_fp16 = layer_norm(axes = query_5_axes_0, beta = transformer_layer_1_output_layer_norm_bias_to_fp16, epsilon = var_66_to_fp16, gamma = transformer_layer_1_output_layer_norm_weight_to_fp16, x = input_41_cast_fp16)[name = tensor<string, []>("query_5_cast_fp16")];
|
| 145 |
+
tensor<fp16, [768, 768]> transformer_layer_2_attention_q_lin_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_2_attention_q_lin_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(212177728)))];
|
| 146 |
+
tensor<fp16, [768]> transformer_layer_2_attention_q_lin_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_2_attention_q_lin_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(213357440)))];
|
| 147 |
+
tensor<fp16, [1, 128, 768]> linear_12_cast_fp16 = linear(bias = transformer_layer_2_attention_q_lin_bias_to_fp16, weight = transformer_layer_2_attention_q_lin_weight_to_fp16, x = query_5_cast_fp16)[name = tensor<string, []>("linear_12_cast_fp16")];
|
| 148 |
+
tensor<int32, [4]> var_231 = const()[name = tensor<string, []>("op_231"), val = tensor<int32, [4]>([1, -1, 12, 64])];
|
| 149 |
+
tensor<fp16, [1, 128, 12, 64]> var_232_cast_fp16 = reshape(shape = var_231, x = linear_12_cast_fp16)[name = tensor<string, []>("op_232_cast_fp16")];
|
| 150 |
+
tensor<fp16, [768, 768]> transformer_layer_2_attention_k_lin_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_2_attention_k_lin_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(213359040)))];
|
| 151 |
+
tensor<fp16, [768]> transformer_layer_2_attention_k_lin_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_2_attention_k_lin_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(214538752)))];
|
| 152 |
+
tensor<fp16, [1, 128, 768]> linear_13_cast_fp16 = linear(bias = transformer_layer_2_attention_k_lin_bias_to_fp16, weight = transformer_layer_2_attention_k_lin_weight_to_fp16, x = query_5_cast_fp16)[name = tensor<string, []>("linear_13_cast_fp16")];
|
| 153 |
+
tensor<int32, [4]> var_237 = const()[name = tensor<string, []>("op_237"), val = tensor<int32, [4]>([1, -1, 12, 64])];
|
| 154 |
+
tensor<fp16, [1, 128, 12, 64]> var_238_cast_fp16 = reshape(shape = var_237, x = linear_13_cast_fp16)[name = tensor<string, []>("op_238_cast_fp16")];
|
| 155 |
+
tensor<fp16, [768, 768]> transformer_layer_2_attention_v_lin_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_2_attention_v_lin_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(214540352)))];
|
| 156 |
+
tensor<fp16, [768]> transformer_layer_2_attention_v_lin_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_2_attention_v_lin_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(215720064)))];
|
| 157 |
+
tensor<fp16, [1, 128, 768]> linear_14_cast_fp16 = linear(bias = transformer_layer_2_attention_v_lin_bias_to_fp16, weight = transformer_layer_2_attention_v_lin_weight_to_fp16, x = query_5_cast_fp16)[name = tensor<string, []>("linear_14_cast_fp16")];
|
| 158 |
+
tensor<int32, [4]> var_243 = const()[name = tensor<string, []>("op_243"), val = tensor<int32, [4]>([1, -1, 12, 64])];
|
| 159 |
+
tensor<fp16, [1, 128, 12, 64]> var_244_cast_fp16 = reshape(shape = var_243, x = linear_14_cast_fp16)[name = tensor<string, []>("op_244_cast_fp16")];
|
| 160 |
+
tensor<int32, [4]> v_5_perm_0 = const()[name = tensor<string, []>("v_5_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 161 |
+
tensor<fp16, []> _inversed_q_11_y_0_to_fp16 = const()[name = tensor<string, []>("_inversed_q_11_y_0_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
|
| 162 |
+
tensor<fp16, [1, 128, 12, 64]> _inversed_q_11_cast_fp16 = mul(x = var_232_cast_fp16, y = _inversed_q_11_y_0_to_fp16)[name = tensor<string, []>("_inversed_q_11_cast_fp16")];
|
| 163 |
+
tensor<bool, []> scores_5_transpose_x_0 = const()[name = tensor<string, []>("scores_5_transpose_x_0"), val = tensor<bool, []>(false)];
|
| 164 |
+
tensor<bool, []> scores_5_transpose_y_0 = const()[name = tensor<string, []>("scores_5_transpose_y_0"), val = tensor<bool, []>(false)];
|
| 165 |
+
tensor<int32, [4]> transpose_22_perm_0 = const()[name = tensor<string, []>("transpose_22_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
|
| 166 |
+
tensor<int32, [4]> transpose_23_perm_0 = const()[name = tensor<string, []>("transpose_23_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
|
| 167 |
+
tensor<fp16, [1, 12, 64, 128]> transpose_23 = transpose(perm = transpose_23_perm_0, x = var_238_cast_fp16)[name = tensor<string, []>("transpose_43")];
|
| 168 |
+
tensor<fp16, [1, 12, 128, 64]> transpose_22 = transpose(perm = transpose_22_perm_0, x = _inversed_q_11_cast_fp16)[name = tensor<string, []>("transpose_44")];
|
| 169 |
+
tensor<fp16, [1, 12, 128, 128]> scores_5_cast_fp16 = matmul(transpose_x = scores_5_transpose_x_0, transpose_y = scores_5_transpose_y_0, x = transpose_22, y = transpose_23)[name = tensor<string, []>("scores_5_cast_fp16")];
|
| 170 |
+
tensor<fp16, []> const_9_to_fp16 = const()[name = tensor<string, []>("const_9_to_fp16"), val = tensor<fp16, []>(-inf)];
|
| 171 |
+
tensor<fp16, [1, 12, 128, 128]> input_43_cast_fp16 = select(a = const_9_to_fp16, b = scores_5_cast_fp16, cond = mask_3)[name = tensor<string, []>("input_43_cast_fp16")];
|
| 172 |
+
tensor<fp16, [1, 12, 128, 128]> input_45_cast_fp16 = softmax(axis = var_62, x = input_43_cast_fp16)[name = tensor<string, []>("input_45_cast_fp16")];
|
| 173 |
+
tensor<bool, []> x_23_transpose_x_0 = const()[name = tensor<string, []>("x_23_transpose_x_0"), val = tensor<bool, []>(false)];
|
| 174 |
+
tensor<bool, []> x_23_transpose_y_0 = const()[name = tensor<string, []>("x_23_transpose_y_0"), val = tensor<bool, []>(false)];
|
| 175 |
+
tensor<fp16, [1, 12, 128, 64]> v_5_cast_fp16 = transpose(perm = v_5_perm_0, x = var_244_cast_fp16)[name = tensor<string, []>("transpose_45")];
|
| 176 |
+
tensor<fp16, [1, 12, 128, 64]> x_23_cast_fp16 = matmul(transpose_x = x_23_transpose_x_0, transpose_y = x_23_transpose_y_0, x = input_45_cast_fp16, y = v_5_cast_fp16)[name = tensor<string, []>("x_23_cast_fp16")];
|
| 177 |
+
tensor<int32, [4]> var_260_perm_0 = const()[name = tensor<string, []>("op_260_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 178 |
+
tensor<int32, [3]> var_262 = const()[name = tensor<string, []>("op_262"), val = tensor<int32, [3]>([1, -1, 768])];
|
| 179 |
+
tensor<fp16, [1, 128, 12, 64]> var_260_cast_fp16 = transpose(perm = var_260_perm_0, x = x_23_cast_fp16)[name = tensor<string, []>("transpose_42")];
|
| 180 |
+
tensor<fp16, [1, 128, 768]> input_47_cast_fp16 = reshape(shape = var_262, x = var_260_cast_fp16)[name = tensor<string, []>("input_47_cast_fp16")];
|
| 181 |
+
tensor<fp16, [768, 768]> transformer_layer_2_attention_out_lin_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_2_attention_out_lin_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(215721664)))];
|
| 182 |
+
tensor<fp16, [768]> transformer_layer_2_attention_out_lin_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_2_attention_out_lin_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(216901376)))];
|
| 183 |
+
tensor<fp16, [1, 128, 768]> linear_15_cast_fp16 = linear(bias = transformer_layer_2_attention_out_lin_bias_to_fp16, weight = transformer_layer_2_attention_out_lin_weight_to_fp16, x = input_47_cast_fp16)[name = tensor<string, []>("linear_15_cast_fp16")];
|
| 184 |
+
tensor<fp16, [1, 128, 768]> input_49_cast_fp16 = add(x = linear_15_cast_fp16, y = query_5_cast_fp16)[name = tensor<string, []>("input_49_cast_fp16")];
|
| 185 |
+
tensor<int32, [1]> input_51_axes_0 = const()[name = tensor<string, []>("input_51_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 186 |
+
tensor<fp16, [768]> transformer_layer_2_sa_layer_norm_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_2_sa_layer_norm_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(216902976)))];
|
| 187 |
+
tensor<fp16, [768]> transformer_layer_2_sa_layer_norm_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_2_sa_layer_norm_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(216904576)))];
|
| 188 |
+
tensor<fp16, [1, 128, 768]> input_51_cast_fp16 = layer_norm(axes = input_51_axes_0, beta = transformer_layer_2_sa_layer_norm_bias_to_fp16, epsilon = var_66_to_fp16, gamma = transformer_layer_2_sa_layer_norm_weight_to_fp16, x = input_49_cast_fp16)[name = tensor<string, []>("input_51_cast_fp16")];
|
| 189 |
+
tensor<fp16, [3072, 768]> transformer_layer_2_ffn_lin1_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_2_ffn_lin1_weight_to_fp16"), val = tensor<fp16, [3072, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(216906176)))];
|
| 190 |
+
tensor<fp16, [3072]> transformer_layer_2_ffn_lin1_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_2_ffn_lin1_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(221624832)))];
|
| 191 |
+
tensor<fp16, [1, 128, 3072]> linear_16_cast_fp16 = linear(bias = transformer_layer_2_ffn_lin1_bias_to_fp16, weight = transformer_layer_2_ffn_lin1_weight_to_fp16, x = input_51_cast_fp16)[name = tensor<string, []>("linear_16_cast_fp16")];
|
| 192 |
+
tensor<string, []> input_55_mode_0 = const()[name = tensor<string, []>("input_55_mode_0"), val = tensor<string, []>("EXACT")];
|
| 193 |
+
tensor<fp16, [1, 128, 3072]> input_55_cast_fp16 = gelu(mode = input_55_mode_0, x = linear_16_cast_fp16)[name = tensor<string, []>("input_55_cast_fp16")];
|
| 194 |
+
tensor<fp16, [768, 3072]> transformer_layer_2_ffn_lin2_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_2_ffn_lin2_weight_to_fp16"), val = tensor<fp16, [768, 3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(221631040)))];
|
| 195 |
+
tensor<fp16, [768]> transformer_layer_2_ffn_lin2_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_2_ffn_lin2_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(226349696)))];
|
| 196 |
+
tensor<fp16, [1, 128, 768]> linear_17_cast_fp16 = linear(bias = transformer_layer_2_ffn_lin2_bias_to_fp16, weight = transformer_layer_2_ffn_lin2_weight_to_fp16, x = input_55_cast_fp16)[name = tensor<string, []>("linear_17_cast_fp16")];
|
| 197 |
+
tensor<fp16, [1, 128, 768]> input_59_cast_fp16 = add(x = linear_17_cast_fp16, y = input_51_cast_fp16)[name = tensor<string, []>("input_59_cast_fp16")];
|
| 198 |
+
tensor<int32, [1]> query_7_axes_0 = const()[name = tensor<string, []>("query_7_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 199 |
+
tensor<fp16, [768]> transformer_layer_2_output_layer_norm_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_2_output_layer_norm_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(226351296)))];
|
| 200 |
+
tensor<fp16, [768]> transformer_layer_2_output_layer_norm_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_2_output_layer_norm_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(226352896)))];
|
| 201 |
+
tensor<fp16, [1, 128, 768]> query_7_cast_fp16 = layer_norm(axes = query_7_axes_0, beta = transformer_layer_2_output_layer_norm_bias_to_fp16, epsilon = var_66_to_fp16, gamma = transformer_layer_2_output_layer_norm_weight_to_fp16, x = input_59_cast_fp16)[name = tensor<string, []>("query_7_cast_fp16")];
|
| 202 |
+
tensor<fp16, [768, 768]> transformer_layer_3_attention_q_lin_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_3_attention_q_lin_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(226354496)))];
|
| 203 |
+
tensor<fp16, [768]> transformer_layer_3_attention_q_lin_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_3_attention_q_lin_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(227534208)))];
|
| 204 |
+
tensor<fp16, [1, 128, 768]> linear_18_cast_fp16 = linear(bias = transformer_layer_3_attention_q_lin_bias_to_fp16, weight = transformer_layer_3_attention_q_lin_weight_to_fp16, x = query_7_cast_fp16)[name = tensor<string, []>("linear_18_cast_fp16")];
|
| 205 |
+
tensor<int32, [4]> var_300 = const()[name = tensor<string, []>("op_300"), val = tensor<int32, [4]>([1, -1, 12, 64])];
|
| 206 |
+
tensor<fp16, [1, 128, 12, 64]> var_301_cast_fp16 = reshape(shape = var_300, x = linear_18_cast_fp16)[name = tensor<string, []>("op_301_cast_fp16")];
|
| 207 |
+
tensor<fp16, [768, 768]> transformer_layer_3_attention_k_lin_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_3_attention_k_lin_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(227535808)))];
|
| 208 |
+
tensor<fp16, [768]> transformer_layer_3_attention_k_lin_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_3_attention_k_lin_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(228715520)))];
|
| 209 |
+
tensor<fp16, [1, 128, 768]> linear_19_cast_fp16 = linear(bias = transformer_layer_3_attention_k_lin_bias_to_fp16, weight = transformer_layer_3_attention_k_lin_weight_to_fp16, x = query_7_cast_fp16)[name = tensor<string, []>("linear_19_cast_fp16")];
|
| 210 |
+
tensor<int32, [4]> var_306 = const()[name = tensor<string, []>("op_306"), val = tensor<int32, [4]>([1, -1, 12, 64])];
|
| 211 |
+
tensor<fp16, [1, 128, 12, 64]> var_307_cast_fp16 = reshape(shape = var_306, x = linear_19_cast_fp16)[name = tensor<string, []>("op_307_cast_fp16")];
|
| 212 |
+
tensor<fp16, [768, 768]> transformer_layer_3_attention_v_lin_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_3_attention_v_lin_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(228717120)))];
|
| 213 |
+
tensor<fp16, [768]> transformer_layer_3_attention_v_lin_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_3_attention_v_lin_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(229896832)))];
|
| 214 |
+
tensor<fp16, [1, 128, 768]> linear_20_cast_fp16 = linear(bias = transformer_layer_3_attention_v_lin_bias_to_fp16, weight = transformer_layer_3_attention_v_lin_weight_to_fp16, x = query_7_cast_fp16)[name = tensor<string, []>("linear_20_cast_fp16")];
|
| 215 |
+
tensor<int32, [4]> var_312 = const()[name = tensor<string, []>("op_312"), val = tensor<int32, [4]>([1, -1, 12, 64])];
|
| 216 |
+
tensor<fp16, [1, 128, 12, 64]> var_313_cast_fp16 = reshape(shape = var_312, x = linear_20_cast_fp16)[name = tensor<string, []>("op_313_cast_fp16")];
|
| 217 |
+
tensor<int32, [4]> v_7_perm_0 = const()[name = tensor<string, []>("v_7_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 218 |
+
tensor<fp16, []> _inversed_q_15_y_0_to_fp16 = const()[name = tensor<string, []>("_inversed_q_15_y_0_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
|
| 219 |
+
tensor<fp16, [1, 128, 12, 64]> _inversed_q_15_cast_fp16 = mul(x = var_301_cast_fp16, y = _inversed_q_15_y_0_to_fp16)[name = tensor<string, []>("_inversed_q_15_cast_fp16")];
|
| 220 |
+
tensor<bool, []> scores_7_transpose_x_0 = const()[name = tensor<string, []>("scores_7_transpose_x_0"), val = tensor<bool, []>(false)];
|
| 221 |
+
tensor<bool, []> scores_7_transpose_y_0 = const()[name = tensor<string, []>("scores_7_transpose_y_0"), val = tensor<bool, []>(false)];
|
| 222 |
+
tensor<int32, [4]> transpose_24_perm_0 = const()[name = tensor<string, []>("transpose_24_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
|
| 223 |
+
tensor<int32, [4]> transpose_25_perm_0 = const()[name = tensor<string, []>("transpose_25_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
|
| 224 |
+
tensor<fp16, [1, 12, 64, 128]> transpose_25 = transpose(perm = transpose_25_perm_0, x = var_307_cast_fp16)[name = tensor<string, []>("transpose_39")];
|
| 225 |
+
tensor<fp16, [1, 12, 128, 64]> transpose_24 = transpose(perm = transpose_24_perm_0, x = _inversed_q_15_cast_fp16)[name = tensor<string, []>("transpose_40")];
|
| 226 |
+
tensor<fp16, [1, 12, 128, 128]> scores_7_cast_fp16 = matmul(transpose_x = scores_7_transpose_x_0, transpose_y = scores_7_transpose_y_0, x = transpose_24, y = transpose_25)[name = tensor<string, []>("scores_7_cast_fp16")];
|
| 227 |
+
tensor<fp16, []> const_12_to_fp16 = const()[name = tensor<string, []>("const_12_to_fp16"), val = tensor<fp16, []>(-inf)];
|
| 228 |
+
tensor<fp16, [1, 12, 128, 128]> input_61_cast_fp16 = select(a = const_12_to_fp16, b = scores_7_cast_fp16, cond = mask_3)[name = tensor<string, []>("input_61_cast_fp16")];
|
| 229 |
+
tensor<fp16, [1, 12, 128, 128]> input_63_cast_fp16 = softmax(axis = var_62, x = input_61_cast_fp16)[name = tensor<string, []>("input_63_cast_fp16")];
|
| 230 |
+
tensor<bool, []> x_31_transpose_x_0 = const()[name = tensor<string, []>("x_31_transpose_x_0"), val = tensor<bool, []>(false)];
|
| 231 |
+
tensor<bool, []> x_31_transpose_y_0 = const()[name = tensor<string, []>("x_31_transpose_y_0"), val = tensor<bool, []>(false)];
|
| 232 |
+
tensor<fp16, [1, 12, 128, 64]> v_7_cast_fp16 = transpose(perm = v_7_perm_0, x = var_313_cast_fp16)[name = tensor<string, []>("transpose_41")];
|
| 233 |
+
tensor<fp16, [1, 12, 128, 64]> x_31_cast_fp16 = matmul(transpose_x = x_31_transpose_x_0, transpose_y = x_31_transpose_y_0, x = input_63_cast_fp16, y = v_7_cast_fp16)[name = tensor<string, []>("x_31_cast_fp16")];
|
| 234 |
+
tensor<int32, [4]> var_329_perm_0 = const()[name = tensor<string, []>("op_329_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 235 |
+
tensor<int32, [3]> var_331 = const()[name = tensor<string, []>("op_331"), val = tensor<int32, [3]>([1, -1, 768])];
|
| 236 |
+
tensor<fp16, [1, 128, 12, 64]> var_329_cast_fp16 = transpose(perm = var_329_perm_0, x = x_31_cast_fp16)[name = tensor<string, []>("transpose_38")];
|
| 237 |
+
tensor<fp16, [1, 128, 768]> input_65_cast_fp16 = reshape(shape = var_331, x = var_329_cast_fp16)[name = tensor<string, []>("input_65_cast_fp16")];
|
| 238 |
+
tensor<fp16, [768, 768]> transformer_layer_3_attention_out_lin_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_3_attention_out_lin_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(229898432)))];
|
| 239 |
+
tensor<fp16, [768]> transformer_layer_3_attention_out_lin_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_3_attention_out_lin_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(231078144)))];
|
| 240 |
+
tensor<fp16, [1, 128, 768]> linear_21_cast_fp16 = linear(bias = transformer_layer_3_attention_out_lin_bias_to_fp16, weight = transformer_layer_3_attention_out_lin_weight_to_fp16, x = input_65_cast_fp16)[name = tensor<string, []>("linear_21_cast_fp16")];
|
| 241 |
+
tensor<fp16, [1, 128, 768]> input_67_cast_fp16 = add(x = linear_21_cast_fp16, y = query_7_cast_fp16)[name = tensor<string, []>("input_67_cast_fp16")];
|
| 242 |
+
tensor<int32, [1]> input_69_axes_0 = const()[name = tensor<string, []>("input_69_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 243 |
+
tensor<fp16, [768]> transformer_layer_3_sa_layer_norm_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_3_sa_layer_norm_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(231079744)))];
|
| 244 |
+
tensor<fp16, [768]> transformer_layer_3_sa_layer_norm_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_3_sa_layer_norm_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(231081344)))];
|
| 245 |
+
tensor<fp16, [1, 128, 768]> input_69_cast_fp16 = layer_norm(axes = input_69_axes_0, beta = transformer_layer_3_sa_layer_norm_bias_to_fp16, epsilon = var_66_to_fp16, gamma = transformer_layer_3_sa_layer_norm_weight_to_fp16, x = input_67_cast_fp16)[name = tensor<string, []>("input_69_cast_fp16")];
|
| 246 |
+
tensor<fp16, [3072, 768]> transformer_layer_3_ffn_lin1_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_3_ffn_lin1_weight_to_fp16"), val = tensor<fp16, [3072, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(231082944)))];
|
| 247 |
+
tensor<fp16, [3072]> transformer_layer_3_ffn_lin1_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_3_ffn_lin1_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(235801600)))];
|
| 248 |
+
tensor<fp16, [1, 128, 3072]> linear_22_cast_fp16 = linear(bias = transformer_layer_3_ffn_lin1_bias_to_fp16, weight = transformer_layer_3_ffn_lin1_weight_to_fp16, x = input_69_cast_fp16)[name = tensor<string, []>("linear_22_cast_fp16")];
|
| 249 |
+
tensor<string, []> input_73_mode_0 = const()[name = tensor<string, []>("input_73_mode_0"), val = tensor<string, []>("EXACT")];
|
| 250 |
+
tensor<fp16, [1, 128, 3072]> input_73_cast_fp16 = gelu(mode = input_73_mode_0, x = linear_22_cast_fp16)[name = tensor<string, []>("input_73_cast_fp16")];
|
| 251 |
+
tensor<fp16, [768, 3072]> transformer_layer_3_ffn_lin2_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_3_ffn_lin2_weight_to_fp16"), val = tensor<fp16, [768, 3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(235807808)))];
|
| 252 |
+
tensor<fp16, [768]> transformer_layer_3_ffn_lin2_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_3_ffn_lin2_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(240526464)))];
|
| 253 |
+
tensor<fp16, [1, 128, 768]> linear_23_cast_fp16 = linear(bias = transformer_layer_3_ffn_lin2_bias_to_fp16, weight = transformer_layer_3_ffn_lin2_weight_to_fp16, x = input_73_cast_fp16)[name = tensor<string, []>("linear_23_cast_fp16")];
|
| 254 |
+
tensor<fp16, [1, 128, 768]> input_77_cast_fp16 = add(x = linear_23_cast_fp16, y = input_69_cast_fp16)[name = tensor<string, []>("input_77_cast_fp16")];
|
| 255 |
+
tensor<int32, [1]> query_9_axes_0 = const()[name = tensor<string, []>("query_9_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 256 |
+
tensor<fp16, [768]> transformer_layer_3_output_layer_norm_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_3_output_layer_norm_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(240528064)))];
|
| 257 |
+
tensor<fp16, [768]> transformer_layer_3_output_layer_norm_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_3_output_layer_norm_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(240529664)))];
|
| 258 |
+
tensor<fp16, [1, 128, 768]> query_9_cast_fp16 = layer_norm(axes = query_9_axes_0, beta = transformer_layer_3_output_layer_norm_bias_to_fp16, epsilon = var_66_to_fp16, gamma = transformer_layer_3_output_layer_norm_weight_to_fp16, x = input_77_cast_fp16)[name = tensor<string, []>("query_9_cast_fp16")];
|
| 259 |
+
tensor<fp16, [768, 768]> transformer_layer_4_attention_q_lin_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_4_attention_q_lin_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(240531264)))];
|
| 260 |
+
tensor<fp16, [768]> transformer_layer_4_attention_q_lin_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_4_attention_q_lin_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(241710976)))];
|
| 261 |
+
tensor<fp16, [1, 128, 768]> linear_24_cast_fp16 = linear(bias = transformer_layer_4_attention_q_lin_bias_to_fp16, weight = transformer_layer_4_attention_q_lin_weight_to_fp16, x = query_9_cast_fp16)[name = tensor<string, []>("linear_24_cast_fp16")];
|
| 262 |
+
tensor<int32, [4]> var_369 = const()[name = tensor<string, []>("op_369"), val = tensor<int32, [4]>([1, -1, 12, 64])];
|
| 263 |
+
tensor<fp16, [1, 128, 12, 64]> var_370_cast_fp16 = reshape(shape = var_369, x = linear_24_cast_fp16)[name = tensor<string, []>("op_370_cast_fp16")];
|
| 264 |
+
tensor<fp16, [768, 768]> transformer_layer_4_attention_k_lin_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_4_attention_k_lin_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(241712576)))];
|
| 265 |
+
tensor<fp16, [768]> transformer_layer_4_attention_k_lin_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_4_attention_k_lin_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(242892288)))];
|
| 266 |
+
tensor<fp16, [1, 128, 768]> linear_25_cast_fp16 = linear(bias = transformer_layer_4_attention_k_lin_bias_to_fp16, weight = transformer_layer_4_attention_k_lin_weight_to_fp16, x = query_9_cast_fp16)[name = tensor<string, []>("linear_25_cast_fp16")];
|
| 267 |
+
tensor<int32, [4]> var_375 = const()[name = tensor<string, []>("op_375"), val = tensor<int32, [4]>([1, -1, 12, 64])];
|
| 268 |
+
tensor<fp16, [1, 128, 12, 64]> var_376_cast_fp16 = reshape(shape = var_375, x = linear_25_cast_fp16)[name = tensor<string, []>("op_376_cast_fp16")];
|
| 269 |
+
tensor<fp16, [768, 768]> transformer_layer_4_attention_v_lin_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_4_attention_v_lin_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(242893888)))];
|
| 270 |
+
tensor<fp16, [768]> transformer_layer_4_attention_v_lin_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_4_attention_v_lin_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(244073600)))];
|
| 271 |
+
tensor<fp16, [1, 128, 768]> linear_26_cast_fp16 = linear(bias = transformer_layer_4_attention_v_lin_bias_to_fp16, weight = transformer_layer_4_attention_v_lin_weight_to_fp16, x = query_9_cast_fp16)[name = tensor<string, []>("linear_26_cast_fp16")];
|
| 272 |
+
tensor<int32, [4]> var_381 = const()[name = tensor<string, []>("op_381"), val = tensor<int32, [4]>([1, -1, 12, 64])];
|
| 273 |
+
tensor<fp16, [1, 128, 12, 64]> var_382_cast_fp16 = reshape(shape = var_381, x = linear_26_cast_fp16)[name = tensor<string, []>("op_382_cast_fp16")];
|
| 274 |
+
tensor<int32, [4]> v_9_perm_0 = const()[name = tensor<string, []>("v_9_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 275 |
+
tensor<fp16, []> _inversed_q_19_y_0_to_fp16 = const()[name = tensor<string, []>("_inversed_q_19_y_0_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
|
| 276 |
+
tensor<fp16, [1, 128, 12, 64]> _inversed_q_19_cast_fp16 = mul(x = var_370_cast_fp16, y = _inversed_q_19_y_0_to_fp16)[name = tensor<string, []>("_inversed_q_19_cast_fp16")];
|
| 277 |
+
tensor<bool, []> scores_9_transpose_x_0 = const()[name = tensor<string, []>("scores_9_transpose_x_0"), val = tensor<bool, []>(false)];
|
| 278 |
+
tensor<bool, []> scores_9_transpose_y_0 = const()[name = tensor<string, []>("scores_9_transpose_y_0"), val = tensor<bool, []>(false)];
|
| 279 |
+
tensor<int32, [4]> transpose_26_perm_0 = const()[name = tensor<string, []>("transpose_26_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
|
| 280 |
+
tensor<int32, [4]> transpose_27_perm_0 = const()[name = tensor<string, []>("transpose_27_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
|
| 281 |
+
tensor<fp16, [1, 12, 64, 128]> transpose_27 = transpose(perm = transpose_27_perm_0, x = var_376_cast_fp16)[name = tensor<string, []>("transpose_35")];
|
| 282 |
+
tensor<fp16, [1, 12, 128, 64]> transpose_26 = transpose(perm = transpose_26_perm_0, x = _inversed_q_19_cast_fp16)[name = tensor<string, []>("transpose_36")];
|
| 283 |
+
tensor<fp16, [1, 12, 128, 128]> scores_9_cast_fp16 = matmul(transpose_x = scores_9_transpose_x_0, transpose_y = scores_9_transpose_y_0, x = transpose_26, y = transpose_27)[name = tensor<string, []>("scores_9_cast_fp16")];
|
| 284 |
+
tensor<fp16, []> const_15_to_fp16 = const()[name = tensor<string, []>("const_15_to_fp16"), val = tensor<fp16, []>(-inf)];
|
| 285 |
+
tensor<fp16, [1, 12, 128, 128]> input_79_cast_fp16 = select(a = const_15_to_fp16, b = scores_9_cast_fp16, cond = mask_3)[name = tensor<string, []>("input_79_cast_fp16")];
|
| 286 |
+
tensor<fp16, [1, 12, 128, 128]> input_81_cast_fp16 = softmax(axis = var_62, x = input_79_cast_fp16)[name = tensor<string, []>("input_81_cast_fp16")];
|
| 287 |
+
tensor<bool, []> x_39_transpose_x_0 = const()[name = tensor<string, []>("x_39_transpose_x_0"), val = tensor<bool, []>(false)];
|
| 288 |
+
tensor<bool, []> x_39_transpose_y_0 = const()[name = tensor<string, []>("x_39_transpose_y_0"), val = tensor<bool, []>(false)];
|
| 289 |
+
tensor<fp16, [1, 12, 128, 64]> v_9_cast_fp16 = transpose(perm = v_9_perm_0, x = var_382_cast_fp16)[name = tensor<string, []>("transpose_37")];
|
| 290 |
+
tensor<fp16, [1, 12, 128, 64]> x_39_cast_fp16 = matmul(transpose_x = x_39_transpose_x_0, transpose_y = x_39_transpose_y_0, x = input_81_cast_fp16, y = v_9_cast_fp16)[name = tensor<string, []>("x_39_cast_fp16")];
|
| 291 |
+
tensor<int32, [4]> var_398_perm_0 = const()[name = tensor<string, []>("op_398_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 292 |
+
tensor<int32, [3]> var_400 = const()[name = tensor<string, []>("op_400"), val = tensor<int32, [3]>([1, -1, 768])];
|
| 293 |
+
tensor<fp16, [1, 128, 12, 64]> var_398_cast_fp16 = transpose(perm = var_398_perm_0, x = x_39_cast_fp16)[name = tensor<string, []>("transpose_34")];
|
| 294 |
+
tensor<fp16, [1, 128, 768]> input_83_cast_fp16 = reshape(shape = var_400, x = var_398_cast_fp16)[name = tensor<string, []>("input_83_cast_fp16")];
|
| 295 |
+
tensor<fp16, [768, 768]> transformer_layer_4_attention_out_lin_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_4_attention_out_lin_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(244075200)))];
|
| 296 |
+
tensor<fp16, [768]> transformer_layer_4_attention_out_lin_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_4_attention_out_lin_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(245254912)))];
|
| 297 |
+
tensor<fp16, [1, 128, 768]> linear_27_cast_fp16 = linear(bias = transformer_layer_4_attention_out_lin_bias_to_fp16, weight = transformer_layer_4_attention_out_lin_weight_to_fp16, x = input_83_cast_fp16)[name = tensor<string, []>("linear_27_cast_fp16")];
|
| 298 |
+
tensor<fp16, [1, 128, 768]> input_85_cast_fp16 = add(x = linear_27_cast_fp16, y = query_9_cast_fp16)[name = tensor<string, []>("input_85_cast_fp16")];
|
| 299 |
+
tensor<int32, [1]> input_87_axes_0 = const()[name = tensor<string, []>("input_87_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 300 |
+
tensor<fp16, [768]> transformer_layer_4_sa_layer_norm_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_4_sa_layer_norm_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(245256512)))];
|
| 301 |
+
tensor<fp16, [768]> transformer_layer_4_sa_layer_norm_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_4_sa_layer_norm_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(245258112)))];
|
| 302 |
+
tensor<fp16, [1, 128, 768]> input_87_cast_fp16 = layer_norm(axes = input_87_axes_0, beta = transformer_layer_4_sa_layer_norm_bias_to_fp16, epsilon = var_66_to_fp16, gamma = transformer_layer_4_sa_layer_norm_weight_to_fp16, x = input_85_cast_fp16)[name = tensor<string, []>("input_87_cast_fp16")];
|
| 303 |
+
tensor<fp16, [3072, 768]> transformer_layer_4_ffn_lin1_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_4_ffn_lin1_weight_to_fp16"), val = tensor<fp16, [3072, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(245259712)))];
|
| 304 |
+
tensor<fp16, [3072]> transformer_layer_4_ffn_lin1_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_4_ffn_lin1_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(249978368)))];
|
| 305 |
+
tensor<fp16, [1, 128, 3072]> linear_28_cast_fp16 = linear(bias = transformer_layer_4_ffn_lin1_bias_to_fp16, weight = transformer_layer_4_ffn_lin1_weight_to_fp16, x = input_87_cast_fp16)[name = tensor<string, []>("linear_28_cast_fp16")];
|
| 306 |
+
tensor<string, []> input_91_mode_0 = const()[name = tensor<string, []>("input_91_mode_0"), val = tensor<string, []>("EXACT")];
|
| 307 |
+
tensor<fp16, [1, 128, 3072]> input_91_cast_fp16 = gelu(mode = input_91_mode_0, x = linear_28_cast_fp16)[name = tensor<string, []>("input_91_cast_fp16")];
|
| 308 |
+
tensor<fp16, [768, 3072]> transformer_layer_4_ffn_lin2_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_4_ffn_lin2_weight_to_fp16"), val = tensor<fp16, [768, 3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(249984576)))];
|
| 309 |
+
tensor<fp16, [768]> transformer_layer_4_ffn_lin2_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_4_ffn_lin2_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(254703232)))];
|
| 310 |
+
tensor<fp16, [1, 128, 768]> linear_29_cast_fp16 = linear(bias = transformer_layer_4_ffn_lin2_bias_to_fp16, weight = transformer_layer_4_ffn_lin2_weight_to_fp16, x = input_91_cast_fp16)[name = tensor<string, []>("linear_29_cast_fp16")];
|
| 311 |
+
tensor<fp16, [1, 128, 768]> input_95_cast_fp16 = add(x = linear_29_cast_fp16, y = input_87_cast_fp16)[name = tensor<string, []>("input_95_cast_fp16")];
|
| 312 |
+
tensor<int32, [1]> query_axes_0 = const()[name = tensor<string, []>("query_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 313 |
+
tensor<fp16, [768]> transformer_layer_4_output_layer_norm_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_4_output_layer_norm_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(254704832)))];
|
| 314 |
+
tensor<fp16, [768]> transformer_layer_4_output_layer_norm_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_4_output_layer_norm_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(254706432)))];
|
| 315 |
+
tensor<fp16, [1, 128, 768]> query_cast_fp16 = layer_norm(axes = query_axes_0, beta = transformer_layer_4_output_layer_norm_bias_to_fp16, epsilon = var_66_to_fp16, gamma = transformer_layer_4_output_layer_norm_weight_to_fp16, x = input_95_cast_fp16)[name = tensor<string, []>("query_cast_fp16")];
|
| 316 |
+
tensor<fp16, [768, 768]> transformer_layer_5_attention_q_lin_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_5_attention_q_lin_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(254708032)))];
|
| 317 |
+
tensor<fp16, [768]> transformer_layer_5_attention_q_lin_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_5_attention_q_lin_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(255887744)))];
|
| 318 |
+
tensor<fp16, [1, 128, 768]> linear_30_cast_fp16 = linear(bias = transformer_layer_5_attention_q_lin_bias_to_fp16, weight = transformer_layer_5_attention_q_lin_weight_to_fp16, x = query_cast_fp16)[name = tensor<string, []>("linear_30_cast_fp16")];
|
| 319 |
+
tensor<int32, [4]> var_438 = const()[name = tensor<string, []>("op_438"), val = tensor<int32, [4]>([1, -1, 12, 64])];
|
| 320 |
+
tensor<fp16, [1, 128, 12, 64]> var_439_cast_fp16 = reshape(shape = var_438, x = linear_30_cast_fp16)[name = tensor<string, []>("op_439_cast_fp16")];
|
| 321 |
+
tensor<fp16, [768, 768]> transformer_layer_5_attention_k_lin_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_5_attention_k_lin_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(255889344)))];
|
| 322 |
+
tensor<fp16, [768]> transformer_layer_5_attention_k_lin_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_5_attention_k_lin_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(257069056)))];
|
| 323 |
+
tensor<fp16, [1, 128, 768]> linear_31_cast_fp16 = linear(bias = transformer_layer_5_attention_k_lin_bias_to_fp16, weight = transformer_layer_5_attention_k_lin_weight_to_fp16, x = query_cast_fp16)[name = tensor<string, []>("linear_31_cast_fp16")];
|
| 324 |
+
tensor<int32, [4]> var_444 = const()[name = tensor<string, []>("op_444"), val = tensor<int32, [4]>([1, -1, 12, 64])];
|
| 325 |
+
tensor<fp16, [1, 128, 12, 64]> var_445_cast_fp16 = reshape(shape = var_444, x = linear_31_cast_fp16)[name = tensor<string, []>("op_445_cast_fp16")];
|
| 326 |
+
tensor<fp16, [768, 768]> transformer_layer_5_attention_v_lin_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_5_attention_v_lin_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(257070656)))];
|
| 327 |
+
tensor<fp16, [768]> transformer_layer_5_attention_v_lin_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_5_attention_v_lin_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(258250368)))];
|
| 328 |
+
tensor<fp16, [1, 128, 768]> linear_32_cast_fp16 = linear(bias = transformer_layer_5_attention_v_lin_bias_to_fp16, weight = transformer_layer_5_attention_v_lin_weight_to_fp16, x = query_cast_fp16)[name = tensor<string, []>("linear_32_cast_fp16")];
|
| 329 |
+
tensor<int32, [4]> var_450 = const()[name = tensor<string, []>("op_450"), val = tensor<int32, [4]>([1, -1, 12, 64])];
|
| 330 |
+
tensor<fp16, [1, 128, 12, 64]> var_451_cast_fp16 = reshape(shape = var_450, x = linear_32_cast_fp16)[name = tensor<string, []>("op_451_cast_fp16")];
|
| 331 |
+
tensor<int32, [4]> v_perm_0 = const()[name = tensor<string, []>("v_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 332 |
+
tensor<fp16, []> _inversed_q_y_0_to_fp16 = const()[name = tensor<string, []>("_inversed_q_y_0_to_fp16"), val = tensor<fp16, []>(0x1p-3)];
|
| 333 |
+
tensor<fp16, [1, 128, 12, 64]> _inversed_q_cast_fp16 = mul(x = var_439_cast_fp16, y = _inversed_q_y_0_to_fp16)[name = tensor<string, []>("_inversed_q_cast_fp16")];
|
| 334 |
+
tensor<bool, []> scores_transpose_x_0 = const()[name = tensor<string, []>("scores_transpose_x_0"), val = tensor<bool, []>(false)];
|
| 335 |
+
tensor<bool, []> scores_transpose_y_0 = const()[name = tensor<string, []>("scores_transpose_y_0"), val = tensor<bool, []>(false)];
|
| 336 |
+
tensor<int32, [4]> transpose_28_perm_0 = const()[name = tensor<string, []>("transpose_28_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])];
|
| 337 |
+
tensor<int32, [4]> transpose_29_perm_0 = const()[name = tensor<string, []>("transpose_29_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])];
|
| 338 |
+
tensor<fp16, [1, 12, 64, 128]> transpose_29 = transpose(perm = transpose_29_perm_0, x = var_445_cast_fp16)[name = tensor<string, []>("transpose_31")];
|
| 339 |
+
tensor<fp16, [1, 12, 128, 64]> transpose_28 = transpose(perm = transpose_28_perm_0, x = _inversed_q_cast_fp16)[name = tensor<string, []>("transpose_32")];
|
| 340 |
+
tensor<fp16, [1, 12, 128, 128]> scores_cast_fp16 = matmul(transpose_x = scores_transpose_x_0, transpose_y = scores_transpose_y_0, x = transpose_28, y = transpose_29)[name = tensor<string, []>("scores_cast_fp16")];
|
| 341 |
+
tensor<fp16, []> const_18_to_fp16 = const()[name = tensor<string, []>("const_18_to_fp16"), val = tensor<fp16, []>(-inf)];
|
| 342 |
+
tensor<fp16, [1, 12, 128, 128]> input_97_cast_fp16 = select(a = const_18_to_fp16, b = scores_cast_fp16, cond = mask_3)[name = tensor<string, []>("input_97_cast_fp16")];
|
| 343 |
+
tensor<fp16, [1, 12, 128, 128]> input_99_cast_fp16 = softmax(axis = var_62, x = input_97_cast_fp16)[name = tensor<string, []>("input_99_cast_fp16")];
|
| 344 |
+
tensor<bool, []> x_transpose_x_0 = const()[name = tensor<string, []>("x_transpose_x_0"), val = tensor<bool, []>(false)];
|
| 345 |
+
tensor<bool, []> x_transpose_y_0 = const()[name = tensor<string, []>("x_transpose_y_0"), val = tensor<bool, []>(false)];
|
| 346 |
+
tensor<fp16, [1, 12, 128, 64]> v_cast_fp16 = transpose(perm = v_perm_0, x = var_451_cast_fp16)[name = tensor<string, []>("transpose_33")];
|
| 347 |
+
tensor<fp16, [1, 12, 128, 64]> x_cast_fp16 = matmul(transpose_x = x_transpose_x_0, transpose_y = x_transpose_y_0, x = input_99_cast_fp16, y = v_cast_fp16)[name = tensor<string, []>("x_cast_fp16")];
|
| 348 |
+
tensor<int32, [4]> var_467_perm_0 = const()[name = tensor<string, []>("op_467_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
|
| 349 |
+
tensor<int32, [3]> var_469 = const()[name = tensor<string, []>("op_469"), val = tensor<int32, [3]>([1, -1, 768])];
|
| 350 |
+
tensor<fp16, [1, 128, 12, 64]> var_467_cast_fp16 = transpose(perm = var_467_perm_0, x = x_cast_fp16)[name = tensor<string, []>("transpose_30")];
|
| 351 |
+
tensor<fp16, [1, 128, 768]> input_101_cast_fp16 = reshape(shape = var_469, x = var_467_cast_fp16)[name = tensor<string, []>("input_101_cast_fp16")];
|
| 352 |
+
tensor<fp16, [768, 768]> transformer_layer_5_attention_out_lin_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_5_attention_out_lin_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(258251968)))];
|
| 353 |
+
tensor<fp16, [768]> transformer_layer_5_attention_out_lin_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_5_attention_out_lin_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(259431680)))];
|
| 354 |
+
tensor<fp16, [1, 128, 768]> linear_33_cast_fp16 = linear(bias = transformer_layer_5_attention_out_lin_bias_to_fp16, weight = transformer_layer_5_attention_out_lin_weight_to_fp16, x = input_101_cast_fp16)[name = tensor<string, []>("linear_33_cast_fp16")];
|
| 355 |
+
tensor<fp16, [1, 128, 768]> input_103_cast_fp16 = add(x = linear_33_cast_fp16, y = query_cast_fp16)[name = tensor<string, []>("input_103_cast_fp16")];
|
| 356 |
+
tensor<int32, [1]> input_105_axes_0 = const()[name = tensor<string, []>("input_105_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 357 |
+
tensor<fp16, [768]> transformer_layer_5_sa_layer_norm_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_5_sa_layer_norm_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(259433280)))];
|
| 358 |
+
tensor<fp16, [768]> transformer_layer_5_sa_layer_norm_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_5_sa_layer_norm_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(259434880)))];
|
| 359 |
+
tensor<fp16, [1, 128, 768]> input_105_cast_fp16 = layer_norm(axes = input_105_axes_0, beta = transformer_layer_5_sa_layer_norm_bias_to_fp16, epsilon = var_66_to_fp16, gamma = transformer_layer_5_sa_layer_norm_weight_to_fp16, x = input_103_cast_fp16)[name = tensor<string, []>("input_105_cast_fp16")];
|
| 360 |
+
tensor<fp16, [3072, 768]> transformer_layer_5_ffn_lin1_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_5_ffn_lin1_weight_to_fp16"), val = tensor<fp16, [3072, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(259436480)))];
|
| 361 |
+
tensor<fp16, [3072]> transformer_layer_5_ffn_lin1_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_5_ffn_lin1_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(264155136)))];
|
| 362 |
+
tensor<fp16, [1, 128, 3072]> linear_34_cast_fp16 = linear(bias = transformer_layer_5_ffn_lin1_bias_to_fp16, weight = transformer_layer_5_ffn_lin1_weight_to_fp16, x = input_105_cast_fp16)[name = tensor<string, []>("linear_34_cast_fp16")];
|
| 363 |
+
tensor<string, []> input_109_mode_0 = const()[name = tensor<string, []>("input_109_mode_0"), val = tensor<string, []>("EXACT")];
|
| 364 |
+
tensor<fp16, [1, 128, 3072]> input_109_cast_fp16 = gelu(mode = input_109_mode_0, x = linear_34_cast_fp16)[name = tensor<string, []>("input_109_cast_fp16")];
|
| 365 |
+
tensor<fp16, [768, 3072]> transformer_layer_5_ffn_lin2_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_5_ffn_lin2_weight_to_fp16"), val = tensor<fp16, [768, 3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(264161344)))];
|
| 366 |
+
tensor<fp16, [768]> transformer_layer_5_ffn_lin2_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_5_ffn_lin2_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(268880000)))];
|
| 367 |
+
tensor<fp16, [1, 128, 768]> linear_35_cast_fp16 = linear(bias = transformer_layer_5_ffn_lin2_bias_to_fp16, weight = transformer_layer_5_ffn_lin2_weight_to_fp16, x = input_109_cast_fp16)[name = tensor<string, []>("linear_35_cast_fp16")];
|
| 368 |
+
tensor<fp16, [1, 128, 768]> input_113_cast_fp16 = add(x = linear_35_cast_fp16, y = input_105_cast_fp16)[name = tensor<string, []>("input_113_cast_fp16")];
|
| 369 |
+
tensor<int32, [1]> input_115_axes_0 = const()[name = tensor<string, []>("input_115_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 370 |
+
tensor<fp16, [768]> transformer_layer_5_output_layer_norm_weight_to_fp16 = const()[name = tensor<string, []>("transformer_layer_5_output_layer_norm_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(268881600)))];
|
| 371 |
+
tensor<fp16, [768]> transformer_layer_5_output_layer_norm_bias_to_fp16 = const()[name = tensor<string, []>("transformer_layer_5_output_layer_norm_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(268883200)))];
|
| 372 |
+
tensor<fp16, [1, 128, 768]> input_115_cast_fp16 = layer_norm(axes = input_115_axes_0, beta = transformer_layer_5_output_layer_norm_bias_to_fp16, epsilon = var_66_to_fp16, gamma = transformer_layer_5_output_layer_norm_weight_to_fp16, x = input_113_cast_fp16)[name = tensor<string, []>("input_115_cast_fp16")];
|
| 373 |
+
tensor<fp16, [768, 768]> vocab_transform_weight_to_fp16 = const()[name = tensor<string, []>("vocab_transform_weight_to_fp16"), val = tensor<fp16, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(268884800)))];
|
| 374 |
+
tensor<fp16, [768]> vocab_transform_bias_to_fp16 = const()[name = tensor<string, []>("vocab_transform_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(270064512)))];
|
| 375 |
+
tensor<fp16, [1, 128, 768]> linear_36_cast_fp16 = linear(bias = vocab_transform_bias_to_fp16, weight = vocab_transform_weight_to_fp16, x = input_115_cast_fp16)[name = tensor<string, []>("linear_36_cast_fp16")];
|
| 376 |
+
tensor<string, []> input_117_mode_0 = const()[name = tensor<string, []>("input_117_mode_0"), val = tensor<string, []>("EXACT")];
|
| 377 |
+
tensor<fp16, [1, 128, 768]> input_117_cast_fp16 = gelu(mode = input_117_mode_0, x = linear_36_cast_fp16)[name = tensor<string, []>("input_117_cast_fp16")];
|
| 378 |
+
tensor<int32, [1]> input_axes_0 = const()[name = tensor<string, []>("input_axes_0"), val = tensor<int32, [1]>([-1])];
|
| 379 |
+
tensor<fp16, [768]> vocab_layer_norm_weight_to_fp16 = const()[name = tensor<string, []>("vocab_layer_norm_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(270066112)))];
|
| 380 |
+
tensor<fp16, [768]> vocab_layer_norm_bias_to_fp16 = const()[name = tensor<string, []>("vocab_layer_norm_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(270067712)))];
|
| 381 |
+
tensor<fp16, []> var_500_to_fp16 = const()[name = tensor<string, []>("op_500_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
|
| 382 |
+
tensor<fp16, [1, 128, 768]> input_cast_fp16 = layer_norm(axes = input_axes_0, beta = vocab_layer_norm_bias_to_fp16, epsilon = var_500_to_fp16, gamma = vocab_layer_norm_weight_to_fp16, x = input_117_cast_fp16)[name = tensor<string, []>("input_cast_fp16")];
|
| 383 |
+
tensor<fp16, [119547]> vocab_projector_bias_to_fp16 = const()[name = tensor<string, []>("vocab_projector_bias_to_fp16"), val = tensor<fp16, [119547]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(270069312)))];
|
| 384 |
+
tensor<fp16, [1, 128, 119547]> linear_37_cast_fp16 = linear(bias = vocab_projector_bias_to_fp16, weight = embeddings_word_embeddings_weight_to_fp16, x = input_cast_fp16)[name = tensor<string, []>("linear_37_cast_fp16")];
|
| 385 |
+
tensor<string, []> linear_37_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("linear_37_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")];
|
| 386 |
+
tensor<fp32, [1, 128, 119547]> logits = cast(dtype = linear_37_cast_fp16_to_fp32_dtype_0, x = linear_37_cast_fp16)[name = tensor<string, []>("cast_44")];
|
| 387 |
+
} -> (logits);
|
| 388 |
+
}
|
distilbert_mlm.mlmodelc/weights/weight.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e285ae0002b6f54b4b36de3de15f71d208c75199441c6edad5a51cef8a38ce81
|
| 3 |
+
size 270308470
|