{ "version": "1.0", "truncation": { "direction": "Right", "max_length": 1024, "strategy": "LongestFirst", "stride": 0 }, "padding": { "strategy": "BatchLongest", "direction": "Right", "pad_to_multiple_of": null, "pad_id": 2, "pad_type_id": 0, "pad_token": "[PAD]" }, "added_tokens": [], "normalizer": null, "pre_tokenizer": { "type": "Split", "pattern": { "Regex": "[\\s\\S]" }, "behavior": "Isolated", "invert": false }, "post_processor": { "type": "TemplateProcessing", "single": [ { "SpecialToken": { "id": "[CLS]", "type_id": 0 } }, { "Sequence": { "id": "A", "type_id": 0 } }, { "SpecialToken": { "id": "[SEP]", "type_id": 0 } } ], "pair": [ { "SpecialToken": { "id": "[CLS]", "type_id": 0 } }, { "Sequence": { "id": "A", "type_id": 0 } }, { "SpecialToken": { "id": "[SEP]", "type_id": 0 } }, { "Sequence": { "id": "B", "type_id": 1 } }, { "SpecialToken": { "id": "[SEP]", "type_id": 1 } } ], "special_tokens": { "[CLS]": { "id": "[CLS]", "ids": [ 4 ], "tokens": [ "[CLS]" ] }, "[SEP]": { "id": "[SEP]", "ids": [ 5 ], "tokens": [ "[SEP]" ] } } }, "decoder": null, "model": { "type": "WordLevel", "vocab": { "<|start|>": 0, "<|end|>": 1, "[PAD]": 2, "[UNK]": 3, "[CLS]": 4, "[SEP]": 5, "[MASK]": 6, "\u0000": 7, "\u0001": 8, "\u0002": 9, "\u0003": 10, "\u0004": 11, "\u0005": 12, "\u0006": 13, "\u0007": 14, "\b": 15, "\t": 16, "\n": 17, "\u000b": 18, "\f": 19, "\r": 20, "\u000e": 21, "\u000f": 22, "\u0010": 23, "\u0011": 24, "\u0012": 25, "\u0013": 26, "\u0014": 27, "\u0015": 28, "\u0016": 29, "\u0017": 30, "\u0018": 31, "\u0019": 32, "\u001a": 33, "\u001b": 34, "\u001c": 35, "\u001d": 36, "\u001e": 37, "\u001f": 38, " ": 39, "!": 40, "\"": 41, "#": 42, "$": 43, "%": 44, "&": 45, "'": 46, "(": 47, ")": 48, "*": 49, "+": 50, ",": 51, "-": 52, ".": 53, "/": 54, "0": 55, "1": 56, "2": 57, "3": 58, "4": 59, "5": 60, "6": 61, "7": 62, "8": 63, "9": 64, ":": 65, ";": 66, "<": 67, "=": 68, ">": 69, "?": 70, "@": 71, "A": 72, "B": 73, "C": 74, "D": 75, "E": 76, "F": 77, "G": 78, "H": 79, "I": 80, "J": 81, "K": 82, "L": 83, "M": 84, "N": 85, "O": 86, "P": 87, "Q": 88, "R": 89, "S": 90, "T": 91, "U": 92, "V": 93, "W": 94, "X": 95, "Y": 96, "Z": 97, "[": 98, "\\": 99, "]": 100, "^": 101, "_": 102, "`": 103, "a": 104, "b": 105, "c": 106, "d": 107, "e": 108, "f": 109, "g": 110, "h": 111, "i": 112, "j": 113, "k": 114, "l": 115, "m": 116, "n": 117, "o": 118, "p": 119, "q": 120, "r": 121, "s": 122, "t": 123, "u": 124, "v": 125, "w": 126, "x": 127, "y": 128, "z": 129, "{": 130, "|": 131, "}": 132, "~": 133, "": 134, "€": 135, "": 136, "‚": 137, "ƒ": 138, "„": 139, "…": 140, "†": 141, "‡": 142, "ˆ": 143, "‰": 144, "Š": 145, "‹": 146, "Œ": 147, "": 148, "Ž": 149, "": 150, "": 151, "‘": 152, "’": 153, "“": 154, "”": 155, "•": 156, "–": 157, "—": 158, "˜": 159, "™": 160, "š": 161, "›": 162, "œ": 163, "": 164, "ž": 165, "Ÿ": 166, " ": 167, "¡": 168, "¢": 169, "£": 170, "¤": 171, "¥": 172, "¦": 173, "§": 174, "¨": 175, "©": 176, "ª": 177, "«": 178, "¬": 179, "­": 180, "®": 181, "¯": 182, "°": 183, "±": 184, "²": 185, "³": 186, "´": 187, "µ": 188, "¶": 189, "·": 190, "¸": 191, "¹": 192, "º": 193, "»": 194, "¼": 195, "½": 196, "¾": 197, "¿": 198, "À": 199, "Á": 200, "Â": 201, "Ã": 202, "Ä": 203, "Å": 204, "Æ": 205, "Ç": 206, "È": 207, "É": 208, "Ê": 209, "Ë": 210, "Ì": 211, "Í": 212, "Î": 213, "Ï": 214, "Ð": 215, "Ñ": 216, "Ò": 217, "Ó": 218, "Ô": 219, "Õ": 220, "Ö": 221, "×": 222, "Ø": 223, "Ù": 224, "Ú": 225, "Û": 226, "Ü": 227, "Ý": 228, "Þ": 229, "ß": 230, "à": 231, "á": 232, "â": 233, "ã": 234, "ä": 235, "å": 236, "æ": 237, "ç": 238, "è": 239, "é": 240, "ê": 241, "ë": 242, "ì": 243, "í": 244, "î": 245, "ï": 246, "ð": 247, "ñ": 248, "ò": 249, "ó": 250, "ô": 251, "õ": 252, "ö": 253, "÷": 254, "ø": 255, "ù": 256, "ú": 257, "û": 258, "ü": 259, "ý": 260, "þ": 261, "ÿ": 262 }, "unk_token": "[UNK]" } }