{ "128": [ { "case": "normal_medical_note", "text_chars": 95, "covered_chars": 95, "truncated": false, "predicted": [ { "label": "TITLE", "start": 5, "end": 10, "text": " خانم" }, { "label": "GIVENNAME", "start": 10, "end": 17, "text": " نازنین" }, { "label": "SURNAME", "start": 17, "end": 23, "text": " شریفی" }, { "label": "IDCARDNUM", "start": 33, "end": 44, "text": " ۰۰۱۲۳۴۵۶۷۸" }, { "label": "TELEPHONENUM", "start": 53, "end": 65, "text": " 09123456789" }, { "label": "DATE", "start": 74, "end": 85, "text": " ۱۴۰۲/۰۸/۰۹" } ], "expected": [ { "label": "GIVENNAME", "value": "نازنین" }, { "label": "SURNAME", "value": "شریفی" }, { "label": "IDCARDNUM", "value": "۰۰۱۲۳۴۵۶۷۸" }, { "label": "TELEPHONENUM", "value": "09123456789" }, { "label": "DATE", "value": "۱۴۰۲/۰۸/۰۹" } ], "checks": [ { "label": "GIVENNAME", "value": "نازنین", "exact": false, "loose": true }, { "label": "SURNAME", "value": "شریفی", "exact": false, "loose": true }, { "label": "IDCARDNUM", "value": "۰۰۱۲۳۴۵۶۷۸", "exact": false, "loose": true }, { "label": "TELEPHONENUM", "value": "09123456789", "exact": false, "loose": true }, { "label": "DATE", "value": "۱۴۰۲/۰۸/۰۹", "exact": false, "loose": true } ], "loose_recall": 1.0, "unexpected": [ { "label": "TITLE", "text": " خانم" } ] }, { "case": "long_pii_at_end_after_truncation", "text_chars": 3869, "covered_chars": 439, "truncated": true, "predicted": [], "expected": [ { "label": "GIVENNAME", "value": "علی" }, { "label": "SURNAME", "value": "رضایی" }, { "label": "EMAIL", "value": "ali.rezaei@example.com" }, { "label": "TELEPHONENUM", "value": "۰۹۱۲۳۴۵۶۷۸۹" } ], "checks": [ { "label": "GIVENNAME", "value": "علی", "exact": false, "loose": false }, { "label": "SURNAME", "value": "رضایی", "exact": false, "loose": false }, { "label": "EMAIL", "value": "ali.rezaei@example.com", "exact": false, "loose": false }, { "label": "TELEPHONENUM", "value": "۰۹۱۲۳۴۵۶۷۸۹", "exact": false, "loose": false } ], "loose_recall": 0.0, "unexpected": [] }, { "case": "long_pii_at_beginning", "text_chars": 3851, "covered_chars": 401, "truncated": true, "predicted": [ { "label": "GIVENNAME", "start": 3, "end": 7, "text": " علی" }, { "label": "SURNAME", "start": 7, "end": 13, "text": " رضایی" }, { "label": "EMAIL", "start": 20, "end": 32, "text": " ali.rezaei@" }, { "label": "EMAIL", "start": 39, "end": 43, "text": ".com" }, { "label": "TELEPHONENUM", "start": 50, "end": 62, "text": " ۰۹۱۲۳۴۵۶۷۸۹" } ], "expected": [ { "label": "GIVENNAME", "value": "علی" }, { "label": "SURNAME", "value": "رضایی" }, { "label": "EMAIL", "value": "ali.rezaei@example.com" }, { "label": "TELEPHONENUM", "value": "۰۹۱۲۳۴۵۶۷۸۹" } ], "checks": [ { "label": "GIVENNAME", "value": "علی", "exact": false, "loose": true }, { "label": "SURNAME", "value": "رضایی", "exact": false, "loose": true }, { "label": "EMAIL", "value": "ali.rezaei@example.com", "exact": false, "loose": true }, { "label": "TELEPHONENUM", "value": "۰۹۱۲۳۴۵۶۷۸۹", "exact": false, "loose": true } ], "loose_recall": 1.0, "unexpected": [ { "label": "EMAIL", "text": " ali.rezaei@" } ] }, { "case": "mixed_arabic_persian_digits_chars", "text_chars": 89, "covered_chars": 89, "truncated": false, "predicted": [ { "label": "GIVENNAME", "start": 13, "end": 18, "text": " ياسر" }, { "label": "SURNAME", "start": 18, "end": 24, "text": " كاظمي" }, { "label": "IDCARDNUM", "start": 32, "end": 43, "text": " 0012345678" }, { "label": "PASSPORTNUM", "start": 52, "end": 62, "text": " A۱۲۳۴۵۶۷۸" }, { "label": "CREDITCARDNUMBER", "start": 68, "end": 88, "text": " ۶۱۰۴-۳۳۷۷-۱۲۳۴-۵۶۷۸" } ], "expected": [ { "label": "GIVENNAME", "value": "ياسر" }, { "label": "SURNAME", "value": "كاظمي" }, { "label": "IDCARDNUM", "value": "0012345678" }, { "label": "PASSPORTNUM", "value": "A۱۲۳۴۵۶۷۸" }, { "label": "CREDITCARDNUMBER", "value": "۶۱۰۴-۳۳۷۷-۱۲۳۴-۵۶۷۸" } ], "checks": [ { "label": "GIVENNAME", "value": "ياسر", "exact": false, "loose": true }, { "label": "SURNAME", "value": "كاظمي", "exact": false, "loose": true }, { "label": "IDCARDNUM", "value": "0012345678", "exact": false, "loose": true }, { "label": "PASSPORTNUM", "value": "A۱۲۳۴۵۶۷۸", "exact": false, "loose": true }, { "label": "CREDITCARDNUMBER", "value": "۶۱۰۴-۳۳۷۷-۱۲۳۴-۵۶۷۸", "exact": false, "loose": true } ], "loose_recall": 1.0, "unexpected": [] }, { "case": "zwnj_and_titles", "text_chars": 77, "covered_chars": 77, "truncated": false, "predicted": [ { "label": "TITLE", "start": 5, "end": 10, "text": " خانم" }, { "label": "GIVENNAME", "start": 10, "end": 16, "text": " مه‌سا" }, { "label": "SURNAME", "start": 16, "end": 27, "text": " موسوی‌نژاد" }, { "label": "GIVENNAME", "start": 33, "end": 37, "text": " رضا" }, { "label": "STREET", "start": 43, "end": 57, "text": " خیابان شریعتی" }, { "label": "BUILDINGNUM", "start": 62, "end": 65, "text": " ۲۳" }, { "label": "BUILDINGNUM", "start": 70, "end": 72, "text": " ۴" } ], "expected": [ { "label": "TITLE", "value": "خانم" }, { "label": "GIVENNAME", "value": "مه‌سا" }, { "label": "SURNAME", "value": "موسوی‌نژاد" }, { "label": "GIVENNAME", "value": "رضا" }, { "label": "STREET", "value": "خیابان شریعتی" }, { "label": "BUILDINGNUM", "value": "۲۳" } ], "checks": [ { "label": "TITLE", "value": "خانم", "exact": false, "loose": true }, { "label": "GIVENNAME", "value": "مه‌سا", "exact": false, "loose": true }, { "label": "SURNAME", "value": "موسوی‌نژاد", "exact": false, "loose": true }, { "label": "GIVENNAME", "value": "رضا", "exact": false, "loose": true }, { "label": "STREET", "value": "خیابان شریعتی", "exact": false, "loose": true }, { "label": "BUILDINGNUM", "value": "۲۳", "exact": false, "loose": true } ], "loose_recall": 1.0, "unexpected": [ { "label": "BUILDINGNUM", "text": " ۴" } ] }, { "case": "obfuscated_contacts", "text_chars": 88, "covered_chars": 88, "truncated": false, "predicted": [ { "label": "EMAIL", "start": 20, "end": 22, "text": "za" }, { "label": "EMAIL", "start": 28, "end": 35, "text": " karimi" }, { "label": "SOCIALNUM", "start": 78, "end": 87, "text": " ۳۴۵ ۶۷۸۹" } ], "expected": [ { "label": "EMAIL", "value": "reza [dot] karimi [at] example [dot] ir" }, { "label": "TELEPHONENUM", "value": "صفر نهصد و دوازده ۳۴۵ ۶۷۸۹" } ], "checks": [ { "label": "EMAIL", "value": "reza [dot] karimi [at] example [dot] ir", "exact": false, "loose": true }, { "label": "TELEPHONENUM", "value": "صفر نهصد و دوازده ۳۴۵ ۶۷۸۹", "exact": false, "loose": false } ], "loose_recall": 0.5, "unexpected": [ { "label": "SOCIALNUM", "text": " ۳۴۵ ۶۷۸۹" } ] }, { "case": "json_log_format", "text_chars": 110, "covered_chars": 110, "truncated": false, "predicted": [ { "label": "GIVENNAME", "start": 12, "end": 16, "text": "سارا" }, { "label": "SURNAME", "start": 16, "end": 22, "text": " احمدی" }, { "label": "IDCARDNUM", "start": 39, "end": 49, "text": "۱۲۳۴۵۶۷۸۹۰" }, { "label": "TELEPHONENUM", "start": 60, "end": 76, "text": "+98-912-345-6789" }, { "label": "DATE", "start": 85, "end": 95, "text": "1370-05-21" }, { "label": "CITY", "start": 105, "end": 108, "text": "رشت" } ], "expected": [ { "label": "GIVENNAME", "value": "سارا" }, { "label": "SURNAME", "value": "احمدی" }, { "label": "IDCARDNUM", "value": "۱۲۳۴۵۶۷۸۹۰" }, { "label": "TELEPHONENUM", "value": "+98-912-345-6789" }, { "label": "DATE", "value": "1370-05-21" }, { "label": "CITY", "value": "رشت" } ], "checks": [ { "label": "GIVENNAME", "value": "سارا", "exact": true, "loose": true }, { "label": "SURNAME", "value": "احمدی", "exact": false, "loose": true }, { "label": "IDCARDNUM", "value": "۱۲۳۴۵۶۷۸۹۰", "exact": true, "loose": true }, { "label": "TELEPHONENUM", "value": "+98-912-345-6789", "exact": true, "loose": true }, { "label": "DATE", "value": "1370-05-21", "exact": true, "loose": true }, { "label": "CITY", "value": "رشت", "exact": true, "loose": true } ], "loose_recall": 1.0, "unexpected": [] }, { "case": "table_like_rows", "text_chars": 123, "covered_chars": 123, "truncated": false, "predicted": [ { "label": "GIVENNAME", "start": 28, "end": 33, "text": "کیوان" }, { "label": "SURNAME", "start": 33, "end": 39, "text": " مرادی" }, { "label": "CITY", "start": 41, "end": 48, "text": " اصفهان" }, { "label": "ZIPCODE", "start": 50, "end": 61, "text": " ۸۱۷۴۶۳۷۳۴۱" }, { "label": "TAXNUM", "start": 63, "end": 75, "text": " 41122334455" }, { "label": "GIVENNAME", "start": 76, "end": 81, "text": "الهام" }, { "label": "SURNAME", "start": 81, "end": 88, "text": " رستگار" }, { "label": "CITY", "start": 90, "end": 96, "text": " کرمان" }, { "label": "ZIPCODE", "start": 98, "end": 109, "text": " 7616913911" }, { "label": "TAXNUM", "start": 111, "end": 123, "text": " 99887766554" } ], "expected": [ { "label": "GIVENNAME", "value": "کیوان" }, { "label": "SURNAME", "value": "مرادی" }, { "label": "CITY", "value": "اصفهان" }, { "label": "ZIPCODE", "value": "۸۱۷۴۶۳۷۳۴۱" }, { "label": "TAXNUM", "value": "41122334455" }, { "label": "GIVENNAME", "value": "الهام" }, { "label": "SURNAME", "value": "رستگار" }, { "label": "CITY", "value": "کرمان" } ], "checks": [ { "label": "GIVENNAME", "value": "کیوان", "exact": true, "loose": true }, { "label": "SURNAME", "value": "مرادی", "exact": false, "loose": true }, { "label": "CITY", "value": "اصفهان", "exact": false, "loose": true }, { "label": "ZIPCODE", "value": "۸۱۷۴۶۳۷۳۴۱", "exact": false, "loose": true }, { "label": "TAXNUM", "value": "41122334455", "exact": false, "loose": true }, { "label": "GIVENNAME", "value": "الهام", "exact": true, "loose": true }, { "label": "SURNAME", "value": "رستگار", "exact": false, "loose": true }, { "label": "CITY", "value": "کرمان", "exact": false, "loose": true } ], "loose_recall": 1.0, "unexpected": [ { "label": "ZIPCODE", "text": " 7616913911" }, { "label": "TAXNUM", "text": " 99887766554" } ] }, { "case": "ambiguous_non_pii_numbers", "text_chars": 85, "covered_chars": 85, "truncated": false, "predicted": [], "expected": [], "checks": [], "loose_recall": null, "unexpected": [] } ], "256": [ { "case": "normal_medical_note", "text_chars": 95, "covered_chars": 95, "truncated": false, "predicted": [ { "label": "TITLE", "start": 5, "end": 10, "text": " خانم" }, { "label": "GIVENNAME", "start": 10, "end": 17, "text": " نازنین" }, { "label": "SURNAME", "start": 17, "end": 23, "text": " شریفی" }, { "label": "IDCARDNUM", "start": 33, "end": 44, "text": " ۰۰۱۲۳۴۵۶۷۸" }, { "label": "TELEPHONENUM", "start": 53, "end": 65, "text": " 09123456789" }, { "label": "DATE", "start": 74, "end": 85, "text": " ۱۴۰۲/۰۸/۰۹" } ], "expected": [ { "label": "GIVENNAME", "value": "نازنین" }, { "label": "SURNAME", "value": "شریفی" }, { "label": "IDCARDNUM", "value": "۰۰۱۲۳۴۵۶۷۸" }, { "label": "TELEPHONENUM", "value": "09123456789" }, { "label": "DATE", "value": "۱۴۰۲/۰۸/۰۹" } ], "checks": [ { "label": "GIVENNAME", "value": "نازنین", "exact": false, "loose": true }, { "label": "SURNAME", "value": "شریفی", "exact": false, "loose": true }, { "label": "IDCARDNUM", "value": "۰۰۱۲۳۴۵۶۷۸", "exact": false, "loose": true }, { "label": "TELEPHONENUM", "value": "09123456789", "exact": false, "loose": true }, { "label": "DATE", "value": "۱۴۰۲/۰۸/۰۹", "exact": false, "loose": true } ], "loose_recall": 1.0, "unexpected": [ { "label": "TITLE", "text": " خانم" } ] }, { "case": "long_pii_at_end_after_truncation", "text_chars": 3869, "covered_chars": 888, "truncated": true, "predicted": [], "expected": [ { "label": "GIVENNAME", "value": "علی" }, { "label": "SURNAME", "value": "رضایی" }, { "label": "EMAIL", "value": "ali.rezaei@example.com" }, { "label": "TELEPHONENUM", "value": "۰۹۱۲۳۴۵۶۷۸۹" } ], "checks": [ { "label": "GIVENNAME", "value": "علی", "exact": false, "loose": false }, { "label": "SURNAME", "value": "رضایی", "exact": false, "loose": false }, { "label": "EMAIL", "value": "ali.rezaei@example.com", "exact": false, "loose": false }, { "label": "TELEPHONENUM", "value": "۰۹۱۲۳۴۵۶۷۸۹", "exact": false, "loose": false } ], "loose_recall": 0.0, "unexpected": [] }, { "case": "long_pii_at_beginning", "text_chars": 3851, "covered_chars": 846, "truncated": true, "predicted": [ { "label": "GIVENNAME", "start": 3, "end": 7, "text": " علی" }, { "label": "SURNAME", "start": 7, "end": 13, "text": " رضایی" }, { "label": "EMAIL", "start": 20, "end": 32, "text": " ali.rezaei@" }, { "label": "EMAIL", "start": 39, "end": 43, "text": ".com" }, { "label": "TELEPHONENUM", "start": 50, "end": 62, "text": " ۰۹۱۲۳۴۵۶۷۸۹" } ], "expected": [ { "label": "GIVENNAME", "value": "علی" }, { "label": "SURNAME", "value": "رضایی" }, { "label": "EMAIL", "value": "ali.rezaei@example.com" }, { "label": "TELEPHONENUM", "value": "۰۹۱۲۳۴۵۶۷۸۹" } ], "checks": [ { "label": "GIVENNAME", "value": "علی", "exact": false, "loose": true }, { "label": "SURNAME", "value": "رضایی", "exact": false, "loose": true }, { "label": "EMAIL", "value": "ali.rezaei@example.com", "exact": false, "loose": true }, { "label": "TELEPHONENUM", "value": "۰۹۱۲۳۴۵۶۷۸۹", "exact": false, "loose": true } ], "loose_recall": 1.0, "unexpected": [ { "label": "EMAIL", "text": " ali.rezaei@" } ] }, { "case": "mixed_arabic_persian_digits_chars", "text_chars": 89, "covered_chars": 89, "truncated": false, "predicted": [ { "label": "GIVENNAME", "start": 13, "end": 18, "text": " ياسر" }, { "label": "SURNAME", "start": 18, "end": 24, "text": " كاظمي" }, { "label": "IDCARDNUM", "start": 32, "end": 43, "text": " 0012345678" }, { "label": "PASSPORTNUM", "start": 52, "end": 62, "text": " A۱۲۳۴۵۶۷۸" }, { "label": "CREDITCARDNUMBER", "start": 68, "end": 88, "text": " ۶۱۰۴-۳۳۷۷-۱۲۳۴-۵۶۷۸" } ], "expected": [ { "label": "GIVENNAME", "value": "ياسر" }, { "label": "SURNAME", "value": "كاظمي" }, { "label": "IDCARDNUM", "value": "0012345678" }, { "label": "PASSPORTNUM", "value": "A۱۲۳۴۵۶۷۸" }, { "label": "CREDITCARDNUMBER", "value": "۶۱۰۴-۳۳۷۷-۱۲۳۴-۵۶۷۸" } ], "checks": [ { "label": "GIVENNAME", "value": "ياسر", "exact": false, "loose": true }, { "label": "SURNAME", "value": "كاظمي", "exact": false, "loose": true }, { "label": "IDCARDNUM", "value": "0012345678", "exact": false, "loose": true }, { "label": "PASSPORTNUM", "value": "A۱۲۳۴۵۶۷۸", "exact": false, "loose": true }, { "label": "CREDITCARDNUMBER", "value": "۶۱۰۴-۳۳۷۷-۱۲۳۴-۵۶۷۸", "exact": false, "loose": true } ], "loose_recall": 1.0, "unexpected": [] }, { "case": "zwnj_and_titles", "text_chars": 77, "covered_chars": 77, "truncated": false, "predicted": [ { "label": "TITLE", "start": 5, "end": 10, "text": " خانم" }, { "label": "GIVENNAME", "start": 10, "end": 16, "text": " مه‌سا" }, { "label": "SURNAME", "start": 16, "end": 27, "text": " موسوی‌نژاد" }, { "label": "GIVENNAME", "start": 33, "end": 37, "text": " رضا" }, { "label": "STREET", "start": 43, "end": 57, "text": " خیابان شریعتی" }, { "label": "BUILDINGNUM", "start": 62, "end": 65, "text": " ۲۳" }, { "label": "BUILDINGNUM", "start": 70, "end": 72, "text": " ۴" } ], "expected": [ { "label": "TITLE", "value": "خانم" }, { "label": "GIVENNAME", "value": "مه‌سا" }, { "label": "SURNAME", "value": "موسوی‌نژاد" }, { "label": "GIVENNAME", "value": "رضا" }, { "label": "STREET", "value": "خیابان شریعتی" }, { "label": "BUILDINGNUM", "value": "۲۳" } ], "checks": [ { "label": "TITLE", "value": "خانم", "exact": false, "loose": true }, { "label": "GIVENNAME", "value": "مه‌سا", "exact": false, "loose": true }, { "label": "SURNAME", "value": "موسوی‌نژاد", "exact": false, "loose": true }, { "label": "GIVENNAME", "value": "رضا", "exact": false, "loose": true }, { "label": "STREET", "value": "خیابان شریعتی", "exact": false, "loose": true }, { "label": "BUILDINGNUM", "value": "۲۳", "exact": false, "loose": true } ], "loose_recall": 1.0, "unexpected": [ { "label": "BUILDINGNUM", "text": " ۴" } ] }, { "case": "obfuscated_contacts", "text_chars": 88, "covered_chars": 88, "truncated": false, "predicted": [ { "label": "EMAIL", "start": 20, "end": 22, "text": "za" }, { "label": "EMAIL", "start": 28, "end": 35, "text": " karimi" }, { "label": "SOCIALNUM", "start": 78, "end": 87, "text": " ۳۴۵ ۶۷۸۹" } ], "expected": [ { "label": "EMAIL", "value": "reza [dot] karimi [at] example [dot] ir" }, { "label": "TELEPHONENUM", "value": "صفر نهصد و دوازده ۳۴۵ ۶۷۸۹" } ], "checks": [ { "label": "EMAIL", "value": "reza [dot] karimi [at] example [dot] ir", "exact": false, "loose": true }, { "label": "TELEPHONENUM", "value": "صفر نهصد و دوازده ۳۴۵ ۶۷۸۹", "exact": false, "loose": false } ], "loose_recall": 0.5, "unexpected": [ { "label": "SOCIALNUM", "text": " ۳۴۵ ۶۷۸۹" } ] }, { "case": "json_log_format", "text_chars": 110, "covered_chars": 110, "truncated": false, "predicted": [ { "label": "GIVENNAME", "start": 12, "end": 16, "text": "سارا" }, { "label": "SURNAME", "start": 16, "end": 22, "text": " احمدی" }, { "label": "IDCARDNUM", "start": 39, "end": 49, "text": "۱۲۳۴۵۶۷۸۹۰" }, { "label": "TELEPHONENUM", "start": 60, "end": 76, "text": "+98-912-345-6789" }, { "label": "DATE", "start": 85, "end": 95, "text": "1370-05-21" }, { "label": "CITY", "start": 105, "end": 108, "text": "رشت" } ], "expected": [ { "label": "GIVENNAME", "value": "سارا" }, { "label": "SURNAME", "value": "احمدی" }, { "label": "IDCARDNUM", "value": "۱۲۳۴۵۶۷۸۹۰" }, { "label": "TELEPHONENUM", "value": "+98-912-345-6789" }, { "label": "DATE", "value": "1370-05-21" }, { "label": "CITY", "value": "رشت" } ], "checks": [ { "label": "GIVENNAME", "value": "سارا", "exact": true, "loose": true }, { "label": "SURNAME", "value": "احمدی", "exact": false, "loose": true }, { "label": "IDCARDNUM", "value": "۱۲۳۴۵۶۷۸۹۰", "exact": true, "loose": true }, { "label": "TELEPHONENUM", "value": "+98-912-345-6789", "exact": true, "loose": true }, { "label": "DATE", "value": "1370-05-21", "exact": true, "loose": true }, { "label": "CITY", "value": "رشت", "exact": true, "loose": true } ], "loose_recall": 1.0, "unexpected": [] }, { "case": "table_like_rows", "text_chars": 123, "covered_chars": 123, "truncated": false, "predicted": [ { "label": "GIVENNAME", "start": 28, "end": 33, "text": "کیوان" }, { "label": "SURNAME", "start": 33, "end": 39, "text": " مرادی" }, { "label": "CITY", "start": 41, "end": 48, "text": " اصفهان" }, { "label": "ZIPCODE", "start": 50, "end": 61, "text": " ۸۱۷۴۶۳۷۳۴۱" }, { "label": "TAXNUM", "start": 63, "end": 75, "text": " 41122334455" }, { "label": "GIVENNAME", "start": 76, "end": 81, "text": "الهام" }, { "label": "SURNAME", "start": 81, "end": 88, "text": " رستگار" }, { "label": "CITY", "start": 90, "end": 96, "text": " کرمان" }, { "label": "ZIPCODE", "start": 98, "end": 109, "text": " 7616913911" }, { "label": "TAXNUM", "start": 111, "end": 123, "text": " 99887766554" } ], "expected": [ { "label": "GIVENNAME", "value": "کیوان" }, { "label": "SURNAME", "value": "مرادی" }, { "label": "CITY", "value": "اصفهان" }, { "label": "ZIPCODE", "value": "۸۱۷۴۶۳۷۳۴۱" }, { "label": "TAXNUM", "value": "41122334455" }, { "label": "GIVENNAME", "value": "الهام" }, { "label": "SURNAME", "value": "رستگار" }, { "label": "CITY", "value": "کرمان" } ], "checks": [ { "label": "GIVENNAME", "value": "کیوان", "exact": true, "loose": true }, { "label": "SURNAME", "value": "مرادی", "exact": false, "loose": true }, { "label": "CITY", "value": "اصفهان", "exact": false, "loose": true }, { "label": "ZIPCODE", "value": "۸۱۷۴۶۳۷۳۴۱", "exact": false, "loose": true }, { "label": "TAXNUM", "value": "41122334455", "exact": false, "loose": true }, { "label": "GIVENNAME", "value": "الهام", "exact": true, "loose": true }, { "label": "SURNAME", "value": "رستگار", "exact": false, "loose": true }, { "label": "CITY", "value": "کرمان", "exact": false, "loose": true } ], "loose_recall": 1.0, "unexpected": [ { "label": "ZIPCODE", "text": " 7616913911" }, { "label": "TAXNUM", "text": " 99887766554" } ] }, { "case": "ambiguous_non_pii_numbers", "text_chars": 85, "covered_chars": 85, "truncated": false, "predicted": [], "expected": [], "checks": [], "loose_recall": null, "unexpected": [] } ], "512": [ { "case": "normal_medical_note", "text_chars": 95, "covered_chars": 95, "truncated": false, "predicted": [ { "label": "TITLE", "start": 5, "end": 10, "text": " خانم" }, { "label": "GIVENNAME", "start": 10, "end": 17, "text": " نازنین" }, { "label": "SURNAME", "start": 17, "end": 23, "text": " شریفی" }, { "label": "IDCARDNUM", "start": 33, "end": 44, "text": " ۰۰۱۲۳۴۵۶۷۸" }, { "label": "TELEPHONENUM", "start": 53, "end": 65, "text": " 09123456789" }, { "label": "DATE", "start": 74, "end": 85, "text": " ۱۴۰۲/۰۸/۰۹" } ], "expected": [ { "label": "GIVENNAME", "value": "نازنین" }, { "label": "SURNAME", "value": "شریفی" }, { "label": "IDCARDNUM", "value": "۰۰۱۲۳۴۵۶۷۸" }, { "label": "TELEPHONENUM", "value": "09123456789" }, { "label": "DATE", "value": "۱۴۰۲/۰۸/۰۹" } ], "checks": [ { "label": "GIVENNAME", "value": "نازنین", "exact": false, "loose": true }, { "label": "SURNAME", "value": "شریفی", "exact": false, "loose": true }, { "label": "IDCARDNUM", "value": "۰۰۱۲۳۴۵۶۷۸", "exact": false, "loose": true }, { "label": "TELEPHONENUM", "value": "09123456789", "exact": false, "loose": true }, { "label": "DATE", "value": "۱۴۰۲/۰۸/۰۹", "exact": false, "loose": true } ], "loose_recall": 1.0, "unexpected": [ { "label": "TITLE", "text": " خانم" } ] }, { "case": "long_pii_at_end_after_truncation", "text_chars": 3869, "covered_chars": 1783, "truncated": true, "predicted": [], "expected": [ { "label": "GIVENNAME", "value": "علی" }, { "label": "SURNAME", "value": "رضایی" }, { "label": "EMAIL", "value": "ali.rezaei@example.com" }, { "label": "TELEPHONENUM", "value": "۰۹۱۲۳۴۵۶۷۸۹" } ], "checks": [ { "label": "GIVENNAME", "value": "علی", "exact": false, "loose": false }, { "label": "SURNAME", "value": "رضایی", "exact": false, "loose": false }, { "label": "EMAIL", "value": "ali.rezaei@example.com", "exact": false, "loose": false }, { "label": "TELEPHONENUM", "value": "۰۹۱۲۳۴۵۶۷۸۹", "exact": false, "loose": false } ], "loose_recall": 0.0, "unexpected": [] }, { "case": "long_pii_at_beginning", "text_chars": 3851, "covered_chars": 1745, "truncated": true, "predicted": [ { "label": "GIVENNAME", "start": 3, "end": 7, "text": " علی" }, { "label": "SURNAME", "start": 7, "end": 13, "text": " رضایی" }, { "label": "EMAIL", "start": 20, "end": 32, "text": " ali.rezaei@" }, { "label": "EMAIL", "start": 39, "end": 43, "text": ".com" }, { "label": "TELEPHONENUM", "start": 50, "end": 62, "text": " ۰۹۱۲۳۴۵۶۷۸۹" } ], "expected": [ { "label": "GIVENNAME", "value": "علی" }, { "label": "SURNAME", "value": "رضایی" }, { "label": "EMAIL", "value": "ali.rezaei@example.com" }, { "label": "TELEPHONENUM", "value": "۰۹۱۲۳۴۵۶۷۸۹" } ], "checks": [ { "label": "GIVENNAME", "value": "علی", "exact": false, "loose": true }, { "label": "SURNAME", "value": "رضایی", "exact": false, "loose": true }, { "label": "EMAIL", "value": "ali.rezaei@example.com", "exact": false, "loose": true }, { "label": "TELEPHONENUM", "value": "۰۹۱۲۳۴۵۶۷۸۹", "exact": false, "loose": true } ], "loose_recall": 1.0, "unexpected": [ { "label": "EMAIL", "text": " ali.rezaei@" } ] }, { "case": "mixed_arabic_persian_digits_chars", "text_chars": 89, "covered_chars": 89, "truncated": false, "predicted": [ { "label": "GIVENNAME", "start": 13, "end": 18, "text": " ياسر" }, { "label": "SURNAME", "start": 18, "end": 24, "text": " كاظمي" }, { "label": "IDCARDNUM", "start": 32, "end": 43, "text": " 0012345678" }, { "label": "PASSPORTNUM", "start": 52, "end": 62, "text": " A۱۲۳۴۵۶۷۸" }, { "label": "CREDITCARDNUMBER", "start": 68, "end": 88, "text": " ۶۱۰۴-۳۳۷۷-۱۲۳۴-۵۶۷۸" } ], "expected": [ { "label": "GIVENNAME", "value": "ياسر" }, { "label": "SURNAME", "value": "كاظمي" }, { "label": "IDCARDNUM", "value": "0012345678" }, { "label": "PASSPORTNUM", "value": "A۱۲۳۴۵۶۷۸" }, { "label": "CREDITCARDNUMBER", "value": "۶۱۰۴-۳۳۷۷-۱۲۳۴-۵۶۷۸" } ], "checks": [ { "label": "GIVENNAME", "value": "ياسر", "exact": false, "loose": true }, { "label": "SURNAME", "value": "كاظمي", "exact": false, "loose": true }, { "label": "IDCARDNUM", "value": "0012345678", "exact": false, "loose": true }, { "label": "PASSPORTNUM", "value": "A۱۲۳۴۵۶۷۸", "exact": false, "loose": true }, { "label": "CREDITCARDNUMBER", "value": "۶۱۰۴-۳۳۷۷-۱۲۳۴-۵۶۷۸", "exact": false, "loose": true } ], "loose_recall": 1.0, "unexpected": [] }, { "case": "zwnj_and_titles", "text_chars": 77, "covered_chars": 77, "truncated": false, "predicted": [ { "label": "TITLE", "start": 5, "end": 10, "text": " خانم" }, { "label": "GIVENNAME", "start": 10, "end": 16, "text": " مه‌سا" }, { "label": "SURNAME", "start": 16, "end": 27, "text": " موسوی‌نژاد" }, { "label": "GIVENNAME", "start": 33, "end": 37, "text": " رضا" }, { "label": "STREET", "start": 43, "end": 57, "text": " خیابان شریعتی" }, { "label": "BUILDINGNUM", "start": 62, "end": 65, "text": " ۲۳" }, { "label": "BUILDINGNUM", "start": 70, "end": 72, "text": " ۴" } ], "expected": [ { "label": "TITLE", "value": "خانم" }, { "label": "GIVENNAME", "value": "مه‌سا" }, { "label": "SURNAME", "value": "موسوی‌نژاد" }, { "label": "GIVENNAME", "value": "رضا" }, { "label": "STREET", "value": "خیابان شریعتی" }, { "label": "BUILDINGNUM", "value": "۲۳" } ], "checks": [ { "label": "TITLE", "value": "خانم", "exact": false, "loose": true }, { "label": "GIVENNAME", "value": "مه‌سا", "exact": false, "loose": true }, { "label": "SURNAME", "value": "موسوی‌نژاد", "exact": false, "loose": true }, { "label": "GIVENNAME", "value": "رضا", "exact": false, "loose": true }, { "label": "STREET", "value": "خیابان شریعتی", "exact": false, "loose": true }, { "label": "BUILDINGNUM", "value": "۲۳", "exact": false, "loose": true } ], "loose_recall": 1.0, "unexpected": [ { "label": "BUILDINGNUM", "text": " ۴" } ] }, { "case": "obfuscated_contacts", "text_chars": 88, "covered_chars": 88, "truncated": false, "predicted": [ { "label": "EMAIL", "start": 20, "end": 22, "text": "za" }, { "label": "EMAIL", "start": 28, "end": 35, "text": " karimi" }, { "label": "SOCIALNUM", "start": 78, "end": 87, "text": " ۳۴۵ ۶۷۸۹" } ], "expected": [ { "label": "EMAIL", "value": "reza [dot] karimi [at] example [dot] ir" }, { "label": "TELEPHONENUM", "value": "صفر نهصد و دوازده ۳۴۵ ۶۷۸۹" } ], "checks": [ { "label": "EMAIL", "value": "reza [dot] karimi [at] example [dot] ir", "exact": false, "loose": true }, { "label": "TELEPHONENUM", "value": "صفر نهصد و دوازده ۳۴۵ ۶۷۸۹", "exact": false, "loose": false } ], "loose_recall": 0.5, "unexpected": [ { "label": "SOCIALNUM", "text": " ۳۴۵ ۶۷۸۹" } ] }, { "case": "json_log_format", "text_chars": 110, "covered_chars": 110, "truncated": false, "predicted": [ { "label": "GIVENNAME", "start": 12, "end": 16, "text": "سارا" }, { "label": "SURNAME", "start": 16, "end": 22, "text": " احمدی" }, { "label": "IDCARDNUM", "start": 39, "end": 49, "text": "۱۲۳۴۵۶۷۸۹۰" }, { "label": "TELEPHONENUM", "start": 60, "end": 76, "text": "+98-912-345-6789" }, { "label": "DATE", "start": 85, "end": 95, "text": "1370-05-21" }, { "label": "CITY", "start": 105, "end": 108, "text": "رشت" } ], "expected": [ { "label": "GIVENNAME", "value": "سارا" }, { "label": "SURNAME", "value": "احمدی" }, { "label": "IDCARDNUM", "value": "۱۲۳۴۵۶۷۸۹۰" }, { "label": "TELEPHONENUM", "value": "+98-912-345-6789" }, { "label": "DATE", "value": "1370-05-21" }, { "label": "CITY", "value": "رشت" } ], "checks": [ { "label": "GIVENNAME", "value": "سارا", "exact": true, "loose": true }, { "label": "SURNAME", "value": "احمدی", "exact": false, "loose": true }, { "label": "IDCARDNUM", "value": "۱۲۳۴۵۶۷۸۹۰", "exact": true, "loose": true }, { "label": "TELEPHONENUM", "value": "+98-912-345-6789", "exact": true, "loose": true }, { "label": "DATE", "value": "1370-05-21", "exact": true, "loose": true }, { "label": "CITY", "value": "رشت", "exact": true, "loose": true } ], "loose_recall": 1.0, "unexpected": [] }, { "case": "table_like_rows", "text_chars": 123, "covered_chars": 123, "truncated": false, "predicted": [ { "label": "GIVENNAME", "start": 28, "end": 33, "text": "کیوان" }, { "label": "SURNAME", "start": 33, "end": 39, "text": " مرادی" }, { "label": "CITY", "start": 41, "end": 48, "text": " اصفهان" }, { "label": "ZIPCODE", "start": 50, "end": 61, "text": " ۸۱۷۴۶۳۷۳۴۱" }, { "label": "TAXNUM", "start": 63, "end": 75, "text": " 41122334455" }, { "label": "GIVENNAME", "start": 76, "end": 81, "text": "الهام" }, { "label": "SURNAME", "start": 81, "end": 88, "text": " رستگار" }, { "label": "CITY", "start": 90, "end": 96, "text": " کرمان" }, { "label": "ZIPCODE", "start": 98, "end": 109, "text": " 7616913911" }, { "label": "TAXNUM", "start": 111, "end": 123, "text": " 99887766554" } ], "expected": [ { "label": "GIVENNAME", "value": "کیوان" }, { "label": "SURNAME", "value": "مرادی" }, { "label": "CITY", "value": "اصفهان" }, { "label": "ZIPCODE", "value": "۸۱۷۴۶۳۷۳۴۱" }, { "label": "TAXNUM", "value": "41122334455" }, { "label": "GIVENNAME", "value": "الهام" }, { "label": "SURNAME", "value": "رستگار" }, { "label": "CITY", "value": "کرمان" } ], "checks": [ { "label": "GIVENNAME", "value": "کیوان", "exact": true, "loose": true }, { "label": "SURNAME", "value": "مرادی", "exact": false, "loose": true }, { "label": "CITY", "value": "اصفهان", "exact": false, "loose": true }, { "label": "ZIPCODE", "value": "۸۱۷۴۶۳۷۳۴۱", "exact": false, "loose": true }, { "label": "TAXNUM", "value": "41122334455", "exact": false, "loose": true }, { "label": "GIVENNAME", "value": "الهام", "exact": true, "loose": true }, { "label": "SURNAME", "value": "رستگار", "exact": false, "loose": true }, { "label": "CITY", "value": "کرمان", "exact": false, "loose": true } ], "loose_recall": 1.0, "unexpected": [ { "label": "ZIPCODE", "text": " 7616913911" }, { "label": "TAXNUM", "text": " 99887766554" } ] }, { "case": "ambiguous_non_pii_numbers", "text_chars": 85, "covered_chars": 85, "truncated": false, "predicted": [], "expected": [], "checks": [], "loose_recall": null, "unexpected": [] } ] }