Reza2kn's picture
Upload folder using huggingface_hub
7bdf716 verified
raw
history blame contribute delete
52.4 kB
{
"128": [
{
"case": "normal_medical_note",
"text_chars": 95,
"covered_chars": 95,
"truncated": false,
"predicted": [
{
"label": "TITLE",
"start": 5,
"end": 10,
"text": " خانم"
},
{
"label": "GIVENNAME",
"start": 10,
"end": 17,
"text": " نازنین"
},
{
"label": "SURNAME",
"start": 17,
"end": 23,
"text": " شریفی"
},
{
"label": "IDCARDNUM",
"start": 33,
"end": 44,
"text": " ۰۰۱۲۳۴۵۶۷۸"
},
{
"label": "TELEPHONENUM",
"start": 53,
"end": 65,
"text": " 09123456789"
},
{
"label": "DATE",
"start": 74,
"end": 85,
"text": " ۱۴۰۲/۰۸/۰۹"
}
],
"expected": [
{
"label": "GIVENNAME",
"value": "نازنین"
},
{
"label": "SURNAME",
"value": "شریفی"
},
{
"label": "IDCARDNUM",
"value": "۰۰۱۲۳۴۵۶۷۸"
},
{
"label": "TELEPHONENUM",
"value": "09123456789"
},
{
"label": "DATE",
"value": "۱۴۰۲/۰۸/۰۹"
}
],
"checks": [
{
"label": "GIVENNAME",
"value": "نازنین",
"exact": false,
"loose": true
},
{
"label": "SURNAME",
"value": "شریفی",
"exact": false,
"loose": true
},
{
"label": "IDCARDNUM",
"value": "۰۰۱۲۳۴۵۶۷۸",
"exact": false,
"loose": true
},
{
"label": "TELEPHONENUM",
"value": "09123456789",
"exact": false,
"loose": true
},
{
"label": "DATE",
"value": "۱۴۰۲/۰۸/۰۹",
"exact": false,
"loose": true
}
],
"loose_recall": 1.0,
"unexpected": [
{
"label": "TITLE",
"text": " خانم"
}
]
},
{
"case": "long_pii_at_end_after_truncation",
"text_chars": 3869,
"covered_chars": 439,
"truncated": true,
"predicted": [],
"expected": [
{
"label": "GIVENNAME",
"value": "علی"
},
{
"label": "SURNAME",
"value": "رضایی"
},
{
"label": "EMAIL",
"value": "ali.rezaei@example.com"
},
{
"label": "TELEPHONENUM",
"value": "۰۹۱۲۳۴۵۶۷۸۹"
}
],
"checks": [
{
"label": "GIVENNAME",
"value": "علی",
"exact": false,
"loose": false
},
{
"label": "SURNAME",
"value": "رضایی",
"exact": false,
"loose": false
},
{
"label": "EMAIL",
"value": "ali.rezaei@example.com",
"exact": false,
"loose": false
},
{
"label": "TELEPHONENUM",
"value": "۰۹۱۲۳۴۵۶۷۸۹",
"exact": false,
"loose": false
}
],
"loose_recall": 0.0,
"unexpected": []
},
{
"case": "long_pii_at_beginning",
"text_chars": 3851,
"covered_chars": 401,
"truncated": true,
"predicted": [
{
"label": "GIVENNAME",
"start": 3,
"end": 7,
"text": " علی"
},
{
"label": "SURNAME",
"start": 7,
"end": 13,
"text": " رضایی"
},
{
"label": "EMAIL",
"start": 20,
"end": 32,
"text": " ali.rezaei@"
},
{
"label": "EMAIL",
"start": 39,
"end": 43,
"text": ".com"
},
{
"label": "TELEPHONENUM",
"start": 50,
"end": 62,
"text": " ۰۹۱۲۳۴۵۶۷۸۹"
}
],
"expected": [
{
"label": "GIVENNAME",
"value": "علی"
},
{
"label": "SURNAME",
"value": "رضایی"
},
{
"label": "EMAIL",
"value": "ali.rezaei@example.com"
},
{
"label": "TELEPHONENUM",
"value": "۰۹۱۲۳۴۵۶۷۸۹"
}
],
"checks": [
{
"label": "GIVENNAME",
"value": "علی",
"exact": false,
"loose": true
},
{
"label": "SURNAME",
"value": "رضایی",
"exact": false,
"loose": true
},
{
"label": "EMAIL",
"value": "ali.rezaei@example.com",
"exact": false,
"loose": true
},
{
"label": "TELEPHONENUM",
"value": "۰۹۱۲۳۴۵۶۷۸۹",
"exact": false,
"loose": true
}
],
"loose_recall": 1.0,
"unexpected": [
{
"label": "EMAIL",
"text": " ali.rezaei@"
}
]
},
{
"case": "mixed_arabic_persian_digits_chars",
"text_chars": 89,
"covered_chars": 89,
"truncated": false,
"predicted": [
{
"label": "GIVENNAME",
"start": 13,
"end": 18,
"text": " ياسر"
},
{
"label": "SURNAME",
"start": 18,
"end": 24,
"text": " كاظمي"
},
{
"label": "IDCARDNUM",
"start": 32,
"end": 43,
"text": " 0012345678"
},
{
"label": "PASSPORTNUM",
"start": 52,
"end": 62,
"text": " A۱۲۳۴۵۶۷۸"
},
{
"label": "CREDITCARDNUMBER",
"start": 68,
"end": 88,
"text": " ۶۱۰۴-۳۳۷۷-۱۲۳۴-۵۶۷۸"
}
],
"expected": [
{
"label": "GIVENNAME",
"value": "ياسر"
},
{
"label": "SURNAME",
"value": "كاظمي"
},
{
"label": "IDCARDNUM",
"value": "0012345678"
},
{
"label": "PASSPORTNUM",
"value": "A۱۲۳۴۵۶۷۸"
},
{
"label": "CREDITCARDNUMBER",
"value": "۶۱۰۴-۳۳۷۷-۱۲۳۴-۵۶۷۸"
}
],
"checks": [
{
"label": "GIVENNAME",
"value": "ياسر",
"exact": false,
"loose": true
},
{
"label": "SURNAME",
"value": "كاظمي",
"exact": false,
"loose": true
},
{
"label": "IDCARDNUM",
"value": "0012345678",
"exact": false,
"loose": true
},
{
"label": "PASSPORTNUM",
"value": "A۱۲۳۴۵۶۷۸",
"exact": false,
"loose": true
},
{
"label": "CREDITCARDNUMBER",
"value": "۶۱۰۴-۳۳۷۷-۱۲۳۴-۵۶۷۸",
"exact": false,
"loose": true
}
],
"loose_recall": 1.0,
"unexpected": []
},
{
"case": "zwnj_and_titles",
"text_chars": 77,
"covered_chars": 77,
"truncated": false,
"predicted": [
{
"label": "TITLE",
"start": 5,
"end": 10,
"text": " خانم"
},
{
"label": "GIVENNAME",
"start": 10,
"end": 16,
"text": " مه‌سا"
},
{
"label": "SURNAME",
"start": 16,
"end": 27,
"text": " موسوی‌نژاد"
},
{
"label": "GIVENNAME",
"start": 33,
"end": 37,
"text": " رضا"
},
{
"label": "STREET",
"start": 43,
"end": 57,
"text": " خیابان شریعتی"
},
{
"label": "BUILDINGNUM",
"start": 62,
"end": 65,
"text": " ۲۳"
},
{
"label": "BUILDINGNUM",
"start": 70,
"end": 72,
"text": " ۴"
}
],
"expected": [
{
"label": "TITLE",
"value": "خانم"
},
{
"label": "GIVENNAME",
"value": "مه‌سا"
},
{
"label": "SURNAME",
"value": "موسوی‌نژاد"
},
{
"label": "GIVENNAME",
"value": "رضا"
},
{
"label": "STREET",
"value": "خیابان شریعتی"
},
{
"label": "BUILDINGNUM",
"value": "۲۳"
}
],
"checks": [
{
"label": "TITLE",
"value": "خانم",
"exact": false,
"loose": true
},
{
"label": "GIVENNAME",
"value": "مه‌سا",
"exact": false,
"loose": true
},
{
"label": "SURNAME",
"value": "موسوی‌نژاد",
"exact": false,
"loose": true
},
{
"label": "GIVENNAME",
"value": "رضا",
"exact": false,
"loose": true
},
{
"label": "STREET",
"value": "خیابان شریعتی",
"exact": false,
"loose": true
},
{
"label": "BUILDINGNUM",
"value": "۲۳",
"exact": false,
"loose": true
}
],
"loose_recall": 1.0,
"unexpected": [
{
"label": "BUILDINGNUM",
"text": " ۴"
}
]
},
{
"case": "obfuscated_contacts",
"text_chars": 88,
"covered_chars": 88,
"truncated": false,
"predicted": [
{
"label": "EMAIL",
"start": 20,
"end": 22,
"text": "za"
},
{
"label": "EMAIL",
"start": 28,
"end": 35,
"text": " karimi"
},
{
"label": "SOCIALNUM",
"start": 78,
"end": 87,
"text": " ۳۴۵ ۶۷۸۹"
}
],
"expected": [
{
"label": "EMAIL",
"value": "reza [dot] karimi [at] example [dot] ir"
},
{
"label": "TELEPHONENUM",
"value": "صفر نهصد و دوازده ۳۴۵ ۶۷۸۹"
}
],
"checks": [
{
"label": "EMAIL",
"value": "reza [dot] karimi [at] example [dot] ir",
"exact": false,
"loose": true
},
{
"label": "TELEPHONENUM",
"value": "صفر نهصد و دوازده ۳۴۵ ۶۷۸۹",
"exact": false,
"loose": false
}
],
"loose_recall": 0.5,
"unexpected": [
{
"label": "SOCIALNUM",
"text": " ۳۴۵ ۶۷۸۹"
}
]
},
{
"case": "json_log_format",
"text_chars": 110,
"covered_chars": 110,
"truncated": false,
"predicted": [
{
"label": "GIVENNAME",
"start": 12,
"end": 16,
"text": "سارا"
},
{
"label": "SURNAME",
"start": 16,
"end": 22,
"text": " احمدی"
},
{
"label": "IDCARDNUM",
"start": 39,
"end": 49,
"text": "۱۲۳۴۵۶۷۸۹۰"
},
{
"label": "TELEPHONENUM",
"start": 60,
"end": 76,
"text": "+98-912-345-6789"
},
{
"label": "DATE",
"start": 85,
"end": 95,
"text": "1370-05-21"
},
{
"label": "CITY",
"start": 105,
"end": 108,
"text": "رشت"
}
],
"expected": [
{
"label": "GIVENNAME",
"value": "سارا"
},
{
"label": "SURNAME",
"value": "احمدی"
},
{
"label": "IDCARDNUM",
"value": "۱۲۳۴۵۶۷۸۹۰"
},
{
"label": "TELEPHONENUM",
"value": "+98-912-345-6789"
},
{
"label": "DATE",
"value": "1370-05-21"
},
{
"label": "CITY",
"value": "رشت"
}
],
"checks": [
{
"label": "GIVENNAME",
"value": "سارا",
"exact": true,
"loose": true
},
{
"label": "SURNAME",
"value": "احمدی",
"exact": false,
"loose": true
},
{
"label": "IDCARDNUM",
"value": "۱۲۳۴۵۶۷۸۹۰",
"exact": true,
"loose": true
},
{
"label": "TELEPHONENUM",
"value": "+98-912-345-6789",
"exact": true,
"loose": true
},
{
"label": "DATE",
"value": "1370-05-21",
"exact": true,
"loose": true
},
{
"label": "CITY",
"value": "رشت",
"exact": true,
"loose": true
}
],
"loose_recall": 1.0,
"unexpected": []
},
{
"case": "table_like_rows",
"text_chars": 123,
"covered_chars": 123,
"truncated": false,
"predicted": [
{
"label": "GIVENNAME",
"start": 28,
"end": 33,
"text": "کیوان"
},
{
"label": "SURNAME",
"start": 33,
"end": 39,
"text": " مرادی"
},
{
"label": "CITY",
"start": 41,
"end": 48,
"text": " اصفهان"
},
{
"label": "ZIPCODE",
"start": 50,
"end": 61,
"text": " ۸۱۷۴۶۳۷۳۴۱"
},
{
"label": "TAXNUM",
"start": 63,
"end": 75,
"text": " 41122334455"
},
{
"label": "GIVENNAME",
"start": 76,
"end": 81,
"text": "الهام"
},
{
"label": "SURNAME",
"start": 81,
"end": 88,
"text": " رستگار"
},
{
"label": "CITY",
"start": 90,
"end": 96,
"text": " کرمان"
},
{
"label": "ZIPCODE",
"start": 98,
"end": 109,
"text": " 7616913911"
},
{
"label": "TAXNUM",
"start": 111,
"end": 123,
"text": " 99887766554"
}
],
"expected": [
{
"label": "GIVENNAME",
"value": "کیوان"
},
{
"label": "SURNAME",
"value": "مرادی"
},
{
"label": "CITY",
"value": "اصفهان"
},
{
"label": "ZIPCODE",
"value": "۸۱۷۴۶۳۷۳۴۱"
},
{
"label": "TAXNUM",
"value": "41122334455"
},
{
"label": "GIVENNAME",
"value": "الهام"
},
{
"label": "SURNAME",
"value": "رستگار"
},
{
"label": "CITY",
"value": "کرمان"
}
],
"checks": [
{
"label": "GIVENNAME",
"value": "کیوان",
"exact": true,
"loose": true
},
{
"label": "SURNAME",
"value": "مرادی",
"exact": false,
"loose": true
},
{
"label": "CITY",
"value": "اصفهان",
"exact": false,
"loose": true
},
{
"label": "ZIPCODE",
"value": "۸۱۷۴۶۳۷۳۴۱",
"exact": false,
"loose": true
},
{
"label": "TAXNUM",
"value": "41122334455",
"exact": false,
"loose": true
},
{
"label": "GIVENNAME",
"value": "الهام",
"exact": true,
"loose": true
},
{
"label": "SURNAME",
"value": "رستگار",
"exact": false,
"loose": true
},
{
"label": "CITY",
"value": "کرمان",
"exact": false,
"loose": true
}
],
"loose_recall": 1.0,
"unexpected": [
{
"label": "ZIPCODE",
"text": " 7616913911"
},
{
"label": "TAXNUM",
"text": " 99887766554"
}
]
},
{
"case": "ambiguous_non_pii_numbers",
"text_chars": 85,
"covered_chars": 85,
"truncated": false,
"predicted": [],
"expected": [],
"checks": [],
"loose_recall": null,
"unexpected": []
}
],
"256": [
{
"case": "normal_medical_note",
"text_chars": 95,
"covered_chars": 95,
"truncated": false,
"predicted": [
{
"label": "TITLE",
"start": 5,
"end": 10,
"text": " خانم"
},
{
"label": "GIVENNAME",
"start": 10,
"end": 17,
"text": " نازنین"
},
{
"label": "SURNAME",
"start": 17,
"end": 23,
"text": " شریفی"
},
{
"label": "IDCARDNUM",
"start": 33,
"end": 44,
"text": " ۰۰۱۲۳۴۵۶۷۸"
},
{
"label": "TELEPHONENUM",
"start": 53,
"end": 65,
"text": " 09123456789"
},
{
"label": "DATE",
"start": 74,
"end": 85,
"text": " ۱۴۰۲/۰۸/۰۹"
}
],
"expected": [
{
"label": "GIVENNAME",
"value": "نازنین"
},
{
"label": "SURNAME",
"value": "شریفی"
},
{
"label": "IDCARDNUM",
"value": "۰۰۱۲۳۴۵۶۷۸"
},
{
"label": "TELEPHONENUM",
"value": "09123456789"
},
{
"label": "DATE",
"value": "۱۴۰۲/۰۸/۰۹"
}
],
"checks": [
{
"label": "GIVENNAME",
"value": "نازنین",
"exact": false,
"loose": true
},
{
"label": "SURNAME",
"value": "شریفی",
"exact": false,
"loose": true
},
{
"label": "IDCARDNUM",
"value": "۰۰۱۲۳۴۵۶۷۸",
"exact": false,
"loose": true
},
{
"label": "TELEPHONENUM",
"value": "09123456789",
"exact": false,
"loose": true
},
{
"label": "DATE",
"value": "۱۴۰۲/۰۸/۰۹",
"exact": false,
"loose": true
}
],
"loose_recall": 1.0,
"unexpected": [
{
"label": "TITLE",
"text": " خانم"
}
]
},
{
"case": "long_pii_at_end_after_truncation",
"text_chars": 3869,
"covered_chars": 888,
"truncated": true,
"predicted": [],
"expected": [
{
"label": "GIVENNAME",
"value": "علی"
},
{
"label": "SURNAME",
"value": "رضایی"
},
{
"label": "EMAIL",
"value": "ali.rezaei@example.com"
},
{
"label": "TELEPHONENUM",
"value": "۰۹۱۲۳۴۵۶۷۸۹"
}
],
"checks": [
{
"label": "GIVENNAME",
"value": "علی",
"exact": false,
"loose": false
},
{
"label": "SURNAME",
"value": "رضایی",
"exact": false,
"loose": false
},
{
"label": "EMAIL",
"value": "ali.rezaei@example.com",
"exact": false,
"loose": false
},
{
"label": "TELEPHONENUM",
"value": "۰۹۱۲۳۴۵۶۷۸۹",
"exact": false,
"loose": false
}
],
"loose_recall": 0.0,
"unexpected": []
},
{
"case": "long_pii_at_beginning",
"text_chars": 3851,
"covered_chars": 846,
"truncated": true,
"predicted": [
{
"label": "GIVENNAME",
"start": 3,
"end": 7,
"text": " علی"
},
{
"label": "SURNAME",
"start": 7,
"end": 13,
"text": " رضایی"
},
{
"label": "EMAIL",
"start": 20,
"end": 32,
"text": " ali.rezaei@"
},
{
"label": "EMAIL",
"start": 39,
"end": 43,
"text": ".com"
},
{
"label": "TELEPHONENUM",
"start": 50,
"end": 62,
"text": " ۰۹۱۲۳۴۵۶۷۸۹"
}
],
"expected": [
{
"label": "GIVENNAME",
"value": "علی"
},
{
"label": "SURNAME",
"value": "رضایی"
},
{
"label": "EMAIL",
"value": "ali.rezaei@example.com"
},
{
"label": "TELEPHONENUM",
"value": "۰۹۱۲۳۴۵۶۷۸۹"
}
],
"checks": [
{
"label": "GIVENNAME",
"value": "علی",
"exact": false,
"loose": true
},
{
"label": "SURNAME",
"value": "رضایی",
"exact": false,
"loose": true
},
{
"label": "EMAIL",
"value": "ali.rezaei@example.com",
"exact": false,
"loose": true
},
{
"label": "TELEPHONENUM",
"value": "۰۹۱۲۳۴۵۶۷۸۹",
"exact": false,
"loose": true
}
],
"loose_recall": 1.0,
"unexpected": [
{
"label": "EMAIL",
"text": " ali.rezaei@"
}
]
},
{
"case": "mixed_arabic_persian_digits_chars",
"text_chars": 89,
"covered_chars": 89,
"truncated": false,
"predicted": [
{
"label": "GIVENNAME",
"start": 13,
"end": 18,
"text": " ياسر"
},
{
"label": "SURNAME",
"start": 18,
"end": 24,
"text": " كاظمي"
},
{
"label": "IDCARDNUM",
"start": 32,
"end": 43,
"text": " 0012345678"
},
{
"label": "PASSPORTNUM",
"start": 52,
"end": 62,
"text": " A۱۲۳۴۵۶۷۸"
},
{
"label": "CREDITCARDNUMBER",
"start": 68,
"end": 88,
"text": " ۶۱۰۴-۳۳۷۷-۱۲۳۴-۵۶۷۸"
}
],
"expected": [
{
"label": "GIVENNAME",
"value": "ياسر"
},
{
"label": "SURNAME",
"value": "كاظمي"
},
{
"label": "IDCARDNUM",
"value": "0012345678"
},
{
"label": "PASSPORTNUM",
"value": "A۱۲۳۴۵۶۷۸"
},
{
"label": "CREDITCARDNUMBER",
"value": "۶۱۰۴-۳۳۷۷-۱۲۳۴-۵۶۷۸"
}
],
"checks": [
{
"label": "GIVENNAME",
"value": "ياسر",
"exact": false,
"loose": true
},
{
"label": "SURNAME",
"value": "كاظمي",
"exact": false,
"loose": true
},
{
"label": "IDCARDNUM",
"value": "0012345678",
"exact": false,
"loose": true
},
{
"label": "PASSPORTNUM",
"value": "A۱۲۳۴۵۶۷۸",
"exact": false,
"loose": true
},
{
"label": "CREDITCARDNUMBER",
"value": "۶۱۰۴-۳۳۷۷-۱۲۳۴-۵۶۷۸",
"exact": false,
"loose": true
}
],
"loose_recall": 1.0,
"unexpected": []
},
{
"case": "zwnj_and_titles",
"text_chars": 77,
"covered_chars": 77,
"truncated": false,
"predicted": [
{
"label": "TITLE",
"start": 5,
"end": 10,
"text": " خانم"
},
{
"label": "GIVENNAME",
"start": 10,
"end": 16,
"text": " مه‌سا"
},
{
"label": "SURNAME",
"start": 16,
"end": 27,
"text": " موسوی‌نژاد"
},
{
"label": "GIVENNAME",
"start": 33,
"end": 37,
"text": " رضا"
},
{
"label": "STREET",
"start": 43,
"end": 57,
"text": " خیابان شریعتی"
},
{
"label": "BUILDINGNUM",
"start": 62,
"end": 65,
"text": " ۲۳"
},
{
"label": "BUILDINGNUM",
"start": 70,
"end": 72,
"text": " ۴"
}
],
"expected": [
{
"label": "TITLE",
"value": "خانم"
},
{
"label": "GIVENNAME",
"value": "مه‌سا"
},
{
"label": "SURNAME",
"value": "موسوی‌نژاد"
},
{
"label": "GIVENNAME",
"value": "رضا"
},
{
"label": "STREET",
"value": "خیابان شریعتی"
},
{
"label": "BUILDINGNUM",
"value": "۲۳"
}
],
"checks": [
{
"label": "TITLE",
"value": "خانم",
"exact": false,
"loose": true
},
{
"label": "GIVENNAME",
"value": "مه‌سا",
"exact": false,
"loose": true
},
{
"label": "SURNAME",
"value": "موسوی‌نژاد",
"exact": false,
"loose": true
},
{
"label": "GIVENNAME",
"value": "رضا",
"exact": false,
"loose": true
},
{
"label": "STREET",
"value": "خیابان شریعتی",
"exact": false,
"loose": true
},
{
"label": "BUILDINGNUM",
"value": "۲۳",
"exact": false,
"loose": true
}
],
"loose_recall": 1.0,
"unexpected": [
{
"label": "BUILDINGNUM",
"text": " ۴"
}
]
},
{
"case": "obfuscated_contacts",
"text_chars": 88,
"covered_chars": 88,
"truncated": false,
"predicted": [
{
"label": "EMAIL",
"start": 20,
"end": 22,
"text": "za"
},
{
"label": "EMAIL",
"start": 28,
"end": 35,
"text": " karimi"
},
{
"label": "SOCIALNUM",
"start": 78,
"end": 87,
"text": " ۳۴۵ ۶۷۸۹"
}
],
"expected": [
{
"label": "EMAIL",
"value": "reza [dot] karimi [at] example [dot] ir"
},
{
"label": "TELEPHONENUM",
"value": "صفر نهصد و دوازده ۳۴۵ ۶۷۸۹"
}
],
"checks": [
{
"label": "EMAIL",
"value": "reza [dot] karimi [at] example [dot] ir",
"exact": false,
"loose": true
},
{
"label": "TELEPHONENUM",
"value": "صفر نهصد و دوازده ۳۴۵ ۶۷۸۹",
"exact": false,
"loose": false
}
],
"loose_recall": 0.5,
"unexpected": [
{
"label": "SOCIALNUM",
"text": " ۳۴۵ ۶۷۸۹"
}
]
},
{
"case": "json_log_format",
"text_chars": 110,
"covered_chars": 110,
"truncated": false,
"predicted": [
{
"label": "GIVENNAME",
"start": 12,
"end": 16,
"text": "سارا"
},
{
"label": "SURNAME",
"start": 16,
"end": 22,
"text": " احمدی"
},
{
"label": "IDCARDNUM",
"start": 39,
"end": 49,
"text": "۱۲۳۴۵۶۷۸۹۰"
},
{
"label": "TELEPHONENUM",
"start": 60,
"end": 76,
"text": "+98-912-345-6789"
},
{
"label": "DATE",
"start": 85,
"end": 95,
"text": "1370-05-21"
},
{
"label": "CITY",
"start": 105,
"end": 108,
"text": "رشت"
}
],
"expected": [
{
"label": "GIVENNAME",
"value": "سارا"
},
{
"label": "SURNAME",
"value": "احمدی"
},
{
"label": "IDCARDNUM",
"value": "۱۲۳۴۵۶۷۸۹۰"
},
{
"label": "TELEPHONENUM",
"value": "+98-912-345-6789"
},
{
"label": "DATE",
"value": "1370-05-21"
},
{
"label": "CITY",
"value": "رشت"
}
],
"checks": [
{
"label": "GIVENNAME",
"value": "سارا",
"exact": true,
"loose": true
},
{
"label": "SURNAME",
"value": "احمدی",
"exact": false,
"loose": true
},
{
"label": "IDCARDNUM",
"value": "۱۲۳۴۵۶۷۸۹۰",
"exact": true,
"loose": true
},
{
"label": "TELEPHONENUM",
"value": "+98-912-345-6789",
"exact": true,
"loose": true
},
{
"label": "DATE",
"value": "1370-05-21",
"exact": true,
"loose": true
},
{
"label": "CITY",
"value": "رشت",
"exact": true,
"loose": true
}
],
"loose_recall": 1.0,
"unexpected": []
},
{
"case": "table_like_rows",
"text_chars": 123,
"covered_chars": 123,
"truncated": false,
"predicted": [
{
"label": "GIVENNAME",
"start": 28,
"end": 33,
"text": "کیوان"
},
{
"label": "SURNAME",
"start": 33,
"end": 39,
"text": " مرادی"
},
{
"label": "CITY",
"start": 41,
"end": 48,
"text": " اصفهان"
},
{
"label": "ZIPCODE",
"start": 50,
"end": 61,
"text": " ۸۱۷۴۶۳۷۳۴۱"
},
{
"label": "TAXNUM",
"start": 63,
"end": 75,
"text": " 41122334455"
},
{
"label": "GIVENNAME",
"start": 76,
"end": 81,
"text": "الهام"
},
{
"label": "SURNAME",
"start": 81,
"end": 88,
"text": " رستگار"
},
{
"label": "CITY",
"start": 90,
"end": 96,
"text": " کرمان"
},
{
"label": "ZIPCODE",
"start": 98,
"end": 109,
"text": " 7616913911"
},
{
"label": "TAXNUM",
"start": 111,
"end": 123,
"text": " 99887766554"
}
],
"expected": [
{
"label": "GIVENNAME",
"value": "کیوان"
},
{
"label": "SURNAME",
"value": "مرادی"
},
{
"label": "CITY",
"value": "اصفهان"
},
{
"label": "ZIPCODE",
"value": "۸۱۷۴۶۳۷۳۴۱"
},
{
"label": "TAXNUM",
"value": "41122334455"
},
{
"label": "GIVENNAME",
"value": "الهام"
},
{
"label": "SURNAME",
"value": "رستگار"
},
{
"label": "CITY",
"value": "کرمان"
}
],
"checks": [
{
"label": "GIVENNAME",
"value": "کیوان",
"exact": true,
"loose": true
},
{
"label": "SURNAME",
"value": "مرادی",
"exact": false,
"loose": true
},
{
"label": "CITY",
"value": "اصفهان",
"exact": false,
"loose": true
},
{
"label": "ZIPCODE",
"value": "۸۱۷۴۶۳۷۳۴۱",
"exact": false,
"loose": true
},
{
"label": "TAXNUM",
"value": "41122334455",
"exact": false,
"loose": true
},
{
"label": "GIVENNAME",
"value": "الهام",
"exact": true,
"loose": true
},
{
"label": "SURNAME",
"value": "رستگار",
"exact": false,
"loose": true
},
{
"label": "CITY",
"value": "کرمان",
"exact": false,
"loose": true
}
],
"loose_recall": 1.0,
"unexpected": [
{
"label": "ZIPCODE",
"text": " 7616913911"
},
{
"label": "TAXNUM",
"text": " 99887766554"
}
]
},
{
"case": "ambiguous_non_pii_numbers",
"text_chars": 85,
"covered_chars": 85,
"truncated": false,
"predicted": [],
"expected": [],
"checks": [],
"loose_recall": null,
"unexpected": []
}
],
"512": [
{
"case": "normal_medical_note",
"text_chars": 95,
"covered_chars": 95,
"truncated": false,
"predicted": [
{
"label": "TITLE",
"start": 5,
"end": 10,
"text": " خانم"
},
{
"label": "GIVENNAME",
"start": 10,
"end": 17,
"text": " نازنین"
},
{
"label": "SURNAME",
"start": 17,
"end": 23,
"text": " شریفی"
},
{
"label": "IDCARDNUM",
"start": 33,
"end": 44,
"text": " ۰۰۱۲۳۴۵۶۷۸"
},
{
"label": "TELEPHONENUM",
"start": 53,
"end": 65,
"text": " 09123456789"
},
{
"label": "DATE",
"start": 74,
"end": 85,
"text": " ۱۴۰۲/۰۸/۰۹"
}
],
"expected": [
{
"label": "GIVENNAME",
"value": "نازنین"
},
{
"label": "SURNAME",
"value": "شریفی"
},
{
"label": "IDCARDNUM",
"value": "۰۰۱۲۳۴۵۶۷۸"
},
{
"label": "TELEPHONENUM",
"value": "09123456789"
},
{
"label": "DATE",
"value": "۱۴۰۲/۰۸/۰۹"
}
],
"checks": [
{
"label": "GIVENNAME",
"value": "نازنین",
"exact": false,
"loose": true
},
{
"label": "SURNAME",
"value": "شریفی",
"exact": false,
"loose": true
},
{
"label": "IDCARDNUM",
"value": "۰۰۱۲۳۴۵۶۷۸",
"exact": false,
"loose": true
},
{
"label": "TELEPHONENUM",
"value": "09123456789",
"exact": false,
"loose": true
},
{
"label": "DATE",
"value": "۱۴۰۲/۰۸/۰۹",
"exact": false,
"loose": true
}
],
"loose_recall": 1.0,
"unexpected": [
{
"label": "TITLE",
"text": " خانم"
}
]
},
{
"case": "long_pii_at_end_after_truncation",
"text_chars": 3869,
"covered_chars": 1783,
"truncated": true,
"predicted": [],
"expected": [
{
"label": "GIVENNAME",
"value": "علی"
},
{
"label": "SURNAME",
"value": "رضایی"
},
{
"label": "EMAIL",
"value": "ali.rezaei@example.com"
},
{
"label": "TELEPHONENUM",
"value": "۰۹۱۲۳۴۵۶۷۸۹"
}
],
"checks": [
{
"label": "GIVENNAME",
"value": "علی",
"exact": false,
"loose": false
},
{
"label": "SURNAME",
"value": "رضایی",
"exact": false,
"loose": false
},
{
"label": "EMAIL",
"value": "ali.rezaei@example.com",
"exact": false,
"loose": false
},
{
"label": "TELEPHONENUM",
"value": "۰۹۱۲۳۴۵۶۷۸۹",
"exact": false,
"loose": false
}
],
"loose_recall": 0.0,
"unexpected": []
},
{
"case": "long_pii_at_beginning",
"text_chars": 3851,
"covered_chars": 1745,
"truncated": true,
"predicted": [
{
"label": "GIVENNAME",
"start": 3,
"end": 7,
"text": " علی"
},
{
"label": "SURNAME",
"start": 7,
"end": 13,
"text": " رضایی"
},
{
"label": "EMAIL",
"start": 20,
"end": 32,
"text": " ali.rezaei@"
},
{
"label": "EMAIL",
"start": 39,
"end": 43,
"text": ".com"
},
{
"label": "TELEPHONENUM",
"start": 50,
"end": 62,
"text": " ۰۹۱۲۳۴۵۶۷۸۹"
}
],
"expected": [
{
"label": "GIVENNAME",
"value": "علی"
},
{
"label": "SURNAME",
"value": "رضایی"
},
{
"label": "EMAIL",
"value": "ali.rezaei@example.com"
},
{
"label": "TELEPHONENUM",
"value": "۰۹۱۲۳۴۵۶۷۸۹"
}
],
"checks": [
{
"label": "GIVENNAME",
"value": "علی",
"exact": false,
"loose": true
},
{
"label": "SURNAME",
"value": "رضایی",
"exact": false,
"loose": true
},
{
"label": "EMAIL",
"value": "ali.rezaei@example.com",
"exact": false,
"loose": true
},
{
"label": "TELEPHONENUM",
"value": "۰۹۱۲۳۴۵۶۷۸۹",
"exact": false,
"loose": true
}
],
"loose_recall": 1.0,
"unexpected": [
{
"label": "EMAIL",
"text": " ali.rezaei@"
}
]
},
{
"case": "mixed_arabic_persian_digits_chars",
"text_chars": 89,
"covered_chars": 89,
"truncated": false,
"predicted": [
{
"label": "GIVENNAME",
"start": 13,
"end": 18,
"text": " ياسر"
},
{
"label": "SURNAME",
"start": 18,
"end": 24,
"text": " كاظمي"
},
{
"label": "IDCARDNUM",
"start": 32,
"end": 43,
"text": " 0012345678"
},
{
"label": "PASSPORTNUM",
"start": 52,
"end": 62,
"text": " A۱۲۳۴۵۶۷۸"
},
{
"label": "CREDITCARDNUMBER",
"start": 68,
"end": 88,
"text": " ۶۱۰۴-۳۳۷۷-۱۲۳۴-۵۶۷۸"
}
],
"expected": [
{
"label": "GIVENNAME",
"value": "ياسر"
},
{
"label": "SURNAME",
"value": "كاظمي"
},
{
"label": "IDCARDNUM",
"value": "0012345678"
},
{
"label": "PASSPORTNUM",
"value": "A۱۲۳۴۵۶۷۸"
},
{
"label": "CREDITCARDNUMBER",
"value": "۶۱۰۴-۳۳۷۷-۱۲۳۴-۵۶۷۸"
}
],
"checks": [
{
"label": "GIVENNAME",
"value": "ياسر",
"exact": false,
"loose": true
},
{
"label": "SURNAME",
"value": "كاظمي",
"exact": false,
"loose": true
},
{
"label": "IDCARDNUM",
"value": "0012345678",
"exact": false,
"loose": true
},
{
"label": "PASSPORTNUM",
"value": "A۱۲۳۴۵۶۷۸",
"exact": false,
"loose": true
},
{
"label": "CREDITCARDNUMBER",
"value": "۶۱۰۴-۳۳۷۷-۱۲۳۴-۵۶۷۸",
"exact": false,
"loose": true
}
],
"loose_recall": 1.0,
"unexpected": []
},
{
"case": "zwnj_and_titles",
"text_chars": 77,
"covered_chars": 77,
"truncated": false,
"predicted": [
{
"label": "TITLE",
"start": 5,
"end": 10,
"text": " خانم"
},
{
"label": "GIVENNAME",
"start": 10,
"end": 16,
"text": " مه‌سا"
},
{
"label": "SURNAME",
"start": 16,
"end": 27,
"text": " موسوی‌نژاد"
},
{
"label": "GIVENNAME",
"start": 33,
"end": 37,
"text": " رضا"
},
{
"label": "STREET",
"start": 43,
"end": 57,
"text": " خیابان شریعتی"
},
{
"label": "BUILDINGNUM",
"start": 62,
"end": 65,
"text": " ۲۳"
},
{
"label": "BUILDINGNUM",
"start": 70,
"end": 72,
"text": " ۴"
}
],
"expected": [
{
"label": "TITLE",
"value": "خانم"
},
{
"label": "GIVENNAME",
"value": "مه‌سا"
},
{
"label": "SURNAME",
"value": "موسوی‌نژاد"
},
{
"label": "GIVENNAME",
"value": "رضا"
},
{
"label": "STREET",
"value": "خیابان شریعتی"
},
{
"label": "BUILDINGNUM",
"value": "۲۳"
}
],
"checks": [
{
"label": "TITLE",
"value": "خانم",
"exact": false,
"loose": true
},
{
"label": "GIVENNAME",
"value": "مه‌سا",
"exact": false,
"loose": true
},
{
"label": "SURNAME",
"value": "موسوی‌نژاد",
"exact": false,
"loose": true
},
{
"label": "GIVENNAME",
"value": "رضا",
"exact": false,
"loose": true
},
{
"label": "STREET",
"value": "خیابان شریعتی",
"exact": false,
"loose": true
},
{
"label": "BUILDINGNUM",
"value": "۲۳",
"exact": false,
"loose": true
}
],
"loose_recall": 1.0,
"unexpected": [
{
"label": "BUILDINGNUM",
"text": " ۴"
}
]
},
{
"case": "obfuscated_contacts",
"text_chars": 88,
"covered_chars": 88,
"truncated": false,
"predicted": [
{
"label": "EMAIL",
"start": 20,
"end": 22,
"text": "za"
},
{
"label": "EMAIL",
"start": 28,
"end": 35,
"text": " karimi"
},
{
"label": "SOCIALNUM",
"start": 78,
"end": 87,
"text": " ۳۴۵ ۶۷۸۹"
}
],
"expected": [
{
"label": "EMAIL",
"value": "reza [dot] karimi [at] example [dot] ir"
},
{
"label": "TELEPHONENUM",
"value": "صفر نهصد و دوازده ۳۴۵ ۶۷۸۹"
}
],
"checks": [
{
"label": "EMAIL",
"value": "reza [dot] karimi [at] example [dot] ir",
"exact": false,
"loose": true
},
{
"label": "TELEPHONENUM",
"value": "صفر نهصد و دوازده ۳۴۵ ۶۷۸۹",
"exact": false,
"loose": false
}
],
"loose_recall": 0.5,
"unexpected": [
{
"label": "SOCIALNUM",
"text": " ۳۴۵ ۶۷۸۹"
}
]
},
{
"case": "json_log_format",
"text_chars": 110,
"covered_chars": 110,
"truncated": false,
"predicted": [
{
"label": "GIVENNAME",
"start": 12,
"end": 16,
"text": "سارا"
},
{
"label": "SURNAME",
"start": 16,
"end": 22,
"text": " احمدی"
},
{
"label": "IDCARDNUM",
"start": 39,
"end": 49,
"text": "۱۲۳۴۵۶۷۸۹۰"
},
{
"label": "TELEPHONENUM",
"start": 60,
"end": 76,
"text": "+98-912-345-6789"
},
{
"label": "DATE",
"start": 85,
"end": 95,
"text": "1370-05-21"
},
{
"label": "CITY",
"start": 105,
"end": 108,
"text": "رشت"
}
],
"expected": [
{
"label": "GIVENNAME",
"value": "سارا"
},
{
"label": "SURNAME",
"value": "احمدی"
},
{
"label": "IDCARDNUM",
"value": "۱۲۳۴۵۶۷۸۹۰"
},
{
"label": "TELEPHONENUM",
"value": "+98-912-345-6789"
},
{
"label": "DATE",
"value": "1370-05-21"
},
{
"label": "CITY",
"value": "رشت"
}
],
"checks": [
{
"label": "GIVENNAME",
"value": "سارا",
"exact": true,
"loose": true
},
{
"label": "SURNAME",
"value": "احمدی",
"exact": false,
"loose": true
},
{
"label": "IDCARDNUM",
"value": "۱۲۳۴۵۶۷۸۹۰",
"exact": true,
"loose": true
},
{
"label": "TELEPHONENUM",
"value": "+98-912-345-6789",
"exact": true,
"loose": true
},
{
"label": "DATE",
"value": "1370-05-21",
"exact": true,
"loose": true
},
{
"label": "CITY",
"value": "رشت",
"exact": true,
"loose": true
}
],
"loose_recall": 1.0,
"unexpected": []
},
{
"case": "table_like_rows",
"text_chars": 123,
"covered_chars": 123,
"truncated": false,
"predicted": [
{
"label": "GIVENNAME",
"start": 28,
"end": 33,
"text": "کیوان"
},
{
"label": "SURNAME",
"start": 33,
"end": 39,
"text": " مرادی"
},
{
"label": "CITY",
"start": 41,
"end": 48,
"text": " اصفهان"
},
{
"label": "ZIPCODE",
"start": 50,
"end": 61,
"text": " ۸۱۷۴۶۳۷۳۴۱"
},
{
"label": "TAXNUM",
"start": 63,
"end": 75,
"text": " 41122334455"
},
{
"label": "GIVENNAME",
"start": 76,
"end": 81,
"text": "الهام"
},
{
"label": "SURNAME",
"start": 81,
"end": 88,
"text": " رستگار"
},
{
"label": "CITY",
"start": 90,
"end": 96,
"text": " کرمان"
},
{
"label": "ZIPCODE",
"start": 98,
"end": 109,
"text": " 7616913911"
},
{
"label": "TAXNUM",
"start": 111,
"end": 123,
"text": " 99887766554"
}
],
"expected": [
{
"label": "GIVENNAME",
"value": "کیوان"
},
{
"label": "SURNAME",
"value": "مرادی"
},
{
"label": "CITY",
"value": "اصفهان"
},
{
"label": "ZIPCODE",
"value": "۸۱۷۴۶۳۷۳۴۱"
},
{
"label": "TAXNUM",
"value": "41122334455"
},
{
"label": "GIVENNAME",
"value": "الهام"
},
{
"label": "SURNAME",
"value": "رستگار"
},
{
"label": "CITY",
"value": "کرمان"
}
],
"checks": [
{
"label": "GIVENNAME",
"value": "کیوان",
"exact": true,
"loose": true
},
{
"label": "SURNAME",
"value": "مرادی",
"exact": false,
"loose": true
},
{
"label": "CITY",
"value": "اصفهان",
"exact": false,
"loose": true
},
{
"label": "ZIPCODE",
"value": "۸۱۷۴۶۳۷۳۴۱",
"exact": false,
"loose": true
},
{
"label": "TAXNUM",
"value": "41122334455",
"exact": false,
"loose": true
},
{
"label": "GIVENNAME",
"value": "الهام",
"exact": true,
"loose": true
},
{
"label": "SURNAME",
"value": "رستگار",
"exact": false,
"loose": true
},
{
"label": "CITY",
"value": "کرمان",
"exact": false,
"loose": true
}
],
"loose_recall": 1.0,
"unexpected": [
{
"label": "ZIPCODE",
"text": " 7616913911"
},
{
"label": "TAXNUM",
"text": " 99887766554"
}
]
},
{
"case": "ambiguous_non_pii_numbers",
"text_chars": 85,
"covered_chars": 85,
"truncated": false,
"predicted": [],
"expected": [],
"checks": [],
"loose_recall": null,
"unexpected": []
}
]
}