Prompt_Squirrel_RAG / data /analysis /clothing_precision_trial_compare_20260322T113759Z.json
Food Desert
Update docs diagrams, evaluation labels, and analysis artifacts
6566a4f
Raw
History Blame
6.9 kB
{
"timestamp_utc": "2026-03-22T11:39:34.040448Z",
"subset_size": 10,
"subset_ids": [
37076,
203825,
335343,
1023509,
2141904,
2291202,
3285630,
260449,
1325009,
212271
],
"baseline_file": "E:\\image\\backup\\Prompt_Squirrel_RAG\\data\\eval_results\\model_ab_n30_mistral_small2501_20260322.jsonl",
"new_file": "E:\\image\\backup\\Prompt_Squirrel_RAG\\data\\eval_results\\model_ab_n10_mistral_small2501_clothing_precision_20260322T113759Z.jsonl",
"usage_before_usd": 4.796378237,
"usage_after_usd": 4.797453587,
"usage_delta_usd": 0.0010753499999998084,
"baseline_clothed": {
"tp": 4,
"fp": 6,
"fn": 0,
"precision": 0.4,
"recall": 1.0,
"f1": 0.5714285714285715
},
"baseline_clothing": {
"tp": 4,
"fp": 6,
"fn": 0,
"precision": 0.4,
"recall": 1.0,
"f1": 0.5714285714285715
},
"new_clothed": {
"tp": 4,
"fp": 6,
"fn": 0,
"precision": 0.4,
"recall": 1.0,
"f1": 0.5714285714285715
},
"new_clothing": {
"tp": 4,
"fp": 6,
"fn": 0,
"precision": 0.4,
"recall": 1.0,
"f1": 0.5714285714285715
},
"avg_f1_baseline": 0.5784,
"avg_f1_precision_prompt": 0.58621,
"per_id": [
{
"id": 37076,
"baseline": {
"sel_clothed": true,
"sel_clothing": true,
"P": 0.7222,
"R": 0.8667,
"F1": 0.7879
},
"precision_prompt": {
"sel_clothed": true,
"sel_clothing": true,
"P": 0.7778,
"R": 0.9333,
"F1": 0.8485
},
"added_tags": [
"bell",
"blue_eyes"
],
"removed_tags": [
"better_version_at_source",
"collar"
]
},
{
"id": 203825,
"baseline": {
"sel_clothed": true,
"sel_clothing": true,
"P": 0.4,
"R": 0.8333,
"F1": 0.5405
},
"precision_prompt": {
"sel_clothed": true,
"sel_clothing": true,
"P": 0.32,
"R": 0.6667,
"F1": 0.4324
},
"added_tags": [
"asian",
"black_outfit"
],
"removed_tags": [
"bat",
"mammal"
]
},
{
"id": 335343,
"baseline": {
"sel_clothed": true,
"sel_clothing": true,
"P": 0.5263,
"R": 0.6667,
"F1": 0.5882
},
"precision_prompt": {
"sel_clothed": true,
"sel_clothing": true,
"P": 0.4783,
"R": 0.7333,
"F1": 0.5789
},
"added_tags": [
"palette",
"resting",
"serene",
"sleeping"
],
"removed_tags": []
},
{
"id": 1023509,
"baseline": {
"sel_clothed": true,
"sel_clothing": true,
"P": 0.4286,
"R": 0.6923,
"F1": 0.5294
},
"precision_prompt": {
"sel_clothed": true,
"sel_clothing": true,
"P": 0.4091,
"R": 0.6923,
"F1": 0.5143
},
"added_tags": [
"bright_light",
"feral"
],
"removed_tags": [
"note"
]
},
{
"id": 2141904,
"baseline": {
"sel_clothed": true,
"sel_clothing": true,
"P": 0.2308,
"R": 0.375,
"F1": 0.2857
},
"precision_prompt": {
"sel_clothed": true,
"sel_clothing": true,
"P": 0.2308,
"R": 0.375,
"F1": 0.2857
},
"added_tags": [],
"removed_tags": []
},
{
"id": 2291202,
"baseline": {
"sel_clothed": true,
"sel_clothing": true,
"P": 0.5455,
"R": 0.6667,
"F1": 0.6
},
"precision_prompt": {
"sel_clothed": true,
"sel_clothing": true,
"P": 0.6,
"R": 0.6667,
"F1": 0.6316
},
"added_tags": [
"fingers",
"looking_at_viewer"
],
"removed_tags": [
"gesture",
"open_mouth",
"pointing",
"pointing_up"
]
},
{
"id": 3285630,
"baseline": {
"sel_clothed": true,
"sel_clothing": true,
"P": 0.375,
"R": 0.75,
"F1": 0.5
},
"precision_prompt": {
"sel_clothed": true,
"sel_clothing": true,
"P": 0.3929,
"R": 0.9167,
"F1": 0.55
},
"added_tags": [
"alpha_channel",
"brown_clothing",
"brown_topwear",
"brown_vest",
"male"
],
"removed_tags": [
"business_attire"
]
},
{
"id": 260449,
"baseline": {
"sel_clothed": true,
"sel_clothing": true,
"P": 0.5,
"R": 0.7857,
"F1": 0.6111
},
"precision_prompt": {
"sel_clothed": true,
"sel_clothing": true,
"P": 0.5,
"R": 0.7857,
"F1": 0.6111
},
"added_tags": [
"raised_arm",
"topless"
],
"removed_tags": [
"bottomwear",
"loincloth"
]
},
{
"id": 1325009,
"baseline": {
"sel_clothed": true,
"sel_clothing": true,
"P": 0.65,
"R": 0.5909,
"F1": 0.619
},
"precision_prompt": {
"sel_clothed": true,
"sel_clothing": true,
"P": 0.6522,
"R": 0.6818,
"F1": 0.6667
},
"added_tags": [
"bottomwear",
"gesture",
"pantherine",
"pose",
"raised_hand",
"shorts",
"tiger"
],
"removed_tags": [
"fur_tuft",
"muscular",
"thoughtful",
"tuft"
]
},
{
"id": 212271,
"baseline": {
"sel_clothed": true,
"sel_clothing": true,
"P": 0.7222,
"R": 0.7222,
"F1": 0.7222
},
"precision_prompt": {
"sel_clothed": true,
"sel_clothing": true,
"P": 0.7647,
"R": 0.7222,
"F1": 0.7429
},
"added_tags": [],
"removed_tags": [
"bottomless"
]
}
],
"files": {
"subset_eval_path": "E:\\image\\backup\\Prompt_Squirrel_RAG\\data\\runtime_debug\\clothing_precision_subset_n10_20260322T113759Z.jsonl",
"modified_structural_csv": "E:\\image\\backup\\Prompt_Squirrel_RAG\\data\\runtime_debug\\structural_tag_definitions_precision_20260322T113759Z.csv",
"modified_probe_definitions_csv": "E:\\image\\backup\\Prompt_Squirrel_RAG\\data\\runtime_debug\\probe_tag_definitions_precision_20260322T113759Z.csv"
}
}