{ "settings": { "n": 10, "seed": 42, "caption_field": "caption_cogvlm", "probe_count": 35, "retries": 2, "temperature": 0.0, "max_tokens": 900, "model_env": "meta-llama/llama-3.1-8b-instruct" }, "overall_metrics": { "explicit": { "tp": 49, "fp": 56, "fn": 19, "precision": 0.466667, "recall": 0.720588, "f1": 0.566474 }, "strong": { "tp": 49, "fp": 56, "fn": 19, "precision": 0.466667, "recall": 0.720588, "f1": 0.566474 } }, "diagnostics": { "samples_with_attempt_failures": 0, "samples_with_call_exhaustion": 0, "avg_attempt_failure_rate": 0.0, "avg_call_exhaustion_rate": 0.0 }, "top_tags_by_f1_strong": [ { "tag": "outside", "bundle": "scene_pose", "needs_glossary": "0", "support_pos": "2", "support_neg": "8", "tp_explicit": "2", "fp_explicit": "0", "fn_explicit": "0", "precision_explicit": "1.000000", "recall_explicit": "1.000000", "f1_explicit": "1.000000", "tp_strong": "2", "fp_strong": "0", "fn_strong": "0", "precision_strong": "1.000000", "recall_strong": "1.000000", "f1_strong": "1.000000" }, { "tag": "eyes_closed", "bundle": "gaze_expression", "needs_glossary": "0", "support_pos": "1", "support_neg": "9", "tp_explicit": "1", "fp_explicit": "0", "fn_explicit": "0", "precision_explicit": "1.000000", "recall_explicit": "1.000000", "f1_explicit": "1.000000", "tp_strong": "1", "fp_strong": "0", "fn_strong": "0", "precision_strong": "1.000000", "recall_strong": "1.000000", "f1_strong": "1.000000" }, { "tag": "group", "bundle": "count_cardinality", "needs_glossary": "0", "support_pos": "1", "support_neg": "9", "tp_explicit": "1", "fp_explicit": "0", "fn_explicit": "0", "precision_explicit": "1.000000", "recall_explicit": "1.000000", "f1_explicit": "1.000000", "tp_strong": "1", "fp_strong": "0", "fn_strong": "0", "precision_strong": "1.000000", "recall_strong": "1.000000", "f1_strong": "1.000000" }, { "tag": "feral", "bundle": "body_type_presence", "needs_glossary": "1", "support_pos": "1", "support_neg": "9", "tp_explicit": "1", "fp_explicit": "0", "fn_explicit": "0", "precision_explicit": "1.000000", "recall_explicit": "1.000000", "f1_explicit": "1.000000", "tp_strong": "1", "fp_strong": "0", "fn_strong": "0", "precision_strong": "1.000000", "recall_strong": "1.000000", "f1_strong": "1.000000" }, { "tag": "<3", "bundle": "text_symbols", "needs_glossary": "1", "support_pos": "1", "support_neg": "9", "tp_explicit": "1", "fp_explicit": "0", "fn_explicit": "0", "precision_explicit": "1.000000", "recall_explicit": "1.000000", "f1_explicit": "1.000000", "tp_strong": "1", "fp_strong": "0", "fn_strong": "0", "precision_strong": "1.000000", "recall_strong": "1.000000", "f1_strong": "1.000000" }, { "tag": "clothing", "bundle": "clothing_state", "needs_glossary": "0", "support_pos": "8", "support_neg": "2", "tp_explicit": "8", "fp_explicit": "1", "fn_explicit": "0", "precision_explicit": "0.888889", "recall_explicit": "1.000000", "f1_explicit": "0.941176", "tp_strong": "8", "fp_strong": "1", "fn_strong": "0", "precision_strong": "0.888889", "recall_strong": "1.000000", "f1_strong": "0.941176" }, { "tag": "anthro", "bundle": "body_type_presence", "needs_glossary": "1", "support_pos": "8", "support_neg": "2", "tp_explicit": "8", "fp_explicit": "2", "fn_explicit": "0", "precision_explicit": "0.800000", "recall_explicit": "1.000000", "f1_explicit": "0.888889", "tp_strong": "8", "fp_strong": "2", "fn_strong": "0", "precision_strong": "0.800000", "recall_strong": "1.000000", "f1_strong": "0.888889" }, { "tag": "bear", "bundle": "species_taxonomy", "needs_glossary": "0", "support_pos": "2", "support_neg": "8", "tp_explicit": "2", "fp_explicit": "1", "fn_explicit": "0", "precision_explicit": "0.666667", "recall_explicit": "1.000000", "f1_explicit": "0.800000", "tp_strong": "2", "fp_strong": "1", "fn_strong": "0", "precision_strong": "0.666667", "recall_strong": "1.000000", "f1_strong": "0.800000" }, { "tag": "duo", "bundle": "count_cardinality", "needs_glossary": "1", "support_pos": "2", "support_neg": "8", "tp_explicit": "2", "fp_explicit": "1", "fn_explicit": "0", "precision_explicit": "0.666667", "recall_explicit": "1.000000", "f1_explicit": "0.800000", "tp_strong": "2", "fp_strong": "1", "fn_strong": "0", "precision_strong": "0.666667", "recall_strong": "1.000000", "f1_strong": "0.800000" }, { "tag": "solo", "bundle": "count_cardinality", "needs_glossary": "1", "support_pos": "7", "support_neg": "3", "tp_explicit": "4", "fp_explicit": "0", "fn_explicit": "3", "precision_explicit": "1.000000", "recall_explicit": "0.571429", "f1_explicit": "0.727273", "tp_strong": "4", "fp_strong": "0", "fn_strong": "3", "precision_strong": "1.000000", "recall_strong": "0.571429", "f1_strong": "0.727273" }, { "tag": "clothed", "bundle": "clothing_state", "needs_glossary": "0", "support_pos": "5", "support_neg": "5", "tp_explicit": "5", "fp_explicit": "4", "fn_explicit": "0", "precision_explicit": "0.555556", "recall_explicit": "1.000000", "f1_explicit": "0.714286", "tp_strong": "5", "fp_strong": "4", "fn_strong": "0", "precision_strong": "0.555556", "recall_strong": "1.000000", "f1_strong": "0.714286" }, { "tag": "bird", "bundle": "species_taxonomy", "needs_glossary": "0", "support_pos": "2", "support_neg": "8", "tp_explicit": "1", "fp_explicit": "0", "fn_explicit": "1", "precision_explicit": "1.000000", "recall_explicit": "0.500000", "f1_explicit": "0.666667", "tp_strong": "1", "fp_strong": "0", "fn_strong": "1", "precision_strong": "1.000000", "recall_strong": "0.500000", "f1_strong": "0.666667" }, { "tag": "leporid", "bundle": "species_taxonomy", "needs_glossary": "1", "support_pos": "2", "support_neg": "8", "tp_explicit": "1", "fp_explicit": "0", "fn_explicit": "1", "precision_explicit": "1.000000", "recall_explicit": "0.500000", "f1_explicit": "0.666667", "tp_strong": "1", "fp_strong": "0", "fn_strong": "1", "precision_strong": "1.000000", "recall_strong": "0.500000", "f1_strong": "0.666667" }, { "tag": "felid", "bundle": "species_taxonomy", "needs_glossary": "1", "support_pos": "2", "support_neg": "8", "tp_explicit": "1", "fp_explicit": "0", "fn_explicit": "1", "precision_explicit": "1.000000", "recall_explicit": "0.500000", "f1_explicit": "0.666667", "tp_strong": "1", "fp_strong": "0", "fn_strong": "1", "precision_strong": "1.000000", "recall_strong": "0.500000", "f1_strong": "0.666667" }, { "tag": "canis", "bundle": "species_taxonomy", "needs_glossary": "1", "support_pos": "1", "support_neg": "9", "tp_explicit": "1", "fp_explicit": "1", "fn_explicit": "0", "precision_explicit": "0.500000", "recall_explicit": "1.000000", "f1_explicit": "0.666667", "tp_strong": "1", "fp_strong": "1", "fn_strong": "0", "precision_strong": "0.500000", "recall_strong": "1.000000", "f1_strong": "0.666667" }, { "tag": "simple_background", "bundle": "scene_pose", "needs_glossary": "0", "support_pos": "3", "support_neg": "7", "tp_explicit": "3", "fp_explicit": "4", "fn_explicit": "0", "precision_explicit": "0.428571", "recall_explicit": "1.000000", "f1_explicit": "0.600000", "tp_strong": "3", "fp_strong": "4", "fn_strong": "0", "precision_strong": "0.428571", "recall_strong": "1.000000", "f1_strong": "0.600000" }, { "tag": "canid", "bundle": "species_taxonomy", "needs_glossary": "1", "support_pos": "3", "support_neg": "7", "tp_explicit": "2", "fp_explicit": "2", "fn_explicit": "1", "precision_explicit": "0.500000", "recall_explicit": "0.666667", "f1_explicit": "0.571429", "tp_strong": "2", "fp_strong": "2", "fn_strong": "1", "precision_strong": "0.500000", "recall_strong": "0.666667", "f1_strong": "0.571429" }, { "tag": "looking_at_viewer", "bundle": "gaze_expression", "needs_glossary": "0", "support_pos": "3", "support_neg": "7", "tp_explicit": "1", "fp_explicit": "0", "fn_explicit": "2", "precision_explicit": "1.000000", "recall_explicit": "0.333333", "f1_explicit": "0.500000", "tp_strong": "1", "fp_strong": "0", "fn_strong": "2", "precision_strong": "1.000000", "recall_strong": "0.333333", "f1_strong": "0.500000" }, { "tag": "standing", "bundle": "scene_pose", "needs_glossary": "0", "support_pos": "2", "support_neg": "8", "tp_explicit": "2", "fp_explicit": "4", "fn_explicit": "0", "precision_explicit": "0.333333", "recall_explicit": "1.000000", "f1_explicit": "0.500000", "tp_strong": "2", "fp_strong": "4", "fn_strong": "0", "precision_strong": "0.333333", "recall_strong": "1.000000", "f1_strong": "0.500000" }, { "tag": "biped", "bundle": "body_type_presence", "needs_glossary": "0", "support_pos": "3", "support_neg": "7", "tp_explicit": "2", "fp_explicit": "4", "fn_explicit": "1", "precision_explicit": "0.333333", "recall_explicit": "0.666667", "f1_explicit": "0.444444", "tp_strong": "2", "fp_strong": "4", "fn_strong": "1", "precision_strong": "0.333333", "recall_strong": "0.666667", "f1_strong": "0.444444" } ], "outputs": { "csv": "E:\\image\\backup\\Prompt_Squirrel_RAG\\data\\analysis\\probe_reliability_sanity10.csv", "json": "E:\\image\\backup\\Prompt_Squirrel_RAG\\data\\analysis\\probe_reliability_sanity10.json" } }