Food Desert commited on
Commit
08add8e
·
1 Parent(s): 46fe384

eval with structural inference + strong_implied + implications

Browse files
data/eval_results/eval_caption_cogvlm_n10_seed42_20260211_230357.jsonl ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"_meta": true, "timestamp": "2026-02-11T23:03:57.759772", "n_samples": 10, "caption_field": "caption_cogvlm", "skip_rewrite": false, "allow_nsfw": false, "mode": "chunked_map_union", "chunk_size": 60, "per_phrase_k": 2, "temperature": 0.0, "shuffle": true, "seed": 42, "workers": 4, "min_why": "strong_implied", "expand_implications": true, "infer_structural": true, "n_errors": 0}
2
+ {"id": 1097903, "n_gt": 15, "n_retrieved": 72, "n_selected": 25, "n_implied": 11, "n_structural": 3, "ret_R": 0.4667, "P": 0.44, "R": 0.7333, "F1": 0.55, "leaf_P": 0.3333, "leaf_R": 0.8, "leaf_F1": 0.4706, "n_leaf_sel": 12, "n_leaf_gt": 5, "ret_P": 0.0972, "sel_given_ret": 1.5714, "over_sel": 1.67, "why": {"explicit": 12}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.44, "gen_R": 0.7333, "gen_F1": 0.55, "missed": ["big_muscles", "huge_muscles", "hyper", "hyper_muscles"], "extra": ["biceps", "blue_background", "blue_bottomwear", "blue_clothing", "blue_shorts", "bottomwear", "clothing", "confident", "flexing_muscles", "lean_muscle", "male", "muscular_male", "pecs", "shorts"], "structural": ["solo", "anthro", "male"], "t1": 2.17, "t2": 7.37, "t3": 13.1, "t3s": 0.69, "err": null}
3
+ {"id": 2177013, "n_gt": 12, "n_retrieved": 164, "n_selected": 34, "n_implied": 12, "n_structural": 3, "ret_R": 0.3333, "P": 0.2059, "R": 0.5833, "F1": 0.3043, "leaf_P": 0.2222, "leaf_R": 0.5, "leaf_F1": 0.3077, "n_leaf_sel": 18, "n_leaf_gt": 8, "ret_P": 0.0244, "sel_given_ret": 1.75, "over_sel": 2.83, "why": {"explicit": 15, "strong_implied": 4}, "n_gt_char": 0, "n_sel_char": 1, "char_F1": 0.0, "gen_P": 0.2121, "gen_R": 0.5833, "gen_F1": 0.3111, "missed": ["5_fingers", "abs", "clothed", "fingers", "wingless_dragon"], "extra": ["action_pose", "black_body", "black_tail", "front_view", "full-length_portrait", "gradie", "holding_object", "holding_sword", "holding_weapon", "legwear", "longsword", "melee_weapon", "nun_outfit", "portrait", "pose", "red_body", "red_skin", "shirt", "short_hair", "sword", "tail", "topwear", "torn_clothing", "torn_legwear", "torn_shirt", "torn_topwear", "weapon"], "structural": ["solo", "anthro", "male"], "t1": 2.05, "t2": 8.58, "t3": 29.66, "t3s": 1.09, "err": null}
4
+ {"id": 979381, "n_gt": 45, "n_retrieved": 197, "n_selected": 69, "n_implied": 29, "n_structural": 3, "ret_R": 0.6222, "P": 0.4058, "R": 0.6222, "F1": 0.4912, "leaf_P": 0.25, "leaf_R": 0.381, "leaf_F1": 0.3019, "n_leaf_sel": 32, "n_leaf_gt": 21, "ret_P": 0.1421, "sel_given_ret": 1.0, "over_sel": 1.53, "why": {"explicit": 35, "strong_implied": 2}, "n_gt_char": 0, "n_sel_char": 1, "char_F1": 0.0, "gen_P": 0.4308, "gen_R": 0.6222, "gen_F1": 0.5091, "missed": ["animatronic", "bow_(feature)", "chicken", "eye_patch", "eyewear", "female", "galliform", "gallus_(genus)", "holding_musical_instrument", "hook", "hook_hand", "machine", "patch_(fabric)", "phasianid", "pirate", "plushie", "robot"], "extra": ["3rd_party_watermark", "avian_(starbound)", "birthday_cake", "black_clothing", "black_suit", "bow_ribbon", "brown_bear", "business_suit", "cake", "cardinal_(bird)", "chef", "cookie", "cupcake", "cupcake_(fnaf)", "dessert", "domestic_rabbit", "five_nights_at_freddy's", "food", "holding_food", "holding_guitar", "holding_toy", "lop_rabbit", "oryctolagus", "oscine", "pancake", "passerine", "red_bow", "red_clothing", "red_suit", "scottgames", "starbound", "string_bow", "suit", "suit_jacket", "toy", "trio", "ursine", "watermark", "white_beak", "white_clothing", "white_suit"], "structural": ["trio", "anthro", "male"], "t1": 2.43, "t2": 8.44, "t3": 78.85, "t3s": 1.21, "err": null}
5
+ {"id": 2303042, "n_gt": 71, "n_retrieved": 184, "n_selected": 62, "n_implied": 16, "n_structural": 3, "ret_R": 0.3662, "P": 0.4677, "R": 0.4085, "F1": 0.4361, "leaf_P": 0.3902, "leaf_R": 0.3636, "leaf_F1": 0.3765, "n_leaf_sel": 41, "n_leaf_gt": 44, "ret_P": 0.1413, "sel_given_ret": 1.1154, "over_sel": 0.87, "why": {"explicit": 40, "strong_implied": 3}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.4677, "gen_R": 0.4085, "gen_F1": 0.4361, "missed": ["5_fingers", "absolute_territory", "ambiguous_gender", "anthro", "barefoot", "biped", "blue_background", "blue_hair", "blue_highlights", "blue_pupils", "bottomwear", "clothed", "colored_nails", "eyewear", "feet", "fingers", "flat_chested", "hand_on_hip", "hotpants", "inner_ear_fluff", "kemono", "lagomorph", "leporid", "long_ears", "mammal", "midriff", "nails", "navel", "open_bottomwear", "open_clothing", "open_pants", "pants", "pink_nails", "pink_sclera", "pupils", "rabbit", "shorts", "standing", "tattoo", "tuft", "white_clothing", "white_legwear"], "extra": ["black_belt", "black_gloves", "black_handwear", "black_headset", "black_headwear", "black_leggings", "black_outfit", "black_topwear", "computer", "confident", "earbuds", "fishnet", "fishnet_legwear", "fist", "headwear", "holding_chin", "hooded_cloak", "hoodie", "humanoid", "keyboard", "knee_highs", "leather", "leather_clothing", "leather_gloves", "leather_handwear", "leggings", "naughty_smile", "playful", "pouch_(clothing)", "sharp_teeth", "toothy_grin", "toothy_smile", "topwear"], "structural": ["solo", "humanoid", "female"], "t1": 2.3, "t2": 8.56, "t3": 20.57, "t3s": 1.33, "err": null}
6
+ {"id": 120915, "n_gt": 7, "n_retrieved": 107, "n_selected": 19, "n_implied": 5, "n_structural": 3, "ret_R": 0.7143, "P": 0.3684, "R": 1.0, "F1": 0.5385, "leaf_P": 0.3077, "leaf_R": 0.6667, "leaf_F1": 0.4211, "n_leaf_sel": 13, "n_leaf_gt": 6, "ret_P": 0.0467, "sel_given_ret": 1.4, "over_sel": 2.71, "why": {"explicit": 12, "strong_implied": 3}, "n_gt_char": 0, "n_sel_char": 1, "char_F1": 0.0, "gen_P": 0.3889, "gen_R": 1.0, "gen_F1": 0.56, "missed": [], "extra": ["3d_(artwork)", "anthro", "digital_media_(artwork)", "male_feral", "muscular", "muscular_ambiguous", "solo_focus", "suid", "suina", "talking_feral", "unguligrade_anthro", "unnamed_character"], "structural": ["solo", "anthro", "ambiguous_gender"], "t1": 1.51, "t2": 0.94, "t3": 12.31, "t3s": 0.73, "err": null}
7
+ {"id": 1545647, "n_gt": 28, "n_retrieved": 164, "n_selected": 41, "n_implied": 12, "n_structural": 3, "ret_R": 0.2143, "P": 0.2927, "R": 0.4286, "F1": 0.3478, "leaf_P": 0.1923, "leaf_R": 0.3333, "leaf_F1": 0.2439, "n_leaf_sel": 26, "n_leaf_gt": 15, "ret_P": 0.0366, "sel_given_ret": 2.0, "over_sel": 1.46, "why": {"explicit": 26}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.2927, "gen_R": 0.4286, "gen_F1": 0.3478, "missed": ["5_fingers", "abs", "clothed", "clothing", "fingers", "fur", "muscular", "muscular_anthro", "muscular_male", "snow", "tan_body", "tan_fur", "topless", "winter", "yellow_body", "yellow_fur"], "extra": ["armband", "armlet", "blue_sky", "body_of_water", "choker", "crossed_legs", "crosslegged_pose", "day", "evergreen_tree", "forest", "glowing", "glowing_flesh", "glowing_jewelry", "glowing_necklace", "glowing_nose", "gold_(metal)", "gold_choker", "gold_jewelry", "gold_necklace", "grass", "jewelry", "lake", "meditation", "necklace", "pine_tree", "sky", "sky_background", "waistband", "wristband"], "structural": ["solo", "anthro", "male"], "t1": 2.05, "t2": 1.52, "t3": 48.44, "t3s": 0.89, "err": null}
8
+ {"id": 232786, "n_gt": 19, "n_retrieved": 159, "n_selected": 85, "n_implied": 30, "n_structural": 4, "ret_R": 0.6316, "P": 0.1765, "R": 0.7895, "F1": 0.2885, "leaf_P": 0.0652, "leaf_R": 0.3333, "leaf_F1": 0.1091, "n_leaf_sel": 46, "n_leaf_gt": 9, "ret_P": 0.0755, "sel_given_ret": 1.25, "over_sel": 4.47, "why": {"explicit": 34, "strong_implied": 18}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.1786, "gen_R": 0.7895, "gen_F1": 0.2913, "missed": ["blush", "flat_cap", "grizzly_bear", "open_mouth"], "extra": ["anthro", "appliance", "beverage", "black_scarf", "box", "cabinet", "cafe", "cake", "candy", "chocolate", "chocolate_sauce", "christmas", "christmas_clothing", "christmas_headwear", "container", "cutlery", "eating", "eating_food", "eyewear_on_head", "female", "fork", "gift", "glasses", "grey_eyewear", "happy", "holding_candy", "holding_clothing", "holding_food", "holding_gift", "holding_hat", "holding_headgear", "holding_headwear", "holding_ice_cream", "holding_lollipop", "holding_mug", "holding_object", "holding_plate", "holding_spoon", "holidays", "hot_chocolate", "kitchen_utensils", "light", "light_beam", "lighting", "lights", "lollipop", "looking_at_food", "looking_at_object", "map", "mug", "offering_food", "party_hat", "pink_background", "red_eyewear", "restaurant", "road_sign", "santa_hat", "scarf", "scenery", "sharing_clothing", "sharing_scarf", "smile", "smiling_at_each_other", "smirk", "spoon", "stove", "tools", "toothy_smile", "tray", "yellow_eyewear"], "structural": ["duo", "anthro", "male", "female"], "t1": 1.92, "t2": 1.39, "t3": 24.05, "t3s": 1.25, "err": null}
9
+ {"id": 1059380, "n_gt": 42, "n_retrieved": 143, "n_selected": 54, "n_implied": 19, "n_structural": 3, "ret_R": 0.2857, "P": 0.3333, "R": 0.4286, "F1": 0.375, "leaf_P": 0.2759, "leaf_R": 0.32, "leaf_F1": 0.2963, "n_leaf_sel": 29, "n_leaf_gt": 25, "ret_P": 0.0839, "sel_given_ret": 1.5, "over_sel": 1.29, "why": {"explicit": 34}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.3333, "gen_R": 0.4286, "gen_F1": 0.375, "missed": ["5_fingers", "avian", "belt", "bird", "black_nose", "blonde_hair", "claws", "clothed", "digitigrade", "fangs", "felid", "fingers", "grey_fur", "hair", "holding_gun", "holding_object", "holding_weapon", "mammal", "red_body", "red_fur", "scope", "teeth", "topless", "vines"], "extra": ["assault_rifle", "biceps", "blonde_mane", "blue_sky", "bottomwear", "camo", "camo_bottomwear", "camo_clothing", "camo_pants", "camo_print", "clear_sky", "clinging", "cloud", "detailed", "detailed_fur", "evergreen_tree", "glistening", "glistening_eyes", "grey_mane", "in_tree", "mane", "military_clothing", "orange_sky", "pants", "pattern_bottomwear", "pattern_clothing", "pattern_pants", "pine_tree", "red_clothing", "red_topwear", "shirt", "t-shirt", "tan_mane", "thick_fur", "topwear", "yellow_background"], "structural": ["solo", "anthro", "male"], "t1": 1.71, "t2": 1.4, "t3": 35.48, "t3s": 1.08, "err": null}
10
+ {"id": 1171401, "n_gt": 20, "n_retrieved": 177, "n_selected": 72, "n_implied": 28, "n_structural": 3, "ret_R": 0.35, "P": 0.1806, "R": 0.65, "F1": 0.2826, "leaf_P": 0.0303, "leaf_R": 0.0714, "leaf_F1": 0.0426, "n_leaf_sel": 33, "n_leaf_gt": 14, "ret_P": 0.0395, "sel_given_ret": 1.8571, "over_sel": 3.6, "why": {"explicit": 41}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.1806, "gen_R": 0.65, "gen_F1": 0.2826, "missed": ["<3", "anthro", "biped", "clothed", "crossdressing", "girly", "male"], "extra": ["black_hair", "blue_bottomwear", "blue_clothing", "blue_panties", "blue_pants", "blue_shirt", "blue_tank_top", "blue_topwear", "blue_underwear", "bottomwear", "brown_hair", "choker", "corsac_fox", "female", "fennec_fox", "frilly", "frilly_clothing", "frilly_panties", "frilly_underwear", "hand_behind_back", "heart_choker", "heart_cutout", "heart_print", "heart_symbol", "humanoid", "jewelry", "kneeling_on_ground", "long_hair", "necklace", "one_eye_closed", "open_mouth", "open_smile", "pants", "pink_body", "pink_clothing", "pink_eyes", "pink_fur", "pink_hair", "pink_heart", "pink_panties", "pink_shirt", "pink_t-shirt", "pink_tail", "pink_tank_top", "pink_topwear", "pink_underwear", "playful", "puffy_speech_bubble", "shared_speech_bubble", "shirt", "smiling_at_viewer", "speech_bubble", "t-shirt", "tail", "tank_top", "text_in_heart", "topwear", "wink", "winking_at_viewer"], "structural": ["solo", "humanoid", "female"], "t1": 2.46, "t2": 1.35, "t3": 13.75, "t3s": 0.53, "err": null}
11
+ {"id": 1463990, "n_gt": 80, "n_retrieved": 217, "n_selected": 71, "n_implied": 11, "n_structural": 3, "ret_R": 0.325, "P": 0.2817, "R": 0.25, "F1": 0.2649, "leaf_P": 0.2, "leaf_R": 0.1964, "leaf_F1": 0.1982, "n_leaf_sel": 55, "n_leaf_gt": 56, "ret_P": 0.1198, "sel_given_ret": 0.7692, "over_sel": 0.89, "why": {"explicit": 50, "strong_implied": 9}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.2817, "gen_R": 0.25, "gen_F1": 0.2649, "missed": ["4_fingers", "angry", "biped", "black_eyes", "black_markings", "black_nose", "blue_hair", "brother", "brothers", "cheek_tuft", "chest_tuft", "clothing", "countershade_face", "countershade_tail", "countershade_torso", "countershading", "crossed_arms", "eye_markings", "facial_tuft", "fingers", "frown", "gloves", "gloves_(marking)", "glowing", "glowing_eyes", "grey_markings", "half-closed_eyes", "handwear", "head_tuft", "humanoid_hands", "long_tail", "looking_at_viewer", "looking_away", "markings", "mephitid", "mostly_nude", "multicolored_body", "multicolored_fur", "multicolored_hair", "narrowed_eyes", "red_background", "red_nose", "red_sclera", "shadow", "sibling", "skunk", "smug", "standing", "tail", "tan_hair", "text", "toony", "tuft", "two_tone_body", "two_tone_fur", "two_tone_hair", "white_body", "white_countershading", "white_fur", "white_hair"], "extra": ["abs", "accessory", "actual_fur", "angry_expression", "angry_eyes", "angry_face", "animal_legs", "bared_teeth", "big_glasses", "big_teeth", "black_and_red", "black_and_white_and_red", "black_and_white_fur", "blue_and_white", "blue_and_white_fur", "body_hair", "brother_(lore)", "brothers_(lore)", "close-up", "closed_frown", "closed_smile", "colorful", "colorful_background", "eyewear", "flat_colors", "glasses", "grey_hair", "grey_theme", "hair_accessory", "hair_sticks", "half-length_portrait", "headgear", "legs_together", "long_hair", "looking_aside", "looking_forward", "messy_hair", "monochrome", "mouth_closed", "muscular", "pattern_headgear", "patterns", "portrait", "punk_hair", "red_eyewear", "sharp_fangs", "showing_teeth", "tan_head", "teeth_showing", "unguligrade_anthro", "yellow_eyewear"], "structural": ["duo", "anthro", "male"], "t1": 2.01, "t2": 2.03, "t3": 50.03, "t3s": 0.91, "err": null}