"""Run image and text authenticity checks and merge them into one verdict."""

import asyncio
import inspect
import logging
import os
import re

from .img.core import analyze_saved_images
from .text_module.pipeline import verify_text_logic
from .text_module.TextAnalysisResult import TextAnalysisResult

logger = logging.getLogger(__name__)

# Final verdict labels emitted in "authenticity_assessment".
_FAKE_VERDICT = "🤖 NOT REAL (Fake, Manipulated, or AI)"
_REAL_VERDICT = "REAL (Authentic)"

# A "real" signal only counts when it is not directly negated: the plain
# substring test would also match inside "not real photo".
_UNNEGATED_REAL_PHOTO = re.compile(r"(?<!not )real photo")
_UNNEGATED_REAL_VERDICT = re.compile(r"(?<!not )real (?:\(authentic\)|photo)")

_FAKE_KEYWORDS = (
    "not real",
    "fake",
    "manipulated",
    "generated",
    "artificial",
    "synthetic",
)

# (result key, pattern) pairs used to pull labelled fields out of a report.
_REPORT_FIELDS = (
    ("auth", re.compile(r"Authenticity Assessment:\s*(.+)")),
    ("tools", re.compile(r"Verification Tools & Methods:\s*(.+)")),
    ("synth", re.compile(r"Synthetic Type \(if applicable\):\s*(.+)")),
    # DOTALL: the artifacts section runs to the end of the report.
    ("artifacts", re.compile(r"Other Artifacts:\s*(.*)", re.DOTALL)),
)

_HTML_TAG_RE = re.compile(r"<[^>]+>")
_OTHER_ARTIFACTS_SPLIT_RE = re.compile(r"Other Artifacts\s*[:\-]?", re.IGNORECASE)
_OTHER_ARTIFACTS_RE = re.compile(r"Other Artifacts", re.IGNORECASE)
_WHITESPACE_RE = re.compile(r"\s+")


class MockGradioFile(dict):
    """Dict describing a file on disk, with every key mirrored as an attribute.

    The same path is exposed under ``name``, ``path`` and ``saved_path``; the
    base name under ``orig_name`` and ``original_filename``; and the file size
    in bytes under ``size`` (0 when the size cannot be read).
    """

    def __init__(self, path):
        filename = os.path.basename(path)
        try:
            size = os.path.getsize(path)
        except (OSError, ValueError):
            # Missing/unreadable file or invalid path: report an empty file.
            size = 0
        data = {
            "name": path,
            "path": path,
            "saved_path": path,
            "orig_name": filename,
            "original_filename": filename,
            "size": size,
        }
        super().__init__(data)
        # Allow both obj["path"] and obj.path access.
        for key, value in data.items():
            setattr(self, key, value)


def parse_and_check_fake(report_text):
    """Parse an image report and decide whether it declares the image fake.

    The verdict is "not fake" when the report contains an un-negated
    "real photo", "fake" when it otherwise contains "not real", and
    "not fake" when neither signal is present.

    Args:
        report_text: Report text containing the labelled fields
            "Authenticity Assessment:", "Verification Tools & Methods:",
            "Synthetic Type (if applicable):" and "Other Artifacts:".

    Returns:
        ``(is_fake, parsed_data)`` where ``parsed_data`` has the keys
        ``auth``, ``tools``, ``synth`` and ``artifacts``. Fields that are
        not found keep their placeholder values. An empty report yields
        ``(False, placeholders)``.
    """
    data = {
        "auth": "❓ **Unverified**",
        "tools": "Unknown",
        "synth": "unverified",
        "artifacts": "",
    }
    if not report_text:
        return False, data

    text_lower = report_text.lower()
    if _UNNEGATED_REAL_PHOTO.search(text_lower):
        is_fake = False
    else:
        # Without an explicit "not real" the report is treated as not fake.
        is_fake = "not real" in text_lower

    for key, pattern in _REPORT_FIELDS:
        match = pattern.search(report_text)
        if match:
            data[key] = match.group(1).strip()

    return is_fake, data


async def consume_async_generator(gen):
    """Exhaust ``gen`` and return the last item it produced.

    Accepts either an async generator or a plain iterable. Returns ``None``
    when nothing was produced.
    """
    last_result = None
    if inspect.isasyncgen(gen):
        async for item in gen:
            last_result = item
    else:
        for item in gen:
            last_result = item
    return last_result


def is_verdict_fake(assessment_string):
    """Return True when an assessment string indicates fake content.

    An un-negated "real (authentic)" or "real photo" means not fake;
    otherwise any of the fake keywords ("not real", "fake", "manipulated",
    "generated", "artificial", "synthetic") means fake. Empty input is
    treated as not fake. Matching is case-insensitive.
    """
    if not assessment_string:
        return False
    lowered = assessment_string.lower().strip()
    if _UNNEGATED_REAL_VERDICT.search(lowered):
        return False
    return any(keyword in lowered for keyword in _FAKE_KEYWORDS)


def clean_simple(s: str) -> str:
    """Sanitize a synthetic-type string.

    Replaces HTML tags with spaces, drops everything from an
    "Other Artifacts" marker onwards, removes ``**`` markers, trims
    surrounding whitespace/colons/semicolons and collapses whitespace runs.

    Returns:
        The cleaned string, or ``"unverified"`` when the input is empty or
        nothing is left after cleaning.
    """
    if not s:
        return "unverified"
    s = _HTML_TAG_RE.sub(" ", s)
    s = _OTHER_ARTIFACTS_SPLIT_RE.split(s)[0]
    s = s.replace("**", "").strip(" \n\r\t:;")
    s = _WHITESPACE_RE.sub(" ", s).strip()
    return s if s else "unverified"


async def _analyze_images(image_paths):
    """Return one report per image path, in order.

    Each entry is the report produced by ``analyze_saved_images``, ``None``
    when the analyzer produced nothing, or an ``"Error Image: ..."`` string
    when analysis raised.
    """
    if not image_paths:
        return []

    reports = []
    for img_path in image_paths:
        try:
            gen = analyze_saved_images([MockGradioFile(img_path)])
            result_tuple = await consume_async_generator(gen)
            if result_tuple:
                # The last yielded item is a pair; the report is its second element.
                _, report_md = result_tuple
                reports.append(report_md)
            else:
                reports.append(None)
        except Exception as exc:
            # Best effort: one failing image must not abort the others.
            logger.exception("Image analysis failed for %s", img_path)
            reports.append(f"Error Image: {exc}")
    return reports


async def _analyze_text(text_input):
    """Run the text pipeline in a worker thread.

    Returns ``None`` when there is no text; otherwise the populated
    ``TextAnalysisResult``. On failure the result carries the assessment
    "Error" and the exception message as its artifacts.
    """
    if not text_input:
        return None

    txt_res_obj = TextAnalysisResult()
    try:
        await asyncio.to_thread(verify_text_logic, text_input, txt_res_obj)
    except Exception as exc:
        logger.exception("Text analysis failed")
        txt_res_obj.set_authenticity_assessment("Error")
        txt_res_obj.set_other_artifacts(str(exc))
    return txt_res_obj


def _classify_image_report(img_report):
    """Return ``(is_fake, status, raw_synth)`` for one image report.

    Missing reports and "Error Image" reports are never fake and have the
    status "unverified". The status of a parsed report is "unverified" when
    its assessment contains "Unverified", else "fake" or "real".
    """
    if not img_report or "Error Image" in img_report:
        return False, "unverified", ""

    is_fake, parsed = parse_and_check_fake(img_report)
    if "Unverified" in parsed.get("auth", ""):
        status = "unverified"
    elif is_fake:
        status = "fake"
    else:
        status = "real"
    return is_fake, status, parsed.get("synth", "")


async def run_multimodal_analysis(image_paths: list, text_input: str) -> list[dict]:
    """Analyze images and text concurrently and combine the verdicts.

    Args:
        image_paths: Paths of the images to analyze; may be empty.
        text_input: Text to analyze; may be empty.

    Returns:
        A list of result dicts:

        * no images and no text: a single flat dict with the keys
          ``authenticity_assessment``, ``verification_tools_methods``,
          ``synthetic_type`` and ``other_artifacts``;
        * text only: a single entry with ``filename`` "N/A";
        * otherwise: one entry per image. Each entry has ``filename``,
          ``text_used`` and a nested ``result`` dict. An entry is marked
          fake when either its image or the text is judged fake.
    """
    img_reports, txt_res_obj = await asyncio.gather(
        _analyze_images(image_paths),
        _analyze_text(text_input),
    )

    txt_auth = "No text"
    txt_synth = "N/A"
    if txt_res_obj:
        # Guard against a getter returning None so the checks below cannot raise.
        txt_auth = txt_res_obj.get_authenticity_assessment() or ""
        txt_synth = txt_res_obj.get_synthetic_type()

    txt_is_fake = is_verdict_fake(txt_auth)

    if not text_input or not txt_auth:
        text_status = "unverified"
    elif "Error" in txt_auth or "Unverified" in txt_auth:
        text_status = "unverified"
    elif txt_is_fake:
        text_status = "fake"
    else:
        text_status = "real"

    if not img_reports and not text_input:
        return [{
            "authenticity_assessment": "⚠️ No Input Provided",
            "verification_tools_methods": "",
            "synthetic_type": "N/A",
            "other_artifacts": "No image or text was provided for analysis.",
        }]

    if not img_reports:
        # Text-only analysis.
        if txt_is_fake and txt_synth != "unverified":
            final_synth = f"Text: {txt_synth}"
        else:
            final_synth = "unverified"
        return [{
            "filename": "N/A",
            "text_used": text_input,
            "result": {
                "authenticity_assessment": _FAKE_VERDICT if txt_is_fake else _REAL_VERDICT,
                "verification_tools_methods": "Verified by our model using algorithms SearchLLM.",
                "synthetic_type": final_synth,
                "other_artifacts": f"image: No image provided\ntext: {text_status}",
            },
        }]

    # The text part of the synthetic-type label is the same for every image.
    text_synth_label = None
    if txt_is_fake:
        txt_s_type = clean_simple(txt_synth)
        if txt_s_type == "unverified":
            txt_s_type = "Generated Content"
        text_synth_label = f"Text: {txt_s_type}"

    all_final_results = []
    # _analyze_images returns exactly one report per path, so zip pairs them up.
    for img_path, img_report in zip(image_paths, img_reports):
        img_is_fake, image_status, raw_img_s = _classify_image_report(img_report)

        synth_list = []
        if img_is_fake:
            img_s_type = clean_simple(raw_img_s)
            if img_s_type == "unverified":
                img_s_type = "Manipulated Image"
            synth_list.append(f"Image: {img_s_type}")
            if raw_img_s and _OTHER_ARTIFACTS_RE.search(raw_img_s):
                logger.warning(
                    "[CLEANER] 'synth' contained 'Other Artifacts' fragment — cleaned. sample: %s",
                    raw_img_s[:200],
                )
        if text_synth_label is not None:
            synth_list.append(text_synth_label)

        all_final_results.append({
            "filename": os.path.basename(img_path),
            "text_used": text_input,
            "result": {
                "authenticity_assessment": (
                    _FAKE_VERDICT if img_is_fake or txt_is_fake else _REAL_VERDICT
                ),
                "verification_tools_methods": "Determined by an algorithm developed by NICT",
                "synthetic_type": " | ".join(synth_list) if synth_list else "unverified",
                "other_artifacts": {
                    "Image": image_status,
                    "Text": text_status,
                },
            },
        })

    return all_final_results