"""Gradio app for Maritime Intelligence Classifier.""" import gradio as gr from setfit import SetFitModel from pathlib import Path import os # Try to load model from Hugging Face Hub first, then fall back to local # Set MODEL_PATH environment variable or update this line with your Hugging Face repo ID MODEL_PATH = os.getenv("MODEL_PATH", "gamaly/maritime-intelligence-classifier") LOCAL_MODEL_PATH = "./maritime_classifier" # Load model print("Loading model...") print(f"MODEL_PATH: {MODEL_PATH}") print(f"LOCAL_MODEL_PATH: {LOCAL_MODEL_PATH}") model = None try: # Check if MODEL_PATH is a Hugging Face repo (contains "/" and doesn't exist locally) if "/" in MODEL_PATH and not Path(MODEL_PATH).exists(): print(f"Loading from Hugging Face Hub: {MODEL_PATH}") model = SetFitModel.from_pretrained(MODEL_PATH) print(f"✓ Successfully loaded model from Hugging Face: {MODEL_PATH}") # Check if local model path exists elif Path(LOCAL_MODEL_PATH).exists(): print(f"Loading from local path: {LOCAL_MODEL_PATH}") model = SetFitModel.from_pretrained(LOCAL_MODEL_PATH) print(f"✓ Successfully loaded model from local path: {LOCAL_MODEL_PATH}") # If MODEL_PATH is a local path that exists elif Path(MODEL_PATH).exists(): print(f"Loading from local path: {MODEL_PATH}") model = SetFitModel.from_pretrained(MODEL_PATH) print(f"✓ Successfully loaded model from local path: {MODEL_PATH}") # Default: try MODEL_PATH as Hugging Face repo else: print(f"Attempting to load from Hugging Face Hub: {MODEL_PATH}") model = SetFitModel.from_pretrained(MODEL_PATH) print(f"✓ Successfully loaded model from Hugging Face: {MODEL_PATH}") except Exception as e: print(f"❌ Error loading model: {e}") print(f" Attempted paths:") print(f" - Hugging Face: {MODEL_PATH}") print(f" - Local: {LOCAL_MODEL_PATH}") import traceback print("\nFull traceback:") traceback.print_exc() model = None if model is None: print("\n⚠️ WARNING: Model failed to load. The app will not work correctly.") print(" Please check:") print(f" 1. Model exists at: https://huggingface.co/{MODEL_PATH}") print(" 2. Internet connection is available") print(" 3. All dependencies are installed (setfit, sentence-transformers, etc.)") else: print("\n✅ Model loaded successfully! Ready for inference.") def truncate_text(text, max_tokens=256): """ Truncate text to approximately max_tokens. Uses a simple word-based approximation (roughly 1 token = 0.75 words). """ if not text: return text # Rough approximation: 1 token ≈ 0.75 words (conservative estimate) max_words = int(max_tokens * 0.75) words = text.split() if len(words) <= max_words: return text # Truncate and add ellipsis truncated = " ".join(words[:max_words]) return truncated + "... [truncated]" def predict_text(text): """Predict whether text is actionable (YES) or not (NO).""" if model is None: return "Error: Model not loaded. Please check the console logs.", 0.0, "error" if not text or not text.strip(): return "Please enter some text to classify.", 0.0, "neutral" try: # Note: SetFit uses the base model's max_length (256 tokens for all-MiniLM-L6-v2) # The model will automatically truncate longer texts, but we can pre-truncate # to ensure we're using the most relevant part (beginning of text) # For longer articles, the beginning usually contains the most important info # Check approximate length (rough estimate: 1 token ≈ 0.75 words) word_count = len(text.split()) token_estimate = int(word_count / 0.75) # If text is significantly longer than 256 tokens, truncate intelligently # (SetFit will truncate anyway, but we can control which part) if token_estimate > 300: # Give some buffer # For news articles, the beginning usually has the key info # But we could also try: beginning + end, or just beginning processed_text = truncate_text(text, max_tokens=256) print(f"⚠️ Text truncated from ~{token_estimate} tokens to ~256 tokens") else: processed_text = text # Make prediction prediction = model.predict([processed_text])[0] # Get probabilities (handle version compatibility) try: probabilities = model.predict_proba([processed_text])[0] confidence = probabilities[prediction] * 100 except AttributeError as e: # Fallback if predict_proba fails due to version mismatch # Use a simple confidence estimate based on prediction print(f"Warning: predict_proba failed ({e}), using fallback confidence") # For binary classification, we can estimate confidence from the decision function # or just use a default high confidence confidence = 85.0 # Default confidence when we can't get probabilities # Convert to labels label = "YES (Actionable)" if prediction == 1 else "NO (Not Actionable)" # Determine status for styling status = "actionable" if prediction == 1 else "not_actionable" return label, confidence, status except Exception as e: error_msg = f"Error during prediction: {str(e)}" print(error_msg) import traceback traceback.print_exc() return error_msg, 0.0, "error" def get_explanation(status): """Get explanation based on prediction status.""" explanations = { "actionable": "✓ This text contains actionable vessel-specific evidence (e.g., specific vessel names, crimes, incidents).", "not_actionable": "✗ This text does not contain actionable vessel-specific evidence (e.g., general maritime news, non-specific information).", "error": "⚠️ An error occurred. Please check the model is properly loaded.", "neutral": "" } return explanations.get(status, "") # Create Gradio interface # Note: theme parameter moved to launch() in Gradio 6.0+ with gr.Blocks(title="Maritime Intelligence Classifier") as app: gr.Markdown( """ # 🚢 Maritime Intelligence Classifier Classify maritime news articles as containing **actionable vessel-specific evidence** (YES) or not (NO). **Actionable articles** typically include: - Specific vessel names - Specific crimes or incidents - Evidence that can be used for investigation **Non-actionable articles** are general maritime news without specific vessel details. """ ) with gr.Row(): with gr.Column(scale=2): text_input = gr.Textbox( label="Article Text", placeholder="Paste or type the maritime news article text here...", lines=10, max_lines=20 ) submit_btn = gr.Button("Classify", variant="primary", size="lg") with gr.Column(scale=1): prediction_output = gr.Label( label="Prediction", value={"YES (Actionable)": 0.0, "NO (Not Actionable)": 0.0} ) confidence_output = gr.Number( label="Confidence", value=0.0, precision=1 ) explanation_output = gr.Markdown() # Example texts gr.Markdown("### 📝 Example Texts") with gr.Row(): example_yes = gr.Examples( examples=[ ["The fishing vessel Marine 707 was involved in the disappearance of fisheries observer Samuel Abayateye in Ghanaian waters. The observer's decapitated body was found weeks later."], ["Authorities detained the Meng Xin 15 after discovering evidence of illegal saiko transshipment and threats against fisheries observers."], ], inputs=text_input, label="YES Examples (Actionable)" ) example_no = gr.Examples( examples=[ ["A new maritime museum opened in the port city, showcasing historical ships and ocean exploration artifacts."], ["Marine scientists are studying the effects of ocean acidification on coral reefs in tropical waters."], ], inputs=text_input, label="NO Examples (Not Actionable)" ) # Connect the prediction function def update_prediction(text): label, confidence, status = predict_text(text) # Create label dict for gradio Label component if status == "actionable": label_dict = {"YES (Actionable)": confidence / 100, "NO (Not Actionable)": (100 - confidence) / 100} elif status == "not_actionable": label_dict = {"YES (Actionable)": (100 - confidence) / 100, "NO (Not Actionable)": confidence / 100} else: label_dict = {"YES (Actionable)": 0.0, "NO (Not Actionable)": 0.0} explanation = get_explanation(status) return label_dict, confidence, explanation submit_btn.click( fn=update_prediction, inputs=text_input, outputs=[prediction_output, confidence_output, explanation_output] ) text_input.submit( fn=update_prediction, inputs=text_input, outputs=[prediction_output, confidence_output, explanation_output] ) gr.Markdown( """ --- ### ℹ️ About This classifier uses SetFit to identify maritime news articles containing actionable vessel-specific evidence. Built for The Outlaw Ocean Project. **Model**: SetFit (sentence-transformers/all-MiniLM-L6-v2 base) """ ) if __name__ == "__main__": app.launch(share=False, theme=gr.themes.Soft())