"""Streamlit app that summarizes an invention and looks up similar patents.

The script summarizes the user's description with a Hugging Face
summarization pipeline, extracts a handful of keywords from it, queries the
PatentsView API for patents whose title or abstract mentions those keywords,
and displays a rough word-overlap score for every hit.
"""

import json
import re
from collections import Counter
from typing import Any, Dict, List

import requests
import streamlit as st
from transformers import pipeline

# NOTE(review): this is the legacy PatentsView endpoint. It is believed to have
# been retired in favour of the PatentSearch API (search.patentsview.org),
# which needs an API key and uses a different response schema -- confirm
# before relying on this URL.
PATENTS_API_URL = "https://api.patentsview.org/patents/query"
PATENT_FIELDS = ["patent_number", "patent_title", "patent_abstract"]
RESULTS_PER_PAGE = 5
REQUEST_TIMEOUT_SECONDS = 30

MIN_INPUT_WORDS = 10
SUMMARY_MAX_TOKENS = 200
SUMMARY_MIN_TOKENS = 60
SUMMARY_FLOOR_TOKENS = 16

HIGH_RISK_THRESHOLD = 0.25
MODERATE_RISK_THRESHOLD = 0.15
ABSTRACT_PREVIEW_CHARS = 500

# Keyword candidates: lowercase ASCII words of at least four letters.
_KEYWORD_PATTERN = re.compile(r"\b[a-z]{4,}\b")
# Tokens used for the overlap score; \w+ drops attached punctuation.
_TOKEN_PATTERN = re.compile(r"\w+")

# Built once at import time instead of on every extract_keywords() call.
STOPWORDS = frozenset({
    "about", "above", "after", "again", "against", "being", "have", "still",
    "does", "with", "this", "since", "their", "which", "these", "those",
    "other", "where", "while", "into", "from", "that", "will", "would",
    "could", "should", "might", "shall", "more", "such", "only", "like",
    "than", "there", "here", "also", "very", "every", "because", "through",
    "among", "between", "during",
})


# Load summarizer model
@st.cache_resource
def load_summarizer():
    """Create the summarization pipeline (cached by Streamlit across reruns)."""
    return pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")


summarizer = load_summarizer()


# Summarize user's invention
def generate_summary(text: str) -> str:
    """Return a model-generated summary of *text*.

    Inputs shorter than ``MIN_INPUT_WORDS`` words are not summarized; a
    message asking for more text is returned instead.
    """
    word_count = len(text.split())
    if word_count < MIN_INPUT_WORDS:
        return "Please enter at least 10 words for summarization."

    # Scale the length bounds with the input: a fixed min_length of 60 forces
    # the model to pad a 10-word description out to 60+ tokens. For inputs of
    # 200 words or more this resolves to the original 200 / 60 bounds.
    max_length = max(SUMMARY_FLOOR_TOKENS, min(SUMMARY_MAX_TOKENS, word_count))
    min_length = min(SUMMARY_MIN_TOKENS, max_length // 2)

    result = summarizer(
        text,
        max_length=max_length,
        min_length=min_length,
        do_sample=False,
        # Clip inputs that exceed the model's maximum input size instead of
        # letting the pipeline fail on them.
        truncation=True,
    )
    return result[0]["summary_text"]


# Improved keyword extraction
def extract_keywords(text: str, max_keywords: int = 5) -> List[str]:
    """Return up to *max_keywords* most frequent non-stopword words in *text*.

    Only lowercase-folded ASCII words of four or more letters are considered.
    """
    candidates = _KEYWORD_PATTERN.findall(text.lower())
    counts = Counter(word for word in candidates if word not in STOPWORDS)
    return [word for word, _ in counts.most_common(max_keywords)]


def _request_params(query: Dict[str, Any]) -> Dict[str, str]:
    """Encode *query* plus the shared field list and paging as URL parameters."""
    return {
        "q": json.dumps(query),
        "f": json.dumps(PATENT_FIELDS),
        "o": json.dumps({"per_page": RESULTS_PER_PAGE}),
    }


def search_patents(keywords: List[str]) -> List[Dict[str, Any]]:
    """Search patent titles and abstracts for any of *keywords*.

    Errors are reported in the Streamlit UI and turned into an empty result,
    so the caller always receives a list.
    """
    if not keywords:
        # Nothing to search for; do not send an empty search string.
        return []

    terms = " ".join(keywords)
    # The PatentsView query language takes one field per operator, so the
    # title and abstract criteria are combined with `_or`.
    query = {
        "_or": [
            {"_text_any": {"patent_title": terms}},
            {"_text_any": {"patent_abstract": terms}},
        ]
    }

    try:
        response = requests.get(
            PATENTS_API_URL,
            params=_request_params(query),
            timeout=REQUEST_TIMEOUT_SECONDS,
        )
        response.raise_for_status()
        data = response.json()

        # Debug info
        st.write(f"API Response Status: {response.status_code}")
        if "patents" not in data:
            st.write("API Response Keys:", list(data.keys()))

        # `or []` also covers a response where "patents" is present but null.
        return data.get("patents") or []
    except requests.exceptions.Timeout:
        st.error("Patent API request timed out. Please try again.")
        return []
    except json.JSONDecodeError as e:
        # Listed before RequestException: newer `requests` releases raise a
        # JSON error that is also a RequestException, which would otherwise
        # hide this more specific message.
        st.error(f"Failed to parse API response: {e}")
        return []
    except requests.exceptions.RequestException as e:
        st.error(f"Patent API request error: {e}")
        return []
    except Exception as e:
        # UI boundary: report anything unexpected instead of crashing the app.
        st.error(f"Unexpected error: {e}")
        return []


# Alternative search function using simpler query structure
def search_patents_simple(keywords: List[str]) -> List[Dict[str, Any]]:
    """Retry the patent search with a single-criterion query on abstracts.

    Intended as a fallback when ``search_patents`` returned nothing. Any
    error is reported in the Streamlit UI and an empty list is returned.
    """
    if not keywords:
        return []

    # A full-text operator needs a field to search; use the abstract.
    query = {"_text_any": {"patent_abstract": " ".join(keywords)}}

    try:
        response = requests.get(
            PATENTS_API_URL,
            params=_request_params(query),
            timeout=REQUEST_TIMEOUT_SECONDS,
        )
        response.raise_for_status()
        data = response.json()
        return data.get("patents") or []
    except Exception as e:
        st.error(f"Simple search also failed: {e}")
        return []


# Similarity score via keyword overlap
def similarity_score(text1: str, text2: str) -> float:
    """Return the word-overlap ratio of two texts, between 0 and 1.

    The score is the number of distinct shared words divided by the size of
    the larger vocabulary. Words are compared case-insensitively with
    punctuation removed, so "sensor," and "sensor" count as the same word.
    Empty or missing input scores 0.
    """
    if not text1 or not text2:
        return 0.0

    words1 = set(_TOKEN_PATTERN.findall(text1.lower()))
    words2 = set(_TOKEN_PATTERN.findall(text2.lower()))
    shared = words1 & words2
    # The trailing 1 guards against division by zero when neither text
    # contains a word character.
    return len(shared) / max(len(words1), len(words2), 1)


def _risk_label(score: float) -> str:
    """Map an overlap score to the risk label shown in the UI."""
    if score > HIGH_RISK_THRESHOLD:
        return "High"
    if score > MODERATE_RISK_THRESHOLD:
        return "Moderate"
    return "Low"


def _render_patent(position: int, patent: Dict[str, Any], summary: str) -> None:
    """Render one search hit with its overlap score against *summary*."""
    # `or` rather than a .get() default: a field that is present but null
    # would otherwise come back as None and break the slicing below.
    patent_num = patent.get("patent_number") or f"Patent {position}"
    patent_title = patent.get("patent_title") or "No title available"
    abstract = patent.get("patent_abstract") or ""

    # Score against the real abstract only, never the placeholder text.
    score = similarity_score(summary, abstract)
    risk = _risk_label(score)

    if abstract:
        preview = abstract[:ABSTRACT_PREVIEW_CHARS]
        if len(abstract) > ABSTRACT_PREVIEW_CHARS:
            preview += "..."
    else:
        preview = "No abstract available"

    # Paragraphs are separated by blank lines; in particular `---` must not
    # directly follow a text line or Markdown renders that line as a heading.
    st.markdown(
        "\n\n".join(
            [
                f"**[{patent_num}]** - **{patent_title}**",
                f"*Abstract*: {preview}",
                f"**Similarity Score**: {score:.3f} | **Estimated Risk**: `{risk}`",
                "---",
            ]
        )
    )


def _analyze(invention: str) -> None:
    """Summarize *invention*, search for similar patents and render the result."""
    if len(invention.split()) < MIN_INPUT_WORDS:
        st.error("Please enter more detail — at least 10 words.")
        return

    with st.spinner("Summarizing your invention..."):
        summary = generate_summary(invention)
    st.subheader("📄 Invention Summary")
    st.write(summary)

    keywords = extract_keywords(invention)
    if not keywords:
        st.warning("No searchable keywords could be extracted from the description.")
        return
    st.info("🔑 Keywords used for search: " + ", ".join(keywords))

    with st.spinner("Searching patents..."):
        # Try the main search first
        patents = search_patents(keywords)
        # If that fails, try the simpler approach
        if not patents:
            st.write("Trying alternative search method...")
            patents = search_patents_simple(keywords)

    if not patents:
        st.warning("No similar patents found. This could mean:")
        st.write("- Your invention might be novel")
        st.write("- The keywords might be too specific")
        st.write("- The patent database might be temporarily unavailable")
        return

    st.subheader("🔍 Similar Patents & Overlap Risk")
    for position, patent in enumerate(patents, start=1):
        _render_patent(position, patent, summary)


# UI
st.title("🧠 Patent Infringement Analyzer (Live Search)")
st.write(
    "This tool summarizes your invention and compares it to real patents "
    "using the PatentsView API."
)

invention = st.text_area("✍️ Describe your invention (10+ words):", height=250)

if st.button("🔍 Search & Analyze"):
    _analyze(invention)