# app.py - source as shown on the Hugging Face file viewer (commit fb05dc7 "Update app.py", verified, by rebeccah12321; 5.95 kB).
import streamlit as st
import requests
from transformers import pipeline
import re
from collections import Counter
import json
# Load summarizer model
@st.cache_resource
def load_summarizer():
    """Build the summarization pipeline used by the app.

    The function is wrapped in ``st.cache_resource`` so Streamlit can reuse
    the returned object instead of rebuilding it on each call.

    Returns:
        Whatever ``transformers.pipeline`` returns for the ``"summarization"``
        task with the ``sshleifer/distilbart-cnn-12-6`` model.
    """
    summarization_pipeline = pipeline(
        "summarization",
        model="sshleifer/distilbart-cnn-12-6",
    )
    return summarization_pipeline


# Module-level pipeline handle; generate_summary() calls it directly.
summarizer = load_summarizer()
# Summarize user's invention
def generate_summary(text):
    """Summarize ``text`` with the module-level ``summarizer`` pipeline.

    Args:
        text: Free-form description to summarize.

    Returns:
        The ``summary_text`` field of the first pipeline result, or a fixed
        prompt message when ``text`` contains fewer than 10
        whitespace-separated words (the pipeline is not called in that case).
    """
    word_count = len(text.split())
    if word_count >= 10:
        outputs = summarizer(text, max_length=200, min_length=60, do_sample=False)
        first_output = outputs[0]
        return first_output['summary_text']

    # Not enough input to summarize; tell the caller instead of running the model.
    return "Please enter at least 10 words for summarization."
# Improved keyword extraction
def extract_keywords(text, max_keywords=5):
    """Return the most frequent non-stopword words found in ``text``.

    The text is lowercased and scanned for runs of four or more ASCII
    letters; words in the built-in stopword set are discarded and the rest
    are ranked by how often they occur.

    Args:
        text: Text to mine for keywords.
        max_keywords: Number of top-ranked words requested from
            ``Counter.most_common``.

    Returns:
        A list of keyword strings, most frequent first.
    """
    ignored = {
        "about", "above", "after", "again", "against", "being", "have", "still", "does", "with", "this",
        "since", "their", "which", "these", "those", "other", "where", "while", "into", "from", "that",
        "will", "would", "could", "should", "might", "shall", "more", "such", "only", "like", "than",
        "there", "here", "also", "very", "every", "because", "through", "among", "between", "during",
    }
    tally = Counter(
        token
        for token in re.findall(r'\b[a-z]{4,}\b', text.lower())
        if token not in ignored
    )
    return [word for word, _count in tally.most_common(max_keywords)]
def search_patents(keywords):
    """Search PatentsView for patents whose title or abstract matches any keyword.

    Sends one GET request to the PatentsView ``patents/query`` endpoint and
    asks for the patent number, title and abstract of up to five matches.
    Failures are reported to the user through ``st.error`` and never raised.

    Args:
        keywords: Iterable of keyword strings; they are joined with spaces
            into a single ``_text_any`` search value.

    Returns:
        A list of patent dicts as returned by the API. An empty list is
        returned when ``keywords`` is empty, when the request or response
        parsing fails, or when the API reports no matches.
    """
    if not keywords:
        # Nothing to search for; skip the network round-trip entirely.
        return []

    search_text = " ".join(keywords)
    # A PatentsView comparison operator takes exactly one {field: value}
    # pair, so matching either of two fields has to be expressed as an
    # `_or` of two `_text_any` clauses rather than one clause with two keys.
    query = {
        "_or": [
            {"_text_any": {"patent_title": search_text}},
            {"_text_any": {"patent_abstract": search_text}},
        ]
    }
    fields = ["patent_number", "patent_title", "patent_abstract"]
    url = "https://api.patentsview.org/patents/query"
    params = {
        "q": json.dumps(query),
        "f": json.dumps(fields),
        "o": json.dumps({"per_page": 5}),
    }
    try:
        response = requests.get(url, params=params, timeout=30)
        response.raise_for_status()
        data = response.json()
        # Debug info
        st.write(f"API Response Status: {response.status_code}")
        if "patents" not in data:
            st.write("API Response Keys:", list(data.keys()))
        # The API uses JSON null for "no matches", so `.get(key, [])` alone
        # would hand None to the caller; normalise to a list.
        return data.get("patents") or []
    except requests.exceptions.Timeout:
        st.error("Patent API request timed out. Please try again.")
        return []
    except json.JSONDecodeError as e:
        # Must precede RequestException: newer `requests` raise a decode
        # error that subclasses both, and the parse message is the more
        # specific one.
        st.error(f"Failed to parse API response: {str(e)}")
        return []
    except requests.exceptions.RequestException as e:
        st.error(f"Patent API request error: {str(e)}")
        return []
    except Exception as e:
        # Last-resort boundary so a malformed payload cannot crash the UI.
        st.error(f"Unexpected error: {str(e)}")
        return []
# Alternative search function using simpler query structure
def search_patents_simple(keywords):
    """Fallback PatentsView search that matches keywords against abstracts only.

    Uses a single ``_text_any`` clause on ``patent_abstract`` and requests
    the patent number, title and abstract of up to five matches. Any failure
    is reported through ``st.error`` and never raised.

    Args:
        keywords: Iterable of keyword strings; they are joined with spaces
            into one search value.

    Returns:
        A list of patent dicts as returned by the API, or an empty list when
        ``keywords`` is empty, the request fails, or nothing matches.
    """
    if not keywords:
        # Nothing to search for; skip the network round-trip entirely.
        return []

    keyword_string = " ".join(keywords)
    # `_text_any` needs a {field: value} pair; a bare string names no field
    # and is not a valid query, so target a single field here.
    query = {
        "_text_any": {"patent_abstract": keyword_string}
    }
    fields = ["patent_number", "patent_title", "patent_abstract"]
    url = "https://api.patentsview.org/patents/query"
    params = {
        "q": json.dumps(query),
        "f": json.dumps(fields),
        "o": json.dumps({"per_page": 5}),
    }
    try:
        response = requests.get(url, params=params, timeout=30)
        response.raise_for_status()
        data = response.json()
        # The API uses JSON null for "no matches"; always return a list.
        return data.get("patents") or []
    except Exception as e:
        # Best-effort fallback: report and carry on with no results.
        st.error(f"Simple search also failed: {str(e)}")
        return []
# Similarity score via keyword overlap
def similarity_score(text1, text2):
    """Score how much two texts overlap in vocabulary.

    Each text is lowercased and split on whitespace into a set of words.
    The score is the number of shared words divided by the size of the
    larger set (with a floor of 1 on the divisor).

    Args:
        text1: First text; may be empty or ``None``.
        text2: Second text; may be empty or ``None``.

    Returns:
        ``0`` when either text is falsy, otherwise a float between 0 and 1.
    """
    if not (text1 and text2):
        return 0

    words_a = set(text1.lower().split())
    words_b = set(text2.lower().split())
    shared = words_a & words_b
    # The floor of 1 keeps whitespace-only inputs from dividing by zero.
    denominator = max(len(words_a), len(words_b), 1)
    return len(shared) / denominator
# UI
# Page header and input form.
st.title("🧠 Patent Infringement Analyzer (Live Search)")
st.write("This tool summarizes your invention and compares it to real patents using the PatentsView API.")
invention = st.text_area("✍️ Describe your invention (10+ words):", height=250)

if st.button("🔍 Search & Analyze"):
    if len(invention.split()) < 10:
        st.error("Please enter more detail — at least 10 words.")
    else:
        with st.spinner("Summarizing your invention..."):
            summary = generate_summary(invention)
        st.subheader("📄 Invention Summary")
        st.write(summary)

        keywords = extract_keywords(invention)
        st.info("🔑 Keywords used for search: " + ", ".join(keywords))

        with st.spinner("Searching patents..."):
            # Try the main search first
            patents = search_patents(keywords)
            # If that yields nothing, try the simpler approach
            if not patents:
                st.write("Trying alternative search method...")
                patents = search_patents_simple(keywords)

        if not patents:
            st.warning("No similar patents found. This could mean:")
            st.write("- Your invention might be novel")
            st.write("- The keywords might be too specific")
            st.write("- The patent database might be temporarily unavailable")
        else:
            st.subheader("🔍 Similar Patents & Overlap Risk")
            for i, p in enumerate(patents):
                # `dict.get(key, default)` only covers a missing key; the API
                # can also send an explicit null, so fall back with `or`.
                patent_num = p.get("patent_number") or f"Patent {i+1}"
                patent_title = p.get("patent_title") or "No title available"
                abstract = p.get("patent_abstract") or ""

                # Score against the real abstract only, never the placeholder.
                score = similarity_score(summary, abstract)
                if score > 0.25:
                    risk = "High"
                elif score > 0.15:
                    risk = "Moderate"
                else:
                    risk = "Low"

                shown_abstract = abstract or "No abstract available"
                excerpt = shown_abstract[:500] + ("..." if len(shown_abstract) > 500 else "")

                # Blank lines keep each part its own paragraph and make the
                # trailing `---` a horizontal rule rather than a heading
                # underline for the line above it.
                st.markdown("\n\n".join([
                    f"**[{patent_num}]** - **{patent_title}**",
                    f"*Abstract*: {excerpt}",
                    f"**Similarity Score**: {score:.3f} | **Estimated Risk**: `{risk}`",
                    "---",
                ]))