Spaces:

rebeccah12321
/

patent-infringement-analyzer

Sleeping

App Files Files Community

patent-infringement-analyzer / app.py

rebeccah12321

Update app.py

fb05dc7 verified about 1 year ago

Raw

History Blame Contribute Delete

5.95 kB

	import streamlit as st
	import requests
	from transformers import pipeline
	import re
	from collections import Counter
	import json

	# Load summarizer model
	@st.cache_resource
	def load_summarizer():
	    """Build and return the Hugging Face summarization pipeline.

	    The function is wrapped in ``st.cache_resource`` so Streamlit caches the
	    returned pipeline object instead of rebuilding it on every call.
	    """
	    model_name = "sshleifer/distilbart-cnn-12-6"
	    return pipeline("summarization", model=model_name)

	# Module-level pipeline instance; generate_summary() calls it directly.
	summarizer = load_summarizer()

	# Summarize user's invention
	def generate_summary(text):
	    """Summarize an invention description with the module-level ``summarizer``.

	    Args:
	        text: Free-form description of the invention.

	    Returns:
	        The generated summary string, or a fixed prompt asking for more
	        input when ``text`` has fewer than 10 whitespace-separated words.
	    """
	    if len(text.split()) < 10:
	        return "Please enter at least 10 words for summarization."
	    # truncation=True makes the tokenizer clip over-long descriptions to the
	    # model's maximum input length instead of failing inside the model.
	    result = summarizer(
	        text,
	        max_length=200,
	        min_length=60,
	        do_sample=False,
	        truncation=True,
	    )
	    return result[0]['summary_text']

	# Improved keyword extraction
	def extract_keywords(text, max_keywords=5):
	    """Return the most frequent non-stopword terms in ``text``.

	    Only lowercase ASCII words of four or more letters are considered (the
	    text is lowercased first). Terms are ranked by frequency; ties keep
	    the order in which the terms first appear.

	    Args:
	        text: Source text to mine for keywords.
	        max_keywords: Upper bound on the number of keywords returned.

	    Returns:
	        A list of at most ``max_keywords`` keyword strings.
	    """
	    ignored = {
	        "about", "above", "after", "again", "against", "being", "have", "still", "does", "with", "this",
	        "since", "their", "which", "these", "those", "other", "where", "while", "into", "from", "that",
	        "will", "would", "could", "should", "might", "shall", "more", "such", "only", "like", "than",
	        "there", "here", "also", "very", "every", "because", "through", "among", "between", "during",
	    }
	    frequencies = Counter(
	        token
	        for token in re.findall(r'\b[a-z]{4,}\b', text.lower())
	        if token not in ignored
	    )
	    return [token for token, _count in frequencies.most_common(max_keywords)]

	def search_patents(keywords):
	    """Search PatentsView for patents whose title or abstract mentions any keyword.

	    Args:
	        keywords: Iterable of keyword strings; they are joined with spaces
	            into a single ``_text_any`` search string.

	    Returns:
	        A list of patent dicts (fields: ``patent_number``, ``patent_title``,
	        ``patent_abstract``). An empty list is returned when there are no
	        usable keywords, when the API reports no matches, or when the
	        request fails; failures are reported through ``st.error`` rather
	        than raised.
	    """
	    keyword_string = " ".join(keywords).strip()
	    if not keyword_string:
	        # Nothing to search for; skip the network round trip.
	        return []

	    # Each criterion names exactly one field, so the title and abstract
	    # searches are combined with an explicit "_or".
	    query = {
	        "_or": [
	            {"_text_any": {"patent_title": keyword_string}},
	            {"_text_any": {"patent_abstract": keyword_string}},
	        ]
	    }

	    fields = ["patent_number", "patent_title", "patent_abstract"]

	    url = "https://api.patentsview.org/patents/query"

	    params = {
	        "q": json.dumps(query),
	        "f": json.dumps(fields),
	        "o": json.dumps({"per_page": 5}),
	    }

	    try:
	        response = requests.get(url, params=params, timeout=30)
	        response.raise_for_status()
	        data = response.json()

	        # Debug info
	        st.write(f"API Response Status: {response.status_code}")
	        if "patents" not in data:
	            st.write("API Response Keys:", list(data.keys()))

	        # "patents" may be present but null; normalise that to a list.
	        return data.get("patents") or []
	    except requests.exceptions.Timeout:
	        st.error("Patent API request timed out. Please try again.")
	        return []
	    except json.JSONDecodeError as e:
	        # Must come before RequestException: the JSON error raised by recent
	        # versions of requests derives from both classes.
	        st.error(f"Failed to parse API response: {str(e)}")
	        return []
	    except requests.exceptions.RequestException as e:
	        st.error(f"Patent API request error: {str(e)}")
	        return []
	    except Exception as e:
	        # UI boundary: report the problem instead of crashing the app.
	        st.error(f"Unexpected error: {str(e)}")
	        return []

	# Alternative search function using simpler query structure
	def search_patents_simple(keywords):
	    """Fallback PatentsView search that matches keywords against abstracts only.

	    Args:
	        keywords: Iterable of keyword strings; they are joined with spaces
	            into a single ``_text_any`` search string.

	    Returns:
	        A list of patent dicts (fields: ``patent_number``, ``patent_title``,
	        ``patent_abstract``). An empty list is returned when there are no
	        usable keywords, when nothing matches, or when the request fails;
	        failures are reported through ``st.error`` rather than raised.
	    """
	    keyword_string = " ".join(keywords).strip()
	    if not keyword_string:
	        # Nothing to search for; skip the network round trip.
	        return []

	    # A criterion has the shape {operator: {field: value}}; a bare string
	    # names no field to search.
	    query = {"_text_any": {"patent_abstract": keyword_string}}

	    fields = ["patent_number", "patent_title", "patent_abstract"]

	    url = "https://api.patentsview.org/patents/query"

	    params = {
	        "q": json.dumps(query),
	        "f": json.dumps(fields),
	        "o": json.dumps({"per_page": 5}),
	    }

	    try:
	        response = requests.get(url, params=params, timeout=30)
	        response.raise_for_status()
	        data = response.json()
	        # "patents" may be present but null; normalise that to a list.
	        return data.get("patents") or []
	    except Exception as e:
	        # UI boundary: report the problem instead of crashing the app.
	        st.error(f"Simple search also failed: {str(e)}")
	        return []

	# Similarity score via keyword overlap
	def similarity_score(text1, text2):
	    """Score how much two texts share vocabulary.

	    Both texts are lowercased and split on whitespace into sets of distinct
	    tokens. The score is the number of shared tokens divided by the size of
	    the larger set.

	    Args:
	        text1: First text.
	        text2: Second text.

	    Returns:
	        ``0`` when either text is falsy, otherwise a float in ``[0, 1]``.
	    """
	    if not (text1 and text2):
	        return 0
	    tokens_a = set(text1.lower().split())
	    tokens_b = set(text2.lower().split())
	    shared = tokens_a & tokens_b
	    # The trailing 1 guards against division by zero for whitespace-only input.
	    denominator = max(len(tokens_a), len(tokens_b), 1)
	    return len(shared) / denominator

	# UI
	# Streamlit page: collect a description, summarize it, search PatentsView
	# with keywords extracted from it, and show an overlap-based risk estimate
	# for each patent returned.
	st.title("🧠 Patent Infringement Analyzer (Live Search)")
	st.write("This tool summarizes your invention and compares it to real patents using the PatentsView API.")

	invention = st.text_area("✍️ Describe your invention (10+ words):", height=250)

	if st.button("🔍 Search & Analyze"):
	    if len(invention.split()) < 10:
	        st.error("Please enter more detail — at least 10 words.")
	    else:
	        with st.spinner("Summarizing your invention..."):
	            summary = generate_summary(invention)
	        st.subheader("📄 Invention Summary")
	        st.write(summary)

	        keywords = extract_keywords(invention)
	        st.info("🔑 Keywords used for search: " + ", ".join(keywords))

	        with st.spinner("Searching patents..."):
	            # Try the main search first
	            patents = search_patents(keywords)

	            # If that fails, try the simpler approach
	            if not patents:
	                st.write("Trying alternative search method...")
	                patents = search_patents_simple(keywords)

	        if not patents:
	            st.warning("No similar patents found. This could mean:")
	            st.write("- Your invention might be novel")
	            st.write("- The keywords might be too specific")
	            st.write("- The patent database might be temporarily unavailable")
	        else:
	            st.subheader("🔍 Similar Patents & Overlap Risk")
	            for i, p in enumerate(patents):
	                # Use "or" so that fields present with a null/empty value also
	                # fall back to the placeholder; slicing None would crash.
	                patent_num = p.get("patent_number") or f"Patent {i+1}"
	                patent_title = p.get("patent_title") or "No title available"
	                patent_abstract = p.get("patent_abstract") or "No abstract available"

	                score = similarity_score(summary, patent_abstract)
	                if score > 0.25:
	                    risk = "High"
	                elif score > 0.15:
	                    risk = "Moderate"
	                else:
	                    risk = "Low"

	                abstract_preview = patent_abstract[:500]
	                if len(patent_abstract) > 500:
	                    abstract_preview += "..."

	                # Build the Markdown from explicit lines so the rendered output
	                # does not depend on how this source file is indented.
	                st.markdown("\n".join([
	                    "",
	                    f"[{patent_num}] - {patent_title}",
	                    "",
	                    f"Abstract: {abstract_preview}",
	                    "",
	                    f"Similarity Score: {score:.3f} | Estimated Risk: `{risk}`",
	                    "",
	                    "---",
	                    "",
	                ]))