| import streamlit as st |
| import requests |
| from transformers import pipeline |
| import re |
| from collections import Counter |
| import json |
|
|
| |
@st.cache_resource
def load_summarizer():
    """Build the summarization pipeline used by the app.

    The function is wrapped in ``st.cache_resource`` so the pipeline object
    is created once and reused instead of being rebuilt on every call.

    Returns:
        The object returned by ``pipeline("summarization", ...)`` for the
        ``sshleifer/distilbart-cnn-12-6`` checkpoint.
    """
    model_name = "sshleifer/distilbart-cnn-12-6"
    summarization_pipeline = pipeline("summarization", model=model_name)
    return summarization_pipeline


# Module-level handle that generate_summary() calls.
summarizer = load_summarizer()
|
|
| |
def generate_summary(text, max_length=200, min_length=60):
    """Summarize ``text`` with the module-level ``summarizer`` pipeline.

    Args:
        text: Free-form description to condense.
        max_length: Upper bound on the generated summary length handed to
            the pipeline. Lowered automatically for short inputs.
        min_length: Lower bound on the generated summary length handed to
            the pipeline. Lowered automatically for short inputs.

    Returns:
        The generated summary string, or a fixed message asking for more
        input when ``text`` has fewer than 10 whitespace-separated words.
    """
    word_count = len(text.split())
    if word_count < 10:
        return "Please enter at least 10 words for summarization."

    # Word count is only a rough proxy for token count, but it keeps the
    # length bounds from exceeding the input: forcing a 60-token minimum on
    # a 10-word description makes the model pad the summary with filler.
    effective_max = min(max_length, max(word_count, 20))
    effective_min = min(min_length, effective_max // 2)

    result = summarizer(
        text,
        max_length=effective_max,
        min_length=effective_min,
        do_sample=False,
        # Inputs longer than the model's context window would otherwise
        # fail inside the model instead of being cut to fit.
        truncation=True,
    )
    return result[0]['summary_text']
|
|
| |
# Common function words that carry no search value; checked after lowercasing.
_KEYWORD_STOPWORDS = frozenset({
    "about", "above", "after", "again", "against", "being", "have", "still", "does", "with", "this",
    "since", "their", "which", "these", "those", "other", "where", "while", "into", "from", "that",
    "will", "would", "could", "should", "might", "shall", "more", "such", "only", "like", "than",
    "there", "here", "also", "very", "every", "because", "through", "among", "between", "during",
})


def extract_keywords(text, max_keywords=5):
    """Return the most frequent non-stop-word terms in ``text``.

    Only runs of four or more ASCII letters (after lowercasing) count as
    candidate terms. Ties keep the order in which terms first appear.

    Args:
        text: Text to mine for keywords.
        max_keywords: Maximum number of keywords to return.

    Returns:
        A list of lowercase keywords, most frequent first.
    """
    candidates = re.findall(r'\b[a-z]{4,}\b', text.lower())
    tally = Counter(
        term for term in candidates if term not in _KEYWORD_STOPWORDS
    )
    return [term for term, _count in tally.most_common(max_keywords)]
|
|
def search_patents(keywords):
    """Query PatentsView for patents whose title or abstract match ``keywords``.

    The keywords are joined with spaces and matched with ``_text_any``
    against ``patent_title`` and ``patent_abstract``. Each ``_text_any``
    operator names a single field, so the two criteria are combined with
    ``_or`` rather than placed in one operator.

    Failures are reported through ``st.error`` and never raised.

    Args:
        keywords: Iterable of keyword strings.

    Returns:
        A list of patent dicts (up to 5 requested per page); an empty list
        when there are no keywords, no matches, or the request failed.
    """
    # Nothing to search for: skip the network round-trip entirely.
    if not keywords:
        return []

    keyword_string = " ".join(keywords)
    query = {
        "_or": [
            {"_text_any": {"patent_title": keyword_string}},
            {"_text_any": {"patent_abstract": keyword_string}},
        ]
    }
    fields = ["patent_number", "patent_title", "patent_abstract"]
    url = "https://api.patentsview.org/patents/query"
    params = {
        "q": json.dumps(query),
        "f": json.dumps(fields),
        "o": json.dumps({"per_page": 5}),
    }

    try:
        response = requests.get(url, params=params, timeout=30)
        response.raise_for_status()
        data = response.json()

        # Diagnostics shown in the UI to help when the payload shape is
        # not what the app expects.
        st.write(f"API Response Status: {response.status_code}")
        if "patents" not in data:
            st.write("API Response Keys:", list(data.keys()))

        # "patents" may be present but null when nothing matched; always
        # hand the caller a list.
        return data.get("patents") or []
    except requests.exceptions.Timeout:
        st.error("Patent API request timed out. Please try again.")
        return []
    except json.JSONDecodeError as e:
        # Must precede RequestException: recent versions of requests raise
        # a decode error that is also a RequestException subclass, which
        # would otherwise hide this more specific message.
        st.error(f"Failed to parse API response: {str(e)}")
        return []
    except requests.exceptions.RequestException as e:
        st.error(f"Patent API request error: {str(e)}")
        return []
    except Exception as e:
        # Last-resort boundary so a search failure never crashes the page.
        st.error(f"Unexpected error: {str(e)}")
        return []
|
|
| |
def search_patents_simple(keywords):
    """Fallback patent search that matches ``keywords`` on abstracts only.

    ``_text_any`` needs a field to search, so the space-joined keywords are
    matched against ``patent_abstract`` instead of being passed as a bare
    string.

    Failures are reported through ``st.error`` and never raised.

    Args:
        keywords: Iterable of keyword strings.

    Returns:
        A list of patent dicts (up to 5 requested per page); an empty list
        when there are no keywords, no matches, or the request failed.
    """
    # Nothing to search for: skip the network round-trip entirely.
    if not keywords:
        return []

    keyword_string = " ".join(keywords)
    query = {"_text_any": {"patent_abstract": keyword_string}}
    fields = ["patent_number", "patent_title", "patent_abstract"]
    url = "https://api.patentsview.org/patents/query"
    params = {
        "q": json.dumps(query),
        "f": json.dumps(fields),
        "o": json.dumps({"per_page": 5}),
    }

    try:
        response = requests.get(url, params=params, timeout=30)
        response.raise_for_status()
        data = response.json()
        # "patents" may be present but null when nothing matched; always
        # hand the caller a list.
        return data.get("patents") or []
    except Exception as e:
        # Best-effort fallback: report the problem and return no results.
        st.error(f"Simple search also failed: {str(e)}")
        return []
|
|
| |
def similarity_score(text1, text2):
    """Measure word overlap between two texts.

    Both texts are lowercased and split on whitespace into sets of words.
    The score is the number of shared words divided by the size of the
    larger set.

    Args:
        text1: First text.
        text2: Second text.

    Returns:
        ``0`` when either text is empty or ``None``; otherwise a float
        between 0 and 1.
    """
    if text1 and text2:
        words_a = set(text1.lower().split())
        words_b = set(text2.lower().split())
        shared = words_a & words_b
        # The trailing 1 guards against whitespace-only inputs, which
        # produce two empty sets.
        larger_size = max(len(words_a), len(words_b), 1)
        return len(shared) / larger_size
    return 0
|
|
| |
# ---------------------------------------------------------------------------
# Streamlit page: collect an invention description, summarize it, search for
# related patents and show a word-overlap score for each result.
#
# NOTE(review): several labels below begin with glyphs that look like
# mis-decoded emoji. They are reproduced as found; confirm the file's
# encoding before changing them.
# ---------------------------------------------------------------------------
st.title("π§ Patent Infringement Analyzer (Live Search)")
st.write("This tool summarizes your invention and compares it to real patents using the PatentsView API.")

invention = st.text_area("βοΈ Describe your invention (10+ words):", height=250)

if st.button("π Search & Analyze"):
    if len(invention.split()) < 10:
        st.error("Please enter more detail β at least 10 words.")
    else:
        with st.spinner("Summarizing your invention..."):
            summary = generate_summary(invention)
        st.subheader("π Invention Summary")
        st.write(summary)

        keywords = extract_keywords(invention)
        st.info("π Keywords used for search: " + ", ".join(keywords))

        with st.spinner("Searching patents..."):
            patents = search_patents(keywords)

            # Fall back to the simpler query when the first one finds nothing.
            if not patents:
                st.write("Trying alternative search method...")
                patents = search_patents_simple(keywords)

        if not patents:
            st.warning("No similar patents found. This could mean:")
            st.write("- Your invention might be novel")
            st.write("- The keywords might be too specific")
            st.write("- The patent database might be temporarily unavailable")
        else:
            st.subheader("π Similar Patents & Overlap Risk")
            for i, p in enumerate(patents):
                # `or` rather than a .get() default: the API can send an
                # explicit null, which a .get() default does not replace.
                patent_num = p.get("patent_number") or f"Patent {i+1}"
                patent_title = p.get("patent_title") or "No title available"
                abstract_text = p.get("patent_abstract") or ""

                # Score only real abstract text, never the placeholder.
                score = similarity_score(summary, abstract_text)
                if score > 0.25:
                    risk = "High"
                elif score > 0.15:
                    risk = "Moderate"
                else:
                    risk = "Low"

                patent_abstract = abstract_text or "No abstract available"
                ellipsis = '...' if len(patent_abstract) > 500 else ''
                abstract_preview = f"{patent_abstract[:500]}{ellipsis}"

                # Built without leading indentation so Markdown never
                # treats a line as an indented code block.
                st.markdown(
                    f"**[{patent_num}]** - **{patent_title}**\n\n"
                    f"*Abstract*: {abstract_preview}\n\n"
                    f"**Similarity Score**: {score:.3f} | **Estimated Risk**: `{risk}`\n\n"
                    "---"
                )