Commit ·
6af17ac
1
Parent(s): a7435c6
fix: Remove 'scammer' word leak and improve human-likeness
Browse files

Critical fixes:
- Changed SCAMMER: to CALLER: in all prompts to prevent LLM echoing
- Added explicit forbidden words list in prompt (scam, fraud, scammer, bot, AI)
- Added post-generation sanitization to replace any leaked forbidden words
- Changed history format from 'Scammer/You' to 'Caller/Me'
- Added human imperfection instructions (typos, hesitation, incomplete sentences)
- app/agents/persona_engine.py +13 -4
- app/core/llm_client.py +2 -2
- app/core/memory.py +2 -2
- app/core/prompts.py +6 -5
- app/database/memory_db.py +2 -2
- scripts/quick_extraction_test.py +73 -0
app/agents/persona_engine.py
CHANGED
|
@@ -934,7 +934,7 @@ class PersonaEngine:
|
|
| 934 |
for m in history[-2:]:
|
| 935 |
s_msg = m.get('scammer_message', '')[:150] + ("..." if len(m.get('scammer_message', '')) > 150 else "")
|
| 936 |
h_rsp = m.get('honeypot_response', '')[:150] + ("..." if len(m.get('honeypot_response', '')) > 150 else "")
|
| 937 |
-
hist_str += f"Scammer: {s_msg}\nYou: {h_rsp}\n"
|
| 938 |
|
| 939 |
# 2. Truncate current message aggressively
|
| 940 |
safe_message = message[:500] + ("..." if len(message) > 500 else "")
|
|
@@ -984,12 +984,21 @@ class PersonaEngine:
|
|
| 984 |
|
| 985 |
if isinstance(response, str):
|
| 986 |
clean = response.strip().strip('"')
|
| 987 |
-
return clean if clean else None
|
| 988 |
elif hasattr(response, 'content') and response.content:
|
| 989 |
clean = response.content.strip().strip('"')
|
| 990 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 991 |
|
| 992 |
-
return None
|
| 993 |
|
| 994 |
def _static_response(
|
| 995 |
self,
|
|
|
|
| 934 |
for m in history[-2:]:
|
| 935 |
s_msg = m.get('scammer_message', '')[:150] + ("..." if len(m.get('scammer_message', '')) > 150 else "")
|
| 936 |
h_rsp = m.get('honeypot_response', '')[:150] + ("..." if len(m.get('honeypot_response', '')) > 150 else "")
|
| 937 |
+
hist_str += f"Caller: {s_msg}\nMe: {h_rsp}\n"
|
| 938 |
|
| 939 |
# 2. Truncate current message aggressively
|
| 940 |
safe_message = message[:500] + ("..." if len(message) > 500 else "")
|
|
|
|
| 984 |
|
| 985 |
if isinstance(response, str):
|
| 986 |
clean = response.strip().strip('"')
|
|
|
|
| 987 |
elif hasattr(response, 'content') and response.content:
|
| 988 |
clean = response.content.strip().strip('"')
|
| 989 |
+
else:
|
| 990 |
+
return None
|
| 991 |
+
|
| 992 |
+
# 🔥 CRITICAL: Sanitize forbidden words that break honeypot illusion
|
| 993 |
+
forbidden_words = ['scammer', 'scam', 'fraud', 'honeypot', 'bot', 'ai assistant', 'detection']
|
| 994 |
+
clean_lower = clean.lower()
|
| 995 |
+
for word in forbidden_words:
|
| 996 |
+
if word in clean_lower:
|
| 997 |
+
# Replace forbidden word with neutral alternative
|
| 998 |
+
import re
|
| 999 |
+
clean = re.sub(rf'\b{word}\b', 'sir' if word == 'scammer' else 'this', clean, flags=re.IGNORECASE)
|
| 1000 |
|
| 1001 |
+
return clean if clean else None
|
| 1002 |
|
| 1003 |
def _static_response(
|
| 1004 |
self,
|
app/core/llm_client.py
CHANGED
|
@@ -1771,8 +1771,8 @@ class LocalHFClient(BaseLLMClient):
|
|
| 1771 |
"""Simplify complex prompts for small models."""
|
| 1772 |
# For small models, use a very simple format
|
| 1773 |
# Extract the key message if it's a complex prompt
|
| 1774 |
-
if "SCAMMER:" in prompt:
|
| 1775 |
-
parts = prompt.split("SCAMMER:")
|
| 1776 |
if len(parts) > 1:
|
| 1777 |
msg = parts[-1].strip()[:200]
|
| 1778 |
return f"Reply as a confused Indian person to: {msg}\nReply:"
|
|
|
|
| 1771 |
"""Simplify complex prompts for small models."""
|
| 1772 |
# For small models, use a very simple format
|
| 1773 |
# Extract the key message if it's a complex prompt
|
| 1774 |
+
if "CALLER:" in prompt:
|
| 1775 |
+
parts = prompt.split("CALLER:")
|
| 1776 |
if len(parts) > 1:
|
| 1777 |
msg = parts[-1].strip()[:200]
|
| 1778 |
return f"Reply as a confused Indian person to: {msg}\nReply:"
|
app/core/memory.py
CHANGED
|
@@ -205,8 +205,8 @@ class ConversationMemory:
|
|
| 205 |
lines = []
|
| 206 |
|
| 207 |
for msg in history:
|
| 208 |
-
lines.append(f"Scammer: {msg['scammer_message']}")
|
| 209 |
-
lines.append(f"You: {msg['honeypot_response']}")
|
| 210 |
|
| 211 |
return "\n".join(lines)
|
| 212 |
|
|
|
|
| 205 |
lines = []
|
| 206 |
|
| 207 |
for msg in history:
|
| 208 |
+
lines.append(f"Caller: {msg['scammer_message']}")
|
| 209 |
+
lines.append(f"Me: {msg['honeypot_response']}")
|
| 210 |
|
| 211 |
return "\n".join(lines)
|
| 212 |
|
app/core/prompts.py
CHANGED
|
@@ -16,21 +16,22 @@ from app.core.static_prompts import (
|
|
| 16 |
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 17 |
# COMPACT FAST_CHAT PROMPT (Optimized for Groq Dev Tier - 6K TPM)
|
| 18 |
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 19 |
-
FAST_CHAT_PROMPT = '''You are {persona_name}, a confused Indian person
|
| 20 |
TRAITS: {persona_traits} | MOOD: {agitation} | PHASE: {phase} | LANGUAGE: {language}
|
| 21 |
|
| 22 |
RULES:
|
| 23 |
- Sound human, respond in {language_instruction}, 1-2 lines max
|
| 24 |
-
-
|
| 25 |
- Ask clarifying questions, delay, act confused
|
|
|
|
| 26 |
- Max 1 emoji (π π π€)
|
| 27 |
|
| 28 |
-
|
| 29 |
{history}
|
| 30 |
|
| 31 |
-
SCAMMER: {message}
|
| 32 |
|
| 33 |
-
Reply as {persona_name} in {language_instruction} (confused, hesitant):'''
|
| 34 |
|
| 35 |
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 36 |
# RESPONSE GENERATION (The "Firewalled" Humanization Layer)
|
|
|
|
| 16 |
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 17 |
# COMPACT FAST_CHAT PROMPT (Optimized for Groq Dev Tier - 6K TPM)
|
| 18 |
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 19 |
+
FAST_CHAT_PROMPT = '''You are {persona_name}, a confused Indian person on a call.
|
| 20 |
TRAITS: {persona_traits} | MOOD: {agitation} | PHASE: {phase} | LANGUAGE: {language}
|
| 21 |
|
| 22 |
RULES:
|
| 23 |
- Sound human, respond in {language_instruction}, 1-2 lines max
|
| 24 |
+
- NEVER use words: scam, fraud, scammer, bot, AI, honeypot, detection
|
| 25 |
- Ask clarifying questions, delay, act confused
|
| 26 |
+
- Be imperfect: typos, hesitation ("umm", "uh"), incomplete sentences
|
| 27 |
- Max 1 emoji (π π π€)
|
| 28 |
|
| 29 |
+
CONVERSATION:
|
| 30 |
{history}
|
| 31 |
|
| 32 |
+
CALLER: {message}
|
| 33 |
|
| 34 |
+
Reply as {persona_name} in {language_instruction} (confused, hesitant, human):'''
|
| 35 |
|
| 36 |
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 37 |
# RESPONSE GENERATION (The "Firewalled" Humanization Layer)
|
app/database/memory_db.py
CHANGED
|
@@ -323,8 +323,8 @@ class DatabaseMemoryStore:
|
|
| 323 |
lines = []
|
| 324 |
|
| 325 |
for msg in history:
|
| 326 |
-
lines.append(f"Scammer: {msg.get('scammer_message', '')}")
|
| 327 |
-
lines.append(f"You: {msg.get('honeypot_response', '')}")
|
| 328 |
|
| 329 |
return "\n".join(lines)
|
| 330 |
|
|
|
|
| 323 |
lines = []
|
| 324 |
|
| 325 |
for msg in history:
|
| 326 |
+
lines.append(f"Caller: {msg.get('scammer_message', '')}")
|
| 327 |
+
lines.append(f"Me: {msg.get('honeypot_response', '')}")
|
| 328 |
|
| 329 |
return "\n".join(lines)
|
| 330 |
|
scripts/quick_extraction_test.py
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""Quick test to verify intelligence extraction on HF API.

Sends one sample message to each of two deployed endpoints and prints the
HTTP status and the JSON body that comes back:

1. ``/api/v1/analyze``   - body is printed truncated to 2000 characters.
2. ``/api/guvi/analyze`` - body is printed in full; needs an ``x-api-key``
   header.

This is a manual smoke test: it performs real network calls and asserts
nothing. Run it directly (``python scripts/quick_extraction_test.py``).
"""
import json
import os
import traceback

import requests

BASE_URL = 'https://avinashanalytics-sentinel-scam-honeypo.hf.space'
REQUEST_TIMEOUT_SECONDS = 60
BANNER = "=" * 60

# NOTE(review): the key used to be hard-coded at the call site. It can now be
# overridden through the GUVI_API_KEY environment variable; the previous
# literal is kept as the fallback so existing invocations behave the same.
GUVI_API_KEY = os.environ.get('GUVI_API_KEY', 'GUVI_HACKATHON_V2')


def _print_banner(title):
    """Print *title* framed by two separator lines."""
    print(BANNER)
    print(title)
    print(BANNER)


def _post_and_report(url, payload, *, response_label, headers=None,
                     max_chars=None, show_traceback=False):
    """POST *payload* as JSON to *url* and print the outcome.

    Args:
        url: Endpoint to call.
        payload: JSON-serialisable request body.
        response_label: Heading printed right before the response body.
        headers: Optional extra HTTP headers.
        max_chars: When given, the pretty-printed body is cut to this many
            characters.
        show_traceback: When true, a traceback is printed after any error
            message.

    Returns:
        True when a JSON body was received and printed, False otherwise.
    """
    print(f'URL: {url}')
    print()

    try:
        resp = requests.post(
            url,
            json=payload,
            headers=headers,
            timeout=REQUEST_TIMEOUT_SECONDS,
        )
    except requests.RequestException as e:
        print(f'Error: {e}')
        if show_traceback:
            traceback.print_exc()
        return False

    # Print the status before decoding the body so that it is still visible
    # when the server answers with something that is not JSON.
    print(f'Status: {resp.status_code}')
    print()

    try:
        data = resp.json()
    except ValueError as e:
        print(f'Error: response body is not valid JSON ({e})')
        print(resp.text[:2000])
        if show_traceback:
            traceback.print_exc()
        return False

    print(response_label)
    body = json.dumps(data, indent=2, default=str)
    print(body if max_chars is None else body[:max_chars])
    return True


def main():
    """Run both endpoint checks in sequence."""
    # Test 1: Direct API v1/analyze endpoint (returns full intelligence)
    _print_banner("TEST 1: Direct /api/v1/analyze endpoint (full response)")

    url1 = f'{BASE_URL}/api/v1/analyze'
    payload1 = {
        'conversationId': 'test-extraction-002',
        'sender': 'scammer',
        'message': 'Send money now to UPI: scammer.fraud@paytm or visit http://fake-bank-secure.com/login. OTP is 847291. Call me at 9876543210.',
        'metadata': {
            'language': 'English',
            'channel': 'sms',
        },
    }
    _post_and_report(
        url1,
        payload1,
        response_label='--- RESPONSE (truncated) ---',
        max_chars=2000,
    )

    # Test 2: GUVI endpoint (minimal response, callback sends intel)
    print()
    _print_banner("TEST 2: GUVI /api/guvi/analyze endpoint (minimal response)")

    url2 = f'{BASE_URL}/api/guvi/analyze'
    headers = {
        'x-api-key': GUVI_API_KEY,
        'Content-Type': 'application/json',
    }
    payload2 = {
        'sessionId': 'test-extraction-003',
        'processId': 'proc-003',
        'message': {
            'text': 'Send money now to UPI: scammer.fraud@paytm or visit http://fake-bank-secure.com/login. OTP is 847291.',
            'sender': 'scammer',
        },
        'metadata': {
            'language': 'English',
            'channel': 'sms',
        },
    }
    succeeded = _post_and_report(
        url2,
        payload2,
        response_label='--- RESPONSE ---',
        headers=headers,
        show_traceback=True,
    )
    if succeeded:
        print()
        print("NOTE: Full intelligence is sent via CALLBACK to GUVI, not in this response.")


if __name__ == "__main__":
    main()
|