Update app.py
Browse files
Extract each customer message independently
app.py
CHANGED
|
@@ -61,6 +61,30 @@ Critical rules:
|
|
| 61 |
- Always set reply_drafts to [].
|
| 62 |
"""
|
| 63 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
|
| 65 |
EXAMPLE_INPUT = """Maya: Hi! Can I get 2 dozen cupcakes for Saturday morning? Half vanilla, half chocolate.
|
| 66 |
Sam: Need 1 birthday cake, chocolate, for pickup Friday 5pm. I can pay Venmo.
|
|
@@ -181,14 +205,40 @@ def build_reply_drafts(data):
|
|
| 181 |
|
| 182 |
return replies
|
| 183 |
|
| 184 |
-
def
|
| 185 |
-
|
| 186 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 187 |
|
|
|
|
|
|
|
|
|
|
| 188 |
prompt = tokenizer.apply_chat_template(
|
| 189 |
[
|
| 190 |
-
{"role": "system", "content":
|
| 191 |
-
{"role": "user", "content":
|
| 192 |
],
|
| 193 |
tokenize=False,
|
| 194 |
add_generation_prompt=True,
|
|
@@ -198,7 +248,7 @@ def analyze_messages(messages):
|
|
| 198 |
with torch.no_grad():
|
| 199 |
output = model.generate(
|
| 200 |
**inputs,
|
| 201 |
-
max_new_tokens=
|
| 202 |
do_sample=False,
|
| 203 |
pad_token_id=tokenizer.eos_token_id,
|
| 204 |
)
|
|
@@ -209,22 +259,50 @@ def analyze_messages(messages):
|
|
| 209 |
)
|
| 210 |
|
| 211 |
try:
|
| 212 |
-
|
| 213 |
-
except Exception
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
"",
|
| 218 |
-
|
| 219 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 220 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 221 |
orders_df = normalize_orders(data)
|
|
|
|
| 222 |
auto_prep = build_prep_list(data)
|
| 223 |
auto_replies = build_reply_drafts(data)
|
| 224 |
-
|
| 225 |
data["prep_list"] = auto_prep
|
| 226 |
data["reply_drafts"] = auto_replies
|
| 227 |
-
|
| 228 |
prep = format_list("Prep list", auto_prep)
|
| 229 |
replies = format_replies(auto_replies)
|
| 230 |
raw = json.dumps(data, indent=2, ensure_ascii=False)
|
|
|
|
| 61 |
- Always set reply_drafts to [].
|
| 62 |
"""
|
| 63 |
|
| 64 |
+
SINGLE_ORDER_PROMPT = """
|
| 65 |
+
You extract one order from one customer's DM.
|
| 66 |
+
|
| 67 |
+
Return only valid JSON with this exact shape:
|
| 68 |
+
{
|
| 69 |
+
"item": "",
|
| 70 |
+
"quantity": "",
|
| 71 |
+
"flavor": "",
|
| 72 |
+
"pickup_time": "",
|
| 73 |
+
"delivery_address": "",
|
| 74 |
+
"payment_status": "",
|
| 75 |
+
"notes": "",
|
| 76 |
+
"missing_fields": []
|
| 77 |
+
}
|
| 78 |
+
|
| 79 |
+
Rules:
|
| 80 |
+
- Use only facts from this one message.
|
| 81 |
+
- Do not invent details.
|
| 82 |
+
- Put pickup or delivery time in pickup_time.
|
| 83 |
+
- Put pickup place or delivery address in delivery_address.
|
| 84 |
+
- If unknown, use an empty string.
|
| 85 |
+
- missing_fields can only contain: quantity, flavor, pickup_time, delivery_address, payment_status.
|
| 86 |
+
"""
|
| 87 |
+
|
| 88 |
|
| 89 |
EXAMPLE_INPUT = """Maya: Hi! Can I get 2 dozen cupcakes for Saturday morning? Half vanilla, half chocolate.
|
| 90 |
Sam: Need 1 birthday cake, chocolate, for pickup Friday 5pm. I can pay Venmo.
|
|
|
|
| 205 |
|
| 206 |
return replies
|
| 207 |
|
| 208 |
+
def _is_customer_prefix(prefix, rest):
    """Return True when ``prefix`` (text before the first colon) looks like a name.

    ``rest`` is the text after that colon; both are passed unstripped so the
    characters adjacent to the colon can be inspected.
    """
    name = prefix.strip()
    # Same heuristic as before: a short, non-empty label of at most three words.
    if not name or len(name.split()) > 3:
        return False
    # "5:30pm" - a colon wedged between two digits is a clock time, not a
    # "Name: message" separator.
    if prefix[-1:].isdigit() and rest[:1].isdigit():
        return False
    # "https://..." - a colon followed by "//" is a URL scheme.
    if rest.startswith("//"):
        return False
    return True


def split_customer_messages(messages):
    """Split a pasted block of DMs into ``(customer, message)`` pairs.

    A line of the form ``Name: text`` (name of at most three words) starts a
    new customer entry. Following lines without such a prefix are appended to
    the current customer's message, joined with single spaces. Lines that
    appear before any named customer are each emitted under the placeholder
    name ``"Customer"``. Blank lines are ignored and entries whose message is
    empty are dropped.

    Colons that belong to a clock time (``5:30pm``) or a URL (``https://``)
    are not treated as name separators, so such lines stay attached to the
    customer they belong to.

    Args:
        messages: The raw pasted text; may be empty or ``None``.

    Returns:
        A list of ``(customer_name, message_body)`` tuples in input order.
    """
    if not messages:
        return []

    entries = []
    current_customer = ""
    current_parts = []

    def flush():
        # Emit the entry being accumulated, if any.
        if current_customer or current_parts:
            entries.append(
                (current_customer or "Customer", " ".join(current_parts).strip())
            )

    for raw_line in messages.splitlines():
        line = raw_line.strip()
        if not line:
            continue

        prefix, colon, rest = line.partition(":")
        if colon and _is_customer_prefix(prefix, rest):
            flush()
            current_customer = prefix.strip()
            current_parts = [rest.strip()]
            continue

        if current_parts:
            # Continuation of the current customer's message.
            current_parts.append(line)
        else:
            # Text seen before any "Name:" header stands alone.
            entries.append(("Customer", line))

    flush()

    return [(name, body) for name, body in entries if body]
|
| 236 |
+
|
| 237 |
+
def extract_single_order(customer, message):
|
| 238 |
prompt = tokenizer.apply_chat_template(
|
| 239 |
[
|
| 240 |
+
{"role": "system", "content": SINGLE_ORDER_PROMPT},
|
| 241 |
+
{"role": "user", "content": f"Customer: {customer}\nMessage: {message}"},
|
| 242 |
],
|
| 243 |
tokenize=False,
|
| 244 |
add_generation_prompt=True,
|
|
|
|
| 248 |
with torch.no_grad():
|
| 249 |
output = model.generate(
|
| 250 |
**inputs,
|
| 251 |
+
max_new_tokens=350,
|
| 252 |
do_sample=False,
|
| 253 |
pad_token_id=tokenizer.eos_token_id,
|
| 254 |
)
|
|
|
|
| 259 |
)
|
| 260 |
|
| 261 |
try:
|
| 262 |
+
parsed = extract_json(generated)
|
| 263 |
+
except Exception:
|
| 264 |
+
parsed = {
|
| 265 |
+
"item": "",
|
| 266 |
+
"quantity": "",
|
| 267 |
+
"flavor": "",
|
| 268 |
+
"pickup_time": "",
|
| 269 |
+
"delivery_address": "",
|
| 270 |
+
"payment_status": "",
|
| 271 |
+
"notes": message,
|
| 272 |
+
"missing_fields": [],
|
| 273 |
+
}
|
| 274 |
+
|
| 275 |
+
order = {"customer": customer}
|
| 276 |
+
for col in ORDER_COLUMNS[1:]:
|
| 277 |
+
value = parsed.get(col, "")
|
| 278 |
+
if col == "missing_fields":
|
| 279 |
+
if isinstance(value, list):
|
| 280 |
+
order[col] = value
|
| 281 |
+
elif isinstance(value, str):
|
| 282 |
+
order[col] = [part.strip() for part in value.split(",") if part.strip()]
|
| 283 |
+
else:
|
| 284 |
+
order[col] = []
|
| 285 |
+
else:
|
| 286 |
+
order[col] = text_value(value)
|
| 287 |
|
| 288 |
+
return order
|
| 289 |
+
|
| 290 |
+
def analyze_messages(messages):
|
| 291 |
+
if not messages.strip():
|
| 292 |
+
return pd.DataFrame(columns=ORDER_COLUMNS), "Paste some DMs first.", "", ""
|
| 293 |
+
|
| 294 |
+
entries = split_customer_messages(messages)
|
| 295 |
+
orders_data = [extract_single_order(customer, message) for customer, message in entries]
|
| 296 |
+
|
| 297 |
+
data = {"orders": orders_data}
|
| 298 |
orders_df = normalize_orders(data)
|
| 299 |
+
|
| 300 |
auto_prep = build_prep_list(data)
|
| 301 |
auto_replies = build_reply_drafts(data)
|
| 302 |
+
|
| 303 |
data["prep_list"] = auto_prep
|
| 304 |
data["reply_drafts"] = auto_replies
|
| 305 |
+
|
| 306 |
prep = format_list("Prep list", auto_prep)
|
| 307 |
replies = format_replies(auto_replies)
|
| 308 |
raw = json.dumps(data, indent=2, ensure_ascii=False)
|