SSSSSSSiao committed on
Commit
77df6ce
·
verified ·
1 Parent(s): c1bc92f

Update app.py

Browse files

Extract each customer message independently

Files changed (1) hide show
  1. app.py +94 -16
app.py CHANGED
@@ -61,6 +61,30 @@ Critical rules:
61
  - Always set reply_drafts to [].
62
  """
63
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
 
65
  EXAMPLE_INPUT = """Maya: Hi! Can I get 2 dozen cupcakes for Saturday morning? Half vanilla, half chocolate.
66
  Sam: Need 1 birthday cake, chocolate, for pickup Friday 5pm. I can pay Venmo.
@@ -181,14 +205,40 @@ def build_reply_drafts(data):
181
 
182
  return replies
183
 
184
- def analyze_messages(messages):
185
- if not messages.strip():
186
- return pd.DataFrame(columns=ORDER_COLUMNS), "Paste some DMs first.", "", ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
187
 
 
 
 
188
  prompt = tokenizer.apply_chat_template(
189
  [
190
- {"role": "system", "content": SYSTEM_PROMPT},
191
- {"role": "user", "content": messages},
192
  ],
193
  tokenize=False,
194
  add_generation_prompt=True,
@@ -198,7 +248,7 @@ def analyze_messages(messages):
198
  with torch.no_grad():
199
  output = model.generate(
200
  **inputs,
201
- max_new_tokens=900,
202
  do_sample=False,
203
  pad_token_id=tokenizer.eos_token_id,
204
  )
@@ -209,22 +259,50 @@ def analyze_messages(messages):
209
  )
210
 
211
  try:
212
- data = extract_json(generated)
213
- except Exception as exc:
214
- return (
215
- pd.DataFrame(columns=ORDER_COLUMNS),
216
- f"### Needs review\nThe model did not return valid JSON: {exc}",
217
- "",
218
- generated,
219
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
220
 
 
 
 
 
 
 
 
 
 
 
221
  orders_df = normalize_orders(data)
 
222
  auto_prep = build_prep_list(data)
223
  auto_replies = build_reply_drafts(data)
224
-
225
  data["prep_list"] = auto_prep
226
  data["reply_drafts"] = auto_replies
227
-
228
  prep = format_list("Prep list", auto_prep)
229
  replies = format_replies(auto_replies)
230
  raw = json.dumps(data, indent=2, ensure_ascii=False)
 
61
  - Always set reply_drafts to [].
62
  """
63
 
64
+ SINGLE_ORDER_PROMPT = """
65
+ You extract one order from one customer's DM.
66
+
67
+ Return only valid JSON with this exact shape:
68
+ {
69
+ "item": "",
70
+ "quantity": "",
71
+ "flavor": "",
72
+ "pickup_time": "",
73
+ "delivery_address": "",
74
+ "payment_status": "",
75
+ "notes": "",
76
+ "missing_fields": []
77
+ }
78
+
79
+ Rules:
80
+ - Use only facts from this one message.
81
+ - Do not invent details.
82
+ - Put pickup or delivery time in pickup_time.
83
+ - Put pickup place or delivery address in delivery_address.
84
+ - If unknown, use an empty string.
85
+ - missing_fields can only contain: quantity, flavor, pickup_time, delivery_address, payment_status.
86
+ """
87
+
88
 
89
  EXAMPLE_INPUT = """Maya: Hi! Can I get 2 dozen cupcakes for Saturday morning? Half vanilla, half chocolate.
90
  Sam: Need 1 birthday cake, chocolate, for pickup Friday 5pm. I can pay Venmo.
 
205
 
206
  return replies
207
 
208
+ def split_customer_messages(messages):
209
+ entries = []
210
+ current_customer = ""
211
+ current_parts = []
212
+
213
+ for raw_line in messages.splitlines():
214
+ line = raw_line.strip()
215
+ if not line:
216
+ continue
217
+
218
+ if ":" in line:
219
+ possible_name, body = line.split(":", 1)
220
+ if possible_name.strip() and len(possible_name.strip().split()) <= 3:
221
+ if current_customer or current_parts:
222
+ entries.append((current_customer or "Customer", " ".join(current_parts).strip()))
223
+ current_customer = possible_name.strip()
224
+ current_parts = [body.strip()]
225
+ continue
226
+
227
+ if current_parts:
228
+ current_parts.append(line)
229
+ else:
230
+ entries.append(("Customer", line))
231
+
232
+ if current_customer or current_parts:
233
+ entries.append((current_customer or "Customer", " ".join(current_parts).strip()))
234
 
235
+ return [(name, body) for name, body in entries if body]
236
+
237
+ def extract_single_order(customer, message):
238
  prompt = tokenizer.apply_chat_template(
239
  [
240
+ {"role": "system", "content": SINGLE_ORDER_PROMPT},
241
+ {"role": "user", "content": f"Customer: {customer}\nMessage: {message}"},
242
  ],
243
  tokenize=False,
244
  add_generation_prompt=True,
 
248
  with torch.no_grad():
249
  output = model.generate(
250
  **inputs,
251
+ max_new_tokens=350,
252
  do_sample=False,
253
  pad_token_id=tokenizer.eos_token_id,
254
  )
 
259
  )
260
 
261
  try:
262
+ parsed = extract_json(generated)
263
+ except Exception:
264
+ parsed = {
265
+ "item": "",
266
+ "quantity": "",
267
+ "flavor": "",
268
+ "pickup_time": "",
269
+ "delivery_address": "",
270
+ "payment_status": "",
271
+ "notes": message,
272
+ "missing_fields": [],
273
+ }
274
+
275
+ order = {"customer": customer}
276
+ for col in ORDER_COLUMNS[1:]:
277
+ value = parsed.get(col, "")
278
+ if col == "missing_fields":
279
+ if isinstance(value, list):
280
+ order[col] = value
281
+ elif isinstance(value, str):
282
+ order[col] = [part.strip() for part in value.split(",") if part.strip()]
283
+ else:
284
+ order[col] = []
285
+ else:
286
+ order[col] = text_value(value)
287
 
288
+ return order
289
+
290
+ def analyze_messages(messages):
291
+ if not messages.strip():
292
+ return pd.DataFrame(columns=ORDER_COLUMNS), "Paste some DMs first.", "", ""
293
+
294
+ entries = split_customer_messages(messages)
295
+ orders_data = [extract_single_order(customer, message) for customer, message in entries]
296
+
297
+ data = {"orders": orders_data}
298
  orders_df = normalize_orders(data)
299
+
300
  auto_prep = build_prep_list(data)
301
  auto_replies = build_reply_drafts(data)
302
+
303
  data["prep_list"] = auto_prep
304
  data["reply_drafts"] = auto_replies
305
+
306
  prep = format_list("Prep list", auto_prep)
307
  replies = format_replies(auto_replies)
308
  raw = json.dumps(data, indent=2, ensure_ascii=False)