shelf-scribe / reconciliation.py
awilliams88's picture
Initialize Shelf Scribe MVP: AI restocking and pricing assistant
03539b1 verified
# Deterministic Reconciliation Logic for Shelf Scribe
import json
import math
import difflib
from datetime import datetime
from database import db_query, db_execute
def clean_string(s):
"""Helper to clean string for better comparison."""
if not s:
return ""
# Lowercase, strip whitespaces
return s.lower().strip()
def fuzzy_match_product(raw_name, catalog_products, threshold=0.65):
"""
Fuzzy matches a raw OCR name against products in the catalog.
Checks:
1. Direct match on name.
2. Direct match inside the aliases list of a product.
3. Fuzzy string matching on names and aliases.
Returns (matched_product_dict, confidence_score) or (None, 0.0)
"""
cleaned_raw = clean_string(raw_name)
if not cleaned_raw:
return None, 0.0
best_match = None
best_score = 0.0
for product in catalog_products:
p_name = clean_string(product["name"])
# 1. Direct name equality
if cleaned_raw == p_name:
return product, 1.0
# 2. Check aliases
aliases = []
if product["aliases"]:
try:
aliases = json.loads(product["aliases"])
except Exception:
# Fallback if it's a comma-separated string
aliases = [
a.strip() for a in product["aliases"].split(",") if a.strip()
]
for alias in aliases:
cleaned_alias = clean_string(alias)
if cleaned_raw == cleaned_alias:
return product, 1.0
# Fuzzy match on alias
score = difflib.SequenceMatcher(None, cleaned_raw, cleaned_alias).ratio()
if score > best_score:
best_score = score
best_match = product
# 3. Fuzzy match on product name
score = difflib.SequenceMatcher(None, cleaned_raw, p_name).ratio()
if score > best_score:
best_score = score
best_match = product
if best_score >= threshold:
return best_match, best_score
return None, 0.0
def reconcile_operations(shelf_detections, invoice_lines, category=None):
"""
Performs full reconciliation between VLM extractions, supplier invoices,
and the product catalog database.
Updates the database with detections and generates action items.
Returns a dict with:
- 'restocks': list of reorder items
- 'label_fixes': list of label price corrections/missing tags
- 'margin_warnings': list of price hikes causing low margins
- 'catalog_warnings': list of uncatalogued products detected
- 'check_items': list of low confidence detections
"""
# 1. Fetch catalog products
if category:
catalog = db_query("SELECT * FROM products WHERE category = ?", (category,))
else:
catalog = db_query("SELECT * FROM products")
# Clear old transaction data for this run
db_execute("DELETE FROM shelf_detections")
db_execute("DELETE FROM invoice_lines")
db_execute("DELETE FROM action_items")
matched_detections = {} # product_id -> detection_dict
reconciled_results = {
"restocks": [],
"label_fixes": [],
"margin_warnings": [],
"catalog_warnings": [],
"check_items": [],
}
# 2. Reconcile Shelf Detections
for det in shelf_detections:
raw_name = det["product_name_raw"]
matched_prod, score = fuzzy_match_product(raw_name, catalog)
prod_id = matched_prod["id"] if matched_prod else None
# Save to DB
db_execute(
"""
INSERT INTO shelf_detections (
product_name_raw, matched_product_id, visible_quantity_estimate,
shelf_price_detected, shelf_label_present, low_stock_signal,
empty_space_signal, confidence, evidence
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
""",
(
raw_name,
prod_id,
det.get("visible_quantity_estimate"),
det.get("shelf_price_detected"),
1 if det.get("shelf_label_present", True) else 0,
1 if det.get("low_stock_signal", False) else 0,
1 if det.get("empty_space_signal", False) else 0,
det.get("confidence", 1.0),
det.get("evidence", ""),
),
)
# Save matched tracking
if matched_prod:
matched_detections[prod_id] = det
# Check price/label issues
shelf_price_detected = det.get("shelf_price_detected")
catalog_price = matched_prod["shelf_price"]
label_present = det.get("shelf_label_present", True)
if not label_present:
reason = "Missing shelf price label tag."
item = {
"product_id": prod_id,
"product_name": matched_prod["name"],
"action_type": "label_fix",
"reason": reason,
"suggested_quantity": None,
"confidence": det.get("confidence", 1.0),
"evidence": det.get("evidence", ""),
}
reconciled_results["label_fixes"].append(item)
db_execute(
"INSERT INTO action_items (product_id, product_name, action_type, reason, confidence, evidence) VALUES (?, ?, ?, ?, ?, ?)",
(
prod_id,
matched_prod["name"],
"label_fix",
reason,
det.get("confidence", 1.0),
det.get("evidence", ""),
),
)
elif (
shelf_price_detected is not None
and abs(shelf_price_detected - catalog_price) > 0.01
):
reason = f"Label price mismatch! Detected: ${shelf_price_detected:.2f}, Catalog is ${catalog_price:.2f}."
item = {
"product_id": prod_id,
"product_name": matched_prod["name"],
"action_type": "label_fix",
"reason": reason,
"suggested_quantity": None,
"confidence": det.get("confidence", 1.0),
"evidence": det.get(
"evidence", f"Detected label price: ${shelf_price_detected:.2f}"
),
}
reconciled_results["label_fixes"].append(item)
db_execute(
"INSERT INTO action_items (product_id, product_name, action_type, reason, confidence, evidence) VALUES (?, ?, ?, ?, ?, ?)",
(
prod_id,
matched_prod["name"],
"label_fix",
reason,
det.get("confidence", 1.0),
f"Detected: ${shelf_price_detected:.2f}",
),
)
# Check low confidence
if det.get("confidence", 1.0) < 0.85:
reason = f"Low confidence detection ({int(det.get('confidence', 1.0) * 100)}%). Verify product match."
item = {
"product_id": prod_id,
"product_name": matched_prod["name"],
"action_type": "check",
"reason": reason,
"suggested_quantity": None,
"confidence": det.get("confidence", 1.0),
"evidence": det.get("evidence", ""),
}
reconciled_results["check_items"].append(item)
db_execute(
"INSERT INTO action_items (product_id, product_name, action_type, reason, confidence, evidence) VALUES (?, ?, ?, ?, ?, ?)",
(
prod_id,
matched_prod["name"],
"check",
reason,
det.get("confidence", 1.0),
det.get("evidence", ""),
),
)
else:
# Uncatalogued item!
reason = f"Uncatalogued product '{raw_name}' detected on shelf."
item = {
"product_id": None,
"product_name": raw_name,
"action_type": "catalog_update",
"reason": reason,
"suggested_quantity": None,
"confidence": det.get("confidence", 1.0),
"evidence": det.get("evidence", ""),
}
reconciled_results["catalog_warnings"].append(item)
db_execute(
"INSERT INTO action_items (product_id, product_name, action_type, reason, confidence, evidence) VALUES (NULL, ?, ?, ?, ?, ?)",
(
raw_name,
"catalog_update",
reason,
det.get("confidence", 1.0),
det.get("evidence", ""),
),
)
# 3. Reconcile Invoice Lines
for line in invoice_lines:
raw_name = line["product_name_raw"]
matched_prod, score = fuzzy_match_product(raw_name, catalog)
prod_id = matched_prod["id"] if matched_prod else None
# Save to DB
db_execute(
"""
INSERT INTO invoice_lines (
product_name_raw, matched_product_id, quantity,
unit_cost, total_cost, confidence
) VALUES (?, ?, ?, ?, ?, ?)
""",
(
raw_name,
prod_id,
line.get("quantity", 0),
line.get("unit_cost", 0.0),
line.get("total_cost", 0.0),
line.get("confidence", 1.0),
),
)
if matched_prod:
# Update catalog supplier price dynamically based on latest invoice!
new_cost = line.get("unit_cost", 0.0)
old_cost = matched_prod["supplier_price"]
if abs(new_cost - old_cost) > 0.005:
db_execute(
"UPDATE products SET supplier_price = ? WHERE id = ?",
(new_cost, prod_id),
)
# Check margin warning
shelf_price = matched_prod["shelf_price"]
target_margin = matched_prod["target_margin_percent"]
# Calculate current margin
current_margin = (
((shelf_price - new_cost) / shelf_price) * 100.0
if shelf_price > 0
else 0
)
if current_margin < target_margin - 0.5:
suggested_price = new_cost / (1 - (target_margin / 100.0))
reason = f"Margin Alert! Supplier cost hiked from ${old_cost:.2f} to ${new_cost:.2f}. Margin dropped to {current_margin:.1f}% (Target: {target_margin:.1f}%)."
evidence = f"Suggest raising shelf price to ${suggested_price:.2f} to restore margin."
item = {
"product_id": prod_id,
"product_name": matched_prod["name"],
"action_type": "price_review",
"reason": reason,
"suggested_quantity": None,
"confidence": line.get("confidence", 1.0),
"evidence": evidence,
}
reconciled_results["margin_warnings"].append(item)
db_execute(
"INSERT INTO action_items (product_id, product_name, action_type, reason, confidence, evidence) VALUES (?, ?, ?, ?, ?, ?)",
(
prod_id,
matched_prod["name"],
"price_review",
reason,
line.get("confidence", 1.0),
evidence,
),
)
else:
# Uncatalogued item on invoice!
reason = f"Uncatalogued product '{raw_name}' billed on invoice."
item = {
"product_id": None,
"product_name": raw_name,
"action_type": "catalog_update",
"reason": reason,
"suggested_quantity": None,
"confidence": line.get("confidence", 1.0),
"evidence": f"Qty: {line.get('quantity')}, Cost: ${line.get('unit_cost')}",
}
reconciled_results["catalog_warnings"].append(item)
db_execute(
"INSERT INTO action_items (product_id, product_name, action_type, reason, confidence, evidence) VALUES (NULL, ?, ?, ?, ?, ?)",
(
raw_name,
"catalog_update",
reason,
line.get("confidence", 1.0),
item["evidence"],
),
)
# 4. Generate Restock Recommendations
for prod in catalog:
prod_id = prod["id"]
# Find quantity estimate
visible_qty = 0 # Default if not seen at all (out of stock)
if prod_id in matched_detections:
visible_qty = matched_detections[prod_id].get(
"visible_quantity_estimate", 0
)
if visible_qty is None:
visible_qty = 0
# Check reorder trigger
threshold = prod["reorder_threshold"]
if visible_qty <= threshold:
pack_size = prod["pack_size"]
raw_needed = (threshold * 2) - visible_qty
# Round up to nearest pack size multiple
order_packs = math.ceil(raw_needed / pack_size)
if order_packs == 0:
order_packs = 1
suggested_qty = order_packs * pack_size
reason = f"Low Stock detected on shelf! Visible: {visible_qty} units (Threshold: {threshold})."
evidence = f"Restocking suggested. Reordering {order_packs} pack(s) of {pack_size}."
item = {
"product_id": prod_id,
"product_name": prod["name"],
"action_type": "reorder",
"reason": reason,
"suggested_quantity": suggested_qty,
"confidence": matched_detections[prod_id].get("confidence", 0.90)
if prod_id in matched_detections
else 0.80,
"evidence": evidence,
}
reconciled_results["restocks"].append(item)
db_execute(
"INSERT INTO action_items (product_id, product_name, action_type, reason, suggested_quantity, confidence, evidence) VALUES (?, ?, ?, ?, ?, ?, ?)",
(
prod_id,
prod["name"],
"reorder",
reason,
suggested_qty,
item["confidence"],
evidence,
),
)
return reconciled_results
def generate_supplier_message(restock_items, supplier_name="Wholesale Supplier"):
"""
Generates a pre-formatted, easy-to-copy WhatsApp/SMS message draft
containing the restock order details.
"""
if not restock_items:
return "No restock actions needed today. Catalog stock levels are sufficient!"
date_str = datetime.now().strftime("%Y-%m-%d")
message = f"📋 *SHELF SCRIBE RESTOCK ORDER* ({date_str})\n"
message += f"To: *{supplier_name}*\n"
message += "--------------------------------------\n"
message += "Please prepare the following order for our store:\n\n"
total_cost = 0.0
for idx, item in enumerate(restock_items, 1):
prod_id = item["product_id"]
suggested_qty = item.get("suggested_quantity", 0)
# Query latest price
prod = db_query(
"SELECT supplier_price, pack_size FROM products WHERE id = ?", (prod_id,)
)
if prod:
price = prod[0]["supplier_price"]
pack_size = prod[0]["pack_size"]
cost = suggested_qty * price
total_cost += cost
packs = suggested_qty // pack_size
message += f"{idx}. *{item['product_name']}*\n"
message += f" Qty: *{suggested_qty}* units ({packs} pack{'s' if packs > 1 else ''} of {pack_size}) @ ${price:.2f}/unit = *${cost:.2f}*\n"
else:
message += f"{idx}. *{item['product_name']}*\n"
message += f" Qty: *{suggested_qty}* units\n"
message += "--------------------------------------\n"
message += f"💰 *Total Estimated Cost: ${total_cost:.2f}*\n\n"
message += (
"Please reply with order confirmation and estimated delivery time. Thank you!"
)
return message
def generate_printable_labels_html(products):
"""
Generates a modern, grid-based, print-ready HTML stylesheet
containing shelf labels for pricing and product presentation.
"""
if not products:
return "<p style='color: #94a3b8; text-align: center; padding: 2rem;'>No labels selected to print.</p>"
html = """
<div class="labels-print-container">
<style>
.labels-print-container {
display: grid;
grid-template-columns: repeat(auto-fill, minmax(280px, 1fr));
gap: 15px;
padding: 10px;
background-color: transparent;
}
.shelf-label-card {
border: 2px solid #334155;
border-radius: 8px;
background-color: #ffffff;
color: #0f172a;
padding: 15px;
box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1);
display: flex;
flex-direction: column;
justify-content: space-between;
height: 140px;
position: relative;
overflow: hidden;
}
.label-header {
font-size: 0.75rem;
text-transform: uppercase;
letter-spacing: 0.05em;
color: #64748b;
font-weight: 700;
margin-bottom: 4px;
}
.label-title {
font-size: 1.1rem;
font-weight: 800;
color: #1e293b;
line-height: 1.25;
margin-bottom: 8px;
height: 42px;
overflow: hidden;
display: -webkit-box;
-webkit-line-clamp: 2;
-webkit-box-orient: vertical;
}
.label-footer {
display: flex;
justify-content: space-between;
align-items: flex-end;
margin-top: 10px;
}
.label-price-box {
font-size: 1.8rem;
font-weight: 900;
color: #059669;
line-height: 1;
}
.label-barcode-placeholder {
display: flex;
flex-direction: column;
align-items: center;
gap: 2px;
}
.barcode-lines {
display: flex;
height: 25px;
align-items: stretch;
}
.barcode-line-black {
background-color: #000000;
margin-right: 1px;
}
.barcode-line-white {
background-color: #ffffff;
margin-right: 1px;
}
.barcode-num {
font-size: 0.55rem;
font-family: monospace;
color: #64748b;
letter-spacing: 1px;
}
.label-pack-info {
position: absolute;
top: 10px;
right: 10px;
background-color: #f1f5f9;
padding: 2px 6px;
font-size: 0.65rem;
border-radius: 4px;
color: #475569;
font-weight: 600;
}
@media print {
body * {
visibility: hidden;
}
.labels-print-container, .labels-print-container * {
visibility: visible;
}
.labels-print-container {
position: absolute;
left: 0;
top: 0;
width: 100%;
grid-template-columns: repeat(3, 1fr) !important;
background-color: white !important;
}
.shelf-label-card {
border: 1px solid #000000 !important;
box-shadow: none !important;
page-break-inside: avoid;
}
}
</style>
"""
for idx, prod in enumerate(products):
# Generate pseudo barcode lines based on item ID
barcode_id = f"2000000{prod['id']:05d}"
barcode_html = '<div class="barcode-lines">'
widths = [2, 1, 3, 1, 2, 4, 1, 2, 3, 1, 2, 1, 3, 2, 1]
for b_idx, w in enumerate(widths):
color = "black" if b_idx % 2 == 0 else "white"
barcode_html += (
f'<div class="barcode-line-{color}" style="width: {w}px;"></div>'
)
barcode_html += "</div>"
category_name = prod.get("category", "General")
html += f"""
<div class="shelf-label-card">
<span class="label-pack-info">Pack: {prod["pack_size"]}</span>
<div>
<div class="label-header">{category_name}</div>
<div class="label-title">{prod["name"]}</div>
</div>
<div class="label-footer">
<div class="label-price-box">${prod["shelf_price"]:.2f}</div>
<div class="label-barcode-placeholder">
{barcode_html}
<div class="barcode-num">{barcode_id}</div>
</div>
</div>
</div>
"""
html += "</div>"
return html