|
|
|
|
|
|
|
|
|
|
| import re
|
| import json
|
| import requests
|
| from pathlib import Path
|
| from utils_student import OLLAMA_API
|
# Generated emails are written next to this script, under data/raw/newsletters.
OUTPUT_DIR = Path(__file__).parent.joinpath('data', 'raw', 'newsletters')

# Target number of .txt files in OUTPUT_DIR; main() generates the difference.
NUM_EMAILS = 600
|
|
|
# Email descriptions substituted into PROMPT_TEMPLATE, kept in three groups.
# The group labels are descriptive only; consumers see one flat, ordered list.

# Government agencies and public institutions.
_PUBLIC_SECTOR_TYPES = (
    "VA (Veterans Affairs) benefits update newsletter",
    "Social Security Administration monthly update",
    "IRS tax season reminder newsletter",
    "State university alumni association newsletter",
    "Public library community events newsletter",
    "City council weekly digest",
    "County health department wellness newsletter",
    "State DMV services update",
    "Federal student aid (FAFSA) reminder email",
    "National Park Service seasonal newsletter",
)

# Events, clubs and community organizations.
_COMMUNITY_EVENT_TYPES = (
    "Eventbrite event registration for a local 5K charity run",
    "Meetup.com weekly digest for a Python developer group",
    "Local rec league sports schedule and registration reminder",
    "Community theater ticket sale announcement",
    "Youth soccer club seasonal registration email",
    "Local gym membership promotion with class schedule",
    "Cycling club annual group ride and event announcement",
    "Neighborhood association monthly events digest",
    "University student organization weekly newsletter",
    "Non-profit volunteer opportunity announcement",
    "Chamber of commerce networking event invitation",
    "Adult softball league spring registration reminder",
    "Book club monthly selection and meeting announcement",
    "Charity fundraising gala ticket sale email",
    "Local farmers market vendor and schedule newsletter",
)

# Automated notifications from online platforms and services.
_PLATFORM_NOTIFICATION_TYPES = (
    "Steam / gaming platform wishlist sale and purchase notification",
    "Amazon / eBay / Etsy order confirmation and shipping update",
    "GitHub Actions CI notification and pull request comment",
    "Netflix / Spotify subscription renewal receipt",
    "Airbnb / hotel booking confirmation and check-in reminder",
    "LinkedIn job alert and connection request notification",
    "Bank account activity notification (Chase / Bank of America style)",
    "UPS / FedEx / USPS shipping notification with tracking number",
    "Zoom / Microsoft Teams meeting invitation and reminder",
    "University Canvas / Blackboard grade notification and assignment reminder",
)

NEWSLETTER_TYPES = [
    *_PUBLIC_SECTOR_TYPES,
    *_COMMUNITY_EVENT_TYPES,
    *_PLATFORM_NOTIFICATION_TYPES,
]
|
|
|
# Prompt sent to the model; "{newsletter_type}" is the only format placeholder.
# Built from one literal per line so each requirement is easy to diff and edit.
PROMPT_TEMPLATE = (
    "Write a realistic {newsletter_type} email.\n"
    "\n"
    "Requirements:\n"
    "- Include a Subject: line at the top\n"
    "- Use professional but accessible language\n"
    "- Include 2-4 links (use realistic placeholder URLs like https://www.va.gov/benefits or https://www.eventbrite.com/e/example)\n"
    '- Mention words like "free", "click here", "subscribe", "limited time", "register now", "sign up" naturally where appropriate\n'
    "- Include a mix of uppercase headers and normal text\n"
    '- Include a specific date and time (e.g., "Saturday, May 14, 2026 at 9:00 AM")\n'
    '- Include a specific location or venue (e.g., "Virginia Beach Convention Center, 1000 19th St")\n'
    "- Include an organization name and a contact person name\n"
    '- Include a physical mailing address in the footer (e.g., "123 Main St, Suite 200, Norfolk, VA 23510")\n'
    "- 150-400 words in the body\n"
    "- End with an unsubscribe notice\n"
    "\n"
    "Write ONLY the email (Subject line + body). No commentary. /no_think"
)
|
|
|
|
|
|
|
def generate_email(newsletter_type, model='gemma3:1b'):
    """Request one synthetic email of the given type from the Ollama chat API.

    Args:
        newsletter_type: Description substituted into ``PROMPT_TEMPLATE``.
        model: Name of the Ollama model to query.

    Returns:
        The generated text with any ``<think>...</think>`` spans removed and
        surrounding whitespace stripped (possibly an empty string), or
        ``None`` when the request fails, the server answers with a non-200
        status, or the response body does not have the expected JSON shape.
        Failures are reported on stdout and never raised, so one bad response
        cannot abort a long batch run.
    """
    prompt = PROMPT_TEMPLATE.format(newsletter_type=newsletter_type)
    payload = {
        'model': model,
        'messages': [{'role': 'user', 'content': prompt}],
        'stream': False,
        'think': False,
        'options': {'temperature': 0.8, 'num_predict': 600},
    }

    try:
        resp = requests.post(OLLAMA_API, json=payload, timeout=120)
    except requests.RequestException as e:
        print(' Error generating email: %s' % e)
        return None

    if resp.status_code != 200:
        # Previously a non-200 answer fell through silently; surface the
        # status so a failing server is distinguishable from short output.
        print(' Error generating email: HTTP %s' % resp.status_code)
        return None

    try:
        content = resp.json().get('message', {}).get('content', '')
        # Drop any reasoning the model emitted despite 'think': False.
        return re.sub(r'<think>.*?</think>', '', content, flags=re.DOTALL).strip()
    except (ValueError, KeyError, AttributeError, TypeError) as e:
        # ValueError: body is not JSON. AttributeError/TypeError: the JSON is
        # not the expected {"message": {"content": "<str>"}} shape.
        print(' Error generating email: %s' % e)
        return None
|
|
|
|
|
|
|
def _preferred_model(model_names):
    """Return the first gemma3 model name, else the first qwen3.5 one, else None."""
    for family in ('gemma3', 'qwen3.5'):
        matches = [name for name in model_names if family in name]
        if matches:
            return matches[0]
    return None


def main():
    """Top up OUTPUT_DIR with generated emails until it holds NUM_EMAILS files.

    Steps:
      1. Ask the local Ollama server (``/api/tags`` on localhost:11434) which
         models are installed and pick a gemma3 model, falling back to a
         qwen3.5 one. Returns early with a hint when the server is
         unreachable or neither model family is installed.
      2. Count the ``*.txt`` files already in ``OUTPUT_DIR`` and make one
         generation attempt per missing file, cycling through
         ``NEWSLETTER_TYPES``.
      3. Write each accepted email (more than 50 characters) to the next
         unused ``newsletter_NNN.txt`` name; rejected attempts are skipped,
         so a run may end below the target.

    Progress and errors are reported on stdout; nothing is returned.
    """
    print('Starting newsletter generation...')
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

    try:
        resp = requests.get('http://localhost:11434/api/tags', timeout=5)
        models = [m['name'] for m in resp.json().get('models', [])]
    except (requests.RequestException, ValueError, KeyError) as e:
        print('Ollama not running (%s). Start with: ollama serve' % e)
        return

    model = _preferred_model(models)
    if model is None:
        print('No gemma3 or qwen3.5 model found. Run: ollama pull gemma3:1b')
        return
    print('Using model: %s' % model)

    start_idx = len(list(OUTPUT_DIR.glob('*.txt')))
    needed = NUM_EMAILS - start_idx

    if needed <= 0:
        print('Already have %d newsletters. Done.' % start_idx)
        return

    print('Generating %d newsletters (have %d, target %d)...' % (needed, start_idx, NUM_EMAILS))

    # File numbers advance only on a successful write, so skipped attempts no
    # longer leave gaps in the sequence.
    next_idx = start_idx
    for i in range(needed):
        # Offset by start_idx so a resumed run continues the type rotation
        # instead of restarting at the first type every time.
        ntype = NEWSLETTER_TYPES[(start_idx + i) % len(NEWSLETTER_TYPES)]
        print(' [%d/%d] %s...' % (start_idx + i + 1, NUM_EMAILS, ntype))
        email = generate_email(ntype, model)
        if not (email and len(email) > 50):
            print(' Skipped (too short or failed)')
            continue

        # Gaps left by earlier runs mean the file count can be lower than the
        # highest existing index; never overwrite a file that is already there.
        outfile = OUTPUT_DIR / ('newsletter_%03d.txt' % next_idx)
        while outfile.exists():
            next_idx += 1
            outfile = OUTPUT_DIR / ('newsletter_%03d.txt' % next_idx)

        with open(outfile, 'w', encoding='utf-8') as f:
            f.write(email)
        next_idx += 1

    total = len(list(OUTPUT_DIR.glob('*.txt')))
    print('\nDone. %d newsletter files in %s' % (total, OUTPUT_DIR))
    print('Done generating!')
|
|
|
|
|
# Run the generator only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
|
|