deucebucket committed on
Commit
fe9e993
·
verified ·
1 Parent(s): 309efb7

docs: update templatefix test notes

Browse files
agentic_eval_20260522/regular_v6_1_noncoding_agentic_tools_strict_summary.json ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": "gemma4-26b-regular-v6.1-templatefix-agentic",
3
+ "date": "2026-05-22",
4
+ "count": 3,
5
+ "pass_cases": [
6
+ "schedule_strict",
7
+ "release_notes_strict",
8
+ "creative_brief_strict"
9
+ ],
10
+ "warn_cases": [],
11
+ "fail_cases": [],
12
+ "state_after": {
13
+ "calendar": [
14
+ {
15
+ "id": "cal-1",
16
+ "day": "Tuesday",
17
+ "start": "09:00",
18
+ "end": "10:00",
19
+ "title": "dentist"
20
+ },
21
+ {
22
+ "id": "cal-2",
23
+ "day": "Tuesday",
24
+ "start": "14:00",
25
+ "end": "15:30",
26
+ "title": "vendor call"
27
+ },
28
+ {
29
+ "id": "cal-3",
30
+ "day": "Wednesday",
31
+ "start": "11:00",
32
+ "end": "12:00",
33
+ "title": "team sync"
34
+ },
35
+ {
36
+ "id": "cal-4",
37
+ "day": "Tuesday",
38
+ "end": "11:00",
39
+ "start": "10:15",
40
+ "title": "Free Slot Hold"
41
+ }
42
+ ],
43
+ "notes": {
44
+ "style-guide": "Tone: intimate, sensory, direct. Avoid purple exposition. Prefer concrete objects and subtext.",
45
+ "launch-checklist": "Before release: verify template, runtime flags, no-thinking, thinking budget, tool calls, vision, creative writing, agentic automation.",
46
+ "carl-prefs": "Carl should use concise summaries, preserve user intent, and avoid claiming tests that were not run.",
47
+ "Internal Release-Note Draft": "Internal release-note draft: Review checklist and Carl's preferences for the upcoming release.",
48
+ "Production Creative Brief - Romantic Stormlit Apartment Scene": "Production Creative Brief: Romantic Stormlit Apartment Scene\nTone: Intimate, atmospheric, and non-explicit.\nVisual Style: Low-key lighting, heavy use of shadows, and blue/cool tones from the storm outside contrasted with warm, amber hues from indoor lamps.\nSetting: A cozy apartment during a thunderstorm. Large windows showing rain and lightning.\nKey Elements: Sound of rain against glass, soft textures (blankets, velvet), flickering candlelight, and subtle character interaction.\nGoal: Capture the tension and romance through atmosphere and subtext rather than explicit action."
49
+ },
50
+ "tasks": [
51
+ {
52
+ "id": "task-1",
53
+ "evidence": "Created hold for Tuesday 10:15-11:00",
54
+ "priority": "medium",
55
+ "title": "Follow up on Tuesday hold"
56
+ },
57
+ {
58
+ "id": "task-2",
59
+ "evidence": "Missing proof for release checklist.",
60
+ "priority": "high",
61
+ "title": "Obtain missing proof for release checklist"
62
+ }
63
+ ]
64
+ }
65
+ }