File size: 5,075 Bytes
ea84121
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
{
  "job": "extension",
  "config": {
    "name": "n1sr1n4",
    "process": [
      {
        "type": "diffusion_trainer",
        "training_folder": "/app/ai-toolkit/output",
        "sqlite_db_path": "/app/ai-toolkit/aitk_db.db",
        "device": "cuda",
        "trigger_word": null,
        "performance_log_every": 10,
        "network": {
          "type": "lora",
          "linear": 32,
          "linear_alpha": 32,
          "conv": 16,
          "conv_alpha": 16,
          "lokr_full_rank": true,
          "lokr_factor": -1,
          "network_kwargs": {
            "ignore_if_contains": []
          }
        },
        "save": {
          "dtype": "bf16",
          "save_every": 500,
          "max_step_saves_to_keep": 4,
          "save_format": "diffusers",
          "push_to_hub": false
        },
        "datasets": [
          {
            "folder_path": "/app/ai-toolkit/datasets/n1sr1n4",
            "mask_path": null,
            "mask_min_value": 0.1,
            "default_caption": "",
            "caption_ext": "txt",
            "caption_dropout_rate": 0.05,
            "cache_latents_to_disk": true,
            "is_reg": false,
            "network_weight": 1,
            "resolution": [
              512,
              768
            ],
            "controls": [],
            "shrink_video_to_frames": true,
            "num_frames": 121,
            "flip_x": false,
            "flip_y": false,
            "num_repeats": 1,
            "do_i2v": false,
            "do_audio": true,
            "fps": 24,
            "audio_normalize": true
          }
        ],
        "train": {
          "batch_size": 1,
          "bypass_guidance_embedding": false,
          "steps": 6500,
          "gradient_accumulation": 1,
          "train_unet": true,
          "train_text_encoder": false,
          "gradient_checkpointing": true,
          "noise_scheduler": "flowmatch",
          "optimizer": "adamw8bit",
          "timestep_type": "weighted",
          "content_or_style": "balanced",
          "optimizer_params": {
            "weight_decay": 0.0001
          },
          "unload_text_encoder": false,
          "cache_text_embeddings": true,
          "lr": 0.0001,
          "ema_config": {
            "use_ema": false,
            "ema_decay": 0.99
          },
          "skip_first_sample": false,
          "force_first_sample": false,
          "disable_sampling": false,
          "dtype": "bf16",
          "diff_output_preservation": false,
          "diff_output_preservation_multiplier": 1,
          "diff_output_preservation_class": "person",
          "switch_boundary_every": 1,
          "loss_type": "mse"
        },
        "logging": {
          "log_every": 1,
          "use_ui_logger": true
        },
        "model": {
          "name_or_path": "Lightricks/LTX-2",
          "quantize": false,
          "qtype": "qfloat8",
          "quantize_te": true,
          "qtype_te": "uint4",
          "arch": "ltx2",
          "low_vram": true,
          "model_kwargs": {},
          "layer_offloading": false,
          "layer_offloading_text_encoder_percent": 1,
          "layer_offloading_transformer_percent": 1
        },
        "sample": {
          "sampler": "flowmatch",
          "sample_every": 500,
          "width": 768,
          "height": 768,
          "samples": [
            {
              "prompt": "A cinematic medium shot of n1sr1n4 standing inside a metallic elevator. She looks tired, wearing a professional black blazer over a white patterned top and a blue lanyard around her neck. She holds a coffee cup loosely in her hand. The elevator doors start to close, but she looks up, makes eye contact with the viewer, and says softly, \"It is going to be a long day.\" The lighting is cool and artificial, reflecting off the metal walls.",
              "ctrl_img": "/app/ai-toolkit/data/images/fc426fd4-05a1-4093-9115-bcbd2462a2db.jpg"
            },
            {
              "prompt": "A close-up shot of n1sr1n4 outdoors with sunlight hitting her face. She takes a drag from her cigarette and exhales smoke slowly. She is wearing her signature blue patterned shirt. The camera focuses on her face as she notices the recording, smiles slightly, and says playfully, \"Are you really filming me right now?\" The background is a blur of city traffic and trees."
            },
            {
              "prompt": "A handheld vlog-style shot of n1sr1n4 sitting in a bright coffee shop. She is holding her iced coffee cup. She looks directly into the camera lens, tilts her head slightly, and says, \"I think this is the best coffee I have ever had.\" After speaking, she smiles and takes a sip from the straw. The background is slightly blurred with people walking by."
            }
          ],
          "neg": "",
          "seed": 42,
          "walk_seed": true,
          "guidance_scale": 4,
          "sample_steps": 25,
          "num_frames": 121,
          "fps": 24
        }
      }
    ]
  },
  "meta": {
    "name": "[name]",
    "version": "1.0"
  }
}