rstudioweb's picture
Upload log.txt with huggingface_hub
a59cfb0 verified
raw
history blame
69.6 kB
Running 1 job
{
"type": "diffusion_trainer",
"training_folder": "/teamspace/studios/this_studio/ai-toolkit/output",
"sqlite_db_path": "/teamspace/studios/this_studio/ai-toolkit/aitk_db.db",
"device": "cuda",
"trigger_word": "xantaraz1",
"performance_log_every": 10,
"network": {
"type": "lora",
"linear": 32,
"linear_alpha": 32,
"conv": 16,
"conv_alpha": 16,
"lokr_full_rank": true,
"lokr_factor": -1,
"network_kwargs": {
"ignore_if_contains": []
}
},
"save": {
"dtype": "bf16",
"save_every": 250,
"max_step_saves_to_keep": 4,
"save_format": "diffusers",
"push_to_hub": false
},
"datasets": [
{
"folder_path": "/teamspace/studios/this_studio/ai-toolkit/datasets/antaramag",
"mask_path": null,
"mask_min_value": 0.1,
"default_caption": "",
"caption_ext": "txt",
"caption_dropout_rate": 0.05,
"cache_latents_to_disk": false,
"is_reg": false,
"network_weight": 1,
"resolution": [
512,
768
],
"controls": [],
"shrink_video_to_frames": true,
"num_frames": 1,
"flip_x": false,
"flip_y": false,
"num_repeats": 1
}
],
"train": {
"batch_size": 1,
"bypass_guidance_embedding": false,
"steps": 4000,
"gradient_accumulation": 1,
"train_unet": true,
"train_text_encoder": false,
"gradient_checkpointing": true,
"noise_scheduler": "flowmatch",
"optimizer": "adamw8bit",
"timestep_type": "weighted",
"content_or_style": "balanced",
"optimizer_params": {
"weight_decay": 0.0001
},
"unload_text_encoder": false,
"cache_text_embeddings": false,
"lr": 0.0001,
"ema_config": {
"use_ema": false,
"ema_decay": 0.99
},
"skip_first_sample": false,
"force_first_sample": false,
"disable_sampling": false,
"dtype": "bf16",
"diff_output_preservation": false,
"diff_output_preservation_multiplier": 1,
"diff_output_preservation_class": "person",
"switch_boundary_every": 1,
"loss_type": "mse"
},
"logging": {
"log_every": 1,
"use_ui_logger": true
},
"model": {
"name_or_path": "Tongyi-MAI/Z-Image-Turbo",
"quantize": true,
"qtype": "qfloat8",
"quantize_te": true,
"qtype_te": "qfloat8",
"arch": "zimage:turbo",
"low_vram": false,
"model_kwargs": {},
"layer_offloading": false,
"layer_offloading_text_encoder_percent": 1,
"layer_offloading_transformer_percent": 1,
"assistant_lora_path": "ostris/zimage_turbo_training_adapter/zimage_turbo_training_adapter_v2.safetensors"
},
"sample": {
"sampler": "flowmatch",
"sample_every": 250,
"width": 1024,
"height": 1024,
"samples": [
{
"prompt": "\"xantaraz1 , \u4e00\u5f35\u771f\u5be6\u96a8\u8208\u7684iPhone\u5feb\u7167\u2014\u2014\u4e00\u5f35\u672a\u7d93\u4efb\u4f55\u5c08\u696d\u9020\u578b\u6216\u6ffe\u93e1\u8655\u7406\u7684\u5168\u8eab\u7167\uff0c\u76f4\u63a5\u5f9e\u76f8\u6a5f\u81a0\u5377\u4e2d\u53d6\u51fa\u3002\u5348\u5f8c\u7684\u967d\u5149\u659c\u5c04\u9032\u6a13\u68af\u9593\u7684\u7a97\u6236\uff0c\u71df\u9020\u51fa\u91d1\u8272\u7684\u6c1b\u570d\u3002\u756b\u9762\u4e2d\u53ea\u6709\u5169\u500b\u4eba\uff1a\u4e00\u500b\u525b\u9ad8\u4e2d\u7562\u696d\u7684\u53ef\u611b18\u6b72\u5370\u5ea6\u5973\u5b69\uff0c\u548c\u4e00\u500b\u5e74\u7d00\u76f8\u4eff\u7684\u5370\u5ea6\u7537\u5b69\u3002\u5973\u5b69\u8dea\u5728\u7a7a\u8569\u8569\u7684\u5b78\u6821\u6a13\u68af\u9593\u5149\u79bf\u79bf\u7684\u6c34\u6ce5\u53f0\u968e\u4e0a\uff0c\u7537\u5b69\u7ad9\u5728\u5979\u53f3\u5074\uff1b\u5979\u7684\u982d\u6b63\u597d\u4f4d\u65bc\u7537\u5b69\u7684\u80ef\u90e8\uff0c\u81c9\u8cbc\u8457\u4ed6\u7684\u9670\u8396\u3002 \u5973\u5b69\uff1a\u4e00\u982d\u9577\u9577\u7684\u6ce2\u6d6a\u6372\u9aee\uff0c\u767d\u7699\u7684\u76ae\u819a\uff0c\u5927\u5927\u7684\u773c\u775b\uff0c\u4ee5\u53ca\u67d4\u5ae9\u5e74\u8f15\u7684\u5bb9\u984f\u3002\u5979\u5b8c\u5168\u7d20\u984f\u2014\u2014\u76ae\u819a\u770b\u8d77\u4f86\u7121\u6bd4\u771f\u5be6\uff0c\u6bdb\u5b54\u7d30\u5c0f\uff0c\u7455\u75b5\u6e05\u6670\u53ef\u898b\u3002\u5979\u7684\u982d\u9aee\u7565\u986f\u51cc\u4e82\uff0c\u5e7e\u7e37\u88ab\u6c57\u6c34\u6d78\u6fd5\u7684\u9aee\u7d72\u8cbc\u5728\u6cdb\u7d05\u7684\u81c9\u4e0a\u3002\u5979\u90a3\u4ef6\u85cf\u85cd\u8272\u62c9\u934a\u5916\u5957\u2014\u2014\u98fe\u6709\u767d\u8272\u62fc\u63a5\u548c\u7d05\u767d\u689d\u7d0b\u2014\u2014\u62c9\u934a\u5b8c\u5168\u655e\u958b\uff0c\u9732\u51fa\u5979\u5c0f\u5de7\u81ea\u7136\u7684A\u7f69\u676f\u80f8\u90e8\u3002\u5979\u90a3\u689d\u914d\u5957\u7684\u85cf\u85cd\u8272\u904b\u52d5\u6821\u8932\u88ab\u96a8\u610f\u5730\u4e1f\u5728\u4e00\u65c1\u3002\u5979\u7684\u4e0b\u534a\u8eab\u8d64\u88f8\uff0c\u9732\u51fa\u9670\u90e8\uff1b\u5979\u9670\u5507\u4e0a\u65b9\u8986\u84cb\u8457\u4e00\u7247\u6fc3\u5bc6\u7684\u3001\u672a\u7d93\u4fee\u526a\u7684\u9577\u9670\u6bdb\u3002\u5979\u7684\u9670\u5507\u5448\u73fe\u67d4\u548c\u7684\u7c89\u8910\u8272\uff0c\u5927\u5c0f\u548c\u5f62\u72c0\u90fd\u5f88\u6b63\u5e38\uff1b\u5979\u751f\u6b96\u5668\u7684\u89e3\u5256\u7d30\u7bc0\u975e\u5e38\u903c\u771f\u3002\u5979\u8173\u4e0a\u7a7f\u8457\u767d\u8272\u904b\u52d5\u978b\u548c\u767d\u8272\u68c9\u896a\u3002\u5979\u4f4e\u8457\u982d\uff0c\u5634\u88e1\u542b\u8457\u7537\u5b69\u52c3\u8d77\u7684\u9670\u8396\uff0c\u6b63\u5728\u9032\u884c\u6fd5\u6f64\u903c\u771f\u7684\u53e3\u4ea4\uff1b\u5979\u7684\u5634\u5507\u5b8c\u5168\u5305\u88f9\u4f4f\u9670\u8396\u7684\u4e2d\u6bb5\uff0c\u820c\u982d\u7684\u52d5\u4f5c\u6e05\u6670\u53ef\u898b\uff0c\u553e\u6db2\u9806\u8457\u9670\u8396\u6ef4\u843d\u3002\u540c\u6642\uff0c\u5979\u5c07\u4e00\u96bb\u624b\u4f38\u5230\u5169\u817f\u4e4b\u9593\uff0c\u8f15\u8f15\u64ab\u6478\u81ea\u5df1\u6fc3\u5bc6\u7684\u9670\u6bdb\u3002 \u7537\u5b69\uff1a\u4ed6\u7a7f\u8457\u4e00\u4ef6\u8207\u5973\u5b69\u76f8\u914d\u7684\u85cf\u85cd\u8272\u62c9\u934a\u5916\u5957\u2014\u2014\u5916\u5957\u4e0a\u4e5f\u98fe\u6709\u767d\u8272\u62fc\u63a5\u548c\u7d05\u767d\u689d\u7d0b\u2014\u2014\u62c9\u934a\u5b8c\u5168\u655e\u958b\uff0c\u9732\u51fa\u4e0a\u534a\u8eab\u3002\u4ed6\u90a3\u689d\u85cf\u85cd\u8272\u7684\u904b\u52d5\u6821\u8932\u88ab\u62c9\u5230\u5927\u817f\u4e2d\u90e8\uff0c\u9732\u51fa\u52c3\u8d77\u7684\u9670\u8396\u3002\u4ed6\u7528\u96d9\u624b\u8f15\u8f15\u6258\u4f4f\u5973\u5b69\u7684\u982d\u2014\u2014\u6bcf\u96bb\u624b\u7684\u4e94\u6839\u624b\u6307\u90fd\u5b8c\u597d\u7121\u640d\u2014\u2014\u4ed6\u7684\u624b\u81c2\u548c\u817f\u90e8\u770b\u8d77\u4f86\u4e5f\u5f88\u6b63\u5e38\u3002\u5973\u5b69\u7f9e\u6f80\u5730\u62ac\u982d\u76f4\u8996\u8457\u7537\u5b69\uff1b\u5979\u7684\u8868\u60c5\u65e2\u7f9e\u6f80\u53c8\u5145\u6eff\u6e34\u671b\uff0c\u8207\u5979\u5929\u771f\u7121\u90aa\u7684\u5b78\u751f\u9762\u5b54\u548c\u5927\u81bd\u7684\u6027\u884c\u70ba\u5f62\u6210\u4e86\u9bae\u660e\u7684\u5c0d\u6bd4\u3002 \u80cc\u666f\uff1a\u7a7a\u8569\u8569\u7684\u5b78\u6821\u6c34\u6ce5\u6a13\u68af\u9593\u7684\u4e00\u89d2\uff0c\u91d1\u5c6c\u6276\u624b\uff0c\u5e7e\u7e37\u967d\u5149\u900f\u904e\u7e2b\u9699\u7051\u4e0b\uff0c\u7a7a\u6c23\u4e2d\u7030\u6f2b\u8457\u7d30\u5c0f\u7684\u7070\u5875\u2014\u2014\u71df\u9020\u51fa\u4e00\u7a2e\u5b89\u975c\u3001\u79c1\u5bc6\u800c\u53c8\u96b1\u853d\u7684\u6c1b\u570d\u3002\u7d30\u7dfb\u7684\u76ae\u819a\u7d0b\u7406\uff0c\u5c0f\u5de7\u800c\u81ea\u7136\u7684\u4e73\u623f\uff1b\u5177\u6709\u672a\u7d93\u7de8\u8f2f\u7684 iPhone \u7167\u7247\u7684\u771f\u5be6\u8cea\u611f\u548c\u81ea\u7136\u8272\u5f69\u3002\""
},
{
"prompt": "\"xantaraz1 , A sensual, medium-close-up portrait of a stunning, slender young woman posed kneeling on a textured, light-colored bedspread. She is completely nude, with a natural body shape and subtle, natural pubic hair. Her body is arched in a dramatic, expressive posture, with one arm raised behind her head, tossing her voluminous, dark brown hair upward and backward, creating motion blur and dynamic lines. Her eyes are closed or looking down with a captivating, slightly open-mouthed expression conveying deep emotion or ecstasy. The lighting is soft, warm, and golden, suggesting late afternoon sunlight streaming in from the side, creating gentle highlights on her skin and deep shadows that sculpt her figure. The background is softly blurred (shallow depth of field), featuring warm beige and muted brown tones with a subtle geometric, possibly wood-paneled or tiled pattern angled diagonally behind her, adding visual texture. The overall mood is intimate, evocative, and painterly, captured with the aesthetic of fine art photography, high detail, and realistic texture. Cinematic lighting, professional photography, Canon EOS R5., ultra detailed, intricate details, cinematic, photorealistic, masterpiece\""
},
{
"prompt": "\"xantaraz1 , A Sexy 18y Indian woman with perfect body pressed against a rain-streaked window, getting taken from behind, her breasts against the cold glass, She is standing pose and wearing a high knee length black boot, city lights reflecting on her wet skin, intense and cinematic\""
},
{
"prompt": "\"xantaraz1 , anal sex, bridge position, squatting cowgirl position, 1girl, penis, nipples, medium breasts, 1boy, pussy, black thighhighs, black ponytail hair, navel, clothed male nude female, lace-trimmed legwear, closed eyes, spread legs, bottomless, girl on top, male pubic hair, long hair, erection, arm support, lace trim, smile, tattoo on thigh, grin\""
},
{
"prompt": "\"xantaraz1 , A full-body portrait of a stunning Indian woman with long, flowing black hair cascading down her back, standing nude and poised in a serene, softly lit bedroom. Her body is sculpted with perfect, natural curves \u2014 full, round breasts with dark brownish areolas and nipples, a smooth midriff leading to a gently curved waist, and a deeply toned, naturally darkened pubic area that accentuates her feminine form. Her legs are long and slender, ending in a firm, rounded ass that subtly tenses as she stands with her arms relaxed at her sides, fingers slightly curled, exuding quiet confidence and sensual grace. Her gaze is direct and unflinching, meeting the viewer\u2019s eyes with a calm, alluring intensity that draws you in. The environment is minimalist and intimate, set against a soft, neutral beige background that enhances the natural tones of her skin and hair. Behind her, sheer white curtains filter in diffused daylight, casting gentle shadows across the room, while a simple bed with rumpled white linens and a potted green plant in the corner add subtle domestic warmth without distracting from the central focus. The lighting is soft and even, highlighting the smooth texture of her skin, the subtle contours of her anatomy, and the natural sheen of her hair \u2014 all rendered with photorealistic precision to emphasize her flawless, voluptuous form. This is a portrait of raw, unfiltered beauty \u2014 a celebration of the female body in its most natural state. Her posture is relaxed yet commanding, her expression serene and self-assured, embodying both innocence and sensuality. The composition is vertically framed, drawing attention to her full, symmetrical silhouette, while the muted, monochromatic backdrop ensures every curve, every detail of her anatomy \u2014 from the delicate arch of her collarbone to the subtle definition of her hips \u2014 is rendered with exquisite clarity. She stands as a living sculpture, a vision of erotic elegance, inviting the viewer to appreciate the harmony of her form and the quiet power of her presence.\""
}
],
"neg": "",
"seed": 42,
"walk_seed": true,
"guidance_scale": 1,
"sample_steps": 8,
"num_frames": 1,
"fps": 1
}
}
Using SQLite database at /teamspace/studios/this_studio/ai-toolkit/aitk_db.db
Job ID: "cfb27aa8-eb0e-4d2d-830f-dcad89ded8ab"
#############################################
# Running job: my_first_lora_v8
#############################################
Running 1 process
Loading ZImage model
Loading transformer
Loading checkpoint shards: 0%| | 0/3 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/3 [00:00<?, ?it/s] Loading checkpoint shards: 33%|###3 | 1/3 [00:00<00:01, 1.12it/s] Loading checkpoint shards: 33%|###3 | 1/3 [00:00<00:01, 1.12it/s] Loading checkpoint shards: 67%|######6 | 2/3 [00:01<00:00, 1.12it/s] Loading checkpoint shards: 67%|######6 | 2/3 [00:01<00:00, 1.12it/s] Loading checkpoint shards: 100%|##########| 3/3 [00:02<00:00, 1.36it/s] Loading checkpoint shards: 100%|##########| 3/3 [00:02<00:00, 1.36it/s] Loading checkpoint shards: 100%|##########| 3/3 [00:02<00:00, 1.29it/s] Loading checkpoint shards: 100%|##########| 3/3 [00:02<00:00, 1.29it/s]
Loading assistant LoRA
create LoRA network. base dim (rank): 64, alpha: 64
neuron dropout: p=None, rank dropout: p=None, module dropout: p=None
create LoRA for Text Encoder: 0 modules.
create LoRA for U-Net: 276 modules.
enable LoRA for U-Net
Merging in assistant LoRA
Missing keys: []
Quantizing Transformer
- quantizing 30 transformer blocks
0%| | 0/30 [00:00<?, ?it/s] 0%| | 0/30 [00:00<?, ?it/s] 3%|3 | 1/30 [00:00<00:04, 6.29it/s] 3%|3 | 1/30 [00:00<00:04, 6.29it/s] 7%|6 | 2/30 [00:00<00:03, 7.36it/s] 7%|6 | 2/30 [00:00<00:03, 7.36it/s] 10%|# | 3/30 [00:00<00:03, 7.90it/s] 10%|# | 3/30 [00:00<00:03, 7.90it/s] 13%|#3 | 4/30 [00:00<00:03, 8.31it/s] 13%|#3 | 4/30 [00:00<00:03, 8.31it/s] 17%|#6 | 5/30 [00:00<00:02, 8.55it/s] 17%|#6 | 5/30 [00:00<00:02, 8.55it/s] 23%|##3 | 7/30 [00:00<00:02, 9.26it/s] 23%|##3 | 7/30 [00:00<00:02, 9.26it/s] 27%|##6 | 8/30 [00:00<00:02, 9.07it/s] 27%|##6 | 8/30 [00:00<00:02, 9.07it/s] 30%|### | 9/30 [00:01<00:02, 9.27it/s] 30%|### | 9/30 [00:01<00:02, 9.27it/s] 33%|###3 | 10/30 [00:01<00:02, 9.46it/s] 33%|###3 | 10/30 [00:01<00:02, 9.46it/s] 40%|#### | 12/30 [00:01<00:01, 9.42it/s] 40%|#### | 12/30 [00:01<00:01, 9.42it/s] 43%|####3 | 13/30 [00:01<00:01, 9.29it/s] 43%|####3 | 13/30 [00:01<00:01, 9.29it/s] 47%|####6 | 14/30 [00:01<00:01, 9.12it/s] 47%|####6 | 14/30 [00:01<00:01, 9.12it/s] 53%|#####3 | 16/30 [00:01<00:01, 9.53it/s] 53%|#####3 | 16/30 [00:01<00:01, 9.53it/s] 60%|###### | 18/30 [00:01<00:01, 9.76it/s] 60%|###### | 18/30 [00:01<00:01, 9.76it/s] 67%|######6 | 20/30 [00:02<00:01, 9.79it/s] 67%|######6 | 20/30 [00:02<00:01, 9.79it/s] 73%|#######3 | 22/30 [00:02<00:00, 10.22it/s] 73%|#######3 | 22/30 [00:02<00:00, 10.22it/s] 80%|######## | 24/30 [00:02<00:00, 10.24it/s] 80%|######## | 24/30 [00:02<00:00, 10.24it/s] 87%|########6 | 26/30 [00:02<00:00, 10.00it/s] 87%|########6 | 26/30 [00:02<00:00, 10.00it/s] 90%|######### | 27/30 [00:02<00:00, 9.72it/s] 90%|######### | 27/30 [00:02<00:00, 9.72it/s] 93%|#########3| 28/30 [00:02<00:00, 9.49it/s] 93%|#########3| 28/30 [00:02<00:00, 9.49it/s] 100%|##########| 30/30 [00:03<00:00, 9.70it/s] 100%|##########| 30/30 [00:03<00:00, 9.70it/s] 100%|##########| 30/30 [00:03<00:00, 9.42it/s] 100%|##########| 30/30 [00:03<00:00, 9.42it/s]
- quantizing extras
Text Encoder
`torch_dtype` is deprecated! Use `dtype` instead!
`torch_dtype` is deprecated! Use `dtype` instead!
Loading checkpoint shards: 0%| | 0/3 [00:00<?, ?it/s] Loading checkpoint shards: 0%| | 0/3 [00:00<?, ?it/s] Loading checkpoint shards: 100%|##########| 3/3 [00:00<00:00, 58.53it/s] Loading checkpoint shards: 100%|##########| 3/3 [00:00<00:00, 58.53it/s]
Quantizing Text Encoder
Loading VAE
Making pipe
Preparing Model
Model Loaded
create LoRA network. base dim (rank): 32, alpha: 32
neuron dropout: p=None, rank dropout: p=None, module dropout: p=None
apply LoRA to Conv2d with kernel size (3,3). dim (rank): 16, alpha: 16
create LoRA for Text Encoder: 0 modules.
create LoRA for U-Net: 240 modules.
enable LoRA for U-Net
Dataset: /teamspace/studios/this_studio/ai-toolkit/datasets/antaramag
- Preprocessing image dimensions
0%| | 0/6 [00:00<?, ?it/s] 0%| | 0/6 [00:00<?, ?it/s] 100%|##########| 6/6 [00:00<00:00, 151.03it/s] 100%|##########| 6/6 [00:00<00:00, 151.03it/s]
- Found 6 images
Bucket sizes for /teamspace/studios/this_studio/ai-toolkit/datasets/antaramag:
384x384: 1 files
384x672: 1 files
672x384: 1 files
480x544: 1 files
416x576: 2 files
5 buckets made
Dataset: /teamspace/studios/this_studio/ai-toolkit/datasets/antaramag
- Preprocessing image dimensions
0%| | 0/6 [00:00<?, ?it/s] 0%| | 0/6 [00:00<?, ?it/s] 100%|##########| 6/6 [00:00<00:00, 24745.16it/s] 100%|##########| 6/6 [00:00<00:00, 24745.16it/s]
- Found 6 images
Bucket sizes for /teamspace/studios/this_studio/ai-toolkit/datasets/antaramag:
384x384: 1 files
576x992: 1 files
992x576: 1 files
480x544: 1 files
672x864: 2 files
5 buckets made
Generating baseline samples before training
Unloading assistant lora
Generating Images: 0%| | 0/5 [00:00<?, ?it/s] Generating Images: 0%| | 0/5 [00:00<?, ?it/s] Generating Images: 20%|## | 1/5 [00:08<00:33, 8.38s/it] Generating Images: 20%|## | 1/5 [00:08<00:33, 8.38s/it] Generating Images: 40%|#### | 2/5 [00:16<00:24, 8.03s/it] Generating Images: 40%|#### | 2/5 [00:16<00:24, 8.03s/it] Generating Images: 60%|###### | 3/5 [00:23<00:15, 7.80s/it] Generating Images: 60%|###### | 3/5 [00:23<00:15, 7.80s/it] Generating Images: 80%|######## | 4/5 [00:31<00:07, 7.72s/it] Generating Images: 80%|######## | 4/5 [00:31<00:07, 7.72s/it] Generating Images: 100%|##########| 5/5 [00:39<00:00, 7.90s/it] Generating Images: 100%|##########| 5/5 [00:39<00:00, 7.90s/it] Loading assistant lora
my_first_lora_v8: 0%| | 0/4000 [00:00<?, ?it/s] my_first_lora_v8: 0%| | 0/4000 [00:00<?, ?it/s] my_first_lora_v8: 0%| | 0/4000 [00:01<?, ?it/s, lr: 1.0e-04 loss: 2.474e-01] my_first_lora_v8: 0%| | 0/4000 [00:01<?, ?it/s, lr: 1.0e-04 loss: 2.474e-01] my_first_lora_v8: 0%| | 0/4000 [00:01<?, ?it/s, lr: 1.0e-04 loss: 2.474e-01] my_first_lora_v8: 0%| | 0/4000 [00:01<?, ?it/s, lr: 1.0e-04 loss: 2.474e-01] my_first_lora_v8: 0%| | 0/4000 [00:02<?, ?it/s, lr: 1.0e-04 loss: 6.980e-01] my_first_lora_v8: 0%| | 0/4000 [00:02<?, ?it/s, lr: 1.0e-04 loss: 6.980e-01] my_first_lora_v8: 0%| | 1/4000 [00:02<3:10:05, 2.85s/it, lr: 1.0e-04 loss: 7.479e-02] my_first_lora_v8: 0%| | 1/4000 [00:02<3:10:05, 2.85s/it, lr: 1.0e-04 loss: 7.479e-02] my_first_lora_v8: 0%| | 2/4000 [00:04<2:24:56, 2.18s/it, lr: 1.0e-04 loss: 1.952e-01] my_first_lora_v8: 0%| | 2/4000 [00:04<2:24:56, 2.18s/it, lr: 1.0e-04 loss: 1.952e-01] my_first_lora_v8: 0%| | 3/4000 [00:05<2:09:39, 1.95s/it, lr: 1.0e-04 loss: 8.164e-01] my_first_lora_v8: 0%| | 3/4000 [00:05<2:09:39, 1.95s/it, lr: 1.0e-04 loss: 8.164e-01] my_first_lora_v8: 0%| | 4/4000 [00:06<1:50:21, 1.66s/it, lr: 1.0e-04 loss: 7.071e-01] my_first_lora_v8: 0%| | 4/4000 [00:06<1:50:21, 1.66s/it, lr: 1.0e-04 loss: 7.071e-01] my_first_lora_v8: 0%| | 5/4000 [00:08<1:47:47, 1.62s/it, lr: 1.0e-04 loss: 3.621e-01] my_first_lora_v8: 0%| | 5/4000 [00:08<1:47:47, 1.62s/it, lr: 1.0e-04 loss: 3.621e-01] my_first_lora_v8: 0%| | 6/4000 [00:09<1:45:55, 1.59s/it, lr: 1.0e-04 loss: 6.580e-01] my_first_lora_v8: 0%| | 6/4000 [00:09<1:45:55, 1.59s/it, lr: 1.0e-04 loss: 6.580e-01] my_first_lora_v8: 0%| | 7/4000 [00:10<1:38:30, 1.48s/it, lr: 1.0e-04 loss: 1.603e-01] my_first_lora_v8: 0%| | 7/4000 [00:10<1:38:30, 1.48s/it, lr: 1.0e-04 loss: 1.603e-01] my_first_lora_v8: 0%| | 8/4000 [00:11<1:32:59, 1.40s/it, lr: 1.0e-04 loss: 7.644e-01] my_first_lora_v8: 0%| | 8/4000 [00:11<1:32:59, 1.40s/it, lr: 1.0e-04 loss: 7.644e-01] my_first_lora_v8: 0%| | 9/4000 [00:11<1:28:35, 1.33s/it, lr: 1.0e-04 loss: 6.681e-01] my_first_lora_v8: 0%| | 9/4000 [00:11<1:28:35, 1.33s/it, lr: 1.0e-04 loss: 6.681e-01] my_first_lora_v8: 0%| | 10/4000 [00:12<1:24:53, 1.28s/it, lr: 1.0e-04 loss: 1.036e-01] my_first_lora_v8: 0%| | 10/4000 [00:12<1:24:53, 1.28s/it, lr: 1.0e-04 loss: 1.036e-01] my_first_lora_v8: 0%| | 11/4000 [00:14<1:27:07, 1.31s/it, lr: 1.0e-04 loss: 9.076e-02] my_first_lora_v8: 0%| | 11/4000 [00:14<1:27:07, 1.31s/it, lr: 1.0e-04 loss: 9.076e-02] my_first_lora_v8: 0%| | 12/4000 [00:15<1:26:10, 1.30s/it, lr: 1.0e-04 loss: 2.006e-02] my_first_lora_v8: 0%| | 12/4000 [00:15<1:26:10, 1.30s/it, lr: 1.0e-04 loss: 2.006e-02] my_first_lora_v8: 0%| | 13/4000 [00:16<1:23:36, 1.26s/it, lr: 1.0e-04 loss: 2.384e-01] my_first_lora_v8: 0%| | 13/4000 [00:16<1:23:36, 1.26s/it, lr: 1.0e-04 loss: 2.384e-01] my_first_lora_v8: 0%| | 14/4000 [00:17<1:24:36, 1.27s/it, lr: 1.0e-04 loss: 2.811e-01] my_first_lora_v8: 0%| | 14/4000 [00:17<1:24:36, 1.27s/it, lr: 1.0e-04 loss: 2.811e-01] my_first_lora_v8: 0%| | 15/4000 [00:18<1:22:22, 1.24s/it, lr: 1.0e-04 loss: 2.792e-01] my_first_lora_v8: 0%| | 15/4000 [00:18<1:22:22, 1.24s/it, lr: 1.0e-04 loss: 2.792e-01] my_first_lora_v8: 0%| | 16/4000 [00:19<1:20:37, 1.21s/it, lr: 1.0e-04 loss: 6.337e-01] my_first_lora_v8: 0%| | 16/4000 [00:19<1:20:37, 1.21s/it, lr: 1.0e-04 loss: 6.337e-01] my_first_lora_v8: 0%| | 17/4000 [00:20<1:18:56, 1.19s/it, lr: 1.0e-04 loss: 6.168e-01] my_first_lora_v8: 0%| | 17/4000 [00:20<1:18:56, 1.19s/it, lr: 1.0e-04 loss: 6.168e-01] my_first_lora_v8: 0%| | 18/4000 [00:20<1:17:22, 1.17s/it, lr: 1.0e-04 loss: 5.909e-01] my_first_lora_v8: 0%| | 18/4000 [00:20<1:17:22, 1.17s/it, lr: 1.0e-04 loss: 5.909e-01] my_first_lora_v8: 0%| | 19/4000 [00:22<1:18:29, 1.18s/it, lr: 1.0e-04 loss: 2.269e-01] my_first_lora_v8: 0%| | 19/4000 [00:22<1:18:29, 1.18s/it, lr: 1.0e-04 loss: 2.269e-01] my_first_lora_v8: 0%| | 20/4000 [00:23<1:17:12, 1.16s/it, lr: 1.0e-04 loss: 3.162e-01] my_first_lora_v8: 0%| | 20/4000 [00:23<1:17:12, 1.16s/it, lr: 1.0e-04 loss: 3.162e-01] my_first_lora_v8: 1%| | 21/4000 [00:24<1:15:56, 1.15s/it, lr: 1.0e-04 loss: 2.817e-01] my_first_lora_v8: 1%| | 21/4000 [00:24<1:15:56, 1.15s/it, lr: 1.0e-04 loss: 2.817e-01] my_first_lora_v8: 1%| | 22/4000 [00:25<1:16:55, 1.16s/it, lr: 1.0e-04 loss: 4.580e-01] my_first_lora_v8: 1%| | 22/4000 [00:25<1:16:55, 1.16s/it, lr: 1.0e-04 loss: 4.580e-01] my_first_lora_v8: 1%| | 23/4000 [00:26<1:16:18, 1.15s/it, lr: 1.0e-04 loss: 6.318e-01] my_first_lora_v8: 1%| | 23/4000 [00:26<1:16:18, 1.15s/it, lr: 1.0e-04 loss: 6.318e-01] my_first_lora_v8: 1%| | 24/4000 [00:27<1:15:21, 1.14s/it, lr: 1.0e-04 loss: 4.180e-01] my_first_lora_v8: 1%| | 24/4000 [00:27<1:15:21, 1.14s/it, lr: 1.0e-04 loss: 4.180e-01] my_first_lora_v8: 1%| | 25/4000 [00:28<1:16:16, 1.15s/it, lr: 1.0e-04 loss: 7.750e-01] my_first_lora_v8: 1%| | 25/4000 [00:28<1:16:16, 1.15s/it, lr: 1.0e-04 loss: 7.750e-01] my_first_lora_v8: 1%| | 26/4000 [00:29<1:16:14, 1.15s/it, lr: 1.0e-04 loss: 4.897e-01] my_first_lora_v8: 1%| | 26/4000 [00:29<1:16:14, 1.15s/it, lr: 1.0e-04 loss: 4.897e-01] my_first_lora_v8: 1%| | 27/4000 [00:30<1:15:18, 1.14s/it, lr: 1.0e-04 loss: 3.684e-01] my_first_lora_v8: 1%| | 27/4000 [00:30<1:15:18, 1.14s/it, lr: 1.0e-04 loss: 3.684e-01] my_first_lora_v8: 1%| | 28/4000 [00:31<1:14:29, 1.13s/it, lr: 1.0e-04 loss: 5.955e-01] my_first_lora_v8: 1%| | 28/4000 [00:31<1:14:29, 1.13s/it, lr: 1.0e-04 loss: 5.955e-01] my_first_lora_v8: 1%| | 29/4000 [00:32<1:13:43, 1.11s/it, lr: 1.0e-04 loss: 2.243e-01] my_first_lora_v8: 1%| | 29/4000 [00:32<1:13:43, 1.11s/it, lr: 1.0e-04 loss: 2.243e-01] my_first_lora_v8: 1%| | 30/4000 [00:33<1:12:57, 1.10s/it, lr: 1.0e-04 loss: 3.346e-01] my_first_lora_v8: 1%| | 30/4000 [00:33<1:12:57, 1.10s/it, lr: 1.0e-04 loss: 3.346e-01] my_first_lora_v8: 1%| | 31/4000 [00:33<1:12:14, 1.09s/it, lr: 1.0e-04 loss: 1.979e-01] my_first_lora_v8: 1%| | 31/4000 [00:33<1:12:14, 1.09s/it, lr: 1.0e-04 loss: 1.979e-01] my_first_lora_v8: 1%| | 32/4000 [00:35<1:13:01, 1.10s/it, lr: 1.0e-04 loss: 4.776e-01] my_first_lora_v8: 1%| | 32/4000 [00:35<1:13:01, 1.10s/it, lr: 1.0e-04 loss: 4.776e-01] my_first_lora_v8: 1%| | 33/4000 [00:36<1:13:45, 1.12s/it, lr: 1.0e-04 loss: 3.852e-01] my_first_lora_v8: 1%| | 33/4000 [00:36<1:13:45, 1.12s/it, lr: 1.0e-04 loss: 3.852e-01] my_first_lora_v8: 1%| | 34/4000 [00:38<1:14:29, 1.13s/it, lr: 1.0e-04 loss: 1.793e-01] my_first_lora_v8: 1%| | 34/4000 [00:38<1:14:29, 1.13s/it, lr: 1.0e-04 loss: 1.793e-01] my_first_lora_v8: 1%| | 35/4000 [00:39<1:15:24, 1.14s/it, lr: 1.0e-04 loss: 3.841e-01] my_first_lora_v8: 1%| | 35/4000 [00:39<1:15:24, 1.14s/it, lr: 1.0e-04 loss: 3.841e-01] my_first_lora_v8: 1%| | 36/4000 [00:41<1:15:59, 1.15s/it, lr: 1.0e-04 loss: 4.020e-01] my_first_lora_v8: 1%| | 36/4000 [00:41<1:15:59, 1.15s/it, lr: 1.0e-04 loss: 4.020e-01] my_first_lora_v8: 1%| | 37/4000 [00:42<1:15:22, 1.14s/it, lr: 1.0e-04 loss: 7.128e-01] my_first_lora_v8: 1%| | 37/4000 [00:42<1:15:22, 1.14s/it, lr: 1.0e-04 loss: 7.128e-01] my_first_lora_v8: 1%| | 38/4000 [00:43<1:15:56, 1.15s/it, lr: 1.0e-04 loss: 5.841e-01] my_first_lora_v8: 1%| | 38/4000 [00:43<1:15:56, 1.15s/it, lr: 1.0e-04 loss: 5.841e-01] my_first_lora_v8: 1%| | 39/4000 [00:44<1:15:56, 1.15s/it, lr: 1.0e-04 loss: 1.959e-01] my_first_lora_v8: 1%| | 39/4000 [00:44<1:15:56, 1.15s/it, lr: 1.0e-04 loss: 1.959e-01] my_first_lora_v8: 1%|1 | 40/4000 [00:45<1:15:22, 1.14s/it, lr: 1.0e-04 loss: 3.858e-01] my_first_lora_v8: 1%|1 | 40/4000 [00:45<1:15:22, 1.14s/it, lr: 1.0e-04 loss: 3.858e-01] my_first_lora_v8: 1%|1 | 41/4000 [00:46<1:14:45, 1.13s/it, lr: 1.0e-04 loss: 6.643e-01] my_first_lora_v8: 1%|1 | 41/4000 [00:46<1:14:45, 1.13s/it, lr: 1.0e-04 loss: 6.643e-01] my_first_lora_v8: 1%|1 | 42/4000 [00:47<1:14:14, 1.13s/it, lr: 1.0e-04 loss: 4.350e-01] my_first_lora_v8: 1%|1 | 42/4000 [00:47<1:14:14, 1.13s/it, lr: 1.0e-04 loss: 4.350e-01] my_first_lora_v8: 1%|1 | 43/4000 [00:48<1:13:43, 1.12s/it, lr: 1.0e-04 loss: 2.820e-01] my_first_lora_v8: 1%|1 | 43/4000 [00:48<1:13:43, 1.12s/it, lr: 1.0e-04 loss: 2.820e-01] my_first_lora_v8: 1%|1 | 44/4000 [00:48<1:13:12, 1.11s/it, lr: 1.0e-04 loss: 7.087e-01] my_first_lora_v8: 1%|1 | 44/4000 [00:48<1:13:12, 1.11s/it, lr: 1.0e-04 loss: 7.087e-01] my_first_lora_v8: 1%|1 | 45/4000 [00:49<1:12:40, 1.10s/it, lr: 1.0e-04 loss: 7.949e-02] my_first_lora_v8: 1%|1 | 45/4000 [00:49<1:12:40, 1.10s/it, lr: 1.0e-04 loss: 7.949e-02] my_first_lora_v8: 1%|1 | 46/4000 [00:51<1:13:12, 1.11s/it, lr: 1.0e-04 loss: 3.489e-01] my_first_lora_v8: 1%|1 | 46/4000 [00:51<1:13:12, 1.11s/it, lr: 1.0e-04 loss: 3.489e-01] my_first_lora_v8: 1%|1 | 47/4000 [00:52<1:13:02, 1.11s/it, lr: 1.0e-04 loss: 5.593e-01] my_first_lora_v8: 1%|1 | 47/4000 [00:52<1:13:02, 1.11s/it, lr: 1.0e-04 loss: 5.593e-01] my_first_lora_v8: 1%|1 | 48/4000 [00:52<1:12:36, 1.10s/it, lr: 1.0e-04 loss: 4.396e-01] my_first_lora_v8: 1%|1 | 48/4000 [00:52<1:12:36, 1.10s/it, lr: 1.0e-04 loss: 4.396e-01] my_first_lora_v8: 1%|1 | 49/4000 [00:53<1:12:09, 1.10s/it, lr: 1.0e-04 loss: 1.827e-01] my_first_lora_v8: 1%|1 | 49/4000 [00:53<1:12:09, 1.10s/it, lr: 1.0e-04 loss: 1.827e-01] my_first_lora_v8: 1%|1 | 50/4000 [00:54<1:11:45, 1.09s/it, lr: 1.0e-04 loss: 1.524e-02] my_first_lora_v8: 1%|1 | 50/4000 [00:54<1:11:45, 1.09s/it, lr: 1.0e-04 loss: 1.524e-02] my_first_lora_v8: 1%|1 | 51/4000 [00:55<1:11:20, 1.08s/it, lr: 1.0e-04 loss: 2.148e-01] my_first_lora_v8: 1%|1 | 51/4000 [00:55<1:11:20, 1.08s/it, lr: 1.0e-04 loss: 2.148e-01] my_first_lora_v8: 1%|1 | 52/4000 [00:56<1:11:49, 1.09s/it, lr: 1.0e-04 loss: 6.338e-01] my_first_lora_v8: 1%|1 | 52/4000 [00:56<1:11:49, 1.09s/it, lr: 1.0e-04 loss: 6.338e-01] my_first_lora_v8: 1%|1 | 53/4000 [00:57<1:11:52, 1.09s/it, lr: 1.0e-04 loss: 5.627e-01] my_first_lora_v8: 1%|1 | 53/4000 [00:57<1:11:52, 1.09s/it, lr: 1.0e-04 loss: 5.627e-01] my_first_lora_v8: 1%|1 | 54/4000 [00:59<1:12:20, 1.10s/it, lr: 1.0e-04 loss: 1.034e-01] my_first_lora_v8: 1%|1 | 54/4000 [00:59<1:12:20, 1.10s/it, lr: 1.0e-04 loss: 1.034e-01] my_first_lora_v8: 1%|1 | 55/4000 [01:00<1:12:48, 1.11s/it, lr: 1.0e-04 loss: 4.395e-02] my_first_lora_v8: 1%|1 | 55/4000 [01:00<1:12:48, 1.11s/it, lr: 1.0e-04 loss: 4.395e-02] my_first_lora_v8: 1%|1 | 56/4000 [01:01<1:12:24, 1.10s/it, lr: 1.0e-04 loss: 2.649e-01] my_first_lora_v8: 1%|1 | 56/4000 [01:01<1:12:24, 1.10s/it, lr: 1.0e-04 loss: 2.649e-01] my_first_lora_v8: 1%|1 | 57/4000 [01:03<1:12:51, 1.11s/it, lr: 1.0e-04 loss: 5.220e-01] my_first_lora_v8: 1%|1 | 57/4000 [01:03<1:12:51, 1.11s/it, lr: 1.0e-04 loss: 5.220e-01] my_first_lora_v8: 1%|1 | 58/4000 [01:04<1:12:30, 1.10s/it, lr: 1.0e-04 loss: 6.000e-01] my_first_lora_v8: 1%|1 | 58/4000 [01:04<1:12:30, 1.10s/it, lr: 1.0e-04 loss: 6.000e-01] my_first_lora_v8: 1%|1 | 59/4000 [01:04<1:12:20, 1.10s/it, lr: 1.0e-04 loss: 8.002e-01] my_first_lora_v8: 1%|1 | 59/4000 [01:04<1:12:20, 1.10s/it, lr: 1.0e-04 loss: 8.002e-01] my_first_lora_v8: 2%|1 | 60/4000 [01:05<1:12:00, 1.10s/it, lr: 1.0e-04 loss: 4.433e-01] my_first_lora_v8: 2%|1 | 60/4000 [01:05<1:12:00, 1.10s/it, lr: 1.0e-04 loss: 4.433e-01] my_first_lora_v8: 2%|1 | 61/4000 [01:06<1:11:39, 1.09s/it, lr: 1.0e-04 loss: 5.988e-01] my_first_lora_v8: 2%|1 | 61/4000 [01:06<1:11:39, 1.09s/it, lr: 1.0e-04 loss: 5.988e-01] my_first_lora_v8: 2%|1 | 62/4000 [01:08<1:12:04, 1.10s/it, lr: 1.0e-04 loss: 9.040e-02] my_first_lora_v8: 2%|1 | 62/4000 [01:08<1:12:04, 1.10s/it, lr: 1.0e-04 loss: 9.040e-02] my_first_lora_v8: 2%|1 | 63/4000 [01:08<1:11:43, 1.09s/it, lr: 1.0e-04 loss: 2.478e-01] my_first_lora_v8: 2%|1 | 63/4000 [01:08<1:11:43, 1.09s/it, lr: 1.0e-04 loss: 2.478e-01] my_first_lora_v8: 2%|1 | 64/4000 [01:09<1:11:22, 1.09s/it, lr: 1.0e-04 loss: 2.979e-01] my_first_lora_v8: 2%|1 | 64/4000 [01:09<1:11:22, 1.09s/it, lr: 1.0e-04 loss: 2.979e-01] my_first_lora_v8: 2%|1 | 65/4000 [01:10<1:11:25, 1.09s/it, lr: 1.0e-04 loss: 5.542e-01] my_first_lora_v8: 2%|1 | 65/4000 [01:10<1:11:25, 1.09s/it, lr: 1.0e-04 loss: 5.542e-01] my_first_lora_v8: 2%|1 | 66/4000 [01:11<1:11:08, 1.09s/it, lr: 1.0e-04 loss: 2.887e-01] my_first_lora_v8: 2%|1 | 66/4000 [01:11<1:11:08, 1.09s/it, lr: 1.0e-04 loss: 2.887e-01] my_first_lora_v8: 2%|1 | 67/4000 [01:12<1:10:53, 1.08s/it, lr: 1.0e-04 loss: 6.990e-01] my_first_lora_v8: 2%|1 | 67/4000 [01:12<1:10:53, 1.08s/it, lr: 1.0e-04 loss: 6.990e-01] my_first_lora_v8: 2%|1 | 68/4000 [01:13<1:11:16, 1.09s/it, lr: 1.0e-04 loss: 6.108e-01] my_first_lora_v8: 2%|1 | 68/4000 [01:13<1:11:16, 1.09s/it, lr: 1.0e-04 loss: 6.108e-01] my_first_lora_v8: 2%|1 | 69/4000 [01:15<1:11:40, 1.09s/it, lr: 1.0e-04 loss: 5.730e-01] my_first_lora_v8: 2%|1 | 69/4000 [01:15<1:11:40, 1.09s/it, lr: 1.0e-04 loss: 5.730e-01] my_first_lora_v8: 2%|1 | 70/4000 [01:16<1:12:02, 1.10s/it, lr: 1.0e-04 loss: 5.789e-01] my_first_lora_v8: 2%|1 | 70/4000 [01:16<1:12:02, 1.10s/it, lr: 1.0e-04 loss: 5.789e-01] my_first_lora_v8: 2%|1 | 71/4000 [01:18<1:12:31, 1.11s/it, lr: 1.0e-04 loss: 7.402e-01] my_first_lora_v8: 2%|1 | 71/4000 [01:18<1:12:31, 1.11s/it, lr: 1.0e-04 loss: 7.402e-01] my_first_lora_v8: 2%|1 | 72/4000 [01:19<1:12:12, 1.10s/it, lr: 1.0e-04 loss: 4.072e-01] my_first_lora_v8: 2%|1 | 72/4000 [01:19<1:12:12, 1.10s/it, lr: 1.0e-04 loss: 4.072e-01] my_first_lora_v8: 2%|1 | 73/4000 [01:20<1:11:53, 1.10s/it, lr: 1.0e-04 loss: 7.888e-01] my_first_lora_v8: 2%|1 | 73/4000 [01:20<1:11:53, 1.10s/it, lr: 1.0e-04 loss: 7.888e-01] my_first_lora_v8: 2%|1 | 74/4000 [01:20<1:11:36, 1.09s/it, lr: 1.0e-04 loss: 5.446e-01] my_first_lora_v8: 2%|1 | 74/4000 [01:20<1:11:36, 1.09s/it, lr: 1.0e-04 loss: 5.446e-01] my_first_lora_v8: 2%|1 | 75/4000 [01:22<1:11:57, 1.10s/it, lr: 1.0e-04 loss: 7.385e-01] my_first_lora_v8: 2%|1 | 75/4000 [01:22<1:11:57, 1.10s/it, lr: 1.0e-04 loss: 7.385e-01] my_first_lora_v8: 2%|1 | 76/4000 [01:24<1:12:18, 1.11s/it, lr: 1.0e-04 loss: 2.023e-01] my_first_lora_v8: 2%|1 | 76/4000 [01:24<1:12:18, 1.11s/it, lr: 1.0e-04 loss: 2.023e-01] my_first_lora_v8: 2%|1 | 77/4000 [01:24<1:11:59, 1.10s/it, lr: 1.0e-04 loss: 1.639e-01] my_first_lora_v8: 2%|1 | 77/4000 [01:24<1:11:59, 1.10s/it, lr: 1.0e-04 loss: 1.639e-01] my_first_lora_v8: 2%|1 | 78/4000 [01:25<1:11:43, 1.10s/it, lr: 1.0e-04 loss: 2.089e-01] my_first_lora_v8: 2%|1 | 78/4000 [01:25<1:11:43, 1.10s/it, lr: 1.0e-04 loss: 2.089e-01] my_first_lora_v8: 2%|1 | 79/4000 [01:26<1:11:46, 1.10s/it, lr: 1.0e-04 loss: 6.431e-01] my_first_lora_v8: 2%|1 | 79/4000 [01:26<1:11:46, 1.10s/it, lr: 1.0e-04 loss: 6.431e-01] my_first_lora_v8: 2%|2 | 80/4000 [01:28<1:12:04, 1.10s/it, lr: 1.0e-04 loss: 6.541e-01] my_first_lora_v8: 2%|2 | 80/4000 [01:28<1:12:04, 1.10s/it, lr: 1.0e-04 loss: 6.541e-01] my_first_lora_v8: 2%|2 | 81/4000 [01:29<1:11:47, 1.10s/it, lr: 1.0e-04 loss: 3.323e-01] my_first_lora_v8: 2%|2 | 81/4000 [01:29<1:11:47, 1.10s/it, lr: 1.0e-04 loss: 3.323e-01] my_first_lora_v8: 2%|2 | 82/4000 [01:29<1:11:31, 1.10s/it, lr: 1.0e-04 loss: 4.725e-01] my_first_lora_v8: 2%|2 | 82/4000 [01:29<1:11:31, 1.10s/it, lr: 1.0e-04 loss: 4.725e-01] my_first_lora_v8: 2%|2 | 83/4000 [01:30<1:11:26, 1.09s/it, lr: 1.0e-04 loss: 7.769e-01] my_first_lora_v8: 2%|2 | 83/4000 [01:30<1:11:26, 1.09s/it, lr: 1.0e-04 loss: 7.769e-01] my_first_lora_v8: 2%|2 | 84/4000 [01:32<1:11:43, 1.10s/it, lr: 1.0e-04 loss: 7.287e-01] my_first_lora_v8: 2%|2 | 84/4000 [01:32<1:11:43, 1.10s/it, lr: 1.0e-04 loss: 7.287e-01] my_first_lora_v8: 2%|2 | 85/4000 [01:33<1:11:30, 1.10s/it, lr: 1.0e-04 loss: 1.573e-01] my_first_lora_v8: 2%|2 | 85/4000 [01:33<1:11:30, 1.10s/it, lr: 1.0e-04 loss: 1.573e-01] my_first_lora_v8: 2%|2 | 86/4000 [01:34<1:11:47, 1.10s/it, lr: 1.0e-04 loss: 6.427e-02] my_first_lora_v8: 2%|2 | 86/4000 [01:34<1:11:47, 1.10s/it, lr: 1.0e-04 loss: 6.427e-02] my_first_lora_v8: 2%|2 | 87/4000 [01:35<1:11:32, 1.10s/it, lr: 1.0e-04 loss: 1.600e-01] my_first_lora_v8: 2%|2 | 87/4000 [01:35<1:11:32, 1.10s/it, lr: 1.0e-04 loss: 1.600e-01] my_first_lora_v8: 2%|2 | 88/4000 [01:36<1:11:19, 1.09s/it, lr: 1.0e-04 loss: 7.330e-01] my_first_lora_v8: 2%|2 | 88/4000 [01:36<1:11:19, 1.09s/it, lr: 1.0e-04 loss: 7.330e-01] my_first_lora_v8: 2%|2 | 89/4000 [01:37<1:11:04, 1.09s/it, lr: 1.0e-04 loss: 6.700e-01] my_first_lora_v8: 2%|2 | 89/4000 [01:37<1:11:04, 1.09s/it, lr: 1.0e-04 loss: 6.700e-01] my_first_lora_v8: 2%|2 | 90/4000 [01:38<1:11:22, 1.10s/it, lr: 1.0e-04 loss: 6.541e-01] my_first_lora_v8: 2%|2 | 90/4000 [01:38<1:11:22, 1.10s/it, lr: 1.0e-04 loss: 6.541e-01] my_first_lora_v8: 2%|2 | 91/4000 [01:40<1:11:39, 1.10s/it, lr: 1.0e-04 loss: 3.567e-01] my_first_lora_v8: 2%|2 | 91/4000 [01:40<1:11:39, 1.10s/it, lr: 1.0e-04 loss: 3.567e-01] my_first_lora_v8: 2%|2 | 92/4000 [01:40<1:11:27, 1.10s/it, lr: 1.0e-04 loss: 6.872e-01] my_first_lora_v8: 2%|2 | 92/4000 [01:40<1:11:27, 1.10s/it, lr: 1.0e-04 loss: 6.872e-01] my_first_lora_v8: 2%|2 | 93/4000 [01:42<1:11:30, 1.10s/it, lr: 1.0e-04 loss: 4.607e-01] my_first_lora_v8: 2%|2 | 93/4000 [01:42<1:11:30, 1.10s/it, lr: 1.0e-04 loss: 4.607e-01] my_first_lora_v8: 2%|2 | 94/4000 [01:42<1:11:16, 1.09s/it, lr: 1.0e-04 loss: 4.286e-01] my_first_lora_v8: 2%|2 | 94/4000 [01:42<1:11:16, 1.09s/it, lr: 1.0e-04 loss: 4.286e-01] my_first_lora_v8: 2%|2 | 95/4000 [01:43<1:11:11, 1.09s/it, lr: 1.0e-04 loss: 4.069e-01] my_first_lora_v8: 2%|2 | 95/4000 [01:43<1:11:11, 1.09s/it, lr: 1.0e-04 loss: 4.069e-01] my_first_lora_v8: 2%|2 | 96/4000 [01:44<1:10:57, 1.09s/it, lr: 1.0e-04 loss: 2.056e-02] my_first_lora_v8: 2%|2 | 96/4000 [01:44<1:10:57, 1.09s/it, lr: 1.0e-04 loss: 2.056e-02] my_first_lora_v8: 2%|2 | 97/4000 [01:45<1:10:44, 1.09s/it, lr: 1.0e-04 loss: 4.892e-01] my_first_lora_v8: 2%|2 | 97/4000 [01:45<1:10:44, 1.09s/it, lr: 1.0e-04 loss: 4.892e-01] my_first_lora_v8: 2%|2 | 98/4000 [01:46<1:10:31, 1.08s/it, lr: 1.0e-04 loss: 1.415e-01] my_first_lora_v8: 2%|2 | 98/4000 [01:46<1:10:31, 1.08s/it, lr: 1.0e-04 loss: 1.415e-01] my_first_lora_v8: 2%|2 | 99/4000 [01:47<1:10:18, 1.08s/it, lr: 1.0e-04 loss: 7.171e-01] my_first_lora_v8: 2%|2 | 99/4000 [01:47<1:10:18, 1.08s/it, lr: 1.0e-04 loss: 7.171e-01] my_first_lora_v8: 2%|2 | 100/4000 [01:47<1:10:06, 1.08s/it, lr: 1.0e-04 loss: 3.720e-01] my_first_lora_v8: 2%|2 | 100/4000 [01:47<1:10:06, 1.08s/it, lr: 1.0e-04 loss: 3.720e-01] my_first_lora_v8: 3%|2 | 101/4000 [01:49<1:10:23, 1.08s/it, lr: 1.0e-04 loss: 6.633e-01] my_first_lora_v8: 3%|2 | 101/4000 [01:49<1:10:23, 1.08s/it, lr: 1.0e-04 loss: 6.633e-01] my_first_lora_v8: 3%|2 | 102/4000 [01:50<1:10:39, 1.09s/it, lr: 1.0e-04 loss: 8.288e-01] my_first_lora_v8: 3%|2 | 102/4000 [01:50<1:10:39, 1.09s/it, lr: 1.0e-04 loss: 8.288e-01] my_first_lora_v8: 3%|2 | 103/4000 [01:52<1:10:53, 1.09s/it, lr: 1.0e-04 loss: 7.203e-01] my_first_lora_v8: 3%|2 | 103/4000 [01:52<1:10:53, 1.09s/it, lr: 1.0e-04 loss: 7.203e-01] my_first_lora_v8: 3%|2 | 104/4000 [01:53<1:10:41, 1.09s/it, lr: 1.0e-04 loss: 4.913e-01] my_first_lora_v8: 3%|2 | 104/4000 [01:53<1:10:41, 1.09s/it, lr: 1.0e-04 loss: 4.913e-01] my_first_lora_v8: 3%|2 | 105/4000 [01:54<1:10:30, 1.09s/it, lr: 1.0e-04 loss: 1.430e-01] my_first_lora_v8: 3%|2 | 105/4000 [01:54<1:10:30, 1.09s/it, lr: 1.0e-04 loss: 1.430e-01] my_first_lora_v8: 3%|2 | 106/4000 [01:55<1:10:58, 1.09s/it, lr: 1.0e-04 loss: 6.835e-01] my_first_lora_v8: 3%|2 | 106/4000 [01:55<1:10:58, 1.09s/it, lr: 1.0e-04 loss: 6.835e-01] my_first_lora_v8: 3%|2 | 107/4000 [01:57<1:11:18, 1.10s/it, lr: 1.0e-04 loss: 2.272e-01] my_first_lora_v8: 3%|2 | 107/4000 [01:57<1:11:18, 1.10s/it, lr: 1.0e-04 loss: 2.272e-01] my_first_lora_v8: 3%|2 | 108/4000 [01:58<1:11:08, 1.10s/it, lr: 1.0e-04 loss: 6.324e-01] my_first_lora_v8: 3%|2 | 108/4000 [01:58<1:11:08, 1.10s/it, lr: 1.0e-04 loss: 6.324e-01] my_first_lora_v8: 3%|2 | 109/4000 [01:59<1:10:55, 1.09s/it, lr: 1.0e-04 loss: 1.706e-01] my_first_lora_v8: 3%|2 | 109/4000 [01:59<1:10:55, 1.09s/it, lr: 1.0e-04 loss: 1.706e-01] my_first_lora_v8: 3%|2 | 110/4000 [02:00<1:10:44, 1.09s/it, lr: 1.0e-04 loss: 2.232e-01] my_first_lora_v8: 3%|2 | 110/4000 [02:00<1:10:44, 1.09s/it, lr: 1.0e-04 loss: 2.232e-01] my_first_lora_v8: 3%|2 | 111/4000 [02:00<1:10:32, 1.09s/it, lr: 1.0e-04 loss: 3.191e-01] my_first_lora_v8: 3%|2 | 111/4000 [02:00<1:10:32, 1.09s/it, lr: 1.0e-04 loss: 3.191e-01] my_first_lora_v8: 3%|2 | 112/4000 [02:02<1:10:46, 1.09s/it, lr: 1.0e-04 loss: 4.004e-01] my_first_lora_v8: 3%|2 | 112/4000 [02:02<1:10:46, 1.09s/it, lr: 1.0e-04 loss: 4.004e-01] my_first_lora_v8: 3%|2 | 113/4000 [02:03<1:10:33, 1.09s/it, lr: 1.0e-04 loss: 6.469e-01] my_first_lora_v8: 3%|2 | 113/4000 [02:03<1:10:33, 1.09s/it, lr: 1.0e-04 loss: 6.469e-01] my_first_lora_v8: 3%|2 | 114/4000 [02:03<1:10:21, 1.09s/it, lr: 1.0e-04 loss: 6.814e-01] my_first_lora_v8: 3%|2 | 114/4000 [02:03<1:10:21, 1.09s/it, lr: 1.0e-04 loss: 6.814e-01] my_first_lora_v8: 3%|2 | 115/4000 [02:04<1:10:10, 1.08s/it, lr: 1.0e-04 loss: 3.121e-01] my_first_lora_v8: 3%|2 | 115/4000 [02:04<1:10:10, 1.08s/it, lr: 1.0e-04 loss: 3.121e-01] my_first_lora_v8: 3%|2 | 116/4000 [02:06<1:10:23, 1.09s/it, lr: 1.0e-04 loss: 3.142e-01] my_first_lora_v8: 3%|2 | 116/4000 [02:06<1:10:23, 1.09s/it, lr: 1.0e-04 loss: 3.142e-01] my_first_lora_v8: 3%|2 | 117/4000 [02:06<1:10:13, 1.09s/it, lr: 1.0e-04 loss: 5.435e-01] my_first_lora_v8: 3%|2 | 117/4000 [02:06<1:10:13, 1.09s/it, lr: 1.0e-04 loss: 5.435e-01] my_first_lora_v8: 3%|2 | 118/4000 [02:08<1:10:27, 1.09s/it, lr: 1.0e-04 loss: 5.710e-01] my_first_lora_v8: 3%|2 | 118/4000 [02:08<1:10:27, 1.09s/it, lr: 1.0e-04 loss: 5.710e-01] my_first_lora_v8: 3%|2 | 119/4000 [02:09<1:10:35, 1.09s/it, lr: 1.0e-04 loss: 7.728e-01] my_first_lora_v8: 3%|2 | 119/4000 [02:09<1:10:35, 1.09s/it, lr: 1.0e-04 loss: 7.728e-01] my_first_lora_v8: 3%|3 | 120/4000 [02:11<1:10:47, 1.09s/it, lr: 1.0e-04 loss: 2.985e-02] my_first_lora_v8: 3%|3 | 120/4000 [02:11<1:10:47, 1.09s/it, lr: 1.0e-04 loss: 2.985e-02] my_first_lora_v8: 3%|3 | 121/4000 [02:12<1:10:36, 1.09s/it, lr: 1.0e-04 loss: 8.489e-02] my_first_lora_v8: 3%|3 | 121/4000 [02:12<1:10:36, 1.09s/it, lr: 1.0e-04 loss: 8.489e-02] my_first_lora_v8: 3%|3 | 122/4000 [02:12<1:10:26, 1.09s/it, lr: 1.0e-04 loss: 1.323e-01] my_first_lora_v8: 3%|3 | 122/4000 [02:12<1:10:26, 1.09s/it, lr: 1.0e-04 loss: 1.323e-01] my_first_lora_v8: 3%|3 | 123/4000 [02:13<1:10:16, 1.09s/it, lr: 1.0e-04 loss: 7.238e-01] my_first_lora_v8: 3%|3 | 123/4000 [02:13<1:10:16, 1.09s/it, lr: 1.0e-04 loss: 7.238e-01] my_first_lora_v8: 3%|3 | 124/4000 [02:14<1:10:05, 1.09s/it, lr: 1.0e-04 loss: 1.588e-01] my_first_lora_v8: 3%|3 | 124/4000 [02:14<1:10:05, 1.09s/it, lr: 1.0e-04 loss: 1.588e-01] my_first_lora_v8: 3%|3 | 125/4000 [02:16<1:10:17, 1.09s/it, lr: 1.0e-04 loss: 2.693e-01] my_first_lora_v8: 3%|3 | 125/4000 [02:16<1:10:17, 1.09s/it, lr: 1.0e-04 loss: 2.693e-01] my_first_lora_v8: 3%|3 | 126/4000 [02:16<1:10:08, 1.09s/it, lr: 1.0e-04 loss: 6.026e-01] my_first_lora_v8: 3%|3 | 126/4000 [02:16<1:10:08, 1.09s/it, lr: 1.0e-04 loss: 6.026e-01] my_first_lora_v8: 3%|3 | 127/4000 [02:17<1:09:58, 1.08s/it, lr: 1.0e-04 loss: 6.237e-01] my_first_lora_v8: 3%|3 | 127/4000 [02:17<1:09:58, 1.08s/it, lr: 1.0e-04 loss: 6.237e-01] my_first_lora_v8: 3%|3 | 128/4000 [02:19<1:10:10, 1.09s/it, lr: 1.0e-04 loss: 7.205e-01] my_first_lora_v8: 3%|3 | 128/4000 [02:19<1:10:10, 1.09s/it, lr: 1.0e-04 loss: 7.205e-01] my_first_lora_v8: 3%|3 | 129/4000 [02:20<1:10:22, 1.09s/it, lr: 1.0e-04 loss: 1.048e-01] my_first_lora_v8: 3%|3 | 129/4000 [02:20<1:10:22, 1.09s/it, lr: 1.0e-04 loss: 1.048e-01] my_first_lora_v8: 3%|3 | 130/4000 [02:21<1:10:13, 1.09s/it, lr: 1.0e-04 loss: 6.800e-01] my_first_lora_v8: 3%|3 | 130/4000 [02:21<1:10:13, 1.09s/it, lr: 1.0e-04 loss: 6.800e-01] my_first_lora_v8: 3%|3 | 131/4000 [02:22<1:10:08, 1.09s/it, lr: 1.0e-04 loss: 6.458e-01] my_first_lora_v8: 3%|3 | 131/4000 [02:22<1:10:08, 1.09s/it, lr: 1.0e-04 loss: 6.458e-01] my_first_lora_v8: 3%|3 | 132/4000 [02:23<1:09:59, 1.09s/it, lr: 1.0e-04 loss: 4.421e-01] my_first_lora_v8: 3%|3 | 132/4000 [02:23<1:09:59, 1.09s/it, lr: 1.0e-04 loss: 4.421e-01] my_first_lora_v8: 3%|3 | 133/4000 [02:24<1:10:11, 1.09s/it, lr: 1.0e-04 loss: 6.822e-01] my_first_lora_v8: 3%|3 | 133/4000 [02:24<1:10:11, 1.09s/it, lr: 1.0e-04 loss: 6.822e-01] my_first_lora_v8: 3%|3 | 134/4000 [02:26<1:10:12, 1.09s/it, lr: 1.0e-04 loss: 2.414e-01] my_first_lora_v8: 3%|3 | 134/4000 [02:26<1:10:12, 1.09s/it, lr: 1.0e-04 loss: 2.414e-01] my_first_lora_v8: 3%|3 | 135/4000 [02:26<1:10:03, 1.09s/it, lr: 1.0e-04 loss: 7.426e-01] my_first_lora_v8: 3%|3 | 135/4000 [02:26<1:10:03, 1.09s/it, lr: 1.0e-04 loss: 7.426e-01] my_first_lora_v8: 3%|3 | 136/4000 [02:28<1:10:14, 1.09s/it, lr: 1.0e-04 loss: 2.861e-01] my_first_lora_v8: 3%|3 | 136/4000 [02:28<1:10:14, 1.09s/it, lr: 1.0e-04 loss: 2.861e-01] my_first_lora_v8: 3%|3 | 137/4000 [02:29<1:10:25, 1.09s/it, lr: 1.0e-04 loss: 2.474e-01] my_first_lora_v8: 3%|3 | 137/4000 [02:29<1:10:25, 1.09s/it, lr: 1.0e-04 loss: 2.474e-01] my_first_lora_v8: 3%|3 | 138/4000 [02:30<1:10:17, 1.09s/it, lr: 1.0e-04 loss: 6.962e-01] my_first_lora_v8: 3%|3 | 138/4000 [02:30<1:10:17, 1.09s/it, lr: 1.0e-04 loss: 6.962e-01] my_first_lora_v8: 3%|3 | 139/4000 [02:31<1:10:08, 1.09s/it, lr: 1.0e-04 loss: 3.482e-01] my_first_lora_v8: 3%|3 | 139/4000 [02:31<1:10:08, 1.09s/it, lr: 1.0e-04 loss: 3.482e-01] my_first_lora_v8: 4%|3 | 140/4000 [02:32<1:10:00, 1.09s/it, lr: 1.0e-04 loss: 7.002e-01] my_first_lora_v8: 4%|3 | 140/4000 [02:32<1:10:00, 1.09s/it, lr: 1.0e-04 loss: 7.002e-01] my_first_lora_v8: 4%|3 | 141/4000 [02:33<1:09:52, 1.09s/it, lr: 1.0e-04 loss: 2.470e-01] my_first_lora_v8: 4%|3 | 141/4000 [02:33<1:09:52, 1.09s/it, lr: 1.0e-04 loss: 2.470e-01] my_first_lora_v8: 4%|3 | 142/4000 [02:34<1:10:02, 1.09s/it, lr: 1.0e-04 loss: 2.359e-01] my_first_lora_v8: 4%|3 | 142/4000 [02:34<1:10:02, 1.09s/it, lr: 1.0e-04 loss: 2.359e-01] my_first_lora_v8: 4%|3 | 143/4000 [02:35<1:09:59, 1.09s/it, lr: 1.0e-04 loss: 2.690e-01] my_first_lora_v8: 4%|3 | 143/4000 [02:35<1:09:59, 1.09s/it, lr: 1.0e-04 loss: 2.690e-01] my_first_lora_v8: 4%|3 | 144/4000 [02:36<1:09:49, 1.09s/it, lr: 1.0e-04 loss: 7.200e-01] my_first_lora_v8: 4%|3 | 144/4000 [02:36<1:09:49, 1.09s/it, lr: 1.0e-04 loss: 7.200e-01] my_first_lora_v8: 4%|3 | 145/4000 [02:37<1:09:40, 1.08s/it, lr: 1.0e-04 loss: 6.775e-01] my_first_lora_v8: 4%|3 | 145/4000 [02:37<1:09:40, 1.08s/it, lr: 1.0e-04 loss: 6.775e-01] my_first_lora_v8: 4%|3 | 146/4000 [02:38<1:09:32, 1.08s/it, lr: 1.0e-04 loss: 8.453e-01] my_first_lora_v8: 4%|3 | 146/4000 [02:38<1:09:32, 1.08s/it, lr: 1.0e-04 loss: 8.453e-01] my_first_lora_v8: 4%|3 | 147/4000 [02:39<1:09:34, 1.08s/it, lr: 1.0e-04 loss: 7.564e-01] my_first_lora_v8: 4%|3 | 147/4000 [02:39<1:09:34, 1.08s/it, lr: 1.0e-04 loss: 7.564e-01] my_first_lora_v8: 4%|3 | 148/4000 [02:40<1:09:26, 1.08s/it, lr: 1.0e-04 loss: 6.264e-01] my_first_lora_v8: 4%|3 | 148/4000 [02:40<1:09:26, 1.08s/it, lr: 1.0e-04 loss: 6.264e-01] my_first_lora_v8: 4%|3 | 149/4000 [02:41<1:09:36, 1.08s/it, lr: 1.0e-04 loss: 7.362e-01] my_first_lora_v8: 4%|3 | 149/4000 [02:41<1:09:36, 1.08s/it, lr: 1.0e-04 loss: 7.362e-01] my_first_lora_v8: 4%|3 | 150/4000 [02:42<1:09:28, 1.08s/it, lr: 1.0e-04 loss: 3.790e-01] my_first_lora_v8: 4%|3 | 150/4000 [02:42<1:09:28, 1.08s/it, lr: 1.0e-04 loss: 3.790e-01] my_first_lora_v8: 4%|3 | 151/4000 [02:43<1:09:38, 1.09s/it, lr: 1.0e-04 loss: 6.464e-01] my_first_lora_v8: 4%|3 | 151/4000 [02:43<1:09:38, 1.09s/it, lr: 1.0e-04 loss: 6.464e-01] my_first_lora_v8: 4%|3 | 152/4000 [02:44<1:09:29, 1.08s/it, lr: 1.0e-04 loss: 1.521e-01] my_first_lora_v8: 4%|3 | 152/4000 [02:44<1:09:29, 1.08s/it, lr: 1.0e-04 loss: 1.521e-01] my_first_lora_v8: 4%|3 | 153/4000 [02:46<1:09:39, 1.09s/it, lr: 1.0e-04 loss: 4.096e-01] my_first_lora_v8: 4%|3 | 153/4000 [02:46<1:09:39, 1.09s/it, lr: 1.0e-04 loss: 4.096e-01] my_first_lora_v8: 4%|3 | 154/4000 [02:47<1:09:49, 1.09s/it, lr: 1.0e-04 loss: 3.197e-01] my_first_lora_v8: 4%|3 | 154/4000 [02:47<1:09:49, 1.09s/it, lr: 1.0e-04 loss: 3.197e-01] my_first_lora_v8: 4%|3 | 155/4000 [02:48<1:09:45, 1.09s/it, lr: 1.0e-04 loss: 7.282e-01] my_first_lora_v8: 4%|3 | 155/4000 [02:48<1:09:45, 1.09s/it, lr: 1.0e-04 loss: 7.282e-01] my_first_lora_v8: 4%|3 | 156/4000 [02:50<1:09:54, 1.09s/it, lr: 1.0e-04 loss: 1.614e-01] my_first_lora_v8: 4%|3 | 156/4000 [02:50<1:09:54, 1.09s/it, lr: 1.0e-04 loss: 1.614e-01] my_first_lora_v8: 4%|3 | 157/4000 [02:51<1:09:46, 1.09s/it, lr: 1.0e-04 loss: 7.489e-01] my_first_lora_v8: 4%|3 | 157/4000 [02:51<1:09:46, 1.09s/it, lr: 1.0e-04 loss: 7.489e-01] my_first_lora_v8: 4%|3 | 158/4000 [02:51<1:09:37, 1.09s/it, lr: 1.0e-04 loss: 1.863e-01] my_first_lora_v8: 4%|3 | 158/4000 [02:51<1:09:37, 1.09s/it, lr: 1.0e-04 loss: 1.863e-01] my_first_lora_v8: 4%|3 | 159/4000 [02:52<1:09:30, 1.09s/it, lr: 1.0e-04 loss: 5.913e-01] my_first_lora_v8: 4%|3 | 159/4000 [02:52<1:09:30, 1.09s/it, lr: 1.0e-04 loss: 5.913e-01] my_first_lora_v8: 4%|4 | 160/4000 [02:54<1:09:39, 1.09s/it, lr: 1.0e-04 loss: 6.771e-01] my_first_lora_v8: 4%|4 | 160/4000 [02:54<1:09:39, 1.09s/it, lr: 1.0e-04 loss: 6.771e-01] my_first_lora_v8: 4%|4 | 161/4000 [02:55<1:09:41, 1.09s/it, lr: 1.0e-04 loss: 8.306e-02] my_first_lora_v8: 4%|4 | 161/4000 [02:55<1:09:41, 1.09s/it, lr: 1.0e-04 loss: 8.306e-02] my_first_lora_v8: 4%|4 | 162/4000 [02:56<1:09:50, 1.09s/it, lr: 1.0e-04 loss: 1.452e-01] my_first_lora_v8: 4%|4 | 162/4000 [02:56<1:09:50, 1.09s/it, lr: 1.0e-04 loss: 1.452e-01] my_first_lora_v8: 4%|4 | 163/4000 [02:58<1:09:59, 1.09s/it, lr: 1.0e-04 loss: 1.431e-01] my_first_lora_v8: 4%|4 | 163/4000 [02:58<1:09:59, 1.09s/it, lr: 1.0e-04 loss: 1.431e-01] my_first_lora_v8: 4%|4 | 164/4000 [02:59<1:09:51, 1.09s/it, lr: 1.0e-04 loss: 3.712e-01] my_first_lora_v8: 4%|4 | 164/4000 [02:59<1:09:51, 1.09s/it, lr: 1.0e-04 loss: 3.712e-01] my_first_lora_v8: 4%|4 | 165/4000 [02:59<1:09:43, 1.09s/it, lr: 1.0e-04 loss: 3.042e-01] my_first_lora_v8: 4%|4 | 165/4000 [02:59<1:09:43, 1.09s/it, lr: 1.0e-04 loss: 3.042e-01] my_first_lora_v8: 4%|4 | 166/4000 [03:00<1:09:35, 1.09s/it, lr: 1.0e-04 loss: 1.195e-01] my_first_lora_v8: 4%|4 | 166/4000 [03:00<1:09:35, 1.09s/it, lr: 1.0e-04 loss: 1.195e-01] my_first_lora_v8: 4%|4 | 167/4000 [03:02<1:09:47, 1.09s/it, lr: 1.0e-04 loss: 5.568e-01] my_first_lora_v8: 4%|4 | 167/4000 [03:02<1:09:47, 1.09s/it, lr: 1.0e-04 loss: 5.568e-01] my_first_lora_v8: 4%|4 | 168/4000 [03:03<1:09:39, 1.09s/it, lr: 1.0e-04 loss: 1.652e-01] my_first_lora_v8: 4%|4 | 168/4000 [03:03<1:09:39, 1.09s/it, lr: 1.0e-04 loss: 1.652e-01] my_first_lora_v8: 4%|4 | 169/4000 [03:04<1:09:47, 1.09s/it, lr: 1.0e-04 loss: 4.299e-01] my_first_lora_v8: 4%|4 | 169/4000 [03:04<1:09:47, 1.09s/it, lr: 1.0e-04 loss: 4.299e-01] my_first_lora_v8: 4%|4 | 170/4000 [03:06<1:09:55, 1.10s/it, lr: 1.0e-04 loss: 7.448e-01] my_first_lora_v8: 4%|4 | 170/4000 [03:06<1:09:55, 1.10s/it, lr: 1.0e-04 loss: 7.448e-01] my_first_lora_v8: 4%|4 | 171/4000 [03:06<1:09:46, 1.09s/it, lr: 1.0e-04 loss: 5.932e-01] my_first_lora_v8: 4%|4 | 171/4000 [03:06<1:09:46, 1.09s/it, lr: 1.0e-04 loss: 5.932e-01] my_first_lora_v8: 4%|4 | 172/4000 [03:07<1:09:39, 1.09s/it, lr: 1.0e-04 loss: 2.371e-01] my_first_lora_v8: 4%|4 | 172/4000 [03:07<1:09:39, 1.09s/it, lr: 1.0e-04 loss: 2.371e-01] my_first_lora_v8: 4%|4 | 173/4000 [03:08<1:09:31, 1.09s/it, lr: 1.0e-04 loss: 3.533e-01] my_first_lora_v8: 4%|4 | 173/4000 [03:08<1:09:31, 1.09s/it, lr: 1.0e-04 loss: 3.533e-01] my_first_lora_v8: 4%|4 | 174/4000 [03:10<1:09:45, 1.09s/it, lr: 1.0e-04 loss: 6.864e-01] my_first_lora_v8: 4%|4 | 174/4000 [03:10<1:09:45, 1.09s/it, lr: 1.0e-04 loss: 6.864e-01] my_first_lora_v8: 4%|4 | 175/4000 [03:11<1:09:38, 1.09s/it, lr: 1.0e-04 loss: 6.908e-01] my_first_lora_v8: 4%|4 | 175/4000 [03:11<1:09:38, 1.09s/it, lr: 1.0e-04 loss: 6.908e-01] my_first_lora_v8: 4%|4 | 176/4000 [03:11<1:09:30, 1.09s/it, lr: 1.0e-04 loss: 7.036e-01] my_first_lora_v8: 4%|4 | 176/4000 [03:11<1:09:30, 1.09s/it, lr: 1.0e-04 loss: 7.036e-01] my_first_lora_v8: 4%|4 | 177/4000 [03:12<1:09:23, 1.09s/it, lr: 1.0e-04 loss: 6.378e-01] my_first_lora_v8: 4%|4 | 177/4000 [03:12<1:09:23, 1.09s/it, lr: 1.0e-04 loss: 6.378e-01] my_first_lora_v8: 4%|4 | 178/4000 [03:13<1:09:15, 1.09s/it, lr: 1.0e-04 loss: 6.186e-01] my_first_lora_v8: 4%|4 | 178/4000 [03:13<1:09:15, 1.09s/it, lr: 1.0e-04 loss: 6.186e-01] my_first_lora_v8: 4%|4 | 179/4000 [03:14<1:09:11, 1.09s/it, lr: 1.0e-04 loss: 7.082e-01] my_first_lora_v8: 4%|4 | 179/4000 [03:14<1:09:11, 1.09s/it, lr: 1.0e-04 loss: 7.082e-01] my_first_lora_v8: 4%|4 | 180/4000 [03:15<1:09:05, 1.09s/it, lr: 1.0e-04 loss: 3.507e-01] my_first_lora_v8: 4%|4 | 180/4000 [03:15<1:09:05, 1.09s/it, lr: 1.0e-04 loss: 3.507e-01] my_first_lora_v8: 5%|4 | 181/4000 [03:16<1:08:58, 1.08s/it, lr: 1.0e-04 loss: 2.652e-01] my_first_lora_v8: 5%|4 | 181/4000 [03:16<1:08:58, 1.08s/it, lr: 1.0e-04 loss: 2.652e-01] my_first_lora_v8: 5%|4 | 182/4000 [03:16<1:08:51, 1.08s/it, lr: 1.0e-04 loss: 1.599e-01] my_first_lora_v8: 5%|4 | 182/4000 [03:16<1:08:51, 1.08s/it, lr: 1.0e-04 loss: 1.599e-01] my_first_lora_v8: 5%|4 | 183/4000 [03:18<1:08:58, 1.08s/it, lr: 1.0e-04 loss: 7.213e-01] my_first_lora_v8: 5%|4 | 183/4000 [03:18<1:08:58, 1.08s/it, lr: 1.0e-04 loss: 7.213e-01] my_first_lora_v8: 5%|4 | 184/4000 [03:19<1:09:05, 1.09s/it, lr: 1.0e-04 loss: 6.475e-01] my_first_lora_v8: 5%|4 | 184/4000 [03:19<1:09:05, 1.09s/it, lr: 1.0e-04 loss: 6.475e-01] my_first_lora_v8: 5%|4 | 185/4000 [03:20<1:08:58, 1.08s/it, lr: 1.0e-04 loss: 4.320e-01] my_first_lora_v8: 5%|4 | 185/4000 [03:20<1:08:58, 1.08s/it, lr: 1.0e-04 loss: 4.320e-01] my_first_lora_v8: 5%|4 | 186/4000 [03:21<1:08:52, 1.08s/it, lr: 1.0e-04 loss: 2.375e-01] my_first_lora_v8: 5%|4 | 186/4000 [03:21<1:08:52, 1.08s/it, lr: 1.0e-04 loss: 2.375e-01] my_first_lora_v8: 5%|4 | 187/4000 [03:22<1:08:45, 1.08s/it, lr: 1.0e-04 loss: 7.143e-01] my_first_lora_v8: 5%|4 | 187/4000 [03:22<1:08:45, 1.08s/it, lr: 1.0e-04 loss: 7.143e-01] my_first_lora_v8: 5%|4 | 188/4000 [03:24<1:08:59, 1.09s/it, lr: 1.0e-04 loss: 2.655e-01] my_first_lora_v8: 5%|4 | 188/4000 [03:24<1:08:59, 1.09s/it, lr: 1.0e-04 loss: 2.655e-01] my_first_lora_v8: 5%|4 | 189/4000 [03:24<1:08:52, 1.08s/it, lr: 1.0e-04 loss: 2.671e-01] my_first_lora_v8: 5%|4 | 189/4000 [03:24<1:08:52, 1.08s/it, lr: 1.0e-04 loss: 2.671e-01] my_first_lora_v8: 5%|4 | 190/4000 [03:26<1:09:00, 1.09s/it, lr: 1.0e-04 loss: 6.535e-01] my_first_lora_v8: 5%|4 | 190/4000 [03:26<1:09:00, 1.09s/it, lr: 1.0e-04 loss: 6.535e-01] my_first_lora_v8: 5%|4 | 191/4000 [03:28<1:09:10, 1.09s/it, lr: 1.0e-04 loss: 1.597e-01] my_first_lora_v8: 5%|4 | 191/4000 [03:28<1:09:10, 1.09s/it, lr: 1.0e-04 loss: 1.597e-01] my_first_lora_v8: 5%|4 | 192/4000 [03:29<1:09:05, 1.09s/it, lr: 1.0e-04 loss: 7.107e-01] my_first_lora_v8: 5%|4 | 192/4000 [03:29<1:09:05, 1.09s/it, lr: 1.0e-04 loss: 7.107e-01] my_first_lora_v8: 5%|4 | 193/4000 [03:30<1:09:12, 1.09s/it, lr: 1.0e-04 loss: 2.934e-01] my_first_lora_v8: 5%|4 | 193/4000 [03:30<1:09:12, 1.09s/it, lr: 1.0e-04 loss: 2.934e-01] my_first_lora_v8: 5%|4 | 194/4000 [03:31<1:09:06, 1.09s/it, lr: 1.0e-04 loss: 1.278e-01] my_first_lora_v8: 5%|4 | 194/4000 [03:31<1:09:06, 1.09s/it, lr: 1.0e-04 loss: 1.278e-01] my_first_lora_v8: 5%|4 | 195/4000 [03:32<1:08:59, 1.09s/it, lr: 1.0e-04 loss: 8.088e-01] my_first_lora_v8: 5%|4 | 195/4000 [03:32<1:08:59, 1.09s/it, lr: 1.0e-04 loss: 8.088e-01] my_first_lora_v8: 5%|4 | 196/4000 [03:33<1:09:06, 1.09s/it, lr: 1.0e-04 loss: 5.411e-01] my_first_lora_v8: 5%|4 | 196/4000 [03:33<1:09:06, 1.09s/it, lr: 1.0e-04 loss: 5.411e-01] my_first_lora_v8: 5%|4 | 197/4000 [03:34<1:08:59, 1.09s/it, lr: 1.0e-04 loss: 1.063e-01] my_first_lora_v8: 5%|4 | 197/4000 [03:34<1:08:59, 1.09s/it, lr: 1.0e-04 loss: 1.063e-01] my_first_lora_v8: 5%|4 | 198/4000 [03:35<1:08:53, 1.09s/it, lr: 1.0e-04 loss: 5.162e-01] my_first_lora_v8: 5%|4 | 198/4000 [03:35<1:08:53, 1.09s/it, lr: 1.0e-04 loss: 5.162e-01] my_first_lora_v8: 5%|4 | 199/4000 [03:36<1:09:00, 1.09s/it, lr: 1.0e-04 loss: 6.718e-01] my_first_lora_v8: 5%|4 | 199/4000 [03:36<1:09:00, 1.09s/it, lr: 1.0e-04 loss: 6.718e-01] my_first_lora_v8: 5%|5 | 200/4000 [03:37<1:08:53, 1.09s/it, lr: 1.0e-04 loss: 6.649e-01] my_first_lora_v8: 5%|5 | 200/4000 [03:37<1:08:53, 1.09s/it, lr: 1.0e-04 loss: 6.649e-01] my_first_lora_v8: 5%|5 | 201/4000 [03:38<1:08:47, 1.09s/it, lr: 1.0e-04 loss: 7.657e-01] my_first_lora_v8: 5%|5 | 201/4000 [03:38<1:08:47, 1.09s/it, lr: 1.0e-04 loss: 7.657e-01] my_first_lora_v8: 5%|5 | 202/4000 [03:39<1:08:48, 1.09s/it, lr: 1.0e-04 loss: 5.083e-01] my_first_lora_v8: 5%|5 | 202/4000 [03:39<1:08:48, 1.09s/it, lr: 1.0e-04 loss: 5.083e-01] my_first_lora_v8: 5%|5 | 203/4000 [03:40<1:08:44, 1.09s/it, lr: 1.0e-04 loss: 2.306e-01] my_first_lora_v8: 5%|5 | 203/4000 [03:40<1:08:44, 1.09s/it, lr: 1.0e-04 loss: 2.306e-01] my_first_lora_v8: 5%|5 | 204/4000 [03:42<1:08:51, 1.09s/it, lr: 1.0e-04 loss: 6.810e-01] my_first_lora_v8: 5%|5 | 204/4000 [03:42<1:08:51, 1.09s/it, lr: 1.0e-04 loss: 6.810e-01] my_first_lora_v8: 5%|5 | 205/4000 [03:42<1:08:45, 1.09s/it, lr: 1.0e-04 loss: 6.845e-01] my_first_lora_v8: 5%|5 | 205/4000 [03:42<1:08:45, 1.09s/it, lr: 1.0e-04 loss: 6.845e-01] my_first_lora_v8: 5%|5 | 206/4000 [03:43<1:08:39, 1.09s/it, lr: 1.0e-04 loss: 1.408e-01] my_first_lora_v8: 5%|5 | 206/4000 [03:43<1:08:39, 1.09s/it, lr: 1.0e-04 loss: 1.408e-01] my_first_lora_v8: 5%|5 | 207/4000 [03:44<1:08:32, 1.08s/it, lr: 1.0e-04 loss: 6.814e-01] my_first_lora_v8: 5%|5 | 207/4000 [03:44<1:08:32, 1.08s/it, lr: 1.0e-04 loss: 6.814e-01] my_first_lora_v8: 5%|5 | 208/4000 [03:45<1:08:26, 1.08s/it, lr: 1.0e-04 loss: 6.371e-01] my_first_lora_v8: 5%|5 | 208/4000 [03:45<1:08:26, 1.08s/it, lr: 1.0e-04 loss: 6.371e-01] my_first_lora_v8: 5%|5 | 209/4000 [03:46<1:08:32, 1.08s/it, lr: 1.0e-04 loss: 2.555e-01] my_first_lora_v8: 5%|5 | 209/4000 [03:46<1:08:32, 1.08s/it, lr: 1.0e-04 loss: 2.555e-01] my_first_lora_v8: 5%|5 | 210/4000 [03:48<1:08:38, 1.09s/it, lr: 1.0e-04 loss: 8.141e-01] my_first_lora_v8: 5%|5 | 210/4000 [03:48<1:08:38, 1.09s/it, lr: 1.0e-04 loss: 8.141e-01] my_first_lora_v8: 5%|5 | 211/4000 [03:49<1:08:32, 1.09s/it, lr: 1.0e-04 loss: 2.098e-01] my_first_lora_v8: 5%|5 | 211/4000 [03:49<1:08:32, 1.09s/it, lr: 1.0e-04 loss: 2.098e-01] my_first_lora_v8: 5%|5 | 212/4000 [03:50<1:08:38, 1.09s/it, lr: 1.0e-04 loss: 6.997e-01] my_first_lora_v8: 5%|5 | 212/4000 [03:50<1:08:38, 1.09s/it, lr: 1.0e-04 loss: 6.997e-01] my_first_lora_v8: 5%|5 | 213/4000 [03:51<1:08:32, 1.09s/it, lr: 1.0e-04 loss: 3.055e-01] my_first_lora_v8: 5%|5 | 213/4000 [03:51<1:08:32, 1.09s/it, lr: 1.0e-04 loss: 3.055e-01] my_first_lora_v8: 5%|5 | 214/4000 [03:52<1:08:26, 1.08s/it, lr: 1.0e-04 loss: 1.091e-01] my_first_lora_v8: 5%|5 | 214/4000 [03:52<1:08:26, 1.08s/it, lr: 1.0e-04 loss: 1.091e-01] my_first_lora_v8: 5%|5 | 215/4000 [03:54<1:08:40, 1.09s/it, lr: 1.0e-04 loss: 8.132e-01] my_first_lora_v8: 5%|5 | 215/4000 [03:54<1:08:40, 1.09s/it, lr: 1.0e-04 loss: 8.132e-01] my_first_lora_v8: 5%|5 | 216/4000 [03:54<1:08:33, 1.09s/it, lr: 1.0e-04 loss: 5.008e-01] my_first_lora_v8: 5%|5 | 216/4000 [03:54<1:08:33, 1.09s/it, lr: 1.0e-04 loss: 5.008e-01] my_first_lora_v8: 5%|5 | 217/4000 [03:55<1:08:27, 1.09s/it, lr: 1.0e-04 loss: 5.262e-01] my_first_lora_v8: 5%|5 | 217/4000 [03:55<1:08:27, 1.09s/it, lr: 1.0e-04 loss: 5.262e-01] my_first_lora_v8: 5%|5 | 218/4000 [03:56<1:08:22, 1.08s/it, lr: 1.0e-04 loss: 6.811e-01] my_first_lora_v8: 5%|5 | 218/4000 [03:56<1:08:22, 1.08s/it, lr: 1.0e-04 loss: 6.811e-01] my_first_lora_v8: 5%|5 | 219/4000 [03:57<1:08:16, 1.08s/it, lr: 1.0e-04 loss: 6.432e-01] my_first_lora_v8: 5%|5 | 219/4000 [03:57<1:08:16, 1.08s/it, lr: 1.0e-04 loss: 6.432e-01] my_first_lora_v8: 6%|5 | 220/4000 [03:58<1:08:22, 1.09s/it, lr: 1.0e-04 loss: 1.342e-01] my_first_lora_v8: 6%|5 | 220/4000 [03:58<1:08:22, 1.09s/it, lr: 1.0e-04 loss: 1.342e-01] my_first_lora_v8: 6%|5 | 221/4000 [04:00<1:08:28, 1.09s/it, lr: 1.0e-04 loss: 4.768e-01] my_first_lora_v8: 6%|5 | 221/4000 [04:00<1:08:28, 1.09s/it, lr: 1.0e-04 loss: 4.768e-01] my_first_lora_v8: 6%|5 | 222/4000 [04:01<1:08:22, 1.09s/it, lr: 1.0e-04 loss: 6.793e-01] my_first_lora_v8: 6%|5 | 222/4000 [04:01<1:08:22, 1.09s/it, lr: 1.0e-04 loss: 6.793e-01] my_first_lora_v8: 6%|5 | 223/4000 [04:01<1:08:15, 1.08s/it, lr: 1.0e-04 loss: 5.641e-01] my_first_lora_v8: 6%|5 | 223/4000 [04:01<1:08:15, 1.08s/it, lr: 1.0e-04 loss: 5.641e-01] my_first_lora_v8: 6%|5 | 224/4000 [04:02<1:08:09, 1.08s/it, lr: 1.0e-04 loss: 5.284e-01] my_first_lora_v8: 6%|5 | 224/4000 [04:02<1:08:09, 1.08s/it, lr: 1.0e-04 loss: 5.284e-01] my_first_lora_v8: 6%|5 | 225/4000 [04:03<1:08:03, 1.08s/it, lr: 1.0e-04 loss: 2.571e-01] my_first_lora_v8: 6%|5 | 225/4000 [04:03<1:08:03, 1.08s/it, lr: 1.0e-04 loss: 2.571e-01] my_first_lora_v8: 6%|5 | 226/4000 [04:04<1:08:09, 1.08s/it, lr: 1.0e-04 loss: 6.738e-01] my_first_lora_v8: 6%|5 | 226/4000 [04:04<1:08:09, 1.08s/it, lr: 1.0e-04 loss: 6.738e-01] my_first_lora_v8: 6%|5 | 227/4000 [04:05<1:08:07, 1.08s/it, lr: 1.0e-04 loss: 4.581e-01] my_first_lora_v8: 6%|5 | 227/4000 [04:05<1:08:07, 1.08s/it, lr: 1.0e-04 loss: 4.581e-01] my_first_lora_v8: 6%|5 | 228/4000 [04:07<1:08:08, 1.08s/it, lr: 1.0e-04 loss: 7.002e-01] my_first_lora_v8: 6%|5 | 228/4000 [04:07<1:08:08, 1.08s/it, lr: 1.0e-04 loss: 7.002e-01] my_first_lora_v8: 6%|5 | 229/4000 [04:08<1:08:04, 1.08s/it, lr: 1.0e-04 loss: 6.797e-01] my_first_lora_v8: 6%|5 | 229/4000 [04:08<1:08:04, 1.08s/it, lr: 1.0e-04 loss: 6.797e-01] my_first_lora_v8: 6%|5 | 230/4000 [04:09<1:08:10, 1.08s/it, lr: 1.0e-04 loss: 3.435e-01] my_first_lora_v8: 6%|5 | 230/4000 [04:09<1:08:10, 1.08s/it, lr: 1.0e-04 loss: 3.435e-01] my_first_lora_v8: 6%|5 | 231/4000 [04:11<1:08:15, 1.09s/it, lr: 1.0e-04 loss: 2.397e-01] my_first_lora_v8: 6%|5 | 231/4000 [04:11<1:08:15, 1.09s/it, lr: 1.0e-04 loss: 2.397e-01] my_first_lora_v8: 6%|5 | 232/4000 [04:11<1:08:10, 1.09s/it, lr: 1.0e-04 loss: 1.405e-01] my_first_lora_v8: 6%|5 | 232/4000 [04:11<1:08:10, 1.09s/it, lr: 1.0e-04 loss: 1.405e-01] my_first_lora_v8: 6%|5 | 233/4000 [04:12<1:08:04, 1.08s/it, lr: 1.0e-04 loss: 2.783e-01] my_first_lora_v8: 6%|5 | 233/4000 [04:12<1:08:04, 1.08s/it, lr: 1.0e-04 loss: 2.783e-01] my_first_lora_v8: 6%|5 | 234/4000 [04:14<1:08:10, 1.09s/it, lr: 1.0e-04 loss: 5.431e-01] my_first_lora_v8: 6%|5 | 234/4000 [04:14<1:08:10, 1.09s/it, lr: 1.0e-04 loss: 5.431e-01] my_first_lora_v8: 6%|5 | 235/4000 [04:15<1:08:15, 1.09s/it, lr: 1.0e-04 loss: 5.711e-01] my_first_lora_v8: 6%|5 | 235/4000 [04:15<1:08:15, 1.09s/it, lr: 1.0e-04 loss: 5.711e-01] my_first_lora_v8: 6%|5 | 236/4000 [04:16<1:08:10, 1.09s/it, lr: 1.0e-04 loss: 5.237e-01] my_first_lora_v8: 6%|5 | 236/4000 [04:16<1:08:10, 1.09s/it, lr: 1.0e-04 loss: 5.237e-01] my_first_lora_v8: 6%|5 | 237/4000 [04:17<1:08:04, 1.09s/it, lr: 1.0e-04 loss: 2.486e-01] my_first_lora_v8: 6%|5 | 237/4000 [04:17<1:08:04, 1.09s/it, lr: 1.0e-04 loss: 2.486e-01] my_first_lora_v8: 6%|5 | 238/4000 [04:18<1:07:59, 1.08s/it, lr: 1.0e-04 loss: 5.187e-01] my_first_lora_v8: 6%|5 | 238/4000 [04:18<1:07:59, 1.08s/it, lr: 1.0e-04 loss: 5.187e-01] my_first_lora_v8: 6%|5 | 239/4000 [04:19<1:07:57, 1.08s/it, lr: 1.0e-04 loss: 1.835e-01] my_first_lora_v8: 6%|5 | 239/4000 [04:19<1:07:57, 1.08s/it, lr: 1.0e-04 loss: 1.835e-01] my_first_lora_v8: 6%|6 | 240/4000 [04:20<1:08:02, 1.09s/it, lr: 1.0e-04 loss: 1.693e-01] my_first_lora_v8: 6%|6 | 240/4000 [04:20<1:08:02, 1.09s/it, lr: 1.0e-04 loss: 1.693e-01] my_first_lora_v8: 6%|6 | 241/4000 [04:22<1:08:07, 1.09s/it, lr: 1.0e-04 loss: 9.192e-02] my_first_lora_v8: 6%|6 | 241/4000 [04:22<1:08:07, 1.09s/it, lr: 1.0e-04 loss: 9.192e-02] my_first_lora_v8: 6%|6 | 242/4000 [04:23<1:08:08, 1.09s/it, lr: 1.0e-04 loss: 6.201e-01] my_first_lora_v8: 6%|6 | 242/4000 [04:23<1:08:08, 1.09s/it, lr: 1.0e-04 loss: 6.201e-01] my_first_lora_v8: 6%|6 | 243/4000 [04:24<1:08:03, 1.09s/it, lr: 1.0e-04 loss: 5.316e-01] my_first_lora_v8: 6%|6 | 243/4000 [04:24<1:08:03, 1.09s/it, lr: 1.0e-04 loss: 5.316e-01] my_first_lora_v8: 6%|6 | 244/4000 [04:25<1:08:08, 1.09s/it, lr: 1.0e-04 loss: 6.515e-01] my_first_lora_v8: 6%|6 | 244/4000 [04:25<1:08:08, 1.09s/it, lr: 1.0e-04 loss: 6.515e-01] my_first_lora_v8: 6%|6 | 245/4000 [04:26<1:08:03, 1.09s/it, lr: 1.0e-04 loss: 8.605e-02] my_first_lora_v8: 6%|6 | 245/4000 [04:26<1:08:03, 1.09s/it, lr: 1.0e-04 loss: 8.605e-02] my_first_lora_v8: 6%|6 | 246/4000 [04:27<1:07:57, 1.09s/it, lr: 1.0e-04 loss: 4.219e-01] my_first_lora_v8: 6%|6 | 246/4000 [04:27<1:07:57, 1.09s/it, lr: 1.0e-04 loss: 4.219e-01] my_first_lora_v8: 6%|6 | 247/4000 [04:28<1:07:52, 1.09s/it, lr: 1.0e-04 loss: 2.738e-01] my_first_lora_v8: 6%|6 | 247/4000 [04:28<1:07:52, 1.09s/it, lr: 1.0e-04 loss: 2.738e-01] my_first_lora_v8: 6%|6 | 248/4000 [04:28<1:07:47, 1.08s/it, lr: 1.0e-04 loss: 8.311e-01] my_first_lora_v8: 6%|6 | 248/4000 [04:28<1:07:47, 1.08s/it, lr: 1.0e-04 loss: 8.311e-01] my_first_lora_v8: 6%|6 | 249/4000 [04:30<1:07:52, 1.09s/it, lr: 1.0e-04 loss: 4.654e-01] my_first_lora_v8: 6%|6 | 249/4000 [04:30<1:07:52, 1.09s/it, lr: 1.0e-04 loss: 4.654e-01]
Saving at step 250
Saved checkpoint to /teamspace/studios/this_studio/ai-toolkit/output/my_first_lora_v8/my_first_lora_v8_000000250.safetensors
Saved optimizer to /teamspace/studios/this_studio/ai-toolkit/output/my_first_lora_v8/optimizer.pt
Unloading assistant lora
Generating Images: 0%| | 0/5 [00:00<?, ?it/s] Generating Images: 0%| | 0/5 [00:00<?, ?it/s]
Generating Images: 20%|## | 1/5 [00:08<00:34, 8.61s/it] Generating Images: 20%|## | 1/5 [00:08<00:34, 8.61s/it]
Generating Images: 40%|#### | 2/5 [00:16<00:24, 8.29s/it] Generating Images: 40%|#### | 2/5 [00:16<00:24, 8.29s/it]
Generating Images: 60%|###### | 3/5 [00:24<00:16, 8.05s/it] Generating Images: 60%|###### | 3/5 [00:24<00:16, 8.05s/it]