LAnA-v5 / benchmark_results.json
manu02's picture
Upload MIMIC test evaluation results
a515a07 verified
Raw
History Blame Contribute Delete
12.9 kB
{
"results": [
{
"method": "qlora_paged_adamw8bit",
"local_batch_size": 1,
"global_batch_size_requested": 1,
"status": "failed",
"error": "element 0 of tensors does not require grad and does not have a grad_fn"
},
{
"method": "qlora_paged_adamw8bit",
"local_batch_size": 1,
"global_batch_size_requested": 8,
"status": "failed",
"error": "element 0 of tensors does not require grad and does not have a grad_fn"
},
{
"method": "qlora_paged_adamw8bit",
"local_batch_size": 1,
"global_batch_size_requested": 16,
"status": "failed",
"error": "element 0 of tensors does not require grad and does not have a grad_fn"
},
{
"method": "qlora_paged_adamw8bit",
"local_batch_size": 2,
"global_batch_size_requested": 2,
"status": "failed",
"error": "element 0 of tensors does not require grad and does not have a grad_fn"
},
{
"method": "qlora_paged_adamw8bit",
"local_batch_size": 2,
"global_batch_size_requested": 8,
"status": "failed",
"error": "element 0 of tensors does not require grad and does not have a grad_fn"
},
{
"method": "qlora_paged_adamw8bit",
"local_batch_size": 2,
"global_batch_size_requested": 16,
"status": "failed",
"error": "element 0 of tensors does not require grad and does not have a grad_fn"
},
{
"method": "qlora_paged_adamw8bit",
"local_batch_size": 4,
"global_batch_size_requested": 4,
"status": "failed",
"error": "element 0 of tensors does not require grad and does not have a grad_fn"
},
{
"method": "qlora_paged_adamw8bit",
"local_batch_size": 4,
"global_batch_size_requested": 8,
"status": "failed",
"error": "element 0 of tensors does not require grad and does not have a grad_fn"
},
{
"method": "qlora_paged_adamw8bit",
"local_batch_size": 4,
"global_batch_size_requested": 16,
"status": "failed",
"error": "element 0 of tensors does not require grad and does not have a grad_fn"
},
{
"method": "lora_adamw",
"local_batch_size": 1,
"global_batch_size_requested": 1,
"status": "ok",
"effective_global_batch_size": 1,
"gradient_accumulation_steps": 1,
"optimizer_step_time_sec": 0.12944729999981064,
"images_per_sec": 7.7251514709187665,
"mean_loss": 9.920842170715332,
"trainable_params": 1106688
},
{
"method": "lora_adamw",
"local_batch_size": 1,
"global_batch_size_requested": 8,
"status": "ok",
"effective_global_batch_size": 8,
"gradient_accumulation_steps": 8,
"optimizer_step_time_sec": 0.792737899999338,
"images_per_sec": 10.091607831550228,
"mean_loss": 8.131502032279968,
"trainable_params": 1106688
},
{
"method": "lora_adamw",
"local_batch_size": 1,
"global_batch_size_requested": 16,
"status": "ok",
"effective_global_batch_size": 16,
"gradient_accumulation_steps": 16,
"optimizer_step_time_sec": 1.6773667999987083,
"images_per_sec": 9.538760395169572,
"mean_loss": 8.80642619729042,
"trainable_params": 1106688
},
{
"method": "lora_adamw",
"local_batch_size": 2,
"global_batch_size_requested": 2,
"status": "ok",
"effective_global_batch_size": 2,
"gradient_accumulation_steps": 1,
"optimizer_step_time_sec": 0.20009290000052715,
"images_per_sec": 9.995357156574427,
"mean_loss": 9.088608741760254,
"trainable_params": 1106688
},
{
"method": "lora_adamw",
"local_batch_size": 2,
"global_batch_size_requested": 8,
"status": "ok",
"effective_global_batch_size": 8,
"gradient_accumulation_steps": 4,
"optimizer_step_time_sec": 0.8304937000011705,
"images_per_sec": 9.63282442719159,
"mean_loss": 8.245712995529175,
"trainable_params": 1106688
},
{
"method": "lora_adamw",
"local_batch_size": 2,
"global_batch_size_requested": 16,
"status": "ok",
"effective_global_batch_size": 16,
"gradient_accumulation_steps": 8,
"optimizer_step_time_sec": 1.6668036999981268,
"images_per_sec": 9.599210752902685,
"mean_loss": 9.106984257698059,
"trainable_params": 1106688
},
{
"method": "lora_adamw",
"local_batch_size": 4,
"global_batch_size_requested": 4,
"status": "ok",
"effective_global_batch_size": 4,
"gradient_accumulation_steps": 1,
"optimizer_step_time_sec": 0.4656030999994982,
"images_per_sec": 8.591008092524106,
"mean_loss": 8.862140655517578,
"trainable_params": 1106688
},
{
"method": "lora_adamw",
"local_batch_size": 4,
"global_batch_size_requested": 8,
"status": "ok",
"effective_global_batch_size": 8,
"gradient_accumulation_steps": 2,
"optimizer_step_time_sec": 2.6093234999989363,
"images_per_sec": 3.0659287742601715,
"mean_loss": 8.241507053375244,
"trainable_params": 1106688
},
{
"method": "lora_adamw",
"local_batch_size": 4,
"global_batch_size_requested": 16,
"status": "ok",
"effective_global_batch_size": 16,
"gradient_accumulation_steps": 4,
"optimizer_step_time_sec": 18.058491499999946,
"images_per_sec": 0.8860097755119827,
"mean_loss": 8.916554927825928,
"trainable_params": 1106688
},
{
"method": "full_adam",
"local_batch_size": 1,
"global_batch_size_requested": 1,
"status": "ok",
"effective_global_batch_size": 1,
"gradient_accumulation_steps": 1,
"optimizer_step_time_sec": 1.4309436000003188,
"images_per_sec": 0.6988395629288094,
"mean_loss": 8.042855262756348,
"trainable_params": 125521920
},
{
"method": "full_adam",
"local_batch_size": 1,
"global_batch_size_requested": 8,
"status": "ok",
"effective_global_batch_size": 8,
"gradient_accumulation_steps": 8,
"optimizer_step_time_sec": 2.7121656999988772,
"images_per_sec": 2.9496722858796245,
"mean_loss": 7.829526960849762,
"trainable_params": 125521920
},
{
"method": "full_adam",
"local_batch_size": 1,
"global_batch_size_requested": 16,
"status": "ok",
"effective_global_batch_size": 16,
"gradient_accumulation_steps": 16,
"optimizer_step_time_sec": 1.8378386999993381,
"images_per_sec": 8.705878268863183,
"mean_loss": 9.189274996519089,
"trainable_params": 125521920
},
{
"method": "full_adam",
"local_batch_size": 2,
"global_batch_size_requested": 2,
"status": "ok",
"effective_global_batch_size": 2,
"gradient_accumulation_steps": 1,
"optimizer_step_time_sec": 0.23647629999868514,
"images_per_sec": 8.457507158269646,
"mean_loss": 9.128178596496582,
"trainable_params": 125521920
},
{
"method": "full_adam",
"local_batch_size": 2,
"global_batch_size_requested": 8,
"status": "ok",
"effective_global_batch_size": 8,
"gradient_accumulation_steps": 4,
"optimizer_step_time_sec": 0.8083188999989943,
"images_per_sec": 9.897083935572896,
"mean_loss": 8.64337944984436,
"trainable_params": 125521920
},
{
"method": "full_adam",
"local_batch_size": 2,
"global_batch_size_requested": 16,
"status": "ok",
"effective_global_batch_size": 16,
"gradient_accumulation_steps": 8,
"optimizer_step_time_sec": 1.8274533999974665,
"images_per_sec": 8.755353214490823,
"mean_loss": 8.331470370292664,
"trainable_params": 125521920
},
{
"method": "full_adam",
"local_batch_size": 4,
"global_batch_size_requested": 4,
"status": "ok",
"effective_global_batch_size": 4,
"gradient_accumulation_steps": 1,
"optimizer_step_time_sec": 0.511095199999545,
"images_per_sec": 7.826330593602838,
"mean_loss": 8.954268455505371,
"trainable_params": 125521920
},
{
"method": "full_adam",
"local_batch_size": 4,
"global_batch_size_requested": 8,
"status": "ok",
"effective_global_batch_size": 8,
"gradient_accumulation_steps": 2,
"optimizer_step_time_sec": 2.2738564999981463,
"images_per_sec": 3.518251921353226,
"mean_loss": 9.192809581756592,
"trainable_params": 125521920
},
{
"method": "full_adam",
"local_batch_size": 4,
"global_batch_size_requested": 16,
"status": "ok",
"effective_global_batch_size": 16,
"gradient_accumulation_steps": 4,
"optimizer_step_time_sec": 18.631701800000883,
"images_per_sec": 0.8587513997244869,
"mean_loss": 8.159156560897827,
"trainable_params": 125521920
},
{
"method": "full_adam8bit",
"local_batch_size": 1,
"global_batch_size_requested": 1,
"status": "ok",
"effective_global_batch_size": 1,
"gradient_accumulation_steps": 1,
"optimizer_step_time_sec": 0.13992360000156623,
"images_per_sec": 7.146757230294293,
"mean_loss": 9.259998321533203,
"trainable_params": 125521920
},
{
"method": "full_adam8bit",
"local_batch_size": 1,
"global_batch_size_requested": 8,
"status": "ok",
"effective_global_batch_size": 8,
"gradient_accumulation_steps": 8,
"optimizer_step_time_sec": 0.8451360999988538,
"images_per_sec": 9.465930990299492,
"mean_loss": 8.10985803604126,
"trainable_params": 125521920
},
{
"method": "full_adam8bit",
"local_batch_size": 1,
"global_batch_size_requested": 16,
"status": "ok",
"effective_global_batch_size": 16,
"gradient_accumulation_steps": 16,
"optimizer_step_time_sec": 1.8945816999930685,
"images_per_sec": 8.445135936897595,
"mean_loss": 8.591163873672485,
"trainable_params": 125521920
},
{
"method": "full_adam8bit",
"local_batch_size": 2,
"global_batch_size_requested": 2,
"status": "ok",
"effective_global_batch_size": 2,
"gradient_accumulation_steps": 1,
"optimizer_step_time_sec": 0.23971350000101666,
"images_per_sec": 8.343293139483249,
"mean_loss": 9.75894832611084,
"trainable_params": 125521920
},
{
"method": "full_adam8bit",
"local_batch_size": 2,
"global_batch_size_requested": 8,
"status": "ok",
"effective_global_batch_size": 8,
"gradient_accumulation_steps": 4,
"optimizer_step_time_sec": 0.9259438999997656,
"images_per_sec": 8.6398322835779,
"mean_loss": 8.462790489196777,
"trainable_params": 125521920
},
{
"method": "full_adam8bit",
"local_batch_size": 2,
"global_batch_size_requested": 16,
"status": "ok",
"effective_global_batch_size": 16,
"gradient_accumulation_steps": 8,
"optimizer_step_time_sec": 1.8237968999983423,
"images_per_sec": 8.772906676184471,
"mean_loss": 10.191668510437012,
"trainable_params": 125521920
},
{
"method": "full_adam8bit",
"local_batch_size": 4,
"global_batch_size_requested": 4,
"status": "ok",
"effective_global_batch_size": 4,
"gradient_accumulation_steps": 1,
"optimizer_step_time_sec": 0.5224713000006886,
"images_per_sec": 7.655922918626779,
"mean_loss": 8.14057445526123,
"trainable_params": 125521920
},
{
"method": "full_adam8bit",
"local_batch_size": 4,
"global_batch_size_requested": 8,
"status": "ok",
"effective_global_batch_size": 8,
"gradient_accumulation_steps": 2,
"optimizer_step_time_sec": 3.7809107000011863,
"images_per_sec": 2.1158923430795364,
"mean_loss": 8.521550178527832,
"trainable_params": 125521920
},
{
"method": "full_adam8bit",
"local_batch_size": 4,
"global_batch_size_requested": 16,
"status": "ok",
"effective_global_batch_size": 16,
"gradient_accumulation_steps": 4,
"optimizer_step_time_sec": 27.688971800002037,
"images_per_sec": 0.5778473868790903,
"mean_loss": 9.247632026672363,
"trainable_params": 125521920
}
]
}