Instructions to use Howard881010/heat_transfer_sft_10000_mcq_2epoch with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- PEFT
How to use Howard881010/heat_transfer_sft_10000_mcq_2epoch with PEFT:
```python
from peft import PeftModel
from transformers import AutoModelForCausalLM

base_model = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-Nemo-Instruct-2407")
model = PeftModel.from_pretrained(base_model, "Howard881010/heat_transfer_sft_10000_mcq_2epoch")
```
- Notebooks
- Google Colab
- Kaggle
| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.0, | |
| "eval_steps": 30, | |
| "global_step": 450, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.044444444444444446, | |
| "grad_norm": 0.318470299243927, | |
| "learning_rate": 9.987820251299122e-05, | |
| "loss": 0.4455, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.08888888888888889, | |
| "grad_norm": 0.02355371043086052, | |
| "learning_rate": 9.951340343707852e-05, | |
| "loss": 0.0053, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.13333333333333333, | |
| "grad_norm": 0.036702342331409454, | |
| "learning_rate": 9.890738003669029e-05, | |
| "loss": 0.004, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.13333333333333333, | |
| "eval_loss": 0.0037715784274041653, | |
| "eval_runtime": 108.061, | |
| "eval_samples_per_second": 9.254, | |
| "eval_steps_per_second": 0.231, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.17777777777777778, | |
| "grad_norm": 0.07428745925426483, | |
| "learning_rate": 9.806308479691595e-05, | |
| "loss": 0.0038, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.2222222222222222, | |
| "grad_norm": 0.07172686606645584, | |
| "learning_rate": 9.698463103929542e-05, | |
| "loss": 0.0038, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.26666666666666666, | |
| "grad_norm": 0.03130387142300606, | |
| "learning_rate": 9.567727288213005e-05, | |
| "loss": 0.0036, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.26666666666666666, | |
| "eval_loss": 0.0035628757905215025, | |
| "eval_runtime": 108.1607, | |
| "eval_samples_per_second": 9.246, | |
| "eval_steps_per_second": 0.231, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.3111111111111111, | |
| "grad_norm": 0.02135792188346386, | |
| "learning_rate": 9.414737964294636e-05, | |
| "loss": 0.0036, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.35555555555555557, | |
| "grad_norm": 0.1094546914100647, | |
| "learning_rate": 9.24024048078213e-05, | |
| "loss": 0.0033, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "grad_norm": 0.15919151902198792, | |
| "learning_rate": 9.045084971874738e-05, | |
| "loss": 0.0035, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "eval_loss": 0.003310541156679392, | |
| "eval_runtime": 108.1107, | |
| "eval_samples_per_second": 9.25, | |
| "eval_steps_per_second": 0.231, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.4444444444444444, | |
| "grad_norm": 0.024381157010793686, | |
| "learning_rate": 8.83022221559489e-05, | |
| "loss": 0.0029, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.4888888888888889, | |
| "grad_norm": 0.05485767126083374, | |
| "learning_rate": 8.596699001693255e-05, | |
| "loss": 0.002, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.5333333333333333, | |
| "grad_norm": 0.08088912814855576, | |
| "learning_rate": 8.345653031794292e-05, | |
| "loss": 0.0024, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.5333333333333333, | |
| "eval_loss": 0.0021427080500870943, | |
| "eval_runtime": 108.1623, | |
| "eval_samples_per_second": 9.245, | |
| "eval_steps_per_second": 0.231, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.5777777777777777, | |
| "grad_norm": 0.09581312537193298, | |
| "learning_rate": 8.07830737662829e-05, | |
| "loss": 0.0019, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.6222222222222222, | |
| "grad_norm": 0.06600484997034073, | |
| "learning_rate": 7.795964517353735e-05, | |
| "loss": 0.0012, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.6666666666666666, | |
| "grad_norm": 0.08135157078504562, | |
| "learning_rate": 7.500000000000001e-05, | |
| "loss": 0.0011, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.6666666666666666, | |
| "eval_loss": 0.0011440212838351727, | |
| "eval_runtime": 108.1819, | |
| "eval_samples_per_second": 9.244, | |
| "eval_steps_per_second": 0.231, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.7111111111111111, | |
| "grad_norm": 0.0257169920951128, | |
| "learning_rate": 7.191855733945387e-05, | |
| "loss": 0.0018, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.7555555555555555, | |
| "grad_norm": 0.02252952568233013, | |
| "learning_rate": 6.873032967079561e-05, | |
| "loss": 0.0013, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 0.015104785561561584, | |
| "learning_rate": 6.545084971874738e-05, | |
| "loss": 0.0011, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "eval_loss": 0.0008804052486084402, | |
| "eval_runtime": 108.241, | |
| "eval_samples_per_second": 9.239, | |
| "eval_steps_per_second": 0.231, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.8444444444444444, | |
| "grad_norm": 0.037444427609443665, | |
| "learning_rate": 6.209609477998338e-05, | |
| "loss": 0.0009, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.8888888888888888, | |
| "grad_norm": 0.041397638618946075, | |
| "learning_rate": 5.868240888334653e-05, | |
| "loss": 0.0009, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.9333333333333333, | |
| "grad_norm": 0.04828115180134773, | |
| "learning_rate": 5.522642316338268e-05, | |
| "loss": 0.001, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.9333333333333333, | |
| "eval_loss": 0.0008426142740063369, | |
| "eval_runtime": 108.1605, | |
| "eval_samples_per_second": 9.246, | |
| "eval_steps_per_second": 0.231, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.9777777777777777, | |
| "grad_norm": 0.09075287729501724, | |
| "learning_rate": 5.174497483512506e-05, | |
| "loss": 0.0011, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 1.0222222222222221, | |
| "grad_norm": 0.025804603472352028, | |
| "learning_rate": 4.825502516487497e-05, | |
| "loss": 0.0009, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 1.0666666666666667, | |
| "grad_norm": 0.04228019714355469, | |
| "learning_rate": 4.477357683661734e-05, | |
| "loss": 0.0008, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 1.0666666666666667, | |
| "eval_loss": 0.000761281349696219, | |
| "eval_runtime": 108.2466, | |
| "eval_samples_per_second": 9.238, | |
| "eval_steps_per_second": 0.231, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 1.1111111111111112, | |
| "grad_norm": 0.07708187401294708, | |
| "learning_rate": 4.131759111665349e-05, | |
| "loss": 0.0008, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 1.1555555555555554, | |
| "grad_norm": 0.090398870408535, | |
| "learning_rate": 3.790390522001662e-05, | |
| "loss": 0.0007, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "grad_norm": 0.10483791679143906, | |
| "learning_rate": 3.4549150281252636e-05, | |
| "loss": 0.0009, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "eval_loss": 0.0007036189781501889, | |
| "eval_runtime": 108.3447, | |
| "eval_samples_per_second": 9.23, | |
| "eval_steps_per_second": 0.231, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 1.2444444444444445, | |
| "grad_norm": 0.046087298542261124, | |
| "learning_rate": 3.12696703292044e-05, | |
| "loss": 0.0008, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 1.2888888888888888, | |
| "grad_norm": 0.016715016216039658, | |
| "learning_rate": 2.8081442660546125e-05, | |
| "loss": 0.0007, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 1.3333333333333333, | |
| "grad_norm": 0.0191776305437088, | |
| "learning_rate": 2.500000000000001e-05, | |
| "loss": 0.0006, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 1.3333333333333333, | |
| "eval_loss": 0.0006860981229692698, | |
| "eval_runtime": 108.3015, | |
| "eval_samples_per_second": 9.233, | |
| "eval_steps_per_second": 0.231, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 1.3777777777777778, | |
| "grad_norm": 0.03184030205011368, | |
| "learning_rate": 2.2040354826462668e-05, | |
| "loss": 0.0007, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 1.4222222222222223, | |
| "grad_norm": 0.025107963010668755, | |
| "learning_rate": 1.9216926233717085e-05, | |
| "loss": 0.0007, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 1.4666666666666668, | |
| "grad_norm": 0.023573119193315506, | |
| "learning_rate": 1.6543469682057106e-05, | |
| "loss": 0.0006, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 1.4666666666666668, | |
| "eval_loss": 0.0006219326751306653, | |
| "eval_runtime": 108.2828, | |
| "eval_samples_per_second": 9.235, | |
| "eval_steps_per_second": 0.231, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 1.511111111111111, | |
| "grad_norm": 0.04575636237859726, | |
| "learning_rate": 1.4033009983067452e-05, | |
| "loss": 0.0007, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 1.5555555555555556, | |
| "grad_norm": 0.1070500984787941, | |
| "learning_rate": 1.1697777844051105e-05, | |
| "loss": 0.0007, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "grad_norm": 0.03425155580043793, | |
| "learning_rate": 9.549150281252633e-06, | |
| "loss": 0.0004, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "eval_loss": 0.0006177299073897302, | |
| "eval_runtime": 108.3475, | |
| "eval_samples_per_second": 9.23, | |
| "eval_steps_per_second": 0.231, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 1.6444444444444444, | |
| "grad_norm": 0.030520088970661163, | |
| "learning_rate": 7.597595192178702e-06, | |
| "loss": 0.0006, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 1.6888888888888889, | |
| "grad_norm": 0.010435817763209343, | |
| "learning_rate": 5.852620357053651e-06, | |
| "loss": 0.0005, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 1.7333333333333334, | |
| "grad_norm": 0.020339515060186386, | |
| "learning_rate": 4.322727117869951e-06, | |
| "loss": 0.0007, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 1.7333333333333334, | |
| "eval_loss": 0.0006012282683514059, | |
| "eval_runtime": 108.2882, | |
| "eval_samples_per_second": 9.235, | |
| "eval_steps_per_second": 0.231, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 1.7777777777777777, | |
| "grad_norm": 0.10952532291412354, | |
| "learning_rate": 3.0153689607045845e-06, | |
| "loss": 0.0008, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.8222222222222222, | |
| "grad_norm": 0.017915133386850357, | |
| "learning_rate": 1.9369152030840556e-06, | |
| "loss": 0.0007, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 1.8666666666666667, | |
| "grad_norm": 0.04453803971409798, | |
| "learning_rate": 1.0926199633097157e-06, | |
| "loss": 0.0005, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 1.8666666666666667, | |
| "eval_loss": 0.0006457158015109599, | |
| "eval_runtime": 108.2276, | |
| "eval_samples_per_second": 9.24, | |
| "eval_steps_per_second": 0.231, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 1.911111111111111, | |
| "grad_norm": 0.02466416358947754, | |
| "learning_rate": 4.865965629214819e-07, | |
| "loss": 0.0006, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 1.9555555555555557, | |
| "grad_norm": 0.030644970014691353, | |
| "learning_rate": 1.2179748700879012e-07, | |
| "loss": 0.0005, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.01875125616788864, | |
| "learning_rate": 0.0, | |
| "loss": 0.0005, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_loss": 0.0005785770481452346, | |
| "eval_runtime": 108.2462, | |
| "eval_samples_per_second": 9.238, | |
| "eval_steps_per_second": 0.231, | |
| "step": 450 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 450, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.6644589041987092e+18, | |
| "train_batch_size": 10, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |