Instructions to use Howard881010/heat_transfer_sft_10000_mcq_u_1epoch with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- PEFT
How to use Howard881010/heat_transfer_sft_10000_mcq_u_1epoch with PEFT:
```python
from peft import PeftModel
from transformers import AutoModelForCausalLM

base_model = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-Nemo-Instruct-2407")
model = PeftModel.from_pretrained(base_model, "Howard881010/heat_transfer_sft_10000_mcq_u_1epoch")
```
- Notebooks
- Google Colab
- Kaggle
| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "eval_steps": 50, | |
| "global_step": 439, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.04555808656036447, | |
| "grad_norm": 0.5296587944030762, | |
| "learning_rate": 9.948875483893885e-05, | |
| "loss": 0.3433, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.09111617312072894, | |
| "grad_norm": 0.042843956500291824, | |
| "learning_rate": 9.796547422034374e-05, | |
| "loss": 0.0076, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.11389521640091116, | |
| "eval_loss": 0.005896100774407387, | |
| "eval_runtime": 122.4009, | |
| "eval_samples_per_second": 7.957, | |
| "eval_steps_per_second": 0.4, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.1366742596810934, | |
| "grad_norm": 0.04506906867027283, | |
| "learning_rate": 9.546130893802246e-05, | |
| "loss": 0.006, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.18223234624145787, | |
| "grad_norm": 0.04249206930398941, | |
| "learning_rate": 9.20274686872984e-05, | |
| "loss": 0.006, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.22779043280182232, | |
| "grad_norm": 0.034582458436489105, | |
| "learning_rate": 8.773417483665309e-05, | |
| "loss": 0.0054, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.22779043280182232, | |
| "eval_loss": 0.004958716221153736, | |
| "eval_runtime": 122.3931, | |
| "eval_samples_per_second": 7.958, | |
| "eval_steps_per_second": 0.4, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.2733485193621868, | |
| "grad_norm": 0.09121271967887878, | |
| "learning_rate": 8.266922441433284e-05, | |
| "loss": 0.0043, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.31890660592255127, | |
| "grad_norm": 0.025009050965309143, | |
| "learning_rate": 7.693619467611464e-05, | |
| "loss": 0.0041, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.3416856492027335, | |
| "eval_loss": 0.0036018535029143095, | |
| "eval_runtime": 122.4383, | |
| "eval_samples_per_second": 7.955, | |
| "eval_steps_per_second": 0.4, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.36446469248291574, | |
| "grad_norm": 0.06171397864818573, | |
| "learning_rate": 7.065232497047384e-05, | |
| "loss": 0.0036, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.41002277904328016, | |
| "grad_norm": 0.0585937425494194, | |
| "learning_rate": 6.394611921660036e-05, | |
| "loss": 0.003, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.45558086560364464, | |
| "grad_norm": 0.03573513403534889, | |
| "learning_rate": 5.695471802412413e-05, | |
| "loss": 0.0026, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.45558086560364464, | |
| "eval_loss": 0.0025479402393102646, | |
| "eval_runtime": 122.4406, | |
| "eval_samples_per_second": 7.955, | |
| "eval_steps_per_second": 0.4, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.5011389521640092, | |
| "grad_norm": 0.08403529226779938, | |
| "learning_rate": 4.982109419419277e-05, | |
| "loss": 0.0023, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.5466970387243736, | |
| "grad_norm": 0.08128103613853455, | |
| "learning_rate": 4.269112895336161e-05, | |
| "loss": 0.0025, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.5694760820045558, | |
| "eval_loss": 0.002369565423578024, | |
| "eval_runtime": 122.4121, | |
| "eval_samples_per_second": 7.957, | |
| "eval_steps_per_second": 0.4, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.592255125284738, | |
| "grad_norm": 0.04797542467713356, | |
| "learning_rate": 3.5710628710747e-05, | |
| "loss": 0.0025, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.6378132118451025, | |
| "grad_norm": 0.041951198130846024, | |
| "learning_rate": 2.9022343345181846e-05, | |
| "loss": 0.0022, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.683371298405467, | |
| "grad_norm": 0.03911906108260155, | |
| "learning_rate": 2.276304699782381e-05, | |
| "loss": 0.0022, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.683371298405467, | |
| "eval_loss": 0.0020905383862555027, | |
| "eval_runtime": 122.4348, | |
| "eval_samples_per_second": 7.955, | |
| "eval_steps_per_second": 0.4, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.7289293849658315, | |
| "grad_norm": 0.07351183891296387, | |
| "learning_rate": 1.7060741067442288e-05, | |
| "loss": 0.0021, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.7744874715261959, | |
| "grad_norm": 0.06717297434806824, | |
| "learning_rate": 1.2032036606589175e-05, | |
| "loss": 0.0022, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.7972665148063781, | |
| "eval_loss": 0.0018556159920990467, | |
| "eval_runtime": 122.4481, | |
| "eval_samples_per_second": 7.954, | |
| "eval_steps_per_second": 0.4, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.8200455580865603, | |
| "grad_norm": 0.03857114166021347, | |
| "learning_rate": 7.779769648145201e-06, | |
| "loss": 0.0015, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.8656036446469249, | |
| "grad_norm": 0.036848343908786774, | |
| "learning_rate": 4.390898228352131e-06, | |
| "loss": 0.0018, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.9111617312072893, | |
| "grad_norm": 0.09445371478796005, | |
| "learning_rate": 1.9347241118030823e-06, | |
| "loss": 0.0021, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.9111617312072893, | |
| "eval_loss": 0.0018473172094672918, | |
| "eval_runtime": 122.4479, | |
| "eval_samples_per_second": 7.954, | |
| "eval_steps_per_second": 0.4, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.9567198177676538, | |
| "grad_norm": 0.04989920184016228, | |
| "learning_rate": 4.614755837704321e-07, | |
| "loss": 0.0018, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "step": 439, | |
| "total_flos": 9.38862713986089e+17, | |
| "train_loss": 0.018706909701194197, | |
| "train_runtime": 4591.0233, | |
| "train_samples_per_second": 1.909, | |
| "train_steps_per_second": 0.096 | |
| } | |
| ], | |
| "logging_steps": 20, | |
| "max_steps": 439, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 2000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 9.38862713986089e+17, | |
| "train_batch_size": 10, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |