Instructions to use pritamdeka/muril-base-cased-assamese with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use pritamdeka/muril-base-cased-assamese with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("fill-mask", model="pritamdeka/muril-base-cased-assamese")
```

```python
# Load model directly
from transformers import AutoTokenizer, AutoModelForMaskedLM

tokenizer = AutoTokenizer.from_pretrained("pritamdeka/muril-base-cased-assamese")
model = AutoModelForMaskedLM.from_pretrained("pritamdeka/muril-base-cased-assamese")
```

- Notebooks
- Google Colab
- Kaggle
| { | |
| "best_metric": 0.7026871898897578, | |
| "best_model_checkpoint": "/content/temp_assamese/checkpoint-28000", | |
| "epoch": 2.0, | |
| "eval_steps": 2000, | |
| "global_step": 28386, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.14091453533431975, | |
| "grad_norm": 12.538192749023438, | |
| "learning_rate": 4.648418234340873e-05, | |
| "loss": 2.2163, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.14091453533431975, | |
| "eval_accuracy": 0.6319857833787309, | |
| "eval_loss": 1.8646236658096313, | |
| "eval_runtime": 102.3117, | |
| "eval_samples_per_second": 116.966, | |
| "eval_steps_per_second": 7.311, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.2818290706686395, | |
| "grad_norm": 12.645801544189453, | |
| "learning_rate": 4.296131896005073e-05, | |
| "loss": 1.9456, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.2818290706686395, | |
| "eval_accuracy": 0.6494802758779904, | |
| "eval_loss": 1.749164342880249, | |
| "eval_runtime": 102.5618, | |
| "eval_samples_per_second": 116.681, | |
| "eval_steps_per_second": 7.293, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.4227436060029592, | |
| "grad_norm": 9.79688835144043, | |
| "learning_rate": 3.943845557669274e-05, | |
| "loss": 1.8391, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.4227436060029592, | |
| "eval_accuracy": 0.6606430993204072, | |
| "eval_loss": 1.6770141124725342, | |
| "eval_runtime": 102.5061, | |
| "eval_samples_per_second": 116.744, | |
| "eval_steps_per_second": 7.297, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.563658141337279, | |
| "grad_norm": 10.446520805358887, | |
| "learning_rate": 3.5917353625026426e-05, | |
| "loss": 1.7704, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.563658141337279, | |
| "eval_accuracy": 0.6706733344622967, | |
| "eval_loss": 1.6165672540664673, | |
| "eval_runtime": 102.6569, | |
| "eval_samples_per_second": 116.573, | |
| "eval_steps_per_second": 7.286, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.7045726766715987, | |
| "grad_norm": 9.16817569732666, | |
| "learning_rate": 3.23962516733601e-05, | |
| "loss": 1.7213, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.7045726766715987, | |
| "eval_accuracy": 0.6759152415500662, | |
| "eval_loss": 1.5817841291427612, | |
| "eval_runtime": 102.3347, | |
| "eval_samples_per_second": 116.94, | |
| "eval_steps_per_second": 7.309, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.8454872120059184, | |
| "grad_norm": 9.296396255493164, | |
| "learning_rate": 2.8875149721693794e-05, | |
| "loss": 1.6802, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.8454872120059184, | |
| "eval_accuracy": 0.6820033844378066, | |
| "eval_loss": 1.5402722358703613, | |
| "eval_runtime": 102.6712, | |
| "eval_samples_per_second": 116.557, | |
| "eval_steps_per_second": 7.285, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.9864017473402381, | |
| "grad_norm": 7.622576713562012, | |
| "learning_rate": 2.5354047770027478e-05, | |
| "loss": 1.6432, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 0.9864017473402381, | |
| "eval_accuracy": 0.6857634747617221, | |
| "eval_loss": 1.5153496265411377, | |
| "eval_runtime": 102.6162, | |
| "eval_samples_per_second": 116.619, | |
| "eval_steps_per_second": 7.289, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 1.127316282674558, | |
| "grad_norm": 11.516377449035645, | |
| "learning_rate": 2.1832945818361165e-05, | |
| "loss": 1.6074, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 1.127316282674558, | |
| "eval_accuracy": 0.6885159496543541, | |
| "eval_loss": 1.496454119682312, | |
| "eval_runtime": 102.8787, | |
| "eval_samples_per_second": 116.321, | |
| "eval_steps_per_second": 7.271, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 1.2682308180088775, | |
| "grad_norm": 9.480605125427246, | |
| "learning_rate": 1.831008243500317e-05, | |
| "loss": 1.5833, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 1.2682308180088775, | |
| "eval_accuracy": 0.6934179945828158, | |
| "eval_loss": 1.4677945375442505, | |
| "eval_runtime": 102.7153, | |
| "eval_samples_per_second": 116.506, | |
| "eval_steps_per_second": 7.282, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 1.4091453533431975, | |
| "grad_norm": 8.643658638000488, | |
| "learning_rate": 1.4788980483336856e-05, | |
| "loss": 1.5649, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 1.4091453533431975, | |
| "eval_accuracy": 0.6950099353567151, | |
| "eval_loss": 1.4508079290390015, | |
| "eval_runtime": 102.6391, | |
| "eval_samples_per_second": 116.593, | |
| "eval_steps_per_second": 7.288, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 1.550059888677517, | |
| "grad_norm": 7.6539506912231445, | |
| "learning_rate": 1.1267878531670542e-05, | |
| "loss": 1.553, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 1.550059888677517, | |
| "eval_accuracy": 0.698540482055296, | |
| "eval_loss": 1.436693787574768, | |
| "eval_runtime": 102.5486, | |
| "eval_samples_per_second": 116.696, | |
| "eval_steps_per_second": 7.294, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 1.690974424011837, | |
| "grad_norm": 8.063584327697754, | |
| "learning_rate": 7.746776580004228e-06, | |
| "loss": 1.5345, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 1.690974424011837, | |
| "eval_accuracy": 0.7001222876777317, | |
| "eval_loss": 1.4230775833129883, | |
| "eval_runtime": 102.8829, | |
| "eval_samples_per_second": 116.317, | |
| "eval_steps_per_second": 7.27, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 1.8318889593461565, | |
| "grad_norm": 8.720465660095215, | |
| "learning_rate": 4.2256746283379135e-06, | |
| "loss": 1.5261, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 1.8318889593461565, | |
| "eval_accuracy": 0.701337358949075, | |
| "eval_loss": 1.4157360792160034, | |
| "eval_runtime": 102.7046, | |
| "eval_samples_per_second": 116.519, | |
| "eval_steps_per_second": 7.283, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 1.9728034946804764, | |
| "grad_norm": 9.144937515258789, | |
| "learning_rate": 7.045726766715987e-07, | |
| "loss": 1.5148, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 1.9728034946804764, | |
| "eval_accuracy": 0.7026871898897578, | |
| "eval_loss": 1.4097787141799927, | |
| "eval_runtime": 102.5398, | |
| "eval_samples_per_second": 116.706, | |
| "eval_steps_per_second": 7.295, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "step": 28386, | |
| "total_flos": 1.197729267088466e+17, | |
| "train_loss": 1.6903211268009264, | |
| "train_runtime": 8975.6005, | |
| "train_samples_per_second": 50.601, | |
| "train_steps_per_second": 3.163 | |
| } | |
| ], | |
| "logging_steps": 2000, | |
| "max_steps": 28386, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 2000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.197729267088466e+17, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |