Update README.md

9c8bf1e verified over 2 years ago

18.2 kB

	---
	library_name: peft
	base_model: meta-llama/Llama-2-7b-hf
	---

	# Model Card for Model ID

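	<!-- Provide a quick summary of what the model is/does. -->

	A PEFT adapter (LoRA, judging by the checkpoint name) fine-tuned from `meta-llama/Llama-2-7b-hf`. The JSON below is the Hugging Face Trainer state as of step 19607 (epoch 39 of a planned 40), which is also the best checkpoint, with an evaluation loss of 0.0681.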

	```json
	{
	"best_metric": 0.0680607408285141,
	"best_model_checkpoint": "./Lora-Meta-Llama2-7b-hf-QandA_2g_v01-r2-v01\\checkpoint-19607",
	"epoch": 38.99950273495773,
	"eval_steps": 500,
	"global_step": 19607,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"epoch": 1.0,
	"learning_rate": 9.75e-05,
	"loss": 0.8234,
	"step": 502
	},
	{
	"epoch": 1.0,
	"eval_loss": 0.6993071436882019,
	"eval_runtime": 196.6154,
	"eval_samples_per_second": 2.05,
	"eval_steps_per_second": 0.259,
	"step": 502
	},
	{
	"epoch": 2.0,
	"learning_rate": 9.499501992031873e-05,
	"loss": 0.6127,
	"step": 1005
	},
	{
	"epoch": 2.0,
	"eval_loss": 0.599130392074585,
	"eval_runtime": 196.9551,
	"eval_samples_per_second": 2.046,
	"eval_steps_per_second": 0.259,
	"step": 1005
	},
	{
	"epoch": 3.0,
	"learning_rate": 9.249003984063745e-05,
	"loss": 0.5345,
	"step": 1508
	},
	{
	"epoch": 3.0,
	"eval_loss": 0.5176905989646912,
	"eval_runtime": 198.2897,
	"eval_samples_per_second": 2.032,
	"eval_steps_per_second": 0.257,
	"step": 1508
	},
	{
	"epoch": 4.0,
	"learning_rate": 8.998505976095618e-05,
	"loss": 0.4679,
	"step": 2011
	},
	{
	"epoch": 4.0,
	"eval_loss": 0.4569143056869507,
	"eval_runtime": 196.7986,
	"eval_samples_per_second": 2.048,
	"eval_steps_per_second": 0.259,
	"step": 2011
	},
	{
	"epoch": 5.0,
	"learning_rate": 8.748505976095617e-05,
	"loss": 0.4086,
	"step": 2513
	},
	{
	"epoch": 5.0,
	"eval_loss": 0.4007655382156372,
	"eval_runtime": 197.0527,
	"eval_samples_per_second": 2.045,
	"eval_steps_per_second": 0.259,
	"step": 2513
	},
	{
	"epoch": 6.0,
	"learning_rate": 8.49800796812749e-05,
	"loss": 0.3562,
	"step": 3016
	},
	{
	"epoch": 6.0,
	"eval_loss": 0.3483542799949646,
	"eval_runtime": 198.1137,
	"eval_samples_per_second": 2.034,
	"eval_steps_per_second": 0.257,
	"step": 3016
	},
	{
	"epoch": 7.0,
	"learning_rate": 8.247509960159363e-05,
	"loss": 0.3143,
	"step": 3519
	},
	{
	"epoch": 7.0,
	"eval_loss": 0.3145788311958313,
	"eval_runtime": 196.7599,
	"eval_samples_per_second": 2.048,
	"eval_steps_per_second": 0.259,
	"step": 3519
	},
	{
	"epoch": 8.0,
	"learning_rate": 7.997011952191235e-05,
	"loss": 0.2789,
	"step": 4022
	},
	{
	"epoch": 8.0,
	"eval_loss": 0.2774547338485718,
	"eval_runtime": 195.9949,
	"eval_samples_per_second": 2.056,
	"eval_steps_per_second": 0.26,
	"step": 4022
	},
	{
	"epoch": 9.0,
	"learning_rate": 7.747011952191235e-05,
	"loss": 0.2506,
	"step": 4524
	},
	{
	"epoch": 9.0,
	"eval_loss": 0.25074610114097595,
	"eval_runtime": 195.9777,
	"eval_samples_per_second": 2.056,
	"eval_steps_per_second": 0.26,
	"step": 4524
	},
	{
	"epoch": 10.0,
	"learning_rate": 7.496513944223108e-05,
	"loss": 0.2257,
	"step": 5027
	},
	{
	"epoch": 10.0,
	"eval_loss": 0.22645100951194763,
	"eval_runtime": 195.4398,
	"eval_samples_per_second": 2.062,
	"eval_steps_per_second": 0.261,
	"step": 5027
	},
	{
	"epoch": 11.0,
	"learning_rate": 7.24601593625498e-05,
	"loss": 0.2031,
	"step": 5530
	},
	{
	"epoch": 11.0,
	"eval_loss": 0.20663012564182281,
	"eval_runtime": 196.0347,
	"eval_samples_per_second": 2.056,
	"eval_steps_per_second": 0.26,
	"step": 5530
	},
	{
	"epoch": 12.0,
	"learning_rate": 6.995517928286853e-05,
	"loss": 0.1845,
	"step": 6033
	},
	{
	"epoch": 12.0,
	"eval_loss": 0.1891084611415863,
	"eval_runtime": 195.5982,
	"eval_samples_per_second": 2.06,
	"eval_steps_per_second": 0.261,
	"step": 6033
	},
	{
	"epoch": 13.0,
	"learning_rate": 6.745517928286854e-05,
	"loss": 0.1691,
	"step": 6535
	},
	{
	"epoch": 13.0,
	"eval_loss": 0.17209213972091675,
	"eval_runtime": 196.259,
	"eval_samples_per_second": 2.053,
	"eval_steps_per_second": 0.26,
	"step": 6535
	},
	{
	"epoch": 14.0,
	"learning_rate": 6.495019920318725e-05,
	"loss": 0.1542,
	"step": 7038
	},
	{
	"epoch": 14.0,
	"eval_loss": 0.15993133187294006,
	"eval_runtime": 196.0195,
	"eval_samples_per_second": 2.056,
	"eval_steps_per_second": 0.26,
	"step": 7038
	},
	{
	"epoch": 15.0,
	"learning_rate": 6.244521912350598e-05,
	"loss": 0.1414,
	"step": 7541
	},
	{
	"epoch": 15.0,
	"eval_loss": 0.14806699752807617,
	"eval_runtime": 195.4909,
	"eval_samples_per_second": 2.061,
	"eval_steps_per_second": 0.261,
	"step": 7541
	},
	{
	"epoch": 16.0,
	"learning_rate": 5.994023904382471e-05,
	"loss": 0.1309,
	"step": 8044
	},
	{
	"epoch": 16.0,
	"eval_loss": 0.13711141049861908,
	"eval_runtime": 195.7592,
	"eval_samples_per_second": 2.059,
	"eval_steps_per_second": 0.261,
	"step": 8044
	},
	{
	"epoch": 17.0,
	"learning_rate": 5.7440239043824705e-05,
	"loss": 0.1222,
	"step": 8546
	},
	{
	"epoch": 17.0,
	"eval_loss": 0.13089050352573395,
	"eval_runtime": 195.6972,
	"eval_samples_per_second": 2.059,
	"eval_steps_per_second": 0.261,
	"step": 8546
	},
	{
	"epoch": 18.0,
	"learning_rate": 5.493525896414343e-05,
	"loss": 0.1134,
	"step": 9049
	},
	{
	"epoch": 18.0,
	"eval_loss": 0.12404956668615341,
	"eval_runtime": 195.8078,
	"eval_samples_per_second": 2.058,
	"eval_steps_per_second": 0.26,
	"step": 9049
	},
	{
	"epoch": 19.0,
	"learning_rate": 5.243027888446216e-05,
	"loss": 0.107,
	"step": 9552
	},
	{
	"epoch": 19.0,
	"eval_loss": 0.11492674797773361,
	"eval_runtime": 196.1937,
	"eval_samples_per_second": 2.054,
	"eval_steps_per_second": 0.26,
	"step": 9552
	},
	{
	"epoch": 20.0,
	"learning_rate": 4.992529880478088e-05,
	"loss": 0.1014,
	"step": 10055
	},
	{
	"epoch": 20.0,
	"eval_loss": 0.1099749356508255,
	"eval_runtime": 196.1333,
	"eval_samples_per_second": 2.055,
	"eval_steps_per_second": 0.26,
	"step": 10055
	},
	{
	"epoch": 21.0,
	"learning_rate": 4.742529880478088e-05,
	"loss": 0.0966,
	"step": 10557
	},
	{
	"epoch": 21.0,
	"eval_loss": 0.1054670661687851,
	"eval_runtime": 196.2688,
	"eval_samples_per_second": 2.053,
	"eval_steps_per_second": 0.26,
	"step": 10557
	},
	{
	"epoch": 22.0,
	"learning_rate": 4.49203187250996e-05,
	"loss": 0.0925,
	"step": 11060
	},
	{
	"epoch": 22.0,
	"eval_loss": 0.1019241139292717,
	"eval_runtime": 195.6941,
	"eval_samples_per_second": 2.059,
	"eval_steps_per_second": 0.261,
	"step": 11060
	},
	{
	"epoch": 23.0,
	"learning_rate": 4.241533864541833e-05,
	"loss": 0.0883,
	"step": 11563
	},
	{
	"epoch": 23.0,
	"eval_loss": 0.09764768928289413,
	"eval_runtime": 196.6317,
	"eval_samples_per_second": 2.05,
	"eval_steps_per_second": 0.259,
	"step": 11563
	},
	{
	"epoch": 24.0,
	"learning_rate": 3.9910358565737054e-05,
	"loss": 0.085,
	"step": 12066
	},
	{
	"epoch": 24.0,
	"eval_loss": 0.09518074989318848,
	"eval_runtime": 196.0702,
	"eval_samples_per_second": 2.055,
	"eval_steps_per_second": 0.26,
	"step": 12066
	},
	{
	"epoch": 25.0,
	"learning_rate": 3.7410358565737055e-05,
	"loss": 0.0832,
	"step": 12568
	},
	{
	"epoch": 25.0,
	"eval_loss": 0.09052950888872147,
	"eval_runtime": 196.4373,
	"eval_samples_per_second": 2.052,
	"eval_steps_per_second": 0.26,
	"step": 12568
	},
	{
	"epoch": 26.0,
	"learning_rate": 3.490537848605578e-05,
	"loss": 0.0795,
	"step": 13071
	},
	{
	"epoch": 26.0,
	"eval_loss": 0.0874876081943512,
	"eval_runtime": 196.0257,
	"eval_samples_per_second": 2.056,
	"eval_steps_per_second": 0.26,
	"step": 13071
	},
	{
	"epoch": 27.0,
	"learning_rate": 3.24003984063745e-05,
	"loss": 0.078,
	"step": 13574
	},
	{
	"epoch": 27.0,
	"eval_loss": 0.08521231263875961,
	"eval_runtime": 195.7721,
	"eval_samples_per_second": 2.059,
	"eval_steps_per_second": 0.261,
	"step": 13574
	},
	{
	"epoch": 28.0,
	"learning_rate": 2.989541832669323e-05,
	"loss": 0.0761,
	"step": 14077
	},
	{
	"epoch": 28.0,
	"eval_loss": 0.08261983096599579,
	"eval_runtime": 195.9135,
	"eval_samples_per_second": 2.057,
	"eval_steps_per_second": 0.26,
	"step": 14077
	},
	{
	"epoch": 29.0,
	"learning_rate": 2.739541832669323e-05,
	"loss": 0.0748,
	"step": 14579
	},
	{
	"epoch": 29.0,
	"eval_loss": 0.08173193037509918,
	"eval_runtime": 196.0573,
	"eval_samples_per_second": 2.056,
	"eval_steps_per_second": 0.26,
	"step": 14579
	},
	{
	"epoch": 30.0,
	"learning_rate": 2.4890438247011953e-05,
	"loss": 0.0727,
	"step": 15082
	},
	{
	"epoch": 30.0,
	"eval_loss": 0.07807794213294983,
	"eval_runtime": 196.0191,
	"eval_samples_per_second": 2.056,
	"eval_steps_per_second": 0.26,
	"step": 15082
	},
	{
	"epoch": 31.0,
	"learning_rate": 2.2385458167330677e-05,
	"loss": 0.0712,
	"step": 15585
	},
	{
	"epoch": 31.0,
	"eval_loss": 0.07727421820163727,
	"eval_runtime": 196.043,
	"eval_samples_per_second": 2.056,
	"eval_steps_per_second": 0.26,
	"step": 15585
	},
	{
	"epoch": 32.0,
	"learning_rate": 1.9880478087649404e-05,
	"loss": 0.07,
	"step": 16088
	},
	{
	"epoch": 32.0,
	"eval_loss": 0.07521162927150726,
	"eval_runtime": 196.5081,
	"eval_samples_per_second": 2.051,
	"eval_steps_per_second": 0.26,
	"step": 16088
	},
	{
	"epoch": 33.0,
	"learning_rate": 1.7380478087649405e-05,
	"loss": 0.069,
	"step": 16590
	},
	{
	"epoch": 33.0,
	"eval_loss": 0.07360897213220596,
	"eval_runtime": 196.0187,
	"eval_samples_per_second": 2.056,
	"eval_steps_per_second": 0.26,
	"step": 16590
	},
	{
	"epoch": 34.0,
	"learning_rate": 1.4875498007968128e-05,
	"loss": 0.0682,
	"step": 17093
	},
	{
	"epoch": 34.0,
	"eval_loss": 0.07235261797904968,
	"eval_runtime": 196.2571,
	"eval_samples_per_second": 2.053,
	"eval_steps_per_second": 0.26,
	"step": 17093
	},
	{
	"epoch": 35.0,
	"learning_rate": 1.2370517928286854e-05,
	"loss": 0.0671,
	"step": 17596
	},
	{
	"epoch": 35.0,
	"eval_loss": 0.07117172330617905,
	"eval_runtime": 197.0583,
	"eval_samples_per_second": 2.045,
	"eval_steps_per_second": 0.259,
	"step": 17596
	},
	{
	"epoch": 36.0,
	"learning_rate": 9.86553784860558e-06,
	"loss": 0.0663,
	"step": 18099
	},
	{
	"epoch": 36.0,
	"eval_loss": 0.07005689293146133,
	"eval_runtime": 196.2214,
	"eval_samples_per_second": 2.054,
	"eval_steps_per_second": 0.26,
	"step": 18099
	},
	{
	"epoch": 37.0,
	"learning_rate": 7.365537848605578e-06,
	"loss": 0.0656,
	"step": 18601
	},
	{
	"epoch": 37.0,
	"eval_loss": 0.06923193484544754,
	"eval_runtime": 195.9767,
	"eval_samples_per_second": 2.056,
	"eval_steps_per_second": 0.26,
	"step": 18601
	},
	{
	"epoch": 38.0,
	"learning_rate": 4.860557768924303e-06,
	"loss": 0.0648,
	"step": 19104
	},
	{
	"epoch": 38.0,
	"eval_loss": 0.06851127743721008,
	"eval_runtime": 196.8249,
	"eval_samples_per_second": 2.048,
	"eval_steps_per_second": 0.259,
	"step": 19104
	},
	{
	"epoch": 39.0,
	"learning_rate": 2.355577689243028e-06,
	"loss": 0.064,
	"step": 19607
	},
	{
	"epoch": 39.0,
	"eval_loss": 0.0680607408285141,
	"eval_runtime": 195.9876,
	"eval_samples_per_second": 2.056,
	"eval_steps_per_second": 0.26,
	"step": 19607
	}
	],
	"logging_steps": 500,
	"max_steps": 20080,
	"num_train_epochs": 40,
	"save_steps": 500,
	"total_flos": 4.6336747921934746e+17,
	"trial_name": null,
	"trial_params": null
	}
	```

	## Model Details

	### Model Description

	<!-- Provide a longer summary of what this model is. -->



	- Developed by: [More Information Needed]
	- Funded by [optional]: [More Information Needed]
	- Shared by [optional]: [More Information Needed]
	- Model type: [More Information Needed]
	- Language(s) (NLP): [More Information Needed]
	- License: [More Information Needed]
	- Finetuned from model [optional]: meta-llama/Llama-2-7b-hf

	### Model Sources [optional]

	<!-- Provide the basic links for the model. -->

	- Repository: [More Information Needed]
	- Paper [optional]: [More Information Needed]
	- Demo [optional]: [More Information Needed]

	## Uses

	<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->

	### Direct Use

	<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->

	[More Information Needed]

	### Downstream Use [optional]

	<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->

	[More Information Needed]

	### Out-of-Scope Use

	<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->

	[More Information Needed]

	## Bias, Risks, and Limitations

	<!-- This section is meant to convey both technical and sociotechnical limitations. -->

	[More Information Needed]

	### Recommendations

	<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->

	Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.

	## How to Get Started with the Model

	Use the code below to get started with the model.

	[More Information Needed]

	## Training Details

	### Training Data

	<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->

	[More Information Needed]

	### Training Procedure

	<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->

	#### Preprocessing [optional]

	[More Information Needed]


	#### Training Hyperparameters

	- Training regime: [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->

	#### Speeds, Sizes, Times [optional]

	<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->

	[More Information Needed]

	## Evaluation

	<!-- This section describes the evaluation protocols and provides the results. -->

	### Testing Data, Factors & Metrics

	#### Testing Data

	<!-- This should link to a Dataset Card if possible. -->

	[More Information Needed]

	#### Factors

	<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->

	[More Information Needed]

	#### Metrics

	<!-- These are the evaluation metrics being used, ideally with a description of why. -->

	[More Information Needed]

	### Results

	[More Information Needed]

	#### Summary



	## Model Examination [optional]

	<!-- Relevant interpretability work for the model goes here -->

	[More Information Needed]

	## Environmental Impact

	<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->

	Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).

	- Hardware Type: [More Information Needed]
	- Hours used: [More Information Needed]
	- Cloud Provider: [More Information Needed]
	- Compute Region: [More Information Needed]
	- Carbon Emitted: [More Information Needed]

	## Technical Specifications [optional]

	### Model Architecture and Objective

	[More Information Needed]

	### Compute Infrastructure

	[More Information Needed]

	#### Hardware

	[More Information Needed]

	#### Software

	[More Information Needed]

	## Citation [optional]

	<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->

	BibTeX:

	[More Information Needed]

	APA:

	[More Information Needed]

	## Glossary [optional]

	<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->

	[More Information Needed]

	## More Information [optional]

	[More Information Needed]

	## Model Card Authors [optional]

	[More Information Needed]

	## Model Card Contact

	[More Information Needed]


	## Training procedure


	### Framework versions


	- PEFT 0.6.2