Commit ·
dd87728
1
Parent(s): 39e2474
bed
Browse files- README.md +10 -0
- llama_test.ipynb +41 -5
README.md
CHANGED
|
@@ -11,5 +11,15 @@ pip uninstall -r uninstall.txt
|
|
| 11 |
```
|
| 12 |
I had to uninstall some cuda stuff that torch installed to make this work.
|
| 13 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
|
| 15 |
|
|
|
|
| 11 |
```
|
| 12 |
I had to uninstall some cuda stuff that torch installed to make this work.
|
| 13 |
|
| 14 |
+
# References
|
| 15 |
+
https://github.com/tloen/alpaca-lora
|
| 16 |
+
https://huggingface.co/docs/transformers/main/en/model_doc/llama#llama
|
| 17 |
+
https://huggingface.co/docs/transformers/index
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
https://arxiv.org/pdf/1910.13461.pdf
|
| 21 |
+
https://arxiv.org/pdf/1808.06226.pdf
|
| 22 |
+
|
| 23 |
+
|
| 24 |
|
| 25 |
|
llama_test.ipynb
CHANGED
|
@@ -18,12 +18,13 @@
|
|
| 18 |
],
|
| 19 |
"source": [
|
| 20 |
"import torch\n",
|
|
|
|
| 21 |
"torch.cuda.is_available()"
|
| 22 |
]
|
| 23 |
},
|
| 24 |
{
|
| 25 |
"cell_type": "code",
|
| 26 |
-
"execution_count":
|
| 27 |
"metadata": {},
|
| 28 |
"outputs": [
|
| 29 |
{
|
|
@@ -38,7 +39,7 @@
|
|
| 38 |
{
|
| 39 |
"data": {
|
| 40 |
"application/vnd.jupyter.widget-view+json": {
|
| 41 |
-
"model_id": "
|
| 42 |
"version_major": 2,
|
| 43 |
"version_minor": 0
|
| 44 |
},
|
|
@@ -52,15 +53,50 @@
|
|
| 52 |
],
|
| 53 |
"source": [
|
| 54 |
"from transformers import LlamaTokenizer, LlamaForCausalLM\n",
|
| 55 |
-
"tokenizer = LlamaTokenizer.from_pretrained(\
|
| 56 |
-
"\n",
|
|
|
|
|
|
|
|
|
|
| 57 |
"model = LlamaForCausalLM.from_pretrained(\n",
|
| 58 |
" \"decapoda-research/llama-7b-hf\",\n",
|
| 59 |
-
" load_in_8bit=True,\n",
|
| 60 |
" device_map=\"auto\",\n",
|
| 61 |
" torch_dtype=torch.float16\n",
|
| 62 |
")\n"
|
| 63 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
}
|
| 65 |
],
|
| 66 |
"metadata": {
|
|
|
|
| 18 |
],
|
| 19 |
"source": [
|
| 20 |
"import torch\n",
|
| 21 |
+
"import torch.nn as nn\n",
|
| 22 |
"torch.cuda.is_available()"
|
| 23 |
]
|
| 24 |
},
|
| 25 |
{
|
| 26 |
"cell_type": "code",
|
| 27 |
+
"execution_count": 2,
|
| 28 |
"metadata": {},
|
| 29 |
"outputs": [
|
| 30 |
{
|
|
|
|
| 39 |
{
|
| 40 |
"data": {
|
| 41 |
"application/vnd.jupyter.widget-view+json": {
|
| 42 |
+
"model_id": "37df56d6ddb747f3a91bc9100b33f47b",
|
| 43 |
"version_major": 2,
|
| 44 |
"version_minor": 0
|
| 45 |
},
|
|
|
|
| 53 |
],
|
| 54 |
"source": [
|
| 55 |
"from transformers import LlamaTokenizer, LlamaForCausalLM\n",
|
| 56 |
+
"tokenizer = LlamaTokenizer.from_pretrained(\n",
|
| 57 |
+
" \"decapoda-research/llama-7b-hf\")\n",
|
| 58 |
+
" \n",
|
| 59 |
+
"#tokenizer.pad_token_id = (0)\n",
|
| 60 |
+
"#tokenizer.padding_side = 'left'\n",
|
| 61 |
"model = LlamaForCausalLM.from_pretrained(\n",
|
| 62 |
" \"decapoda-research/llama-7b-hf\",\n",
|
|
|
|
| 63 |
" device_map=\"auto\",\n",
|
| 64 |
" torch_dtype=torch.float16\n",
|
| 65 |
")\n"
|
| 66 |
]
|
| 67 |
+
},
|
| 68 |
+
{
|
| 69 |
+
"cell_type": "code",
|
| 70 |
+
"execution_count": 9,
|
| 71 |
+
"metadata": {},
|
| 72 |
+
"outputs": [
|
| 73 |
+
{
|
| 74 |
+
"name": "stdout",
|
| 75 |
+
"output_type": "stream",
|
| 76 |
+
"text": [
|
| 77 |
+
" ⁇ hey dude, talk to me.\n",
|
| 78 |
+
"I'm a 20 year old guy from the UK. I'm a bit of a nerd, I like to read, I like to write, I like to play video games, I like to watch movies, I like to listen\n",
|
| 79 |
+
" ⁇ whats the capital of georgia?\n",
|
| 80 |
+
"What is the capital of Georgia?\n",
|
| 81 |
+
"The capital of Georgia is Atlanta.\n",
|
| 82 |
+
"What is the capital of Georgia?\n",
|
| 83 |
+
"The capital of Georgia is Atlanta. The capital of Georgia is Atlanta. The capital of Georgia is Atlanta. The capital of Georgia is Atlanta. The\n"
|
| 84 |
+
]
|
| 85 |
+
}
|
| 86 |
+
],
|
| 87 |
+
"source": [
|
| 88 |
+
"def ask(q,l=64):\n",
|
| 89 |
+
" toks = tokenizer(q , return_tensors='pt')\n",
|
| 90 |
+
" ctoks = toks.input_ids.to('cuda')\n",
|
| 91 |
+
" gen = model.generate(ctoks, max_length=64)\n",
|
| 92 |
+
" return tokenizer.decode(gen[0])\n",
|
| 93 |
+
"\n",
|
| 94 |
+
"r = ask('hey dude, talk to me')\n",
|
| 95 |
+
"print(r)\n",
|
| 96 |
+
"\n",
|
| 97 |
+
"r = ask('whats the capital of georgia?')\n",
|
| 98 |
+
"print(r)\n"
|
| 99 |
+
]
|
| 100 |
}
|
| 101 |
],
|
| 102 |
"metadata": {
|