PEFT
szalontaib committed on
Commit
0a65e4e
·
verified ·
1 Parent(s): 484f40a

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. README.md +74 -1
  2. adapter_config.json +29 -0
  3. adapter_model.bin +3 -0
README.md CHANGED
@@ -1,3 +1,76 @@
1
  ---
2
- license: apache-2.0
 
3
  ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ library_name: peft
3
+ base_model: WizardLMTeam/WizardCoder-Python-13B-V1.0
4
  ---
5
+
6
+ # Model Card for MegaDiffInject
7
+
8
+ This model injects bugs into correct Python programs. It was used to inject bugs into correct programs, and the resulting buggy programs form the core of the MegaBugFix benchmark.
9
+
10
+
11
+ ## Model Details
12
+
13
+ - **Developed by:** Balázs Szalontai
14
+ - **Model type:** Decoder-only Language Model
15
+ - **Language(s) (NLP):** None
16
+ - **License:** Apache License 2.0
17
+ - **Finetuned from model:** WizardLMTeam/WizardCoder-Python-13B-V1.0
18
+
19
+ ## Uses
20
+
21
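+ The example below loads the base model, applies this PEFT adapter on top of it, and generates several corrupted variants of a correct Python program: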
22
+
23
+ ```python
24
+ import os
25
+ import torch
26
+ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
27
+ from peft import PeftModel
28
+
29
+ model_id_pretrained = 'WizardLMTeam/WizardCoder-Python-13B-V1.0'
30
+ model_id_finetuned = 'szalontaib/MegaDiffInject'
31
+
32
+ tokenizer = AutoTokenizer.from_pretrained(model_id_pretrained, add_eos_token=False)
33
+ model = AutoModelForCausalLM.from_pretrained(model_id_pretrained, device_map='auto', dtype=torch.float16, trust_remote_code=True)
34
+ model = PeftModel.from_pretrained(model, model_id_finetuned)
35
+
36
+ def diff2code(diff : str) -> str:
37
+ return '\n'.join(
38
+ line[2:] for line in diff.splitlines()
39
+ if not line.startswith('-')
40
+ ).strip()
41
+
42
+ def corrupt(program, tokenizer, model, temperature=0.5, sample_size=1):
43
+ prompt = f'[PYTHON]\n{program.strip()}\n[/PYTHON]\n[DIFF]\n'
44
+ generator = pipeline(
45
+ model=model,
46
+ tokenizer=tokenizer,
47
+ task="text-generation",
48
+ dtype=torch.float16,
49
+ device_map="auto",
50
+ temperature=temperature,
51
+ do_sample = (temperature>0),
52
+ num_return_sequences=sample_size,
53
+ eos_token_id=tokenizer.eos_token_id
54
+ )
55
+ outputs = generator(prompt, max_new_tokens=4096)
56
+ outputs = [output['generated_text'][len(prompt):] for output in outputs]
57
+ diffs = [output.removesuffix('\n[/DIFF]') for output in outputs]
58
+ corrupted_programs = [diff2code(diff) for diff in diffs]
59
+ return corrupted_programs
60
+
61
+
62
+ test_code = '''
63
+ def bitcount(n):
64
+ count = 0
65
+ while n:
66
+ n &= n - 1
67
+ count += 1
68
+ return count
69
+ '''.strip()
70
+
71
+ corrupted_programs = corrupt(test_code, tokenizer, model, temperature=0.5, sample_size=5)
72
+
73
+ for corrupted_program in corrupted_programs:
74
+ print(corrupted_program)
75
+ print('-'*30)
76
+ ```
adapter_config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "/home/bszalontai/balazs_munka/codellama/models_hf/wizard-coder-13b-python",
5
+ "bias": "none",
6
+ "fan_in_fan_out": false,
7
+ "inference_mode": true,
8
+ "init_lora_weights": true,
9
+ "layers_pattern": null,
10
+ "layers_to_transform": null,
11
+ "lora_alpha": 1024,
12
+ "lora_dropout": 0.1,
13
+ "modules_to_save": null,
14
+ "peft_type": "LORA",
15
+ "r": 512,
16
+ "rank_pattern": {},
17
+ "revision": null,
18
+ "target_modules": [
19
+ "o_proj",
20
+ "up_proj",
21
+ "v_proj",
22
+ "gate_proj",
23
+ "k_proj",
24
+ "q_proj",
25
+ "lm_head",
26
+ "down_proj"
27
+ ],
28
+ "task_type": "CAUSAL_LM"
29
+ }
adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da29e0cbbf68e9b0141f8826d39386aed16b2c184fa5873cc2ab6e18880dd0e2
3
+ size 8087351638