Upload folder using huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +2 -0
- 02-gpt2_bert/.ipynb_checkpoints/1-dna-bpe-checkpoint.ipynb +1 -1
- 04-gene-sft/.ipynb_checkpoints/2-gpt2-instruction-ft-checkpoint.ipynb +1125 -52
- 04-gene-sft/.ipynb_checkpoints/4-deepspeed-intro-checkpoint.ipynb +184 -168
- 04-gene-sft/.ipynb_checkpoints/5-peft-intro-checkpoint.ipynb +191 -74
- 04-gene-sft/.ipynb_checkpoints/6-llama-continue-train-checkpoint.ipynb +243 -17
- 04-gene-sft/.ipynb_checkpoints/7-llama-instruction-ft-checkpoint.ipynb +403 -29
- 04-gene-sft/.ipynb_checkpoints/gpt2-small3-1024-checkpoint.json +602 -0
- 04-gene-sft/.ipynb_checkpoints/merge_llama_with_dna_lora-checkpoint.py +368 -0
- 04-gene-sft/.ipynb_checkpoints/pip_list-checkpoint.txt +3 -0
- 04-gene-sft/.ipynb_checkpoints/run_clm_pt_with_peft-checkpoint.py +11 -0
- 04-gene-sft/.ipynb_checkpoints/run_pt-checkpoint.sh +6 -6
- 04-gene-sft/.ipynb_checkpoints/run_sft-checkpoint.sh +4 -4
- 04-gene-sft/2-gpt2-instruction-ft.ipynb +1125 -52
- 04-gene-sft/4-deepspeed-intro.ipynb +184 -168
- 04-gene-sft/5-peft-intro.ipynb +253 -73
- 04-gene-sft/6-llama-continue-train.ipynb +242 -16
- 04-gene-sft/7-llama-instruction-ft.ipynb +403 -29
- 04-gene-sft/__pycache__/build_dataset.cpython-312.pyc +0 -0
- 04-gene-sft/data/.ipynb_checkpoints/dna_promoter_300-checkpoint.jsonl +3 -0
- 04-gene-sft/data/dna_promoter_300.jsonl +3 -0
- 04-gene-sft/gpt2-small3-1024.json +602 -0
- 04-gene-sft/gpt2_lora_text_classification/README.md +202 -0
- 04-gene-sft/gpt2_lora_text_classification/adapter_config.json +34 -0
- 04-gene-sft/gpt2_lora_text_classification/adapter_model.safetensors +3 -0
- 04-gene-sft/gpt2_lora_text_classification/checkpoint-46620/README.md +202 -0
- 04-gene-sft/gpt2_lora_text_classification/checkpoint-46620/adapter_config.json +34 -0
- 04-gene-sft/gpt2_lora_text_classification/checkpoint-46620/adapter_model.safetensors +3 -0
- 04-gene-sft/gpt2_lora_text_classification/checkpoint-46620/merges.txt +3 -0
- 04-gene-sft/gpt2_lora_text_classification/checkpoint-46620/optimizer.pt +3 -0
- 04-gene-sft/gpt2_lora_text_classification/checkpoint-46620/rng_state.pth +3 -0
- 04-gene-sft/gpt2_lora_text_classification/checkpoint-46620/scheduler.pt +3 -0
- 04-gene-sft/gpt2_lora_text_classification/checkpoint-46620/special_tokens_map.json +24 -0
- 04-gene-sft/gpt2_lora_text_classification/checkpoint-46620/tokenizer.json +0 -0
- 04-gene-sft/gpt2_lora_text_classification/checkpoint-46620/tokenizer_config.json +23 -0
- 04-gene-sft/gpt2_lora_text_classification/checkpoint-46620/trainer_state.json +768 -0
- 04-gene-sft/gpt2_lora_text_classification/checkpoint-46620/training_args.bin +3 -0
- 04-gene-sft/gpt2_lora_text_classification/checkpoint-46620/vocab.json +0 -0
- 04-gene-sft/gpt2_lora_text_classification/checkpoint-66600/README.md +202 -0
- 04-gene-sft/gpt2_lora_text_classification/checkpoint-66600/adapter_config.json +34 -0
- 04-gene-sft/gpt2_lora_text_classification/checkpoint-66600/adapter_model.safetensors +3 -0
- 04-gene-sft/gpt2_lora_text_classification/checkpoint-66600/merges.txt +3 -0
- 04-gene-sft/gpt2_lora_text_classification/checkpoint-66600/optimizer.pt +3 -0
- 04-gene-sft/gpt2_lora_text_classification/checkpoint-66600/rng_state.pth +3 -0
- 04-gene-sft/gpt2_lora_text_classification/checkpoint-66600/scheduler.pt +3 -0
- 04-gene-sft/gpt2_lora_text_classification/checkpoint-66600/special_tokens_map.json +24 -0
- 04-gene-sft/gpt2_lora_text_classification/checkpoint-66600/tokenizer.json +0 -0
- 04-gene-sft/gpt2_lora_text_classification/checkpoint-66600/tokenizer_config.json +23 -0
- 04-gene-sft/gpt2_lora_text_classification/checkpoint-66600/trainer_state.json +1084 -0
- 04-gene-sft/gpt2_lora_text_classification/checkpoint-66600/training_args.bin +3 -0
.gitattributes
CHANGED
@@ -37,5 +37,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
37 |
*.txt filter=lfs diff=lfs merge=lfs -text
|
38 |
03-gene-task/data/dna_protein_full.json filter=lfs diff=lfs merge=lfs -text
|
39 |
03-gene-task/img/protein-structure-1-2.png filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
40 |
04-gene-sft/sft_data/train_data.json filter=lfs diff=lfs merge=lfs -text
|
41 |
img/gpt2_bridge.png filter=lfs diff=lfs merge=lfs -text
|
|
|
37 |
*.txt filter=lfs diff=lfs merge=lfs -text
|
38 |
03-gene-task/data/dna_protein_full.json filter=lfs diff=lfs merge=lfs -text
|
39 |
03-gene-task/img/protein-structure-1-2.png filter=lfs diff=lfs merge=lfs -text
|
40 |
+
04-gene-sft/data/.ipynb_checkpoints/dna_promoter_300-checkpoint.jsonl filter=lfs diff=lfs merge=lfs -text
|
41 |
+
04-gene-sft/data/dna_promoter_300.jsonl filter=lfs diff=lfs merge=lfs -text
|
42 |
04-gene-sft/sft_data/train_data.json filter=lfs diff=lfs merge=lfs -text
|
43 |
img/gpt2_bridge.png filter=lfs diff=lfs merge=lfs -text
|
02-gpt2_bert/.ipynb_checkpoints/1-dna-bpe-checkpoint.ipynb
CHANGED
@@ -284,7 +284,7 @@
|
|
284 |
"id": "c24f10dc-1117-4493-9333-5ed6d898f44a",
|
285 |
"metadata": {},
|
286 |
"source": [
|
287 |
-
"###
|
288 |
"\n",
|
289 |
"以上方法展示了如何对 DNA 和蛋白质序列进行“分词”,以提取有用的特征。选择哪种方法取决于具体的任务需求和数据特性。对于简单的分类或回归任务,K-mer 分解或滑动窗口可能是足够的;而对于更复杂的任务,如序列标注或结构预测,基于词汇表的方法或嵌入表示可能会提供更好的性能。\n",
|
290 |
"\n",
|
|
|
284 |
"id": "c24f10dc-1117-4493-9333-5ed6d898f44a",
|
285 |
"metadata": {},
|
286 |
"source": [
|
287 |
+
"### **训练DNA BPE分词器**\n",
|
288 |
"\n",
|
289 |
"以上方法展示了如何对 DNA 和蛋白质序列进行“分词”,以提取有用的特征。选择哪种方法取决于具体的任务需求和数据特性。对于简单的分类或回归任务,K-mer 分解或滑动窗口可能是足够的;而对于更复杂的任务,如序列标注或结构预测,基于词汇表的方法或嵌入表示可能会提供更好的性能。\n",
|
290 |
"\n",
|
04-gene-sft/.ipynb_checkpoints/2-gpt2-instruction-ft-checkpoint.ipynb
CHANGED
@@ -127,20 +127,76 @@
|
|
127 |
},
|
128 |
{
|
129 |
"cell_type": "code",
|
130 |
-
"execution_count":
|
131 |
-
"id": "
|
132 |
"metadata": {},
|
133 |
"outputs": [],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
134 |
"source": [
|
135 |
"from datasets import load_dataset\n",
|
136 |
"# 1. load ~11k samples from promoters prediction dataset\n",
|
137 |
-
"
|
138 |
-
"
|
139 |
]
|
140 |
},
|
141 |
{
|
142 |
"cell_type": "code",
|
143 |
-
"execution_count":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
144 |
"id": "93d09d8d-f521-49f7-b0e0-7ac089dfbf49",
|
145 |
"metadata": {},
|
146 |
"outputs": [],
|
@@ -169,10 +225,18 @@
|
|
169 |
},
|
170 |
{
|
171 |
"cell_type": "code",
|
172 |
-
"execution_count":
|
173 |
"id": "9f9c0e5a-6591-47ac-b358-d746a00dfc0a",
|
174 |
"metadata": {},
|
175 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
176 |
"source": [
|
177 |
"example = dna_dataset[\"train\"][0]\n",
|
178 |
"print(build_prompt(example))"
|
@@ -180,7 +244,7 @@
|
|
180 |
},
|
181 |
{
|
182 |
"cell_type": "code",
|
183 |
-
"execution_count":
|
184 |
"id": "83070a23-1604-4d28-b371-e01060331ed5",
|
185 |
"metadata": {},
|
186 |
"outputs": [],
|
@@ -199,10 +263,40 @@
|
|
199 |
},
|
200 |
{
|
201 |
"cell_type": "code",
|
202 |
-
"execution_count":
|
203 |
"id": "89fb8ed3-aa58-462f-b2a6-ce445c597a33",
|
204 |
"metadata": {},
|
205 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
206 |
"source": [
|
207 |
"dna_ft_dataset = load_dataset(\"json\", data_files='data/dna_promoter_300.jsonl')\n",
|
208 |
"dna_ft_dataset"
|
@@ -210,10 +304,30 @@
|
|
210 |
},
|
211 |
{
|
212 |
"cell_type": "code",
|
213 |
-
"execution_count":
|
214 |
"id": "e4f7b75f-6ccb-4fda-8004-40df7d52678f",
|
215 |
"metadata": {},
|
216 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
217 |
"source": [
|
218 |
"data = dna_ft_dataset[\"train\"].train_test_split(train_size=0.9, seed=42)\n",
|
219 |
"data"
|
@@ -221,7 +335,7 @@
|
|
221 |
},
|
222 |
{
|
223 |
"cell_type": "code",
|
224 |
-
"execution_count":
|
225 |
"id": "36d9ee0e-8423-4529-aa7e-fda2728fab2f",
|
226 |
"metadata": {},
|
227 |
"outputs": [],
|
@@ -235,13 +349,14 @@
|
|
235 |
"from tokenizers import Tokenizer\n",
|
236 |
"from transformers import GPT2TokenizerFast\n",
|
237 |
"\n",
|
|
|
238 |
"tokenizer = GPT2Tokenizer.from_pretrained(\"dnagpt/gene_eng_gpt2_v0\")\n",
|
239 |
"tokenizer.pad_token = tokenizer.eos_token"
|
240 |
]
|
241 |
},
|
242 |
{
|
243 |
"cell_type": "code",
|
244 |
-
"execution_count":
|
245 |
"id": "871baee0-f06f-4422-a741-af533f7d92e1",
|
246 |
"metadata": {},
|
247 |
"outputs": [],
|
@@ -270,10 +385,23 @@
|
|
270 |
},
|
271 |
{
|
272 |
"cell_type": "code",
|
273 |
-
"execution_count":
|
274 |
"id": "bca1c275-cc3d-43df-923e-e6604d584226",
|
275 |
"metadata": {},
|
276 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
277 |
"source": [
|
278 |
"example = data[\"test\"][0]\n",
|
279 |
"example"
|
@@ -281,10 +409,27 @@
|
|
281 |
},
|
282 |
{
|
283 |
"cell_type": "code",
|
284 |
-
"execution_count":
|
285 |
"id": "76f2e027-0a31-4919-bb7e-404c786e1599",
|
286 |
"metadata": {},
|
287 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
288 |
"source": [
|
289 |
"prompt = build_prompt(example)\n",
|
290 |
"print(prompt)"
|
@@ -292,24 +437,80 @@
|
|
292 |
},
|
293 |
{
|
294 |
"cell_type": "code",
|
295 |
-
"execution_count":
|
296 |
"id": "932b54ca-7e27-47cd-b67d-7ef8386b6608",
|
297 |
"metadata": {},
|
298 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
299 |
"source": [
|
300 |
"print('tokens: ', ' '.join(tokenizer.tokenize(prompt)))"
|
301 |
]
|
302 |
},
|
303 |
{
|
304 |
"cell_type": "code",
|
305 |
-
"execution_count":
|
306 |
"id": "26671faf-68d0-4a44-978e-e1a24e86c9b1",
|
307 |
"metadata": {},
|
308 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
309 |
"source": [
|
310 |
"def tokenize_function(example):\n",
|
311 |
" prompt = build_prompt(example)\n",
|
312 |
-
" result = tokenizer(prompt, padding='max_length', truncation=True, max_length=
|
313 |
" return result\n",
|
314 |
"\n",
|
315 |
"\n",
|
@@ -322,17 +523,31 @@
|
|
322 |
},
|
323 |
{
|
324 |
"cell_type": "code",
|
325 |
-
"execution_count":
|
326 |
"id": "3d46c8b1-9fb3-431a-87ea-c278468543e7",
|
327 |
"metadata": {},
|
328 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
329 |
"source": [
|
330 |
"tokenized_datasets[\"train\"]"
|
331 |
]
|
332 |
},
|
333 |
{
|
334 |
"cell_type": "code",
|
335 |
-
"execution_count":
|
336 |
"id": "26985c81-4335-4ac0-9a5a-84a5b4f2d0e4",
|
337 |
"metadata": {},
|
338 |
"outputs": [],
|
@@ -346,17 +561,32 @@
|
|
346 |
},
|
347 |
{
|
348 |
"cell_type": "code",
|
349 |
-
"execution_count":
|
350 |
"id": "e18d3095-d6dd-423b-84fb-dca4a629d450",
|
351 |
"metadata": {},
|
352 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
353 |
"source": [
|
354 |
"model = GPT2LMHeadModel.from_pretrained(\"dnagpt/gene_eng_gpt2_v0\")"
|
355 |
]
|
356 |
},
|
357 |
{
|
358 |
"cell_type": "code",
|
359 |
-
"execution_count":
|
360 |
"id": "12134cf2-676a-4176-a733-35caab2fd520",
|
361 |
"metadata": {},
|
362 |
"outputs": [],
|
@@ -401,10 +631,41 @@
|
|
401 |
},
|
402 |
{
|
403 |
"cell_type": "code",
|
404 |
-
"execution_count":
|
405 |
"id": "b9a2e2a9-a1ff-44b0-a550-623a16d0d7a2",
|
406 |
"metadata": {},
|
407 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
408 |
"source": [
|
409 |
"input_text = format_input(data[\"test\"][0])\n",
|
410 |
"\n",
|
@@ -418,7 +679,7 @@
|
|
418 |
},
|
419 |
{
|
420 |
"cell_type": "code",
|
421 |
-
"execution_count":
|
422 |
"id": "63b54fe2-f077-4ca8-974e-1bcc41ce57d6",
|
423 |
"metadata": {},
|
424 |
"outputs": [],
|
@@ -437,10 +698,217 @@
|
|
437 |
},
|
438 |
{
|
439 |
"cell_type": "code",
|
440 |
-
"execution_count":
|
441 |
"id": "61df123d-e67d-4548-998a-de1e2781e774",
|
442 |
"metadata": {},
|
443 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
444 |
"source": [
|
445 |
"# 初始化Trainer\n",
|
446 |
"trainer = Trainer(\n",
|
@@ -454,10 +922,204 @@
|
|
454 |
},
|
455 |
{
|
456 |
"cell_type": "code",
|
457 |
-
"execution_count":
|
458 |
"id": "a9cd936a-5ea6-43e3-9848-27080f818606",
|
459 |
"metadata": {},
|
460 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
461 |
"source": [
|
462 |
"# 开始训练\n",
|
463 |
"trainer.train()"
|
@@ -465,10 +1127,18 @@
|
|
465 |
},
|
466 |
{
|
467 |
"cell_type": "code",
|
468 |
-
"execution_count":
|
469 |
"id": "315aae76-44b4-4513-8139-40ef22934873",
|
470 |
"metadata": {},
|
471 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
472 |
"source": [
|
473 |
"save_dir = 'gpt_ft/final'\n",
|
474 |
"trainer.save_model(save_dir)\n",
|
@@ -477,7 +1147,7 @@
|
|
477 |
},
|
478 |
{
|
479 |
"cell_type": "code",
|
480 |
-
"execution_count":
|
481 |
"id": "28d2dbbc-02ff-4120-b230-b19905a786cd",
|
482 |
"metadata": {},
|
483 |
"outputs": [],
|
@@ -488,20 +1158,92 @@
|
|
488 |
},
|
489 |
{
|
490 |
"cell_type": "code",
|
491 |
-
"execution_count":
|
492 |
"id": "08987c3c-063a-4e9b-9ebb-e637b0b5bccd",
|
493 |
"metadata": {},
|
494 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
495 |
"source": [
|
496 |
"finetuned_model"
|
497 |
]
|
498 |
},
|
499 |
{
|
500 |
"cell_type": "code",
|
501 |
-
"execution_count":
|
502 |
"id": "d75010e8-6d6a-40ef-852e-0d705adc3da8",
|
503 |
"metadata": {},
|
504 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
505 |
"source": [
|
506 |
"print(\"input (test):\", input_text)\n",
|
507 |
"\n",
|
@@ -517,10 +1259,217 @@
|
|
517 |
},
|
518 |
{
|
519 |
"cell_type": "code",
|
520 |
-
"execution_count":
|
521 |
"id": "64365e15-510e-4abf-92f5-c78b660b37dc",
|
522 |
"metadata": {},
|
523 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
524 |
"source": [
|
525 |
"test_data = data[\"test\"].select(range(100))\n",
|
526 |
"\n",
|
@@ -543,7 +1492,7 @@
|
|
543 |
},
|
544 |
{
|
545 |
"cell_type": "code",
|
546 |
-
"execution_count":
|
547 |
"id": "a45fb780-fc3f-401c-b6e0-6f7d0c1682de",
|
548 |
"metadata": {},
|
549 |
"outputs": [],
|
@@ -556,19 +1505,128 @@
|
|
556 |
"# 将 Dataset 对象导出为 JSON 文件\n",
|
557 |
"# test_data.to_json(output_file)\n",
|
558 |
"with open(output_file, \"w\") as file:\n",
|
559 |
-
" json.dump(data_list, file, indent=4) # \"indent\" for pretty-printing"
|
560 |
]
|
561 |
},
|
562 |
{
|
563 |
"cell_type": "code",
|
564 |
-
"execution_count":
|
565 |
"id": "a83c8881-c763-4bba-8b85-584a6722a38e",
|
566 |
"metadata": {},
|
567 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
568 |
"source": [
|
569 |
"import json\n",
|
570 |
"\n",
|
571 |
"\n",
|
|
|
572 |
"\n",
|
573 |
"with open(output_file, \"r\") as file:\n",
|
574 |
" test_data = json.load(file)\n",
|
@@ -580,15 +1638,30 @@
|
|
580 |
" output = item[\"output\"]\n",
|
581 |
" #output = \" \".join(tokenizer.tokenize(output))\n",
|
582 |
" model_response = item[\"model_response\"]\n",
|
|
|
|
|
|
|
583 |
" if model_response == output: #same it\n",
|
584 |
" same_sum = same_sum + 1\n",
|
585 |
" \n",
|
586 |
-
" if
|
587 |
-
"
|
|
|
|
|
|
|
|
|
588 |
"\n",
|
589 |
"\n",
|
590 |
-
"print(\"
|
591 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
592 |
}
|
593 |
],
|
594 |
"metadata": {
|
|
|
127 |
},
|
128 |
{
|
129 |
"cell_type": "code",
|
130 |
+
"execution_count": 1,
|
131 |
+
"id": "b28e3499-bbff-4548-9f85-2baee088cabf",
|
132 |
"metadata": {},
|
133 |
"outputs": [],
|
134 |
+
"source": [
|
135 |
+
"import subprocess\n",
|
136 |
+
"import os\n",
|
137 |
+
"# 设置环境变量, autodl一般区域\n",
|
138 |
+
"result = subprocess.run('bash -c \"source /etc/network_turbo && env | grep proxy\"', shell=True, capture_output=True, text=True)\n",
|
139 |
+
"output = result.stdout\n",
|
140 |
+
"for line in output.splitlines():\n",
|
141 |
+
" if '=' in line:\n",
|
142 |
+
" var, value = line.split('=', 1)\n",
|
143 |
+
" os.environ[var] = value"
|
144 |
+
]
|
145 |
+
},
|
146 |
+
{
|
147 |
+
"cell_type": "code",
|
148 |
+
"execution_count": 6,
|
149 |
+
"id": "dc04d5e3-7623-4d59-9f3b-ad03e339db11",
|
150 |
+
"metadata": {},
|
151 |
+
"outputs": [
|
152 |
+
{
|
153 |
+
"data": {
|
154 |
+
"text/plain": [
|
155 |
+
"DatasetDict({\n",
|
156 |
+
" train: Dataset({\n",
|
157 |
+
" features: ['sequence', 'label'],\n",
|
158 |
+
" num_rows: 59195\n",
|
159 |
+
" })\n",
|
160 |
+
"})"
|
161 |
+
]
|
162 |
+
},
|
163 |
+
"execution_count": 6,
|
164 |
+
"metadata": {},
|
165 |
+
"output_type": "execute_result"
|
166 |
+
}
|
167 |
+
],
|
168 |
"source": [
|
169 |
"from datasets import load_dataset\n",
|
170 |
"# 1. load ~11k samples from promoters prediction dataset\n",
|
171 |
+
"dna_dataset = load_dataset(\"dnagpt/dna_promoter_300\")\n",
|
172 |
+
"dna_dataset"
|
173 |
]
|
174 |
},
|
175 |
{
|
176 |
"cell_type": "code",
|
177 |
+
"execution_count": 7,
|
178 |
+
"id": "f7332fa1-3343-4247-b4cc-54733dff6964",
|
179 |
+
"metadata": {},
|
180 |
+
"outputs": [
|
181 |
+
{
|
182 |
+
"data": {
|
183 |
+
"text/plain": [
|
184 |
+
"{'sequence': 'TAAATACGGAAGTTTATTACTTGAGGAATAGATGGAATCGTCGGGCGTGAGAGATCATAATCGGCTGCTTCTGGGAGCCGCACGTGGGAAAGACTTATCCCCGACGGAGCTGGGACTGGGGCACAAACCGGAAGGAACACATCTGACCGAGAAAGAGACCAAGTGGCTCAGGTAGGACCAAAGCGAGCAAGGCTGCGGGTCCTGTTGCTCTCTGTCCTGTAAATTTAAACGTTACGCCACCTGGTAATGATACCCTCGTCCTCCGAGGCGACAAGTCAGAACTTCCACCAAGGGCATTAC',\n",
|
185 |
+
" 'label': 0}"
|
186 |
+
]
|
187 |
+
},
|
188 |
+
"execution_count": 7,
|
189 |
+
"metadata": {},
|
190 |
+
"output_type": "execute_result"
|
191 |
+
}
|
192 |
+
],
|
193 |
+
"source": [
|
194 |
+
"dna_dataset[\"train\"][0]"
|
195 |
+
]
|
196 |
+
},
|
197 |
+
{
|
198 |
+
"cell_type": "code",
|
199 |
+
"execution_count": 8,
|
200 |
"id": "93d09d8d-f521-49f7-b0e0-7ac089dfbf49",
|
201 |
"metadata": {},
|
202 |
"outputs": [],
|
|
|
225 |
},
|
226 |
{
|
227 |
"cell_type": "code",
|
228 |
+
"execution_count": 9,
|
229 |
"id": "9f9c0e5a-6591-47ac-b358-d746a00dfc0a",
|
230 |
"metadata": {},
|
231 |
+
"outputs": [
|
232 |
+
{
|
233 |
+
"name": "stdout",
|
234 |
+
"output_type": "stream",
|
235 |
+
"text": [
|
236 |
+
"{'instruction': 'Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.', 'input': 'TAAATACGGAAGTTTATTACTTGAGGAATAGATGGAATCGTCGGGCGTGAGAGATCATAATCGGCTGCTTCTGGGAGCCGCACGTGGGAAAGACTTATCCCCGACGGAGCTGGGACTGGGGCACAAACCGGAAGGAACACATCTGACCGAGAAAGAGACCAAGTGGCTCAGGTAGGACCAAAGCGAGCAAGGCTGCGGGTCCTGTTGCTCTCTGTCCTGTAAATTTAAACGTTACGCCACCTGGTAATGATACCCTCGTCCTCCGAGGCGACAAGTCAGAACTTCCACCAAGGGCATTAC', 'output': 'Non-promoter'}\n"
|
237 |
+
]
|
238 |
+
}
|
239 |
+
],
|
240 |
"source": [
|
241 |
"example = dna_dataset[\"train\"][0]\n",
|
242 |
"print(build_prompt(example))"
|
|
|
244 |
},
|
245 |
{
|
246 |
"cell_type": "code",
|
247 |
+
"execution_count": 10,
|
248 |
"id": "83070a23-1604-4d28-b371-e01060331ed5",
|
249 |
"metadata": {},
|
250 |
"outputs": [],
|
|
|
263 |
},
|
264 |
{
|
265 |
"cell_type": "code",
|
266 |
+
"execution_count": 11,
|
267 |
"id": "89fb8ed3-aa58-462f-b2a6-ce445c597a33",
|
268 |
"metadata": {},
|
269 |
+
"outputs": [
|
270 |
+
{
|
271 |
+
"data": {
|
272 |
+
"application/vnd.jupyter.widget-view+json": {
|
273 |
+
"model_id": "7b84d3a64bf645ada13d0cada2d9f524",
|
274 |
+
"version_major": 2,
|
275 |
+
"version_minor": 0
|
276 |
+
},
|
277 |
+
"text/plain": [
|
278 |
+
"Generating train split: 0 examples [00:00, ? examples/s]"
|
279 |
+
]
|
280 |
+
},
|
281 |
+
"metadata": {},
|
282 |
+
"output_type": "display_data"
|
283 |
+
},
|
284 |
+
{
|
285 |
+
"data": {
|
286 |
+
"text/plain": [
|
287 |
+
"DatasetDict({\n",
|
288 |
+
" train: Dataset({\n",
|
289 |
+
" features: ['instruction', 'input', 'output'],\n",
|
290 |
+
" num_rows: 59195\n",
|
291 |
+
" })\n",
|
292 |
+
"})"
|
293 |
+
]
|
294 |
+
},
|
295 |
+
"execution_count": 11,
|
296 |
+
"metadata": {},
|
297 |
+
"output_type": "execute_result"
|
298 |
+
}
|
299 |
+
],
|
300 |
"source": [
|
301 |
"dna_ft_dataset = load_dataset(\"json\", data_files='data/dna_promoter_300.jsonl')\n",
|
302 |
"dna_ft_dataset"
|
|
|
304 |
},
|
305 |
{
|
306 |
"cell_type": "code",
|
307 |
+
"execution_count": 12,
|
308 |
"id": "e4f7b75f-6ccb-4fda-8004-40df7d52678f",
|
309 |
"metadata": {},
|
310 |
+
"outputs": [
|
311 |
+
{
|
312 |
+
"data": {
|
313 |
+
"text/plain": [
|
314 |
+
"DatasetDict({\n",
|
315 |
+
" train: Dataset({\n",
|
316 |
+
" features: ['instruction', 'input', 'output'],\n",
|
317 |
+
" num_rows: 53275\n",
|
318 |
+
" })\n",
|
319 |
+
" test: Dataset({\n",
|
320 |
+
" features: ['instruction', 'input', 'output'],\n",
|
321 |
+
" num_rows: 5920\n",
|
322 |
+
" })\n",
|
323 |
+
"})"
|
324 |
+
]
|
325 |
+
},
|
326 |
+
"execution_count": 12,
|
327 |
+
"metadata": {},
|
328 |
+
"output_type": "execute_result"
|
329 |
+
}
|
330 |
+
],
|
331 |
"source": [
|
332 |
"data = dna_ft_dataset[\"train\"].train_test_split(train_size=0.9, seed=42)\n",
|
333 |
"data"
|
|
|
335 |
},
|
336 |
{
|
337 |
"cell_type": "code",
|
338 |
+
"execution_count": 13,
|
339 |
"id": "36d9ee0e-8423-4529-aa7e-fda2728fab2f",
|
340 |
"metadata": {},
|
341 |
"outputs": [],
|
|
|
349 |
"from tokenizers import Tokenizer\n",
|
350 |
"from transformers import GPT2TokenizerFast\n",
|
351 |
"\n",
|
352 |
+
"#需要使用生物序列+英文的多模态大模型\n",
|
353 |
"tokenizer = GPT2Tokenizer.from_pretrained(\"dnagpt/gene_eng_gpt2_v0\")\n",
|
354 |
"tokenizer.pad_token = tokenizer.eos_token"
|
355 |
]
|
356 |
},
|
357 |
{
|
358 |
"cell_type": "code",
|
359 |
+
"execution_count": 14,
|
360 |
"id": "871baee0-f06f-4422-a741-af533f7d92e1",
|
361 |
"metadata": {},
|
362 |
"outputs": [],
|
|
|
385 |
},
|
386 |
{
|
387 |
"cell_type": "code",
|
388 |
+
"execution_count": 15,
|
389 |
"id": "bca1c275-cc3d-43df-923e-e6604d584226",
|
390 |
"metadata": {},
|
391 |
+
"outputs": [
|
392 |
+
{
|
393 |
+
"data": {
|
394 |
+
"text/plain": [
|
395 |
+
"{'instruction': 'Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.',\n",
|
396 |
+
" 'input': 'CCAGGATGCGCTGACGACCCGGCTGGCAGGCGGGTCCTCGTGGGCGAGGCGAGGGAGGCGGCGAGAGAGGAGCAATAGTTTCCCACCGCTCCCTCTCAGGCGCAGGGTCTAGAGAAGCGCGAGGGGATCTAGAGAAGCCGGAGGGGAGGAAGCGCGAGTCCGCGGCCCGCCCCGTTGCGTCCCACCCACCGCGTCCCCTCCCCTCCCCTCCCGCTGCGGGAAAAGCGGCCGCGGGCGGCGGCGCCCACTGTGGGGCGGGCGGAGCGCCGCGGGAGGCGGACGAGATGCGAGCGCGGCCGC',\n",
|
397 |
+
" 'output': 'promoter'}"
|
398 |
+
]
|
399 |
+
},
|
400 |
+
"execution_count": 15,
|
401 |
+
"metadata": {},
|
402 |
+
"output_type": "execute_result"
|
403 |
+
}
|
404 |
+
],
|
405 |
"source": [
|
406 |
"example = data[\"test\"][0]\n",
|
407 |
"example"
|
|
|
409 |
},
|
410 |
{
|
411 |
"cell_type": "code",
|
412 |
+
"execution_count": 16,
|
413 |
"id": "76f2e027-0a31-4919-bb7e-404c786e1599",
|
414 |
"metadata": {},
|
415 |
+
"outputs": [
|
416 |
+
{
|
417 |
+
"name": "stdout",
|
418 |
+
"output_type": "stream",
|
419 |
+
"text": [
|
420 |
+
"Below is an instruction that describes a task. Write a response that appropriately completes the request.\n",
|
421 |
+
"\n",
|
422 |
+
"### Instruction:\n",
|
423 |
+
"Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.\n",
|
424 |
+
"\n",
|
425 |
+
"### Input:\n",
|
426 |
+
"CCAGGATGCGCTGACGACCCGGCTGGCAGGCGGGTCCTCGTGGGCGAGGCGAGGGAGGCGGCGAGAGAGGAGCAATAGTTTCCCACCGCTCCCTCTCAGGCGCAGGGTCTAGAGAAGCGCGAGGGGATCTAGAGAAGCCGGAGGGGAGGAAGCGCGAGTCCGCGGCCCGCCCCGTTGCGTCCCACCCACCGCGTCCCCTCCCCTCCCCTCCCGCTGCGGGAAAAGCGGCCGCGGGCGGCGGCGCCCACTGTGGGGCGGGCGGAGCGCCGCGGGAGGCGGACGAGATGCGAGCGCGGCCGC\n",
|
427 |
+
"\n",
|
428 |
+
"### Response:\n",
|
429 |
+
"promoter\n"
|
430 |
+
]
|
431 |
+
}
|
432 |
+
],
|
433 |
"source": [
|
434 |
"prompt = build_prompt(example)\n",
|
435 |
"print(prompt)"
|
|
|
437 |
},
|
438 |
{
|
439 |
"cell_type": "code",
|
440 |
+
"execution_count": 17,
|
441 |
"id": "932b54ca-7e27-47cd-b67d-7ef8386b6608",
|
442 |
"metadata": {},
|
443 |
+
"outputs": [
|
444 |
+
{
|
445 |
+
"name": "stdout",
|
446 |
+
"output_type": "stream",
|
447 |
+
"text": [
|
448 |
+
"tokens: Bel ow Ġ is Ġ an Ġ instruc tion Ġ th at Ġ describ es Ġ a Ġ t ask . ĠWrit e Ġ a Ġ respon se Ġ th at Ġ appropri at el y Ġ complet es Ġ the Ġ request . Ċ Ċ # # # ĠIn struc tion : Ċ D eter min e Ġ cor e Ġ promo ter Ġ det ec tion Ġ of Ġ follow ing Ġ d na Ġ sequenc e , ĠTh e Ġ resul t Ġ will Ġ be Ġ on e Ġ of Ġ the Ġ follow ing : ĠN on - promo ter , Ġ promo ter . Ċ Ċ # # # ĠIn put : Ċ CC AGGATGC GC TGACG ACCC GGCTGGC AGGC GGGTCC TCG TGGGCG AGGCG AGGGAGGC GGCG AGAGAGG AGCAATAG TTTCCC ACCGC TCCCTCTC AGGCGC AGGG TCTAG AGAAGC GCG AGGGG ATCTAG AGAAGCC GG AGGGG AGGAAGC GCG AGTCC GCGG CCCGCC CCG TTGCG TCCC ACCCACC GCG TCCCCTCCCC TCCCCTCCC GCTGC GGG AAAAGC GGCCGC GGGCGGC GGCGCCC ACTGTG GGGC GGGC GGAGC GCCGC GGGAGGC GGACG AGATGCG AGCGC GGCCGC Ċ Ċ # # # ĠR esp on se : Ċ promo ter\n"
|
449 |
+
]
|
450 |
+
}
|
451 |
+
],
|
452 |
"source": [
|
453 |
"print('tokens: ', ' '.join(tokenizer.tokenize(prompt)))"
|
454 |
]
|
455 |
},
|
456 |
{
|
457 |
"cell_type": "code",
|
458 |
+
"execution_count": 18,
|
459 |
"id": "26671faf-68d0-4a44-978e-e1a24e86c9b1",
|
460 |
"metadata": {},
|
461 |
+
"outputs": [
|
462 |
+
{
|
463 |
+
"data": {
|
464 |
+
"application/vnd.jupyter.widget-view+json": {
|
465 |
+
"model_id": "02d2f083e74a45e6ada46b9872822dfc",
|
466 |
+
"version_major": 2,
|
467 |
+
"version_minor": 0
|
468 |
+
},
|
469 |
+
"text/plain": [
|
470 |
+
"Map: 0%| | 0/53275 [00:00<?, ? examples/s]"
|
471 |
+
]
|
472 |
+
},
|
473 |
+
"metadata": {},
|
474 |
+
"output_type": "display_data"
|
475 |
+
},
|
476 |
+
{
|
477 |
+
"data": {
|
478 |
+
"application/vnd.jupyter.widget-view+json": {
|
479 |
+
"model_id": "f9ebfc901049446cb70580ee6d90861b",
|
480 |
+
"version_major": 2,
|
481 |
+
"version_minor": 0
|
482 |
+
},
|
483 |
+
"text/plain": [
|
484 |
+
"Map: 0%| | 0/5920 [00:00<?, ? examples/s]"
|
485 |
+
]
|
486 |
+
},
|
487 |
+
"metadata": {},
|
488 |
+
"output_type": "display_data"
|
489 |
+
},
|
490 |
+
{
|
491 |
+
"data": {
|
492 |
+
"text/plain": [
|
493 |
+
"DatasetDict({\n",
|
494 |
+
" train: Dataset({\n",
|
495 |
+
" features: ['input_ids', 'attention_mask'],\n",
|
496 |
+
" num_rows: 53275\n",
|
497 |
+
" })\n",
|
498 |
+
" test: Dataset({\n",
|
499 |
+
" features: ['input_ids', 'attention_mask'],\n",
|
500 |
+
" num_rows: 5920\n",
|
501 |
+
" })\n",
|
502 |
+
"})"
|
503 |
+
]
|
504 |
+
},
|
505 |
+
"execution_count": 18,
|
506 |
+
"metadata": {},
|
507 |
+
"output_type": "execute_result"
|
508 |
+
}
|
509 |
+
],
|
510 |
"source": [
|
511 |
"def tokenize_function(example):\n",
|
512 |
" prompt = build_prompt(example)\n",
|
513 |
+
" result = tokenizer(prompt, padding='max_length', truncation=True, max_length=256) # max_length=256\n",
|
514 |
" return result\n",
|
515 |
"\n",
|
516 |
"\n",
|
|
|
523 |
},
|
524 |
{
|
525 |
"cell_type": "code",
|
526 |
+
"execution_count": 19,
|
527 |
"id": "3d46c8b1-9fb3-431a-87ea-c278468543e7",
|
528 |
"metadata": {},
|
529 |
+
"outputs": [
|
530 |
+
{
|
531 |
+
"data": {
|
532 |
+
"text/plain": [
|
533 |
+
"Dataset({\n",
|
534 |
+
" features: ['input_ids', 'attention_mask'],\n",
|
535 |
+
" num_rows: 53275\n",
|
536 |
+
"})"
|
537 |
+
]
|
538 |
+
},
|
539 |
+
"execution_count": 19,
|
540 |
+
"metadata": {},
|
541 |
+
"output_type": "execute_result"
|
542 |
+
}
|
543 |
+
],
|
544 |
"source": [
|
545 |
"tokenized_datasets[\"train\"]"
|
546 |
]
|
547 |
},
|
548 |
{
|
549 |
"cell_type": "code",
|
550 |
+
"execution_count": 20,
|
551 |
"id": "26985c81-4335-4ac0-9a5a-84a5b4f2d0e4",
|
552 |
"metadata": {},
|
553 |
"outputs": [],
|
|
|
561 |
},
|
562 |
{
|
563 |
"cell_type": "code",
|
564 |
+
"execution_count": 21,
|
565 |
"id": "e18d3095-d6dd-423b-84fb-dca4a629d450",
|
566 |
"metadata": {},
|
567 |
+
"outputs": [
|
568 |
+
{
|
569 |
+
"data": {
|
570 |
+
"application/vnd.jupyter.widget-view+json": {
|
571 |
+
"model_id": "08a24dc5ff954b8581d5c2378e0d60d6",
|
572 |
+
"version_major": 2,
|
573 |
+
"version_minor": 0
|
574 |
+
},
|
575 |
+
"text/plain": [
|
576 |
+
"generation_config.json: 0%| | 0.00/111 [00:00<?, ?B/s]"
|
577 |
+
]
|
578 |
+
},
|
579 |
+
"metadata": {},
|
580 |
+
"output_type": "display_data"
|
581 |
+
}
|
582 |
+
],
|
583 |
"source": [
|
584 |
"model = GPT2LMHeadModel.from_pretrained(\"dnagpt/gene_eng_gpt2_v0\")"
|
585 |
]
|
586 |
},
|
587 |
{
|
588 |
"cell_type": "code",
|
589 |
+
"execution_count": 22,
|
590 |
"id": "12134cf2-676a-4176-a733-35caab2fd520",
|
591 |
"metadata": {},
|
592 |
"outputs": [],
|
|
|
631 |
},
|
632 |
{
|
633 |
"cell_type": "code",
|
634 |
+
"execution_count": 23,
|
635 |
"id": "b9a2e2a9-a1ff-44b0-a550-623a16d0d7a2",
|
636 |
"metadata": {},
|
637 |
+
"outputs": [
|
638 |
+
{
|
639 |
+
"name": "stderr",
|
640 |
+
"output_type": "stream",
|
641 |
+
"text": [
|
642 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
643 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
644 |
+
"The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n"
|
645 |
+
]
|
646 |
+
},
|
647 |
+
{
|
648 |
+
"name": "stdout",
|
649 |
+
"output_type": "stream",
|
650 |
+
"text": [
|
651 |
+
"input (test): Below is an instruction that describes a task. Write a response that appropriately completes the request.\n",
|
652 |
+
"\n",
|
653 |
+
"### Instruction:\n",
|
654 |
+
"Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.\n",
|
655 |
+
"\n",
|
656 |
+
"### Input:\n",
|
657 |
+
"CCAGGATGCGCTGACGACCCGGCTGGCAGGCGGGTCCTCGTGGGCGAGGCGAGGGAGGCGGCGAGAGAGGAGCAATAGTTTCCCACCGCTCCCTCTCAGGCGCAGGGTCTAGAGAAGCGCGAGGGGATCTAGAGAAGCCGGAGGGGAGGAAGCGCGAGTCCGCGGCCCGCCCCGTTGCGTCCCACCCACCGCGTCCCCTCCCCTCCCCTCCCGCTGCGGGAAAAGCGGCCGCGGGCGGCGGCGCCCACTGTGGGGCGGGCGGAGCGCCGCGGGAGGCGGACGAGATGCGAGCGCGGCCGC\n",
|
658 |
+
"\n",
|
659 |
+
"### Response:\n",
|
660 |
+
"\n",
|
661 |
+
"--------------------------\n",
|
662 |
+
"\n",
|
663 |
+
"model's answer: \n",
|
664 |
+
"\n",
|
665 |
+
"TATAT\n"
|
666 |
+
]
|
667 |
+
}
|
668 |
+
],
|
669 |
"source": [
|
670 |
"input_text = format_input(data[\"test\"][0])\n",
|
671 |
"\n",
|
|
|
679 |
},
|
680 |
{
|
681 |
"cell_type": "code",
|
682 |
+
"execution_count": 24,
|
683 |
"id": "63b54fe2-f077-4ca8-974e-1bcc41ce57d6",
|
684 |
"metadata": {},
|
685 |
"outputs": [],
|
|
|
698 |
},
|
699 |
{
|
700 |
"cell_type": "code",
|
701 |
+
"execution_count": 25,
|
702 |
"id": "61df123d-e67d-4548-998a-de1e2781e774",
|
703 |
"metadata": {},
|
704 |
+
"outputs": [
|
705 |
+
{
|
706 |
+
"name": "stdout",
|
707 |
+
"output_type": "stream",
|
708 |
+
"text": [
|
709 |
+
"[2025-01-10 15:41:26,331] [INFO] [real_accelerator.py:222:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n"
|
710 |
+
]
|
711 |
+
},
|
712 |
+
{
|
713 |
+
"name": "stderr",
|
714 |
+
"output_type": "stream",
|
715 |
+
"text": [
|
716 |
+
"/root/miniconda3/compiler_compat/ld: cannot find -laio: No such file or directory\n",
|
717 |
+
"collect2: error: ld returned 1 exit status\n",
|
718 |
+
"/root/miniconda3/compiler_compat/ld: warning: libpthread.so.0, needed by /usr/local/cuda/lib64/libcufile.so, not found (try using -rpath or -rpath-link)\n",
|
719 |
+
"/root/miniconda3/compiler_compat/ld: warning: libstdc++.so.6, needed by /usr/local/cuda/lib64/libcufile.so, not found (try using -rpath or -rpath-link)\n",
|
720 |
+
"/root/miniconda3/compiler_compat/ld: warning: libm.so.6, needed by /usr/local/cuda/lib64/libcufile.so, not found (try using -rpath or -rpath-link)\n",
|
721 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::runtime_error::~runtime_error()@GLIBCXX_3.4'\n",
|
722 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `__gxx_personality_v0@CXXABI_1.3'\n",
|
723 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::ostream::tellp()@GLIBCXX_3.4'\n",
|
724 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::chrono::_V2::steady_clock::now()@GLIBCXX_3.4.19'\n",
|
725 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::string::_M_replace_aux(unsigned long, unsigned long, unsigned long, char)@GLIBCXX_3.4'\n",
|
726 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `typeinfo for bool@CXXABI_1.3'\n",
|
727 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::__throw_logic_error(char const*)@GLIBCXX_3.4'\n",
|
728 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `VTT for std::basic_ostringstream<char, std::char_traits<char>, std::allocator<char> >@GLIBCXX_3.4'\n",
|
729 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::logic_error@GLIBCXX_3.4'\n",
|
730 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::locale::~locale()@GLIBCXX_3.4'\n",
|
731 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_string<char, std::char_traits<char>, std::allocator<char> >::basic_string(std::string const&, unsigned long, unsigned long)@GLIBCXX_3.4'\n",
|
732 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `__cxa_end_catch@CXXABI_1.3'\n",
|
733 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `VTT for std::basic_ofstream<char, std::char_traits<char> >@GLIBCXX_3.4'\n",
|
734 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::logic_error::~logic_error()@GLIBCXX_3.4'\n",
|
735 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for __cxxabiv1::__si_class_type_info@CXXABI_1.3'\n",
|
736 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_ios<char, std::char_traits<char> >::_M_cache_locale(std::locale const&)@GLIBCXX_3.4'\n",
|
737 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `VTT for std::basic_stringstream<char, std::char_traits<char>, std::allocator<char> >@GLIBCXX_3.4'\n",
|
738 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `operator new[](unsigned long)@GLIBCXX_3.4'\n",
|
739 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::string::_M_leak_hard()@GLIBCXX_3.4'\n",
|
740 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::basic_ifstream<char, std::char_traits<char> >@GLIBCXX_3.4'\n",
|
741 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_streambuf<wchar_t, std::char_traits<wchar_t> >::basic_streambuf(std::basic_streambuf<wchar_t, std::char_traits<wchar_t> > const&)@GLIBCXX_3.4'\n",
|
742 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::string::append(char const*, unsigned long)@GLIBCXX_3.4'\n",
|
743 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_string<char, std::char_traits<char>, std::allocator<char> >::basic_string(std::string const&)@GLIBCXX_3.4'\n",
|
744 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `typeinfo for unsigned short@CXXABI_1.3'\n",
|
745 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::string::resize(unsigned long, char)@GLIBCXX_3.4'\n",
|
746 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `typeinfo for char const*@CXXABI_1.3'\n",
|
747 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::ctype<char>::_M_widen_init() const@GLIBCXX_3.4.11'\n",
|
748 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::__throw_invalid_argument(char const*)@GLIBCXX_3.4'\n",
|
749 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::locale::operator=(std::locale const&)@GLIBCXX_3.4'\n",
|
750 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_ios<wchar_t, std::char_traits<wchar_t> >::_M_cache_locale(std::locale const&)@GLIBCXX_3.4'\n",
|
751 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::_Rb_tree_decrement(std::_Rb_tree_node_base const*)@GLIBCXX_3.4'\n",
|
752 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `__cxa_free_exception@CXXABI_1.3'\n",
|
753 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::condition_variable::notify_one()@GLIBCXX_3.4.11'\n",
|
754 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::ios_base::Init::~Init()@GLIBCXX_3.4'\n",
|
755 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_string<char, std::char_traits<char>, std::allocator<char> >::~basic_string()@GLIBCXX_3.4'\n",
|
756 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `__cxa_pure_virtual@CXXABI_1.3'\n",
|
757 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::ostream::flush()@GLIBCXX_3.4'\n",
|
758 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for __cxxabiv1::__class_type_info@CXXABI_1.3'\n",
|
759 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `__cxa_rethrow@CXXABI_1.3'\n",
|
760 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::basic_stringbuf<char, std::char_traits<char>, std::allocator<char> >@GLIBCXX_3.4'\n",
|
761 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_fstream<char, std::char_traits<char> >::~basic_fstream()@GLIBCXX_3.4'\n",
|
762 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::string::compare(char const*) const@GLIBCXX_3.4'\n",
|
763 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `VTT for std::basic_ostringstream<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> >@GLIBCXX_3.4'\n",
|
764 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::locale::locale()@GLIBCXX_3.4'\n",
|
765 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::chrono::_V2::system_clock::now()@GLIBCXX_3.4.19'\n",
|
766 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `VTT for std::basic_ifstream<char, std::char_traits<char> >@GLIBCXX_3.4'\n",
|
767 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::_Hash_bytes(void const*, unsigned long, unsigned long)@CXXABI_1.3.5'\n",
|
768 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::ostream& std::ostream::_M_insert<long long>(long long)@GLIBCXX_3.4.9'\n",
|
769 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `typeinfo for char*@CXXABI_1.3'\n",
|
770 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::__detail::_Prime_rehash_policy::_M_need_rehash(unsigned long, unsigned long, unsigned long) const@GLIBCXX_3.4.18'\n",
|
771 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::out_of_range@GLIBCXX_3.4'\n",
|
772 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::ostream& std::ostream::_M_insert<unsigned long>(unsigned long)@GLIBCXX_3.4.9'\n",
|
773 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::_Rb_tree_increment(std::_Rb_tree_node_base const*)@GLIBCXX_3.4'\n",
|
774 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::ios_base::~ios_base()@GLIBCXX_3.4'\n",
|
775 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::range_error::~range_error()@GLIBCXX_3.4'\n",
|
776 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::__basic_file<char>::~__basic_file()@GLIBCXX_3.4'\n",
|
777 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `__cxa_guard_acquire@CXXABI_1.3'\n",
|
778 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::ostream& std::ostream::_M_insert<bool>(bool)@GLIBCXX_3.4.9'\n",
|
779 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::overflow_error@GLIBCXX_3.4'\n",
|
780 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `VTT for std::basic_fstream<char, std::char_traits<char> >@GLIBCXX_3.4'\n",
|
781 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::range_error@GLIBCXX_3.4'\n",
|
782 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::basic_ios<char, std::char_traits<char> >@GLIBCXX_3.4'\n",
|
783 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::basic_filebuf<char, std::char_traits<char> >@GLIBCXX_3.4'\n",
|
784 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `operator delete[](void*)@GLIBCXX_3.4'\n",
|
785 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::basic_stringstream<char, std::char_traits<char>, std::allocator<char> >@GLIBCXX_3.4'\n",
|
786 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_string<char, std::char_traits<char>, std::allocator<char> >::basic_string(unsigned long, char, std::allocator<char> const&)@GLIBCXX_3.4'\n",
|
787 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::__detail::_List_node_base::_M_transfer(std::__detail::_List_node_base*, std::__detail::_List_node_base*)@GLIBCXX_3.4.15'\n",
|
788 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::string::replace(unsigned long, unsigned long, char const*, unsigned long)@GLIBCXX_3.4'\n",
|
789 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `typeinfo for std::exception@GLIBCXX_3.4'\n",
|
790 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_string<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> >::_Rep::_M_destroy(std::allocator<wchar_t> const&)@GLIBCXX_3.4'\n",
|
791 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::istream& std::istream::_M_extract<double>(double&)@GLIBCXX_3.4.9'\n",
|
792 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_filebuf<char, std::char_traits<char> >::close()@GLIBCXX_3.4'\n",
|
793 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::basic_fstream<char, std::char_traits<char> >@GLIBCXX_3.4'\n",
|
794 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_ifstream<char, std::char_traits<char> >::basic_ifstream(char const*, std::_Ios_Openmode)@GLIBCXX_3.4'\n",
|
795 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::string::append(std::string const&)@GLIBCXX_3.4'\n",
|
796 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `operator new(unsigned long)@GLIBCXX_3.4'\n",
|
797 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `VTT for std::basic_istringstream<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> >@GLIBCXX_3.4'\n",
|
798 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `typeinfo for unsigned int@CXXABI_1.3'\n",
|
799 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::string::append(char const*)@GLIBCXX_3.4'\n",
|
800 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::domain_error@GLIBCXX_3.4'\n",
|
801 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::string::find(char, unsigned long) const@GLIBCXX_3.4'\n",
|
802 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::ostream::put(char)@GLIBCXX_3.4'\n",
|
803 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `typeinfo for int@CXXABI_1.3'\n",
|
804 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::__throw_bad_alloc()@GLIBCXX_3.4'\n",
|
805 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `__cxa_thread_atexit@CXXABI_1.3.7'\n",
|
806 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::_Rb_tree_increment(std::_Rb_tree_node_base*)@GLIBCXX_3.4'\n",
|
807 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_ifstream<char, std::char_traits<char> >::~basic_ifstream()@GLIBCXX_3.4'\n",
|
808 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::ios_base::Init::Init()@GLIBCXX_3.4'\n",
|
809 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::condition_variable::condition_variable()@GLIBCXX_3.4.11'\n",
|
810 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_filebuf<char, std::char_traits<char> >::basic_filebuf()@GLIBCXX_3.4'\n",
|
811 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `VTT for std::basic_istringstream<char, std::char_traits<char>, std::allocator<char> >@GLIBCXX_3.4'\n",
|
812 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::domain_error::~domain_error()@GLIBCXX_3.4'\n",
|
813 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::cerr@GLIBCXX_3.4'\n",
|
814 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::string::find(char const*, unsigned long, unsigned long) const@GLIBCXX_3.4'\n",
|
815 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::basic_istringstream<char, std::char_traits<char>, std::allocator<char> >@GLIBCXX_3.4'\n",
|
816 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_string<char, std::char_traits<char>, std::allocator<char> >::basic_string(std::allocator<char> const&)@GLIBCXX_3.4'\n",
|
817 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_stringbuf<char, std::char_traits<char>, std::allocator<char> >::str() const@GLIBCXX_3.4'\n",
|
818 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::invalid_argument@GLIBCXX_3.4'\n",
|
819 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `typeinfo for void*@CXXABI_1.3'\n",
|
820 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::string::assign(std::string const&)@GLIBCXX_3.4'\n",
|
821 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_ostringstream<char, std::char_traits<char>, std::allocator<char> >::~basic_ostringstream()@GLIBCXX_3.4'\n",
|
822 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::_Rb_tree_rebalance_for_erase(std::_Rb_tree_node_base*, std::_Rb_tree_node_base&)@GLIBCXX_3.4'\n",
|
823 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `typeinfo for unsigned long@CXXABI_1.3'\n",
|
824 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::__detail::_List_node_base::_M_hook(std::__detail::_List_node_base*)@GLIBCXX_3.4.15'\n",
|
825 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::__detail::_List_node_base::_M_unhook()@GLIBCXX_3.4.15'\n",
|
826 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::basic_ostringstream<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> >@GLIBCXX_3.4'\n",
|
827 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_stringbuf<char, std::char_traits<char>, std::allocator<char> >::_M_sync(char*, unsigned long, unsigned long)@GLIBCXX_3.4'\n",
|
828 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_iostream<char, std::char_traits<char> >::~basic_iostream()@GLIBCXX_3.4'\n",
|
829 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::locale::locale(std::locale const&)@GLIBCXX_3.4'\n",
|
830 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::basic_istringstream<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> >@GLIBCXX_3.4'\n",
|
831 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `log2f@GLIBC_2.2.5'\n",
|
832 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::ostream::operator<<(std::basic_streambuf<char, std::char_traits<char> >*)@GLIBCXX_3.4'\n",
|
833 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::basic_streambuf<wchar_t, std::char_traits<wchar_t> >@GLIBCXX_3.4'\n",
|
834 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::exception::~exception()@GLIBCXX_3.4'\n",
|
835 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::string::_Rep::_S_create(unsigned long, unsigned long, std::allocator<char> const&)@GLIBCXX_3.4'\n",
|
836 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::__basic_file<char>::is_open() const@GLIBCXX_3.4'\n",
|
837 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_istringstream<char, std::char_traits<char>, std::allocator<char> >::~basic_istringstream()@GLIBCXX_3.4'\n",
|
838 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::string::swap(std::string&)@GLIBCXX_3.4'\n",
|
839 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `typeinfo for unsigned long*@CXXABI_1.3'\n",
|
840 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::basic_ostringstream<char, std::char_traits<char>, std::allocator<char> >@GLIBCXX_3.4'\n",
|
841 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_streambuf<char, std::char_traits<char> >::basic_streambuf(std::basic_streambuf<char, std::char_traits<char> > const&)@GLIBCXX_3.4'\n",
|
842 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_ios<char, std::char_traits<char> >::init(std::basic_streambuf<char, std::char_traits<char> >*)@GLIBCXX_3.4'\n",
|
843 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::__throw_bad_cast()@GLIBCXX_3.4'\n",
|
844 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_ios<char, std::char_traits<char> >::clear(std::_Ios_Iostate)@GLIBCXX_3.4'\n",
|
845 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_streambuf<wchar_t, std::char_traits<wchar_t> >::operator=(std::basic_streambuf<wchar_t, std::char_traits<wchar_t> > const&)@GLIBCXX_3.4'\n",
|
846 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `operator delete(void*)@GLIBCXX_3.4'\n",
|
847 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::ostream::operator<<(int)@GLIBCXX_3.4'\n",
|
848 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::string::_Rep::_S_empty_rep_storage@GLIBCXX_3.4'\n",
|
849 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::string::_Rep::_M_destroy(std::allocator<char> const&)@GLIBCXX_3.4'\n",
|
850 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_iostream<wchar_t, std::char_traits<wchar_t> >::~basic_iostream()@GLIBCXX_3.4'\n",
|
851 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::runtime_error@GLIBCXX_3.4'\n",
|
852 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::basic_ofstream<char, std::char_traits<char> >@GLIBCXX_3.4'\n",
|
853 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::_Rb_tree_insert_and_rebalance(bool, std::_Rb_tree_node_base*, std::_Rb_tree_node_base*, std::_Rb_tree_node_base&)@GLIBCXX_3.4'\n",
|
854 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_stringstream<char, std::char_traits<char>, std::allocator<char> >::~basic_stringstream()@GLIBCXX_3.4'\n",
|
855 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `VTT for std::basic_stringstream<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> >@GLIBCXX_3.4'\n",
|
856 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::ostream& std::ostream::_M_insert<long>(long)@GLIBCXX_3.4.9'\n",
|
857 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::istream::get()@GLIBCXX_3.4'\n",
|
858 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `typeinfo for unsigned long long@CXXABI_1.3'\n",
|
859 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_ostream<char, std::char_traits<char> >& std::operator<< <std::char_traits<char> >(std::basic_ostream<char, std::char_traits<char> >&, char const*)@GLIBCXX_3.4'\n",
|
860 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::out_of_range::~out_of_range()@GLIBCXX_3.4'\n",
|
861 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::length_error::~length_error()@GLIBCXX_3.4'\n",
|
862 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_ostream<char, std::char_traits<char> >& std::__ostream_insert<char, std::char_traits<char> >(std::basic_ostream<char, std::char_traits<char> >&, char const*, long)@GLIBCXX_3.4.9'\n",
|
863 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::invalid_argument::~invalid_argument()@GLIBCXX_3.4'\n",
|
864 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_string<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> >::swap(std::basic_string<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> >&)@GLIBCXX_3.4'\n",
|
865 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::cout@GLIBCXX_3.4'\n",
|
866 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::ostream& std::ostream::_M_insert<unsigned long long>(unsigned long long)@GLIBCXX_3.4.9'\n",
|
867 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `typeinfo for int*@CXXABI_1.3'\n",
|
868 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::ostream& std::ostream::_M_insert<void const*>(void const*)@GLIBCXX_3.4.9'\n",
|
869 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::underflow_error@GLIBCXX_3.4'\n",
|
870 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::basic_streambuf<char, std::char_traits<char> >@GLIBCXX_3.4'\n",
|
871 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `typeinfo for std::out_of_range@GLIBCXX_3.4'\n",
|
872 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `__cxa_allocate_exception@CXXABI_1.3'\n",
|
873 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::basic_ios<wchar_t, std::char_traits<wchar_t> >@GLIBCXX_3.4'\n",
|
874 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `typeinfo for void const*@CXXABI_1.3'\n",
|
875 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_ios<wchar_t, std::char_traits<wchar_t> >::init(std::basic_streambuf<wchar_t, std::char_traits<wchar_t> >*)@GLIBCXX_3.4'\n",
|
876 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::string::reserve(unsigned long)@GLIBCXX_3.4'\n",
|
877 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `__cxa_begin_catch@CXXABI_1.3'\n",
|
878 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `typeinfo for long@CXXABI_1.3'\n",
|
879 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_string<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> >::_Rep::_S_empty_rep_storage@GLIBCXX_3.4'\n",
|
880 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::string::_M_leak()@GLIBCXX_3.4'\n",
|
881 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_filebuf<char, std::char_traits<char> >::open(char const*, std::_Ios_Openmode)@GLIBCXX_3.4'\n",
|
882 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_stringbuf<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> >::_M_sync(wchar_t*, unsigned long, unsigned long)@GLIBCXX_3.4'\n",
|
883 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::istream::getline(char*, long, char)@GLIBCXX_3.4'\n",
|
884 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_istream<char, std::char_traits<char> >& std::getline<char, std::char_traits<char>, std::allocator<char> >(std::basic_istream<char, std::char_traits<char> >&, std::basic_string<char, std::char_traits<char>, std::allocator<char> >&, char)@GLIBCXX_3.4'\n",
|
885 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::basic_stringstream<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> >@GLIBCXX_3.4'\n",
|
886 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::condition_variable::~condition_variable()@GLIBCXX_3.4.11'\n",
|
887 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::basic_stringbuf<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> >@GLIBCXX_3.4'\n",
|
888 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::string::insert(unsigned long, char const*, unsigned long)@GLIBCXX_3.4'\n",
|
889 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::string::assign(char const*, unsigned long)@GLIBCXX_3.4'\n",
|
890 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `typeinfo for unsigned char@CXXABI_1.3'\n",
|
891 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::ios_base::ios_base()@GLIBCXX_3.4'\n",
|
892 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::__throw_out_of_range(char const*)@GLIBCXX_3.4'\n",
|
893 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::overflow_error::~overflow_error()@GLIBCXX_3.4'\n",
|
894 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::__throw_length_error(char const*)@GLIBCXX_3.4'\n",
|
895 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::__throw_system_error(int)@GLIBCXX_3.4.11'\n",
|
896 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_ofstream<char, std::char_traits<char> >::close()@GLIBCXX_3.4'\n",
|
897 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::ostream& std::ostream::_M_insert<double>(double)@GLIBCXX_3.4.9'\n",
|
898 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_streambuf<char, std::char_traits<char> >::operator=(std::basic_streambuf<char, std::char_traits<char> > const&)@GLIBCXX_3.4'\n",
|
899 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `typeinfo for long long@CXXABI_1.3'\n",
|
900 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_string<char, std::char_traits<char>, std::allocator<char> >::basic_string(char const*, unsigned long, std::allocator<char> const&)@GLIBCXX_3.4'\n",
|
901 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_ifstream<char, std::char_traits<char> >::close()@GLIBCXX_3.4'\n",
|
902 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `__cxa_guard_release@CXXABI_1.3'\n",
|
903 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `__cxa_throw@CXXABI_1.3'\n",
|
904 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::underflow_error::~underflow_error()@GLIBCXX_3.4'\n",
|
905 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::_Rb_tree_decrement(std::_Rb_tree_node_base*)@GLIBCXX_3.4'\n",
|
906 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::length_error@GLIBCXX_3.4'\n",
|
907 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_filebuf<char, std::char_traits<char> >::~basic_filebuf()@GLIBCXX_3.4'\n",
|
908 |
+
"collect2: error: ld returned 1 exit status\n"
|
909 |
+
]
|
910 |
+
}
|
911 |
+
],
|
912 |
"source": [
|
913 |
"# 初始化Trainer\n",
|
914 |
"trainer = Trainer(\n",
|
|
|
922 |
},
|
923 |
{
|
924 |
"cell_type": "code",
|
925 |
+
"execution_count": 26,
|
926 |
"id": "a9cd936a-5ea6-43e3-9848-27080f818606",
|
927 |
"metadata": {},
|
928 |
+
"outputs": [
|
929 |
+
{
|
930 |
+
"data": {
|
931 |
+
"text/html": [
|
932 |
+
"\n",
|
933 |
+
" <div>\n",
|
934 |
+
" \n",
|
935 |
+
" <progress value='19980' max='19980' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
|
936 |
+
" [19980/19980 21:55, Epoch 3/3]\n",
|
937 |
+
" </div>\n",
|
938 |
+
" <table border=\"1\" class=\"dataframe\">\n",
|
939 |
+
" <thead>\n",
|
940 |
+
" <tr style=\"text-align: left;\">\n",
|
941 |
+
" <th>Step</th>\n",
|
942 |
+
" <th>Training Loss</th>\n",
|
943 |
+
" </tr>\n",
|
944 |
+
" </thead>\n",
|
945 |
+
" <tbody>\n",
|
946 |
+
" <tr>\n",
|
947 |
+
" <td>500</td>\n",
|
948 |
+
" <td>2.335700</td>\n",
|
949 |
+
" </tr>\n",
|
950 |
+
" <tr>\n",
|
951 |
+
" <td>1000</td>\n",
|
952 |
+
" <td>2.184100</td>\n",
|
953 |
+
" </tr>\n",
|
954 |
+
" <tr>\n",
|
955 |
+
" <td>1500</td>\n",
|
956 |
+
" <td>2.178500</td>\n",
|
957 |
+
" </tr>\n",
|
958 |
+
" <tr>\n",
|
959 |
+
" <td>2000</td>\n",
|
960 |
+
" <td>2.172400</td>\n",
|
961 |
+
" </tr>\n",
|
962 |
+
" <tr>\n",
|
963 |
+
" <td>2500</td>\n",
|
964 |
+
" <td>2.171400</td>\n",
|
965 |
+
" </tr>\n",
|
966 |
+
" <tr>\n",
|
967 |
+
" <td>3000</td>\n",
|
968 |
+
" <td>2.171900</td>\n",
|
969 |
+
" </tr>\n",
|
970 |
+
" <tr>\n",
|
971 |
+
" <td>3500</td>\n",
|
972 |
+
" <td>2.163100</td>\n",
|
973 |
+
" </tr>\n",
|
974 |
+
" <tr>\n",
|
975 |
+
" <td>4000</td>\n",
|
976 |
+
" <td>2.159300</td>\n",
|
977 |
+
" </tr>\n",
|
978 |
+
" <tr>\n",
|
979 |
+
" <td>4500</td>\n",
|
980 |
+
" <td>2.161000</td>\n",
|
981 |
+
" </tr>\n",
|
982 |
+
" <tr>\n",
|
983 |
+
" <td>5000</td>\n",
|
984 |
+
" <td>2.160200</td>\n",
|
985 |
+
" </tr>\n",
|
986 |
+
" <tr>\n",
|
987 |
+
" <td>5500</td>\n",
|
988 |
+
" <td>2.158400</td>\n",
|
989 |
+
" </tr>\n",
|
990 |
+
" <tr>\n",
|
991 |
+
" <td>6000</td>\n",
|
992 |
+
" <td>2.151600</td>\n",
|
993 |
+
" </tr>\n",
|
994 |
+
" <tr>\n",
|
995 |
+
" <td>6500</td>\n",
|
996 |
+
" <td>2.153700</td>\n",
|
997 |
+
" </tr>\n",
|
998 |
+
" <tr>\n",
|
999 |
+
" <td>7000</td>\n",
|
1000 |
+
" <td>2.129500</td>\n",
|
1001 |
+
" </tr>\n",
|
1002 |
+
" <tr>\n",
|
1003 |
+
" <td>7500</td>\n",
|
1004 |
+
" <td>2.119100</td>\n",
|
1005 |
+
" </tr>\n",
|
1006 |
+
" <tr>\n",
|
1007 |
+
" <td>8000</td>\n",
|
1008 |
+
" <td>2.119800</td>\n",
|
1009 |
+
" </tr>\n",
|
1010 |
+
" <tr>\n",
|
1011 |
+
" <td>8500</td>\n",
|
1012 |
+
" <td>2.121600</td>\n",
|
1013 |
+
" </tr>\n",
|
1014 |
+
" <tr>\n",
|
1015 |
+
" <td>9000</td>\n",
|
1016 |
+
" <td>2.122500</td>\n",
|
1017 |
+
" </tr>\n",
|
1018 |
+
" <tr>\n",
|
1019 |
+
" <td>9500</td>\n",
|
1020 |
+
" <td>2.122300</td>\n",
|
1021 |
+
" </tr>\n",
|
1022 |
+
" <tr>\n",
|
1023 |
+
" <td>10000</td>\n",
|
1024 |
+
" <td>2.121500</td>\n",
|
1025 |
+
" </tr>\n",
|
1026 |
+
" <tr>\n",
|
1027 |
+
" <td>10500</td>\n",
|
1028 |
+
" <td>2.119500</td>\n",
|
1029 |
+
" </tr>\n",
|
1030 |
+
" <tr>\n",
|
1031 |
+
" <td>11000</td>\n",
|
1032 |
+
" <td>2.123500</td>\n",
|
1033 |
+
" </tr>\n",
|
1034 |
+
" <tr>\n",
|
1035 |
+
" <td>11500</td>\n",
|
1036 |
+
" <td>2.119600</td>\n",
|
1037 |
+
" </tr>\n",
|
1038 |
+
" <tr>\n",
|
1039 |
+
" <td>12000</td>\n",
|
1040 |
+
" <td>2.119300</td>\n",
|
1041 |
+
" </tr>\n",
|
1042 |
+
" <tr>\n",
|
1043 |
+
" <td>12500</td>\n",
|
1044 |
+
" <td>2.121800</td>\n",
|
1045 |
+
" </tr>\n",
|
1046 |
+
" <tr>\n",
|
1047 |
+
" <td>13000</td>\n",
|
1048 |
+
" <td>2.123500</td>\n",
|
1049 |
+
" </tr>\n",
|
1050 |
+
" <tr>\n",
|
1051 |
+
" <td>13500</td>\n",
|
1052 |
+
" <td>2.103200</td>\n",
|
1053 |
+
" </tr>\n",
|
1054 |
+
" <tr>\n",
|
1055 |
+
" <td>14000</td>\n",
|
1056 |
+
" <td>2.080700</td>\n",
|
1057 |
+
" </tr>\n",
|
1058 |
+
" <tr>\n",
|
1059 |
+
" <td>14500</td>\n",
|
1060 |
+
" <td>2.082100</td>\n",
|
1061 |
+
" </tr>\n",
|
1062 |
+
" <tr>\n",
|
1063 |
+
" <td>15000</td>\n",
|
1064 |
+
" <td>2.082900</td>\n",
|
1065 |
+
" </tr>\n",
|
1066 |
+
" <tr>\n",
|
1067 |
+
" <td>15500</td>\n",
|
1068 |
+
" <td>2.086400</td>\n",
|
1069 |
+
" </tr>\n",
|
1070 |
+
" <tr>\n",
|
1071 |
+
" <td>16000</td>\n",
|
1072 |
+
" <td>2.086600</td>\n",
|
1073 |
+
" </tr>\n",
|
1074 |
+
" <tr>\n",
|
1075 |
+
" <td>16500</td>\n",
|
1076 |
+
" <td>2.083800</td>\n",
|
1077 |
+
" </tr>\n",
|
1078 |
+
" <tr>\n",
|
1079 |
+
" <td>17000</td>\n",
|
1080 |
+
" <td>2.085000</td>\n",
|
1081 |
+
" </tr>\n",
|
1082 |
+
" <tr>\n",
|
1083 |
+
" <td>17500</td>\n",
|
1084 |
+
" <td>2.082800</td>\n",
|
1085 |
+
" </tr>\n",
|
1086 |
+
" <tr>\n",
|
1087 |
+
" <td>18000</td>\n",
|
1088 |
+
" <td>2.077600</td>\n",
|
1089 |
+
" </tr>\n",
|
1090 |
+
" <tr>\n",
|
1091 |
+
" <td>18500</td>\n",
|
1092 |
+
" <td>2.080300</td>\n",
|
1093 |
+
" </tr>\n",
|
1094 |
+
" <tr>\n",
|
1095 |
+
" <td>19000</td>\n",
|
1096 |
+
" <td>2.086600</td>\n",
|
1097 |
+
" </tr>\n",
|
1098 |
+
" <tr>\n",
|
1099 |
+
" <td>19500</td>\n",
|
1100 |
+
" <td>2.084200</td>\n",
|
1101 |
+
" </tr>\n",
|
1102 |
+
" </tbody>\n",
|
1103 |
+
"</table><p>"
|
1104 |
+
],
|
1105 |
+
"text/plain": [
|
1106 |
+
"<IPython.core.display.HTML object>"
|
1107 |
+
]
|
1108 |
+
},
|
1109 |
+
"metadata": {},
|
1110 |
+
"output_type": "display_data"
|
1111 |
+
},
|
1112 |
+
{
|
1113 |
+
"data": {
|
1114 |
+
"text/plain": [
|
1115 |
+
"TrainOutput(global_step=19980, training_loss=2.1272921145021977, metrics={'train_runtime': 1315.5944, 'train_samples_per_second': 121.485, 'train_steps_per_second': 15.187, 'total_flos': 2.08804995072e+16, 'train_loss': 2.1272921145021977, 'epoch': 3.0})"
|
1116 |
+
]
|
1117 |
+
},
|
1118 |
+
"execution_count": 26,
|
1119 |
+
"metadata": {},
|
1120 |
+
"output_type": "execute_result"
|
1121 |
+
}
|
1122 |
+
],
|
1123 |
"source": [
|
1124 |
"# 开始训练\n",
|
1125 |
"trainer.train()"
|
|
|
1127 |
},
|
1128 |
{
|
1129 |
"cell_type": "code",
|
1130 |
+
"execution_count": 27,
|
1131 |
"id": "315aae76-44b4-4513-8139-40ef22934873",
|
1132 |
"metadata": {},
|
1133 |
+
"outputs": [
|
1134 |
+
{
|
1135 |
+
"name": "stdout",
|
1136 |
+
"output_type": "stream",
|
1137 |
+
"text": [
|
1138 |
+
"Saved model to: gpt_ft/final\n"
|
1139 |
+
]
|
1140 |
+
}
|
1141 |
+
],
|
1142 |
"source": [
|
1143 |
"save_dir = 'gpt_ft/final'\n",
|
1144 |
"trainer.save_model(save_dir)\n",
|
|
|
1147 |
},
|
1148 |
{
|
1149 |
"cell_type": "code",
|
1150 |
+
"execution_count": 28,
|
1151 |
"id": "28d2dbbc-02ff-4120-b230-b19905a786cd",
|
1152 |
"metadata": {},
|
1153 |
"outputs": [],
|
|
|
1158 |
},
|
1159 |
{
|
1160 |
"cell_type": "code",
|
1161 |
+
"execution_count": 29,
|
1162 |
"id": "08987c3c-063a-4e9b-9ebb-e637b0b5bccd",
|
1163 |
"metadata": {},
|
1164 |
+
"outputs": [
|
1165 |
+
{
|
1166 |
+
"data": {
|
1167 |
+
"text/plain": [
|
1168 |
+
"GPT2LMHeadModel(\n",
|
1169 |
+
" (transformer): GPT2Model(\n",
|
1170 |
+
" (wte): Embedding(90000, 768)\n",
|
1171 |
+
" (wpe): Embedding(1024, 768)\n",
|
1172 |
+
" (drop): Dropout(p=0.1, inplace=False)\n",
|
1173 |
+
" (h): ModuleList(\n",
|
1174 |
+
" (0-11): 12 x GPT2Block(\n",
|
1175 |
+
" (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
|
1176 |
+
" (attn): GPT2SdpaAttention(\n",
|
1177 |
+
" (c_attn): Conv1D(nf=2304, nx=768)\n",
|
1178 |
+
" (c_proj): Conv1D(nf=768, nx=768)\n",
|
1179 |
+
" (attn_dropout): Dropout(p=0.1, inplace=False)\n",
|
1180 |
+
" (resid_dropout): Dropout(p=0.1, inplace=False)\n",
|
1181 |
+
" )\n",
|
1182 |
+
" (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
|
1183 |
+
" (mlp): GPT2MLP(\n",
|
1184 |
+
" (c_fc): Conv1D(nf=3072, nx=768)\n",
|
1185 |
+
" (c_proj): Conv1D(nf=768, nx=3072)\n",
|
1186 |
+
" (act): NewGELUActivation()\n",
|
1187 |
+
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
1188 |
+
" )\n",
|
1189 |
+
" )\n",
|
1190 |
+
" )\n",
|
1191 |
+
" (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
|
1192 |
+
" )\n",
|
1193 |
+
" (lm_head): Linear(in_features=768, out_features=90000, bias=False)\n",
|
1194 |
+
")"
|
1195 |
+
]
|
1196 |
+
},
|
1197 |
+
"execution_count": 29,
|
1198 |
+
"metadata": {},
|
1199 |
+
"output_type": "execute_result"
|
1200 |
+
}
|
1201 |
+
],
|
1202 |
"source": [
|
1203 |
"finetuned_model"
|
1204 |
]
|
1205 |
},
|
1206 |
{
|
1207 |
"cell_type": "code",
|
1208 |
+
"execution_count": 30,
|
1209 |
"id": "d75010e8-6d6a-40ef-852e-0d705adc3da8",
|
1210 |
"metadata": {},
|
1211 |
+
"outputs": [
|
1212 |
+
{
|
1213 |
+
"name": "stderr",
|
1214 |
+
"output_type": "stream",
|
1215 |
+
"text": [
|
1216 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1217 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n"
|
1218 |
+
]
|
1219 |
+
},
|
1220 |
+
{
|
1221 |
+
"name": "stdout",
|
1222 |
+
"output_type": "stream",
|
1223 |
+
"text": [
|
1224 |
+
"input (test): Below is an instruction that describes a task. Write a response that appropriately completes the request.\n",
|
1225 |
+
"\n",
|
1226 |
+
"### Instruction:\n",
|
1227 |
+
"Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.\n",
|
1228 |
+
"\n",
|
1229 |
+
"### Input:\n",
|
1230 |
+
"CCAGGATGCGCTGACGACCCGGCTGGCAGGCGGGTCCTCGTGGGCGAGGCGAGGGAGGCGGCGAGAGAGGAGCAATAGTTTCCCACCGCTCCCTCTCAGGCGCAGGGTCTAGAGAAGCGCGAGGGGATCTAGAGAAGCCGGAGGGGAGGAAGCGCGAGTCCGCGGCCCGCCCCGTTGCGTCCCACCCACCGCGTCCCCTCCCCTCCCCTCCCGCTGCGGGAAAAGCGGCCGCGGGCGGCGGCGCCCACTGTGGGGCGGGCGGAGCGCCGCGGGAGGCGGACGAGATGCGAGCGCGGCCGC\n",
|
1231 |
+
"\n",
|
1232 |
+
"### Response:\n",
|
1233 |
+
"\n",
|
1234 |
+
"--------------------------\n",
|
1235 |
+
"\n",
|
1236 |
+
"model's answer: \n",
|
1237 |
+
"\n",
|
1238 |
+
"promoterpromoterpromo\n",
|
1239 |
+
"--------------------------\n",
|
1240 |
+
"\n",
|
1241 |
+
"real answer: \n",
|
1242 |
+
"\n",
|
1243 |
+
"promoter\n"
|
1244 |
+
]
|
1245 |
+
}
|
1246 |
+
],
|
1247 |
"source": [
|
1248 |
"print(\"input (test):\", input_text)\n",
|
1249 |
"\n",
|
|
|
1259 |
},
|
1260 |
{
|
1261 |
"cell_type": "code",
|
1262 |
+
"execution_count": 31,
|
1263 |
"id": "64365e15-510e-4abf-92f5-c78b660b37dc",
|
1264 |
"metadata": {},
|
1265 |
+
"outputs": [
|
1266 |
+
{
|
1267 |
+
"name": "stderr",
|
1268 |
+
"output_type": "stream",
|
1269 |
+
"text": [
|
1270 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1271 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1272 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1273 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1274 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1275 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1276 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1277 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1278 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1279 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1280 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1281 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1282 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1283 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1284 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1285 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1286 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1287 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1288 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1289 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1290 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1291 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1292 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1293 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1294 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1295 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1296 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1297 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1298 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1299 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1300 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1301 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1302 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1303 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1304 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1305 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1306 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1307 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1308 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1309 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1310 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1311 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1312 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1313 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1314 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1315 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1316 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1317 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1318 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1319 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1320 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1321 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1322 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1323 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1324 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1325 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1326 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1327 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1328 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1329 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1330 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1331 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1332 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1333 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1334 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1335 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1336 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1337 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1338 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1339 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1340 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1341 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1342 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1343 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1344 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1345 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1346 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1347 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1348 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1349 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1350 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1351 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1352 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1353 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1354 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1355 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1356 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1357 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1358 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1359 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1360 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1361 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1362 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1363 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1364 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1365 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1366 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1367 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1368 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1369 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1370 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1371 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1372 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1373 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1374 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1375 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1376 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1377 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1378 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1379 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1380 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1381 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1382 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1383 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1384 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1385 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1386 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1387 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1388 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1389 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1390 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1391 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1392 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1393 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1394 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1395 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1396 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1397 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1398 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1399 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1400 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1401 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1402 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1403 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1404 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1405 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1406 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1407 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1408 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1409 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1410 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1411 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1412 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1413 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1414 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1415 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1416 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1417 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1418 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1419 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1420 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1421 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1422 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1423 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1424 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1425 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1426 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1427 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1428 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1429 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1430 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1431 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1432 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1433 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1434 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1435 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1436 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1437 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1438 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1439 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1440 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1441 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1442 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1443 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1444 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1445 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1446 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1447 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1448 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1449 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1450 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1451 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1452 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1453 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1454 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1455 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1456 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1457 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1458 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1459 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1460 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1461 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1462 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1463 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1464 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1465 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1466 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1467 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1468 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1469 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n"
|
1470 |
+
]
|
1471 |
+
}
|
1472 |
+
],
|
1473 |
"source": [
|
1474 |
"test_data = data[\"test\"].select(range(100))\n",
|
1475 |
"\n",
|
|
|
1492 |
},
|
1493 |
{
|
1494 |
"cell_type": "code",
|
1495 |
+
"execution_count": 32,
|
1496 |
"id": "a45fb780-fc3f-401c-b6e0-6f7d0c1682de",
|
1497 |
"metadata": {},
|
1498 |
"outputs": [],
|
|
|
1505 |
"# 将 Dataset 对象导出为 JSON 文件\n",
|
1506 |
"# test_data.to_json(output_file)\n",
|
1507 |
"with open(output_file, \"w\") as file:\n",
|
1508 |
+
" json.dump(data_list, file, indent=4) # \"indent\" for pretty-printing\n"
|
1509 |
]
|
1510 |
},
|
1511 |
{
|
1512 |
"cell_type": "code",
|
1513 |
+
"execution_count": 1,
|
1514 |
"id": "a83c8881-c763-4bba-8b85-584a6722a38e",
|
1515 |
"metadata": {},
|
1516 |
+
"outputs": [
|
1517 |
+
{
|
1518 |
+
"name": "stdout",
|
1519 |
+
"output_type": "stream",
|
1520 |
+
"text": [
|
1521 |
+
"promoter |||||||||||| promoterpromoterpromo\n",
|
1522 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1523 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1524 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1525 |
+
"promoter |||||||||||| promoterpromoterpromo\n",
|
1526 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1527 |
+
"promoter |||||||||||| promoterpromoterpromo\n",
|
1528 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1529 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1530 |
+
"promoter |||||||||||| promoterpromoterpromo\n",
|
1531 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1532 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1533 |
+
"promoter |||||||||||| promoterpromoterpromo\n",
|
1534 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1535 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1536 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1537 |
+
"Non-promoter |||||||||||| promoterpromoterpromo\n",
|
1538 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1539 |
+
"promoter |||||||||||| promoterpromoterpromo\n",
|
1540 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1541 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1542 |
+
"promoter |||||||||||| promoterpromoterpromo\n",
|
1543 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1544 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1545 |
+
"promoter |||||||||||| Non-promoter\n",
|
1546 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1547 |
+
"Non-promoter |||||||||||| promoterpromoterpromo\n",
|
1548 |
+
"promoter |||||||||||| promoterpromoterpromo\n",
|
1549 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1550 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1551 |
+
"promoter |||||||||||| promoterpromoterpromo\n",
|
1552 |
+
"promoter |||||||||||| promoterpromoterpromo\n",
|
1553 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1554 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1555 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1556 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1557 |
+
"promoter |||||||||||| promoterpromoterpromo\n",
|
1558 |
+
"promoter |||||||||||| promoterpromoterpromo\n",
|
1559 |
+
"promoter |||||||||||| Non-promoter\n",
|
1560 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1561 |
+
"promoter |||||||||||| promoterpromoterpromo\n",
|
1562 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1563 |
+
"promoter |||||||||||| promoterpromoterpromo\n",
|
1564 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1565 |
+
"promoter |||||||||||| promoterpromoterpromo\n",
|
1566 |
+
"promoter |||||||||||| promoterpromoterpromo\n",
|
1567 |
+
"promoter |||||||||||| Non-promoter\n",
|
1568 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1569 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1570 |
+
"promoter |||||||||||| promoterpromoterpromo\n",
|
1571 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1572 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1573 |
+
"promoter |||||||||||| promoterpromoterpromo\n",
|
1574 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1575 |
+
"promoter |||||||||||| promoterpromoterpromo\n",
|
1576 |
+
"promoter |||||||||||| promoterpromoterpromo\n",
|
1577 |
+
"promoter |||||||||||| promoterpromoterpromo\n",
|
1578 |
+
"promoter |||||||||||| promoterpromoterpromo\n",
|
1579 |
+
"promoter |||||||||||| promoterpromoterpromo\n",
|
1580 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1581 |
+
"promoter |||||||||||| promoterpromoterpromo\n",
|
1582 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1583 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1584 |
+
"promoter |||||||||||| promoterpromoterpromo\n",
|
1585 |
+
"promoter |||||||||||| promoterpromoterpromo\n",
|
1586 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1587 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1588 |
+
"Non-promoter |||||||||||| promoterpromoterpromo\n",
|
1589 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1590 |
+
"promoter |||||||||||| promoterpromoterpromo\n",
|
1591 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1592 |
+
"promoter |||||||||||| promoterpromoterpromo\n",
|
1593 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1594 |
+
"promoter |||||||||||| promoterpromoterpromo\n",
|
1595 |
+
"promoter |||||||||||| promoterpromoterpromo\n",
|
1596 |
+
"promoter |||||||||||| promoterpromoterpromo\n",
|
1597 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1598 |
+
"promoter |||||||||||| promoterpromoterpromo\n",
|
1599 |
+
"promoter |||||||||||| promoterpromoterpromo\n",
|
1600 |
+
"promoter |||||||||||| promoterpromoterpromo\n",
|
1601 |
+
"promoter |||||||||||| promoterpromoterpromo\n",
|
1602 |
+
"promoter |||||||||||| promoterpromoterpromo\n",
|
1603 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1604 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1605 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1606 |
+
"promoter |||||||||||| promoterpromoterpromo\n",
|
1607 |
+
"promoter |||||||||||| promoterpromoterpromo\n",
|
1608 |
+
"promoter |||||||||||| promoterpromoterpromo\n",
|
1609 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1610 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1611 |
+
"promoter |||||||||||| promoterpromoterpromo\n",
|
1612 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1613 |
+
"promoter |||||||||||| promoterpromoterpromo\n",
|
1614 |
+
"promoter |||||||||||| promoterpromoterpromo\n",
|
1615 |
+
"promoter |||||||||||| promoterpromoterpromo\n",
|
1616 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1617 |
+
"promoter |||||||||||| promoterpromoterpromo\n",
|
1618 |
+
"promoter |||||||||||| promoterpromoterpromo\n",
|
1619 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1620 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1621 |
+
"presicion 0.94 same 0.49\n"
|
1622 |
+
]
|
1623 |
+
}
|
1624 |
+
],
|
1625 |
"source": [
|
1626 |
"import json\n",
|
1627 |
"\n",
|
1628 |
"\n",
|
1629 |
+
"output_file = 'gpt2-small3-1024.json'\n",
|
1630 |
"\n",
|
1631 |
"with open(output_file, \"r\") as file:\n",
|
1632 |
" test_data = json.load(file)\n",
|
|
|
1638 |
" output = item[\"output\"]\n",
|
1639 |
" #output = \" \".join(tokenizer.tokenize(output))\n",
|
1640 |
" model_response = item[\"model_response\"]\n",
|
1641 |
+
"\n",
|
1642 |
+
" print(output,\"||||||||||||\", model_response)\n",
|
1643 |
+
"\n",
|
1644 |
" if model_response == output: #same it\n",
|
1645 |
" same_sum = same_sum + 1\n",
|
1646 |
" \n",
|
1647 |
+
" if output.find(\"Non\")==-1: # no Non\n",
|
1648 |
+
" if model_response.find(output)!=-1 and model_response.find(\"Non\")==-1: #find it, but no Non\n",
|
1649 |
+
" right_sum = right_sum + 1\n",
|
1650 |
+
" else:\n",
|
1651 |
+
" if model_response.find(output)!=-1: #find it\n",
|
1652 |
+
" right_sum = right_sum + 1\n",
|
1653 |
"\n",
|
1654 |
"\n",
|
1655 |
+
"print(\"Accuracy\", right_sum/all_num, \"same\", same_sum/all_num)"
|
1656 |
]
|
1657 |
+
},
|
1658 |
+
{
|
1659 |
+
"cell_type": "code",
|
1660 |
+
"execution_count": null,
|
1661 |
+
"id": "8bf88885-fb33-406e-9644-cd174a8a2f28",
|
1662 |
+
"metadata": {},
|
1663 |
+
"outputs": [],
|
1664 |
+
"source": []
|
1665 |
}
|
1666 |
],
|
1667 |
"metadata": {
|
04-gene-sft/.ipynb_checkpoints/4-deepspeed-intro-checkpoint.ipynb
CHANGED
@@ -10,12 +10,172 @@
|
|
10 |
},
|
11 |
{
|
12 |
"cell_type": "markdown",
|
13 |
-
"id": "
|
14 |
"metadata": {},
|
15 |
"source": [
|
16 |
-
"##
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
]
|
18 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
{
|
20 |
"cell_type": "markdown",
|
21 |
"id": "c0d29667-1e75-46df-8f65-cae27609ee3f",
|
@@ -169,171 +329,19 @@
|
|
169 |
]
|
170 |
},
|
171 |
{
|
172 |
-
"cell_type": "
|
173 |
-
"
|
|
|
174 |
"metadata": {},
|
175 |
-
"
|
176 |
-
|
177 |
-
"\n",
|
178 |
-
"大模型的并行训练旨在克服单个 GPU 显存的限制和加速训练过程,通常适用于参数规模较大的模型(如 GPT-3、T5 等)。并行训练主要包括以下几种方法,每种方法适用于不同的场景和模型特性。\n",
|
179 |
-
"\n",
|
180 |
-
"---\n",
|
181 |
-
"\n",
|
182 |
-
"### **1. 数据并行(Data Parallelism)**\n",
|
183 |
-
"\n",
|
184 |
-
"#### **原理**\n",
|
185 |
-
"- 将数据切分成多个小批次,每个 GPU 处理其中一部分。\n",
|
186 |
-
"- 模型副本被复制到每个 GPU。\n",
|
187 |
-
"- 每个 GPU 独立计算梯度,最终通过梯度同步(如 AllReduce 操作)更新参数。\n",
|
188 |
-
"\n",
|
189 |
-
"#### **特点**\n",
|
190 |
-
"- **优点**:\n",
|
191 |
-
" - 实现简单,是最常用的并行方法。\n",
|
192 |
-
" - 对模型大小没有限制。\n",
|
193 |
-
"- **缺点**:\n",
|
194 |
-
" - 模型副本需要完整加载到每个 GPU,占用显存。\n",
|
195 |
-
" - 在超大规模模型中,显存压力较大。\n",
|
196 |
-
"\n",
|
197 |
-
"#### **适用场景**\n",
|
198 |
-
"- 参数规模适中,显存可以容纳整个模型的场景。\n",
|
199 |
-
"\n",
|
200 |
-
"---\n",
|
201 |
-
"\n",
|
202 |
-
"### **2. 模型并行(Model Parallelism)**\n",
|
203 |
-
"\n",
|
204 |
-
"#### **原理**\n",
|
205 |
-
"- 将模型切分成不同的部分,将不同部分分配到不同的 GPU。\n",
|
206 |
-
"- 前向传播和后向传播时,数据在模型的不同部分之间传递。\n",
|
207 |
-
"\n",
|
208 |
-
"#### **特点**\n",
|
209 |
-
"- **优点**:\n",
|
210 |
-
" - 不需要复制整个模型,可以支持超大规模模型。\n",
|
211 |
-
"- **缺点**:\n",
|
212 |
-
" - GPU 之间通信频繁,可能成为性能瓶颈。\n",
|
213 |
-
" - 实现复杂,切分模型需要精心设计。\n",
|
214 |
-
" \n",
|
215 |
-
"#### **适用场景**\n",
|
216 |
-
"- 单个 GPU 无法容纳完整模型参数的场景。\n",
|
217 |
-
"\n",
|
218 |
-
"#### **具体实现**\n",
|
219 |
-
"- 将 Transformer 的不同层分配到不同的 GPU。\n",
|
220 |
-
"- 常用工具:DeepSpeed 的 Pipeline Parallelism、NVIDIA Megatron-LM。\n",
|
221 |
-
"\n",
|
222 |
-
"---\n",
|
223 |
-
"\n",
|
224 |
-
"### **3. 张量并行(Tensor Parallelism)**\n",
|
225 |
-
"\n",
|
226 |
-
"#### **原理**\n",
|
227 |
-
"- 将模型内部的张量(如权重矩阵)切分为多个子张量,并分配到不同 GPU。\n",
|
228 |
-
"- GPU 之间协作完成矩阵计算。\n",
|
229 |
-
"\n",
|
230 |
-
"#### **特点**\n",
|
231 |
-
"- **优点**:\n",
|
232 |
-
" - 减少了每个 GPU 的显存占用,同时保持模型整体完整性。\n",
|
233 |
-
"- **缺点**:\n",
|
234 |
-
" - 实现较复杂,需要优化通信操作。\n",
|
235 |
-
" - 通信开销较高,适合较大批量的训练。\n",
|
236 |
-
"\n",
|
237 |
-
"#### **适用场景**\n",
|
238 |
-
"- 参数非常大的模型(如 GPT-3)。\n",
|
239 |
-
"- 需要极致优化显存的场景。\n",
|
240 |
-
"\n",
|
241 |
-
"#### **具体实现**\n",
|
242 |
-
"- NVIDIA 的 Megatron-LM 和 Hugging Face Transformers 提供了张量并行的支持。\n",
|
243 |
-
"\n",
|
244 |
-
"---\n",
|
245 |
-
"\n",
|
246 |
-
"### **4. 管道并行(Pipeline Parallelism)**\n",
|
247 |
-
"\n",
|
248 |
-
"#### **原理**\n",
|
249 |
-
"- 将模型分为不同的部分(通常是按层划分),每部分分配到不同的 GPU。\n",
|
250 |
-
"- 数据按照流水线的方式流经每个 GPU。\n",
|
251 |
-
"\n",
|
252 |
-
"#### **特点**\n",
|
253 |
-
"- **优点**:\n",
|
254 |
-
" - 减少每个 GPU 的显存压力。\n",
|
255 |
-
" - 通过流水线增加计算效率。\n",
|
256 |
-
"- **缺点**:\n",
|
257 |
-
" - 引入流水线延迟。\n",
|
258 |
-
" - 实现复杂,需管理数据依赖和同步。\n",
|
259 |
-
"\n",
|
260 |
-
"#### **适用场景**\n",
|
261 |
-
"- 模型非常深,层数较多的场景。\n",
|
262 |
-
"\n",
|
263 |
-
"#### **具体实现**\n",
|
264 |
-
"- DeepSpeed 的 Pipeline Parallelism。\n",
|
265 |
-
"\n",
|
266 |
-
"---\n",
|
267 |
-
"\n",
|
268 |
-
"### **5. 混合并行(Hybrid Parallelism)**\n",
|
269 |
-
"\n",
|
270 |
-
"#### **原理**\n",
|
271 |
-
"- 将数据并行、模型并行、张量并行和管道并行组合使用,充分利用多 GPU 资源。\n",
|
272 |
-
"- 不同的并行方法在不同维度协同工作。\n",
|
273 |
-
"\n",
|
274 |
-
"#### **特点**\n",
|
275 |
-
"- **优点**:\n",
|
276 |
-
" - 灵活且适应性强,适合超大规模模型。\n",
|
277 |
-
"- **缺点**:\n",
|
278 |
-
" - 配置复杂,依赖于框架和训练任务。\n",
|
279 |
-
"\n",
|
280 |
-
"#### **适用场景**\n",
|
281 |
-
"- 超大规模模型(如 GPT-3 或参数量 >1T)。\n",
|
282 |
-
"- 多机多卡的大型训练环境。\n",
|
283 |
-
"\n",
|
284 |
-
"#### **具体实现**\n",
|
285 |
-
"- NVIDIA Megatron-LM 和 DeepSpeed 的混合并行支持。\n",
|
286 |
-
"\n",
|
287 |
-
"---\n",
|
288 |
-
"\n",
|
289 |
-
"### **6. ZeRO 优化并行(Zero Redundancy Optimizer)**\n",
|
290 |
-
"\n",
|
291 |
-
"#### **原理**\n",
|
292 |
-
"- 通过分片存储模型参数、优化器状态和梯度,显著减少每个 GPU 的显存占用。\n",
|
293 |
-
"\n",
|
294 |
-
"#### **特点**\n",
|
295 |
-
"- **优点**:\n",
|
296 |
-
" - 极大降低显存需求。\n",
|
297 |
-
" - 支持超大规模模型。\n",
|
298 |
-
"- **缺点**:\n",
|
299 |
-
" - 对 GPU 间通信要求较高。\n",
|
300 |
-
" - 比数据并行复杂。\n",
|
301 |
-
"\n",
|
302 |
-
"#### **适用场景**\n",
|
303 |
-
"- 超大模型的高效训练。\n",
|
304 |
-
"\n",
|
305 |
-
"#### **具体实现**\n",
|
306 |
-
"- DeepSpeed 提供的 ZeRO Stage 1/2/3。\n",
|
307 |
-
"\n",
|
308 |
-
"---\n",
|
309 |
-
"\n",
|
310 |
-
"### **方法对比**\n",
|
311 |
-
"\n",
|
312 |
-
"| 并行方法 | 主要优点 | 主要缺点 | 适用场景 |\n",
|
313 |
-
"|---------------|-------------------------------|-------------------------------|---------------------------|\n",
|
314 |
-
"| 数据并行 | 简单高效,易实现 | 模型副本占用大量显存 | 模型规模适中,显存足够 |\n",
|
315 |
-
"| 模型并行 | 支持大模型 | 通信开销大,切分复杂 | 超大模型,显存有限 |\n",
|
316 |
-
"| 张量并行 | 高效利用显存 | 实现复杂,通信频繁 | 参数规模极大的模型 |\n",
|
317 |
-
"| 管道并行 | 显存需求降低,适合深模型 | 流水线延迟,数据同步复杂 | 层数多的大型模型 |\n",
|
318 |
-
"| 混合并行 | 灵活适配超大规模模型 | 配置复杂,依赖框架 | 超大规模模型(如 GPT-3) |\n",
|
319 |
-
"| ZeRO 并行 | 极大节省显存,占用少 | 通信成本高 | 超大规模模型显存优化 |\n",
|
320 |
-
"\n",
|
321 |
-
"---\n",
|
322 |
-
"\n",
|
323 |
-
"### **总结**\n",
|
324 |
-
"- **中等规模模型**:优先使用 **数据并行**。\n",
|
325 |
-
"- **单卡显存不足**:采用 **模型并行** 或 **张量并行**。\n",
|
326 |
-
"- **超大规模模型**:使用 **混合并行** 或 DeepSpeed 的 **ZeRO 优化**。\n",
|
327 |
-
"\n",
|
328 |
-
"对于现代超大规模模型,通常采用混合并行方法,比如 NVIDIA 的 Megatron-LM 和微软的 DeepSpeed,它们综合了多种并行策略,能够有效利用计算资源并加速训练。如果您有具体的硬件环境或模型需求,可以进一步探讨适合的并行方案!"
|
329 |
-
]
|
330 |
},
|
331 |
{
|
332 |
"cell_type": "markdown",
|
333 |
"id": "cd848439-bac8-46b2-9a0f-59ae7c343954",
|
334 |
"metadata": {},
|
335 |
"source": [
|
336 |
-
"## deepspeed
|
337 |
"\n",
|
338 |
"\n",
|
339 |
"是的,DeepSpeed 支持多种并行策略,包括 **数据并行**、**模型并行** 和 **张量并行**,并且可以通过其��置文件灵活地设置这些并行模式。\n",
|
@@ -544,6 +552,22 @@
|
|
544 |
"DeepSpeed 的配置高度灵活,可以根据模型大小、显存限制和硬件条件选择适合的并行策略。"
|
545 |
]
|
546 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
547 |
{
|
548 |
"cell_type": "markdown",
|
549 |
"id": "ab2812bc-f743-4f18-b49c-972781484dc6",
|
@@ -569,14 +593,6 @@
|
|
569 |
"metadata": {},
|
570 |
"outputs": [],
|
571 |
"source": []
|
572 |
-
},
|
573 |
-
{
|
574 |
-
"cell_type": "code",
|
575 |
-
"execution_count": null,
|
576 |
-
"id": "ce701aeb-c8c7-450a-bbf9-b793a19cd0c6",
|
577 |
-
"metadata": {},
|
578 |
-
"outputs": [],
|
579 |
-
"source": []
|
580 |
}
|
581 |
],
|
582 |
"metadata": {
|
|
|
10 |
},
|
11 |
{
|
12 |
"cell_type": "markdown",
|
13 |
+
"id": "75b8219d-8069-4b18-96c8-d5024ee049f1",
|
14 |
"metadata": {},
|
15 |
"source": [
|
16 |
+
"## 大模型并行训练简介\n",
|
17 |
+
"\n",
|
18 |
+
"大模型的并行训练旨在克服单个 GPU 显存的限制和加速训练过程,通常适用于参数规模较大的模型(如 GPT-3、T5 等)。并行训练主要包括以下几种方法,每种方法适用于不同的场景和模型特性。\n",
|
19 |
+
"\n",
|
20 |
+
"---\n",
|
21 |
+
"\n",
|
22 |
+
"### **1. 数据并行(Data Parallelism)**\n",
|
23 |
+
"\n",
|
24 |
+
"#### **原理**\n",
|
25 |
+
"- 将数据切分成多个小批次,每个 GPU 处理其中一部分。\n",
|
26 |
+
"- 模型副本被复制到每个 GPU。\n",
|
27 |
+
"- 每个 GPU 独立计算梯度,最终通过梯度同步(如 AllReduce 操作)更新参数。\n",
|
28 |
+
"\n",
|
29 |
+
"#### **特点**\n",
|
30 |
+
"- **优点**:\n",
|
31 |
+
" - 实现简单,是最常用的并行方法。\n",
|
32 |
+
" - 对模型大小没有限制。\n",
|
33 |
+
"- **缺点**:\n",
|
34 |
+
" - 模型副本需要完整加载到每个 GPU,占用显存。\n",
|
35 |
+
" - 在超大规模模型中,显存压力较大。\n",
|
36 |
+
"\n",
|
37 |
+
"#### **适用场景**\n",
|
38 |
+
"- 参数规模适中,显存可以容纳整个模型的场景。\n",
|
39 |
+
"\n",
|
40 |
+
"---\n",
|
41 |
+
"\n",
|
42 |
+
"### **2. 模型并行(Model Parallelism)**\n",
|
43 |
+
"\n",
|
44 |
+
"#### **原理**\n",
|
45 |
+
"- 将模型切分成不同的部分,将不同部分分配到不同的 GPU。\n",
|
46 |
+
"- 前向传播和后向传播时,数据在模型的不同部分之间传递。\n",
|
47 |
+
"\n",
|
48 |
+
"#### **特点**\n",
|
49 |
+
"- **优点**:\n",
|
50 |
+
" - 不需要复制整个模型,可以支持超大规模模型。\n",
|
51 |
+
"- **缺点**:\n",
|
52 |
+
" - GPU 之间通信频繁,可能成为性能瓶颈。\n",
|
53 |
+
" - 实现复杂,切分模型需要精心设计。\n",
|
54 |
+
" \n",
|
55 |
+
"#### **适用场景**\n",
|
56 |
+
"- 单个 GPU 无法容纳完整模型参数的场景。\n",
|
57 |
+
"\n",
|
58 |
+
"#### **具体实现**\n",
|
59 |
+
"- 将 Transformer 的不同层分配到不同的 GPU。\n",
|
60 |
+
"- 常用工具:DeepSpeed 的 Pipeline Parallelism、NVIDIA Megatron-LM。\n",
|
61 |
+
"\n",
|
62 |
+
"---\n",
|
63 |
+
"\n",
|
64 |
+
"### **3. 张量并行(Tensor Parallelism)**\n",
|
65 |
+
"\n",
|
66 |
+
"#### **原理**\n",
|
67 |
+
"- 将模型内部的张量(如权重矩阵)切分为多个子张量,并分配到不同 GPU。\n",
|
68 |
+
"- GPU 之间协作完成矩阵计算。\n",
|
69 |
+
"\n",
|
70 |
+
"#### **特点**\n",
|
71 |
+
"- **优点**:\n",
|
72 |
+
" - 减少了每个 GPU 的显存占用,同时保持模型整体完整性。\n",
|
73 |
+
"- **缺点**:\n",
|
74 |
+
" - 实现较复杂,需要优化通信操作。\n",
|
75 |
+
" - 通信开销较高,适合较大批量的训练。\n",
|
76 |
+
"\n",
|
77 |
+
"#### **适用场景**\n",
|
78 |
+
"- 参数非常大的模型(如 GPT-3)。\n",
|
79 |
+
"- 需要极致优化显存的场景。\n",
|
80 |
+
"\n",
|
81 |
+
"#### **具体实现**\n",
|
82 |
+
"- NVIDIA 的 Megatron-LM 和 Hugging Face Transformers 提供了张量并行的支持。\n",
|
83 |
+
"\n",
|
84 |
+
"---\n",
|
85 |
+
"\n",
|
86 |
+
"### **4. 管道并行(Pipeline Parallelism)**\n",
|
87 |
+
"\n",
|
88 |
+
"#### **原理**\n",
|
89 |
+
"- 将模型分为不同的部分(通常是按层划分),每部分分配到不同的 GPU。\n",
|
90 |
+
"- 数据按照流水线的方式流经每个 GPU。\n",
|
91 |
+
"\n",
|
92 |
+
"#### **特点**\n",
|
93 |
+
"- **优点**:\n",
|
94 |
+
" - 减少每个 GPU 的显存压力。\n",
|
95 |
+
" - 通过流水线增加计算效率。\n",
|
96 |
+
"- **缺点**:\n",
|
97 |
+
" - 引入流水线延迟。\n",
|
98 |
+
" - 实现复杂,需管理数据依赖和同步。\n",
|
99 |
+
"\n",
|
100 |
+
"#### **适用场景**\n",
|
101 |
+
"- 模型非常深,层数较多的场景。\n",
|
102 |
+
"\n",
|
103 |
+
"#### **具体实现**\n",
|
104 |
+
"- DeepSpeed 的 Pipeline Parallelism。\n",
|
105 |
+
"\n",
|
106 |
+
"---\n",
|
107 |
+
"\n",
|
108 |
+
"### **5. 混合并行(Hybrid Parallelism)**\n",
|
109 |
+
"\n",
|
110 |
+
"#### **原理**\n",
|
111 |
+
"- 将数据并行、模型并行、张量并行和管道并行组合使用,充分利用多 GPU 资源。\n",
|
112 |
+
"- 不同的并行方法在不同维度协同工作。\n",
|
113 |
+
"\n",
|
114 |
+
"#### **特点**\n",
|
115 |
+
"- **优点**:\n",
|
116 |
+
" - 灵活且适应性强,适合超大规模模型。\n",
|
117 |
+
"- **缺点**:\n",
|
118 |
+
" - 配置复杂,依赖于框架和训练任务。\n",
|
119 |
+
"\n",
|
120 |
+
"#### **适用场景**\n",
|
121 |
+
"- 超大规模模型(如 GPT-3 或参数量 >1T)。\n",
|
122 |
+
"- 多机多卡的大型训练环境。\n",
|
123 |
+
"\n",
|
124 |
+
"#### **具体实现**\n",
|
125 |
+
"- NVIDIA Megatron-LM 和 DeepSpeed 的混合并行支持。\n",
|
126 |
+
"\n",
|
127 |
+
"---\n",
|
128 |
+
"\n",
|
129 |
+
"### **6. ZeRO 优化并行(Zero Redundancy Optimizer)**\n",
|
130 |
+
"\n",
|
131 |
+
"#### **原理**\n",
|
132 |
+
"- 通过分片存储模型参数、优化器状态和梯度,显著减少每个 GPU 的显存占用。\n",
|
133 |
+
"\n",
|
134 |
+
"#### **特点**\n",
|
135 |
+
"- **优点**:\n",
|
136 |
+
" - 极大降低显存需求。\n",
|
137 |
+
" - 支持超大规模模型。\n",
|
138 |
+
"- **缺点**:\n",
|
139 |
+
" - 对 GPU 间通信要求较高。\n",
|
140 |
+
" - 比数据并行复杂。\n",
|
141 |
+
"\n",
|
142 |
+
"#### **适用场景**\n",
|
143 |
+
"- 超大模型的高效训练。\n",
|
144 |
+
"\n",
|
145 |
+
"#### **具体实现**\n",
|
146 |
+
"- DeepSpeed 提供的 ZeRO Stage 1/2/3。\n",
|
147 |
+
"\n",
|
148 |
+
"---\n",
|
149 |
+
"\n",
|
150 |
+
"### **方法对比**\n",
|
151 |
+
"\n",
|
152 |
+
"| 并行方法 | 主要优点 | 主要缺点 | 适用场景 |\n",
|
153 |
+
"|---------------|-------------------------------|-------------------------------|---------------------------|\n",
|
154 |
+
"| 数据并行 | 简单高效,易实现 | 模型副本占用大量显存 | 模型规模适中,显存足够 |\n",
|
155 |
+
"| 模型并行 | 支持大模型 | 通信开销大,切分复杂 | 超大模型,显存有限 |\n",
|
156 |
+
"| 张量并行 | 高效利用显存 | 实现复杂,通信频繁 | 参数规模极大的模型 |\n",
|
157 |
+
"| 管道并行 | 显存需求降低,适合深模型 | 流水线延迟,数据同步复杂 | 层数多的大型模型 |\n",
|
158 |
+
"| 混合并行 | 灵活适配超大规模模型 | 配置复杂,依赖框架 | 超大规模模型(如 GPT-3) |\n",
|
159 |
+
"| ZeRO 并行 | 极大节省显存,占用少 | 通信成本高 | 超大规模模型显存优化 |\n",
|
160 |
+
"\n",
|
161 |
+
"---\n",
|
162 |
+
"\n",
|
163 |
+
"### **总结**\n",
|
164 |
+
"- **中等规模模型**:优先使用 **数据并行**。\n",
|
165 |
+
"- **单卡显存不足**:采用 **模型并行** 或 **张量并行**。\n",
|
166 |
+
"- **超大规模模型**:使用 **混合并行** 或 DeepSpeed 的 **ZeRO 优化**。\n",
|
167 |
+
"\n",
|
168 |
+
"对于现代超大规模模型,通常采用混合并行方法,比如 NVIDIA 的 Megatron-LM 和微软的 DeepSpeed,它们综合了多种并行策略,能够有效利用计算资源并加速训练。如果您有具体的硬件环境或模型需求,可以进一步探讨适合的并行方案!"
|
169 |
]
|
170 |
},
|
171 |
+
{
|
172 |
+
"cell_type": "code",
|
173 |
+
"execution_count": null,
|
174 |
+
"id": "06ddaa4d-e04a-41e0-beb5-f04dfaebcd54",
|
175 |
+
"metadata": {},
|
176 |
+
"outputs": [],
|
177 |
+
"source": []
|
178 |
+
},
|
179 |
{
|
180 |
"cell_type": "markdown",
|
181 |
"id": "c0d29667-1e75-46df-8f65-cae27609ee3f",
|
|
|
329 |
]
|
330 |
},
|
331 |
{
|
332 |
+
"cell_type": "code",
|
333 |
+
"execution_count": null,
|
334 |
+
"id": "a5372798-ced3-420c-b853-badd3ff05dc1",
|
335 |
"metadata": {},
|
336 |
+
"outputs": [],
|
337 |
+
"source": []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
338 |
},
|
339 |
{
|
340 |
"cell_type": "markdown",
|
341 |
"id": "cd848439-bac8-46b2-9a0f-59ae7c343954",
|
342 |
"metadata": {},
|
343 |
"source": [
|
344 |
+
"## deepspeed具体设置\n",
|
345 |
"\n",
|
346 |
"\n",
|
347 |
"是的,DeepSpeed 支持多种并行策略,包括 **数据并行**、**模型并行** 和 **张量并行**,并且可以通过其��置文件灵活地设置这些并行模式。\n",
|
|
|
552 |
"DeepSpeed 的配置高度灵活,可以根据模型大小、显存限制和硬件条件选择适合的并行策略。"
|
553 |
]
|
554 |
},
|
555 |
+
{
|
556 |
+
"cell_type": "code",
|
557 |
+
"execution_count": null,
|
558 |
+
"id": "a8e6de4c-adc1-4a1b-840a-c8542b4ed783",
|
559 |
+
"metadata": {},
|
560 |
+
"outputs": [],
|
561 |
+
"source": []
|
562 |
+
},
|
563 |
+
{
|
564 |
+
"cell_type": "markdown",
|
565 |
+
"id": "3383c2d7-91a9-4940-b3b2-698fb7d9dbb7",
|
566 |
+
"metadata": {},
|
567 |
+
"source": [
|
568 |
+
"## 使用gpt2+deepspeed训练"
|
569 |
+
]
|
570 |
+
},
|
571 |
{
|
572 |
"cell_type": "markdown",
|
573 |
"id": "ab2812bc-f743-4f18-b49c-972781484dc6",
|
|
|
593 |
"metadata": {},
|
594 |
"outputs": [],
|
595 |
"source": []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
596 |
}
|
597 |
],
|
598 |
"metadata": {
|
04-gene-sft/.ipynb_checkpoints/5-peft-intro-checkpoint.ipynb
CHANGED
@@ -5,7 +5,15 @@
|
|
5 |
"id": "963e9ae0-ac68-44be-8c7d-fb9842784362",
|
6 |
"metadata": {},
|
7 |
"source": [
|
8 |
-
"# 4.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
]
|
10 |
},
|
11 |
{
|
@@ -139,6 +147,22 @@
|
|
139 |
"如果您需要实现高效微调,可以结合 Hugging Face 的 PEFT 库快速上手。"
|
140 |
]
|
141 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
142 |
{
|
143 |
"cell_type": "code",
|
144 |
"execution_count": 1,
|
@@ -366,14 +390,6 @@
|
|
366 |
" print(name)"
|
367 |
]
|
368 |
},
|
369 |
-
{
|
370 |
-
"cell_type": "code",
|
371 |
-
"execution_count": null,
|
372 |
-
"id": "37aa6abb-ab1c-4e9c-b968-579dd74044db",
|
373 |
-
"metadata": {},
|
374 |
-
"outputs": [],
|
375 |
-
"source": []
|
376 |
-
},
|
377 |
{
|
378 |
"cell_type": "markdown",
|
379 |
"id": "0add2f79-f35c-4638-80bb-0d8a87a9b6a7",
|
@@ -502,11 +518,19 @@
|
|
502 |
{
|
503 |
"cell_type": "code",
|
504 |
"execution_count": null,
|
505 |
-
"id": "
|
506 |
"metadata": {},
|
507 |
"outputs": [],
|
508 |
"source": []
|
509 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
510 |
{
|
511 |
"cell_type": "markdown",
|
512 |
"id": "10c99eb9-8007-4297-972e-7be71768c9c3",
|
@@ -595,62 +619,6 @@
|
|
595 |
"\n",
|
596 |
"---\n",
|
597 |
"\n",
|
598 |
-
"### **2. 微调哪些参数,冻结哪些参数**\n",
|
599 |
-
"\n",
|
600 |
-
"LoRA 的核心思想是通过 **分解矩阵**,只更新少量参数,而冻结模型的大部分参数。以下是常见设置的说明:\n",
|
601 |
-
"\n",
|
602 |
-
"#### **微调的参数**\n",
|
603 |
-
"- LoRA 通过 `target_modules` 指定的模块,例如:\n",
|
604 |
-
" - GPT-2 的 `c_attn`(自注意力模块)。\n",
|
605 |
-
" - BERT 的 `query` 和 `key`。\n",
|
606 |
-
"- 这些模块是模型中对性能贡献最大的部分,通过微调这些模块,任务性能可以显著提升。\n",
|
607 |
-
"\n",
|
608 |
-
"#### **冻结的参数**\n",
|
609 |
-
"- 除了 `target_modules` 中指定的参数外,所有其他模型参数默认冻结,包括:\n",
|
610 |
-
" - 预训练权重的绝大部分。\n",
|
611 |
-
" - 偏置参数(如果 `bias=\"none\"`)。\n",
|
612 |
-
"\n",
|
613 |
-
"---\n",
|
614 |
-
"\n",
|
615 |
-
"### **3. 一般如何设置**\n",
|
616 |
-
"\n",
|
617 |
-
"#### **(1)针对不同任务调整**\n",
|
618 |
-
"- **文本分类任务**:\n",
|
619 |
-
" - 优先选择自注意力模块(如 `c_attn`)作为 `target_modules`。\n",
|
620 |
-
" - `r=8` 或 `r=16` 是常见选择,适中计算开销。\n",
|
621 |
-
" - 设置适当的 dropout(如 `lora_dropout=0.1`)以防止过拟合。\n",
|
622 |
-
" \n",
|
623 |
-
"- **语言生成任务**:\n",
|
624 |
-
" - 对 GPT-2 或 GPT-3,选择 `q_proj` 和 `v_proj`(query 和 value 投影模块)。\n",
|
625 |
-
" - `r=16` 或更高,适应生成任务的高复杂性。\n",
|
626 |
-
"\n",
|
627 |
-
"- **命名实体识别任务**:\n",
|
628 |
-
" - 优先选择 `q_proj` 和 `k_proj`(query 和 key 模块)。\n",
|
629 |
-
"\n",
|
630 |
-
"#### **(2)参数量与显存的权衡**\n",
|
631 |
-
"- 如果显存有限,减少 `r` 的值。\n",
|
632 |
-
"- 对小型任务,`r=4` 或 `r=8` 通常已经足够。\n",
|
633 |
-
"\n",
|
634 |
-
"#### **(3)偏置设置**\n",
|
635 |
-
"- 偏置参数的影响较小,在大多数情况下,可以选择 `bias=\"none\"` 保持冻结。\n",
|
636 |
-
"- 对非常依赖偏置的任务(如生成风格微调),可以尝试 `bias=\"lora_only\"`。\n",
|
637 |
-
"\n",
|
638 |
-
"---\n",
|
639 |
-
"\n",
|
640 |
-
"### **4. 示例:如何选择目标模块**\n",
|
641 |
-
"\n",
|
642 |
-
"#### **GPT-2**\n",
|
643 |
-
"对 GPT-2 来说,以下模块通常是微调的目标:\n",
|
644 |
-
"- **`c_attn`**:注意力模块的组合层。\n",
|
645 |
-
"- **`q_proj` 和 `v_proj`**:Query 和 Value 的线性投影。\n",
|
646 |
-
"\n",
|
647 |
-
"#### **BERT**\n",
|
648 |
-
"对 BERT 来说,以下模块通常是微调的目标:\n",
|
649 |
-
"- **`query`**:Attention 的 Query 模块。\n",
|
650 |
-
"- **`key`**:Attention 的 Key 模块。\n",
|
651 |
-
"\n",
|
652 |
-
"---\n",
|
653 |
-
"\n",
|
654 |
"### **5. 总结建议**\n",
|
655 |
"- **微调的参数**:优先选择模型中注意力相关模块。\n",
|
656 |
"- **冻结的参数**:大部分参数默认冻结以节省显存。\n",
|
@@ -664,22 +632,82 @@
|
|
664 |
},
|
665 |
{
|
666 |
"cell_type": "code",
|
667 |
-
"execution_count":
|
668 |
-
"id": "
|
669 |
"metadata": {},
|
670 |
"outputs": [],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
671 |
"source": [
|
672 |
"from transformers import AutoModelForSequenceClassification, AutoTokenizer, TrainingArguments, Trainer\n",
|
673 |
"from peft import LoraConfig, get_peft_model, TaskType\n",
|
674 |
"from datasets import load_dataset\n",
|
675 |
"from sklearn.metrics import accuracy_score, precision_recall_fscore_support\n",
|
|
|
676 |
"\n",
|
677 |
"# **1. 加载模型和分词器**\n",
|
678 |
-
"model_name = \"
|
679 |
"num_labels = 2 # 二分类任务\n",
|
680 |
"model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=num_labels)\n",
|
681 |
"tokenizer = AutoTokenizer.from_pretrained(model_name)\n",
|
682 |
-
"
|
|
|
|
|
|
|
683 |
"\n",
|
684 |
"# **2. 定义数据集**\n",
|
685 |
"# 示例数据集:dna_promoter_300\n",
|
@@ -693,7 +721,10 @@
|
|
693 |
" )\n",
|
694 |
"\n",
|
695 |
"tokenized_datasets = dataset.map(preprocess_function, batched=True)\n",
|
696 |
-
"tokenized_datasets = tokenized_datasets.rename_column(\"label\", \"labels\") # Hugging Face Trainer 要求标签列名为 'labels'\n",
|
|
|
|
|
|
|
697 |
"\n",
|
698 |
"# **4. 划分数据集**\n",
|
699 |
"train_dataset = tokenized_datasets[\"train\"]\n",
|
@@ -711,8 +742,93 @@
|
|
711 |
"\n",
|
712 |
"# 使用 LoRA 包装模型\n",
|
713 |
"model = get_peft_model(model, lora_config)\n",
|
714 |
-
"model.print_trainable_parameters() #
|
715 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
716 |
"# **6. 计算指标**\n",
|
717 |
"def compute_metrics(eval_pred):\n",
|
718 |
" predictions, labels = eval_pred\n",
|
@@ -729,7 +845,7 @@
|
|
729 |
" learning_rate=2e-5, # 学习率\n",
|
730 |
" per_device_train_batch_size=8, # 每设备的批量大小\n",
|
731 |
" per_device_eval_batch_size=8, # 每设备评估的批量大小\n",
|
732 |
-
" num_train_epochs=
|
733 |
" weight_decay=0.01, # 权重衰减\n",
|
734 |
" logging_dir=\"./logs\", # 日志路径\n",
|
735 |
" fp16=True, # 启用混合精度训练\n",
|
@@ -746,6 +862,7 @@
|
|
746 |
" train_dataset=train_dataset,\n",
|
747 |
" eval_dataset=test_dataset,\n",
|
748 |
" tokenizer=tokenizer,\n",
|
|
|
749 |
" compute_metrics=compute_metrics,\n",
|
750 |
")\n",
|
751 |
"\n",
|
|
|
5 |
"id": "963e9ae0-ac68-44be-8c7d-fb9842784362",
|
6 |
"metadata": {},
|
7 |
"source": [
|
8 |
+
"# 4.5 peft简介"
|
9 |
+
]
|
10 |
+
},
|
11 |
+
{
|
12 |
+
"cell_type": "markdown",
|
13 |
+
"id": "f4288594-c676-4369-aca1-730446f293d7",
|
14 |
+
"metadata": {},
|
15 |
+
"source": [
|
16 |
+
"## peft"
|
17 |
]
|
18 |
},
|
19 |
{
|
|
|
147 |
"如果您需要实现高效微调,可以结合 Hugging Face 的 PEFT 库快速上手。"
|
148 |
]
|
149 |
},
|
150 |
+
{
|
151 |
+
"cell_type": "code",
|
152 |
+
"execution_count": null,
|
153 |
+
"id": "a70b2631-c9b9-49da-96c6-6760c63040ac",
|
154 |
+
"metadata": {},
|
155 |
+
"outputs": [],
|
156 |
+
"source": []
|
157 |
+
},
|
158 |
+
{
|
159 |
+
"cell_type": "markdown",
|
160 |
+
"id": "7b47ddf3-85c9-4dd8-bbbb-34fc3bd6aa1b",
|
161 |
+
"metadata": {},
|
162 |
+
"source": [
|
163 |
+
"## GPT2使用peft样例"
|
164 |
+
]
|
165 |
+
},
|
166 |
{
|
167 |
"cell_type": "code",
|
168 |
"execution_count": 1,
|
|
|
390 |
" print(name)"
|
391 |
]
|
392 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
393 |
{
|
394 |
"cell_type": "markdown",
|
395 |
"id": "0add2f79-f35c-4638-80bb-0d8a87a9b6a7",
|
|
|
518 |
{
|
519 |
"cell_type": "code",
|
520 |
"execution_count": null,
|
521 |
+
"id": "14f20171-0719-4dfa-b888-147b657ebff4",
|
522 |
"metadata": {},
|
523 |
"outputs": [],
|
524 |
"source": []
|
525 |
},
|
526 |
+
{
|
527 |
+
"cell_type": "markdown",
|
528 |
+
"id": "b4e7bff2-2a4f-4a1d-9cb1-dd02aead2f85",
|
529 |
+
"metadata": {},
|
530 |
+
"source": [
|
531 |
+
"## LoraConfig具体配置"
|
532 |
+
]
|
533 |
+
},
|
534 |
{
|
535 |
"cell_type": "markdown",
|
536 |
"id": "10c99eb9-8007-4297-972e-7be71768c9c3",
|
|
|
619 |
"\n",
|
620 |
"---\n",
|
621 |
"\n",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
622 |
"### **5. 总结建议**\n",
|
623 |
"- **微调的参数**:优先选择模型中注意力相关模块。\n",
|
624 |
"- **冻结的参数**:大部分参数默认冻结以节省显存。\n",
|
|
|
632 |
},
|
633 |
{
|
634 |
"cell_type": "code",
|
635 |
+
"execution_count": 1,
|
636 |
+
"id": "bbc080ba-3ee8-4bc6-afd9-2a3241f1bcda",
|
637 |
"metadata": {},
|
638 |
"outputs": [],
|
639 |
+
"source": [
|
640 |
+
"import subprocess\n",
|
641 |
+
"import os\n",
|
642 |
+
"# 设置环境变量, autodl一般区域\n",
|
643 |
+
"result = subprocess.run('bash -c \"source /etc/network_turbo && env | grep proxy\"', shell=True, capture_output=True, text=True)\n",
|
644 |
+
"output = result.stdout\n",
|
645 |
+
"for line in output.splitlines():\n",
|
646 |
+
" if '=' in line:\n",
|
647 |
+
" var, value = line.split('=', 1)\n",
|
648 |
+
" os.environ[var] = value"
|
649 |
+
]
|
650 |
+
},
|
651 |
+
{
|
652 |
+
"cell_type": "code",
|
653 |
+
"execution_count": 7,
|
654 |
+
"id": "26d9f362-18cc-471f-b208-f29a6933c06a",
|
655 |
+
"metadata": {},
|
656 |
+
"outputs": [
|
657 |
+
{
|
658 |
+
"name": "stderr",
|
659 |
+
"output_type": "stream",
|
660 |
+
"text": [
|
661 |
+
"Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at dnagpt/dna_gpt2_v0 and are newly initialized: ['score.weight']\n",
|
662 |
+
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
|
663 |
+
]
|
664 |
+
},
|
665 |
+
{
|
666 |
+
"data": {
|
667 |
+
"application/vnd.jupyter.widget-view+json": {
|
668 |
+
"model_id": "f7e72521368341d38a2b11028715a871",
|
669 |
+
"version_major": 2,
|
670 |
+
"version_minor": 0
|
671 |
+
},
|
672 |
+
"text/plain": [
|
673 |
+
"Map: 0%| | 0/5920 [00:00<?, ? examples/s]"
|
674 |
+
]
|
675 |
+
},
|
676 |
+
"metadata": {},
|
677 |
+
"output_type": "display_data"
|
678 |
+
},
|
679 |
+
{
|
680 |
+
"name": "stdout",
|
681 |
+
"output_type": "stream",
|
682 |
+
"text": [
|
683 |
+
"trainable params: 296,448 || all params: 109,180,416 || trainable%: 0.2715\n"
|
684 |
+
]
|
685 |
+
},
|
686 |
+
{
|
687 |
+
"name": "stderr",
|
688 |
+
"output_type": "stream",
|
689 |
+
"text": [
|
690 |
+
"/root/miniconda3/lib/python3.12/site-packages/peft/tuners/lora/layer.py:1264: UserWarning: fan_in_fan_out is set to False but the target module is `Conv1D`. Setting fan_in_fan_out to True.\n",
|
691 |
+
" warnings.warn(\n"
|
692 |
+
]
|
693 |
+
}
|
694 |
+
],
|
695 |
"source": [
|
696 |
"from transformers import AutoModelForSequenceClassification, AutoTokenizer, TrainingArguments, Trainer\n",
|
697 |
"from peft import LoraConfig, get_peft_model, TaskType\n",
|
698 |
"from datasets import load_dataset\n",
|
699 |
"from sklearn.metrics import accuracy_score, precision_recall_fscore_support\n",
|
700 |
+
"from transformers import DataCollatorWithPadding\n",
|
701 |
"\n",
|
702 |
"# **1. 加载模型和分词器**\n",
|
703 |
+
"model_name = \"dnagpt/dna_gpt2_v0\" # 基础模型\n",
|
704 |
"num_labels = 2 # 二分类任务\n",
|
705 |
"model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=num_labels)\n",
|
706 |
"tokenizer = AutoTokenizer.from_pretrained(model_name)\n",
|
707 |
+
"\n",
|
708 |
+
"tokenizer.pad_token = tokenizer.eos_token\n",
|
709 |
+
"model.config.pad_token_id = tokenizer.pad_token_id\n",
|
710 |
+
"\n",
|
711 |
"\n",
|
712 |
"# **2. 定义数据集**\n",
|
713 |
"# 示例数据集:dna_promoter_300\n",
|
|
|
721 |
" )\n",
|
722 |
"\n",
|
723 |
"tokenized_datasets = dataset.map(preprocess_function, batched=True)\n",
|
724 |
+
"#tokenized_datasets = tokenized_datasets.rename_column(\"label\", \"labels\") # Hugging Face Trainer 要求标签列名为 'labels'\n",
|
725 |
+
"\n",
|
726 |
+
"# 4. 创建一个数据收集器,用于动态填充和遮蔽\n",
|
727 |
+
"data_collator = DataCollatorWithPadding(tokenizer=tokenizer)\n",
|
728 |
"\n",
|
729 |
"# **4. 划分数据集**\n",
|
730 |
"train_dataset = tokenized_datasets[\"train\"]\n",
|
|
|
742 |
"\n",
|
743 |
"# 使用 LoRA 包装模型\n",
|
744 |
"model = get_peft_model(model, lora_config)\n",
|
745 |
+
"model.print_trainable_parameters() # 打印可训练的参数信息"
|
746 |
+
]
|
747 |
+
},
|
748 |
+
{
|
749 |
+
"cell_type": "code",
|
750 |
+
"execution_count": 8,
|
751 |
+
"id": "7da39e7f-db92-483c-888d-19707ab35c5f",
|
752 |
+
"metadata": {},
|
753 |
+
"outputs": [
|
754 |
+
{
|
755 |
+
"name": "stderr",
|
756 |
+
"output_type": "stream",
|
757 |
+
"text": [
|
758 |
+
"/root/miniconda3/lib/python3.12/site-packages/transformers/training_args.py:1575: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead\n",
|
759 |
+
" warnings.warn(\n",
|
760 |
+
"/tmp/ipykernel_2399/742597822.py:28: FutureWarning: `tokenizer` is deprecated and will be removed in version 5.0.0 for `Trainer.__init__`. Use `processing_class` instead.\n",
|
761 |
+
" trainer = Trainer(\n"
|
762 |
+
]
|
763 |
+
},
|
764 |
+
{
|
765 |
+
"data": {
|
766 |
+
"text/html": [
|
767 |
+
"\n",
|
768 |
+
" <div>\n",
|
769 |
+
" \n",
|
770 |
+
" <progress value='19980' max='19980' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
|
771 |
+
" [19980/19980 10:13, Epoch 3/3]\n",
|
772 |
+
" </div>\n",
|
773 |
+
" <table border=\"1\" class=\"dataframe\">\n",
|
774 |
+
" <thead>\n",
|
775 |
+
" <tr style=\"text-align: left;\">\n",
|
776 |
+
" <th>Epoch</th>\n",
|
777 |
+
" <th>Training Loss</th>\n",
|
778 |
+
" <th>Validation Loss</th>\n",
|
779 |
+
" <th>Accuracy</th>\n",
|
780 |
+
" <th>Precision</th>\n",
|
781 |
+
" <th>Recall</th>\n",
|
782 |
+
" <th>F1</th>\n",
|
783 |
+
" </tr>\n",
|
784 |
+
" </thead>\n",
|
785 |
+
" <tbody>\n",
|
786 |
+
" <tr>\n",
|
787 |
+
" <td>1</td>\n",
|
788 |
+
" <td>0.299700</td>\n",
|
789 |
+
" <td>0.325549</td>\n",
|
790 |
+
" <td>0.897635</td>\n",
|
791 |
+
" <td>0.908117</td>\n",
|
792 |
+
" <td>0.885483</td>\n",
|
793 |
+
" <td>0.896658</td>\n",
|
794 |
+
" </tr>\n",
|
795 |
+
" <tr>\n",
|
796 |
+
" <td>2</td>\n",
|
797 |
+
" <td>0.304000</td>\n",
|
798 |
+
" <td>0.290004</td>\n",
|
799 |
+
" <td>0.904899</td>\n",
|
800 |
+
" <td>0.889069</td>\n",
|
801 |
+
" <td>0.925901</td>\n",
|
802 |
+
" <td>0.907111</td>\n",
|
803 |
+
" </tr>\n",
|
804 |
+
" <tr>\n",
|
805 |
+
" <td>3</td>\n",
|
806 |
+
" <td>0.310100</td>\n",
|
807 |
+
" <td>0.289658</td>\n",
|
808 |
+
" <td>0.906250</td>\n",
|
809 |
+
" <td>0.892138</td>\n",
|
810 |
+
" <td>0.924891</td>\n",
|
811 |
+
" <td>0.908219</td>\n",
|
812 |
+
" </tr>\n",
|
813 |
+
" </tbody>\n",
|
814 |
+
"</table><p>"
|
815 |
+
],
|
816 |
+
"text/plain": [
|
817 |
+
"<IPython.core.display.HTML object>"
|
818 |
+
]
|
819 |
+
},
|
820 |
+
"metadata": {},
|
821 |
+
"output_type": "display_data"
|
822 |
+
},
|
823 |
+
{
|
824 |
+
"name": "stdout",
|
825 |
+
"output_type": "stream",
|
826 |
+
"text": [
|
827 |
+
"训练完成,模型已保存至 ./gpt2_lora_text_classification\n"
|
828 |
+
]
|
829 |
+
}
|
830 |
+
],
|
831 |
+
"source": [
|
832 |
"# **6. 计算指标**\n",
|
833 |
"def compute_metrics(eval_pred):\n",
|
834 |
" predictions, labels = eval_pred\n",
|
|
|
845 |
" learning_rate=2e-5, # 学习率\n",
|
846 |
" per_device_train_batch_size=8, # 每设备的批量大小\n",
|
847 |
" per_device_eval_batch_size=8, # 每设备评估的批量大小\n",
|
848 |
+
" num_train_epochs=10, # 训练轮数\n",
|
849 |
" weight_decay=0.01, # 权重衰减\n",
|
850 |
" logging_dir=\"./logs\", # 日志路径\n",
|
851 |
" fp16=True, # 启用混合精度训练\n",
|
|
|
862 |
" train_dataset=train_dataset,\n",
|
863 |
" eval_dataset=test_dataset,\n",
|
864 |
" tokenizer=tokenizer,\n",
|
865 |
+
" data_collator=data_collator,\n",
|
866 |
" compute_metrics=compute_metrics,\n",
|
867 |
")\n",
|
868 |
"\n",
|
04-gene-sft/.ipynb_checkpoints/6-llama-continue-train-checkpoint.ipynb
CHANGED
@@ -330,6 +330,38 @@
|
|
330 |
"本节任务是基于llama,训练一个能够处理dna和protein蛋白质数据的基础预训练大模型,数据为第一章中的预训练数据,包括英文数据。"
|
331 |
]
|
332 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
333 |
{
|
334 |
"cell_type": "markdown",
|
335 |
"id": "b1bd33b8-2e05-4b59-9d8f-c48de194cfd6",
|
@@ -339,14 +371,24 @@
|
|
339 |
"\n",
|
340 |
"```\n",
|
341 |
"# 复制第一章训练数据,包括dna,protein,还有英文数据,添加英文数据是为了避免遗忘问题\n",
|
|
|
342 |
"mkdir train_data\n",
|
343 |
"cp ../01-data_env/data/*.txt train_data/\n",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
344 |
"\n",
|
345 |
"#持续预训练\n",
|
346 |
"./run_pt.sh\n",
|
347 |
"\n",
|
348 |
"#合并模型\n",
|
349 |
-
"./
|
350 |
"\n",
|
351 |
"```"
|
352 |
]
|
@@ -361,7 +403,7 @@
|
|
361 |
},
|
362 |
{
|
363 |
"cell_type": "code",
|
364 |
-
"execution_count":
|
365 |
"id": "69b3e97f-a801-4264-a651-a854bcfba9c6",
|
366 |
"metadata": {},
|
367 |
"outputs": [],
|
@@ -376,10 +418,25 @@
|
|
376 |
},
|
377 |
{
|
378 |
"cell_type": "code",
|
379 |
-
"execution_count":
|
380 |
"id": "339435d9-9379-4b30-ae8b-50feee1ba714",
|
381 |
"metadata": {},
|
382 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
383 |
"source": [
|
384 |
"tokenizer = LlamaTokenizer.from_pretrained(\"dnahlm-merge-hf\")\n",
|
385 |
"tokenizer.pad_token = tokenizer.eos_token\n",
|
@@ -388,10 +445,61 @@
|
|
388 |
},
|
389 |
{
|
390 |
"cell_type": "code",
|
391 |
-
"execution_count":
|
392 |
"id": "d0f154bb-b1ab-4611-a14c-9b403043fd96",
|
393 |
"metadata": {},
|
394 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
395 |
"source": [
|
396 |
"model = LlamaForCausalLM.from_pretrained(\"dnahlm-merge-hf\") #continue pretrain\n",
|
397 |
"model"
|
@@ -399,10 +507,51 @@
|
|
399 |
},
|
400 |
{
|
401 |
"cell_type": "code",
|
402 |
-
"execution_count":
|
403 |
"id": "792a9f78-1828-4695-9f6e-479a704ea7e8",
|
404 |
"metadata": {},
|
405 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
406 |
"source": [
|
407 |
"from transformers import AutoConfig\n",
|
408 |
"# 加载配置\n",
|
@@ -412,10 +561,22 @@
|
|
412 |
},
|
413 |
{
|
414 |
"cell_type": "code",
|
415 |
-
"execution_count":
|
416 |
"id": "49021c65-54bb-4a97-a96d-b030cc3dcd13",
|
417 |
"metadata": {},
|
418 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
419 |
"source": [
|
420 |
"text='''GCTGACTCTGCCAGGATGGAATGAAATTAGGTTGTTTTAATTATAATGTAAAGTCAGTTCTAGTCAGACATAGTCACATAGGCAAGTAAGGGAACCTAAAATTGCTTGGAAT,\n",
|
421 |
"KCGFVGPMVHLKVHLEADVASSCRSAVIYLTSEEPFEGVLGLRLKEGIAITGCWPRWPDEMDERSAVWRVEPYTRHFGRVLYSFGV,\n",
|
@@ -426,10 +587,45 @@
|
|
426 |
},
|
427 |
{
|
428 |
"cell_type": "code",
|
429 |
-
"execution_count":
|
430 |
"id": "ebf869c8-866d-4770-8f64-79d671f88663",
|
431 |
"metadata": {},
|
432 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
433 |
"source": [
|
434 |
"import torch\n",
|
435 |
"from transformers import pipeline\n",
|
@@ -448,23 +644,53 @@
|
|
448 |
},
|
449 |
{
|
450 |
"cell_type": "code",
|
451 |
-
"execution_count":
|
452 |
"id": "40a22c70-f1c4-4cd5-a118-2f5db40790e6",
|
453 |
"metadata": {},
|
454 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
455 |
"source": [
|
456 |
"pipe(\"GGAATGAAATTAGGTTGTTTTAATTATAATGTAAAGTCAGTTCT\")"
|
457 |
]
|
458 |
},
|
459 |
{
|
460 |
"cell_type": "code",
|
461 |
-
"execution_count":
|
462 |
"id": "aec95d0a-4269-4540-bf14-4ce157b9a194",
|
463 |
"metadata": {},
|
464 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
465 |
"source": [
|
466 |
-
"pipe(\"
|
467 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
468 |
}
|
469 |
],
|
470 |
"metadata": {
|
|
|
330 |
"本节任务是基于llama,训练一个能够处理dna和protein蛋白质数据的基础预训练大模型,数据为第一章中的预训练数据,包括英文数据。"
|
331 |
]
|
332 |
},
|
333 |
+
{
|
334 |
+
"cell_type": "markdown",
|
335 |
+
"id": "aec90d65-ac62-4394-a526-ca62d8bdbad4",
|
336 |
+
"metadata": {},
|
337 |
+
"source": [
|
338 |
+
"## 环境设置\n",
|
339 |
+
"并行环境对transformers、peft等的版本要求比较高,如果版本不匹配可能会出现各种异常问题。\n",
|
340 |
+
"之前的课程,都是单GPU运行,一般不存在版本问题,默认安装的都是最新版本。但运行并行环境时,需要确认下版本再运行,本课程运行并行环境如下:\n",
|
341 |
+
"\n",
|
342 |
+
"* Python 3.12.3\n",
|
343 |
+
"* transformers 4.45.2\n",
|
344 |
+
"* peft 0.3.0.dev0\n",
|
345 |
+
"* deepspeed 0.15.2\n",
|
346 |
+
"* accelerate 1.0.0\n",
|
347 |
+
"\n",
|
348 |
+
"如果版本不一致,按以下方式重新安装即可:\n",
|
349 |
+
"```\n",
|
350 |
+
"pip install transformers==4.45.2 deepspeed==0.15.2 accelerate==1.0.0\n",
|
351 |
+
"\n",
|
352 |
+
"#peft参考使用的是chinese llama的版本,需要git安装\n",
|
353 |
+
"\n",
|
354 |
+
"git clone https://github.com/huggingface/peft.git\n",
|
355 |
+
"\n",
|
356 |
+
"cd peft\n",
|
357 |
+
"\n",
|
358 |
+
"git checkout 13e53fc\n",
|
359 |
+
"\n",
|
360 |
+
"pip install . \n",
|
361 |
+
"```\n",
|
362 |
+
"如果有环境问题,可以查看本目录下的pip_list.txt"
|
363 |
+
]
|
364 |
+
},
|
365 |
{
|
366 |
"cell_type": "markdown",
|
367 |
"id": "b1bd33b8-2e05-4b59-9d8f-c48de194cfd6",
|
|
|
371 |
"\n",
|
372 |
"```\n",
|
373 |
"# 复制第一章训练数据,包括dna,protein,还有英文数据,添加英文数据是为了避免遗忘问题\n",
|
374 |
+
"\n",
|
375 |
"mkdir train_data\n",
|
376 |
"cp ../01-data_env/data/*.txt train_data/\n",
|
377 |
+
"使用这些数据,6卡4090大概需要训练16个小时,autodl也需要近200块钱了。\n",
|
378 |
+
"\n",
|
379 |
+
"建议学习时,可以使用1/10的数据训练:\n",
|
380 |
+
"awk 'NR%10==1' dna_1g.txt > dna.txt\n",
|
381 |
+
"rm dna_1g.txt\n",
|
382 |
+
"其他2类数据依次类推\n",
|
383 |
+
"\n",
|
384 |
+
"这样大概需要2到3个小时就能训练完成了\n",
|
385 |
+
"\n",
|
386 |
"\n",
|
387 |
"#持续预训练\n",
|
388 |
"./run_pt.sh\n",
|
389 |
"\n",
|
390 |
"#合并模型\n",
|
391 |
+
"./merge_pt_model.sh\n",
|
392 |
"\n",
|
393 |
"```"
|
394 |
]
|
|
|
403 |
},
|
404 |
{
|
405 |
"cell_type": "code",
|
406 |
+
"execution_count": 1,
|
407 |
"id": "69b3e97f-a801-4264-a651-a854bcfba9c6",
|
408 |
"metadata": {},
|
409 |
"outputs": [],
|
|
|
418 |
},
|
419 |
{
|
420 |
"cell_type": "code",
|
421 |
+
"execution_count": 2,
|
422 |
"id": "339435d9-9379-4b30-ae8b-50feee1ba714",
|
423 |
"metadata": {},
|
424 |
+
"outputs": [
|
425 |
+
{
|
426 |
+
"data": {
|
427 |
+
"text/plain": [
|
428 |
+
"LlamaTokenizer(name_or_path='dnahlm-merge-hf', vocab_size=91643, model_max_length=1000000000000000019884624838656, is_fast=False, padding_side='right', truncation_side='right', special_tokens={'bos_token': '<s>', 'eos_token': '</s>', 'unk_token': '<unk>', 'pad_token': '</s>'}, clean_up_tokenization_spaces=False), added_tokens_decoder={\n",
|
429 |
+
"\t0: AddedToken(\"<unk>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
|
430 |
+
"\t1: AddedToken(\"<s>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
|
431 |
+
"\t2: AddedToken(\"</s>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
|
432 |
+
"}"
|
433 |
+
]
|
434 |
+
},
|
435 |
+
"execution_count": 2,
|
436 |
+
"metadata": {},
|
437 |
+
"output_type": "execute_result"
|
438 |
+
}
|
439 |
+
],
|
440 |
"source": [
|
441 |
"tokenizer = LlamaTokenizer.from_pretrained(\"dnahlm-merge-hf\")\n",
|
442 |
"tokenizer.pad_token = tokenizer.eos_token\n",
|
|
|
445 |
},
|
446 |
{
|
447 |
"cell_type": "code",
|
448 |
+
"execution_count": 3,
|
449 |
"id": "d0f154bb-b1ab-4611-a14c-9b403043fd96",
|
450 |
"metadata": {},
|
451 |
+
"outputs": [
|
452 |
+
{
|
453 |
+
"data": {
|
454 |
+
"application/vnd.jupyter.widget-view+json": {
|
455 |
+
"model_id": "342e4ab139b64bb78f0429c2f92c8310",
|
456 |
+
"version_major": 2,
|
457 |
+
"version_minor": 0
|
458 |
+
},
|
459 |
+
"text/plain": [
|
460 |
+
"Loading checkpoint shards: 0%| | 0/3 [00:00<?, ?it/s]"
|
461 |
+
]
|
462 |
+
},
|
463 |
+
"metadata": {},
|
464 |
+
"output_type": "display_data"
|
465 |
+
},
|
466 |
+
{
|
467 |
+
"data": {
|
468 |
+
"text/plain": [
|
469 |
+
"LlamaForCausalLM(\n",
|
470 |
+
" (model): LlamaModel(\n",
|
471 |
+
" (embed_tokens): Embedding(91643, 4096, padding_idx=0)\n",
|
472 |
+
" (layers): ModuleList(\n",
|
473 |
+
" (0-31): 32 x LlamaDecoderLayer(\n",
|
474 |
+
" (self_attn): LlamaSdpaAttention(\n",
|
475 |
+
" (q_proj): Linear(in_features=4096, out_features=4096, bias=False)\n",
|
476 |
+
" (k_proj): Linear(in_features=4096, out_features=4096, bias=False)\n",
|
477 |
+
" (v_proj): Linear(in_features=4096, out_features=4096, bias=False)\n",
|
478 |
+
" (o_proj): Linear(in_features=4096, out_features=4096, bias=False)\n",
|
479 |
+
" (rotary_emb): LlamaRotaryEmbedding()\n",
|
480 |
+
" )\n",
|
481 |
+
" (mlp): LlamaMLP(\n",
|
482 |
+
" (gate_proj): Linear(in_features=4096, out_features=11008, bias=False)\n",
|
483 |
+
" (up_proj): Linear(in_features=4096, out_features=11008, bias=False)\n",
|
484 |
+
" (down_proj): Linear(in_features=11008, out_features=4096, bias=False)\n",
|
485 |
+
" (act_fn): SiLU()\n",
|
486 |
+
" )\n",
|
487 |
+
" (input_layernorm): LlamaRMSNorm((4096,), eps=1e-06)\n",
|
488 |
+
" (post_attention_layernorm): LlamaRMSNorm((4096,), eps=1e-06)\n",
|
489 |
+
" )\n",
|
490 |
+
" )\n",
|
491 |
+
" (norm): LlamaRMSNorm((4096,), eps=1e-06)\n",
|
492 |
+
" (rotary_emb): LlamaRotaryEmbedding()\n",
|
493 |
+
" )\n",
|
494 |
+
" (lm_head): Linear(in_features=4096, out_features=91643, bias=False)\n",
|
495 |
+
")"
|
496 |
+
]
|
497 |
+
},
|
498 |
+
"execution_count": 3,
|
499 |
+
"metadata": {},
|
500 |
+
"output_type": "execute_result"
|
501 |
+
}
|
502 |
+
],
|
503 |
"source": [
|
504 |
"model = LlamaForCausalLM.from_pretrained(\"dnahlm-merge-hf\") #continue pretrain\n",
|
505 |
"model"
|
|
|
507 |
},
|
508 |
{
|
509 |
"cell_type": "code",
|
510 |
+
"execution_count": 4,
|
511 |
"id": "792a9f78-1828-4695-9f6e-479a704ea7e8",
|
512 |
"metadata": {},
|
513 |
+
"outputs": [
|
514 |
+
{
|
515 |
+
"data": {
|
516 |
+
"text/plain": [
|
517 |
+
"LlamaConfig {\n",
|
518 |
+
" \"_name_or_path\": \"dnahlm-merge-hf\",\n",
|
519 |
+
" \"architectures\": [\n",
|
520 |
+
" \"LlamaForCausalLM\"\n",
|
521 |
+
" ],\n",
|
522 |
+
" \"attention_bias\": false,\n",
|
523 |
+
" \"attention_dropout\": 0.0,\n",
|
524 |
+
" \"bos_token_id\": 1,\n",
|
525 |
+
" \"eos_token_id\": 2,\n",
|
526 |
+
" \"head_dim\": 128,\n",
|
527 |
+
" \"hidden_act\": \"silu\",\n",
|
528 |
+
" \"hidden_size\": 4096,\n",
|
529 |
+
" \"initializer_range\": 0.02,\n",
|
530 |
+
" \"intermediate_size\": 11008,\n",
|
531 |
+
" \"max_position_embeddings\": 2048,\n",
|
532 |
+
" \"mlp_bias\": false,\n",
|
533 |
+
" \"model_type\": \"llama\",\n",
|
534 |
+
" \"num_attention_heads\": 32,\n",
|
535 |
+
" \"num_hidden_layers\": 32,\n",
|
536 |
+
" \"num_key_value_heads\": 32,\n",
|
537 |
+
" \"pad_token_id\": 0,\n",
|
538 |
+
" \"pretraining_tp\": 1,\n",
|
539 |
+
" \"rms_norm_eps\": 1e-06,\n",
|
540 |
+
" \"rope_scaling\": null,\n",
|
541 |
+
" \"rope_theta\": 10000.0,\n",
|
542 |
+
" \"tie_word_embeddings\": false,\n",
|
543 |
+
" \"torch_dtype\": \"float16\",\n",
|
544 |
+
" \"transformers_version\": \"4.45.2\",\n",
|
545 |
+
" \"use_cache\": true,\n",
|
546 |
+
" \"vocab_size\": 91643\n",
|
547 |
+
"}"
|
548 |
+
]
|
549 |
+
},
|
550 |
+
"execution_count": 4,
|
551 |
+
"metadata": {},
|
552 |
+
"output_type": "execute_result"
|
553 |
+
}
|
554 |
+
],
|
555 |
"source": [
|
556 |
"from transformers import AutoConfig\n",
|
557 |
"# 加载配置\n",
|
|
|
561 |
},
|
562 |
{
|
563 |
"cell_type": "code",
|
564 |
+
"execution_count": 5,
|
565 |
"id": "49021c65-54bb-4a97-a96d-b030cc3dcd13",
|
566 |
"metadata": {},
|
567 |
+
"outputs": [
|
568 |
+
{
|
569 |
+
"name": "stdout",
|
570 |
+
"output_type": "stream",
|
571 |
+
"text": [
|
572 |
+
"Test text:\n",
|
573 |
+
" GCTGACTCTGCCAGGATGGAATGAAATTAGGTTGTTTTAATTATAATGTAAAGTCAGTTCTAGTCAGACATAGTCACATAGGCAAGTAAGGGAACCTAAAATTGCTTGGAAT,\n",
|
574 |
+
"KCGFVGPMVHLKVHLEADVASSCRSAVIYLTSEEPFEGVLGLRLKEGIAITGCWPRWPDEMDERSAVWRVEPYTRHFGRVLYSFGV,\n",
|
575 |
+
"The primary use of LLaMA is research on large language models, including\n",
|
576 |
+
"Tokenized by DNA-LLaMA tokenizer:['▁GC', 'TGA', 'CT', 'C', 'TGCC', 'AGGATGG', 'AATG', 'AAATT', 'AGGTTG', 'TTTTAATT', 'ATAATGTAA', 'AGTCAG', 'TTCTAG', 'TCAG', 'ACATAG', 'TC', 'ACATAGG', 'CA', 'AGTAAGGG', 'AAC', 'CT', 'AAAATTGC', 'TTGG', 'AAT', ',', '<0x0A>', 'KCG', 'FVGP', 'MVHL', 'KV', 'HLE', 'ADV', 'ASSC', 'RSAV', 'I', 'YL', 'TSEE', 'P', 'FEG', 'VLGL', 'RLK', 'EGI', 'AI', 'TGC', 'W', 'PRW', 'P', 'DEM', 'DER', 'SAV', 'W', 'RVE', 'PY', 'TRH', 'FG', 'RVLY', 'SFGV', ',', '<0x0A>', 'The', '▁primary', '▁use', '▁of', '▁L', 'La', 'MA', '▁is', '▁research', '▁on', '▁large', '▁language', '▁models', ',', '▁including']\n"
|
577 |
+
]
|
578 |
+
}
|
579 |
+
],
|
580 |
"source": [
|
581 |
"text='''GCTGACTCTGCCAGGATGGAATGAAATTAGGTTGTTTTAATTATAATGTAAAGTCAGTTCTAGTCAGACATAGTCACATAGGCAAGTAAGGGAACCTAAAATTGCTTGGAAT,\n",
|
582 |
"KCGFVGPMVHLKVHLEADVASSCRSAVIYLTSEEPFEGVLGLRLKEGIAITGCWPRWPDEMDERSAVWRVEPYTRHFGRVLYSFGV,\n",
|
|
|
587 |
},
|
588 |
{
|
589 |
"cell_type": "code",
|
590 |
+
"execution_count": 6,
|
591 |
"id": "ebf869c8-866d-4770-8f64-79d671f88663",
|
592 |
"metadata": {},
|
593 |
+
"outputs": [
|
594 |
+
{
|
595 |
+
"data": {
|
596 |
+
"application/vnd.jupyter.widget-view+json": {
|
597 |
+
"model_id": "e497889a1c3c484cb57c4b6fd93b45ab",
|
598 |
+
"version_major": 2,
|
599 |
+
"version_minor": 0
|
600 |
+
},
|
601 |
+
"text/plain": [
|
602 |
+
"Loading checkpoint shards: 0%| | 0/3 [00:00<?, ?it/s]"
|
603 |
+
]
|
604 |
+
},
|
605 |
+
"metadata": {},
|
606 |
+
"output_type": "display_data"
|
607 |
+
},
|
608 |
+
{
|
609 |
+
"name": "stderr",
|
610 |
+
"output_type": "stream",
|
611 |
+
"text": [
|
612 |
+
"Some parameters are on the meta device because they were offloaded to the cpu.\n",
|
613 |
+
"/root/miniconda3/lib/python3.12/site-packages/transformers/generation/utils.py:1220: UserWarning: Using the model-agnostic default `max_length` (=20) to control the generation length. We recommend setting `max_new_tokens` to control the maximum length of the generation.\n",
|
614 |
+
" warnings.warn(\n",
|
615 |
+
"Starting from v4.46, the `logits` model output will have the same type as the model (except at train time, where it will always be FP32)\n"
|
616 |
+
]
|
617 |
+
},
|
618 |
+
{
|
619 |
+
"data": {
|
620 |
+
"text/plain": [
|
621 |
+
"[{'generated_text': 'The key to life is to accept the fact that you are going to die. The key to'}]"
|
622 |
+
]
|
623 |
+
},
|
624 |
+
"execution_count": 6,
|
625 |
+
"metadata": {},
|
626 |
+
"output_type": "execute_result"
|
627 |
+
}
|
628 |
+
],
|
629 |
"source": [
|
630 |
"import torch\n",
|
631 |
"from transformers import pipeline\n",
|
|
|
644 |
},
|
645 |
{
|
646 |
"cell_type": "code",
|
647 |
+
"execution_count": 7,
|
648 |
"id": "40a22c70-f1c4-4cd5-a118-2f5db40790e6",
|
649 |
"metadata": {},
|
650 |
+
"outputs": [
|
651 |
+
{
|
652 |
+
"data": {
|
653 |
+
"text/plain": [
|
654 |
+
"[{'generated_text': 'GGAATGAAATTAGGTTGTTTTAATTATAATGTAAAGTCAGTTCTCTCCTCCTCCTCCTC'}]"
|
655 |
+
]
|
656 |
+
},
|
657 |
+
"execution_count": 7,
|
658 |
+
"metadata": {},
|
659 |
+
"output_type": "execute_result"
|
660 |
+
}
|
661 |
+
],
|
662 |
"source": [
|
663 |
"pipe(\"GGAATGAAATTAGGTTGTTTTAATTATAATGTAAAGTCAGTTCT\")"
|
664 |
]
|
665 |
},
|
666 |
{
|
667 |
"cell_type": "code",
|
668 |
+
"execution_count": 9,
|
669 |
"id": "aec95d0a-4269-4540-bf14-4ce157b9a194",
|
670 |
"metadata": {},
|
671 |
+
"outputs": [
|
672 |
+
{
|
673 |
+
"data": {
|
674 |
+
"text/plain": [
|
675 |
+
"[{'generated_text': 'KCGFVGPMVHLKVHLEADVASSCRSAVIYLTSEEPFEGVLGLRLKETLK'}]"
|
676 |
+
]
|
677 |
+
},
|
678 |
+
"execution_count": 9,
|
679 |
+
"metadata": {},
|
680 |
+
"output_type": "execute_result"
|
681 |
+
}
|
682 |
+
],
|
683 |
"source": [
|
684 |
+
"pipe(\"KCGFVGPMVHLKVHLEADVASSCRSAVIYLTSEEPFEGVLGLRLK\")"
|
685 |
]
|
686 |
+
},
|
687 |
+
{
|
688 |
+
"cell_type": "code",
|
689 |
+
"execution_count": null,
|
690 |
+
"id": "c1cfab60-2820-4885-8961-0290c49dfbec",
|
691 |
+
"metadata": {},
|
692 |
+
"outputs": [],
|
693 |
+
"source": []
|
694 |
}
|
695 |
],
|
696 |
"metadata": {
|
04-gene-sft/.ipynb_checkpoints/7-llama-instruction-ft-checkpoint.ipynb
CHANGED
@@ -184,11 +184,21 @@
|
|
184 |
"指令微调通过在特定格式的数据集上进一步训练大模型,使其能够更好地理解和执行用户的自然语言指令。这种方法适合多任务场景,并能提升模型的交互能力和领域适应性。借助高质量的指令数据集和高效的微调技术,大模型在实际应用中的表现可以得到显著提升。"
|
185 |
]
|
186 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
187 |
{
|
188 |
"cell_type": "markdown",
|
189 |
"id": "7be8b814-42f6-4fb6-bf4b-ae23292030f6",
|
190 |
"metadata": {},
|
191 |
-
"source": [
|
|
|
|
|
192 |
},
|
193 |
{
|
194 |
"cell_type": "markdown",
|
@@ -293,6 +303,8 @@
|
|
293 |
"#微调\n",
|
294 |
"./run_sft.sh\n",
|
295 |
"\n",
|
|
|
|
|
296 |
"#合并模型\n",
|
297 |
"./merge_sft_model.sh\n",
|
298 |
"\n",
|
@@ -325,17 +337,9 @@
|
|
325 |
" os.environ[var] = value"
|
326 |
]
|
327 |
},
|
328 |
-
{
|
329 |
-
"cell_type": "markdown",
|
330 |
-
"id": "17bdb69d-3f0f-465e-bd60-2047a088e264",
|
331 |
-
"metadata": {},
|
332 |
-
"source": [
|
333 |
-
"如果您不确定模型中有哪些模块可以微调,可以打印模型结构:"
|
334 |
-
]
|
335 |
-
},
|
336 |
{
|
337 |
"cell_type": "code",
|
338 |
-
"execution_count":
|
339 |
"id": "054a2956-9045-4ad5-a878-1bfc84ad4ed8",
|
340 |
"metadata": {},
|
341 |
"outputs": [],
|
@@ -350,10 +354,26 @@
|
|
350 |
},
|
351 |
{
|
352 |
"cell_type": "code",
|
353 |
-
"execution_count":
|
354 |
"id": "63c8bf16-9576-41bc-b27c-c92ba4289cf4",
|
355 |
"metadata": {},
|
356 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
357 |
"source": [
|
358 |
"from datasets import load_dataset\n",
|
359 |
"dna_ft_dataset = load_dataset('json', data_files='val_data.json')\n",
|
@@ -362,10 +382,30 @@
|
|
362 |
},
|
363 |
{
|
364 |
"cell_type": "code",
|
365 |
-
"execution_count":
|
366 |
"id": "95928da3-ca64-4a17-80f4-945da395702c",
|
367 |
"metadata": {},
|
368 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
369 |
"source": [
|
370 |
"data = dna_ft_dataset[\"train\"].train_test_split(train_size=0.1, seed=42)\n",
|
371 |
"data"
|
@@ -373,7 +413,7 @@
|
|
373 |
},
|
374 |
{
|
375 |
"cell_type": "code",
|
376 |
-
"execution_count":
|
377 |
"id": "a3e65bcd-85ce-4261-8ba6-7665c4ec60e2",
|
378 |
"metadata": {},
|
379 |
"outputs": [],
|
@@ -384,10 +424,61 @@
|
|
384 |
},
|
385 |
{
|
386 |
"cell_type": "code",
|
387 |
-
"execution_count":
|
388 |
"id": "3d3fe49b-f48f-42b2-bc97-028e443111e4",
|
389 |
"metadata": {},
|
390 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
391 |
"source": [
|
392 |
"model = LlamaForCausalLM.from_pretrained(\"dnahlm-llama-7b-sft-v0\") #continue pretrain\n",
|
393 |
"model"
|
@@ -395,7 +486,7 @@
|
|
395 |
},
|
396 |
{
|
397 |
"cell_type": "code",
|
398 |
-
"execution_count":
|
399 |
"id": "c54df9fe-86c4-4963-b313-b438894bf9dd",
|
400 |
"metadata": {},
|
401 |
"outputs": [],
|
@@ -424,10 +515,23 @@
|
|
424 |
},
|
425 |
{
|
426 |
"cell_type": "code",
|
427 |
-
"execution_count":
|
428 |
"id": "ee540cfb-1f6e-4e02-a3bc-c814e43685cb",
|
429 |
"metadata": {},
|
430 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
431 |
"source": [
|
432 |
"example = data[\"test\"][0]\n",
|
433 |
"example"
|
@@ -435,10 +539,27 @@
|
|
435 |
},
|
436 |
{
|
437 |
"cell_type": "code",
|
438 |
-
"execution_count":
|
439 |
"id": "7ee35528-7b3f-4e60-b88b-1bc3e950012b",
|
440 |
"metadata": {},
|
441 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
442 |
"source": [
|
443 |
"prompt = build_prompt(example)\n",
|
444 |
"print(prompt)"
|
@@ -446,17 +567,116 @@
|
|
446 |
},
|
447 |
{
|
448 |
"cell_type": "code",
|
449 |
-
"execution_count":
|
450 |
"id": "8aa6f38f-3bcc-4566-8a66-a541db91e031",
|
451 |
"metadata": {},
|
452 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
453 |
"source": [
|
454 |
"tokenizer.tokenize(prompt)"
|
455 |
]
|
456 |
},
|
457 |
{
|
458 |
"cell_type": "code",
|
459 |
-
"execution_count":
|
460 |
"id": "11875339-4901-4912-86e5-afe8c74921d9",
|
461 |
"metadata": {},
|
462 |
"outputs": [],
|
@@ -498,10 +718,48 @@
|
|
498 |
},
|
499 |
{
|
500 |
"cell_type": "code",
|
501 |
-
"execution_count":
|
502 |
"id": "1b02644a-8b24-45aa-b22d-0f7ce2270dd9",
|
503 |
"metadata": {},
|
504 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
505 |
"source": [
|
506 |
"input_text = format_input(data[\"test\"][0])\n",
|
507 |
"\n",
|
@@ -562,10 +820,118 @@
|
|
562 |
},
|
563 |
{
|
564 |
"cell_type": "code",
|
565 |
-
"execution_count":
|
566 |
"id": "68831e19-5a99-46d8-9f40-e8bf6957dbfc",
|
567 |
"metadata": {},
|
568 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
569 |
"source": [
|
570 |
"import json\n",
|
571 |
"from tqdm import tqdm\n",
|
@@ -598,6 +964,14 @@
|
|
598 |
"\n",
|
599 |
"print(\"presicion\", right_sum/all_num, \"same\", same_sum/all_num)\n"
|
600 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
601 |
}
|
602 |
],
|
603 |
"metadata": {
|
|
|
184 |
"指令微调通过在特定格式的数据集上进一步训练大模型,使其能够更好地理解和执行用户的自然语言指令。这种方法适合多任务场景,并能提升模型的交互能力和领域适应性。借助高质量的指令数据集和高效的微调技术,大模型在实际应用中的表现可以得到显著提升。"
|
185 |
]
|
186 |
},
|
187 |
+
{
|
188 |
+
"cell_type": "code",
|
189 |
+
"execution_count": null,
|
190 |
+
"id": "e77f8b39-e75a-4014-a98a-bde5b2534bf1",
|
191 |
+
"metadata": {},
|
192 |
+
"outputs": [],
|
193 |
+
"source": []
|
194 |
+
},
|
195 |
{
|
196 |
"cell_type": "markdown",
|
197 |
"id": "7be8b814-42f6-4fb6-bf4b-ae23292030f6",
|
198 |
"metadata": {},
|
199 |
+
"source": [
|
200 |
+
"## 持续预训练 VS 指令微调"
|
201 |
+
]
|
202 |
},
|
203 |
{
|
204 |
"cell_type": "markdown",
|
|
|
303 |
"#微调\n",
|
304 |
"./run_sft.sh\n",
|
305 |
"\n",
|
306 |
+
"运行时间约3小时\n",
|
307 |
+
"\n",
|
308 |
"#合并模型\n",
|
309 |
"./merge_sft_model.sh\n",
|
310 |
"\n",
|
|
|
337 |
" os.environ[var] = value"
|
338 |
]
|
339 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
340 |
{
|
341 |
"cell_type": "code",
|
342 |
+
"execution_count": 2,
|
343 |
"id": "054a2956-9045-4ad5-a878-1bfc84ad4ed8",
|
344 |
"metadata": {},
|
345 |
"outputs": [],
|
|
|
354 |
},
|
355 |
{
|
356 |
"cell_type": "code",
|
357 |
+
"execution_count": 3,
|
358 |
"id": "63c8bf16-9576-41bc-b27c-c92ba4289cf4",
|
359 |
"metadata": {},
|
360 |
+
"outputs": [
|
361 |
+
{
|
362 |
+
"data": {
|
363 |
+
"text/plain": [
|
364 |
+
"DatasetDict({\n",
|
365 |
+
" train: Dataset({\n",
|
366 |
+
" features: ['instruction', 'input', 'output'],\n",
|
367 |
+
" num_rows: 19839\n",
|
368 |
+
" })\n",
|
369 |
+
"})"
|
370 |
+
]
|
371 |
+
},
|
372 |
+
"execution_count": 3,
|
373 |
+
"metadata": {},
|
374 |
+
"output_type": "execute_result"
|
375 |
+
}
|
376 |
+
],
|
377 |
"source": [
|
378 |
"from datasets import load_dataset\n",
|
379 |
"dna_ft_dataset = load_dataset('json', data_files='val_data.json')\n",
|
|
|
382 |
},
|
383 |
{
|
384 |
"cell_type": "code",
|
385 |
+
"execution_count": 4,
|
386 |
"id": "95928da3-ca64-4a17-80f4-945da395702c",
|
387 |
"metadata": {},
|
388 |
+
"outputs": [
|
389 |
+
{
|
390 |
+
"data": {
|
391 |
+
"text/plain": [
|
392 |
+
"DatasetDict({\n",
|
393 |
+
" train: Dataset({\n",
|
394 |
+
" features: ['instruction', 'input', 'output'],\n",
|
395 |
+
" num_rows: 1983\n",
|
396 |
+
" })\n",
|
397 |
+
" test: Dataset({\n",
|
398 |
+
" features: ['instruction', 'input', 'output'],\n",
|
399 |
+
" num_rows: 17856\n",
|
400 |
+
" })\n",
|
401 |
+
"})"
|
402 |
+
]
|
403 |
+
},
|
404 |
+
"execution_count": 4,
|
405 |
+
"metadata": {},
|
406 |
+
"output_type": "execute_result"
|
407 |
+
}
|
408 |
+
],
|
409 |
"source": [
|
410 |
"data = dna_ft_dataset[\"train\"].train_test_split(train_size=0.1, seed=42)\n",
|
411 |
"data"
|
|
|
413 |
},
|
414 |
{
|
415 |
"cell_type": "code",
|
416 |
+
"execution_count": 5,
|
417 |
"id": "a3e65bcd-85ce-4261-8ba6-7665c4ec60e2",
|
418 |
"metadata": {},
|
419 |
"outputs": [],
|
|
|
424 |
},
|
425 |
{
|
426 |
"cell_type": "code",
|
427 |
+
"execution_count": 6,
|
428 |
"id": "3d3fe49b-f48f-42b2-bc97-028e443111e4",
|
429 |
"metadata": {},
|
430 |
+
"outputs": [
|
431 |
+
{
|
432 |
+
"data": {
|
433 |
+
"application/vnd.jupyter.widget-view+json": {
|
434 |
+
"model_id": "4f060ff2029447b9bad5e2b2e40b7133",
|
435 |
+
"version_major": 2,
|
436 |
+
"version_minor": 0
|
437 |
+
},
|
438 |
+
"text/plain": [
|
439 |
+
"Loading checkpoint shards: 0%| | 0/3 [00:00<?, ?it/s]"
|
440 |
+
]
|
441 |
+
},
|
442 |
+
"metadata": {},
|
443 |
+
"output_type": "display_data"
|
444 |
+
},
|
445 |
+
{
|
446 |
+
"data": {
|
447 |
+
"text/plain": [
|
448 |
+
"LlamaForCausalLM(\n",
|
449 |
+
" (model): LlamaModel(\n",
|
450 |
+
" (embed_tokens): Embedding(91644, 4096, padding_idx=0)\n",
|
451 |
+
" (layers): ModuleList(\n",
|
452 |
+
" (0-31): 32 x LlamaDecoderLayer(\n",
|
453 |
+
" (self_attn): LlamaSdpaAttention(\n",
|
454 |
+
" (q_proj): Linear(in_features=4096, out_features=4096, bias=False)\n",
|
455 |
+
" (k_proj): Linear(in_features=4096, out_features=4096, bias=False)\n",
|
456 |
+
" (v_proj): Linear(in_features=4096, out_features=4096, bias=False)\n",
|
457 |
+
" (o_proj): Linear(in_features=4096, out_features=4096, bias=False)\n",
|
458 |
+
" (rotary_emb): LlamaRotaryEmbedding()\n",
|
459 |
+
" )\n",
|
460 |
+
" (mlp): LlamaMLP(\n",
|
461 |
+
" (gate_proj): Linear(in_features=4096, out_features=11008, bias=False)\n",
|
462 |
+
" (up_proj): Linear(in_features=4096, out_features=11008, bias=False)\n",
|
463 |
+
" (down_proj): Linear(in_features=11008, out_features=4096, bias=False)\n",
|
464 |
+
" (act_fn): SiLU()\n",
|
465 |
+
" )\n",
|
466 |
+
" (input_layernorm): LlamaRMSNorm((4096,), eps=1e-06)\n",
|
467 |
+
" (post_attention_layernorm): LlamaRMSNorm((4096,), eps=1e-06)\n",
|
468 |
+
" )\n",
|
469 |
+
" )\n",
|
470 |
+
" (norm): LlamaRMSNorm((4096,), eps=1e-06)\n",
|
471 |
+
" (rotary_emb): LlamaRotaryEmbedding()\n",
|
472 |
+
" )\n",
|
473 |
+
" (lm_head): Linear(in_features=4096, out_features=91644, bias=False)\n",
|
474 |
+
")"
|
475 |
+
]
|
476 |
+
},
|
477 |
+
"execution_count": 6,
|
478 |
+
"metadata": {},
|
479 |
+
"output_type": "execute_result"
|
480 |
+
}
|
481 |
+
],
|
482 |
"source": [
|
483 |
"model = LlamaForCausalLM.from_pretrained(\"dnahlm-llama-7b-sft-v0\") #continue pretrain\n",
|
484 |
"model"
|
|
|
486 |
},
|
487 |
{
|
488 |
"cell_type": "code",
|
489 |
+
"execution_count": 7,
|
490 |
"id": "c54df9fe-86c4-4963-b313-b438894bf9dd",
|
491 |
"metadata": {},
|
492 |
"outputs": [],
|
|
|
515 |
},
|
516 |
{
|
517 |
"cell_type": "code",
|
518 |
+
"execution_count": 8,
|
519 |
"id": "ee540cfb-1f6e-4e02-a3bc-c814e43685cb",
|
520 |
"metadata": {},
|
521 |
+
"outputs": [
|
522 |
+
{
|
523 |
+
"data": {
|
524 |
+
"text/plain": [
|
525 |
+
"{'instruction': 'Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.',\n",
|
526 |
+
" 'input': 'CCGTGCGACCGGAAGTGGGGCGGCGACCCCGGAAGTCCCCGCCGGGTGCAGCTTGGTCGGTTCGATCGCC',\n",
|
527 |
+
" 'output': 'promoter'}"
|
528 |
+
]
|
529 |
+
},
|
530 |
+
"execution_count": 8,
|
531 |
+
"metadata": {},
|
532 |
+
"output_type": "execute_result"
|
533 |
+
}
|
534 |
+
],
|
535 |
"source": [
|
536 |
"example = data[\"test\"][0]\n",
|
537 |
"example"
|
|
|
539 |
},
|
540 |
{
|
541 |
"cell_type": "code",
|
542 |
+
"execution_count": 9,
|
543 |
"id": "7ee35528-7b3f-4e60-b88b-1bc3e950012b",
|
544 |
"metadata": {},
|
545 |
+
"outputs": [
|
546 |
+
{
|
547 |
+
"name": "stdout",
|
548 |
+
"output_type": "stream",
|
549 |
+
"text": [
|
550 |
+
"Below is an instruction that describes a task. Write a response that appropriately completes the request.\n",
|
551 |
+
"\n",
|
552 |
+
"### Instruction:\n",
|
553 |
+
"Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.\n",
|
554 |
+
"\n",
|
555 |
+
"### Input:\n",
|
556 |
+
"CCGTGCGACCGGAAGTGGGGCGGCGACCCCGGAAGTCCCCGCCGGGTGCAGCTTGGTCGGTTCGATCGCC\n",
|
557 |
+
"\n",
|
558 |
+
"### Response:\n",
|
559 |
+
"promoter\n"
|
560 |
+
]
|
561 |
+
}
|
562 |
+
],
|
563 |
"source": [
|
564 |
"prompt = build_prompt(example)\n",
|
565 |
"print(prompt)"
|
|
|
567 |
},
|
568 |
{
|
569 |
"cell_type": "code",
|
570 |
+
"execution_count": 10,
|
571 |
"id": "8aa6f38f-3bcc-4566-8a66-a541db91e031",
|
572 |
"metadata": {},
|
573 |
+
"outputs": [
|
574 |
+
{
|
575 |
+
"data": {
|
576 |
+
"text/plain": [
|
577 |
+
"['▁Below',\n",
|
578 |
+
" '▁is',\n",
|
579 |
+
" '▁an',\n",
|
580 |
+
" '▁instruction',\n",
|
581 |
+
" '▁that',\n",
|
582 |
+
" '▁describes',\n",
|
583 |
+
" '▁a',\n",
|
584 |
+
" '▁task',\n",
|
585 |
+
" '.',\n",
|
586 |
+
" '▁Write',\n",
|
587 |
+
" '▁a',\n",
|
588 |
+
" '▁response',\n",
|
589 |
+
" '▁that',\n",
|
590 |
+
" '▁appropri',\n",
|
591 |
+
" 'ately',\n",
|
592 |
+
" '▁comple',\n",
|
593 |
+
" 'tes',\n",
|
594 |
+
" '▁the',\n",
|
595 |
+
" '▁request',\n",
|
596 |
+
" '.',\n",
|
597 |
+
" '<0x0A>',\n",
|
598 |
+
" '<0x0A>',\n",
|
599 |
+
" '##',\n",
|
600 |
+
" '#',\n",
|
601 |
+
" '▁Inst',\n",
|
602 |
+
" 'ruction',\n",
|
603 |
+
" ':',\n",
|
604 |
+
" '<0x0A>',\n",
|
605 |
+
" 'Det',\n",
|
606 |
+
" 'erm',\n",
|
607 |
+
" 'ine',\n",
|
608 |
+
" '▁core',\n",
|
609 |
+
" '▁prom',\n",
|
610 |
+
" 'oter',\n",
|
611 |
+
" '▁detection',\n",
|
612 |
+
" '▁of',\n",
|
613 |
+
" '▁following',\n",
|
614 |
+
" '▁d',\n",
|
615 |
+
" 'na',\n",
|
616 |
+
" '▁sequence',\n",
|
617 |
+
" ',',\n",
|
618 |
+
" '▁The',\n",
|
619 |
+
" '▁result',\n",
|
620 |
+
" '▁will',\n",
|
621 |
+
" '▁be',\n",
|
622 |
+
" '▁one',\n",
|
623 |
+
" '▁of',\n",
|
624 |
+
" '▁the',\n",
|
625 |
+
" '▁following',\n",
|
626 |
+
" ':',\n",
|
627 |
+
" '▁Non',\n",
|
628 |
+
" '-',\n",
|
629 |
+
" 'prom',\n",
|
630 |
+
" 'oter',\n",
|
631 |
+
" ',',\n",
|
632 |
+
" '▁prom',\n",
|
633 |
+
" 'oter',\n",
|
634 |
+
" '.',\n",
|
635 |
+
" '<0x0A>',\n",
|
636 |
+
" '<0x0A>',\n",
|
637 |
+
" '##',\n",
|
638 |
+
" '#',\n",
|
639 |
+
" '▁Input',\n",
|
640 |
+
" ':',\n",
|
641 |
+
" '<0x0A>',\n",
|
642 |
+
" 'CCG',\n",
|
643 |
+
" 'TGCG',\n",
|
644 |
+
" 'ACCGG',\n",
|
645 |
+
" 'AAG',\n",
|
646 |
+
" 'TGGGGC',\n",
|
647 |
+
" 'GGCG',\n",
|
648 |
+
" 'ACCCCGG',\n",
|
649 |
+
" 'AAG',\n",
|
650 |
+
" 'TCCCC',\n",
|
651 |
+
" 'GCCGGG',\n",
|
652 |
+
" 'TGCAGC',\n",
|
653 |
+
" 'TTGG',\n",
|
654 |
+
" 'TCGG',\n",
|
655 |
+
" 'TTCG',\n",
|
656 |
+
" 'ATCGCC',\n",
|
657 |
+
" '<0x0A>',\n",
|
658 |
+
" '<0x0A>',\n",
|
659 |
+
" '##',\n",
|
660 |
+
" '#',\n",
|
661 |
+
" '▁Response',\n",
|
662 |
+
" ':',\n",
|
663 |
+
" '<0x0A>',\n",
|
664 |
+
" 'prom',\n",
|
665 |
+
" 'oter']"
|
666 |
+
]
|
667 |
+
},
|
668 |
+
"execution_count": 10,
|
669 |
+
"metadata": {},
|
670 |
+
"output_type": "execute_result"
|
671 |
+
}
|
672 |
+
],
|
673 |
"source": [
|
674 |
"tokenizer.tokenize(prompt)"
|
675 |
]
|
676 |
},
|
677 |
{
|
678 |
"cell_type": "code",
|
679 |
+
"execution_count": 11,
|
680 |
"id": "11875339-4901-4912-86e5-afe8c74921d9",
|
681 |
"metadata": {},
|
682 |
"outputs": [],
|
|
|
718 |
},
|
719 |
{
|
720 |
"cell_type": "code",
|
721 |
+
"execution_count": 12,
|
722 |
"id": "1b02644a-8b24-45aa-b22d-0f7ce2270dd9",
|
723 |
"metadata": {},
|
724 |
+
"outputs": [
|
725 |
+
{
|
726 |
+
"name": "stdout",
|
727 |
+
"output_type": "stream",
|
728 |
+
"text": [
|
729 |
+
"input (test): Below is an instruction that describes a task. Write a response that appropriately completes the request.\n",
|
730 |
+
"\n",
|
731 |
+
"### Instruction:\n",
|
732 |
+
"Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.\n",
|
733 |
+
"\n",
|
734 |
+
"### Input:\n",
|
735 |
+
"CCGTGCGACCGGAAGTGGGGCGGCGACCCCGGAAGTCCCCGCCGGGTGCAGCTTGGTCGGTTCGATCGCC\n",
|
736 |
+
"\n",
|
737 |
+
"### Response:\n",
|
738 |
+
"\n",
|
739 |
+
"real answer: promoter\n",
|
740 |
+
"--------------------------\n",
|
741 |
+
"\n",
|
742 |
+
"model's answer: \n",
|
743 |
+
"\n"
|
744 |
+
]
|
745 |
+
},
|
746 |
+
{
|
747 |
+
"name": "stderr",
|
748 |
+
"output_type": "stream",
|
749 |
+
"text": [
|
750 |
+
"/root/miniconda3/lib/python3.12/site-packages/transformers/generation/configuration_utils.py:601: UserWarning: `do_sample` is set to `False`. However, `temperature` is set to `0.01` -- this flag is only used in sample-based generation modes. You should set `do_sample=True` or unset `temperature`.\n",
|
751 |
+
" warnings.warn(\n",
|
752 |
+
"Starting from v4.46, the `logits` model output will have the same type as the model (except at train time, where it will always be FP32)\n"
|
753 |
+
]
|
754 |
+
},
|
755 |
+
{
|
756 |
+
"name": "stdout",
|
757 |
+
"output_type": "stream",
|
758 |
+
"text": [
|
759 |
+
"promoter\n"
|
760 |
+
]
|
761 |
+
}
|
762 |
+
],
|
763 |
"source": [
|
764 |
"input_text = format_input(data[\"test\"][0])\n",
|
765 |
"\n",
|
|
|
820 |
},
|
821 |
{
|
822 |
"cell_type": "code",
|
823 |
+
"execution_count": 16,
|
824 |
"id": "68831e19-5a99-46d8-9f40-e8bf6957dbfc",
|
825 |
"metadata": {},
|
826 |
+
"outputs": [
|
827 |
+
{
|
828 |
+
"name": "stdout",
|
829 |
+
"output_type": "stream",
|
830 |
+
"text": [
|
831 |
+
"Donor Sites |||||||||||| Non-Splice Sites\n",
|
832 |
+
"promoter |||||||||||| promoter\n",
|
833 |
+
"promoter |||||||||||| promoter\n",
|
834 |
+
"promoter |||||||||||| Non-promoter\n",
|
835 |
+
"promoter |||||||||||| promoter\n",
|
836 |
+
"Donor Sites |||||||||||| Non-Splice Sites\n",
|
837 |
+
"promoter |||||||||||| promoter\n",
|
838 |
+
"promoter |||||||||||| Non-promoter\n",
|
839 |
+
"Non-promoter |||||||||||| promoter\n",
|
840 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
841 |
+
"Donor Sites |||||||||||| Donor Sites\n",
|
842 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
843 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
844 |
+
"Non-promoter |||||||||||| promoter\n",
|
845 |
+
"promoter |||||||||||| promoter\n",
|
846 |
+
"promoter |||||||||||| promoter\n",
|
847 |
+
"Donor Sites |||||||||||| Splice Sites\n",
|
848 |
+
"Background Sequences |||||||||||| Background Sequences\n",
|
849 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
850 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
851 |
+
"promoter |||||||||||| Non-promoter\n",
|
852 |
+
"promoter |||||||||||| promoter\n",
|
853 |
+
"promoter |||||||||||| promoter\n",
|
854 |
+
"promoter |||||||||||| Non-promoter\n",
|
855 |
+
"promoter |||||||||||| promoter\n",
|
856 |
+
"promoter |||||||||||| promoter\n",
|
857 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
858 |
+
"Non-Splice Sites |||||||||||| Non-Splice Sites\n",
|
859 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
860 |
+
"promoter |||||||||||| Non-promoter\n",
|
861 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
862 |
+
"Binding Sites |||||||||||| Background Sequences\n",
|
863 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
864 |
+
"Non-Splice Sites |||||||||||| Non-Splice Sites\n",
|
865 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
866 |
+
"Non-promoter |||||||||||| promoter\n",
|
867 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
868 |
+
"Donor Sites |||||||||||| Donor Sites\n",
|
869 |
+
"Non-promoter |||||||||||| promoter\n",
|
870 |
+
"promoter |||||||||||| promoter\n",
|
871 |
+
"Background Sequences |||||||||||| Background Sequences\n",
|
872 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
873 |
+
"Binding Sites |||||||||||| Binding Sites\n",
|
874 |
+
"promoter |||||||||||| promoter\n",
|
875 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
876 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
877 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
878 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
879 |
+
"Donor Sites |||||||||||| Donor Sites\n",
|
880 |
+
"promoter |||||||||||| promoter\n",
|
881 |
+
"promoter |||||||||||| promoter\n",
|
882 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
883 |
+
"Binding Sites |||||||||||| Binding Sites\n",
|
884 |
+
"promoter |||||||||||| Non-promoter\n",
|
885 |
+
"promoter |||||||||||| promoter\n",
|
886 |
+
"Background Sequences |||||||||||| Binding Sites\n",
|
887 |
+
"promoter |||||||||||| promoter\n",
|
888 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
889 |
+
"Background Sequences |||||||||||| Background Sequences\n",
|
890 |
+
"promoter |||||||||||| promoter\n",
|
891 |
+
"promoter |||||||||||| Non-promoter\n",
|
892 |
+
"promoter |||||||||||| promoter\n",
|
893 |
+
"Donor Sites |||||||||||| Non-Splice Sites\n",
|
894 |
+
"Binding Sites |||||||||||| Binding Sites\n",
|
895 |
+
"promoter |||||||||||| promoter\n",
|
896 |
+
"Donor Sites |||||||||||| Donor Sites\n",
|
897 |
+
"Non-promoter |||||||||||| promoter\n",
|
898 |
+
"Binding Sites |||||||||||| Binding Sites\n",
|
899 |
+
"Donor Sites |||||||||||| Donor Sites\n",
|
900 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
901 |
+
"Donor Sites |||||||||||| Donor Sites\n",
|
902 |
+
"Non-promoter |||||||||||| promoter\n",
|
903 |
+
"promoter |||||||||||| promoter\n",
|
904 |
+
"promoter |||||||||||| promoter\n",
|
905 |
+
"promoter |||||||||||| promoter\n",
|
906 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
907 |
+
"Acceptor Sites |||||||||||| Acceptor Sites\n",
|
908 |
+
"promoter |||||||||||| promoter\n",
|
909 |
+
"Donor Sites |||||||||||| Donor Sites\n",
|
910 |
+
"Donor Sites |||||||||||| Acceptor Sites\n",
|
911 |
+
"promoter |||||||||||| promoter\n",
|
912 |
+
"promoter |||||||||||| promoter\n",
|
913 |
+
"promoter |||||||||||| promoter\n",
|
914 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
915 |
+
"Non-promoter |||||||||||| promoter\n",
|
916 |
+
"promoter |||||||||||| Non-promoter\n",
|
917 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
918 |
+
"promoter |||||||||||| promoter\n",
|
919 |
+
"Background Sequences |||||||||||| Binding Sites\n",
|
920 |
+
"Acceptor Sites |||||||||||| Splice Sites\n",
|
921 |
+
"Non-Splice Sites |||||||||||| Non-Splice Sites\n",
|
922 |
+
"Donor Sites |||||||||||| Non-Splice Sites\n",
|
923 |
+
"Donor Sites |||||||||||| Donor Sites\n",
|
924 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
925 |
+
"promoter |||||||||||| promoter\n",
|
926 |
+
"Background Sequences |||||||||||| Binding Sites\n",
|
927 |
+
"promoter |||||||||||| promoter\n",
|
928 |
+
"promoter |||||||||||| promoter\n",
|
929 |
+
"Acceptor Sites |||||||||||| Splice Sites\n",
|
930 |
+
"promoter |||||||||||| promoter\n",
|
931 |
+
"presicion 0.73 same 0.3\n"
|
932 |
+
]
|
933 |
+
}
|
934 |
+
],
|
935 |
"source": [
|
936 |
"import json\n",
|
937 |
"from tqdm import tqdm\n",
|
|
|
964 |
"\n",
|
965 |
"print(\"presicion\", right_sum/all_num, \"same\", same_sum/all_num)\n"
|
966 |
]
|
967 |
+
},
|
968 |
+
{
|
969 |
+
"cell_type": "code",
|
970 |
+
"execution_count": null,
|
971 |
+
"id": "7bc38f47-4a7d-43eb-abe8-db4310d280e3",
|
972 |
+
"metadata": {},
|
973 |
+
"outputs": [],
|
974 |
+
"source": []
|
975 |
}
|
976 |
],
|
977 |
"metadata": {
|
04-gene-sft/.ipynb_checkpoints/gpt2-small3-1024-checkpoint.json
ADDED
@@ -0,0 +1,602 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{
|
3 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
4 |
+
"input": "CCAGGATGCGCTGACGACCCGGCTGGCAGGCGGGTCCTCGTGGGCGAGGCGAGGGAGGCGGCGAGAGAGGAGCAATAGTTTCCCACCGCTCCCTCTCAGGCGCAGGGTCTAGAGAAGCGCGAGGGGATCTAGAGAAGCCGGAGGGGAGGAAGCGCGAGTCCGCGGCCCGCCCCGTTGCGTCCCACCCACCGCGTCCCCTCCCCTCCCCTCCCGCTGCGGGAAAAGCGGCCGCGGGCGGCGGCGCCCACTGTGGGGCGGGCGGAGCGCCGCGGGAGGCGGACGAGATGCGAGCGCGGCCGC",
|
5 |
+
"output": "promoter",
|
6 |
+
"model_response": "promoterpromoterpromo"
|
7 |
+
},
|
8 |
+
{
|
9 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
10 |
+
"input": "AGTCATGCCTGCAGGGAGAGAGGGCTGGGGCGCTATCTATGGTAGAAGTCAGAAGAGGCTGTCTATATGTCAGTGGGAGGAGAAAAGTGGCTCCAAAGCGGACTGTCTGTCCCTGTGCCAGAAGACATTGCTATTCAACGTTCACTCGCTTCTGCACAGGCCCACGTCACCCACAGACCGAGAACTCATCCACTAAGCGCTGCGGGTGGGCAGTACATACCGACGAAGTGCCTTCGCTACCCCTCTGGGTGTCCGTGCTCCCGCGAACCGGCTGGGGCTGCAGACGGGAAAGGAATGTCC",
|
11 |
+
"output": "Non-promoter",
|
12 |
+
"model_response": "Non-promoter"
|
13 |
+
},
|
14 |
+
{
|
15 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
16 |
+
"input": "AGGGTTTGCACTAACCGATCCACTCCGCGGGGACCTCTTAGAATTCTGCTGTGGATAAGGATATCGAACTACCGAGTCACTGGACCTAAAGAGAGATTCACTGTATCATCGTCTGCCTTAGACTGTTCCGCACCCAGGAACTGGGGCTGTTGCGCCCTTAAGGTTACTTTGAAGCCAAGGTCGCAAACAGACTTCCGCATTGACGTCAGTAGCCGAACGCTGATTTTCTTAATCTAGTATTTAGGATGGGCCTCTGTCGCCTAGCCGCTATCGCAGAGTGGAGCGGGGCTGGGAGCAAAG",
|
17 |
+
"output": "Non-promoter",
|
18 |
+
"model_response": "Non-promoter"
|
19 |
+
},
|
20 |
+
{
|
21 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
22 |
+
"input": "TCCTCGTAAGGACCTAACTGTTCCAGCACTACACAGCTCATGGTGTCCTCAAGATTAGTACATATCAGAGGCCATGAACTTAGTCTTACGCTCACTGTGGCACTGGACGCGCGGGGGATGCGGGGTTGTTCCCAGCGATTTATGATTCGTTATTTGCCGCGTTGACTCGCCGTCTGTAGCCCCATGACAACGACATTCCTGCATTCTCTGCCTGGAGAGCGAAGTGACAATACTGAATTGAAACGGCTAGAATGTCGCTCGCTGAGGCTCCGGACCTTGGAGCGTCTAGAGTCTGGCTAC",
|
23 |
+
"output": "Non-promoter",
|
24 |
+
"model_response": "Non-promoter"
|
25 |
+
},
|
26 |
+
{
|
27 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
28 |
+
"input": "TGCCACATAAGTCGGGTCAGCAAGTCGGTGGCGAAAAGAGGGCCACGTCCCAGACTTTCTGGGAGGAGGCGGGACAAAGGGGCGGGGCGAGCGCAGCGCCCTCCGGGTGGGCGCCGTCAAGAGGCCGGGGGCGGGGCCGAGCGCGGCTGGGCGGGGCCTTGAGAGGCCGGCCGGGGGCGGGGAGGCTGGCGGGTCGGCGCGGGCCCAGCCGTGCGTGCTCACGTGACGGGTCCGCGAGGCCCAGCTCGCGCAGTCGTTCGGGTGAGCGAAGATGGCGGCCGAGAGGGAACCTCCTCCGCT",
|
29 |
+
"output": "promoter",
|
30 |
+
"model_response": "promoterpromoterpromo"
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
34 |
+
"input": "ATAGGATGCGTCTGTACTCTGATTAACAAACTGCTGAAGGAATAAATATGTACTTGCTGGGCAGCGCCGCCGGCCGAATGGAGATAAGCCTATGCAGCTTCGTGCGCGGCTCCCCAGCCCTTTGCTGCGCCGCGAGCTGCGCCCTGAGACCCCCGCCTCGCTGCCAGCTACTTACCTGCCCCGGCGGAGGGGGCCATGTTGCTACACCTAGGCAGGCGGCAAGAAAGCACGCGTAATGAATTCCTTATATCCCCCGCGCCCCAACGGCGGCGGCGCGCCGGCCGGCATGGAGCCCCGCGC",
|
35 |
+
"output": "Non-promoter",
|
36 |
+
"model_response": "Non-promoter"
|
37 |
+
},
|
38 |
+
{
|
39 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
40 |
+
"input": "AGGGTAAAAAAGGGGACAAATGGAAATTTGTGTTGCAGATAGATGAGGAGCCAACAAAAAAGAGCCTCAGGATCCAGCACACATTATCACAAACTTAGTGTCCATCCATCACTGCTGACCCTCTCCGGACCTGACTCCACCCCTGAGGGACACAGGTCAGCCTTGACCAATGACTTTTAAGTACCATGGAGAACAGGGGGCCAGAACTTCGGCAGTAAAGAATAAAAGGCCAGACAGAGAGGCAGCAGCACATATCTGCTTCCGACACAGCTGCAATCACTAGCAAGCTCTCAGGCCTGG",
|
41 |
+
"output": "promoter",
|
42 |
+
"model_response": "promoterpromoterpromo"
|
43 |
+
},
|
44 |
+
{
|
45 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
46 |
+
"input": "TCTGGGTCCTAGAGGCTACTGAGACAAATGCTGGCTGAAGCCGAGGTACCTAGATTTGTGCAAATCGAGGAGTTCTATTCTGCGTGTTTGTGAAAGGGGGTTAGACGAGAGGAGTGCGGTGGAGGCGAAGATAATAGGATTATTCTTACAGTGTAATAATAATTATTGGTGCTAATGTTTAGGAAGCCGTGAAAGATAACACGAGGGGAGAAAGTGGATAGAAAAACCACCGCTGGCAGTTCTTTTCATTTGAAAGTGAATTGGGAAAAAGCGAATAAAAATTAGTCATGAGTTTTGGTA",
|
47 |
+
"output": "Non-promoter",
|
48 |
+
"model_response": "Non-promoter"
|
49 |
+
},
|
50 |
+
{
|
51 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
52 |
+
"input": "CTCGTTGCGTAATCCTCCTTCCGCACCAACGCAAGGCAGGACAGGAGGTCAAATGAGTTCGAGATCAGGCGCCACAGACTCCACTAAAGCACAGACACCAGGTGCGGTAAGACACGCTGGCTCCAAAGTAGGACACCAAACCATTGCTAATGCCTGGGGAGGGCCAAACATGGGCAGGCTGGAGCACTCTCCTCAAAGAGGACCACAAGCCGTGACAGCAGAGCCCCTCGGACTGATGGGACACGGCTTAAACTGAAGTTCGAGGACTCTGTTCTGCAGAGGTCTGAGGCTGGAAGGGGG",
|
53 |
+
"output": "Non-promoter",
|
54 |
+
"model_response": "Non-promoter"
|
55 |
+
},
|
56 |
+
{
|
57 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
58 |
+
"input": "CGGGAGAGAGCTGGAGTCTCGCGGCCTCGGAGGCCGGGAAGCCAGCCCCGGCGGGGACCCGGGCGGGGCGGAGCCAGCGGAGGCCCCACCCCCGGCGTCACCGGCCCCCAGGGGGGCGTCGCCCCCACCCCGCGCTCCAGGTAGCGGCGGCCTCAGCTGCCGGCGGTGGCTGGCTGAGCTCCCCAGCGGCCCGCGGGCCGGGGCGGGGGCGGGCCGGGGGCGTTCCCGCGGGCCACCGCCCGTGATGTCACAATCGCGGCGGGCCGCGGCGCTCCGGGGTCGGCGGTGGGCGGCGGGCCG",
|
59 |
+
"output": "promoter",
|
60 |
+
"model_response": "promoterpromoterpromo"
|
61 |
+
},
|
62 |
+
{
|
63 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
64 |
+
"input": "AGACTATACCACTTGCCGGCCGCGCAGAGCCCTCCCCAGGTCGCGCAAGGAGCTCGAGCTACTAACGGAGGAGCGGCGCAGGCCCCGCCCTTAGTTCATGAGCTTACTTGTACAGATCTCGGCCCCGGCCCCGGCCCCGCCCCCTGCCTCATCGTGGACCATCACGCTTCCAGCCCAGTCGGCTACGGAGATAATCCATGGAGCTCCGAGGCGCCGAGAGAGCAACCTTAAAGTATCGGGATCAGCGACATTATCAGCGCTGGTCCCTGCAGCTTCTGGTTCGAGATCTGGAAGGGCGGT",
|
65 |
+
"output": "Non-promoter",
|
66 |
+
"model_response": "Non-promoter"
|
67 |
+
},
|
68 |
+
{
|
69 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
70 |
+
"input": "AAGTTTAAAAATGAACCCCCAACAGAGCCACCCGCCGGGGACGGTTCTGTTTTCAGCACAACCACGGGGTTCGTTTAGCCTGTCGCCCAAGTTTGTTTCCTCTAGGTCGCCCACCTTCCGTCGAATTCTGTCACTAGACTTTTATACTGGGTTTGTATGTAGCTCGATTCTGCATACAAGCTGGACTGAGAACTGGCAGGAGGCGAAGATGAGGGGGCGGCATCCAATTTGTTGTGATCCTTTTGCGTCAGGCTTCTGCCTGAGCTCGGTGAGGTCAAGCCTCCTCTGCTTCCACCCCTC",
|
71 |
+
"output": "Non-promoter",
|
72 |
+
"model_response": "Non-promoter"
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
76 |
+
"input": "GGTCCGCTCCGCGGGGCAGGCGTCGTGGGCCCAAGGAGGCCAGGCCAGGAGTCTCGGCCTCGGGGTTACGCGGCGGCGGGCAGCTCTTCCGCTCCGCTCAGGGTGCTGCTAACCCACAGCCAAGCAGCGACCCGCAGGACCGGCGAGAACCCCGCCACTGACTCCGGCCGGAAGAGAACGTGCAGAGATGTACGTCACTTCCGGGGCGTGGCGTGGCCGGGCGTGGCCGTCCCGCCCCTGGCGGCGTGCCTGGTGCCCTGTTCCGCGTCTGTGCGACCGTCCGTCCCGAGCGCGCAGCCG",
|
77 |
+
"output": "promoter",
|
78 |
+
"model_response": "promoterpromoterpromo"
|
79 |
+
},
|
80 |
+
{
|
81 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
82 |
+
"input": "TACCGGTGCTACTTTACGCTGCGTGTTCCCTAGCAGGGGACCTAGTGTGCTACCATAAAAGCACGACGATAAGACTATACCTCTTCGCCTGCTACTAACATAGGGCACTTCTTCCCCCTCAATCTATCGACGTTCGTTAATATCACCGACTCCCAGGGAGGATCTAATAAAAGGAGGAGGGCTCCTGGAATAATAGAGGAGTGGTGCTGCGAGGGGAAGGAAAAGCCAATCCGTGACCCCTTTCCCGGAAAGCTTCGAACCGACAGCCAGAAGCCCGCACGCACCTCGCACCATGAGATG",
|
83 |
+
"output": "Non-promoter",
|
84 |
+
"model_response": "Non-promoter"
|
85 |
+
},
|
86 |
+
{
|
87 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
88 |
+
"input": "GGTCGAGAAGACCTGGGCTACTGAATGACGAATGGGACGCACACGTACTGGCTACTTTTGTGGAAGATGGTCCGCCCTGGTTTGGATTGCGCTTTGCCACGAGAACCGCCGGGAAGGTGGGGGACGGGGACGGGAGCAGCCAATCACGCGCCGTCCCGTTCGCTTCGTCAGGGCGCGATCGCGGATTCGGCACTCAGTGTATGGTACTTCGGGCGAGCGAGGAGTCGCGAGCGCTGTGAGTTTCTCCTGGAGTTTTCTGCCGTTTTCGGACCCTAAAGCTTATCTAGGCTCGCGCTCTGC",
|
89 |
+
"output": "Non-promoter",
|
90 |
+
"model_response": "Non-promoter"
|
91 |
+
},
|
92 |
+
{
|
93 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
94 |
+
"input": "CCCGTCCTGCTCCAATTGCCAGGAGATACCATTAGCGGACTGACTCCAAGAAAAGAGATCCCTTGAAAAAGGCAGTGCTGAACGCGAGTATGGCGCTGCTTGCGGGCCATTCTGTGCGCTACGCTACCCGCTCGTATTAATCAACAACAGGACCGGACTCTGGTTGCTTGGGAGTTCAGGTGGACAACAGCAACTACTAACAAGGTTTTTCGTTCATAGTGCGGGAATGGCAAAACACGAGAGAAACGCACTCTTCATAGGTCACTATCCGTCTGGAAGAAGGCTTTGGACAGAACAGGG",
|
95 |
+
"output": "Non-promoter",
|
96 |
+
"model_response": "Non-promoter"
|
97 |
+
},
|
98 |
+
{
|
99 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
100 |
+
"input": "CCCCTGCCCCCAGGCACTGACAGGGTAGCAGAGGGAAGACACATGAGCTGCTTTTTAATTTTTTTTAGAATTAATAGAGACGGGGTCTCACTCTGTCACTATGTTGCCCAGGCTAGTTTTGAACTCCTGGCCGCAAGCAGTCCTCCTACCTCAGCCTCCCAAAATGCTGGGATTACAGGTGTGAGCCACCGTGCTTAGCCATGAATTGATTTTAAAACCTTAAATGAGGGGCCAGGTGTTGTCTTTCCGTTTGCCTGAGTGACGCGGGTCTTCTCCAGGACATCCGGCCACAGATCTGCC",
|
101 |
+
"output": "Non-promoter",
|
102 |
+
"model_response": "promoterpromoterpromo"
|
103 |
+
},
|
104 |
+
{
|
105 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
106 |
+
"input": "CAGCCCCTATGTTGGTGTGGAGACCCTCACCCGAGCTGGGCCTGCACCCTACACCAATCCGAGGTACCACGATCGGATTACTACAACAGCGGGGTCAGCTCAGGCGCTCGGTCCCCCGGGATCCTGCTCCCAGGCACACGATTCCGTTCGTTATAATTATGTAGACGGGGCCTGCGGTAGACCGACTGCGGGTGACCGAGTAGTGCAAAGTCTGGCTTCTCTTTTGATAGAGCACATCGATCTGTCTTCCTCTTTTCACCGTCGATGAGCCCCGCGTGCCTGGGACTGCTCCAGGCTCCA",
|
107 |
+
"output": "Non-promoter",
|
108 |
+
"model_response": "Non-promoter"
|
109 |
+
},
|
110 |
+
{
|
111 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
112 |
+
"input": "GTCGCCGCCCACATTAGTGTGGGGCCCTGCGGCCTAGCGTCCCTCACCAGAGGCCTCCCCTTGCCTAGCTGGACCGCCGAGGGACATCGACGAGTATCCTCCTCCTGCTGTCCCCGGCTTCGCCTGCCGCCCCTAACCGGCCAGTCAAGATGGCCGCCGCTGGGTGAGGCAAGCTGGCGCGCCGCGGGGGCGTCTGGGAGTTGTAGTTCGGGACGGCGGGCTGACGCACTTCGCCGCCGGCCGACGGGCGCCATTGTGCGGCGCGCGCCGGGTGAGTGCCGCGCGAAACCTGCGTCCGTC",
|
113 |
+
"output": "promoter",
|
114 |
+
"model_response": "promoterpromoterpromo"
|
115 |
+
},
|
116 |
+
{
|
117 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
118 |
+
"input": "GCGGGTAGCGATGGGCCGGGCCTCAGTTGTTTAAAGGCCCGATGCGACCCCGCTTAAGACGTCAGATCGTTCACCCCCGAGGGTCGCCGGATTTCGCTCCCGCGCGAGATGGCCTCCACCAGAGACTGCTGCTGCGCCTTGGAGCGGTGCCTCGCCTCCCAATCCTCGTCGTGAATTGCCCCAATTGCTGATCTGGGAACAAGCTCGTGCCAGGCGGGCGGAGAGTACGAACTCCTTATAGACCAGGATGGGCGTAGCAAGAGCGCGGGCGGCCCATGAATGTCTGCAGCGGTGAGGACC",
|
119 |
+
"output": "Non-promoter",
|
120 |
+
"model_response": "Non-promoter"
|
121 |
+
},
|
122 |
+
{
|
123 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
124 |
+
"input": "CGGATGCGGGGCCGGAGGGGATGGAAGGGGCCGATCTAACACGATATTCCGTTCAAGAACTACAACCGTCTACAGCACACCATAGCCGCTCGCTGAAGGATGTCGAAGGGCGCCCAGGCACCCACCACGTTTGGCCCCTGCCCGGGCCGGTTCCGATATGACTCACTCATTTACTCCCCTAGAGCTCCGAGCCTCGGATCGCCGAGGTGACCCTTACCCGCCTTCCTACCACCTTAGGGGATTTGGCCCGGAGAACGAGATCACCCTCTCATCGTGGCGGGGTATTCCCTTTAAGGTTTG",
|
125 |
+
"output": "Non-promoter",
|
126 |
+
"model_response": "Non-promoter"
|
127 |
+
},
|
128 |
+
{
|
129 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
130 |
+
"input": "TCCCCGTCGGTCCGCGGGGCTCTCGCTGGAGCCTCCCCGACTCCGGTTTCCCCTCGTCCAGAAGCCCGACGTAACCAAAGCCCAGTCTGTCACTTTAAACACGCCCCGCCCCGCCTCCCGCGGCTGTGTTGCCTCCTCGCTGGAGAACACCCTGGTCGACCTCTGTGCGTCCGTGTGCGCGAGCGCGTCCCGCCGAGGCGGTGGGCAGGGCGGACGGTGCGCAGTGCGTTCCCGCTGGTCGGAGCCAGCACACTAACCACGCCACGCGCCCTGCCGTCCCTTCGCCTCCAGCCGCTGCAG",
|
131 |
+
"output": "promoter",
|
132 |
+
"model_response": "promoterpromoterpromo"
|
133 |
+
},
|
134 |
+
{
|
135 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
136 |
+
"input": "CACCTACTCGGGGTCCGAGGAGGGAGGATTGCCTAGGCCCAGGACTTTGAGAACACGCCTCTACAGAGATTTACATTTTAAGAAAATTAGCTGAGTTCGGGTCGGGCGCAGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCTGGCGGATCACCTGAGGTCAGGAATTCGAGACCAGCCTGACCAAAACGGCGAAACCCGTTCTCTACTAAAAATACAAACACTAGCCGGGCGTGGTGGCAGTCGCCTGTAATCCCAGCTACTCCAGAAGCTGAGGCAGGAGAATCGCTT",
|
137 |
+
"output": "Non-promoter",
|
138 |
+
"model_response": "Non-promoter"
|
139 |
+
},
|
140 |
+
{
|
141 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
142 |
+
"input": "CCCGGCCGGTGTGCGGGACACACTATTGTAATCCTAAAAGGCCATGGGCCCCTGCTGGGGTCAAGAATAATGCTCAAAGTTTAAACTACCGAGTGTTGCTGAGGCTTGGGTTGCACATCGAGAGTCGCGATAACACTCACCCAGGGGAACATACCGCGAGGGATTACGGACGATGGAGAGATCACCCAAGGCAGTGGGAGTCCGCGCTTCCTCCCTTAAGTACCGCTGGTTGTCCCCCGACGTAGTGTAGATAGTGAGCCCTCTTGTCCGCTGGCACGTGATGTATGTGTGTCCTTGTCT",
|
143 |
+
"output": "Non-promoter",
|
144 |
+
"model_response": "Non-promoter"
|
145 |
+
},
|
146 |
+
{
|
147 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
148 |
+
"input": "GGGTTAGGCGCGTGCCGCGAGAACAGAGTGGACGGAGCGTAGGAGAGACCGAAAAGGCTGGGGGTGGGAGTAGCGGATTTGAAGCACTTGTTGGCCTACAGAGGTGTGGCAAGCAGAGCACCTCAGAACTCAGGCGTACTGCCCGCCGCCCGAGCCCTGCGAGGGCCGATAGCGAGGGTGTGGCCCTTATCTGCACCCAGCAGAGCGCCGGCGGGGTACGGTCTTAGGACCTCGATCTCCTTCTCCCTCATTTTCTCTCATCCCTACCTATTGTGGGTGAGTCCTGGCCCCTGGACGGGG",
|
149 |
+
"output": "promoter",
|
150 |
+
"model_response": "Non-promoter"
|
151 |
+
},
|
152 |
+
{
|
153 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
154 |
+
"input": "CTTGCATTAATACACCCGGCGCCAGAGGAACGTGGGGATCAATCATATTACACTGCTTCAGACTTGTAATTTTATTTGGTTCAGGCATCGTAGTGGTGAGGCTTGGCAGAGAGCCCGAGATGGAGCTATTGAGACGTTTCAGGCTGGGCGGATAATCGCATCAGTAGGGACTTGGCTAGATCTCCCAAGCCTTCTCCACTCTTCAAACTTTCACCAAACTCTTAGGACCCGTCGGGTCGGTTTCATAGACTCAGCCGCTCAACCTTTTAAGCGAGCGGCGCGGCGGGGCGGGCGAACTCG",
|
155 |
+
"output": "Non-promoter",
|
156 |
+
"model_response": "Non-promoter"
|
157 |
+
},
|
158 |
+
{
|
159 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
160 |
+
"input": "GGAGCGTCATCTTATCATTCTCTCTTTTTTTTTTTTTTCTGGTAATGATTTTTTTTGTTTTTTTTTTTTTTGATATTGACAAAAGTTTAATCATTTCAATTAAAAATGCCACTAATTTGACTTTTTAAGTAAAAAATGTAGGGGGTTTTAAAACTACTTTCCTACTACCAAAAAATCAGAAAGTATCTAGCTTTCTAAATTGGGAAAGCAAGCAATGTTATAAAAACACTGAAGGAATCTCTTTCTTCGTGACCTTTTGTTAAACTCGGTTTAAGCTGTAGACCTTATTTAAAATAAAAT",
|
161 |
+
"output": "Non-promoter",
|
162 |
+
"model_response": "promoterpromoterpromo"
|
163 |
+
},
|
164 |
+
{
|
165 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
166 |
+
"input": "GGCTGTGAAACAGGGCCACCTGCAACCGTGGCTTTTCAAAGCTGAAGCCATACCCTGGAGGGAGAGAGGATCCTTGGGCACTGACCCTGGCTGTCCACAGCTAAATGATCATGGGACACCCATGACCATGGGCGTGGCTGGAGGAGGCCCTGCCCTCAGGGGTCTATTGGCATCCAGGGGAAGTGTCTACGGGGTAATGATTAACCCACACCTGCCACCTGGTTTTCAGCCTTCTTGCCCTGCTGACTCATTTTGCAAATCCCACTTGCAAAATTTGGGAGCAGCCTGGGTCCAGCCCCC",
|
167 |
+
"output": "promoter",
|
168 |
+
"model_response": "promoterpromoterpromo"
|
169 |
+
},
|
170 |
+
{
|
171 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
172 |
+
"input": "CTGTGACAATCATGGAGCAGTTGATTGACCCCTTTTCGAGCGCTCGCTATGCTTTAATTTCAAGACCCGCTCTCTAAGGTGCTCTTCCCCTTTAAGACTCACGGCTCTTCCCGTCAGGCTAGCTCGACCTGAGGATTCGTCAACCTCCTTGACAGCTTCCGCTGGCCTGCACCCGAGATTGGGGACGGTTTTCCGCACATCGTGCCTTCATTAGTTAGGTCCTGCGAGTGCATCCAAGGGCCCTCCCCCAAGCCAACAGCAGAACCAGCTCAATTTCTACCCTCACGGCGCTCCGCGGTG",
|
173 |
+
"output": "Non-promoter",
|
174 |
+
"model_response": "Non-promoter"
|
175 |
+
},
|
176 |
+
{
|
177 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
178 |
+
"input": "TAGAGAGGCCAGAATCCCTCGGGAGTGCTACCCGGTCAAAAAATCGCGCCGCCCCGTGGGGGACCGAACTTAGCGATGACGGTTCGTCTAATCCCGTAGGAGAGTATCGGGCGAGCTCGGATTGCGAGATTACCTAGTGTTAGTGCGACCGCCCAGGCACATCTGCAGACTGACGTCAGTATGCAAATCAGAGTCGGAAAAGCTGCCAAAATTTTCTCCTGCTGCGCCACCTAACCTGCGTTCGGGGGTAAACCCATCTAGGGCTACTTCAACAAAACTTTGCTGCCCTTCCTGCTCCTC",
|
179 |
+
"output": "Non-promoter",
|
180 |
+
"model_response": "Non-promoter"
|
181 |
+
},
|
182 |
+
{
|
183 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
184 |
+
"input": "AAGCTTTGTTTAGGTCCGGAAGGCGGGCTTTCCTGGGAGTGGGTGGGGAGGGGGCGTTGATTCTTGACCAATCCTTTCAGTCCGTTGGGTGGTGACCAGCCAATGGGCCGGATGGATAGGACGCTCCTCCCGGAGAGTAGTGAGACCCCTGGTGCGGGGCGATTGGCGGCGGGAGCGATGAGTGGCAGCCGCACGGCCCAACGGGAGCTGTGCGTGGGCCGCGGGGCGGGGCCAGGGCGGGTGCGCGGCGGCGGCGGGGTGGCTGGGCCGGCGGCGGCGGCGGTACGAGGCGCGCGCTCG",
|
185 |
+
"output": "promoter",
|
186 |
+
"model_response": "promoterpromoterpromo"
|
187 |
+
},
|
188 |
+
{
|
189 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
190 |
+
"input": "TCCCATCCAGCGAGAGGGGCAGGTTCCGCATTTTCTCTTCCCCTTTCCCAGCGCTTCCTCCAGCACCCGAAGCCCCAACCCTGCGGGTCAGGAACTCCCTAGTCCCCAAGTCTAGGGATGAGATGGGGGAAGGAGAGCCGTCAGGGTTGACCTGGAGTTTTGTCCGCTCCTCCCCTACAGTGATCCCTCTAGCCTTCTCCAGTCGCCTCCGCCATGTCCGAGGAGCTGGCCCAGGGCCCCAAGGAGAGCCCCCCGGCGCCGCGTGCGGGCCCCAGGGAGGTGTGGAAGAAGGGTGGCCGC",
|
191 |
+
"output": "promoter",
|
192 |
+
"model_response": "promoterpromoterpromo"
|
193 |
+
},
|
194 |
+
{
|
195 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
196 |
+
"input": "ACATCGTATTGCGCTTACTCGGGTCGTGCAATGCACTGACTAGGAAAGGGCTGCGGATGCCTACGTACATAATGCTGTTGCGCACCTATGCACGCGGGTCTGGTCCTGGGCAAGAACCGCCCCCTCTCCGGGCCTGGTTGCCTTTTGATTTGCAGAACAACGGGCCAGGCCCCTTCCCTCTACCGCTCACAAGCGTACATCGCGACCATACCTGGTCCAATGCGCTACGGACGAGGGCAATTCTCGTAATAGGACCCACAGCAAGTCTTGTATCACCTTTGGCTAGCCTGTGCCGGCTGT",
|
197 |
+
"output": "Non-promoter",
|
198 |
+
"model_response": "Non-promoter"
|
199 |
+
},
|
200 |
+
{
|
201 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
202 |
+
"input": "ACGTGACCGACGTCTTCCGCATACCCGGGGCTTCCCTCACTTCCAGGACAACCTGTTGCGCCTCTATCTCCTCCCCTCCCCGCAGTTTCCCCGCCTTGGCCTCTAATAGTAGTGAGAGCACTTTAGGCGGTATAAAGTCTGACGCTAGAATCAATCTAGCCGCGGCGATTGGCCACCTGGGGAATCGTTAGCAAAATGTCACGTTTACTACTTTACGGCGATCTTCCAGGGTCCGGGGACTGTGCCAAGGATCCTACCGGGGCTGGCAGGAGCCTAAGACAAATACGCTAGCGGCGGAGC",
|
203 |
+
"output": "Non-promoter",
|
204 |
+
"model_response": "Non-promoter"
|
205 |
+
},
|
206 |
+
{
|
207 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
208 |
+
"input": "GAGATTTCACGCCTAGCGGCTCAGGATCGTGTGTGCTACAAAAATAGGGCCGCAGGGCCGTATAGTCGCACGAACATACAGGTCGTCCGATTCAGGCTCGGGGGCTTGGACGGAGATGACGGCCAACTCTCACCCTCACCCCTCATTTAGGCGGGCGCCGGGGGCATCATCGGGGTCCCTGGGCCCTCCCCCTGCTCAGTGCGTAGCTCCGATAGAAAAGCTAGGGCATACGTGCAGAGACGGGTTGTCAGGCAGACACCCTGTCTGTTGGCAGCTAGCAGCTGGCGGACGCGACCCGGA",
|
209 |
+
"output": "Non-promoter",
|
210 |
+
"model_response": "Non-promoter"
|
211 |
+
},
|
212 |
+
{
|
213 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
214 |
+
"input": "AAACCATGCAATCCGGATGTGGCAAACAAAGGGTGGGAGCCCACTTGTCATGGCATTAATTGCACGTCTGCTTAGGGCGAGTTTTAACCGTGGCACACAAAACAGCACGATCGAGGATACGGGAGGTCGCCGCGTGATATAACGTCTACCCATGGTTCTCATCGAACCTCTACCTAGACACTGACCAAACCAACGCTCCCAGTGCGCACGGCCATTTCTCATAGATATCTTGAGCAGGTTGCACCACCAAGATAGTGAGCAGGGCGGTGGGTGGTGCGGAGTGCCGAGCGGCCTCACCCC",
|
215 |
+
"output": "Non-promoter",
|
216 |
+
"model_response": "Non-promoter"
|
217 |
+
},
|
218 |
+
{
|
219 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
220 |
+
"input": "GTCGGAGGTCGGCGGCCAGAAGCCAGATTCCCTTGGCCCCAGCTCGTCCACTTCCATCCAGCAGTAGCCAAACAACCGTTGAAAATGGCCTGGCGGCGGCCGGGAAACACCAGAGAACTATCTTCCAACCCCTAGATCCCGCCCATCTCGTTCCGGCGGTGTTTCCGTGGCGACGCTATCCGAAGTGCGGCTGCGCAAGGGTGACGGCGCGCGAGCAAGGGGGAGGGGGTGTTTTGGTTCTAGCCGCTCGCCGTCCTTGCAGGCTCTGCCGTCGGAAAGCCGCTCATTCTCGCTTCCCCT",
|
221 |
+
"output": "promoter",
|
222 |
+
"model_response": "promoterpromoterpromo"
|
223 |
+
},
|
224 |
+
{
|
225 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
226 |
+
"input": "GGTGGTTGTAGGTGGGAAGAATCCTGGGTTCCCATGCTGTCTGAACCAGAGTCATTTGTGGCCCTGCCCTCCCCTGGGACTCAATTTCCCCACCTATAAAATAAGCCCCAGTGTCCGCGAACCCTGGAGGGGCCCGCACCACTGCAGGAGCGGCCGCCGGCGCCAGGGGGCGCCTCCTCGATAACTCGGCGCTCGGCTGGCCAGGCACCGGCGCGTCGGCCGCTCGATTGGTCGGGTCGGGGCCGGCCTGAGCGCCGCGGGCCTGCGCCATTGAGGAGCGGCGGGGAGGAAACGCCGCGC",
|
227 |
+
"output": "promoter",
|
228 |
+
"model_response": "promoterpromoterpromo"
|
229 |
+
},
|
230 |
+
{
|
231 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
232 |
+
"input": "TAGGAGGCCTGTCTGCACAGCTGTGCTGGGAAGCTGATCCATTCCAACCACCGCCTCATGGCCGCTTACGTGCAGCTCATGCCTGCCCTGGTACAGCGCCGCATCGCAGACTACGAGGCTGCCTCGGCTGTGCCAGGCGTTGCTGCTGAACAGCCTGGGGTCTCTCCATCAGGCAGCTAGCCATACCCAACCCCAGGAAGGAAGGCCTTGGATGGACCCTCAGATTGAAGGACCCGGTGGACCTTGGGGTTGGTGAATCCTAAACAGAGAGAATTCGAGGTTGCCTGAAAGCTGGGTGTC",
|
233 |
+
"output": "promoter",
|
234 |
+
"model_response": "Non-promoter"
|
235 |
+
},
|
236 |
+
{
|
237 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
238 |
+
"input": "TGTTCTAGCTATCTTGAAATACGCAATACTTTGACATTAACACACTGCTATGTTCTCTAATGACATCCCTGGAAACCTCCTAGGGGCAGCCAGATCTTTCATGATAGTGGTTGTCAGTCCTCATATGGAGGGTGGAGGTTTGAAGCAGAGAGCCAAGGGAGGTTTTGTGCACCTATGCTTGTTGTGTTTGTACACAATGACTATGCATACGCTGTGAGTATAAAAGGCTCATTTAATCCTATTGTGTCCCAGGCTTGGTTTGTTTTCAAATCATTACAGCATGAATTTAGAAGGTTTTGT",
|
239 |
+
"output": "Non-promoter",
|
240 |
+
"model_response": "Non-promoter"
|
241 |
+
},
|
242 |
+
{
|
243 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
244 |
+
"input": "CCCCGAAACCCCTCATCTTGGGGGGCCCACGAGACCTCTGAGACAGGAACTGCGAAATGCTCACGAGATTAGGACACGCGCCAAGGCGGGGGCAGGGAGCTGCGAGCGCTGGGGACGCAGCCGGGCGGCCGCAGAAGCGCCCAGGCCCGCGCGCCACCCCTCTGGCGCCACCGTGGTTGAGCCCGTGACGTTTACACTCATTCATAAAACGCTTGTTATAAAAGCAGTGGCTGCGGCGCCTCGTACTCCAACCGCATCTGCAGCGAGCATCTGAGAAGCCAAGACTGAGCCGGCGGCCGC",
|
245 |
+
"output": "promoter",
|
246 |
+
"model_response": "promoterpromoterpromo"
|
247 |
+
},
|
248 |
+
{
|
249 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
250 |
+
"input": "AGCATCTTAGAGTTTCATACAAGGATCAGATTTGTGCTATCGGAGGCCGCCGATCACCCAAGTAAGCTTCATACGGGGAGGGAGGGAGGGCGCGAGCTTGGGAGTGGAGAGGGAAGGAGCAAACTTTGGCATGTATCACAGGGTAGCTCTGCCCCTGCGCGCCCCCTCCAGATCCGGCCCGCGGCTCCCCTCCCGCGGGACATGCTGCGGGGAAGCACGGCGAACAGCCCCTCCTATGATCACAAATGGCTCACCGCTCTCGGAGCCCGGCCGCCACCGCCTGGTGTGCGAGGCTATCCG",
|
251 |
+
"output": "Non-promoter",
|
252 |
+
"model_response": "Non-promoter"
|
253 |
+
},
|
254 |
+
{
|
255 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
256 |
+
"input": "TCCCTCTGACTCGGTTTCCCCTCTCCCCCGGCTCCGCGGTCGCCCTCCTCCTGGACTGCTTATTTCGTCCTCGGCCACTGCCTCTCGGGCTCGAGCTTTCTGCTTCAGAGCAGGAGAGAAACGGAGCGAAAGCAGTTTCTGTCTCCCCGGGGTCTGACTCGGTCCCCCGCGCTCGGTTCTCTGTCCCCTCCCCCTCCCGCCATTGTTCCCGGCAGGAGGAGTGCGCGGCGCAAACTTGCGAGTTCCCCCAGTCTTTGCCCCCGCAGGGCCGCGGACGGAGGTGGTGGCGGAGTTCCCGCT",
|
257 |
+
"output": "promoter",
|
258 |
+
"model_response": "promoterpromoterpromo"
|
259 |
+
},
|
260 |
+
{
|
261 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
262 |
+
"input": "GGCATGATGTTGGCCGACTTGTGCTAAGGTCCTTGGAGAGGTGGAGGCTCTACGGGGGGTGGGCAGTACCCCTTAGAAAACACCCCTCACTCTCAACAACAGAGGCGGTCTGACGTATTGGAACTCATTCTTTAAGGCCAGAGCTGTGGTTGCTCCAGCTGCAATGTGCGCTCTGATGATACTTTATAATAGATTTGAAAAGGGGAGGGACAAGCCTAAGGTCATGGCAGGAGGCTGGGCCAGGGAAGTGTGGGATCCCGTGGTTGTAGTCGGCGAACCGAACGTCTCGGCCTCTCTCCC",
|
263 |
+
"output": "Non-promoter",
|
264 |
+
"model_response": "Non-promoter"
|
265 |
+
},
|
266 |
+
{
|
267 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
268 |
+
"input": "TGGGGGTTTTTTGGGGGCTCGGGCGTCGACTGTGGGGCGCTGCGGAAGGGTGGGTCGTCCGCGTGGGGTCCTGGAGCGCAGGAGGCGGAGGGGAAGCCGCGCAGGGGCCACTAGGGAGGGGCTCTGGCGGGGTCGCAGGGGCGCGGTCTGCGGGCGTGGGGGCGGGACCTCGGGGGCGGGGCCTGAGGGCGCGCGCACGCCTCCGGCGCGCCCCCTCCCGGCCGCCATGTTGGCTGGTGTGTGGGTGTCAAACTGAGCCAGACGCGGCGGTGGCGGCGGCTCCGCGGGCTACGGTCGCTC",
|
269 |
+
"output": "promoter",
|
270 |
+
"model_response": "promoterpromoterpromo"
|
271 |
+
},
|
272 |
+
{
|
273 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
274 |
+
"input": "GGAGAGGGGCGGTACGCACCACGGGGGAAGCCAATGAGAAAATCAGGCCCAGCCCTAGGGGGCGGTGCTGTCGGTCACATGCGCACCTGGGGCGGGTGGTGGCGGCGGCGCGGGCACCGCGAGCCGGCGGAAGGGAGAGGGGCCGGCCTGGGGCGGGGTTAGGCAGGTGAGTGACAGGCTCCAGGGGGCCGGCCCCTGCCTGGTGCCCCGAGCGAGCCGGGAGTAGCTGCGGCGGTGCCCGCCCCCTCTCTCCGCCCCTCCAGCGGAGCTGGTCTCCGGCCGGGCACCGTCGCGGGCCCC",
|
275 |
+
"output": "promoter",
|
276 |
+
"model_response": "promoterpromoterpromo"
|
277 |
+
},
|
278 |
+
{
|
279 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
280 |
+
"input": "CGACAACCCGCCCTTGTTCCCGGTAGAGGAACAAAGAGTGCTGATTTACGAATCTAGGCTGCCAGATTCTGTGTTTCCACTGGAGGGCGCGTCCGATGCAGATGTTGGCTCAAATTCCATCTTAACCTATAAACTCAGTTCTAGCGAATACTTCGGGCTAGATGTGAAAATAAACAGTGATGACAATAAACAAATTGGGCTCTTATTAAAGAAATCCTTGGACAGAGAGGAAGCTCCTGCACACAACTTATTCCTGACAGCCACAGATGGGGGCAAACCTGAGCTCACAGGCACTGTTCA",
|
281 |
+
"output": "promoter",
|
282 |
+
"model_response": "Non-promoter"
|
283 |
+
},
|
284 |
+
{
|
285 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
286 |
+
"input": "TTCCAGCACTCCGGGAGCCCGATTGCGAGTTTGTGAGAGTCCTTGGTGGAGGAGGTTGGAAAGCGTTCTCCCTCCCACAAAACCCGGTGCAGAGGGAGGGAGAGCCAGGTCCGTAGCGTGGGTCCAAGAAACTATGGATCCTGGAAGTGAGCACACTTTTGTGTCTTAAAAAGTTTGGTGTCTCGTTTTGAGGTGAAAAATGAATGAGATAAGTGCTTTTTAAAAAGGTAAAGTTTGTCTTACAACTGAAAAAGCTTTTAAACAAAACTATAGTTTTTGCGAGTAACTCCCGTGACACCT",
|
287 |
+
"output": "Non-promoter",
|
288 |
+
"model_response": "Non-promoter"
|
289 |
+
},
|
290 |
+
{
|
291 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
292 |
+
"input": "ATCAGCCCTGGCTAGGACCAGCTTCCCACTAAATCGCTTGCGGCAAGGAAGTCTCAGCCCCCGAGGCAGTCCTCACTGTGAGCAAACCAAAAGATGGGATCACTTGGGGCGCGTACGGGCTCCCAGCGCTGGCAGAAACTGTAGCGTTGTAGTAGCCTTTGTGACTGGTTGCCTGTTGGAAGTCTACCCCACCCCCAAACCATAAATCCACCGGAAACTCCTCCAGGACACCCTCTCAAGCAAGCCCCTGGGATGTAACTGCCACAGGGGCTGCGCCACGATGTTCCCTGGCTCTCAGGT",
|
293 |
+
"output": "Non-promoter",
|
294 |
+
"model_response": "Non-promoter"
|
295 |
+
},
|
296 |
+
{
|
297 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
298 |
+
"input": "CACCGCGTGAGGATGTGCCGGGTGGTCCTTTCCTCCTCCTCTTCCTCCTCCTCCCGGCTCCCTGCCTAGTCTCCATATAAAAGCGGCGCCGCCTCCCCGCCCTCTCTCACTCCCCGCTCCTCTCCGCCGCGCACTCTCCGCGGCGCTGGGAGAGGGCGGAGGGGGAGGCGGCGCGCGGCGCCAGAGGAGGGGGGACGCAGGGGGCGGAGCGGAGACAGTACCTTCGGAGATAATCCTTTCTCCTGCCGCAGTGGAGAGGAGCGGCCGGAGCGAGACACTTCGCCGAGGCACAGCAGCCGG",
|
299 |
+
"output": "promoter",
|
300 |
+
"model_response": "promoterpromoterpromo"
|
301 |
+
},
|
302 |
+
{
|
303 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
304 |
+
"input": "TCAGGAGTCGGTAGGGGCAAGAACGTTTCCGGCTCCTCAACGGGAGAAGCGCGGGAAATTTGTTGACGTGGAATAGCTCGACGAAATCATCGGCAGGCGCCCGCCAGGGAGCTAGCAACGGAGCATATGCGACCGTCGGTTCGCAAGAGACACACCTCGGGCCTTGCCCCGGAACGCCGCAATGGCTCCTACCTTTCCGCCGAGTGCAGGTTGATGCTGCACACCCAGAACACTTTGCGGCACGGGGGGGGTTATATGACATGCAAAGCCCTGATTCAACTTGGAGATGAGCGGGTCTGT",
|
305 |
+
"output": "Non-promoter",
|
306 |
+
"model_response": "Non-promoter"
|
307 |
+
},
|
308 |
+
{
|
309 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
310 |
+
"input": "GAACTCCTGAACTCAGGTGATCCGCTTGCCTCGGCCTCCCAAAATGCTGGGATTACAGGCGTGAGCCACTGCGCCGGCATCCTGGATGCAACACGATCAACGAATGATTGCCCTTTATTCTGCCTGGTCTCCCAGCGGTGCTGTTGTGTCTTCATTTTAATGGATTTTAATTGATGTCACTTAGGGGGGCCCTCAAGCCTTCCTTCCTCTATCCATTTACGTGGTGTACCATCGAACTTTATCGACTCAGAAGTCGTGGGCGGAGGGCGTTCCGATAGAACCAATTGGCTCACGTCTATA",
|
311 |
+
"output": "Non-promoter",
|
312 |
+
"model_response": "Non-promoter"
|
313 |
+
},
|
314 |
+
{
|
315 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
316 |
+
"input": "AAGATGGCCTGAGTCAGGAGCAGGGGCAGAGCTCAGGTTGTCGTGGGGTCCCGCTGCCCACGTCAGACTGGAGGTGAGGGATGGGCGGGGCCTGACAGCAGGCCTGGAAGGAACAGGATGTCTATGCTGGAGATAGAGGGAGAGGACAGTGCCAAAACCCAGCTCCTGGCCAGTCCCCAGCTCCTCCCTGCCTGGCCCTATCCCAGGATCCCCTCCCCGGCCTCCCAGCTATGATCTACCCCGGGGCCCAGACTTCAGGCGCCTTCACGATGCCGGCGGTCAGTGGTCCAGGTCCCTTAT",
|
317 |
+
"output": "promoter",
|
318 |
+
"model_response": "promoterpromoterpromo"
|
319 |
+
},
|
320 |
+
{
|
321 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
322 |
+
"input": "CCTGACACCCTTCGGCCATTTTTTTTAAACGTGTTGGGGTTTGGGTTTCAGCCACAGGCCTAGCACATGGAACCAAAACGCTACCACGGACTATGGAGTTGTGAACCGCGGAACAGCCGGGCGGACTCCGGGGCTCCTGTGTTTGAGATACGCGTGCCATGAACCTCCGCGCCGGAGGCGAGCCACTGGTAATTTACTTTCCAGTTGCGCCAATCCGAGTCTACCTTGATATTGAAAGGTAAGTCAAGAACAAAATTATCGCATAAGACTGTTGAGGTTACCCCGCGGCGGGGTCGGGGC",
|
323 |
+
"output": "Non-promoter",
|
324 |
+
"model_response": "Non-promoter"
|
325 |
+
},
|
326 |
+
{
|
327 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
328 |
+
"input": "CTGAGGCTGGCAGGCACTGGGCAGGCAATGAGTTCTTTCCCTGTAAGTTGGGCACAAAAAGATCGGCATGGGAGCAGCCTCCTTTGAGACAGCTGCTCTGAGAGAATGCAATAAGCAGGGAGCAGCCAGCAATTCCTCCTAGCAGAGGGCGACTCGTGGGAGGAGTTCAGTTTGCCAAGTATTGTCATTTGTTGAGAGAAGGTGTGTGCTCAAGGAGGAGTTTTAACCTGGAGGATCATTAACTCTTTTAGTCAGCTGAGGAGCTGCGGTGGCTCGGCGAGTTGGAGTTCATCCTGGAAG",
|
329 |
+
"output": "promoter",
|
330 |
+
"model_response": "promoterpromoterpromo"
|
331 |
+
},
|
332 |
+
{
|
333 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
334 |
+
"input": "GCGCGCCCCTCCCCCCGGGTCGCGCGCCGCGGGCCCGAGCGCGAGACGCCGCTGCTCCCGCCCCCGCCGCCTCCTGGAGTCGCCGGGGCGGACGCGCAGTCCATGGGGCGCGGTGGGCCGGGGAGTTGCCCCAGGGGCCGCGGGAGTTGCTGAGAGGAGACAGGGTTGGGCTTTCTCCTCGCCCAGACCCCACCCCACCCTGCCGACCCCACCCCCTGCTCCTTCCTCCCCGGGGGCGCGCACTCGGGCACGCGCTCGGAAGTCGGGGGTCGGCGCGGAGTGCAGGCTGCTCCCGGGGTA",
|
335 |
+
"output": "promoter",
|
336 |
+
"model_response": "promoterpromoterpromo"
|
337 |
+
},
|
338 |
+
{
|
339 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
340 |
+
"input": "TATCTCCCCTCCTCGGTCCCTCTCCCTCCCTTCCCCCTCCCTTCCCCCGCCCTCCCCCTCTTTTTGCTCCTGCTCCCCCCCCCCACCCCGCCTTTCTCCTTTTGCAAGAAAATAATTTGACAGTCGATTTGCTGACAAGGGAGGAATTTGCATCCTGGATTTAAAAAAAAAAAAAAGGCCGAGAGGAGCTTGGGAACGGTTGCTAGGGGTGGGTAATGGGTGAAAAAAGGGGGGTACCGGGGAGCGGATAAGGAGGGTTAAGGGAGGGGGCGAGGATGGGGAGCAATGCAAAGGTAAGGC",
|
341 |
+
"output": "promoter",
|
342 |
+
"model_response": "promoterpromoterpromo"
|
343 |
+
},
|
344 |
+
{
|
345 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
346 |
+
"input": "GACCAGTAAGTCGGGGGGGAGGGAGCCGGGGCCTGGCCCCGCCCTATGGGGAGGCACGGGTGGCGAGGTCGGCCGTCCCATGCCCCGCCTGCTCCAGGTGCCGCCGTCCCCACAGGTGCCCGCCCCAGGCCCGCTGGAGCAGCCTGTGGCACGTGGGGTAAGTGGAGGCCGCGGCGGAGCCGTGCGTCCGACGGTTCTGGGGCGGGGGTCACCGGGGCCAGGCCCGCCAGGCCCTTACCTCACCAGGCCGCCTCCGCTGTCGCCCAGTCCCGGCCGCTGGCGGGAACTGACCTGGAGCAA",
|
347 |
+
"output": "promoter",
|
348 |
+
"model_response": "promoterpromoterpromo"
|
349 |
+
},
|
350 |
+
{
|
351 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
352 |
+
"input": "GCTACCGCTGCCGGAAGAGCGCGGCGCCCGACGGAGCCGTGTGGAGGCCAAAACTCCTCCCGGAAGCCGCTACTGGCCCCGCTTGCCAGGCCCAGCGTCTTTTCTGCATAGGACCCGGGGGAAGCCGGGAAGCCGTTAGGGGGCGGGGCAAGCGGGCGGGCGTGCGTCGGTCAAGTTTCCCGGGTCGCGTCAGGCTTCTTTCTCAGCACCGAGCGAGCGACGTTCGTGAAGCTTTCGTTTTGAGCGGCCAGACTCTGCTGTTCTCAAGCCTGGGAGGCACAGAGAAGCAAGACTTCCGCA",
|
353 |
+
"output": "promoter",
|
354 |
+
"model_response": "promoterpromoterpromo"
|
355 |
+
},
|
356 |
+
{
|
357 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
358 |
+
"input": "GAGAATCCCTCTATAATATCCTTGCACAGTTCCTCCGCAAGCACAATATGATTAATTTTTACTTGGCTGATGCCGAGATCTTAACTGCCCTAATCCTCGGCTCGAGTCGAGTTGGGGTTTCCTAAATGAATTACTATTGCAAATCTGGTTATGGCGCTCCACGAGATTATGGCAAAGGCTGAAGAATTTATTGACAGATGTGTTGTTAGACTGTGCTACGTTTTAGTGGCAGGTAGTGGAGCAGCCACCGGCGGACTCGAGAACGCAGTAACATGGCTCAAAAATGGTAGTATTTGTTTT",
|
359 |
+
"output": "Non-promoter",
|
360 |
+
"model_response": "Non-promoter"
|
361 |
+
},
|
362 |
+
{
|
363 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
364 |
+
"input": "CGGGAACGCACCTGCGTCTAGACGCTGACGCCGTGCGCGGGGCGGGGGCCGGCGAGCATGCGCAGTGTGGTGTGGACAGGGCGTGGTTGGCAAGCATGCGCAATGTGGGCCGGGCGGGGCTGGCGAGCATGCGCAGTATGGGGCGGGGCGGGGCAGGGCTGGTGAGCAAGCCCGCGGGGAGCGAGCGCGGAGGCGGGCGTTGCTACGGCAACGGTGGGCGGCCAGGGAGTTGCCTGAGGGAAAAGGGGCAGACGTCCCTGGGTTCCGGTGTTCGCGGAGGAGTCGAGGCACGGAGAGGCT",
|
365 |
+
"output": "promoter",
|
366 |
+
"model_response": "promoterpromoterpromo"
|
367 |
+
},
|
368 |
+
{
|
369 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
370 |
+
"input": "GTTCTGATTCTCGGTGAAAACGGTAAACATCGGCGTGCTATGTATGGGGATAGCAGGAGCTCTAGCATTTTTGCGCTGGGAAGATTATTCTTCTCTATCCGTCTATGCGAGCCGGCGTTTCGCAGGGGTACCCCCGGGGGCACTGCGTGGGAGGCCAGGAGGTTTTAGAATGGTCACCCGCCGGTTAGCTCCTTGGTAGTATTCTCGATGGACTGCGCGTTTGAGTCTTAAGGGCGTTTTCACGCTTCCGCCTTCTGGAAGCCTGCACTCCGTTTAGAGGAGCCAGAGACCCGTCCCGGT",
|
371 |
+
"output": "Non-promoter",
|
372 |
+
"model_response": "Non-promoter"
|
373 |
+
},
|
374 |
+
{
|
375 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
376 |
+
"input": "GTATTGGGGATGCAGGGCTCCGCGGTAGTTGACCTAGCAGGGCTAGTCACATCACGCCATAGGGAGCAGCAAGGGGCGCAGCAGTCAGCACGGCCGCTATTCAGATACGGTAACCAGACGCCGTGCGGCGTGCGCACCTGGCATCCAATGGCTTTGGGGATAGGGGCCGCCTGTGGGTTGTGGGTACGAATTAACGGAGTCGGAGCGGCGAATGAGCAGTCGCAAAGCCGAAGCCGGGCGTGGCGCTTTAGAGCGTCCCAGGCTCTCCTTCGGAAAGATGTCGGACACGGCAGTAGCTGA",
|
377 |
+
"output": "Non-promoter",
|
378 |
+
"model_response": "Non-promoter"
|
379 |
+
},
|
380 |
+
{
|
381 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
382 |
+
"input": "AGCAGCGGCCGCGGCGACAGCTCCAGCTCCGGCTCCGGCTCCGGCTCCGGCTCCGGCTCCCGCGCCTGCCCCGCTCGGCCCAGCGCGCCCGGGCTCCGCGCCCCGACCCCGCCGCCGCGCCTGCCGGGGGCCTCGGGCGCCCCCGCCGCCCGCCTCACGCTGAAGTTCCTGGCCGTGCTGCTGGCCGCGGGCATGCTGGCGTTCCTCGGTGCCGTCATCTGCATCATCGCCAGCGTGCCCCTGGCGGCCAGCCCGGCGCGGGCGCTGCCCGGCGGCGCCGACAATGCTTCGGTCGCCTCG",
|
383 |
+
"output": "promoter",
|
384 |
+
"model_response": "promoterpromoterpromo"
|
385 |
+
},
|
386 |
+
{
|
387 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
388 |
+
"input": "CTTTTGAACATCTTGTGCCAGGATTTGACATTAAGCTCCAGTTCATCCGCACACCCCGCGGCCGCTTTCGCAAGGGACCGAGAGGGTGGCGGCCTCGCCCGGAGACTCGCGGCGCTGGGAGCTGCCTGCCCTCGGTGCGGGCTGCAGACCGGGCCGCGAGGCGGGCGCGCGGCGCGCACTAGGACCCAGCAGGGCTCCAGGCCGGGGTGGGAGCCGCGCCGCCGACCCCGGGCGGGCGCCGGGCGCAGGAGCCGGGGTTCCGGCCGCGATCTGCTGCAGCTCGGCCGGGAGACGGCGCGA",
|
389 |
+
"output": "promoter",
|
390 |
+
"model_response": "promoterpromoterpromo"
|
391 |
+
},
|
392 |
+
{
|
393 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
394 |
+
"input": "TGTGTGGCCAAAGAGAAGAAATGGGTTGAGACAGCAGGCCTGGCACTTACTTTACCTGGCCCAGTCTTGCCTGACAATTAAAAAAAGACGCTTTAGACTGGGCGCGGTGGCTCACGCCTGTAATCCTAGCACTTTGGGAGGCTGAGGCGGGCGGATCACGAGGCCAGGAGATTGAGACGATCCTGGCTAACACAGTGAAATCCCATCTCCACTAAAAATACAAAAACTTAGCCGGGCATGGTGGCGGGCGCCTGTAGTCCCAGCTACTCGGGCGGCTGAGGCAGGAGAATCGCTTGAACC",
|
395 |
+
"output": "Non-promoter",
|
396 |
+
"model_response": "Non-promoter"
|
397 |
+
},
|
398 |
+
{
|
399 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
400 |
+
"input": "TCCATATAAACAGATTAACTGCATTCCCCAACGAGTAGAAATCTGCCCATCACTTTTGGGGTGGTTTGTTGCACCCACTATCGCCCAAGAAACTTCCCCAAGCCGGGTCATATTACAGGTCTCTAGCGCGCTGGCCGCACTCGCAGCCCACCATTCCCACAGAATGGTGTCATCGTAATCTGAAAAGTTGCCACTGGGCACCTGCGCAGGCTTGGCTGCGCCCTCTCGCGCCGCACGCTCCTTCCGGCGACGCGGATAGTCGTAAAGGTTTGAATACCTGATACAGGGAGAGAGGCCGAG",
|
401 |
+
"output": "Non-promoter",
|
402 |
+
"model_response": "Non-promoter"
|
403 |
+
},
|
404 |
+
{
|
405 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
406 |
+
"input": "TCTTCACAGGGCACAGCCCCTGCAGAGCATCTTGGTCATTTGGAAGAGGACACGGTATCCCCTCTGGCCAGAGTATGTCAGAGAAGGAAGAGTAGGGCTTTTTTGTTTTGTTTTTTTTTAAAGGTGCTTGCTTGTTTAATGTAAATAATAGAAAGCCTTAATATCTTTTCTGTAACACGGAGTAATATTTTAATGTCATGTTTTGGATGTACATAATATATTTATAACAAAGCAGCAAGAGTCTACTTAACCTTGGCTGCCTCGTGGTGTTTCCTGGCTGGGTGGGGTGGGGGGTATCAA",
|
407 |
+
"output": "Non-promoter",
|
408 |
+
"model_response": "promoterpromoterpromo"
|
409 |
+
},
|
410 |
+
{
|
411 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
412 |
+
"input": "TGTATTCAGACCAGAACAAACTCGAGTGCCCCAGCTCCTCTGGCAATTAGGTAGCCAATAGTTGAGATTGCGTAGGCAACTACGTAAAATGCGCCGTGCCGTGCCCCTTTCTGCCACTCTGAGTTCGGTAACATAGCTCCGGTTGCTAGGTACTTAAGGTATCCAGTTCCCCCAGCGATTTGCATGAACCGAGAGGGAGTGTCTTCTGCCAACAGATCAGGCGGGGGATCATGAACTCATCCTCCCAGGGAATGCCGGTCGGGGATCCCCCGCGCAGCTCACAGGCCCTGGGAGTGAGCT",
|
413 |
+
"output": "Non-promoter",
|
414 |
+
"model_response": "Non-promoter"
|
415 |
+
},
|
416 |
+
{
|
417 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
418 |
+
"input": "ACCTCGCTATTCTATGCACAGTTTTGAATCCTGCTTTTAAAATGCGATGTCTGCCTTGTCTTTAAATATTCTGAGGGGGAGGAGAGGAGATGTGTAATGCGACTGTCTATAATTTGAAACCGGACCTCACTATTTAGCGTCTCAGAAAACGCGGGGTTCATTGCCCTGGTCCCCCCCGCCTTCCCCCGCCCCCCGAACCTTCCGGCTGGCTCTCTTGCGGTCCCCGCCTCGGCGCTGATGTGGTCTGGCAGTGGAGATTGGCGCCCGGGCGGAGCACGATGGGCTTCCCCGCGGCAGGCA",
|
419 |
+
"output": "promoter",
|
420 |
+
"model_response": "promoterpromoterpromo"
|
421 |
+
},
|
422 |
+
{
|
423 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
424 |
+
"input": "CCACGAGCGCCAGAGCCCCGCGCCTCCCCTAGCATATCCGGCGGAGTCCCTGCCCCGCAAGCGCCCACTATACGGCCGGACAACTGTTGTGGGCGTGGATGCAACCACGGGGGAGGGGGCAGGCGTGATCTCAGTCCCGCCCGTGCCCGAGCCCGCGCCCGAGCAGGGACTACATTTCCCTCCCCCCTGGGTCAGGGAGGAGTCTCCTCCTCTATCTATTAAGGATTCTCGCCTCCGCACATGCGTTATCCTTCCAAGGCCGAAAGGATTTGGAGTCCTGTACGCCAGAGGCCGAACTCG",
|
425 |
+
"output": "Non-promoter",
|
426 |
+
"model_response": "Non-promoter"
|
427 |
+
},
|
428 |
+
{
|
429 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
430 |
+
"input": "ACCCTGCTGGGGGTACAGAGACGTTTCCGAAACTCAGCGGCTCGGTCGCCCCCAAAGGCCAGGCGGAACACACGCCCACACCGCGGCTCCCTTCCCGAAGTAAGACCGCCGGGCCACGGCCGCCCCCAGGAAGCCCCGCGCCCCGCACCCCACACCCGGGGACACAAACAGGCGCCGGGTAGCCGCGAGGGCCGGCGCGGCGCCTTTAAGAGGCGGCGGGCGGCGCTGCCCCCTGGCGGCCGCCCCGCCGCTTCCTCGCCGCCGCGGGCTCAAGCGGGGCGGCCGGGCCAGCGCGGGGCG",
|
431 |
+
"output": "promoter",
|
432 |
+
"model_response": "promoterpromoterpromo"
|
433 |
+
},
|
434 |
+
{
|
435 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
436 |
+
"input": "AGCTTCACAGTCAGAACGCGGTACCGTTGTGCCCCAGCTCGACTGCAGCCTCCCCGCCCCGATACACTTGTCCCATCCTGTCCACGGCGCCCTTGTTTTGATTCGCTGCTGTCCCAGAGAGCTTAGTTATTCGTACGGGCAATCTTGACCCTTTCCCAGGACCTCCCCTAATACTGTTCATTGATCCTGGACCGGCAGCGAGACCCCTTCCGGCATCCAGAGGCCCAGTACTTTCAATCTCCTCCCCCAAAATGCTTTCCCTTTGTGGGGACCCTCGACATAGATGGACACCCAGCGGCC",
|
437 |
+
"output": "Non-promoter",
|
438 |
+
"model_response": "Non-promoter"
|
439 |
+
},
|
440 |
+
{
|
441 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
442 |
+
"input": "CCGTTCTTGGCATCCTGAGAGGGCCAGTTAGTACTGAAGTCCCTTGGCTGCTCAAGGATTGCAGGGATGAGGCAAGTGGAACAGCCTCGGAACCTCCGAAAATGGGCACGCTCCAGGTCCCAGTTTCTATGGCAACCATACCGGCAAATTGGGCTCCGCAATGGTTTCTCCTGGAAAAACCGTGATTTTGGTTCCCGCGGACGTCTCTATGGTTTCGACAGCCTAGAAGGAACAAAACGGCATTTCCGGGAAGATGGCGGCGCACAAGTCAGGTCCGGCACATGTTTCCGCGGAGCGGAC",
|
443 |
+
"output": "promoter",
|
444 |
+
"model_response": "promoterpromoterpromo"
|
445 |
+
},
|
446 |
+
{
|
447 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
448 |
+
"input": "GCTCTCGCGCCGCTCGCGTGACCGGCCGGTGTGTGCGCGAGGCCCCGGCTCCCGGGGCACGGACGGCCGGGCGCGCGCCTCTGCGAGGGGCGTCCGGGTCCGAGTCGGCGGTCCGGGCCGGCGCGAGGTGCGTGCGGGCGGGCCGCGGGGGTCCCGGACGGACACAAGCGCACACACTCCCGGAGGAGCCTTCGAGGCTGCTCTTCCTCGGCCAGACGGAGAGCGGCACTGTCTCCCCGCCCAGCGCTCACTCGCCCCGCGTCTCCCCCCGCGGCGGCTGCTCCTCCTCGGCACCGCCAG",
|
449 |
+
"output": "promoter",
|
450 |
+
"model_response": "promoterpromoterpromo"
|
451 |
+
},
|
452 |
+
{
|
453 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
454 |
+
"input": "GTCCCTCCTCTGGTAGCCAGAATAAATTTTGCAGTAAGTTAGCAGAGTTGATTCCAAAAAGTCTGCACTGTGGCATCTCAGGAAACCAAACTTATCTGCTTAAGAAATAACTTGCAGAGAGAAAAGGAACTCTGACAAGGCTAATAACAGGGATTTCAAGCTGTGTTTCAGCAAGCCAGTCACAGAGTATTCACGTGTTAATTCACTGGCGTGTTCCGCGGCATAAGCACCCCCTCTCTGCCTTCCCCCAGTTCCAACTGTTGTGCTGCAGCAGATTTGGTCTGAGTCTGGGCAGAGCCC",
|
455 |
+
"output": "promoter",
|
456 |
+
"model_response": "promoterpromoterpromo"
|
457 |
+
},
|
458 |
+
{
|
459 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
460 |
+
"input": "ATGATGCTGGGGACGGGACACAAACAGGCGTGACAGCAGCGACCAGGCCGGCGCGGCGCCCGTAGACTGAGTCGAGACACCCTGGGCACAGGCGGCCGCCCCGCCTCTGTGATAGTTACTGGGACGTGTGCCCAACGGCCGGGCCAGCGCGGGGCGGCGGCGGGCAGGGGCGGCGAGTGCCTCGACGTTCTAGTGCGCCCTTCTCGGCGGTCTTATGCACTATGGCAGCAAGAGTTTTGAATATGTTGCGGTAAGTTGAAATTATCGAGAATGCCAGGCGAGTTGGCCGCGTGCGCCCCG",
|
461 |
+
"output": "Non-promoter",
|
462 |
+
"model_response": "Non-promoter"
|
463 |
+
},
|
464 |
+
{
|
465 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
466 |
+
"input": "CGCGACACGCCGTGCGCCTCCGCGGCTGCGCTACGAAAACGAGTCCCGGAGCGGCCCCGCGCCCGCCGCACCCGGCCCTCGCCCGCCCGAAGACAGGCGCCAAGCTGCCCCGCCGTCTCCCCAGCTAGCGCCCGGCCGCCGCCGCCTCGCGGGCCCCGGGCGGAAGGGGGCGGGGTCCCGATTCGCCCCGCCCCCGCGGAGGGATACGCGGCGCCGCGGCCCAAAACCCCCGGGCGAGGCGGCCGGGGCGGGTGAGGCGCTCCGCCTGCTGCGCGTCTACGCGGTCCCCGCGGGCCTTCC",
|
467 |
+
"output": "promoter",
|
468 |
+
"model_response": "promoterpromoterpromo"
|
469 |
+
},
|
470 |
+
{
|
471 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
472 |
+
"input": "CCCCTAGCCAGGAGGTAGCATCTGTCTCCAGGTGCCCGTGGGGTGTCCTCCATGAGGACCACCTCTTCTAACCACCTGCCCAGCTCAGAGGACCAGCTGGCAGGAAAGACGCTGCACCGCCCATGAGGCCAGAGCTACAAGTTGCTTCTAGGAGTGGCTGTGGGCGGAGGCTGGTGGTTCTGAAGGTGGCGGTGGTTCTGCAGCGTGGCTCCCCACAGCCCCTTTCATCTGAACAGTAGGGCTCATGCCACTTGTTAATCAGCCTTCCAGTGGCACGAGGAGTCATTTTCTGGGGATAGG",
|
473 |
+
"output": "promoter",
|
474 |
+
"model_response": "promoterpromoterpromo"
|
475 |
+
},
|
476 |
+
{
|
477 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
478 |
+
"input": "GCCTCCTCCCACGCTGCGCGCGCACCTCCCCGCCCCCACCCCTACCCGCTGGCGTGCCCAGTGGAACGGAGCCTTGTGTCTCCGCCTCAAGTCCCCGGATGCTCACCTCCCCGACTCGCCCCCGCTGTGGCCCCGCCCCCGCGCGGCTCTTCGTGCCACGTCACCGCCTGCGTCGCTTCCGGAGGCGCAGCGGGCGATGACGTAGAGGGACGTGCCCTCTATATGAGGTTGGGGAGCGGCTGAGTCGGCCTTTTCCGCCCGCTCCCCCCTCCCCCCGAGCGCCGCTCCGGCTGCACCGCG",
|
479 |
+
"output": "promoter",
|
480 |
+
"model_response": "promoterpromoterpromo"
|
481 |
+
},
|
482 |
+
{
|
483 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
484 |
+
"input": "CCATGCAGCGACGGCCGCCGCGGAGCTCCGAGCAGCGGTAGCGCCCCCCTGTAAAGCGGTTCGCTATGCCGGGGCCACTGTGAACCCTGCCGCCTGCCGGAACACTCTTCGCTCCGGACCAGCTCAGCCTCTGATAAGCTGGACTCGGCACGCCCGCAACAAGCACCGAGGAGTTAAGAGAGCCGCAAGCGCAGGGAAGGCCTCCCCGCACGGGTGGGGGAAAGCGGCCGGTGCAGCGCGGGGACAGGCACTCGGGCTGGCACTGGCTGCTAGGGATGTCGTCCTGGATAAGGTGGCATG",
|
485 |
+
"output": "promoter",
|
486 |
+
"model_response": "promoterpromoterpromo"
|
487 |
+
},
|
488 |
+
{
|
489 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
490 |
+
"input": "CCCCTGAGCCAGGCTGAAGCGGGATAGGAAGGGTAAAGGTGCTAGCTGAGCCGTAGTACGGCCCAGCAGTTCTCCAGGGGACAAGCTAGGCCCCCGCTGGAGGGAGTGGTGAGACTCAGCCGGGGCCTGATTCTGGTCAGCACCTTGGACAGCAGCCAGTGCCCTCCCTGGCCCCGCCTGGCCCCGCCTGGCCCCACATCCGCTACTCCTGGGCACCTCCTCAAATAGCACAGCCTCCAGCTGGTGCCCAGGGCCTGAATACACAGAGCGCTGAGAGAGTGGGGCAGTGTGGTCACGGAC",
|
491 |
+
"output": "promoter",
|
492 |
+
"model_response": "promoterpromoterpromo"
|
493 |
+
},
|
494 |
+
{
|
495 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
496 |
+
"input": "CCTGGGAAAAGCTGAGAGCATAATCGAGCGGCAAAGAAAACGCGACCACTGAGGACACTTAGAAGGGGGCGGGGCCTTTTTGTGGTACTAGGAGCTCGAGAAATTGACTAGATTACGAGTGGTTTACGACTCAAGCATACAACTTACAGCTTGGGCCTTCCGTAGCGGGCGTTGCGGGGGAACGTACAGTGCTTGAGGGGCGAGGTTTGCCGGAGCCTCATTCTAAAGGAATCTTCTAGTTCCAAGCGTTGTTCCAATTGGGCCGGAGCGTGCAACGGGACCCACGGAACTACAGGTGTG",
|
497 |
+
"output": "Non-promoter",
|
498 |
+
"model_response": "Non-promoter"
|
499 |
+
},
|
500 |
+
{
|
501 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
502 |
+
"input": "AAAGCCACCTCCGCGCCGGCAGCTCCGTTTCTAGGGAAACAGGACCAATTCCAGAAGTTGGGGTGTTACGTGTGCGATATACCTGCCCAAGAGCTGGTATCAATAGAGCGGGCGGCATGGTAGTGTTTCCCCCGGCCATGGTAAAATGCGAACCAGATCCTCGGCGGGTAACCGCGCCCCTAGGCGTTGGCATGTGTCGGGGTAGCATGACGGGGTCGTCCAGTTCTTAGCCAAGATGAATCCGGCTCCGAAGGTCCCCGGCTTCGTGGCCATAAAGGCGAACACCGCTGGAAAGGTGGC",
|
503 |
+
"output": "Non-promoter",
|
504 |
+
"model_response": "Non-promoter"
|
505 |
+
},
|
506 |
+
{
|
507 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
508 |
+
"input": "GGGCTCGGGGACGGGACGGGCGAGCAGTCATTTCTGAATAGCGCGTGCAGAGGGTCATCGCGAAAAGACTAAGACGCGACATAATGGACTGACTCGGGGCATGGCGGGGTGGAGGCGGGAAAGTGTCGAGGCCGGCTAATGCCTGGACAGGGAGGTGGGCATCGCCAGGGCGACGATGAGGGTCGCCACTCCGGAACACGATTAGCATCAGTGCACTGACCTCTGCCCCCACGACTCTCCAACACGCCCGATGATGTTGCGTGCGGCAGCTGTCGGCTAGGAGTCGCGGGGTCTGTGCGC",
|
509 |
+
"output": "Non-promoter",
|
510 |
+
"model_response": "Non-promoter"
|
511 |
+
},
|
512 |
+
{
|
513 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
514 |
+
"input": "TATGGTATTTCAAGGTCATAAATTACAAATGTGCATATTTATTATTCCCCTATAAAATGTTAATTAAAAAATTTTCATATTTACTTGATGTAACAGAAATGAAACCTGTAATGAGGGATCTAATTTCTTTTTCTTTTTTCATTCCAGAATGTATGACAACATGTCCACAATGGTGTACATAAAGGAAGACAAGTTGGAGAAGCTTACACAGGATGAAATTATTTCTAAGACAAAGCAAGTAATTCAGGGGCTGGAAGCTTTGAAGAATGAGCACAATTCCATTTTACAAAGTTTGCTGGA",
|
515 |
+
"output": "promoter",
|
516 |
+
"model_response": "promoterpromoterpromo"
|
517 |
+
},
|
518 |
+
{
|
519 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
520 |
+
"input": "ACGACCCTGGCCTCCGACTTCAACGACTTCATAAGGCGGCGTTTCTGGGCGCAGCCGTGTCGCTCCTGGTGAGAGGCCGCCGGCAGGCGGGATCCAGCGCCCTCCGGGGCACCGCGGGCGAGACCGTCGCCTTCGCACCCCCGGCGCGGTCGACCCCGCGGGGCCGTCGGGTCCTGGGTTCCCCGCCGCGTTGCGCTCGTCCCCCTCCTGTCAGAACCTGGGCCCCCGCCCCGCCCACCGGCGCGGGGCCTCTCCTCCTCCCGCTAACGGGCGGTCGGCCGCCTCCTTCCTCTCTTGGCT",
|
521 |
+
"output": "promoter",
|
522 |
+
"model_response": "promoterpromoterpromo"
|
523 |
+
},
|
524 |
+
{
|
525 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
526 |
+
"input": "GGTTTCTGAGGCTGAGGAGGTACCCAGGTAAGCACACTCTGGGTTCTCTGGTGAGCACACACCGAGCTGCCTGCATCTCCCCTACCCATGAAGGCAGCTAGCCGCCGGGCCAGTGGTGGGAAGGGGAGGCGGCCATTAGCGCCGTTCCCGCCGGGACTTGAAGCGCCCGGCCGCGGCAAGCCCCGCCCTCGGCGCGCCCCCGCGTCCGCGCGCGCTCCTCGGGTCTGCGCGGAGCCGGCGTCGGCGCGCGCTTGGGCGCCTGGCGAAGACCGAGAGAGGCTGGCGGGATCTCAGCGGCGC",
|
527 |
+
"output": "promoter",
|
528 |
+
"model_response": "promoterpromoterpromo"
|
529 |
+
},
|
530 |
+
{
|
531 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
532 |
+
"input": "GACCGCCCGTTGGACATACCTCTAAATTGGGAGCGCGGTGCCGATTTACGTGACCGCCGTGGTTCGCACGGTGGGTAAACAGTGGATCGTGGGGTGGTGAGAAGGTTCCGATGGGTGTTCTTACTACGGCCCGATCCTAGCAGTCGCCGACTTTGTTATTCTTATTGGTGGATGGGGGGCCGCTGATATAAATCTGGGAGATGAGGAGATAGGCGGGAGCGAGGGCTGACTCTGCGTGTCGAAGGAGGTAGAGAAAAAGCGTTGCTTAAAGGGAGGGAGCGTAGCTTGGTTGCTCCGTAG",
|
533 |
+
"output": "Non-promoter",
|
534 |
+
"model_response": "Non-promoter"
|
535 |
+
},
|
536 |
+
{
|
537 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
538 |
+
"input": "GACCTCTCTTCGGGCGGCAAGTGCCATACGTATAGATAAGCGGAATTTACATGGTGTCACAGTTAGCGCAAGGTCGAATAAATGCGGCTTAACACGACAGGTATCTCCAAATAAGTGTGTCTTACGGCGTAGATCGCGAGTTGAGCTGTCATTTCTTTCCAAGCGGCGCAGGACAGGACCCAGAAGCCAGAGCGCAGGAACTGCTTCCGGTTTGAGTTTTTATTAGAATAACGGGCTGTGGGAGAGGCAAGTGGCGGATCTTCAGGTCGGCGAGGGTGTGACATTCGACCCACAGGCCGC",
|
539 |
+
"output": "Non-promoter",
|
540 |
+
"model_response": "Non-promoter"
|
541 |
+
},
|
542 |
+
{
|
543 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
544 |
+
"input": "GCATATTCTGTCTGAAATCGTGTGCACCGAAATCCCCGCCTTGCGGTGGAGGCTGGCGCTAGGCGGCCTCAGCCTCGGCCTGCTGCGCTCAGGAACCCGCGCCCCGGCTCCTCGGCGATCCATTGCTCTTTCCTCTGGCGCCGGCCGCAGGCCTCGGTCACGCCCCCAGCGGCCCGTTGGTTTCCGGGTCCCGCGGGGTGCCCCCGCCCACACGCTATGCCTTAAATTGGGCCAGGCTGAGGCGCTGCTGCTGGAGCGGCCGATCCGAGACGTGGCTCCCTGGGCGGCAGAACCATGTTG",
|
545 |
+
"output": "promoter",
|
546 |
+
"model_response": "promoterpromoterpromo"
|
547 |
+
},
|
548 |
+
{
|
549 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
550 |
+
"input": "TGGTCTTGTGTCTCGGCGTACCATATCTTTTGGGTTCCCCGGCTGCCCCGGAGGTTCGCGGCGAGAGGACGGGCCGGGTGCGGCTAGTTTACCTGAGCAGTGTGAACTACCTCACTCCCATCGGGACCAGCCGGCATAAACACTCTTATTGCCTGTCAACGTTTTCGAGACATATTTTGTTCGCGAGCCCGCAGACAACGCGCATTCTGGAGTTCGAACGCTCAGTTCGCGTCCCCGAGAAGATATTATTCAGATGCCTGCGAGCTGGGGCCGGGTGGCCGCGCCGGGTCGGAGTGTGGC",
|
551 |
+
"output": "Non-promoter",
|
552 |
+
"model_response": "Non-promoter"
|
553 |
+
},
|
554 |
+
{
|
555 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
556 |
+
"input": "TCCCGGAGACAGGTGGGAGGGTGGGGGATGCCGGCAGGAGAGGGAAGCAGGACCGTGCGCGCCGCCACCCACCCGCAGTCCGTCCGCCCCCGGGCGCCCAGCGCGTGGCCCGAGCGGCGCTGCCCGCCGAGCCGGGAGCCCCCTCGGCCCCTCCCTTCAGGCGCGGGCCGGGGGCGGCTTCTCCGCGACCTTATGTAACCGGGCGGGAGGGGCCGGGCGGGCATGGGCCTTCCCGGCCCGGAGCTGGGAGTCGAAGGGGCGGGAGGCGTGATGGTGAACTCGCAAGAAGTTTGAGGGACG",
|
557 |
+
"output": "promoter",
|
558 |
+
"model_response": "promoterpromoterpromo"
|
559 |
+
},
|
560 |
+
{
|
561 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
562 |
+
"input": "GCAGGACTGAGTCACCTCCTGAAGGCCCCGTTCCAAAGCACTCCCACGGGGGATTAGGTTTCAACACAGGAATTTTTGGCGACACAAACATTGAGACCACAGCACCCAGCTCTCTGGGTTCCCAGAACCCTGTGGGAGGGCTGGGTCGCGCCTGCCCCCTTTTAAACACGAGGCTTCCATGGAAGCTTCTTGCCGGAGGCTGGACACTGAGGAATGGACAGGATCAACACTCTTCCACCGGCTCTTCCACCTCCCGATCACTTCCACCATCGCTGGGGGAAGAACGAATGCACCGCGTGG",
|
563 |
+
"output": "promoter",
|
564 |
+
"model_response": "promoterpromoterpromo"
|
565 |
+
},
|
566 |
+
{
|
567 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
568 |
+
"input": "CACAGCCCCAGATGGTGACTACTGGAGACTGCTGCCCCCAGGTATCCACATTGTCATTGCCCAAGCCCCTGGCTACGCCAAAGTCATCAAGAAAGTCATCATCCCCGCCCGGATGAAGAGGGCTGGCCGTGTGGACTTCATTCTGCAACCTCTGGGGATGGGACCCAAGAACTTTATTCATGGGCTGCGGAGGACTGGGCCCCACGACCCACTGGGAGGTGCCAGCTCTTTGGGGGAGGCCACGGAGCCCGACCCGCTCCGGGCGCGCAGGCAGCCCTCGGCCGACGGGAGTAAGCCCTG",
|
569 |
+
"output": "promoter",
|
570 |
+
"model_response": "promoterpromoterpromo"
|
571 |
+
},
|
572 |
+
{
|
573 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
574 |
+
"input": "AATCGTGTAATTCCCGCTTCCTATGGCAAGCGGCGCCCAGTTCTGGGTGACTTGTGAAATGAGCGTCCTGCTAGGCCATAGTCTCTACGAGGAGGATATAATGTAAAGTCTGGAGCGAACGTAAATGCCGTGACGTGTATTTGTGAGACTTCGCCCGGTCGGTGAGCTGGGTGCATAGGTTCCGAGCGCGCGACCACTTAGCTTCCCTTCTACAACAACCACCTGTGTGCGCATGCCCTCTCATCTTACAGTTGTCAAAGTGCTTTTGCCAGAGCGGTCACATAATCAACCTGAAAGAAG",
|
575 |
+
"output": "Non-promoter",
|
576 |
+
"model_response": "Non-promoter"
|
577 |
+
},
|
578 |
+
{
|
579 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
580 |
+
"input": "TCCTTAGAATATTGGGCATACAATTTACTTTTATAAATGTCTATGAAATAAAATTCTTATATAATTATATAAAATTACATAATATATAATTATATTGAAATAATTATAACTGTTGAATAAATGTATTATTTTTCCTTTGAACTATTTAACAGGGTTATATAACAAACTCATCACGAGTTGTGTCTAATAAGTCATCAGAGTTACTGTTTGACTTGACCCAGGATACAGGATTATCACATTACCAAGGGGGACCAACACTTTCTATGGCAGGTTGGTTTTAGTATTTTTTTCATAAAGGTT",
|
581 |
+
"output": "promoter",
|
582 |
+
"model_response": "promoterpromoterpromo"
|
583 |
+
},
|
584 |
+
{
|
585 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
586 |
+
"input": "CGCGCGCGCGCCGGGCCGAGGGGCCGAGAGGCGGGGTCTTACAGCGACCGCGGGAAGAGGGCGCCCCAGAGCTGAGCCGGAGGCCGGCTGCCAGCGGGGCGAGGGTGGGCGCGAGCGCAGGGGCGGGCCCCGAGGGAGGGCGGGGACGGTGAGGGGGCGGGGTCGGGCTAGGCGGGGACGCGCTCGCGGGGTGGGGAAGGCGGGGGCGCGGCGGTGGCGGGAGCGTGCCCGGTCCCCGCCCCTGTTCCCACTCTCCTTCCACCTCGGACCGGCCGGGGCTCCGCAGAGCCAAAGCTCGCT",
|
587 |
+
"output": "promoter",
|
588 |
+
"model_response": "promoterpromoterpromo"
|
589 |
+
},
|
590 |
+
{
|
591 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
592 |
+
"input": "TATTTCCGATTCCTTGACTAACGTACGGGGGCCCAGCACTTTCATTCCATCCCATCACCCCATGATCCTTCCCCAGACTCTAATACTGGCCGTAATATTTACCCCCCCCTGCTCCATGTGCGTAGATCAATGACACGAGCAAGCCATTGGTCTAAAACCCAGGGTTCTCTCGCAGAGGCCCTCCATTGTCTCTGCGGTGTCACTTCGCGCACACACATCAGAGCGGGCCTCTCTGTGAGGCGCGGGGTTGATGTACTCACACGCTGCGTTGATCGGAGGTCGCTGCCAAGCATGGCGCCC",
|
593 |
+
"output": "Non-promoter",
|
594 |
+
"model_response": "Non-promoter"
|
595 |
+
},
|
596 |
+
{
|
597 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
598 |
+
"input": "GGTCAGCAAAGGCTTCCCTGAGGAATTGGGAATAGCCAGCCATACGGGTGGAGCCGGTCAACTCCTTGTGGTTGGGAGCTAACCGATAGCCCCTACAAGGAGCATTCCAACTATGTTATATAACATTGTGATTTTCGCGAAGGCAACGACTTATTCTTAAGACAGTATCTATCTGAATCTGAAGATCCTTGGTCTGGAGATTGGTAAAAATCTCCAGTCATCTTATGTTTCTATTATACTCTACTGGCGTTTTTACGTGACGCGGGGAGTCTACCTCCCCCAACCATAATTTAACAGTAA",
|
599 |
+
"output": "Non-promoter",
|
600 |
+
"model_response": "Non-promoter"
|
601 |
+
}
|
602 |
+
]
|
04-gene-sft/.ipynb_checkpoints/merge_llama_with_dna_lora-checkpoint.py
ADDED
@@ -0,0 +1,368 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Usage:
|
3 |
+
python merge_llama_with_dna_lora.py \
|
4 |
+
--base_model path/to/llama/model \
|
5 |
+
--lora_model path/to/first/lora/model[,path/to/second/lora/model] \
|
6 |
+
--output_type [pth|huggingface] \
|
7 |
+
--output_dir path/to/output/dir
|
8 |
+
"""
|
9 |
+
import subprocess
|
10 |
+
import os
|
11 |
+
# Set environment variables (AutoDL general region).
# /etc/network_turbo is sourced in a bash subshell; every "NAME=value" line of
# its environment that matches "proxy" is copied into this process's environment.
result = subprocess.run(
    'bash -c "source /etc/network_turbo && env | grep proxy"',
    shell=True,
    capture_output=True,
    text=True,
)
output = result.stdout
for entry in output.splitlines():
    # partition() yields an empty separator when the line has no '='.
    name, sep, setting = entry.partition('=')
    if sep:
        os.environ[name] = setting
|
18 |
+
|
19 |
+
import argparse
|
20 |
+
import json
|
21 |
+
import os
|
22 |
+
import gc
|
23 |
+
import torch
|
24 |
+
import peft
|
25 |
+
from peft import PeftModel
|
26 |
+
from transformers import LlamaForCausalLM, LlamaTokenizer
|
27 |
+
from huggingface_hub import hf_hub_download
|
28 |
+
|
29 |
+
# Command-line interface. Each entry is (flag, keyword arguments for add_argument);
# the options are registered in the order listed here.
parser = argparse.ArgumentParser()
for _flag, _options in (
    ('--base_model', dict(
        default=None, required=True, type=str,
        help="Please specify a base_model")),
    ('--lora_model', dict(
        default=None, required=True, type=str,
        help="Please specify LoRA models to be merged (ordered); use commas to separate multiple LoRA models.")),
    ('--offload_dir', dict(
        default=None, type=str,
        help="(Optional) Please specify a temp folder for offloading (useful for low-RAM machines). Default None (disable offload).")),
    ('--output_type', dict(
        default='pth', choices=['pth', 'huggingface'], type=str,
        help="save the merged model in pth or huggingface format.")),
    ('--output_dir', dict(
        default='./', type=str)),
):
    parser.add_argument(_flag, **_options)
|
39 |
+
|
40 |
+
|
41 |
+
# One row per supported LLaMA variant:
# (size label, embedding dim, attention heads, layers, norm epsilon, number of .pth shards)
_LLAMA_VARIANTS = (
    ('7B', 4096, 32, 32, 1e-06, 1),
    ('13B', 5120, 40, 40, 1e-06, 2),
    ('33B', 6656, 52, 60, 1e-06, 4),
    ('65B', 8192, 64, 80, 1e-05, 8),
)

# Embedding width -> size label.
emb_to_model_size = {
    _dim: _size for _size, _dim, _heads, _layers, _eps, _shards in _LLAMA_VARIANTS
}

# Size label -> number of shards.
num_shards_of_models = {
    _size: _shards for _size, _dim, _heads, _layers, _eps, _shards in _LLAMA_VARIANTS
}

# Size label -> parameter dictionary. Key order is kept as
# dim, multiple_of, n_heads, n_layers, norm_eps, vocab_size.
params_of_models = {
    _size: {
        "dim": _dim,
        "multiple_of": 256,
        "n_heads": _heads,
        "n_layers": _layers,
        "norm_eps": _eps,
        "vocab_size": -1,
    }
    for _size, _dim, _heads, _layers, _eps, _shards in _LLAMA_VARIANTS
}
|
86 |
+
|
87 |
+
def transpose(weight, fan_in_fan_out):
    """Return ``weight.T`` when ``fan_in_fan_out`` is truthy, otherwise ``weight`` unchanged."""
    if fan_in_fan_out:
        return weight.T
    return weight
|
89 |
+
|
90 |
+
# Borrowed and modified from https://github.com/tloen/alpaca-lora
|
91 |
+
def translate_state_dict_key(k):
    """Translate a ``model.*`` / ``lm_head`` state-dict key into ``layers.N.*`` naming.

    Every occurrence of ``"base_model.model."`` is removed from ``k`` before matching.

    Returns:
        The translated key, or ``None`` for per-layer keys that end with
        ``rotary_emb.inv_freq`` or contain ``"lora"`` (callers skip those).

    Raises:
        NotImplementedError: for any key that matches none of the known names;
            the offending key is printed first.
    """
    k = k.replace("base_model.model.", "")

    # Keys that exist once per model.
    whole_model_names = {
        "model.embed_tokens.weight": "tok_embeddings.weight",
        "model.norm.weight": "norm.weight",
        "lm_head.weight": "output.weight",
    }
    if k in whole_model_names:
        return whole_model_names[k]

    if not k.startswith("model.layers."):
        print(k)
        raise NotImplementedError

    # "model.layers.<N>...." -> third dotted component is the layer index.
    layer = k.split(".")[2]

    # (suffix of the incoming key, name of the tensor inside the layer)
    per_layer_names = (
        (".self_attn.q_proj.weight", "attention.wq.weight"),
        (".self_attn.k_proj.weight", "attention.wk.weight"),
        (".self_attn.v_proj.weight", "attention.wv.weight"),
        (".self_attn.o_proj.weight", "attention.wo.weight"),
        (".mlp.gate_proj.weight", "feed_forward.w1.weight"),
        (".mlp.down_proj.weight", "feed_forward.w2.weight"),
        (".mlp.up_proj.weight", "feed_forward.w3.weight"),
        (".input_layernorm.weight", "attention_norm.weight"),
        (".post_attention_layernorm.weight", "ffn_norm.weight"),
    )
    for suffix, target in per_layer_names:
        if k.endswith(suffix):
            return f"layers.{layer}.{target}"

    # Checked only after the weight suffixes above, as in the original chain.
    if k.endswith("rotary_emb.inv_freq") or "lora" in k:
        return None

    print(layer, k)
    raise NotImplementedError
|
127 |
+
|
128 |
+
|
129 |
+
def unpermute(w):
    """Reorder the rows of a ``(dim, dim)`` weight by swapping two grouped axes.

    Reads the module-level names ``n_heads`` and ``dim``; both must be defined
    before this function is called. The tensor is viewed as
    ``(n_heads, 2, dim // n_heads // 2, dim)``, axes 1 and 2 are swapped, and the
    result is reshaped back to ``(dim, dim)``.
    NOTE(review): presumably this undoes the q/k row permutation applied when the
    checkpoint was converted to the Hugging Face layout - confirm against the caller.
    """
    half_head = dim // n_heads // 2
    grouped = w.view(n_heads, 2, half_head, dim)
    return grouped.transpose(1, 2).reshape(dim, dim)
|
133 |
+
|
134 |
+
|
135 |
+
def save_shards(model_sd, num_shards: int):
    """Rename the entries of ``model_sd`` and write them out as ``consolidated.0N.pth`` shards.

    Keys are translated with ``translate_state_dict_key``; entries whose translated
    key is ``None`` are dropped. ``wq``/``wk`` weights are passed through
    ``unpermute`` before being stored. Alongside the shard file(s), ``params.json``
    is written from the module-level ``params`` object.

    Reads the module-level names ``output_dir`` and ``params``, which are not
    defined inside this function and must be set before it is called.

    Args:
        model_sd: mapping of state-dict key -> tensor. When ``num_shards > 1`` the
            entries are deleted from this mapping as they are processed, so the
            caller's dictionary is emptied.
        num_shards: number of shard files to write. ``1`` stores every tensor whole
            in ``consolidated.00.pth``; otherwise each tensor is split (or
            replicated, for norm weights) across ``num_shards`` dictionaries.

    Raises:
        ValueError: in the multi-shard branch, for a translated key that matches
            none of the handled names.
        AssertionError: in the multi-shard branch, if the second dimension of
            ``tok_embeddings.weight`` is not divisible by ``num_shards``.
    """
    # Add the no_grad context manager
    with torch.no_grad():
        if num_shards == 1:
            # Single shard: rename keys and keep every tensor whole.
            new_state_dict = {}
            for k, v in model_sd.items():
                new_k = translate_state_dict_key(k)
                if new_k is not None:
                    # Substring test: matches the attention.wq / attention.wk keys.
                    if "wq" in new_k or "wk" in new_k:
                        new_state_dict[new_k] = unpermute(v)
                    else:
                        new_state_dict[new_k] = v

            os.makedirs(output_dir, exist_ok=True)
            print(f"Saving shard 1 of {num_shards} into {output_dir}/consolidated.00.pth")
            torch.save(new_state_dict, output_dir + "/consolidated.00.pth")
            with open(output_dir + "/params.json", "w") as f:
                json.dump(params, f)
        else:
            # One output dictionary per shard.
            new_state_dicts = [dict() for _ in range(num_shards)]
            # Iterate over a snapshot of the keys because entries are deleted below.
            for k in list(model_sd.keys()):
                v = model_sd[k]
                new_k = translate_state_dict_key(k)
                if new_k is not None:
                    # Choose how this tensor is distributed over the shards.
                    # NOTE(review): except for output.weight, the split size is a
                    # floor division; if the dimension is not divisible by
                    # num_shards, split() yields an extra chunk that zip() below
                    # silently drops - confirm inputs are always divisible.
                    if new_k=='tok_embeddings.weight':
                        print(f"Processing {new_k}")
                        assert v.size(1)%num_shards==0
                        splits = v.split(v.size(1)//num_shards,dim=1)
                    elif new_k=='output.weight':
                        print(f"Processing {new_k}")
                        if v.size(0)%num_shards==0:
                            splits = v.split(v.size(0)//num_shards,dim=0)
                        else:
                            # Uneven row count: the last shard receives the remainder.
                            size_list = [v.size(0)//num_shards] * num_shards
                            size_list[-1] += v.size(0)%num_shards
                            splits = v.split(size_list, dim=0) # 13B: size_list == [24976,24977]
                    elif new_k=='norm.weight':
                        # Norm weights are not split; every shard gets the full tensor.
                        print(f"Processing {new_k}")
                        splits = [v] * num_shards
                    elif 'ffn_norm.weight' in new_k:
                        print(f"Processing {new_k}")
                        splits = [v] * num_shards
                    elif 'attention_norm.weight' in new_k:
                        print(f"Processing {new_k}")
                        splits = [v] * num_shards


                    elif 'w1.weight' in new_k:
                        print(f"Processing {new_k}")
                        splits = v.split(v.size(0)//num_shards,dim=0)
                    elif 'w2.weight' in new_k:
                        print(f"Processing {new_k}")
                        splits = v.split(v.size(1)//num_shards,dim=1)
                    elif 'w3.weight' in new_k:
                        print(f"Processing {new_k}")
                        splits = v.split(v.size(0)//num_shards,dim=0)


                    elif 'wo.weight' in new_k:
                        print(f"Processing {new_k}")
                        splits = v.split(v.size(1)//num_shards,dim=1)

                    elif 'wv.weight' in new_k:
                        print(f"Processing {new_k}")
                        splits = v.split(v.size(0)//num_shards,dim=0)

                    elif "wq.weight" in new_k or "wk.weight" in new_k:
                        print(f"Processing {new_k}")
                        # Unpermute first, then split by rows.
                        v = unpermute(v)
                        splits = v.split(v.size(0)//num_shards,dim=0)
                    else:
                        print(f"Unexpected key {new_k}")
                        raise ValueError
                    # clone() gives each shard its own copy of its piece.
                    for sd,split in zip(new_state_dicts,splits):
                        sd[new_k] = split.clone()
                        del split
                    del splits
                # Drop the source entry (also for skipped keys) before moving on.
                del model_sd[k],v
                gc.collect()    # Effectively enforce garbage collection

            os.makedirs(output_dir, exist_ok=True)
            for i,new_state_dict in enumerate(new_state_dicts):
                # The shard index is appended after a literal "0" (consolidated.00, .01, ...).
                print(f"Saving shard {i+1} of {num_shards} into {output_dir}/consolidated.0{i}.pth")
                torch.save(new_state_dict, output_dir + f"/consolidated.0{i}.pth")
            with open(output_dir + "/params.json", "w") as f:
                print(f"Saving params.json into {output_dir}/params.json")
                json.dump(params, f)
|
222 |
+
|
223 |
+
|
224 |
+
if __name__=='__main__':
|
225 |
+
|
226 |
+
args = parser.parse_args()
|
227 |
+
base_model_path = args.base_model
|
228 |
+
lora_model_paths = [s.strip() for s in args.lora_model.split(',') if len(s.strip())!=0]
|
229 |
+
output_dir = args.output_dir
|
230 |
+
output_type = args.output_type
|
231 |
+
offload_dir = args.offload_dir
|
232 |
+
|
233 |
+
print(f"Base model: {base_model_path}")
|
234 |
+
print(f"LoRA model(s) {lora_model_paths}:")
|
235 |
+
|
236 |
+
if offload_dir is not None:
|
237 |
+
# Load with offloading, which is useful for low-RAM machines.
|
238 |
+
# Note that if you have enough RAM, please use original method instead, as it is faster.
|
239 |
+
base_model = LlamaForCausalLM.from_pretrained(
|
240 |
+
base_model_path,
|
241 |
+
load_in_8bit=False,
|
242 |
+
torch_dtype=torch.float16,
|
243 |
+
offload_folder=offload_dir,
|
244 |
+
offload_state_dict=True,
|
245 |
+
low_cpu_mem_usage=True,
|
246 |
+
device_map={"": "cpu"},
|
247 |
+
)
|
248 |
+
else:
|
249 |
+
# Original method without offloading
|
250 |
+
base_model = LlamaForCausalLM.from_pretrained(
|
251 |
+
base_model_path,
|
252 |
+
load_in_8bit=False,
|
253 |
+
torch_dtype=torch.float16,
|
254 |
+
device_map={"": "cpu"},
|
255 |
+
cache_dir=None, # 不使用缓存目录
|
256 |
+
force_download=False, # 禁止从远程下载
|
257 |
+
local_files_only=True # 强制仅从本地文件加载
|
258 |
+
)
|
259 |
+
|
260 |
+
## infer the model size from the checkpoint
|
261 |
+
embedding_size = base_model.get_input_embeddings().weight.size(1)
|
262 |
+
model_size = emb_to_model_size[embedding_size]
|
263 |
+
print(f"Peft version: {peft.__version__}")
|
264 |
+
print(f"Loading LoRA for {model_size} model")
|
265 |
+
|
266 |
+
lora_model = None
|
267 |
+
lora_model_sd = None
|
268 |
+
for lora_index, lora_model_path in enumerate(lora_model_paths):
|
269 |
+
print(f"Loading LoRA {lora_model_path}...")
|
270 |
+
tokenizer = LlamaTokenizer.from_pretrained(lora_model_path,
|
271 |
+
cache_dir=None, # 不使用缓存目录
|
272 |
+
force_download=False, # 禁止从远程下载
|
273 |
+
local_files_only=True # 强制仅从本地文件加载
|
274 |
+
)
|
275 |
+
|
276 |
+
print(f"base_model vocab size: {base_model.get_input_embeddings().weight.size(0)}")
|
277 |
+
print(f"tokenizer vocab size: {len(tokenizer)}")
|
278 |
+
|
279 |
+
model_vocab_size = base_model.get_input_embeddings().weight.size(0)
|
280 |
+
assert len(tokenizer) >= model_vocab_size, \
|
281 |
+
(f"The vocab size of the tokenizer {len(tokenizer)} is smaller than the vocab size of the base model {model_vocab_size}\n"
|
282 |
+
"This is not the intended use. Please check your model and tokenizer.")
|
283 |
+
if model_vocab_size != len(tokenizer):
|
284 |
+
base_model.resize_token_embeddings(len(tokenizer))
|
285 |
+
print(f"Extended vocabulary size to {len(tokenizer)}")
|
286 |
+
|
287 |
+
first_weight = base_model.model.layers[0].self_attn.q_proj.weight
|
288 |
+
first_weight_old = first_weight.clone()
|
289 |
+
|
290 |
+
print(f"Loading LoRA weights")
|
291 |
+
if hasattr(peft.LoraModel,'merge_and_unload'):
|
292 |
+
try:
|
293 |
+
lora_model = PeftModel.from_pretrained(
|
294 |
+
base_model,
|
295 |
+
lora_model_path,
|
296 |
+
device_map={"": "cpu"},
|
297 |
+
torch_dtype=torch.float16,
|
298 |
+
local_files_only=True
|
299 |
+
)
|
300 |
+
except RuntimeError as e:
|
301 |
+
if '[49953, 4096]' in str(e):
|
302 |
+
print("The vocab size of the tokenizer does not match the vocab size of the LoRA weight. \n"
|
303 |
+
"Did you misuse the LLaMA tokenizer with the Alpaca-LoRA weight?\n"
|
304 |
+
"Make sure that you use LLaMA tokenizer with the LLaMA-LoRA weight and Alpaca tokenizer with the Alpaca-LoRA weight!")
|
305 |
+
raise e
|
306 |
+
assert torch.allclose(first_weight_old, first_weight)
|
307 |
+
print(f"Merging with merge_and_unload...")
|
308 |
+
base_model = lora_model.merge_and_unload()
|
309 |
+
else:
|
310 |
+
base_model_sd = base_model.state_dict()
|
311 |
+
try:
|
312 |
+
lora_model_sd = torch.load(os.path.join(lora_model_path,'adapter_model.bin'),map_location='cpu')
|
313 |
+
except FileNotFoundError:
|
314 |
+
print("Cannot find lora model on the disk. Downloading lora model from hub...")
|
315 |
+
filename = hf_hub_download(repo_id=lora_model_path,filename='adapter_model.bin')
|
316 |
+
lora_model_sd = torch.load(filename,map_location='cpu')
|
317 |
+
if 'base_model.model.model.embed_tokens.weight' in lora_model_sd:
|
318 |
+
assert lora_model_sd['base_model.model.model.embed_tokens.weight'].shape[0]==len(tokenizer), \
|
319 |
+
("The vocab size of the tokenizer does not match the vocab size of the LoRA weight. \n"
|
320 |
+
"Did you misuse the LLaMA tokenizer with the Alpaca-LoRA weight?\n"
|
321 |
+
"Make sure that you use LLaMA tokenizer with the LLaMA-LoRA weight and Alpaca tokenizer with the Alpaca-LoRA weight!")
|
322 |
+
|
323 |
+
lora_config = peft.LoraConfig.from_pretrained(lora_model_path)
|
324 |
+
lora_scaling = lora_config.lora_alpha / lora_config.r
|
325 |
+
fan_in_fan_out = lora_config.fan_in_fan_out
|
326 |
+
lora_keys = [k for k in lora_model_sd if 'lora_A' in k]
|
327 |
+
non_lora_keys = [k for k in lora_model_sd if not 'lora_' in k]
|
328 |
+
|
329 |
+
for k in non_lora_keys:
|
330 |
+
print(f"merging {k}")
|
331 |
+
original_k = k.replace('base_model.model.','')
|
332 |
+
base_model_sd[original_k].copy_(lora_model_sd[k])
|
333 |
+
|
334 |
+
for k in lora_keys:
|
335 |
+
print(f"merging {k}")
|
336 |
+
original_key = k.replace('.lora_A','').replace('base_model.model.','')
|
337 |
+
assert original_key in base_model_sd
|
338 |
+
lora_a_key = k
|
339 |
+
lora_b_key = k.replace('lora_A','lora_B')
|
340 |
+
base_model_sd[original_key] += (
|
341 |
+
transpose(lora_model_sd[lora_b_key].float() @ lora_model_sd[lora_a_key].float(),fan_in_fan_out) * lora_scaling
|
342 |
+
)
|
343 |
+
assert base_model_sd[original_key].dtype == torch.float16
|
344 |
+
|
345 |
+
# did we do anything?
|
346 |
+
assert not torch.allclose(first_weight_old, first_weight)
|
347 |
+
|
348 |
+
tokenizer.save_pretrained(output_dir)
|
349 |
+
|
350 |
+
if output_type=='huggingface':
|
351 |
+
print("Saving to Hugging Face format...")
|
352 |
+
LlamaForCausalLM.save_pretrained(base_model, output_dir) #, state_dict=deloreanized_sd)
|
353 |
+
else: # output_type=='pth
|
354 |
+
print("Saving to pth format...")
|
355 |
+
|
356 |
+
base_model_sd = base_model.state_dict()
|
357 |
+
del lora_model, base_model, lora_model_sd
|
358 |
+
|
359 |
+
params = params_of_models[model_size]
|
360 |
+
num_shards = num_shards_of_models[model_size]
|
361 |
+
n_layers = params["n_layers"]
|
362 |
+
n_heads = params["n_heads"]
|
363 |
+
dim = params["dim"]
|
364 |
+
dims_per_head = dim // n_heads
|
365 |
+
base = 10000.0
|
366 |
+
inv_freq = 1.0 / (base ** (torch.arange(0, dims_per_head, 2).float() / dims_per_head))
|
367 |
+
|
368 |
+
save_shards(model_sd=base_model_sd, num_shards=num_shards)
|
04-gene-sft/.ipynb_checkpoints/pip_list-checkpoint.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:828ede2bc2aeec77ad37dd1e5ba9ff2fbbb481dbe3c2729b6cdafc4feb7ac669
|
3 |
+
size 7419
|
04-gene-sft/.ipynb_checkpoints/run_clm_pt_with_peft-checkpoint.py
CHANGED
@@ -21,6 +21,17 @@ https://huggingface.co/models?filter=text-generation
|
|
21 |
"""
|
22 |
# You can also adapt this script on your own causal language modeling task. Pointers for this are left as comments.
|
23 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
import logging
|
25 |
import numpy as np
|
26 |
import math
|
|
|
21 |
"""
|
22 |
# You can also adapt this script on your own causal language modeling task. Pointers for this are left as comments.
|
23 |
|
24 |
+
import subprocess
|
25 |
+
import os
|
26 |
+
# 设置环境变量, autodl一般区域
|
27 |
+
result = subprocess.run('bash -c "source /etc/network_turbo && env | grep proxy"', shell=True, capture_output=True, text=True)
|
28 |
+
output = result.stdout
|
29 |
+
for line in output.splitlines():
|
30 |
+
if '=' in line:
|
31 |
+
var, value = line.split('=', 1)
|
32 |
+
os.environ[var] = value
|
33 |
+
|
34 |
+
|
35 |
import logging
|
36 |
import numpy as np
|
37 |
import math
|
04-gene-sft/.ipynb_checkpoints/run_pt-checkpoint.sh
CHANGED
@@ -6,17 +6,17 @@ modules_to_save="embed_tokens,lm_head"
|
|
6 |
lora_dropout=0.05
|
7 |
|
8 |
pretrained_model=./llama-7b-hf
|
9 |
-
dna_eng_tokenizer_path=./
|
10 |
dataset_dir=./train_data
|
11 |
data_cache=temp_data_cache_dir
|
12 |
-
per_device_train_batch_size=
|
13 |
-
per_device_eval_batch_size=
|
14 |
gradient_accumulation_steps=8
|
15 |
output_dir=dnahlm_llama_7b
|
16 |
|
17 |
deepspeed_config_file=ds_zero2_no_offload.json
|
18 |
|
19 |
-
torchrun --nnodes 1 --nproc_per_node
|
20 |
--deepspeed ${deepspeed_config_file} \
|
21 |
--model_name_or_path ${pretrained_model} \
|
22 |
--tokenizer_name_or_path ${dna_eng_tokenizer_path} \
|
@@ -39,8 +39,8 @@ torchrun --nnodes 1 --nproc_per_node 6 run_clm_pt_with_peft.py \
|
|
39 |
--save_total_limit 3 \
|
40 |
--save_steps 200 \
|
41 |
--gradient_accumulation_steps ${gradient_accumulation_steps} \
|
42 |
-
--preprocessing_num_workers
|
43 |
-
--block_size
|
44 |
--output_dir ${output_dir} \
|
45 |
--overwrite_output_dir \
|
46 |
--ddp_timeout 30000 \
|
|
|
6 |
lora_dropout=0.05
|
7 |
|
8 |
pretrained_model=./llama-7b-hf
|
9 |
+
dna_eng_tokenizer_path=./merged_gene_eng_tokenizer_hf
|
10 |
dataset_dir=./train_data
|
11 |
data_cache=temp_data_cache_dir
|
12 |
+
per_device_train_batch_size=4
|
13 |
+
per_device_eval_batch_size=4
|
14 |
gradient_accumulation_steps=8
|
15 |
output_dir=dnahlm_llama_7b
|
16 |
|
17 |
deepspeed_config_file=ds_zero2_no_offload.json
|
18 |
|
19 |
+
torchrun --nnodes 1 --nproc_per_node 10 run_clm_pt_with_peft.py \
|
20 |
--deepspeed ${deepspeed_config_file} \
|
21 |
--model_name_or_path ${pretrained_model} \
|
22 |
--tokenizer_name_or_path ${dna_eng_tokenizer_path} \
|
|
|
39 |
--save_total_limit 3 \
|
40 |
--save_steps 200 \
|
41 |
--gradient_accumulation_steps ${gradient_accumulation_steps} \
|
42 |
+
--preprocessing_num_workers 64 \
|
43 |
+
--block_size 256 \
|
44 |
--output_dir ${output_dir} \
|
45 |
--overwrite_output_dir \
|
46 |
--ddp_timeout 30000 \
|
04-gene-sft/.ipynb_checkpoints/run_sft-checkpoint.sh
CHANGED
@@ -8,8 +8,8 @@ lora_dropout=0.05
|
|
8 |
pretrained_model=dnahlm-merge-hf
|
9 |
chinese_tokenizer_path=dnahlm-merge-hf
|
10 |
dataset_dir=sft_data
|
11 |
-
per_device_train_batch_size=
|
12 |
-
per_device_eval_batch_size=
|
13 |
gradient_accumulation_steps=8
|
14 |
output_dir=dnahlm-llama7b-sft
|
15 |
#peft_model=peft_model/dir
|
@@ -29,7 +29,7 @@ torchrun --nnodes 1 --nproc_per_node 6 run_clm_sft_with_peft.py \
|
|
29 |
--do_eval \
|
30 |
--seed $RANDOM \
|
31 |
--fp16 \
|
32 |
-
--num_train_epochs
|
33 |
--lr_scheduler_type cosine \
|
34 |
--learning_rate ${lr} \
|
35 |
--warmup_ratio 0.03 \
|
@@ -43,7 +43,7 @@ torchrun --nnodes 1 --nproc_per_node 6 run_clm_sft_with_peft.py \
|
|
43 |
--save_steps 200 \
|
44 |
--gradient_accumulation_steps ${gradient_accumulation_steps} \
|
45 |
--preprocessing_num_workers 4 \
|
46 |
-
--max_seq_length
|
47 |
--output_dir ${output_dir} \
|
48 |
--overwrite_output_dir \
|
49 |
--ddp_timeout 30000 \
|
|
|
8 |
pretrained_model=dnahlm-merge-hf
|
9 |
chinese_tokenizer_path=dnahlm-merge-hf
|
10 |
dataset_dir=sft_data
|
11 |
+
per_device_train_batch_size=8
|
12 |
+
per_device_eval_batch_size=8
|
13 |
gradient_accumulation_steps=8
|
14 |
output_dir=dnahlm-llama7b-sft
|
15 |
#peft_model=peft_model/dir
|
|
|
29 |
--do_eval \
|
30 |
--seed $RANDOM \
|
31 |
--fp16 \
|
32 |
+
--num_train_epochs 3 \
|
33 |
--lr_scheduler_type cosine \
|
34 |
--learning_rate ${lr} \
|
35 |
--warmup_ratio 0.03 \
|
|
|
43 |
--save_steps 200 \
|
44 |
--gradient_accumulation_steps ${gradient_accumulation_steps} \
|
45 |
--preprocessing_num_workers 4 \
|
46 |
+
--max_seq_length 256 \
|
47 |
--output_dir ${output_dir} \
|
48 |
--overwrite_output_dir \
|
49 |
--ddp_timeout 30000 \
|
04-gene-sft/2-gpt2-instruction-ft.ipynb
CHANGED
@@ -127,20 +127,76 @@
|
|
127 |
},
|
128 |
{
|
129 |
"cell_type": "code",
|
130 |
-
"execution_count":
|
131 |
-
"id": "
|
132 |
"metadata": {},
|
133 |
"outputs": [],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
134 |
"source": [
|
135 |
"from datasets import load_dataset\n",
|
136 |
"# 1. load ~11k samples from promoters prediction dataset\n",
|
137 |
-
"
|
138 |
-
"
|
139 |
]
|
140 |
},
|
141 |
{
|
142 |
"cell_type": "code",
|
143 |
-
"execution_count":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
144 |
"id": "93d09d8d-f521-49f7-b0e0-7ac089dfbf49",
|
145 |
"metadata": {},
|
146 |
"outputs": [],
|
@@ -169,10 +225,18 @@
|
|
169 |
},
|
170 |
{
|
171 |
"cell_type": "code",
|
172 |
-
"execution_count":
|
173 |
"id": "9f9c0e5a-6591-47ac-b358-d746a00dfc0a",
|
174 |
"metadata": {},
|
175 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
176 |
"source": [
|
177 |
"example = dna_dataset[\"train\"][0]\n",
|
178 |
"print(build_prompt(example))"
|
@@ -180,7 +244,7 @@
|
|
180 |
},
|
181 |
{
|
182 |
"cell_type": "code",
|
183 |
-
"execution_count":
|
184 |
"id": "83070a23-1604-4d28-b371-e01060331ed5",
|
185 |
"metadata": {},
|
186 |
"outputs": [],
|
@@ -199,10 +263,40 @@
|
|
199 |
},
|
200 |
{
|
201 |
"cell_type": "code",
|
202 |
-
"execution_count":
|
203 |
"id": "89fb8ed3-aa58-462f-b2a6-ce445c597a33",
|
204 |
"metadata": {},
|
205 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
206 |
"source": [
|
207 |
"dna_ft_dataset = load_dataset(\"json\", data_files='data/dna_promoter_300.jsonl')\n",
|
208 |
"dna_ft_dataset"
|
@@ -210,10 +304,30 @@
|
|
210 |
},
|
211 |
{
|
212 |
"cell_type": "code",
|
213 |
-
"execution_count":
|
214 |
"id": "e4f7b75f-6ccb-4fda-8004-40df7d52678f",
|
215 |
"metadata": {},
|
216 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
217 |
"source": [
|
218 |
"data = dna_ft_dataset[\"train\"].train_test_split(train_size=0.9, seed=42)\n",
|
219 |
"data"
|
@@ -221,7 +335,7 @@
|
|
221 |
},
|
222 |
{
|
223 |
"cell_type": "code",
|
224 |
-
"execution_count":
|
225 |
"id": "36d9ee0e-8423-4529-aa7e-fda2728fab2f",
|
226 |
"metadata": {},
|
227 |
"outputs": [],
|
@@ -235,13 +349,14 @@
|
|
235 |
"from tokenizers import Tokenizer\n",
|
236 |
"from transformers import GPT2TokenizerFast\n",
|
237 |
"\n",
|
|
|
238 |
"tokenizer = GPT2Tokenizer.from_pretrained(\"dnagpt/gene_eng_gpt2_v0\")\n",
|
239 |
"tokenizer.pad_token = tokenizer.eos_token"
|
240 |
]
|
241 |
},
|
242 |
{
|
243 |
"cell_type": "code",
|
244 |
-
"execution_count":
|
245 |
"id": "871baee0-f06f-4422-a741-af533f7d92e1",
|
246 |
"metadata": {},
|
247 |
"outputs": [],
|
@@ -270,10 +385,23 @@
|
|
270 |
},
|
271 |
{
|
272 |
"cell_type": "code",
|
273 |
-
"execution_count":
|
274 |
"id": "bca1c275-cc3d-43df-923e-e6604d584226",
|
275 |
"metadata": {},
|
276 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
277 |
"source": [
|
278 |
"example = data[\"test\"][0]\n",
|
279 |
"example"
|
@@ -281,10 +409,27 @@
|
|
281 |
},
|
282 |
{
|
283 |
"cell_type": "code",
|
284 |
-
"execution_count":
|
285 |
"id": "76f2e027-0a31-4919-bb7e-404c786e1599",
|
286 |
"metadata": {},
|
287 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
288 |
"source": [
|
289 |
"prompt = build_prompt(example)\n",
|
290 |
"print(prompt)"
|
@@ -292,24 +437,80 @@
|
|
292 |
},
|
293 |
{
|
294 |
"cell_type": "code",
|
295 |
-
"execution_count":
|
296 |
"id": "932b54ca-7e27-47cd-b67d-7ef8386b6608",
|
297 |
"metadata": {},
|
298 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
299 |
"source": [
|
300 |
"print('tokens: ', ' '.join(tokenizer.tokenize(prompt)))"
|
301 |
]
|
302 |
},
|
303 |
{
|
304 |
"cell_type": "code",
|
305 |
-
"execution_count":
|
306 |
"id": "26671faf-68d0-4a44-978e-e1a24e86c9b1",
|
307 |
"metadata": {},
|
308 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
309 |
"source": [
|
310 |
"def tokenize_function(example):\n",
|
311 |
" prompt = build_prompt(example)\n",
|
312 |
-
" result = tokenizer(prompt, padding='max_length', truncation=True, max_length=
|
313 |
" return result\n",
|
314 |
"\n",
|
315 |
"\n",
|
@@ -322,17 +523,31 @@
|
|
322 |
},
|
323 |
{
|
324 |
"cell_type": "code",
|
325 |
-
"execution_count":
|
326 |
"id": "3d46c8b1-9fb3-431a-87ea-c278468543e7",
|
327 |
"metadata": {},
|
328 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
329 |
"source": [
|
330 |
"tokenized_datasets[\"train\"]"
|
331 |
]
|
332 |
},
|
333 |
{
|
334 |
"cell_type": "code",
|
335 |
-
"execution_count":
|
336 |
"id": "26985c81-4335-4ac0-9a5a-84a5b4f2d0e4",
|
337 |
"metadata": {},
|
338 |
"outputs": [],
|
@@ -346,17 +561,32 @@
|
|
346 |
},
|
347 |
{
|
348 |
"cell_type": "code",
|
349 |
-
"execution_count":
|
350 |
"id": "e18d3095-d6dd-423b-84fb-dca4a629d450",
|
351 |
"metadata": {},
|
352 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
353 |
"source": [
|
354 |
"model = GPT2LMHeadModel.from_pretrained(\"dnagpt/gene_eng_gpt2_v0\")"
|
355 |
]
|
356 |
},
|
357 |
{
|
358 |
"cell_type": "code",
|
359 |
-
"execution_count":
|
360 |
"id": "12134cf2-676a-4176-a733-35caab2fd520",
|
361 |
"metadata": {},
|
362 |
"outputs": [],
|
@@ -401,10 +631,41 @@
|
|
401 |
},
|
402 |
{
|
403 |
"cell_type": "code",
|
404 |
-
"execution_count":
|
405 |
"id": "b9a2e2a9-a1ff-44b0-a550-623a16d0d7a2",
|
406 |
"metadata": {},
|
407 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
408 |
"source": [
|
409 |
"input_text = format_input(data[\"test\"][0])\n",
|
410 |
"\n",
|
@@ -418,7 +679,7 @@
|
|
418 |
},
|
419 |
{
|
420 |
"cell_type": "code",
|
421 |
-
"execution_count":
|
422 |
"id": "63b54fe2-f077-4ca8-974e-1bcc41ce57d6",
|
423 |
"metadata": {},
|
424 |
"outputs": [],
|
@@ -437,10 +698,217 @@
|
|
437 |
},
|
438 |
{
|
439 |
"cell_type": "code",
|
440 |
-
"execution_count":
|
441 |
"id": "61df123d-e67d-4548-998a-de1e2781e774",
|
442 |
"metadata": {},
|
443 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
444 |
"source": [
|
445 |
"# 初始化Trainer\n",
|
446 |
"trainer = Trainer(\n",
|
@@ -454,10 +922,204 @@
|
|
454 |
},
|
455 |
{
|
456 |
"cell_type": "code",
|
457 |
-
"execution_count":
|
458 |
"id": "a9cd936a-5ea6-43e3-9848-27080f818606",
|
459 |
"metadata": {},
|
460 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
461 |
"source": [
|
462 |
"# 开始训练\n",
|
463 |
"trainer.train()"
|
@@ -465,10 +1127,18 @@
|
|
465 |
},
|
466 |
{
|
467 |
"cell_type": "code",
|
468 |
-
"execution_count":
|
469 |
"id": "315aae76-44b4-4513-8139-40ef22934873",
|
470 |
"metadata": {},
|
471 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
472 |
"source": [
|
473 |
"save_dir = 'gpt_ft/final'\n",
|
474 |
"trainer.save_model(save_dir)\n",
|
@@ -477,7 +1147,7 @@
|
|
477 |
},
|
478 |
{
|
479 |
"cell_type": "code",
|
480 |
-
"execution_count":
|
481 |
"id": "28d2dbbc-02ff-4120-b230-b19905a786cd",
|
482 |
"metadata": {},
|
483 |
"outputs": [],
|
@@ -488,20 +1158,92 @@
|
|
488 |
},
|
489 |
{
|
490 |
"cell_type": "code",
|
491 |
-
"execution_count":
|
492 |
"id": "08987c3c-063a-4e9b-9ebb-e637b0b5bccd",
|
493 |
"metadata": {},
|
494 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
495 |
"source": [
|
496 |
"finetuned_model"
|
497 |
]
|
498 |
},
|
499 |
{
|
500 |
"cell_type": "code",
|
501 |
-
"execution_count":
|
502 |
"id": "d75010e8-6d6a-40ef-852e-0d705adc3da8",
|
503 |
"metadata": {},
|
504 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
505 |
"source": [
|
506 |
"print(\"input (test):\", input_text)\n",
|
507 |
"\n",
|
@@ -517,10 +1259,217 @@
|
|
517 |
},
|
518 |
{
|
519 |
"cell_type": "code",
|
520 |
-
"execution_count":
|
521 |
"id": "64365e15-510e-4abf-92f5-c78b660b37dc",
|
522 |
"metadata": {},
|
523 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
524 |
"source": [
|
525 |
"test_data = data[\"test\"].select(range(100))\n",
|
526 |
"\n",
|
@@ -543,7 +1492,7 @@
|
|
543 |
},
|
544 |
{
|
545 |
"cell_type": "code",
|
546 |
-
"execution_count":
|
547 |
"id": "a45fb780-fc3f-401c-b6e0-6f7d0c1682de",
|
548 |
"metadata": {},
|
549 |
"outputs": [],
|
@@ -556,19 +1505,128 @@
|
|
556 |
"# 将 Dataset 对象导出为 JSON 文件\n",
|
557 |
"# test_data.to_json(output_file)\n",
|
558 |
"with open(output_file, \"w\") as file:\n",
|
559 |
-
" json.dump(data_list, file, indent=4) # \"indent\" for pretty-printing"
|
560 |
]
|
561 |
},
|
562 |
{
|
563 |
"cell_type": "code",
|
564 |
-
"execution_count":
|
565 |
"id": "a83c8881-c763-4bba-8b85-584a6722a38e",
|
566 |
"metadata": {},
|
567 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
568 |
"source": [
|
569 |
"import json\n",
|
570 |
"\n",
|
571 |
"\n",
|
|
|
572 |
"\n",
|
573 |
"with open(output_file, \"r\") as file:\n",
|
574 |
" test_data = json.load(file)\n",
|
@@ -580,15 +1638,30 @@
|
|
580 |
" output = item[\"output\"]\n",
|
581 |
" #output = \" \".join(tokenizer.tokenize(output))\n",
|
582 |
" model_response = item[\"model_response\"]\n",
|
|
|
|
|
|
|
583 |
" if model_response == output: #same it\n",
|
584 |
" same_sum = same_sum + 1\n",
|
585 |
" \n",
|
586 |
-
" if
|
587 |
-
"
|
|
|
|
|
|
|
|
|
588 |
"\n",
|
589 |
"\n",
|
590 |
-
"print(\"
|
591 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
592 |
}
|
593 |
],
|
594 |
"metadata": {
|
|
|
127 |
},
|
128 |
{
|
129 |
"cell_type": "code",
|
130 |
+
"execution_count": 1,
|
131 |
+
"id": "b28e3499-bbff-4548-9f85-2baee088cabf",
|
132 |
"metadata": {},
|
133 |
"outputs": [],
|
134 |
+
"source": [
|
135 |
+
"import subprocess\n",
|
136 |
+
"import os\n",
|
137 |
+
"# 设置环境变量, autodl一般区域\n",
|
138 |
+
"result = subprocess.run('bash -c \"source /etc/network_turbo && env | grep proxy\"', shell=True, capture_output=True, text=True)\n",
|
139 |
+
"output = result.stdout\n",
|
140 |
+
"for line in output.splitlines():\n",
|
141 |
+
" if '=' in line:\n",
|
142 |
+
" var, value = line.split('=', 1)\n",
|
143 |
+
" os.environ[var] = value"
|
144 |
+
]
|
145 |
+
},
|
146 |
+
{
|
147 |
+
"cell_type": "code",
|
148 |
+
"execution_count": 6,
|
149 |
+
"id": "dc04d5e3-7623-4d59-9f3b-ad03e339db11",
|
150 |
+
"metadata": {},
|
151 |
+
"outputs": [
|
152 |
+
{
|
153 |
+
"data": {
|
154 |
+
"text/plain": [
|
155 |
+
"DatasetDict({\n",
|
156 |
+
" train: Dataset({\n",
|
157 |
+
" features: ['sequence', 'label'],\n",
|
158 |
+
" num_rows: 59195\n",
|
159 |
+
" })\n",
|
160 |
+
"})"
|
161 |
+
]
|
162 |
+
},
|
163 |
+
"execution_count": 6,
|
164 |
+
"metadata": {},
|
165 |
+
"output_type": "execute_result"
|
166 |
+
}
|
167 |
+
],
|
168 |
"source": [
|
169 |
"from datasets import load_dataset\n",
|
170 |
"# 1. load ~11k samples from promoters prediction dataset\n",
|
171 |
+
"dna_dataset = load_dataset(\"dnagpt/dna_promoter_300\")\n",
|
172 |
+
"dna_dataset"
|
173 |
]
|
174 |
},
|
175 |
{
|
176 |
"cell_type": "code",
|
177 |
+
"execution_count": 7,
|
178 |
+
"id": "f7332fa1-3343-4247-b4cc-54733dff6964",
|
179 |
+
"metadata": {},
|
180 |
+
"outputs": [
|
181 |
+
{
|
182 |
+
"data": {
|
183 |
+
"text/plain": [
|
184 |
+
"{'sequence': 'TAAATACGGAAGTTTATTACTTGAGGAATAGATGGAATCGTCGGGCGTGAGAGATCATAATCGGCTGCTTCTGGGAGCCGCACGTGGGAAAGACTTATCCCCGACGGAGCTGGGACTGGGGCACAAACCGGAAGGAACACATCTGACCGAGAAAGAGACCAAGTGGCTCAGGTAGGACCAAAGCGAGCAAGGCTGCGGGTCCTGTTGCTCTCTGTCCTGTAAATTTAAACGTTACGCCACCTGGTAATGATACCCTCGTCCTCCGAGGCGACAAGTCAGAACTTCCACCAAGGGCATTAC',\n",
|
185 |
+
" 'label': 0}"
|
186 |
+
]
|
187 |
+
},
|
188 |
+
"execution_count": 7,
|
189 |
+
"metadata": {},
|
190 |
+
"output_type": "execute_result"
|
191 |
+
}
|
192 |
+
],
|
193 |
+
"source": [
|
194 |
+
"dna_dataset[\"train\"][0]"
|
195 |
+
]
|
196 |
+
},
|
197 |
+
{
|
198 |
+
"cell_type": "code",
|
199 |
+
"execution_count": 8,
|
200 |
"id": "93d09d8d-f521-49f7-b0e0-7ac089dfbf49",
|
201 |
"metadata": {},
|
202 |
"outputs": [],
|
|
|
225 |
},
|
226 |
{
|
227 |
"cell_type": "code",
|
228 |
+
"execution_count": 9,
|
229 |
"id": "9f9c0e5a-6591-47ac-b358-d746a00dfc0a",
|
230 |
"metadata": {},
|
231 |
+
"outputs": [
|
232 |
+
{
|
233 |
+
"name": "stdout",
|
234 |
+
"output_type": "stream",
|
235 |
+
"text": [
|
236 |
+
"{'instruction': 'Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.', 'input': 'TAAATACGGAAGTTTATTACTTGAGGAATAGATGGAATCGTCGGGCGTGAGAGATCATAATCGGCTGCTTCTGGGAGCCGCACGTGGGAAAGACTTATCCCCGACGGAGCTGGGACTGGGGCACAAACCGGAAGGAACACATCTGACCGAGAAAGAGACCAAGTGGCTCAGGTAGGACCAAAGCGAGCAAGGCTGCGGGTCCTGTTGCTCTCTGTCCTGTAAATTTAAACGTTACGCCACCTGGTAATGATACCCTCGTCCTCCGAGGCGACAAGTCAGAACTTCCACCAAGGGCATTAC', 'output': 'Non-promoter'}\n"
|
237 |
+
]
|
238 |
+
}
|
239 |
+
],
|
240 |
"source": [
|
241 |
"example = dna_dataset[\"train\"][0]\n",
|
242 |
"print(build_prompt(example))"
|
|
|
244 |
},
|
245 |
{
|
246 |
"cell_type": "code",
|
247 |
+
"execution_count": 10,
|
248 |
"id": "83070a23-1604-4d28-b371-e01060331ed5",
|
249 |
"metadata": {},
|
250 |
"outputs": [],
|
|
|
263 |
},
|
264 |
{
|
265 |
"cell_type": "code",
|
266 |
+
"execution_count": 11,
|
267 |
"id": "89fb8ed3-aa58-462f-b2a6-ce445c597a33",
|
268 |
"metadata": {},
|
269 |
+
"outputs": [
|
270 |
+
{
|
271 |
+
"data": {
|
272 |
+
"application/vnd.jupyter.widget-view+json": {
|
273 |
+
"model_id": "7b84d3a64bf645ada13d0cada2d9f524",
|
274 |
+
"version_major": 2,
|
275 |
+
"version_minor": 0
|
276 |
+
},
|
277 |
+
"text/plain": [
|
278 |
+
"Generating train split: 0 examples [00:00, ? examples/s]"
|
279 |
+
]
|
280 |
+
},
|
281 |
+
"metadata": {},
|
282 |
+
"output_type": "display_data"
|
283 |
+
},
|
284 |
+
{
|
285 |
+
"data": {
|
286 |
+
"text/plain": [
|
287 |
+
"DatasetDict({\n",
|
288 |
+
" train: Dataset({\n",
|
289 |
+
" features: ['instruction', 'input', 'output'],\n",
|
290 |
+
" num_rows: 59195\n",
|
291 |
+
" })\n",
|
292 |
+
"})"
|
293 |
+
]
|
294 |
+
},
|
295 |
+
"execution_count": 11,
|
296 |
+
"metadata": {},
|
297 |
+
"output_type": "execute_result"
|
298 |
+
}
|
299 |
+
],
|
300 |
"source": [
|
301 |
"dna_ft_dataset = load_dataset(\"json\", data_files='data/dna_promoter_300.jsonl')\n",
|
302 |
"dna_ft_dataset"
|
|
|
304 |
},
|
305 |
{
|
306 |
"cell_type": "code",
|
307 |
+
"execution_count": 12,
|
308 |
"id": "e4f7b75f-6ccb-4fda-8004-40df7d52678f",
|
309 |
"metadata": {},
|
310 |
+
"outputs": [
|
311 |
+
{
|
312 |
+
"data": {
|
313 |
+
"text/plain": [
|
314 |
+
"DatasetDict({\n",
|
315 |
+
" train: Dataset({\n",
|
316 |
+
" features: ['instruction', 'input', 'output'],\n",
|
317 |
+
" num_rows: 53275\n",
|
318 |
+
" })\n",
|
319 |
+
" test: Dataset({\n",
|
320 |
+
" features: ['instruction', 'input', 'output'],\n",
|
321 |
+
" num_rows: 5920\n",
|
322 |
+
" })\n",
|
323 |
+
"})"
|
324 |
+
]
|
325 |
+
},
|
326 |
+
"execution_count": 12,
|
327 |
+
"metadata": {},
|
328 |
+
"output_type": "execute_result"
|
329 |
+
}
|
330 |
+
],
|
331 |
"source": [
|
332 |
"data = dna_ft_dataset[\"train\"].train_test_split(train_size=0.9, seed=42)\n",
|
333 |
"data"
|
|
|
335 |
},
|
336 |
{
|
337 |
"cell_type": "code",
|
338 |
+
"execution_count": 13,
|
339 |
"id": "36d9ee0e-8423-4529-aa7e-fda2728fab2f",
|
340 |
"metadata": {},
|
341 |
"outputs": [],
|
|
|
349 |
"from tokenizers import Tokenizer\n",
|
350 |
"from transformers import GPT2TokenizerFast\n",
|
351 |
"\n",
|
352 |
+
"#需要使用生物序列+英文的多模态大模型\n",
|
353 |
"tokenizer = GPT2Tokenizer.from_pretrained(\"dnagpt/gene_eng_gpt2_v0\")\n",
|
354 |
"tokenizer.pad_token = tokenizer.eos_token"
|
355 |
]
|
356 |
},
|
357 |
{
|
358 |
"cell_type": "code",
|
359 |
+
"execution_count": 14,
|
360 |
"id": "871baee0-f06f-4422-a741-af533f7d92e1",
|
361 |
"metadata": {},
|
362 |
"outputs": [],
|
|
|
385 |
},
|
386 |
{
|
387 |
"cell_type": "code",
|
388 |
+
"execution_count": 15,
|
389 |
"id": "bca1c275-cc3d-43df-923e-e6604d584226",
|
390 |
"metadata": {},
|
391 |
+
"outputs": [
|
392 |
+
{
|
393 |
+
"data": {
|
394 |
+
"text/plain": [
|
395 |
+
"{'instruction': 'Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.',\n",
|
396 |
+
" 'input': 'CCAGGATGCGCTGACGACCCGGCTGGCAGGCGGGTCCTCGTGGGCGAGGCGAGGGAGGCGGCGAGAGAGGAGCAATAGTTTCCCACCGCTCCCTCTCAGGCGCAGGGTCTAGAGAAGCGCGAGGGGATCTAGAGAAGCCGGAGGGGAGGAAGCGCGAGTCCGCGGCCCGCCCCGTTGCGTCCCACCCACCGCGTCCCCTCCCCTCCCCTCCCGCTGCGGGAAAAGCGGCCGCGGGCGGCGGCGCCCACTGTGGGGCGGGCGGAGCGCCGCGGGAGGCGGACGAGATGCGAGCGCGGCCGC',\n",
|
397 |
+
" 'output': 'promoter'}"
|
398 |
+
]
|
399 |
+
},
|
400 |
+
"execution_count": 15,
|
401 |
+
"metadata": {},
|
402 |
+
"output_type": "execute_result"
|
403 |
+
}
|
404 |
+
],
|
405 |
"source": [
|
406 |
"example = data[\"test\"][0]\n",
|
407 |
"example"
|
|
|
409 |
},
|
410 |
{
|
411 |
"cell_type": "code",
|
412 |
+
"execution_count": 16,
|
413 |
"id": "76f2e027-0a31-4919-bb7e-404c786e1599",
|
414 |
"metadata": {},
|
415 |
+
"outputs": [
|
416 |
+
{
|
417 |
+
"name": "stdout",
|
418 |
+
"output_type": "stream",
|
419 |
+
"text": [
|
420 |
+
"Below is an instruction that describes a task. Write a response that appropriately completes the request.\n",
|
421 |
+
"\n",
|
422 |
+
"### Instruction:\n",
|
423 |
+
"Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.\n",
|
424 |
+
"\n",
|
425 |
+
"### Input:\n",
|
426 |
+
"CCAGGATGCGCTGACGACCCGGCTGGCAGGCGGGTCCTCGTGGGCGAGGCGAGGGAGGCGGCGAGAGAGGAGCAATAGTTTCCCACCGCTCCCTCTCAGGCGCAGGGTCTAGAGAAGCGCGAGGGGATCTAGAGAAGCCGGAGGGGAGGAAGCGCGAGTCCGCGGCCCGCCCCGTTGCGTCCCACCCACCGCGTCCCCTCCCCTCCCCTCCCGCTGCGGGAAAAGCGGCCGCGGGCGGCGGCGCCCACTGTGGGGCGGGCGGAGCGCCGCGGGAGGCGGACGAGATGCGAGCGCGGCCGC\n",
|
427 |
+
"\n",
|
428 |
+
"### Response:\n",
|
429 |
+
"promoter\n"
|
430 |
+
]
|
431 |
+
}
|
432 |
+
],
|
433 |
"source": [
|
434 |
"prompt = build_prompt(example)\n",
|
435 |
"print(prompt)"
|
|
|
437 |
},
|
438 |
{
|
439 |
"cell_type": "code",
|
440 |
+
"execution_count": 17,
|
441 |
"id": "932b54ca-7e27-47cd-b67d-7ef8386b6608",
|
442 |
"metadata": {},
|
443 |
+
"outputs": [
|
444 |
+
{
|
445 |
+
"name": "stdout",
|
446 |
+
"output_type": "stream",
|
447 |
+
"text": [
|
448 |
+
"tokens: Bel ow Ġ is Ġ an Ġ instruc tion Ġ th at Ġ describ es Ġ a Ġ t ask . ĠWrit e Ġ a Ġ respon se Ġ th at Ġ appropri at el y Ġ complet es Ġ the Ġ request . Ċ Ċ # # # ĠIn struc tion : Ċ D eter min e Ġ cor e Ġ promo ter Ġ det ec tion Ġ of Ġ follow ing Ġ d na Ġ sequenc e , ĠTh e Ġ resul t Ġ will Ġ be Ġ on e Ġ of Ġ the Ġ follow ing : ĠN on - promo ter , Ġ promo ter . Ċ Ċ # # # ĠIn put : Ċ CC AGGATGC GC TGACG ACCC GGCTGGC AGGC GGGTCC TCG TGGGCG AGGCG AGGGAGGC GGCG AGAGAGG AGCAATAG TTTCCC ACCGC TCCCTCTC AGGCGC AGGG TCTAG AGAAGC GCG AGGGG ATCTAG AGAAGCC GG AGGGG AGGAAGC GCG AGTCC GCGG CCCGCC CCG TTGCG TCCC ACCCACC GCG TCCCCTCCCC TCCCCTCCC GCTGC GGG AAAAGC GGCCGC GGGCGGC GGCGCCC ACTGTG GGGC GGGC GGAGC GCCGC GGGAGGC GGACG AGATGCG AGCGC GGCCGC Ċ Ċ # # # ĠR esp on se : Ċ promo ter\n"
|
449 |
+
]
|
450 |
+
}
|
451 |
+
],
|
452 |
"source": [
|
453 |
"print('tokens: ', ' '.join(tokenizer.tokenize(prompt)))"
|
454 |
]
|
455 |
},
|
456 |
{
|
457 |
"cell_type": "code",
|
458 |
+
"execution_count": 18,
|
459 |
"id": "26671faf-68d0-4a44-978e-e1a24e86c9b1",
|
460 |
"metadata": {},
|
461 |
+
"outputs": [
|
462 |
+
{
|
463 |
+
"data": {
|
464 |
+
"application/vnd.jupyter.widget-view+json": {
|
465 |
+
"model_id": "02d2f083e74a45e6ada46b9872822dfc",
|
466 |
+
"version_major": 2,
|
467 |
+
"version_minor": 0
|
468 |
+
},
|
469 |
+
"text/plain": [
|
470 |
+
"Map: 0%| | 0/53275 [00:00<?, ? examples/s]"
|
471 |
+
]
|
472 |
+
},
|
473 |
+
"metadata": {},
|
474 |
+
"output_type": "display_data"
|
475 |
+
},
|
476 |
+
{
|
477 |
+
"data": {
|
478 |
+
"application/vnd.jupyter.widget-view+json": {
|
479 |
+
"model_id": "f9ebfc901049446cb70580ee6d90861b",
|
480 |
+
"version_major": 2,
|
481 |
+
"version_minor": 0
|
482 |
+
},
|
483 |
+
"text/plain": [
|
484 |
+
"Map: 0%| | 0/5920 [00:00<?, ? examples/s]"
|
485 |
+
]
|
486 |
+
},
|
487 |
+
"metadata": {},
|
488 |
+
"output_type": "display_data"
|
489 |
+
},
|
490 |
+
{
|
491 |
+
"data": {
|
492 |
+
"text/plain": [
|
493 |
+
"DatasetDict({\n",
|
494 |
+
" train: Dataset({\n",
|
495 |
+
" features: ['input_ids', 'attention_mask'],\n",
|
496 |
+
" num_rows: 53275\n",
|
497 |
+
" })\n",
|
498 |
+
" test: Dataset({\n",
|
499 |
+
" features: ['input_ids', 'attention_mask'],\n",
|
500 |
+
" num_rows: 5920\n",
|
501 |
+
" })\n",
|
502 |
+
"})"
|
503 |
+
]
|
504 |
+
},
|
505 |
+
"execution_count": 18,
|
506 |
+
"metadata": {},
|
507 |
+
"output_type": "execute_result"
|
508 |
+
}
|
509 |
+
],
|
510 |
"source": [
|
511 |
"def tokenize_function(example):\n",
|
512 |
" prompt = build_prompt(example)\n",
|
513 |
+
" result = tokenizer(prompt, padding='max_length', truncation=True, max_length=256) # max_length=256\n",
|
514 |
" return result\n",
|
515 |
"\n",
|
516 |
"\n",
|
|
|
523 |
},
|
524 |
{
|
525 |
"cell_type": "code",
|
526 |
+
"execution_count": 19,
|
527 |
"id": "3d46c8b1-9fb3-431a-87ea-c278468543e7",
|
528 |
"metadata": {},
|
529 |
+
"outputs": [
|
530 |
+
{
|
531 |
+
"data": {
|
532 |
+
"text/plain": [
|
533 |
+
"Dataset({\n",
|
534 |
+
" features: ['input_ids', 'attention_mask'],\n",
|
535 |
+
" num_rows: 53275\n",
|
536 |
+
"})"
|
537 |
+
]
|
538 |
+
},
|
539 |
+
"execution_count": 19,
|
540 |
+
"metadata": {},
|
541 |
+
"output_type": "execute_result"
|
542 |
+
}
|
543 |
+
],
|
544 |
"source": [
|
545 |
"tokenized_datasets[\"train\"]"
|
546 |
]
|
547 |
},
|
548 |
{
|
549 |
"cell_type": "code",
|
550 |
+
"execution_count": 20,
|
551 |
"id": "26985c81-4335-4ac0-9a5a-84a5b4f2d0e4",
|
552 |
"metadata": {},
|
553 |
"outputs": [],
|
|
|
561 |
},
|
562 |
{
|
563 |
"cell_type": "code",
|
564 |
+
"execution_count": 21,
|
565 |
"id": "e18d3095-d6dd-423b-84fb-dca4a629d450",
|
566 |
"metadata": {},
|
567 |
+
"outputs": [
|
568 |
+
{
|
569 |
+
"data": {
|
570 |
+
"application/vnd.jupyter.widget-view+json": {
|
571 |
+
"model_id": "08a24dc5ff954b8581d5c2378e0d60d6",
|
572 |
+
"version_major": 2,
|
573 |
+
"version_minor": 0
|
574 |
+
},
|
575 |
+
"text/plain": [
|
576 |
+
"generation_config.json: 0%| | 0.00/111 [00:00<?, ?B/s]"
|
577 |
+
]
|
578 |
+
},
|
579 |
+
"metadata": {},
|
580 |
+
"output_type": "display_data"
|
581 |
+
}
|
582 |
+
],
|
583 |
"source": [
|
584 |
"model = GPT2LMHeadModel.from_pretrained(\"dnagpt/gene_eng_gpt2_v0\")"
|
585 |
]
|
586 |
},
|
587 |
{
|
588 |
"cell_type": "code",
|
589 |
+
"execution_count": 22,
|
590 |
"id": "12134cf2-676a-4176-a733-35caab2fd520",
|
591 |
"metadata": {},
|
592 |
"outputs": [],
|
|
|
631 |
},
|
632 |
{
|
633 |
"cell_type": "code",
|
634 |
+
"execution_count": 23,
|
635 |
"id": "b9a2e2a9-a1ff-44b0-a550-623a16d0d7a2",
|
636 |
"metadata": {},
|
637 |
+
"outputs": [
|
638 |
+
{
|
639 |
+
"name": "stderr",
|
640 |
+
"output_type": "stream",
|
641 |
+
"text": [
|
642 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
643 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
644 |
+
"The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n"
|
645 |
+
]
|
646 |
+
},
|
647 |
+
{
|
648 |
+
"name": "stdout",
|
649 |
+
"output_type": "stream",
|
650 |
+
"text": [
|
651 |
+
"input (test): Below is an instruction that describes a task. Write a response that appropriately completes the request.\n",
|
652 |
+
"\n",
|
653 |
+
"### Instruction:\n",
|
654 |
+
"Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.\n",
|
655 |
+
"\n",
|
656 |
+
"### Input:\n",
|
657 |
+
"CCAGGATGCGCTGACGACCCGGCTGGCAGGCGGGTCCTCGTGGGCGAGGCGAGGGAGGCGGCGAGAGAGGAGCAATAGTTTCCCACCGCTCCCTCTCAGGCGCAGGGTCTAGAGAAGCGCGAGGGGATCTAGAGAAGCCGGAGGGGAGGAAGCGCGAGTCCGCGGCCCGCCCCGTTGCGTCCCACCCACCGCGTCCCCTCCCCTCCCCTCCCGCTGCGGGAAAAGCGGCCGCGGGCGGCGGCGCCCACTGTGGGGCGGGCGGAGCGCCGCGGGAGGCGGACGAGATGCGAGCGCGGCCGC\n",
|
658 |
+
"\n",
|
659 |
+
"### Response:\n",
|
660 |
+
"\n",
|
661 |
+
"--------------------------\n",
|
662 |
+
"\n",
|
663 |
+
"model's answer: \n",
|
664 |
+
"\n",
|
665 |
+
"TATAT\n"
|
666 |
+
]
|
667 |
+
}
|
668 |
+
],
|
669 |
"source": [
|
670 |
"input_text = format_input(data[\"test\"][0])\n",
|
671 |
"\n",
|
|
|
679 |
},
|
680 |
{
|
681 |
"cell_type": "code",
|
682 |
+
"execution_count": 24,
|
683 |
"id": "63b54fe2-f077-4ca8-974e-1bcc41ce57d6",
|
684 |
"metadata": {},
|
685 |
"outputs": [],
|
|
|
698 |
},
|
699 |
{
|
700 |
"cell_type": "code",
|
701 |
+
"execution_count": 25,
|
702 |
"id": "61df123d-e67d-4548-998a-de1e2781e774",
|
703 |
"metadata": {},
|
704 |
+
"outputs": [
|
705 |
+
{
|
706 |
+
"name": "stdout",
|
707 |
+
"output_type": "stream",
|
708 |
+
"text": [
|
709 |
+
"[2025-01-10 15:41:26,331] [INFO] [real_accelerator.py:222:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n"
|
710 |
+
]
|
711 |
+
},
|
712 |
+
{
|
713 |
+
"name": "stderr",
|
714 |
+
"output_type": "stream",
|
715 |
+
"text": [
|
716 |
+
"/root/miniconda3/compiler_compat/ld: cannot find -laio: No such file or directory\n",
|
717 |
+
"collect2: error: ld returned 1 exit status\n",
|
718 |
+
"/root/miniconda3/compiler_compat/ld: warning: libpthread.so.0, needed by /usr/local/cuda/lib64/libcufile.so, not found (try using -rpath or -rpath-link)\n",
|
719 |
+
"/root/miniconda3/compiler_compat/ld: warning: libstdc++.so.6, needed by /usr/local/cuda/lib64/libcufile.so, not found (try using -rpath or -rpath-link)\n",
|
720 |
+
"/root/miniconda3/compiler_compat/ld: warning: libm.so.6, needed by /usr/local/cuda/lib64/libcufile.so, not found (try using -rpath or -rpath-link)\n",
|
721 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::runtime_error::~runtime_error()@GLIBCXX_3.4'\n",
|
722 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `__gxx_personality_v0@CXXABI_1.3'\n",
|
723 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::ostream::tellp()@GLIBCXX_3.4'\n",
|
724 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::chrono::_V2::steady_clock::now()@GLIBCXX_3.4.19'\n",
|
725 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::string::_M_replace_aux(unsigned long, unsigned long, unsigned long, char)@GLIBCXX_3.4'\n",
|
726 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `typeinfo for bool@CXXABI_1.3'\n",
|
727 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::__throw_logic_error(char const*)@GLIBCXX_3.4'\n",
|
728 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `VTT for std::basic_ostringstream<char, std::char_traits<char>, std::allocator<char> >@GLIBCXX_3.4'\n",
|
729 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::logic_error@GLIBCXX_3.4'\n",
|
730 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::locale::~locale()@GLIBCXX_3.4'\n",
|
731 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_string<char, std::char_traits<char>, std::allocator<char> >::basic_string(std::string const&, unsigned long, unsigned long)@GLIBCXX_3.4'\n",
|
732 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `__cxa_end_catch@CXXABI_1.3'\n",
|
733 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `VTT for std::basic_ofstream<char, std::char_traits<char> >@GLIBCXX_3.4'\n",
|
734 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::logic_error::~logic_error()@GLIBCXX_3.4'\n",
|
735 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for __cxxabiv1::__si_class_type_info@CXXABI_1.3'\n",
|
736 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_ios<char, std::char_traits<char> >::_M_cache_locale(std::locale const&)@GLIBCXX_3.4'\n",
|
737 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `VTT for std::basic_stringstream<char, std::char_traits<char>, std::allocator<char> >@GLIBCXX_3.4'\n",
|
738 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `operator new[](unsigned long)@GLIBCXX_3.4'\n",
|
739 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::string::_M_leak_hard()@GLIBCXX_3.4'\n",
|
740 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::basic_ifstream<char, std::char_traits<char> >@GLIBCXX_3.4'\n",
|
741 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_streambuf<wchar_t, std::char_traits<wchar_t> >::basic_streambuf(std::basic_streambuf<wchar_t, std::char_traits<wchar_t> > const&)@GLIBCXX_3.4'\n",
|
742 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::string::append(char const*, unsigned long)@GLIBCXX_3.4'\n",
|
743 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_string<char, std::char_traits<char>, std::allocator<char> >::basic_string(std::string const&)@GLIBCXX_3.4'\n",
|
744 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `typeinfo for unsigned short@CXXABI_1.3'\n",
|
745 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::string::resize(unsigned long, char)@GLIBCXX_3.4'\n",
|
746 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `typeinfo for char const*@CXXABI_1.3'\n",
|
747 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::ctype<char>::_M_widen_init() const@GLIBCXX_3.4.11'\n",
|
748 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::__throw_invalid_argument(char const*)@GLIBCXX_3.4'\n",
|
749 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::locale::operator=(std::locale const&)@GLIBCXX_3.4'\n",
|
750 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_ios<wchar_t, std::char_traits<wchar_t> >::_M_cache_locale(std::locale const&)@GLIBCXX_3.4'\n",
|
751 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::_Rb_tree_decrement(std::_Rb_tree_node_base const*)@GLIBCXX_3.4'\n",
|
752 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `__cxa_free_exception@CXXABI_1.3'\n",
|
753 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::condition_variable::notify_one()@GLIBCXX_3.4.11'\n",
|
754 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::ios_base::Init::~Init()@GLIBCXX_3.4'\n",
|
755 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_string<char, std::char_traits<char>, std::allocator<char> >::~basic_string()@GLIBCXX_3.4'\n",
|
756 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `__cxa_pure_virtual@CXXABI_1.3'\n",
|
757 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::ostream::flush()@GLIBCXX_3.4'\n",
|
758 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for __cxxabiv1::__class_type_info@CXXABI_1.3'\n",
|
759 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `__cxa_rethrow@CXXABI_1.3'\n",
|
760 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::basic_stringbuf<char, std::char_traits<char>, std::allocator<char> >@GLIBCXX_3.4'\n",
|
761 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_fstream<char, std::char_traits<char> >::~basic_fstream()@GLIBCXX_3.4'\n",
|
762 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::string::compare(char const*) const@GLIBCXX_3.4'\n",
|
763 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `VTT for std::basic_ostringstream<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> >@GLIBCXX_3.4'\n",
|
764 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::locale::locale()@GLIBCXX_3.4'\n",
|
765 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::chrono::_V2::system_clock::now()@GLIBCXX_3.4.19'\n",
|
766 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `VTT for std::basic_ifstream<char, std::char_traits<char> >@GLIBCXX_3.4'\n",
|
767 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::_Hash_bytes(void const*, unsigned long, unsigned long)@CXXABI_1.3.5'\n",
|
768 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::ostream& std::ostream::_M_insert<long long>(long long)@GLIBCXX_3.4.9'\n",
|
769 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `typeinfo for char*@CXXABI_1.3'\n",
|
770 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::__detail::_Prime_rehash_policy::_M_need_rehash(unsigned long, unsigned long, unsigned long) const@GLIBCXX_3.4.18'\n",
|
771 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::out_of_range@GLIBCXX_3.4'\n",
|
772 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::ostream& std::ostream::_M_insert<unsigned long>(unsigned long)@GLIBCXX_3.4.9'\n",
|
773 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::_Rb_tree_increment(std::_Rb_tree_node_base const*)@GLIBCXX_3.4'\n",
|
774 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::ios_base::~ios_base()@GLIBCXX_3.4'\n",
|
775 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::range_error::~range_error()@GLIBCXX_3.4'\n",
|
776 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::__basic_file<char>::~__basic_file()@GLIBCXX_3.4'\n",
|
777 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `__cxa_guard_acquire@CXXABI_1.3'\n",
|
778 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::ostream& std::ostream::_M_insert<bool>(bool)@GLIBCXX_3.4.9'\n",
|
779 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::overflow_error@GLIBCXX_3.4'\n",
|
780 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `VTT for std::basic_fstream<char, std::char_traits<char> >@GLIBCXX_3.4'\n",
|
781 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::range_error@GLIBCXX_3.4'\n",
|
782 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::basic_ios<char, std::char_traits<char> >@GLIBCXX_3.4'\n",
|
783 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::basic_filebuf<char, std::char_traits<char> >@GLIBCXX_3.4'\n",
|
784 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `operator delete[](void*)@GLIBCXX_3.4'\n",
|
785 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::basic_stringstream<char, std::char_traits<char>, std::allocator<char> >@GLIBCXX_3.4'\n",
|
786 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_string<char, std::char_traits<char>, std::allocator<char> >::basic_string(unsigned long, char, std::allocator<char> const&)@GLIBCXX_3.4'\n",
|
787 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::__detail::_List_node_base::_M_transfer(std::__detail::_List_node_base*, std::__detail::_List_node_base*)@GLIBCXX_3.4.15'\n",
|
788 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::string::replace(unsigned long, unsigned long, char const*, unsigned long)@GLIBCXX_3.4'\n",
|
789 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `typeinfo for std::exception@GLIBCXX_3.4'\n",
|
790 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_string<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> >::_Rep::_M_destroy(std::allocator<wchar_t> const&)@GLIBCXX_3.4'\n",
|
791 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::istream& std::istream::_M_extract<double>(double&)@GLIBCXX_3.4.9'\n",
|
792 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_filebuf<char, std::char_traits<char> >::close()@GLIBCXX_3.4'\n",
|
793 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::basic_fstream<char, std::char_traits<char> >@GLIBCXX_3.4'\n",
|
794 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_ifstream<char, std::char_traits<char> >::basic_ifstream(char const*, std::_Ios_Openmode)@GLIBCXX_3.4'\n",
|
795 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::string::append(std::string const&)@GLIBCXX_3.4'\n",
|
796 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `operator new(unsigned long)@GLIBCXX_3.4'\n",
|
797 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `VTT for std::basic_istringstream<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> >@GLIBCXX_3.4'\n",
|
798 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `typeinfo for unsigned int@CXXABI_1.3'\n",
|
799 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::string::append(char const*)@GLIBCXX_3.4'\n",
|
800 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::domain_error@GLIBCXX_3.4'\n",
|
801 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::string::find(char, unsigned long) const@GLIBCXX_3.4'\n",
|
802 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::ostream::put(char)@GLIBCXX_3.4'\n",
|
803 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `typeinfo for int@CXXABI_1.3'\n",
|
804 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::__throw_bad_alloc()@GLIBCXX_3.4'\n",
|
805 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `__cxa_thread_atexit@CXXABI_1.3.7'\n",
|
806 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::_Rb_tree_increment(std::_Rb_tree_node_base*)@GLIBCXX_3.4'\n",
|
807 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_ifstream<char, std::char_traits<char> >::~basic_ifstream()@GLIBCXX_3.4'\n",
|
808 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::ios_base::Init::Init()@GLIBCXX_3.4'\n",
|
809 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::condition_variable::condition_variable()@GLIBCXX_3.4.11'\n",
|
810 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_filebuf<char, std::char_traits<char> >::basic_filebuf()@GLIBCXX_3.4'\n",
|
811 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `VTT for std::basic_istringstream<char, std::char_traits<char>, std::allocator<char> >@GLIBCXX_3.4'\n",
|
812 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::domain_error::~domain_error()@GLIBCXX_3.4'\n",
|
813 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::cerr@GLIBCXX_3.4'\n",
|
814 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::string::find(char const*, unsigned long, unsigned long) const@GLIBCXX_3.4'\n",
|
815 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::basic_istringstream<char, std::char_traits<char>, std::allocator<char> >@GLIBCXX_3.4'\n",
|
816 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_string<char, std::char_traits<char>, std::allocator<char> >::basic_string(std::allocator<char> const&)@GLIBCXX_3.4'\n",
|
817 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_stringbuf<char, std::char_traits<char>, std::allocator<char> >::str() const@GLIBCXX_3.4'\n",
|
818 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::invalid_argument@GLIBCXX_3.4'\n",
|
819 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `typeinfo for void*@CXXABI_1.3'\n",
|
820 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::string::assign(std::string const&)@GLIBCXX_3.4'\n",
|
821 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_ostringstream<char, std::char_traits<char>, std::allocator<char> >::~basic_ostringstream()@GLIBCXX_3.4'\n",
|
822 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::_Rb_tree_rebalance_for_erase(std::_Rb_tree_node_base*, std::_Rb_tree_node_base&)@GLIBCXX_3.4'\n",
|
823 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `typeinfo for unsigned long@CXXABI_1.3'\n",
|
824 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::__detail::_List_node_base::_M_hook(std::__detail::_List_node_base*)@GLIBCXX_3.4.15'\n",
|
825 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::__detail::_List_node_base::_M_unhook()@GLIBCXX_3.4.15'\n",
|
826 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::basic_ostringstream<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> >@GLIBCXX_3.4'\n",
|
827 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_stringbuf<char, std::char_traits<char>, std::allocator<char> >::_M_sync(char*, unsigned long, unsigned long)@GLIBCXX_3.4'\n",
|
828 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_iostream<char, std::char_traits<char> >::~basic_iostream()@GLIBCXX_3.4'\n",
|
829 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::locale::locale(std::locale const&)@GLIBCXX_3.4'\n",
|
830 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::basic_istringstream<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> >@GLIBCXX_3.4'\n",
|
831 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `log2f@GLIBC_2.2.5'\n",
|
832 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::ostream::operator<<(std::basic_streambuf<char, std::char_traits<char> >*)@GLIBCXX_3.4'\n",
|
833 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::basic_streambuf<wchar_t, std::char_traits<wchar_t> >@GLIBCXX_3.4'\n",
|
834 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::exception::~exception()@GLIBCXX_3.4'\n",
|
835 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::string::_Rep::_S_create(unsigned long, unsigned long, std::allocator<char> const&)@GLIBCXX_3.4'\n",
|
836 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::__basic_file<char>::is_open() const@GLIBCXX_3.4'\n",
|
837 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_istringstream<char, std::char_traits<char>, std::allocator<char> >::~basic_istringstream()@GLIBCXX_3.4'\n",
|
838 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::string::swap(std::string&)@GLIBCXX_3.4'\n",
|
839 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `typeinfo for unsigned long*@CXXABI_1.3'\n",
|
840 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::basic_ostringstream<char, std::char_traits<char>, std::allocator<char> >@GLIBCXX_3.4'\n",
|
841 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_streambuf<char, std::char_traits<char> >::basic_streambuf(std::basic_streambuf<char, std::char_traits<char> > const&)@GLIBCXX_3.4'\n",
|
842 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_ios<char, std::char_traits<char> >::init(std::basic_streambuf<char, std::char_traits<char> >*)@GLIBCXX_3.4'\n",
|
843 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::__throw_bad_cast()@GLIBCXX_3.4'\n",
|
844 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_ios<char, std::char_traits<char> >::clear(std::_Ios_Iostate)@GLIBCXX_3.4'\n",
|
845 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_streambuf<wchar_t, std::char_traits<wchar_t> >::operator=(std::basic_streambuf<wchar_t, std::char_traits<wchar_t> > const&)@GLIBCXX_3.4'\n",
|
846 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `operator delete(void*)@GLIBCXX_3.4'\n",
|
847 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::ostream::operator<<(int)@GLIBCXX_3.4'\n",
|
848 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::string::_Rep::_S_empty_rep_storage@GLIBCXX_3.4'\n",
|
849 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::string::_Rep::_M_destroy(std::allocator<char> const&)@GLIBCXX_3.4'\n",
|
850 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_iostream<wchar_t, std::char_traits<wchar_t> >::~basic_iostream()@GLIBCXX_3.4'\n",
|
851 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::runtime_error@GLIBCXX_3.4'\n",
|
852 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::basic_ofstream<char, std::char_traits<char> >@GLIBCXX_3.4'\n",
|
853 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::_Rb_tree_insert_and_rebalance(bool, std::_Rb_tree_node_base*, std::_Rb_tree_node_base*, std::_Rb_tree_node_base&)@GLIBCXX_3.4'\n",
|
854 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_stringstream<char, std::char_traits<char>, std::allocator<char> >::~basic_stringstream()@GLIBCXX_3.4'\n",
|
855 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `VTT for std::basic_stringstream<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> >@GLIBCXX_3.4'\n",
|
856 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::ostream& std::ostream::_M_insert<long>(long)@GLIBCXX_3.4.9'\n",
|
857 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::istream::get()@GLIBCXX_3.4'\n",
|
858 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `typeinfo for unsigned long long@CXXABI_1.3'\n",
|
859 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_ostream<char, std::char_traits<char> >& std::operator<< <std::char_traits<char> >(std::basic_ostream<char, std::char_traits<char> >&, char const*)@GLIBCXX_3.4'\n",
|
860 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::out_of_range::~out_of_range()@GLIBCXX_3.4'\n",
|
861 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::length_error::~length_error()@GLIBCXX_3.4'\n",
|
862 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_ostream<char, std::char_traits<char> >& std::__ostream_insert<char, std::char_traits<char> >(std::basic_ostream<char, std::char_traits<char> >&, char const*, long)@GLIBCXX_3.4.9'\n",
|
863 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::invalid_argument::~invalid_argument()@GLIBCXX_3.4'\n",
|
864 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_string<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> >::swap(std::basic_string<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> >&)@GLIBCXX_3.4'\n",
|
865 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::cout@GLIBCXX_3.4'\n",
|
866 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::ostream& std::ostream::_M_insert<unsigned long long>(unsigned long long)@GLIBCXX_3.4.9'\n",
|
867 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `typeinfo for int*@CXXABI_1.3'\n",
|
868 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::ostream& std::ostream::_M_insert<void const*>(void const*)@GLIBCXX_3.4.9'\n",
|
869 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::underflow_error@GLIBCXX_3.4'\n",
|
870 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::basic_streambuf<char, std::char_traits<char> >@GLIBCXX_3.4'\n",
|
871 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `typeinfo for std::out_of_range@GLIBCXX_3.4'\n",
|
872 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `__cxa_allocate_exception@CXXABI_1.3'\n",
|
873 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::basic_ios<wchar_t, std::char_traits<wchar_t> >@GLIBCXX_3.4'\n",
|
874 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `typeinfo for void const*@CXXABI_1.3'\n",
|
875 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_ios<wchar_t, std::char_traits<wchar_t> >::init(std::basic_streambuf<wchar_t, std::char_traits<wchar_t> >*)@GLIBCXX_3.4'\n",
|
876 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::string::reserve(unsigned long)@GLIBCXX_3.4'\n",
|
877 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `__cxa_begin_catch@CXXABI_1.3'\n",
|
878 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `typeinfo for long@CXXABI_1.3'\n",
|
879 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_string<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> >::_Rep::_S_empty_rep_storage@GLIBCXX_3.4'\n",
|
880 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::string::_M_leak()@GLIBCXX_3.4'\n",
|
881 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_filebuf<char, std::char_traits<char> >::open(char const*, std::_Ios_Openmode)@GLIBCXX_3.4'\n",
|
882 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_stringbuf<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> >::_M_sync(wchar_t*, unsigned long, unsigned long)@GLIBCXX_3.4'\n",
|
883 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::istream::getline(char*, long, char)@GLIBCXX_3.4'\n",
|
884 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_istream<char, std::char_traits<char> >& std::getline<char, std::char_traits<char>, std::allocator<char> >(std::basic_istream<char, std::char_traits<char> >&, std::basic_string<char, std::char_traits<char>, std::allocator<char> >&, char)@GLIBCXX_3.4'\n",
|
885 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::basic_stringstream<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> >@GLIBCXX_3.4'\n",
|
886 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::condition_variable::~condition_variable()@GLIBCXX_3.4.11'\n",
|
887 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::basic_stringbuf<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> >@GLIBCXX_3.4'\n",
|
888 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::string::insert(unsigned long, char const*, unsigned long)@GLIBCXX_3.4'\n",
|
889 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::string::assign(char const*, unsigned long)@GLIBCXX_3.4'\n",
|
890 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `typeinfo for unsigned char@CXXABI_1.3'\n",
|
891 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::ios_base::ios_base()@GLIBCXX_3.4'\n",
|
892 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::__throw_out_of_range(char const*)@GLIBCXX_3.4'\n",
|
893 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::overflow_error::~overflow_error()@GLIBCXX_3.4'\n",
|
894 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::__throw_length_error(char const*)@GLIBCXX_3.4'\n",
|
895 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::__throw_system_error(int)@GLIBCXX_3.4.11'\n",
|
896 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_ofstream<char, std::char_traits<char> >::close()@GLIBCXX_3.4'\n",
|
897 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::ostream& std::ostream::_M_insert<double>(double)@GLIBCXX_3.4.9'\n",
|
898 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_streambuf<char, std::char_traits<char> >::operator=(std::basic_streambuf<char, std::char_traits<char> > const&)@GLIBCXX_3.4'\n",
|
899 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `typeinfo for long long@CXXABI_1.3'\n",
|
900 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_string<char, std::char_traits<char>, std::allocator<char> >::basic_string(char const*, unsigned long, std::allocator<char> const&)@GLIBCXX_3.4'\n",
|
901 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_ifstream<char, std::char_traits<char> >::close()@GLIBCXX_3.4'\n",
|
902 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `__cxa_guard_release@CXXABI_1.3'\n",
|
903 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `__cxa_throw@CXXABI_1.3'\n",
|
904 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::underflow_error::~underflow_error()@GLIBCXX_3.4'\n",
|
905 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::_Rb_tree_decrement(std::_Rb_tree_node_base*)@GLIBCXX_3.4'\n",
|
906 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `vtable for std::length_error@GLIBCXX_3.4'\n",
|
907 |
+
"/root/miniconda3/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::basic_filebuf<char, std::char_traits<char> >::~basic_filebuf()@GLIBCXX_3.4'\n",
|
908 |
+
"collect2: error: ld returned 1 exit status\n"
|
909 |
+
]
|
910 |
+
}
|
911 |
+
],
|
912 |
"source": [
|
913 |
"# 初始化Trainer\n",
|
914 |
"trainer = Trainer(\n",
|
|
|
922 |
},
|
923 |
{
|
924 |
"cell_type": "code",
|
925 |
+
"execution_count": 26,
|
926 |
"id": "a9cd936a-5ea6-43e3-9848-27080f818606",
|
927 |
"metadata": {},
|
928 |
+
"outputs": [
|
929 |
+
{
|
930 |
+
"data": {
|
931 |
+
"text/html": [
|
932 |
+
"\n",
|
933 |
+
" <div>\n",
|
934 |
+
" \n",
|
935 |
+
" <progress value='19980' max='19980' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
|
936 |
+
" [19980/19980 21:55, Epoch 3/3]\n",
|
937 |
+
" </div>\n",
|
938 |
+
" <table border=\"1\" class=\"dataframe\">\n",
|
939 |
+
" <thead>\n",
|
940 |
+
" <tr style=\"text-align: left;\">\n",
|
941 |
+
" <th>Step</th>\n",
|
942 |
+
" <th>Training Loss</th>\n",
|
943 |
+
" </tr>\n",
|
944 |
+
" </thead>\n",
|
945 |
+
" <tbody>\n",
|
946 |
+
" <tr>\n",
|
947 |
+
" <td>500</td>\n",
|
948 |
+
" <td>2.335700</td>\n",
|
949 |
+
" </tr>\n",
|
950 |
+
" <tr>\n",
|
951 |
+
" <td>1000</td>\n",
|
952 |
+
" <td>2.184100</td>\n",
|
953 |
+
" </tr>\n",
|
954 |
+
" <tr>\n",
|
955 |
+
" <td>1500</td>\n",
|
956 |
+
" <td>2.178500</td>\n",
|
957 |
+
" </tr>\n",
|
958 |
+
" <tr>\n",
|
959 |
+
" <td>2000</td>\n",
|
960 |
+
" <td>2.172400</td>\n",
|
961 |
+
" </tr>\n",
|
962 |
+
" <tr>\n",
|
963 |
+
" <td>2500</td>\n",
|
964 |
+
" <td>2.171400</td>\n",
|
965 |
+
" </tr>\n",
|
966 |
+
" <tr>\n",
|
967 |
+
" <td>3000</td>\n",
|
968 |
+
" <td>2.171900</td>\n",
|
969 |
+
" </tr>\n",
|
970 |
+
" <tr>\n",
|
971 |
+
" <td>3500</td>\n",
|
972 |
+
" <td>2.163100</td>\n",
|
973 |
+
" </tr>\n",
|
974 |
+
" <tr>\n",
|
975 |
+
" <td>4000</td>\n",
|
976 |
+
" <td>2.159300</td>\n",
|
977 |
+
" </tr>\n",
|
978 |
+
" <tr>\n",
|
979 |
+
" <td>4500</td>\n",
|
980 |
+
" <td>2.161000</td>\n",
|
981 |
+
" </tr>\n",
|
982 |
+
" <tr>\n",
|
983 |
+
" <td>5000</td>\n",
|
984 |
+
" <td>2.160200</td>\n",
|
985 |
+
" </tr>\n",
|
986 |
+
" <tr>\n",
|
987 |
+
" <td>5500</td>\n",
|
988 |
+
" <td>2.158400</td>\n",
|
989 |
+
" </tr>\n",
|
990 |
+
" <tr>\n",
|
991 |
+
" <td>6000</td>\n",
|
992 |
+
" <td>2.151600</td>\n",
|
993 |
+
" </tr>\n",
|
994 |
+
" <tr>\n",
|
995 |
+
" <td>6500</td>\n",
|
996 |
+
" <td>2.153700</td>\n",
|
997 |
+
" </tr>\n",
|
998 |
+
" <tr>\n",
|
999 |
+
" <td>7000</td>\n",
|
1000 |
+
" <td>2.129500</td>\n",
|
1001 |
+
" </tr>\n",
|
1002 |
+
" <tr>\n",
|
1003 |
+
" <td>7500</td>\n",
|
1004 |
+
" <td>2.119100</td>\n",
|
1005 |
+
" </tr>\n",
|
1006 |
+
" <tr>\n",
|
1007 |
+
" <td>8000</td>\n",
|
1008 |
+
" <td>2.119800</td>\n",
|
1009 |
+
" </tr>\n",
|
1010 |
+
" <tr>\n",
|
1011 |
+
" <td>8500</td>\n",
|
1012 |
+
" <td>2.121600</td>\n",
|
1013 |
+
" </tr>\n",
|
1014 |
+
" <tr>\n",
|
1015 |
+
" <td>9000</td>\n",
|
1016 |
+
" <td>2.122500</td>\n",
|
1017 |
+
" </tr>\n",
|
1018 |
+
" <tr>\n",
|
1019 |
+
" <td>9500</td>\n",
|
1020 |
+
" <td>2.122300</td>\n",
|
1021 |
+
" </tr>\n",
|
1022 |
+
" <tr>\n",
|
1023 |
+
" <td>10000</td>\n",
|
1024 |
+
" <td>2.121500</td>\n",
|
1025 |
+
" </tr>\n",
|
1026 |
+
" <tr>\n",
|
1027 |
+
" <td>10500</td>\n",
|
1028 |
+
" <td>2.119500</td>\n",
|
1029 |
+
" </tr>\n",
|
1030 |
+
" <tr>\n",
|
1031 |
+
" <td>11000</td>\n",
|
1032 |
+
" <td>2.123500</td>\n",
|
1033 |
+
" </tr>\n",
|
1034 |
+
" <tr>\n",
|
1035 |
+
" <td>11500</td>\n",
|
1036 |
+
" <td>2.119600</td>\n",
|
1037 |
+
" </tr>\n",
|
1038 |
+
" <tr>\n",
|
1039 |
+
" <td>12000</td>\n",
|
1040 |
+
" <td>2.119300</td>\n",
|
1041 |
+
" </tr>\n",
|
1042 |
+
" <tr>\n",
|
1043 |
+
" <td>12500</td>\n",
|
1044 |
+
" <td>2.121800</td>\n",
|
1045 |
+
" </tr>\n",
|
1046 |
+
" <tr>\n",
|
1047 |
+
" <td>13000</td>\n",
|
1048 |
+
" <td>2.123500</td>\n",
|
1049 |
+
" </tr>\n",
|
1050 |
+
" <tr>\n",
|
1051 |
+
" <td>13500</td>\n",
|
1052 |
+
" <td>2.103200</td>\n",
|
1053 |
+
" </tr>\n",
|
1054 |
+
" <tr>\n",
|
1055 |
+
" <td>14000</td>\n",
|
1056 |
+
" <td>2.080700</td>\n",
|
1057 |
+
" </tr>\n",
|
1058 |
+
" <tr>\n",
|
1059 |
+
" <td>14500</td>\n",
|
1060 |
+
" <td>2.082100</td>\n",
|
1061 |
+
" </tr>\n",
|
1062 |
+
" <tr>\n",
|
1063 |
+
" <td>15000</td>\n",
|
1064 |
+
" <td>2.082900</td>\n",
|
1065 |
+
" </tr>\n",
|
1066 |
+
" <tr>\n",
|
1067 |
+
" <td>15500</td>\n",
|
1068 |
+
" <td>2.086400</td>\n",
|
1069 |
+
" </tr>\n",
|
1070 |
+
" <tr>\n",
|
1071 |
+
" <td>16000</td>\n",
|
1072 |
+
" <td>2.086600</td>\n",
|
1073 |
+
" </tr>\n",
|
1074 |
+
" <tr>\n",
|
1075 |
+
" <td>16500</td>\n",
|
1076 |
+
" <td>2.083800</td>\n",
|
1077 |
+
" </tr>\n",
|
1078 |
+
" <tr>\n",
|
1079 |
+
" <td>17000</td>\n",
|
1080 |
+
" <td>2.085000</td>\n",
|
1081 |
+
" </tr>\n",
|
1082 |
+
" <tr>\n",
|
1083 |
+
" <td>17500</td>\n",
|
1084 |
+
" <td>2.082800</td>\n",
|
1085 |
+
" </tr>\n",
|
1086 |
+
" <tr>\n",
|
1087 |
+
" <td>18000</td>\n",
|
1088 |
+
" <td>2.077600</td>\n",
|
1089 |
+
" </tr>\n",
|
1090 |
+
" <tr>\n",
|
1091 |
+
" <td>18500</td>\n",
|
1092 |
+
" <td>2.080300</td>\n",
|
1093 |
+
" </tr>\n",
|
1094 |
+
" <tr>\n",
|
1095 |
+
" <td>19000</td>\n",
|
1096 |
+
" <td>2.086600</td>\n",
|
1097 |
+
" </tr>\n",
|
1098 |
+
" <tr>\n",
|
1099 |
+
" <td>19500</td>\n",
|
1100 |
+
" <td>2.084200</td>\n",
|
1101 |
+
" </tr>\n",
|
1102 |
+
" </tbody>\n",
|
1103 |
+
"</table><p>"
|
1104 |
+
],
|
1105 |
+
"text/plain": [
|
1106 |
+
"<IPython.core.display.HTML object>"
|
1107 |
+
]
|
1108 |
+
},
|
1109 |
+
"metadata": {},
|
1110 |
+
"output_type": "display_data"
|
1111 |
+
},
|
1112 |
+
{
|
1113 |
+
"data": {
|
1114 |
+
"text/plain": [
|
1115 |
+
"TrainOutput(global_step=19980, training_loss=2.1272921145021977, metrics={'train_runtime': 1315.5944, 'train_samples_per_second': 121.485, 'train_steps_per_second': 15.187, 'total_flos': 2.08804995072e+16, 'train_loss': 2.1272921145021977, 'epoch': 3.0})"
|
1116 |
+
]
|
1117 |
+
},
|
1118 |
+
"execution_count": 26,
|
1119 |
+
"metadata": {},
|
1120 |
+
"output_type": "execute_result"
|
1121 |
+
}
|
1122 |
+
],
|
1123 |
"source": [
|
1124 |
"# 开始训练\n",
|
1125 |
"trainer.train()"
|
|
|
1127 |
},
|
1128 |
{
|
1129 |
"cell_type": "code",
|
1130 |
+
"execution_count": 27,
|
1131 |
"id": "315aae76-44b4-4513-8139-40ef22934873",
|
1132 |
"metadata": {},
|
1133 |
+
"outputs": [
|
1134 |
+
{
|
1135 |
+
"name": "stdout",
|
1136 |
+
"output_type": "stream",
|
1137 |
+
"text": [
|
1138 |
+
"Saved model to: gpt_ft/final\n"
|
1139 |
+
]
|
1140 |
+
}
|
1141 |
+
],
|
1142 |
"source": [
|
1143 |
"save_dir = 'gpt_ft/final'\n",
|
1144 |
"trainer.save_model(save_dir)\n",
|
|
|
1147 |
},
|
1148 |
{
|
1149 |
"cell_type": "code",
|
1150 |
+
"execution_count": 28,
|
1151 |
"id": "28d2dbbc-02ff-4120-b230-b19905a786cd",
|
1152 |
"metadata": {},
|
1153 |
"outputs": [],
|
|
|
1158 |
},
|
1159 |
{
|
1160 |
"cell_type": "code",
|
1161 |
+
"execution_count": 29,
|
1162 |
"id": "08987c3c-063a-4e9b-9ebb-e637b0b5bccd",
|
1163 |
"metadata": {},
|
1164 |
+
"outputs": [
|
1165 |
+
{
|
1166 |
+
"data": {
|
1167 |
+
"text/plain": [
|
1168 |
+
"GPT2LMHeadModel(\n",
|
1169 |
+
" (transformer): GPT2Model(\n",
|
1170 |
+
" (wte): Embedding(90000, 768)\n",
|
1171 |
+
" (wpe): Embedding(1024, 768)\n",
|
1172 |
+
" (drop): Dropout(p=0.1, inplace=False)\n",
|
1173 |
+
" (h): ModuleList(\n",
|
1174 |
+
" (0-11): 12 x GPT2Block(\n",
|
1175 |
+
" (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
|
1176 |
+
" (attn): GPT2SdpaAttention(\n",
|
1177 |
+
" (c_attn): Conv1D(nf=2304, nx=768)\n",
|
1178 |
+
" (c_proj): Conv1D(nf=768, nx=768)\n",
|
1179 |
+
" (attn_dropout): Dropout(p=0.1, inplace=False)\n",
|
1180 |
+
" (resid_dropout): Dropout(p=0.1, inplace=False)\n",
|
1181 |
+
" )\n",
|
1182 |
+
" (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
|
1183 |
+
" (mlp): GPT2MLP(\n",
|
1184 |
+
" (c_fc): Conv1D(nf=3072, nx=768)\n",
|
1185 |
+
" (c_proj): Conv1D(nf=768, nx=3072)\n",
|
1186 |
+
" (act): NewGELUActivation()\n",
|
1187 |
+
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
1188 |
+
" )\n",
|
1189 |
+
" )\n",
|
1190 |
+
" )\n",
|
1191 |
+
" (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
|
1192 |
+
" )\n",
|
1193 |
+
" (lm_head): Linear(in_features=768, out_features=90000, bias=False)\n",
|
1194 |
+
")"
|
1195 |
+
]
|
1196 |
+
},
|
1197 |
+
"execution_count": 29,
|
1198 |
+
"metadata": {},
|
1199 |
+
"output_type": "execute_result"
|
1200 |
+
}
|
1201 |
+
],
|
1202 |
"source": [
|
1203 |
"finetuned_model"
|
1204 |
]
|
1205 |
},
|
1206 |
{
|
1207 |
"cell_type": "code",
|
1208 |
+
"execution_count": 30,
|
1209 |
"id": "d75010e8-6d6a-40ef-852e-0d705adc3da8",
|
1210 |
"metadata": {},
|
1211 |
+
"outputs": [
|
1212 |
+
{
|
1213 |
+
"name": "stderr",
|
1214 |
+
"output_type": "stream",
|
1215 |
+
"text": [
|
1216 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1217 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n"
|
1218 |
+
]
|
1219 |
+
},
|
1220 |
+
{
|
1221 |
+
"name": "stdout",
|
1222 |
+
"output_type": "stream",
|
1223 |
+
"text": [
|
1224 |
+
"input (test): Below is an instruction that describes a task. Write a response that appropriately completes the request.\n",
|
1225 |
+
"\n",
|
1226 |
+
"### Instruction:\n",
|
1227 |
+
"Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.\n",
|
1228 |
+
"\n",
|
1229 |
+
"### Input:\n",
|
1230 |
+
"CCAGGATGCGCTGACGACCCGGCTGGCAGGCGGGTCCTCGTGGGCGAGGCGAGGGAGGCGGCGAGAGAGGAGCAATAGTTTCCCACCGCTCCCTCTCAGGCGCAGGGTCTAGAGAAGCGCGAGGGGATCTAGAGAAGCCGGAGGGGAGGAAGCGCGAGTCCGCGGCCCGCCCCGTTGCGTCCCACCCACCGCGTCCCCTCCCCTCCCCTCCCGCTGCGGGAAAAGCGGCCGCGGGCGGCGGCGCCCACTGTGGGGCGGGCGGAGCGCCGCGGGAGGCGGACGAGATGCGAGCGCGGCCGC\n",
|
1231 |
+
"\n",
|
1232 |
+
"### Response:\n",
|
1233 |
+
"\n",
|
1234 |
+
"--------------------------\n",
|
1235 |
+
"\n",
|
1236 |
+
"model's answer: \n",
|
1237 |
+
"\n",
|
1238 |
+
"promoterpromoterpromo\n",
|
1239 |
+
"--------------------------\n",
|
1240 |
+
"\n",
|
1241 |
+
"real answer: \n",
|
1242 |
+
"\n",
|
1243 |
+
"promoter\n"
|
1244 |
+
]
|
1245 |
+
}
|
1246 |
+
],
|
1247 |
"source": [
|
1248 |
"print(\"input (test):\", input_text)\n",
|
1249 |
"\n",
|
|
|
1259 |
},
|
1260 |
{
|
1261 |
"cell_type": "code",
|
1262 |
+
"execution_count": 31,
|
1263 |
"id": "64365e15-510e-4abf-92f5-c78b660b37dc",
|
1264 |
"metadata": {},
|
1265 |
+
"outputs": [
|
1266 |
+
{
|
1267 |
+
"name": "stderr",
|
1268 |
+
"output_type": "stream",
|
1269 |
+
"text": [
|
1270 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1271 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1272 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1273 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1274 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1275 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1276 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1277 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1278 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1279 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1280 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1281 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1282 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1283 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1284 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1285 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1286 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1287 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1288 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1289 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1290 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1291 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1292 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1293 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1294 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1295 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1296 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1297 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1298 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1299 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1300 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1301 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1302 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1303 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1304 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1305 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1306 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1307 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1308 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1309 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1310 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1311 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1312 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1313 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1314 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1315 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1316 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1317 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1318 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1319 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1320 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1321 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1322 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1323 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1324 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1325 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1326 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1327 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1328 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1329 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1330 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1331 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1332 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1333 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1334 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1335 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1336 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1337 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1338 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1339 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1340 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1341 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1342 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1343 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1344 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1345 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1346 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1347 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1348 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1349 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1350 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1351 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1352 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1353 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1354 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1355 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1356 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1357 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1358 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1359 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1360 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1361 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1362 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1363 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1364 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1365 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1366 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1367 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1368 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1369 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1370 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1371 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1372 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1373 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1374 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1375 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1376 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1377 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1378 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1379 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1380 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1381 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1382 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1383 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1384 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1385 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1386 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1387 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1388 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1389 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1390 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1391 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1392 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1393 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1394 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1395 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1396 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1397 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1398 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1399 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1400 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1401 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1402 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1403 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1404 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1405 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1406 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1407 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1408 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1409 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1410 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1411 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1412 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1413 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1414 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1415 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1416 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1417 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1418 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1419 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1420 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1421 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1422 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1423 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1424 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1425 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1426 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1427 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1428 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1429 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1430 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1431 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1432 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1433 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1434 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1435 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1436 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1437 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1438 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1439 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1440 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1441 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1442 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1443 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1444 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1445 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1446 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1447 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1448 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1449 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1450 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1451 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1452 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1453 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1454 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1455 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1456 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1457 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1458 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1459 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1460 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1461 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1462 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1463 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1464 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1465 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1466 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1467 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n",
|
1468 |
+
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
1469 |
+
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n"
|
1470 |
+
]
|
1471 |
+
}
|
1472 |
+
],
|
1473 |
"source": [
|
1474 |
"test_data = data[\"test\"].select(range(100))\n",
|
1475 |
"\n",
|
|
|
1492 |
},
|
1493 |
{
|
1494 |
"cell_type": "code",
|
1495 |
+
"execution_count": 32,
|
1496 |
"id": "a45fb780-fc3f-401c-b6e0-6f7d0c1682de",
|
1497 |
"metadata": {},
|
1498 |
"outputs": [],
|
|
|
1505 |
"# 将 Dataset 对象导出为 JSON 文件\n",
|
1506 |
"# test_data.to_json(output_file)\n",
|
1507 |
"with open(output_file, \"w\") as file:\n",
|
1508 |
+
" json.dump(data_list, file, indent=4) # \"indent\" for pretty-printing\n"
|
1509 |
]
|
1510 |
},
|
1511 |
{
|
1512 |
"cell_type": "code",
|
1513 |
+
"execution_count": 1,
|
1514 |
"id": "a83c8881-c763-4bba-8b85-584a6722a38e",
|
1515 |
"metadata": {},
|
1516 |
+
"outputs": [
|
1517 |
+
{
|
1518 |
+
"name": "stdout",
|
1519 |
+
"output_type": "stream",
|
1520 |
+
"text": [
|
1521 |
+
"promoter |||||||||||| promoterpromoterpromo\n",
|
1522 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1523 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1524 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1525 |
+
"promoter |||||||||||| promoterpromoterpromo\n",
|
1526 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1527 |
+
"promoter |||||||||||| promoterpromoterpromo\n",
|
1528 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1529 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1530 |
+
"promoter |||||||||||| promoterpromoterpromo\n",
|
1531 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1532 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1533 |
+
"promoter |||||||||||| promoterpromoterpromo\n",
|
1534 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1535 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1536 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1537 |
+
"Non-promoter |||||||||||| promoterpromoterpromo\n",
|
1538 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1539 |
+
"promoter |||||||||||| promoterpromoterpromo\n",
|
1540 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1541 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1542 |
+
"promoter |||||||||||| promoterpromoterpromo\n",
|
1543 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1544 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1545 |
+
"promoter |||||||||||| Non-promoter\n",
|
1546 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1547 |
+
"Non-promoter |||||||||||| promoterpromoterpromo\n",
|
1548 |
+
"promoter |||||||||||| promoterpromoterpromo\n",
|
1549 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1550 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1551 |
+
"promoter |||||||||||| promoterpromoterpromo\n",
|
1552 |
+
"promoter |||||||||||| promoterpromoterpromo\n",
|
1553 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1554 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1555 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1556 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1557 |
+
"promoter |||||||||||| promoterpromoterpromo\n",
|
1558 |
+
"promoter |||||||||||| promoterpromoterpromo\n",
|
1559 |
+
"promoter |||||||||||| Non-promoter\n",
|
1560 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1561 |
+
"promoter |||||||||||| promoterpromoterpromo\n",
|
1562 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1563 |
+
"promoter |||||||||||| promoterpromoterpromo\n",
|
1564 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1565 |
+
"promoter |||||||||||| promoterpromoterpromo\n",
|
1566 |
+
"promoter |||||||||||| promoterpromoterpromo\n",
|
1567 |
+
"promoter |||||||||||| Non-promoter\n",
|
1568 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1569 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1570 |
+
"promoter |||||||||||| promoterpromoterpromo\n",
|
1571 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1572 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1573 |
+
"promoter |||||||||||| promoterpromoterpromo\n",
|
1574 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1575 |
+
"promoter |||||||||||| promoterpromoterpromo\n",
|
1576 |
+
"promoter |||||||||||| promoterpromoterpromo\n",
|
1577 |
+
"promoter |||||||||||| promoterpromoterpromo\n",
|
1578 |
+
"promoter |||||||||||| promoterpromoterpromo\n",
|
1579 |
+
"promoter |||||||||||| promoterpromoterpromo\n",
|
1580 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1581 |
+
"promoter |||||||||||| promoterpromoterpromo\n",
|
1582 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1583 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1584 |
+
"promoter |||||||||||| promoterpromoterpromo\n",
|
1585 |
+
"promoter |||||||||||| promoterpromoterpromo\n",
|
1586 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1587 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1588 |
+
"Non-promoter |||||||||||| promoterpromoterpromo\n",
|
1589 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1590 |
+
"promoter |||||||||||| promoterpromoterpromo\n",
|
1591 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1592 |
+
"promoter |||||||||||| promoterpromoterpromo\n",
|
1593 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1594 |
+
"promoter |||||||||||| promoterpromoterpromo\n",
|
1595 |
+
"promoter |||||||||||| promoterpromoterpromo\n",
|
1596 |
+
"promoter |||||||||||| promoterpromoterpromo\n",
|
1597 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1598 |
+
"promoter |||||||||||| promoterpromoterpromo\n",
|
1599 |
+
"promoter |||||||||||| promoterpromoterpromo\n",
|
1600 |
+
"promoter |||||||||||| promoterpromoterpromo\n",
|
1601 |
+
"promoter |||||||||||| promoterpromoterpromo\n",
|
1602 |
+
"promoter |||||||||||| promoterpromoterpromo\n",
|
1603 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1604 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1605 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1606 |
+
"promoter |||||||||||| promoterpromoterpromo\n",
|
1607 |
+
"promoter |||||||||||| promoterpromoterpromo\n",
|
1608 |
+
"promoter |||||||||||| promoterpromoterpromo\n",
|
1609 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1610 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1611 |
+
"promoter |||||||||||| promoterpromoterpromo\n",
|
1612 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1613 |
+
"promoter |||||||||||| promoterpromoterpromo\n",
|
1614 |
+
"promoter |||||||||||| promoterpromoterpromo\n",
|
1615 |
+
"promoter |||||||||||| promoterpromoterpromo\n",
|
1616 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1617 |
+
"promoter |||||||||||| promoterpromoterpromo\n",
|
1618 |
+
"promoter |||||||||||| promoterpromoterpromo\n",
|
1619 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1620 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
1621 |
+
"presicion 0.94 same 0.49\n"
|
1622 |
+
]
|
1623 |
+
}
|
1624 |
+
],
|
1625 |
"source": [
|
1626 |
"import json\n",
|
1627 |
"\n",
|
1628 |
"\n",
|
1629 |
+
"output_file = 'gpt2-small3-1024.json'\n",
|
1630 |
"\n",
|
1631 |
"with open(output_file, \"r\") as file:\n",
|
1632 |
" test_data = json.load(file)\n",
|
|
|
1638 |
" output = item[\"output\"]\n",
|
1639 |
" #output = \" \".join(tokenizer.tokenize(output))\n",
|
1640 |
" model_response = item[\"model_response\"]\n",
|
1641 |
+
"\n",
|
1642 |
+
" print(output,\"||||||||||||\", model_response)\n",
|
1643 |
+
"\n",
|
1644 |
" if model_response == output: #same it\n",
|
1645 |
" same_sum = same_sum + 1\n",
|
1646 |
" \n",
|
1647 |
+
" if output.find(\"Non\")==-1: # no Non\n",
|
1648 |
+
" if model_response.find(output)!=-1 and model_response.find(\"Non\")==-1: #find it, but no Non\n",
|
1649 |
+
" right_sum = right_sum + 1\n",
|
1650 |
+
" else:\n",
|
1651 |
+
" if model_response.find(output)!=-1: #find it\n",
|
1652 |
+
" right_sum = right_sum + 1\n",
|
1653 |
"\n",
|
1654 |
"\n",
|
1655 |
+
"print(\"Accuracy\", right_sum/all_num, \"same\", same_sum/all_num)"
|
1656 |
]
|
1657 |
+
},
|
1658 |
+
{
|
1659 |
+
"cell_type": "code",
|
1660 |
+
"execution_count": null,
|
1661 |
+
"id": "8bf88885-fb33-406e-9644-cd174a8a2f28",
|
1662 |
+
"metadata": {},
|
1663 |
+
"outputs": [],
|
1664 |
+
"source": []
|
1665 |
}
|
1666 |
],
|
1667 |
"metadata": {
|
04-gene-sft/4-deepspeed-intro.ipynb
CHANGED
@@ -10,12 +10,172 @@
|
|
10 |
},
|
11 |
{
|
12 |
"cell_type": "markdown",
|
13 |
-
"id": "
|
14 |
"metadata": {},
|
15 |
"source": [
|
16 |
-
"##
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
]
|
18 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
{
|
20 |
"cell_type": "markdown",
|
21 |
"id": "c0d29667-1e75-46df-8f65-cae27609ee3f",
|
@@ -169,171 +329,19 @@
|
|
169 |
]
|
170 |
},
|
171 |
{
|
172 |
-
"cell_type": "
|
173 |
-
"
|
|
|
174 |
"metadata": {},
|
175 |
-
"
|
176 |
-
|
177 |
-
"\n",
|
178 |
-
"大模型的并行训练旨在克服单个 GPU 显存的限制和加速训练过程,通常适用于参数规模较大的模型(如 GPT-3、T5 等)。并行训练主要包括以下几种方法,每种方法适用于不同的场景和模型特性。\n",
|
179 |
-
"\n",
|
180 |
-
"---\n",
|
181 |
-
"\n",
|
182 |
-
"### **1. 数据并行(Data Parallelism)**\n",
|
183 |
-
"\n",
|
184 |
-
"#### **原理**\n",
|
185 |
-
"- 将数据切分成多个小批次,每个 GPU 处理其中一部分。\n",
|
186 |
-
"- 模型副本被复制到每个 GPU。\n",
|
187 |
-
"- 每个 GPU 独立计算梯度,最终通过梯度同步(如 AllReduce 操作)更新参数。\n",
|
188 |
-
"\n",
|
189 |
-
"#### **特点**\n",
|
190 |
-
"- **优点**:\n",
|
191 |
-
" - 实现简单,是最常用的并行方法。\n",
|
192 |
-
" - 对模型大小没有限制。\n",
|
193 |
-
"- **缺点**:\n",
|
194 |
-
" - 模型副本需要完整加载到每个 GPU,占用显存。\n",
|
195 |
-
" - 在超大规模模型中,显存压力较大。\n",
|
196 |
-
"\n",
|
197 |
-
"#### **适用场景**\n",
|
198 |
-
"- 参数规模适中,显存可以容纳整个模型的场景。\n",
|
199 |
-
"\n",
|
200 |
-
"---\n",
|
201 |
-
"\n",
|
202 |
-
"### **2. 模型并行(Model Parallelism)**\n",
|
203 |
-
"\n",
|
204 |
-
"#### **原理**\n",
|
205 |
-
"- 将模型切分成不同的部分,将不同部分分配到不同的 GPU。\n",
|
206 |
-
"- 前向传播和后向传播时,数据在模型的不同部分之间传递。\n",
|
207 |
-
"\n",
|
208 |
-
"#### **特点**\n",
|
209 |
-
"- **优点**:\n",
|
210 |
-
" - 不需要复制整个模型,可以支持超大规模模型。\n",
|
211 |
-
"- **缺点**:\n",
|
212 |
-
" - GPU 之间通信频繁,可能成为性能瓶颈。\n",
|
213 |
-
" - 实现复杂,切分模型需要精心设计。\n",
|
214 |
-
" \n",
|
215 |
-
"#### **适用场景**\n",
|
216 |
-
"- 单个 GPU 无法容纳完整模型参数的场景。\n",
|
217 |
-
"\n",
|
218 |
-
"#### **具体实现**\n",
|
219 |
-
"- 将 Transformer 的不同层分配到不同的 GPU。\n",
|
220 |
-
"- 常用工具:DeepSpeed 的 Pipeline Parallelism、NVIDIA Megatron-LM。\n",
|
221 |
-
"\n",
|
222 |
-
"---\n",
|
223 |
-
"\n",
|
224 |
-
"### **3. 张量并行(Tensor Parallelism)**\n",
|
225 |
-
"\n",
|
226 |
-
"#### **原理**\n",
|
227 |
-
"- 将模型内部的张量(如权重矩阵)切分为多个子张量,并分配到不同 GPU。\n",
|
228 |
-
"- GPU 之间协作完成矩阵计算。\n",
|
229 |
-
"\n",
|
230 |
-
"#### **特点**\n",
|
231 |
-
"- **优点**:\n",
|
232 |
-
" - 减少了每个 GPU 的显存占用,同时保持模型整体完整性。\n",
|
233 |
-
"- **缺点**:\n",
|
234 |
-
" - 实现较复杂,需要优化通信操作。\n",
|
235 |
-
" - 通信开销较高,适合较大批量的训练。\n",
|
236 |
-
"\n",
|
237 |
-
"#### **适用场景**\n",
|
238 |
-
"- 参数非常大的模型(如 GPT-3)。\n",
|
239 |
-
"- 需要极致优化显存的场景。\n",
|
240 |
-
"\n",
|
241 |
-
"#### **具体实现**\n",
|
242 |
-
"- NVIDIA 的 Megatron-LM 和 Hugging Face Transformers 提供了张量并行的支持。\n",
|
243 |
-
"\n",
|
244 |
-
"---\n",
|
245 |
-
"\n",
|
246 |
-
"### **4. 管道并行(Pipeline Parallelism)**\n",
|
247 |
-
"\n",
|
248 |
-
"#### **原理**\n",
|
249 |
-
"- 将模型分为不同的部分(通常是按层划分),每部分分配到不同的 GPU。\n",
|
250 |
-
"- 数据按照流水线的方式流经每个 GPU。\n",
|
251 |
-
"\n",
|
252 |
-
"#### **特点**\n",
|
253 |
-
"- **优点**:\n",
|
254 |
-
" - 减少每个 GPU 的显存压力。\n",
|
255 |
-
" - 通过流水线增加计算效率。\n",
|
256 |
-
"- **缺点**:\n",
|
257 |
-
" - 引入流水线延迟。\n",
|
258 |
-
" - 实现复杂,需管理数据依赖和同步。\n",
|
259 |
-
"\n",
|
260 |
-
"#### **适用场景**\n",
|
261 |
-
"- 模型非常深,层数较多的场景。\n",
|
262 |
-
"\n",
|
263 |
-
"#### **具体实现**\n",
|
264 |
-
"- DeepSpeed 的 Pipeline Parallelism。\n",
|
265 |
-
"\n",
|
266 |
-
"---\n",
|
267 |
-
"\n",
|
268 |
-
"### **5. 混合并行(Hybrid Parallelism)**\n",
|
269 |
-
"\n",
|
270 |
-
"#### **原理**\n",
|
271 |
-
"- 将数据并行、模型并行、张量并行和管道并行组合使用,充分利用多 GPU 资源。\n",
|
272 |
-
"- 不同的并行方法在不同维度协同工作。\n",
|
273 |
-
"\n",
|
274 |
-
"#### **特点**\n",
|
275 |
-
"- **优点**:\n",
|
276 |
-
" - 灵活且适应性强,适合超大规模模型。\n",
|
277 |
-
"- **缺点**:\n",
|
278 |
-
" - 配置复杂,依赖于框架和训练任务。\n",
|
279 |
-
"\n",
|
280 |
-
"#### **适用场景**\n",
|
281 |
-
"- 超大规模模型(如 GPT-3 或参数量 >1T)。\n",
|
282 |
-
"- 多机多卡的大型训练环境。\n",
|
283 |
-
"\n",
|
284 |
-
"#### **具体实现**\n",
|
285 |
-
"- NVIDIA Megatron-LM 和 DeepSpeed 的混合并行支持。\n",
|
286 |
-
"\n",
|
287 |
-
"---\n",
|
288 |
-
"\n",
|
289 |
-
"### **6. ZeRO 优化并行(Zero Redundancy Optimizer)**\n",
|
290 |
-
"\n",
|
291 |
-
"#### **原理**\n",
|
292 |
-
"- 通过分片存储模型参数、优化器状态和梯度,显著减少每个 GPU 的显存占用。\n",
|
293 |
-
"\n",
|
294 |
-
"#### **特点**\n",
|
295 |
-
"- **优点**:\n",
|
296 |
-
" - 极大降低显存需求。\n",
|
297 |
-
" - 支持超大规模模型。\n",
|
298 |
-
"- **缺点**:\n",
|
299 |
-
" - 对 GPU 间通信要求较高。\n",
|
300 |
-
" - 比数据并行复杂。\n",
|
301 |
-
"\n",
|
302 |
-
"#### **适用场景**\n",
|
303 |
-
"- 超大模型的高效训练。\n",
|
304 |
-
"\n",
|
305 |
-
"#### **具体实现**\n",
|
306 |
-
"- DeepSpeed 提供的 ZeRO Stage 1/2/3。\n",
|
307 |
-
"\n",
|
308 |
-
"---\n",
|
309 |
-
"\n",
|
310 |
-
"### **方法对比**\n",
|
311 |
-
"\n",
|
312 |
-
"| 并行方法 | 主要优点 | 主要缺点 | 适用场景 |\n",
|
313 |
-
"|---------------|-------------------------------|-------------------------------|---------------------------|\n",
|
314 |
-
"| 数据并行 | 简单高效,易实现 | 模型副本占用大量显存 | 模型规模适中,显存足够 |\n",
|
315 |
-
"| 模型并行 | 支持大模型 | 通信开销大,切分复杂 | 超大模型,显存有限 |\n",
|
316 |
-
"| 张量并行 | 高效利用显存 | 实现复杂,通信频繁 | 参数规模极大的模型 |\n",
|
317 |
-
"| 管道并行 | 显存需求降低,适合深模型 | 流水线延迟,数据同步复杂 | 层数多的大型模型 |\n",
|
318 |
-
"| 混合并行 | 灵活适配超大规模模型 | 配置复杂,依赖框架 | 超大规模模型(如 GPT-3) |\n",
|
319 |
-
"| ZeRO 并行 | 极大节省显存,占用少 | 通信成本高 | 超大规模模型显存优化 |\n",
|
320 |
-
"\n",
|
321 |
-
"---\n",
|
322 |
-
"\n",
|
323 |
-
"### **总结**\n",
|
324 |
-
"- **中等规模模型**:优先使用 **数据并行**。\n",
|
325 |
-
"- **单卡显存不足**:采用 **模型并行** 或 **张量并行**。\n",
|
326 |
-
"- **超大规模模型**:使用 **混合并行** 或 DeepSpeed 的 **ZeRO 优化**。\n",
|
327 |
-
"\n",
|
328 |
-
"对于现代超大规模模型,通常采用混合并行方法,比如 NVIDIA 的 Megatron-LM 和微软的 DeepSpeed,它们综合了多种并行策略,能够有效利用计算资源并加速训练。如果您有具体的硬件环境或模型需求,可以进一步探讨适合的并行方案!"
|
329 |
-
]
|
330 |
},
|
331 |
{
|
332 |
"cell_type": "markdown",
|
333 |
"id": "cd848439-bac8-46b2-9a0f-59ae7c343954",
|
334 |
"metadata": {},
|
335 |
"source": [
|
336 |
-
"## deepspeed
|
337 |
"\n",
|
338 |
"\n",
|
339 |
"是的,DeepSpeed 支持多种并行策略,包括 **数据并行**、**模型并行** 和 **张量并行**,并且可以通过其��置文件灵活地设置这些并行模式。\n",
|
@@ -544,6 +552,22 @@
|
|
544 |
"DeepSpeed 的配置高度灵活,可以根据模型大小、显存限制和硬件条件选择适合的并行策略。"
|
545 |
]
|
546 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
547 |
{
|
548 |
"cell_type": "markdown",
|
549 |
"id": "ab2812bc-f743-4f18-b49c-972781484dc6",
|
@@ -569,14 +593,6 @@
|
|
569 |
"metadata": {},
|
570 |
"outputs": [],
|
571 |
"source": []
|
572 |
-
},
|
573 |
-
{
|
574 |
-
"cell_type": "code",
|
575 |
-
"execution_count": null,
|
576 |
-
"id": "ce701aeb-c8c7-450a-bbf9-b793a19cd0c6",
|
577 |
-
"metadata": {},
|
578 |
-
"outputs": [],
|
579 |
-
"source": []
|
580 |
}
|
581 |
],
|
582 |
"metadata": {
|
|
|
10 |
},
|
11 |
{
|
12 |
"cell_type": "markdown",
|
13 |
+
"id": "75b8219d-8069-4b18-96c8-d5024ee049f1",
|
14 |
"metadata": {},
|
15 |
"source": [
|
16 |
+
"## 大模型并行训练简介\n",
|
17 |
+
"\n",
|
18 |
+
"大模型的并行训练旨在克服单个 GPU 显存的限制和加速训练过程,通常适用于参数规模较大的模型(如 GPT-3、T5 等)。并行训练主要包括以下几种方法,每种方法适用于不同的场景和模型特性。\n",
|
19 |
+
"\n",
|
20 |
+
"---\n",
|
21 |
+
"\n",
|
22 |
+
"### **1. 数据并行(Data Parallelism)**\n",
|
23 |
+
"\n",
|
24 |
+
"#### **原理**\n",
|
25 |
+
"- 将数据切分成多个小批次,每个 GPU 处理其中一部分。\n",
|
26 |
+
"- 模型副本被复制到每个 GPU。\n",
|
27 |
+
"- 每个 GPU 独立计算梯度,最终通过梯度同步(如 AllReduce 操作)更新参数。\n",
|
28 |
+
"\n",
|
29 |
+
"#### **特点**\n",
|
30 |
+
"- **优点**:\n",
|
31 |
+
" - 实现简单,是最常用的并行方法。\n",
|
32 |
+
" - 对模型大小没有限制。\n",
|
33 |
+
"- **缺点**:\n",
|
34 |
+
" - 模型副本需要完整加载到每个 GPU,占用显存。\n",
|
35 |
+
" - 在超大规模模型中,显存压力较大。\n",
|
36 |
+
"\n",
|
37 |
+
"#### **适用场景**\n",
|
38 |
+
"- 参数规模适中,显存可以容纳整个模型的场景。\n",
|
39 |
+
"\n",
|
40 |
+
"---\n",
|
41 |
+
"\n",
|
42 |
+
"### **2. 模型并行(Model Parallelism)**\n",
|
43 |
+
"\n",
|
44 |
+
"#### **原理**\n",
|
45 |
+
"- 将模型切分成不同的部分,将不同部分分配到不同的 GPU。\n",
|
46 |
+
"- 前向传播和后向传播时,数据在模型的不同部分之间传递。\n",
|
47 |
+
"\n",
|
48 |
+
"#### **特点**\n",
|
49 |
+
"- **优点**:\n",
|
50 |
+
" - 不需要复制整个模型,可以支持超大规模模型。\n",
|
51 |
+
"- **缺点**:\n",
|
52 |
+
" - GPU 之间通信频繁,可能成为性能瓶颈。\n",
|
53 |
+
" - 实现复杂,切分模型需要精心设计。\n",
|
54 |
+
" \n",
|
55 |
+
"#### **适用场景**\n",
|
56 |
+
"- 单个 GPU 无法容纳完整模型参数的场景。\n",
|
57 |
+
"\n",
|
58 |
+
"#### **具体实现**\n",
|
59 |
+
"- 将 Transformer 的不同层分配到不同的 GPU。\n",
|
60 |
+
"- 常用工具:DeepSpeed 的 Pipeline Parallelism、NVIDIA Megatron-LM。\n",
|
61 |
+
"\n",
|
62 |
+
"---\n",
|
63 |
+
"\n",
|
64 |
+
"### **3. 张量并行(Tensor Parallelism)**\n",
|
65 |
+
"\n",
|
66 |
+
"#### **原理**\n",
|
67 |
+
"- 将模型内部的张量(如权重矩阵)切分为多个子张量,并分配到不同 GPU。\n",
|
68 |
+
"- GPU 之间协作完成矩阵计算。\n",
|
69 |
+
"\n",
|
70 |
+
"#### **特点**\n",
|
71 |
+
"- **优点**:\n",
|
72 |
+
" - 减少了每个 GPU 的显存占用,同时保持模型整体完整性。\n",
|
73 |
+
"- **缺点**:\n",
|
74 |
+
" - 实现较复杂,需要优化通信操作。\n",
|
75 |
+
" - 通信开销较高,适合较大批量的训练。\n",
|
76 |
+
"\n",
|
77 |
+
"#### **适用场景**\n",
|
78 |
+
"- 参数非常大的模型(如 GPT-3)。\n",
|
79 |
+
"- 需要极致优化显存的场景。\n",
|
80 |
+
"\n",
|
81 |
+
"#### **具体实现**\n",
|
82 |
+
"- NVIDIA 的 Megatron-LM 和 Hugging Face Transformers 提供了张量并行的支持。\n",
|
83 |
+
"\n",
|
84 |
+
"---\n",
|
85 |
+
"\n",
|
86 |
+
"### **4. 管道并行(Pipeline Parallelism)**\n",
|
87 |
+
"\n",
|
88 |
+
"#### **原理**\n",
|
89 |
+
"- 将模型分为不同的部分(通常是按层划分),每部分分配到不同的 GPU。\n",
|
90 |
+
"- 数据按照流水线的方式流经每个 GPU。\n",
|
91 |
+
"\n",
|
92 |
+
"#### **特点**\n",
|
93 |
+
"- **优点**:\n",
|
94 |
+
" - 减少每个 GPU 的显存压力。\n",
|
95 |
+
" - 通过流水线增加计算效率。\n",
|
96 |
+
"- **缺点**:\n",
|
97 |
+
" - 引入流水线延迟。\n",
|
98 |
+
" - 实现复杂,需管理数据依赖和同步。\n",
|
99 |
+
"\n",
|
100 |
+
"#### **适用场景**\n",
|
101 |
+
"- 模型非常深,层数较多的场景。\n",
|
102 |
+
"\n",
|
103 |
+
"#### **具体实现**\n",
|
104 |
+
"- DeepSpeed 的 Pipeline Parallelism。\n",
|
105 |
+
"\n",
|
106 |
+
"---\n",
|
107 |
+
"\n",
|
108 |
+
"### **5. 混合并行(Hybrid Parallelism)**\n",
|
109 |
+
"\n",
|
110 |
+
"#### **原理**\n",
|
111 |
+
"- 将数据并行、模型并行、张量并行和管道并行组合使用,充分利用多 GPU 资源。\n",
|
112 |
+
"- 不同的并行方法在不同维度协同工作。\n",
|
113 |
+
"\n",
|
114 |
+
"#### **特点**\n",
|
115 |
+
"- **优点**:\n",
|
116 |
+
" - 灵活且适应性强,适合超大规模模型。\n",
|
117 |
+
"- **缺点**:\n",
|
118 |
+
" - 配置复杂,依赖于框架和训练任务。\n",
|
119 |
+
"\n",
|
120 |
+
"#### **适用场景**\n",
|
121 |
+
"- 超大规模模型(如 GPT-3 或参数量 >1T)。\n",
|
122 |
+
"- 多机多卡的大型训练环境。\n",
|
123 |
+
"\n",
|
124 |
+
"#### **具体实现**\n",
|
125 |
+
"- NVIDIA Megatron-LM 和 DeepSpeed 的混合并行支持。\n",
|
126 |
+
"\n",
|
127 |
+
"---\n",
|
128 |
+
"\n",
|
129 |
+
"### **6. ZeRO 优化并行(Zero Redundancy Optimizer)**\n",
|
130 |
+
"\n",
|
131 |
+
"#### **原理**\n",
|
132 |
+
"- 通过分片存储模型参数、优化器状态和梯度,显著减少每个 GPU 的显存占用。\n",
|
133 |
+
"\n",
|
134 |
+
"#### **特点**\n",
|
135 |
+
"- **优点**:\n",
|
136 |
+
" - 极大降低显存需求。\n",
|
137 |
+
" - 支持超大规模模型。\n",
|
138 |
+
"- **缺点**:\n",
|
139 |
+
" - 对 GPU 间通信要求较高。\n",
|
140 |
+
" - 比数据并行复杂。\n",
|
141 |
+
"\n",
|
142 |
+
"#### **适用场景**\n",
|
143 |
+
"- 超大模型的高效训练。\n",
|
144 |
+
"\n",
|
145 |
+
"#### **具体实现**\n",
|
146 |
+
"- DeepSpeed 提供的 ZeRO Stage 1/2/3。\n",
|
147 |
+
"\n",
|
148 |
+
"---\n",
|
149 |
+
"\n",
|
150 |
+
"### **方法对比**\n",
|
151 |
+
"\n",
|
152 |
+
"| 并行方法 | 主要优点 | 主要缺点 | 适用场景 |\n",
|
153 |
+
"|---------------|-------------------------------|-------------------------------|---------------------------|\n",
|
154 |
+
"| 数据并行 | 简单高效,易实现 | 模型副本占用大量显存 | 模型规模适中,显存足够 |\n",
|
155 |
+
"| 模型并行 | 支持大模型 | 通信开销大,切分复杂 | 超大模型,显存有限 |\n",
|
156 |
+
"| 张量并行 | 高效利用显存 | 实现复杂,通信频繁 | 参数规模极大的模型 |\n",
|
157 |
+
"| 管道并行 | 显存需求降低,适合深模型 | 流水线延迟,数据同步复杂 | 层数多的大型模型 |\n",
|
158 |
+
"| 混合并行 | 灵活适配超大规模模型 | 配置复杂,依赖框架 | 超大规模模型(如 GPT-3) |\n",
|
159 |
+
"| ZeRO 并行 | 极大节省显存,占用少 | 通信成本高 | 超大规模模型显存优化 |\n",
|
160 |
+
"\n",
|
161 |
+
"---\n",
|
162 |
+
"\n",
|
163 |
+
"### **总结**\n",
|
164 |
+
"- **中等规模模型**:优先使用 **数据并行**。\n",
|
165 |
+
"- **单卡显存不足**:采用 **模型并行** 或 **张量并行**。\n",
|
166 |
+
"- **超大规模模型**:使用 **混合并行** 或 DeepSpeed 的 **ZeRO 优化**。\n",
|
167 |
+
"\n",
|
168 |
+
"对于现代超大规模模型,通常采用混合并行方法,比如 NVIDIA 的 Megatron-LM 和微软的 DeepSpeed,它们综合了多种并行策略,能够有效利用计算资源并加速训练。如果您有具体的硬件环境或模型需求,可以进一步探讨适合的并行方案!"
|
169 |
]
|
170 |
},
|
171 |
+
{
|
172 |
+
"cell_type": "code",
|
173 |
+
"execution_count": null,
|
174 |
+
"id": "06ddaa4d-e04a-41e0-beb5-f04dfaebcd54",
|
175 |
+
"metadata": {},
|
176 |
+
"outputs": [],
|
177 |
+
"source": []
|
178 |
+
},
|
179 |
{
|
180 |
"cell_type": "markdown",
|
181 |
"id": "c0d29667-1e75-46df-8f65-cae27609ee3f",
|
|
|
329 |
]
|
330 |
},
|
331 |
{
|
332 |
+
"cell_type": "code",
|
333 |
+
"execution_count": null,
|
334 |
+
"id": "a5372798-ced3-420c-b853-badd3ff05dc1",
|
335 |
"metadata": {},
|
336 |
+
"outputs": [],
|
337 |
+
"source": []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
338 |
},
|
339 |
{
|
340 |
"cell_type": "markdown",
|
341 |
"id": "cd848439-bac8-46b2-9a0f-59ae7c343954",
|
342 |
"metadata": {},
|
343 |
"source": [
|
344 |
+
"## deepspeed具体设置\n",
|
345 |
"\n",
|
346 |
"\n",
|
347 |
"是的,DeepSpeed 支持多种并行策略,包括 **数据并行**、**模型并行** 和 **张量并行**,并且可以通过其配置文件灵活地设置这些并行模式。\n",
|
|
|
552 |
"DeepSpeed 的配置高度灵活,可以根据模型大小、显存限制和硬件条件选择适合的并行策略。"
|
553 |
]
|
554 |
},
|
555 |
+
{
|
556 |
+
"cell_type": "code",
|
557 |
+
"execution_count": null,
|
558 |
+
"id": "a8e6de4c-adc1-4a1b-840a-c8542b4ed783",
|
559 |
+
"metadata": {},
|
560 |
+
"outputs": [],
|
561 |
+
"source": []
|
562 |
+
},
|
563 |
+
{
|
564 |
+
"cell_type": "markdown",
|
565 |
+
"id": "3383c2d7-91a9-4940-b3b2-698fb7d9dbb7",
|
566 |
+
"metadata": {},
|
567 |
+
"source": [
|
568 |
+
"## 使用gpt2+deepspeed训练"
|
569 |
+
]
|
570 |
+
},
|
571 |
{
|
572 |
"cell_type": "markdown",
|
573 |
"id": "ab2812bc-f743-4f18-b49c-972781484dc6",
|
|
|
593 |
"metadata": {},
|
594 |
"outputs": [],
|
595 |
"source": []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
596 |
}
|
597 |
],
|
598 |
"metadata": {
|
04-gene-sft/5-peft-intro.ipynb
CHANGED
@@ -8,6 +8,14 @@
|
|
8 |
"# 4.5 peft简介"
|
9 |
]
|
10 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
{
|
12 |
"cell_type": "markdown",
|
13 |
"id": "182b82c4-d484-4c15-a600-03c3b51367ec",
|
@@ -139,6 +147,22 @@
|
|
139 |
"如果您需要实现高效微调,可以结合 Hugging Face 的 PEFT 库快速上手。"
|
140 |
]
|
141 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
142 |
{
|
143 |
"cell_type": "code",
|
144 |
"execution_count": 1,
|
@@ -366,14 +390,6 @@
|
|
366 |
" print(name)"
|
367 |
]
|
368 |
},
|
369 |
-
{
|
370 |
-
"cell_type": "code",
|
371 |
-
"execution_count": null,
|
372 |
-
"id": "37aa6abb-ab1c-4e9c-b968-579dd74044db",
|
373 |
-
"metadata": {},
|
374 |
-
"outputs": [],
|
375 |
-
"source": []
|
376 |
-
},
|
377 |
{
|
378 |
"cell_type": "markdown",
|
379 |
"id": "0add2f79-f35c-4638-80bb-0d8a87a9b6a7",
|
@@ -502,11 +518,19 @@
|
|
502 |
{
|
503 |
"cell_type": "code",
|
504 |
"execution_count": null,
|
505 |
-
"id": "
|
506 |
"metadata": {},
|
507 |
"outputs": [],
|
508 |
"source": []
|
509 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
510 |
{
|
511 |
"cell_type": "markdown",
|
512 |
"id": "10c99eb9-8007-4297-972e-7be71768c9c3",
|
@@ -595,62 +619,6 @@
|
|
595 |
"\n",
|
596 |
"---\n",
|
597 |
"\n",
|
598 |
-
"### **2. 微调哪些参数,冻结哪些参数**\n",
|
599 |
-
"\n",
|
600 |
-
"LoRA 的核心思想是通过 **分解矩阵**,只更新少量参数,而冻结模型的大部分参数。以下是常见设置的说明:\n",
|
601 |
-
"\n",
|
602 |
-
"#### **微调的参数**\n",
|
603 |
-
"- LoRA 通过 `target_modules` 指定的模块,例如:\n",
|
604 |
-
" - GPT-2 的 `c_attn`(自注意力模块)。\n",
|
605 |
-
" - BERT 的 `query` 和 `key`。\n",
|
606 |
-
"- 这些模块是模型中对性能贡献最大的部分,通过微调这些模块,任务性能可以显著提升。\n",
|
607 |
-
"\n",
|
608 |
-
"#### **冻结的参数**\n",
|
609 |
-
"- 除了 `target_modules` 中指定的参数外,所有其他模型参数默认冻结,包括:\n",
|
610 |
-
" - 预训练权重的绝大部分。\n",
|
611 |
-
" - 偏置参数(如果 `bias=\"none\"`)。\n",
|
612 |
-
"\n",
|
613 |
-
"---\n",
|
614 |
-
"\n",
|
615 |
-
"### **3. 一般如何设置**\n",
|
616 |
-
"\n",
|
617 |
-
"#### **(1)针对不同任务调整**\n",
|
618 |
-
"- **文本分类任务**:\n",
|
619 |
-
" - 优先选择自注意力模块(如 `c_attn`)作为 `target_modules`。\n",
|
620 |
-
" - `r=8` 或 `r=16` 是常见选择,适中计算开销。\n",
|
621 |
-
" - 设置适当的 dropout(如 `lora_dropout=0.1`)以防止过拟合。\n",
|
622 |
-
" \n",
|
623 |
-
"- **语言生成任务**:\n",
|
624 |
-
" - 对 GPT-2 或 GPT-3,选择 `q_proj` 和 `v_proj`(query 和 value 投影模块)。\n",
|
625 |
-
" - `r=16` 或更高,适应生成任务的高复杂性。\n",
|
626 |
-
"\n",
|
627 |
-
"- **命名实体识别任务**:\n",
|
628 |
-
" - 优先选择 `q_proj` 和 `k_proj`(query 和 key 模块)。\n",
|
629 |
-
"\n",
|
630 |
-
"#### **(2)参数量与显存的权衡**\n",
|
631 |
-
"- 如果显存有限,减少 `r` 的值。\n",
|
632 |
-
"- 对小型任务,`r=4` 或 `r=8` 通常已经足够。\n",
|
633 |
-
"\n",
|
634 |
-
"#### **(3)偏置设置**\n",
|
635 |
-
"- 偏置参数的影响较小,在大多数情况下,可以选择 `bias=\"none\"` 保持冻结。\n",
|
636 |
-
"- 对非常依赖偏置的任务(如生成风格微调),可以尝试 `bias=\"lora_only\"`。\n",
|
637 |
-
"\n",
|
638 |
-
"---\n",
|
639 |
-
"\n",
|
640 |
-
"### **4. 示例:如何选择目标模块**\n",
|
641 |
-
"\n",
|
642 |
-
"#### **GPT-2**\n",
|
643 |
-
"对 GPT-2 来说,以下模块通常是微调的目标:\n",
|
644 |
-
"- **`c_attn`**:注意力模块的组合层。\n",
|
645 |
-
"- **`q_proj` 和 `v_proj`**:Query 和 Value 的线性投影。\n",
|
646 |
-
"\n",
|
647 |
-
"#### **BERT**\n",
|
648 |
-
"对 BERT 来说,以下模块通常是微调的目标:\n",
|
649 |
-
"- **`query`**:Attention 的 Query 模块。\n",
|
650 |
-
"- **`key`**:Attention 的 Key 模块。\n",
|
651 |
-
"\n",
|
652 |
-
"---\n",
|
653 |
-
"\n",
|
654 |
"### **5. 总结建议**\n",
|
655 |
"- **微调的参数**:优先选择模型中注意力相关模块。\n",
|
656 |
"- **冻结的参数**:大部分参数默认冻结以节省显存。\n",
|
@@ -664,22 +632,82 @@
|
|
664 |
},
|
665 |
{
|
666 |
"cell_type": "code",
|
667 |
-
"execution_count":
|
668 |
-
"id": "
|
669 |
"metadata": {},
|
670 |
"outputs": [],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
671 |
"source": [
|
672 |
"from transformers import AutoModelForSequenceClassification, AutoTokenizer, TrainingArguments, Trainer\n",
|
673 |
"from peft import LoraConfig, get_peft_model, TaskType\n",
|
674 |
"from datasets import load_dataset\n",
|
675 |
"from sklearn.metrics import accuracy_score, precision_recall_fscore_support\n",
|
|
|
676 |
"\n",
|
677 |
"# **1. 加载模型和分词器**\n",
|
678 |
-
"model_name = \"
|
679 |
"num_labels = 2 # 二分类任务\n",
|
680 |
"model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=num_labels)\n",
|
681 |
"tokenizer = AutoTokenizer.from_pretrained(model_name)\n",
|
682 |
-
"
|
|
|
|
|
|
|
683 |
"\n",
|
684 |
"# **2. 定义数据集**\n",
|
685 |
"# 示例数据集:dna_promoter_300\n",
|
@@ -693,7 +721,10 @@
|
|
693 |
" )\n",
|
694 |
"\n",
|
695 |
"tokenized_datasets = dataset.map(preprocess_function, batched=True)\n",
|
696 |
-
"tokenized_datasets = tokenized_datasets.rename_column(\"label\", \"labels\") # Hugging Face Trainer 要求标签列名为 'labels'\n",
|
|
|
|
|
|
|
697 |
"\n",
|
698 |
"# **4. 划分数据集**\n",
|
699 |
"train_dataset = tokenized_datasets[\"train\"]\n",
|
@@ -711,8 +742,156 @@
|
|
711 |
"\n",
|
712 |
"# 使用 LoRA 包装模型\n",
|
713 |
"model = get_peft_model(model, lora_config)\n",
|
714 |
-
"model.print_trainable_parameters() #
|
715 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
716 |
"# **6. 计算指标**\n",
|
717 |
"def compute_metrics(eval_pred):\n",
|
718 |
" predictions, labels = eval_pred\n",
|
@@ -729,7 +908,7 @@
|
|
729 |
" learning_rate=2e-5, # 学习率\n",
|
730 |
" per_device_train_batch_size=8, # 每设备的批量大小\n",
|
731 |
" per_device_eval_batch_size=8, # 每设备评估的批量大小\n",
|
732 |
-
" num_train_epochs=
|
733 |
" weight_decay=0.01, # 权重衰减\n",
|
734 |
" logging_dir=\"./logs\", # 日志路径\n",
|
735 |
" fp16=True, # 启用混合精度训练\n",
|
@@ -746,6 +925,7 @@
|
|
746 |
" train_dataset=train_dataset,\n",
|
747 |
" eval_dataset=test_dataset,\n",
|
748 |
" tokenizer=tokenizer,\n",
|
|
|
749 |
" compute_metrics=compute_metrics,\n",
|
750 |
")\n",
|
751 |
"\n",
|
|
|
8 |
"# 4.5 peft简介"
|
9 |
]
|
10 |
},
|
11 |
+
{
|
12 |
+
"cell_type": "markdown",
|
13 |
+
"id": "f4288594-c676-4369-aca1-730446f293d7",
|
14 |
+
"metadata": {},
|
15 |
+
"source": [
|
16 |
+
"## peft"
|
17 |
+
]
|
18 |
+
},
|
19 |
{
|
20 |
"cell_type": "markdown",
|
21 |
"id": "182b82c4-d484-4c15-a600-03c3b51367ec",
|
|
|
147 |
"如果您需要实现高效微调,可以结合 Hugging Face 的 PEFT 库快速上手。"
|
148 |
]
|
149 |
},
|
150 |
+
{
|
151 |
+
"cell_type": "code",
|
152 |
+
"execution_count": null,
|
153 |
+
"id": "a70b2631-c9b9-49da-96c6-6760c63040ac",
|
154 |
+
"metadata": {},
|
155 |
+
"outputs": [],
|
156 |
+
"source": []
|
157 |
+
},
|
158 |
+
{
|
159 |
+
"cell_type": "markdown",
|
160 |
+
"id": "7b47ddf3-85c9-4dd8-bbbb-34fc3bd6aa1b",
|
161 |
+
"metadata": {},
|
162 |
+
"source": [
|
163 |
+
"## GPT2使用peft样例"
|
164 |
+
]
|
165 |
+
},
|
166 |
{
|
167 |
"cell_type": "code",
|
168 |
"execution_count": 1,
|
|
|
390 |
" print(name)"
|
391 |
]
|
392 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
393 |
{
|
394 |
"cell_type": "markdown",
|
395 |
"id": "0add2f79-f35c-4638-80bb-0d8a87a9b6a7",
|
|
|
518 |
{
|
519 |
"cell_type": "code",
|
520 |
"execution_count": null,
|
521 |
+
"id": "14f20171-0719-4dfa-b888-147b657ebff4",
|
522 |
"metadata": {},
|
523 |
"outputs": [],
|
524 |
"source": []
|
525 |
},
|
526 |
+
{
|
527 |
+
"cell_type": "markdown",
|
528 |
+
"id": "b4e7bff2-2a4f-4a1d-9cb1-dd02aead2f85",
|
529 |
+
"metadata": {},
|
530 |
+
"source": [
|
531 |
+
"## LoraConfig具体配置"
|
532 |
+
]
|
533 |
+
},
|
534 |
{
|
535 |
"cell_type": "markdown",
|
536 |
"id": "10c99eb9-8007-4297-972e-7be71768c9c3",
|
|
|
619 |
"\n",
|
620 |
"---\n",
|
621 |
"\n",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
622 |
"### **5. 总结建议**\n",
|
623 |
"- **微调的参数**:优先选择模型中注意力相关模块。\n",
|
624 |
"- **冻结的参数**:大部分参数默认冻结以节省显存。\n",
|
|
|
632 |
},
|
633 |
{
|
634 |
"cell_type": "code",
|
635 |
+
"execution_count": 1,
|
636 |
+
"id": "bbc080ba-3ee8-4bc6-afd9-2a3241f1bcda",
|
637 |
"metadata": {},
|
638 |
"outputs": [],
|
639 |
+
"source": [
|
640 |
+
"import subprocess\n",
|
641 |
+
"import os\n",
|
642 |
+
"# 设置环境变量, autodl一般区域\n",
|
643 |
+
"result = subprocess.run('bash -c \"source /etc/network_turbo && env | grep proxy\"', shell=True, capture_output=True, text=True)\n",
|
644 |
+
"output = result.stdout\n",
|
645 |
+
"for line in output.splitlines():\n",
|
646 |
+
" if '=' in line:\n",
|
647 |
+
" var, value = line.split('=', 1)\n",
|
648 |
+
" os.environ[var] = value"
|
649 |
+
]
|
650 |
+
},
|
651 |
+
{
|
652 |
+
"cell_type": "code",
|
653 |
+
"execution_count": 7,
|
654 |
+
"id": "26d9f362-18cc-471f-b208-f29a6933c06a",
|
655 |
+
"metadata": {},
|
656 |
+
"outputs": [
|
657 |
+
{
|
658 |
+
"name": "stderr",
|
659 |
+
"output_type": "stream",
|
660 |
+
"text": [
|
661 |
+
"Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at dnagpt/dna_gpt2_v0 and are newly initialized: ['score.weight']\n",
|
662 |
+
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
|
663 |
+
]
|
664 |
+
},
|
665 |
+
{
|
666 |
+
"data": {
|
667 |
+
"application/vnd.jupyter.widget-view+json": {
|
668 |
+
"model_id": "f7e72521368341d38a2b11028715a871",
|
669 |
+
"version_major": 2,
|
670 |
+
"version_minor": 0
|
671 |
+
},
|
672 |
+
"text/plain": [
|
673 |
+
"Map: 0%| | 0/5920 [00:00<?, ? examples/s]"
|
674 |
+
]
|
675 |
+
},
|
676 |
+
"metadata": {},
|
677 |
+
"output_type": "display_data"
|
678 |
+
},
|
679 |
+
{
|
680 |
+
"name": "stdout",
|
681 |
+
"output_type": "stream",
|
682 |
+
"text": [
|
683 |
+
"trainable params: 296,448 || all params: 109,180,416 || trainable%: 0.2715\n"
|
684 |
+
]
|
685 |
+
},
|
686 |
+
{
|
687 |
+
"name": "stderr",
|
688 |
+
"output_type": "stream",
|
689 |
+
"text": [
|
690 |
+
"/root/miniconda3/lib/python3.12/site-packages/peft/tuners/lora/layer.py:1264: UserWarning: fan_in_fan_out is set to False but the target module is `Conv1D`. Setting fan_in_fan_out to True.\n",
|
691 |
+
" warnings.warn(\n"
|
692 |
+
]
|
693 |
+
}
|
694 |
+
],
|
695 |
"source": [
|
696 |
"from transformers import AutoModelForSequenceClassification, AutoTokenizer, TrainingArguments, Trainer\n",
|
697 |
"from peft import LoraConfig, get_peft_model, TaskType\n",
|
698 |
"from datasets import load_dataset\n",
|
699 |
"from sklearn.metrics import accuracy_score, precision_recall_fscore_support\n",
|
700 |
+
"from transformers import DataCollatorWithPadding\n",
|
701 |
"\n",
|
702 |
"# **1. 加载模型和分词器**\n",
|
703 |
+
"model_name = \"dnagpt/dna_gpt2_v0\" # 基础模型\n",
|
704 |
"num_labels = 2 # 二分类任务\n",
|
705 |
"model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=num_labels)\n",
|
706 |
"tokenizer = AutoTokenizer.from_pretrained(model_name)\n",
|
707 |
+
"\n",
|
708 |
+
"tokenizer.pad_token = tokenizer.eos_token\n",
|
709 |
+
"model.config.pad_token_id = tokenizer.pad_token_id\n",
|
710 |
+
"\n",
|
711 |
"\n",
|
712 |
"# **2. 定义数据集**\n",
|
713 |
"# 示例数据集:dna_promoter_300\n",
|
|
|
721 |
" )\n",
|
722 |
"\n",
|
723 |
"tokenized_datasets = dataset.map(preprocess_function, batched=True)\n",
|
724 |
+
"#tokenized_datasets = tokenized_datasets.rename_column(\"label\", \"labels\") # Hugging Face Trainer 要求标签列名为 'labels'\n",
|
725 |
+
"\n",
|
726 |
+
"# 4. 创建一个数据收集器,用于动态填充和遮蔽\n",
|
727 |
+
"data_collator = DataCollatorWithPadding(tokenizer=tokenizer)\n",
|
728 |
"\n",
|
729 |
"# **4. 划分数据集**\n",
|
730 |
"train_dataset = tokenized_datasets[\"train\"]\n",
|
|
|
742 |
"\n",
|
743 |
"# 使用 LoRA 包装模型\n",
|
744 |
"model = get_peft_model(model, lora_config)\n",
|
745 |
+
"model.print_trainable_parameters() # 打印可训练的参数信息"
|
746 |
+
]
|
747 |
+
},
|
748 |
+
{
|
749 |
+
"cell_type": "code",
|
750 |
+
"execution_count": 9,
|
751 |
+
"id": "7da39e7f-db92-483c-888d-19707ab35c5f",
|
752 |
+
"metadata": {},
|
753 |
+
"outputs": [
|
754 |
+
{
|
755 |
+
"name": "stderr",
|
756 |
+
"output_type": "stream",
|
757 |
+
"text": [
|
758 |
+
"/root/miniconda3/lib/python3.12/site-packages/transformers/training_args.py:1575: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead\n",
|
759 |
+
" warnings.warn(\n",
|
760 |
+
"/tmp/ipykernel_2399/3695291394.py:28: FutureWarning: `tokenizer` is deprecated and will be removed in version 5.0.0 for `Trainer.__init__`. Use `processing_class` instead.\n",
|
761 |
+
" trainer = Trainer(\n"
|
762 |
+
]
|
763 |
+
},
|
764 |
+
{
|
765 |
+
"data": {
|
766 |
+
"text/html": [
|
767 |
+
"\n",
|
768 |
+
" <div>\n",
|
769 |
+
" \n",
|
770 |
+
" <progress value='66600' max='66600' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
|
771 |
+
" [66600/66600 34:07, Epoch 10/10]\n",
|
772 |
+
" </div>\n",
|
773 |
+
" <table border=\"1\" class=\"dataframe\">\n",
|
774 |
+
" <thead>\n",
|
775 |
+
" <tr style=\"text-align: left;\">\n",
|
776 |
+
" <th>Epoch</th>\n",
|
777 |
+
" <th>Training Loss</th>\n",
|
778 |
+
" <th>Validation Loss</th>\n",
|
779 |
+
" <th>Accuracy</th>\n",
|
780 |
+
" <th>Precision</th>\n",
|
781 |
+
" <th>Recall</th>\n",
|
782 |
+
" <th>F1</th>\n",
|
783 |
+
" </tr>\n",
|
784 |
+
" </thead>\n",
|
785 |
+
" <tbody>\n",
|
786 |
+
" <tr>\n",
|
787 |
+
" <td>1</td>\n",
|
788 |
+
" <td>0.268300</td>\n",
|
789 |
+
" <td>0.307843</td>\n",
|
790 |
+
" <td>0.909797</td>\n",
|
791 |
+
" <td>0.916809</td>\n",
|
792 |
+
" <td>0.901987</td>\n",
|
793 |
+
" <td>0.909338</td>\n",
|
794 |
+
" </tr>\n",
|
795 |
+
" <tr>\n",
|
796 |
+
" <td>2</td>\n",
|
797 |
+
" <td>0.287400</td>\n",
|
798 |
+
" <td>0.278804</td>\n",
|
799 |
+
" <td>0.913514</td>\n",
|
800 |
+
" <td>0.901339</td>\n",
|
801 |
+
" <td>0.929269</td>\n",
|
802 |
+
" <td>0.915091</td>\n",
|
803 |
+
" </tr>\n",
|
804 |
+
" <tr>\n",
|
805 |
+
" <td>3</td>\n",
|
806 |
+
" <td>0.282800</td>\n",
|
807 |
+
" <td>0.291222</td>\n",
|
808 |
+
" <td>0.914527</td>\n",
|
809 |
+
" <td>0.913116</td>\n",
|
810 |
+
" <td>0.916807</td>\n",
|
811 |
+
" <td>0.914958</td>\n",
|
812 |
+
" </tr>\n",
|
813 |
+
" <tr>\n",
|
814 |
+
" <td>4</td>\n",
|
815 |
+
" <td>0.255200</td>\n",
|
816 |
+
" <td>0.281572</td>\n",
|
817 |
+
" <td>0.916385</td>\n",
|
818 |
+
" <td>0.896474</td>\n",
|
819 |
+
" <td>0.942068</td>\n",
|
820 |
+
" <td>0.918706</td>\n",
|
821 |
+
" </tr>\n",
|
822 |
+
" <tr>\n",
|
823 |
+
" <td>5</td>\n",
|
824 |
+
" <td>0.252000</td>\n",
|
825 |
+
" <td>0.271950</td>\n",
|
826 |
+
" <td>0.914527</td>\n",
|
827 |
+
" <td>0.913116</td>\n",
|
828 |
+
" <td>0.916807</td>\n",
|
829 |
+
" <td>0.914958</td>\n",
|
830 |
+
" </tr>\n",
|
831 |
+
" <tr>\n",
|
832 |
+
" <td>6</td>\n",
|
833 |
+
" <td>0.242300</td>\n",
|
834 |
+
" <td>0.288199</td>\n",
|
835 |
+
" <td>0.916385</td>\n",
|
836 |
+
" <td>0.916498</td>\n",
|
837 |
+
" <td>0.916807</td>\n",
|
838 |
+
" <td>0.916653</td>\n",
|
839 |
+
" </tr>\n",
|
840 |
+
" <tr>\n",
|
841 |
+
" <td>7</td>\n",
|
842 |
+
" <td>0.253500</td>\n",
|
843 |
+
" <td>0.268673</td>\n",
|
844 |
+
" <td>0.918750</td>\n",
|
845 |
+
" <td>0.909480</td>\n",
|
846 |
+
" <td>0.930616</td>\n",
|
847 |
+
" <td>0.919927</td>\n",
|
848 |
+
" </tr>\n",
|
849 |
+
" <tr>\n",
|
850 |
+
" <td>8</td>\n",
|
851 |
+
" <td>0.235900</td>\n",
|
852 |
+
" <td>0.277893</td>\n",
|
853 |
+
" <td>0.917568</td>\n",
|
854 |
+
" <td>0.906855</td>\n",
|
855 |
+
" <td>0.931290</td>\n",
|
856 |
+
" <td>0.918910</td>\n",
|
857 |
+
" </tr>\n",
|
858 |
+
" <tr>\n",
|
859 |
+
" <td>9</td>\n",
|
860 |
+
" <td>0.238600</td>\n",
|
861 |
+
" <td>0.280647</td>\n",
|
862 |
+
" <td>0.917568</td>\n",
|
863 |
+
" <td>0.913362</td>\n",
|
864 |
+
" <td>0.923206</td>\n",
|
865 |
+
" <td>0.918258</td>\n",
|
866 |
+
" </tr>\n",
|
867 |
+
" <tr>\n",
|
868 |
+
" <td>10</td>\n",
|
869 |
+
" <td>0.237900</td>\n",
|
870 |
+
" <td>0.284149</td>\n",
|
871 |
+
" <td>0.917736</td>\n",
|
872 |
+
" <td>0.913391</td>\n",
|
873 |
+
" <td>0.923543</td>\n",
|
874 |
+
" <td>0.918439</td>\n",
|
875 |
+
" </tr>\n",
|
876 |
+
" </tbody>\n",
|
877 |
+
"</table><p>"
|
878 |
+
],
|
879 |
+
"text/plain": [
|
880 |
+
"<IPython.core.display.HTML object>"
|
881 |
+
]
|
882 |
+
},
|
883 |
+
"metadata": {},
|
884 |
+
"output_type": "display_data"
|
885 |
+
},
|
886 |
+
{
|
887 |
+
"name": "stdout",
|
888 |
+
"output_type": "stream",
|
889 |
+
"text": [
|
890 |
+
"训练完成,模型已保存至 ./gpt2_lora_text_classification\n"
|
891 |
+
]
|
892 |
+
}
|
893 |
+
],
|
894 |
+
"source": [
|
895 |
"# **6. 计算指标**\n",
|
896 |
"def compute_metrics(eval_pred):\n",
|
897 |
" predictions, labels = eval_pred\n",
|
|
|
908 |
" learning_rate=2e-5, # 学习率\n",
|
909 |
" per_device_train_batch_size=8, # 每设备的批量大小\n",
|
910 |
" per_device_eval_batch_size=8, # 每设备评估的批量大小\n",
|
911 |
+
" num_train_epochs=10, # 训练轮数\n",
|
912 |
" weight_decay=0.01, # 权重衰减\n",
|
913 |
" logging_dir=\"./logs\", # 日志路径\n",
|
914 |
" fp16=True, # 启用混合精度训练\n",
|
|
|
925 |
" train_dataset=train_dataset,\n",
|
926 |
" eval_dataset=test_dataset,\n",
|
927 |
" tokenizer=tokenizer,\n",
|
928 |
+
" data_collator=data_collator,\n",
|
929 |
" compute_metrics=compute_metrics,\n",
|
930 |
")\n",
|
931 |
"\n",
|
04-gene-sft/6-llama-continue-train.ipynb
CHANGED
@@ -330,6 +330,38 @@
|
|
330 |
"本节任务是基于llama,训练一个能够处理dna和protein蛋白质数据的基础预训练大模型,数据为第一章中的预训练数据,包括英文数据。"
|
331 |
]
|
332 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
333 |
{
|
334 |
"cell_type": "markdown",
|
335 |
"id": "b1bd33b8-2e05-4b59-9d8f-c48de194cfd6",
|
@@ -339,8 +371,18 @@
|
|
339 |
"\n",
|
340 |
"```\n",
|
341 |
"# 复制第一章训练数据,包括dna,protein,还有英文数据,添加英文数据是为了避免遗忘问题\n",
|
|
|
342 |
"mkdir train_data\n",
|
343 |
"cp ../01-data_env/data/*.txt train_data/\n",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
344 |
"\n",
|
345 |
"#持续预训练\n",
|
346 |
"./run_pt.sh\n",
|
@@ -361,7 +403,7 @@
|
|
361 |
},
|
362 |
{
|
363 |
"cell_type": "code",
|
364 |
-
"execution_count":
|
365 |
"id": "69b3e97f-a801-4264-a651-a854bcfba9c6",
|
366 |
"metadata": {},
|
367 |
"outputs": [],
|
@@ -376,10 +418,25 @@
|
|
376 |
},
|
377 |
{
|
378 |
"cell_type": "code",
|
379 |
-
"execution_count":
|
380 |
"id": "339435d9-9379-4b30-ae8b-50feee1ba714",
|
381 |
"metadata": {},
|
382 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
383 |
"source": [
|
384 |
"tokenizer = LlamaTokenizer.from_pretrained(\"dnahlm-merge-hf\")\n",
|
385 |
"tokenizer.pad_token = tokenizer.eos_token\n",
|
@@ -388,10 +445,61 @@
|
|
388 |
},
|
389 |
{
|
390 |
"cell_type": "code",
|
391 |
-
"execution_count":
|
392 |
"id": "d0f154bb-b1ab-4611-a14c-9b403043fd96",
|
393 |
"metadata": {},
|
394 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
395 |
"source": [
|
396 |
"model = LlamaForCausalLM.from_pretrained(\"dnahlm-merge-hf\") #continue pretrain\n",
|
397 |
"model"
|
@@ -399,10 +507,51 @@
|
|
399 |
},
|
400 |
{
|
401 |
"cell_type": "code",
|
402 |
-
"execution_count":
|
403 |
"id": "792a9f78-1828-4695-9f6e-479a704ea7e8",
|
404 |
"metadata": {},
|
405 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
406 |
"source": [
|
407 |
"from transformers import AutoConfig\n",
|
408 |
"# 加载配置\n",
|
@@ -412,10 +561,22 @@
|
|
412 |
},
|
413 |
{
|
414 |
"cell_type": "code",
|
415 |
-
"execution_count":
|
416 |
"id": "49021c65-54bb-4a97-a96d-b030cc3dcd13",
|
417 |
"metadata": {},
|
418 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
419 |
"source": [
|
420 |
"text='''GCTGACTCTGCCAGGATGGAATGAAATTAGGTTGTTTTAATTATAATGTAAAGTCAGTTCTAGTCAGACATAGTCACATAGGCAAGTAAGGGAACCTAAAATTGCTTGGAAT,\n",
|
421 |
"KCGFVGPMVHLKVHLEADVASSCRSAVIYLTSEEPFEGVLGLRLKEGIAITGCWPRWPDEMDERSAVWRVEPYTRHFGRVLYSFGV,\n",
|
@@ -426,10 +587,45 @@
|
|
426 |
},
|
427 |
{
|
428 |
"cell_type": "code",
|
429 |
-
"execution_count":
|
430 |
"id": "ebf869c8-866d-4770-8f64-79d671f88663",
|
431 |
"metadata": {},
|
432 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
433 |
"source": [
|
434 |
"import torch\n",
|
435 |
"from transformers import pipeline\n",
|
@@ -448,23 +644,53 @@
|
|
448 |
},
|
449 |
{
|
450 |
"cell_type": "code",
|
451 |
-
"execution_count":
|
452 |
"id": "40a22c70-f1c4-4cd5-a118-2f5db40790e6",
|
453 |
"metadata": {},
|
454 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
455 |
"source": [
|
456 |
"pipe(\"GGAATGAAATTAGGTTGTTTTAATTATAATGTAAAGTCAGTTCT\")"
|
457 |
]
|
458 |
},
|
459 |
{
|
460 |
"cell_type": "code",
|
461 |
-
"execution_count":
|
462 |
"id": "aec95d0a-4269-4540-bf14-4ce157b9a194",
|
463 |
"metadata": {},
|
464 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
465 |
"source": [
|
466 |
-
"pipe(\"
|
467 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
468 |
}
|
469 |
],
|
470 |
"metadata": {
|
|
|
330 |
"本节任务是基于llama,训练一个能够处理dna和protein蛋白质数据的基础预训练大模型,数据为第一章中的预训练数据,包括英文数据。"
|
331 |
]
|
332 |
},
|
333 |
+
{
|
334 |
+
"cell_type": "markdown",
|
335 |
+
"id": "aec90d65-ac62-4394-a526-ca62d8bdbad4",
|
336 |
+
"metadata": {},
|
337 |
+
"source": [
|
338 |
+
"## 环境设置\n",
|
339 |
+
"并行环境对transformers、peft等的版本要求比较高,如果版本不匹配可能会出现各种异常问题。\n",
|
340 |
+
"之前的课程,都是单GPU运行,一般不存在版本问题,默认安装的都是最新版本。但运行并行环境时,需要确认下版本再运行,本课程运行并行环境如下:\n",
|
341 |
+
"\n",
|
342 |
+
"* Python 3.12.3\n",
|
343 |
+
"* transformers 4.45.2\n",
|
344 |
+
"* peft 0.3.0.dev0\n",
|
345 |
+
"* deepspeed 0.15.2\n",
|
346 |
+
"* accelerate 1.0.0\n",
|
347 |
+
"\n",
|
348 |
+
"如果不是,重新安装即可:\n",
|
349 |
+
"```\n",
|
350 |
+
"pip install transformers==4.45.2 deepspeed==0.15.2 accelerate==1.0.0\n",
|
351 |
+
"\n",
|
352 |
+
"#peft参考使用的是chinese llama的版本,需要git安装\n",
|
353 |
+
"\n",
|
354 |
+
"git clone https://github.com/huggingface/peft.git\n",
|
355 |
+
"\n",
|
356 |
+
"cd peft\n",
|
357 |
+
"\n",
|
358 |
+
"git checkout 13e53fc\n",
|
359 |
+
"\n",
|
360 |
+
"pip install . \n",
|
361 |
+
"```\n",
|
362 |
+
"如果有环境问题,可以查看本目录下的pip_list.txt"
|
363 |
+
]
|
364 |
+
},
|
365 |
{
|
366 |
"cell_type": "markdown",
|
367 |
"id": "b1bd33b8-2e05-4b59-9d8f-c48de194cfd6",
|
|
|
371 |
"\n",
|
372 |
"```\n",
|
373 |
"# 复制第一章训练数据,包括dna,protein,还有英文数据,添加英文数据是为了避免遗忘问题\n",
|
374 |
+
"\n",
|
375 |
"mkdir train_data\n",
|
376 |
"cp ../01-data_env/data/*.txt train_data/\n",
|
377 |
+
"使用这些数据,6卡4090大致需要训练16个小时,autodl也需要近200块钱了。\n",
|
378 |
+
"\n",
|
379 |
+
"建议学习时,可以使用1/10的数据训练:\n",
|
380 |
+
"awk 'NR%10==1' dna_1g.txt > dna.txt\n",
|
381 |
+
"rm dna_1g.txt\n",
|
382 |
+
"其他2类数据依次类推\n",
|
383 |
+
"\n",
|
384 |
+
"这样大概需要2到3个小时就能训练完成了\n",
|
385 |
+
"\n",
|
386 |
"\n",
|
387 |
"#持续预训练\n",
|
388 |
"./run_pt.sh\n",
|
|
|
403 |
},
|
404 |
{
|
405 |
"cell_type": "code",
|
406 |
+
"execution_count": 1,
|
407 |
"id": "69b3e97f-a801-4264-a651-a854bcfba9c6",
|
408 |
"metadata": {},
|
409 |
"outputs": [],
|
|
|
418 |
},
|
419 |
{
|
420 |
"cell_type": "code",
|
421 |
+
"execution_count": 2,
|
422 |
"id": "339435d9-9379-4b30-ae8b-50feee1ba714",
|
423 |
"metadata": {},
|
424 |
+
"outputs": [
|
425 |
+
{
|
426 |
+
"data": {
|
427 |
+
"text/plain": [
|
428 |
+
"LlamaTokenizer(name_or_path='dnahlm-merge-hf', vocab_size=91643, model_max_length=1000000000000000019884624838656, is_fast=False, padding_side='right', truncation_side='right', special_tokens={'bos_token': '<s>', 'eos_token': '</s>', 'unk_token': '<unk>', 'pad_token': '</s>'}, clean_up_tokenization_spaces=False), added_tokens_decoder={\n",
|
429 |
+
"\t0: AddedToken(\"<unk>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
|
430 |
+
"\t1: AddedToken(\"<s>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
|
431 |
+
"\t2: AddedToken(\"</s>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
|
432 |
+
"}"
|
433 |
+
]
|
434 |
+
},
|
435 |
+
"execution_count": 2,
|
436 |
+
"metadata": {},
|
437 |
+
"output_type": "execute_result"
|
438 |
+
}
|
439 |
+
],
|
440 |
"source": [
|
441 |
"tokenizer = LlamaTokenizer.from_pretrained(\"dnahlm-merge-hf\")\n",
|
442 |
"tokenizer.pad_token = tokenizer.eos_token\n",
|
|
|
445 |
},
|
446 |
{
|
447 |
"cell_type": "code",
|
448 |
+
"execution_count": 3,
|
449 |
"id": "d0f154bb-b1ab-4611-a14c-9b403043fd96",
|
450 |
"metadata": {},
|
451 |
+
"outputs": [
|
452 |
+
{
|
453 |
+
"data": {
|
454 |
+
"application/vnd.jupyter.widget-view+json": {
|
455 |
+
"model_id": "342e4ab139b64bb78f0429c2f92c8310",
|
456 |
+
"version_major": 2,
|
457 |
+
"version_minor": 0
|
458 |
+
},
|
459 |
+
"text/plain": [
|
460 |
+
"Loading checkpoint shards: 0%| | 0/3 [00:00<?, ?it/s]"
|
461 |
+
]
|
462 |
+
},
|
463 |
+
"metadata": {},
|
464 |
+
"output_type": "display_data"
|
465 |
+
},
|
466 |
+
{
|
467 |
+
"data": {
|
468 |
+
"text/plain": [
|
469 |
+
"LlamaForCausalLM(\n",
|
470 |
+
" (model): LlamaModel(\n",
|
471 |
+
" (embed_tokens): Embedding(91643, 4096, padding_idx=0)\n",
|
472 |
+
" (layers): ModuleList(\n",
|
473 |
+
" (0-31): 32 x LlamaDecoderLayer(\n",
|
474 |
+
" (self_attn): LlamaSdpaAttention(\n",
|
475 |
+
" (q_proj): Linear(in_features=4096, out_features=4096, bias=False)\n",
|
476 |
+
" (k_proj): Linear(in_features=4096, out_features=4096, bias=False)\n",
|
477 |
+
" (v_proj): Linear(in_features=4096, out_features=4096, bias=False)\n",
|
478 |
+
" (o_proj): Linear(in_features=4096, out_features=4096, bias=False)\n",
|
479 |
+
" (rotary_emb): LlamaRotaryEmbedding()\n",
|
480 |
+
" )\n",
|
481 |
+
" (mlp): LlamaMLP(\n",
|
482 |
+
" (gate_proj): Linear(in_features=4096, out_features=11008, bias=False)\n",
|
483 |
+
" (up_proj): Linear(in_features=4096, out_features=11008, bias=False)\n",
|
484 |
+
" (down_proj): Linear(in_features=11008, out_features=4096, bias=False)\n",
|
485 |
+
" (act_fn): SiLU()\n",
|
486 |
+
" )\n",
|
487 |
+
" (input_layernorm): LlamaRMSNorm((4096,), eps=1e-06)\n",
|
488 |
+
" (post_attention_layernorm): LlamaRMSNorm((4096,), eps=1e-06)\n",
|
489 |
+
" )\n",
|
490 |
+
" )\n",
|
491 |
+
" (norm): LlamaRMSNorm((4096,), eps=1e-06)\n",
|
492 |
+
" (rotary_emb): LlamaRotaryEmbedding()\n",
|
493 |
+
" )\n",
|
494 |
+
" (lm_head): Linear(in_features=4096, out_features=91643, bias=False)\n",
|
495 |
+
")"
|
496 |
+
]
|
497 |
+
},
|
498 |
+
"execution_count": 3,
|
499 |
+
"metadata": {},
|
500 |
+
"output_type": "execute_result"
|
501 |
+
}
|
502 |
+
],
|
503 |
"source": [
|
504 |
"model = LlamaForCausalLM.from_pretrained(\"dnahlm-merge-hf\") #continue pretrain\n",
|
505 |
"model"
|
|
|
507 |
},
|
508 |
{
|
509 |
"cell_type": "code",
|
510 |
+
"execution_count": 4,
|
511 |
"id": "792a9f78-1828-4695-9f6e-479a704ea7e8",
|
512 |
"metadata": {},
|
513 |
+
"outputs": [
|
514 |
+
{
|
515 |
+
"data": {
|
516 |
+
"text/plain": [
|
517 |
+
"LlamaConfig {\n",
|
518 |
+
" \"_name_or_path\": \"dnahlm-merge-hf\",\n",
|
519 |
+
" \"architectures\": [\n",
|
520 |
+
" \"LlamaForCausalLM\"\n",
|
521 |
+
" ],\n",
|
522 |
+
" \"attention_bias\": false,\n",
|
523 |
+
" \"attention_dropout\": 0.0,\n",
|
524 |
+
" \"bos_token_id\": 1,\n",
|
525 |
+
" \"eos_token_id\": 2,\n",
|
526 |
+
" \"head_dim\": 128,\n",
|
527 |
+
" \"hidden_act\": \"silu\",\n",
|
528 |
+
" \"hidden_size\": 4096,\n",
|
529 |
+
" \"initializer_range\": 0.02,\n",
|
530 |
+
" \"intermediate_size\": 11008,\n",
|
531 |
+
" \"max_position_embeddings\": 2048,\n",
|
532 |
+
" \"mlp_bias\": false,\n",
|
533 |
+
" \"model_type\": \"llama\",\n",
|
534 |
+
" \"num_attention_heads\": 32,\n",
|
535 |
+
" \"num_hidden_layers\": 32,\n",
|
536 |
+
" \"num_key_value_heads\": 32,\n",
|
537 |
+
" \"pad_token_id\": 0,\n",
|
538 |
+
" \"pretraining_tp\": 1,\n",
|
539 |
+
" \"rms_norm_eps\": 1e-06,\n",
|
540 |
+
" \"rope_scaling\": null,\n",
|
541 |
+
" \"rope_theta\": 10000.0,\n",
|
542 |
+
" \"tie_word_embeddings\": false,\n",
|
543 |
+
" \"torch_dtype\": \"float16\",\n",
|
544 |
+
" \"transformers_version\": \"4.45.2\",\n",
|
545 |
+
" \"use_cache\": true,\n",
|
546 |
+
" \"vocab_size\": 91643\n",
|
547 |
+
"}"
|
548 |
+
]
|
549 |
+
},
|
550 |
+
"execution_count": 4,
|
551 |
+
"metadata": {},
|
552 |
+
"output_type": "execute_result"
|
553 |
+
}
|
554 |
+
],
|
555 |
"source": [
|
556 |
"from transformers import AutoConfig\n",
|
557 |
"# 加载配置\n",
|
|
|
561 |
},
|
562 |
{
|
563 |
"cell_type": "code",
|
564 |
+
"execution_count": 5,
|
565 |
"id": "49021c65-54bb-4a97-a96d-b030cc3dcd13",
|
566 |
"metadata": {},
|
567 |
+
"outputs": [
|
568 |
+
{
|
569 |
+
"name": "stdout",
|
570 |
+
"output_type": "stream",
|
571 |
+
"text": [
|
572 |
+
"Test text:\n",
|
573 |
+
" GCTGACTCTGCCAGGATGGAATGAAATTAGGTTGTTTTAATTATAATGTAAAGTCAGTTCTAGTCAGACATAGTCACATAGGCAAGTAAGGGAACCTAAAATTGCTTGGAAT,\n",
|
574 |
+
"KCGFVGPMVHLKVHLEADVASSCRSAVIYLTSEEPFEGVLGLRLKEGIAITGCWPRWPDEMDERSAVWRVEPYTRHFGRVLYSFGV,\n",
|
575 |
+
"The primary use of LLaMA is research on large language models, including\n",
|
576 |
+
"Tokenized by DNA-LLaMA tokenizer:['▁GC', 'TGA', 'CT', 'C', 'TGCC', 'AGGATGG', 'AATG', 'AAATT', 'AGGTTG', 'TTTTAATT', 'ATAATGTAA', 'AGTCAG', 'TTCTAG', 'TCAG', 'ACATAG', 'TC', 'ACATAGG', 'CA', 'AGTAAGGG', 'AAC', 'CT', 'AAAATTGC', 'TTGG', 'AAT', ',', '<0x0A>', 'KCG', 'FVGP', 'MVHL', 'KV', 'HLE', 'ADV', 'ASSC', 'RSAV', 'I', 'YL', 'TSEE', 'P', 'FEG', 'VLGL', 'RLK', 'EGI', 'AI', 'TGC', 'W', 'PRW', 'P', 'DEM', 'DER', 'SAV', 'W', 'RVE', 'PY', 'TRH', 'FG', 'RVLY', 'SFGV', ',', '<0x0A>', 'The', '▁primary', '▁use', '▁of', '▁L', 'La', 'MA', '▁is', '▁research', '▁on', '▁large', '▁language', '▁models', ',', '▁including']\n"
|
577 |
+
]
|
578 |
+
}
|
579 |
+
],
|
580 |
"source": [
|
581 |
"text='''GCTGACTCTGCCAGGATGGAATGAAATTAGGTTGTTTTAATTATAATGTAAAGTCAGTTCTAGTCAGACATAGTCACATAGGCAAGTAAGGGAACCTAAAATTGCTTGGAAT,\n",
|
582 |
"KCGFVGPMVHLKVHLEADVASSCRSAVIYLTSEEPFEGVLGLRLKEGIAITGCWPRWPDEMDERSAVWRVEPYTRHFGRVLYSFGV,\n",
|
|
|
587 |
},
|
588 |
{
|
589 |
"cell_type": "code",
|
590 |
+
"execution_count": 6,
|
591 |
"id": "ebf869c8-866d-4770-8f64-79d671f88663",
|
592 |
"metadata": {},
|
593 |
+
"outputs": [
|
594 |
+
{
|
595 |
+
"data": {
|
596 |
+
"application/vnd.jupyter.widget-view+json": {
|
597 |
+
"model_id": "e497889a1c3c484cb57c4b6fd93b45ab",
|
598 |
+
"version_major": 2,
|
599 |
+
"version_minor": 0
|
600 |
+
},
|
601 |
+
"text/plain": [
|
602 |
+
"Loading checkpoint shards: 0%| | 0/3 [00:00<?, ?it/s]"
|
603 |
+
]
|
604 |
+
},
|
605 |
+
"metadata": {},
|
606 |
+
"output_type": "display_data"
|
607 |
+
},
|
608 |
+
{
|
609 |
+
"name": "stderr",
|
610 |
+
"output_type": "stream",
|
611 |
+
"text": [
|
612 |
+
"Some parameters are on the meta device because they were offloaded to the cpu.\n",
|
613 |
+
"/root/miniconda3/lib/python3.12/site-packages/transformers/generation/utils.py:1220: UserWarning: Using the model-agnostic default `max_length` (=20) to control the generation length. We recommend setting `max_new_tokens` to control the maximum length of the generation.\n",
|
614 |
+
" warnings.warn(\n",
|
615 |
+
"Starting from v4.46, the `logits` model output will have the same type as the model (except at train time, where it will always be FP32)\n"
|
616 |
+
]
|
617 |
+
},
|
618 |
+
{
|
619 |
+
"data": {
|
620 |
+
"text/plain": [
|
621 |
+
"[{'generated_text': 'The key to life is to accept the fact that you are going to die. The key to'}]"
|
622 |
+
]
|
623 |
+
},
|
624 |
+
"execution_count": 6,
|
625 |
+
"metadata": {},
|
626 |
+
"output_type": "execute_result"
|
627 |
+
}
|
628 |
+
],
|
629 |
"source": [
|
630 |
"import torch\n",
|
631 |
"from transformers import pipeline\n",
|
|
|
644 |
},
|
645 |
{
|
646 |
"cell_type": "code",
|
647 |
+
"execution_count": 7,
|
648 |
"id": "40a22c70-f1c4-4cd5-a118-2f5db40790e6",
|
649 |
"metadata": {},
|
650 |
+
"outputs": [
|
651 |
+
{
|
652 |
+
"data": {
|
653 |
+
"text/plain": [
|
654 |
+
"[{'generated_text': 'GGAATGAAATTAGGTTGTTTTAATTATAATGTAAAGTCAGTTCTCTCCTCCTCCTCCTC'}]"
|
655 |
+
]
|
656 |
+
},
|
657 |
+
"execution_count": 7,
|
658 |
+
"metadata": {},
|
659 |
+
"output_type": "execute_result"
|
660 |
+
}
|
661 |
+
],
|
662 |
"source": [
|
663 |
"pipe(\"GGAATGAAATTAGGTTGTTTTAATTATAATGTAAAGTCAGTTCT\")"
|
664 |
]
|
665 |
},
|
666 |
{
|
667 |
"cell_type": "code",
|
668 |
+
"execution_count": 9,
|
669 |
"id": "aec95d0a-4269-4540-bf14-4ce157b9a194",
|
670 |
"metadata": {},
|
671 |
+
"outputs": [
|
672 |
+
{
|
673 |
+
"data": {
|
674 |
+
"text/plain": [
|
675 |
+
"[{'generated_text': 'KCGFVGPMVHLKVHLEADVASSCRSAVIYLTSEEPFEGVLGLRLKETLK'}]"
|
676 |
+
]
|
677 |
+
},
|
678 |
+
"execution_count": 9,
|
679 |
+
"metadata": {},
|
680 |
+
"output_type": "execute_result"
|
681 |
+
}
|
682 |
+
],
|
683 |
"source": [
|
684 |
+
"pipe(\"KCGFVGPMVHLKVHLEADVASSCRSAVIYLTSEEPFEGVLGLRLK\")"
|
685 |
]
|
686 |
+
},
|
687 |
+
{
|
688 |
+
"cell_type": "code",
|
689 |
+
"execution_count": null,
|
690 |
+
"id": "c1cfab60-2820-4885-8961-0290c49dfbec",
|
691 |
+
"metadata": {},
|
692 |
+
"outputs": [],
|
693 |
+
"source": []
|
694 |
}
|
695 |
],
|
696 |
"metadata": {
|
04-gene-sft/7-llama-instruction-ft.ipynb
CHANGED
@@ -184,11 +184,21 @@
|
|
184 |
"指令微调通过在特定格式的数据集上进一步训练大模型,使其能够更好地理解和执行用户的自然语言指令。这种方法适合多任务场景,并能提升模型的交互能力和领域适应性。借助高质量的指令数据集和高效的微调技术,大模型在实际应用中的表现可以得到显著提升。"
|
185 |
]
|
186 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
187 |
{
|
188 |
"cell_type": "markdown",
|
189 |
"id": "7be8b814-42f6-4fb6-bf4b-ae23292030f6",
|
190 |
"metadata": {},
|
191 |
-
"source": [
|
|
|
|
|
192 |
},
|
193 |
{
|
194 |
"cell_type": "markdown",
|
@@ -293,6 +303,8 @@
|
|
293 |
"#微调\n",
|
294 |
"./run_sft.sh\n",
|
295 |
"\n",
|
|
|
|
|
296 |
"#合并模型\n",
|
297 |
"./merge_sft_model.sh\n",
|
298 |
"\n",
|
@@ -325,17 +337,9 @@
|
|
325 |
" os.environ[var] = value"
|
326 |
]
|
327 |
},
|
328 |
-
{
|
329 |
-
"cell_type": "markdown",
|
330 |
-
"id": "17bdb69d-3f0f-465e-bd60-2047a088e264",
|
331 |
-
"metadata": {},
|
332 |
-
"source": [
|
333 |
-
"如果您不确定模型中有哪些模块可以微调,可以打印模型结构:"
|
334 |
-
]
|
335 |
-
},
|
336 |
{
|
337 |
"cell_type": "code",
|
338 |
-
"execution_count":
|
339 |
"id": "054a2956-9045-4ad5-a878-1bfc84ad4ed8",
|
340 |
"metadata": {},
|
341 |
"outputs": [],
|
@@ -350,10 +354,26 @@
|
|
350 |
},
|
351 |
{
|
352 |
"cell_type": "code",
|
353 |
-
"execution_count":
|
354 |
"id": "63c8bf16-9576-41bc-b27c-c92ba4289cf4",
|
355 |
"metadata": {},
|
356 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
357 |
"source": [
|
358 |
"from datasets import load_dataset\n",
|
359 |
"dna_ft_dataset = load_dataset('json', data_files='val_data.json')\n",
|
@@ -362,10 +382,30 @@
|
|
362 |
},
|
363 |
{
|
364 |
"cell_type": "code",
|
365 |
-
"execution_count":
|
366 |
"id": "95928da3-ca64-4a17-80f4-945da395702c",
|
367 |
"metadata": {},
|
368 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
369 |
"source": [
|
370 |
"data = dna_ft_dataset[\"train\"].train_test_split(train_size=0.1, seed=42)\n",
|
371 |
"data"
|
@@ -373,7 +413,7 @@
|
|
373 |
},
|
374 |
{
|
375 |
"cell_type": "code",
|
376 |
-
"execution_count":
|
377 |
"id": "a3e65bcd-85ce-4261-8ba6-7665c4ec60e2",
|
378 |
"metadata": {},
|
379 |
"outputs": [],
|
@@ -384,10 +424,61 @@
|
|
384 |
},
|
385 |
{
|
386 |
"cell_type": "code",
|
387 |
-
"execution_count":
|
388 |
"id": "3d3fe49b-f48f-42b2-bc97-028e443111e4",
|
389 |
"metadata": {},
|
390 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
391 |
"source": [
|
392 |
"model = LlamaForCausalLM.from_pretrained(\"dnahlm-llama-7b-sft-v0\") #continue pretrain\n",
|
393 |
"model"
|
@@ -395,7 +486,7 @@
|
|
395 |
},
|
396 |
{
|
397 |
"cell_type": "code",
|
398 |
-
"execution_count":
|
399 |
"id": "c54df9fe-86c4-4963-b313-b438894bf9dd",
|
400 |
"metadata": {},
|
401 |
"outputs": [],
|
@@ -424,10 +515,23 @@
|
|
424 |
},
|
425 |
{
|
426 |
"cell_type": "code",
|
427 |
-
"execution_count":
|
428 |
"id": "ee540cfb-1f6e-4e02-a3bc-c814e43685cb",
|
429 |
"metadata": {},
|
430 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
431 |
"source": [
|
432 |
"example = data[\"test\"][0]\n",
|
433 |
"example"
|
@@ -435,10 +539,27 @@
|
|
435 |
},
|
436 |
{
|
437 |
"cell_type": "code",
|
438 |
-
"execution_count":
|
439 |
"id": "7ee35528-7b3f-4e60-b88b-1bc3e950012b",
|
440 |
"metadata": {},
|
441 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
442 |
"source": [
|
443 |
"prompt = build_prompt(example)\n",
|
444 |
"print(prompt)"
|
@@ -446,17 +567,116 @@
|
|
446 |
},
|
447 |
{
|
448 |
"cell_type": "code",
|
449 |
-
"execution_count":
|
450 |
"id": "8aa6f38f-3bcc-4566-8a66-a541db91e031",
|
451 |
"metadata": {},
|
452 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
453 |
"source": [
|
454 |
"tokenizer.tokenize(prompt)"
|
455 |
]
|
456 |
},
|
457 |
{
|
458 |
"cell_type": "code",
|
459 |
-
"execution_count":
|
460 |
"id": "11875339-4901-4912-86e5-afe8c74921d9",
|
461 |
"metadata": {},
|
462 |
"outputs": [],
|
@@ -498,10 +718,48 @@
|
|
498 |
},
|
499 |
{
|
500 |
"cell_type": "code",
|
501 |
-
"execution_count":
|
502 |
"id": "1b02644a-8b24-45aa-b22d-0f7ce2270dd9",
|
503 |
"metadata": {},
|
504 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
505 |
"source": [
|
506 |
"input_text = format_input(data[\"test\"][0])\n",
|
507 |
"\n",
|
@@ -562,10 +820,118 @@
|
|
562 |
},
|
563 |
{
|
564 |
"cell_type": "code",
|
565 |
-
"execution_count":
|
566 |
"id": "68831e19-5a99-46d8-9f40-e8bf6957dbfc",
|
567 |
"metadata": {},
|
568 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
569 |
"source": [
|
570 |
"import json\n",
|
571 |
"from tqdm import tqdm\n",
|
@@ -598,6 +964,14 @@
|
|
598 |
"\n",
|
599 |
"print(\"presicion\", right_sum/all_num, \"same\", same_sum/all_num)\n"
|
600 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
601 |
}
|
602 |
],
|
603 |
"metadata": {
|
|
|
184 |
"指令微调通过在特定格式的数据集上进一步训练大模型,使其能够更好地理解和执行用户的自然语言指令。这种方法适合多任务场景,并能提升模型的交互能力和领域适应性。借助高质量的指令数据集和高效的微调技术,大模型在实际应用中的表现可以得到显著提升。"
|
185 |
]
|
186 |
},
|
187 |
+
{
|
188 |
+
"cell_type": "code",
|
189 |
+
"execution_count": null,
|
190 |
+
"id": "e77f8b39-e75a-4014-a98a-bde5b2534bf1",
|
191 |
+
"metadata": {},
|
192 |
+
"outputs": [],
|
193 |
+
"source": []
|
194 |
+
},
|
195 |
{
|
196 |
"cell_type": "markdown",
|
197 |
"id": "7be8b814-42f6-4fb6-bf4b-ae23292030f6",
|
198 |
"metadata": {},
|
199 |
+
"source": [
|
200 |
+
"## 持续预训练 VS 指令微调"
|
201 |
+
]
|
202 |
},
|
203 |
{
|
204 |
"cell_type": "markdown",
|
|
|
303 |
"#微调\n",
|
304 |
"./run_sft.sh\n",
|
305 |
"\n",
|
306 |
+
"运行时间约3小时\n",
|
307 |
+
"\n",
|
308 |
"#合并模型\n",
|
309 |
"./merge_sft_model.sh\n",
|
310 |
"\n",
|
|
|
337 |
" os.environ[var] = value"
|
338 |
]
|
339 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
340 |
{
|
341 |
"cell_type": "code",
|
342 |
+
"execution_count": 2,
|
343 |
"id": "054a2956-9045-4ad5-a878-1bfc84ad4ed8",
|
344 |
"metadata": {},
|
345 |
"outputs": [],
|
|
|
354 |
},
|
355 |
{
|
356 |
"cell_type": "code",
|
357 |
+
"execution_count": 3,
|
358 |
"id": "63c8bf16-9576-41bc-b27c-c92ba4289cf4",
|
359 |
"metadata": {},
|
360 |
+
"outputs": [
|
361 |
+
{
|
362 |
+
"data": {
|
363 |
+
"text/plain": [
|
364 |
+
"DatasetDict({\n",
|
365 |
+
" train: Dataset({\n",
|
366 |
+
" features: ['instruction', 'input', 'output'],\n",
|
367 |
+
" num_rows: 19839\n",
|
368 |
+
" })\n",
|
369 |
+
"})"
|
370 |
+
]
|
371 |
+
},
|
372 |
+
"execution_count": 3,
|
373 |
+
"metadata": {},
|
374 |
+
"output_type": "execute_result"
|
375 |
+
}
|
376 |
+
],
|
377 |
"source": [
|
378 |
"from datasets import load_dataset\n",
|
379 |
"dna_ft_dataset = load_dataset('json', data_files='val_data.json')\n",
|
|
|
382 |
},
|
383 |
{
|
384 |
"cell_type": "code",
|
385 |
+
"execution_count": 4,
|
386 |
"id": "95928da3-ca64-4a17-80f4-945da395702c",
|
387 |
"metadata": {},
|
388 |
+
"outputs": [
|
389 |
+
{
|
390 |
+
"data": {
|
391 |
+
"text/plain": [
|
392 |
+
"DatasetDict({\n",
|
393 |
+
" train: Dataset({\n",
|
394 |
+
" features: ['instruction', 'input', 'output'],\n",
|
395 |
+
" num_rows: 1983\n",
|
396 |
+
" })\n",
|
397 |
+
" test: Dataset({\n",
|
398 |
+
" features: ['instruction', 'input', 'output'],\n",
|
399 |
+
" num_rows: 17856\n",
|
400 |
+
" })\n",
|
401 |
+
"})"
|
402 |
+
]
|
403 |
+
},
|
404 |
+
"execution_count": 4,
|
405 |
+
"metadata": {},
|
406 |
+
"output_type": "execute_result"
|
407 |
+
}
|
408 |
+
],
|
409 |
"source": [
|
410 |
"data = dna_ft_dataset[\"train\"].train_test_split(train_size=0.1, seed=42)\n",
|
411 |
"data"
|
|
|
413 |
},
|
414 |
{
|
415 |
"cell_type": "code",
|
416 |
+
"execution_count": 5,
|
417 |
"id": "a3e65bcd-85ce-4261-8ba6-7665c4ec60e2",
|
418 |
"metadata": {},
|
419 |
"outputs": [],
|
|
|
424 |
},
|
425 |
{
|
426 |
"cell_type": "code",
|
427 |
+
"execution_count": 6,
|
428 |
"id": "3d3fe49b-f48f-42b2-bc97-028e443111e4",
|
429 |
"metadata": {},
|
430 |
+
"outputs": [
|
431 |
+
{
|
432 |
+
"data": {
|
433 |
+
"application/vnd.jupyter.widget-view+json": {
|
434 |
+
"model_id": "4f060ff2029447b9bad5e2b2e40b7133",
|
435 |
+
"version_major": 2,
|
436 |
+
"version_minor": 0
|
437 |
+
},
|
438 |
+
"text/plain": [
|
439 |
+
"Loading checkpoint shards: 0%| | 0/3 [00:00<?, ?it/s]"
|
440 |
+
]
|
441 |
+
},
|
442 |
+
"metadata": {},
|
443 |
+
"output_type": "display_data"
|
444 |
+
},
|
445 |
+
{
|
446 |
+
"data": {
|
447 |
+
"text/plain": [
|
448 |
+
"LlamaForCausalLM(\n",
|
449 |
+
" (model): LlamaModel(\n",
|
450 |
+
" (embed_tokens): Embedding(91644, 4096, padding_idx=0)\n",
|
451 |
+
" (layers): ModuleList(\n",
|
452 |
+
" (0-31): 32 x LlamaDecoderLayer(\n",
|
453 |
+
" (self_attn): LlamaSdpaAttention(\n",
|
454 |
+
" (q_proj): Linear(in_features=4096, out_features=4096, bias=False)\n",
|
455 |
+
" (k_proj): Linear(in_features=4096, out_features=4096, bias=False)\n",
|
456 |
+
" (v_proj): Linear(in_features=4096, out_features=4096, bias=False)\n",
|
457 |
+
" (o_proj): Linear(in_features=4096, out_features=4096, bias=False)\n",
|
458 |
+
" (rotary_emb): LlamaRotaryEmbedding()\n",
|
459 |
+
" )\n",
|
460 |
+
" (mlp): LlamaMLP(\n",
|
461 |
+
" (gate_proj): Linear(in_features=4096, out_features=11008, bias=False)\n",
|
462 |
+
" (up_proj): Linear(in_features=4096, out_features=11008, bias=False)\n",
|
463 |
+
" (down_proj): Linear(in_features=11008, out_features=4096, bias=False)\n",
|
464 |
+
" (act_fn): SiLU()\n",
|
465 |
+
" )\n",
|
466 |
+
" (input_layernorm): LlamaRMSNorm((4096,), eps=1e-06)\n",
|
467 |
+
" (post_attention_layernorm): LlamaRMSNorm((4096,), eps=1e-06)\n",
|
468 |
+
" )\n",
|
469 |
+
" )\n",
|
470 |
+
" (norm): LlamaRMSNorm((4096,), eps=1e-06)\n",
|
471 |
+
" (rotary_emb): LlamaRotaryEmbedding()\n",
|
472 |
+
" )\n",
|
473 |
+
" (lm_head): Linear(in_features=4096, out_features=91644, bias=False)\n",
|
474 |
+
")"
|
475 |
+
]
|
476 |
+
},
|
477 |
+
"execution_count": 6,
|
478 |
+
"metadata": {},
|
479 |
+
"output_type": "execute_result"
|
480 |
+
}
|
481 |
+
],
|
482 |
"source": [
|
483 |
"model = LlamaForCausalLM.from_pretrained(\"dnahlm-llama-7b-sft-v0\") #continue pretrain\n",
|
484 |
"model"
|
|
|
486 |
},
|
487 |
{
|
488 |
"cell_type": "code",
|
489 |
+
"execution_count": 7,
|
490 |
"id": "c54df9fe-86c4-4963-b313-b438894bf9dd",
|
491 |
"metadata": {},
|
492 |
"outputs": [],
|
|
|
515 |
},
|
516 |
{
|
517 |
"cell_type": "code",
|
518 |
+
"execution_count": 8,
|
519 |
"id": "ee540cfb-1f6e-4e02-a3bc-c814e43685cb",
|
520 |
"metadata": {},
|
521 |
+
"outputs": [
|
522 |
+
{
|
523 |
+
"data": {
|
524 |
+
"text/plain": [
|
525 |
+
"{'instruction': 'Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.',\n",
|
526 |
+
" 'input': 'CCGTGCGACCGGAAGTGGGGCGGCGACCCCGGAAGTCCCCGCCGGGTGCAGCTTGGTCGGTTCGATCGCC',\n",
|
527 |
+
" 'output': 'promoter'}"
|
528 |
+
]
|
529 |
+
},
|
530 |
+
"execution_count": 8,
|
531 |
+
"metadata": {},
|
532 |
+
"output_type": "execute_result"
|
533 |
+
}
|
534 |
+
],
|
535 |
"source": [
|
536 |
"example = data[\"test\"][0]\n",
|
537 |
"example"
|
|
|
539 |
},
|
540 |
{
|
541 |
"cell_type": "code",
|
542 |
+
"execution_count": 9,
|
543 |
"id": "7ee35528-7b3f-4e60-b88b-1bc3e950012b",
|
544 |
"metadata": {},
|
545 |
+
"outputs": [
|
546 |
+
{
|
547 |
+
"name": "stdout",
|
548 |
+
"output_type": "stream",
|
549 |
+
"text": [
|
550 |
+
"Below is an instruction that describes a task. Write a response that appropriately completes the request.\n",
|
551 |
+
"\n",
|
552 |
+
"### Instruction:\n",
|
553 |
+
"Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.\n",
|
554 |
+
"\n",
|
555 |
+
"### Input:\n",
|
556 |
+
"CCGTGCGACCGGAAGTGGGGCGGCGACCCCGGAAGTCCCCGCCGGGTGCAGCTTGGTCGGTTCGATCGCC\n",
|
557 |
+
"\n",
|
558 |
+
"### Response:\n",
|
559 |
+
"promoter\n"
|
560 |
+
]
|
561 |
+
}
|
562 |
+
],
|
563 |
"source": [
|
564 |
"prompt = build_prompt(example)\n",
|
565 |
"print(prompt)"
|
|
|
567 |
},
|
568 |
{
|
569 |
"cell_type": "code",
|
570 |
+
"execution_count": 10,
|
571 |
"id": "8aa6f38f-3bcc-4566-8a66-a541db91e031",
|
572 |
"metadata": {},
|
573 |
+
"outputs": [
|
574 |
+
{
|
575 |
+
"data": {
|
576 |
+
"text/plain": [
|
577 |
+
"['▁Below',\n",
|
578 |
+
" '▁is',\n",
|
579 |
+
" '▁an',\n",
|
580 |
+
" '▁instruction',\n",
|
581 |
+
" '▁that',\n",
|
582 |
+
" '▁describes',\n",
|
583 |
+
" '▁a',\n",
|
584 |
+
" '▁task',\n",
|
585 |
+
" '.',\n",
|
586 |
+
" '▁Write',\n",
|
587 |
+
" '▁a',\n",
|
588 |
+
" '▁response',\n",
|
589 |
+
" '▁that',\n",
|
590 |
+
" '▁appropri',\n",
|
591 |
+
" 'ately',\n",
|
592 |
+
" '▁comple',\n",
|
593 |
+
" 'tes',\n",
|
594 |
+
" '▁the',\n",
|
595 |
+
" '▁request',\n",
|
596 |
+
" '.',\n",
|
597 |
+
" '<0x0A>',\n",
|
598 |
+
" '<0x0A>',\n",
|
599 |
+
" '##',\n",
|
600 |
+
" '#',\n",
|
601 |
+
" '▁Inst',\n",
|
602 |
+
" 'ruction',\n",
|
603 |
+
" ':',\n",
|
604 |
+
" '<0x0A>',\n",
|
605 |
+
" 'Det',\n",
|
606 |
+
" 'erm',\n",
|
607 |
+
" 'ine',\n",
|
608 |
+
" '▁core',\n",
|
609 |
+
" '▁prom',\n",
|
610 |
+
" 'oter',\n",
|
611 |
+
" '▁detection',\n",
|
612 |
+
" '▁of',\n",
|
613 |
+
" '▁following',\n",
|
614 |
+
" '▁d',\n",
|
615 |
+
" 'na',\n",
|
616 |
+
" '▁sequence',\n",
|
617 |
+
" ',',\n",
|
618 |
+
" '▁The',\n",
|
619 |
+
" '▁result',\n",
|
620 |
+
" '▁will',\n",
|
621 |
+
" '▁be',\n",
|
622 |
+
" '▁one',\n",
|
623 |
+
" '▁of',\n",
|
624 |
+
" '▁the',\n",
|
625 |
+
" '▁following',\n",
|
626 |
+
" ':',\n",
|
627 |
+
" '▁Non',\n",
|
628 |
+
" '-',\n",
|
629 |
+
" 'prom',\n",
|
630 |
+
" 'oter',\n",
|
631 |
+
" ',',\n",
|
632 |
+
" '▁prom',\n",
|
633 |
+
" 'oter',\n",
|
634 |
+
" '.',\n",
|
635 |
+
" '<0x0A>',\n",
|
636 |
+
" '<0x0A>',\n",
|
637 |
+
" '##',\n",
|
638 |
+
" '#',\n",
|
639 |
+
" '▁Input',\n",
|
640 |
+
" ':',\n",
|
641 |
+
" '<0x0A>',\n",
|
642 |
+
" 'CCG',\n",
|
643 |
+
" 'TGCG',\n",
|
644 |
+
" 'ACCGG',\n",
|
645 |
+
" 'AAG',\n",
|
646 |
+
" 'TGGGGC',\n",
|
647 |
+
" 'GGCG',\n",
|
648 |
+
" 'ACCCCGG',\n",
|
649 |
+
" 'AAG',\n",
|
650 |
+
" 'TCCCC',\n",
|
651 |
+
" 'GCCGGG',\n",
|
652 |
+
" 'TGCAGC',\n",
|
653 |
+
" 'TTGG',\n",
|
654 |
+
" 'TCGG',\n",
|
655 |
+
" 'TTCG',\n",
|
656 |
+
" 'ATCGCC',\n",
|
657 |
+
" '<0x0A>',\n",
|
658 |
+
" '<0x0A>',\n",
|
659 |
+
" '##',\n",
|
660 |
+
" '#',\n",
|
661 |
+
" '▁Response',\n",
|
662 |
+
" ':',\n",
|
663 |
+
" '<0x0A>',\n",
|
664 |
+
" 'prom',\n",
|
665 |
+
" 'oter']"
|
666 |
+
]
|
667 |
+
},
|
668 |
+
"execution_count": 10,
|
669 |
+
"metadata": {},
|
670 |
+
"output_type": "execute_result"
|
671 |
+
}
|
672 |
+
],
|
673 |
"source": [
|
674 |
"tokenizer.tokenize(prompt)"
|
675 |
]
|
676 |
},
|
677 |
{
|
678 |
"cell_type": "code",
|
679 |
+
"execution_count": 11,
|
680 |
"id": "11875339-4901-4912-86e5-afe8c74921d9",
|
681 |
"metadata": {},
|
682 |
"outputs": [],
|
|
|
718 |
},
|
719 |
{
|
720 |
"cell_type": "code",
|
721 |
+
"execution_count": 12,
|
722 |
"id": "1b02644a-8b24-45aa-b22d-0f7ce2270dd9",
|
723 |
"metadata": {},
|
724 |
+
"outputs": [
|
725 |
+
{
|
726 |
+
"name": "stdout",
|
727 |
+
"output_type": "stream",
|
728 |
+
"text": [
|
729 |
+
"input (test): Below is an instruction that describes a task. Write a response that appropriately completes the request.\n",
|
730 |
+
"\n",
|
731 |
+
"### Instruction:\n",
|
732 |
+
"Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.\n",
|
733 |
+
"\n",
|
734 |
+
"### Input:\n",
|
735 |
+
"CCGTGCGACCGGAAGTGGGGCGGCGACCCCGGAAGTCCCCGCCGGGTGCAGCTTGGTCGGTTCGATCGCC\n",
|
736 |
+
"\n",
|
737 |
+
"### Response:\n",
|
738 |
+
"\n",
|
739 |
+
"real answer: promoter\n",
|
740 |
+
"--------------------------\n",
|
741 |
+
"\n",
|
742 |
+
"model's answer: \n",
|
743 |
+
"\n"
|
744 |
+
]
|
745 |
+
},
|
746 |
+
{
|
747 |
+
"name": "stderr",
|
748 |
+
"output_type": "stream",
|
749 |
+
"text": [
|
750 |
+
"/root/miniconda3/lib/python3.12/site-packages/transformers/generation/configuration_utils.py:601: UserWarning: `do_sample` is set to `False`. However, `temperature` is set to `0.01` -- this flag is only used in sample-based generation modes. You should set `do_sample=True` or unset `temperature`.\n",
|
751 |
+
" warnings.warn(\n",
|
752 |
+
"Starting from v4.46, the `logits` model output will have the same type as the model (except at train time, where it will always be FP32)\n"
|
753 |
+
]
|
754 |
+
},
|
755 |
+
{
|
756 |
+
"name": "stdout",
|
757 |
+
"output_type": "stream",
|
758 |
+
"text": [
|
759 |
+
"promoter\n"
|
760 |
+
]
|
761 |
+
}
|
762 |
+
],
|
763 |
"source": [
|
764 |
"input_text = format_input(data[\"test\"][0])\n",
|
765 |
"\n",
|
|
|
820 |
},
|
821 |
{
|
822 |
"cell_type": "code",
|
823 |
+
"execution_count": 16,
|
824 |
"id": "68831e19-5a99-46d8-9f40-e8bf6957dbfc",
|
825 |
"metadata": {},
|
826 |
+
"outputs": [
|
827 |
+
{
|
828 |
+
"name": "stdout",
|
829 |
+
"output_type": "stream",
|
830 |
+
"text": [
|
831 |
+
"Donor Sites |||||||||||| Non-Splice Sites\n",
|
832 |
+
"promoter |||||||||||| promoter\n",
|
833 |
+
"promoter |||||||||||| promoter\n",
|
834 |
+
"promoter |||||||||||| Non-promoter\n",
|
835 |
+
"promoter |||||||||||| promoter\n",
|
836 |
+
"Donor Sites |||||||||||| Non-Splice Sites\n",
|
837 |
+
"promoter |||||||||||| promoter\n",
|
838 |
+
"promoter |||||||||||| Non-promoter\n",
|
839 |
+
"Non-promoter |||||||||||| promoter\n",
|
840 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
841 |
+
"Donor Sites |||||||||||| Donor Sites\n",
|
842 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
843 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
844 |
+
"Non-promoter |||||||||||| promoter\n",
|
845 |
+
"promoter |||||||||||| promoter\n",
|
846 |
+
"promoter |||||||||||| promoter\n",
|
847 |
+
"Donor Sites |||||||||||| Splice Sites\n",
|
848 |
+
"Background Sequences |||||||||||| Background Sequences\n",
|
849 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
850 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
851 |
+
"promoter |||||||||||| Non-promoter\n",
|
852 |
+
"promoter |||||||||||| promoter\n",
|
853 |
+
"promoter |||||||||||| promoter\n",
|
854 |
+
"promoter |||||||||||| Non-promoter\n",
|
855 |
+
"promoter |||||||||||| promoter\n",
|
856 |
+
"promoter |||||||||||| promoter\n",
|
857 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
858 |
+
"Non-Splice Sites |||||||||||| Non-Splice Sites\n",
|
859 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
860 |
+
"promoter |||||||||||| Non-promoter\n",
|
861 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
862 |
+
"Binding Sites |||||||||||| Background Sequences\n",
|
863 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
864 |
+
"Non-Splice Sites |||||||||||| Non-Splice Sites\n",
|
865 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
866 |
+
"Non-promoter |||||||||||| promoter\n",
|
867 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
868 |
+
"Donor Sites |||||||||||| Donor Sites\n",
|
869 |
+
"Non-promoter |||||||||||| promoter\n",
|
870 |
+
"promoter |||||||||||| promoter\n",
|
871 |
+
"Background Sequences |||||||||||| Background Sequences\n",
|
872 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
873 |
+
"Binding Sites |||||||||||| Binding Sites\n",
|
874 |
+
"promoter |||||||||||| promoter\n",
|
875 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
876 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
877 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
878 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
879 |
+
"Donor Sites |||||||||||| Donor Sites\n",
|
880 |
+
"promoter |||||||||||| promoter\n",
|
881 |
+
"promoter |||||||||||| promoter\n",
|
882 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
883 |
+
"Binding Sites |||||||||||| Binding Sites\n",
|
884 |
+
"promoter |||||||||||| Non-promoter\n",
|
885 |
+
"promoter |||||||||||| promoter\n",
|
886 |
+
"Background Sequences |||||||||||| Binding Sites\n",
|
887 |
+
"promoter |||||||||||| promoter\n",
|
888 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
889 |
+
"Background Sequences |||||||||||| Background Sequences\n",
|
890 |
+
"promoter |||||||||||| promoter\n",
|
891 |
+
"promoter |||||||||||| Non-promoter\n",
|
892 |
+
"promoter |||||||||||| promoter\n",
|
893 |
+
"Donor Sites |||||||||||| Non-Splice Sites\n",
|
894 |
+
"Binding Sites |||||||||||| Binding Sites\n",
|
895 |
+
"promoter |||||||||||| promoter\n",
|
896 |
+
"Donor Sites |||||||||||| Donor Sites\n",
|
897 |
+
"Non-promoter |||||||||||| promoter\n",
|
898 |
+
"Binding Sites |||||||||||| Binding Sites\n",
|
899 |
+
"Donor Sites |||||||||||| Donor Sites\n",
|
900 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
901 |
+
"Donor Sites |||||||||||| Donor Sites\n",
|
902 |
+
"Non-promoter |||||||||||| promoter\n",
|
903 |
+
"promoter |||||||||||| promoter\n",
|
904 |
+
"promoter |||||||||||| promoter\n",
|
905 |
+
"promoter |||||||||||| promoter\n",
|
906 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
907 |
+
"Acceptor Sites |||||||||||| Acceptor Sites\n",
|
908 |
+
"promoter |||||||||||| promoter\n",
|
909 |
+
"Donor Sites |||||||||||| Donor Sites\n",
|
910 |
+
"Donor Sites |||||||||||| Acceptor Sites\n",
|
911 |
+
"promoter |||||||||||| promoter\n",
|
912 |
+
"promoter |||||||||||| promoter\n",
|
913 |
+
"promoter |||||||||||| promoter\n",
|
914 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
915 |
+
"Non-promoter |||||||||||| promoter\n",
|
916 |
+
"promoter |||||||||||| Non-promoter\n",
|
917 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
918 |
+
"promoter |||||||||||| promoter\n",
|
919 |
+
"Background Sequences |||||||||||| Binding Sites\n",
|
920 |
+
"Acceptor Sites |||||||||||| Splice Sites\n",
|
921 |
+
"Non-Splice Sites |||||||||||| Non-Splice Sites\n",
|
922 |
+
"Donor Sites |||||||||||| Non-Splice Sites\n",
|
923 |
+
"Donor Sites |||||||||||| Donor Sites\n",
|
924 |
+
"Non-promoter |||||||||||| Non-promoter\n",
|
925 |
+
"promoter |||||||||||| promoter\n",
|
926 |
+
"Background Sequences |||||||||||| Binding Sites\n",
|
927 |
+
"promoter |||||||||||| promoter\n",
|
928 |
+
"promoter |||||||||||| promoter\n",
|
929 |
+
"Acceptor Sites |||||||||||| Splice Sites\n",
|
930 |
+
"promoter |||||||||||| promoter\n",
|
931 |
+
"presicion 0.73 same 0.3\n"
|
932 |
+
]
|
933 |
+
}
|
934 |
+
],
|
935 |
"source": [
|
936 |
"import json\n",
|
937 |
"from tqdm import tqdm\n",
|
|
|
964 |
"\n",
|
965 |
"print(\"presicion\", right_sum/all_num, \"same\", same_sum/all_num)\n"
|
966 |
]
|
967 |
+
},
|
968 |
+
{
|
969 |
+
"cell_type": "code",
|
970 |
+
"execution_count": null,
|
971 |
+
"id": "7bc38f47-4a7d-43eb-abe8-db4310d280e3",
|
972 |
+
"metadata": {},
|
973 |
+
"outputs": [],
|
974 |
+
"source": []
|
975 |
}
|
976 |
],
|
977 |
"metadata": {
|
04-gene-sft/__pycache__/build_dataset.cpython-312.pyc
ADDED
Binary file (5.89 kB). View file
|
|
04-gene-sft/data/.ipynb_checkpoints/dna_promoter_300-checkpoint.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:490769d2870d4336ccd377292d22b3f78cfac78a4f0776043103562168c347b2
|
3 |
+
size 28531988
|
04-gene-sft/data/dna_promoter_300.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:490769d2870d4336ccd377292d22b3f78cfac78a4f0776043103562168c347b2
|
3 |
+
size 28531988
|
04-gene-sft/gpt2-small3-1024.json
ADDED
@@ -0,0 +1,602 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{
|
3 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
4 |
+
"input": "CCAGGATGCGCTGACGACCCGGCTGGCAGGCGGGTCCTCGTGGGCGAGGCGAGGGAGGCGGCGAGAGAGGAGCAATAGTTTCCCACCGCTCCCTCTCAGGCGCAGGGTCTAGAGAAGCGCGAGGGGATCTAGAGAAGCCGGAGGGGAGGAAGCGCGAGTCCGCGGCCCGCCCCGTTGCGTCCCACCCACCGCGTCCCCTCCCCTCCCCTCCCGCTGCGGGAAAAGCGGCCGCGGGCGGCGGCGCCCACTGTGGGGCGGGCGGAGCGCCGCGGGAGGCGGACGAGATGCGAGCGCGGCCGC",
|
5 |
+
"output": "promoter",
|
6 |
+
"model_response": "promoterpromoterpromo"
|
7 |
+
},
|
8 |
+
{
|
9 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
10 |
+
"input": "AGTCATGCCTGCAGGGAGAGAGGGCTGGGGCGCTATCTATGGTAGAAGTCAGAAGAGGCTGTCTATATGTCAGTGGGAGGAGAAAAGTGGCTCCAAAGCGGACTGTCTGTCCCTGTGCCAGAAGACATTGCTATTCAACGTTCACTCGCTTCTGCACAGGCCCACGTCACCCACAGACCGAGAACTCATCCACTAAGCGCTGCGGGTGGGCAGTACATACCGACGAAGTGCCTTCGCTACCCCTCTGGGTGTCCGTGCTCCCGCGAACCGGCTGGGGCTGCAGACGGGAAAGGAATGTCC",
|
11 |
+
"output": "Non-promoter",
|
12 |
+
"model_response": "Non-promoter"
|
13 |
+
},
|
14 |
+
{
|
15 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
16 |
+
"input": "AGGGTTTGCACTAACCGATCCACTCCGCGGGGACCTCTTAGAATTCTGCTGTGGATAAGGATATCGAACTACCGAGTCACTGGACCTAAAGAGAGATTCACTGTATCATCGTCTGCCTTAGACTGTTCCGCACCCAGGAACTGGGGCTGTTGCGCCCTTAAGGTTACTTTGAAGCCAAGGTCGCAAACAGACTTCCGCATTGACGTCAGTAGCCGAACGCTGATTTTCTTAATCTAGTATTTAGGATGGGCCTCTGTCGCCTAGCCGCTATCGCAGAGTGGAGCGGGGCTGGGAGCAAAG",
|
17 |
+
"output": "Non-promoter",
|
18 |
+
"model_response": "Non-promoter"
|
19 |
+
},
|
20 |
+
{
|
21 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
22 |
+
"input": "TCCTCGTAAGGACCTAACTGTTCCAGCACTACACAGCTCATGGTGTCCTCAAGATTAGTACATATCAGAGGCCATGAACTTAGTCTTACGCTCACTGTGGCACTGGACGCGCGGGGGATGCGGGGTTGTTCCCAGCGATTTATGATTCGTTATTTGCCGCGTTGACTCGCCGTCTGTAGCCCCATGACAACGACATTCCTGCATTCTCTGCCTGGAGAGCGAAGTGACAATACTGAATTGAAACGGCTAGAATGTCGCTCGCTGAGGCTCCGGACCTTGGAGCGTCTAGAGTCTGGCTAC",
|
23 |
+
"output": "Non-promoter",
|
24 |
+
"model_response": "Non-promoter"
|
25 |
+
},
|
26 |
+
{
|
27 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
28 |
+
"input": "TGCCACATAAGTCGGGTCAGCAAGTCGGTGGCGAAAAGAGGGCCACGTCCCAGACTTTCTGGGAGGAGGCGGGACAAAGGGGCGGGGCGAGCGCAGCGCCCTCCGGGTGGGCGCCGTCAAGAGGCCGGGGGCGGGGCCGAGCGCGGCTGGGCGGGGCCTTGAGAGGCCGGCCGGGGGCGGGGAGGCTGGCGGGTCGGCGCGGGCCCAGCCGTGCGTGCTCACGTGACGGGTCCGCGAGGCCCAGCTCGCGCAGTCGTTCGGGTGAGCGAAGATGGCGGCCGAGAGGGAACCTCCTCCGCT",
|
29 |
+
"output": "promoter",
|
30 |
+
"model_response": "promoterpromoterpromo"
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
34 |
+
"input": "ATAGGATGCGTCTGTACTCTGATTAACAAACTGCTGAAGGAATAAATATGTACTTGCTGGGCAGCGCCGCCGGCCGAATGGAGATAAGCCTATGCAGCTTCGTGCGCGGCTCCCCAGCCCTTTGCTGCGCCGCGAGCTGCGCCCTGAGACCCCCGCCTCGCTGCCAGCTACTTACCTGCCCCGGCGGAGGGGGCCATGTTGCTACACCTAGGCAGGCGGCAAGAAAGCACGCGTAATGAATTCCTTATATCCCCCGCGCCCCAACGGCGGCGGCGCGCCGGCCGGCATGGAGCCCCGCGC",
|
35 |
+
"output": "Non-promoter",
|
36 |
+
"model_response": "Non-promoter"
|
37 |
+
},
|
38 |
+
{
|
39 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
40 |
+
"input": "AGGGTAAAAAAGGGGACAAATGGAAATTTGTGTTGCAGATAGATGAGGAGCCAACAAAAAAGAGCCTCAGGATCCAGCACACATTATCACAAACTTAGTGTCCATCCATCACTGCTGACCCTCTCCGGACCTGACTCCACCCCTGAGGGACACAGGTCAGCCTTGACCAATGACTTTTAAGTACCATGGAGAACAGGGGGCCAGAACTTCGGCAGTAAAGAATAAAAGGCCAGACAGAGAGGCAGCAGCACATATCTGCTTCCGACACAGCTGCAATCACTAGCAAGCTCTCAGGCCTGG",
|
41 |
+
"output": "promoter",
|
42 |
+
"model_response": "promoterpromoterpromo"
|
43 |
+
},
|
44 |
+
{
|
45 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
46 |
+
"input": "TCTGGGTCCTAGAGGCTACTGAGACAAATGCTGGCTGAAGCCGAGGTACCTAGATTTGTGCAAATCGAGGAGTTCTATTCTGCGTGTTTGTGAAAGGGGGTTAGACGAGAGGAGTGCGGTGGAGGCGAAGATAATAGGATTATTCTTACAGTGTAATAATAATTATTGGTGCTAATGTTTAGGAAGCCGTGAAAGATAACACGAGGGGAGAAAGTGGATAGAAAAACCACCGCTGGCAGTTCTTTTCATTTGAAAGTGAATTGGGAAAAAGCGAATAAAAATTAGTCATGAGTTTTGGTA",
|
47 |
+
"output": "Non-promoter",
|
48 |
+
"model_response": "Non-promoter"
|
49 |
+
},
|
50 |
+
{
|
51 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
52 |
+
"input": "CTCGTTGCGTAATCCTCCTTCCGCACCAACGCAAGGCAGGACAGGAGGTCAAATGAGTTCGAGATCAGGCGCCACAGACTCCACTAAAGCACAGACACCAGGTGCGGTAAGACACGCTGGCTCCAAAGTAGGACACCAAACCATTGCTAATGCCTGGGGAGGGCCAAACATGGGCAGGCTGGAGCACTCTCCTCAAAGAGGACCACAAGCCGTGACAGCAGAGCCCCTCGGACTGATGGGACACGGCTTAAACTGAAGTTCGAGGACTCTGTTCTGCAGAGGTCTGAGGCTGGAAGGGGG",
|
53 |
+
"output": "Non-promoter",
|
54 |
+
"model_response": "Non-promoter"
|
55 |
+
},
|
56 |
+
{
|
57 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
58 |
+
"input": "CGGGAGAGAGCTGGAGTCTCGCGGCCTCGGAGGCCGGGAAGCCAGCCCCGGCGGGGACCCGGGCGGGGCGGAGCCAGCGGAGGCCCCACCCCCGGCGTCACCGGCCCCCAGGGGGGCGTCGCCCCCACCCCGCGCTCCAGGTAGCGGCGGCCTCAGCTGCCGGCGGTGGCTGGCTGAGCTCCCCAGCGGCCCGCGGGCCGGGGCGGGGGCGGGCCGGGGGCGTTCCCGCGGGCCACCGCCCGTGATGTCACAATCGCGGCGGGCCGCGGCGCTCCGGGGTCGGCGGTGGGCGGCGGGCCG",
|
59 |
+
"output": "promoter",
|
60 |
+
"model_response": "promoterpromoterpromo"
|
61 |
+
},
|
62 |
+
{
|
63 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
64 |
+
"input": "AGACTATACCACTTGCCGGCCGCGCAGAGCCCTCCCCAGGTCGCGCAAGGAGCTCGAGCTACTAACGGAGGAGCGGCGCAGGCCCCGCCCTTAGTTCATGAGCTTACTTGTACAGATCTCGGCCCCGGCCCCGGCCCCGCCCCCTGCCTCATCGTGGACCATCACGCTTCCAGCCCAGTCGGCTACGGAGATAATCCATGGAGCTCCGAGGCGCCGAGAGAGCAACCTTAAAGTATCGGGATCAGCGACATTATCAGCGCTGGTCCCTGCAGCTTCTGGTTCGAGATCTGGAAGGGCGGT",
|
65 |
+
"output": "Non-promoter",
|
66 |
+
"model_response": "Non-promoter"
|
67 |
+
},
|
68 |
+
{
|
69 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
70 |
+
"input": "AAGTTTAAAAATGAACCCCCAACAGAGCCACCCGCCGGGGACGGTTCTGTTTTCAGCACAACCACGGGGTTCGTTTAGCCTGTCGCCCAAGTTTGTTTCCTCTAGGTCGCCCACCTTCCGTCGAATTCTGTCACTAGACTTTTATACTGGGTTTGTATGTAGCTCGATTCTGCATACAAGCTGGACTGAGAACTGGCAGGAGGCGAAGATGAGGGGGCGGCATCCAATTTGTTGTGATCCTTTTGCGTCAGGCTTCTGCCTGAGCTCGGTGAGGTCAAGCCTCCTCTGCTTCCACCCCTC",
|
71 |
+
"output": "Non-promoter",
|
72 |
+
"model_response": "Non-promoter"
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
76 |
+
"input": "GGTCCGCTCCGCGGGGCAGGCGTCGTGGGCCCAAGGAGGCCAGGCCAGGAGTCTCGGCCTCGGGGTTACGCGGCGGCGGGCAGCTCTTCCGCTCCGCTCAGGGTGCTGCTAACCCACAGCCAAGCAGCGACCCGCAGGACCGGCGAGAACCCCGCCACTGACTCCGGCCGGAAGAGAACGTGCAGAGATGTACGTCACTTCCGGGGCGTGGCGTGGCCGGGCGTGGCCGTCCCGCCCCTGGCGGCGTGCCTGGTGCCCTGTTCCGCGTCTGTGCGACCGTCCGTCCCGAGCGCGCAGCCG",
|
77 |
+
"output": "promoter",
|
78 |
+
"model_response": "promoterpromoterpromo"
|
79 |
+
},
|
80 |
+
{
|
81 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
82 |
+
"input": "TACCGGTGCTACTTTACGCTGCGTGTTCCCTAGCAGGGGACCTAGTGTGCTACCATAAAAGCACGACGATAAGACTATACCTCTTCGCCTGCTACTAACATAGGGCACTTCTTCCCCCTCAATCTATCGACGTTCGTTAATATCACCGACTCCCAGGGAGGATCTAATAAAAGGAGGAGGGCTCCTGGAATAATAGAGGAGTGGTGCTGCGAGGGGAAGGAAAAGCCAATCCGTGACCCCTTTCCCGGAAAGCTTCGAACCGACAGCCAGAAGCCCGCACGCACCTCGCACCATGAGATG",
|
83 |
+
"output": "Non-promoter",
|
84 |
+
"model_response": "Non-promoter"
|
85 |
+
},
|
86 |
+
{
|
87 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
88 |
+
"input": "GGTCGAGAAGACCTGGGCTACTGAATGACGAATGGGACGCACACGTACTGGCTACTTTTGTGGAAGATGGTCCGCCCTGGTTTGGATTGCGCTTTGCCACGAGAACCGCCGGGAAGGTGGGGGACGGGGACGGGAGCAGCCAATCACGCGCCGTCCCGTTCGCTTCGTCAGGGCGCGATCGCGGATTCGGCACTCAGTGTATGGTACTTCGGGCGAGCGAGGAGTCGCGAGCGCTGTGAGTTTCTCCTGGAGTTTTCTGCCGTTTTCGGACCCTAAAGCTTATCTAGGCTCGCGCTCTGC",
|
89 |
+
"output": "Non-promoter",
|
90 |
+
"model_response": "Non-promoter"
|
91 |
+
},
|
92 |
+
{
|
93 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
94 |
+
"input": "CCCGTCCTGCTCCAATTGCCAGGAGATACCATTAGCGGACTGACTCCAAGAAAAGAGATCCCTTGAAAAAGGCAGTGCTGAACGCGAGTATGGCGCTGCTTGCGGGCCATTCTGTGCGCTACGCTACCCGCTCGTATTAATCAACAACAGGACCGGACTCTGGTTGCTTGGGAGTTCAGGTGGACAACAGCAACTACTAACAAGGTTTTTCGTTCATAGTGCGGGAATGGCAAAACACGAGAGAAACGCACTCTTCATAGGTCACTATCCGTCTGGAAGAAGGCTTTGGACAGAACAGGG",
|
95 |
+
"output": "Non-promoter",
|
96 |
+
"model_response": "Non-promoter"
|
97 |
+
},
|
98 |
+
{
|
99 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
100 |
+
"input": "CCCCTGCCCCCAGGCACTGACAGGGTAGCAGAGGGAAGACACATGAGCTGCTTTTTAATTTTTTTTAGAATTAATAGAGACGGGGTCTCACTCTGTCACTATGTTGCCCAGGCTAGTTTTGAACTCCTGGCCGCAAGCAGTCCTCCTACCTCAGCCTCCCAAAATGCTGGGATTACAGGTGTGAGCCACCGTGCTTAGCCATGAATTGATTTTAAAACCTTAAATGAGGGGCCAGGTGTTGTCTTTCCGTTTGCCTGAGTGACGCGGGTCTTCTCCAGGACATCCGGCCACAGATCTGCC",
|
101 |
+
"output": "Non-promoter",
|
102 |
+
"model_response": "promoterpromoterpromo"
|
103 |
+
},
|
104 |
+
{
|
105 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
106 |
+
"input": "CAGCCCCTATGTTGGTGTGGAGACCCTCACCCGAGCTGGGCCTGCACCCTACACCAATCCGAGGTACCACGATCGGATTACTACAACAGCGGGGTCAGCTCAGGCGCTCGGTCCCCCGGGATCCTGCTCCCAGGCACACGATTCCGTTCGTTATAATTATGTAGACGGGGCCTGCGGTAGACCGACTGCGGGTGACCGAGTAGTGCAAAGTCTGGCTTCTCTTTTGATAGAGCACATCGATCTGTCTTCCTCTTTTCACCGTCGATGAGCCCCGCGTGCCTGGGACTGCTCCAGGCTCCA",
|
107 |
+
"output": "Non-promoter",
|
108 |
+
"model_response": "Non-promoter"
|
109 |
+
},
|
110 |
+
{
|
111 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
112 |
+
"input": "GTCGCCGCCCACATTAGTGTGGGGCCCTGCGGCCTAGCGTCCCTCACCAGAGGCCTCCCCTTGCCTAGCTGGACCGCCGAGGGACATCGACGAGTATCCTCCTCCTGCTGTCCCCGGCTTCGCCTGCCGCCCCTAACCGGCCAGTCAAGATGGCCGCCGCTGGGTGAGGCAAGCTGGCGCGCCGCGGGGGCGTCTGGGAGTTGTAGTTCGGGACGGCGGGCTGACGCACTTCGCCGCCGGCCGACGGGCGCCATTGTGCGGCGCGCGCCGGGTGAGTGCCGCGCGAAACCTGCGTCCGTC",
|
113 |
+
"output": "promoter",
|
114 |
+
"model_response": "promoterpromoterpromo"
|
115 |
+
},
|
116 |
+
{
|
117 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
118 |
+
"input": "GCGGGTAGCGATGGGCCGGGCCTCAGTTGTTTAAAGGCCCGATGCGACCCCGCTTAAGACGTCAGATCGTTCACCCCCGAGGGTCGCCGGATTTCGCTCCCGCGCGAGATGGCCTCCACCAGAGACTGCTGCTGCGCCTTGGAGCGGTGCCTCGCCTCCCAATCCTCGTCGTGAATTGCCCCAATTGCTGATCTGGGAACAAGCTCGTGCCAGGCGGGCGGAGAGTACGAACTCCTTATAGACCAGGATGGGCGTAGCAAGAGCGCGGGCGGCCCATGAATGTCTGCAGCGGTGAGGACC",
|
119 |
+
"output": "Non-promoter",
|
120 |
+
"model_response": "Non-promoter"
|
121 |
+
},
|
122 |
+
{
|
123 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
124 |
+
"input": "CGGATGCGGGGCCGGAGGGGATGGAAGGGGCCGATCTAACACGATATTCCGTTCAAGAACTACAACCGTCTACAGCACACCATAGCCGCTCGCTGAAGGATGTCGAAGGGCGCCCAGGCACCCACCACGTTTGGCCCCTGCCCGGGCCGGTTCCGATATGACTCACTCATTTACTCCCCTAGAGCTCCGAGCCTCGGATCGCCGAGGTGACCCTTACCCGCCTTCCTACCACCTTAGGGGATTTGGCCCGGAGAACGAGATCACCCTCTCATCGTGGCGGGGTATTCCCTTTAAGGTTTG",
|
125 |
+
"output": "Non-promoter",
|
126 |
+
"model_response": "Non-promoter"
|
127 |
+
},
|
128 |
+
{
|
129 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
130 |
+
"input": "TCCCCGTCGGTCCGCGGGGCTCTCGCTGGAGCCTCCCCGACTCCGGTTTCCCCTCGTCCAGAAGCCCGACGTAACCAAAGCCCAGTCTGTCACTTTAAACACGCCCCGCCCCGCCTCCCGCGGCTGTGTTGCCTCCTCGCTGGAGAACACCCTGGTCGACCTCTGTGCGTCCGTGTGCGCGAGCGCGTCCCGCCGAGGCGGTGGGCAGGGCGGACGGTGCGCAGTGCGTTCCCGCTGGTCGGAGCCAGCACACTAACCACGCCACGCGCCCTGCCGTCCCTTCGCCTCCAGCCGCTGCAG",
|
131 |
+
"output": "promoter",
|
132 |
+
"model_response": "promoterpromoterpromo"
|
133 |
+
},
|
134 |
+
{
|
135 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
136 |
+
"input": "CACCTACTCGGGGTCCGAGGAGGGAGGATTGCCTAGGCCCAGGACTTTGAGAACACGCCTCTACAGAGATTTACATTTTAAGAAAATTAGCTGAGTTCGGGTCGGGCGCAGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCTGGCGGATCACCTGAGGTCAGGAATTCGAGACCAGCCTGACCAAAACGGCGAAACCCGTTCTCTACTAAAAATACAAACACTAGCCGGGCGTGGTGGCAGTCGCCTGTAATCCCAGCTACTCCAGAAGCTGAGGCAGGAGAATCGCTT",
|
137 |
+
"output": "Non-promoter",
|
138 |
+
"model_response": "Non-promoter"
|
139 |
+
},
|
140 |
+
{
|
141 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
142 |
+
"input": "CCCGGCCGGTGTGCGGGACACACTATTGTAATCCTAAAAGGCCATGGGCCCCTGCTGGGGTCAAGAATAATGCTCAAAGTTTAAACTACCGAGTGTTGCTGAGGCTTGGGTTGCACATCGAGAGTCGCGATAACACTCACCCAGGGGAACATACCGCGAGGGATTACGGACGATGGAGAGATCACCCAAGGCAGTGGGAGTCCGCGCTTCCTCCCTTAAGTACCGCTGGTTGTCCCCCGACGTAGTGTAGATAGTGAGCCCTCTTGTCCGCTGGCACGTGATGTATGTGTGTCCTTGTCT",
|
143 |
+
"output": "Non-promoter",
|
144 |
+
"model_response": "Non-promoter"
|
145 |
+
},
|
146 |
+
{
|
147 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
148 |
+
"input": "GGGTTAGGCGCGTGCCGCGAGAACAGAGTGGACGGAGCGTAGGAGAGACCGAAAAGGCTGGGGGTGGGAGTAGCGGATTTGAAGCACTTGTTGGCCTACAGAGGTGTGGCAAGCAGAGCACCTCAGAACTCAGGCGTACTGCCCGCCGCCCGAGCCCTGCGAGGGCCGATAGCGAGGGTGTGGCCCTTATCTGCACCCAGCAGAGCGCCGGCGGGGTACGGTCTTAGGACCTCGATCTCCTTCTCCCTCATTTTCTCTCATCCCTACCTATTGTGGGTGAGTCCTGGCCCCTGGACGGGG",
|
149 |
+
"output": "promoter",
|
150 |
+
"model_response": "Non-promoter"
|
151 |
+
},
|
152 |
+
{
|
153 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
154 |
+
"input": "CTTGCATTAATACACCCGGCGCCAGAGGAACGTGGGGATCAATCATATTACACTGCTTCAGACTTGTAATTTTATTTGGTTCAGGCATCGTAGTGGTGAGGCTTGGCAGAGAGCCCGAGATGGAGCTATTGAGACGTTTCAGGCTGGGCGGATAATCGCATCAGTAGGGACTTGGCTAGATCTCCCAAGCCTTCTCCACTCTTCAAACTTTCACCAAACTCTTAGGACCCGTCGGGTCGGTTTCATAGACTCAGCCGCTCAACCTTTTAAGCGAGCGGCGCGGCGGGGCGGGCGAACTCG",
|
155 |
+
"output": "Non-promoter",
|
156 |
+
"model_response": "Non-promoter"
|
157 |
+
},
|
158 |
+
{
|
159 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
160 |
+
"input": "GGAGCGTCATCTTATCATTCTCTCTTTTTTTTTTTTTTCTGGTAATGATTTTTTTTGTTTTTTTTTTTTTTGATATTGACAAAAGTTTAATCATTTCAATTAAAAATGCCACTAATTTGACTTTTTAAGTAAAAAATGTAGGGGGTTTTAAAACTACTTTCCTACTACCAAAAAATCAGAAAGTATCTAGCTTTCTAAATTGGGAAAGCAAGCAATGTTATAAAAACACTGAAGGAATCTCTTTCTTCGTGACCTTTTGTTAAACTCGGTTTAAGCTGTAGACCTTATTTAAAATAAAAT",
|
161 |
+
"output": "Non-promoter",
|
162 |
+
"model_response": "promoterpromoterpromo"
|
163 |
+
},
|
164 |
+
{
|
165 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
166 |
+
"input": "GGCTGTGAAACAGGGCCACCTGCAACCGTGGCTTTTCAAAGCTGAAGCCATACCCTGGAGGGAGAGAGGATCCTTGGGCACTGACCCTGGCTGTCCACAGCTAAATGATCATGGGACACCCATGACCATGGGCGTGGCTGGAGGAGGCCCTGCCCTCAGGGGTCTATTGGCATCCAGGGGAAGTGTCTACGGGGTAATGATTAACCCACACCTGCCACCTGGTTTTCAGCCTTCTTGCCCTGCTGACTCATTTTGCAAATCCCACTTGCAAAATTTGGGAGCAGCCTGGGTCCAGCCCCC",
|
167 |
+
"output": "promoter",
|
168 |
+
"model_response": "promoterpromoterpromo"
|
169 |
+
},
|
170 |
+
{
|
171 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
172 |
+
"input": "CTGTGACAATCATGGAGCAGTTGATTGACCCCTTTTCGAGCGCTCGCTATGCTTTAATTTCAAGACCCGCTCTCTAAGGTGCTCTTCCCCTTTAAGACTCACGGCTCTTCCCGTCAGGCTAGCTCGACCTGAGGATTCGTCAACCTCCTTGACAGCTTCCGCTGGCCTGCACCCGAGATTGGGGACGGTTTTCCGCACATCGTGCCTTCATTAGTTAGGTCCTGCGAGTGCATCCAAGGGCCCTCCCCCAAGCCAACAGCAGAACCAGCTCAATTTCTACCCTCACGGCGCTCCGCGGTG",
|
173 |
+
"output": "Non-promoter",
|
174 |
+
"model_response": "Non-promoter"
|
175 |
+
},
|
176 |
+
{
|
177 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
178 |
+
"input": "TAGAGAGGCCAGAATCCCTCGGGAGTGCTACCCGGTCAAAAAATCGCGCCGCCCCGTGGGGGACCGAACTTAGCGATGACGGTTCGTCTAATCCCGTAGGAGAGTATCGGGCGAGCTCGGATTGCGAGATTACCTAGTGTTAGTGCGACCGCCCAGGCACATCTGCAGACTGACGTCAGTATGCAAATCAGAGTCGGAAAAGCTGCCAAAATTTTCTCCTGCTGCGCCACCTAACCTGCGTTCGGGGGTAAACCCATCTAGGGCTACTTCAACAAAACTTTGCTGCCCTTCCTGCTCCTC",
|
179 |
+
"output": "Non-promoter",
|
180 |
+
"model_response": "Non-promoter"
|
181 |
+
},
|
182 |
+
{
|
183 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
184 |
+
"input": "AAGCTTTGTTTAGGTCCGGAAGGCGGGCTTTCCTGGGAGTGGGTGGGGAGGGGGCGTTGATTCTTGACCAATCCTTTCAGTCCGTTGGGTGGTGACCAGCCAATGGGCCGGATGGATAGGACGCTCCTCCCGGAGAGTAGTGAGACCCCTGGTGCGGGGCGATTGGCGGCGGGAGCGATGAGTGGCAGCCGCACGGCCCAACGGGAGCTGTGCGTGGGCCGCGGGGCGGGGCCAGGGCGGGTGCGCGGCGGCGGCGGGGTGGCTGGGCCGGCGGCGGCGGCGGTACGAGGCGCGCGCTCG",
|
185 |
+
"output": "promoter",
|
186 |
+
"model_response": "promoterpromoterpromo"
|
187 |
+
},
|
188 |
+
{
|
189 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
190 |
+
"input": "TCCCATCCAGCGAGAGGGGCAGGTTCCGCATTTTCTCTTCCCCTTTCCCAGCGCTTCCTCCAGCACCCGAAGCCCCAACCCTGCGGGTCAGGAACTCCCTAGTCCCCAAGTCTAGGGATGAGATGGGGGAAGGAGAGCCGTCAGGGTTGACCTGGAGTTTTGTCCGCTCCTCCCCTACAGTGATCCCTCTAGCCTTCTCCAGTCGCCTCCGCCATGTCCGAGGAGCTGGCCCAGGGCCCCAAGGAGAGCCCCCCGGCGCCGCGTGCGGGCCCCAGGGAGGTGTGGAAGAAGGGTGGCCGC",
|
191 |
+
"output": "promoter",
|
192 |
+
"model_response": "promoterpromoterpromo"
|
193 |
+
},
|
194 |
+
{
|
195 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
196 |
+
"input": "ACATCGTATTGCGCTTACTCGGGTCGTGCAATGCACTGACTAGGAAAGGGCTGCGGATGCCTACGTACATAATGCTGTTGCGCACCTATGCACGCGGGTCTGGTCCTGGGCAAGAACCGCCCCCTCTCCGGGCCTGGTTGCCTTTTGATTTGCAGAACAACGGGCCAGGCCCCTTCCCTCTACCGCTCACAAGCGTACATCGCGACCATACCTGGTCCAATGCGCTACGGACGAGGGCAATTCTCGTAATAGGACCCACAGCAAGTCTTGTATCACCTTTGGCTAGCCTGTGCCGGCTGT",
|
197 |
+
"output": "Non-promoter",
|
198 |
+
"model_response": "Non-promoter"
|
199 |
+
},
|
200 |
+
{
|
201 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
202 |
+
"input": "ACGTGACCGACGTCTTCCGCATACCCGGGGCTTCCCTCACTTCCAGGACAACCTGTTGCGCCTCTATCTCCTCCCCTCCCCGCAGTTTCCCCGCCTTGGCCTCTAATAGTAGTGAGAGCACTTTAGGCGGTATAAAGTCTGACGCTAGAATCAATCTAGCCGCGGCGATTGGCCACCTGGGGAATCGTTAGCAAAATGTCACGTTTACTACTTTACGGCGATCTTCCAGGGTCCGGGGACTGTGCCAAGGATCCTACCGGGGCTGGCAGGAGCCTAAGACAAATACGCTAGCGGCGGAGC",
|
203 |
+
"output": "Non-promoter",
|
204 |
+
"model_response": "Non-promoter"
|
205 |
+
},
|
206 |
+
{
|
207 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
208 |
+
"input": "GAGATTTCACGCCTAGCGGCTCAGGATCGTGTGTGCTACAAAAATAGGGCCGCAGGGCCGTATAGTCGCACGAACATACAGGTCGTCCGATTCAGGCTCGGGGGCTTGGACGGAGATGACGGCCAACTCTCACCCTCACCCCTCATTTAGGCGGGCGCCGGGGGCATCATCGGGGTCCCTGGGCCCTCCCCCTGCTCAGTGCGTAGCTCCGATAGAAAAGCTAGGGCATACGTGCAGAGACGGGTTGTCAGGCAGACACCCTGTCTGTTGGCAGCTAGCAGCTGGCGGACGCGACCCGGA",
|
209 |
+
"output": "Non-promoter",
|
210 |
+
"model_response": "Non-promoter"
|
211 |
+
},
|
212 |
+
{
|
213 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
214 |
+
"input": "AAACCATGCAATCCGGATGTGGCAAACAAAGGGTGGGAGCCCACTTGTCATGGCATTAATTGCACGTCTGCTTAGGGCGAGTTTTAACCGTGGCACACAAAACAGCACGATCGAGGATACGGGAGGTCGCCGCGTGATATAACGTCTACCCATGGTTCTCATCGAACCTCTACCTAGACACTGACCAAACCAACGCTCCCAGTGCGCACGGCCATTTCTCATAGATATCTTGAGCAGGTTGCACCACCAAGATAGTGAGCAGGGCGGTGGGTGGTGCGGAGTGCCGAGCGGCCTCACCCC",
|
215 |
+
"output": "Non-promoter",
|
216 |
+
"model_response": "Non-promoter"
|
217 |
+
},
|
218 |
+
{
|
219 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
220 |
+
"input": "GTCGGAGGTCGGCGGCCAGAAGCCAGATTCCCTTGGCCCCAGCTCGTCCACTTCCATCCAGCAGTAGCCAAACAACCGTTGAAAATGGCCTGGCGGCGGCCGGGAAACACCAGAGAACTATCTTCCAACCCCTAGATCCCGCCCATCTCGTTCCGGCGGTGTTTCCGTGGCGACGCTATCCGAAGTGCGGCTGCGCAAGGGTGACGGCGCGCGAGCAAGGGGGAGGGGGTGTTTTGGTTCTAGCCGCTCGCCGTCCTTGCAGGCTCTGCCGTCGGAAAGCCGCTCATTCTCGCTTCCCCT",
|
221 |
+
"output": "promoter",
|
222 |
+
"model_response": "promoterpromoterpromo"
|
223 |
+
},
|
224 |
+
{
|
225 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
226 |
+
"input": "GGTGGTTGTAGGTGGGAAGAATCCTGGGTTCCCATGCTGTCTGAACCAGAGTCATTTGTGGCCCTGCCCTCCCCTGGGACTCAATTTCCCCACCTATAAAATAAGCCCCAGTGTCCGCGAACCCTGGAGGGGCCCGCACCACTGCAGGAGCGGCCGCCGGCGCCAGGGGGCGCCTCCTCGATAACTCGGCGCTCGGCTGGCCAGGCACCGGCGCGTCGGCCGCTCGATTGGTCGGGTCGGGGCCGGCCTGAGCGCCGCGGGCCTGCGCCATTGAGGAGCGGCGGGGAGGAAACGCCGCGC",
|
227 |
+
"output": "promoter",
|
228 |
+
"model_response": "promoterpromoterpromo"
|
229 |
+
},
|
230 |
+
{
|
231 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
232 |
+
"input": "TAGGAGGCCTGTCTGCACAGCTGTGCTGGGAAGCTGATCCATTCCAACCACCGCCTCATGGCCGCTTACGTGCAGCTCATGCCTGCCCTGGTACAGCGCCGCATCGCAGACTACGAGGCTGCCTCGGCTGTGCCAGGCGTTGCTGCTGAACAGCCTGGGGTCTCTCCATCAGGCAGCTAGCCATACCCAACCCCAGGAAGGAAGGCCTTGGATGGACCCTCAGATTGAAGGACCCGGTGGACCTTGGGGTTGGTGAATCCTAAACAGAGAGAATTCGAGGTTGCCTGAAAGCTGGGTGTC",
|
233 |
+
"output": "promoter",
|
234 |
+
"model_response": "Non-promoter"
|
235 |
+
},
|
236 |
+
{
|
237 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
238 |
+
"input": "TGTTCTAGCTATCTTGAAATACGCAATACTTTGACATTAACACACTGCTATGTTCTCTAATGACATCCCTGGAAACCTCCTAGGGGCAGCCAGATCTTTCATGATAGTGGTTGTCAGTCCTCATATGGAGGGTGGAGGTTTGAAGCAGAGAGCCAAGGGAGGTTTTGTGCACCTATGCTTGTTGTGTTTGTACACAATGACTATGCATACGCTGTGAGTATAAAAGGCTCATTTAATCCTATTGTGTCCCAGGCTTGGTTTGTTTTCAAATCATTACAGCATGAATTTAGAAGGTTTTGT",
|
239 |
+
"output": "Non-promoter",
|
240 |
+
"model_response": "Non-promoter"
|
241 |
+
},
|
242 |
+
{
|
243 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
244 |
+
"input": "CCCCGAAACCCCTCATCTTGGGGGGCCCACGAGACCTCTGAGACAGGAACTGCGAAATGCTCACGAGATTAGGACACGCGCCAAGGCGGGGGCAGGGAGCTGCGAGCGCTGGGGACGCAGCCGGGCGGCCGCAGAAGCGCCCAGGCCCGCGCGCCACCCCTCTGGCGCCACCGTGGTTGAGCCCGTGACGTTTACACTCATTCATAAAACGCTTGTTATAAAAGCAGTGGCTGCGGCGCCTCGTACTCCAACCGCATCTGCAGCGAGCATCTGAGAAGCCAAGACTGAGCCGGCGGCCGC",
|
245 |
+
"output": "promoter",
|
246 |
+
"model_response": "promoterpromoterpromo"
|
247 |
+
},
|
248 |
+
{
|
249 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
250 |
+
"input": "AGCATCTTAGAGTTTCATACAAGGATCAGATTTGTGCTATCGGAGGCCGCCGATCACCCAAGTAAGCTTCATACGGGGAGGGAGGGAGGGCGCGAGCTTGGGAGTGGAGAGGGAAGGAGCAAACTTTGGCATGTATCACAGGGTAGCTCTGCCCCTGCGCGCCCCCTCCAGATCCGGCCCGCGGCTCCCCTCCCGCGGGACATGCTGCGGGGAAGCACGGCGAACAGCCCCTCCTATGATCACAAATGGCTCACCGCTCTCGGAGCCCGGCCGCCACCGCCTGGTGTGCGAGGCTATCCG",
|
251 |
+
"output": "Non-promoter",
|
252 |
+
"model_response": "Non-promoter"
|
253 |
+
},
|
254 |
+
{
|
255 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
256 |
+
"input": "TCCCTCTGACTCGGTTTCCCCTCTCCCCCGGCTCCGCGGTCGCCCTCCTCCTGGACTGCTTATTTCGTCCTCGGCCACTGCCTCTCGGGCTCGAGCTTTCTGCTTCAGAGCAGGAGAGAAACGGAGCGAAAGCAGTTTCTGTCTCCCCGGGGTCTGACTCGGTCCCCCGCGCTCGGTTCTCTGTCCCCTCCCCCTCCCGCCATTGTTCCCGGCAGGAGGAGTGCGCGGCGCAAACTTGCGAGTTCCCCCAGTCTTTGCCCCCGCAGGGCCGCGGACGGAGGTGGTGGCGGAGTTCCCGCT",
|
257 |
+
"output": "promoter",
|
258 |
+
"model_response": "promoterpromoterpromo"
|
259 |
+
},
|
260 |
+
{
|
261 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
262 |
+
"input": "GGCATGATGTTGGCCGACTTGTGCTAAGGTCCTTGGAGAGGTGGAGGCTCTACGGGGGGTGGGCAGTACCCCTTAGAAAACACCCCTCACTCTCAACAACAGAGGCGGTCTGACGTATTGGAACTCATTCTTTAAGGCCAGAGCTGTGGTTGCTCCAGCTGCAATGTGCGCTCTGATGATACTTTATAATAGATTTGAAAAGGGGAGGGACAAGCCTAAGGTCATGGCAGGAGGCTGGGCCAGGGAAGTGTGGGATCCCGTGGTTGTAGTCGGCGAACCGAACGTCTCGGCCTCTCTCCC",
|
263 |
+
"output": "Non-promoter",
|
264 |
+
"model_response": "Non-promoter"
|
265 |
+
},
|
266 |
+
{
|
267 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
268 |
+
"input": "TGGGGGTTTTTTGGGGGCTCGGGCGTCGACTGTGGGGCGCTGCGGAAGGGTGGGTCGTCCGCGTGGGGTCCTGGAGCGCAGGAGGCGGAGGGGAAGCCGCGCAGGGGCCACTAGGGAGGGGCTCTGGCGGGGTCGCAGGGGCGCGGTCTGCGGGCGTGGGGGCGGGACCTCGGGGGCGGGGCCTGAGGGCGCGCGCACGCCTCCGGCGCGCCCCCTCCCGGCCGCCATGTTGGCTGGTGTGTGGGTGTCAAACTGAGCCAGACGCGGCGGTGGCGGCGGCTCCGCGGGCTACGGTCGCTC",
|
269 |
+
"output": "promoter",
|
270 |
+
"model_response": "promoterpromoterpromo"
|
271 |
+
},
|
272 |
+
{
|
273 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
274 |
+
"input": "GGAGAGGGGCGGTACGCACCACGGGGGAAGCCAATGAGAAAATCAGGCCCAGCCCTAGGGGGCGGTGCTGTCGGTCACATGCGCACCTGGGGCGGGTGGTGGCGGCGGCGCGGGCACCGCGAGCCGGCGGAAGGGAGAGGGGCCGGCCTGGGGCGGGGTTAGGCAGGTGAGTGACAGGCTCCAGGGGGCCGGCCCCTGCCTGGTGCCCCGAGCGAGCCGGGAGTAGCTGCGGCGGTGCCCGCCCCCTCTCTCCGCCCCTCCAGCGGAGCTGGTCTCCGGCCGGGCACCGTCGCGGGCCCC",
|
275 |
+
"output": "promoter",
|
276 |
+
"model_response": "promoterpromoterpromo"
|
277 |
+
},
|
278 |
+
{
|
279 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
280 |
+
"input": "CGACAACCCGCCCTTGTTCCCGGTAGAGGAACAAAGAGTGCTGATTTACGAATCTAGGCTGCCAGATTCTGTGTTTCCACTGGAGGGCGCGTCCGATGCAGATGTTGGCTCAAATTCCATCTTAACCTATAAACTCAGTTCTAGCGAATACTTCGGGCTAGATGTGAAAATAAACAGTGATGACAATAAACAAATTGGGCTCTTATTAAAGAAATCCTTGGACAGAGAGGAAGCTCCTGCACACAACTTATTCCTGACAGCCACAGATGGGGGCAAACCTGAGCTCACAGGCACTGTTCA",
|
281 |
+
"output": "promoter",
|
282 |
+
"model_response": "Non-promoter"
|
283 |
+
},
|
284 |
+
{
|
285 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
286 |
+
"input": "TTCCAGCACTCCGGGAGCCCGATTGCGAGTTTGTGAGAGTCCTTGGTGGAGGAGGTTGGAAAGCGTTCTCCCTCCCACAAAACCCGGTGCAGAGGGAGGGAGAGCCAGGTCCGTAGCGTGGGTCCAAGAAACTATGGATCCTGGAAGTGAGCACACTTTTGTGTCTTAAAAAGTTTGGTGTCTCGTTTTGAGGTGAAAAATGAATGAGATAAGTGCTTTTTAAAAAGGTAAAGTTTGTCTTACAACTGAAAAAGCTTTTAAACAAAACTATAGTTTTTGCGAGTAACTCCCGTGACACCT",
|
287 |
+
"output": "Non-promoter",
|
288 |
+
"model_response": "Non-promoter"
|
289 |
+
},
|
290 |
+
{
|
291 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
292 |
+
"input": "ATCAGCCCTGGCTAGGACCAGCTTCCCACTAAATCGCTTGCGGCAAGGAAGTCTCAGCCCCCGAGGCAGTCCTCACTGTGAGCAAACCAAAAGATGGGATCACTTGGGGCGCGTACGGGCTCCCAGCGCTGGCAGAAACTGTAGCGTTGTAGTAGCCTTTGTGACTGGTTGCCTGTTGGAAGTCTACCCCACCCCCAAACCATAAATCCACCGGAAACTCCTCCAGGACACCCTCTCAAGCAAGCCCCTGGGATGTAACTGCCACAGGGGCTGCGCCACGATGTTCCCTGGCTCTCAGGT",
|
293 |
+
"output": "Non-promoter",
|
294 |
+
"model_response": "Non-promoter"
|
295 |
+
},
|
296 |
+
{
|
297 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
298 |
+
"input": "CACCGCGTGAGGATGTGCCGGGTGGTCCTTTCCTCCTCCTCTTCCTCCTCCTCCCGGCTCCCTGCCTAGTCTCCATATAAAAGCGGCGCCGCCTCCCCGCCCTCTCTCACTCCCCGCTCCTCTCCGCCGCGCACTCTCCGCGGCGCTGGGAGAGGGCGGAGGGGGAGGCGGCGCGCGGCGCCAGAGGAGGGGGGACGCAGGGGGCGGAGCGGAGACAGTACCTTCGGAGATAATCCTTTCTCCTGCCGCAGTGGAGAGGAGCGGCCGGAGCGAGACACTTCGCCGAGGCACAGCAGCCGG",
|
299 |
+
"output": "promoter",
|
300 |
+
"model_response": "promoterpromoterpromo"
|
301 |
+
},
|
302 |
+
{
|
303 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
304 |
+
"input": "TCAGGAGTCGGTAGGGGCAAGAACGTTTCCGGCTCCTCAACGGGAGAAGCGCGGGAAATTTGTTGACGTGGAATAGCTCGACGAAATCATCGGCAGGCGCCCGCCAGGGAGCTAGCAACGGAGCATATGCGACCGTCGGTTCGCAAGAGACACACCTCGGGCCTTGCCCCGGAACGCCGCAATGGCTCCTACCTTTCCGCCGAGTGCAGGTTGATGCTGCACACCCAGAACACTTTGCGGCACGGGGGGGGTTATATGACATGCAAAGCCCTGATTCAACTTGGAGATGAGCGGGTCTGT",
|
305 |
+
"output": "Non-promoter",
|
306 |
+
"model_response": "Non-promoter"
|
307 |
+
},
|
308 |
+
{
|
309 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
310 |
+
"input": "GAACTCCTGAACTCAGGTGATCCGCTTGCCTCGGCCTCCCAAAATGCTGGGATTACAGGCGTGAGCCACTGCGCCGGCATCCTGGATGCAACACGATCAACGAATGATTGCCCTTTATTCTGCCTGGTCTCCCAGCGGTGCTGTTGTGTCTTCATTTTAATGGATTTTAATTGATGTCACTTAGGGGGGCCCTCAAGCCTTCCTTCCTCTATCCATTTACGTGGTGTACCATCGAACTTTATCGACTCAGAAGTCGTGGGCGGAGGGCGTTCCGATAGAACCAATTGGCTCACGTCTATA",
|
311 |
+
"output": "Non-promoter",
|
312 |
+
"model_response": "Non-promoter"
|
313 |
+
},
|
314 |
+
{
|
315 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
316 |
+
"input": "AAGATGGCCTGAGTCAGGAGCAGGGGCAGAGCTCAGGTTGTCGTGGGGTCCCGCTGCCCACGTCAGACTGGAGGTGAGGGATGGGCGGGGCCTGACAGCAGGCCTGGAAGGAACAGGATGTCTATGCTGGAGATAGAGGGAGAGGACAGTGCCAAAACCCAGCTCCTGGCCAGTCCCCAGCTCCTCCCTGCCTGGCCCTATCCCAGGATCCCCTCCCCGGCCTCCCAGCTATGATCTACCCCGGGGCCCAGACTTCAGGCGCCTTCACGATGCCGGCGGTCAGTGGTCCAGGTCCCTTAT",
|
317 |
+
"output": "promoter",
|
318 |
+
"model_response": "promoterpromoterpromo"
|
319 |
+
},
|
320 |
+
{
|
321 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
322 |
+
"input": "CCTGACACCCTTCGGCCATTTTTTTTAAACGTGTTGGGGTTTGGGTTTCAGCCACAGGCCTAGCACATGGAACCAAAACGCTACCACGGACTATGGAGTTGTGAACCGCGGAACAGCCGGGCGGACTCCGGGGCTCCTGTGTTTGAGATACGCGTGCCATGAACCTCCGCGCCGGAGGCGAGCCACTGGTAATTTACTTTCCAGTTGCGCCAATCCGAGTCTACCTTGATATTGAAAGGTAAGTCAAGAACAAAATTATCGCATAAGACTGTTGAGGTTACCCCGCGGCGGGGTCGGGGC",
|
323 |
+
"output": "Non-promoter",
|
324 |
+
"model_response": "Non-promoter"
|
325 |
+
},
|
326 |
+
{
|
327 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
328 |
+
"input": "CTGAGGCTGGCAGGCACTGGGCAGGCAATGAGTTCTTTCCCTGTAAGTTGGGCACAAAAAGATCGGCATGGGAGCAGCCTCCTTTGAGACAGCTGCTCTGAGAGAATGCAATAAGCAGGGAGCAGCCAGCAATTCCTCCTAGCAGAGGGCGACTCGTGGGAGGAGTTCAGTTTGCCAAGTATTGTCATTTGTTGAGAGAAGGTGTGTGCTCAAGGAGGAGTTTTAACCTGGAGGATCATTAACTCTTTTAGTCAGCTGAGGAGCTGCGGTGGCTCGGCGAGTTGGAGTTCATCCTGGAAG",
|
329 |
+
"output": "promoter",
|
330 |
+
"model_response": "promoterpromoterpromo"
|
331 |
+
},
|
332 |
+
{
|
333 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
334 |
+
"input": "GCGCGCCCCTCCCCCCGGGTCGCGCGCCGCGGGCCCGAGCGCGAGACGCCGCTGCTCCCGCCCCCGCCGCCTCCTGGAGTCGCCGGGGCGGACGCGCAGTCCATGGGGCGCGGTGGGCCGGGGAGTTGCCCCAGGGGCCGCGGGAGTTGCTGAGAGGAGACAGGGTTGGGCTTTCTCCTCGCCCAGACCCCACCCCACCCTGCCGACCCCACCCCCTGCTCCTTCCTCCCCGGGGGCGCGCACTCGGGCACGCGCTCGGAAGTCGGGGGTCGGCGCGGAGTGCAGGCTGCTCCCGGGGTA",
|
335 |
+
"output": "promoter",
|
336 |
+
"model_response": "promoterpromoterpromo"
|
337 |
+
},
|
338 |
+
{
|
339 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
340 |
+
"input": "TATCTCCCCTCCTCGGTCCCTCTCCCTCCCTTCCCCCTCCCTTCCCCCGCCCTCCCCCTCTTTTTGCTCCTGCTCCCCCCCCCCACCCCGCCTTTCTCCTTTTGCAAGAAAATAATTTGACAGTCGATTTGCTGACAAGGGAGGAATTTGCATCCTGGATTTAAAAAAAAAAAAAAGGCCGAGAGGAGCTTGGGAACGGTTGCTAGGGGTGGGTAATGGGTGAAAAAAGGGGGGTACCGGGGAGCGGATAAGGAGGGTTAAGGGAGGGGGCGAGGATGGGGAGCAATGCAAAGGTAAGGC",
|
341 |
+
"output": "promoter",
|
342 |
+
"model_response": "promoterpromoterpromo"
|
343 |
+
},
|
344 |
+
{
|
345 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
346 |
+
"input": "GACCAGTAAGTCGGGGGGGAGGGAGCCGGGGCCTGGCCCCGCCCTATGGGGAGGCACGGGTGGCGAGGTCGGCCGTCCCATGCCCCGCCTGCTCCAGGTGCCGCCGTCCCCACAGGTGCCCGCCCCAGGCCCGCTGGAGCAGCCTGTGGCACGTGGGGTAAGTGGAGGCCGCGGCGGAGCCGTGCGTCCGACGGTTCTGGGGCGGGGGTCACCGGGGCCAGGCCCGCCAGGCCCTTACCTCACCAGGCCGCCTCCGCTGTCGCCCAGTCCCGGCCGCTGGCGGGAACTGACCTGGAGCAA",
|
347 |
+
"output": "promoter",
|
348 |
+
"model_response": "promoterpromoterpromo"
|
349 |
+
},
|
350 |
+
{
|
351 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
352 |
+
"input": "GCTACCGCTGCCGGAAGAGCGCGGCGCCCGACGGAGCCGTGTGGAGGCCAAAACTCCTCCCGGAAGCCGCTACTGGCCCCGCTTGCCAGGCCCAGCGTCTTTTCTGCATAGGACCCGGGGGAAGCCGGGAAGCCGTTAGGGGGCGGGGCAAGCGGGCGGGCGTGCGTCGGTCAAGTTTCCCGGGTCGCGTCAGGCTTCTTTCTCAGCACCGAGCGAGCGACGTTCGTGAAGCTTTCGTTTTGAGCGGCCAGACTCTGCTGTTCTCAAGCCTGGGAGGCACAGAGAAGCAAGACTTCCGCA",
|
353 |
+
"output": "promoter",
|
354 |
+
"model_response": "promoterpromoterpromo"
|
355 |
+
},
|
356 |
+
{
|
357 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
358 |
+
"input": "GAGAATCCCTCTATAATATCCTTGCACAGTTCCTCCGCAAGCACAATATGATTAATTTTTACTTGGCTGATGCCGAGATCTTAACTGCCCTAATCCTCGGCTCGAGTCGAGTTGGGGTTTCCTAAATGAATTACTATTGCAAATCTGGTTATGGCGCTCCACGAGATTATGGCAAAGGCTGAAGAATTTATTGACAGATGTGTTGTTAGACTGTGCTACGTTTTAGTGGCAGGTAGTGGAGCAGCCACCGGCGGACTCGAGAACGCAGTAACATGGCTCAAAAATGGTAGTATTTGTTTT",
|
359 |
+
"output": "Non-promoter",
|
360 |
+
"model_response": "Non-promoter"
|
361 |
+
},
|
362 |
+
{
|
363 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
364 |
+
"input": "CGGGAACGCACCTGCGTCTAGACGCTGACGCCGTGCGCGGGGCGGGGGCCGGCGAGCATGCGCAGTGTGGTGTGGACAGGGCGTGGTTGGCAAGCATGCGCAATGTGGGCCGGGCGGGGCTGGCGAGCATGCGCAGTATGGGGCGGGGCGGGGCAGGGCTGGTGAGCAAGCCCGCGGGGAGCGAGCGCGGAGGCGGGCGTTGCTACGGCAACGGTGGGCGGCCAGGGAGTTGCCTGAGGGAAAAGGGGCAGACGTCCCTGGGTTCCGGTGTTCGCGGAGGAGTCGAGGCACGGAGAGGCT",
|
365 |
+
"output": "promoter",
|
366 |
+
"model_response": "promoterpromoterpromo"
|
367 |
+
},
|
368 |
+
{
|
369 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
370 |
+
"input": "GTTCTGATTCTCGGTGAAAACGGTAAACATCGGCGTGCTATGTATGGGGATAGCAGGAGCTCTAGCATTTTTGCGCTGGGAAGATTATTCTTCTCTATCCGTCTATGCGAGCCGGCGTTTCGCAGGGGTACCCCCGGGGGCACTGCGTGGGAGGCCAGGAGGTTTTAGAATGGTCACCCGCCGGTTAGCTCCTTGGTAGTATTCTCGATGGACTGCGCGTTTGAGTCTTAAGGGCGTTTTCACGCTTCCGCCTTCTGGAAGCCTGCACTCCGTTTAGAGGAGCCAGAGACCCGTCCCGGT",
|
371 |
+
"output": "Non-promoter",
|
372 |
+
"model_response": "Non-promoter"
|
373 |
+
},
|
374 |
+
{
|
375 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
376 |
+
"input": "GTATTGGGGATGCAGGGCTCCGCGGTAGTTGACCTAGCAGGGCTAGTCACATCACGCCATAGGGAGCAGCAAGGGGCGCAGCAGTCAGCACGGCCGCTATTCAGATACGGTAACCAGACGCCGTGCGGCGTGCGCACCTGGCATCCAATGGCTTTGGGGATAGGGGCCGCCTGTGGGTTGTGGGTACGAATTAACGGAGTCGGAGCGGCGAATGAGCAGTCGCAAAGCCGAAGCCGGGCGTGGCGCTTTAGAGCGTCCCAGGCTCTCCTTCGGAAAGATGTCGGACACGGCAGTAGCTGA",
|
377 |
+
"output": "Non-promoter",
|
378 |
+
"model_response": "Non-promoter"
|
379 |
+
},
|
380 |
+
{
|
381 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
382 |
+
"input": "AGCAGCGGCCGCGGCGACAGCTCCAGCTCCGGCTCCGGCTCCGGCTCCGGCTCCGGCTCCCGCGCCTGCCCCGCTCGGCCCAGCGCGCCCGGGCTCCGCGCCCCGACCCCGCCGCCGCGCCTGCCGGGGGCCTCGGGCGCCCCCGCCGCCCGCCTCACGCTGAAGTTCCTGGCCGTGCTGCTGGCCGCGGGCATGCTGGCGTTCCTCGGTGCCGTCATCTGCATCATCGCCAGCGTGCCCCTGGCGGCCAGCCCGGCGCGGGCGCTGCCCGGCGGCGCCGACAATGCTTCGGTCGCCTCG",
|
383 |
+
"output": "promoter",
|
384 |
+
"model_response": "promoterpromoterpromo"
|
385 |
+
},
|
386 |
+
{
|
387 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
388 |
+
"input": "CTTTTGAACATCTTGTGCCAGGATTTGACATTAAGCTCCAGTTCATCCGCACACCCCGCGGCCGCTTTCGCAAGGGACCGAGAGGGTGGCGGCCTCGCCCGGAGACTCGCGGCGCTGGGAGCTGCCTGCCCTCGGTGCGGGCTGCAGACCGGGCCGCGAGGCGGGCGCGCGGCGCGCACTAGGACCCAGCAGGGCTCCAGGCCGGGGTGGGAGCCGCGCCGCCGACCCCGGGCGGGCGCCGGGCGCAGGAGCCGGGGTTCCGGCCGCGATCTGCTGCAGCTCGGCCGGGAGACGGCGCGA",
|
389 |
+
"output": "promoter",
|
390 |
+
"model_response": "promoterpromoterpromo"
|
391 |
+
},
|
392 |
+
{
|
393 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
394 |
+
"input": "TGTGTGGCCAAAGAGAAGAAATGGGTTGAGACAGCAGGCCTGGCACTTACTTTACCTGGCCCAGTCTTGCCTGACAATTAAAAAAAGACGCTTTAGACTGGGCGCGGTGGCTCACGCCTGTAATCCTAGCACTTTGGGAGGCTGAGGCGGGCGGATCACGAGGCCAGGAGATTGAGACGATCCTGGCTAACACAGTGAAATCCCATCTCCACTAAAAATACAAAAACTTAGCCGGGCATGGTGGCGGGCGCCTGTAGTCCCAGCTACTCGGGCGGCTGAGGCAGGAGAATCGCTTGAACC",
|
395 |
+
"output": "Non-promoter",
|
396 |
+
"model_response": "Non-promoter"
|
397 |
+
},
|
398 |
+
{
|
399 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
400 |
+
"input": "TCCATATAAACAGATTAACTGCATTCCCCAACGAGTAGAAATCTGCCCATCACTTTTGGGGTGGTTTGTTGCACCCACTATCGCCCAAGAAACTTCCCCAAGCCGGGTCATATTACAGGTCTCTAGCGCGCTGGCCGCACTCGCAGCCCACCATTCCCACAGAATGGTGTCATCGTAATCTGAAAAGTTGCCACTGGGCACCTGCGCAGGCTTGGCTGCGCCCTCTCGCGCCGCACGCTCCTTCCGGCGACGCGGATAGTCGTAAAGGTTTGAATACCTGATACAGGGAGAGAGGCCGAG",
|
401 |
+
"output": "Non-promoter",
|
402 |
+
"model_response": "Non-promoter"
|
403 |
+
},
|
404 |
+
{
|
405 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
406 |
+
"input": "TCTTCACAGGGCACAGCCCCTGCAGAGCATCTTGGTCATTTGGAAGAGGACACGGTATCCCCTCTGGCCAGAGTATGTCAGAGAAGGAAGAGTAGGGCTTTTTTGTTTTGTTTTTTTTTAAAGGTGCTTGCTTGTTTAATGTAAATAATAGAAAGCCTTAATATCTTTTCTGTAACACGGAGTAATATTTTAATGTCATGTTTTGGATGTACATAATATATTTATAACAAAGCAGCAAGAGTCTACTTAACCTTGGCTGCCTCGTGGTGTTTCCTGGCTGGGTGGGGTGGGGGGTATCAA",
|
407 |
+
"output": "Non-promoter",
|
408 |
+
"model_response": "promoterpromoterpromo"
|
409 |
+
},
|
410 |
+
{
|
411 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
412 |
+
"input": "TGTATTCAGACCAGAACAAACTCGAGTGCCCCAGCTCCTCTGGCAATTAGGTAGCCAATAGTTGAGATTGCGTAGGCAACTACGTAAAATGCGCCGTGCCGTGCCCCTTTCTGCCACTCTGAGTTCGGTAACATAGCTCCGGTTGCTAGGTACTTAAGGTATCCAGTTCCCCCAGCGATTTGCATGAACCGAGAGGGAGTGTCTTCTGCCAACAGATCAGGCGGGGGATCATGAACTCATCCTCCCAGGGAATGCCGGTCGGGGATCCCCCGCGCAGCTCACAGGCCCTGGGAGTGAGCT",
|
413 |
+
"output": "Non-promoter",
|
414 |
+
"model_response": "Non-promoter"
|
415 |
+
},
|
416 |
+
{
|
417 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
418 |
+
"input": "ACCTCGCTATTCTATGCACAGTTTTGAATCCTGCTTTTAAAATGCGATGTCTGCCTTGTCTTTAAATATTCTGAGGGGGAGGAGAGGAGATGTGTAATGCGACTGTCTATAATTTGAAACCGGACCTCACTATTTAGCGTCTCAGAAAACGCGGGGTTCATTGCCCTGGTCCCCCCCGCCTTCCCCCGCCCCCCGAACCTTCCGGCTGGCTCTCTTGCGGTCCCCGCCTCGGCGCTGATGTGGTCTGGCAGTGGAGATTGGCGCCCGGGCGGAGCACGATGGGCTTCCCCGCGGCAGGCA",
|
419 |
+
"output": "promoter",
|
420 |
+
"model_response": "promoterpromoterpromo"
|
421 |
+
},
|
422 |
+
{
|
423 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
424 |
+
"input": "CCACGAGCGCCAGAGCCCCGCGCCTCCCCTAGCATATCCGGCGGAGTCCCTGCCCCGCAAGCGCCCACTATACGGCCGGACAACTGTTGTGGGCGTGGATGCAACCACGGGGGAGGGGGCAGGCGTGATCTCAGTCCCGCCCGTGCCCGAGCCCGCGCCCGAGCAGGGACTACATTTCCCTCCCCCCTGGGTCAGGGAGGAGTCTCCTCCTCTATCTATTAAGGATTCTCGCCTCCGCACATGCGTTATCCTTCCAAGGCCGAAAGGATTTGGAGTCCTGTACGCCAGAGGCCGAACTCG",
|
425 |
+
"output": "Non-promoter",
|
426 |
+
"model_response": "Non-promoter"
|
427 |
+
},
|
428 |
+
{
|
429 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
430 |
+
"input": "ACCCTGCTGGGGGTACAGAGACGTTTCCGAAACTCAGCGGCTCGGTCGCCCCCAAAGGCCAGGCGGAACACACGCCCACACCGCGGCTCCCTTCCCGAAGTAAGACCGCCGGGCCACGGCCGCCCCCAGGAAGCCCCGCGCCCCGCACCCCACACCCGGGGACACAAACAGGCGCCGGGTAGCCGCGAGGGCCGGCGCGGCGCCTTTAAGAGGCGGCGGGCGGCGCTGCCCCCTGGCGGCCGCCCCGCCGCTTCCTCGCCGCCGCGGGCTCAAGCGGGGCGGCCGGGCCAGCGCGGGGCG",
|
431 |
+
"output": "promoter",
|
432 |
+
"model_response": "promoterpromoterpromo"
|
433 |
+
},
|
434 |
+
{
|
435 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
436 |
+
"input": "AGCTTCACAGTCAGAACGCGGTACCGTTGTGCCCCAGCTCGACTGCAGCCTCCCCGCCCCGATACACTTGTCCCATCCTGTCCACGGCGCCCTTGTTTTGATTCGCTGCTGTCCCAGAGAGCTTAGTTATTCGTACGGGCAATCTTGACCCTTTCCCAGGACCTCCCCTAATACTGTTCATTGATCCTGGACCGGCAGCGAGACCCCTTCCGGCATCCAGAGGCCCAGTACTTTCAATCTCCTCCCCCAAAATGCTTTCCCTTTGTGGGGACCCTCGACATAGATGGACACCCAGCGGCC",
|
437 |
+
"output": "Non-promoter",
|
438 |
+
"model_response": "Non-promoter"
|
439 |
+
},
|
440 |
+
{
|
441 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
442 |
+
"input": "CCGTTCTTGGCATCCTGAGAGGGCCAGTTAGTACTGAAGTCCCTTGGCTGCTCAAGGATTGCAGGGATGAGGCAAGTGGAACAGCCTCGGAACCTCCGAAAATGGGCACGCTCCAGGTCCCAGTTTCTATGGCAACCATACCGGCAAATTGGGCTCCGCAATGGTTTCTCCTGGAAAAACCGTGATTTTGGTTCCCGCGGACGTCTCTATGGTTTCGACAGCCTAGAAGGAACAAAACGGCATTTCCGGGAAGATGGCGGCGCACAAGTCAGGTCCGGCACATGTTTCCGCGGAGCGGAC",
|
443 |
+
"output": "promoter",
|
444 |
+
"model_response": "promoterpromoterpromo"
|
445 |
+
},
|
446 |
+
{
|
447 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
448 |
+
"input": "GCTCTCGCGCCGCTCGCGTGACCGGCCGGTGTGTGCGCGAGGCCCCGGCTCCCGGGGCACGGACGGCCGGGCGCGCGCCTCTGCGAGGGGCGTCCGGGTCCGAGTCGGCGGTCCGGGCCGGCGCGAGGTGCGTGCGGGCGGGCCGCGGGGGTCCCGGACGGACACAAGCGCACACACTCCCGGAGGAGCCTTCGAGGCTGCTCTTCCTCGGCCAGACGGAGAGCGGCACTGTCTCCCCGCCCAGCGCTCACTCGCCCCGCGTCTCCCCCCGCGGCGGCTGCTCCTCCTCGGCACCGCCAG",
|
449 |
+
"output": "promoter",
|
450 |
+
"model_response": "promoterpromoterpromo"
|
451 |
+
},
|
452 |
+
{
|
453 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
454 |
+
"input": "GTCCCTCCTCTGGTAGCCAGAATAAATTTTGCAGTAAGTTAGCAGAGTTGATTCCAAAAAGTCTGCACTGTGGCATCTCAGGAAACCAAACTTATCTGCTTAAGAAATAACTTGCAGAGAGAAAAGGAACTCTGACAAGGCTAATAACAGGGATTTCAAGCTGTGTTTCAGCAAGCCAGTCACAGAGTATTCACGTGTTAATTCACTGGCGTGTTCCGCGGCATAAGCACCCCCTCTCTGCCTTCCCCCAGTTCCAACTGTTGTGCTGCAGCAGATTTGGTCTGAGTCTGGGCAGAGCCC",
|
455 |
+
"output": "promoter",
|
456 |
+
"model_response": "promoterpromoterpromo"
|
457 |
+
},
|
458 |
+
{
|
459 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
460 |
+
"input": "ATGATGCTGGGGACGGGACACAAACAGGCGTGACAGCAGCGACCAGGCCGGCGCGGCGCCCGTAGACTGAGTCGAGACACCCTGGGCACAGGCGGCCGCCCCGCCTCTGTGATAGTTACTGGGACGTGTGCCCAACGGCCGGGCCAGCGCGGGGCGGCGGCGGGCAGGGGCGGCGAGTGCCTCGACGTTCTAGTGCGCCCTTCTCGGCGGTCTTATGCACTATGGCAGCAAGAGTTTTGAATATGTTGCGGTAAGTTGAAATTATCGAGAATGCCAGGCGAGTTGGCCGCGTGCGCCCCG",
|
461 |
+
"output": "Non-promoter",
|
462 |
+
"model_response": "Non-promoter"
|
463 |
+
},
|
464 |
+
{
|
465 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
466 |
+
"input": "CGCGACACGCCGTGCGCCTCCGCGGCTGCGCTACGAAAACGAGTCCCGGAGCGGCCCCGCGCCCGCCGCACCCGGCCCTCGCCCGCCCGAAGACAGGCGCCAAGCTGCCCCGCCGTCTCCCCAGCTAGCGCCCGGCCGCCGCCGCCTCGCGGGCCCCGGGCGGAAGGGGGCGGGGTCCCGATTCGCCCCGCCCCCGCGGAGGGATACGCGGCGCCGCGGCCCAAAACCCCCGGGCGAGGCGGCCGGGGCGGGTGAGGCGCTCCGCCTGCTGCGCGTCTACGCGGTCCCCGCGGGCCTTCC",
|
467 |
+
"output": "promoter",
|
468 |
+
"model_response": "promoterpromoterpromo"
|
469 |
+
},
|
470 |
+
{
|
471 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
472 |
+
"input": "CCCCTAGCCAGGAGGTAGCATCTGTCTCCAGGTGCCCGTGGGGTGTCCTCCATGAGGACCACCTCTTCTAACCACCTGCCCAGCTCAGAGGACCAGCTGGCAGGAAAGACGCTGCACCGCCCATGAGGCCAGAGCTACAAGTTGCTTCTAGGAGTGGCTGTGGGCGGAGGCTGGTGGTTCTGAAGGTGGCGGTGGTTCTGCAGCGTGGCTCCCCACAGCCCCTTTCATCTGAACAGTAGGGCTCATGCCACTTGTTAATCAGCCTTCCAGTGGCACGAGGAGTCATTTTCTGGGGATAGG",
|
473 |
+
"output": "promoter",
|
474 |
+
"model_response": "promoterpromoterpromo"
|
475 |
+
},
|
476 |
+
{
|
477 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
478 |
+
"input": "GCCTCCTCCCACGCTGCGCGCGCACCTCCCCGCCCCCACCCCTACCCGCTGGCGTGCCCAGTGGAACGGAGCCTTGTGTCTCCGCCTCAAGTCCCCGGATGCTCACCTCCCCGACTCGCCCCCGCTGTGGCCCCGCCCCCGCGCGGCTCTTCGTGCCACGTCACCGCCTGCGTCGCTTCCGGAGGCGCAGCGGGCGATGACGTAGAGGGACGTGCCCTCTATATGAGGTTGGGGAGCGGCTGAGTCGGCCTTTTCCGCCCGCTCCCCCCTCCCCCCGAGCGCCGCTCCGGCTGCACCGCG",
|
479 |
+
"output": "promoter",
|
480 |
+
"model_response": "promoterpromoterpromo"
|
481 |
+
},
|
482 |
+
{
|
483 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
484 |
+
"input": "CCATGCAGCGACGGCCGCCGCGGAGCTCCGAGCAGCGGTAGCGCCCCCCTGTAAAGCGGTTCGCTATGCCGGGGCCACTGTGAACCCTGCCGCCTGCCGGAACACTCTTCGCTCCGGACCAGCTCAGCCTCTGATAAGCTGGACTCGGCACGCCCGCAACAAGCACCGAGGAGTTAAGAGAGCCGCAAGCGCAGGGAAGGCCTCCCCGCACGGGTGGGGGAAAGCGGCCGGTGCAGCGCGGGGACAGGCACTCGGGCTGGCACTGGCTGCTAGGGATGTCGTCCTGGATAAGGTGGCATG",
|
485 |
+
"output": "promoter",
|
486 |
+
"model_response": "promoterpromoterpromo"
|
487 |
+
},
|
488 |
+
{
|
489 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
490 |
+
"input": "CCCCTGAGCCAGGCTGAAGCGGGATAGGAAGGGTAAAGGTGCTAGCTGAGCCGTAGTACGGCCCAGCAGTTCTCCAGGGGACAAGCTAGGCCCCCGCTGGAGGGAGTGGTGAGACTCAGCCGGGGCCTGATTCTGGTCAGCACCTTGGACAGCAGCCAGTGCCCTCCCTGGCCCCGCCTGGCCCCGCCTGGCCCCACATCCGCTACTCCTGGGCACCTCCTCAAATAGCACAGCCTCCAGCTGGTGCCCAGGGCCTGAATACACAGAGCGCTGAGAGAGTGGGGCAGTGTGGTCACGGAC",
|
491 |
+
"output": "promoter",
|
492 |
+
"model_response": "promoterpromoterpromo"
|
493 |
+
},
|
494 |
+
{
|
495 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
496 |
+
"input": "CCTGGGAAAAGCTGAGAGCATAATCGAGCGGCAAAGAAAACGCGACCACTGAGGACACTTAGAAGGGGGCGGGGCCTTTTTGTGGTACTAGGAGCTCGAGAAATTGACTAGATTACGAGTGGTTTACGACTCAAGCATACAACTTACAGCTTGGGCCTTCCGTAGCGGGCGTTGCGGGGGAACGTACAGTGCTTGAGGGGCGAGGTTTGCCGGAGCCTCATTCTAAAGGAATCTTCTAGTTCCAAGCGTTGTTCCAATTGGGCCGGAGCGTGCAACGGGACCCACGGAACTACAGGTGTG",
|
497 |
+
"output": "Non-promoter",
|
498 |
+
"model_response": "Non-promoter"
|
499 |
+
},
|
500 |
+
{
|
501 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
502 |
+
"input": "AAAGCCACCTCCGCGCCGGCAGCTCCGTTTCTAGGGAAACAGGACCAATTCCAGAAGTTGGGGTGTTACGTGTGCGATATACCTGCCCAAGAGCTGGTATCAATAGAGCGGGCGGCATGGTAGTGTTTCCCCCGGCCATGGTAAAATGCGAACCAGATCCTCGGCGGGTAACCGCGCCCCTAGGCGTTGGCATGTGTCGGGGTAGCATGACGGGGTCGTCCAGTTCTTAGCCAAGATGAATCCGGCTCCGAAGGTCCCCGGCTTCGTGGCCATAAAGGCGAACACCGCTGGAAAGGTGGC",
|
503 |
+
"output": "Non-promoter",
|
504 |
+
"model_response": "Non-promoter"
|
505 |
+
},
|
506 |
+
{
|
507 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
508 |
+
"input": "GGGCTCGGGGACGGGACGGGCGAGCAGTCATTTCTGAATAGCGCGTGCAGAGGGTCATCGCGAAAAGACTAAGACGCGACATAATGGACTGACTCGGGGCATGGCGGGGTGGAGGCGGGAAAGTGTCGAGGCCGGCTAATGCCTGGACAGGGAGGTGGGCATCGCCAGGGCGACGATGAGGGTCGCCACTCCGGAACACGATTAGCATCAGTGCACTGACCTCTGCCCCCACGACTCTCCAACACGCCCGATGATGTTGCGTGCGGCAGCTGTCGGCTAGGAGTCGCGGGGTCTGTGCGC",
|
509 |
+
"output": "Non-promoter",
|
510 |
+
"model_response": "Non-promoter"
|
511 |
+
},
|
512 |
+
{
|
513 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
514 |
+
"input": "TATGGTATTTCAAGGTCATAAATTACAAATGTGCATATTTATTATTCCCCTATAAAATGTTAATTAAAAAATTTTCATATTTACTTGATGTAACAGAAATGAAACCTGTAATGAGGGATCTAATTTCTTTTTCTTTTTTCATTCCAGAATGTATGACAACATGTCCACAATGGTGTACATAAAGGAAGACAAGTTGGAGAAGCTTACACAGGATGAAATTATTTCTAAGACAAAGCAAGTAATTCAGGGGCTGGAAGCTTTGAAGAATGAGCACAATTCCATTTTACAAAGTTTGCTGGA",
|
515 |
+
"output": "promoter",
|
516 |
+
"model_response": "promoterpromoterpromo"
|
517 |
+
},
|
518 |
+
{
|
519 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
520 |
+
"input": "ACGACCCTGGCCTCCGACTTCAACGACTTCATAAGGCGGCGTTTCTGGGCGCAGCCGTGTCGCTCCTGGTGAGAGGCCGCCGGCAGGCGGGATCCAGCGCCCTCCGGGGCACCGCGGGCGAGACCGTCGCCTTCGCACCCCCGGCGCGGTCGACCCCGCGGGGCCGTCGGGTCCTGGGTTCCCCGCCGCGTTGCGCTCGTCCCCCTCCTGTCAGAACCTGGGCCCCCGCCCCGCCCACCGGCGCGGGGCCTCTCCTCCTCCCGCTAACGGGCGGTCGGCCGCCTCCTTCCTCTCTTGGCT",
|
521 |
+
"output": "promoter",
|
522 |
+
"model_response": "promoterpromoterpromo"
|
523 |
+
},
|
524 |
+
{
|
525 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
526 |
+
"input": "GGTTTCTGAGGCTGAGGAGGTACCCAGGTAAGCACACTCTGGGTTCTCTGGTGAGCACACACCGAGCTGCCTGCATCTCCCCTACCCATGAAGGCAGCTAGCCGCCGGGCCAGTGGTGGGAAGGGGAGGCGGCCATTAGCGCCGTTCCCGCCGGGACTTGAAGCGCCCGGCCGCGGCAAGCCCCGCCCTCGGCGCGCCCCCGCGTCCGCGCGCGCTCCTCGGGTCTGCGCGGAGCCGGCGTCGGCGCGCGCTTGGGCGCCTGGCGAAGACCGAGAGAGGCTGGCGGGATCTCAGCGGCGC",
|
527 |
+
"output": "promoter",
|
528 |
+
"model_response": "promoterpromoterpromo"
|
529 |
+
},
|
530 |
+
{
|
531 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
532 |
+
"input": "GACCGCCCGTTGGACATACCTCTAAATTGGGAGCGCGGTGCCGATTTACGTGACCGCCGTGGTTCGCACGGTGGGTAAACAGTGGATCGTGGGGTGGTGAGAAGGTTCCGATGGGTGTTCTTACTACGGCCCGATCCTAGCAGTCGCCGACTTTGTTATTCTTATTGGTGGATGGGGGGCCGCTGATATAAATCTGGGAGATGAGGAGATAGGCGGGAGCGAGGGCTGACTCTGCGTGTCGAAGGAGGTAGAGAAAAAGCGTTGCTTAAAGGGAGGGAGCGTAGCTTGGTTGCTCCGTAG",
|
533 |
+
"output": "Non-promoter",
|
534 |
+
"model_response": "Non-promoter"
|
535 |
+
},
|
536 |
+
{
|
537 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
538 |
+
"input": "GACCTCTCTTCGGGCGGCAAGTGCCATACGTATAGATAAGCGGAATTTACATGGTGTCACAGTTAGCGCAAGGTCGAATAAATGCGGCTTAACACGACAGGTATCTCCAAATAAGTGTGTCTTACGGCGTAGATCGCGAGTTGAGCTGTCATTTCTTTCCAAGCGGCGCAGGACAGGACCCAGAAGCCAGAGCGCAGGAACTGCTTCCGGTTTGAGTTTTTATTAGAATAACGGGCTGTGGGAGAGGCAAGTGGCGGATCTTCAGGTCGGCGAGGGTGTGACATTCGACCCACAGGCCGC",
|
539 |
+
"output": "Non-promoter",
|
540 |
+
"model_response": "Non-promoter"
|
541 |
+
},
|
542 |
+
{
|
543 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
544 |
+
"input": "GCATATTCTGTCTGAAATCGTGTGCACCGAAATCCCCGCCTTGCGGTGGAGGCTGGCGCTAGGCGGCCTCAGCCTCGGCCTGCTGCGCTCAGGAACCCGCGCCCCGGCTCCTCGGCGATCCATTGCTCTTTCCTCTGGCGCCGGCCGCAGGCCTCGGTCACGCCCCCAGCGGCCCGTTGGTTTCCGGGTCCCGCGGGGTGCCCCCGCCCACACGCTATGCCTTAAATTGGGCCAGGCTGAGGCGCTGCTGCTGGAGCGGCCGATCCGAGACGTGGCTCCCTGGGCGGCAGAACCATGTTG",
|
545 |
+
"output": "promoter",
|
546 |
+
"model_response": "promoterpromoterpromo"
|
547 |
+
},
|
548 |
+
{
|
549 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
550 |
+
"input": "TGGTCTTGTGTCTCGGCGTACCATATCTTTTGGGTTCCCCGGCTGCCCCGGAGGTTCGCGGCGAGAGGACGGGCCGGGTGCGGCTAGTTTACCTGAGCAGTGTGAACTACCTCACTCCCATCGGGACCAGCCGGCATAAACACTCTTATTGCCTGTCAACGTTTTCGAGACATATTTTGTTCGCGAGCCCGCAGACAACGCGCATTCTGGAGTTCGAACGCTCAGTTCGCGTCCCCGAGAAGATATTATTCAGATGCCTGCGAGCTGGGGCCGGGTGGCCGCGCCGGGTCGGAGTGTGGC",
|
551 |
+
"output": "Non-promoter",
|
552 |
+
"model_response": "Non-promoter"
|
553 |
+
},
|
554 |
+
{
|
555 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
556 |
+
"input": "TCCCGGAGACAGGTGGGAGGGTGGGGGATGCCGGCAGGAGAGGGAAGCAGGACCGTGCGCGCCGCCACCCACCCGCAGTCCGTCCGCCCCCGGGCGCCCAGCGCGTGGCCCGAGCGGCGCTGCCCGCCGAGCCGGGAGCCCCCTCGGCCCCTCCCTTCAGGCGCGGGCCGGGGGCGGCTTCTCCGCGACCTTATGTAACCGGGCGGGAGGGGCCGGGCGGGCATGGGCCTTCCCGGCCCGGAGCTGGGAGTCGAAGGGGCGGGAGGCGTGATGGTGAACTCGCAAGAAGTTTGAGGGACG",
|
557 |
+
"output": "promoter",
|
558 |
+
"model_response": "promoterpromoterpromo"
|
559 |
+
},
|
560 |
+
{
|
561 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
562 |
+
"input": "GCAGGACTGAGTCACCTCCTGAAGGCCCCGTTCCAAAGCACTCCCACGGGGGATTAGGTTTCAACACAGGAATTTTTGGCGACACAAACATTGAGACCACAGCACCCAGCTCTCTGGGTTCCCAGAACCCTGTGGGAGGGCTGGGTCGCGCCTGCCCCCTTTTAAACACGAGGCTTCCATGGAAGCTTCTTGCCGGAGGCTGGACACTGAGGAATGGACAGGATCAACACTCTTCCACCGGCTCTTCCACCTCCCGATCACTTCCACCATCGCTGGGGGAAGAACGAATGCACCGCGTGG",
|
563 |
+
"output": "promoter",
|
564 |
+
"model_response": "promoterpromoterpromo"
|
565 |
+
},
|
566 |
+
{
|
567 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
568 |
+
"input": "CACAGCCCCAGATGGTGACTACTGGAGACTGCTGCCCCCAGGTATCCACATTGTCATTGCCCAAGCCCCTGGCTACGCCAAAGTCATCAAGAAAGTCATCATCCCCGCCCGGATGAAGAGGGCTGGCCGTGTGGACTTCATTCTGCAACCTCTGGGGATGGGACCCAAGAACTTTATTCATGGGCTGCGGAGGACTGGGCCCCACGACCCACTGGGAGGTGCCAGCTCTTTGGGGGAGGCCACGGAGCCCGACCCGCTCCGGGCGCGCAGGCAGCCCTCGGCCGACGGGAGTAAGCCCTG",
|
569 |
+
"output": "promoter",
|
570 |
+
"model_response": "promoterpromoterpromo"
|
571 |
+
},
|
572 |
+
{
|
573 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
574 |
+
"input": "AATCGTGTAATTCCCGCTTCCTATGGCAAGCGGCGCCCAGTTCTGGGTGACTTGTGAAATGAGCGTCCTGCTAGGCCATAGTCTCTACGAGGAGGATATAATGTAAAGTCTGGAGCGAACGTAAATGCCGTGACGTGTATTTGTGAGACTTCGCCCGGTCGGTGAGCTGGGTGCATAGGTTCCGAGCGCGCGACCACTTAGCTTCCCTTCTACAACAACCACCTGTGTGCGCATGCCCTCTCATCTTACAGTTGTCAAAGTGCTTTTGCCAGAGCGGTCACATAATCAACCTGAAAGAAG",
|
575 |
+
"output": "Non-promoter",
|
576 |
+
"model_response": "Non-promoter"
|
577 |
+
},
|
578 |
+
{
|
579 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
580 |
+
"input": "TCCTTAGAATATTGGGCATACAATTTACTTTTATAAATGTCTATGAAATAAAATTCTTATATAATTATATAAAATTACATAATATATAATTATATTGAAATAATTATAACTGTTGAATAAATGTATTATTTTTCCTTTGAACTATTTAACAGGGTTATATAACAAACTCATCACGAGTTGTGTCTAATAAGTCATCAGAGTTACTGTTTGACTTGACCCAGGATACAGGATTATCACATTACCAAGGGGGACCAACACTTTCTATGGCAGGTTGGTTTTAGTATTTTTTTCATAAAGGTT",
|
581 |
+
"output": "promoter",
|
582 |
+
"model_response": "promoterpromoterpromo"
|
583 |
+
},
|
584 |
+
{
|
585 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
586 |
+
"input": "CGCGCGCGCGCCGGGCCGAGGGGCCGAGAGGCGGGGTCTTACAGCGACCGCGGGAAGAGGGCGCCCCAGAGCTGAGCCGGAGGCCGGCTGCCAGCGGGGCGAGGGTGGGCGCGAGCGCAGGGGCGGGCCCCGAGGGAGGGCGGGGACGGTGAGGGGGCGGGGTCGGGCTAGGCGGGGACGCGCTCGCGGGGTGGGGAAGGCGGGGGCGCGGCGGTGGCGGGAGCGTGCCCGGTCCCCGCCCCTGTTCCCACTCTCCTTCCACCTCGGACCGGCCGGGGCTCCGCAGAGCCAAAGCTCGCT",
|
587 |
+
"output": "promoter",
|
588 |
+
"model_response": "promoterpromoterpromo"
|
589 |
+
},
|
590 |
+
{
|
591 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
592 |
+
"input": "TATTTCCGATTCCTTGACTAACGTACGGGGGCCCAGCACTTTCATTCCATCCCATCACCCCATGATCCTTCCCCAGACTCTAATACTGGCCGTAATATTTACCCCCCCCTGCTCCATGTGCGTAGATCAATGACACGAGCAAGCCATTGGTCTAAAACCCAGGGTTCTCTCGCAGAGGCCCTCCATTGTCTCTGCGGTGTCACTTCGCGCACACACATCAGAGCGGGCCTCTCTGTGAGGCGCGGGGTTGATGTACTCACACGCTGCGTTGATCGGAGGTCGCTGCCAAGCATGGCGCCC",
|
593 |
+
"output": "Non-promoter",
|
594 |
+
"model_response": "Non-promoter"
|
595 |
+
},
|
596 |
+
{
|
597 |
+
"instruction": "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.",
|
598 |
+
"input": "GGTCAGCAAAGGCTTCCCTGAGGAATTGGGAATAGCCAGCCATACGGGTGGAGCCGGTCAACTCCTTGTGGTTGGGAGCTAACCGATAGCCCCTACAAGGAGCATTCCAACTATGTTATATAACATTGTGATTTTCGCGAAGGCAACGACTTATTCTTAAGACAGTATCTATCTGAATCTGAAGATCCTTGGTCTGGAGATTGGTAAAAATCTCCAGTCATCTTATGTTTCTATTATACTCTACTGGCGTTTTTACGTGACGCGGGGAGTCTACCTCCCCCAACCATAATTTAACAGTAA",
|
599 |
+
"output": "Non-promoter",
|
600 |
+
"model_response": "Non-promoter"
|
601 |
+
}
|
602 |
+
]
|
04-gene-sft/gpt2_lora_text_classification/README.md
ADDED
@@ -0,0 +1,202 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
base_model: dnagpt/dna_gpt2_v0
|
3 |
+
library_name: peft
|
4 |
+
---
|
5 |
+
|
6 |
+
# Model Card for Model ID
|
7 |
+
|
8 |
+
<!-- Provide a quick summary of what the model is/does. -->
|
9 |
+
|
10 |
+
|
11 |
+
|
12 |
+
## Model Details
|
13 |
+
|
14 |
+
### Model Description
|
15 |
+
|
16 |
+
<!-- Provide a longer summary of what this model is. -->
|
17 |
+
|
18 |
+
|
19 |
+
|
20 |
+
- **Developed by:** [More Information Needed]
|
21 |
+
- **Funded by [optional]:** [More Information Needed]
|
22 |
+
- **Shared by [optional]:** [More Information Needed]
|
23 |
+
- **Model type:** [More Information Needed]
|
24 |
+
- **Language(s) (NLP):** [More Information Needed]
|
25 |
+
- **License:** [More Information Needed]
|
26 |
+
- **Finetuned from model [optional]:** [More Information Needed]
|
27 |
+
|
28 |
+
### Model Sources [optional]
|
29 |
+
|
30 |
+
<!-- Provide the basic links for the model. -->
|
31 |
+
|
32 |
+
- **Repository:** [More Information Needed]
|
33 |
+
- **Paper [optional]:** [More Information Needed]
|
34 |
+
- **Demo [optional]:** [More Information Needed]
|
35 |
+
|
36 |
+
## Uses
|
37 |
+
|
38 |
+
<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
|
39 |
+
|
40 |
+
### Direct Use
|
41 |
+
|
42 |
+
<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
|
43 |
+
|
44 |
+
[More Information Needed]
|
45 |
+
|
46 |
+
### Downstream Use [optional]
|
47 |
+
|
48 |
+
<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
|
49 |
+
|
50 |
+
[More Information Needed]
|
51 |
+
|
52 |
+
### Out-of-Scope Use
|
53 |
+
|
54 |
+
<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
|
55 |
+
|
56 |
+
[More Information Needed]
|
57 |
+
|
58 |
+
## Bias, Risks, and Limitations
|
59 |
+
|
60 |
+
<!-- This section is meant to convey both technical and sociotechnical limitations. -->
|
61 |
+
|
62 |
+
[More Information Needed]
|
63 |
+
|
64 |
+
### Recommendations
|
65 |
+
|
66 |
+
<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
|
67 |
+
|
68 |
+
Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
|
69 |
+
|
70 |
+
## How to Get Started with the Model
|
71 |
+
|
72 |
+
Use the code below to get started with the model.
|
73 |
+
|
74 |
+
[More Information Needed]
|
75 |
+
|
76 |
+
## Training Details
|
77 |
+
|
78 |
+
### Training Data
|
79 |
+
|
80 |
+
<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
|
81 |
+
|
82 |
+
[More Information Needed]
|
83 |
+
|
84 |
+
### Training Procedure
|
85 |
+
|
86 |
+
<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
|
87 |
+
|
88 |
+
#### Preprocessing [optional]
|
89 |
+
|
90 |
+
[More Information Needed]
|
91 |
+
|
92 |
+
|
93 |
+
#### Training Hyperparameters
|
94 |
+
|
95 |
+
- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
|
96 |
+
|
97 |
+
#### Speeds, Sizes, Times [optional]
|
98 |
+
|
99 |
+
<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
|
100 |
+
|
101 |
+
[More Information Needed]
|
102 |
+
|
103 |
+
## Evaluation
|
104 |
+
|
105 |
+
<!-- This section describes the evaluation protocols and provides the results. -->
|
106 |
+
|
107 |
+
### Testing Data, Factors & Metrics
|
108 |
+
|
109 |
+
#### Testing Data
|
110 |
+
|
111 |
+
<!-- This should link to a Dataset Card if possible. -->
|
112 |
+
|
113 |
+
[More Information Needed]
|
114 |
+
|
115 |
+
#### Factors
|
116 |
+
|
117 |
+
<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
|
118 |
+
|
119 |
+
[More Information Needed]
|
120 |
+
|
121 |
+
#### Metrics
|
122 |
+
|
123 |
+
<!-- These are the evaluation metrics being used, ideally with a description of why. -->
|
124 |
+
|
125 |
+
[More Information Needed]
|
126 |
+
|
127 |
+
### Results
|
128 |
+
|
129 |
+
[More Information Needed]
|
130 |
+
|
131 |
+
#### Summary
|
132 |
+
|
133 |
+
|
134 |
+
|
135 |
+
## Model Examination [optional]
|
136 |
+
|
137 |
+
<!-- Relevant interpretability work for the model goes here -->
|
138 |
+
|
139 |
+
[More Information Needed]
|
140 |
+
|
141 |
+
## Environmental Impact
|
142 |
+
|
143 |
+
<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
|
144 |
+
|
145 |
+
Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
|
146 |
+
|
147 |
+
- **Hardware Type:** [More Information Needed]
|
148 |
+
- **Hours used:** [More Information Needed]
|
149 |
+
- **Cloud Provider:** [More Information Needed]
|
150 |
+
- **Compute Region:** [More Information Needed]
|
151 |
+
- **Carbon Emitted:** [More Information Needed]
|
152 |
+
|
153 |
+
## Technical Specifications [optional]
|
154 |
+
|
155 |
+
### Model Architecture and Objective
|
156 |
+
|
157 |
+
[More Information Needed]
|
158 |
+
|
159 |
+
### Compute Infrastructure
|
160 |
+
|
161 |
+
[More Information Needed]
|
162 |
+
|
163 |
+
#### Hardware
|
164 |
+
|
165 |
+
[More Information Needed]
|
166 |
+
|
167 |
+
#### Software
|
168 |
+
|
169 |
+
[More Information Needed]
|
170 |
+
|
171 |
+
## Citation [optional]
|
172 |
+
|
173 |
+
<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
|
174 |
+
|
175 |
+
**BibTeX:**
|
176 |
+
|
177 |
+
[More Information Needed]
|
178 |
+
|
179 |
+
**APA:**
|
180 |
+
|
181 |
+
[More Information Needed]
|
182 |
+
|
183 |
+
## Glossary [optional]
|
184 |
+
|
185 |
+
<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
|
186 |
+
|
187 |
+
[More Information Needed]
|
188 |
+
|
189 |
+
## More Information [optional]
|
190 |
+
|
191 |
+
[More Information Needed]
|
192 |
+
|
193 |
+
## Model Card Authors [optional]
|
194 |
+
|
195 |
+
[More Information Needed]
|
196 |
+
|
197 |
+
## Model Card Contact
|
198 |
+
|
199 |
+
[More Information Needed]
|
200 |
+
### Framework versions
|
201 |
+
|
202 |
+
- PEFT 0.14.0
|
04-gene-sft/gpt2_lora_text_classification/adapter_config.json
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"alpha_pattern": {},
|
3 |
+
"auto_mapping": null,
|
4 |
+
"base_model_name_or_path": "dnagpt/dna_gpt2_v0",
|
5 |
+
"bias": "none",
|
6 |
+
"eva_config": null,
|
7 |
+
"exclude_modules": null,
|
8 |
+
"fan_in_fan_out": true,
|
9 |
+
"inference_mode": true,
|
10 |
+
"init_lora_weights": true,
|
11 |
+
"layer_replication": null,
|
12 |
+
"layers_pattern": null,
|
13 |
+
"layers_to_transform": null,
|
14 |
+
"loftq_config": {},
|
15 |
+
"lora_alpha": 32,
|
16 |
+
"lora_bias": false,
|
17 |
+
"lora_dropout": 0.1,
|
18 |
+
"megatron_config": null,
|
19 |
+
"megatron_core": "megatron.core",
|
20 |
+
"modules_to_save": [
|
21 |
+
"classifier",
|
22 |
+
"score"
|
23 |
+
],
|
24 |
+
"peft_type": "LORA",
|
25 |
+
"r": 8,
|
26 |
+
"rank_pattern": {},
|
27 |
+
"revision": null,
|
28 |
+
"target_modules": [
|
29 |
+
"c_attn"
|
30 |
+
],
|
31 |
+
"task_type": "SEQ_CLS",
|
32 |
+
"use_dora": false,
|
33 |
+
"use_rslora": false
|
34 |
+
}
|
04-gene-sft/gpt2_lora_text_classification/adapter_model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:420bd2ac394927379af63039e7cf7cc8ce58744863a2ed076cc1710f3a5b22f1
|
3 |
+
size 1188920
|
04-gene-sft/gpt2_lora_text_classification/checkpoint-46620/README.md
ADDED
@@ -0,0 +1,202 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
base_model: dnagpt/dna_gpt2_v0
|
3 |
+
library_name: peft
|
4 |
+
---
|
5 |
+
|
6 |
+
# Model Card for Model ID
|
7 |
+
|
8 |
+
<!-- Provide a quick summary of what the model is/does. -->
|
9 |
+
|
10 |
+
|
11 |
+
|
12 |
+
## Model Details
|
13 |
+
|
14 |
+
### Model Description
|
15 |
+
|
16 |
+
<!-- Provide a longer summary of what this model is. -->
|
17 |
+
|
18 |
+
|
19 |
+
|
20 |
+
- **Developed by:** [More Information Needed]
|
21 |
+
- **Funded by [optional]:** [More Information Needed]
|
22 |
+
- **Shared by [optional]:** [More Information Needed]
|
23 |
+
- **Model type:** [More Information Needed]
|
24 |
+
- **Language(s) (NLP):** [More Information Needed]
|
25 |
+
- **License:** [More Information Needed]
|
26 |
+
- **Finetuned from model [optional]:** [More Information Needed]
|
27 |
+
|
28 |
+
### Model Sources [optional]
|
29 |
+
|
30 |
+
<!-- Provide the basic links for the model. -->
|
31 |
+
|
32 |
+
- **Repository:** [More Information Needed]
|
33 |
+
- **Paper [optional]:** [More Information Needed]
|
34 |
+
- **Demo [optional]:** [More Information Needed]
|
35 |
+
|
36 |
+
## Uses
|
37 |
+
|
38 |
+
<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
|
39 |
+
|
40 |
+
### Direct Use
|
41 |
+
|
42 |
+
<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
|
43 |
+
|
44 |
+
[More Information Needed]
|
45 |
+
|
46 |
+
### Downstream Use [optional]
|
47 |
+
|
48 |
+
<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
|
49 |
+
|
50 |
+
[More Information Needed]
|
51 |
+
|
52 |
+
### Out-of-Scope Use
|
53 |
+
|
54 |
+
<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
|
55 |
+
|
56 |
+
[More Information Needed]
|
57 |
+
|
58 |
+
## Bias, Risks, and Limitations
|
59 |
+
|
60 |
+
<!-- This section is meant to convey both technical and sociotechnical limitations. -->
|
61 |
+
|
62 |
+
[More Information Needed]
|
63 |
+
|
64 |
+
### Recommendations
|
65 |
+
|
66 |
+
<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
|
67 |
+
|
68 |
+
Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
|
69 |
+
|
70 |
+
## How to Get Started with the Model
|
71 |
+
|
72 |
+
Use the code below to get started with the model.
|
73 |
+
|
74 |
+
[More Information Needed]
|
75 |
+
|
76 |
+
## Training Details
|
77 |
+
|
78 |
+
### Training Data
|
79 |
+
|
80 |
+
<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
|
81 |
+
|
82 |
+
[More Information Needed]
|
83 |
+
|
84 |
+
### Training Procedure
|
85 |
+
|
86 |
+
<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
|
87 |
+
|
88 |
+
#### Preprocessing [optional]
|
89 |
+
|
90 |
+
[More Information Needed]
|
91 |
+
|
92 |
+
|
93 |
+
#### Training Hyperparameters
|
94 |
+
|
95 |
+
- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
|
96 |
+
|
97 |
+
#### Speeds, Sizes, Times [optional]
|
98 |
+
|
99 |
+
<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
|
100 |
+
|
101 |
+
[More Information Needed]
|
102 |
+
|
103 |
+
## Evaluation
|
104 |
+
|
105 |
+
<!-- This section describes the evaluation protocols and provides the results. -->
|
106 |
+
|
107 |
+
### Testing Data, Factors & Metrics
|
108 |
+
|
109 |
+
#### Testing Data
|
110 |
+
|
111 |
+
<!-- This should link to a Dataset Card if possible. -->
|
112 |
+
|
113 |
+
[More Information Needed]
|
114 |
+
|
115 |
+
#### Factors
|
116 |
+
|
117 |
+
<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
|
118 |
+
|
119 |
+
[More Information Needed]
|
120 |
+
|
121 |
+
#### Metrics
|
122 |
+
|
123 |
+
<!-- These are the evaluation metrics being used, ideally with a description of why. -->
|
124 |
+
|
125 |
+
[More Information Needed]
|
126 |
+
|
127 |
+
### Results
|
128 |
+
|
129 |
+
[More Information Needed]
|
130 |
+
|
131 |
+
#### Summary
|
132 |
+
|
133 |
+
|
134 |
+
|
135 |
+
## Model Examination [optional]
|
136 |
+
|
137 |
+
<!-- Relevant interpretability work for the model goes here -->
|
138 |
+
|
139 |
+
[More Information Needed]
|
140 |
+
|
141 |
+
## Environmental Impact
|
142 |
+
|
143 |
+
<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
|
144 |
+
|
145 |
+
Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
|
146 |
+
|
147 |
+
- **Hardware Type:** [More Information Needed]
|
148 |
+
- **Hours used:** [More Information Needed]
|
149 |
+
- **Cloud Provider:** [More Information Needed]
|
150 |
+
- **Compute Region:** [More Information Needed]
|
151 |
+
- **Carbon Emitted:** [More Information Needed]
|
152 |
+
|
153 |
+
## Technical Specifications [optional]
|
154 |
+
|
155 |
+
### Model Architecture and Objective
|
156 |
+
|
157 |
+
[More Information Needed]
|
158 |
+
|
159 |
+
### Compute Infrastructure
|
160 |
+
|
161 |
+
[More Information Needed]
|
162 |
+
|
163 |
+
#### Hardware
|
164 |
+
|
165 |
+
[More Information Needed]
|
166 |
+
|
167 |
+
#### Software
|
168 |
+
|
169 |
+
[More Information Needed]
|
170 |
+
|
171 |
+
## Citation [optional]
|
172 |
+
|
173 |
+
<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
|
174 |
+
|
175 |
+
**BibTeX:**
|
176 |
+
|
177 |
+
[More Information Needed]
|
178 |
+
|
179 |
+
**APA:**
|
180 |
+
|
181 |
+
[More Information Needed]
|
182 |
+
|
183 |
+
## Glossary [optional]
|
184 |
+
|
185 |
+
<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
|
186 |
+
|
187 |
+
[More Information Needed]
|
188 |
+
|
189 |
+
## More Information [optional]
|
190 |
+
|
191 |
+
[More Information Needed]
|
192 |
+
|
193 |
+
## Model Card Authors [optional]
|
194 |
+
|
195 |
+
[More Information Needed]
|
196 |
+
|
197 |
+
## Model Card Contact
|
198 |
+
|
199 |
+
[More Information Needed]
|
200 |
+
### Framework versions
|
201 |
+
|
202 |
+
- PEFT 0.14.0
|
04-gene-sft/gpt2_lora_text_classification/checkpoint-46620/adapter_config.json
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"alpha_pattern": {},
|
3 |
+
"auto_mapping": null,
|
4 |
+
"base_model_name_or_path": "dnagpt/dna_gpt2_v0",
|
5 |
+
"bias": "none",
|
6 |
+
"eva_config": null,
|
7 |
+
"exclude_modules": null,
|
8 |
+
"fan_in_fan_out": true,
|
9 |
+
"inference_mode": true,
|
10 |
+
"init_lora_weights": true,
|
11 |
+
"layer_replication": null,
|
12 |
+
"layers_pattern": null,
|
13 |
+
"layers_to_transform": null,
|
14 |
+
"loftq_config": {},
|
15 |
+
"lora_alpha": 32,
|
16 |
+
"lora_bias": false,
|
17 |
+
"lora_dropout": 0.1,
|
18 |
+
"megatron_config": null,
|
19 |
+
"megatron_core": "megatron.core",
|
20 |
+
"modules_to_save": [
|
21 |
+
"classifier",
|
22 |
+
"score"
|
23 |
+
],
|
24 |
+
"peft_type": "LORA",
|
25 |
+
"r": 8,
|
26 |
+
"rank_pattern": {},
|
27 |
+
"revision": null,
|
28 |
+
"target_modules": [
|
29 |
+
"c_attn"
|
30 |
+
],
|
31 |
+
"task_type": "SEQ_CLS",
|
32 |
+
"use_dora": false,
|
33 |
+
"use_rslora": false
|
34 |
+
}
|
04-gene-sft/gpt2_lora_text_classification/checkpoint-46620/adapter_model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:420bd2ac394927379af63039e7cf7cc8ce58744863a2ed076cc1710f3a5b22f1
|
3 |
+
size 1188920
|
04-gene-sft/gpt2_lora_text_classification/checkpoint-46620/merges.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6c79d89982c6ffe11f99a9830590377eba204aa277e9e00da4b44db9a758babd
|
3 |
+
size 323115
|
04-gene-sft/gpt2_lora_text_classification/checkpoint-46620/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c652df18dc71913c4ff5b64303622dc2ff04d1b88200b58d9797fadf6661fc95
|
3 |
+
size 2393043
|
04-gene-sft/gpt2_lora_text_classification/checkpoint-46620/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d3d2234629fa3aa7c9b33952182cfbee58c837dbd0f86b57ab65e76090e887de
|
3 |
+
size 14244
|
04-gene-sft/gpt2_lora_text_classification/checkpoint-46620/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ef93b247d1663864b151670cf47ed8f850f23e8283b1785b667f0bb66522304c
|
3 |
+
size 1064
|
04-gene-sft/gpt2_lora_text_classification/checkpoint-46620/special_tokens_map.json
ADDED
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bos_token": {
|
3 |
+
"content": "<|endoftext|>",
|
4 |
+
"lstrip": false,
|
5 |
+
"normalized": false,
|
6 |
+
"rstrip": false,
|
7 |
+
"single_word": false
|
8 |
+
},
|
9 |
+
"eos_token": {
|
10 |
+
"content": "<|endoftext|>",
|
11 |
+
"lstrip": false,
|
12 |
+
"normalized": false,
|
13 |
+
"rstrip": false,
|
14 |
+
"single_word": false
|
15 |
+
},
|
16 |
+
"pad_token": "<|endoftext|>",
|
17 |
+
"unk_token": {
|
18 |
+
"content": "<|endoftext|>",
|
19 |
+
"lstrip": false,
|
20 |
+
"normalized": false,
|
21 |
+
"rstrip": false,
|
22 |
+
"single_word": false
|
23 |
+
}
|
24 |
+
}
|
04-gene-sft/gpt2_lora_text_classification/checkpoint-46620/tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
04-gene-sft/gpt2_lora_text_classification/checkpoint-46620/tokenizer_config.json
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"add_bos_token": false,
|
3 |
+
"add_prefix_space": false,
|
4 |
+
"added_tokens_decoder": {
|
5 |
+
"0": {
|
6 |
+
"content": "<|endoftext|>",
|
7 |
+
"lstrip": false,
|
8 |
+
"normalized": false,
|
9 |
+
"rstrip": false,
|
10 |
+
"single_word": false,
|
11 |
+
"special": true
|
12 |
+
}
|
13 |
+
},
|
14 |
+
"bos_token": "<|endoftext|>",
|
15 |
+
"clean_up_tokenization_spaces": false,
|
16 |
+
"eos_token": "<|endoftext|>",
|
17 |
+
"errors": "replace",
|
18 |
+
"extra_special_tokens": {},
|
19 |
+
"model_max_length": 1000000000000000019884624838656,
|
20 |
+
"pad_token": "<|endoftext|>",
|
21 |
+
"tokenizer_class": "GPT2Tokenizer",
|
22 |
+
"unk_token": "<|endoftext|>"
|
23 |
+
}
|
04-gene-sft/gpt2_lora_text_classification/checkpoint-46620/trainer_state.json
ADDED
@@ -0,0 +1,768 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": 0.91875,
|
3 |
+
"best_model_checkpoint": "./gpt2_lora_text_classification/checkpoint-46620",
|
4 |
+
"epoch": 7.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 46620,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.07507507507507508,
|
13 |
+
"grad_norm": 0.6601091623306274,
|
14 |
+
"learning_rate": 1.9850750750750752e-05,
|
15 |
+
"loss": 0.2791,
|
16 |
+
"step": 500
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"epoch": 0.15015015015015015,
|
20 |
+
"grad_norm": 15.015149116516113,
|
21 |
+
"learning_rate": 1.9700600600600603e-05,
|
22 |
+
"loss": 0.2868,
|
23 |
+
"step": 1000
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 0.22522522522522523,
|
27 |
+
"grad_norm": 6.76954984664917,
|
28 |
+
"learning_rate": 1.955045045045045e-05,
|
29 |
+
"loss": 0.2775,
|
30 |
+
"step": 1500
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"epoch": 0.3003003003003003,
|
34 |
+
"grad_norm": 15.123693466186523,
|
35 |
+
"learning_rate": 1.94003003003003e-05,
|
36 |
+
"loss": 0.2902,
|
37 |
+
"step": 2000
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"epoch": 0.37537537537537535,
|
41 |
+
"grad_norm": 14.731389045715332,
|
42 |
+
"learning_rate": 1.925015015015015e-05,
|
43 |
+
"loss": 0.2755,
|
44 |
+
"step": 2500
|
45 |
+
},
|
46 |
+
{
|
47 |
+
"epoch": 0.45045045045045046,
|
48 |
+
"grad_norm": 6.209009647369385,
|
49 |
+
"learning_rate": 1.91e-05,
|
50 |
+
"loss": 0.296,
|
51 |
+
"step": 3000
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"epoch": 0.5255255255255256,
|
55 |
+
"grad_norm": 36.11541748046875,
|
56 |
+
"learning_rate": 1.894984984984985e-05,
|
57 |
+
"loss": 0.2902,
|
58 |
+
"step": 3500
|
59 |
+
},
|
60 |
+
{
|
61 |
+
"epoch": 0.6006006006006006,
|
62 |
+
"grad_norm": 13.450925827026367,
|
63 |
+
"learning_rate": 1.87996996996997e-05,
|
64 |
+
"loss": 0.26,
|
65 |
+
"step": 4000
|
66 |
+
},
|
67 |
+
{
|
68 |
+
"epoch": 0.6756756756756757,
|
69 |
+
"grad_norm": 19.150455474853516,
|
70 |
+
"learning_rate": 1.864984984984985e-05,
|
71 |
+
"loss": 0.2725,
|
72 |
+
"step": 4500
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"epoch": 0.7507507507507507,
|
76 |
+
"grad_norm": 1.4110784530639648,
|
77 |
+
"learning_rate": 1.84996996996997e-05,
|
78 |
+
"loss": 0.2842,
|
79 |
+
"step": 5000
|
80 |
+
},
|
81 |
+
{
|
82 |
+
"epoch": 0.8258258258258259,
|
83 |
+
"grad_norm": 1.0682216882705688,
|
84 |
+
"learning_rate": 1.834954954954955e-05,
|
85 |
+
"loss": 0.2743,
|
86 |
+
"step": 5500
|
87 |
+
},
|
88 |
+
{
|
89 |
+
"epoch": 0.9009009009009009,
|
90 |
+
"grad_norm": 0.35149845480918884,
|
91 |
+
"learning_rate": 1.8199699699699703e-05,
|
92 |
+
"loss": 0.2616,
|
93 |
+
"step": 6000
|
94 |
+
},
|
95 |
+
{
|
96 |
+
"epoch": 0.975975975975976,
|
97 |
+
"grad_norm": 12.264653205871582,
|
98 |
+
"learning_rate": 1.8049549549549553e-05,
|
99 |
+
"loss": 0.2683,
|
100 |
+
"step": 6500
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"epoch": 1.0,
|
104 |
+
"eval_accuracy": 0.9097972972972973,
|
105 |
+
"eval_f1": 0.9093378607809848,
|
106 |
+
"eval_loss": 0.3078426420688629,
|
107 |
+
"eval_precision": 0.9168093118794933,
|
108 |
+
"eval_recall": 0.901987201077804,
|
109 |
+
"eval_runtime": 6.9148,
|
110 |
+
"eval_samples_per_second": 856.136,
|
111 |
+
"eval_steps_per_second": 107.017,
|
112 |
+
"step": 6660
|
113 |
+
},
|
114 |
+
{
|
115 |
+
"epoch": 1.0510510510510511,
|
116 |
+
"grad_norm": 0.6039217114448547,
|
117 |
+
"learning_rate": 1.7899399399399403e-05,
|
118 |
+
"loss": 0.2609,
|
119 |
+
"step": 7000
|
120 |
+
},
|
121 |
+
{
|
122 |
+
"epoch": 1.1261261261261262,
|
123 |
+
"grad_norm": 15.304805755615234,
|
124 |
+
"learning_rate": 1.774924924924925e-05,
|
125 |
+
"loss": 0.2652,
|
126 |
+
"step": 7500
|
127 |
+
},
|
128 |
+
{
|
129 |
+
"epoch": 1.2012012012012012,
|
130 |
+
"grad_norm": 1.7410695552825928,
|
131 |
+
"learning_rate": 1.7599399399399403e-05,
|
132 |
+
"loss": 0.2701,
|
133 |
+
"step": 8000
|
134 |
+
},
|
135 |
+
{
|
136 |
+
"epoch": 1.2762762762762763,
|
137 |
+
"grad_norm": 0.29760709404945374,
|
138 |
+
"learning_rate": 1.7449249249249253e-05,
|
139 |
+
"loss": 0.2592,
|
140 |
+
"step": 8500
|
141 |
+
},
|
142 |
+
{
|
143 |
+
"epoch": 1.3513513513513513,
|
144 |
+
"grad_norm": 9.87925910949707,
|
145 |
+
"learning_rate": 1.72990990990991e-05,
|
146 |
+
"loss": 0.2571,
|
147 |
+
"step": 9000
|
148 |
+
},
|
149 |
+
{
|
150 |
+
"epoch": 1.4264264264264264,
|
151 |
+
"grad_norm": 3.006570339202881,
|
152 |
+
"learning_rate": 1.714894894894895e-05,
|
153 |
+
"loss": 0.2793,
|
154 |
+
"step": 9500
|
155 |
+
},
|
156 |
+
{
|
157 |
+
"epoch": 1.5015015015015014,
|
158 |
+
"grad_norm": 15.687701225280762,
|
159 |
+
"learning_rate": 1.69987987987988e-05,
|
160 |
+
"loss": 0.2658,
|
161 |
+
"step": 10000
|
162 |
+
},
|
163 |
+
{
|
164 |
+
"epoch": 1.5765765765765765,
|
165 |
+
"grad_norm": 14.420040130615234,
|
166 |
+
"learning_rate": 1.684894894894895e-05,
|
167 |
+
"loss": 0.2525,
|
168 |
+
"step": 10500
|
169 |
+
},
|
170 |
+
{
|
171 |
+
"epoch": 1.6516516516516515,
|
172 |
+
"grad_norm": 5.118183135986328,
|
173 |
+
"learning_rate": 1.66987987987988e-05,
|
174 |
+
"loss": 0.2773,
|
175 |
+
"step": 11000
|
176 |
+
},
|
177 |
+
{
|
178 |
+
"epoch": 1.7267267267267268,
|
179 |
+
"grad_norm": 4.803476333618164,
|
180 |
+
"learning_rate": 1.654864864864865e-05,
|
181 |
+
"loss": 0.2533,
|
182 |
+
"step": 11500
|
183 |
+
},
|
184 |
+
{
|
185 |
+
"epoch": 1.8018018018018018,
|
186 |
+
"grad_norm": 12.475725173950195,
|
187 |
+
"learning_rate": 1.63984984984985e-05,
|
188 |
+
"loss": 0.2778,
|
189 |
+
"step": 12000
|
190 |
+
},
|
191 |
+
{
|
192 |
+
"epoch": 1.8768768768768769,
|
193 |
+
"grad_norm": 0.4601811170578003,
|
194 |
+
"learning_rate": 1.624834834834835e-05,
|
195 |
+
"loss": 0.2595,
|
196 |
+
"step": 12500
|
197 |
+
},
|
198 |
+
{
|
199 |
+
"epoch": 1.951951951951952,
|
200 |
+
"grad_norm": 9.139686584472656,
|
201 |
+
"learning_rate": 1.60981981981982e-05,
|
202 |
+
"loss": 0.2874,
|
203 |
+
"step": 13000
|
204 |
+
},
|
205 |
+
{
|
206 |
+
"epoch": 2.0,
|
207 |
+
"eval_accuracy": 0.9135135135135135,
|
208 |
+
"eval_f1": 0.9150912106135987,
|
209 |
+
"eval_loss": 0.27880367636680603,
|
210 |
+
"eval_precision": 0.9013394315583143,
|
211 |
+
"eval_recall": 0.9292691141798586,
|
212 |
+
"eval_runtime": 6.9239,
|
213 |
+
"eval_samples_per_second": 855.011,
|
214 |
+
"eval_steps_per_second": 106.876,
|
215 |
+
"step": 13320
|
216 |
+
},
|
217 |
+
{
|
218 |
+
"epoch": 2.027027027027027,
|
219 |
+
"grad_norm": 6.4723801612854,
|
220 |
+
"learning_rate": 1.594804804804805e-05,
|
221 |
+
"loss": 0.2424,
|
222 |
+
"step": 13500
|
223 |
+
},
|
224 |
+
{
|
225 |
+
"epoch": 2.1021021021021022,
|
226 |
+
"grad_norm": 2.6300477981567383,
|
227 |
+
"learning_rate": 1.57978978978979e-05,
|
228 |
+
"loss": 0.253,
|
229 |
+
"step": 14000
|
230 |
+
},
|
231 |
+
{
|
232 |
+
"epoch": 2.1771771771771773,
|
233 |
+
"grad_norm": 0.37813109159469604,
|
234 |
+
"learning_rate": 1.564804804804805e-05,
|
235 |
+
"loss": 0.2745,
|
236 |
+
"step": 14500
|
237 |
+
},
|
238 |
+
{
|
239 |
+
"epoch": 2.2522522522522523,
|
240 |
+
"grad_norm": 8.724522590637207,
|
241 |
+
"learning_rate": 1.54981981981982e-05,
|
242 |
+
"loss": 0.2575,
|
243 |
+
"step": 15000
|
244 |
+
},
|
245 |
+
{
|
246 |
+
"epoch": 2.3273273273273274,
|
247 |
+
"grad_norm": 16.1109619140625,
|
248 |
+
"learning_rate": 1.534804804804805e-05,
|
249 |
+
"loss": 0.2579,
|
250 |
+
"step": 15500
|
251 |
+
},
|
252 |
+
{
|
253 |
+
"epoch": 2.4024024024024024,
|
254 |
+
"grad_norm": 9.96057415008545,
|
255 |
+
"learning_rate": 1.51978978978979e-05,
|
256 |
+
"loss": 0.248,
|
257 |
+
"step": 16000
|
258 |
+
},
|
259 |
+
{
|
260 |
+
"epoch": 2.4774774774774775,
|
261 |
+
"grad_norm": 1.349557876586914,
|
262 |
+
"learning_rate": 1.504774774774775e-05,
|
263 |
+
"loss": 0.2422,
|
264 |
+
"step": 16500
|
265 |
+
},
|
266 |
+
{
|
267 |
+
"epoch": 2.5525525525525525,
|
268 |
+
"grad_norm": 0.23730000853538513,
|
269 |
+
"learning_rate": 1.48975975975976e-05,
|
270 |
+
"loss": 0.2905,
|
271 |
+
"step": 17000
|
272 |
+
},
|
273 |
+
{
|
274 |
+
"epoch": 2.6276276276276276,
|
275 |
+
"grad_norm": 13.847168922424316,
|
276 |
+
"learning_rate": 1.474774774774775e-05,
|
277 |
+
"loss": 0.2584,
|
278 |
+
"step": 17500
|
279 |
+
},
|
280 |
+
{
|
281 |
+
"epoch": 2.7027027027027026,
|
282 |
+
"grad_norm": 8.907866477966309,
|
283 |
+
"learning_rate": 1.45975975975976e-05,
|
284 |
+
"loss": 0.2855,
|
285 |
+
"step": 18000
|
286 |
+
},
|
287 |
+
{
|
288 |
+
"epoch": 2.7777777777777777,
|
289 |
+
"grad_norm": 9.752605438232422,
|
290 |
+
"learning_rate": 1.4447447447447448e-05,
|
291 |
+
"loss": 0.239,
|
292 |
+
"step": 18500
|
293 |
+
},
|
294 |
+
{
|
295 |
+
"epoch": 2.8528528528528527,
|
296 |
+
"grad_norm": 16.184972763061523,
|
297 |
+
"learning_rate": 1.4297297297297299e-05,
|
298 |
+
"loss": 0.2749,
|
299 |
+
"step": 19000
|
300 |
+
},
|
301 |
+
{
|
302 |
+
"epoch": 2.9279279279279278,
|
303 |
+
"grad_norm": 17.949094772338867,
|
304 |
+
"learning_rate": 1.4147147147147149e-05,
|
305 |
+
"loss": 0.2828,
|
306 |
+
"step": 19500
|
307 |
+
},
|
308 |
+
{
|
309 |
+
"epoch": 3.0,
|
310 |
+
"eval_accuracy": 0.914527027027027,
|
311 |
+
"eval_f1": 0.9149579831932773,
|
312 |
+
"eval_loss": 0.29122158885002136,
|
313 |
+
"eval_precision": 0.9131164038913117,
|
314 |
+
"eval_recall": 0.9168070057258336,
|
315 |
+
"eval_runtime": 6.8853,
|
316 |
+
"eval_samples_per_second": 859.802,
|
317 |
+
"eval_steps_per_second": 107.475,
|
318 |
+
"step": 19980
|
319 |
+
},
|
320 |
+
{
|
321 |
+
"epoch": 3.003003003003003,
|
322 |
+
"grad_norm": 2.0944182872772217,
|
323 |
+
"learning_rate": 1.3996996996996999e-05,
|
324 |
+
"loss": 0.2426,
|
325 |
+
"step": 20000
|
326 |
+
},
|
327 |
+
{
|
328 |
+
"epoch": 3.078078078078078,
|
329 |
+
"grad_norm": 24.096668243408203,
|
330 |
+
"learning_rate": 1.3847147147147148e-05,
|
331 |
+
"loss": 0.2566,
|
332 |
+
"step": 20500
|
333 |
+
},
|
334 |
+
{
|
335 |
+
"epoch": 3.153153153153153,
|
336 |
+
"grad_norm": 22.673879623413086,
|
337 |
+
"learning_rate": 1.3696996996996998e-05,
|
338 |
+
"loss": 0.2579,
|
339 |
+
"step": 21000
|
340 |
+
},
|
341 |
+
{
|
342 |
+
"epoch": 3.2282282282282284,
|
343 |
+
"grad_norm": 14.733776092529297,
|
344 |
+
"learning_rate": 1.3546846846846849e-05,
|
345 |
+
"loss": 0.2613,
|
346 |
+
"step": 21500
|
347 |
+
},
|
348 |
+
{
|
349 |
+
"epoch": 3.3033033033033035,
|
350 |
+
"grad_norm": 3.1102752685546875,
|
351 |
+
"learning_rate": 1.3396696696696699e-05,
|
352 |
+
"loss": 0.2571,
|
353 |
+
"step": 22000
|
354 |
+
},
|
355 |
+
{
|
356 |
+
"epoch": 3.3783783783783785,
|
357 |
+
"grad_norm": 12.633967399597168,
|
358 |
+
"learning_rate": 1.3246546546546547e-05,
|
359 |
+
"loss": 0.2528,
|
360 |
+
"step": 22500
|
361 |
+
},
|
362 |
+
{
|
363 |
+
"epoch": 3.4534534534534536,
|
364 |
+
"grad_norm": 13.392036437988281,
|
365 |
+
"learning_rate": 1.3096696696696698e-05,
|
366 |
+
"loss": 0.2507,
|
367 |
+
"step": 23000
|
368 |
+
},
|
369 |
+
{
|
370 |
+
"epoch": 3.5285285285285286,
|
371 |
+
"grad_norm": 17.981300354003906,
|
372 |
+
"learning_rate": 1.2946546546546549e-05,
|
373 |
+
"loss": 0.2486,
|
374 |
+
"step": 23500
|
375 |
+
},
|
376 |
+
{
|
377 |
+
"epoch": 3.6036036036036037,
|
378 |
+
"grad_norm": 12.166417121887207,
|
379 |
+
"learning_rate": 1.2796396396396397e-05,
|
380 |
+
"loss": 0.2392,
|
381 |
+
"step": 24000
|
382 |
+
},
|
383 |
+
{
|
384 |
+
"epoch": 3.6786786786786787,
|
385 |
+
"grad_norm": 1.0378447771072388,
|
386 |
+
"learning_rate": 1.2646246246246247e-05,
|
387 |
+
"loss": 0.2799,
|
388 |
+
"step": 24500
|
389 |
+
},
|
390 |
+
{
|
391 |
+
"epoch": 3.7537537537537538,
|
392 |
+
"grad_norm": 18.767398834228516,
|
393 |
+
"learning_rate": 1.2496096096096097e-05,
|
394 |
+
"loss": 0.2654,
|
395 |
+
"step": 25000
|
396 |
+
},
|
397 |
+
{
|
398 |
+
"epoch": 3.828828828828829,
|
399 |
+
"grad_norm": 36.50725173950195,
|
400 |
+
"learning_rate": 1.2346246246246247e-05,
|
401 |
+
"loss": 0.245,
|
402 |
+
"step": 25500
|
403 |
+
},
|
404 |
+
{
|
405 |
+
"epoch": 3.903903903903904,
|
406 |
+
"grad_norm": 1.3716295957565308,
|
407 |
+
"learning_rate": 1.2196096096096097e-05,
|
408 |
+
"loss": 0.2468,
|
409 |
+
"step": 26000
|
410 |
+
},
|
411 |
+
{
|
412 |
+
"epoch": 3.978978978978979,
|
413 |
+
"grad_norm": 8.420230865478516,
|
414 |
+
"learning_rate": 1.2045945945945947e-05,
|
415 |
+
"loss": 0.2552,
|
416 |
+
"step": 26500
|
417 |
+
},
|
418 |
+
{
|
419 |
+
"epoch": 4.0,
|
420 |
+
"eval_accuracy": 0.9163851351351351,
|
421 |
+
"eval_f1": 0.9187058630316965,
|
422 |
+
"eval_loss": 0.2815721333026886,
|
423 |
+
"eval_precision": 0.896474358974359,
|
424 |
+
"eval_recall": 0.9420680363758841,
|
425 |
+
"eval_runtime": 6.8819,
|
426 |
+
"eval_samples_per_second": 860.228,
|
427 |
+
"eval_steps_per_second": 107.529,
|
428 |
+
"step": 26640
|
429 |
+
},
|
430 |
+
{
|
431 |
+
"epoch": 4.054054054054054,
|
432 |
+
"grad_norm": 12.154544830322266,
|
433 |
+
"learning_rate": 1.1895795795795797e-05,
|
434 |
+
"loss": 0.2599,
|
435 |
+
"step": 27000
|
436 |
+
},
|
437 |
+
{
|
438 |
+
"epoch": 4.129129129129129,
|
439 |
+
"grad_norm": 18.989110946655273,
|
440 |
+
"learning_rate": 1.1745645645645648e-05,
|
441 |
+
"loss": 0.2299,
|
442 |
+
"step": 27500
|
443 |
+
},
|
444 |
+
{
|
445 |
+
"epoch": 4.2042042042042045,
|
446 |
+
"grad_norm": 15.256402969360352,
|
447 |
+
"learning_rate": 1.1595495495495496e-05,
|
448 |
+
"loss": 0.2793,
|
449 |
+
"step": 28000
|
450 |
+
},
|
451 |
+
{
|
452 |
+
"epoch": 4.2792792792792795,
|
453 |
+
"grad_norm": 0.3791729211807251,
|
454 |
+
"learning_rate": 1.1445645645645647e-05,
|
455 |
+
"loss": 0.2362,
|
456 |
+
"step": 28500
|
457 |
+
},
|
458 |
+
{
|
459 |
+
"epoch": 4.354354354354355,
|
460 |
+
"grad_norm": 0.3302192986011505,
|
461 |
+
"learning_rate": 1.1295495495495497e-05,
|
462 |
+
"loss": 0.2487,
|
463 |
+
"step": 29000
|
464 |
+
},
|
465 |
+
{
|
466 |
+
"epoch": 4.42942942942943,
|
467 |
+
"grad_norm": 0.19345639646053314,
|
468 |
+
"learning_rate": 1.1145345345345346e-05,
|
469 |
+
"loss": 0.2693,
|
470 |
+
"step": 29500
|
471 |
+
},
|
472 |
+
{
|
473 |
+
"epoch": 4.504504504504505,
|
474 |
+
"grad_norm": 0.5276665091514587,
|
475 |
+
"learning_rate": 1.0995195195195196e-05,
|
476 |
+
"loss": 0.2515,
|
477 |
+
"step": 30000
|
478 |
+
},
|
479 |
+
{
|
480 |
+
"epoch": 4.57957957957958,
|
481 |
+
"grad_norm": 17.693134307861328,
|
482 |
+
"learning_rate": 1.0845045045045046e-05,
|
483 |
+
"loss": 0.2398,
|
484 |
+
"step": 30500
|
485 |
+
},
|
486 |
+
{
|
487 |
+
"epoch": 4.654654654654655,
|
488 |
+
"grad_norm": 4.102345943450928,
|
489 |
+
"learning_rate": 1.0695195195195196e-05,
|
490 |
+
"loss": 0.2534,
|
491 |
+
"step": 31000
|
492 |
+
},
|
493 |
+
{
|
494 |
+
"epoch": 4.72972972972973,
|
495 |
+
"grad_norm": 14.490029335021973,
|
496 |
+
"learning_rate": 1.0545045045045046e-05,
|
497 |
+
"loss": 0.2591,
|
498 |
+
"step": 31500
|
499 |
+
},
|
500 |
+
{
|
501 |
+
"epoch": 4.804804804804805,
|
502 |
+
"grad_norm": 0.18800705671310425,
|
503 |
+
"learning_rate": 1.0394894894894896e-05,
|
504 |
+
"loss": 0.2433,
|
505 |
+
"step": 32000
|
506 |
+
},
|
507 |
+
{
|
508 |
+
"epoch": 4.87987987987988,
|
509 |
+
"grad_norm": 1.4547662734985352,
|
510 |
+
"learning_rate": 1.0244744744744746e-05,
|
511 |
+
"loss": 0.2514,
|
512 |
+
"step": 32500
|
513 |
+
},
|
514 |
+
{
|
515 |
+
"epoch": 4.954954954954955,
|
516 |
+
"grad_norm": 23.680500030517578,
|
517 |
+
"learning_rate": 1.0094894894894895e-05,
|
518 |
+
"loss": 0.252,
|
519 |
+
"step": 33000
|
520 |
+
},
|
521 |
+
{
|
522 |
+
"epoch": 5.0,
|
523 |
+
"eval_accuracy": 0.914527027027027,
|
524 |
+
"eval_f1": 0.9149579831932773,
|
525 |
+
"eval_loss": 0.27195000648498535,
|
526 |
+
"eval_precision": 0.9131164038913117,
|
527 |
+
"eval_recall": 0.9168070057258336,
|
528 |
+
"eval_runtime": 6.9368,
|
529 |
+
"eval_samples_per_second": 853.416,
|
530 |
+
"eval_steps_per_second": 106.677,
|
531 |
+
"step": 33300
|
532 |
+
},
|
533 |
+
{
|
534 |
+
"epoch": 5.03003003003003,
|
535 |
+
"grad_norm": 0.7097603678703308,
|
536 |
+
"learning_rate": 9.944744744744746e-06,
|
537 |
+
"loss": 0.2542,
|
538 |
+
"step": 33500
|
539 |
+
},
|
540 |
+
{
|
541 |
+
"epoch": 5.105105105105105,
|
542 |
+
"grad_norm": 9.578977584838867,
|
543 |
+
"learning_rate": 9.794594594594596e-06,
|
544 |
+
"loss": 0.2614,
|
545 |
+
"step": 34000
|
546 |
+
},
|
547 |
+
{
|
548 |
+
"epoch": 5.18018018018018,
|
549 |
+
"grad_norm": 55.42725372314453,
|
550 |
+
"learning_rate": 9.644444444444444e-06,
|
551 |
+
"loss": 0.2489,
|
552 |
+
"step": 34500
|
553 |
+
},
|
554 |
+
{
|
555 |
+
"epoch": 5.255255255255255,
|
556 |
+
"grad_norm": 9.72994613647461,
|
557 |
+
"learning_rate": 9.494594594594595e-06,
|
558 |
+
"loss": 0.2535,
|
559 |
+
"step": 35000
|
560 |
+
},
|
561 |
+
{
|
562 |
+
"epoch": 5.33033033033033,
|
563 |
+
"grad_norm": 9.581122398376465,
|
564 |
+
"learning_rate": 9.344444444444446e-06,
|
565 |
+
"loss": 0.2594,
|
566 |
+
"step": 35500
|
567 |
+
},
|
568 |
+
{
|
569 |
+
"epoch": 5.405405405405405,
|
570 |
+
"grad_norm": 8.783069610595703,
|
571 |
+
"learning_rate": 9.194294294294294e-06,
|
572 |
+
"loss": 0.2499,
|
573 |
+
"step": 36000
|
574 |
+
},
|
575 |
+
{
|
576 |
+
"epoch": 5.48048048048048,
|
577 |
+
"grad_norm": 21.566892623901367,
|
578 |
+
"learning_rate": 9.044144144144144e-06,
|
579 |
+
"loss": 0.2391,
|
580 |
+
"step": 36500
|
581 |
+
},
|
582 |
+
{
|
583 |
+
"epoch": 5.555555555555555,
|
584 |
+
"grad_norm": 0.5095661878585815,
|
585 |
+
"learning_rate": 8.893993993993994e-06,
|
586 |
+
"loss": 0.228,
|
587 |
+
"step": 37000
|
588 |
+
},
|
589 |
+
{
|
590 |
+
"epoch": 5.63063063063063,
|
591 |
+
"grad_norm": 0.5652422904968262,
|
592 |
+
"learning_rate": 8.743843843843845e-06,
|
593 |
+
"loss": 0.2354,
|
594 |
+
"step": 37500
|
595 |
+
},
|
596 |
+
{
|
597 |
+
"epoch": 5.7057057057057055,
|
598 |
+
"grad_norm": 22.595531463623047,
|
599 |
+
"learning_rate": 8.593693693693695e-06,
|
600 |
+
"loss": 0.2313,
|
601 |
+
"step": 38000
|
602 |
+
},
|
603 |
+
{
|
604 |
+
"epoch": 5.7807807807807805,
|
605 |
+
"grad_norm": 10.465389251708984,
|
606 |
+
"learning_rate": 8.443543543543543e-06,
|
607 |
+
"loss": 0.2495,
|
608 |
+
"step": 38500
|
609 |
+
},
|
610 |
+
{
|
611 |
+
"epoch": 5.8558558558558556,
|
612 |
+
"grad_norm": 13.11806583404541,
|
613 |
+
"learning_rate": 8.293693693693694e-06,
|
614 |
+
"loss": 0.2603,
|
615 |
+
"step": 39000
|
616 |
+
},
|
617 |
+
{
|
618 |
+
"epoch": 5.930930930930931,
|
619 |
+
"grad_norm": 13.471338272094727,
|
620 |
+
"learning_rate": 8.143543543543545e-06,
|
621 |
+
"loss": 0.2423,
|
622 |
+
"step": 39500
|
623 |
+
},
|
624 |
+
{
|
625 |
+
"epoch": 6.0,
|
626 |
+
"eval_accuracy": 0.9163851351351351,
|
627 |
+
"eval_f1": 0.9166526351237582,
|
628 |
+
"eval_loss": 0.28819864988327026,
|
629 |
+
"eval_precision": 0.9164983164983165,
|
630 |
+
"eval_recall": 0.9168070057258336,
|
631 |
+
"eval_runtime": 6.9985,
|
632 |
+
"eval_samples_per_second": 845.898,
|
633 |
+
"eval_steps_per_second": 105.737,
|
634 |
+
"step": 39960
|
635 |
+
},
|
636 |
+
{
|
637 |
+
"epoch": 6.006006006006006,
|
638 |
+
"grad_norm": 1.7739392518997192,
|
639 |
+
"learning_rate": 7.993393393393393e-06,
|
640 |
+
"loss": 0.247,
|
641 |
+
"step": 40000
|
642 |
+
},
|
643 |
+
{
|
644 |
+
"epoch": 6.081081081081081,
|
645 |
+
"grad_norm": 7.755850315093994,
|
646 |
+
"learning_rate": 7.843243243243243e-06,
|
647 |
+
"loss": 0.2288,
|
648 |
+
"step": 40500
|
649 |
+
},
|
650 |
+
{
|
651 |
+
"epoch": 6.156156156156156,
|
652 |
+
"grad_norm": 4.462266445159912,
|
653 |
+
"learning_rate": 7.693093093093093e-06,
|
654 |
+
"loss": 0.2566,
|
655 |
+
"step": 41000
|
656 |
+
},
|
657 |
+
{
|
658 |
+
"epoch": 6.231231231231231,
|
659 |
+
"grad_norm": 14.290399551391602,
|
660 |
+
"learning_rate": 7.543243243243244e-06,
|
661 |
+
"loss": 0.2559,
|
662 |
+
"step": 41500
|
663 |
+
},
|
664 |
+
{
|
665 |
+
"epoch": 6.306306306306306,
|
666 |
+
"grad_norm": 14.638737678527832,
|
667 |
+
"learning_rate": 7.393093093093093e-06,
|
668 |
+
"loss": 0.2237,
|
669 |
+
"step": 42000
|
670 |
+
},
|
671 |
+
{
|
672 |
+
"epoch": 6.381381381381382,
|
673 |
+
"grad_norm": 0.5656157732009888,
|
674 |
+
"learning_rate": 7.242942942942943e-06,
|
675 |
+
"loss": 0.2517,
|
676 |
+
"step": 42500
|
677 |
+
},
|
678 |
+
{
|
679 |
+
"epoch": 6.456456456456457,
|
680 |
+
"grad_norm": 5.043838977813721,
|
681 |
+
"learning_rate": 7.092792792792793e-06,
|
682 |
+
"loss": 0.2511,
|
683 |
+
"step": 43000
|
684 |
+
},
|
685 |
+
{
|
686 |
+
"epoch": 6.531531531531532,
|
687 |
+
"grad_norm": 1.055748462677002,
|
688 |
+
"learning_rate": 6.942942942942944e-06,
|
689 |
+
"loss": 0.2385,
|
690 |
+
"step": 43500
|
691 |
+
},
|
692 |
+
{
|
693 |
+
"epoch": 6.606606606606607,
|
694 |
+
"grad_norm": 0.2881987392902374,
|
695 |
+
"learning_rate": 6.792792792792793e-06,
|
696 |
+
"loss": 0.2537,
|
697 |
+
"step": 44000
|
698 |
+
},
|
699 |
+
{
|
700 |
+
"epoch": 6.681681681681682,
|
701 |
+
"grad_norm": 26.814804077148438,
|
702 |
+
"learning_rate": 6.642642642642643e-06,
|
703 |
+
"loss": 0.2544,
|
704 |
+
"step": 44500
|
705 |
+
},
|
706 |
+
{
|
707 |
+
"epoch": 6.756756756756757,
|
708 |
+
"grad_norm": 3.904442548751831,
|
709 |
+
"learning_rate": 6.4924924924924924e-06,
|
710 |
+
"loss": 0.2322,
|
711 |
+
"step": 45000
|
712 |
+
},
|
713 |
+
{
|
714 |
+
"epoch": 6.831831831831832,
|
715 |
+
"grad_norm": 0.3679012358188629,
|
716 |
+
"learning_rate": 6.342642642642643e-06,
|
717 |
+
"loss": 0.2314,
|
718 |
+
"step": 45500
|
719 |
+
},
|
720 |
+
{
|
721 |
+
"epoch": 6.906906906906907,
|
722 |
+
"grad_norm": 0.13983987271785736,
|
723 |
+
"learning_rate": 6.192492492492494e-06,
|
724 |
+
"loss": 0.2559,
|
725 |
+
"step": 46000
|
726 |
+
},
|
727 |
+
{
|
728 |
+
"epoch": 6.981981981981982,
|
729 |
+
"grad_norm": 13.696492195129395,
|
730 |
+
"learning_rate": 6.042342342342343e-06,
|
731 |
+
"loss": 0.2535,
|
732 |
+
"step": 46500
|
733 |
+
},
|
734 |
+
{
|
735 |
+
"epoch": 7.0,
|
736 |
+
"eval_accuracy": 0.91875,
|
737 |
+
"eval_f1": 0.9199267521225237,
|
738 |
+
"eval_loss": 0.2686730921268463,
|
739 |
+
"eval_precision": 0.9094799210006583,
|
740 |
+
"eval_recall": 0.9306163691478613,
|
741 |
+
"eval_runtime": 6.8805,
|
742 |
+
"eval_samples_per_second": 860.409,
|
743 |
+
"eval_steps_per_second": 107.551,
|
744 |
+
"step": 46620
|
745 |
+
}
|
746 |
+
],
|
747 |
+
"logging_steps": 500,
|
748 |
+
"max_steps": 66600,
|
749 |
+
"num_input_tokens_seen": 0,
|
750 |
+
"num_train_epochs": 10,
|
751 |
+
"save_steps": 500,
|
752 |
+
"stateful_callbacks": {
|
753 |
+
"TrainerControl": {
|
754 |
+
"args": {
|
755 |
+
"should_epoch_stop": false,
|
756 |
+
"should_evaluate": false,
|
757 |
+
"should_log": false,
|
758 |
+
"should_save": true,
|
759 |
+
"should_training_stop": false
|
760 |
+
},
|
761 |
+
"attributes": {}
|
762 |
+
}
|
763 |
+
},
|
764 |
+
"total_flos": 2.44459272830976e+16,
|
765 |
+
"train_batch_size": 8,
|
766 |
+
"trial_name": null,
|
767 |
+
"trial_params": null
|
768 |
+
}
|
04-gene-sft/gpt2_lora_text_classification/checkpoint-46620/training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c0e641b388947ac47c75e10e6525af9bbc8a067f3617ad53397af08cf2c7e505
|
3 |
+
size 5304
|
04-gene-sft/gpt2_lora_text_classification/checkpoint-46620/vocab.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
04-gene-sft/gpt2_lora_text_classification/checkpoint-66600/README.md
ADDED
@@ -0,0 +1,202 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
base_model: dnagpt/dna_gpt2_v0
|
3 |
+
library_name: peft
|
4 |
+
---
|
5 |
+
|
6 |
+
# Model Card for Model ID
|
7 |
+
|
8 |
+
<!-- Provide a quick summary of what the model is/does. -->
|
9 |
+
|
10 |
+
|
11 |
+
|
12 |
+
## Model Details
|
13 |
+
|
14 |
+
### Model Description
|
15 |
+
|
16 |
+
<!-- Provide a longer summary of what this model is. -->
|
17 |
+
|
18 |
+
|
19 |
+
|
20 |
+
- **Developed by:** [More Information Needed]
|
21 |
+
- **Funded by [optional]:** [More Information Needed]
|
22 |
+
- **Shared by [optional]:** [More Information Needed]
|
23 |
+
- **Model type:** [More Information Needed]
|
24 |
+
- **Language(s) (NLP):** [More Information Needed]
|
25 |
+
- **License:** [More Information Needed]
|
26 |
+
- **Finetuned from model [optional]:** [More Information Needed]
|
27 |
+
|
28 |
+
### Model Sources [optional]
|
29 |
+
|
30 |
+
<!-- Provide the basic links for the model. -->
|
31 |
+
|
32 |
+
- **Repository:** [More Information Needed]
|
33 |
+
- **Paper [optional]:** [More Information Needed]
|
34 |
+
- **Demo [optional]:** [More Information Needed]
|
35 |
+
|
36 |
+
## Uses
|
37 |
+
|
38 |
+
<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
|
39 |
+
|
40 |
+
### Direct Use
|
41 |
+
|
42 |
+
<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
|
43 |
+
|
44 |
+
[More Information Needed]
|
45 |
+
|
46 |
+
### Downstream Use [optional]
|
47 |
+
|
48 |
+
<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
|
49 |
+
|
50 |
+
[More Information Needed]
|
51 |
+
|
52 |
+
### Out-of-Scope Use
|
53 |
+
|
54 |
+
<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
|
55 |
+
|
56 |
+
[More Information Needed]
|
57 |
+
|
58 |
+
## Bias, Risks, and Limitations
|
59 |
+
|
60 |
+
<!-- This section is meant to convey both technical and sociotechnical limitations. -->
|
61 |
+
|
62 |
+
[More Information Needed]
|
63 |
+
|
64 |
+
### Recommendations
|
65 |
+
|
66 |
+
<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
|
67 |
+
|
68 |
+
Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
|
69 |
+
|
70 |
+
## How to Get Started with the Model
|
71 |
+
|
72 |
+
Use the code below to get started with the model.
|
73 |
+
|
74 |
+
[More Information Needed]
|
75 |
+
|
76 |
+
## Training Details
|
77 |
+
|
78 |
+
### Training Data
|
79 |
+
|
80 |
+
<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
|
81 |
+
|
82 |
+
[More Information Needed]
|
83 |
+
|
84 |
+
### Training Procedure
|
85 |
+
|
86 |
+
<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
|
87 |
+
|
88 |
+
#### Preprocessing [optional]
|
89 |
+
|
90 |
+
[More Information Needed]
|
91 |
+
|
92 |
+
|
93 |
+
#### Training Hyperparameters
|
94 |
+
|
95 |
+
- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
|
96 |
+
|
97 |
+
#### Speeds, Sizes, Times [optional]
|
98 |
+
|
99 |
+
<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
|
100 |
+
|
101 |
+
[More Information Needed]
|
102 |
+
|
103 |
+
## Evaluation
|
104 |
+
|
105 |
+
<!-- This section describes the evaluation protocols and provides the results. -->
|
106 |
+
|
107 |
+
### Testing Data, Factors & Metrics
|
108 |
+
|
109 |
+
#### Testing Data
|
110 |
+
|
111 |
+
<!-- This should link to a Dataset Card if possible. -->
|
112 |
+
|
113 |
+
[More Information Needed]
|
114 |
+
|
115 |
+
#### Factors
|
116 |
+
|
117 |
+
<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
|
118 |
+
|
119 |
+
[More Information Needed]
|
120 |
+
|
121 |
+
#### Metrics
|
122 |
+
|
123 |
+
<!-- These are the evaluation metrics being used, ideally with a description of why. -->
|
124 |
+
|
125 |
+
[More Information Needed]
|
126 |
+
|
127 |
+
### Results
|
128 |
+
|
129 |
+
[More Information Needed]
|
130 |
+
|
131 |
+
#### Summary
|
132 |
+
|
133 |
+
|
134 |
+
|
135 |
+
## Model Examination [optional]
|
136 |
+
|
137 |
+
<!-- Relevant interpretability work for the model goes here -->
|
138 |
+
|
139 |
+
[More Information Needed]
|
140 |
+
|
141 |
+
## Environmental Impact
|
142 |
+
|
143 |
+
<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
|
144 |
+
|
145 |
+
Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
|
146 |
+
|
147 |
+
- **Hardware Type:** [More Information Needed]
|
148 |
+
- **Hours used:** [More Information Needed]
|
149 |
+
- **Cloud Provider:** [More Information Needed]
|
150 |
+
- **Compute Region:** [More Information Needed]
|
151 |
+
- **Carbon Emitted:** [More Information Needed]
|
152 |
+
|
153 |
+
## Technical Specifications [optional]
|
154 |
+
|
155 |
+
### Model Architecture and Objective
|
156 |
+
|
157 |
+
[More Information Needed]
|
158 |
+
|
159 |
+
### Compute Infrastructure
|
160 |
+
|
161 |
+
[More Information Needed]
|
162 |
+
|
163 |
+
#### Hardware
|
164 |
+
|
165 |
+
[More Information Needed]
|
166 |
+
|
167 |
+
#### Software
|
168 |
+
|
169 |
+
[More Information Needed]
|
170 |
+
|
171 |
+
## Citation [optional]
|
172 |
+
|
173 |
+
<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
|
174 |
+
|
175 |
+
**BibTeX:**
|
176 |
+
|
177 |
+
[More Information Needed]
|
178 |
+
|
179 |
+
**APA:**
|
180 |
+
|
181 |
+
[More Information Needed]
|
182 |
+
|
183 |
+
## Glossary [optional]
|
184 |
+
|
185 |
+
<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
|
186 |
+
|
187 |
+
[More Information Needed]
|
188 |
+
|
189 |
+
## More Information [optional]
|
190 |
+
|
191 |
+
[More Information Needed]
|
192 |
+
|
193 |
+
## Model Card Authors [optional]
|
194 |
+
|
195 |
+
[More Information Needed]
|
196 |
+
|
197 |
+
## Model Card Contact
|
198 |
+
|
199 |
+
[More Information Needed]
|
200 |
+
### Framework versions
|
201 |
+
|
202 |
+
- PEFT 0.14.0
|
04-gene-sft/gpt2_lora_text_classification/checkpoint-66600/adapter_config.json
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"alpha_pattern": {},
|
3 |
+
"auto_mapping": null,
|
4 |
+
"base_model_name_or_path": "dnagpt/dna_gpt2_v0",
|
5 |
+
"bias": "none",
|
6 |
+
"eva_config": null,
|
7 |
+
"exclude_modules": null,
|
8 |
+
"fan_in_fan_out": true,
|
9 |
+
"inference_mode": true,
|
10 |
+
"init_lora_weights": true,
|
11 |
+
"layer_replication": null,
|
12 |
+
"layers_pattern": null,
|
13 |
+
"layers_to_transform": null,
|
14 |
+
"loftq_config": {},
|
15 |
+
"lora_alpha": 32,
|
16 |
+
"lora_bias": false,
|
17 |
+
"lora_dropout": 0.1,
|
18 |
+
"megatron_config": null,
|
19 |
+
"megatron_core": "megatron.core",
|
20 |
+
"modules_to_save": [
|
21 |
+
"classifier",
|
22 |
+
"score"
|
23 |
+
],
|
24 |
+
"peft_type": "LORA",
|
25 |
+
"r": 8,
|
26 |
+
"rank_pattern": {},
|
27 |
+
"revision": null,
|
28 |
+
"target_modules": [
|
29 |
+
"c_attn"
|
30 |
+
],
|
31 |
+
"task_type": "SEQ_CLS",
|
32 |
+
"use_dora": false,
|
33 |
+
"use_rslora": false
|
34 |
+
}
|
04-gene-sft/gpt2_lora_text_classification/checkpoint-66600/adapter_model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8735630e2a1c93b91017c41487698662f8d4e7944955bcdb26edd08027cbaa60
|
3 |
+
size 1188920
|
04-gene-sft/gpt2_lora_text_classification/checkpoint-66600/merges.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6c79d89982c6ffe11f99a9830590377eba204aa277e9e00da4b44db9a758babd
|
3 |
+
size 323115
|
04-gene-sft/gpt2_lora_text_classification/checkpoint-66600/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:be30cc6993dc0245ec03aabb8f719b5ce79b7444959f2f45205f83b3e10921da
|
3 |
+
size 2393043
|
04-gene-sft/gpt2_lora_text_classification/checkpoint-66600/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5c78e95d7f2ce329e1ecbb8ee93a141949f25fff087fd73bd658b885c2b131d4
|
3 |
+
size 14244
|
04-gene-sft/gpt2_lora_text_classification/checkpoint-66600/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0144db6a26dfbd90570a8a7979f3a1e463104b56b08635ada71ef34fdace1e16
|
3 |
+
size 1064
|
04-gene-sft/gpt2_lora_text_classification/checkpoint-66600/special_tokens_map.json
ADDED
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bos_token": {
|
3 |
+
"content": "<|endoftext|>",
|
4 |
+
"lstrip": false,
|
5 |
+
"normalized": false,
|
6 |
+
"rstrip": false,
|
7 |
+
"single_word": false
|
8 |
+
},
|
9 |
+
"eos_token": {
|
10 |
+
"content": "<|endoftext|>",
|
11 |
+
"lstrip": false,
|
12 |
+
"normalized": false,
|
13 |
+
"rstrip": false,
|
14 |
+
"single_word": false
|
15 |
+
},
|
16 |
+
"pad_token": "<|endoftext|>",
|
17 |
+
"unk_token": {
|
18 |
+
"content": "<|endoftext|>",
|
19 |
+
"lstrip": false,
|
20 |
+
"normalized": false,
|
21 |
+
"rstrip": false,
|
22 |
+
"single_word": false
|
23 |
+
}
|
24 |
+
}
|
04-gene-sft/gpt2_lora_text_classification/checkpoint-66600/tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
04-gene-sft/gpt2_lora_text_classification/checkpoint-66600/tokenizer_config.json
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"add_bos_token": false,
|
3 |
+
"add_prefix_space": false,
|
4 |
+
"added_tokens_decoder": {
|
5 |
+
"0": {
|
6 |
+
"content": "<|endoftext|>",
|
7 |
+
"lstrip": false,
|
8 |
+
"normalized": false,
|
9 |
+
"rstrip": false,
|
10 |
+
"single_word": false,
|
11 |
+
"special": true
|
12 |
+
}
|
13 |
+
},
|
14 |
+
"bos_token": "<|endoftext|>",
|
15 |
+
"clean_up_tokenization_spaces": false,
|
16 |
+
"eos_token": "<|endoftext|>",
|
17 |
+
"errors": "replace",
|
18 |
+
"extra_special_tokens": {},
|
19 |
+
"model_max_length": 1000000000000000019884624838656,
|
20 |
+
"pad_token": "<|endoftext|>",
|
21 |
+
"tokenizer_class": "GPT2Tokenizer",
|
22 |
+
"unk_token": "<|endoftext|>"
|
23 |
+
}
|
04-gene-sft/gpt2_lora_text_classification/checkpoint-66600/trainer_state.json
ADDED
@@ -0,0 +1,1084 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": 0.91875,
|
3 |
+
"best_model_checkpoint": "./gpt2_lora_text_classification/checkpoint-46620",
|
4 |
+
"epoch": 10.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 66600,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.07507507507507508,
|
13 |
+
"grad_norm": 0.6601091623306274,
|
14 |
+
"learning_rate": 1.9850750750750752e-05,
|
15 |
+
"loss": 0.2791,
|
16 |
+
"step": 500
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"epoch": 0.15015015015015015,
|
20 |
+
"grad_norm": 15.015149116516113,
|
21 |
+
"learning_rate": 1.9700600600600603e-05,
|
22 |
+
"loss": 0.2868,
|
23 |
+
"step": 1000
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 0.22522522522522523,
|
27 |
+
"grad_norm": 6.76954984664917,
|
28 |
+
"learning_rate": 1.955045045045045e-05,
|
29 |
+
"loss": 0.2775,
|
30 |
+
"step": 1500
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"epoch": 0.3003003003003003,
|
34 |
+
"grad_norm": 15.123693466186523,
|
35 |
+
"learning_rate": 1.94003003003003e-05,
|
36 |
+
"loss": 0.2902,
|
37 |
+
"step": 2000
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"epoch": 0.37537537537537535,
|
41 |
+
"grad_norm": 14.731389045715332,
|
42 |
+
"learning_rate": 1.925015015015015e-05,
|
43 |
+
"loss": 0.2755,
|
44 |
+
"step": 2500
|
45 |
+
},
|
46 |
+
{
|
47 |
+
"epoch": 0.45045045045045046,
|
48 |
+
"grad_norm": 6.209009647369385,
|
49 |
+
"learning_rate": 1.91e-05,
|
50 |
+
"loss": 0.296,
|
51 |
+
"step": 3000
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"epoch": 0.5255255255255256,
|
55 |
+
"grad_norm": 36.11541748046875,
|
56 |
+
"learning_rate": 1.894984984984985e-05,
|
57 |
+
"loss": 0.2902,
|
58 |
+
"step": 3500
|
59 |
+
},
|
60 |
+
{
|
61 |
+
"epoch": 0.6006006006006006,
|
62 |
+
"grad_norm": 13.450925827026367,
|
63 |
+
"learning_rate": 1.87996996996997e-05,
|
64 |
+
"loss": 0.26,
|
65 |
+
"step": 4000
|
66 |
+
},
|
67 |
+
{
|
68 |
+
"epoch": 0.6756756756756757,
|
69 |
+
"grad_norm": 19.150455474853516,
|
70 |
+
"learning_rate": 1.864984984984985e-05,
|
71 |
+
"loss": 0.2725,
|
72 |
+
"step": 4500
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"epoch": 0.7507507507507507,
|
76 |
+
"grad_norm": 1.4110784530639648,
|
77 |
+
"learning_rate": 1.84996996996997e-05,
|
78 |
+
"loss": 0.2842,
|
79 |
+
"step": 5000
|
80 |
+
},
|
81 |
+
{
|
82 |
+
"epoch": 0.8258258258258259,
|
83 |
+
"grad_norm": 1.0682216882705688,
|
84 |
+
"learning_rate": 1.834954954954955e-05,
|
85 |
+
"loss": 0.2743,
|
86 |
+
"step": 5500
|
87 |
+
},
|
88 |
+
{
|
89 |
+
"epoch": 0.9009009009009009,
|
90 |
+
"grad_norm": 0.35149845480918884,
|
91 |
+
"learning_rate": 1.8199699699699703e-05,
|
92 |
+
"loss": 0.2616,
|
93 |
+
"step": 6000
|
94 |
+
},
|
95 |
+
{
|
96 |
+
"epoch": 0.975975975975976,
|
97 |
+
"grad_norm": 12.264653205871582,
|
98 |
+
"learning_rate": 1.8049549549549553e-05,
|
99 |
+
"loss": 0.2683,
|
100 |
+
"step": 6500
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"epoch": 1.0,
|
104 |
+
"eval_accuracy": 0.9097972972972973,
|
105 |
+
"eval_f1": 0.9093378607809848,
|
106 |
+
"eval_loss": 0.3078426420688629,
|
107 |
+
"eval_precision": 0.9168093118794933,
|
108 |
+
"eval_recall": 0.901987201077804,
|
109 |
+
"eval_runtime": 6.9148,
|
110 |
+
"eval_samples_per_second": 856.136,
|
111 |
+
"eval_steps_per_second": 107.017,
|
112 |
+
"step": 6660
|
113 |
+
},
|
114 |
+
{
|
115 |
+
"epoch": 1.0510510510510511,
|
116 |
+
"grad_norm": 0.6039217114448547,
|
117 |
+
"learning_rate": 1.7899399399399403e-05,
|
118 |
+
"loss": 0.2609,
|
119 |
+
"step": 7000
|
120 |
+
},
|
121 |
+
{
|
122 |
+
"epoch": 1.1261261261261262,
|
123 |
+
"grad_norm": 15.304805755615234,
|
124 |
+
"learning_rate": 1.774924924924925e-05,
|
125 |
+
"loss": 0.2652,
|
126 |
+
"step": 7500
|
127 |
+
},
|
128 |
+
{
|
129 |
+
"epoch": 1.2012012012012012,
|
130 |
+
"grad_norm": 1.7410695552825928,
|
131 |
+
"learning_rate": 1.7599399399399403e-05,
|
132 |
+
"loss": 0.2701,
|
133 |
+
"step": 8000
|
134 |
+
},
|
135 |
+
{
|
136 |
+
"epoch": 1.2762762762762763,
|
137 |
+
"grad_norm": 0.29760709404945374,
|
138 |
+
"learning_rate": 1.7449249249249253e-05,
|
139 |
+
"loss": 0.2592,
|
140 |
+
"step": 8500
|
141 |
+
},
|
142 |
+
{
|
143 |
+
"epoch": 1.3513513513513513,
|
144 |
+
"grad_norm": 9.87925910949707,
|
145 |
+
"learning_rate": 1.72990990990991e-05,
|
146 |
+
"loss": 0.2571,
|
147 |
+
"step": 9000
|
148 |
+
},
|
149 |
+
{
|
150 |
+
"epoch": 1.4264264264264264,
|
151 |
+
"grad_norm": 3.006570339202881,
|
152 |
+
"learning_rate": 1.714894894894895e-05,
|
153 |
+
"loss": 0.2793,
|
154 |
+
"step": 9500
|
155 |
+
},
|
156 |
+
{
|
157 |
+
"epoch": 1.5015015015015014,
|
158 |
+
"grad_norm": 15.687701225280762,
|
159 |
+
"learning_rate": 1.69987987987988e-05,
|
160 |
+
"loss": 0.2658,
|
161 |
+
"step": 10000
|
162 |
+
},
|
163 |
+
{
|
164 |
+
"epoch": 1.5765765765765765,
|
165 |
+
"grad_norm": 14.420040130615234,
|
166 |
+
"learning_rate": 1.684894894894895e-05,
|
167 |
+
"loss": 0.2525,
|
168 |
+
"step": 10500
|
169 |
+
},
|
170 |
+
{
|
171 |
+
"epoch": 1.6516516516516515,
|
172 |
+
"grad_norm": 5.118183135986328,
|
173 |
+
"learning_rate": 1.66987987987988e-05,
|
174 |
+
"loss": 0.2773,
|
175 |
+
"step": 11000
|
176 |
+
},
|
177 |
+
{
|
178 |
+
"epoch": 1.7267267267267268,
|
179 |
+
"grad_norm": 4.803476333618164,
|
180 |
+
"learning_rate": 1.654864864864865e-05,
|
181 |
+
"loss": 0.2533,
|
182 |
+
"step": 11500
|
183 |
+
},
|
184 |
+
{
|
185 |
+
"epoch": 1.8018018018018018,
|
186 |
+
"grad_norm": 12.475725173950195,
|
187 |
+
"learning_rate": 1.63984984984985e-05,
|
188 |
+
"loss": 0.2778,
|
189 |
+
"step": 12000
|
190 |
+
},
|
191 |
+
{
|
192 |
+
"epoch": 1.8768768768768769,
|
193 |
+
"grad_norm": 0.4601811170578003,
|
194 |
+
"learning_rate": 1.624834834834835e-05,
|
195 |
+
"loss": 0.2595,
|
196 |
+
"step": 12500
|
197 |
+
},
|
198 |
+
{
|
199 |
+
"epoch": 1.951951951951952,
|
200 |
+
"grad_norm": 9.139686584472656,
|
201 |
+
"learning_rate": 1.60981981981982e-05,
|
202 |
+
"loss": 0.2874,
|
203 |
+
"step": 13000
|
204 |
+
},
|
205 |
+
{
|
206 |
+
"epoch": 2.0,
|
207 |
+
"eval_accuracy": 0.9135135135135135,
|
208 |
+
"eval_f1": 0.9150912106135987,
|
209 |
+
"eval_loss": 0.27880367636680603,
|
210 |
+
"eval_precision": 0.9013394315583143,
|
211 |
+
"eval_recall": 0.9292691141798586,
|
212 |
+
"eval_runtime": 6.9239,
|
213 |
+
"eval_samples_per_second": 855.011,
|
214 |
+
"eval_steps_per_second": 106.876,
|
215 |
+
"step": 13320
|
216 |
+
},
|
217 |
+
{
|
218 |
+
"epoch": 2.027027027027027,
|
219 |
+
"grad_norm": 6.4723801612854,
|
220 |
+
"learning_rate": 1.594804804804805e-05,
|
221 |
+
"loss": 0.2424,
|
222 |
+
"step": 13500
|
223 |
+
},
|
224 |
+
{
|
225 |
+
"epoch": 2.1021021021021022,
|
226 |
+
"grad_norm": 2.6300477981567383,
|
227 |
+
"learning_rate": 1.57978978978979e-05,
|
228 |
+
"loss": 0.253,
|
229 |
+
"step": 14000
|
230 |
+
},
|
231 |
+
{
|
232 |
+
"epoch": 2.1771771771771773,
|
233 |
+
"grad_norm": 0.37813109159469604,
|
234 |
+
"learning_rate": 1.564804804804805e-05,
|
235 |
+
"loss": 0.2745,
|
236 |
+
"step": 14500
|
237 |
+
},
|
238 |
+
{
|
239 |
+
"epoch": 2.2522522522522523,
|
240 |
+
"grad_norm": 8.724522590637207,
|
241 |
+
"learning_rate": 1.54981981981982e-05,
|
242 |
+
"loss": 0.2575,
|
243 |
+
"step": 15000
|
244 |
+
},
|
245 |
+
{
|
246 |
+
"epoch": 2.3273273273273274,
|
247 |
+
"grad_norm": 16.1109619140625,
|
248 |
+
"learning_rate": 1.534804804804805e-05,
|
249 |
+
"loss": 0.2579,
|
250 |
+
"step": 15500
|
251 |
+
},
|
252 |
+
{
|
253 |
+
"epoch": 2.4024024024024024,
|
254 |
+
"grad_norm": 9.96057415008545,
|
255 |
+
"learning_rate": 1.51978978978979e-05,
|
256 |
+
"loss": 0.248,
|
257 |
+
"step": 16000
|
258 |
+
},
|
259 |
+
{
|
260 |
+
"epoch": 2.4774774774774775,
|
261 |
+
"grad_norm": 1.349557876586914,
|
262 |
+
"learning_rate": 1.504774774774775e-05,
|
263 |
+
"loss": 0.2422,
|
264 |
+
"step": 16500
|
265 |
+
},
|
266 |
+
{
|
267 |
+
"epoch": 2.5525525525525525,
|
268 |
+
"grad_norm": 0.23730000853538513,
|
269 |
+
"learning_rate": 1.48975975975976e-05,
|
270 |
+
"loss": 0.2905,
|
271 |
+
"step": 17000
|
272 |
+
},
|
273 |
+
{
|
274 |
+
"epoch": 2.6276276276276276,
|
275 |
+
"grad_norm": 13.847168922424316,
|
276 |
+
"learning_rate": 1.474774774774775e-05,
|
277 |
+
"loss": 0.2584,
|
278 |
+
"step": 17500
|
279 |
+
},
|
280 |
+
{
|
281 |
+
"epoch": 2.7027027027027026,
|
282 |
+
"grad_norm": 8.907866477966309,
|
283 |
+
"learning_rate": 1.45975975975976e-05,
|
284 |
+
"loss": 0.2855,
|
285 |
+
"step": 18000
|
286 |
+
},
|
287 |
+
{
|
288 |
+
"epoch": 2.7777777777777777,
|
289 |
+
"grad_norm": 9.752605438232422,
|
290 |
+
"learning_rate": 1.4447447447447448e-05,
|
291 |
+
"loss": 0.239,
|
292 |
+
"step": 18500
|
293 |
+
},
|
294 |
+
{
|
295 |
+
"epoch": 2.8528528528528527,
|
296 |
+
"grad_norm": 16.184972763061523,
|
297 |
+
"learning_rate": 1.4297297297297299e-05,
|
298 |
+
"loss": 0.2749,
|
299 |
+
"step": 19000
|
300 |
+
},
|
301 |
+
{
|
302 |
+
"epoch": 2.9279279279279278,
|
303 |
+
"grad_norm": 17.949094772338867,
|
304 |
+
"learning_rate": 1.4147147147147149e-05,
|
305 |
+
"loss": 0.2828,
|
306 |
+
"step": 19500
|
307 |
+
},
|
308 |
+
{
|
309 |
+
"epoch": 3.0,
|
310 |
+
"eval_accuracy": 0.914527027027027,
|
311 |
+
"eval_f1": 0.9149579831932773,
|
312 |
+
"eval_loss": 0.29122158885002136,
|
313 |
+
"eval_precision": 0.9131164038913117,
|
314 |
+
"eval_recall": 0.9168070057258336,
|
315 |
+
"eval_runtime": 6.8853,
|
316 |
+
"eval_samples_per_second": 859.802,
|
317 |
+
"eval_steps_per_second": 107.475,
|
318 |
+
"step": 19980
|
319 |
+
},
|
320 |
+
{
|
321 |
+
"epoch": 3.003003003003003,
|
322 |
+
"grad_norm": 2.0944182872772217,
|
323 |
+
"learning_rate": 1.3996996996996999e-05,
|
324 |
+
"loss": 0.2426,
|
325 |
+
"step": 20000
|
326 |
+
},
|
327 |
+
{
|
328 |
+
"epoch": 3.078078078078078,
|
329 |
+
"grad_norm": 24.096668243408203,
|
330 |
+
"learning_rate": 1.3847147147147148e-05,
|
331 |
+
"loss": 0.2566,
|
332 |
+
"step": 20500
|
333 |
+
},
|
334 |
+
{
|
335 |
+
"epoch": 3.153153153153153,
|
336 |
+
"grad_norm": 22.673879623413086,
|
337 |
+
"learning_rate": 1.3696996996996998e-05,
|
338 |
+
"loss": 0.2579,
|
339 |
+
"step": 21000
|
340 |
+
},
|
341 |
+
{
|
342 |
+
"epoch": 3.2282282282282284,
|
343 |
+
"grad_norm": 14.733776092529297,
|
344 |
+
"learning_rate": 1.3546846846846849e-05,
|
345 |
+
"loss": 0.2613,
|
346 |
+
"step": 21500
|
347 |
+
},
|
348 |
+
{
|
349 |
+
"epoch": 3.3033033033033035,
|
350 |
+
"grad_norm": 3.1102752685546875,
|
351 |
+
"learning_rate": 1.3396696696696699e-05,
|
352 |
+
"loss": 0.2571,
|
353 |
+
"step": 22000
|
354 |
+
},
|
355 |
+
{
|
356 |
+
"epoch": 3.3783783783783785,
|
357 |
+
"grad_norm": 12.633967399597168,
|
358 |
+
"learning_rate": 1.3246546546546547e-05,
|
359 |
+
"loss": 0.2528,
|
360 |
+
"step": 22500
|
361 |
+
},
|
362 |
+
{
|
363 |
+
"epoch": 3.4534534534534536,
|
364 |
+
"grad_norm": 13.392036437988281,
|
365 |
+
"learning_rate": 1.3096696696696698e-05,
|
366 |
+
"loss": 0.2507,
|
367 |
+
"step": 23000
|
368 |
+
},
|
369 |
+
{
|
370 |
+
"epoch": 3.5285285285285286,
|
371 |
+
"grad_norm": 17.981300354003906,
|
372 |
+
"learning_rate": 1.2946546546546549e-05,
|
373 |
+
"loss": 0.2486,
|
374 |
+
"step": 23500
|
375 |
+
},
|
376 |
+
{
|
377 |
+
"epoch": 3.6036036036036037,
|
378 |
+
"grad_norm": 12.166417121887207,
|
379 |
+
"learning_rate": 1.2796396396396397e-05,
|
380 |
+
"loss": 0.2392,
|
381 |
+
"step": 24000
|
382 |
+
},
|
383 |
+
{
|
384 |
+
"epoch": 3.6786786786786787,
|
385 |
+
"grad_norm": 1.0378447771072388,
|
386 |
+
"learning_rate": 1.2646246246246247e-05,
|
387 |
+
"loss": 0.2799,
|
388 |
+
"step": 24500
|
389 |
+
},
|
390 |
+
{
|
391 |
+
"epoch": 3.7537537537537538,
|
392 |
+
"grad_norm": 18.767398834228516,
|
393 |
+
"learning_rate": 1.2496096096096097e-05,
|
394 |
+
"loss": 0.2654,
|
395 |
+
"step": 25000
|
396 |
+
},
|
397 |
+
{
|
398 |
+
"epoch": 3.828828828828829,
|
399 |
+
"grad_norm": 36.50725173950195,
|
400 |
+
"learning_rate": 1.2346246246246247e-05,
|
401 |
+
"loss": 0.245,
|
402 |
+
"step": 25500
|
403 |
+
},
|
404 |
+
{
|
405 |
+
"epoch": 3.903903903903904,
|
406 |
+
"grad_norm": 1.3716295957565308,
|
407 |
+
"learning_rate": 1.2196096096096097e-05,
|
408 |
+
"loss": 0.2468,
|
409 |
+
"step": 26000
|
410 |
+
},
|
411 |
+
{
|
412 |
+
"epoch": 3.978978978978979,
|
413 |
+
"grad_norm": 8.420230865478516,
|
414 |
+
"learning_rate": 1.2045945945945947e-05,
|
415 |
+
"loss": 0.2552,
|
416 |
+
"step": 26500
|
417 |
+
},
|
418 |
+
{
|
419 |
+
"epoch": 4.0,
|
420 |
+
"eval_accuracy": 0.9163851351351351,
|
421 |
+
"eval_f1": 0.9187058630316965,
|
422 |
+
"eval_loss": 0.2815721333026886,
|
423 |
+
"eval_precision": 0.896474358974359,
|
424 |
+
"eval_recall": 0.9420680363758841,
|
425 |
+
"eval_runtime": 6.8819,
|
426 |
+
"eval_samples_per_second": 860.228,
|
427 |
+
"eval_steps_per_second": 107.529,
|
428 |
+
"step": 26640
|
429 |
+
},
|
430 |
+
{
|
431 |
+
"epoch": 4.054054054054054,
|
432 |
+
"grad_norm": 12.154544830322266,
|
433 |
+
"learning_rate": 1.1895795795795797e-05,
|
434 |
+
"loss": 0.2599,
|
435 |
+
"step": 27000
|
436 |
+
},
|
437 |
+
{
|
438 |
+
"epoch": 4.129129129129129,
|
439 |
+
"grad_norm": 18.989110946655273,
|
440 |
+
"learning_rate": 1.1745645645645648e-05,
|
441 |
+
"loss": 0.2299,
|
442 |
+
"step": 27500
|
443 |
+
},
|
444 |
+
{
|
445 |
+
"epoch": 4.2042042042042045,
|
446 |
+
"grad_norm": 15.256402969360352,
|
447 |
+
"learning_rate": 1.1595495495495496e-05,
|
448 |
+
"loss": 0.2793,
|
449 |
+
"step": 28000
|
450 |
+
},
|
451 |
+
{
|
452 |
+
"epoch": 4.2792792792792795,
|
453 |
+
"grad_norm": 0.3791729211807251,
|
454 |
+
"learning_rate": 1.1445645645645647e-05,
|
455 |
+
"loss": 0.2362,
|
456 |
+
"step": 28500
|
457 |
+
},
|
458 |
+
{
|
459 |
+
"epoch": 4.354354354354355,
|
460 |
+
"grad_norm": 0.3302192986011505,
|
461 |
+
"learning_rate": 1.1295495495495497e-05,
|
462 |
+
"loss": 0.2487,
|
463 |
+
"step": 29000
|
464 |
+
},
|
465 |
+
{
|
466 |
+
"epoch": 4.42942942942943,
|
467 |
+
"grad_norm": 0.19345639646053314,
|
468 |
+
"learning_rate": 1.1145345345345346e-05,
|
469 |
+
"loss": 0.2693,
|
470 |
+
"step": 29500
|
471 |
+
},
|
472 |
+
{
|
473 |
+
"epoch": 4.504504504504505,
|
474 |
+
"grad_norm": 0.5276665091514587,
|
475 |
+
"learning_rate": 1.0995195195195196e-05,
|
476 |
+
"loss": 0.2515,
|
477 |
+
"step": 30000
|
478 |
+
},
|
479 |
+
{
|
480 |
+
"epoch": 4.57957957957958,
|
481 |
+
"grad_norm": 17.693134307861328,
|
482 |
+
"learning_rate": 1.0845045045045046e-05,
|
483 |
+
"loss": 0.2398,
|
484 |
+
"step": 30500
|
485 |
+
},
|
486 |
+
{
|
487 |
+
"epoch": 4.654654654654655,
|
488 |
+
"grad_norm": 4.102345943450928,
|
489 |
+
"learning_rate": 1.0695195195195196e-05,
|
490 |
+
"loss": 0.2534,
|
491 |
+
"step": 31000
|
492 |
+
},
|
493 |
+
{
|
494 |
+
"epoch": 4.72972972972973,
|
495 |
+
"grad_norm": 14.490029335021973,
|
496 |
+
"learning_rate": 1.0545045045045046e-05,
|
497 |
+
"loss": 0.2591,
|
498 |
+
"step": 31500
|
499 |
+
},
|
500 |
+
{
|
501 |
+
"epoch": 4.804804804804805,
|
502 |
+
"grad_norm": 0.18800705671310425,
|
503 |
+
"learning_rate": 1.0394894894894896e-05,
|
504 |
+
"loss": 0.2433,
|
505 |
+
"step": 32000
|
506 |
+
},
|
507 |
+
{
|
508 |
+
"epoch": 4.87987987987988,
|
509 |
+
"grad_norm": 1.4547662734985352,
|
510 |
+
"learning_rate": 1.0244744744744746e-05,
|
511 |
+
"loss": 0.2514,
|
512 |
+
"step": 32500
|
513 |
+
},
|
514 |
+
{
|
515 |
+
"epoch": 4.954954954954955,
|
516 |
+
"grad_norm": 23.680500030517578,
|
517 |
+
"learning_rate": 1.0094894894894895e-05,
|
518 |
+
"loss": 0.252,
|
519 |
+
"step": 33000
|
520 |
+
},
|
521 |
+
{
|
522 |
+
"epoch": 5.0,
|
523 |
+
"eval_accuracy": 0.914527027027027,
|
524 |
+
"eval_f1": 0.9149579831932773,
|
525 |
+
"eval_loss": 0.27195000648498535,
|
526 |
+
"eval_precision": 0.9131164038913117,
|
527 |
+
"eval_recall": 0.9168070057258336,
|
528 |
+
"eval_runtime": 6.9368,
|
529 |
+
"eval_samples_per_second": 853.416,
|
530 |
+
"eval_steps_per_second": 106.677,
|
531 |
+
"step": 33300
|
532 |
+
},
|
533 |
+
{
|
534 |
+
"epoch": 5.03003003003003,
|
535 |
+
"grad_norm": 0.7097603678703308,
|
536 |
+
"learning_rate": 9.944744744744746e-06,
|
537 |
+
"loss": 0.2542,
|
538 |
+
"step": 33500
|
539 |
+
},
|
540 |
+
{
|
541 |
+
"epoch": 5.105105105105105,
|
542 |
+
"grad_norm": 9.578977584838867,
|
543 |
+
"learning_rate": 9.794594594594596e-06,
|
544 |
+
"loss": 0.2614,
|
545 |
+
"step": 34000
|
546 |
+
},
|
547 |
+
{
|
548 |
+
"epoch": 5.18018018018018,
|
549 |
+
"grad_norm": 55.42725372314453,
|
550 |
+
"learning_rate": 9.644444444444444e-06,
|
551 |
+
"loss": 0.2489,
|
552 |
+
"step": 34500
|
553 |
+
},
|
554 |
+
{
|
555 |
+
"epoch": 5.255255255255255,
|
556 |
+
"grad_norm": 9.72994613647461,
|
557 |
+
"learning_rate": 9.494594594594595e-06,
|
558 |
+
"loss": 0.2535,
|
559 |
+
"step": 35000
|
560 |
+
},
|
561 |
+
{
|
562 |
+
"epoch": 5.33033033033033,
|
563 |
+
"grad_norm": 9.581122398376465,
|
564 |
+
"learning_rate": 9.344444444444446e-06,
|
565 |
+
"loss": 0.2594,
|
566 |
+
"step": 35500
|
567 |
+
},
|
568 |
+
{
|
569 |
+
"epoch": 5.405405405405405,
|
570 |
+
"grad_norm": 8.783069610595703,
|
571 |
+
"learning_rate": 9.194294294294294e-06,
|
572 |
+
"loss": 0.2499,
|
573 |
+
"step": 36000
|
574 |
+
},
|
575 |
+
{
|
576 |
+
"epoch": 5.48048048048048,
|
577 |
+
"grad_norm": 21.566892623901367,
|
578 |
+
"learning_rate": 9.044144144144144e-06,
|
579 |
+
"loss": 0.2391,
|
580 |
+
"step": 36500
|
581 |
+
},
|
582 |
+
{
|
583 |
+
"epoch": 5.555555555555555,
|
584 |
+
"grad_norm": 0.5095661878585815,
|
585 |
+
"learning_rate": 8.893993993993994e-06,
|
586 |
+
"loss": 0.228,
|
587 |
+
"step": 37000
|
588 |
+
},
|
589 |
+
{
|
590 |
+
"epoch": 5.63063063063063,
|
591 |
+
"grad_norm": 0.5652422904968262,
|
592 |
+
"learning_rate": 8.743843843843845e-06,
|
593 |
+
"loss": 0.2354,
|
594 |
+
"step": 37500
|
595 |
+
},
|
596 |
+
{
|
597 |
+
"epoch": 5.7057057057057055,
|
598 |
+
"grad_norm": 22.595531463623047,
|
599 |
+
"learning_rate": 8.593693693693695e-06,
|
600 |
+
"loss": 0.2313,
|
601 |
+
"step": 38000
|
602 |
+
},
|
603 |
+
{
|
604 |
+
"epoch": 5.7807807807807805,
|
605 |
+
"grad_norm": 10.465389251708984,
|
606 |
+
"learning_rate": 8.443543543543543e-06,
|
607 |
+
"loss": 0.2495,
|
608 |
+
"step": 38500
|
609 |
+
},
|
610 |
+
{
|
611 |
+
"epoch": 5.8558558558558556,
|
612 |
+
"grad_norm": 13.11806583404541,
|
613 |
+
"learning_rate": 8.293693693693694e-06,
|
614 |
+
"loss": 0.2603,
|
615 |
+
"step": 39000
|
616 |
+
},
|
617 |
+
{
|
618 |
+
"epoch": 5.930930930930931,
|
619 |
+
"grad_norm": 13.471338272094727,
|
620 |
+
"learning_rate": 8.143543543543545e-06,
|
621 |
+
"loss": 0.2423,
|
622 |
+
"step": 39500
|
623 |
+
},
|
624 |
+
{
|
625 |
+
"epoch": 6.0,
|
626 |
+
"eval_accuracy": 0.9163851351351351,
|
627 |
+
"eval_f1": 0.9166526351237582,
|
628 |
+
"eval_loss": 0.28819864988327026,
|
629 |
+
"eval_precision": 0.9164983164983165,
|
630 |
+
"eval_recall": 0.9168070057258336,
|
631 |
+
"eval_runtime": 6.9985,
|
632 |
+
"eval_samples_per_second": 845.898,
|
633 |
+
"eval_steps_per_second": 105.737,
|
634 |
+
"step": 39960
|
635 |
+
},
|
636 |
+
{
|
637 |
+
"epoch": 6.006006006006006,
|
638 |
+
"grad_norm": 1.7739392518997192,
|
639 |
+
"learning_rate": 7.993393393393393e-06,
|
640 |
+
"loss": 0.247,
|
641 |
+
"step": 40000
|
642 |
+
},
|
643 |
+
{
|
644 |
+
"epoch": 6.081081081081081,
|
645 |
+
"grad_norm": 7.755850315093994,
|
646 |
+
"learning_rate": 7.843243243243243e-06,
|
647 |
+
"loss": 0.2288,
|
648 |
+
"step": 40500
|
649 |
+
},
|
650 |
+
{
|
651 |
+
"epoch": 6.156156156156156,
|
652 |
+
"grad_norm": 4.462266445159912,
|
653 |
+
"learning_rate": 7.693093093093093e-06,
|
654 |
+
"loss": 0.2566,
|
655 |
+
"step": 41000
|
656 |
+
},
|
657 |
+
{
|
658 |
+
"epoch": 6.231231231231231,
|
659 |
+
"grad_norm": 14.290399551391602,
|
660 |
+
"learning_rate": 7.543243243243244e-06,
|
661 |
+
"loss": 0.2559,
|
662 |
+
"step": 41500
|
663 |
+
},
|
664 |
+
{
|
665 |
+
"epoch": 6.306306306306306,
|
666 |
+
"grad_norm": 14.638737678527832,
|
667 |
+
"learning_rate": 7.393093093093093e-06,
|
668 |
+
"loss": 0.2237,
|
669 |
+
"step": 42000
|
670 |
+
},
|
671 |
+
{
|
672 |
+
"epoch": 6.381381381381382,
|
673 |
+
"grad_norm": 0.5656157732009888,
|
674 |
+
"learning_rate": 7.242942942942943e-06,
|
675 |
+
"loss": 0.2517,
|
676 |
+
"step": 42500
|
677 |
+
},
|
678 |
+
{
|
679 |
+
"epoch": 6.456456456456457,
|
680 |
+
"grad_norm": 5.043838977813721,
|
681 |
+
"learning_rate": 7.092792792792793e-06,
|
682 |
+
"loss": 0.2511,
|
683 |
+
"step": 43000
|
684 |
+
},
|
685 |
+
{
|
686 |
+
"epoch": 6.531531531531532,
|
687 |
+
"grad_norm": 1.055748462677002,
|
688 |
+
"learning_rate": 6.942942942942944e-06,
|
689 |
+
"loss": 0.2385,
|
690 |
+
"step": 43500
|
691 |
+
},
|
692 |
+
{
|
693 |
+
"epoch": 6.606606606606607,
|
694 |
+
"grad_norm": 0.2881987392902374,
|
695 |
+
"learning_rate": 6.792792792792793e-06,
|
696 |
+
"loss": 0.2537,
|
697 |
+
"step": 44000
|
698 |
+
},
|
699 |
+
{
|
700 |
+
"epoch": 6.681681681681682,
|
701 |
+
"grad_norm": 26.814804077148438,
|
702 |
+
"learning_rate": 6.642642642642643e-06,
|
703 |
+
"loss": 0.2544,
|
704 |
+
"step": 44500
|
705 |
+
},
|
706 |
+
{
|
707 |
+
"epoch": 6.756756756756757,
|
708 |
+
"grad_norm": 3.904442548751831,
|
709 |
+
"learning_rate": 6.4924924924924924e-06,
|
710 |
+
"loss": 0.2322,
|
711 |
+
"step": 45000
|
712 |
+
},
|
713 |
+
{
|
714 |
+
"epoch": 6.831831831831832,
|
715 |
+
"grad_norm": 0.3679012358188629,
|
716 |
+
"learning_rate": 6.342642642642643e-06,
|
717 |
+
"loss": 0.2314,
|
718 |
+
"step": 45500
|
719 |
+
},
|
720 |
+
{
|
721 |
+
"epoch": 6.906906906906907,
|
722 |
+
"grad_norm": 0.13983987271785736,
|
723 |
+
"learning_rate": 6.192492492492494e-06,
|
724 |
+
"loss": 0.2559,
|
725 |
+
"step": 46000
|
726 |
+
},
|
727 |
+
{
|
728 |
+
"epoch": 6.981981981981982,
|
729 |
+
"grad_norm": 13.696492195129395,
|
730 |
+
"learning_rate": 6.042342342342343e-06,
|
731 |
+
"loss": 0.2535,
|
732 |
+
"step": 46500
|
733 |
+
},
|
734 |
+
{
|
735 |
+
"epoch": 7.0,
|
736 |
+
"eval_accuracy": 0.91875,
|
737 |
+
"eval_f1": 0.9199267521225237,
|
738 |
+
"eval_loss": 0.2686730921268463,
|
739 |
+
"eval_precision": 0.9094799210006583,
|
740 |
+
"eval_recall": 0.9306163691478613,
|
741 |
+
"eval_runtime": 6.8805,
|
742 |
+
"eval_samples_per_second": 860.409,
|
743 |
+
"eval_steps_per_second": 107.551,
|
744 |
+
"step": 46620
|
745 |
+
},
|
746 |
+
{
|
747 |
+
"epoch": 7.057057057057057,
|
748 |
+
"grad_norm": 1.2342840433120728,
|
749 |
+
"learning_rate": 5.892192192192193e-06,
|
750 |
+
"loss": 0.2461,
|
751 |
+
"step": 47000
|
752 |
+
},
|
753 |
+
{
|
754 |
+
"epoch": 7.132132132132132,
|
755 |
+
"grad_norm": 0.11590774357318878,
|
756 |
+
"learning_rate": 5.742042042042042e-06,
|
757 |
+
"loss": 0.2341,
|
758 |
+
"step": 47500
|
759 |
+
},
|
760 |
+
{
|
761 |
+
"epoch": 7.207207207207207,
|
762 |
+
"grad_norm": 0.7022895216941833,
|
763 |
+
"learning_rate": 5.592192192192193e-06,
|
764 |
+
"loss": 0.2617,
|
765 |
+
"step": 48000
|
766 |
+
},
|
767 |
+
{
|
768 |
+
"epoch": 7.282282282282282,
|
769 |
+
"grad_norm": 0.6305549740791321,
|
770 |
+
"learning_rate": 5.442042042042043e-06,
|
771 |
+
"loss": 0.2379,
|
772 |
+
"step": 48500
|
773 |
+
},
|
774 |
+
{
|
775 |
+
"epoch": 7.357357357357357,
|
776 |
+
"grad_norm": 1.8172821998596191,
|
777 |
+
"learning_rate": 5.291891891891892e-06,
|
778 |
+
"loss": 0.2402,
|
779 |
+
"step": 49000
|
780 |
+
},
|
781 |
+
{
|
782 |
+
"epoch": 7.4324324324324325,
|
783 |
+
"grad_norm": 5.735633373260498,
|
784 |
+
"learning_rate": 5.1417417417417425e-06,
|
785 |
+
"loss": 0.2468,
|
786 |
+
"step": 49500
|
787 |
+
},
|
788 |
+
{
|
789 |
+
"epoch": 7.5075075075075075,
|
790 |
+
"grad_norm": 0.6238136291503906,
|
791 |
+
"learning_rate": 4.991891891891893e-06,
|
792 |
+
"loss": 0.2284,
|
793 |
+
"step": 50000
|
794 |
+
},
|
795 |
+
{
|
796 |
+
"epoch": 7.5825825825825826,
|
797 |
+
"grad_norm": 1.6428614854812622,
|
798 |
+
"learning_rate": 4.841741741741742e-06,
|
799 |
+
"loss": 0.2242,
|
800 |
+
"step": 50500
|
801 |
+
},
|
802 |
+
{
|
803 |
+
"epoch": 7.657657657657658,
|
804 |
+
"grad_norm": 3.6709420680999756,
|
805 |
+
"learning_rate": 4.691591591591592e-06,
|
806 |
+
"loss": 0.2313,
|
807 |
+
"step": 51000
|
808 |
+
},
|
809 |
+
{
|
810 |
+
"epoch": 7.732732732732733,
|
811 |
+
"grad_norm": 1.36298406124115,
|
812 |
+
"learning_rate": 4.541441441441442e-06,
|
813 |
+
"loss": 0.2372,
|
814 |
+
"step": 51500
|
815 |
+
},
|
816 |
+
{
|
817 |
+
"epoch": 7.807807807807808,
|
818 |
+
"grad_norm": 1.058044672012329,
|
819 |
+
"learning_rate": 4.391291291291292e-06,
|
820 |
+
"loss": 0.2267,
|
821 |
+
"step": 52000
|
822 |
+
},
|
823 |
+
{
|
824 |
+
"epoch": 7.882882882882883,
|
825 |
+
"grad_norm": 9.829523086547852,
|
826 |
+
"learning_rate": 4.241441441441442e-06,
|
827 |
+
"loss": 0.2695,
|
828 |
+
"step": 52500
|
829 |
+
},
|
830 |
+
{
|
831 |
+
"epoch": 7.957957957957958,
|
832 |
+
"grad_norm": 6.040225982666016,
|
833 |
+
"learning_rate": 4.091591591591592e-06,
|
834 |
+
"loss": 0.2359,
|
835 |
+
"step": 53000
|
836 |
+
},
|
837 |
+
{
|
838 |
+
"epoch": 8.0,
|
839 |
+
"eval_accuracy": 0.9175675675675675,
|
840 |
+
"eval_f1": 0.9189099368560983,
|
841 |
+
"eval_loss": 0.2778928279876709,
|
842 |
+
"eval_precision": 0.9068547064611348,
|
843 |
+
"eval_recall": 0.9312899966318626,
|
844 |
+
"eval_runtime": 6.9239,
|
845 |
+
"eval_samples_per_second": 855.009,
|
846 |
+
"eval_steps_per_second": 106.876,
|
847 |
+
"step": 53280
|
848 |
+
},
|
849 |
+
{
|
850 |
+
"epoch": 8.033033033033034,
|
851 |
+
"grad_norm": 0.8012737035751343,
|
852 |
+
"learning_rate": 3.941441441441442e-06,
|
853 |
+
"loss": 0.2353,
|
854 |
+
"step": 53500
|
855 |
+
},
|
856 |
+
{
|
857 |
+
"epoch": 8.108108108108109,
|
858 |
+
"grad_norm": 33.150291442871094,
|
859 |
+
"learning_rate": 3.7912912912912915e-06,
|
860 |
+
"loss": 0.2411,
|
861 |
+
"step": 54000
|
862 |
+
},
|
863 |
+
{
|
864 |
+
"epoch": 8.183183183183184,
|
865 |
+
"grad_norm": 0.2979067265987396,
|
866 |
+
"learning_rate": 3.6411411411411413e-06,
|
867 |
+
"loss": 0.2546,
|
868 |
+
"step": 54500
|
869 |
+
},
|
870 |
+
{
|
871 |
+
"epoch": 8.258258258258259,
|
872 |
+
"grad_norm": 0.2562500536441803,
|
873 |
+
"learning_rate": 3.490990990990991e-06,
|
874 |
+
"loss": 0.2189,
|
875 |
+
"step": 55000
|
876 |
+
},
|
877 |
+
{
|
878 |
+
"epoch": 8.333333333333334,
|
879 |
+
"grad_norm": 0.5810413360595703,
|
880 |
+
"learning_rate": 3.340840840840841e-06,
|
881 |
+
"loss": 0.2389,
|
882 |
+
"step": 55500
|
883 |
+
},
|
884 |
+
{
|
885 |
+
"epoch": 8.408408408408409,
|
886 |
+
"grad_norm": 15.872923851013184,
|
887 |
+
"learning_rate": 3.190690690690691e-06,
|
888 |
+
"loss": 0.2512,
|
889 |
+
"step": 56000
|
890 |
+
},
|
891 |
+
{
|
892 |
+
"epoch": 8.483483483483484,
|
893 |
+
"grad_norm": 1.3113569021224976,
|
894 |
+
"learning_rate": 3.040540540540541e-06,
|
895 |
+
"loss": 0.2436,
|
896 |
+
"step": 56500
|
897 |
+
},
|
898 |
+
{
|
899 |
+
"epoch": 8.558558558558559,
|
900 |
+
"grad_norm": 13.303168296813965,
|
901 |
+
"learning_rate": 2.890690690690691e-06,
|
902 |
+
"loss": 0.2288,
|
903 |
+
"step": 57000
|
904 |
+
},
|
905 |
+
{
|
906 |
+
"epoch": 8.633633633633634,
|
907 |
+
"grad_norm": 22.601531982421875,
|
908 |
+
"learning_rate": 2.7405405405405404e-06,
|
909 |
+
"loss": 0.2466,
|
910 |
+
"step": 57500
|
911 |
+
},
|
912 |
+
{
|
913 |
+
"epoch": 8.70870870870871,
|
914 |
+
"grad_norm": 0.9369896054267883,
|
915 |
+
"learning_rate": 2.5903903903903906e-06,
|
916 |
+
"loss": 0.2633,
|
917 |
+
"step": 58000
|
918 |
+
},
|
919 |
+
{
|
920 |
+
"epoch": 8.783783783783784,
|
921 |
+
"grad_norm": 10.832354545593262,
|
922 |
+
"learning_rate": 2.4402402402402404e-06,
|
923 |
+
"loss": 0.2297,
|
924 |
+
"step": 58500
|
925 |
+
},
|
926 |
+
{
|
927 |
+
"epoch": 8.85885885885886,
|
928 |
+
"grad_norm": 7.654428958892822,
|
929 |
+
"learning_rate": 2.2903903903903905e-06,
|
930 |
+
"loss": 0.2336,
|
931 |
+
"step": 59000
|
932 |
+
},
|
933 |
+
{
|
934 |
+
"epoch": 8.933933933933934,
|
935 |
+
"grad_norm": 20.344663619995117,
|
936 |
+
"learning_rate": 2.1402402402402403e-06,
|
937 |
+
"loss": 0.2386,
|
938 |
+
"step": 59500
|
939 |
+
},
|
940 |
+
{
|
941 |
+
"epoch": 9.0,
|
942 |
+
"eval_accuracy": 0.9175675675675675,
|
943 |
+
"eval_f1": 0.9182579564489112,
|
944 |
+
"eval_loss": 0.2806470990180969,
|
945 |
+
"eval_precision": 0.9133622125958014,
|
946 |
+
"eval_recall": 0.9232064668238464,
|
947 |
+
"eval_runtime": 6.8833,
|
948 |
+
"eval_samples_per_second": 860.047,
|
949 |
+
"eval_steps_per_second": 107.506,
|
950 |
+
"step": 59940
|
951 |
+
},
|
952 |
+
{
|
953 |
+
"epoch": 9.00900900900901,
|
954 |
+
"grad_norm": 8.42081069946289,
|
955 |
+
"learning_rate": 1.99009009009009e-06,
|
956 |
+
"loss": 0.2485,
|
957 |
+
"step": 60000
|
958 |
+
},
|
959 |
+
{
|
960 |
+
"epoch": 9.084084084084084,
|
961 |
+
"grad_norm": 25.98297119140625,
|
962 |
+
"learning_rate": 1.83993993993994e-06,
|
963 |
+
"loss": 0.2313,
|
964 |
+
"step": 60500
|
965 |
+
},
|
966 |
+
{
|
967 |
+
"epoch": 9.15915915915916,
|
968 |
+
"grad_norm": 19.054916381835938,
|
969 |
+
"learning_rate": 1.6900900900900902e-06,
|
970 |
+
"loss": 0.2339,
|
971 |
+
"step": 61000
|
972 |
+
},
|
973 |
+
{
|
974 |
+
"epoch": 9.234234234234235,
|
975 |
+
"grad_norm": 17.01715850830078,
|
976 |
+
"learning_rate": 1.53993993993994e-06,
|
977 |
+
"loss": 0.2264,
|
978 |
+
"step": 61500
|
979 |
+
},
|
980 |
+
{
|
981 |
+
"epoch": 9.30930930930931,
|
982 |
+
"grad_norm": 0.13271598517894745,
|
983 |
+
"learning_rate": 1.38978978978979e-06,
|
984 |
+
"loss": 0.2224,
|
985 |
+
"step": 62000
|
986 |
+
},
|
987 |
+
{
|
988 |
+
"epoch": 9.384384384384385,
|
989 |
+
"grad_norm": 0.999711275100708,
|
990 |
+
"learning_rate": 1.2396396396396398e-06,
|
991 |
+
"loss": 0.2321,
|
992 |
+
"step": 62500
|
993 |
+
},
|
994 |
+
{
|
995 |
+
"epoch": 9.45945945945946,
|
996 |
+
"grad_norm": 13.851149559020996,
|
997 |
+
"learning_rate": 1.0897897897897899e-06,
|
998 |
+
"loss": 0.2227,
|
999 |
+
"step": 63000
|
1000 |
+
},
|
1001 |
+
{
|
1002 |
+
"epoch": 9.534534534534535,
|
1003 |
+
"grad_norm": 0.29212486743927,
|
1004 |
+
"learning_rate": 9.396396396396397e-07,
|
1005 |
+
"loss": 0.2307,
|
1006 |
+
"step": 63500
|
1007 |
+
},
|
1008 |
+
{
|
1009 |
+
"epoch": 9.60960960960961,
|
1010 |
+
"grad_norm": 2.285646438598633,
|
1011 |
+
"learning_rate": 7.894894894894896e-07,
|
1012 |
+
"loss": 0.2517,
|
1013 |
+
"step": 64000
|
1014 |
+
},
|
1015 |
+
{
|
1016 |
+
"epoch": 9.684684684684685,
|
1017 |
+
"grad_norm": 0.19670137763023376,
|
1018 |
+
"learning_rate": 6.393393393393393e-07,
|
1019 |
+
"loss": 0.2697,
|
1020 |
+
"step": 64500
|
1021 |
+
},
|
1022 |
+
{
|
1023 |
+
"epoch": 9.75975975975976,
|
1024 |
+
"grad_norm": 19.214984893798828,
|
1025 |
+
"learning_rate": 4.891891891891892e-07,
|
1026 |
+
"loss": 0.2662,
|
1027 |
+
"step": 65000
|
1028 |
+
},
|
1029 |
+
{
|
1030 |
+
"epoch": 9.834834834834835,
|
1031 |
+
"grad_norm": 0.19054338335990906,
|
1032 |
+
"learning_rate": 3.393393393393394e-07,
|
1033 |
+
"loss": 0.217,
|
1034 |
+
"step": 65500
|
1035 |
+
},
|
1036 |
+
{
|
1037 |
+
"epoch": 9.90990990990991,
|
1038 |
+
"grad_norm": 23.02910804748535,
|
1039 |
+
"learning_rate": 1.8918918918918921e-07,
|
1040 |
+
"loss": 0.2481,
|
1041 |
+
"step": 66000
|
1042 |
+
},
|
1043 |
+
{
|
1044 |
+
"epoch": 9.984984984984985,
|
1045 |
+
"grad_norm": 0.41014552116394043,
|
1046 |
+
"learning_rate": 3.903903903903904e-08,
|
1047 |
+
"loss": 0.2379,
|
1048 |
+
"step": 66500
|
1049 |
+
},
|
1050 |
+
{
|
1051 |
+
"epoch": 10.0,
|
1052 |
+
"eval_accuracy": 0.9177364864864865,
|
1053 |
+
"eval_f1": 0.9184391224250544,
|
1054 |
+
"eval_loss": 0.2841491401195526,
|
1055 |
+
"eval_precision": 0.9133910726182545,
|
1056 |
+
"eval_recall": 0.9235432805658471,
|
1057 |
+
"eval_runtime": 6.9645,
|
1058 |
+
"eval_samples_per_second": 850.023,
|
1059 |
+
"eval_steps_per_second": 106.253,
|
1060 |
+
"step": 66600
|
1061 |
+
}
|
1062 |
+
],
|
1063 |
+
"logging_steps": 500,
|
1064 |
+
"max_steps": 66600,
|
1065 |
+
"num_input_tokens_seen": 0,
|
1066 |
+
"num_train_epochs": 10,
|
1067 |
+
"save_steps": 500,
|
1068 |
+
"stateful_callbacks": {
|
1069 |
+
"TrainerControl": {
|
1070 |
+
"args": {
|
1071 |
+
"should_epoch_stop": false,
|
1072 |
+
"should_evaluate": false,
|
1073 |
+
"should_log": false,
|
1074 |
+
"should_save": true,
|
1075 |
+
"should_training_stop": true
|
1076 |
+
},
|
1077 |
+
"attributes": {}
|
1078 |
+
}
|
1079 |
+
},
|
1080 |
+
"total_flos": 3.4922753261568e+16,
|
1081 |
+
"train_batch_size": 8,
|
1082 |
+
"trial_name": null,
|
1083 |
+
"trial_params": null
|
1084 |
+
}
|
04-gene-sft/gpt2_lora_text_classification/checkpoint-66600/training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c0e641b388947ac47c75e10e6525af9bbc8a067f3617ad53397af08cf2c7e505
|
3 |
+
size 5304
|