ga89tiy committed on
Commit 1db0e44
1 Parent(s): e7184d5
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50)
  1. LLAVA_Biovil/cog.yaml +0 -37
  2. LLAVA_Biovil/install.md +0 -6
  3. LLAVA_Biovil/predict.py +0 -157
  4. LLAVA_Biovil/pyproject.toml +0 -36
  5. LLAVA_Biovil/scripts/convert_gqa_for_eval.py +0 -18
  6. LLAVA_Biovil/scripts/convert_mmbench_for_submission.py +0 -27
  7. LLAVA_Biovil/scripts/convert_mmvet_for_eval.py +0 -18
  8. LLAVA_Biovil/scripts/convert_seed_for_submission.py +0 -74
  9. LLAVA_Biovil/scripts/convert_sqa_to_llava.py +0 -88
  10. LLAVA_Biovil/scripts/convert_sqa_to_llava_base_prompt.py +0 -334
  11. LLAVA_Biovil/scripts/convert_vizwiz_for_submission.py +0 -47
  12. LLAVA_Biovil/scripts/convert_vqav2_for_submission.py +0 -56
  13. LLAVA_Biovil/scripts/extract_mm_projector.py +0 -47
  14. LLAVA_Biovil/scripts/finetune.sh +0 -48
  15. LLAVA_Biovil/scripts/finetune_full_schedule.sh +0 -48
  16. LLAVA_Biovil/scripts/finetune_lora.sh +0 -49
  17. LLAVA_Biovil/scripts/finetune_qlora.sh +0 -50
  18. LLAVA_Biovil/scripts/finetune_sqa.sh +0 -36
  19. LLAVA_Biovil/scripts/merge_lora_weights.py +0 -22
  20. LLAVA_Biovil/scripts/pretrain.sh +0 -46
  21. LLAVA_Biovil/scripts/pretrain_xformers.sh +0 -44
  22. LLAVA_Biovil/scripts/sqa_eval_batch.sh +0 -13
  23. LLAVA_Biovil/scripts/sqa_eval_gather.sh +0 -18
  24. LLAVA_Biovil/scripts/v1_5/eval/gqa.sh +0 -39
  25. LLAVA_Biovil/scripts/v1_5/eval/llavabench.sh +0 -23
  26. LLAVA_Biovil/scripts/v1_5/eval/mmbench.sh +0 -19
  27. LLAVA_Biovil/scripts/v1_5/eval/mmbench_cn.sh +0 -20
  28. LLAVA_Biovil/scripts/v1_5/eval/mme.sh +0 -17
  29. LLAVA_Biovil/scripts/v1_5/eval/mmvet.sh +0 -16
  30. LLAVA_Biovil/scripts/v1_5/eval/pope.sh +0 -14
  31. LLAVA_Biovil/scripts/v1_5/eval/qbench.sh +0 -18
  32. LLAVA_Biovil/scripts/v1_5/eval/qbench_zh.sh +0 -20
  33. LLAVA_Biovil/scripts/v1_5/eval/seed.sh +0 -39
  34. LLAVA_Biovil/scripts/v1_5/eval/sqa.sh +0 -16
  35. LLAVA_Biovil/scripts/v1_5/eval/textvqa.sh +0 -13
  36. LLAVA_Biovil/scripts/v1_5/eval/vizwiz.sh +0 -14
  37. LLAVA_Biovil/scripts/v1_5/eval/vqav2.sh +0 -36
  38. LLAVA_Biovil/scripts/v1_5/finetune.sh +0 -37
  39. LLAVA_Biovil/scripts/v1_5/finetune_lora.sh +0 -38
  40. LLAVA_Biovil/scripts/v1_5/finetune_task.sh +0 -36
  41. LLAVA_Biovil/scripts/v1_5/finetune_task_lora.sh +0 -37
  42. LLAVA_Biovil/scripts/v1_5/pretrain.sh +0 -35
  43. LLAVA_Biovil/slurm_config.conf +0 -60
  44. LLAVA_Biovil/slurm_config_biovil_frozen.conf +0 -60
  45. LLAVA_Biovil/slurm_config_biovil_frozen_v5.conf +0 -60
  46. LLAVA_Biovil/slurm_config_biovil_unfrozen.conf +0 -61
  47. LLAVA_Biovil/slurm_config_biovil_unfrozen_v5.conf +0 -61
  48. LLAVA_Biovil/slurm_config_llavamed.conf +0 -61
  49. LLAVA_Biovil/slurm_config_ms_cxr_t.conf +0 -61
  50. LLAVA_Biovil/slurm_config_pretrain.conf +0 -61
LLAVA_Biovil/cog.yaml DELETED
@@ -1,37 +0,0 @@
- # Configuration for Cog ⚙️
- # Reference: https://github.com/replicate/cog/blob/main/docs/yaml.md
-
- build:
- gpu: true
-
- python_version: "3.11"
-
- python_packages:
- - "torch==2.0.1"
- - "accelerate==0.21.0"
- - "bitsandbytes==0.41.0"
- - "deepspeed==0.9.5"
- - "einops-exts==0.0.4"
- - "einops==0.6.1"
- - "gradio==3.35.2"
- - "gradio_client==0.2.9"
- - "httpx==0.24.0"
- - "markdown2==2.4.10"
- - "numpy==1.26.0"
- - "peft==0.4.0"
- - "scikit-learn==1.2.2"
- - "sentencepiece==0.1.99"
- - "shortuuid==1.0.11"
- - "timm==0.6.13"
- - "tokenizers==0.13.3"
- - "torch==2.0.1"
- - "torchvision==0.15.2"
- - "transformers==4.31.0"
- - "wandb==0.15.12"
- - "wavedrom==2.0.3.post3"
- - "Pygments==2.16.1"
- run:
- - curl -o /usr/local/bin/pget -L "https://github.com/replicate/pget/releases/download/v0.0.3/pget" && chmod +x /usr/local/bin/pget
-
- # predict.py defines how predictions are run on your model
- predict: "predict.py:Predictor"
LLAVA_Biovil/install.md DELETED
@@ -1,6 +0,0 @@
- step 1: clone Llava
- step 2: git clone https://github.com/Dao-AILab/flash-attention.git
- step 3: conda install pytorch==2.0.1 torchvision==0.15.2 torchaudio==2.0.2 pytorch-cuda=11.7 -c pytorch -c nvidia
- step 4: pip install -e .
- step 5: pip install -e ".[train]"
- step 6: in flash attention folder, run: python setup.py install
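
For reference, the six deleted steps collapse into roughly the following shell sequence. This is a sketch, not part of the commit: the LLaVA repository URL is assumed (taken from the Bug Tracker link in the deleted pyproject.toml), and conda plus CUDA 11.7 are assumed to be available.

# Hypothetical consolidation of the deleted install.md steps
git clone https://github.com/haotian-liu/LLaVA.git && cd LLaVA          # step 1 (repo URL assumed)
git clone https://github.com/Dao-AILab/flash-attention.git              # step 2
conda install pytorch==2.0.1 torchvision==0.15.2 torchaudio==2.0.2 pytorch-cuda=11.7 -c pytorch -c nvidia   # step 3
pip install -e .                                                         # step 4: base package
pip install -e ".[train]"                                                # step 5: training extras
(cd flash-attention && python setup.py install)                          # step 6: build FlashAttention from source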
LLAVA_Biovil/predict.py DELETED
@@ -1,157 +0,0 @@
- import torch
-
- from llava import IMAGE_TOKEN_INDEX, DEFAULT_IMAGE_TOKEN
- from llava import conv_templates, SeparatorStyle
- from llava import load_pretrained_model
- from llava import disable_torch_init
- from llava import tokenizer_image_token, KeywordsStoppingCriteria
- from transformers.generation.streamers import TextIteratorStreamer
-
- from PIL import Image
-
- import requests
- from io import BytesIO
-
- from cog import BasePredictor, Input, Path, ConcatenateIterator
- import time
- import subprocess
- from threading import Thread
-
- import os
- os.environ["HUGGINGFACE_HUB_CACHE"] = os.getcwd() + "/weights"
-
- # url for the weights mirror
- REPLICATE_WEIGHTS_URL = "https://weights.replicate.delivery/default"
- # files to download from the weights mirrors
- weights = [
- {
- "dest": "liuhaotian/llava-v1.5-13b",
- # git commit hash from huggingface
- "src": "llava-v1.5-13b/006818fc465ebda4c003c0998674d9141d8d95f8",
- "files": [
- "config.json",
- "generation_config.json",
- "pytorch_model-00001-of-00003.bin",
- "pytorch_model-00002-of-00003.bin",
- "pytorch_model-00003-of-00003.bin",
- "pytorch_model.bin.index.json",
- "special_tokens_map.json",
- "tokenizer.model",
- "tokenizer_config.json",
- ]
- },
- {
- "dest": "openai/clip-vit-large-patch14-336",
- "src": "clip-vit-large-patch14-336/ce19dc912ca5cd21c8a653c79e251e808ccabcd1",
- "files": [
- "config.json",
- "preprocessor_config.json",
- "pytorch_model.bin"
- ],
- }
- ]
-
- def download_json(url: str, dest: Path):
- res = requests.get(url, allow_redirects=True)
- if res.status_code == 200 and res.content:
- with dest.open("wb") as f:
- f.write(res.content)
- else:
- print(f"Failed to download {url}. Status code: {res.status_code}")
-
- def download_weights(baseurl: str, basedest: str, files: list[str]):
- basedest = Path(basedest)
- start = time.time()
- print("downloading to: ", basedest)
- basedest.mkdir(parents=True, exist_ok=True)
- for f in files:
- dest = basedest / f
- url = os.path.join(REPLICATE_WEIGHTS_URL, baseurl, f)
- if not dest.exists():
- print("downloading url: ", url)
- if dest.suffix == ".json":
- download_json(url, dest)
- else:
- subprocess.check_call(["pget", url, str(dest)], close_fds=False)
- print("downloading took: ", time.time() - start)
-
- class Predictor(BasePredictor):
- def setup(self) -> None:
- """Load the model into memory to make running multiple predictions efficient"""
- for weight in weights:
- download_weights(weight["src"], weight["dest"], weight["files"])
- disable_torch_init()
-
- self.tokenizer, self.model, self.image_processor, self.context_len = load_pretrained_model("liuhaotian/llava-v1.5-13b", model_name="llava-v1.5-13b", model_base=None, load_8bit=False, load_4bit=False)
-
- def predict(
- self,
- image: Path = Input(description="Input image"),
- prompt: str = Input(description="Prompt to use for text generation"),
- top_p: float = Input(description="When decoding text, samples from the top p percentage of most likely tokens; lower to ignore less likely tokens", ge=0.0, le=1.0, default=1.0),
- temperature: float = Input(description="Adjusts randomness of outputs, greater than 1 is random and 0 is deterministic", default=0.2, ge=0.0),
- max_tokens: int = Input(description="Maximum number of tokens to generate. A word is generally 2-3 tokens", default=1024, ge=0),
- ) -> ConcatenateIterator[str]:
- """Run a single prediction on the model"""
-
- conv_mode = "llava_v1"
- conv = conv_templates[conv_mode].copy()
-
- image_data = load_image(str(image))
- image_tensor = self.image_processor.preprocess(image_data, return_tensors='pt')['pixel_values'].half().cuda()
-
- # loop start
-
- # just one turn, always prepend image token
- inp = DEFAULT_IMAGE_TOKEN + '\n' + prompt
- conv.append_message(conv.roles[0], inp)
-
- conv.append_message(conv.roles[1], None)
- prompt = conv.get_prompt()
-
- input_ids = tokenizer_image_token(prompt, self.tokenizer, IMAGE_TOKEN_INDEX, return_tensors='pt').unsqueeze(0).cuda()
- stop_str = conv.sep if conv.sep_style != SeparatorStyle.TWO else conv.sep2
- keywords = [stop_str]
- stopping_criteria = KeywordsStoppingCriteria(keywords, self.tokenizer, input_ids)
- streamer = TextIteratorStreamer(self.tokenizer, skip_prompt=True, timeout=20.0)
-
- with torch.inference_mode():
- thread = Thread(target=self.model.generate, kwargs=dict(
- inputs=input_ids,
- images=image_tensor,
- do_sample=True,
- temperature=temperature,
- top_p=top_p,
- max_new_tokens=max_tokens,
- streamer=streamer,
- use_cache=True,
- stopping_criteria=[stopping_criteria]))
- thread.start()
- # workaround: second-to-last token is always " "
- # but we want to keep it if it's not the second-to-last token
- prepend_space = False
- for new_text in streamer:
- if new_text == " ":
- prepend_space = True
- continue
- if new_text.endswith(stop_str):
- new_text = new_text[:-len(stop_str)].strip()
- prepend_space = False
- elif prepend_space:
- new_text = " " + new_text
- prepend_space = False
- if len(new_text):
- yield new_text
- if prepend_space:
- yield " "
- thread.join()
-
-
- def load_image(image_file):
- if image_file.startswith('http') or image_file.startswith('https'):
- response = requests.get(image_file)
- image = Image.open(BytesIO(response.content)).convert('RGB')
- else:
- image = Image.open(image_file).convert('RGB')
- return image
-
LLAVA_Biovil/pyproject.toml DELETED
@@ -1,36 +0,0 @@
- [build-system]
- requires = ["setuptools>=61.0"]
- build-backend = "setuptools.build_meta"
-
- [project]
- name = "llava"
- version = "1.1.3"
- description = "Towards GPT-4 like large language and visual assistant."
- readme = "README.md"
- requires-python = ">=3.8"
- classifiers = [
- "Programming Language :: Python :: 3",
- "License :: OSI Approved :: Apache Software License",
- ]
- dependencies = [
- "torch==2.0.1", "torchvision==0.15.2",
- "transformers==4.31.0", "tokenizers>=0.12.1,<0.14", "sentencepiece==0.1.99", "shortuuid",
- "accelerate==0.21.0", "peft==0.4.0", "bitsandbytes==0.41.0",
- "pydantic<2,>=1", "markdown2[all]", "numpy", "scikit-learn==1.2.2",
- "gradio==3.35.2", "gradio_client==0.2.9",
- "requests", "httpx==0.24.0", "uvicorn", "fastapi",
- "einops==0.6.1", "einops-exts==0.0.4", "timm==0.6.13",
- ]
-
- [project.optional-dependencies]
- train = ["deepspeed==0.9.5", "ninja", "wandb"]
-
- [project.urls]
- "Homepage" = "https://llava-vl.github.io"
- "Bug Tracker" = "https://github.com/haotian-liu/LLaVA/issues"
-
- [tool.setuptools.packages.find]
- exclude = ["assets*", "benchmark*", "docs", "dist*", "playground*", "scripts*", "tests*"]
-
- [tool.wheel]
- exclude = ["assets*", "benchmark*", "docs", "dist*", "playground*", "scripts*", "tests*"]
LLAVA_Biovil/scripts/convert_gqa_for_eval.py DELETED
@@ -1,18 +0,0 @@
- import os
- import json
- import argparse
-
- parser = argparse.ArgumentParser()
- parser.add_argument("--src", type=str)
- parser.add_argument("--dst", type=str)
- args = parser.parse_args()
-
- all_answers = []
- for line_idx, line in enumerate(open(args.src)):
- res = json.loads(line)
- question_id = res['question_id']
- text = res['text'].rstrip('.').lower()
- all_answers.append({"questionId": question_id, "prediction": text})
-
- with open(args.dst, 'w') as f:
- json.dump(all_answers, f)
LLAVA_Biovil/scripts/convert_mmbench_for_submission.py DELETED
@@ -1,27 +0,0 @@
- import os
- import json
- import argparse
- import pandas as pd
-
- def get_args():
- parser = argparse.ArgumentParser()
- parser.add_argument("--annotation-file", type=str, required=True)
- parser.add_argument("--result-dir", type=str, required=True)
- parser.add_argument("--upload-dir", type=str, required=True)
- parser.add_argument("--experiment", type=str, required=True)
-
- return parser.parse_args()
-
- if __name__ == "__main__":
- args = get_args()
-
- df = pd.read_table(args.annotation_file)
-
- cur_df = df.copy()
- cur_df = cur_df.drop(columns=['hint', 'category', 'source', 'image', 'comment', 'l2-category'])
- cur_df.insert(6, 'prediction', None)
- for pred in open(os.path.join(args.result_dir, f"{args.experiment}.jsonl")):
- pred = json.loads(pred)
- cur_df.loc[df['index'] == pred['question_id'], 'prediction'] = pred['text']
-
- cur_df.to_excel(os.path.join(args.upload_dir, f"{args.experiment}.xlsx"), index=False, engine='openpyxl')
LLAVA_Biovil/scripts/convert_mmvet_for_eval.py DELETED
@@ -1,18 +0,0 @@
- import os
- import json
- import argparse
-
- parser = argparse.ArgumentParser()
- parser.add_argument("--src", type=str)
- parser.add_argument("--dst", type=str)
- args = parser.parse_args()
-
- cur_result = {}
-
- for line in open(args.src):
- data = json.loads(line)
- qid = data['question_id']
- cur_result[f'v1_{qid}'] = data['text']
-
- with open(args.dst, 'w') as f:
- json.dump(cur_result, f, indent=2)
LLAVA_Biovil/scripts/convert_seed_for_submission.py DELETED
@@ -1,74 +0,0 @@
- import os
- import json
- import argparse
-
-
- def get_args():
- parser = argparse.ArgumentParser()
- parser.add_argument("--annotation-file", type=str)
- parser.add_argument("--result-file", type=str)
- parser.add_argument("--result-upload-file", type=str)
- return parser.parse_args()
-
-
- def eval_single(result_file, eval_only_type=None):
- results = {}
- for line in open(result_file):
- row = json.loads(line)
- results[row['question_id']] = row
-
- type_counts = {}
- correct_counts = {}
- for question_data in data['questions']:
- if eval_only_type is not None and question_data['data_type'] != eval_only_type: continue
- data_type = question_data['question_type_id']
- type_counts[data_type] = type_counts.get(data_type, 0) + 1
- try:
- question_id = int(question_data['question_id'])
- except:
- question_id = question_data['question_id']
- if question_id not in results:
- correct_counts[data_type] = correct_counts.get(data_type, 0)
- continue
- row = results[question_id]
- if row['text'] == question_data['answer']:
- correct_counts[data_type] = correct_counts.get(data_type, 0) + 1
-
- total_count = 0
- total_correct = 0
- for data_type in sorted(type_counts.keys()):
- accuracy = correct_counts[data_type] / type_counts[data_type] * 100
- if eval_only_type is None:
- print(f"{ques_type_id_to_name[data_type]}: {accuracy:.2f}%")
-
- total_count += type_counts[data_type]
- total_correct += correct_counts[data_type]
-
- total_accuracy = total_correct / total_count * 100
- if eval_only_type is None:
- print(f"Total accuracy: {total_accuracy:.2f}%")
- else:
- print(f"{eval_only_type} accuracy: {total_accuracy:.2f}%")
-
- return results
-
- if __name__ == "__main__":
- args = get_args()
- data = json.load(open(args.annotation_file))
- ques_type_id_to_name = {id:n for n,id in data['question_type'].items()}
-
- results = eval_single(args.result_file)
- eval_single(args.result_file, eval_only_type='image')
- eval_single(args.result_file, eval_only_type='video')
-
- with open(args.result_upload_file, 'w') as fp:
- for question in data['questions']:
- qid = question['question_id']
- if qid in results:
- result = results[qid]
- else:
- result = results[int(qid)]
- fp.write(json.dumps({
- 'question_id': qid,
- 'prediction': result['text']
- }) + '\n')
LLAVA_Biovil/scripts/convert_sqa_to_llava.py DELETED
@@ -1,88 +0,0 @@
- import json
- import os
- import fire
- import re
- from convert_sqa_to_llava_base_prompt import build_prompt_chatbot
-
-
- def convert_to_llava(base_dir, split, prompt_format="QCM-LEA"):
- split_indices = json.load(open(os.path.join(base_dir, "pid_splits.json")))[split]
- problems = json.load(open(os.path.join(base_dir, "problems.json")))
-
- split_problems = build_prompt_chatbot(
- problems, split_indices, prompt_format,
- use_caption=False, is_test=False)
-
- target_format = []
- for prob_id, (input, output) in split_problems.items():
- if input.startswith('Question: '):
- input = input.replace('Question: ', '')
- if output.startswith('Answer: '):
- output = output.replace('Answer: ', '')
-
- raw_prob_data = problems[prob_id]
- if raw_prob_data['image'] is None:
- target_format.append({
- "id": prob_id,
- "conversations": [
- {'from': 'human', 'value': f"{input}"},
- {'from': 'gpt', 'value': f"{output}"},
- ],
- })
-
- else:
- target_format.append({
- "id": prob_id,
- "image": os.path.join(prob_id, raw_prob_data['image']),
- "conversations": [
- {'from': 'human', 'value': f"{input}\n<image>"},
- {'from': 'gpt', 'value': f"{output}"},
- ],
- })
-
- print(f'Number of samples: {len(target_format)}')
-
- with open(os.path.join(base_dir, f"llava_{split}_{prompt_format}.json"), "w") as f:
- json.dump(target_format, f, indent=2)
-
-
- def convert_to_jsonl(base_dir, split, prompt_format="QCM-LEPA"):
- split_indices = json.load(open(os.path.join(base_dir, "pid_splits.json")))[split]
- problems = json.load(open(os.path.join(base_dir, "problems.json")))
-
- split_problems = build_prompt_chatbot(
- problems, split_indices, prompt_format,
- use_caption=False, is_test=False)
-
- writer = open(os.path.join(base_dir, f"scienceqa_{split}_{prompt_format}.jsonl"), "w")
- for prob_id, (input, output) in split_problems.items():
- if input.startswith('Question: '):
- input = input.replace('Question: ', '')
- if output.startswith('Answer: '):
- output = output.replace('Answer: ', '')
-
- raw_prob_data = problems[prob_id]
- if raw_prob_data['image'] is None:
- data = {
- "id": prob_id,
- "instruction": f"{input}",
- "output": f"{output}",
- }
-
- else:
- data = {
- "id": prob_id,
- "image": os.path.join(prob_id, raw_prob_data['image']),
- "instruction": f"{input}\n<image>",
- "output": f"{output}",
- }
- writer.write(json.dumps(data) + '\n')
- writer.close()
-
-
- def main(task, **kwargs):
- globals()[task](**kwargs)
-
-
- if __name__ == "__main__":
- fire.Fire(main)
LLAVA_Biovil/scripts/convert_sqa_to_llava_base_prompt.py DELETED
@@ -1,334 +0,0 @@
- def get_question_text(problem):
- question = problem['question']
- return question
-
-
- def get_context_text(problem, use_caption):
- txt_context = problem['hint']
- img_context = problem['caption'] if use_caption else ""
- context = " ".join([txt_context, img_context]).strip()
- if context == "":
- context = "N/A"
- return context
-
-
- def get_choice_text(probelm, options):
- choices = probelm['choices']
- choice_list = []
- for i, c in enumerate(choices):
- choice_list.append("({}) {}".format(options[i], c))
- choice_txt = " ".join(choice_list)
- #print(choice_txt)
- return choice_txt
-
-
- def get_answer(problem, options):
- return options[problem['answer']]
-
-
- def get_lecture_text(problem):
- # \\n: GPT-3 can generate the lecture with more tokens.
- lecture = problem['lecture'].replace("\n", "\\n")
- return lecture
-
-
- def get_solution_text(problem):
- # \\n: GPT-3 can generate the solution with more tokens
- solution = problem['solution'].replace("\n", "\\n")
- return solution
-
-
- def create_one_example_chatbot(format, question, context, choice, answer, lecture, solution, test_example=True):
-
- input_format, output_format = format.split("-")
-
- ## Inputs
- if input_format == "CQM":
- input = f"Context: {context}\nQuestion: {question}\nOptions: {choice}\n"
- elif input_format == "QCM":
- input = f"Question: {question}\nContext: {context}\nOptions: {choice}\n"
- # upper bound experiment
- elif input_format == "QCML":
- input = f"Question: {question}\nContext: {context}\nOptions: {choice}\nBECAUSE: {lecture}\n"
- elif input_format == "QCME":
- input = f"Question: {question}\nContext: {context}\nOptions: {choice}\nBECAUSE: {solution}\n"
- elif input_format == "QCMLE":
- input = f"Question: {question}\nContext: {context}\nOptions: {choice}\nBECAUSE: {lecture} {solution}\n"
-
- elif input_format == "QCLM":
- input = f"Question: {question}\nContext: {context}\nBECAUSE: {lecture}\nOptions: {choice}\n"
- elif input_format == "QCEM":
- input = f"Question: {question}\nContext: {context}\nBECAUSE: {solution}\nOptions: {choice}\n"
- elif input_format == "QCLEM":
- input = f"Question: {question}\nContext: {context}\nBECAUSE: {lecture} {solution}\nOptions: {choice}\n"
-
- # Outputs
- if test_example:
- output = "Answer:"
- elif output_format == 'A':
- output = f"Answer: The answer is {answer}."
-
- elif output_format == 'AL':
- output = f"Answer: The answer is {answer}. BECAUSE: {solution}"
- elif output_format == 'AE':
- output = f"Answer: The answer is {answer}. BECAUSE: {lecture}"
- elif output_format == 'ALE':
- output = f"Answer: The answer is {answer}. BECAUSE: {lecture} {solution}"
- elif output_format == 'AEL':
- output = f"Answer: The answer is {answer}. BECAUSE: {solution} {lecture}"
-
- elif output_format == 'LA':
- output = f"Answer: {lecture} The answer is {answer}."
- elif output_format == 'EA':
- output = f"Answer: {solution} The answer is {answer}."
- elif output_format == 'LEA':
- output = f"Answer: {lecture} {solution} The answer is {answer}."
- elif output_format == 'ELA':
- output = f"Answer: {solution} {lecture} The answer is {answer}."
- elif output_format == 'LEPA':
- output = ''
- if len(lecture.strip()) > 0:
- output += f"LECTURE: {lecture}\n"
- if len(solution.strip()) > 0:
- output += f"SOLUTION: {solution}\n"
- output += '###\n'
- output += f"ANSWER: {answer}."
-
- input = input.replace(" ", " ").strip()
- output = output.replace(" ", " ").strip()
- if input.endswith("BECAUSE:"):
- input = input.replace("BECAUSE:", "").strip()
- if output.endswith("BECAUSE:"):
- output = output.replace("BECAUSE:", "").strip()
- return input, output
-
-
- def create_one_example(format, question, context, choice, answer, lecture, solution, test_example=True):
-
- input_format, output_format = format.split("-")
-
- ## Inputs
- if input_format == "CQM":
- input = f"Context: {context}\nQuestion: {question}\nOptions: {choice}\n"
- elif input_format == "QCM":
- input = f"Question: {question}\nContext: {context}\nOptions: {choice}\n"
- # upper bound experiment
- elif input_format == "QCML":
- input = f"Question: {question}\nContext: {context}\nOptions: {choice}\nBECAUSE: {lecture}\n"
- elif input_format == "QCME":
- input = f"Question: {question}\nContext: {context}\nOptions: {choice}\nBECAUSE: {solution}\n"
- elif input_format == "QCMLE":
- input = f"Question: {question}\nContext: {context}\nOptions: {choice}\nBECAUSE: {lecture} {solution}\n"
-
- elif input_format == "QCLM":
- input = f"Question: {question}\nContext: {context}\nBECAUSE: {lecture}\nOptions: {choice}\n"
- elif input_format == "QCEM":
- input = f"Question: {question}\nContext: {context}\nBECAUSE: {solution}\nOptions: {choice}\n"
- elif input_format == "QCLEM":
- input = f"Question: {question}\nContext: {context}\nBECAUSE: {lecture} {solution}\nOptions: {choice}\n"
-
- # Outputs
- if test_example:
- output = "Answer:"
- elif output_format == 'A':
- output = f"Answer: The answer is {answer}."
-
- elif output_format == 'AL':
- output = f"Answer: The answer is {answer}. BECAUSE: {solution}"
- elif output_format == 'AE':
- output = f"Answer: The answer is {answer}. BECAUSE: {lecture}"
- elif output_format == 'ALE':
- output = f"Answer: The answer is {answer}. BECAUSE: {lecture} {solution}"
- elif output_format == 'AEL':
- output = f"Answer: The answer is {answer}. BECAUSE: {solution} {lecture}"
-
- elif output_format == 'LA':
- output = f"Answer: {lecture} The answer is {answer}."
- elif output_format == 'EA':
- output = f"Answer: {solution} The answer is {answer}."
- elif output_format == 'LEA':
- output = f"Answer: {lecture} {solution} The answer is {answer}."
- elif output_format == 'ELA':
- output = f"Answer: {solution} {lecture} The answer is {answer}."
-
- text = input + output
- text = text.replace(" ", " ").strip()
- if text.endswith("BECAUSE:"):
- text = text.replace("BECAUSE:", "").strip()
- return text
-
-
-
- def create_one_example_gpt4(format, question, context, choice, answer, lecture, solution, test_example=True):
-
- input_format, output_format = format.split("-")
-
- ## Inputs
- if input_format == "CQM":
- input = f"Context: {context}\nQuestion: {question}\nOptions: {choice}\n"
- elif input_format == "QCM":
- input = f"Question: {question}\nContext: {context}\nOptions: {choice}\n"
- # upper bound experiment
- elif input_format == "QCML":
- input = f"Question: {question}\nContext: {context}\nOptions: {choice}\nBECAUSE: {lecture}\n"
- elif input_format == "QCME":
- input = f"Question: {question}\nContext: {context}\nOptions: {choice}\nBECAUSE: {solution}\n"
- elif input_format == "QCMLE":
- input = f"Question: {question}\nContext: {context}\nOptions: {choice}\nBECAUSE: {lecture} {solution}\n"
-
- elif input_format == "QCLM":
- input = f"Question: {question}\nContext: {context}\nBECAUSE: {lecture}\nOptions: {choice}\n"
- elif input_format == "QCEM":
- input = f"Question: {question}\nContext: {context}\nBECAUSE: {solution}\nOptions: {choice}\n"
- elif input_format == "QCLEM":
- input = f"Question: {question}\nContext: {context}\nBECAUSE: {lecture} {solution}\nOptions: {choice}\n"
-
- # Outputs
- if test_example:
- output = "Answer:"
- elif output_format == 'A':
- output = f"Answer: The answer is {answer}."
-
- elif output_format == 'AL':
- output = f"Answer: The answer is {answer}. BECAUSE: {solution}"
- elif output_format == 'AE':
- output = f"Answer: The answer is {answer}. BECAUSE: {lecture}"
- elif output_format == 'ALE':
- output = f"Answer: The answer is {answer}. BECAUSE: {lecture} {solution}"
- elif output_format == 'AEL':
- output = f"Answer: The answer is {answer}. BECAUSE: {solution} {lecture}"
-
- elif output_format == 'LA':
- output = f"Answer: {lecture} The answer is {answer}."
- elif output_format == 'EA':
- output = f"Answer: {solution} The answer is {answer}."
- elif output_format == 'LEA':
- output = f"Answer: {lecture} {solution} The answer is {answer}."
- elif output_format == 'ELA':
- output = f"Answer: {solution} {lecture} The answer is {answer}."
-
- input = input.replace(" ", " ").strip()
- output = output.replace(" ", " ").strip()
- if output.endswith("BECAUSE:"):
- output = output.replace("BECAUSE:", "").strip()
-
- user_prompt = {"role": "user", "content": f"Can you explain {input}?"}
- assistant_prompt = {"role": "assistant", "content": f"{output}"}
-
- return user_prompt, assistant_prompt
-
-
- def build_prompt_chatbot(problems, shot_qids, prompt_format, use_caption=False, options=["A", "B", "C", "D", "E"], is_test=False):
- examples = {}
-
- for qid in shot_qids:
- question = get_question_text(problems[qid])
- context = get_context_text(problems[qid], use_caption)
- choice = get_choice_text(problems[qid], options)
- answer = get_answer(problems[qid], options)
- lecture = get_lecture_text(problems[qid]).replace('\\n', '\n')
- solution = get_solution_text(problems[qid]).replace('\\n', '\n')
-
- train_example = create_one_example_chatbot(prompt_format,
- question,
- context,
- choice,
- answer,
- lecture,
- solution,
- test_example=is_test)
- examples[qid] = train_example
- return examples
-
-
- def build_prompt(problems, shot_qids, test_qid, args):
-
- examples = []
-
- # n-shot training examples
- for qid in shot_qids:
- question = get_question_text(problems[qid])
- context = get_context_text(problems[qid], args.use_caption)
- choice = get_choice_text(problems[qid], args.options)
- answer = get_answer(problems[qid], args.options)
- lecture = get_lecture_text(problems[qid])
- solution = get_solution_text(problems[qid])
-
- train_example = create_one_example(args.prompt_format,
- question,
- context,
- choice,
- answer,
- lecture,
- solution,
- test_example=False)
- examples.append(train_example)
-
- # test example
- question = get_question_text(problems[test_qid])
- context = get_context_text(problems[test_qid], args.use_caption)
- choice = get_choice_text(problems[test_qid], args.options)
- answer = get_answer(problems[test_qid], args.options)
- lecture = get_lecture_text(problems[test_qid])
- solution = get_solution_text(problems[test_qid])
-
- test_example = create_one_example(args.prompt_format,
- question,
- context,
- choice,
- answer,
- lecture,
- solution,
- test_example=True)
- examples.append(test_example)
-
- # create the prompt input
- prompt_input = '\n\n'.join(examples)
-
- return prompt_input
-
-
- def build_prompt_gpt4(problems, shot_qids, test_qid, args):
-
- prompt_array = [{"role": "system", "content": "You are a helpful assistant."}]
-
- # n-shot training examples
- for qid in shot_qids:
- question = get_question_text(problems[qid])
- context = get_context_text(problems[qid], args.use_caption)
- choice = get_choice_text(problems[qid], args.options)
- answer = get_answer(problems[qid], args.options)
- lecture = get_lecture_text(problems[qid])
- solution = get_solution_text(problems[qid])
-
- user_prompt, assistant_prompt = create_one_example_gpt4(args.prompt_format,
- question,
- context,
- choice,
- answer,
- lecture,
- solution,
- test_example=False)
- prompt_array.append(user_prompt)
- prompt_array.append(assistant_prompt)
-
- # test example
- question = get_question_text(problems[test_qid])
- context = get_context_text(problems[test_qid], args.use_caption)
- choice = get_choice_text(problems[test_qid], args.options)
- answer = get_answer(problems[test_qid], args.options)
- lecture = get_lecture_text(problems[test_qid])
- solution = get_solution_text(problems[test_qid])
-
- user_prompt, assistant_prompt = create_one_example_gpt4(args.prompt_format,
- question,
- context,
- choice,
- answer,
- lecture,
- solution,
- test_example=True)
- prompt_array.append(user_prompt)
- prompt_array.append(assistant_prompt)
-
- return prompt_array
LLAVA_Biovil/scripts/convert_vizwiz_for_submission.py DELETED
@@ -1,47 +0,0 @@
- import os
- import argparse
- import json
-
- from LLAV.llava.eval.m4c_evaluator import EvalAIAnswerProcessor
-
-
- def parse_args():
- parser = argparse.ArgumentParser()
- parser.add_argument('--annotation-file', type=str, required=True)
- parser.add_argument('--result-file', type=str, required=True)
- parser.add_argument('--result-upload-file', type=str, required=True)
- return parser.parse_args()
-
-
- if __name__ == '__main__':
-
- args = parse_args()
-
- os.makedirs(os.path.dirname(args.result_upload_file), exist_ok=True)
-
- results = []
- error_line = 0
- for line_idx, line in enumerate(open(args.result_file)):
- try:
- results.append(json.loads(line))
- except:
- error_line += 1
- results = {x['question_id']: x['text'] for x in results}
- test_split = [json.loads(line) for line in open(args.annotation_file)]
- split_ids = set([x['question_id'] for x in test_split])
-
- print(f'total results: {len(results)}, total split: {len(test_split)}, error_line: {error_line}')
-
- all_answers = []
-
- answer_processor = EvalAIAnswerProcessor()
-
- for x in test_split:
- assert x['question_id'] in results
- all_answers.append({
- 'image': x['image'],
- 'answer': answer_processor(results[x['question_id']])
- })
-
- with open(args.result_upload_file, 'w') as f:
- json.dump(all_answers, f)
LLAVA_Biovil/scripts/convert_vqav2_for_submission.py DELETED
@@ -1,56 +0,0 @@
- import os
- import argparse
- import json
-
- from LLAV.llava.eval.m4c_evaluator import EvalAIAnswerProcessor
-
-
- def parse_args():
- parser = argparse.ArgumentParser()
- parser.add_argument('--dir', type=str, default="./playground/data/eval/vqav2")
- parser.add_argument('--ckpt', type=str, required=True)
- parser.add_argument('--split', type=str, required=True)
- return parser.parse_args()
-
-
- if __name__ == '__main__':
-
- args = parse_args()
-
- src = os.path.join(args.dir, 'answers', args.split, args.ckpt, 'merge.jsonl')
- test_split = os.path.join(args.dir, 'llava_vqav2_mscoco_test2015.jsonl')
- dst = os.path.join(args.dir, 'answers_upload', args.split, f'{args.ckpt}.json')
- os.makedirs(os.path.dirname(dst), exist_ok=True)
-
- results = []
- error_line = 0
- for line_idx, line in enumerate(open(src)):
- try:
- results.append(json.loads(line))
- except:
- error_line += 1
-
- results = {x['question_id']: x['text'] for x in results}
- test_split = [json.loads(line) for line in open(test_split)]
- split_ids = set([x['question_id'] for x in test_split])
-
- print(f'total results: {len(results)}, total split: {len(test_split)}, error_line: {error_line}')
-
- all_answers = []
-
- answer_processor = EvalAIAnswerProcessor()
-
- for x in test_split:
- if x['question_id'] not in results:
- all_answers.append({
- 'question_id': x['question_id'],
- 'answer': ''
- })
- else:
- all_answers.append({
- 'question_id': x['question_id'],
- 'answer': answer_processor(results[x['question_id']])
- })
-
- with open(dst, 'w') as f:
- json.dump(all_answers, open(dst, 'w'))
LLAVA_Biovil/scripts/extract_mm_projector.py DELETED
@@ -1,47 +0,0 @@
- """
- This is just a utility that I use to extract the projector for quantized models.
- It is NOT necessary at all to train, or run inference/serve demos.
- Use this script ONLY if you fully understand its implications.
- """
-
-
- import os
- import argparse
- import torch
- import json
- from collections import defaultdict
-
-
- def parse_args():
- parser = argparse.ArgumentParser(description='Extract MMProjector weights')
- parser.add_argument('--model-path', type=str, help='model folder')
- parser.add_argument('--output', type=str, help='output file')
- args = parser.parse_args()
- return args
-
-
- if __name__ == '__main__':
- args = parse_args()
-
- keys_to_match = ['mm_projector']
- ckpt_to_key = defaultdict(list)
- try:
- model_indices = json.load(open(os.path.join(args.model_path, 'pytorch_model.bin.index.json')))
- for k, v in model_indices['weight_map'].items():
- if any(key_match in k for key_match in keys_to_match):
- ckpt_to_key[v].append(k)
- except FileNotFoundError:
- # Smaller models or model checkpoints saved by DeepSpeed.
- v = 'pytorch_model.bin'
- for k in torch.load(os.path.join(args.model_path, v), map_location='cpu').keys():
- if any(key_match in k for key_match in keys_to_match):
- ckpt_to_key[v].append(k)
-
- loaded_weights = {}
-
- for ckpt_name, weight_keys in ckpt_to_key.items():
- ckpt = torch.load(os.path.join(args.model_path, ckpt_name), map_location='cpu')
- for k in weight_keys:
- loaded_weights[k] = ckpt[k]
-
- torch.save(loaded_weights, args.output)
LLAVA_Biovil/scripts/finetune.sh DELETED
@@ -1,48 +0,0 @@
- #!/bin/bash
-
- # IMPORTANT: this is the training script for the original LLaVA, NOT FOR LLaVA V1.5!
-
- # Uncomment and set the following variables correspondingly to run this script:
-
- ################## VICUNA ##################
- # PROMPT_VERSION=v1
- # MODEL_VERSION="vicuna-v1-3-7b"
- ################## VICUNA ##################
-
- ################## LLaMA-2 ##################
- # PROMPT_VERSION="llava_llama_2"
- # MODEL_VERSION="llama-2-7b-chat"
- ################## LLaMA-2 ##################
-
- deepspeed llava/train/train_mem.py \
- --deepspeed ./scripts/zero2.json \
- --model_name_or_path ./checkpoints/$MODEL_VERSION \
- --version $PROMPT_VERSION \
- --data_path ./playground/data/llava_instruct_80k.json \
- --image_folder /path/to/coco/train2017 \
- --vision_tower openai/clip-vit-large-patch14 \
- --pretrain_mm_mlp_adapter ./checkpoints/llava-$MODEL_VERSION-pretrain/mm_projector.bin \
- --mm_vision_select_layer -2 \
- --mm_use_im_start_end False \
- --mm_use_im_patch_token False \
- --bf16 True \
- --output_dir ./checkpoints/llava-$MODEL_VERSION-finetune \
- --num_train_epochs 1 \
- --per_device_train_batch_size 16 \
- --per_device_eval_batch_size 4 \
- --gradient_accumulation_steps 1 \
- --evaluation_strategy "no" \
- --save_strategy "steps" \
- --save_steps 50000 \
- --save_total_limit 1 \
- --learning_rate 2e-5 \
- --weight_decay 0. \
- --warmup_ratio 0.03 \
- --lr_scheduler_type "cosine" \
- --logging_steps 1 \
- --tf32 True \
- --model_max_length 2048 \
- --gradient_checkpointing True \
- --dataloader_num_workers 4 \
- --lazy_preprocess True \
- --report_to wandb
LLAVA_Biovil/scripts/finetune_full_schedule.sh DELETED
@@ -1,48 +0,0 @@
- #!/bin/bash
-
- # IMPORTANT: this is the training script for the original LLaVA, NOT FOR LLaVA V1.5!
-
- # Uncomment and set the following variables correspondingly to run this script:
-
- ################## VICUNA ##################
- # PROMPT_VERSION=v1
- # MODEL_VERSION="vicuna-v1-3-7b"
- ################## VICUNA ##################
-
- ################## LLaMA-2 ##################
- # PROMPT_VERSION="llava_llama_2"
- # MODEL_VERSION="llama-2-7b-chat"
- ################## LLaMA-2 ##################
-
- deepspeed llava/train/train_mem.py \
- --deepspeed ./scripts/zero2.json \
- --model_name_or_path ./checkpoints/$MODEL_VERSION \
- --version $PROMPT_VERSION \
- --data_path ./playground/data/llava_instruct_158k.json \
- --image_folder /path/to/coco/train2017 \
- --vision_tower openai/clip-vit-large-patch14 \
- --pretrain_mm_mlp_adapter ./checkpoints/llava-$MODEL_VERSION-pretrain/mm_projector.bin \
- --mm_vision_select_layer -2 \
- --mm_use_im_start_end False \
- --mm_use_im_patch_token False \
- --bf16 True \
- --output_dir ./checkpoints/llava-$MODEL_VERSION-finetune \
- --num_train_epochs 3 \
- --per_device_train_batch_size 16 \
- --per_device_eval_batch_size 4 \
- --gradient_accumulation_steps 1 \
- --evaluation_strategy "no" \
- --save_strategy "steps" \
- --save_steps 50000 \
- --save_total_limit 1 \
- --learning_rate 2e-5 \
- --weight_decay 0. \
- --warmup_ratio 0.03 \
- --lr_scheduler_type "cosine" \
- --logging_steps 1 \
- --tf32 True \
- --model_max_length 2048 \
- --gradient_checkpointing True \
- --dataloader_num_workers 4 \
- --lazy_preprocess True \
- --report_to wandb
LLAVA_Biovil/scripts/finetune_lora.sh DELETED
@@ -1,49 +0,0 @@
- #!/bin/bash
-
- # IMPORTANT: this is the training script for the original LLaVA, NOT FOR LLaVA V1.5!
-
- # Uncomment and set the following variables correspondingly to run this script:
-
- ################## VICUNA ##################
- # PROMPT_VERSION=v1
- # MODEL_VERSION="vicuna-v1-3-7b"
- ################## VICUNA ##################
-
- ################## LLaMA-2 ##################
- # PROMPT_VERSION="llava_llama_2"
- # MODEL_VERSION="llama-2-7b-chat"
- ################## LLaMA-2 ##################
-
- deepspeed llava/train/train_mem.py \
- --deepspeed ./scripts/zero2.json \
- --lora_enable True \
- --model_name_or_path ./checkpoints/$MODEL_VERSION \
- --version $PROMPT_VERSION \
- --data_path ./playground/data/llava_instruct_80k.json \
- --image_folder /path/to/coco/train2017 \
- --vision_tower openai/clip-vit-large-patch14 \
- --pretrain_mm_mlp_adapter ./checkpoints/llava-$MODEL_VERSION-pretrain/mm_projector.bin \
- --mm_vision_select_layer -2 \
- --mm_use_im_start_end False \
- --mm_use_im_patch_token False \
- --bf16 True \
- --output_dir ./checkpoints/llava-$MODEL_VERSION-finetune_lora \
- --num_train_epochs 1 \
- --per_device_train_batch_size 16 \
- --per_device_eval_batch_size 4 \
- --gradient_accumulation_steps 1 \
- --evaluation_strategy "no" \
- --save_strategy "steps" \
- --save_steps 50000 \
- --save_total_limit 1 \
- --learning_rate 2e-5 \
- --weight_decay 0. \
- --warmup_ratio 0.03 \
- --lr_scheduler_type "cosine" \
- --logging_steps 1 \
- --tf32 True \
- --model_max_length 2048 \
- --gradient_checkpointing True \
- --lazy_preprocess True \
- --dataloader_num_workers 4 \
- --report_to wandb
LLAVA_Biovil/scripts/finetune_qlora.sh DELETED
@@ -1,50 +0,0 @@
- #!/bin/bash
-
- # IMPORTANT: this is the training script for the original LLaVA, NOT FOR LLaVA V1.5!
-
- # Uncomment and set the following variables correspondingly to run this script:
-
- ################## VICUNA ##################
- # PROMPT_VERSION=v1
- # MODEL_VERSION="vicuna-v1-3-7b"
- ################## VICUNA ##################
-
- ################## LLaMA-2 ##################
- # PROMPT_VERSION="llava_llama_2"
- # MODEL_VERSION="llama-2-7b-chat"
- ################## LLaMA-2 ##################
-
- deepspeed llava/train/train_mem.py \
- --deepspeed ./scripts/zero2.json \
- --lora_enable True \
- --bits 4 \
- --model_name_or_path ./checkpoints/$MODEL_VERSION \
- --version $PROMPT_VERSION \
- --data_path ./playground/data/llava_instruct_80k.json \
- --image_folder /path/to/coco/train2017 \
- --vision_tower openai/clip-vit-large-patch14 \
- --pretrain_mm_mlp_adapter ./checkpoints/llava-$MODEL_VERSION-pretrain/mm_projector.bin \
- --mm_vision_select_layer -2 \
- --mm_use_im_start_end False \
- --mm_use_im_patch_token False \
- --bf16 True \
- --output_dir ./checkpoints/llava-$MODEL_VERSION-finetune_lora \
- --num_train_epochs 1 \
- --per_device_train_batch_size 16 \
- --per_device_eval_batch_size 4 \
- --gradient_accumulation_steps 1 \
- --evaluation_strategy "no" \
- --save_strategy "steps" \
- --save_steps 50000 \
- --save_total_limit 1 \
- --learning_rate 2e-5 \
- --weight_decay 0. \
- --warmup_ratio 0.03 \
- --lr_scheduler_type "cosine" \
- --logging_steps 1 \
- --tf32 True \
- --model_max_length 2048 \
- --gradient_checkpointing True \
- --lazy_preprocess True \
- --dataloader_num_workers 4 \
- --report_to wandb
LLAVA_Biovil/scripts/finetune_sqa.sh DELETED
@@ -1,36 +0,0 @@
- #!/bin/bash
-
- # IMPORTANT: this is the training script for the original LLaVA, NOT FOR LLaVA V1.5!
-
- deepspeed llava/train/train_mem.py \
- --deepspeed ./scripts/zero2.json \
- --model_name_or_path lmsys/vicuna-13b-v1.3 \
- --version $PROMPT_VERSION \
- --data_path /Data/ScienceQA/data/scienceqa/llava_train_QCM-LEA.json \
- --image_folder /Data/ScienceQA/data/scienceqa/images/train \
- --vision_tower openai/clip-vit-large-patch14 \
- --pretrain_mm_mlp_adapter ./checkpoints/huggingface/liuhaotian/llava-pretrain-vicuna-13b-v1.3/mm_projector.bin \
- --mm_vision_select_layer -2 \
- --mm_use_im_start_end False \
- --mm_use_im_patch_token False \
- --bf16 True \
- --output_dir ./checkpoints/llava-vicuna-13b-v1.3-pretrain_lcs558k_plain-ScienceQA_QCM_LEA-12e \
- --num_train_epochs 12 \
- --per_device_train_batch_size 16 \
- --per_device_eval_batch_size 4 \
- --gradient_accumulation_steps 1 \
- --evaluation_strategy "no" \
- --save_strategy "steps" \
- --save_steps 50000 \
- --save_total_limit 1 \
- --learning_rate 2e-5 \
- --weight_decay 0. \
- --warmup_ratio 0.03 \
- --lr_scheduler_type "cosine" \
- --logging_steps 1 \
- --tf32 True \
- --model_max_length 2048 \
- --gradient_checkpointing True \
- --dataloader_num_workers 4 \
- --lazy_preprocess True \
- --report_to wandb
LLAVA_Biovil/scripts/merge_lora_weights.py DELETED
@@ -1,22 +0,0 @@
- import argparse
- from LLAV.llava.model.builder import load_pretrained_model
- from LLAV.llava.mm_utils import get_model_name_from_path
-
-
- def merge_lora(args):
- model_name = get_model_name_from_path(args.model_path)
- tokenizer, model, image_processor, context_len = load_pretrained_model(args.model_path, args.model_base, model_name, device_map='cpu')
-
- model.save_pretrained(args.save_model_path)
- tokenizer.save_pretrained(args.save_model_path)
-
-
- if __name__ == "__main__":
- parser = argparse.ArgumentParser()
- parser.add_argument("--model-path", type=str, required=True)
- parser.add_argument("--model-base", type=str, required=True)
- parser.add_argument("--save-model-path", type=str, required=True)
-
- args = parser.parse_args()
-
- merge_lora(args)
LLAVA_Biovil/scripts/pretrain.sh DELETED
@@ -1,46 +0,0 @@
- #!/bin/bash
-
- # IMPORTANT: this is the training script for the original LLaVA, NOT FOR LLaVA V1.5!
-
- # Uncomment and set the following variables correspondingly to run this script:
-
- # MODEL_VERSION=vicuna-v1-3-7b
- # MODEL_VERSION=llama-2-7b-chat
-
- ########### DO NOT CHANGE ###########
- ########### USE THIS FOR BOTH ###########
- PROMPT_VERSION=plain
- ########### DO NOT CHANGE ###########
-
- deepspeed llava/train/train_mem.py \
- --deepspeed ./scripts/zero2.json \
- --model_name_or_path ./checkpoints/$MODEL_VERSION \
- --version $PROMPT_VERSION \
- --data_path /path/to/pretrain_data.json \
- --image_folder /path/to/images \
- --vision_tower openai/clip-vit-large-patch14 \
- --tune_mm_mlp_adapter True \
- --mm_vision_select_layer -2 \
- --mm_use_im_start_end False \
- --mm_use_im_patch_token False \
- --bf16 True \
- --output_dir ./checkpoints/llava-$MODEL_VERSION-pretrain \
- --num_train_epochs 1 \
- --per_device_train_batch_size 16 \
- --per_device_eval_batch_size 4 \
- --gradient_accumulation_steps 1 \
- --evaluation_strategy "no" \
- --save_strategy "steps" \
- --save_steps 24000 \
- --save_total_limit 1 \
- --learning_rate 2e-3 \
- --weight_decay 0. \
- --warmup_ratio 0.03 \
- --lr_scheduler_type "cosine" \
- --logging_steps 1 \
- --tf32 True \
- --model_max_length 2048 \
- --gradient_checkpointing True \
- --dataloader_num_workers 4 \
- --lazy_preprocess True \
- --report_to wandb
LLAVA_Biovil/scripts/pretrain_xformers.sh DELETED
@@ -1,44 +0,0 @@
- #!/bin/bash
-
- # Uncomment and set the following variables correspondingly to run this script:
-
- # MODEL_VERSION=vicuna-v1-3-7b
- # MODEL_VERSION=llama-2-7b-chat
-
- ########### DO NOT CHANGE ###########
- ########### USE THIS FOR BOTH ###########
- PROMPT_VERSION=plain
- ########### DO NOT CHANGE ###########
-
- deepspeed llava/train/train_xformers.py \
- --deepspeed ./scripts/zero2.json \
- --model_name_or_path ./checkpoints/$MODEL_VERSION \
- --version $PROMPT_VERSION \
- --data_path /path/to/pretrain_data.json \
- --image_folder /path/to/images \
- --vision_tower openai/clip-vit-large-patch14 \
- --tune_mm_mlp_adapter True \
- --mm_vision_select_layer -2 \
- --mm_use_im_start_end False \
- --mm_use_im_patch_token False \
- --bf16 False \
- --output_dir ./checkpoints/llava-$MODEL_VERSION-pretrain \
- --num_train_epochs 1 \
- --per_device_train_batch_size 4 \
- --per_device_eval_batch_size 4 \
- --gradient_accumulation_steps 4 \
- --evaluation_strategy "no" \
- --save_strategy "steps" \
- --save_steps 24000 \
- --save_total_limit 1 \
- --learning_rate 2e-3 \
- --weight_decay 0. \
- --warmup_ratio 0.03 \
- --lr_scheduler_type "cosine" \
- --logging_steps 1 \
- --tf32 False \
- --model_max_length 2048 \
- --gradient_checkpointing True \
- --dataloader_num_workers 4 \
- --lazy_preprocess True \
- --report_to wandb
LLAVA_Biovil/scripts/sqa_eval_batch.sh DELETED
@@ -1,13 +0,0 @@
- #!/bin/bash
-
- CHUNKS=8
- for IDX in {0..7}; do
- CUDA_VISIBLE_DEVICES=$IDX python -m llava.eval.model_vqa_science \
- --model-path liuhaotian/llava-lcs558k-scienceqa-vicuna-13b-v1.3 \
- --question-file ~/haotian/datasets/ScienceQA/data/scienceqa/llava_test_QCM-LEA.json \
- --image-folder ~/haotian/datasets/ScienceQA/data/scienceqa/images/test \
- --answers-file ./test_llava-13b-chunk$CHUNKS_$IDX.jsonl \
- --num-chunks $CHUNKS \
- --chunk-idx $IDX \
- --conv-mode llava_v1 &
- done
LLAVA_Biovil/scripts/sqa_eval_gather.sh DELETED
@@ -1,18 +0,0 @@
- #!/bin/bash
-
- CHUNKS=8
- output_file="test_llava-13b.jsonl"
-
- # Clear out the output file if it exists.
- > "$output_file"
-
- # Loop through the indices and concatenate each file.
- for idx in $(seq 0 $((CHUNKS-1))); do
- cat "./test_llava-13b-chunk${idx}.jsonl" >> "$output_file"
- done
-
- python llava/eval/eval_science_qa.py \
- --base-dir ~/haotian/datasets/ScienceQA/data/scienceqa \
- --result-file ./test_llava-13b.jsonl \
- --output-file ./test_llava-13b_output.json \
- --output-result ./test_llava-13b_result.json
LLAVA_Biovil/scripts/v1_5/eval/gqa.sh DELETED
@@ -1,39 +0,0 @@
- #!/bin/bash
-
- gpu_list="${CUDA_VISIBLE_DEVICES:-0}"
- IFS=',' read -ra GPULIST <<< "$gpu_list"
-
- CHUNKS=${#GPULIST[@]}
-
- CKPT="llava-v1.5-13b"
- SPLIT="llava_gqa_testdev_balanced"
- GQADIR="./playground/data/eval/gqa/data"
-
- for IDX in $(seq 0 $((CHUNKS-1))); do
- CUDA_VISIBLE_DEVICES=${GPULIST[$IDX]} python -m llava.eval.model_vqa_loader \
- --model-path liuhaotian/llava-v1.5-13b \
- --question-file ./playground/data/eval/gqa/$SPLIT.jsonl \
- --image-folder ./playground/data/eval/gqa/data/images \
- --answers-file ./playground/data/eval/gqa/answers/$SPLIT/$CKPT/${CHUNKS}_${IDX}.jsonl \
- --num-chunks $CHUNKS \
- --chunk-idx $IDX \
- --temperature 0 \
- --conv-mode vicuna_v1 &
- done
-
- wait
-
- output_file=./playground/data/eval/gqa/answers/$SPLIT/$CKPT/merge.jsonl
-
- # Clear out the output file if it exists.
- > "$output_file"
-
- # Loop through the indices and concatenate each file.
- for IDX in $(seq 0 $((CHUNKS-1))); do
- cat ./playground/data/eval/gqa/answers/$SPLIT/$CKPT/${CHUNKS}_${IDX}.jsonl >> "$output_file"
- done
-
- python scripts/convert_gqa_for_eval.py --src $output_file --dst $GQADIR/testdev_balanced_predictions.json
-
- cd $GQADIR
- python eval/eval.py --tier testdev_balanced
LLAVA_Biovil/scripts/v1_5/eval/llavabench.sh DELETED
@@ -1,23 +0,0 @@
- #!/bin/bash
-
- python -m llava.eval.model_vqa \
- --model-path liuhaotian/llava-v1.5-13b \
- --question-file ./playground/data/eval/llava-bench-in-the-wild/questions.jsonl \
- --image-folder ./playground/data/eval/llava-bench-in-the-wild/images \
- --answers-file ./playground/data/eval/llava-bench-in-the-wild/answers/llava-v1.5-13b.jsonl \
- --temperature 0 \
- --conv-mode vicuna_v1
-
- mkdir -p playground/data/eval/llava-bench-in-the-wild/reviews
-
- python llava/eval/eval_gpt_review_bench.py \
- --question playground/data/eval/llava-bench-in-the-wild/questions.jsonl \
- --context playground/data/eval/llava-bench-in-the-wild/context.jsonl \
- --rule llava/eval/table/rule.json \
- --answer-list \
- playground/data/eval/llava-bench-in-the-wild/answers_gpt4.jsonl \
- playground/data/eval/llava-bench-in-the-wild/answers/llava-v1.5-13b.jsonl \
- --output \
- playground/data/eval/llava-bench-in-the-wild/reviews/llava-v1.5-13b.jsonl
-
- python llava/eval/summarize_gpt_review.py -f playground/data/eval/llava-bench-in-the-wild/reviews/llava-v1.5-13b.jsonl
LLAVA_Biovil/scripts/v1_5/eval/mmbench.sh DELETED
@@ -1,19 +0,0 @@
1
- #!/bin/bash
2
-
3
- SPLIT="mmbench_dev_20230712"
4
-
5
- python -m llava.eval.model_vqa_mmbench \
6
- --model-path liuhaotian/llava-v1.5-13b \
7
- --question-file ./playground/data/eval/mmbench/$SPLIT.tsv \
8
- --answers-file ./playground/data/eval/mmbench/answers/$SPLIT/llava-v1.5-13b.jsonl \
9
- --single-pred-prompt \
10
- --temperature 0 \
11
- --conv-mode vicuna_v1
12
-
13
- mkdir -p playground/data/eval/mmbench/answers_upload/$SPLIT
14
-
15
- python scripts/convert_mmbench_for_submission.py \
16
- --annotation-file ./playground/data/eval/mmbench/$SPLIT.tsv \
17
- --result-dir ./playground/data/eval/mmbench/answers/$SPLIT \
18
- --upload-dir ./playground/data/eval/mmbench/answers_upload/$SPLIT \
19
- --experiment llava-v1.5-13b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
LLAVA_Biovil/scripts/v1_5/eval/mmbench_cn.sh DELETED
@@ -1,20 +0,0 @@
1
- #!/bin/bash
2
-
3
- SPLIT="mmbench_dev_cn_20231003"
4
-
5
- python -m llava.eval.model_vqa_mmbench \
6
- --model-path liuhaotian/llava-v1.5-13b \
7
- --question-file ./playground/data/eval/mmbench_cn/$SPLIT.tsv \
8
- --answers-file ./playground/data/eval/mmbench_cn/answers/$SPLIT/llava-v1.5-13b.jsonl \
9
- --lang cn \
10
- --single-pred-prompt \
11
- --temperature 0 \
12
- --conv-mode vicuna_v1
13
-
14
- mkdir -p playground/data/eval/mmbench/answers_upload/$SPLIT
15
-
16
- python scripts/convert_mmbench_for_submission.py \
17
- --annotation-file ./playground/data/eval/mmbench_cn/$SPLIT.tsv \
18
- --result-dir ./playground/data/eval/mmbench_cn/answers/$SPLIT \
19
- --upload-dir ./playground/data/eval/mmbench_cn/answers_upload/$SPLIT \
20
- --experiment llava-v1.5-13b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
LLAVA_Biovil/scripts/v1_5/eval/mme.sh DELETED
@@ -1,17 +0,0 @@
1
- #!/bin/bash
2
-
3
- python -m llava.eval.model_vqa_loader \
4
- --model-path liuhaotian/llava-v1.5-13b \
5
- --question-file ./playground/data/eval/MME/llava_mme.jsonl \
6
- --image-folder ./playground/data/eval/MME/MME_Benchmark_release_version \
7
- --answers-file ./playground/data/eval/MME/answers/llava-v1.5-13b.jsonl \
8
- --temperature 0 \
9
- --conv-mode vicuna_v1
10
-
11
- cd ./playground/data/eval/MME
12
-
13
- python convert_answer_to_mme.py --experiment llava-v1.5-13b
14
-
15
- cd eval_tool
16
-
17
- python calculation.py --results_dir answers/llava-v1.5-13b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
LLAVA_Biovil/scripts/v1_5/eval/mmvet.sh DELETED
@@ -1,16 +0,0 @@
1
- #!/bin/bash
2
-
3
- python -m llava.eval.model_vqa \
4
- --model-path liuhaotian/llava-v1.5-13b \
5
- --question-file ./playground/data/eval/mm-vet/llava-mm-vet.jsonl \
6
- --image-folder ./playground/data/eval/mm-vet/images \
7
- --answers-file ./playground/data/eval/mm-vet/answers/llava-v1.5-13b.jsonl \
8
- --temperature 0 \
9
- --conv-mode vicuna_v1
10
-
11
- mkdir -p ./playground/data/eval/mm-vet/results
12
-
13
- python scripts/convert_mmvet_for_eval.py \
14
- --src ./playground/data/eval/mm-vet/answers/llava-v1.5-13b.jsonl \
15
- --dst ./playground/data/eval/mm-vet/results/llava-v1.5-13b.json
16
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
LLAVA_Biovil/scripts/v1_5/eval/pope.sh DELETED
@@ -1,14 +0,0 @@
1
- #!/bin/bash
2
-
3
- python -m llava.eval.model_vqa_loader \
4
- --model-path liuhaotian/llava-v1.5-13b \
5
- --question-file ./playground/data/eval/pope/llava_pope_test.jsonl \
6
- --image-folder ./playground/data/eval/pope/val2014 \
7
- --answers-file ./playground/data/eval/pope/answers/llava-v1.5-13b.jsonl \
8
- --temperature 0 \
9
- --conv-mode vicuna_v1
10
-
11
- python llava/eval/eval_pope.py \
12
- --annotation-dir ./playground/data/eval/pope/coco \
13
- --question-file ./playground/data/eval/pope/llava_pope_test.jsonl \
14
- --result-file ./playground/data/eval/pope/answers/llava-v1.5-13b.jsonl
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
LLAVA_Biovil/scripts/v1_5/eval/qbench.sh DELETED
@@ -1,18 +0,0 @@
1
- #!/bin/bash
2
-
3
- if [ "$1" = "dev" ]; then
4
- echo "Evaluating in 'dev' split."
5
- elif [ "$1" = "test" ]; then
6
- echo "Evaluating in 'test' split."
7
- else
8
- echo "Unknown split, please choose between 'dev' and 'test'."
9
- exit 1
10
- fi
11
-
12
- python -m llava.eval.model_vqa_qbench \
13
- --model-path liuhaotian/llava-v1.5-13b \
14
- --image-folder ./playground/data/eval/qbench/images_llvisionqa/ \
15
- --questions-file ./playground/data/eval/qbench/llvisionqa_$1.json \
16
- --answers-file ./playground/data/eval/qbench/llvisionqa_$1_answers.jsonl \
17
- --conv-mode llava_v1 \
18
- --lang en
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
LLAVA_Biovil/scripts/v1_5/eval/qbench_zh.sh DELETED
@@ -1,20 +0,0 @@
1
- #!/bin/bash
2
-
3
- if [ "$1" = "dev" ]; then
4
- ZH_SPLIT="验证集"
5
- echo "Evaluating in 'dev' split."
6
- elif [ "$1" = "test" ]; then
7
- ZH_SPLIT="测试集"
8
- echo "Evaluating in 'test' split."
9
- else
10
- echo "Unknown split, please choose between 'dev' and 'test'."
11
- exit 1
12
- fi
13
-
14
- python -m llava.eval.model_vqa_qbench \
15
- --model-path liuhaotian/llava-v1.5-13b \
16
- --image-folder ./playground/data/eval/qbench/images_llvisionqa/ \
17
- --questions-file ./playground/data/eval/qbench/质衡-问答-$ZH_SPLIT.json \
18
- --answers-file ./playground/data/eval/qbench/llvisionqa_zh_$1_answers.jsonl \
19
- --conv-mode llava_v1 \
20
- --lang zh
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
LLAVA_Biovil/scripts/v1_5/eval/seed.sh DELETED
@@ -1,39 +0,0 @@
1
- #!/bin/bash
2
-
3
- gpu_list="${CUDA_VISIBLE_DEVICES:-0}"
4
- IFS=',' read -ra GPULIST <<< "$gpu_list"
5
-
6
- CHUNKS=${#GPULIST[@]}
7
-
8
- CKPT="llava-v1.5-13b"
9
-
10
- for IDX in $(seq 0 $((CHUNKS-1))); do
11
- CUDA_VISIBLE_DEVICES=${GPULIST[$IDX]} python -m llava.eval.model_vqa_loader \
12
- --model-path liuhaotian/llava-v1.5-13b \
13
- --question-file ./playground/data/eval/seed_bench/llava-seed-bench.jsonl \
14
- --image-folder ./playground/data/eval/seed_bench \
15
- --answers-file ./playground/data/eval/seed_bench/answers/$CKPT/${CHUNKS}_${IDX}.jsonl \
16
- --num-chunks $CHUNKS \
17
- --chunk-idx $IDX \
18
- --temperature 0 \
19
- --conv-mode vicuna_v1 &
20
- done
21
-
22
- wait
23
-
24
- output_file=./playground/data/eval/seed_bench/answers/$CKPT/merge.jsonl
25
-
26
- # Clear out the output file if it exists.
27
- > "$output_file"
28
-
29
- # Loop through the indices and concatenate each file.
30
- for IDX in $(seq 0 $((CHUNKS-1))); do
31
- cat ./playground/data/eval/seed_bench/answers/$CKPT/${CHUNKS}_${IDX}.jsonl >> "$output_file"
32
- done
33
-
34
- # Evaluate
35
- python scripts/convert_seed_for_submission.py \
36
- --annotation-file ./playground/data/eval/seed_bench/SEED-Bench.json \
37
- --result-file $output_file \
38
- --result-upload-file ./playground/data/eval/seed_bench/answers_upload/llava-v1.5-13b.jsonl
39
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
LLAVA_Biovil/scripts/v1_5/eval/sqa.sh DELETED
@@ -1,16 +0,0 @@
1
- #!/bin/bash
2
-
3
- python -m llava.eval.model_vqa_science \
4
- --model-path liuhaotian/llava-v1.5-13b \
5
- --question-file ./playground/data/eval/scienceqa/llava_test_CQM-A.json \
6
- --image-folder ./playground/data/eval/scienceqa/images/test \
7
- --answers-file ./playground/data/eval/scienceqa/answers/llava-v1.5-13b.jsonl \
8
- --single-pred-prompt \
9
- --temperature 0 \
10
- --conv-mode vicuna_v1
11
-
12
- python llava/eval/eval_science_qa.py \
13
- --base-dir ./playground/data/eval/scienceqa \
14
- --result-file ./playground/data/eval/scienceqa/answers/llava-v1.5-13b.jsonl \
15
- --output-file ./playground/data/eval/scienceqa/answers/llava-v1.5-13b_output.jsonl \
16
- --output-result ./playground/data/eval/scienceqa/answers/llava-v1.5-13b_result.json
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
LLAVA_Biovil/scripts/v1_5/eval/textvqa.sh DELETED
@@ -1,13 +0,0 @@
1
- #!/bin/bash
2
-
3
- python -m llava.eval.model_vqa_loader \
4
- --model-path liuhaotian/llava-v1.5-13b \
5
- --question-file ./playground/data/eval/textvqa/llava_textvqa_val_v051_ocr.jsonl \
6
- --image-folder ./playground/data/eval/textvqa/train_images \
7
- --answers-file ./playground/data/eval/textvqa/answers/llava-v1.5-13b.jsonl \
8
- --temperature 0 \
9
- --conv-mode vicuna_v1
10
-
11
- python -m llava.eval.eval_textvqa \
12
- --annotation-file ./playground/data/eval/textvqa/TextVQA_0.5.1_val.json \
13
- --result-file ./playground/data/eval/textvqa/answers/llava-v1.5-13b.jsonl
 
 
 
 
 
 
 
 
 
 
 
 
 
 
LLAVA_Biovil/scripts/v1_5/eval/vizwiz.sh DELETED
@@ -1,14 +0,0 @@
1
- #!/bin/bash
2
-
3
- python -m llava.eval.model_vqa_loader \
4
- --model-path liuhaotian/llava-v1.5-13b \
5
- --question-file ./playground/data/eval/vizwiz/llava_test.jsonl \
6
- --image-folder ./playground/data/eval/vizwiz/test \
7
- --answers-file ./playground/data/eval/vizwiz/answers/llava-v1.5-13b.jsonl \
8
- --temperature 0 \
9
- --conv-mode vicuna_v1
10
-
11
- python scripts/convert_vizwiz_for_submission.py \
12
- --annotation-file ./playground/data/eval/vizwiz/llava_test.jsonl \
13
- --result-file ./playground/data/eval/vizwiz/answers/llava-v1.5-13b.jsonl \
14
- --result-upload-file ./playground/data/eval/vizwiz/answers_upload/llava-v1.5-13b.json
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
LLAVA_Biovil/scripts/v1_5/eval/vqav2.sh DELETED
@@ -1,36 +0,0 @@
1
- #!/bin/bash
2
-
3
- gpu_list="${CUDA_VISIBLE_DEVICES:-0}"
4
- IFS=',' read -ra GPULIST <<< "$gpu_list"
5
-
6
- CHUNKS=${#GPULIST[@]}
7
-
8
- CKPT="llava-v1.5-13b"
9
- SPLIT="llava_vqav2_mscoco_test-dev2015"
10
-
11
- for IDX in $(seq 0 $((CHUNKS-1))); do
12
- CUDA_VISIBLE_DEVICES=${GPULIST[$IDX]} python -m llava.eval.model_vqa_loader \
13
- --model-path liuhaotian/llava-v1.5-13b \
14
- --question-file ./playground/data/eval/vqav2/$SPLIT.jsonl \
15
- --image-folder ./playground/data/eval/vqav2/test2015 \
16
- --answers-file ./playground/data/eval/vqav2/answers/$SPLIT/$CKPT/${CHUNKS}_${IDX}.jsonl \
17
- --num-chunks $CHUNKS \
18
- --chunk-idx $IDX \
19
- --temperature 0 \
20
- --conv-mode vicuna_v1 &
21
- done
22
-
23
- wait
24
-
25
- output_file=./playground/data/eval/vqav2/answers/$SPLIT/$CKPT/merge.jsonl
26
-
27
- # Clear out the output file if it exists.
28
- > "$output_file"
29
-
30
- # Loop through the indices and concatenate each file.
31
- for IDX in $(seq 0 $((CHUNKS-1))); do
32
- cat ./playground/data/eval/vqav2/answers/$SPLIT/$CKPT/${CHUNKS}_${IDX}.jsonl >> "$output_file"
33
- done
34
-
35
- python scripts/convert_vqav2_for_submission.py --split $SPLIT --ckpt $CKPT
36
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
LLAVA_Biovil/scripts/v1_5/finetune.sh DELETED
@@ -1,37 +0,0 @@
1
- #!/bin/bash
2
-
3
- deepspeed llava/train/train_mem.py \
4
- --deepspeed ./scripts/zero3.json \
5
- --model_name_or_path lmsys/vicuna-13b-v1.5 \
6
- --version v1 \
7
- --data_path ./playground/data/llava_v1_5_mix665k.json \
8
- --image_folder ./playground/data \
9
- --vision_tower openai/clip-vit-large-patch14-336 \
10
- --pretrain_mm_mlp_adapter ./checkpoints/llava-v1.5-13b-pretrain/mm_projector.bin \
11
- --mm_projector_type mlp2x_gelu \
12
- --mm_vision_select_layer -2 \
13
- --mm_use_im_start_end False \
14
- --mm_use_im_patch_token False \
15
- --image_aspect_ratio pad \
16
- --group_by_modality_length True \
17
- --bf16 True \
18
- --output_dir ./checkpoints/llava-v1.5-13b \
19
- --num_train_epochs 1 \
20
- --per_device_train_batch_size 16 \
21
- --per_device_eval_batch_size 4 \
22
- --gradient_accumulation_steps 1 \
23
- --evaluation_strategy "no" \
24
- --save_strategy "steps" \
25
- --save_steps 50000 \
26
- --save_total_limit 1 \
27
- --learning_rate 2e-5 \
28
- --weight_decay 0. \
29
- --warmup_ratio 0.03 \
30
- --lr_scheduler_type "cosine" \
31
- --logging_steps 1 \
32
- --tf32 True \
33
- --model_max_length 2048 \
34
- --gradient_checkpointing True \
35
- --dataloader_num_workers 4 \
36
- --lazy_preprocess True \
37
- --report_to wandb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
LLAVA_Biovil/scripts/v1_5/finetune_lora.sh DELETED
@@ -1,38 +0,0 @@
1
- #!/bin/bash
2
-
3
- deepspeed llava/train/train_mem.py \
4
- --lora_enable True --lora_r 128 --lora_alpha 256 --mm_projector_lr 2e-5 \
5
- --deepspeed ./scripts/zero3.json \
6
- --model_name_or_path lmsys/vicuna-13b-v1.5 \
7
- --version v1 \
8
- --data_path ./playground/data/llava_v1_5_mix665k.json \
9
- --image_folder ./playground/data \
10
- --vision_tower openai/clip-vit-large-patch14-336 \
11
- --pretrain_mm_mlp_adapter ./checkpoints/llava-v1.5-13b-pretrain/mm_projector.bin \
12
- --mm_projector_type mlp2x_gelu \
13
- --mm_vision_select_layer -2 \
14
- --mm_use_im_start_end False \
15
- --mm_use_im_patch_token False \
16
- --image_aspect_ratio pad \
17
- --group_by_modality_length True \
18
- --bf16 True \
19
- --output_dir ./checkpoints/llava-v1.5-13b-lora \
20
- --num_train_epochs 1 \
21
- --per_device_train_batch_size 16 \
22
- --per_device_eval_batch_size 4 \
23
- --gradient_accumulation_steps 1 \
24
- --evaluation_strategy "no" \
25
- --save_strategy "steps" \
26
- --save_steps 50000 \
27
- --save_total_limit 1 \
28
- --learning_rate 2e-4 \
29
- --weight_decay 0. \
30
- --warmup_ratio 0.03 \
31
- --lr_scheduler_type "cosine" \
32
- --logging_steps 1 \
33
- --tf32 True \
34
- --model_max_length 2048 \
35
- --gradient_checkpointing True \
36
- --dataloader_num_workers 4 \
37
- --lazy_preprocess True \
38
- --report_to wandb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
LLAVA_Biovil/scripts/v1_5/finetune_task.sh DELETED
@@ -1,36 +0,0 @@
1
- #!/bin/bash
2
-
3
- deepspeed llava/train/train_mem.py \
4
- --deepspeed ./scripts/zero3.json \
5
- --model_name_or_path liuhaotian/llava-v1.5-13b \
6
- --version v1 \
7
- --data_path ./playground/data/llava_v1_5_mix665k.json \
8
- --image_folder ./playground/data \
9
- --vision_tower openai/clip-vit-large-patch14-336 \
10
- --mm_projector_type mlp2x_gelu \
11
- --mm_vision_select_layer -2 \
12
- --mm_use_im_start_end False \
13
- --mm_use_im_patch_token False \
14
- --image_aspect_ratio pad \
15
- --group_by_modality_length True \
16
- --bf16 True \
17
- --output_dir ./checkpoints/llava-v1.5-13b-task \
18
- --num_train_epochs 1 \
19
- --per_device_train_batch_size 16 \
20
- --per_device_eval_batch_size 4 \
21
- --gradient_accumulation_steps 1 \
22
- --evaluation_strategy "no" \
23
- --save_strategy "steps" \
24
- --save_steps 50000 \
25
- --save_total_limit 1 \
26
- --learning_rate 2e-5 \
27
- --weight_decay 0. \
28
- --warmup_ratio 0.03 \
29
- --lr_scheduler_type "cosine" \
30
- --logging_steps 1 \
31
- --tf32 True \
32
- --model_max_length 2048 \
33
- --gradient_checkpointing True \
34
- --dataloader_num_workers 4 \
35
- --lazy_preprocess True \
36
- --report_to wandb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
LLAVA_Biovil/scripts/v1_5/finetune_task_lora.sh DELETED
@@ -1,37 +0,0 @@
1
- #!/bin/bash
2
-
3
- deepspeed llava/train/train_mem.py \
4
- --lora_enable True --lora_r 128 --lora_alpha 256 --mm_projector_lr 2e-5 \
5
- --deepspeed ./scripts/zero3.json \
6
- --model_name_or_path liuhaotian/llava-v1.5-13b \
7
- --version v1 \
8
- --data_path ./playground/data/llava_v1_5_mix665k.json \
9
- --image_folder ./playground/data \
10
- --vision_tower openai/clip-vit-large-patch14-336 \
11
- --mm_projector_type mlp2x_gelu \
12
- --mm_vision_select_layer -2 \
13
- --mm_use_im_start_end False \
14
- --mm_use_im_patch_token False \
15
- --image_aspect_ratio pad \
16
- --group_by_modality_length True \
17
- --bf16 True \
18
- --output_dir ./checkpoints/llava-v1.5-13b-task-lora \
19
- --num_train_epochs 1 \
20
- --per_device_train_batch_size 16 \
21
- --per_device_eval_batch_size 4 \
22
- --gradient_accumulation_steps 1 \
23
- --evaluation_strategy "no" \
24
- --save_strategy "steps" \
25
- --save_steps 50000 \
26
- --save_total_limit 1 \
27
- --learning_rate 2e-4 \
28
- --weight_decay 0. \
29
- --warmup_ratio 0.03 \
30
- --lr_scheduler_type "cosine" \
31
- --logging_steps 1 \
32
- --tf32 True \
33
- --model_max_length 2048 \
34
- --gradient_checkpointing True \
35
- --dataloader_num_workers 4 \
36
- --lazy_preprocess True \
37
- --report_to wandb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
LLAVA_Biovil/scripts/v1_5/pretrain.sh DELETED
@@ -1,35 +0,0 @@
1
- #!/bin/bash
2
-
3
- deepspeed llava/train/train_mem.py \
4
- --deepspeed ./scripts/zero2.json \
5
- --model_name_or_path lmsys/vicuna-13b-v1.5 \
6
- --version plain \
7
- --data_path ./playground/data/LLaVA-Pretrain/blip_laion_cc_sbu_558k.json \
8
- --image_folder ./playground/data/LLaVA-Pretrain/images \
9
- --vision_tower openai/clip-vit-large-patch14-336 \
10
- --mm_projector_type mlp2x_gelu \
11
- --tune_mm_mlp_adapter True \
12
- --mm_vision_select_layer -2 \
13
- --mm_use_im_start_end False \
14
- --mm_use_im_patch_token False \
15
- --bf16 True \
16
- --output_dir ./checkpoints/llava-v1.5-13b-pretrain \
17
- --num_train_epochs 1 \
18
- --per_device_train_batch_size 32 \
19
- --per_device_eval_batch_size 4 \
20
- --gradient_accumulation_steps 1 \
21
- --evaluation_strategy "no" \
22
- --save_strategy "steps" \
23
- --save_steps 24000 \
24
- --save_total_limit 1 \
25
- --learning_rate 1e-3 \
26
- --weight_decay 0. \
27
- --warmup_ratio 0.03 \
28
- --lr_scheduler_type "cosine" \
29
- --logging_steps 1 \
30
- --tf32 True \
31
- --model_max_length 2048 \
32
- --gradient_checkpointing True \
33
- --dataloader_num_workers 4 \
34
- --lazy_preprocess True \
35
- --report_to wandb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
LLAVA_Biovil/slurm_config.conf DELETED
@@ -1,60 +0,0 @@
1
- #!/bin/sh
2
-
3
- #SBATCH --job-name=radialog
4
- #SBATCH --output=oracle-%A.out # Standard output of the script (Can be absolute or relative path). %A adds the job id to the file name so you can launch the same script multiple times and get different logging files
5
- #SBATCH --error=oracle-%A.err # Standard error of the script
6
- #SBATCH --time=0-160:00:00 # Limit on the total run time (format: days-hours:minutes:seconds)
7
- #SBATCH --gres=gpu:1 # Number of GPUs if needed
8
- #SBATCH --cpus-per-task=8 # Number of CPUs (Don't use more than 24 per GPU)
9
- #SBATCH --mem=96G # Memory in GB (Don't use more than 126G per GPU), maybe 128?
10
-
11
- # activate corresponding environment
12
- # conda deactivate # If you launch your script from a terminal where your environment is already loaded, conda won't activate the environment. This guards against that. Not necessary if you always run this script from a clean terminal
13
- source ~/miniconda3/etc/profile.d/conda.sh
14
- conda activate llava_raddialog
15
- # FLASH ATTN NEEDS TO BE INSTALLED FROM THE SOURCE FOR CUDA 11.7 by previously setting CUDA HOME and LD_LIBRARY SOMETHING VARIABLES.
16
- # POTENTIALLY TRY OUT VERSION 2 AS WELL WHICH IS LLAMA 2 BASED
17
-
18
- export GPUS_PER_NODE=1
19
- #export MASTER_ADDR=$(scontrol show hostnames $SLURM_JOB_NODELIST | head -n 1) # TODO needed for multi-node setups
20
- #export MASTER_PORT=9901
21
- export MASTER_ADDR=$(hostname)
22
- export MASTER_PORT=29719
23
-
24
- srun --jobid $SLURM_JOBID python -m torch.distributed.run --nproc_per_node=$GPUS_PER_NODE --master_addr=$MASTER_ADDR --master_port=$MASTER_PORT llava/train/train_mem.py \
25
- --lora_enable True --bits 4 --lora_r 128 --lora_alpha 256 --mm_projector_lr 2e-4 \
26
- --deepspeed ./scripts/zero2.json \
27
- --model_name_or_path liuhaotian/llava-v1.5-7b \
28
- --version v1 \
29
- --data_path /home/guests/chantal_pellegrini/RaDialog_LLaVA/data/mimic_cxr_instruct_llava.json \
30
- --image_folder /home/data/DIVA/mimic/mimic-cxr-jpg/2.0.0 \
31
- --vision_tower openai/clip-vit-large-patch14-336 \
32
- --mm_projector_type mlp2x_gelu \
33
- --mm_vision_select_layer -2 \
34
- --mm_use_im_start_end False \
35
- --mm_use_im_patch_token False \
36
- --image_aspect_ratio pad \
37
- --group_by_modality_length True \
38
- --bf16 True \
39
- --output_dir ./checkpoints/llava-v1.5-7b-task-lora_radialog_cosine_llava_unfreeze \
40
- --num_train_epochs 1 \
41
- --per_device_train_batch_size 16 \
42
- --per_device_eval_batch_size 4 \
43
- --gradient_accumulation_steps 8 \
44
- --evaluation_strategy "no" \
45
- --save_strategy "steps" \
46
- --save_steps 500 \
47
- --learning_rate 2e-4 \
48
- --max_grad_norm 0.1 \
49
- --weight_decay 0. \
50
- --warmup_ratio 0.03 \
51
- --lr_scheduler_type "cosine" \
52
- --logging_steps 1 \
53
- --tf32 True \
54
- --model_max_length 1300 \
55
- --gradient_checkpointing True \
56
- --dataloader_num_workers 4 \
57
- --lazy_preprocess True \
58
- --report_to wandb \
59
- --run_name llava-v1.5-7b-task-lora_radialog_cosine_llava_unfreeze \
60
- --unfreeze_n_vision_tower_layers 12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
LLAVA_Biovil/slurm_config_biovil_frozen.conf DELETED
@@ -1,60 +0,0 @@
1
- #!/bin/sh
2
-
3
- #SBATCH --job-name=ins_v4_frozen
4
- #SBATCH --output=oracle-%A.out # Standard output of the script (Can be absolute or relative path). %A adds the job id to the file name so you can launch the same script multiple times and get different logging files
5
- #SBATCH --error=oracle-%A.err # Standard error of the script
6
- #SBATCH --time=0-160:00:00 # Limit on the total run time (format: days-hours:minutes:seconds)
7
- #SBATCH --gres=gpu:1 # Number of GPUs if needed
8
- #SBATCH --cpus-per-task=8 # Number of CPUs (Don't use more than 24 per GPU)
9
- #SBATCH --mem=96G # Memory in GB (Don't use more than 126G per GPU), maybe 128?
10
- # activate corresponding environment
11
- # conda deactivate # If you launch your script from a terminal where your environment is already loaded, conda won't activate the environment. This guards against that. Not necessary if you always run this script from a clean terminal
12
- source ~/miniconda3/etc/profile.d/conda.sh
13
- conda activate llava_raddialog
14
- # FLASH ATTN NEEDS TO BE INSTALLED FROM THE SOURCE FOR CUDA 11.7 by previously setting CUDA HOME and LD_LIBRARY SOMETHING VARIABLES.
15
- # POTENTIALLY TRY OUT VERSION 2 AS WELL WHICH IS LLAMA 2 BASED
16
-
17
- export PYTHONPATH="/home/guests/chantal_pellegrini/RaDialog_LLaVA:$PYTHONPATH"
18
-
19
- export GPUS_PER_NODE=1
20
- #export MASTER_ADDR=$(scontrol show hostnames $SLURM_JOB_NODELIST | head -n 1) # TODO needed for multi-node setups
21
- #export MASTER_PORT=9901
22
- export MASTER_ADDR=$(hostname)
23
- export MASTER_PORT=29719
24
-
25
- srun --jobid $SLURM_JOBID python -m torch.distributed.run --nproc_per_node=$GPUS_PER_NODE --master_addr=$MASTER_ADDR --master_port=$MASTER_PORT llava/train/train_mem.py \
26
- --lora_enable True --lora_r 128 --lora_alpha 256 --mm_projector_lr 2e-5 \
27
- --deepspeed ./scripts/zero2.json \
28
- --model_name_or_path liuhaotian/llava-v1.5-7b \
29
- --version v1 \
30
- --data_path /home/guests/chantal_pellegrini/RaDialog_LLaVA/data/mimic_cxr_instruct_llava_v4_sentenized.json \
31
- --image_folder /home/data/DIVA/mimic/mimic-cxr-jpg/2.0.0 \
32
- --vision_tower biovil \
33
- --mm_projector_type mlp2x_gelu \
34
- --mm_vision_select_layer -2 \
35
- --mm_use_im_start_end False \
36
- --mm_use_im_patch_token False \
37
- --image_aspect_ratio pad \
38
- --group_by_modality_length True \
39
- --bf16 True \
40
- --output_dir ./checkpoints/llava-v1.5-7b-task-lora_radialog_instruct_llava_biovil_frozen_2e-5_5epochs_v4_sentenized \
41
- --num_train_epochs 5 \
42
- --per_device_train_batch_size 2 \
43
- --per_device_eval_batch_size 4 \
44
- --gradient_accumulation_steps 64 \
45
- --evaluation_strategy "no" \
46
- --save_strategy "steps" \
47
- --save_steps 1500 \
48
- --learning_rate 2e-5 \
49
- --max_grad_norm 0.1 \
50
- --weight_decay 0. \
51
- --warmup_ratio 0.03 \
52
- --lr_scheduler_type "cosine" \
53
- --logging_steps 1 \
54
- --tf32 True \
55
- --model_max_length 1300 \
56
- --gradient_checkpointing False \
57
- --dataloader_num_workers 4 \
58
- --lazy_preprocess True \
59
- --report_to wandb \
60
- --run_name llava-v1.5-7b-task-lora_radialog_instruct_llava_biovil_frozen_2e-5_5epochs_v4_sentenized
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
LLAVA_Biovil/slurm_config_biovil_frozen_v5.conf DELETED
@@ -1,60 +0,0 @@
1
- #!/bin/sh
2
-
3
- #SBATCH --job-name=ins_v5_frozen
4
- #SBATCH --output=oracle-%A.out # Standard output of the script (Can be absolute or relative path). %A adds the job id to the file name so you can launch the same script multiple times and get different logging files
5
- #SBATCH --error=oracle-%A.err # Standard error of the script
6
- #SBATCH --time=0-160:00:00 # Limit on the total run time (format: days-hours:minutes:seconds)
7
- #SBATCH --gres=gpu:1 # Number of GPUs if needed
8
- #SBATCH --cpus-per-task=8 # Number of CPUs (Don't use more than 24 per GPU)
9
- #SBATCH --mem=96G # Memory in GB (Don't use more than 126G per GPU), maybe 128?
10
- # activate corresponding environment
11
- # conda deactivate # If you launch your script from a terminal where your environment is already loaded, conda won't activate the environment. This guards against that. Not necessary if you always run this script from a clean terminal
12
- source ~/miniconda3/etc/profile.d/conda.sh
13
- conda activate llava_raddialog
14
- # FLASH ATTN NEEDS TO BE INSTALLED FROM THE SOURCE FOR CUDA 11.7 by previously setting CUDA HOME and LD_LIBRARY SOMETHING VARIABLES.
15
- # POTENTIALLY TRY OUT VERSION 2 AS WELL WHICH IS LLAMA 2 BASED
16
-
17
- export PYTHONPATH="/home/guests/chantal_pellegrini/RaDialog_LLaVA:$PYTHONPATH"
18
-
19
- export GPUS_PER_NODE=1
20
- #export MASTER_ADDR=$(scontrol show hostnames $SLURM_JOB_NODELIST | head -n 1) # TODO needed for multi-node setups
21
- #export MASTER_PORT=9901
22
- export MASTER_ADDR=$(hostname)
23
- export MASTER_PORT=29711
24
-
25
- srun --jobid $SLURM_JOBID python -m torch.distributed.run --nproc_per_node=$GPUS_PER_NODE --master_addr=$MASTER_ADDR --master_port=$MASTER_PORT llava/train/train_mem.py \
26
- --lora_enable True --lora_r 128 --lora_alpha 256 --mm_projector_lr 2e-5 \
27
- --deepspeed ./scripts/zero2.json \
28
- --model_name_or_path liuhaotian/llava-v1.5-7b \
29
- --version v1 \
30
- --data_path /home/guests/chantal_pellegrini/RaDialog_LLaVA/data/mimic_cxr_instruct_llava_v5.json \
31
- --image_folder /home/data/DIVA/mimic/mimic-cxr-jpg/2.0.0 \
32
- --vision_tower biovil \
33
- --mm_projector_type mlp2x_gelu \
34
- --mm_vision_select_layer -2 \
35
- --mm_use_im_start_end False \
36
- --mm_use_im_patch_token False \
37
- --image_aspect_ratio pad \
38
- --group_by_modality_length True \
39
- --bf16 True \
40
- --output_dir ./checkpoints/llava-v1.5-7b-task-lora_radialog_instruct_llava_biovil_frozen_2e-5_5epochs_v5 \
41
- --num_train_epochs 5 \
42
- --per_device_train_batch_size 2 \
43
- --per_device_eval_batch_size 4 \
44
- --gradient_accumulation_steps 64 \
45
- --evaluation_strategy "no" \
46
- --save_strategy "steps" \
47
- --save_steps 1500 \
48
- --learning_rate 2e-5 \
49
- --max_grad_norm 0.1 \
50
- --weight_decay 0. \
51
- --warmup_ratio 0.03 \
52
- --lr_scheduler_type "cosine" \
53
- --logging_steps 1 \
54
- --tf32 True \
55
- --model_max_length 1300 \
56
- --gradient_checkpointing False \
57
- --dataloader_num_workers 4 \
58
- --lazy_preprocess True \
59
- --report_to wandb \
60
- --run_name llava-v1.5-7b-task-lora_radialog_instruct_llava_biovil_frozen_2e-5_5epochs_v5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
LLAVA_Biovil/slurm_config_biovil_unfrozen.conf DELETED
@@ -1,61 +0,0 @@
1
- #!/bin/sh
2
-
3
- #SBATCH --job-name=ins_v4_unfrozen
4
- #SBATCH --output=oracle-%A.out # Standard output of the script (Can be absolute or relative path). %A adds the job id to the file name so you can launch the same script multiple times and get different logging files
5
- #SBATCH --error=oracle-%A.err # Standard error of the script
6
- #SBATCH --time=0-160:00:00 # Limit on the total run time (format: days-hours:minutes:seconds)
7
- #SBATCH --gres=gpu:1 # Number of GPUs if needed
8
- #SBATCH --cpus-per-task=8 # Number of CPUs (Don't use more than 24 per GPU)
9
- #SBATCH --mem=96G # Memory in GB (Don't use more than 126G per GPU), maybe 128?
10
- # activate corresponding environment
11
- # conda deactivate # If you launch your script from a terminal where your environment is already loaded, conda won't activate the environment. This guards against that. Not necessary if you always run this script from a clean terminal
12
- source ~/miniconda3/etc/profile.d/conda.sh
13
- conda activate llava_raddialog
14
- # FLASH ATTN NEEDS TO BE INSTALLED FROM THE SOURCE FOR CUDA 11.7 by previously setting CUDA HOME and LD_LIBRARY SOMETHING VARIABLES.
15
- # POTENTIALLY TRY OUT VERSION 2 AS WELL WHICH IS LLAMA 2 BASED
16
-
17
- export PYTHONPATH="/home/guests/chantal_pellegrini/RaDialog_LLaVA:$PYTHONPATH"
18
-
19
- export GPUS_PER_NODE=1
20
- #export MASTER_ADDR=$(scontrol show hostnames $SLURM_JOB_NODELIST | head -n 1) # TODO needed for multi-node setups
21
- #export MASTER_PORT=9901
22
- export MASTER_ADDR=$(hostname)
23
- export MASTER_PORT=29718
24
-
25
- srun --jobid $SLURM_JOBID python -m torch.distributed.run --nproc_per_node=$GPUS_PER_NODE --master_addr=$MASTER_ADDR --master_port=$MASTER_PORT llava/train/train_mem.py \
26
- --lora_enable True --lora_r 128 --lora_alpha 256 --mm_projector_lr 2e-5 \
27
- --deepspeed ./scripts/zero2.json \
28
- --model_name_or_path liuhaotian/llava-v1.5-7b \
29
- --version v1 \
30
- --data_path /home/guests/chantal_pellegrini/RaDialog_LLaVA/data/mimic_cxr_instruct_llava_v4_sentenized.json \
31
- --image_folder /home/data/DIVA/mimic/mimic-cxr-jpg/2.0.0 \
32
- --vision_tower biovil \
33
- --mm_projector_type mlp2x_gelu \
34
- --mm_vision_select_layer -2 \
35
- --mm_use_im_start_end False \
36
- --mm_use_im_patch_token False \
37
- --image_aspect_ratio pad \
38
- --group_by_modality_length True \
39
- --bf16 True \
40
- --output_dir ./checkpoints/llava-v1.5-7b-task-lora_radialog_instruct_llava_biovil_unfrozen_2e-5_5epochs_v4_sentenized \
41
- --num_train_epochs 5 \
42
- --per_device_train_batch_size 2 \
43
- --per_device_eval_batch_size 4 \
44
- --gradient_accumulation_steps 64 \
45
- --evaluation_strategy "no" \
46
- --save_strategy "steps" \
47
- --save_steps 1500 \
48
- --learning_rate 2e-5 \
49
- --max_grad_norm 0.1 \
50
- --weight_decay 0. \
51
- --warmup_ratio 0.03 \
52
- --lr_scheduler_type "cosine" \
53
- --logging_steps 1 \
54
- --tf32 True \
55
- --model_max_length 1300 \
56
- --gradient_checkpointing False \
57
- --dataloader_num_workers 4 \
58
- --lazy_preprocess True \
59
- --report_to wandb \
60
- --run_name llava-v1.5-7b-task-lora_radialog_instruct_llava_biovil_unfrozen_2e-5_5epochs_v4_sentenized \
61
- --unfreeze_n_vision_tower_layers 12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
LLAVA_Biovil/slurm_config_biovil_unfrozen_v5.conf DELETED
@@ -1,61 +0,0 @@
1
- #!/bin/sh
2
-
3
- #SBATCH --job-name=ins_v5_unfrozen
4
- #SBATCH --output=oracle-%A.out # Standard output of the script (Can be absolute or relative path). %A adds the job id to the file name so you can launch the same script multiple times and get different logging files
5
- #SBATCH --error=oracle-%A.err # Standard error of the script
6
- #SBATCH --time=0-160:00:00 # Limit on the total run time (format: days-hours:minutes:seconds)
7
- #SBATCH --gres=gpu:1 # Number of GPUs if needed
8
- #SBATCH --cpus-per-task=8 # Number of CPUs (Don't use more than 24 per GPU)
9
- #SBATCH --mem=96G # Memory in GB (Don't use more than 126G per GPU), maybe 128?
10
- # activate corresponding environment
11
- # conda deactivate # If you launch your script from a terminal where your environment is already loaded, conda won't activate the environment. This guards against that. Not necessary if you always run this script from a clean terminal
12
- source ~/miniconda3/etc/profile.d/conda.sh
13
- conda activate llava_raddialog
14
- # FLASH ATTN NEEDS TO BE INSTALLED FROM THE SOURCE FOR CUDA 11.7 by previously setting CUDA HOME and LD_LIBRARY SOMETHING VARIABLES.
15
- # POTENTIALLY TRY OUT VERSION 2 AS WELL WHICH IS LLAMA 2 BASED
16
-
17
- export PYTHONPATH="/home/guests/chantal_pellegrini/RaDialog_LLaVA:$PYTHONPATH"
18
-
19
- export GPUS_PER_NODE=1
20
- #export MASTER_ADDR=$(scontrol show hostnames $SLURM_JOB_NODELIST | head -n 1) # TODO needed for multi-node setups
21
- #export MASTER_PORT=9901
22
- export MASTER_ADDR=$(hostname)
23
- export MASTER_PORT=29712
24
-
25
- srun --jobid $SLURM_JOBID python -m torch.distributed.run --nproc_per_node=$GPUS_PER_NODE --master_addr=$MASTER_ADDR --master_port=$MASTER_PORT llava/train/train_mem.py \
26
- --lora_enable True --lora_r 128 --lora_alpha 256 --mm_projector_lr 2e-5 \
27
- --deepspeed ./scripts/zero2.json \
28
- --model_name_or_path liuhaotian/llava-v1.5-7b \
29
- --version v1 \
30
- --data_path /home/guests/chantal_pellegrini/RaDialog_LLaVA/data/mimic_cxr_instruct_llava_v5.json \
31
- --image_folder /home/data/DIVA/mimic/mimic-cxr-jpg/2.0.0 \
32
- --vision_tower biovil \
33
- --mm_projector_type mlp2x_gelu \
34
- --mm_vision_select_layer -2 \
35
- --mm_use_im_start_end False \
36
- --mm_use_im_patch_token False \
37
- --image_aspect_ratio pad \
38
- --group_by_modality_length True \
39
- --bf16 True \
40
- --output_dir ./checkpoints/llava-v1.5-7b-task-lora_radialog_instruct_llava_biovil_unfrozen_2e-5_5epochs_v5 \
41
- --num_train_epochs 5 \
42
- --per_device_train_batch_size 2 \
43
- --per_device_eval_batch_size 4 \
44
- --gradient_accumulation_steps 64 \
45
- --evaluation_strategy "no" \
46
- --save_strategy "steps" \
47
- --save_steps 1500 \
48
- --learning_rate 2e-5 \
49
- --max_grad_norm 0.1 \
50
- --weight_decay 0. \
51
- --warmup_ratio 0.03 \
52
- --lr_scheduler_type "cosine" \
53
- --logging_steps 1 \
54
- --tf32 True \
55
- --model_max_length 1300 \
56
- --gradient_checkpointing False \
57
- --dataloader_num_workers 4 \
58
- --lazy_preprocess True \
59
- --report_to wandb \
60
- --run_name llava-v1.5-7b-task-lora_radialog_instruct_llava_biovil_unfrozen_2e-5_5epochs_v5 \
61
- --unfreeze_n_vision_tower_layers 12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
LLAVA_Biovil/slurm_config_llavamed.conf DELETED
@@ -1,61 +0,0 @@
1
- #!/bin/sh
2
-
3
- #SBATCH --job-name=rd_llavamed
4
- #SBATCH --output=oracle-%A.out # Standard output of the script (Can be absolute or relative path). %A adds the job id to the file name so you can launch the same script multiple times and get different logging files
5
- #SBATCH --error=oracle-%A.err # Standard error of the script
6
- #SBATCH --time=0-160:00:00 # Limit on the total run time (format: days-hours:minutes:seconds)
7
- #SBATCH --gres=gpu:1 # Number of GPUs if needed
8
- #SBATCH --cpus-per-task=8 # Number of CPUs (Don't use more than 24 per GPU)
9
- #SBATCH --mem=96G # Memory in GB (Don't use more than 126G per GPU), maybe 128?
10
-
11
- # activate corresponding environment
12
- # conda deactivate # If you launch your script from a terminal where your environment is already loaded, conda won't activate the environment. This guards against that. Not necessary if you always run this script from a clean terminal
13
- source ~/miniconda3/etc/profile.d/conda.sh
14
- conda activate llava_raddialog
15
- # FLASH ATTN NEEDS TO BE INSTALLED FROM THE SOURCE FOR CUDA 11.7 by previously setting CUDA HOME and LD_LIBRARY SOMETHING VARIABLES.
16
- # POTENTIALLY TRY OUT VERSION 2 AS WELL WHICH IS LLAMA 2 BASED
17
-
18
- export PYTHONPATH="/home/guests/chantal_pellegrini/RaDialog_LLaVA:$PYTHONPATH"
19
-
20
- export GPUS_PER_NODE=1
21
- #export MASTER_ADDR=$(scontrol show hostnames $SLURM_JOB_NODELIST | head -n 1) # TODO needed for multi-node setups
22
- #export MASTER_PORT=9901
23
- export MASTER_ADDR=$(hostname)
24
- export MASTER_PORT=29719
25
-
26
- srun --jobid $SLURM_JOBID python -m torch.distributed.run --nproc_per_node=$GPUS_PER_NODE --master_addr=$MASTER_ADDR --master_port=$MASTER_PORT llava/train/train_mem.py \
27
- --lora_enable True --lora_r 128 --lora_alpha 256 --mm_projector_lr 2e-4 \
28
- --deepspeed ./scripts/zero2.json \
29
- --model_name_or_path /home/guests/shared/LLaMA/7B_LLaVAMed \
30
- --version llava_med \
31
- --data_path /home/guests/chantal_pellegrini/RaDialog_LLaVA/data/mimic_cxr_instruct_llava.json \
32
- --image_folder /home/data/DIVA/mimic/mimic-cxr-jpg/2.0.0 \
33
- --vision_tower biovil \
34
- --mm_projector_type mlp2x_gelu \
35
- --mm_vision_select_layer -2 \
36
- --mm_use_im_start_end True \
37
- --mm_use_im_patch_token True \
38
- --image_aspect_ratio square \
39
- --group_by_modality_length False \
40
- --bf16 True \
41
- --output_dir ./checkpoints/llava-v1.5-7b-task-lora_radialog_cosine_llavamed_biovil \
42
- --num_train_epochs 1 \
43
- --per_device_train_batch_size 2 \
44
- --per_device_eval_batch_size 4 \
45
- --gradient_accumulation_steps 64 \
46
- --evaluation_strategy "no" \
47
- --save_strategy "steps" \
48
- --save_steps 500 \
49
- --learning_rate 2e-4 \
50
- --max_grad_norm 0.1 \
51
- --weight_decay 0. \
52
- --warmup_ratio 0.03 \
53
- --lr_scheduler_type "cosine" \
54
- --logging_steps 1 \
55
- --tf32 True \
56
- --model_max_length 1300 \
57
- --gradient_checkpointing False \
58
- --dataloader_num_workers 4 \
59
- --lazy_preprocess True \
60
- --report_to wandb \
61
- --run_name llava-v1.5-7b-task-lora_radialog_cosine_llavamed_biovil
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
LLAVA_Biovil/slurm_config_ms_cxr_t.conf DELETED
@@ -1,61 +0,0 @@
1
- #!/bin/sh
2
-
3
- #SBATCH --job-name=cxrt_concat
4
- #SBATCH --output=oracle-%A.out # Standard output of the script (Can be absolute or relative path). %A adds the job id to the file name so you can launch the same script multiple times and get different logging files
5
- #SBATCH --error=oracle-%A.err # Standard error of the script
6
- #SBATCH --time=0-160:00:00 # Limit on the total run time (format: days-hours:minutes:seconds)
7
- #SBATCH --gres=gpu:1 # Number of GPUs if needed
8
- #SBATCH --cpus-per-task=8 # Number of CPUs (Don't use more than 24 per GPU)
9
- #SBATCH --mem=96G # Memory in GB (Don't use more than 126G per GPU), maybe 128?
10
-
11
- # activate corresponding environment
12
- # conda deactivate # If you launch your script from a terminal where your environment is already loaded, conda won't activate the environment. This guards against that. Not necessary if you always run this script from a clean terminal
13
- source ~/miniconda3/etc/profile.d/conda.sh
14
- conda activate llava_raddialog
15
- # FLASH ATTN NEEDS TO BE INSTALLED FROM THE SOURCE FOR CUDA 11.7 by previously setting CUDA HOME and LD_LIBRARY SOMETHING VARIABLES.
16
- # POTENTIALLY TRY OUT VERSION 2 AS WELL WHICH IS LLAMA 2 BASED
17
- export PYTHONPATH="/home/guests/chantal_pellegrini/RaDialog_LLaVA:$PYTHONPATH"
18
-
19
- export GPUS_PER_NODE=1
20
- #export MASTER_ADDR=$(scontrol show hostnames $SLURM_JOB_NODELIST | head -n 1) # TODO needed for multi-node setups
21
- #export MASTER_PORT=9901
22
- export MASTER_ADDR=$(hostname)
23
- export MASTER_PORT=29715
24
-
25
- srun --jobid $SLURM_JOBID python -m torch.distributed.run --nproc_per_node=$GPUS_PER_NODE --master_addr=$MASTER_ADDR --master_port=$MASTER_PORT llava/train/train_mem.py \
26
- --lora_enable True --lora_r 128 --lora_alpha 256 --mm_projector_lr 2e-5 \
27
- --deepspeed ./scripts/zero2.json \
28
- --model_name_or_path liuhaotian/llava-v1.5-7b \
29
- --version v1 \
30
- --data_path /home/guests/chantal_pellegrini/RaDialog_LLaVA/data/ms_cxr_t_llava.json \
31
- --image_folder /home/data/DIVA/mimic/mimic-cxr-jpg/2.0.0 \
32
- --vision_tower biovil \
33
- --mm_projector_type mlp2x_gelu \
34
- --mm_vision_select_layer -2 \
35
- --mm_use_im_start_end False \
36
- --mm_use_im_patch_token False \
37
- --image_aspect_ratio pad \
38
- --group_by_modality_length True \
39
- --bf16 True \
40
- --output_dir ./checkpoints/llava-v1.5-7b-task-lora_radialog_cxr_t_frozen_pool_concat \
41
- --num_train_epochs 10 \
42
- --per_device_train_batch_size 2 \
43
- --per_device_eval_batch_size 4 \
44
- --gradient_accumulation_steps 64 \
45
- --evaluation_strategy "no" \
46
- --save_strategy "epoch" \
47
- --save_steps 500 \
48
- --learning_rate 2e-5 \
49
- --max_grad_norm 0.1 \
50
- --weight_decay 0. \
51
- --warmup_ratio 0.03 \
52
- --lr_scheduler_type "cosine" \
53
- --logging_steps 1 \
54
- --tf32 True \
55
- --model_max_length 550 \
56
- --gradient_checkpointing False \
57
- --dataloader_num_workers 4 \
58
- --lazy_preprocess True \
59
- --report_to wandb \
60
- --run_name llava-v1.5-7b-task-lora_radialog_cxr_t_frozen_pool_concat \
61
- --mv_type "pool_concat"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
LLAVA_Biovil/slurm_config_pretrain.conf DELETED
@@ -1,61 +0,0 @@
1
- #!/bin/sh
2
-
3
- #SBATCH --job-name=oracle
4
- #SBATCH --output=oracle-%A.out # Standard output of the script (Can be absolute or relative path). %A adds the job id to the file name so you can launch the same script multiple times and get different logging files
5
- #SBATCH --error=oracle-%A.err # Standard error of the script
6
- #SBATCH --time=0-160:00:00 # Limit on the total run time (format: days-hours:minutes:seconds)
7
- #SBATCH --gres=gpu:1 # Number of GPUs if needed
8
- #SBATCH --cpus-per-task=4 # Number of CPUs (Don't use more than 24 per GPU)
9
- #SBATCH --mem=96G # Memory in GB (Don't use more than 126G per GPU), maybe 128?
10
-
11
- # activate corresponding environment
12
- # conda deactivate # If you launch your script from a terminal where your environment is already loaded, conda won't activate the environment. This guards against that. Not necessary if you always run this script from a clean terminal
13
- source ~/miniconda3/etc/profile.d/conda.sh
14
- conda activate oracle
15
- # FLASH ATTN NEEDS TO BE INSTALLED FROM THE SOURCE FOR CUDA 11.7 by previously setting CUDA HOME and LD_LIBRARY SOMETHING VARIABLES.
16
- # POTENTIALLY TRY OUT VERSION 2 AS WELL WHICH IS LLAMA 2 BASED
17
-
18
- export GPUS_PER_NODE=1
19
- #export MASTER_ADDR=$(scontrol show hostnames $SLURM_JOB_NODELIST | head -n 1) # TODO needed for multi-node setups
20
- #export MASTER_PORT=9901
21
- export MASTER_ADDR=$(hostname)
22
- export MASTER_PORT=29508
23
-
24
-
25
- srun --jobid $SLURM_JOBID python -m torch.distributed.run --nproc_per_node=$GPUS_PER_NODE --master_addr=$MASTER_ADDR --master_port=$MASTER_PORT llava/train/train_mem.py \
26
- --deepspeed ./scripts/zero2.json \
27
- --model_name_or_path liuhaotian/llava-v1.5-7b \
28
- --version v1 \
29
- --data_path /home/guests/ege_oezsoy/Oracle/data/llava_samples/train.json \
30
- --image_folder / \
31
- --vision_tower openai/clip-vit-large-patch14-336 \
32
- --mm_projector_type mlp2x_gelu \
33
- --tune_mm_mlp_adapter True \
34
- --mm_vision_select_layer -2 \
35
- --mm_use_im_start_end False \
36
- --mm_use_im_patch_token False \
37
- --image_aspect_ratio pad \
38
- --group_by_modality_length True \
39
- --bf16 True \
40
- --output_dir ./checkpoints/llava-v1.5-7b-task-4dor_pretrain_linear_weighting \
41
- --num_train_epochs 50 \
42
- --per_device_train_batch_size 16 \
43
- --per_device_eval_batch_size 4 \
44
- --gradient_accumulation_steps 1 \
45
- --evaluation_strategy "no" \
46
- --save_strategy "epoch" \
47
- --save_steps 10 \
48
- --save_total_limit 1 \
49
- --learning_rate 2e-5 \
50
- --max_grad_norm 0.1 \
51
- --weight_decay 0. \
52
- --warmup_ratio 0.03 \
53
- --lr_scheduler_type "cosine" \
54
- --logging_steps 1 \
55
- --tf32 True \
56
- --model_max_length 2048 \
57
- --gradient_checkpointing True \
58
- --dataloader_num_workers 4 \
59
- --lazy_preprocess True \
60
- --report_to wandb \
61
- --run_name llava-v1.5-7b-task-4dor_pretrain_linear_weighting