diff --git a/LLAVA_Biovil/cog.yaml b/LLAVA_Biovil/cog.yaml deleted file mode 100644 index 55b739fd437a1897c1c1ec001f47aac2fbfdf68b..0000000000000000000000000000000000000000 --- a/LLAVA_Biovil/cog.yaml +++ /dev/null @@ -1,37 +0,0 @@ -# Configuration for Cog ⚙️ -# Reference: https://github.com/replicate/cog/blob/main/docs/yaml.md - -build: - gpu: true - - python_version: "3.11" - - python_packages: - - "torch==2.0.1" - - "accelerate==0.21.0" - - "bitsandbytes==0.41.0" - - "deepspeed==0.9.5" - - "einops-exts==0.0.4" - - "einops==0.6.1" - - "gradio==3.35.2" - - "gradio_client==0.2.9" - - "httpx==0.24.0" - - "markdown2==2.4.10" - - "numpy==1.26.0" - - "peft==0.4.0" - - "scikit-learn==1.2.2" - - "sentencepiece==0.1.99" - - "shortuuid==1.0.11" - - "timm==0.6.13" - - "tokenizers==0.13.3" - - "torch==2.0.1" - - "torchvision==0.15.2" - - "transformers==4.31.0" - - "wandb==0.15.12" - - "wavedrom==2.0.3.post3" - - "Pygments==2.16.1" - run: - - curl -o /usr/local/bin/pget -L "https://github.com/replicate/pget/releases/download/v0.0.3/pget" && chmod +x /usr/local/bin/pget - -# predict.py defines how predictions are run on your model -predict: "predict.py:Predictor" diff --git a/LLAVA_Biovil/install.md b/LLAVA_Biovil/install.md deleted file mode 100644 index e1023309a419ee16243d3bccde322b90c63a934c..0000000000000000000000000000000000000000 --- a/LLAVA_Biovil/install.md +++ /dev/null @@ -1,6 +0,0 @@ -step 1: clone Llava -step 2: git clone https://github.com/Dao-AILab/flash-attention.git -step 3: conda install pytorch==2.0.1 torchvision==0.15.2 torchaudio==2.0.2 pytorch-cuda=11.7 -c pytorch -c nvidia -step 4: pip install -e . -step 5: pip install -e ".[train]" -step 6: in flash attention folder, run: python setup.py install diff --git a/LLAVA_Biovil/predict.py b/LLAVA_Biovil/predict.py deleted file mode 100644 index 9b91829d502696ca03237c9dee8e162292831e1e..0000000000000000000000000000000000000000 --- a/LLAVA_Biovil/predict.py +++ /dev/null @@ -1,157 +0,0 @@ -import torch - -from llava import IMAGE_TOKEN_INDEX, DEFAULT_IMAGE_TOKEN -from llava import conv_templates, SeparatorStyle -from llava import load_pretrained_model -from llava import disable_torch_init -from llava import tokenizer_image_token, KeywordsStoppingCriteria -from transformers.generation.streamers import TextIteratorStreamer - -from PIL import Image - -import requests -from io import BytesIO - -from cog import BasePredictor, Input, Path, ConcatenateIterator -import time -import subprocess -from threading import Thread - -import os -os.environ["HUGGINGFACE_HUB_CACHE"] = os.getcwd() + "/weights" - -# url for the weights mirror -REPLICATE_WEIGHTS_URL = "https://weights.replicate.delivery/default" -# files to download from the weights mirrors -weights = [ - { - "dest": "liuhaotian/llava-v1.5-13b", - # git commit hash from huggingface - "src": "llava-v1.5-13b/006818fc465ebda4c003c0998674d9141d8d95f8", - "files": [ - "config.json", - "generation_config.json", - "pytorch_model-00001-of-00003.bin", - "pytorch_model-00002-of-00003.bin", - "pytorch_model-00003-of-00003.bin", - "pytorch_model.bin.index.json", - "special_tokens_map.json", - "tokenizer.model", - "tokenizer_config.json", - ] - }, - { - "dest": "openai/clip-vit-large-patch14-336", - "src": "clip-vit-large-patch14-336/ce19dc912ca5cd21c8a653c79e251e808ccabcd1", - "files": [ - "config.json", - "preprocessor_config.json", - "pytorch_model.bin" - ], - } -] - -def download_json(url: str, dest: Path): - res = requests.get(url, allow_redirects=True) - if res.status_code == 200 and res.content: - 
with dest.open("wb") as f: - f.write(res.content) - else: - print(f"Failed to download {url}. Status code: {res.status_code}") - -def download_weights(baseurl: str, basedest: str, files: list[str]): - basedest = Path(basedest) - start = time.time() - print("downloading to: ", basedest) - basedest.mkdir(parents=True, exist_ok=True) - for f in files: - dest = basedest / f - url = os.path.join(REPLICATE_WEIGHTS_URL, baseurl, f) - if not dest.exists(): - print("downloading url: ", url) - if dest.suffix == ".json": - download_json(url, dest) - else: - subprocess.check_call(["pget", url, str(dest)], close_fds=False) - print("downloading took: ", time.time() - start) - -class Predictor(BasePredictor): - def setup(self) -> None: - """Load the model into memory to make running multiple predictions efficient""" - for weight in weights: - download_weights(weight["src"], weight["dest"], weight["files"]) - disable_torch_init() - - self.tokenizer, self.model, self.image_processor, self.context_len = load_pretrained_model("liuhaotian/llava-v1.5-13b", model_name="llava-v1.5-13b", model_base=None, load_8bit=False, load_4bit=False) - - def predict( - self, - image: Path = Input(description="Input image"), - prompt: str = Input(description="Prompt to use for text generation"), - top_p: float = Input(description="When decoding text, samples from the top p percentage of most likely tokens; lower to ignore less likely tokens", ge=0.0, le=1.0, default=1.0), - temperature: float = Input(description="Adjusts randomness of outputs, greater than 1 is random and 0 is deterministic", default=0.2, ge=0.0), - max_tokens: int = Input(description="Maximum number of tokens to generate. A word is generally 2-3 tokens", default=1024, ge=0), - ) -> ConcatenateIterator[str]: - """Run a single prediction on the model""" - - conv_mode = "llava_v1" - conv = conv_templates[conv_mode].copy() - - image_data = load_image(str(image)) - image_tensor = self.image_processor.preprocess(image_data, return_tensors='pt')['pixel_values'].half().cuda() - - # loop start - - # just one turn, always prepend image token - inp = DEFAULT_IMAGE_TOKEN + '\n' + prompt - conv.append_message(conv.roles[0], inp) - - conv.append_message(conv.roles[1], None) - prompt = conv.get_prompt() - - input_ids = tokenizer_image_token(prompt, self.tokenizer, IMAGE_TOKEN_INDEX, return_tensors='pt').unsqueeze(0).cuda() - stop_str = conv.sep if conv.sep_style != SeparatorStyle.TWO else conv.sep2 - keywords = [stop_str] - stopping_criteria = KeywordsStoppingCriteria(keywords, self.tokenizer, input_ids) - streamer = TextIteratorStreamer(self.tokenizer, skip_prompt=True, timeout=20.0) - - with torch.inference_mode(): - thread = Thread(target=self.model.generate, kwargs=dict( - inputs=input_ids, - images=image_tensor, - do_sample=True, - temperature=temperature, - top_p=top_p, - max_new_tokens=max_tokens, - streamer=streamer, - use_cache=True, - stopping_criteria=[stopping_criteria])) - thread.start() - # workaround: second-to-last token is always " " - # but we want to keep it if it's not the second-to-last token - prepend_space = False - for new_text in streamer: - if new_text == " ": - prepend_space = True - continue - if new_text.endswith(stop_str): - new_text = new_text[:-len(stop_str)].strip() - prepend_space = False - elif prepend_space: - new_text = " " + new_text - prepend_space = False - if len(new_text): - yield new_text - if prepend_space: - yield " " - thread.join() - - -def load_image(image_file): - if image_file.startswith('http') or 
image_file.startswith('https'): - response = requests.get(image_file) - image = Image.open(BytesIO(response.content)).convert('RGB') - else: - image = Image.open(image_file).convert('RGB') - return image - diff --git a/LLAVA_Biovil/pyproject.toml b/LLAVA_Biovil/pyproject.toml deleted file mode 100644 index 13d4e0144acac3ba288fea8f349c66ee4c5c3667..0000000000000000000000000000000000000000 --- a/LLAVA_Biovil/pyproject.toml +++ /dev/null @@ -1,36 +0,0 @@ -[build-system] -requires = ["setuptools>=61.0"] -build-backend = "setuptools.build_meta" - -[project] -name = "llava" -version = "1.1.3" -description = "Towards GPT-4 like large language and visual assistant." -readme = "README.md" -requires-python = ">=3.8" -classifiers = [ - "Programming Language :: Python :: 3", - "License :: OSI Approved :: Apache Software License", -] -dependencies = [ - "torch==2.0.1", "torchvision==0.15.2", - "transformers==4.31.0", "tokenizers>=0.12.1,<0.14", "sentencepiece==0.1.99", "shortuuid", - "accelerate==0.21.0", "peft==0.4.0", "bitsandbytes==0.41.0", - "pydantic<2,>=1", "markdown2[all]", "numpy", "scikit-learn==1.2.2", - "gradio==3.35.2", "gradio_client==0.2.9", - "requests", "httpx==0.24.0", "uvicorn", "fastapi", - "einops==0.6.1", "einops-exts==0.0.4", "timm==0.6.13", -] - -[project.optional-dependencies] -train = ["deepspeed==0.9.5", "ninja", "wandb"] - -[project.urls] -"Homepage" = "https://llava-vl.github.io" -"Bug Tracker" = "https://github.com/haotian-liu/LLaVA/issues" - -[tool.setuptools.packages.find] -exclude = ["assets*", "benchmark*", "docs", "dist*", "playground*", "scripts*", "tests*"] - -[tool.wheel] -exclude = ["assets*", "benchmark*", "docs", "dist*", "playground*", "scripts*", "tests*"] diff --git a/LLAVA_Biovil/scripts/convert_gqa_for_eval.py b/LLAVA_Biovil/scripts/convert_gqa_for_eval.py deleted file mode 100644 index 4d46c8b876df618faac548e9b369109d541f4f23..0000000000000000000000000000000000000000 --- a/LLAVA_Biovil/scripts/convert_gqa_for_eval.py +++ /dev/null @@ -1,18 +0,0 @@ -import os -import json -import argparse - -parser = argparse.ArgumentParser() -parser.add_argument("--src", type=str) -parser.add_argument("--dst", type=str) -args = parser.parse_args() - -all_answers = [] -for line_idx, line in enumerate(open(args.src)): - res = json.loads(line) - question_id = res['question_id'] - text = res['text'].rstrip('.').lower() - all_answers.append({"questionId": question_id, "prediction": text}) - -with open(args.dst, 'w') as f: - json.dump(all_answers, f) diff --git a/LLAVA_Biovil/scripts/convert_mmbench_for_submission.py b/LLAVA_Biovil/scripts/convert_mmbench_for_submission.py deleted file mode 100644 index 27baec12f9ef48d4e3df41e15b1d2644aab4174b..0000000000000000000000000000000000000000 --- a/LLAVA_Biovil/scripts/convert_mmbench_for_submission.py +++ /dev/null @@ -1,27 +0,0 @@ -import os -import json -import argparse -import pandas as pd - -def get_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--annotation-file", type=str, required=True) - parser.add_argument("--result-dir", type=str, required=True) - parser.add_argument("--upload-dir", type=str, required=True) - parser.add_argument("--experiment", type=str, required=True) - - return parser.parse_args() - -if __name__ == "__main__": - args = get_args() - - df = pd.read_table(args.annotation_file) - - cur_df = df.copy() - cur_df = cur_df.drop(columns=['hint', 'category', 'source', 'image', 'comment', 'l2-category']) - cur_df.insert(6, 'prediction', None) - for pred in open(os.path.join(args.result_dir, 
f"{args.experiment}.jsonl")): - pred = json.loads(pred) - cur_df.loc[df['index'] == pred['question_id'], 'prediction'] = pred['text'] - - cur_df.to_excel(os.path.join(args.upload_dir, f"{args.experiment}.xlsx"), index=False, engine='openpyxl') diff --git a/LLAVA_Biovil/scripts/convert_mmvet_for_eval.py b/LLAVA_Biovil/scripts/convert_mmvet_for_eval.py deleted file mode 100644 index 97f5cfb7fb7691ef3921e3e6afc6d82ec54d4c6c..0000000000000000000000000000000000000000 --- a/LLAVA_Biovil/scripts/convert_mmvet_for_eval.py +++ /dev/null @@ -1,18 +0,0 @@ -import os -import json -import argparse - -parser = argparse.ArgumentParser() -parser.add_argument("--src", type=str) -parser.add_argument("--dst", type=str) -args = parser.parse_args() - -cur_result = {} - -for line in open(args.src): - data = json.loads(line) - qid = data['question_id'] - cur_result[f'v1_{qid}'] = data['text'] - -with open(args.dst, 'w') as f: - json.dump(cur_result, f, indent=2) diff --git a/LLAVA_Biovil/scripts/convert_seed_for_submission.py b/LLAVA_Biovil/scripts/convert_seed_for_submission.py deleted file mode 100644 index ae903e63087516bc8ae77142532196be6a85589c..0000000000000000000000000000000000000000 --- a/LLAVA_Biovil/scripts/convert_seed_for_submission.py +++ /dev/null @@ -1,74 +0,0 @@ -import os -import json -import argparse - - -def get_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--annotation-file", type=str) - parser.add_argument("--result-file", type=str) - parser.add_argument("--result-upload-file", type=str) - return parser.parse_args() - - -def eval_single(result_file, eval_only_type=None): - results = {} - for line in open(result_file): - row = json.loads(line) - results[row['question_id']] = row - - type_counts = {} - correct_counts = {} - for question_data in data['questions']: - if eval_only_type is not None and question_data['data_type'] != eval_only_type: continue - data_type = question_data['question_type_id'] - type_counts[data_type] = type_counts.get(data_type, 0) + 1 - try: - question_id = int(question_data['question_id']) - except: - question_id = question_data['question_id'] - if question_id not in results: - correct_counts[data_type] = correct_counts.get(data_type, 0) - continue - row = results[question_id] - if row['text'] == question_data['answer']: - correct_counts[data_type] = correct_counts.get(data_type, 0) + 1 - - total_count = 0 - total_correct = 0 - for data_type in sorted(type_counts.keys()): - accuracy = correct_counts[data_type] / type_counts[data_type] * 100 - if eval_only_type is None: - print(f"{ques_type_id_to_name[data_type]}: {accuracy:.2f}%") - - total_count += type_counts[data_type] - total_correct += correct_counts[data_type] - - total_accuracy = total_correct / total_count * 100 - if eval_only_type is None: - print(f"Total accuracy: {total_accuracy:.2f}%") - else: - print(f"{eval_only_type} accuracy: {total_accuracy:.2f}%") - - return results - -if __name__ == "__main__": - args = get_args() - data = json.load(open(args.annotation_file)) - ques_type_id_to_name = {id:n for n,id in data['question_type'].items()} - - results = eval_single(args.result_file) - eval_single(args.result_file, eval_only_type='image') - eval_single(args.result_file, eval_only_type='video') - - with open(args.result_upload_file, 'w') as fp: - for question in data['questions']: - qid = question['question_id'] - if qid in results: - result = results[qid] - else: - result = results[int(qid)] - fp.write(json.dumps({ - 'question_id': qid, - 'prediction': result['text'] - }) + '\n') diff 
--git a/LLAVA_Biovil/scripts/convert_sqa_to_llava.py b/LLAVA_Biovil/scripts/convert_sqa_to_llava.py deleted file mode 100644 index 26fe3002413a23b5029e540c8b338ebb14307bf6..0000000000000000000000000000000000000000 --- a/LLAVA_Biovil/scripts/convert_sqa_to_llava.py +++ /dev/null @@ -1,88 +0,0 @@ -import json -import os -import fire -import re -from convert_sqa_to_llava_base_prompt import build_prompt_chatbot - - -def convert_to_llava(base_dir, split, prompt_format="QCM-LEA"): - split_indices = json.load(open(os.path.join(base_dir, "pid_splits.json")))[split] - problems = json.load(open(os.path.join(base_dir, "problems.json"))) - - split_problems = build_prompt_chatbot( - problems, split_indices, prompt_format, - use_caption=False, is_test=False) - - target_format = [] - for prob_id, (input, output) in split_problems.items(): - if input.startswith('Question: '): - input = input.replace('Question: ', '') - if output.startswith('Answer: '): - output = output.replace('Answer: ', '') - - raw_prob_data = problems[prob_id] - if raw_prob_data['image'] is None: - target_format.append({ - "id": prob_id, - "conversations": [ - {'from': 'human', 'value': f"{input}"}, - {'from': 'gpt', 'value': f"{output}"}, - ], - }) - - else: - target_format.append({ - "id": prob_id, - "image": os.path.join(prob_id, raw_prob_data['image']), - "conversations": [ - {'from': 'human', 'value': f"{input}\n"}, - {'from': 'gpt', 'value': f"{output}"}, - ], - }) - - print(f'Number of samples: {len(target_format)}') - - with open(os.path.join(base_dir, f"llava_{split}_{prompt_format}.json"), "w") as f: - json.dump(target_format, f, indent=2) - - -def convert_to_jsonl(base_dir, split, prompt_format="QCM-LEPA"): - split_indices = json.load(open(os.path.join(base_dir, "pid_splits.json")))[split] - problems = json.load(open(os.path.join(base_dir, "problems.json"))) - - split_problems = build_prompt_chatbot( - problems, split_indices, prompt_format, - use_caption=False, is_test=False) - - writer = open(os.path.join(base_dir, f"scienceqa_{split}_{prompt_format}.jsonl"), "w") - for prob_id, (input, output) in split_problems.items(): - if input.startswith('Question: '): - input = input.replace('Question: ', '') - if output.startswith('Answer: '): - output = output.replace('Answer: ', '') - - raw_prob_data = problems[prob_id] - if raw_prob_data['image'] is None: - data = { - "id": prob_id, - "instruction": f"{input}", - "output": f"{output}", - } - - else: - data = { - "id": prob_id, - "image": os.path.join(prob_id, raw_prob_data['image']), - "instruction": f"{input}\n", - "output": f"{output}", - } - writer.write(json.dumps(data) + '\n') - writer.close() - - -def main(task, **kwargs): - globals()[task](**kwargs) - - -if __name__ == "__main__": - fire.Fire(main) diff --git a/LLAVA_Biovil/scripts/convert_sqa_to_llava_base_prompt.py b/LLAVA_Biovil/scripts/convert_sqa_to_llava_base_prompt.py deleted file mode 100644 index b327fcc29eb44d7fe68be35da25bafa0e1d6feba..0000000000000000000000000000000000000000 --- a/LLAVA_Biovil/scripts/convert_sqa_to_llava_base_prompt.py +++ /dev/null @@ -1,334 +0,0 @@ -def get_question_text(problem): - question = problem['question'] - return question - - -def get_context_text(problem, use_caption): - txt_context = problem['hint'] - img_context = problem['caption'] if use_caption else "" - context = " ".join([txt_context, img_context]).strip() - if context == "": - context = "N/A" - return context - - -def get_choice_text(probelm, options): - choices = probelm['choices'] - choice_list = [] - for i, c in 
enumerate(choices): - choice_list.append("({}) {}".format(options[i], c)) - choice_txt = " ".join(choice_list) - #print(choice_txt) - return choice_txt - - -def get_answer(problem, options): - return options[problem['answer']] - - -def get_lecture_text(problem): - # \\n: GPT-3 can generate the lecture with more tokens. - lecture = problem['lecture'].replace("\n", "\\n") - return lecture - - -def get_solution_text(problem): - # \\n: GPT-3 can generate the solution with more tokens - solution = problem['solution'].replace("\n", "\\n") - return solution - - -def create_one_example_chatbot(format, question, context, choice, answer, lecture, solution, test_example=True): - - input_format, output_format = format.split("-") - - ## Inputs - if input_format == "CQM": - input = f"Context: {context}\nQuestion: {question}\nOptions: {choice}\n" - elif input_format == "QCM": - input = f"Question: {question}\nContext: {context}\nOptions: {choice}\n" - # upper bound experiment - elif input_format == "QCML": - input = f"Question: {question}\nContext: {context}\nOptions: {choice}\nBECAUSE: {lecture}\n" - elif input_format == "QCME": - input = f"Question: {question}\nContext: {context}\nOptions: {choice}\nBECAUSE: {solution}\n" - elif input_format == "QCMLE": - input = f"Question: {question}\nContext: {context}\nOptions: {choice}\nBECAUSE: {lecture} {solution}\n" - - elif input_format == "QCLM": - input = f"Question: {question}\nContext: {context}\nBECAUSE: {lecture}\nOptions: {choice}\n" - elif input_format == "QCEM": - input = f"Question: {question}\nContext: {context}\nBECAUSE: {solution}\nOptions: {choice}\n" - elif input_format == "QCLEM": - input = f"Question: {question}\nContext: {context}\nBECAUSE: {lecture} {solution}\nOptions: {choice}\n" - - # Outputs - if test_example: - output = "Answer:" - elif output_format == 'A': - output = f"Answer: The answer is {answer}." - - elif output_format == 'AL': - output = f"Answer: The answer is {answer}. BECAUSE: {solution}" - elif output_format == 'AE': - output = f"Answer: The answer is {answer}. BECAUSE: {lecture}" - elif output_format == 'ALE': - output = f"Answer: The answer is {answer}. BECAUSE: {lecture} {solution}" - elif output_format == 'AEL': - output = f"Answer: The answer is {answer}. BECAUSE: {solution} {lecture}" - - elif output_format == 'LA': - output = f"Answer: {lecture} The answer is {answer}." - elif output_format == 'EA': - output = f"Answer: {solution} The answer is {answer}." - elif output_format == 'LEA': - output = f"Answer: {lecture} {solution} The answer is {answer}." - elif output_format == 'ELA': - output = f"Answer: {solution} {lecture} The answer is {answer}." - elif output_format == 'LEPA': - output = '' - if len(lecture.strip()) > 0: - output += f"LECTURE: {lecture}\n" - if len(solution.strip()) > 0: - output += f"SOLUTION: {solution}\n" - output += '###\n' - output += f"ANSWER: {answer}." 
- - input = input.replace(" ", " ").strip() - output = output.replace(" ", " ").strip() - if input.endswith("BECAUSE:"): - input = input.replace("BECAUSE:", "").strip() - if output.endswith("BECAUSE:"): - output = output.replace("BECAUSE:", "").strip() - return input, output - - -def create_one_example(format, question, context, choice, answer, lecture, solution, test_example=True): - - input_format, output_format = format.split("-") - - ## Inputs - if input_format == "CQM": - input = f"Context: {context}\nQuestion: {question}\nOptions: {choice}\n" - elif input_format == "QCM": - input = f"Question: {question}\nContext: {context}\nOptions: {choice}\n" - # upper bound experiment - elif input_format == "QCML": - input = f"Question: {question}\nContext: {context}\nOptions: {choice}\nBECAUSE: {lecture}\n" - elif input_format == "QCME": - input = f"Question: {question}\nContext: {context}\nOptions: {choice}\nBECAUSE: {solution}\n" - elif input_format == "QCMLE": - input = f"Question: {question}\nContext: {context}\nOptions: {choice}\nBECAUSE: {lecture} {solution}\n" - - elif input_format == "QCLM": - input = f"Question: {question}\nContext: {context}\nBECAUSE: {lecture}\nOptions: {choice}\n" - elif input_format == "QCEM": - input = f"Question: {question}\nContext: {context}\nBECAUSE: {solution}\nOptions: {choice}\n" - elif input_format == "QCLEM": - input = f"Question: {question}\nContext: {context}\nBECAUSE: {lecture} {solution}\nOptions: {choice}\n" - - # Outputs - if test_example: - output = "Answer:" - elif output_format == 'A': - output = f"Answer: The answer is {answer}." - - elif output_format == 'AL': - output = f"Answer: The answer is {answer}. BECAUSE: {solution}" - elif output_format == 'AE': - output = f"Answer: The answer is {answer}. BECAUSE: {lecture}" - elif output_format == 'ALE': - output = f"Answer: The answer is {answer}. BECAUSE: {lecture} {solution}" - elif output_format == 'AEL': - output = f"Answer: The answer is {answer}. BECAUSE: {solution} {lecture}" - - elif output_format == 'LA': - output = f"Answer: {lecture} The answer is {answer}." - elif output_format == 'EA': - output = f"Answer: {solution} The answer is {answer}." - elif output_format == 'LEA': - output = f"Answer: {lecture} {solution} The answer is {answer}." - elif output_format == 'ELA': - output = f"Answer: {solution} {lecture} The answer is {answer}." 
- - text = input + output - text = text.replace(" ", " ").strip() - if text.endswith("BECAUSE:"): - text = text.replace("BECAUSE:", "").strip() - return text - - - -def create_one_example_gpt4(format, question, context, choice, answer, lecture, solution, test_example=True): - - input_format, output_format = format.split("-") - - ## Inputs - if input_format == "CQM": - input = f"Context: {context}\nQuestion: {question}\nOptions: {choice}\n" - elif input_format == "QCM": - input = f"Question: {question}\nContext: {context}\nOptions: {choice}\n" - # upper bound experiment - elif input_format == "QCML": - input = f"Question: {question}\nContext: {context}\nOptions: {choice}\nBECAUSE: {lecture}\n" - elif input_format == "QCME": - input = f"Question: {question}\nContext: {context}\nOptions: {choice}\nBECAUSE: {solution}\n" - elif input_format == "QCMLE": - input = f"Question: {question}\nContext: {context}\nOptions: {choice}\nBECAUSE: {lecture} {solution}\n" - - elif input_format == "QCLM": - input = f"Question: {question}\nContext: {context}\nBECAUSE: {lecture}\nOptions: {choice}\n" - elif input_format == "QCEM": - input = f"Question: {question}\nContext: {context}\nBECAUSE: {solution}\nOptions: {choice}\n" - elif input_format == "QCLEM": - input = f"Question: {question}\nContext: {context}\nBECAUSE: {lecture} {solution}\nOptions: {choice}\n" - - # Outputs - if test_example: - output = "Answer:" - elif output_format == 'A': - output = f"Answer: The answer is {answer}." - - elif output_format == 'AL': - output = f"Answer: The answer is {answer}. BECAUSE: {solution}" - elif output_format == 'AE': - output = f"Answer: The answer is {answer}. BECAUSE: {lecture}" - elif output_format == 'ALE': - output = f"Answer: The answer is {answer}. BECAUSE: {lecture} {solution}" - elif output_format == 'AEL': - output = f"Answer: The answer is {answer}. BECAUSE: {solution} {lecture}" - - elif output_format == 'LA': - output = f"Answer: {lecture} The answer is {answer}." - elif output_format == 'EA': - output = f"Answer: {solution} The answer is {answer}." - elif output_format == 'LEA': - output = f"Answer: {lecture} {solution} The answer is {answer}." - elif output_format == 'ELA': - output = f"Answer: {solution} {lecture} The answer is {answer}." 
- - input = input.replace(" ", " ").strip() - output = output.replace(" ", " ").strip() - if output.endswith("BECAUSE:"): - output = output.replace("BECAUSE:", "").strip() - - user_prompt = {"role": "user", "content": f"Can you explain {input}?"} - assistant_prompt = {"role": "assistant", "content": f"{output}"} - - return user_prompt, assistant_prompt - - -def build_prompt_chatbot(problems, shot_qids, prompt_format, use_caption=False, options=["A", "B", "C", "D", "E"], is_test=False): - examples = {} - - for qid in shot_qids: - question = get_question_text(problems[qid]) - context = get_context_text(problems[qid], use_caption) - choice = get_choice_text(problems[qid], options) - answer = get_answer(problems[qid], options) - lecture = get_lecture_text(problems[qid]).replace('\\n', '\n') - solution = get_solution_text(problems[qid]).replace('\\n', '\n') - - train_example = create_one_example_chatbot(prompt_format, - question, - context, - choice, - answer, - lecture, - solution, - test_example=is_test) - examples[qid] = train_example - return examples - - -def build_prompt(problems, shot_qids, test_qid, args): - - examples = [] - - # n-shot training examples - for qid in shot_qids: - question = get_question_text(problems[qid]) - context = get_context_text(problems[qid], args.use_caption) - choice = get_choice_text(problems[qid], args.options) - answer = get_answer(problems[qid], args.options) - lecture = get_lecture_text(problems[qid]) - solution = get_solution_text(problems[qid]) - - train_example = create_one_example(args.prompt_format, - question, - context, - choice, - answer, - lecture, - solution, - test_example=False) - examples.append(train_example) - - # test example - question = get_question_text(problems[test_qid]) - context = get_context_text(problems[test_qid], args.use_caption) - choice = get_choice_text(problems[test_qid], args.options) - answer = get_answer(problems[test_qid], args.options) - lecture = get_lecture_text(problems[test_qid]) - solution = get_solution_text(problems[test_qid]) - - test_example = create_one_example(args.prompt_format, - question, - context, - choice, - answer, - lecture, - solution, - test_example=True) - examples.append(test_example) - - # create the prompt input - prompt_input = '\n\n'.join(examples) - - return prompt_input - - -def build_prompt_gpt4(problems, shot_qids, test_qid, args): - - prompt_array = [{"role": "system", "content": "You are a helpful assistant."}] - - # n-shot training examples - for qid in shot_qids: - question = get_question_text(problems[qid]) - context = get_context_text(problems[qid], args.use_caption) - choice = get_choice_text(problems[qid], args.options) - answer = get_answer(problems[qid], args.options) - lecture = get_lecture_text(problems[qid]) - solution = get_solution_text(problems[qid]) - - user_prompt, assistant_prompt = create_one_example_gpt4(args.prompt_format, - question, - context, - choice, - answer, - lecture, - solution, - test_example=False) - prompt_array.append(user_prompt) - prompt_array.append(assistant_prompt) - - # test example - question = get_question_text(problems[test_qid]) - context = get_context_text(problems[test_qid], args.use_caption) - choice = get_choice_text(problems[test_qid], args.options) - answer = get_answer(problems[test_qid], args.options) - lecture = get_lecture_text(problems[test_qid]) - solution = get_solution_text(problems[test_qid]) - - user_prompt, assistant_prompt = create_one_example_gpt4(args.prompt_format, - question, - context, - choice, - answer, - lecture, - 
solution, - test_example=True) - prompt_array.append(user_prompt) - prompt_array.append(assistant_prompt) - - return prompt_array \ No newline at end of file diff --git a/LLAVA_Biovil/scripts/convert_vizwiz_for_submission.py b/LLAVA_Biovil/scripts/convert_vizwiz_for_submission.py deleted file mode 100644 index 61eac21ae3797b95e4597bf2629f321120d1991a..0000000000000000000000000000000000000000 --- a/LLAVA_Biovil/scripts/convert_vizwiz_for_submission.py +++ /dev/null @@ -1,47 +0,0 @@ -import os -import argparse -import json - -from LLAV.llava.eval.m4c_evaluator import EvalAIAnswerProcessor - - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument('--annotation-file', type=str, required=True) - parser.add_argument('--result-file', type=str, required=True) - parser.add_argument('--result-upload-file', type=str, required=True) - return parser.parse_args() - - -if __name__ == '__main__': - - args = parse_args() - - os.makedirs(os.path.dirname(args.result_upload_file), exist_ok=True) - - results = [] - error_line = 0 - for line_idx, line in enumerate(open(args.result_file)): - try: - results.append(json.loads(line)) - except: - error_line += 1 - results = {x['question_id']: x['text'] for x in results} - test_split = [json.loads(line) for line in open(args.annotation_file)] - split_ids = set([x['question_id'] for x in test_split]) - - print(f'total results: {len(results)}, total split: {len(test_split)}, error_line: {error_line}') - - all_answers = [] - - answer_processor = EvalAIAnswerProcessor() - - for x in test_split: - assert x['question_id'] in results - all_answers.append({ - 'image': x['image'], - 'answer': answer_processor(results[x['question_id']]) - }) - - with open(args.result_upload_file, 'w') as f: - json.dump(all_answers, f) diff --git a/LLAVA_Biovil/scripts/convert_vqav2_for_submission.py b/LLAVA_Biovil/scripts/convert_vqav2_for_submission.py deleted file mode 100644 index d95d76a4a992f570d45bab7a7b0df6bbd482672e..0000000000000000000000000000000000000000 --- a/LLAVA_Biovil/scripts/convert_vqav2_for_submission.py +++ /dev/null @@ -1,56 +0,0 @@ -import os -import argparse -import json - -from LLAV.llava.eval.m4c_evaluator import EvalAIAnswerProcessor - - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument('--dir', type=str, default="./playground/data/eval/vqav2") - parser.add_argument('--ckpt', type=str, required=True) - parser.add_argument('--split', type=str, required=True) - return parser.parse_args() - - -if __name__ == '__main__': - - args = parse_args() - - src = os.path.join(args.dir, 'answers', args.split, args.ckpt, 'merge.jsonl') - test_split = os.path.join(args.dir, 'llava_vqav2_mscoco_test2015.jsonl') - dst = os.path.join(args.dir, 'answers_upload', args.split, f'{args.ckpt}.json') - os.makedirs(os.path.dirname(dst), exist_ok=True) - - results = [] - error_line = 0 - for line_idx, line in enumerate(open(src)): - try: - results.append(json.loads(line)) - except: - error_line += 1 - - results = {x['question_id']: x['text'] for x in results} - test_split = [json.loads(line) for line in open(test_split)] - split_ids = set([x['question_id'] for x in test_split]) - - print(f'total results: {len(results)}, total split: {len(test_split)}, error_line: {error_line}') - - all_answers = [] - - answer_processor = EvalAIAnswerProcessor() - - for x in test_split: - if x['question_id'] not in results: - all_answers.append({ - 'question_id': x['question_id'], - 'answer': '' - }) - else: - all_answers.append({ - 'question_id': 
x['question_id'], - 'answer': answer_processor(results[x['question_id']]) - }) - - with open(dst, 'w') as f: - json.dump(all_answers, open(dst, 'w')) diff --git a/LLAVA_Biovil/scripts/extract_mm_projector.py b/LLAVA_Biovil/scripts/extract_mm_projector.py deleted file mode 100644 index 45be31e896e9c087093bd9bcb6d355ec6dfd11ab..0000000000000000000000000000000000000000 --- a/LLAVA_Biovil/scripts/extract_mm_projector.py +++ /dev/null @@ -1,47 +0,0 @@ -""" -This is just a utility that I use to extract the projector for quantized models. -It is NOT necessary at all to train, or run inference/serve demos. -Use this script ONLY if you fully understand its implications. -""" - - -import os -import argparse -import torch -import json -from collections import defaultdict - - -def parse_args(): - parser = argparse.ArgumentParser(description='Extract MMProjector weights') - parser.add_argument('--model-path', type=str, help='model folder') - parser.add_argument('--output', type=str, help='output file') - args = parser.parse_args() - return args - - -if __name__ == '__main__': - args = parse_args() - - keys_to_match = ['mm_projector'] - ckpt_to_key = defaultdict(list) - try: - model_indices = json.load(open(os.path.join(args.model_path, 'pytorch_model.bin.index.json'))) - for k, v in model_indices['weight_map'].items(): - if any(key_match in k for key_match in keys_to_match): - ckpt_to_key[v].append(k) - except FileNotFoundError: - # Smaller models or model checkpoints saved by DeepSpeed. - v = 'pytorch_model.bin' - for k in torch.load(os.path.join(args.model_path, v), map_location='cpu').keys(): - if any(key_match in k for key_match in keys_to_match): - ckpt_to_key[v].append(k) - - loaded_weights = {} - - for ckpt_name, weight_keys in ckpt_to_key.items(): - ckpt = torch.load(os.path.join(args.model_path, ckpt_name), map_location='cpu') - for k in weight_keys: - loaded_weights[k] = ckpt[k] - - torch.save(loaded_weights, args.output) diff --git a/LLAVA_Biovil/scripts/finetune.sh b/LLAVA_Biovil/scripts/finetune.sh deleted file mode 100644 index c14f770b481a548c978daca4b42fc0f74aeebe13..0000000000000000000000000000000000000000 --- a/LLAVA_Biovil/scripts/finetune.sh +++ /dev/null @@ -1,48 +0,0 @@ -#!/bin/bash - -# IMPORTANT: this is the training script for the original LLaVA, NOT FOR LLaVA V1.5! 
- -# Uncomment and set the following variables correspondingly to run this script: - -################## VICUNA ################## -# PROMPT_VERSION=v1 -# MODEL_VERSION="vicuna-v1-3-7b" -################## VICUNA ################## - -################## LLaMA-2 ################## -# PROMPT_VERSION="llava_llama_2" -# MODEL_VERSION="llama-2-7b-chat" -################## LLaMA-2 ################## - -deepspeed llava/train/train_mem.py \ - --deepspeed ./scripts/zero2.json \ - --model_name_or_path ./checkpoints/$MODEL_VERSION \ - --version $PROMPT_VERSION \ - --data_path ./playground/data/llava_instruct_80k.json \ - --image_folder /path/to/coco/train2017 \ - --vision_tower openai/clip-vit-large-patch14 \ - --pretrain_mm_mlp_adapter ./checkpoints/llava-$MODEL_VERSION-pretrain/mm_projector.bin \ - --mm_vision_select_layer -2 \ - --mm_use_im_start_end False \ - --mm_use_im_patch_token False \ - --bf16 True \ - --output_dir ./checkpoints/llava-$MODEL_VERSION-finetune \ - --num_train_epochs 1 \ - --per_device_train_batch_size 16 \ - --per_device_eval_batch_size 4 \ - --gradient_accumulation_steps 1 \ - --evaluation_strategy "no" \ - --save_strategy "steps" \ - --save_steps 50000 \ - --save_total_limit 1 \ - --learning_rate 2e-5 \ - --weight_decay 0. \ - --warmup_ratio 0.03 \ - --lr_scheduler_type "cosine" \ - --logging_steps 1 \ - --tf32 True \ - --model_max_length 2048 \ - --gradient_checkpointing True \ - --dataloader_num_workers 4 \ - --lazy_preprocess True \ - --report_to wandb diff --git a/LLAVA_Biovil/scripts/finetune_full_schedule.sh b/LLAVA_Biovil/scripts/finetune_full_schedule.sh deleted file mode 100644 index 59a0d4aa4d8f391c5b5e62452c4e9ef38934b4a9..0000000000000000000000000000000000000000 --- a/LLAVA_Biovil/scripts/finetune_full_schedule.sh +++ /dev/null @@ -1,48 +0,0 @@ -#!/bin/bash - -# IMPORTANT: this is the training script for the original LLaVA, NOT FOR LLaVA V1.5! - -# Uncomment and set the following variables correspondingly to run this script: - -################## VICUNA ################## -# PROMPT_VERSION=v1 -# MODEL_VERSION="vicuna-v1-3-7b" -################## VICUNA ################## - -################## LLaMA-2 ################## -# PROMPT_VERSION="llava_llama_2" -# MODEL_VERSION="llama-2-7b-chat" -################## LLaMA-2 ################## - -deepspeed llava/train/train_mem.py \ - --deepspeed ./scripts/zero2.json \ - --model_name_or_path ./checkpoints/$MODEL_VERSION \ - --version $PROMPT_VERSION \ - --data_path ./playground/data/llava_instruct_158k.json \ - --image_folder /path/to/coco/train2017 \ - --vision_tower openai/clip-vit-large-patch14 \ - --pretrain_mm_mlp_adapter ./checkpoints/llava-$MODEL_VERSION-pretrain/mm_projector.bin \ - --mm_vision_select_layer -2 \ - --mm_use_im_start_end False \ - --mm_use_im_patch_token False \ - --bf16 True \ - --output_dir ./checkpoints/llava-$MODEL_VERSION-finetune \ - --num_train_epochs 3 \ - --per_device_train_batch_size 16 \ - --per_device_eval_batch_size 4 \ - --gradient_accumulation_steps 1 \ - --evaluation_strategy "no" \ - --save_strategy "steps" \ - --save_steps 50000 \ - --save_total_limit 1 \ - --learning_rate 2e-5 \ - --weight_decay 0. 
\ - --warmup_ratio 0.03 \ - --lr_scheduler_type "cosine" \ - --logging_steps 1 \ - --tf32 True \ - --model_max_length 2048 \ - --gradient_checkpointing True \ - --dataloader_num_workers 4 \ - --lazy_preprocess True \ - --report_to wandb diff --git a/LLAVA_Biovil/scripts/finetune_lora.sh b/LLAVA_Biovil/scripts/finetune_lora.sh deleted file mode 100644 index fc02e09d7792eb6a13ec32447b5e7f59ce141c8e..0000000000000000000000000000000000000000 --- a/LLAVA_Biovil/scripts/finetune_lora.sh +++ /dev/null @@ -1,49 +0,0 @@ -#!/bin/bash - -# IMPORTANT: this is the training script for the original LLaVA, NOT FOR LLaVA V1.5! - -# Uncomment and set the following variables correspondingly to run this script: - -################## VICUNA ################## -# PROMPT_VERSION=v1 -# MODEL_VERSION="vicuna-v1-3-7b" -################## VICUNA ################## - -################## LLaMA-2 ################## -# PROMPT_VERSION="llava_llama_2" -# MODEL_VERSION="llama-2-7b-chat" -################## LLaMA-2 ################## - -deepspeed llava/train/train_mem.py \ - --deepspeed ./scripts/zero2.json \ - --lora_enable True \ - --model_name_or_path ./checkpoints/$MODEL_VERSION \ - --version $PROMPT_VERSION \ - --data_path ./playground/data/llava_instruct_80k.json \ - --image_folder /path/to/coco/train2017 \ - --vision_tower openai/clip-vit-large-patch14 \ - --pretrain_mm_mlp_adapter ./checkpoints/llava-$MODEL_VERSION-pretrain/mm_projector.bin \ - --mm_vision_select_layer -2 \ - --mm_use_im_start_end False \ - --mm_use_im_patch_token False \ - --bf16 True \ - --output_dir ./checkpoints/llava-$MODEL_VERSION-finetune_lora \ - --num_train_epochs 1 \ - --per_device_train_batch_size 16 \ - --per_device_eval_batch_size 4 \ - --gradient_accumulation_steps 1 \ - --evaluation_strategy "no" \ - --save_strategy "steps" \ - --save_steps 50000 \ - --save_total_limit 1 \ - --learning_rate 2e-5 \ - --weight_decay 0. \ - --warmup_ratio 0.03 \ - --lr_scheduler_type "cosine" \ - --logging_steps 1 \ - --tf32 True \ - --model_max_length 2048 \ - --gradient_checkpointing True \ - --lazy_preprocess True \ - --dataloader_num_workers 4 \ - --report_to wandb diff --git a/LLAVA_Biovil/scripts/finetune_qlora.sh b/LLAVA_Biovil/scripts/finetune_qlora.sh deleted file mode 100644 index c2ed4c030cb7a3fff79f47a8e681f4df7c989100..0000000000000000000000000000000000000000 --- a/LLAVA_Biovil/scripts/finetune_qlora.sh +++ /dev/null @@ -1,50 +0,0 @@ -#!/bin/bash - -# IMPORTANT: this is the training script for the original LLaVA, NOT FOR LLaVA V1.5! 
- -# Uncomment and set the following variables correspondingly to run this script: - -################## VICUNA ################## -# PROMPT_VERSION=v1 -# MODEL_VERSION="vicuna-v1-3-7b" -################## VICUNA ################## - -################## LLaMA-2 ################## -# PROMPT_VERSION="llava_llama_2" -# MODEL_VERSION="llama-2-7b-chat" -################## LLaMA-2 ################## - -deepspeed llava/train/train_mem.py \ - --deepspeed ./scripts/zero2.json \ - --lora_enable True \ - --bits 4 \ - --model_name_or_path ./checkpoints/$MODEL_VERSION \ - --version $PROMPT_VERSION \ - --data_path ./playground/data/llava_instruct_80k.json \ - --image_folder /path/to/coco/train2017 \ - --vision_tower openai/clip-vit-large-patch14 \ - --pretrain_mm_mlp_adapter ./checkpoints/llava-$MODEL_VERSION-pretrain/mm_projector.bin \ - --mm_vision_select_layer -2 \ - --mm_use_im_start_end False \ - --mm_use_im_patch_token False \ - --bf16 True \ - --output_dir ./checkpoints/llava-$MODEL_VERSION-finetune_lora \ - --num_train_epochs 1 \ - --per_device_train_batch_size 16 \ - --per_device_eval_batch_size 4 \ - --gradient_accumulation_steps 1 \ - --evaluation_strategy "no" \ - --save_strategy "steps" \ - --save_steps 50000 \ - --save_total_limit 1 \ - --learning_rate 2e-5 \ - --weight_decay 0. \ - --warmup_ratio 0.03 \ - --lr_scheduler_type "cosine" \ - --logging_steps 1 \ - --tf32 True \ - --model_max_length 2048 \ - --gradient_checkpointing True \ - --lazy_preprocess True \ - --dataloader_num_workers 4 \ - --report_to wandb diff --git a/LLAVA_Biovil/scripts/finetune_sqa.sh b/LLAVA_Biovil/scripts/finetune_sqa.sh deleted file mode 100644 index 3ed50288c31c118cab22312ad02a559d45725490..0000000000000000000000000000000000000000 --- a/LLAVA_Biovil/scripts/finetune_sqa.sh +++ /dev/null @@ -1,36 +0,0 @@ -#!/bin/bash - -# IMPORTANT: this is the training script for the original LLaVA, NOT FOR LLaVA V1.5! - -deepspeed llava/train/train_mem.py \ - --deepspeed ./scripts/zero2.json \ - --model_name_or_path lmsys/vicuna-13b-v1.3 \ - --version $PROMPT_VERSION \ - --data_path /Data/ScienceQA/data/scienceqa/llava_train_QCM-LEA.json \ - --image_folder /Data/ScienceQA/data/scienceqa/images/train \ - --vision_tower openai/clip-vit-large-patch14 \ - --pretrain_mm_mlp_adapter ./checkpoints/huggingface/liuhaotian/llava-pretrain-vicuna-13b-v1.3/mm_projector.bin \ - --mm_vision_select_layer -2 \ - --mm_use_im_start_end False \ - --mm_use_im_patch_token False \ - --bf16 True \ - --output_dir ./checkpoints/llava-vicuna-13b-v1.3-pretrain_lcs558k_plain-ScienceQA_QCM_LEA-12e \ - --num_train_epochs 12 \ - --per_device_train_batch_size 16 \ - --per_device_eval_batch_size 4 \ - --gradient_accumulation_steps 1 \ - --evaluation_strategy "no" \ - --save_strategy "steps" \ - --save_steps 50000 \ - --save_total_limit 1 \ - --learning_rate 2e-5 \ - --weight_decay 0. 
\ - --warmup_ratio 0.03 \ - --lr_scheduler_type "cosine" \ - --logging_steps 1 \ - --tf32 True \ - --model_max_length 2048 \ - --gradient_checkpointing True \ - --dataloader_num_workers 4 \ - --lazy_preprocess True \ - --report_to wandb diff --git a/LLAVA_Biovil/scripts/merge_lora_weights.py b/LLAVA_Biovil/scripts/merge_lora_weights.py deleted file mode 100644 index 4188cbbc84781d4f01ccdeb7df48e6ce7bc61868..0000000000000000000000000000000000000000 --- a/LLAVA_Biovil/scripts/merge_lora_weights.py +++ /dev/null @@ -1,22 +0,0 @@ -import argparse -from LLAV.llava.model.builder import load_pretrained_model -from LLAV.llava.mm_utils import get_model_name_from_path - - -def merge_lora(args): - model_name = get_model_name_from_path(args.model_path) - tokenizer, model, image_processor, context_len = load_pretrained_model(args.model_path, args.model_base, model_name, device_map='cpu') - - model.save_pretrained(args.save_model_path) - tokenizer.save_pretrained(args.save_model_path) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument("--model-path", type=str, required=True) - parser.add_argument("--model-base", type=str, required=True) - parser.add_argument("--save-model-path", type=str, required=True) - - args = parser.parse_args() - - merge_lora(args) diff --git a/LLAVA_Biovil/scripts/pretrain.sh b/LLAVA_Biovil/scripts/pretrain.sh deleted file mode 100644 index 83f263dd570e447b3b009542d26688ce936436af..0000000000000000000000000000000000000000 --- a/LLAVA_Biovil/scripts/pretrain.sh +++ /dev/null @@ -1,46 +0,0 @@ -#!/bin/bash - -# IMPORTANT: this is the training script for the original LLaVA, NOT FOR LLaVA V1.5! - -# Uncomment and set the following variables correspondingly to run this script: - -# MODEL_VERSION=vicuna-v1-3-7b -# MODEL_VERSION=llama-2-7b-chat - -########### DO NOT CHANGE ########### -########### USE THIS FOR BOTH ########### -PROMPT_VERSION=plain -########### DO NOT CHANGE ########### - -deepspeed llava/train/train_mem.py \ - --deepspeed ./scripts/zero2.json \ - --model_name_or_path ./checkpoints/$MODEL_VERSION \ - --version $PROMPT_VERSION \ - --data_path /path/to/pretrain_data.json \ - --image_folder /path/to/images \ - --vision_tower openai/clip-vit-large-patch14 \ - --tune_mm_mlp_adapter True \ - --mm_vision_select_layer -2 \ - --mm_use_im_start_end False \ - --mm_use_im_patch_token False \ - --bf16 True \ - --output_dir ./checkpoints/llava-$MODEL_VERSION-pretrain \ - --num_train_epochs 1 \ - --per_device_train_batch_size 16 \ - --per_device_eval_batch_size 4 \ - --gradient_accumulation_steps 1 \ - --evaluation_strategy "no" \ - --save_strategy "steps" \ - --save_steps 24000 \ - --save_total_limit 1 \ - --learning_rate 2e-3 \ - --weight_decay 0. 
\ - --warmup_ratio 0.03 \ - --lr_scheduler_type "cosine" \ - --logging_steps 1 \ - --tf32 True \ - --model_max_length 2048 \ - --gradient_checkpointing True \ - --dataloader_num_workers 4 \ - --lazy_preprocess True \ - --report_to wandb diff --git a/LLAVA_Biovil/scripts/pretrain_xformers.sh b/LLAVA_Biovil/scripts/pretrain_xformers.sh deleted file mode 100644 index ecba9c1ce714d481638e269ee4857fbe6a8de2fd..0000000000000000000000000000000000000000 --- a/LLAVA_Biovil/scripts/pretrain_xformers.sh +++ /dev/null @@ -1,44 +0,0 @@ -#!/bin/bash - -# Uncomment and set the following variables correspondingly to run this script: - -# MODEL_VERSION=vicuna-v1-3-7b -# MODEL_VERSION=llama-2-7b-chat - -########### DO NOT CHANGE ########### -########### USE THIS FOR BOTH ########### -PROMPT_VERSION=plain -########### DO NOT CHANGE ########### - -deepspeed llava/train/train_xformers.py \ - --deepspeed ./scripts/zero2.json \ - --model_name_or_path ./checkpoints/$MODEL_VERSION \ - --version $PROMPT_VERSION \ - --data_path /path/to/pretrain_data.json \ - --image_folder /path/to/images \ - --vision_tower openai/clip-vit-large-patch14 \ - --tune_mm_mlp_adapter True \ - --mm_vision_select_layer -2 \ - --mm_use_im_start_end False \ - --mm_use_im_patch_token False \ - --bf16 False \ - --output_dir ./checkpoints/llava-$MODEL_VERSION-pretrain \ - --num_train_epochs 1 \ - --per_device_train_batch_size 4 \ - --per_device_eval_batch_size 4 \ - --gradient_accumulation_steps 4 \ - --evaluation_strategy "no" \ - --save_strategy "steps" \ - --save_steps 24000 \ - --save_total_limit 1 \ - --learning_rate 2e-3 \ - --weight_decay 0. \ - --warmup_ratio 0.03 \ - --lr_scheduler_type "cosine" \ - --logging_steps 1 \ - --tf32 False \ - --model_max_length 2048 \ - --gradient_checkpointing True \ - --dataloader_num_workers 4 \ - --lazy_preprocess True \ - --report_to wandb diff --git a/LLAVA_Biovil/scripts/sqa_eval_batch.sh b/LLAVA_Biovil/scripts/sqa_eval_batch.sh deleted file mode 100644 index adbf46ef7a6e86181b5927002597ef786add5bde..0000000000000000000000000000000000000000 --- a/LLAVA_Biovil/scripts/sqa_eval_batch.sh +++ /dev/null @@ -1,13 +0,0 @@ -#!/bin/bash - -CHUNKS=8 -for IDX in {0..7}; do - CUDA_VISIBLE_DEVICES=$IDX python -m llava.eval.model_vqa_science \ - --model-path liuhaotian/llava-lcs558k-scienceqa-vicuna-13b-v1.3 \ - --question-file ~/haotian/datasets/ScienceQA/data/scienceqa/llava_test_QCM-LEA.json \ - --image-folder ~/haotian/datasets/ScienceQA/data/scienceqa/images/test \ - --answers-file ./test_llava-13b-chunk$CHUNKS_$IDX.jsonl \ - --num-chunks $CHUNKS \ - --chunk-idx $IDX \ - --conv-mode llava_v1 & -done diff --git a/LLAVA_Biovil/scripts/sqa_eval_gather.sh b/LLAVA_Biovil/scripts/sqa_eval_gather.sh deleted file mode 100644 index 525bd43b850e9f6a923158abd23bca6f8d15650e..0000000000000000000000000000000000000000 --- a/LLAVA_Biovil/scripts/sqa_eval_gather.sh +++ /dev/null @@ -1,18 +0,0 @@ -#!/bin/bash - -CHUNKS=8 -output_file="test_llava-13b.jsonl" - -# Clear out the output file if it exists. -> "$output_file" - -# Loop through the indices and concatenate each file. 
-for idx in $(seq 0 $((CHUNKS-1))); do - cat "./test_llava-13b-chunk${idx}.jsonl" >> "$output_file" -done - -python llava/eval/eval_science_qa.py \ - --base-dir ~/haotian/datasets/ScienceQA/data/scienceqa \ - --result-file ./test_llava-13b.jsonl \ - --output-file ./test_llava-13b_output.json \ - --output-result ./test_llava-13b_result.json diff --git a/LLAVA_Biovil/scripts/v1_5/eval/gqa.sh b/LLAVA_Biovil/scripts/v1_5/eval/gqa.sh deleted file mode 100644 index 5c3c2c31fc35377a926739e8e4bfd4c23fb39e7f..0000000000000000000000000000000000000000 --- a/LLAVA_Biovil/scripts/v1_5/eval/gqa.sh +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/bash - -gpu_list="${CUDA_VISIBLE_DEVICES:-0}" -IFS=',' read -ra GPULIST <<< "$gpu_list" - -CHUNKS=${#GPULIST[@]} - -CKPT="llava-v1.5-13b" -SPLIT="llava_gqa_testdev_balanced" -GQADIR="./playground/data/eval/gqa/data" - -for IDX in $(seq 0 $((CHUNKS-1))); do - CUDA_VISIBLE_DEVICES=${GPULIST[$IDX]} python -m llava.eval.model_vqa_loader \ - --model-path liuhaotian/llava-v1.5-13b \ - --question-file ./playground/data/eval/gqa/$SPLIT.jsonl \ - --image-folder ./playground/data/eval/gqa/data/images \ - --answers-file ./playground/data/eval/gqa/answers/$SPLIT/$CKPT/${CHUNKS}_${IDX}.jsonl \ - --num-chunks $CHUNKS \ - --chunk-idx $IDX \ - --temperature 0 \ - --conv-mode vicuna_v1 & -done - -wait - -output_file=./playground/data/eval/gqa/answers/$SPLIT/$CKPT/merge.jsonl - -# Clear out the output file if it exists. -> "$output_file" - -# Loop through the indices and concatenate each file. -for IDX in $(seq 0 $((CHUNKS-1))); do - cat ./playground/data/eval/gqa/answers/$SPLIT/$CKPT/${CHUNKS}_${IDX}.jsonl >> "$output_file" -done - -python scripts/convert_gqa_for_eval.py --src $output_file --dst $GQADIR/testdev_balanced_predictions.json - -cd $GQADIR -python eval/eval.py --tier testdev_balanced diff --git a/LLAVA_Biovil/scripts/v1_5/eval/llavabench.sh b/LLAVA_Biovil/scripts/v1_5/eval/llavabench.sh deleted file mode 100644 index ed236e4e3cee3105edd8d2c0bcee8e1ce22d4614..0000000000000000000000000000000000000000 --- a/LLAVA_Biovil/scripts/v1_5/eval/llavabench.sh +++ /dev/null @@ -1,23 +0,0 @@ -#!/bin/bash - -python -m llava.eval.model_vqa \ - --model-path liuhaotian/llava-v1.5-13b \ - --question-file ./playground/data/eval/llava-bench-in-the-wild/questions.jsonl \ - --image-folder ./playground/data/eval/llava-bench-in-the-wild/images \ - --answers-file ./playground/data/eval/llava-bench-in-the-wild/answers/llava-v1.5-13b.jsonl \ - --temperature 0 \ - --conv-mode vicuna_v1 - -mkdir -p playground/data/eval/llava-bench-in-the-wild/reviews - -python llava/eval/eval_gpt_review_bench.py \ - --question playground/data/eval/llava-bench-in-the-wild/questions.jsonl \ - --context playground/data/eval/llava-bench-in-the-wild/context.jsonl \ - --rule llava/eval/table/rule.json \ - --answer-list \ - playground/data/eval/llava-bench-in-the-wild/answers_gpt4.jsonl \ - playground/data/eval/llava-bench-in-the-wild/answers/llava-v1.5-13b.jsonl \ - --output \ - playground/data/eval/llava-bench-in-the-wild/reviews/llava-v1.5-13b.jsonl - -python llava/eval/summarize_gpt_review.py -f playground/data/eval/llava-bench-in-the-wild/reviews/llava-v1.5-13b.jsonl diff --git a/LLAVA_Biovil/scripts/v1_5/eval/mmbench.sh b/LLAVA_Biovil/scripts/v1_5/eval/mmbench.sh deleted file mode 100644 index d0b3a5c63bc7c8bb022ea2be41275cb921e8755d..0000000000000000000000000000000000000000 --- a/LLAVA_Biovil/scripts/v1_5/eval/mmbench.sh +++ /dev/null @@ -1,19 +0,0 @@ -#!/bin/bash - -SPLIT="mmbench_dev_20230712" - -python -m 
llava.eval.model_vqa_mmbench \ - --model-path liuhaotian/llava-v1.5-13b \ - --question-file ./playground/data/eval/mmbench/$SPLIT.tsv \ - --answers-file ./playground/data/eval/mmbench/answers/$SPLIT/llava-v1.5-13b.jsonl \ - --single-pred-prompt \ - --temperature 0 \ - --conv-mode vicuna_v1 - -mkdir -p playground/data/eval/mmbench/answers_upload/$SPLIT - -python scripts/convert_mmbench_for_submission.py \ - --annotation-file ./playground/data/eval/mmbench/$SPLIT.tsv \ - --result-dir ./playground/data/eval/mmbench/answers/$SPLIT \ - --upload-dir ./playground/data/eval/mmbench/answers_upload/$SPLIT \ - --experiment llava-v1.5-13b diff --git a/LLAVA_Biovil/scripts/v1_5/eval/mmbench_cn.sh b/LLAVA_Biovil/scripts/v1_5/eval/mmbench_cn.sh deleted file mode 100644 index ce27c93aa1ea8a667a4bdd894be6db1d352ad7f5..0000000000000000000000000000000000000000 --- a/LLAVA_Biovil/scripts/v1_5/eval/mmbench_cn.sh +++ /dev/null @@ -1,20 +0,0 @@ -#!/bin/bash - -SPLIT="mmbench_dev_cn_20231003" - -python -m llava.eval.model_vqa_mmbench \ - --model-path liuhaotian/llava-v1.5-13b \ - --question-file ./playground/data/eval/mmbench_cn/$SPLIT.tsv \ - --answers-file ./playground/data/eval/mmbench_cn/answers/$SPLIT/llava-v1.5-13b.jsonl \ - --lang cn \ - --single-pred-prompt \ - --temperature 0 \ - --conv-mode vicuna_v1 - -mkdir -p playground/data/eval/mmbench/answers_upload/$SPLIT - -python scripts/convert_mmbench_for_submission.py \ - --annotation-file ./playground/data/eval/mmbench_cn/$SPLIT.tsv \ - --result-dir ./playground/data/eval/mmbench_cn/answers/$SPLIT \ - --upload-dir ./playground/data/eval/mmbench_cn/answers_upload/$SPLIT \ - --experiment llava-v1.5-13b diff --git a/LLAVA_Biovil/scripts/v1_5/eval/mme.sh b/LLAVA_Biovil/scripts/v1_5/eval/mme.sh deleted file mode 100644 index 9b0f8ca657a429d92c233aaa404d9637d7500cc5..0000000000000000000000000000000000000000 --- a/LLAVA_Biovil/scripts/v1_5/eval/mme.sh +++ /dev/null @@ -1,17 +0,0 @@ -#!/bin/bash - -python -m llava.eval.model_vqa_loader \ - --model-path liuhaotian/llava-v1.5-13b \ - --question-file ./playground/data/eval/MME/llava_mme.jsonl \ - --image-folder ./playground/data/eval/MME/MME_Benchmark_release_version \ - --answers-file ./playground/data/eval/MME/answers/llava-v1.5-13b.jsonl \ - --temperature 0 \ - --conv-mode vicuna_v1 - -cd ./playground/data/eval/MME - -python convert_answer_to_mme.py --experiment llava-v1.5-13b - -cd eval_tool - -python calculation.py --results_dir answers/llava-v1.5-13b diff --git a/LLAVA_Biovil/scripts/v1_5/eval/mmvet.sh b/LLAVA_Biovil/scripts/v1_5/eval/mmvet.sh deleted file mode 100644 index 9ff31ed469bb95e40116e66ad249c38770ba3735..0000000000000000000000000000000000000000 --- a/LLAVA_Biovil/scripts/v1_5/eval/mmvet.sh +++ /dev/null @@ -1,16 +0,0 @@ -#!/bin/bash - -python -m llava.eval.model_vqa \ - --model-path liuhaotian/llava-v1.5-13b \ - --question-file ./playground/data/eval/mm-vet/llava-mm-vet.jsonl \ - --image-folder ./playground/data/eval/mm-vet/images \ - --answers-file ./playground/data/eval/mm-vet/answers/llava-v1.5-13b.jsonl \ - --temperature 0 \ - --conv-mode vicuna_v1 - -mkdir -p ./playground/data/eval/mm-vet/results - -python scripts/convert_mmvet_for_eval.py \ - --src ./playground/data/eval/mm-vet/answers/llava-v1.5-13b.jsonl \ - --dst ./playground/data/eval/mm-vet/results/llava-v1.5-13b.json - diff --git a/LLAVA_Biovil/scripts/v1_5/eval/pope.sh b/LLAVA_Biovil/scripts/v1_5/eval/pope.sh deleted file mode 100644 index 93fe449d943b36780341ce00638c94eba2e1f37b..0000000000000000000000000000000000000000 --- 
a/LLAVA_Biovil/scripts/v1_5/eval/pope.sh +++ /dev/null @@ -1,14 +0,0 @@ -#!/bin/bash - -python -m llava.eval.model_vqa_loader \ - --model-path liuhaotian/llava-v1.5-13b \ - --question-file ./playground/data/eval/pope/llava_pope_test.jsonl \ - --image-folder ./playground/data/eval/pope/val2014 \ - --answers-file ./playground/data/eval/pope/answers/llava-v1.5-13b.jsonl \ - --temperature 0 \ - --conv-mode vicuna_v1 - -python llava/eval/eval_pope.py \ - --annotation-dir ./playground/data/eval/pope/coco \ - --question-file ./playground/data/eval/pope/llava_pope_test.jsonl \ - --result-file ./playground/data/eval/pope/answers/llava-v1.5-13b.jsonl diff --git a/LLAVA_Biovil/scripts/v1_5/eval/qbench.sh b/LLAVA_Biovil/scripts/v1_5/eval/qbench.sh deleted file mode 100644 index 46b8e029bbb02ccaf8cae1a7025867553fbd6c6c..0000000000000000000000000000000000000000 --- a/LLAVA_Biovil/scripts/v1_5/eval/qbench.sh +++ /dev/null @@ -1,18 +0,0 @@ -#!/bin/bash - -if [ "$1" = "dev" ]; then - echo "Evaluating in 'dev' split." -elif [ "$1" = "test" ]; then - echo "Evaluating in 'test' split." -else - echo "Unknown split, please choose between 'dev' and 'test'." - exit 1 -fi - -python -m llava.eval.model_vqa_qbench \ - --model-path liuhaotian/llava-v1.5-13b \ - --image-folder ./playground/data/eval/qbench/images_llvisionqa/ \ - --questions-file ./playground/data/eval/qbench/llvisionqa_$1.json \ - --answers-file ./playground/data/eval/qbench/llvisionqa_$1_answers.jsonl \ - --conv-mode llava_v1 \ - --lang en diff --git a/LLAVA_Biovil/scripts/v1_5/eval/qbench_zh.sh b/LLAVA_Biovil/scripts/v1_5/eval/qbench_zh.sh deleted file mode 100644 index 7bfc17088cda577b6f25ec09b20ee8cb2664fec8..0000000000000000000000000000000000000000 --- a/LLAVA_Biovil/scripts/v1_5/eval/qbench_zh.sh +++ /dev/null @@ -1,20 +0,0 @@ -#!/bin/bash - -if [ "$1" = "dev" ]; then - ZH_SPLIT="验证集" - echo "Evaluating in 'dev' split." -elif [ "$1" = "test" ]; then - ZH_SPLIT="测试集" - echo "Evaluating in 'test' split." -else - echo "Unknown split, please choose between 'dev' and 'test'." - exit 1 -fi - -python -m llava.eval.model_vqa_qbench \ - --model-path liuhaotian/llava-v1.5-13b \ - --image-folder ./playground/data/eval/qbench/images_llvisionqa/ \ - --questions-file ./playground/data/eval/qbench/质衡-问答-$ZH_SPLIT.json \ - --answers-file ./playground/data/eval/qbench/llvisionqa_zh_$1_answers.jsonl \ - --conv-mode llava_v1 \ - --lang zh diff --git a/LLAVA_Biovil/scripts/v1_5/eval/seed.sh b/LLAVA_Biovil/scripts/v1_5/eval/seed.sh deleted file mode 100644 index 565e54d1d4d35791d5ed22ad4e60c43fbdd877ed..0000000000000000000000000000000000000000 --- a/LLAVA_Biovil/scripts/v1_5/eval/seed.sh +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/bash - -gpu_list="${CUDA_VISIBLE_DEVICES:-0}" -IFS=',' read -ra GPULIST <<< "$gpu_list" - -CHUNKS=${#GPULIST[@]} - -CKPT="llava-v1.5-13b" - -for IDX in $(seq 0 $((CHUNKS-1))); do - CUDA_VISIBLE_DEVICES=${GPULIST[$IDX]} python -m llava.eval.model_vqa_loader \ - --model-path liuhaotian/llava-v1.5-13b \ - --question-file ./playground/data/eval/seed_bench/llava-seed-bench.jsonl \ - --image-folder ./playground/data/eval/seed_bench \ - --answers-file ./playground/data/eval/seed_bench/answers/$CKPT/${CHUNKS}_${IDX}.jsonl \ - --num-chunks $CHUNKS \ - --chunk-idx $IDX \ - --temperature 0 \ - --conv-mode vicuna_v1 & -done - -wait - -output_file=./playground/data/eval/seed_bench/answers/$CKPT/merge.jsonl - -# Clear out the output file if it exists. -> "$output_file" - -# Loop through the indices and concatenate each file. 
-for IDX in $(seq 0 $((CHUNKS-1))); do - cat ./playground/data/eval/seed_bench/answers/$CKPT/${CHUNKS}_${IDX}.jsonl >> "$output_file" -done - -# Evaluate -python scripts/convert_seed_for_submission.py \ - --annotation-file ./playground/data/eval/seed_bench/SEED-Bench.json \ - --result-file $output_file \ - --result-upload-file ./playground/data/eval/seed_bench/answers_upload/llava-v1.5-13b.jsonl - diff --git a/LLAVA_Biovil/scripts/v1_5/eval/sqa.sh b/LLAVA_Biovil/scripts/v1_5/eval/sqa.sh deleted file mode 100644 index 8c82dbc256bd610c5ef2564ed2449b6a91857968..0000000000000000000000000000000000000000 --- a/LLAVA_Biovil/scripts/v1_5/eval/sqa.sh +++ /dev/null @@ -1,16 +0,0 @@ -#!/bin/bash - -python -m llava.eval.model_vqa_science \ - --model-path liuhaotian/llava-v1.5-13b \ - --question-file ./playground/data/eval/scienceqa/llava_test_CQM-A.json \ - --image-folder ./playground/data/eval/scienceqa/images/test \ - --answers-file ./playground/data/eval/scienceqa/answers/llava-v1.5-13b.jsonl \ - --single-pred-prompt \ - --temperature 0 \ - --conv-mode vicuna_v1 - -python llava/eval/eval_science_qa.py \ - --base-dir ./playground/data/eval/scienceqa \ - --result-file ./playground/data/eval/scienceqa/answers/llava-v1.5-13b.jsonl \ - --output-file ./playground/data/eval/scienceqa/answers/llava-v1.5-13b_output.jsonl \ - --output-result ./playground/data/eval/scienceqa/answers/llava-v1.5-13b_result.json diff --git a/LLAVA_Biovil/scripts/v1_5/eval/textvqa.sh b/LLAVA_Biovil/scripts/v1_5/eval/textvqa.sh deleted file mode 100644 index 12311c3ccc3511446298c8e829216266e702ec16..0000000000000000000000000000000000000000 --- a/LLAVA_Biovil/scripts/v1_5/eval/textvqa.sh +++ /dev/null @@ -1,13 +0,0 @@ -#!/bin/bash - -python -m llava.eval.model_vqa_loader \ - --model-path liuhaotian/llava-v1.5-13b \ - --question-file ./playground/data/eval/textvqa/llava_textvqa_val_v051_ocr.jsonl \ - --image-folder ./playground/data/eval/textvqa/train_images \ - --answers-file ./playground/data/eval/textvqa/answers/llava-v1.5-13b.jsonl \ - --temperature 0 \ - --conv-mode vicuna_v1 - -python -m llava.eval.eval_textvqa \ - --annotation-file ./playground/data/eval/textvqa/TextVQA_0.5.1_val.json \ - --result-file ./playground/data/eval/textvqa/answers/llava-v1.5-13b.jsonl diff --git a/LLAVA_Biovil/scripts/v1_5/eval/vizwiz.sh b/LLAVA_Biovil/scripts/v1_5/eval/vizwiz.sh deleted file mode 100644 index 16cf35ce1b77834d9d8888d53e6cd0f7c2c4ccc6..0000000000000000000000000000000000000000 --- a/LLAVA_Biovil/scripts/v1_5/eval/vizwiz.sh +++ /dev/null @@ -1,14 +0,0 @@ -#!/bin/bash - -python -m llava.eval.model_vqa_loader \ - --model-path liuhaotian/llava-v1.5-13b \ - --question-file ./playground/data/eval/vizwiz/llava_test.jsonl \ - --image-folder ./playground/data/eval/vizwiz/test \ - --answers-file ./playground/data/eval/vizwiz/answers/llava-v1.5-13b.jsonl \ - --temperature 0 \ - --conv-mode vicuna_v1 - -python scripts/convert_vizwiz_for_submission.py \ - --annotation-file ./playground/data/eval/vizwiz/llava_test.jsonl \ - --result-file ./playground/data/eval/vizwiz/answers/llava-v1.5-13b.jsonl \ - --result-upload-file ./playground/data/eval/vizwiz/answers_upload/llava-v1.5-13b.json diff --git a/LLAVA_Biovil/scripts/v1_5/eval/vqav2.sh b/LLAVA_Biovil/scripts/v1_5/eval/vqav2.sh deleted file mode 100644 index 696efe53340f4abe5ad3ba8b9578df056e6c897d..0000000000000000000000000000000000000000 --- a/LLAVA_Biovil/scripts/v1_5/eval/vqav2.sh +++ /dev/null @@ -1,36 +0,0 @@ -#!/bin/bash - -gpu_list="${CUDA_VISIBLE_DEVICES:-0}" -IFS=',' read -ra GPULIST <<< 
"$gpu_list" - -CHUNKS=${#GPULIST[@]} - -CKPT="llava-v1.5-13b" -SPLIT="llava_vqav2_mscoco_test-dev2015" - -for IDX in $(seq 0 $((CHUNKS-1))); do - CUDA_VISIBLE_DEVICES=${GPULIST[$IDX]} python -m llava.eval.model_vqa_loader \ - --model-path liuhaotian/llava-v1.5-13b \ - --question-file ./playground/data/eval/vqav2/$SPLIT.jsonl \ - --image-folder ./playground/data/eval/vqav2/test2015 \ - --answers-file ./playground/data/eval/vqav2/answers/$SPLIT/$CKPT/${CHUNKS}_${IDX}.jsonl \ - --num-chunks $CHUNKS \ - --chunk-idx $IDX \ - --temperature 0 \ - --conv-mode vicuna_v1 & -done - -wait - -output_file=./playground/data/eval/vqav2/answers/$SPLIT/$CKPT/merge.jsonl - -# Clear out the output file if it exists. -> "$output_file" - -# Loop through the indices and concatenate each file. -for IDX in $(seq 0 $((CHUNKS-1))); do - cat ./playground/data/eval/vqav2/answers/$SPLIT/$CKPT/${CHUNKS}_${IDX}.jsonl >> "$output_file" -done - -python scripts/convert_vqav2_for_submission.py --split $SPLIT --ckpt $CKPT - diff --git a/LLAVA_Biovil/scripts/v1_5/finetune.sh b/LLAVA_Biovil/scripts/v1_5/finetune.sh deleted file mode 100644 index 435448394dfcef578ac478f499160fba4ceacd6c..0000000000000000000000000000000000000000 --- a/LLAVA_Biovil/scripts/v1_5/finetune.sh +++ /dev/null @@ -1,37 +0,0 @@ -#!/bin/bash - -deepspeed llava/train/train_mem.py \ - --deepspeed ./scripts/zero3.json \ - --model_name_or_path lmsys/vicuna-13b-v1.5 \ - --version v1 \ - --data_path ./playground/data/llava_v1_5_mix665k.json \ - --image_folder ./playground/data \ - --vision_tower openai/clip-vit-large-patch14-336 \ - --pretrain_mm_mlp_adapter ./checkpoints/llava-v1.5-13b-pretrain/mm_projector.bin \ - --mm_projector_type mlp2x_gelu \ - --mm_vision_select_layer -2 \ - --mm_use_im_start_end False \ - --mm_use_im_patch_token False \ - --image_aspect_ratio pad \ - --group_by_modality_length True \ - --bf16 True \ - --output_dir ./checkpoints/llava-v1.5-13b \ - --num_train_epochs 1 \ - --per_device_train_batch_size 16 \ - --per_device_eval_batch_size 4 \ - --gradient_accumulation_steps 1 \ - --evaluation_strategy "no" \ - --save_strategy "steps" \ - --save_steps 50000 \ - --save_total_limit 1 \ - --learning_rate 2e-5 \ - --weight_decay 0. 
\ - --warmup_ratio 0.03 \ - --lr_scheduler_type "cosine" \ - --logging_steps 1 \ - --tf32 True \ - --model_max_length 2048 \ - --gradient_checkpointing True \ - --dataloader_num_workers 4 \ - --lazy_preprocess True \ - --report_to wandb diff --git a/LLAVA_Biovil/scripts/v1_5/finetune_lora.sh b/LLAVA_Biovil/scripts/v1_5/finetune_lora.sh deleted file mode 100644 index 90f00707cf9c9ae499184f0135f7cc9d84327a21..0000000000000000000000000000000000000000 --- a/LLAVA_Biovil/scripts/v1_5/finetune_lora.sh +++ /dev/null @@ -1,38 +0,0 @@ -#!/bin/bash - -deepspeed llava/train/train_mem.py \ - --lora_enable True --lora_r 128 --lora_alpha 256 --mm_projector_lr 2e-5 \ - --deepspeed ./scripts/zero3.json \ - --model_name_or_path lmsys/vicuna-13b-v1.5 \ - --version v1 \ - --data_path ./playground/data/llava_v1_5_mix665k.json \ - --image_folder ./playground/data \ - --vision_tower openai/clip-vit-large-patch14-336 \ - --pretrain_mm_mlp_adapter ./checkpoints/llava-v1.5-13b-pretrain/mm_projector.bin \ - --mm_projector_type mlp2x_gelu \ - --mm_vision_select_layer -2 \ - --mm_use_im_start_end False \ - --mm_use_im_patch_token False \ - --image_aspect_ratio pad \ - --group_by_modality_length True \ - --bf16 True \ - --output_dir ./checkpoints/llava-v1.5-13b-lora \ - --num_train_epochs 1 \ - --per_device_train_batch_size 16 \ - --per_device_eval_batch_size 4 \ - --gradient_accumulation_steps 1 \ - --evaluation_strategy "no" \ - --save_strategy "steps" \ - --save_steps 50000 \ - --save_total_limit 1 \ - --learning_rate 2e-4 \ - --weight_decay 0. \ - --warmup_ratio 0.03 \ - --lr_scheduler_type "cosine" \ - --logging_steps 1 \ - --tf32 True \ - --model_max_length 2048 \ - --gradient_checkpointing True \ - --dataloader_num_workers 4 \ - --lazy_preprocess True \ - --report_to wandb diff --git a/LLAVA_Biovil/scripts/v1_5/finetune_task.sh b/LLAVA_Biovil/scripts/v1_5/finetune_task.sh deleted file mode 100644 index 063f3f13e119fdb7f6af358f50315e022f15f578..0000000000000000000000000000000000000000 --- a/LLAVA_Biovil/scripts/v1_5/finetune_task.sh +++ /dev/null @@ -1,36 +0,0 @@ -#!/bin/bash - -deepspeed llava/train/train_mem.py \ - --deepspeed ./scripts/zero3.json \ - --model_name_or_path liuhaotian/llava-v1.5-13b \ - --version v1 \ - --data_path ./playground/data/llava_v1_5_mix665k.json \ - --image_folder ./playground/data \ - --vision_tower openai/clip-vit-large-patch14-336 \ - --mm_projector_type mlp2x_gelu \ - --mm_vision_select_layer -2 \ - --mm_use_im_start_end False \ - --mm_use_im_patch_token False \ - --image_aspect_ratio pad \ - --group_by_modality_length True \ - --bf16 True \ - --output_dir ./checkpoints/llava-v1.5-13b-task \ - --num_train_epochs 1 \ - --per_device_train_batch_size 16 \ - --per_device_eval_batch_size 4 \ - --gradient_accumulation_steps 1 \ - --evaluation_strategy "no" \ - --save_strategy "steps" \ - --save_steps 50000 \ - --save_total_limit 1 \ - --learning_rate 2e-5 \ - --weight_decay 0. 
\ - --warmup_ratio 0.03 \ - --lr_scheduler_type "cosine" \ - --logging_steps 1 \ - --tf32 True \ - --model_max_length 2048 \ - --gradient_checkpointing True \ - --dataloader_num_workers 4 \ - --lazy_preprocess True \ - --report_to wandb diff --git a/LLAVA_Biovil/scripts/v1_5/finetune_task_lora.sh b/LLAVA_Biovil/scripts/v1_5/finetune_task_lora.sh deleted file mode 100644 index f11303f299aeb675e23b0cb37ff4c881aec6f99e..0000000000000000000000000000000000000000 --- a/LLAVA_Biovil/scripts/v1_5/finetune_task_lora.sh +++ /dev/null @@ -1,37 +0,0 @@ -#!/bin/bash - -deepspeed llava/train/train_mem.py \ - --lora_enable True --lora_r 128 --lora_alpha 256 --mm_projector_lr 2e-5 \ - --deepspeed ./scripts/zero3.json \ - --model_name_or_path liuhaotian/llava-v1.5-13b \ - --version v1 \ - --data_path ./playground/data/llava_v1_5_mix665k.json \ - --image_folder ./playground/data \ - --vision_tower openai/clip-vit-large-patch14-336 \ - --mm_projector_type mlp2x_gelu \ - --mm_vision_select_layer -2 \ - --mm_use_im_start_end False \ - --mm_use_im_patch_token False \ - --image_aspect_ratio pad \ - --group_by_modality_length True \ - --bf16 True \ - --output_dir ./checkpoints/llava-v1.5-13b-task-lora \ - --num_train_epochs 1 \ - --per_device_train_batch_size 16 \ - --per_device_eval_batch_size 4 \ - --gradient_accumulation_steps 1 \ - --evaluation_strategy "no" \ - --save_strategy "steps" \ - --save_steps 50000 \ - --save_total_limit 1 \ - --learning_rate 2e-4 \ - --weight_decay 0. \ - --warmup_ratio 0.03 \ - --lr_scheduler_type "cosine" \ - --logging_steps 1 \ - --tf32 True \ - --model_max_length 2048 \ - --gradient_checkpointing True \ - --dataloader_num_workers 4 \ - --lazy_preprocess True \ - --report_to wandb diff --git a/LLAVA_Biovil/scripts/v1_5/pretrain.sh b/LLAVA_Biovil/scripts/v1_5/pretrain.sh deleted file mode 100644 index 9316eaa309ea8c12d9612a01d85958550357b9a7..0000000000000000000000000000000000000000 --- a/LLAVA_Biovil/scripts/v1_5/pretrain.sh +++ /dev/null @@ -1,35 +0,0 @@ -#!/bin/bash - -deepspeed llava/train/train_mem.py \ - --deepspeed ./scripts/zero2.json \ - --model_name_or_path lmsys/vicuna-13b-v1.5 \ - --version plain \ - --data_path ./playground/data/LLaVA-Pretrain/blip_laion_cc_sbu_558k.json \ - --image_folder ./playground/data/LLaVA-Pretrain/images \ - --vision_tower openai/clip-vit-large-patch14-336 \ - --mm_projector_type mlp2x_gelu \ - --tune_mm_mlp_adapter True \ - --mm_vision_select_layer -2 \ - --mm_use_im_start_end False \ - --mm_use_im_patch_token False \ - --bf16 True \ - --output_dir ./checkpoints/llava-v1.5-13b-pretrain \ - --num_train_epochs 1 \ - --per_device_train_batch_size 32 \ - --per_device_eval_batch_size 4 \ - --gradient_accumulation_steps 1 \ - --evaluation_strategy "no" \ - --save_strategy "steps" \ - --save_steps 24000 \ - --save_total_limit 1 \ - --learning_rate 1e-3 \ - --weight_decay 0. \ - --warmup_ratio 0.03 \ - --lr_scheduler_type "cosine" \ - --logging_steps 1 \ - --tf32 True \ - --model_max_length 2048 \ - --gradient_checkpointing True \ - --dataloader_num_workers 4 \ - --lazy_preprocess True \ - --report_to wandb diff --git a/LLAVA_Biovil/slurm_config.conf b/LLAVA_Biovil/slurm_config.conf deleted file mode 100644 index 272d797d8ec8b6110e85a2d038e4dfb2d247f9d3..0000000000000000000000000000000000000000 --- a/LLAVA_Biovil/slurm_config.conf +++ /dev/null @@ -1,60 +0,0 @@ -#!/bin/sh - -#SBATCH --job-name=radialog -#SBATCH --output=oracle-%A.out # Standard output of the script (Can be absolute or relative path). 
%A adds the job id to the file name so you can launch the same script multiple times and get different logging files -#SBATCH --error=oracle-%A.err # Standard error of the script -#SBATCH --time=0-160:00:00 # Limit on the total run time (format: days-hours:minutes:seconds) -#SBATCH --gres=gpu:1 # Number of GPUs if needed -#SBATCH --cpus-per-task=8 # Number of CPUs (Don't use more than 24 per GPU) -#SBATCH --mem=96G # Memory in GB (Don't use more than 126G per GPU), maybe 128? - -# activate corresponding environment -# conda deactivate # If you launch your script from a terminal where your environment is already loaded, conda won't activate the environment. This guards against that. Not necessary if you always run this script from a clean terminal -source ~/miniconda3/etc/profile.d/conda.sh -conda activate llava_raddialog -# FLASH ATTN NEEDS TO BE INSTALLED FROM THE SOURCE FOR CUDA 11.7 by previously setting CUDA HOME and LD_LIBRARY SOMETHING VARIABLES. -# POTENTIALLY TRY OUT VERSION 2 AS WELL WHICH IS LLAMA 2 BASED - -export GPUS_PER_NODE=1 -#export MASTER_ADDR=$(scontrol show hostnames $SLURM_JOB_NODELIST | head -n 1) # TODO needed for multi-node setups -#export MASTER_PORT=9901 -export MASTER_ADDR=$(hostname) -export MASTER_PORT=29719 - -srun --jobid $SLURM_JOBID python -m torch.distributed.run --nproc_per_node=$GPUS_PER_NODE --master_addr=$MASTER_ADDR --master_port=$MASTER_PORT llava/train/train_mem.py \ - --lora_enable True --bits 4 --lora_r 128 --lora_alpha 256 --mm_projector_lr 2e-4 \ - --deepspeed ./scripts/zero2.json \ - --model_name_or_path liuhaotian/llava-v1.5-7b \ - --version v1 \ - --data_path /home/guests/chantal_pellegrini/RaDialog_LLaVA/data/mimic_cxr_instruct_llava.json \ - --image_folder /home/data/DIVA/mimic/mimic-cxr-jpg/2.0.0 \ - --vision_tower openai/clip-vit-large-patch14-336 \ - --mm_projector_type mlp2x_gelu \ - --mm_vision_select_layer -2 \ - --mm_use_im_start_end False \ - --mm_use_im_patch_token False \ - --image_aspect_ratio pad \ - --group_by_modality_length True \ - --bf16 True \ - --output_dir ./checkpoints/llava-v1.5-7b-task-lora_radialog_cosine_llava_unfreeze \ - --num_train_epochs 1 \ - --per_device_train_batch_size 16 \ - --per_device_eval_batch_size 4 \ - --gradient_accumulation_steps 8 \ - --evaluation_strategy "no" \ - --save_strategy "steps" \ - --save_steps 500 \ - --learning_rate 2e-4 \ - --max_grad_norm 0.1 \ - --weight_decay 0. \ - --warmup_ratio 0.03 \ - --lr_scheduler_type "cosine" \ - --logging_steps 1 \ - --tf32 True \ - --model_max_length 1300 \ - --gradient_checkpointing True \ - --dataloader_num_workers 4 \ - --lazy_preprocess True \ - --report_to wandb \ - --run_name llava-v1.5-7b-task-lora_radialog_cosine_llava_unfreeze \ - --unfreeze_n_vision_tower_layers 12 diff --git a/LLAVA_Biovil/slurm_config_biovil_frozen.conf b/LLAVA_Biovil/slurm_config_biovil_frozen.conf deleted file mode 100644 index 31ae5da76b907aed5dd3f736382c42ac682bde5b..0000000000000000000000000000000000000000 --- a/LLAVA_Biovil/slurm_config_biovil_frozen.conf +++ /dev/null @@ -1,60 +0,0 @@ -#!/bin/sh - -#SBATCH --job-name=ins_v4_frozen -#SBATCH --output=oracle-%A.out # Standard output of the script (Can be absolute or relative path). 
%A adds the job id to the file name so you can launch the same script multiple times and get different logging files -#SBATCH --error=oracle-%A.err # Standard error of the script -#SBATCH --time=0-160:00:00 # Limit on the total run time (format: days-hours:minutes:seconds) -#SBATCH --gres=gpu:1 # Number of GPUs if needed -#SBATCH --cpus-per-task=8 # Number of CPUs (Don't use more than 24 per GPU) -#SBATCH --mem=96G # Memory in GB (Don't use more than 126G per GPU), maybe 128? -# activate corresponding environment -# conda deactivate # If you launch your script from a terminal where your environment is already loaded, conda won't activate the environment. This guards against that. Not necessary if you always run this script from a clean terminal -source ~/miniconda3/etc/profile.d/conda.sh -conda activate llava_raddialog -# FLASH ATTN NEEDS TO BE INSTALLED FROM THE SOURCE FOR CUDA 11.7 by previously setting CUDA HOME and LD_LIBRARY SOMETHING VARIABLES. -# POTENTIALLY TRY OUT VERSION 2 AS WELL WHICH IS LLAMA 2 BASED - -export PYTHONPATH="/home/guests/chantal_pellegrini/RaDialog_LLaVA:$PYTHONPATH" - -export GPUS_PER_NODE=1 -#export MASTER_ADDR=$(scontrol show hostnames $SLURM_JOB_NODELIST | head -n 1) # TODO needed for multi-node setups -#export MASTER_PORT=9901 -export MASTER_ADDR=$(hostname) -export MASTER_PORT=29719 - -srun --jobid $SLURM_JOBID python -m torch.distributed.run --nproc_per_node=$GPUS_PER_NODE --master_addr=$MASTER_ADDR --master_port=$MASTER_PORT llava/train/train_mem.py \ - --lora_enable True --lora_r 128 --lora_alpha 256 --mm_projector_lr 2e-5 \ - --deepspeed ./scripts/zero2.json \ - --model_name_or_path liuhaotian/llava-v1.5-7b \ - --version v1 \ - --data_path /home/guests/chantal_pellegrini/RaDialog_LLaVA/data/mimic_cxr_instruct_llava_v4_sentenized.json \ - --image_folder /home/data/DIVA/mimic/mimic-cxr-jpg/2.0.0 \ - --vision_tower biovil \ - --mm_projector_type mlp2x_gelu \ - --mm_vision_select_layer -2 \ - --mm_use_im_start_end False \ - --mm_use_im_patch_token False \ - --image_aspect_ratio pad \ - --group_by_modality_length True \ - --bf16 True \ - --output_dir ./checkpoints/llava-v1.5-7b-task-lora_radialog_instruct_llava_biovil_frozen_2e-5_5epochs_v4_sentenized \ - --num_train_epochs 5 \ - --per_device_train_batch_size 2 \ - --per_device_eval_batch_size 4 \ - --gradient_accumulation_steps 64 \ - --evaluation_strategy "no" \ - --save_strategy "steps" \ - --save_steps 1500 \ - --learning_rate 2e-5 \ - --max_grad_norm 0.1 \ - --weight_decay 0. \ - --warmup_ratio 0.03 \ - --lr_scheduler_type "cosine" \ - --logging_steps 1 \ - --tf32 True \ - --model_max_length 1300 \ - --gradient_checkpointing False \ - --dataloader_num_workers 4 \ - --lazy_preprocess True \ - --report_to wandb \ - --run_name llava-v1.5-7b-task-lora_radialog_instruct_llava_biovil_frozen_2e-5_5epochs_v4_sentenized diff --git a/LLAVA_Biovil/slurm_config_biovil_frozen_v5.conf b/LLAVA_Biovil/slurm_config_biovil_frozen_v5.conf deleted file mode 100644 index afb6b17e3a5ef494d5488233c41c85e6635352d0..0000000000000000000000000000000000000000 --- a/LLAVA_Biovil/slurm_config_biovil_frozen_v5.conf +++ /dev/null @@ -1,60 +0,0 @@ -#!/bin/sh - -#SBATCH --job-name=ins_v5_frozen -#SBATCH --output=oracle-%A.out # Standard output of the script (Can be absolute or relative path). 
%A adds the job id to the file name so you can launch the same script multiple times and get different logging files -#SBATCH --error=oracle-%A.err # Standard error of the script -#SBATCH --time=0-160:00:00 # Limit on the total run time (format: days-hours:minutes:seconds) -#SBATCH --gres=gpu:1 # Number of GPUs if needed -#SBATCH --cpus-per-task=8 # Number of CPUs (Don't use more than 24 per GPU) -#SBATCH --mem=96G # Memory in GB (Don't use more than 126G per GPU), maybe 128? -# activate corresponding environment -# conda deactivate # If you launch your script from a terminal where your environment is already loaded, conda won't activate the environment. This guards against that. Not necessary if you always run this script from a clean terminal -source ~/miniconda3/etc/profile.d/conda.sh -conda activate llava_raddialog -# FLASH ATTN NEEDS TO BE INSTALLED FROM THE SOURCE FOR CUDA 11.7 by previously setting CUDA HOME and LD_LIBRARY SOMETHING VARIABLES. -# POTENTIALLY TRY OUT VERSION 2 AS WELL WHICH IS LLAMA 2 BASED - -export PYTHONPATH="/home/guests/chantal_pellegrini/RaDialog_LLaVA:$PYTHONPATH" - -export GPUS_PER_NODE=1 -#export MASTER_ADDR=$(scontrol show hostnames $SLURM_JOB_NODELIST | head -n 1) # TODO needed for multi-node setups -#export MASTER_PORT=9901 -export MASTER_ADDR=$(hostname) -export MASTER_PORT=29711 - -srun --jobid $SLURM_JOBID python -m torch.distributed.run --nproc_per_node=$GPUS_PER_NODE --master_addr=$MASTER_ADDR --master_port=$MASTER_PORT llava/train/train_mem.py \ - --lora_enable True --lora_r 128 --lora_alpha 256 --mm_projector_lr 2e-5 \ - --deepspeed ./scripts/zero2.json \ - --model_name_or_path liuhaotian/llava-v1.5-7b \ - --version v1 \ - --data_path /home/guests/chantal_pellegrini/RaDialog_LLaVA/data/mimic_cxr_instruct_llava_v5.json \ - --image_folder /home/data/DIVA/mimic/mimic-cxr-jpg/2.0.0 \ - --vision_tower biovil \ - --mm_projector_type mlp2x_gelu \ - --mm_vision_select_layer -2 \ - --mm_use_im_start_end False \ - --mm_use_im_patch_token False \ - --image_aspect_ratio pad \ - --group_by_modality_length True \ - --bf16 True \ - --output_dir ./checkpoints/llava-v1.5-7b-task-lora_radialog_instruct_llava_biovil_frozen_2e-5_5epochs_v5 \ - --num_train_epochs 5 \ - --per_device_train_batch_size 2 \ - --per_device_eval_batch_size 4 \ - --gradient_accumulation_steps 64 \ - --evaluation_strategy "no" \ - --save_strategy "steps" \ - --save_steps 1500 \ - --learning_rate 2e-5 \ - --max_grad_norm 0.1 \ - --weight_decay 0. \ - --warmup_ratio 0.03 \ - --lr_scheduler_type "cosine" \ - --logging_steps 1 \ - --tf32 True \ - --model_max_length 1300 \ - --gradient_checkpointing False \ - --dataloader_num_workers 4 \ - --lazy_preprocess True \ - --report_to wandb \ - --run_name llava-v1.5-7b-task-lora_radialog_instruct_llava_biovil_frozen_2e-5_5epochs_v5 diff --git a/LLAVA_Biovil/slurm_config_biovil_unfrozen.conf b/LLAVA_Biovil/slurm_config_biovil_unfrozen.conf deleted file mode 100644 index 00879cadbc1d6e5424c47ed831628fa19a970195..0000000000000000000000000000000000000000 --- a/LLAVA_Biovil/slurm_config_biovil_unfrozen.conf +++ /dev/null @@ -1,61 +0,0 @@ -#!/bin/sh - -#SBATCH --job-name=ins_v4_unfrozen -#SBATCH --output=oracle-%A.out # Standard output of the script (Can be absolute or relative path). 
%A adds the job id to the file name so you can launch the same script multiple times and get different logging files -#SBATCH --error=oracle-%A.err # Standard error of the script -#SBATCH --time=0-160:00:00 # Limit on the total run time (format: days-hours:minutes:seconds) -#SBATCH --gres=gpu:1 # Number of GPUs if needed -#SBATCH --cpus-per-task=8 # Number of CPUs (Don't use more than 24 per GPU) -#SBATCH --mem=96G # Memory in GB (Don't use more than 126G per GPU), maybe 128? -# activate corresponding environment -# conda deactivate # If you launch your script from a terminal where your environment is already loaded, conda won't activate the environment. This guards against that. Not necessary if you always run this script from a clean terminal -source ~/miniconda3/etc/profile.d/conda.sh -conda activate llava_raddialog -# FLASH ATTN NEEDS TO BE INSTALLED FROM THE SOURCE FOR CUDA 11.7 by previously setting CUDA HOME and LD_LIBRARY SOMETHING VARIABLES. -# POTENTIALLY TRY OUT VERSION 2 AS WELL WHICH IS LLAMA 2 BASED - -export PYTHONPATH="/home/guests/chantal_pellegrini/RaDialog_LLaVA:$PYTHONPATH" - -export GPUS_PER_NODE=1 -#export MASTER_ADDR=$(scontrol show hostnames $SLURM_JOB_NODELIST | head -n 1) # TODO needed for multi-node setups -#export MASTER_PORT=9901 -export MASTER_ADDR=$(hostname) -export MASTER_PORT=29718 - -srun --jobid $SLURM_JOBID python -m torch.distributed.run --nproc_per_node=$GPUS_PER_NODE --master_addr=$MASTER_ADDR --master_port=$MASTER_PORT llava/train/train_mem.py \ - --lora_enable True --lora_r 128 --lora_alpha 256 --mm_projector_lr 2e-5 \ - --deepspeed ./scripts/zero2.json \ - --model_name_or_path liuhaotian/llava-v1.5-7b \ - --version v1 \ - --data_path /home/guests/chantal_pellegrini/RaDialog_LLaVA/data/mimic_cxr_instruct_llava_v4_sentenized.json \ - --image_folder /home/data/DIVA/mimic/mimic-cxr-jpg/2.0.0 \ - --vision_tower biovil \ - --mm_projector_type mlp2x_gelu \ - --mm_vision_select_layer -2 \ - --mm_use_im_start_end False \ - --mm_use_im_patch_token False \ - --image_aspect_ratio pad \ - --group_by_modality_length True \ - --bf16 True \ - --output_dir ./checkpoints/llava-v1.5-7b-task-lora_radialog_instruct_llava_biovil_unfrozen_2e-5_5epochs_v4_sentenized \ - --num_train_epochs 5 \ - --per_device_train_batch_size 2 \ - --per_device_eval_batch_size 4 \ - --gradient_accumulation_steps 64 \ - --evaluation_strategy "no" \ - --save_strategy "steps" \ - --save_steps 1500 \ - --learning_rate 2e-5 \ - --max_grad_norm 0.1 \ - --weight_decay 0. \ - --warmup_ratio 0.03 \ - --lr_scheduler_type "cosine" \ - --logging_steps 1 \ - --tf32 True \ - --model_max_length 1300 \ - --gradient_checkpointing False \ - --dataloader_num_workers 4 \ - --lazy_preprocess True \ - --report_to wandb \ - --run_name llava-v1.5-7b-task-lora_radialog_instruct_llava_biovil_unfrozen_2e-5_5epochs_v4_sentenized \ - --unfreeze_n_vision_tower_layers 12 diff --git a/LLAVA_Biovil/slurm_config_biovil_unfrozen_v5.conf b/LLAVA_Biovil/slurm_config_biovil_unfrozen_v5.conf deleted file mode 100644 index 3464b1f732c13d7dfd95964a417c37137e0f57fd..0000000000000000000000000000000000000000 --- a/LLAVA_Biovil/slurm_config_biovil_unfrozen_v5.conf +++ /dev/null @@ -1,61 +0,0 @@ -#!/bin/sh - -#SBATCH --job-name=ins_v5_unfrozen -#SBATCH --output=oracle-%A.out # Standard output of the script (Can be absolute or relative path). 
%A adds the job id to the file name so you can launch the same script multiple times and get different logging files -#SBATCH --error=oracle-%A.err # Standard error of the script -#SBATCH --time=0-160:00:00 # Limit on the total run time (format: days-hours:minutes:seconds) -#SBATCH --gres=gpu:1 # Number of GPUs if needed -#SBATCH --cpus-per-task=8 # Number of CPUs (Don't use more than 24 per GPU) -#SBATCH --mem=96G # Memory in GB (Don't use more than 126G per GPU), maybe 128? -# activate corresponding environment -# conda deactivate # If you launch your script from a terminal where your environment is already loaded, conda won't activate the environment. This guards against that. Not necessary if you always run this script from a clean terminal -source ~/miniconda3/etc/profile.d/conda.sh -conda activate llava_raddialog -# FLASH ATTN NEEDS TO BE INSTALLED FROM THE SOURCE FOR CUDA 11.7 by previously setting CUDA HOME and LD_LIBRARY SOMETHING VARIABLES. -# POTENTIALLY TRY OUT VERSION 2 AS WELL WHICH IS LLAMA 2 BASED - -export PYTHONPATH="/home/guests/chantal_pellegrini/RaDialog_LLaVA:$PYTHONPATH" - -export GPUS_PER_NODE=1 -#export MASTER_ADDR=$(scontrol show hostnames $SLURM_JOB_NODELIST | head -n 1) # TODO needed for multi-node setups -#export MASTER_PORT=9901 -export MASTER_ADDR=$(hostname) -export MASTER_PORT=29712 - -srun --jobid $SLURM_JOBID python -m torch.distributed.run --nproc_per_node=$GPUS_PER_NODE --master_addr=$MASTER_ADDR --master_port=$MASTER_PORT llava/train/train_mem.py \ - --lora_enable True --lora_r 128 --lora_alpha 256 --mm_projector_lr 2e-5 \ - --deepspeed ./scripts/zero2.json \ - --model_name_or_path liuhaotian/llava-v1.5-7b \ - --version v1 \ - --data_path /home/guests/chantal_pellegrini/RaDialog_LLaVA/data/mimic_cxr_instruct_llava_v5.json \ - --image_folder /home/data/DIVA/mimic/mimic-cxr-jpg/2.0.0 \ - --vision_tower biovil \ - --mm_projector_type mlp2x_gelu \ - --mm_vision_select_layer -2 \ - --mm_use_im_start_end False \ - --mm_use_im_patch_token False \ - --image_aspect_ratio pad \ - --group_by_modality_length True \ - --bf16 True \ - --output_dir ./checkpoints/llava-v1.5-7b-task-lora_radialog_instruct_llava_biovil_unfrozen_2e-5_5epochs_v5 \ - --num_train_epochs 5 \ - --per_device_train_batch_size 2 \ - --per_device_eval_batch_size 4 \ - --gradient_accumulation_steps 64 \ - --evaluation_strategy "no" \ - --save_strategy "steps" \ - --save_steps 1500 \ - --learning_rate 2e-5 \ - --max_grad_norm 0.1 \ - --weight_decay 0. \ - --warmup_ratio 0.03 \ - --lr_scheduler_type "cosine" \ - --logging_steps 1 \ - --tf32 True \ - --model_max_length 1300 \ - --gradient_checkpointing False \ - --dataloader_num_workers 4 \ - --lazy_preprocess True \ - --report_to wandb \ - --run_name llava-v1.5-7b-task-lora_radialog_instruct_llava_biovil_unfrozen_2e-5_5epochs_v5 \ - --unfreeze_n_vision_tower_layers 12 diff --git a/LLAVA_Biovil/slurm_config_llavamed.conf b/LLAVA_Biovil/slurm_config_llavamed.conf deleted file mode 100644 index cb4bdf1b7ec8c357abad597e81b7efad618f664a..0000000000000000000000000000000000000000 --- a/LLAVA_Biovil/slurm_config_llavamed.conf +++ /dev/null @@ -1,61 +0,0 @@ -#!/bin/sh - -#SBATCH --job-name=rd_llavamed -#SBATCH --output=oracle-%A.out # Standard output of the script (Can be absolute or relative path). 
%A adds the job id to the file name so you can launch the same script multiple times and get different logging files -#SBATCH --error=oracle-%A.err # Standard error of the script -#SBATCH --time=0-160:00:00 # Limit on the total run time (format: days-hours:minutes:seconds) -#SBATCH --gres=gpu:1 # Number of GPUs if needed -#SBATCH --cpus-per-task=8 # Number of CPUs (Don't use more than 24 per GPU) -#SBATCH --mem=96G # Memory in GB (Don't use more than 126G per GPU), maybe 128? - -# activate corresponding environment -# conda deactivate # If you launch your script from a terminal where your environment is already loaded, conda won't activate the environment. This guards against that. Not necessary if you always run this script from a clean terminal -source ~/miniconda3/etc/profile.d/conda.sh -conda activate llava_raddialog -# FLASH ATTN NEEDS TO BE INSTALLED FROM THE SOURCE FOR CUDA 11.7 by previously setting CUDA HOME and LD_LIBRARY SOMETHING VARIABLES. -# POTENTIALLY TRY OUT VERSION 2 AS WELL WHICH IS LLAMA 2 BASED - -export PYTHONPATH="/home/guests/chantal_pellegrini/RaDialog_LLaVA:$PYTHONPATH" - -export GPUS_PER_NODE=1 -#export MASTER_ADDR=$(scontrol show hostnames $SLURM_JOB_NODELIST | head -n 1) # TODO needed for multi-node setups -#export MASTER_PORT=9901 -export MASTER_ADDR=$(hostname) -export MASTER_PORT=29719 - -srun --jobid $SLURM_JOBID python -m torch.distributed.run --nproc_per_node=$GPUS_PER_NODE --master_addr=$MASTER_ADDR --master_port=$MASTER_PORT llava/train/train_mem.py \ - --lora_enable True --lora_r 128 --lora_alpha 256 --mm_projector_lr 2e-4 \ - --deepspeed ./scripts/zero2.json \ - --model_name_or_path /home/guests/shared/LLaMA/7B_LLaVAMed \ - --version llava_med \ - --data_path /home/guests/chantal_pellegrini/RaDialog_LLaVA/data/mimic_cxr_instruct_llava.json \ - --image_folder /home/data/DIVA/mimic/mimic-cxr-jpg/2.0.0 \ - --vision_tower biovil \ - --mm_projector_type mlp2x_gelu \ - --mm_vision_select_layer -2 \ - --mm_use_im_start_end True \ - --mm_use_im_patch_token True \ - --image_aspect_ratio square \ - --group_by_modality_length False \ - --bf16 True \ - --output_dir ./checkpoints/llava-v1.5-7b-task-lora_radialog_cosine_llavamed_biovil \ - --num_train_epochs 1 \ - --per_device_train_batch_size 2 \ - --per_device_eval_batch_size 4 \ - --gradient_accumulation_steps 64 \ - --evaluation_strategy "no" \ - --save_strategy "steps" \ - --save_steps 500 \ - --learning_rate 2e-4 \ - --max_grad_norm 0.1 \ - --weight_decay 0. \ - --warmup_ratio 0.03 \ - --lr_scheduler_type "cosine" \ - --logging_steps 1 \ - --tf32 True \ - --model_max_length 1300 \ - --gradient_checkpointing False \ - --dataloader_num_workers 4 \ - --lazy_preprocess True \ - --report_to wandb \ - --run_name llava-v1.5-7b-task-lora_radialog_cosine_llavamed_biovil diff --git a/LLAVA_Biovil/slurm_config_ms_cxr_t.conf b/LLAVA_Biovil/slurm_config_ms_cxr_t.conf deleted file mode 100644 index b6b04223f3f6a6a86d9a080093ed73f8bffaf2b7..0000000000000000000000000000000000000000 --- a/LLAVA_Biovil/slurm_config_ms_cxr_t.conf +++ /dev/null @@ -1,61 +0,0 @@ -#!/bin/sh - -#SBATCH --job-name=cxrt_concat -#SBATCH --output=oracle-%A.out # Standard output of the script (Can be absolute or relative path). 
%A adds the job id to the file name so you can launch the same script multiple times and get different logging files -#SBATCH --error=oracle-%A.err # Standard error of the script -#SBATCH --time=0-160:00:00 # Limit on the total run time (format: days-hours:minutes:seconds) -#SBATCH --gres=gpu:1 # Number of GPUs if needed -#SBATCH --cpus-per-task=8 # Number of CPUs (Don't use more than 24 per GPU) -#SBATCH --mem=96G # Memory in GB (Don't use more than 126G per GPU), maybe 128? - -# activate corresponding environment -# conda deactivate # If you launch your script from a terminal where your environment is already loaded, conda won't activate the environment. This guards against that. Not necessary if you always run this script from a clean terminal -source ~/miniconda3/etc/profile.d/conda.sh -conda activate llava_raddialog -# FLASH ATTN NEEDS TO BE INSTALLED FROM THE SOURCE FOR CUDA 11.7 by previously setting CUDA HOME and LD_LIBRARY SOMETHING VARIABLES. -# POTENTIALLY TRY OUT VERSION 2 AS WELL WHICH IS LLAMA 2 BASED -export PYTHONPATH="/home/guests/chantal_pellegrini/RaDialog_LLaVA:$PYTHONPATH" - -export GPUS_PER_NODE=1 -#export MASTER_ADDR=$(scontrol show hostnames $SLURM_JOB_NODELIST | head -n 1) # TODO needed for multi-node setups -#export MASTER_PORT=9901 -export MASTER_ADDR=$(hostname) -export MASTER_PORT=29715 - -srun --jobid $SLURM_JOBID python -m torch.distributed.run --nproc_per_node=$GPUS_PER_NODE --master_addr=$MASTER_ADDR --master_port=$MASTER_PORT llava/train/train_mem.py \ - --lora_enable True --lora_r 128 --lora_alpha 256 --mm_projector_lr 2e-5 \ - --deepspeed ./scripts/zero2.json \ - --model_name_or_path liuhaotian/llava-v1.5-7b \ - --version v1 \ - --data_path /home/guests/chantal_pellegrini/RaDialog_LLaVA/data/ms_cxr_t_llava.json \ - --image_folder /home/data/DIVA/mimic/mimic-cxr-jpg/2.0.0 \ - --vision_tower biovil \ - --mm_projector_type mlp2x_gelu \ - --mm_vision_select_layer -2 \ - --mm_use_im_start_end False \ - --mm_use_im_patch_token False \ - --image_aspect_ratio pad \ - --group_by_modality_length True \ - --bf16 True \ - --output_dir ./checkpoints/llava-v1.5-7b-task-lora_radialog_cxr_t_frozen_pool_concat \ - --num_train_epochs 10 \ - --per_device_train_batch_size 2 \ - --per_device_eval_batch_size 4 \ - --gradient_accumulation_steps 64 \ - --evaluation_strategy "no" \ - --save_strategy "epoch" \ - --save_steps 500 \ - --learning_rate 2e-5 \ - --max_grad_norm 0.1 \ - --weight_decay 0. \ - --warmup_ratio 0.03 \ - --lr_scheduler_type "cosine" \ - --logging_steps 1 \ - --tf32 True \ - --model_max_length 550 \ - --gradient_checkpointing False \ - --dataloader_num_workers 4 \ - --lazy_preprocess True \ - --report_to wandb \ - --run_name llava-v1.5-7b-task-lora_radialog_cxr_t_frozen_pool_concat \ - --mv_type "pool_concat" diff --git a/LLAVA_Biovil/slurm_config_pretrain.conf b/LLAVA_Biovil/slurm_config_pretrain.conf deleted file mode 100644 index c494715e2b26d03b0f968d423be9c0d8e0b254ae..0000000000000000000000000000000000000000 --- a/LLAVA_Biovil/slurm_config_pretrain.conf +++ /dev/null @@ -1,61 +0,0 @@ -#!/bin/sh - -#SBATCH --job-name=oracle -#SBATCH --output=oracle-%A.out # Standard output of the script (Can be absolute or relative path). 
%A adds the job id to the file name so you can launch the same script multiple times and get different logging files -#SBATCH --error=oracle-%A.err # Standard error of the script -#SBATCH --time=0-160:00:00 # Limit on the total run time (format: days-hours:minutes:seconds) -#SBATCH --gres=gpu:1 # Number of GPUs if needed -#SBATCH --cpus-per-task=4 # Number of CPUs (Don't use more than 24 per GPU) -#SBATCH --mem=96G # Memory in GB (Don't use more than 126G per GPU), maybe 128? - -# activate corresponding environment -# conda deactivate # If you launch your script from a terminal where your environment is already loaded, conda won't activate the environment. This guards against that. Not necessary if you always run this script from a clean terminal -source ~/miniconda3/etc/profile.d/conda.sh -conda activate oracle -# FLASH ATTN NEEDS TO BE INSTALLED FROM THE SOURCE FOR CUDA 11.7 by previously setting CUDA HOME and LD_LIBRARY SOMETHING VARIABLES. -# POTENTIALLY TRY OUT VERSION 2 AS WELL WHICH IS LLAMA 2 BASED - -export GPUS_PER_NODE=1 -#export MASTER_ADDR=$(scontrol show hostnames $SLURM_JOB_NODELIST | head -n 1) # TODO needed for multi-node setups -#export MASTER_PORT=9901 -export MASTER_ADDR=$(hostname) -export MASTER_PORT=29508 - - -srun --jobid $SLURM_JOBID python -m torch.distributed.run --nproc_per_node=$GPUS_PER_NODE --master_addr=$MASTER_ADDR --master_port=$MASTER_PORT llava/train/train_mem.py \ - --deepspeed ./scripts/zero2.json \ - --model_name_or_path liuhaotian/llava-v1.5-7b \ - --version v1 \ - --data_path /home/guests/ege_oezsoy/Oracle/data/llava_samples/train.json \ - --image_folder / \ - --vision_tower openai/clip-vit-large-patch14-336 \ - --mm_projector_type mlp2x_gelu \ - --tune_mm_mlp_adapter True \ - --mm_vision_select_layer -2 \ - --mm_use_im_start_end False \ - --mm_use_im_patch_token False \ - --image_aspect_ratio pad \ - --group_by_modality_length True \ - --bf16 True \ - --output_dir ./checkpoints/llava-v1.5-7b-task-4dor_pretrain_linear_weighting \ - --num_train_epochs 50 \ - --per_device_train_batch_size 16 \ - --per_device_eval_batch_size 4 \ - --gradient_accumulation_steps 1 \ - --evaluation_strategy "no" \ - --save_strategy "epoch" \ - --save_steps 10 \ - --save_total_limit 1 \ - --learning_rate 2e-5 \ - --max_grad_norm 0.1 \ - --weight_decay 0. \ - --warmup_ratio 0.03 \ - --lr_scheduler_type "cosine" \ - --logging_steps 1 \ - --tf32 True \ - --model_max_length 2048 \ - --gradient_checkpointing True \ - --dataloader_num_workers 4 \ - --lazy_preprocess True \ - --report_to wandb \ - --run_name llava-v1.5-7b-task-4dor_pretrain_linear_weighting diff --git a/LLAVA_Biovil/slurm_config_radrestruct.conf b/LLAVA_Biovil/slurm_config_radrestruct.conf deleted file mode 100644 index 1142a8bad1c6b395318eb6cab2cb65a0d7487909..0000000000000000000000000000000000000000 --- a/LLAVA_Biovil/slurm_config_radrestruct.conf +++ /dev/null @@ -1,61 +0,0 @@ -#!/bin/sh - -#SBATCH --job-name=rr_1to1 -#SBATCH --output=oracle-%A.out # Standard output of the script (Can be absolute or relative path). %A adds the job id to the file name so you can launch the same script multiple times and get different logging files -#SBATCH --error=oracle-%A.err # Standard error of the script -#SBATCH --time=0-160:00:00 # Limit on the total run time (format: days-hours:minutes:seconds) -#SBATCH --gres=gpu:1 # Number of GPUs if needed -#SBATCH --cpus-per-task=8 # Number of CPUs (Don't use more than 24 per GPU) -#SBATCH --mem=96G # Memory in GB (Don't use more than 126G per GPU), maybe 128? 
- -# activate corresponding environment -# conda deactivate # If you launch your script from a terminal where your environment is already loaded, conda won't activate the environment. This guards against that. Not necessary if you always run this script from a clean terminal -source ~/miniconda3/etc/profile.d/conda.sh -conda activate llava_raddialog -# FLASH ATTN NEEDS TO BE INSTALLED FROM THE SOURCE FOR CUDA 11.7 by previously setting CUDA HOME and LD_LIBRARY SOMETHING VARIABLES. -# POTENTIALLY TRY OUT VERSION 2 AS WELL WHICH IS LLAMA 2 BASED -export PYTHONPATH="/home/guests/chantal_pellegrini/RaDialog_LLaVA:$PYTHONPATH" - -export GPUS_PER_NODE=1 -#export MASTER_ADDR=$(scontrol show hostnames $SLURM_JOB_NODELIST | head -n 1) # TODO needed for multi-node setups -#export MASTER_PORT=9901 -export MASTER_ADDR=$(hostname) -export MASTER_PORT=29717 - -srun --jobid $SLURM_JOBID python -m torch.distributed.run --nproc_per_node=$GPUS_PER_NODE --master_addr=$MASTER_ADDR --master_port=$MASTER_PORT llava/train/train_mem.py \ - --lora_enable True --lora_r 128 --lora_alpha 256 --mm_projector_lr 2e-5 \ - --deepspeed ./scripts/zero2.json \ - --model_name_or_path liuhaotian/llava-v1.5-7b \ - --version v1 \ - --data_path /home/guests/chantal_pellegrini/RaDialog_LLaVA/data/radrestruct_mimic_merged_llava_balanced_1to1.json \ - --image_folder /home/data/DIVA/mimic/mimic-cxr-jpg/2.0.0 \ - --vision_tower biovil \ - --mm_projector_type mlp2x_gelu \ - --mm_vision_select_layer -2 \ - --mm_use_im_start_end False \ - --mm_use_im_patch_token False \ - --image_aspect_ratio pad \ - --group_by_modality_length True \ - --bf16 True \ - --output_dir ./checkpoints/llava-v1.5-7b-task-lora_radialog_radrestruct_unfrozen_noaugs_noweighting_lr2e-5_merged_1to1 \ - --num_train_epochs 1 \ - --per_device_train_batch_size 2 \ - --per_device_eval_batch_size 4 \ - --gradient_accumulation_steps 64 \ - --evaluation_strategy "no" \ - --save_strategy "steps" \ - --save_steps 500 \ - --learning_rate 2e-5 \ - --max_grad_norm 0.1 \ - --weight_decay 0. \ - --warmup_ratio 0.03 \ - --lr_scheduler_type "cosine" \ - --logging_steps 1 \ - --tf32 True \ - --model_max_length 550 \ - --gradient_checkpointing False \ - --dataloader_num_workers 4 \ - --lazy_preprocess True \ - --report_to wandb \ - --run_name llava-v1.5-7b-task-lora_radialog_radrestruct_unfrozen_noaugs_noweighting_lr2e-5_merged_1to1 \ - --unfreeze_n_vision_tower_layers 12 diff --git a/LLAVA_Biovil/slurm_config_radrestruct1to10.conf b/LLAVA_Biovil/slurm_config_radrestruct1to10.conf deleted file mode 100644 index 7a7a307beb364bc9d707db955618f3f09e048b04..0000000000000000000000000000000000000000 --- a/LLAVA_Biovil/slurm_config_radrestruct1to10.conf +++ /dev/null @@ -1,61 +0,0 @@ -#!/bin/sh - -#SBATCH --job-name=rr_1to10 -#SBATCH --output=oracle-%A.out # Standard output of the script (Can be absolute or relative path). %A adds the job id to the file name so you can launch the same script multiple times and get different logging files -#SBATCH --error=oracle-%A.err # Standard error of the script -#SBATCH --time=0-160:00:00 # Limit on the total run time (format: days-hours:minutes:seconds) -#SBATCH --gres=gpu:1 # Number of GPUs if needed -#SBATCH --cpus-per-task=8 # Number of CPUs (Don't use more than 24 per GPU) -#SBATCH --mem=96G # Memory in GB (Don't use more than 126G per GPU), maybe 128? - -# activate corresponding environment -# conda deactivate # If you launch your script from a terminal where your environment is already loaded, conda won't activate the environment. 
This guards against that. Not necessary if you always run this script from a clean terminal -source ~/miniconda3/etc/profile.d/conda.sh -conda activate llava_raddialog -# FLASH ATTN NEEDS TO BE INSTALLED FROM THE SOURCE FOR CUDA 11.7 by previously setting CUDA HOME and LD_LIBRARY SOMETHING VARIABLES. -# POTENTIALLY TRY OUT VERSION 2 AS WELL WHICH IS LLAMA 2 BASED -export PYTHONPATH="/home/guests/chantal_pellegrini/RaDialog_LLaVA:$PYTHONPATH" - -export GPUS_PER_NODE=1 -#export MASTER_ADDR=$(scontrol show hostnames $SLURM_JOB_NODELIST | head -n 1) # TODO needed for multi-node setups -#export MASTER_PORT=9901 -export MASTER_ADDR=$(hostname) -export MASTER_PORT=29716 - -srun --jobid $SLURM_JOBID python -m torch.distributed.run --nproc_per_node=$GPUS_PER_NODE --master_addr=$MASTER_ADDR --master_port=$MASTER_PORT llava/train/train_mem.py \ - --lora_enable True --lora_r 128 --lora_alpha 256 --mm_projector_lr 2e-5 \ - --deepspeed ./scripts/zero2.json \ - --model_name_or_path liuhaotian/llava-v1.5-7b \ - --version v1 \ - --data_path /home/guests/chantal_pellegrini/RaDialog_LLaVA/data/radrestruct_mimic_merged_llava_balanced_1to10.json \ - --image_folder /home/data/DIVA/mimic/mimic-cxr-jpg/2.0.0 \ - --vision_tower biovil \ - --mm_projector_type mlp2x_gelu \ - --mm_vision_select_layer -2 \ - --mm_use_im_start_end False \ - --mm_use_im_patch_token False \ - --image_aspect_ratio pad \ - --group_by_modality_length True \ - --bf16 True \ - --output_dir ./checkpoints/llava-v1.5-7b-task-lora_radialog_radrestruct_unfrozen_noaugs_noweighting_lr2e-5_merged_1to10 \ - --num_train_epochs 1 \ - --per_device_train_batch_size 2 \ - --per_device_eval_batch_size 4 \ - --gradient_accumulation_steps 64 \ - --evaluation_strategy "no" \ - --save_strategy "steps" \ - --save_steps 500 \ - --learning_rate 2e-5 \ - --max_grad_norm 0.1 \ - --weight_decay 0. \ - --warmup_ratio 0.03 \ - --lr_scheduler_type "cosine" \ - --logging_steps 1 \ - --tf32 True \ - --model_max_length 550 \ - --gradient_checkpointing False \ - --dataloader_num_workers 4 \ - --lazy_preprocess True \ - --report_to wandb \ - --run_name llava-v1.5-7b-task-lora_radialog_radrestruct_unfrozen_noaugs_noweighting_lr2e-5_merged_1to10 \ - --unfreeze_n_vision_tower_layers 12 diff --git a/LLAVA_Biovil/slurm_config_radrestruct1to5.conf b/LLAVA_Biovil/slurm_config_radrestruct1to5.conf deleted file mode 100644 index 1d33b57619f37487d4a5d30ef1d926dc60dae506..0000000000000000000000000000000000000000 --- a/LLAVA_Biovil/slurm_config_radrestruct1to5.conf +++ /dev/null @@ -1,61 +0,0 @@ -#!/bin/sh - -#SBATCH --job-name=rr_1to5 -#SBATCH --output=oracle-%A.out # Standard output of the script (Can be absolute or relative path). %A adds the job id to the file name so you can launch the same script multiple times and get different logging files -#SBATCH --error=oracle-%A.err # Standard error of the script -#SBATCH --time=0-160:00:00 # Limit on the total run time (format: days-hours:minutes:seconds) -#SBATCH --gres=gpu:1 # Number of GPUs if needed -#SBATCH --cpus-per-task=8 # Number of CPUs (Don't use more than 24 per GPU) -#SBATCH --mem=96G # Memory in GB (Don't use more than 126G per GPU), maybe 128? - -# activate corresponding environment -# conda deactivate # If you launch your script from a terminal where your environment is already loaded, conda won't activate the environment. This guards against that. 
Not necessary if you always run this script from a clean terminal -source ~/miniconda3/etc/profile.d/conda.sh -conda activate llava_raddialog -# FLASH ATTN NEEDS TO BE INSTALLED FROM THE SOURCE FOR CUDA 11.7 by previously setting CUDA HOME and LD_LIBRARY SOMETHING VARIABLES. -# POTENTIALLY TRY OUT VERSION 2 AS WELL WHICH IS LLAMA 2 BASED -export PYTHONPATH="/home/guests/chantal_pellegrini/RaDialog_LLaVA:$PYTHONPATH" - -export GPUS_PER_NODE=1 -#export MASTER_ADDR=$(scontrol show hostnames $SLURM_JOB_NODELIST | head -n 1) # TODO needed for multi-node setups -#export MASTER_PORT=9901 -export MASTER_ADDR=$(hostname) -export MASTER_PORT=29715 - -srun --jobid $SLURM_JOBID python -m torch.distributed.run --nproc_per_node=$GPUS_PER_NODE --master_addr=$MASTER_ADDR --master_port=$MASTER_PORT llava/train/train_mem.py \ - --lora_enable True --lora_r 128 --lora_alpha 256 --mm_projector_lr 2e-5 \ - --deepspeed ./scripts/zero2.json \ - --model_name_or_path liuhaotian/llava-v1.5-7b \ - --version v1 \ - --data_path /home/guests/chantal_pellegrini/RaDialog_LLaVA/data/radrestruct_mimic_merged_llava_balanced_1to5.json \ - --image_folder /home/data/DIVA/mimic/mimic-cxr-jpg/2.0.0 \ - --vision_tower biovil \ - --mm_projector_type mlp2x_gelu \ - --mm_vision_select_layer -2 \ - --mm_use_im_start_end False \ - --mm_use_im_patch_token False \ - --image_aspect_ratio pad \ - --group_by_modality_length True \ - --bf16 True \ - --output_dir ./checkpoints/llava-v1.5-7b-task-lora_radialog_radrestruct_unfrozen_noaugs_noweighting_lr2e-5_merged_1to5 \ - --num_train_epochs 1 \ - --per_device_train_batch_size 2 \ - --per_device_eval_batch_size 4 \ - --gradient_accumulation_steps 64 \ - --evaluation_strategy "no" \ - --save_strategy "steps" \ - --save_steps 500 \ - --learning_rate 2e-5 \ - --max_grad_norm 0.1 \ - --weight_decay 0. \ - --warmup_ratio 0.03 \ - --lr_scheduler_type "cosine" \ - --logging_steps 1 \ - --tf32 True \ - --model_max_length 550 \ - --gradient_checkpointing False \ - --dataloader_num_workers 4 \ - --lazy_preprocess True \ - --report_to wandb \ - --run_name llava-v1.5-7b-task-lora_radialog_radrestruct_unfrozen_noaugs_noweighting_lr2e-5_merged_1to5 \ - --unfreeze_n_vision_tower_layers 12 diff --git a/LLAVA_Biovil/slurm_config_radrestruct2.conf b/LLAVA_Biovil/slurm_config_radrestruct2.conf deleted file mode 100644 index cdc7504b8138b309800a526ef18a66b134c2f8d0..0000000000000000000000000000000000000000 --- a/LLAVA_Biovil/slurm_config_radrestruct2.conf +++ /dev/null @@ -1,61 +0,0 @@ -#!/bin/sh - -#SBATCH --job-name=rr_ufr-noaugs -#SBATCH --output=oracle-%A.out # Standard output of the script (Can be absolute or relative path). %A adds the job id to the file name so you can launch the same script multiple times and get different logging files -#SBATCH --error=oracle-%A.err # Standard error of the script -#SBATCH --time=0-160:00:00 # Limit on the total run time (format: days-hours:minutes:seconds) -#SBATCH --gres=gpu:1 # Number of GPUs if needed -#SBATCH --cpus-per-task=8 # Number of CPUs (Don't use more than 24 per GPU) -#SBATCH --mem=96G # Memory in GB (Don't use more than 126G per GPU), maybe 128? - -# activate corresponding environment -# conda deactivate # If you launch your script from a terminal where your environment is already loaded, conda won't activate the environment. This guards against that. 
Not necessary if you always run this script from a clean terminal -source ~/miniconda3/etc/profile.d/conda.sh -conda activate llava_raddialog -# FLASH ATTN NEEDS TO BE INSTALLED FROM THE SOURCE FOR CUDA 11.7 by previously setting CUDA HOME and LD_LIBRARY SOMETHING VARIABLES. -# POTENTIALLY TRY OUT VERSION 2 AS WELL WHICH IS LLAMA 2 BASED -export PYTHONPATH="/home/guests/chantal_pellegrini/RaDialog_LLaVA:$PYTHONPATH" - -export GPUS_PER_NODE=1 -#export MASTER_ADDR=$(scontrol show hostnames $SLURM_JOB_NODELIST | head -n 1) # TODO needed for multi-node setups -#export MASTER_PORT=9901 -export MASTER_ADDR=$(hostname) -export MASTER_PORT=29717 - -srun --jobid $SLURM_JOBID python -m torch.distributed.run --nproc_per_node=$GPUS_PER_NODE --master_addr=$MASTER_ADDR --master_port=$MASTER_PORT llava/train/train_mem.py \ - --lora_enable True --lora_r 128 --lora_alpha 256 --mm_projector_lr 2e-5 \ - --deepspeed ./scripts/zero2.json \ - --model_name_or_path liuhaotian/llava-v1.5-7b \ - --version v1 \ - --data_path /home/guests/chantal_pellegrini/RaDialog_LLaVA/data/radrestruct_llava_balanced_50ep.json \ - --image_folder /home/data/DIVA/mimic/mimic-cxr-jpg/2.0.0 \ - --vision_tower biovil \ - --mm_projector_type mlp2x_gelu \ - --mm_vision_select_layer -2 \ - --mm_use_im_start_end False \ - --mm_use_im_patch_token False \ - --image_aspect_ratio pad \ - --group_by_modality_length True \ - --bf16 True \ - --output_dir ./checkpoints/llava-v1.5-7b-task-lora_radialog_radrestruct_unfrozen_noaugs_noweighting_lr2e-5_balanced_50ep \ - --num_train_epochs 1 \ - --per_device_train_batch_size 2 \ - --per_device_eval_batch_size 4 \ - --gradient_accumulation_steps 64 \ - --evaluation_strategy "no" \ - --save_strategy "steps" \ - --save_steps 500 \ - --learning_rate 2e-5 \ - --max_grad_norm 0.1 \ - --weight_decay 0. \ - --warmup_ratio 0.03 \ - --lr_scheduler_type "cosine" \ - --logging_steps 1 \ - --tf32 True \ - --model_max_length 550 \ - --gradient_checkpointing False \ - --dataloader_num_workers 4 \ - --lazy_preprocess True \ - --report_to wandb \ - --run_name llava-v1.5-7b-task-lora_radialog_radrestruct_unfrozen_noaugs_noweighting_lr2e-5_balanced_50ep_debug \ - --unfreeze_n_vision_tower_layers 12 diff --git a/LLAVA_Biovil/slurm_config_radrestruct3.conf b/LLAVA_Biovil/slurm_config_radrestruct3.conf deleted file mode 100644 index 66cba9d8e26db25dc4f0f71b3c789a46bd47980c..0000000000000000000000000000000000000000 --- a/LLAVA_Biovil/slurm_config_radrestruct3.conf +++ /dev/null @@ -1,62 +0,0 @@ -#!/bin/sh - -#SBATCH --job-name=rr_ufr-augs -#SBATCH --output=oracle-%A.out # Standard output of the script (Can be absolute or relative path). %A adds the job id to the file name so you can launch the same script multiple times and get different logging files -#SBATCH --error=oracle-%A.err # Standard error of the script -#SBATCH --time=0-160:00:00 # Limit on the total run time (format: days-hours:minutes:seconds) -#SBATCH --gres=gpu:1 # Number of GPUs if needed -#SBATCH --cpus-per-task=8 # Number of CPUs (Don't use more than 24 per GPU) -#SBATCH --mem=96G # Memory in GB (Don't use more than 126G per GPU), maybe 128? - -# activate corresponding environment -# conda deactivate # If you launch your script from a terminal where your environment is already loaded, conda won't activate the environment. This guards against that. 
Not necessary if you always run this script from a clean terminal -source ~/miniconda3/etc/profile.d/conda.sh -conda activate llava_raddialog -# FLASH ATTN NEEDS TO BE INSTALLED FROM THE SOURCE FOR CUDA 11.7 by previously setting CUDA HOME and LD_LIBRARY SOMETHING VARIABLES. -# POTENTIALLY TRY OUT VERSION 2 AS WELL WHICH IS LLAMA 2 BASED -export PYTHONPATH="/home/guests/chantal_pellegrini/RaDialog_LLaVA:$PYTHONPATH" - -export GPUS_PER_NODE=1 -#export MASTER_ADDR=$(scontrol show hostnames $SLURM_JOB_NODELIST | head -n 1) # TODO needed for multi-node setups -#export MASTER_PORT=9901 -export MASTER_ADDR=$(hostname) -export MASTER_PORT=29717 - -srun --jobid $SLURM_JOBID python -m torch.distributed.run --nproc_per_node=$GPUS_PER_NODE --master_addr=$MASTER_ADDR --master_port=$MASTER_PORT llava/train/train_mem.py \ - --lora_enable True --lora_r 128 --lora_alpha 256 --mm_projector_lr 2e-5 \ - --deepspeed ./scripts/zero2.json \ - --model_name_or_path liuhaotian/llava-v1.5-7b \ - --version v1 \ - --data_path /home/guests/chantal_pellegrini/RaDialog_LLaVA/data/radrestruct_llava_balanced_50ep.json \ - --image_folder /home/data/DIVA/mimic/mimic-cxr-jpg/2.0.0 \ - --vision_tower biovil \ - --mm_projector_type mlp2x_gelu \ - --mm_vision_select_layer -2 \ - --mm_use_im_start_end False \ - --mm_use_im_patch_token False \ - --image_aspect_ratio pad \ - --group_by_modality_length True \ - --bf16 True \ - --output_dir ./checkpoints/llava-v1.5-7b-task-lora_radialog_radrestruct_unfrozen_augs_noweighting_lr2e-5_balanced_50ep \ - --num_train_epochs 1 \ - --per_device_train_batch_size 2 \ - --per_device_eval_batch_size 4 \ - --gradient_accumulation_steps 64 \ - --evaluation_strategy "no" \ - --save_strategy "steps" \ - --save_steps 500 \ - --learning_rate 2e-5 \ - --max_grad_norm 0.1 \ - --weight_decay 0. \ - --warmup_ratio 0.03 \ - --lr_scheduler_type "cosine" \ - --logging_steps 1 \ - --tf32 True \ - --model_max_length 550 \ - --gradient_checkpointing False \ - --dataloader_num_workers 4 \ - --lazy_preprocess True \ - --report_to wandb \ - --run_name llava-v1.5-7b-task-lora_radialog_radrestruct_unfrozen_augs_noweighting_lr2e-5_balanced_50ep \ - --unfreeze_n_vision_tower_layers 12 \ - --do_augment diff --git a/LLAVA_Biovil/slurm_config_radrestruct4.conf b/LLAVA_Biovil/slurm_config_radrestruct4.conf deleted file mode 100644 index bbe5dbae4d30428f6248e84f55936e208bb6a44d..0000000000000000000000000000000000000000 --- a/LLAVA_Biovil/slurm_config_radrestruct4.conf +++ /dev/null @@ -1,61 +0,0 @@ -#!/bin/sh - -#SBATCH --job-name=rr_fr-augs -#SBATCH --output=oracle-%A.out # Standard output of the script (Can be absolute or relative path). %A adds the job id to the file name so you can launch the same script multiple times and get different logging files -#SBATCH --error=oracle-%A.err # Standard error of the script -#SBATCH --time=0-160:00:00 # Limit on the total run time (format: days-hours:minutes:seconds) -#SBATCH --gres=gpu:1 # Number of GPUs if needed -#SBATCH --cpus-per-task=8 # Number of CPUs (Don't use more than 24 per GPU) -#SBATCH --mem=96G # Memory in GB (Don't use more than 126G per GPU), maybe 128? - -# activate corresponding environment -# conda deactivate # If you launch your script from a terminal where your environment is already loaded, conda won't activate the environment. This guards against that. 
Not necessary if you always run this script from a clean terminal -source ~/miniconda3/etc/profile.d/conda.sh -conda activate llava_raddialog -# FLASH ATTN NEEDS TO BE INSTALLED FROM THE SOURCE FOR CUDA 11.7 by previously setting CUDA HOME and LD_LIBRARY SOMETHING VARIABLES. -# POTENTIALLY TRY OUT VERSION 2 AS WELL WHICH IS LLAMA 2 BASED -export PYTHONPATH="/home/guests/chantal_pellegrini/RaDialog_LLaVA:$PYTHONPATH" - -export GPUS_PER_NODE=1 -#export MASTER_ADDR=$(scontrol show hostnames $SLURM_JOB_NODELIST | head -n 1) # TODO needed for multi-node setups -#export MASTER_PORT=9901 -export MASTER_ADDR=$(hostname) -export MASTER_PORT=29718 - -srun --jobid $SLURM_JOBID python -m torch.distributed.run --nproc_per_node=$GPUS_PER_NODE --master_addr=$MASTER_ADDR --master_port=$MASTER_PORT llava/train/train_mem.py \ - --lora_enable True --lora_r 128 --lora_alpha 256 --mm_projector_lr 2e-5 \ - --deepspeed ./scripts/zero2.json \ - --model_name_or_path liuhaotian/llava-v1.5-7b \ - --version v1 \ - --data_path /home/guests/chantal_pellegrini/RaDialog_LLaVA/data/radrestruct_llava_balanced_50ep.json \ - --image_folder /home/data/DIVA/mimic/mimic-cxr-jpg/2.0.0 \ - --vision_tower biovil \ - --mm_projector_type mlp2x_gelu \ - --mm_vision_select_layer -2 \ - --mm_use_im_start_end False \ - --mm_use_im_patch_token False \ - --image_aspect_ratio pad \ - --group_by_modality_length True \ - --bf16 True \ - --output_dir ./checkpoints/llava-v1.5-7b-task-lora_radialog_radrestruct_frozen_augs_noweighting_lr2e-5_balanced_50ep \ - --num_train_epochs 1 \ - --per_device_train_batch_size 2 \ - --per_device_eval_batch_size 4 \ - --gradient_accumulation_steps 64 \ - --evaluation_strategy "no" \ - --save_strategy "steps" \ - --save_steps 500 \ - --learning_rate 2e-5 \ - --max_grad_norm 0.1 \ - --weight_decay 0. \ - --warmup_ratio 0.03 \ - --lr_scheduler_type "cosine" \ - --logging_steps 1 \ - --tf32 True \ - --model_max_length 550 \ - --gradient_checkpointing False \ - --dataloader_num_workers 4 \ - --lazy_preprocess True \ - --report_to wandb \ - --run_name llava-v1.5-7b-task-lora_radialog_radrestruct_frozen_augs_noweighting_lr2e-5_balanced_50ep \ - --do_augment diff --git a/LLAVA_Biovil/slurm_config_report.conf b/LLAVA_Biovil/slurm_config_report.conf deleted file mode 100644 index 1f618f7fb2978fefddfe21fac25fec51e5dd33f4..0000000000000000000000000000000000000000 --- a/LLAVA_Biovil/slurm_config_report.conf +++ /dev/null @@ -1,62 +0,0 @@ -#!/bin/sh - -#SBATCH --job-name=rd_llava -#SBATCH --output=oracle-%A.out # Standard output of the script (Can be absolute or relative path). %A adds the job id to the file name so you can launch the same script multiple times and get different logging files -#SBATCH --error=oracle-%A.err # Standard error of the script -#SBATCH --time=0-160:00:00 # Limit on the total run time (format: days-hours:minutes:seconds) -#SBATCH --gres=gpu:1 # Number of GPUs if needed -#SBATCH --cpus-per-task=8 # Number of CPUs (Don't use more than 24 per GPU) -#SBATCH --mem=96G # Memory in GB (Don't use more than 126G per GPU), maybe 128? - -# activate corresponding environment -# conda deactivate # If you launch your script from a terminal where your environment is already loaded, conda won't activate the environment. This guards against that. 
diff --git a/LLAVA_Biovil/slurm_config_report.conf b/LLAVA_Biovil/slurm_config_report.conf
deleted file mode 100644
index 1f618f7fb2978fefddfe21fac25fec51e5dd33f4..0000000000000000000000000000000000000000
--- a/LLAVA_Biovil/slurm_config_report.conf
+++ /dev/null
@@ -1,62 +0,0 @@
-#!/bin/sh
-
-#SBATCH --job-name=rd_llava
-#SBATCH --output=oracle-%A.out # Standard output of the script (Can be absolute or relative path). %A adds the job id to the file name so you can launch the same script multiple times and get different logging files
-#SBATCH --error=oracle-%A.err # Standard error of the script
-#SBATCH --time=0-160:00:00 # Limit on the total run time (format: days-hours:minutes:seconds)
-#SBATCH --gres=gpu:1 # Number of GPUs if needed
-#SBATCH --cpus-per-task=8 # Number of CPUs (Don't use more than 24 per GPU)
-#SBATCH --mem=96G # Memory in GB (Don't use more than 126G per GPU, possibly 128G)
-
-# activate corresponding environment
-# conda deactivate # If you launch your script from a terminal where your environment is already loaded, conda won't activate the environment. This guards against that. Not necessary if you always run this script from a clean terminal
-source ~/miniconda3/etc/profile.d/conda.sh
-conda activate llava_raddialog
-# FLASH ATTN NEEDS TO BE INSTALLED FROM THE SOURCE FOR CUDA 11.7 by previously setting the CUDA_HOME and LD_LIBRARY_PATH variables.
-# POTENTIALLY TRY OUT VERSION 2 AS WELL WHICH IS LLAMA 2 BASED
-
-export PYTHONPATH="/home/guests/chantal_pellegrini/RaDialog_LLaVA:$PYTHONPATH"
-
-export GPUS_PER_NODE=1
-#export MASTER_ADDR=$(scontrol show hostnames $SLURM_JOB_NODELIST | head -n 1) # TODO needed for multi-node setups
-#export MASTER_PORT=9901
-export MASTER_ADDR=$(hostname)
-export MASTER_PORT=29721
-
-srun --jobid $SLURM_JOBID python -m torch.distributed.run --nproc_per_node=$GPUS_PER_NODE --master_addr=$MASTER_ADDR --master_port=$MASTER_PORT llava/train/train_mem.py \
-    --lora_enable True --lora_r 128 --lora_alpha 256 --mm_projector_lr 2e-4 \
-    --deepspeed ./scripts/zero2.json \
-    --model_name_or_path liuhaotian/llava-v1.5-7b \
-    --version v1 \
-    --data_path /home/guests/chantal_pellegrini/RaDialog_LLaVA/data/mimic_cxr_reports_llava.json \
-    --image_folder /home/data/DIVA/mimic/mimic-cxr-jpg/2.0.0 \
-    --vision_tower openai/clip-vit-large-patch14-336 \
-    --mm_projector_type mlp2x_gelu \
-    --mm_vision_select_layer -2 \
-    --mm_use_im_start_end False \
-    --mm_use_im_patch_token False \
-    --image_aspect_ratio pad \
-    --group_by_modality_length True \
-    --bf16 True \
-    --output_dir ./checkpoints/llava-v1.5-7b-task-lora_radialog_cosine_llava_report_unfreeze_2e-5 \
-    --num_train_epochs 5 \
-    --per_device_train_batch_size 2 \
-    --per_device_eval_batch_size 4 \
-    --gradient_accumulation_steps 64 \
-    --evaluation_strategy "no" \
-    --save_strategy "steps" \
-    --save_steps 500 \
-    --learning_rate 2e-5 \
-    --max_grad_norm 0.1 \
-    --weight_decay 0. \
-    --warmup_ratio 0.03 \
-    --lr_scheduler_type "cosine" \
-    --logging_steps 1 \
-    --tf32 True \
-    --model_max_length 1100 \
-    --gradient_checkpointing False \
-    --dataloader_num_workers 4 \
-    --lazy_preprocess True \
-    --report_to wandb \
-    --run_name llava-v1.5-7b-task-lora_radialog_cosine_llava_report_unfreeze_2e-5 \
-    --unfreeze_n_vision_tower_layers 12
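The commented-out MASTER_ADDR/MASTER_PORT lines above are marked as needed for multi-node setups. A hedged sketch of how such a launch could look under SLURM follows; the node count, port, and rendezvous options are illustrative assumptions, not settings used by these configs:

```bash
#!/bin/sh
#SBATCH --nodes=2            # illustrative multi-node request
#SBATCH --gres=gpu:1

# Take the rank-0 host from the SLURM node list so every node agrees on it.
export GPUS_PER_NODE=1
export MASTER_ADDR=$(scontrol show hostnames $SLURM_JOB_NODELIST | head -n 1)
export MASTER_PORT=9901      # any free port shared by all nodes

# One launcher per node; c10d rendezvous avoids passing an explicit node rank.
srun python -m torch.distributed.run \
    --nnodes=$SLURM_NNODES --nproc_per_node=$GPUS_PER_NODE \
    --rdzv_backend=c10d --rdzv_endpoint=$MASTER_ADDR:$MASTER_PORT \
    llava/train/train_mem.py   # plus the same training flags as above
```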
diff --git a/LLAVA_Biovil/slurm_config_report_biovil.conf b/LLAVA_Biovil/slurm_config_report_biovil.conf
deleted file mode 100644
index 7ad3430c45334a2a76079ac96bc618ddda0d6320..0000000000000000000000000000000000000000
--- a/LLAVA_Biovil/slurm_config_report_biovil.conf
+++ /dev/null
@@ -1,61 +0,0 @@
-#!/bin/sh
-
-#SBATCH --job-name=rep_unfrozen
-#SBATCH --output=oracle-%A.out # Standard output of the script (Can be absolute or relative path). %A adds the job id to the file name so you can launch the same script multiple times and get different logging files
-#SBATCH --error=oracle-%A.err # Standard error of the script
-#SBATCH --time=0-160:00:00 # Limit on the total run time (format: days-hours:minutes:seconds)
-#SBATCH --gres=gpu:1 # Number of GPUs if needed
-#SBATCH --cpus-per-task=8 # Number of CPUs (Don't use more than 24 per GPU)
-#SBATCH --mem=96G # Memory in GB (Don't use more than 126G per GPU, possibly 128G)
-
-# activate corresponding environment
-# conda deactivate # If you launch your script from a terminal where your environment is already loaded, conda won't activate the environment. This guards against that. Not necessary if you always run this script from a clean terminal
-source ~/miniconda3/etc/profile.d/conda.sh
-conda activate llava_raddialog
-# FLASH ATTN NEEDS TO BE INSTALLED FROM THE SOURCE FOR CUDA 11.7 by previously setting the CUDA_HOME and LD_LIBRARY_PATH variables.
-# POTENTIALLY TRY OUT VERSION 2 AS WELL WHICH IS LLAMA 2 BASED
-export PYTHONPATH="/home/guests/chantal_pellegrini/RaDialog_LLaVA:$PYTHONPATH"
-
-export GPUS_PER_NODE=1
-#export MASTER_ADDR=$(scontrol show hostnames $SLURM_JOB_NODELIST | head -n 1) # TODO needed for multi-node setups
-#export MASTER_PORT=9901
-export MASTER_ADDR=$(hostname)
-export MASTER_PORT=29713
-
-srun --jobid $SLURM_JOBID python -m torch.distributed.run --nproc_per_node=$GPUS_PER_NODE --master_addr=$MASTER_ADDR --master_port=$MASTER_PORT llava/train/train_mem.py \
-    --lora_enable True --lora_r 128 --lora_alpha 256 --mm_projector_lr 2e-4 \
-    --deepspeed ./scripts/zero2.json \
-    --model_name_or_path liuhaotian/llava-v1.5-7b \
-    --version v1 \
-    --data_path /home/guests/chantal_pellegrini/RaDialog_LLaVA/data/mimic_cxr_reports_llava.json \
-    --image_folder /home/data/DIVA/mimic/mimic-cxr-jpg/2.0.0 \
-    --vision_tower biovil \
-    --mm_projector_type mlp2x_gelu \
-    --mm_vision_select_layer -2 \
-    --mm_use_im_start_end False \
-    --mm_use_im_patch_token False \
-    --image_aspect_ratio pad \
-    --group_by_modality_length True \
-    --bf16 True \
-    --output_dir ./checkpoints/llava-v1.5-7b-task-lora_radialog_cosine_llava_report_biovil_unfrozen_5epochs_highlr \
-    --num_train_epochs 5 \
-    --per_device_train_batch_size 2 \
-    --per_device_eval_batch_size 4 \
-    --gradient_accumulation_steps 64 \
-    --evaluation_strategy "no" \
-    --save_strategy "steps" \
-    --save_steps 1500 \
-    --learning_rate 2e-4 \
-    --max_grad_norm 0.1 \
-    --weight_decay 0. \
-    --warmup_ratio 0.03 \
-    --lr_scheduler_type "cosine" \
-    --logging_steps 1 \
-    --tf32 True \
-    --model_max_length 800 \
-    --gradient_checkpointing False \
-    --dataloader_num_workers 4 \
-    --lazy_preprocess True \
-    --report_to wandb \
-    --run_name llava-v1.5-7b-task-lora_radialog_cosine_llava_report_biovil_unfrozen_5epochs_highlr \
-    --unfreeze_n_vision_tower_layers 12
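Each of these configs hard-codes a distinct MASTER_PORT (29711, 29713, 29717, 29718, 29721) so that concurrent jobs on the same node do not collide on the rendezvous port. A hedged alternative, rather than maintaining that list by hand, is to derive the port from the SLURM job id; the base port and range below are arbitrary assumptions:

```bash
#!/bin/sh
# Derive a per-job rendezvous port from the SLURM job id so concurrent
# jobs on the same node pick different ports (base and range are arbitrary).
export MASTER_ADDR=$(hostname)
export MASTER_PORT=$((29500 + SLURM_JOBID % 200))
echo "rendezvous at $MASTER_ADDR:$MASTER_PORT"
```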
diff --git a/LLAVA_Biovil/slurm_config_report_biovil_frozen.conf b/LLAVA_Biovil/slurm_config_report_biovil_frozen.conf
deleted file mode 100644
index ae2c69d5d87346ee9ac436a73da3b593571346e2..0000000000000000000000000000000000000000
--- a/LLAVA_Biovil/slurm_config_report_biovil_frozen.conf
+++ /dev/null
@@ -1,60 +0,0 @@
-#!/bin/sh
-
-#SBATCH --job-name=bv_frozen
-#SBATCH --output=oracle-%A.out # Standard output of the script (Can be absolute or relative path). %A adds the job id to the file name so you can launch the same script multiple times and get different logging files
-#SBATCH --error=oracle-%A.err # Standard error of the script
-#SBATCH --time=0-160:00:00 # Limit on the total run time (format: days-hours:minutes:seconds)
-#SBATCH --gres=gpu:1 # Number of GPUs if needed
-#SBATCH --cpus-per-task=8 # Number of CPUs (Don't use more than 24 per GPU)
-#SBATCH --mem=96G # Memory in GB (Don't use more than 126G per GPU, possibly 128G)
-
-# activate corresponding environment
-# conda deactivate # If you launch your script from a terminal where your environment is already loaded, conda won't activate the environment. This guards against that. Not necessary if you always run this script from a clean terminal
-source ~/miniconda3/etc/profile.d/conda.sh
-conda activate llava_raddialog
-# FLASH ATTN NEEDS TO BE INSTALLED FROM THE SOURCE FOR CUDA 11.7 by previously setting the CUDA_HOME and LD_LIBRARY_PATH variables.
-# POTENTIALLY TRY OUT VERSION 2 AS WELL WHICH IS LLAMA 2 BASED
-export PYTHONPATH="/home/guests/chantal_pellegrini/RaDialog_LLaVA:$PYTHONPATH"
-
-export GPUS_PER_NODE=1
-#export MASTER_ADDR=$(scontrol show hostnames $SLURM_JOB_NODELIST | head -n 1) # TODO needed for multi-node setups
-#export MASTER_PORT=9901
-export MASTER_ADDR=$(hostname)
-export MASTER_PORT=29711
-
-srun --jobid $SLURM_JOBID python -m torch.distributed.run --nproc_per_node=$GPUS_PER_NODE --master_addr=$MASTER_ADDR --master_port=$MASTER_PORT llava/train/train_mem.py \
-    --lora_enable True --lora_r 128 --lora_alpha 256 --mm_projector_lr 5e-4 \
-    --deepspeed ./scripts/zero2.json \
-    --model_name_or_path liuhaotian/llava-v1.5-7b \
-    --version v1 \
-    --data_path /home/guests/chantal_pellegrini/RaDialog_LLaVA/data/mimic_cxr_reports_llava.json \
-    --image_folder /home/data/DIVA/mimic/mimic-cxr-jpg/2.0.0 \
-    --vision_tower biovil \
-    --mm_projector_type mlp2x_gelu \
-    --mm_vision_select_layer -2 \
-    --mm_use_im_start_end False \
-    --mm_use_im_patch_token False \
-    --image_aspect_ratio pad \
-    --group_by_modality_length True \
-    --bf16 True \
-    --output_dir ./checkpoints/llava-v1.5-7b-task-lora_radialog_cosine_llava_report_biovil_frozen_5e-4 \
-    --num_train_epochs 5 \
-    --per_device_train_batch_size 2 \
-    --per_device_eval_batch_size 4 \
-    --gradient_accumulation_steps 64 \
-    --evaluation_strategy "no" \
-    --save_strategy "steps" \
-    --save_steps 500 \
-    --learning_rate 5e-4 \
-    --max_grad_norm 0.1 \
-    --weight_decay 0. \
-    --warmup_ratio 0.03 \
-    --lr_scheduler_type "cosine" \
-    --logging_steps 1 \
-    --tf32 True \
-    --model_max_length 800 \
-    --gradient_checkpointing False \
-    --dataloader_num_workers 4 \
-    --lazy_preprocess True \
-    --report_to wandb \
-    --run_name llava-v1.5-7b-task-lora_radialog_cosine_llava_report_biovil_frozen_5e-4
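For completeness, a hedged sketch of how one of the configs deleted above would typically be submitted and followed; the oracle-&lt;jobid&gt;.out name matches the %A placeholder in the #SBATCH --output lines:

```bash
#!/bin/sh
# Submit the frozen BioViL report config and follow its stdout log.
# sbatch --parsable prints only the job id; %A in --output expands to it.
JOBID=$(sbatch --parsable slurm_config_report_biovil_frozen.conf)
echo "submitted job $JOBID"
tail -f oracle-${JOBID}.out
```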