|
---
pipeline_tag: image-text-to-text
---

The following code runs Pangea-7B using the Hugging Face `generate` API:

```python
# Assumes `text_input` (the user's question) and `image_path` are already defined
from transformers import LlavaNextForConditionalGeneration, AutoProcessor
import torch
from PIL import Image

image_input = Image.open(image_path)

# Load the model in fp16 on the first GPU
model = LlavaNextForConditionalGeneration.from_pretrained(
    "neulab/Pangea-7B-hf",
    torch_dtype=torch.float16,
).to(0)
processor = AutoProcessor.from_pretrained("neulab/Pangea-7B-hf")
model.resize_token_embeddings(len(processor.tokenizer))

# Wrap the question in the model's chat format; <image> marks where the image is inserted
prompt = (
    "<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n"
    f"<|im_start|>user\n<image>\n{text_input}<|im_end|>\n"
    "<|im_start|>assistant\n"
)
model_inputs = processor(images=image_input, text=prompt, return_tensors="pt").to("cuda", torch.float16)

output = model.generate(
    **model_inputs,
    max_new_tokens=1024,
    min_new_tokens=32,
    temperature=1.0,
    top_p=0.9,
    do_sample=True,
)

# Note: the output sequence still contains the prompt tokens, so the decoded
# result echoes the prompt before the model's answer
result = processor.decode(output[0], skip_special_tokens=True, clean_up_tokenization_spaces=False)
print(result)
```
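
If you prefer not to hand-write the `<|im_start|>` markup, recent `transformers` releases can build the prompt from a structured message list via the processor's `apply_chat_template`, provided the repository ships a chat template. The sketch below assumes such a template is available, and also shows how to decode only the newly generated tokens so the echoed prompt is dropped:

```python
# Sketch: assumes the processor defines a chat template matching the format above
conversation = [
    {"role": "system", "content": [{"type": "text", "text": "You are a helpful assistant."}]},
    {"role": "user", "content": [{"type": "image"}, {"type": "text", "text": text_input}]},
]
prompt = processor.apply_chat_template(conversation, add_generation_prompt=True)
model_inputs = processor(images=image_input, text=prompt, return_tensors="pt").to("cuda", torch.float16)

output = model.generate(**model_inputs, max_new_tokens=1024, do_sample=True, temperature=1.0, top_p=0.9)

# Slice off the prompt tokens so only the model's answer is decoded
prompt_len = model_inputs["input_ids"].shape[-1]
print(processor.decode(output[0][prompt_len:], skip_special_tokens=True))
```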