krishnapal2308 committed
Commit 4582e37 · 1 Parent(s): 7fbe5c5

manual to pipeline

Files changed (1)
  1. vit_gpt2.py +30 -16
vit_gpt2.py CHANGED
@@ -1,25 +1,39 @@
- from transformers import VisionEncoderDecoderModel, ViTImageProcessor, AutoTokenizer
+ from transformers import pipeline
  from PIL import Image

- model = VisionEncoderDecoderModel.from_pretrained("vit-gpt2-image-captioning")
- feature_extractor = ViTImageProcessor.from_pretrained("vit-gpt2-image-captioning")
- tokenizer = AutoTokenizer.from_pretrained("vit-gpt2-image-captioning")
-
- max_length = 16
- num_beams = 4
- gen_kwargs = {"max_length": max_length, "num_beams": num_beams}
-

  def predict_step(img_array):
+     image_to_text = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
      i_image = Image.fromarray(img_array)

      if i_image.mode != "RGB":
          i_image = i_image.convert(mode="RGB")

-     pixel_values = feature_extractor(images=i_image, return_tensors="pt", do_normalize=True).pixel_values
-
-     output_ids = model.generate(pixel_values, **gen_kwargs)
-
-     pred = tokenizer.batch_decode(output_ids, skip_special_tokens=True)
-     pred = [p.strip() for p in pred]
-     return pred
+     prediction = image_to_text(i_image)
+     return prediction[0]['generated_text']
+
+ # from transformers import VisionEncoderDecoderModel, ViTImageProcessor, AutoTokenizer
+ # from PIL import Image
+ #
+ # model = VisionEncoderDecoderModel.from_pretrained("vit-gpt2-image-captioning")
+ # feature_extractor = ViTImageProcessor.from_pretrained("vit-gpt2-image-captioning")
+ # tokenizer = AutoTokenizer.from_pretrained("vit-gpt2-image-captioning")
+ #
+ # max_length = 16
+ # num_beams = 4
+ # gen_kwargs = {"max_length": max_length, "num_beams": num_beams}
+ #
+ #
+ # def predict_step(img_array):
+ #     i_image = Image.fromarray(img_array)
+ #
+ #     if i_image.mode != "RGB":
+ #         i_image = i_image.convert(mode="RGB")
+ #
+ #     pixel_values = feature_extractor(images=i_image, return_tensors="pt", do_normalize=True).pixel_values
+ #
+ #     output_ids = model.generate(pixel_values, **gen_kwargs)
+ #
+ #     pred = tokenizer.batch_decode(output_ids, skip_special_tokens=True)
+ #     pred = [p.strip() for p in pred]
+ #     return pred
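
For context, a minimal usage sketch of the new pipeline-based predict_step (not part of the commit). The NumPy conversion and the "example.jpg" path are assumptions for illustration; predict_step only needs an image array that PIL's Image.fromarray accepts.

import numpy as np
from PIL import Image

from vit_gpt2 import predict_step

# Hypothetical input image; any uint8 RGB-convertible array works here.
img_array = np.array(Image.open("example.jpg"))

caption = predict_step(img_array)
print(caption)  # single caption string taken from the pipeline's first result

One design note on the change: the pipeline is constructed inside predict_step, so the model weights are reloaded from the local cache on every call; building the pipeline once at module level would avoid that repeated load.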