dakkoong committed
Commit ffea36f · 1 Parent(s): 4e2b8d6

initial commit

Files changed (12)
  1. app.py +66 -0
  2. image1.jpg +0 -0
  3. image10.jpg +0 -0
  4. image2.jpg +0 -0
  5. image3.jpg +0 -0
  6. image4.jpg +0 -0
  7. image5.jpg +0 -0
  8. image6.jpg +0 -0
  9. image7.jpg +0 -0
  10. image8.jpg +0 -0
  11. image9.jpg +0 -0
  12. requirements.txt +6 -0
app.py ADDED
@@ -0,0 +1,66 @@
+ import gradio as gr
+ from transformers import AutoProcessor, AutoModelForVisualQuestionAnswering
+ from PIL import Image
+ import torch
+
+ # Source dataset for the example images/questions, and the fine-tuned checkpoint to load.
+ dataset_name = "Multimodal-Fatima/OK-VQA_train"
+ model_name = "microsoft/git-base-vqav2"
+ model_path = "git-base-vqav2"
+
+ questions = ["What can happen the objects shown are thrown on the ground?",
+              "What was the machine beside the bowl used for?",
+              "What kind of cars are in the photo?",
+              "What is the hairstyle of the blond called?",
+              "How old do you have to be in canada to do this?",
+              "Can you guess the place where the man is playing?",
+              "What loony tune character is in this photo?",
+              "Whose birthday is being celebrated?",
+              "Where can that toilet seat be bought?",
+              "What do you call the kind of pants that the man on the right is wearing?"]
+
+ processor = AutoProcessor.from_pretrained(model_path)
+ model = AutoModelForVisualQuestionAnswering.from_pretrained(model_path)
+
+
+ def main(select_exemple_num):
+     # The slider value may arrive as a float, so cast it before building the file name.
+     selectednum = int(select_exemple_num)
+     exemple_img = f"image{selectednum}.jpg"
+     img = Image.open(exemple_img)
+     question = questions[selectednum - 1]
+
+     encoding = processor(img, question, return_tensors='pt')
+
+     # Run inference without tracking gradients.
+     with torch.no_grad():
+         outputs = model(**encoding)
+     logits = outputs.logits
+
+     # Score every candidate answer and keep the five most likely labels.
+     output_str = 'predicted:\n'
+     predicted_classes = torch.sigmoid(logits)
+     probs, classes = torch.topk(predicted_classes, 5)
+     ans = ''
+
+     for prob, class_idx in zip(probs.squeeze().tolist(), classes.squeeze().tolist()):
+         print(prob, model.config.id2label[class_idx])
+         output_str += f"{prob} {model.config.id2label[class_idx]}\n"
+         # Keep the first (highest-probability) label as the final answer.
+         if not ans:
+             ans = model.config.id2label[class_idx]
+
+     print(ans)
+     output_str += f"\nso I think the answer is:\n{ans}"
+
+     return exemple_img, question, output_str
+
+
+ # Gradio UI: pick an example index with the slider; show the image, its question, and the model's answer.
+ demo = gr.Interface(
+     fn=main,
+     inputs=[gr.Slider(1, len(questions), step=1)],
+     outputs=["image", "text", "text"],
+ )
+
+ demo.launch(share=True)
image1.jpg ADDED
image10.jpg ADDED
image2.jpg ADDED
image3.jpg ADDED
image4.jpg ADDED
image5.jpg ADDED
image6.jpg ADDED
image7.jpg ADDED
image8.jpg ADDED
image9.jpg ADDED
requirements.txt ADDED
@@ -0,0 +1,6 @@
+ torch
+ transformers
+ tensorflow
+ numpy
+ Pillow
+ TensorRT