xxx1 committed
Commit c431f44
Parent(s): 1d42b83

Update app.py

Files changed (1):
  1. app.py +40 -2
app.py CHANGED
@@ -10,9 +10,36 @@ device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 
 processor = BlipProcessor.from_pretrained("Salesforce/blip-vqa-capfilt-large")
 model_vqa = BlipForQuestionAnswering.from_pretrained("Salesforce/blip-vqa-capfilt-large").to(device)
+
+from transformers import BlipProcessor, Blip2ForConditionalGeneration
+
+cap_processor = BlipProcessor.from_pretrained("Salesforce/blip2-flan-t5-xl")
+cap_model = Blip2ForConditionalGeneration.from_pretrained("Salesforce/blip2-flan-t5-xl")
+
+
+
+def caption(input_image):
+    inputs = processor(input_image, return_tensors="pt")
+    inputs["num_beams"] = 4
+    inputs['num_return_sequences'] =4
+    out = model.generate(**inputs)
+    return "\n".join(processor.decode(out[0], skip_special_tokens=True))
+def gpt3(input_text):
+    response = openai.Completion.create(
+        engine="text-davinci-003",
+        prompt=input_text,
+        max_tokens=10,
+        n=1,
+        stop=None,
+        temperature=0.7,
+    )
+    answer = response.choices[0].text.strip()
+    return answer
+
+
 def inference_chat(input_image,input_text):
     inputs = processor(images=input_image, text=input_text,return_tensors="pt")
-    inputs["max_length"] = 20
+    inputs["max_length"] = 10
     inputs["num_beams"] = 5
     inputs['num_return_sequences'] =4
     out = model_vqa.generate(**inputs)
@@ -41,9 +68,12 @@ with gr.Blocks(
             submit_button = gr.Button(
                 value="Submit", interactive=True, variant="primary"
             )
+            cap_submit_button = gr.Button(
+                value="Submit", interactive=True, variant="primary"
+            )
         with gr.Column():
             caption_output = gr.Textbox(lines=0, label="VQA Output(模型答案输出)")
-
+            caption_output_v1 = gr.Textbox(lines=0, label="Caption Output(模型caption输出)")
 
     image_input.change(
         lambda: ("", "", []),
@@ -73,6 +103,14 @@ with gr.Blocks(
         ],
         [caption_output],
     )
+    cap_submit_button.click(
+        caption,
+        [
+            image_input,
+
+        ],
+        [caption_output_v1],
+    )
 
     # examples = gr.Examples(
     #     examples=examples,
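Note that the `caption` helper added in this commit still calls the VQA `processor` and an undefined `model` rather than the newly created `cap_processor`/`cap_model`, and `"\n".join(processor.decode(out[0], ...))` joins the characters of a single decoded string instead of the four returned sequences, so the new Caption button would raise a NameError or return garbled text at runtime. Below is a minimal sketch of how the two new helpers could be written; it is not the code in this commit and assumes `Blip2Processor` for the BLIP-2 checkpoint, the legacy `openai<1.0` SDK with the API key taken from the environment, and illustrative generation settings.

```python
# Hedged sketch of the captioning + GPT-3 helpers, with the apparent issues corrected.
# Assumptions (not in the commit): Blip2Processor instead of BlipProcessor for the
# BLIP-2 checkpoint, openai<1.0 SDK, OPENAI_API_KEY provided via the environment,
# and illustrative generation settings (max_new_tokens).
import os

import openai
import torch
from PIL import Image
from transformers import Blip2ForConditionalGeneration, Blip2Processor

openai.api_key = os.environ.get("OPENAI_API_KEY", "")

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

cap_processor = Blip2Processor.from_pretrained("Salesforce/blip2-flan-t5-xl")
cap_model = Blip2ForConditionalGeneration.from_pretrained("Salesforce/blip2-flan-t5-xl").to(device)


def caption(input_image: Image.Image) -> str:
    """Generate several beam-search captions and return them one per line."""
    inputs = cap_processor(images=input_image, return_tensors="pt").to(device)
    out = cap_model.generate(**inputs, num_beams=4, num_return_sequences=4, max_new_tokens=30)
    # Decode every returned sequence; decoding only out[0] and join()-ing that single
    # string would insert "\n" between individual characters.
    return "\n".join(cap_processor.batch_decode(out, skip_special_tokens=True))


def gpt3(input_text: str) -> str:
    """Query text-davinci-003 for a short completion (legacy openai<1.0 API)."""
    response = openai.Completion.create(
        engine="text-davinci-003",
        prompt=input_text,
        max_tokens=10,
        n=1,
        stop=None,
        temperature=0.7,
    )
    return response.choices[0].text.strip()
```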