Prakh24s committed
Commit 1a004cd · 1 Parent(s): 854fcef

Update app.py

Files changed (1): app.py (+14, -6)
app.py CHANGED
@@ -1,7 +1,5 @@
 from PIL import Image
-import requests
 import gradio as gr
-
 from transformers import BlipProcessor, BlipForConditionalGeneration
 
 model_id = "Salesforce/blip-image-captioning-base"
@@ -9,11 +7,21 @@ model_id = "Salesforce/blip-image-captioning-base"
 model = BlipForConditionalGeneration.from_pretrained(model_id)
 processor = BlipProcessor.from_pretrained(model_id)
 
-def launch(input):
-    image = Image.open(requests.get(input, stream=True).raw).convert('RGB')
+def generate_caption(image_path):
+    # Load the image directly from the path
+    image = Image.open(image_path).convert('RGB')
+
+    # Process the image to generate tensor inputs
     inputs = processor(image, return_tensors="pt")
+
+    # Generate caption for the image
     out = model.generate(**inputs)
+
+    # Decode and return the generated caption
     return processor.decode(out[0], skip_special_tokens=True)
 
-iface = gr.Interface(launch, inputs="text", outputs="text")
-iface.launch()
+# Gradio interface setup to accept image input and produce text output
+iface = gr.Interface(generate_caption, inputs="image", outputs="text")
+
+# Launch the interface
+iface.launch()
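
A caveat on the new inputs="image" shorthand: Gradio's Image component passes the uploaded image to the callback as a NumPy array by default, while generate_caption expects a file path for Image.open(). A minimal sketch of one way to reconcile the two, assuming a Gradio version whose gr.Image component accepts the type argument (this snippet is an illustration, not part of the commit):

import gradio as gr

# Request a temporary file path instead of the default NumPy array, so
# Image.open(image_path) inside generate_caption receives a real path.
iface = gr.Interface(
    generate_caption,
    inputs=gr.Image(type="filepath"),
    outputs="text",
)
iface.launch()

Alternatively, type="pil" would hand the callback a PIL image directly, and the Image.open() call inside generate_caption could be dropped.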
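
For a quick sanity check of generate_caption outside the UI, a sketch along these lines can be run locally; "example.jpg" is a placeholder path, not a file from this repo:

# Placeholder path for illustration only; substitute any local image file.
if __name__ == "__main__":
    print(generate_caption("example.jpg"))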