Stoneman committed on
Commit 1e1a274
1 Parent(s): ed7be52

Update app.py

Files changed (1)
  1. app.py +8 -60
app.py CHANGED
@@ -1,65 +1,13 @@
 import gradio as gr
-import torch
-from transformers import GPT2TokenizerFast, ViTImageProcessor, VisionEncoderDecoderModel
+from transformers import pipeline
 
-# Setup device, model, tokenizer, and feature extractor
-device = 'cpu'
-
-
-model_checkpoint1 = "Stoneman/IG-caption-generator-vit-gpt2-last-block"
-feature_extractor1 = ViTImageProcessor.from_pretrained(model_checkpoint1)
-tokenizer1 = GPT2TokenizerFast.from_pretrained(model_checkpoint1)
-model1 = VisionEncoderDecoderModel.from_pretrained(model_checkpoint1).to(device)
-
-model_checkpoint2 = "Stoneman/IG-caption-generator-vit-gpt2-all"
-model2 = VisionEncoderDecoderModel.from_pretrained(model_checkpoint2).to(device)
-
-model_checkpoint3 = "Stoneman/IG-caption-generator-nlpconnect-last-block"
-model3 = VisionEncoderDecoderModel.from_pretrained(model_checkpoint3).to(device)
-
-model_checkpoint4 = "Stoneman/IG-caption-generator-nlpconnect-all"
-model4 = VisionEncoderDecoderModel.from_pretrained(model_checkpoint4).to(device)
-
-models = {
-    1: model1,
-    2: model2,
-    3: model3,
-    4: model4
-}
-
-# Prediction function
-def predict(image, max_length=128):
-    captions = {}
-
-    image = image.convert('RGB')
-    pixel_values = feature_extractor1(images=image, return_tensors="pt").pixel_values.to(device)
-    for i in range(1,5):
-        caption_ids = models[i].generate(pixel_values, max_length=max_length)[0]
-        caption_text = tokenizer1.decode(caption_ids, skip_special_tokens=True)
-        captions[i] = caption_text
-    # Return a single string with all captions
-    return '\n\n'.join(f'Model {i}: {caption}' for i, caption in captions.items())
-
-
-# Define input and output components
-input_component = gr.components.Image(label="Upload any Image", type="pil")
-output_component = gr.components.Textbox(label="Captions")
+pipe = pipeline(task="image-to-text",
+                model="Stoneman/IG-caption-generator-nlpconnect-last-block")
 
 # Example images
 examples = [f"example{i}.JPG" for i in range(1, 10)]
-
-# Interface
-title = "IG-caption-generator"
-description = "Made by: Jiayu Shi"
-interface = gr.Interface(
-    fn=predict,
-    description=description,
-    inputs=input_component,
-    theme="huggingface",
-    outputs=output_component,
-    examples=examples,
-    title=title,
-)
-
-# Launch interface
-interface.launch(debug=True)
+gr.Interface.from_pipeline(pipe,
+                           title="IG-caption-generator",
+                           description="IG caption generator using ViT and GPT2.",
+                           examples=examples
+                           ).launch()
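
For context, the image-to-text pipeline bundles the steps the removed code performed by hand: image preprocessing, generation, and token decoding. Below is a minimal sketch of that same caption flow done manually, using the checkpoint kept by this commit; the variable names and the use of "example1.JPG" (one of the Space's example images) are illustrative, not part of the committed code.

# Sketch: roughly what pipe("example1.JPG") does internally for this checkpoint.
from PIL import Image
from transformers import GPT2TokenizerFast, ViTImageProcessor, VisionEncoderDecoderModel

checkpoint = "Stoneman/IG-caption-generator-nlpconnect-last-block"
processor = ViTImageProcessor.from_pretrained(checkpoint)
tokenizer = GPT2TokenizerFast.from_pretrained(checkpoint)
model = VisionEncoderDecoderModel.from_pretrained(checkpoint)

# Load and preprocess the image, then generate and decode a caption.
image = Image.open("example1.JPG").convert("RGB")
pixel_values = processor(images=image, return_tensors="pt").pixel_values
caption_ids = model.generate(pixel_values, max_length=128)[0]
print(tokenizer.decode(caption_ids, skip_special_tokens=True))

In the simplified app, the equivalent result comes back as pipe("example1.JPG")[0]["generated_text"], and gr.Interface.from_pipeline wires that call to an image input and a text output automatically.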