Abdou
/

vit-swin-base-224-gpt2-image-captioning

vision-encoder-decoder

image-text-to-text

Generated from Trainer

Inference Endpoints

Model card Files Files and versions Metrics Training metrics Community

Abdou committed on Feb 26, 2023

Commit

274a696

·

1 Parent(s): 6bec5e7

Update README.md

Files changed (1) hide show

README.md +20 -0

README.md CHANGED Viewed

@@ -53,6 +53,25 @@ Or initialize everything for more flexibility:
 ```python
 from transformers import VisionEncoderDecoderModel, GPT2TokenizerFast, ViTImageProcessor
 import torch
 # a function to perform inference
 def get_caption(model, image_processor, tokenizer, image_path):
@@ -77,6 +96,7 @@ url = "http://images.cocodataset.org/test-stuff2017/000000000019.jpg"
 caption = get_caption(model, image_processor, tokenizer, url)
 print(f"caption: {caption}")
 ```
 Output:
 ```

 ```python
 from transformers import VisionEncoderDecoderModel, GPT2TokenizerFast, ViTImageProcessor
 import torch
+import os
+import urllib.parse as parse
+from PIL import Image
+import requests
+# a function to determine whether a string is a URL or not
+def is_url(string):
+    """Return True if `string` parses as a URL with a scheme, host and path.
+
+    All three components are required, so a bare host such as
+    "http://example.com" (empty path) is reported as not a URL.
+    Inputs that `urlparse` rejects yield False instead of raising.
+    """
+    try:
+        result = parse.urlparse(string)
+    except (ValueError, AttributeError, TypeError):
+        # ValueError: malformed URL (e.g. unbalanced IPv6 brackets);
+        # AttributeError/TypeError: input that is not a str or bytes.
+        return False
+    return all([result.scheme, result.netloc, result.path])
+# a function to load an image
+def load_image(image_path):
+    """Open the image at `image_path`, which may be a URL or a local file path.
+
+    Returns the object produced by `Image.open`.
+    Raises `requests.HTTPError` if the download returns an error status, and
+    `FileNotFoundError` if `image_path` is neither a URL nor an existing path.
+    """
+    if is_url(image_path):
+        response = requests.get(image_path, stream=True)
+        # Fail on 4xx/5xx here rather than handing an error page to PIL.
+        response.raise_for_status()
+        return Image.open(response.raw)
+    if os.path.exists(image_path):
+        return Image.open(image_path)
+    # Falling off the end used to return None implicitly; fail loudly instead.
+    raise FileNotFoundError(f"not a URL or an existing path: {image_path!r}")
 # a function to perform inference
 def get_caption(model, image_processor, tokenizer, image_path):
 caption = get_caption(model, image_processor, tokenizer, url)
 print(f"caption: {caption}")
 ```
 Output:
 ```