Spaces:
Runtime error
Runtime error
# -*- coding: utf-8 -*- | |
"""imagetortext.ipynb | |
Automatically generated by Colaboratory. | |
Original file is located at | |
https://colab.research.google.com/drive/1UXh8tivt-4vHaBeXgfyq-TYLvpJAMZVV | |
""" | |
!pip install transformers dataset | |
!pip install -q gradio | |
!pip install sentencepiece | |
!pip install googletrans==3.1.0a0 | |
import gradio as gr | |
import requests | |
from PIL import Image | |
from torchvision import transforms | |
from transformers import pipeline | |
import torch | |
import sentencepiece | |
import re | |
import googletrans | |
from googletrans import Translator | |
model = torch.hub.load('pytorch/vision:v0.6.0', 'resnet18', pretrained=True).eval() | |
from transformers import SegformerFeatureExtractor, SegformerForImageClassification, T5Tokenizer, T5Model | |
from PIL import Image | |
import requests | |
def loadImageToText(image, argument): | |
# url = "https://media.istockphoto.com/id/470604022/es/foto/%C3%A1rbol-de-manzano.jpg?s=1024x1024&w=is&k=20&c=R7b6jPeTGsDw75Sqn3VwpNRckqlAkJNPLelb48pCk2U=" | |
# image = Image.open(requests.get(url, stream=True).raw) | |
feature_extractor = SegformerFeatureExtractor.from_pretrained("nvidia/mit-b2") | |
model = SegformerForImageClassification.from_pretrained("nvidia/mit-b2") | |
translator = Translator() | |
inputs = feature_extractor(images=image, return_tensors="pt") | |
outputs = model(**inputs) | |
logits = outputs.logits | |
# model predicts one of the 1000 ImageNet classes | |
predicted_class_idx = logits.argmax(-1).item() | |
part_args = f"<"+re.sub("[^(\w|<|>)]+(?=\w)", "><", argument) + ">" | |
story_gen = pipeline("text-generation", "pranavpsv/gpt2-genre-story-generator") | |
story_text = story_gen(part_args + model.config.id2label[predicted_class_idx]) | |
generate_text_stroy = story_text[0]["generated_text"] | |
ln_text_story = generate_text_stroy[len(part_args):len(generate_text_stroy)] | |
translated_ita = translator.translate(ln_text_story, src='en', dest='es') | |
return translated_ita.text | |
#print(loadImageToText("animal,super")) # borra el argumento 'image' y sus variables internas si quieres probarlo desde aquí. | |
gr.Interface(fn=loadImageToText, | |
inputs=[gr.Image(), gr.Text(label="Argumentos base", placeholder="Verano, película,playa, superhéroe, animal")], | |
outputs="text").launch() |