Spaces:
Running
on
A10G
Running
on
A10G
File size: 3,010 Bytes
ed6faba d4c7e67 ed6faba c6144cf 6d6196b ed6faba 667b3c7 6c15ef5 d4c7e67 f689a2c ba120a3 297ba74 ba120a3 38f987f ba120a3 38f987f f689a2c 8d8c0fc 58dd1da f689a2c 58dd1da ed6faba 6d6196b c6144cf 911bd1c 667b3c7 ed6faba f689a2c c0006ba f689a2c ed6faba |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 |
import os
import gradio as gr
import torch
import numpy as np
from transformers import pipeline
import spaces
# (removed a duplicate `import torch` here — torch is already imported above)

# Startup diagnostics: log whether a GPU is visible to this process.
print(f"Is CUDA available: {torch.cuda.is_available()}")
# Guard the device query: torch.cuda.current_device() raises a RuntimeError
# when no CUDA device is present, so only ask for the name if CUDA is up.
if torch.cuda.is_available():
    print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")

# Flan-T5-XXL, sharded and loaded in 8-bit (bitsandbytes) with automatic
# device placement to fit on a single A10G.
pipe_flan = pipeline("text2text-generation", model="philschmid/flan-t5-xxl-sharded-fp16", model_kwargs={"load_in_8bit":True, "device_map": "auto"})
# Vanilla T5-large in bfloat16, pinned to the first CUDA device.
# NOTE(review): this line still requires a GPU at startup (device="cuda:0");
# kept as-is since the Space is declared to run on an A10G.
pipe_vanilla = pipeline("text2text-generation", model="t5-large", device="cuda:0", model_kwargs={"torch_dtype":torch.bfloat16})
# Prompts shown as clickable examples in the Gradio UI. Each inner list holds
# the single value for the one input Textbox. They cover QA, chain-of-thought
# reasoning, summarization, translation, generation, and a few-shot prompt.
examples = [
["Please answer to the following question. Who is going to be the next Ballon d'or?"],
["Q: Can Barack Obama have a conversation with George Washington? Give the rationale before answering."],
["Summarize the following text: Peter and Elizabeth took a taxi to attend the night party in the city. While in the party, Elizabeth collapsed and was rushed to the hospital. Since she was diagnosed with a brain injury, the doctor told Peter to stay besides her until she gets well. Therefore, Peter stayed with her at the hospital for 3 days without leaving."],
["Please answer the following question: What is the boiling point of water?"],
["Answer the following question by detailing your reasoning: Are Pokemons alive?"],
["Translate to German: How old are you?"],
["Generate a cooking recipe to make bolognese pasta:"],
["Answer the following yes/no question by reasoning step-by-step. Can you write a whole Haiku in a single tweet?"],
["Premise: At my age you will probably have learnt one lesson. Hypothesis: It's not certain how many lessons you'll learn by your thirties. Does the premise entail the hypothesis?"],
["Answer the following question by reasoning step by step. The cafeteria had 23 apples. If they used 20 for lunch and bought 6 more, how many apples do they have?"],
["""Q: Roger has 5 tennis balls. He buys 2 more cans of tennis balls. Each can has 3 tennis balls. How many tennis balls does he have now?
A: Roger started with 5 balls. 2 cans of 3 tennis balls each is 6 tennis balls. 5 + 6 = 11. The answer is 11.
Q: A juggler can juggle 16 balls. Half of the balls are golf balls, and half of the golf balls are blue. How many blue golf balls are there?"""]
]
# Page title and markdown description rendered above the demo.
title = "Flan T5 and Vanilla T5"
description = "This demo compares [T5-large](https://huggingface.co./t5-large) and [Flan-T5-XX-large](https://huggingface.co./google/flan-t5-xxl). Note that T5 expects a very specific format of the prompts, so the examples below are not necessarily the best prompts to compare."
@spaces.GPU
def inference(text):
    """Run the prompt through both pipelines and return their generations.

    Returns a two-element list: [Flan-T5 output, vanilla T5 output], feeding
    the two output Textboxes of the Gradio interface in that order.
    """
    generations = []
    # Flan first, then vanilla — matches the output component order below.
    for pipe in (pipe_flan, pipe_vanilla):
        result = pipe(text, max_length=100)
        generations.append(result[0]["generated_text"])
    return generations
# Wire the inference function to a simple one-input, two-output interface.
io = gr.Interface(
    inference,
    gr.Textbox(lines=3),
    outputs=[
        gr.Textbox(lines=3, label="Flan T5"),
        gr.Textbox(lines=3, label="T5")
    ],
    title=title,
    description=description,
    examples=examples
)
# Start the Gradio server. (Removed a stray trailing "|" left over from the
# file-viewer scrape — it was a syntax error in the captured source.)
io.launch()