Spaces: Running on A10G
Update app.py
Add `bfloat16` support for lighter (maybe faster too?) inference. I previously passed this argument directly to `pipeline`; see for example https://gist.github.com/younesbelkada/dba25f75d3749b4e2d2d4821f0d6f385#file-benchmark-py-L42
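For context, a minimal sketch of the two equivalent ways to request `bfloat16` weights (assuming a recent transformers release and an available CUDA device): routing `torch_dtype` through `model_kwargs`, which `pipeline` forwards to the model's `from_pretrained`, or passing it directly to `pipeline` as in the linked gist.

import torch
from transformers import pipeline

# Variant used in this commit: `model_kwargs` is forwarded to
# `from_pretrained`, so the checkpoint is loaded directly in bfloat16.
pipe = pipeline(
    "text2text-generation",
    model="google/flan-t5-xl",
    device="cuda:0",
    model_kwargs={"torch_dtype": torch.bfloat16},
)

# Variant from the linked gist: recent transformers versions also accept
# `torch_dtype` as a top-level `pipeline` argument.
# pipe = pipeline("text2text-generation", model="google/flan-t5-xl",
#                 device="cuda:0", torch_dtype=torch.bfloat16)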
app.py
CHANGED
@@ -1,5 +1,6 @@
 import os
 import gradio as gr
+import torch
 import numpy as np
 from transformers import pipeline
 
@@ -7,8 +8,8 @@ import torch
 print(f"Is CUDA available: {torch.cuda.is_available()}")
 print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
 
-pipe_flan = pipeline("text2text-generation", model="google/flan-t5-xl", device="cuda:0")
-pipe_vanilla = pipeline("text2text-generation", model="t5-large", device="cuda:0")
+pipe_flan = pipeline("text2text-generation", model="google/flan-t5-xl", device="cuda:0", model_kwargs={"torch_dtype": torch.bfloat16})
+pipe_vanilla = pipeline("text2text-generation", model="t5-large", device="cuda:0", model_kwargs={"torch_dtype": torch.bfloat16})
 
 examples = [
     ["Please answer to the following question. Who is going to be the next Ballon d'or?"],
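As a quick sanity check after this change, the loaded weights should report the reduced dtype; a sketch reusing the Space's `pipe_flan` from the diff above. bfloat16 stores weights in 2 bytes instead of float32's 4, roughly halving model memory, and the A10G's Ampere cores support bfloat16 natively, which is where the possible speedup would come from.

# Assumes `pipe_flan` was built as in the diff above.
print(pipe_flan.model.dtype)  # expected: torch.bfloat16

# The pipeline is used exactly as before; only the weight dtype changed.
out = pipe_flan("Translate to German: Hello, world!", max_length=32)
print(out[0]["generated_text"])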