Spaces: Running on A10G
Update app.py
Add `bfloat16` support for lighter (maybe faster too?) inference. I previously passed this argument directly to `pipeline`; see for example https://gist.github.com/younesbelkada/dba25f75d3749b4e2d2d4821f0d6f385#file-benchmark-py-L42
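For context, a minimal sketch of the two equivalent ways to request `bfloat16` weights (assuming a recent transformers release and an available CUDA device): routing `torch_dtype` through `model_kwargs`, which `pipeline` forwards to the model's `from_pretrained`, or passing it directly to `pipeline` as in the linked gist.

import torch
from transformers import pipeline

# Variant used in this commit: `model_kwargs` is forwarded to
# `from_pretrained`, so the checkpoint is loaded directly in bfloat16.
pipe = pipeline(
    "text2text-generation",
    model="google/flan-t5-xl",
    device="cuda:0",
    model_kwargs={"torch_dtype": torch.bfloat16},
)

# Variant from the linked gist: recent transformers versions also accept
# `torch_dtype` as a top-level `pipeline` argument.
# pipe = pipeline("text2text-generation", model="google/flan-t5-xl",
#                 device="cuda:0", torch_dtype=torch.bfloat16)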
app.py
CHANGED
@@ -1,5 +1,6 @@
 import os
 import gradio as gr
+import torch
 import numpy as np
 from transformers import pipeline
 
@@ -7,8 +8,8 @@ import torch
 print(f"Is CUDA available: {torch.cuda.is_available()}")
 print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
 
-pipe_flan = pipeline("text2text-generation", model="google/flan-t5-xl", device="cuda:0")
-pipe_vanilla = pipeline("text2text-generation", model="t5-large", device="cuda:0")
+pipe_flan = pipeline("text2text-generation", model="google/flan-t5-xl", device="cuda:0", model_kwargs={"torch_dtype": torch.bfloat16})
+pipe_vanilla = pipeline("text2text-generation", model="t5-large", device="cuda:0", model_kwargs={"torch_dtype": torch.bfloat16})
 
 examples = [
     ["Please answer to the following question. Who is going to be the next Ballon d'or?"],
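As a quick sanity check after this change, the loaded weights should report the reduced dtype; a sketch reusing the Space's `pipe_flan` from the diff above. bfloat16 stores weights in 2 bytes instead of float32's 4, roughly halving model memory, and the A10G's Ampere cores support bfloat16 natively, which is where the possible speedup would come from.

# Assumes `pipe_flan` was built as in the diff above.
print(pipe_flan.model.dtype)  # expected: torch.bfloat16

# The pipeline is used exactly as before; only the weight dtype changed.
out = pipe_flan("Translate to German: Hello, world!", max_length=32)
print(out[0]["generated_text"])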