ybelkada commited on
Commit
36407aa
1 Parent(s): c6f1661

Update app.py

Browse files

Add `bfloat16` support for lighter (and maybe faster) inference. I usually add this argument to `pipeline`; see for example https://gist.github.com/younesbelkada/dba25f75d3749b4e2d2d4821f0d6f385#file-benchmark-py-L42

Files changed (1) hide show
  1. app.py +3 -2
app.py CHANGED
@@ -1,5 +1,6 @@
1
  import os
2
  import gradio as gr
 
3
  import numpy as np
4
  from transformers import pipeline
5
 
@@ -7,8 +8,8 @@ import torch
7
  print(f"Is CUDA available: {torch.cuda.is_available()}")
8
  print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
9
 
10
- pipe_flan = pipeline("text2text-generation", model="google/flan-t5-xl", device="cuda:0")
11
- pipe_vanilla = pipeline("text2text-generation", model="t5-large", device="cuda:0")
12
 
13
  examples = [
14
  ["Please answer to the following question. Who is going to be the next Ballon d'or?"],
 
1
  import os
2
  import gradio as gr
3
+ import torch
4
  import numpy as np
5
  from transformers import pipeline
6
 
 
8
  print(f"Is CUDA available: {torch.cuda.is_available()}")
9
  print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
10
 
11
+ pipe_flan = pipeline("text2text-generation", model="google/flan-t5-xl", device="cuda:0", model_kwargs={"torch_dtype":torch.bfloat16})
12
+ pipe_vanilla = pipeline("text2text-generation", model="t5-large", device="cuda:0", model_kwargs={"torch_dtype":torch.bfloat16})
13
 
14
  examples = [
15
  ["Please answer to the following question. Who is going to be the next Ballon d'or?"],