shrimantasatpati committed
Commit c4be01c
1 Parent(s): cef4bfa

Updated app.py

Files changed (1)
  1. app.py +12 -5
app.py CHANGED
@@ -1,8 +1,12 @@
 import streamlit as st
+import transformers
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
-
-
+# device = "cpu"
+# if torch.cuda.is_available():
+#     device = "cuda"
+# if torch.backends.mps.is_available():
+#     device = "mps"
 
 # torch.set_default_device("cuda")
 # Load the Phi 2 model and tokenizer
@@ -13,10 +17,11 @@ tokenizer = AutoTokenizer.from_pretrained(
 
 model = AutoModelForCausalLM.from_pretrained(
     "microsoft/phi-2",
-    device_map="auto",
+    device_map="cpu",
     trust_remote_code=True,
     # offload_folder="offload",
     torch_dtype=torch.float32
+    # torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
 )
 
 # Streamlit UI
@@ -28,13 +33,15 @@ prompt = st.text_area("Enter your prompt:", """Write a short summary about how t
 # Generate output based on user input
 if st.button("Generate Output"):
     with torch.no_grad():
-        token_ids = tokenizer.encode(prompt, add_special_tokens=False, return_tensors="pt", return_attention_mask=False)
+        token_ids = tokenizer.encode(prompt, add_special_tokens=False, return_tensors="pt",
+                                     # return_attention_mask=False
+                                     )
         output_ids = model.generate(
             token_ids.to(model.device),
             max_new_tokens=512,
             do_sample=True,
             temperature=0.3,
-            max_length=200
+            # max_length=200
         )
 
         output = tokenizer.decode(output_ids[0][token_ids.size(1):])
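
Net effect of this commit: the model is pinned to CPU (`device_map="cpu"` replaces `device_map="auto"`), the `max_length=200` argument, which conflicts with `max_new_tokens=512`, is commented out, `return_attention_mask=False` is commented out of the `tokenizer.encode` call, and a (still commented) cuda/mps device-detection block is added at the top. Below is a minimal runnable sketch of the resulting app, under stated assumptions: the original default prompt is truncated in the diff so a placeholder is used, and the tokenizer's `from_pretrained` arguments and the output display call are not visible in these hunks and are assumed here.

import streamlit as st
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load the Phi-2 model and tokenizer, pinned to CPU as in this commit.
tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-2")  # assumed args; not shown in the diff
model = AutoModelForCausalLM.from_pretrained(
    "microsoft/phi-2",
    device_map="cpu",           # was "auto" before this commit
    trust_remote_code=True,
    torch_dtype=torch.float32,  # float32 for CPU inference
)

# Streamlit UI; the real default prompt is truncated in the diff, placeholder used here.
prompt = st.text_area("Enter your prompt:", "Write a short summary about ...")

if st.button("Generate Output"):
    with torch.no_grad():
        token_ids = tokenizer.encode(prompt, add_special_tokens=False, return_tensors="pt")
        output_ids = model.generate(
            token_ids.to(model.device),
            max_new_tokens=512,  # replaces the removed max_length=200
            do_sample=True,
            temperature=0.3,
        )
    # Decode only the newly generated tokens, skipping the echoed prompt.
    output = tokenizer.decode(output_ids[0][token_ids.size(1):])
    st.write(output)  # assumed display call; not visible in these hunks

When both `max_length` and `max_new_tokens` are passed, transformers warns and lets `max_new_tokens` take precedence, so commenting out `max_length=200` silences that warning without changing the 512-token generation budget.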