codelion committed on
Commit
cea1140
·
verified ·
1 Parent(s): fd43a4f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -7
app.py CHANGED
@@ -7,13 +7,13 @@ import spaces
7
  import torch
8
  from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer, pipeline
9
 
10
- import subprocess
11
  # Install flash attention, skipping CUDA build if necessary
12
- subprocess.run(
13
- "pip install flash-attn --no-build-isolation",
14
- env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
15
- shell=True,
16
- )
17
 
18
  MAX_MAX_NEW_TOKENS = 1024
19
  DEFAULT_MAX_NEW_TOKENS = 512
@@ -33,7 +33,8 @@ if not torch.cuda.is_available():
33
 
34
 
35
  if torch.cuda.is_available():
36
- model_id = "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct"
 
37
  model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", load_in_4bit=True,trust_remote_code=True)
38
  tokenizer = AutoTokenizer.from_pretrained(model_id)
39
  tokenizer.padding_side = 'right'
 
7
  import torch
8
  from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer, pipeline
9
 
10
+ #import subprocess
11
  # Install flash attention, skipping CUDA build if necessary
12
+ #subprocess.run(
13
+ # "pip install flash-attn --no-build-isolation",
14
+ # env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
15
+ # shell=True,
16
+ #)
17
 
18
  MAX_MAX_NEW_TOKENS = 1024
19
  DEFAULT_MAX_NEW_TOKENS = 512
 
33
 
34
 
35
  if torch.cuda.is_available():
36
+ #model_id = "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct"
37
+ model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
38
  model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", load_in_4bit=True,trust_remote_code=True)
39
  tokenizer = AutoTokenizer.from_pretrained(model_id)
40
  tokenizer.padding_side = 'right'