InspirationYF committed on
Commit 7ca365c · 1 Parent(s): 62e4c6c

feat: use spaces gpu

Files changed (1)
  1. app.py +8 -2
app.py CHANGED
@@ -1,5 +1,6 @@
 import os
 import torch
+import spaces
 import gradio as gr
 from huggingface_hub import login
 from transformers import AutoModelForCausalLM, AutoTokenizer
@@ -15,6 +16,7 @@ def get_llm(model_id):
     return model
 
 # Q&A logic
+@spaces.GPU(duration=120)
 def retriever_qa(file, query):
     # Load the model and tokenizer
     model_id = 'mistralai/Mistral-7B-Instruct-v0.2'
@@ -41,13 +43,17 @@ def retriever_qa(file, query):
 
     # Tokenize the input
     model_inputs = tokenizer.apply_chat_template(messages, return_tensors="pt").to(device)
+    print(f"Model Inputs: {model_inputs}")
     print('Start Inference')
 
     # Inference
-    generated_ids = llm.generate(model_inputs['input_ids'], max_new_tokens=50, do_sample=True)
-
+    generated_ids = llm.generate(model_inputs, max_new_tokens=50, do_sample=True)
+    # generated_ids = llm.generate(input_ids=model_inputs['input_ids'], attention_mask=model_inputs['attention_mask'], max_new_tokens=50, do_sample=True)
+    print(f'Generated ids: {generated_ids}')
     # Decode the output
+    print('Start detokenize')
     response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
+    print(response)
     return response
 
 # Invoke the inference logic
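
For context, the commit does two things: it registers the handler with the Spaces ZeroGPU scheduler via @spaces.GPU(duration=120), and it fixes the generate() call, since tokenizer.apply_chat_template(..., return_tensors="pt") returns a plain tensor of token ids rather than a dict, so indexing it with ['input_ids'] would fail. A minimal sketch of the resulting pattern is below; generate_reply and its arguments are illustrative names, not part of app.py:

import spaces

@spaces.GPU(duration=120)  # borrow a ZeroGPU device for up to 120 s per call
def generate_reply(llm, tokenizer, messages, device='cuda'):
    # apply_chat_template with return_tensors="pt" yields a tensor of input
    # ids (not a BatchEncoding dict), so it is passed to generate() directly
    input_ids = tokenizer.apply_chat_template(messages, return_tensors="pt").to(device)
    generated_ids = llm.generate(input_ids, max_new_tokens=50, do_sample=True)
    # decode the generated sequence, dropping special tokens
    return tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]

The commented-out alternative in the diff (passing input_ids and attention_mask explicitly) would apply if the tokenizer were called in a mode that returns a dict, e.g. with return_dict=True.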