Spaces:
Sleeping
Commit · 7ca365c
1 Parent(s): 62e4c6c
feat: use spaces gpu
Browse files
app.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1 |
import os
|
2 |
import torch
|
|
|
3 |
import gradio as gr
|
4 |
from huggingface_hub import login
|
5 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
@@ -15,6 +16,7 @@ def get_llm(model_id):
|
|
15 |
return model
|
16 |
|
17 |
# 问答逻辑
|
|
|
18 |
def retriever_qa(file, query):
|
19 |
# 加载模型和分词器
|
20 |
model_id = 'mistralai/Mistral-7B-Instruct-v0.2'
|
@@ -41,13 +43,17 @@ def retriever_qa(file, query):
|
|
41 |
|
42 |
# Tokenize 输入
|
43 |
model_inputs = tokenizer.apply_chat_template(messages, return_tensors="pt").to(device)
|
|
|
44 |
print('Start Inference')
|
45 |
|
46 |
# 推理
|
47 |
-
generated_ids = llm.generate(model_inputs… [remainder of this removed line was truncated during extraction]
|
48 |
-
|
|
|
49 |
# 解码输出
|
|
|
50 |
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
|
|
|
51 |
return response
|
52 |
|
53 |
# 调用推理逻辑
|
|
|
1 |
import os
|
2 |
import torch
|
3 |
+
import spaces
|
4 |
import gradio as gr
|
5 |
from huggingface_hub import login
|
6 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
|
|
16 |
return model
|
17 |
|
18 |
# 问答逻辑
|
19 |
+
@spaces.GPU(duration=120)
|
20 |
def retriever_qa(file, query):
|
21 |
# 加载模型和分词器
|
22 |
model_id = 'mistralai/Mistral-7B-Instruct-v0.2'
|
|
|
43 |
|
44 |
# Tokenize 输入
|
45 |
model_inputs = tokenizer.apply_chat_template(messages, return_tensors="pt").to(device)
|
46 |
+
print(f"Model Inputs: {model_inputs}")
|
47 |
print('Start Inference')
|
48 |
|
49 |
# 推理
|
50 |
+
generated_ids = llm.generate(model_inputs, max_new_tokens=50, do_sample=True)
|
51 |
+
# generated_ids = llm.generate(input_ids=model_inputs['input_ids'], attention_mask=model_inputs['attention_mask'], max_new_tokens=50, do_sample=True)
|
52 |
+
print(f'Generated ids: {generated_ids}')
|
53 |
# 解码输出
|
54 |
+
print('Start detokenize')
|
55 |
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
|
56 |
+
print(response)
|
57 |
return response
|
58 |
|
59 |
# 调用推理逻辑
|