Spaces:
Runtime error
Runtime error
File size: 2,835 Bytes
591004d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 |
#!python
# -*- coding: utf-8 -*-
# @author: Kun
import torch
from flagai.model.predictor.predictor import Predictor
from flagai.model.predictor.aquila import aquila_generate
from models.aquila_fa import max_token, temperature, top_p
from common import torch_gc
from global_config import lang_opt
# for Aquila on FlagAI
def get_api_response(model, tokenizer, content: str, max_tokens=None):
    """Generate a completion for ``content`` using a FlagAI ``Predictor``.

    The prompt and the generated text are echoed to stdout between
    banner lines.

    Args:
        model: Model object handed to ``Predictor``.
        tokenizer: Tokenizer object handed to ``Predictor``.
        content: Prompt text; passed to the predictor unchanged.
        max_tokens: Optional cap on the generated length. When ``None``
            the module-level ``max_token`` default is used.

    Returns:
        The value returned by ``predict_generate_randomsample``.

    Raises:
        ValueError: If the global ``lang_opt`` is not one of
            ``"en"``, ``"zh1"`` or ``"zh2"``.
    """
    # The per-language system prompt was previously built here but never
    # passed to the model; only the language validation had any effect,
    # so only that part is kept.
    if lang_opt not in ("en", "zh1", "zh2"):
        raise ValueError(f"not supported language: {lang_opt}")

    banner = "<===" + "=" * 100

    print("===> Question:")
    print(content)
    print(banner)

    # Honor the caller-supplied limit instead of silently ignoring it.
    out_max_length = max_token if max_tokens is None else max_tokens

    predictor = Predictor(model, tokenizer)
    try:
        with torch.no_grad():
            response = predictor.predict_generate_randomsample(
                content,
                out_max_length=out_max_length,
                temperature=temperature,
                top_p=top_p,
            )
    finally:
        # Run the cleanup helper even when generation fails.
        torch_gc()

    print("===> Generated Text: ")
    print(response)
    print(banner)

    return response
# # Alternative implementation for Aquila on HuggingFace (disabled; kept for reference)
# def get_api_response(model, tokenizer, content: str, max_tokens=None):
# if "en" == lang_opt:
# system_role_content = 'You are a helpful and creative assistant for writing novel.'
# elif "zh1" == lang_opt:
# system_role_content = 'You are a helpful and creative assistant for writing novel.\
# You are must always in Chinese.重要,你需要使用中文与我进行交流。'
# elif "zh2" == lang_opt:
# system_role_content = '你是写小说的好帮手,有创意的助手。'
# else:
# raise Exception(f"not supported language: {lang_opt}")
# print("===> Question:")
# print(content)
# print("<==="+"="*100)
# with torch.no_grad():
# ret = model.generate(
# **tokenizer(content, return_tensors='pt').to('cuda'),
# do_sample=False,
# max_new_tokens=max_token,
# temperature=temperature,
# top_p=top_p,
# use_cache=True
# )
# output_ids = ret[0].detach().cpu().numpy().tolist()
# if 100007 in output_ids:
# output_ids = output_ids[:output_ids.index(100007)]
# elif 0 in output_ids:
# output_ids = output_ids[:output_ids.index(0)]
# response = tokenizer.decode(output_ids)
# torch_gc()
# print("===> Generated Text: ")
# print(response)
# print("<==="+"="*100)
# return response
|