#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @author: Kun

import torch

from flagai.model.predictor.predictor import Predictor

from models.aquila_fa import max_token, temperature, top_p
from common import torch_gc
from global_config import lang_opt


# for Aquila on FlagAI
def get_api_response(model, tokenizer, content: str, max_tokens=None):

    if "en" == lang_opt:
        system_role_content = 'You are a helpful and creative assistant for writing novels.'
    elif "zh1" == lang_opt:
        system_role_content = ('You are a helpful and creative assistant for writing novels. '
                               'You must always reply in Chinese. 重要,你需要使用中文与我进行交流。')
    elif "zh2" == lang_opt:
        system_role_content = '你是写小说的好帮手,有创意的助手。'
    else:
        raise ValueError(f"not supported language: {lang_opt}")

    print("===> Question:")
    print(content)
    print("<===" + "=" * 100)

    predictor = Predictor(model, tokenizer)
    # Prepend the system role so the language instruction actually reaches the
    # model; the original code built system_role_content but never used it.
    content = f'{system_role_content}\n{content}'

    with torch.no_grad():
        out = predictor.predict_generate_randomsample(
            content,
            # Honor a per-call max_tokens override, falling back to the
            # module-level default from models.aquila_fa.
            out_max_length=max_tokens or max_token,
            temperature=temperature,
            top_p=top_p)
    response = out

    torch_gc()

    print("===> Generated Text: ")
    print(response)
    print("<===" + "=" * 100)

    return response


# # for Aquila on HuggingFace
# def get_api_response(model, tokenizer, content: str, max_tokens=None):
#     if "en" == lang_opt:
#         system_role_content = 'You are a helpful and creative assistant for writing novels.'
#     elif "zh1" == lang_opt:
#         system_role_content = ('You are a helpful and creative assistant for writing novels. '
#                                'You must always reply in Chinese. 重要,你需要使用中文与我进行交流。')
#     elif "zh2" == lang_opt:
#         system_role_content = '你是写小说的好帮手,有创意的助手。'
#     else:
#         raise ValueError(f"not supported language: {lang_opt}")
#
#     print("===> Question:")
#     print(content)
#     print("<===" + "=" * 100)
#
#     with torch.no_grad():
#         ret = model.generate(
#             **tokenizer(content, return_tensors='pt').to('cuda'),
#             # NOTE: temperature and top_p have no effect while
#             # do_sample=False (greedy decoding); set do_sample=True to sample.
#             do_sample=False,
#             max_new_tokens=max_token,
#             temperature=temperature,
#             top_p=top_p,
#             use_cache=True
#         )
#         output_ids = ret[0].detach().cpu().numpy().tolist()
#         # Truncate at Aquila's end-of-text token id (100007) or at 0 (padding).
#         if 100007 in output_ids:
#             output_ids = output_ids[:output_ids.index(100007)]
#         elif 0 in output_ids:
#             output_ids = output_ids[:output_ids.index(0)]
#         response = tokenizer.decode(output_ids)
#
#     torch_gc()
#
#     print("===> Generated Text: ")
#     print(response)
#     print("<===" + "=" * 100)
#
#     return response
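
# Minimal usage sketch (an illustration, not part of the original module): it
# shows how get_api_response is typically driven, assuming FlagAI's AutoLoader
# API for loading Aquila weights. The checkpoint directory "./checkpoints_in"
# and the "aquilachat-7b" model name are assumptions; substitute your own.
if __name__ == "__main__":
    from flagai.auto_model.auto_loader import AutoLoader

    loader = AutoLoader(
        "lm",
        model_dir="./checkpoints_in",  # hypothetical local checkpoint path
        model_name="aquilachat-7b",    # assumed model name
        use_cache=True)
    model = loader.get_model()
    tokenizer = loader.get_tokenizer()
    model.eval()
    model.half()
    model.cuda()

    print(get_api_response(
        model, tokenizer,
        "Write the opening paragraph of a fantasy novel."))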