#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @author: Kun

import torch

from flagai.model.predictor.predictor import Predictor

from models.aquila_fa import max_token, temperature, top_p
from common import torch_gc
from global_config import lang_opt


# for Aquila on FlagAI
def get_api_response(model, tokenizer, content: str, max_tokens=None):

    if "en" == lang_opt:
        system_role_content = 'You are a helpful and creative assistant for writing novels.'
    elif "zh1" == lang_opt:
        system_role_content = ('You are a helpful and creative assistant for writing novels. '
                               'You must always reply in Chinese. 重要,你需要使用中文与我进行交流。')
    elif "zh2" == lang_opt:
        system_role_content = '你是写小说的好帮手,有创意的助手。'
    else:
        raise ValueError(f"not supported language: {lang_opt}")

    print("===> Question:")
    print(content)
    print("<===" + "=" * 100)

    predictor = Predictor(model, tokenizer)
    # Prepend the system role so the language instruction actually reaches the
    # model; the original code built system_role_content but never used it.
    content = f'{system_role_content}\n{content}'

    with torch.no_grad():
        out = predictor.predict_generate_randomsample(
            content,
            # Honor a per-call max_tokens override, falling back to the
            # module-level default from models.aquila_fa.
            out_max_length=max_tokens or max_token,
            temperature=temperature,
            top_p=top_p)
    response = out

    torch_gc()

    print("===> Generated Text: ")
    print(response)
    print("<===" + "=" * 100)

    return response


# # for Aquila on HuggingFace
# def get_api_response(model, tokenizer, content: str, max_tokens=None):
#     if "en" == lang_opt:
#         system_role_content = 'You are a helpful and creative assistant for writing novels.'
#     elif "zh1" == lang_opt:
#         system_role_content = ('You are a helpful and creative assistant for writing novels. '
#                                'You must always reply in Chinese. 重要,你需要使用中文与我进行交流。')
#     elif "zh2" == lang_opt:
#         system_role_content = '你是写小说的好帮手,有创意的助手。'
#     else:
#         raise ValueError(f"not supported language: {lang_opt}")
#
#     print("===> Question:")
#     print(content)
#     print("<===" + "=" * 100)
#
#     with torch.no_grad():
#         ret = model.generate(
#             **tokenizer(content, return_tensors='pt').to('cuda'),
#             # NOTE: temperature and top_p have no effect while
#             # do_sample=False (greedy decoding); set do_sample=True to sample.
#             do_sample=False,
#             max_new_tokens=max_token,
#             temperature=temperature,
#             top_p=top_p,
#             use_cache=True
#         )
#         output_ids = ret[0].detach().cpu().numpy().tolist()
#         # Truncate at Aquila's end-of-text token id (100007) or at 0 (padding).
#         if 100007 in output_ids:
#             output_ids = output_ids[:output_ids.index(100007)]
#         elif 0 in output_ids:
#             output_ids = output_ids[:output_ids.index(0)]
#         response = tokenizer.decode(output_ids)
#
#     torch_gc()
#
#     print("===> Generated Text: ")
#     print(response)
#     print("<===" + "=" * 100)
#
#     return response
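
# Minimal usage sketch (an illustration, not part of the original module): it
# shows how get_api_response is typically driven, assuming FlagAI's AutoLoader
# API for loading Aquila weights. The checkpoint directory "./checkpoints_in"
# and the "aquilachat-7b" model name are assumptions; substitute your own.
if __name__ == "__main__":
    from flagai.auto_model.auto_loader import AutoLoader

    loader = AutoLoader(
        "lm",
        model_dir="./checkpoints_in",  # hypothetical local checkpoint path
        model_name="aquilachat-7b",    # assumed model name
        use_cache=True)
    model = loader.get_model()
    tokenizer = loader.get_tokenizer()
    model.eval()
    model.half()
    model.cuda()

    print(get_api_response(
        model, tokenizer,
        "Write the opening paragraph of a fantasy novel."))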