#### pip install gradio==3.50.2
import gradio as gr
import pandas as pd
import numpy as np
import os
import json
import re
from functools import partial
import jieba
def repeat_to_one_f(x):
    """Collapse repeated segments of ``x`` with a left-to-right token scan.

    ``x`` is tokenised with ``jieba.lcut``. For each token:

    * a token made of one repeated character is reduced to that character;
    * a token already contained in the text accumulated so far is dropped;
    * otherwise leading characters that repeat the accumulated text's last
      character are removed and the remainder is appended.

    Returns the accumulated text with surrounding whitespace stripped, or
    ``""`` when the tokenizer produces no tokens at all.
    """
    req = None
    for token in jieba.lcut(x):
        if len(set(token)) == 1:
            token = token[0]
        if req is None:
            # First token seeds the accumulator.
            req = token
            continue
        if token in req:
            continue
        while req.endswith(token[0]):
            token = token[1:]
        req = req + token
    # With no tokens the accumulator is still None; the previous code then
    # failed on ``None.strip()``.
    if req is None:
        return ""
    return req.strip()
def repeat_to_one_fb(x):
    """Run ``repeat_to_one_f`` on ``x`` and on its tokens in reverse order.

    The second candidate is built by joining ``jieba.lcut(x)`` back to front.
    The shorter of the two results is returned; on a tie the result for the
    original order wins.
    """
    reversed_text = "".join(jieba.lcut(x)[::-1])
    candidates = [repeat_to_one_f(text) for text in (x, reversed_text)]
    # min() keeps the first of equally short candidates, like a stable
    # sort followed by [0].
    return min(candidates, key=len)


# Name under which the de-duplication routine is used elsewhere in the file.
repeat_to_one = repeat_to_one_fb
from huggingface_hub import snapshot_download
# Fetch the character dataset once; an existing local directory is reused.
if not os.path.exists("genshin-impact-character"):
    path = snapshot_download(
        repo_id="svjack/genshin-impact-character",
        repo_type="dataset",
        local_dir="genshin-impact-character",
        local_dir_use_symlinks=False,
    )

# Fetch the GGML/GGUF model files once; an existing local directory is reused.
if not os.path.exists("genshin_impact_character_llamazh13b_ggml"):
    path = snapshot_download(
        repo_id="svjack/genshin_impact_character_llamazh13b_ggml",
        repo_type="model",
        local_dir="genshin_impact_character_llamazh13b_ggml",
        local_dir_use_symlinks=False,
    )
# Background-settings table; the "title" and "info" columns are used below.
info_df = pd.read_csv("genshin-impact-character/genshin_impact_background_settings_constrained.csv")
# SECURITY NOTE(review): eval() executes whatever expression each CSV cell
# contains. This is only acceptable while the dataset is trusted; if the cells
# are plain Python literals, ast.literal_eval would be the safer parser.
info_df["info"] = info_df["info"].map(eval)

# Read the JSON explicitly as UTF-8 so the Chinese text does not depend on the
# platform's default encoding.
with open("genshin-impact-character/genshin_impact_character_setting.json", "r", encoding="utf-8") as f:
    character_setting_total_dict = json.load(f)

# Drop the "元素力" field from every character's settings.
req_dict = {}
for k, v_dict in character_setting_total_dict.items():
    req_dict[k] = {}
    for kk, vv in v_dict.items():
        if kk != "元素力":
            req_dict[k][kk] = vv
character_setting_total_dict = req_dict
def get_character_background_list(info_dict):
    """Return the cleaned background sections found in ``info_dict["描述"]``.

    The sections "角色详细" and "更多描述" are taken, in that order, when
    present. Each section has its spaces removed and every "\\n\\n" pair
    replaced by a single "\\n" (one pass).
    """
    description = info_dict["描述"]
    sections = [
        description[section_name]
        for section_name in ("角色详细", "更多描述")
        if section_name in description
    ]
    return [
        section.replace(" ", "").replace("\n\n", "\n")
        for section in sections
    ]
def get_character_background(info_dict):
    """Return the cleaned background sections of ``info_dict`` joined by newlines."""
    sections = get_character_background_list(info_dict)
    return "\n".join(sections)
# Exploratory expression: builds a table of every character's "性别" and
# "国籍", reduces each column to the set of its values and converts the result
# to a dict. The value is not assigned to anything, so at import time this
# only has an effect if a character lacks one of the two keys (KeyError).
# NOTE(review): presumably this is how the literal sets in
# character_setting_total_dist_dict were obtained - confirm before removing.
pd.DataFrame(
pd.Series(character_setting_total_dict.values()).map(
lambda x: {
"性别": x['性别'],
"国籍": x["国籍"]
}
).values.tolist()).apply(lambda x: set(x), axis = 0).to_dict()
# Default settings record for one character. Free-text fields start empty;
# "性别" and "国籍" start as the set of values each field may take.
character_setting_total_dist_dict = {
    "姓名": "",
    "性别": {"少女女性", "少年男性", "成年女性", "成年男性"},
    "国籍": {"枫丹", "璃月", "稻妻", "至冬", "蒙德", "须弥"},
    "身份": "",
    "性格特征": "",
    "角色介绍": "",
}
'''
#### text background_text
text = get_character_background(info_dict)
'''
def get_character_setting_total_dict(name):
    """Build the settings record for the character called ``name``.

    Starts from a deep copy of ``character_setting_total_dist_dict``, overlays
    the stored fields for ``name`` from ``character_setting_total_dict`` when
    the character is known, fills "角色介绍" from the background table
    (``info_df`` columns "title" / "info") when the table has a row for
    ``name``, and finally sets "姓名" to ``name``.

    A name that is missing from either source keeps the corresponding default
    values instead of raising ``KeyError``.

    Returns a new dict on every call; the shared defaults are never mutated.
    """
    from copy import deepcopy

    req = deepcopy(character_setting_total_dist_dict)
    req.update(character_setting_total_dict.get(name, {}))

    background_by_title = dict(info_df[["title", "info"]].values.tolist())
    info_dict = background_by_title.get(name)
    if info_dict is not None:
        req["角色介绍"] = get_character_background(info_dict)

    req["姓名"] = name
    return req
# Import-time smoke calls for two names; the returned records are discarded,
# so these only matter if the lookup raises.
get_character_setting_total_dict("迪卢克")
get_character_setting_total_dict("阿扎尔")
# Registry of prompt templates. Keys are the template strings that the prompt
# builders accept; values name the fields that fill the "{}" slots.
#
# The template "关于{},{}会说什么?" is used both for a single character
# (Text, 姓名) and for two characters (姓名, 姓名). A dict holds one value per
# key, and the earlier literal listed the key twice so the second value
# silently replaced the first. It is now listed once, at its original
# position and with the value that was effective at runtime.
prompt_format_dict = {
    "Basic_Info": ["性别", "国籍", "身份", "性格特征"],
    "两人同属{}": ["国籍"],
    "{}来自{},{}来自{}。": ["姓名", "国籍", "姓名", "国籍"],
    "下面是{}的一些基本信息\n{}": ["姓名", "Basic_Info"],
    "下面是{}的一些基本信息\n{}\n这些是一段角色介绍\n{}": ["姓名", "Basic_Info", "角色介绍"],
    "续写下面的角色介绍,下面是角色介绍的开头。{}是{}。{}": ["姓名", "身份", "Text"],
    "续写下面的角色故事,下面是角色故事的开头。{}是{}。{}": ["姓名", "身份", "Text"],
    "续写下面获得神之眼的过程,下面是开头。{}是{}。{}": ["姓名", "身份", "Text"],
    "{}给你写了一封信,信主题是{},信的内容是这样的。": ["姓名", "Text"],
    "{}在进行有关{}的聊天时会说什么?": ["姓名", "Text"],
    "{}在{}的时候会说什么?": ["姓名", "Text"],
    "{}在{}时会说什么?": ["姓名", "Text"],
    "关于{},{}会说什么?": ["姓名", "姓名"],
    "当你想要了解{}时": ["姓名"],
    "从{}那里,可以获得哪些关于{}的信息?": ["姓名", "姓名"],
}
def single_character_prompt_func(name,
    used_prompt_format_dict,
    character_setting_rewrite_dict = {},
    Text = "",
    ):
    """Render one single-character prompt.

    The template is the first key of ``used_prompt_format_dict`` (the value
    stored under it is not read) and must be registered in
    ``prompt_format_dict``. The character's settings come from
    ``get_character_setting_total_dict(name)``; entries of
    ``character_setting_rewrite_dict`` replace settings fields that already
    exist. ``Text`` fills the free-text slot of templates that have one.

    Returns the rendered prompt string. A registered template that this
    function does not handle ends in ``1 / 0`` (ZeroDivisionError).
    """
    assert type(used_prompt_format_dict) == type({})
    assert type(character_setting_rewrite_dict) == type({})

    settings = get_character_setting_total_dict(name)
    for field in character_setting_rewrite_dict:
        if field in settings:
            settings[field] = character_setting_rewrite_dict[field]

    template = list(used_prompt_format_dict)[0]
    assert template in prompt_format_dict

    def basic_info():
        # "field:value" lines for this character, with the same overrides.
        return single_character_prompt_func(
            name,
            {"Basic_Info": ["性别", "国籍", "身份", "性格特征"]},
            character_setting_rewrite_dict,
        )

    if template == "Basic_Info":
        return "\n".join(
            "{}:{}".format(field, settings[field])
            for field in prompt_format_dict[template]
        )

    if template == "两人同属{}":
        return "两人同属{}".format(settings["国籍"])

    if template == "下面是{}的一些基本信息\n{}":
        return "下面是{}的一些基本信息\n{}".format(name, basic_info())

    if template == "下面是{}的一些基本信息\n{}\n这些是一段角色介绍\n{}":
        return "下面是{}的一些基本信息\n{}\n这些是一段角色介绍\n{}".format(
            name, basic_info(), settings["角色介绍"]
        )

    # Continuation templates filled with (name, identity, Text):
    # requested template -> text that is actually rendered.
    continuation_text = {
        "续写下面的角色介绍,下面是角色介绍的开头。{}是{}。{}":
            "续写下面的角色介绍,下面是角色介绍的开头。{}是{}。{}",
        # NOTE(review): the rendered text says "角色介绍的开头" although the
        # requested template says "角色故事的开头". Kept verbatim; confirm
        # which wording the model expects before changing it.
        "续写下面的角色故事,下面是角色故事的开头。{}是{}。{}":
            "续写下面的角色故事,下面是角色介绍的开头。{}是{}。{}",
        "续写下面获得神之眼的过程,下面是开头。{}是{}。{}":
            "续写下面获得神之眼的过程,下面是开头。{}是{}。{}",
    }
    if template in continuation_text:
        return continuation_text[template].format(name, settings["身份"], Text)

    # Templates filled with (name, Text).
    name_then_text = (
        "{}给你写了一封信,信主题是{},信的内容是这样的。",
        "{}在进行有关{}的聊天时会说什么?",
        "{}在{}的时候会说什么?",
        "{}在{}时会说什么?",
    )
    if template in name_then_text:
        return template.format(name, Text)

    if template == "关于{},{}会说什么?":
        return "关于{},{}会说什么?".format(Text, name)

    if template == "当你想要了解{}时":
        return "当你想要了解{}时".format(name)

    # Registered but unsupported here: fail with ZeroDivisionError.
    return 1 / 0
def two_character_prompt_func(
    name_1,
    name_2,
    used_prompt_format_dict,
    character_setting_rewrite_dict_1 = {},
    character_setting_rewrite_dict_2 = {},
    ):
    """Render one two-character prompt.

    The template is the first key of ``used_prompt_format_dict`` and must be
    registered in ``prompt_format_dict``. Each character's settings come from
    ``get_character_setting_total_dict``; the matching rewrite dict replaces
    fields that already exist.

    Returns the rendered prompt string. A registered template that this
    function does not handle ends in ``1 / 0`` (ZeroDivisionError).
    """
    # Only the first rewrite dict is type-checked.
    assert type(character_setting_rewrite_dict_1) == type({})

    merged = []
    for character, overrides in (
        (name_1, character_setting_rewrite_dict_1),
        (name_2, character_setting_rewrite_dict_2),
    ):
        settings = get_character_setting_total_dict(character)
        for field, value in overrides.items():
            if field in settings:
                settings[field] = value
        merged.append(settings)
    settings_1, settings_2 = merged

    template = list(used_prompt_format_dict)[0]
    assert template in prompt_format_dict

    if template in ("关于{},{}会说什么?", "从{}那里,可以获得哪些关于{}的信息?"):
        return template.format(name_1, name_2)
    if template == "{}来自{},{}来自{}。":
        return template.format(
            name_1, settings_1["国籍"],
            name_2, settings_2["国籍"],
        )
    return 1 / 0
def main_single_character_prompt_func(name,
    used_prompt_format_dict,
    character_setting_rewrite_dict = {},
    Text = "",
    ):
    """Build the (task prompt, info prompt) pair for one character.

    The task template is the first key of ``used_prompt_format_dict`` and must
    be registered in ``prompt_format_dict``. Both prompts are rendered by
    ``single_character_prompt_func`` with the same rewrite dict and ``Text``.

    The info prompt uses the basic-information template for the
    "continue the character introduction" task and the basic-information plus
    character-introduction template for every other supported task.

    Returns:
        ``(task_prompt, info_prompt)``.

    Raises:
        ValueError: the template is registered but is not a single-character
            task template. Previously this case fell through every branch and
            failed with UnboundLocalError on the return statement.
    """
    key = list(used_prompt_format_dict.keys())[0]
    assert key in prompt_format_dict

    brief_info_request = {
        "下面是{}的一些基本信息\n{}": ["姓名", "Basic_Info"]
    }
    detailed_info_request = {
        "下面是{}的一些基本信息\n{}\n这些是一段角色介绍\n{}": ["姓名", "Basic_Info", "角色介绍"]
    }
    # NOTE(review): presumably the introduction task omits the stored
    # introduction because that is the text being continued - confirm.
    brief_info_task = "续写下面的角色介绍,下面是角色介绍的开头。{}是{}。{}"
    detailed_info_tasks = (
        "续写下面的角色故事,下面是角色故事的开头。{}是{}。{}",
        "续写下面获得神之眼的过程,下面是开头。{}是{}。{}",
        "{}给你写了一封信,信主题是{},信的内容是这样的。",
        "{}在进行有关{}的聊天时会说什么?",
        "{}在{}的时候会说什么?",
        "{}在{}时会说什么?",
        "关于{},{}会说什么?",
        "当你想要了解{}时",
    )

    if key == brief_info_task:
        info_request = brief_info_request
    elif key in detailed_info_tasks:
        info_request = detailed_info_request
    else:
        raise ValueError(
            "unsupported single-character task template: {!r}".format(key)
        )

    task_prompt = single_character_prompt_func(
        name,
        used_prompt_format_dict,
        character_setting_rewrite_dict,
        Text
    )
    info_prompt = single_character_prompt_func(
        name,
        info_request,
        character_setting_rewrite_dict,
        Text
    )
    return task_prompt, info_prompt
def main_two_character_prompt_func(
    name_1,
    name_2,
    used_prompt_format_dict,
    character_setting_rewrite_dict_1 = {},
    character_setting_rewrite_dict_2 = {},
    ):
    """Build the (task prompt, info prompt) pair for two characters.

    The task prompt comes from ``two_character_prompt_func``. The info prompt
    is three parts joined by newlines: the detailed info prompt of each
    character, followed by a nationality sentence. That sentence is
    "两人同属{}" when both characters' effective "国籍" values are equal, and
    "{}来自{},{}来自{}。" otherwise.

    Returns:
        ``(task_prompt, info_prompt)``.
    """
    task_prompt = two_character_prompt_func(
        name_1,
        name_2,
        used_prompt_format_dict,
        character_setting_rewrite_dict_1,
        character_setting_rewrite_dict_2,
    )

    pairs = (
        (name_1, character_setting_rewrite_dict_1),
        (name_2, character_setting_rewrite_dict_2),
    )

    info_parts = [
        single_character_prompt_func(
            character,
            {"下面是{}的一些基本信息\n{}\n这些是一段角色介绍\n{}": ["姓名", "Basic_Info", "角色介绍"]},
            overrides,
        )
        for character, overrides in pairs
    ]

    def effective_settings(character, overrides):
        # Stored settings with the caller's overrides applied to existing fields.
        settings = get_character_setting_total_dict(character)
        for field, value in overrides.items():
            if field in settings:
                settings[field] = value
        return settings

    settings_1, settings_2 = [
        effective_settings(character, overrides)
        for character, overrides in pairs
    ]

    if settings_1["国籍"] == settings_2["国籍"]:
        country_prompt = single_character_prompt_func(
            name_1,
            {"两人同属{}": ["国籍"]},
            character_setting_rewrite_dict_1,
        )
    else:
        country_prompt = two_character_prompt_func(
            name_1,
            name_2,
            {"{}来自{},{}来自{}。": ["姓名", "国籍", "姓名", "国籍"]},
            character_setting_rewrite_dict_1,
            character_setting_rewrite_dict_2,
        )

    info_prompt = "\n".join(info_parts + [country_prompt])
    return task_prompt, info_prompt
def main_single_character_prompt_func_cls(
    name,
    task,
    character_setting_rewrite_dict = {},
    Text = "",
    ):
    """Map a short task label to its template and build the prompt pair.

    ``task`` must be one of "介绍", "故事", "信", "聊天", "时候", "关于",
    "了解". The label "神之眼" still has a template entry but is rejected by
    the assertion, so it is only reachable when assertions are disabled.

    Returns whatever ``main_single_character_prompt_func`` returns for the
    selected template. An unknown label that passes the assertion (assertions
    disabled) ends in ``1 / 0`` (ZeroDivisionError).
    """
    #assert task in ["介绍", "故事", "神之眼", "信", "聊天", "时候", "关于", "了解"]
    assert task in ["介绍", "故事", "信", "聊天", "时候", "关于", "了解"]

    # (label, template request) pairs, checked in order with ==.
    task_requests = (
        ("介绍", {"续写下面的角色介绍,下面是角色介绍的开头。{}是{}。{}": ["姓名", "身份", "Text"]}),
        ("故事", {"续写下面的角色故事,下面是角色故事的开头。{}是{}。{}": ["姓名", "身份", "Text"]}),
        ("神之眼", {"续写下面获得神之眼的过程,下面是开头。{}是{}。{}": ["姓名", "身份", "Text"]}),
        ("信", {"{}给你写了一封信,信主题是{},信的内容是这样的。": ["姓名", "Text"]}),
        ("聊天", {"{}在进行有关{}的聊天时会说什么?": ["姓名", "Text"]}),
        ("时候", {"{}在{}的时候会说什么?": ["姓名", "Text"]}),
        ("关于", {"关于{},{}会说什么?": ["Text", "姓名"]}),
        ("了解", {"当你想要了解{}时": ["姓名"]}),
    )
    for label, request in task_requests:
        if task == label:
            return main_single_character_prompt_func(
                name,
                request,
                character_setting_rewrite_dict = character_setting_rewrite_dict,
                Text = Text,
            )
    return 1 / 0
def main_two_character_prompt_func_cls(
    name_1,
    name_2,
    task,
    character_setting_rewrite_dict_1 = {},
    character_setting_rewrite_dict_2 = {},
    ):
    """Map a two-character task label to its template and build the prompt pair.

    ``task`` must be "会说什么" or "哪些信息". Returns whatever
    ``main_two_character_prompt_func`` returns for the selected template. Any
    other label that passes the assertion (assertions disabled) ends in
    ``1 / 0`` (ZeroDivisionError).
    """
    assert task in ["会说什么", "哪些信息"]

    # (label, template request) pairs, checked in order with ==.
    task_requests = (
        ("会说什么", {"关于{},{}会说什么?": ["姓名", "姓名"]}),
        ("哪些信息", {"从{}那里,可以获得哪些关于{}的信息?": ["姓名", "姓名"]}),
    )
    for label, request in task_requests:
        if task == label:
            return main_two_character_prompt_func(
                name_1,
                name_2,
                request,
                character_setting_rewrite_dict_1,
                character_setting_rewrite_dict_2,
            )
    return 1 / 0
# Default settings record for one character (rebinds the name to the same
# content it was given earlier in this file).
character_setting_total_dist_dict = {
    "姓名": "",
    "性别": {"少女女性", "少年男性", "成年女性", "成年男性"},
    "国籍": {"枫丹", "璃月", "稻妻", "至冬", "蒙德", "须弥"},
    "身份": "",
    "性格特征": "",
    "角色介绍": "",
}

# Task labels accepted by the single- and two-character prompt builders.
all_single_task = ["介绍", "故事", "信", "聊天", "时候", "关于", "了解"]
all_two_task = ["会说什么", "哪些信息"]

# Choice lists offered for the gender and nationality dropdowns.
all_genders = ["少女女性", "少年男性", "成年女性", "成年男性"]
all_countries = ["蒙德", "璃月", "稻妻", "须弥", "枫丹", "至冬"]
def change_single_name(single_name):
    """Return the UI field values for the character ``single_name``.

    ``single_name`` may be a plain value or an object exposing ``.value``;
    the unwrapped value is what gets looked up. (Previously the unwrapped
    name was computed but the raw argument was passed to the lookup.)

    Settings values that are not strings - such as the default sets kept for
    "性别" and "国籍" when a character has no stored value - are shown as "".

    Returns a 6-tuple: name, gender dropdown update, nationality dropdown
    update, identity, disposition, introduction.
    """
    if hasattr(single_name, "value"):
        single_name_ = single_name.value
    else:
        single_name_ = single_name

    settings = get_character_setting_total_dict(single_name_)
    settings = {
        field: value if isinstance(value, str) else ""
        for field, value in settings.items()
    }
    return (
        settings["姓名"],
        gr.Dropdown.update(value = settings["性别"], choices = all_genders),
        gr.Dropdown.update(value = settings["国籍"], choices = all_countries),
        settings["身份"],
        settings["性格特征"],
        settings["角色介绍"],
    )
def get_single_prompt(
    single_name, select_gender, select_country, single_identity, single_disposition,
    select_task, Text, single_introduction
    ):
    """Build the full single-character prompt from the UI inputs.

    Every argument may be a plain value or an object exposing ``.value``; the
    unwrapped values are used throughout. (Previously ``Text`` was unwrapped
    into ``Text_`` but the raw ``Text`` was forwarded.)

    The six character fields are passed as the rewrite dict, so what the user
    edited takes precedence over the stored settings.

    Returns the info prompt followed by the task prompt, joined by a newline.
    """
    def unwrap(item):
        # Accept either a raw value or something carrying it in ``.value``.
        return item.value if hasattr(item, "value") else item

    single_name_ = unwrap(single_name)
    select_task_ = unwrap(select_task)
    Text_ = unwrap(Text)

    character_setting_rewrite_dict = {
        '姓名': single_name_,
        '性别': unwrap(select_gender),
        '国籍': unwrap(select_country),
        '身份': unwrap(single_identity),
        '性格特征': unwrap(single_disposition),
        '角色介绍': unwrap(single_introduction),
    }
    a, b = main_single_character_prompt_func_cls(
        single_name_,
        select_task_,
        character_setting_rewrite_dict = character_setting_rewrite_dict,
        Text = Text_,
    )
    return "\n".join([b, a])
def get_two_prompt(
    single_name_1, select_gender_1, select_country_1, single_identity_1, single_disposition_1,
    single_introduction_1,
    single_name_2, select_gender_2, select_country_2, single_identity_2, single_disposition_2,
    single_introduction_2, two_task,
    ):
    """Build the full two-character prompt from the UI inputs.

    ``two_task`` must be "会说什么" or "哪些信息" and is used as given. Every
    other argument may be a plain value or an object exposing ``.value``. The
    six fields of each character are passed as that character's rewrite dict.

    Returns the info prompt followed by the task prompt, joined by a newline.
    """
    assert two_task in ["会说什么", "哪些信息"]

    def unwrap(item):
        # Accept either a raw value or something carrying it in ``.value``.
        return item.value if hasattr(item, "value") else item

    field_names = ('姓名', '性别', '国籍', '身份', '性格特征', '角色介绍')
    inputs_1 = (
        single_name_1, select_gender_1, select_country_1,
        single_identity_1, single_disposition_1, single_introduction_1,
    )
    inputs_2 = (
        single_name_2, select_gender_2, select_country_2,
        single_identity_2, single_disposition_2, single_introduction_2,
    )
    character_setting_rewrite_dict_1 = dict(zip(field_names, map(unwrap, inputs_1)))
    character_setting_rewrite_dict_2 = dict(zip(field_names, map(unwrap, inputs_2)))

    a, b = main_two_character_prompt_func_cls(
        character_setting_rewrite_dict_1['姓名'],
        character_setting_rewrite_dict_2['姓名'],
        two_task,
        character_setting_rewrite_dict_1 = character_setting_rewrite_dict_1,
        character_setting_rewrite_dict_2 = character_setting_rewrite_dict_2,
    )
    return "\n".join([b, a])
import re
import pandas as pd
def retrieve_sent_split(sent,
    stops_split_pattern = "|".join(map(lambda x: r"\{}".format(x),
                                   ",." + ",。" + ":" + "n"))
    ):
    """Split ``sent`` at punctuation and line breaks.

    The default pattern is an alternation of backslash-escaped characters;
    the trailing "n" becomes the regex escape for a newline, so the letter
    "n" itself is not a separator.

    Returns the pieces that contain something other than whitespace, as they
    appear in ``sent`` (not stripped). A blank ``sent`` returns ``[]``.
    """
    if not sent.strip():
        return []
    return [
        piece
        for piece in re.split(stops_split_pattern, sent)
        if piece.strip()
    ]
def stop_criteria(sent, min_sub_len = 4):
    """Tell whether ``sent`` has started repeating itself.

    Returns "stop" when either
    * a chunk from ``retrieve_sent_split`` of at least ``min_sub_len``
      characters occurs more than once (the text is also printed), or
    * a non-empty stripped line occurs more than once;
    otherwise returns "continue".
    """
    def has_repeat(items):
        # Some value occurs at least twice iff distinct values are fewer.
        return len(set(items)) < len(items)

    #### chunk rec stop
    chunks = [
        chunk for chunk in retrieve_sent_split(sent)
        if len(chunk) >= min_sub_len
    ]
    if chunks and has_repeat(chunks):
        print("stop in : {}".format(sent))
        return "stop"

    #### row rec stop
    rows = [row for row in (line.strip() for line in sent.split("\n")) if row]
    if rows and has_repeat(rows):
        return "stop"
    return "continue"
#model_file_path = "llama2zh-13b-3900-q4_0.gguf"
#model_file_path = "llama2zh-1_3b-4500-q4_0.gguf"
#model_file_path = "llama2zh-13b-9300-q4_0.gguf"
#model_file_path = "llama2zh-13b-7500-q4_0.gguf"
# Model weights inside the directory downloaded at the top of this file.
model_file_path = "genshin_impact_character_llamazh13b_ggml/llama2zh-13b-3900-q4_0.gguf"
from llama_cpp import Llama
# Load the model once at import time; this instance is the default `llama`
# argument of llama2_zh_predict below.
# NOTE(review): n_ctx is 4090 rather than 4096 - confirm this is intentional.
llama = Llama(model_file_path,
n_ctx = 4090,
n_threads=2, # The number of CPU threads to use, tailor to your system and the resulting performance
#n_gpu_layers=-1
)
print("load {}".format(model_file_path))
def repeat_cmp_process(x, ratio_threshold = 0.3):
    """De-duplicate only the lines of ``x`` that are mostly repetition.

    Blank lines are dropped. Each remaining line is passed through
    ``repeat_to_one``; the collapsed form replaces the line when its length is
    at most ``ratio_threshold`` times the original length, otherwise the line
    is kept unchanged.

    Returns the resulting lines joined by newlines.
    """
    processed = []
    for line in x.split("\n"):
        if not line.strip():
            continue
        collapsed = repeat_to_one(line)
        mostly_repetition = (len(collapsed) / len(line)) <= ratio_threshold
        processed.append(collapsed if mostly_repetition else line)
    return "\n".join(processed)
def text_process_before_yield(x, add_repeat_process = True):
    """Trim the unfinished tail of multi-line text before it is shown.

    ``x`` is stripped first. Single-line text is returned as is. For
    multi-line text the last run of CJK characters (U+4E00-U+9FA5) in ``x`` is
    located, every line is stripped, and the trailing lines that end with that
    run are removed. Multi-line text without any such character yields "".

    When ``add_repeat_process`` is true the result is additionally passed
    through ``repeat_cmp_process``.
    """
    import re

    def finish(text):
        return repeat_cmp_process(text) if add_repeat_process else text

    x = x.strip()
    raw_lines = x.split("\n")
    if len(raw_lines) <= 1:
        return finish(x)

    chinese_runs = re.findall(u"[\u4e00-\u9fa5]+", x)
    if not chinese_runs:
        return ""

    tail = chinese_runs[-1]
    lines = [line.strip() for line in raw_lines]
    # Walk back from the end past every line that still ends with the tail.
    keep = len(lines)
    while keep > 0 and lines[keep - 1].endswith(tail):
        keep -= 1
    return finish("\n".join(lines[:keep]))
def llama2_zh_predict(message, llama = llama,
max_output_length = 512, max_message_length = 1024, top_p = 0.8,
use_system = True, temperature = 0.8
):
message = message[:max_message_length]
DEFAULT_SYSTEM_PROMPT = """You are a helpful assistant. 你是一个乐于助人的助手。"""
TEMPLATE_WITH_SYSTEM_PROMPT = (
"[INST] <