Spaces:

fb700
/

chatglm-fitness-RLHF

Runtime error

App Files Files Community

chatglm-fitness-RLHF / app.py

fb700

test langchain-chatglm

e8f4bcb over 1 year ago

raw

history blame

14.7 kB

	import os
	import shutil

	from app_modules.presets import *
	from clc.langchain_application import LangChainApplication


	# Adjust these settings to match your own deployment!
	class LangChainCFG:
	    """Static settings object that is passed to ``LangChainApplication``."""

	    # Chat model: local model files or a Hugging Face remote repository.
	    llm_model_name = 'fb700/chatglm-fitness-RLHF'
	    # Retrieval (embedding) model: local model files or a Hugging Face remote repository.
	    embedding_model_name = 'moka-ai/m3e-large'
	    vector_store_path = './cache'
	    docs_path = './docs'
	    # Display name -> vector store path. Replace with your own knowledge
	    # bases; set to None if there are none.
	    kg_vector_stores = {
	        '中文维基百科': './cache/zh_wikipedia',
	        '大规模金融研报': './cache/financial_research_reports',
	        '初始化': './cache',
	    }
	    # kg_vector_stores = None
	    # Answering modes: plain model Q&A and knowledge-base Q&A.
	    patterns = ['模型问答', '知识库问答']


	# Build the configuration object and hand it to the LangChain application wrapper.
	config = LangChainCFG()
	application = LangChainApplication(config)
	# Runs at import time; presumably builds or loads the initial vector index
	# -- TODO confirm in clc.langchain_application.
	application.source_service.init_source_vector()


	def get_file_list(docs_dir="docs"):
	    """Return the names of the entries inside the documents directory.

	    Args:
	        docs_dir: Directory to list. Defaults to ``"docs"``, the folder that
	            uploaded files are moved into.

	    Returns:
	        A new list of entry names in the order ``os.listdir`` yields them, or
	        an empty list when ``docs_dir`` does not exist or is not a directory.
	    """
	    try:
	        # os.listdir already returns a fresh list, so no copy is needed.
	        return os.listdir(docs_dir)
	    except (FileNotFoundError, NotADirectoryError):
	        return []


	# Listing taken once at import time; upload_file() inserts newly uploaded names at the front.
	file_list = get_file_list()


	def upload_file(file):
	    """Move an uploaded file into ``docs/`` and add it to the knowledge base.

	    Args:
	        file: Uploaded file object; only its ``name`` attribute (the path of
	            the uploaded file) is read.

	    Returns:
	        A ``gr.Dropdown.update`` whose choices are the known file names and
	        whose selected value is the file that was just uploaded.
	    """
	    # exist_ok avoids the check-then-create race of exists() + mkdir().
	    os.makedirs("docs", exist_ok=True)
	    filename = os.path.basename(file.name)
	    target = "docs/" + filename
	    shutil.move(file.name, target)
	    # Keep the newest upload first; drop any older entry with the same name
	    # so a re-upload does not produce duplicate choices.
	    if filename in file_list:
	        file_list.remove(filename)
	    file_list.insert(0, filename)
	    application.source_service.add_document(target)
	    return gr.Dropdown.update(choices=file_list, value=filename)


	def set_knowledge(kg_name, history):
	    """Load the selected knowledge base and report the outcome in the chat.

	    Args:
	        kg_name: Key into ``config.kg_vector_stores`` naming the knowledge base.
	        history: Current chat history as a list of ``[user, bot]`` pairs; may
	            be ``None`` or empty.

	    Returns:
	        A new list: the given history followed by one ``[None, status]`` entry.
	        The input list is not modified.
	    """
	    try:
	        application.source_service.load_vector_store(config.kg_vector_stores[kg_name])
	    except Exception as e:
	        # UI boundary: any failure (unknown name, missing store, ...) is
	        # logged and reported to the user instead of breaking the handler.
	        print(e)
	        msg_status = f'{kg_name}知识库未成功加载'
	    else:
	        msg_status = f'{kg_name}知识库已成功加载'
	    # `history or []` guards against a None history.
	    return (history or []) + [[None, msg_status]]


	def clear_session():
	    """Return the values that reset the conversation.

	    Returns:
	        The 2-tuple ``('', None)``; the clear-history button maps it onto the
	        chatbot and state outputs.
	    """
	    return '', None


	def predict(input,
	            large_language_model,
	            embedding_model,
	            top_k,
	            use_web,
	            use_pattern,
	            history=None):
	    """Answer one question, either with the bare model or with the knowledge base.

	    Args:
	        input: The user's question.
	        large_language_model: Selected model name; not used by this function.
	        embedding_model: Selected embedding model name; not used by this function.
	        top_k: Number of documents to retrieve in knowledge-base mode.
	        use_web: ``'使用'`` enables a web search for the question; any other
	            value disables it.
	        use_pattern: ``'模型问答'`` selects plain model answering; any other
	            value selects knowledge-base answering.
	        history: List of ``(question, answer)`` tuples, extended in place.
	            A new list is started when ``None``.

	    Returns:
	        ``('', history, history, search_text)``: an empty string (clears the
	        input box), the history twice (chat display and state), and the text
	        for the search-results box.
	    """
	    print(input)
	    if history is None:
	        history = []

	    if use_web == '使用':
	        web_content = application.source_service.search_web(query=input)
	    else:
	        web_content = ''

	    if use_pattern == '模型问答':
	        result = application.get_llm_answer(query=input, web_content=web_content)
	        history.append((input, result))
	        search_text = web_content
	    else:
	        resp = application.get_knowledge_based_answer(
	            query=input,
	            history_len=1,
	            temperature=0.1,
	            top_p=0.9,
	            top_k=top_k,
	            web_content=web_content,
	            chat_history=history
	        )
	        history.append((input, resp['result']))
	        # Only the first four retrieved documents are displayed, whatever top_k is.
	        parts = []
	        for idx, source in enumerate(resp['source_documents'][:4], start=1):
	            sep = f'----------【搜索结果{idx}：】---------------\n'
	            parts.append(f'{sep}\n{source.page_content}\n\n')
	        sources_text = ''.join(parts)
	        if parts:
	            # Log the retrieved passages once instead of re-printing the
	            # growing text after every document.
	            print(sources_text)
	        search_text = sources_text + "----------【网络检索内容】-----------\n" + web_content

	    return '', history, history, search_text


	# Load the custom stylesheet applied to the whole page.
	with open("assets/custom.css", "r", encoding="utf-8") as f:
	    customCSS = f.read()


	def _answer_api(text):
	    """Return the plain model answer for ``text`` (handler of the hidden "tr" endpoint)."""
	    return application.get_llm_answer(query=text, web_content='')


	# Build the page: settings column, chat column, search-results column,
	# then the event wiring, the example prompts and a hidden API panel.
	with gr.Blocks(css=customCSS, theme=small_and_beautiful_theme) as demo:
	    gr.Markdown("""<h1><center>Chinese-LangChain by 帛凡 Fitness AI</center></h1>
	<center><font size=3>
	</center></font>
	""")
	    # Holds the conversation history between predict() calls.
	    state = gr.State()

	    with gr.Row():
	        with gr.Column(scale=1):
	            embedding_model = gr.Dropdown(
	                ["moka-ai/m3e-large"],
	                label="Embedding model",
	                value="moka-ai/m3e-large")

	            large_language_model = gr.Dropdown(
	                ["帛凡 Fitness AI"],
	                label="large language model",
	                value="帛凡 Fitness AI")

	            top_k = gr.Slider(1,
	                              20,
	                              value=4,
	                              step=1,
	                              label="检索top-k文档",
	                              interactive=True)

	            use_web = gr.Radio(["使用", "不使用"], label="web search",
	                               info="是否使用网络搜索，使用时确保网络通常",
	                               value="不使用"
	                               )
	            use_pattern = gr.Radio(
	                [
	                    '模型问答',
	                    '知识库问答',
	                ],
	                label="模式",
	                value='模型问答',
	                interactive=True)

	            # kg_vector_stores may be configured as None; offer no choices then.
	            kg_name = gr.Radio(list(config.kg_vector_stores or {}),
	                               label="知识库",
	                               value=None,
	                               info="使用知识库问答，请加载知识库",
	                               interactive=True)
	            set_kg_btn = gr.Button("加载知识库")

	            file = gr.File(label="将文件上传到知识库库，内容要尽量匹配",
	                           visible=True,
	                           file_types=['.txt', '.md', '.docx', '.pdf']
	                           )

	        with gr.Column(scale=4):
	            with gr.Row():
	                chatbot = gr.Chatbot(label='Chinese-LangChain').style(height=400)
	            with gr.Row():
	                message = gr.Textbox(label='请输入问题')
	            with gr.Row():
	                clear_history = gr.Button("🧹 清除历史对话")
	                send = gr.Button("🚀 发送")
	            with gr.Row():
	                gr.Markdown("""提醒：<br>
	[帛凡 Fitness AI模型下载地址](https://huggingface.co./fb700/chatglm-fitness-RLHF) <br>
	It's beyond Fitness,模型由[帛凡]基于ChatGLM-6b进行微调后，在健康（全科）、心理等领域达至少60分的专业水准，而且中文总结能力超越了GPT3.5各版本。声明：本应用仅为模型能力演示，无任何商业行为，部署资源为Huggingface官方免费提供，任何通过此项目产生的知识仅用于学术参考，作者和网站均不承担任何责任。帛凡 Fitness AI 演示T4 is just a machine wiht 16G VRAM ，so OOM is easy to occur ，If you meet any error，Please email me 。 👉 [email protected]<br>
	""")
	        with gr.Column(scale=2):
	            search = gr.Textbox(label='搜索结果')

	    # ============= Event wiring =============
	    file.upload(upload_file,
	                inputs=file,
	                outputs=None)
	    set_kg_btn.click(
	        set_knowledge,
	        show_progress=True,
	        inputs=[kg_name, chatbot],
	        outputs=chatbot
	    )

	    # The send button and pressing Enter in the input box run the same handler.
	    predict_inputs = [
	        message,
	        large_language_model,
	        embedding_model,
	        top_k,
	        use_web,
	        use_pattern,
	        state
	    ]
	    predict_outputs = [message, chatbot, state, search]

	    # Send button submits the question.
	    send.click(predict,
	               inputs=predict_inputs,
	               outputs=predict_outputs)

	    # Clear-history button resets the chat display and the stored history.
	    clear_history.click(fn=clear_session,
	                        inputs=[],
	                        outputs=[chatbot, state],
	                        queue=False)

	    # Pressing Enter in the input box submits the question.
	    message.submit(predict,
	                   inputs=predict_inputs,
	                   outputs=predict_outputs)

	    with gr.Accordion("Example inputs", open=True):
	        etext0 = """ "act": "作为基于文本的冒险游戏",\n "prompt": "我想让你扮演一个基于文本的冒险游戏。我在这个基于文本的冒险游戏中扮演一个角色。请尽可能具体地描述角色所看到的内容和环境，并在游戏输出1、2、3让用户选择进行回复，而不是其它方式。我将输入命令来告诉角色该做什么，而你需要回复角色的行动结果以推动游戏的进行。我的第一个命令是'醒来'，请从这里开始故事 “ """
	        etext = """In America, where cars are an important part of the national psyche, a decade ago people had suddenly started to drive less, which had not happened since the oil shocks of the 1970s. """
	        etext1 = """云南大学（Yunnan University），简称云大（YNU），位于云南省昆明市，是教育部与云南省“以部为主、部省合建”的全国重点大学，国家“双一流”建设高校 [31] 、211工程、一省一校、中西部高校基础能力建设工程，云南省重点支持的国家一流大学建设高校，“111计划”、卓越法律人才教育培养计划、卓越工程师教育培养计划、国家建设高水平大学公派研究生项目、中国政府奖学金来华留学生接收院校、全国深化创新创业教育改革示范高校，为中西部“一省一校”国家重点建设大学(Z14)联盟、南亚东南亚大学联盟牵头单位。 [1]
	云南大学始建于1922年，时为私立东陆大学。1930年，改为省立东陆大学。1934年更名为省立云南大学。1938年改为国立云南大学。1946年，《不列颠百科全书》将云南大学列为中国15所在世界最具影响的大学之一。1950年定名为云南大学。1958年，云南大学由中央高教部划归云南省管理。1978年，云南大学被国务院确定为88所全国重点大学之一。1996年首批列入国家“211工程”重点建设大学。1999年，云南政法高等专科学校并入云南大学。 [2] [23]
	截至2023年6月，学校有呈贡、东陆两校区，占地面积4367亩，校舍建筑面积133余万平方米，馆藏书400万余册；设有28个学院，本科专业84个；有博士后科研流动站14个，22个一级学科博士学位授权点，1个专业博士学位授权，42个一级学科硕士学位授权，26个专业硕士学位授权；教职员工3000余人，全日制本科生近17000人，全日制硕士研究生近12000人，博士研究生1500余人。 """
	        examples = gr.Examples(
	            examples=[
	                [etext0],
	                ["熬夜对身体有什么危害? "],
	                ["新冠肺炎怎么预防"],
	                ["系统性红斑狼疮的危害和治疗方法是什么？"],
	                [
	                    "我经常感觉郁闷，而且控制不住情绪，经常对周围的人喊叫，怎么办？"
	                ],
	                ["太阳为什么会发热? "],
	                ["指南针是怎么工作的？"],
	                ["在野外怎么辨别方向？"],
	                [
	                    "发芽的土豆还能不能吃？"
	                ],
	                ["What NFL team won the Super Bowl in the year Justin Bieber was born? "],
	                ["What NFL team won the Super Bowl in the year Justin Bieber was born? Think step by step."],
	                ["Explain the plot of Cinderella in a sentence."],
	                [
	                    "How long does it take to become proficient in French, and what are the best methods for retaining information?"
	                ],
	                ["What are some common mistakes to avoid when writing code?"],
	                ["Build a prompt to generate a beautiful portrait of a horse"],
	                ["Suggest four metaphors to describe the benefits of AI"],
	                ["Write a pop song about leaving home for the sandy beaches."],
	                ["Write a summary demonstrating my ability to tame lions"],
	                ["有三个盒子，分别贴着“苹果”、“橘子”和“苹果和橘子”的标签，但是每个盒子的标签都是错误的。你只能打开一个盒子，然后从里面拿出一个水果，然后确定每个盒子里装的是什么水果。你应该打开哪个盒子？为什么？"],
	                ["春天来了，万物复苏，小鸟歌唱，生机勃勃。\n问题：以上文本表达的情绪是正向还是负向？"],
	                ["正无穷大加一大于正无穷大吗？"],
	                ["正无穷大加正无穷大大于正无穷大吗？"],
	                ["以今天对应的节气写一副对联"],
	                ["树上有5只鸟，猎人开枪打死了一只。树上还有几只鸟？Think step by step."],
	                ["从零学习编程，请给我一个三个月的学习计划。"],
	                ["双喜临门，打一中国地名"],
	                ["以红楼梦的行文风格写一张委婉的请假条。不少于320字。"],
	                [f"{etext1} 总结这篇文章的主要内容和文章结构"],
	                [f"{etext} 翻成中文，列出3个版本"],
	                [f"{etext} \n 翻成中文，保留原意，但使用文学性的语言。不要写解释。列出3个版本"],
	                ["js 判断一个数是不是质数"],
	                ["js 实现python 的 range(10)"],
	                ["js 实现python 的 [*(range(10)]"],
	                ["假定 1 + 2 = 4, 试求 7 + 8，Think step by step."],
	                ["2023年云南大学成立100周年，它是哪一年成立的？"],
	                ["Erkläre die Handlung von Cinderella in einem Satz."],
	                ["Erkläre die Handlung von Cinderella in einem Satz. Auf Deutsch"],
	            ],
	            # Clicking an example fills the question box. The original
	            # referenced `user_input`, which is not defined in this file.
	            inputs=[message],
	            examples_per_page=50,
	        )

	    with gr.Accordion("For Chat/Translation API", open=False, visible=False):
	        input_text = gr.Text()
	        tr_btn = gr.Button("Go", variant="primary")
	        out_text = gr.Text()
	        # The original wiring used `trans_api`, `max_length`, `top_p` and
	        # `temperature`, none of which exist in this file; route the hidden
	        # endpoint to the plain model answer instead.
	        tr_btn.click(
	            _answer_api,
	            [input_text],
	            out_text,
	            api_name="tr",
	        )


	# Start the server on all interfaces with request queuing (two workers).
	# Queuing is enabled by .queue(); the deprecated `enable_queue` launch
	# argument is therefore not passed.
	demo.queue(concurrency_count=2).launch(
	    server_name='0.0.0.0',
	    share=False,
	    show_error=True,
	    debug=True,
	    inbrowser=True,
	)