tokbot / app.py
import json
import os
from pathlib import Path

import faiss
import gradio as gr
from langchain.docstore.document import Document
from langchain.docstore.in_memory import InMemoryDocstore
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores.faiss import FAISS

from chain import get_chain
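
# Directory containing the prebuilt FAISS index and its JSON document metadata.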
STORE_DIR = "store"
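
# Iframe markup for a source video; {source} is filled with a YouTube embed URL,
# and cc_load_policy=1 turns captions on.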
YOUTUBE_EMBED_TEMPLATE = """
<iframe width="354"
height="200"
src="{source}&cc_load_policy=1"
title="YouTube video player"
frameborder="0"
allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen>
</iframe>"""
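

# Rebuild the FAISS vector store from disk: the raw .faiss index plus the saved
# documents and index-to-docstore-id mapping stored as JSON alongside it.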
def load_store():
    def keys_to_int(x):
        return {int(k): v for k, v in x.items()}

    index_path = list(Path(STORE_DIR).glob("*.faiss"))
    if len(index_path) == 0:
        raise ValueError("No index found in path")
    index_path = index_path[0]
    index_name = index_path.name.split(".")[0]
    with open(os.path.join(STORE_DIR, f"{index_name}_doc_idx.json"), "r") as f:
        index_to_id = json.load(f, object_hook=keys_to_int)
    with open(os.path.join(STORE_DIR, f"{index_name}_docs.json"), "r") as f:
        docs = json.load(f)
    embeddings = OpenAIEmbeddings()
    return FAISS(
        embedding_function=embeddings.embed_query,
        index=faiss.read_index(str(index_path)),
        docstore=InMemoryDocstore(
            {index_to_id[i]: Document(**doc) for i, doc in enumerate(docs.values())}
        ),
        index_to_docstore_id=index_to_id,
    )
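

# Build the QA chain once an OpenAI key is provided. The key is only kept in the
# environment while the store and chain are constructed, then cleared again.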
def set_openai_api_key(api_key, agent):
    if api_key:
        os.environ["OPENAI_API_KEY"] = api_key
        vstore = load_store()
        qa_chain = get_chain(vstore)
        os.environ["OPENAI_API_KEY"] = ""
        return qa_chain
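

# Convert a YouTube watch URL into an embed URL, mapping a &t= timestamp to a
# ?start= offset.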
def _to_embed(link):
    return link.replace("watch?v=", "embed/").replace("&t=", "?start=")
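

# Chat callback: run the question through the QA chain, append the new turn to
# the history, and render any YouTube sources as embedded players.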
def chat(inp, history, agent):
    history = history or []
    if agent is None:
        history.append((inp, "Please paste your OpenAI key"))
        return history, history, ""
    output = agent({"question": inp, "chat_history": history})
    answer = output["answer"]
    history.append((inp, answer))
    source_iframes = []
    for source in output["sources"]:
        if "youtube.com" in source:
            source_iframes.append(
                YOUTUBE_EMBED_TEMPLATE.format(source=_to_embed(source))
            )
    source_html = f"""<div style='min-height:200px;display:flex;align-items:center;justify-content:space-around;'>
{''.join(source_iframes)}
</div>"""
    return history, history, source_html
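

# Gradio UI: API key field, chat window, example questions, and a panel for the
# source video embeds.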
with gr.Blocks(css=".gradio-container {background-color: lightgray}") as demo:
    state = gr.State()
    agent_state = gr.State()

    gr.Markdown("<h3><center>ToKBot🤖 - Ask ToKCast Questions</center></h3>")
    openai_api_key_textbox = gr.Textbox(
        placeholder="Paste your OpenAI API key (sk-...)",
        show_label=False,
        lines=1,
        type="password",
    )
    sources = gr.HTML()
    chatbot = gr.Chatbot()
    with gr.Row():
        message = gr.Textbox(
            label="What's your question?",
            placeholder="Type your question here...",
            lines=1,
        )
        submit = gr.Button(value="Send", variant="secondary").style(full_width=False)
    gr.Examples(
        examples=[
            'What does "the beginning of infinity" refer to?',
            "How do memes differ from genes in their replication?",
            "What is the nature of knowledge and how does it grow?",
        ],
        inputs=message,
    )
    gr.HTML(
        """<p>A GPT-3/LangChain bot that answers questions about the ToKCast podcast and provides relevant video excerpts.</p>"""
    )
    gr.HTML(
        "<center>Powered by <a href='https://github.com/hwchase17/langchain'>LangChain 🦜️🔗</a></center>"
    )

    submit.click(
        chat,
        inputs=[message, state, agent_state],
        outputs=[chatbot, state, sources],
    )
    message.submit(
        chat,
        inputs=[message, state, agent_state],
        outputs=[chatbot, state, sources],
    )
    openai_api_key_textbox.change(
        set_openai_api_key,
        inputs=[openai_api_key_textbox, agent_state],
        outputs=[agent_state],
    )

demo.launch()