# ui/webapp.py
import os
import sys
import logging
from pathlib import Path
from json import JSONDecodeError
import pandas as pd
import streamlit as st
from annotated_text import annotation
from markdown import markdown
from ui.utils import haystack_is_ready, query, send_feedback, upload_doc, haystack_version, get_backlink
# Adjust to a question that you would like users to see in the search bar when they load the UI:
DEFAULT_QUESTION_AT_STARTUP = os.getenv("DEFAULT_QUESTION_AT_STARTUP", "What are the symptoms of cancer related fatigue?")
DEFAULT_ANSWER_AT_STARTUP = os.getenv("DEFAULT_ANSWER_AT_STARTUP", "diminished energy, increased need to rest")

# Sliders: default positions of the two sidebar sliders, overridable via environment variables
DEFAULT_DOCS_FROM_RETRIEVER = int(os.getenv("DEFAULT_DOCS_FROM_RETRIEVER", "3"))
DEFAULT_NUMBER_OF_ANSWERS = int(os.getenv("DEFAULT_NUMBER_OF_ANSWERS", "3"))

# Labels for the evaluation: path to a CSV of gold question/answer pairs
# (read with sep=";" in main(); must contain "Question Text" and "Answer" columns)
EVAL_LABELS = os.getenv("EVAL_FILE", str(Path(__file__).parent / "eval_labels_example.csv"))

# Whether the file upload should be enabled or not.
# NOTE(review): bool() of any non-empty string is True, so setting DISABLE_FILE_UPLOAD
# to "0" or "false" still disables the upload — only unset/empty enables it.
DISABLE_FILE_UPLOAD = bool(os.getenv("DISABLE_FILE_UPLOAD"))
def set_state_if_absent(key, value):
    """Seed ``st.session_state[key]`` with ``value`` only when the key is not set yet."""
    if key in st.session_state:
        return
    st.session_state[key] = value
def main():
    """Render the Streamlit demo UI.

    Builds the sidebar (sliders, eval/debug toggles, optional file upload,
    footer), the search bar with Run / Random-question buttons, fires the
    query against the Haystack REST API via ``ui.utils.query``, and renders
    answers plus optional feedback buttons.

    NOTE(review): several emoji string literals below look mojibake-damaged
    (e.g. "βœ…", "πŸ‘") — presumably an encoding round-trip issue; confirm
    against the original file before shipping.
    """
    st.set_page_config(page_title="Haystack Demo", page_icon="https://haystack.deepset.ai/img/HaystackIcon.png")

    # Persistent state: seeded once, survives Streamlit's top-to-bottom re-runs
    set_state_if_absent("question", DEFAULT_QUESTION_AT_STARTUP)
    set_state_if_absent("answer", DEFAULT_ANSWER_AT_STARTUP)
    set_state_if_absent("results", None)
    set_state_if_absent("raw_json", None)
    set_state_if_absent("random_question_requested", False)

    # Small callback to reset the interface in case the text of the question changes
    def reset_results(*args):
        st.session_state.answer = None
        st.session_state.results = None
        st.session_state.raw_json = None

    # Title
    st.write("# Healthcare Demo")
    st.markdown(
        """
Ask a question and see if Haystack can find the correct answer to your query!
*Note: do not use keywords, but full-fledged questions.* The demo is not optimized to deal with keyword queries and might misunderstand you.
""",
        unsafe_allow_html=True,
    )

    # Sidebar
    st.sidebar.header("Options")
    # top_k for the reader: how many answers the pipeline should return
    top_k_reader = st.sidebar.slider(
        "Max. number of answers",
        min_value=1,
        max_value=10,
        value=DEFAULT_NUMBER_OF_ANSWERS,
        step=1,
        on_change=reset_results,
    )
    # top_k for the retriever: how many candidate documents feed the reader
    top_k_retriever = st.sidebar.slider(
        "Max. number of documents from retriever",
        min_value=1,
        max_value=10,
        value=DEFAULT_DOCS_FROM_RETRIEVER,
        step=1,
        on_change=reset_results,
    )
    eval_mode = st.sidebar.checkbox("Evaluation mode")
    debug = st.sidebar.checkbox("Show debug info")

    # File upload block (hidden entirely when DISABLE_FILE_UPLOAD is set)
    if not DISABLE_FILE_UPLOAD:
        st.sidebar.write("## File Upload:")
        data_files = st.sidebar.file_uploader(
            "upload", type=["pdf", "txt", "docx"], accept_multiple_files=True, label_visibility="hidden"
        )
        for data_file in data_files:
            # Upload file via the REST API; per-file success/failure is shown in the sidebar
            if data_file:
                try:
                    raw_json = upload_doc(data_file)
                    st.sidebar.write(str(data_file.name) + "    βœ… ")
                    if debug:
                        st.subheader("REST API JSON response")
                        st.sidebar.write(raw_json)
                except Exception as e:
                    # best-effort: report the failure in the UI, details go to the server logs
                    st.sidebar.write(str(data_file.name) + "    ❌ ")
                    st.sidebar.write("_This file could not be parsed, see the logs for more information._")

    # Footer version badge; best-effort, blank if the backend is unreachable.
    # NOTE(review): hs_version is never interpolated into the footer below —
    # the version "1.14.0" is hard-coded in the <h4>; probably a regression.
    hs_version = ""
    try:
        hs_version = f" <small>(v{haystack_version()})</small>"
    except Exception:
        pass

    st.sidebar.markdown(
        f"""
    <style>
        a {{
            text-decoration: none;
        }}
        .haystack-footer {{
            text-align: center;
        }}
        .haystack-footer h4 {{
            margin: 0.1rem;
            padding:0;
        }}
        footer {{
            opacity: 0;
        }}
    </style>
    <div class="haystack-footer">
        <hr />
        <h4>Built with <a href="https://haystack.deepset.ai/">Haystack</a> 1.14.0</h4>
        <p>Get it on <a href="https://github.com/deepset-ai/haystack/">GitHub</a> &nbsp;&nbsp; - &nbsp;&nbsp; Read the <a href="https://docs.haystack.deepset.ai/docs">Docs</a></p>
    </div>
    """,
        unsafe_allow_html=True,
    )

    # Load csv into pandas dataframe; the demo cannot run without its eval labels,
    # so a missing/unreadable file aborts the process after surfacing an error in the UI
    try:
        df = pd.read_csv(EVAL_LABELS, sep=";")
    except Exception:
        st.error(
            f"The eval file was not found. Please check the demo's [README](https://github.com/deepset-ai/haystack/tree/main/ui/README.md) for more information."
        )
        sys.exit(
            f"The eval file was not found under `{EVAL_LABELS}`. Please check the README (https://github.com/deepset-ai/haystack/tree/main/ui/README.md) for more information."
        )

    # Search bar
    question = st.text_input(
        value=st.session_state.question,
        max_chars=100,
        on_change=reset_results,
        label="question",
        label_visibility="hidden",
    )
    col1, col2 = st.columns(2)
    # Make both buttons span their full column width
    col1.markdown("<style>.stButton button {width:100%;}</style>", unsafe_allow_html=True)
    col2.markdown("<style>.stButton button {width:100%;}</style>", unsafe_allow_html=True)

    # Run button
    run_pressed = col1.button("Run")

    # Get next random question from the CSV
    if col2.button("Random question"):
        reset_results()
        new_row = df.sample(1)
        while (
            new_row["Question Text"].values[0] == st.session_state.question
        ):  # Avoid picking the same question twice (the change is not visible on the UI)
            new_row = df.sample(1)
        st.session_state.question = new_row["Question Text"].values[0]
        st.session_state.answer = new_row["Answer"].values[0]
        st.session_state.random_question_requested = True
        # Re-runs the script setting the random question as the textbox value
        # Unfortunately necessary as the Random Question button is _below_ the textbox
        # (the RerunException module path moved between Streamlit releases, hence the hasattr probe)
        if hasattr(st, "scriptrunner"):
            raise st.scriptrunner.script_runner.RerunException(
                st.scriptrunner.script_requests.RerunData(widget_states=None)
            )
        raise st.runtime.scriptrunner.script_runner.RerunException(
            st.runtime.scriptrunner.script_requests.RerunData(widget_states=None)
        )
    st.session_state.random_question_requested = False

    # Query when Run was clicked or the textbox text diverged from the stored question,
    # but never on the rerun triggered by the Random-question button
    run_query = (
        run_pressed or question != st.session_state.question
    ) and not st.session_state.random_question_requested

    # Check the connection to the Haystack REST API before querying
    with st.spinner("βŒ›οΈ &nbsp;&nbsp; Haystack is starting..."):
        if not haystack_is_ready():
            st.error("🚫 &nbsp;&nbsp; Connection Error. Is Haystack running?")
            run_query = False
            reset_results()

    # Get results for query
    if run_query and question:
        reset_results()
        st.session_state.question = question
        with st.spinner(
            "🧠 &nbsp;&nbsp; Performing neural search on documents... \n "
            "Do you want to optimize speed or accuracy? \n"
            "Check out the docs: https://haystack.deepset.ai/usage/optimization "
        ):
            try:
                st.session_state.results, st.session_state.raw_json = query(
                    question, top_k_reader=top_k_reader, top_k_retriever=top_k_retriever
                )
            except JSONDecodeError as je:
                # response body was not valid JSON — typically a backend/document-store problem
                st.error("πŸ‘“ &nbsp;&nbsp; An error occurred reading the results. Is the document store working?")
                return
            except Exception as e:
                logging.exception(e)
                if "The server is busy processing requests" in str(e) or "503" in str(e):
                    st.error("πŸ§‘β€πŸŒΎ &nbsp;&nbsp; All our workers are busy! Try again later.")
                else:
                    st.error("🐞 &nbsp;&nbsp; An error occurred during the request.")
                return

    if st.session_state.results:
        # Show the gold answer if we use a question of the given set
        if eval_mode and st.session_state.answer:
            st.write("## Correct answer:")
            st.write(st.session_state.answer)

        st.write("## Results:")
        # result dicts are produced by ui.utils.query; keys used here:
        # "answer", "context", "relevance", "source", "document", "_raw"
        for count, result in enumerate(st.session_state.results):
            if result["answer"]:
                answer, context = result["answer"], result["context"]
                # NOTE(review): str.find returns -1 when the answer is not a
                # substring of the context, which would mangle the highlight below
                start_idx = context.find(answer)
                end_idx = start_idx + len(answer)
                # Hack due to this bug: https://github.com/streamlit/streamlit/issues/3190
                st.write(
                    markdown(context[:start_idx] + str(annotation(answer, "ANSWER", "#8ef")) + context[end_idx:]),
                    unsafe_allow_html=True,
                )
                # Link back to the source document when the backlink metadata exists
                source = ""
                url, title = get_backlink(result)
                if url and title:
                    source = f"[{result['document']['meta']['title']}]({result['document']['meta']['url']})"
                else:
                    source = f"{result['source']}"
                st.markdown(f"**Relevance:** {result['relevance']} - **Source:** {source}")
            else:
                st.info(
                    "πŸ€” &nbsp;&nbsp; Haystack is unsure whether any of the documents contain an answer to your question. Try to reformulate it!"
                )
                st.write("**Relevance:** ", result["relevance"])

            # Feedback buttons (evaluation mode only, and only for real answers)
            if eval_mode and result["answer"]:
                # Define columns for buttons; widget keys embed context+index to stay unique per result
                is_correct_answer = None
                is_correct_document = None
                button_col1, button_col2, button_col3, _ = st.columns([1, 1, 1, 6])

                if button_col1.button("πŸ‘", key=f"{result['context']}{count}1", help="Correct answer"):
                    is_correct_answer = True
                    is_correct_document = True

                if button_col2.button("πŸ‘Ž", key=f"{result['context']}{count}2", help="Wrong answer and wrong passage"):
                    is_correct_answer = False
                    is_correct_document = False

                if button_col3.button(
                    "πŸ‘ŽπŸ‘", key=f"{result['context']}{count}3", help="Wrong answer, but correct passage"
                ):
                    is_correct_answer = False
                    is_correct_document = True

                # Only submit once one of the three buttons has set both flags
                if is_correct_answer is not None and is_correct_document is not None:
                    try:
                        send_feedback(
                            query=question,
                            answer_obj=result["_raw"],
                            is_correct_answer=is_correct_answer,
                            is_correct_document=is_correct_document,
                            document=result["document"],
                        )
                        st.success("✨ &nbsp;&nbsp; Thanks for your feedback! &nbsp;&nbsp; ✨")
                    except Exception as e:
                        logging.exception(e)
                        st.error("🐞 &nbsp;&nbsp; An error occurred while submitting your feedback!")

            st.write("___")

    if debug:
        st.subheader("REST API JSON response")
        st.write(st.session_state.raw_json)
main()