import gradio as gr
import pandas as pd

from haystack.document_stores import InMemoryDocumentStore
from haystack.pipelines import FAQPipeline, ExtractiveQAPipeline
from haystack.nodes import EmbeddingRetriever, TfidfRetriever, FARMReader, TextConverter, PreProcessor

# FAQ Haystack pipeline setup

def start_haystack():
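    """Build the FAQ pipeline: an in-memory document store with 384-dim cosine
    embeddings, an EmbeddingRetriever backed by all-MiniLM-L6-v2, and the FAQ
    data loaded in before wrapping everything in Haystack's FAQPipeline."""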
    document_store = InMemoryDocumentStore(index="document", embedding_field="embedding", embedding_dim=384, similarity="cosine")
    retriever = EmbeddingRetriever(document_store=document_store, embedding_model="sentence-transformers/all-MiniLM-L6-v2", use_gpu=True, top_k=1)
    load_data_to_store(document_store, retriever)
    pipeline = FAQPipeline(retriever=retriever)
    return pipeline

def load_data_to_store(document_store, retriever):
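    """Read the Monopoly FAQ CSV, embed every question with the retriever, rename
    the columns to the fields Haystack expects (content/answer), drop the source
    link column, and write the rows into the document store."""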
    df = pd.read_csv('monopoly_qa-v1.csv')
    questions = list(df.Question)
    df['embedding'] = retriever.embed_queries(texts=questions)
    df = df.rename(columns={"Question": "content", "Answer": "answer"})
    df.drop('link to source (to prevent duplicate sources)', axis=1, inplace=True)
    
    dicts = df.to_dict(orient="records")
    document_store.write_documents(dicts)
    
faq_pipeline = start_haystack()
 
def predict_faq(question):
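    """Run the FAQ pipeline on the user's question and return the best-matching
    FAQ entry, formatted as the matched question plus its stored answer."""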
    prediction = faq_pipeline.run(query=question)
    answer = prediction["answers"][0].meta
    faq_response = "FAQ Question: " + answer["query"] + "\n" + "Answer: " + answer["answer"]
    return faq_response
    
# Extractive QA functions

## preprocess monopoly rules

def preprocess_txt_doc(fpath):
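    """Convert the Monopoly rules text file into a Haystack Document, then split
    it into 100-word passages (respecting sentence boundaries) for retrieval."""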

    converter = TextConverter(remove_numeric_tables=True, valid_languages=["en"])
    doc_txt = converter.convert(file_path=fpath, meta=None)[0]
    preprocessor = PreProcessor(
        clean_empty_lines=True,
        clean_whitespace=True,
        clean_header_footer=False,
        split_by="word",
        split_length=100,
        split_respect_sentence_boundary=True,)
    docs = preprocessor.process([doc_txt])
    return docs

def start_ex_haystack(documents):
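    """Build the extractive QA pipeline: write the preprocessed passages into an
    in-memory store, retrieve with TF-IDF, and read answers with a RoBERTa model
    fine-tuned on SQuAD 2.0."""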
    ex_document_store = InMemoryDocumentStore()
    ex_document_store.write_documents(documents)
    retriever = TfidfRetriever(document_store=ex_document_store)
    reader = FARMReader(model_name_or_path="deepset/roberta-base-squad2", use_gpu=False)
    pipe = ExtractiveQAPipeline(reader, retriever)
    return pipe

docs = preprocess_txt_doc("monopoly_text_v1.txt")
ex_pipeline = start_ex_haystack(docs)

def predict_extract(question):
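    """Run the extractive QA pipeline and return the candidate answers as a
    numbered string, one answer per line."""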
    prediction = ex_pipeline.run(query=question)
    possible_answers = ""
    for i, a in enumerate(prediction["answers"]):
        possible_answers = possible_answers + str(i) + ":" + a.answer + "\n"
    return possible_answers
 
# Gradio App section 
input_question = gr.inputs.Textbox(label="Enter your Monopoly question here")
response = "text"
examples = ["how much cash do we get to start with?", "at what point can I buy houses?", "what happens when I land on free parking?"]

mon_faq = gr.Interface(
            fn=predict_faq,
            inputs=input_question,
            outputs=response,
            examples=examples,
            title="Monopoly FAQ Semantic Search")

# extractive interface
mon_ex = gr.Interface(
            fn=predict_extract,
            inputs=input_question,
            outputs=response,
            examples=examples,
            title="Monopoly Extractive QA Search")
 
gr.TabbedInterface([mon_faq, mon_ex]).launch()