import os

import streamlit as st
from streamlit_option_menu import option_menu
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from transformers import TFAutoModelForQuestionAnswering, AutoTokenizer, pipeline

# Set the OpenAI API key (replace the placeholder with your own key; avoid hardcoding real keys)
os.environ["OPENAI_API_KEY"] = "YOUR_OPENAI_API_KEY"

# Read data
with open("./data/full_context.txt", "r") as file1:
    doc = file1.read()

# Split the text into smaller chunks for indexing
text_splitter = CharacterTextSplitter(
    separator="\n",
    chunk_size=1000,
    chunk_overlap=200,  # striding over the text
    length_function=len,
)
texts = text_splitter.split_text(doc)

# Download embeddings from OpenAI and build the FAISS index
embeddings = OpenAIEmbeddings()
docsearch = FAISS.from_texts(texts, embeddings)

# Load RoBERTa model
model_path0 = "./models/roberta_model"
model0 = TFAutoModelForQuestionAnswering.from_pretrained(model_path0)
tokenizer0 = AutoTokenizer.from_pretrained("deepset/roberta-base-squad2")
# Initialize a Transformers pipeline with our own model and tokenizer
question_answerer0 = pipeline("question-answering", model=model0, tokenizer=tokenizer0)

# Load BERT base model
model_path1 = "./models/bert_finetuned_model"
model1 = TFAutoModelForQuestionAnswering.from_pretrained(model_path1)
tokenizer1 = AutoTokenizer.from_pretrained("huggingface-course/bert-finetuned-squad")
# Initialize a Transformers pipeline with our own model and tokenizer
question_answerer1 = pipeline("question-answering", model=model1, tokenizer=tokenizer1)


def QnAfunction(question, QnAmodel):
    # Retrieve the most similar chunks and keep the highest-scoring answer above the threshold
    docs_found = docsearch.similarity_search(question)
    score = 0.5
    answer = ""
    for doc in docs_found:
        doc_result = QnAmodel(question=question, context=doc.page_content)
        if doc_result["score"] > score:
            score = doc_result["score"]
            answer = doc_result["answer"]

    if answer != "":
        return answer, score
    else:
        return "No answer found. Please ask a question related to the Bachelor of Computer Science program at Swinburne.", 0


# GUI with Streamlit
st.markdown("""
""", unsafe_allow_html=True)

with st.sidebar:
    selected = option_menu(
        "Model selection",
        ["Roberta base squad2", "Bert finetuned squad"],
        icons=["box-fill", "box-fill"],
        menu_icon="cast",
        default_index=0,
    )

if selected == "Roberta base squad2":
    st.markdown("<h1>QnA for Swinburne's Bachelor of Computer Science program</h1>", unsafe_allow_html=True)
    st.write("- ", selected)
    text0 = st.text_area("Type question (Eg. What is the duration of the Bachelor of Computer Science program?): ")
    if text0:
        # Feed the question to the RoBERTa pipeline
        ans, score = QnAfunction(text0, question_answerer0)
        if score > 0.5:
            st.write("Answer: ", ans)
            st.write("Score: ", score)
        else:
            st.write(ans)

elif selected == "Bert finetuned squad":
    st.markdown("<h1>QnA for Swinburne's Bachelor of Computer Science program</h1>", unsafe_allow_html=True)
    st.write("- ", selected)
    text1 = st.text_area("Type question (Eg. What is the duration of the Bachelor of Computer Science program?): ")
    if text1:
        # Feed the question to the BERT pipeline
        ans, score = QnAfunction(text1, question_answerer1)
        if score > 0.5:
            st.write("Answer: ", ans)
            st.write("Score: ", score)
        else:
            st.write(ans)
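
# A minimal usage sketch, assuming this script is saved as app.py and the
# ./data and ./models paths above exist (the filename is an assumption, not
# something fixed by this script). The app is launched with Streamlit:
#
#   streamlit run app.py
#
# QnAfunction can also be called directly for a quick sanity check, e.g.:
#
#   ans, score = QnAfunction(
#       "What is the duration of the Bachelor of Computer Science program?",
#       question_answerer0,
#   )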