"""Streamlit QnA app for Swinburne's Bachelor of Computer Science program.

The script splits ``./data/full_context.txt`` into chunks, indexes them in a
FAISS vector store using OpenAI embeddings, and answers a typed question by
running one of two extractive question-answering pipelines (a RoBERTa model
and a BERT model loaded from ``./models``) over the chunks returned by a
similarity search.

The OpenAI API key is read from the ``OPENAI_API_KEY`` environment variable;
it must be set before the app is started.
"""
import os

import streamlit as st
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from PIL import Image
from streamlit_option_menu import option_menu
from transformers import TFAutoModelForQuestionAnswering, AutoTokenizer, pipeline

# SECURITY: an OpenAI secret key used to be hard-coded on this line. Secrets
# must never live in source control; the key that was committed here should be
# treated as compromised and revoked. The key is now taken from the
# environment, and the app refuses to start without it.
if not os.environ.get("OPENAI_API_KEY"):
    st.error(
        "The OPENAI_API_KEY environment variable is not set. "
        "Set it before starting the app."
    )
    st.stop()

CONTEXT_PATH = "./data/full_context.txt"

# A candidate answer must score strictly above this to be returned at all.
MIN_ANSWER_SCORE = 0.01
# Above this score the UI shows the answer together with its score.
DISPLAY_SCORE_THRESHOLD = 0.5

NO_ANSWER_MESSAGE = (
    "No Answer found. Please ask question related to Bachelor of Computer "
    "Science program at Swinburne."
)


@st.cache_resource
def _build_docsearch():
    """Read the context file, chunk it, and build the FAISS index.

    Cached because Streamlit re-executes the whole script on every user
    interaction; without the cache each question would re-embed the entire
    document through the OpenAI API.
    """
    with open(CONTEXT_PATH, "r") as file1:
        doc = file1.read()

    # Splitting up the text into smaller chunks for indexing
    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,
        chunk_overlap=200,  # striding over the text
        length_function=len,
    )
    texts = text_splitter.split_text(doc)

    # Download embeddings from OpenAI
    embeddings = OpenAIEmbeddings()
    return FAISS.from_texts(texts, embeddings)


@st.cache_resource
def _load_question_answerer(model_path, tokenizer_name):
    """Build a question-answering pipeline from a local model directory.

    Args:
        model_path: Directory passed to
            ``TFAutoModelForQuestionAnswering.from_pretrained``.
        tokenizer_name: Identifier passed to ``AutoTokenizer.from_pretrained``.

    Returns:
        A ``transformers`` "question-answering" pipeline. Cached so the model
        is loaded once rather than on every script rerun.
    """
    model = TFAutoModelForQuestionAnswering.from_pretrained(model_path)
    tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
    return pipeline("question-answering", model=model, tokenizer=tokenizer)


docsearch = _build_docsearch()

# Load roberta model
question_answerer0 = _load_question_answerer(
    "./models/roberta_model", 'deepset/roberta-base-squad2'
)

# Load bert base model
question_answerer1 = _load_question_answerer(
    "./models/bert_finetuned_model", 'huggingface-course/bert-finetuned-squad'
)


def QnAfunction(question, QnAmodel):
    """Answer ``question`` using the best-scoring retrieved chunk.

    Runs a similarity search on the module-level ``docsearch`` index, feeds
    each returned chunk to ``QnAmodel`` as context, and keeps the answer with
    the highest score.

    Args:
        question: The user's question text.
        QnAmodel: A callable accepting ``question=`` and ``context=`` keyword
            arguments and returning a mapping with ``'score'`` and
            ``'answer'`` keys (the pipelines built above are used for this).

    Returns:
        ``(answer, score)`` for the best candidate whose score exceeds
        ``MIN_ANSWER_SCORE``. If no candidate qualifies, or the best
        candidate's answer text is empty, returns ``(NO_ANSWER_MESSAGE, 0)``.
    """
    best_score = MIN_ANSWER_SCORE
    best_answer = ''
    for found in docsearch.similarity_search(question):
        result = QnAmodel(question=question, context=found.page_content)
        if result['score'] > best_score:
            best_score = result['score']
            best_answer = result['answer']

    if best_answer != '':
        return best_answer, best_score
    return NO_ANSWER_MESSAGE, 0


def _render_answer(question, question_answerer):
    """Run ``QnAfunction`` and write the outcome to the page.

    Results scoring above ``DISPLAY_SCORE_THRESHOLD`` are shown with
    "Answer:" / "Score:" labels; anything else (including the no-answer
    message) is written as plain text.
    """
    answer, score = QnAfunction(question, question_answerer)
    if score > DISPLAY_SCORE_THRESHOLD:
        st.write("Answer: ", answer)
        st.write("Score: ", score)
    else:
        st.write(answer)


# GUI with Streamlit
# NOTE(review): this markdown body is blank in the source as received --
# presumably a custom <style> block was stripped out; restore it or delete
# this call.
st.markdown(""" """, unsafe_allow_html=True)

with st.sidebar:
    selected = option_menu(
        "Model selection",
        ["Roberta base squad2", "Bert finetuned squad"],
        icons=['box-fill', 'box-fill'],
        menu_icon="cast",
        default_index=0,
    )

# NOTE(review): the original indentation was lost, so it is unclear whether
# the logo belonged inside the sidebar block; it is placed on the main page
# here -- confirm against the intended layout.
image = Image.open('Swinburne_Logo.png')
st.image(image)

# NOTE(review): any HTML wrapper around the title appears to have been
# stripped from the source; only the text survives. Typo "progrom" fixed.
st.markdown(
    "\n\nQnA for Swinburne's Bachelor of Computer Science program\n\n",
    unsafe_allow_html=True,
)
st.write("- ", selected)

if selected == "Roberta base squad2":
    text0 = st.text_area(
        "Type question (Eg. What is the duration of the Bachelor of Computer Science program?):",
        max_chars=350,
    )
    if text0:
        _render_answer(text0, question_answerer0)
elif selected == "Bert finetuned squad":
    # The trailing space in this label is kept from the original: the two
    # text areas have no explicit key, so the differing labels are what keep
    # them distinct widgets.
    text1 = st.text_area(
        "Type question (Eg. What is the duration of the Bachelor of Computer Science program?): ",
        max_chars=350,
    )
    if text1:
        # Feed the question to the model
        _render_answer(text1, question_answerer1)