File size: 3,897 Bytes
16c6bcf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
import os
import streamlit as st
from streamlit_option_menu import option_menu
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS 
from transformers import TFAutoModelForQuestionAnswering, AutoTokenizer, pipeline

# SECURITY FIX: the original hard-coded a live OpenAI API key in source.
# A key committed to version control is compromised and must be rotated;
# require it from the environment instead of embedding it here.
if "OPENAI_API_KEY" not in os.environ:
    raise RuntimeError(
        "OPENAI_API_KEY environment variable is not set. "
        "Export it before launching the app."
    )

# Read the full corpus that serves as retrieval context for the QA models.
with open("./data/full_context.txt", "r", encoding="utf-8") as file1:
    doc = file1.read()

# Split the text into overlapping chunks so each fits a model context window.
text_splitter = CharacterTextSplitter(
    separator="\n",
    chunk_size=1000,     # max characters per chunk
    chunk_overlap=200,   # stride over the text so answers spanning chunks survive
    length_function=len,
)
texts = text_splitter.split_text(doc)

# Embed the chunks with OpenAI embeddings and index them in FAISS
# for similarity search at question time.
embeddings = OpenAIEmbeddings()
docsearch = FAISS.from_texts(texts, embeddings)

def _load_qa_pipeline(model_path, tokenizer_name):
    """Load a fine-tuned TF question-answering model and pair it with its tokenizer.

    Args:
        model_path: Local directory containing the fine-tuned model weights.
        tokenizer_name: Hub id of the tokenizer matching the base model.

    Returns:
        A transformers "question-answering" pipeline ready for inference.
    """
    model = TFAutoModelForQuestionAnswering.from_pretrained(model_path)
    tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
    return pipeline("question-answering", model=model, tokenizer=tokenizer)


# RoBERTa fine-tuned on SQuAD2.
question_answerer0 = _load_qa_pipeline(
    "./models/roberta_model", "deepset/roberta-base-squad2"
)
# BERT fine-tuned on SQuAD.
question_answerer1 = _load_qa_pipeline(
    "./models/bert_finetuned_model", "huggingface-course/bert-finetuned-squad"
)


def QnAfunction(question, QnAmodel, score_threshold=0.5):
    """Answer *question* using *QnAmodel* over FAISS-retrieved context chunks.

    Runs the QA pipeline against each similar document chunk and keeps the
    highest-confidence answer above ``score_threshold``.

    Args:
        question: Natural-language question string.
        QnAmodel: A transformers question-answering pipeline.
        score_threshold: Minimum confidence required to accept an answer.
            Defaults to 0.5, matching the original hard-coded behavior.

    Returns:
        Tuple ``(answer, score)`` for the best answer found, otherwise a
        fallback message and a score of 0.
    """
    best_score = score_threshold
    best_answer = ''
    # Score every retrieved chunk; keep only the single best answer.
    for found in docsearch.similarity_search(question):
        doc_result = QnAmodel(question=question, context=found.page_content)
        if doc_result['score'] > best_score:
            best_score = doc_result['score']
            best_answer = doc_result['answer']

    if best_answer != '':
        return best_answer, best_score
    return "No Answer found. Please ask question related to Bachelor of Computer Science program at Swinburne.", 0
        # print("No Answer found. Please ask question related to Bachelor of Computer Science program at Swinburne.")

# GUI with Streamlit
st.markdown("""
    <style> 
        .big-font {
            margin: 50px 0 10px 0 !important;
            font-size:25px !important;
            font-weight: bold !important;
        }
    </style>
    """, unsafe_allow_html=True)

with st.sidebar:
    selected = option_menu("Model selection", ["Roberta base squad2", "Bert finetuned squad"], 
        icons=['box-fill', 'box-fill'], menu_icon="cast", default_index=0)

# Both menu entries render the identical page; only the QA pipeline differs,
# so dispatch through a dict instead of duplicating the whole branch.
_pipeline_for_choice = {
    "Roberta base squad2": question_answerer0,
    "Bert finetuned squad": question_answerer1,
}

if selected in _pipeline_for_choice:
    # BUGFIX: corrected user-facing typo "progrom" -> "program".
    st.markdown('<p class="big-font">QnA for Swinburne\'s Bachelor of Computer Science program</p>', unsafe_allow_html=True)
    st.write("- ", selected)
    question_text = st.text_area("Type question (Eg. What is the duration of the Bachelor of Computer Science program?): ")
    if question_text:
        # Feed the question to the selected model.
        ans, score = QnAfunction(question_text, _pipeline_for_choice[selected])
        # Only surface the confidence when the answer cleared the threshold;
        # otherwise show the fallback message alone.
        if score > 0.5:
            st.write("Answer: ", ans)
            st.write("Score: ", score)
        else:
            st.write(ans)