Kelvinhjk committed
Commit 16c6bcf · 1 Parent(s): e198fdc

Upload app.py

Files changed (1)
  1. app.py +107 -0
app.py ADDED
@@ -0,0 +1,107 @@
+ import os
+ import streamlit as st
+ from streamlit_option_menu import option_menu
+ from langchain.embeddings.openai import OpenAIEmbeddings
+ from langchain.text_splitter import CharacterTextSplitter
+ from langchain.vectorstores import FAISS
+ from transformers import TFAutoModelForQuestionAnswering, AutoTokenizer, pipeline
+
+ # Do not hard-code a real API key; load it from the environment or Streamlit secrets instead
+ os.environ["OPENAI_API_KEY"] = "<YOUR_OPENAI_API_KEY>"
+
+ # Read the source document used as the QA knowledge base
+ with open("./data/full_context.txt", "r") as file1:
+     doc = file1.read()
+
+ # Split the text into smaller overlapping chunks for indexing
+ text_splitter = CharacterTextSplitter(
+     separator="\n",
+     chunk_size=1000,
+     chunk_overlap=200,  # stride over the text so neighbouring chunks share context
+     length_function=len,
+ )
+ texts = text_splitter.split_text(doc)
+
+ # Download embeddings from OpenAI and index the chunks with FAISS
+ embeddings = OpenAIEmbeddings()
+ docsearch = FAISS.from_texts(texts, embeddings)
+
+ # Load the RoBERTa model
+ model_path0 = "./models/roberta_model"
+ model0 = TFAutoModelForQuestionAnswering.from_pretrained(model_path0)
+ tokenizer0 = AutoTokenizer.from_pretrained('deepset/roberta-base-squad2')
+ # Initialize a Transformers pipeline with our own model and tokenizer
+ question_answerer0 = pipeline("question-answering", model=model0, tokenizer=tokenizer0)
+
+ # Load the BERT base model
+ model_path1 = "./models/bert_finetuned_model"
+ model1 = TFAutoModelForQuestionAnswering.from_pretrained(model_path1)
+ tokenizer1 = AutoTokenizer.from_pretrained('huggingface-course/bert-finetuned-squad')
+ # Initialize a Transformers pipeline with our own model and tokenizer
+ question_answerer1 = pipeline("question-answering", model=model1, tokenizer=tokenizer1)
+
+
+ def QnAfunction(question, QnAmodel):
+     # Retrieve the chunks most similar to the question and keep the highest-scoring answer
+     docs_found = docsearch.similarity_search(question)
+     score = 0.5
+     answer = ''
+     for doc in docs_found:
+         doc_result = QnAmodel(question=question, context=doc.page_content)
+         if doc_result['score'] > score:
+             score = doc_result['score']
+             answer = doc_result['answer']
+
+     if answer != '':
+         return answer, score
+     else:
+         return "No answer found. Please ask a question related to the Bachelor of Computer Science program at Swinburne.", 0
+
+ # GUI with Streamlit
+ st.markdown("""
+     <style>
+     .big-font {
+         margin: 50px 0 10px 0 !important;
+         font-size: 25px !important;
+         font-weight: bold !important;
+     }
+     </style>
+ """, unsafe_allow_html=True)
+
+ with st.sidebar:
+     selected = option_menu("Model selection", ["Roberta base squad2", "Bert finetuned squad"],
+                            icons=['box-fill', 'box-fill'], menu_icon="cast", default_index=0)
+
+ if selected == "Roberta base squad2":
+     st.markdown('<p class="big-font">QnA for Swinburne\'s Bachelor of Computer Science program</p>', unsafe_allow_html=True)
+     st.write("- ", selected)
+     text0 = st.text_area("Type a question (e.g. What is the duration of the Bachelor of Computer Science program?): ")
+     if text0:
+         # Feed the question to the RoBERTa pipeline
+         ans, score = QnAfunction(text0, question_answerer0)
+         if score > 0.5:
+             st.write("Answer: ", ans)
+             st.write("Score: ", score)
+         else:
+             st.write(ans)
+
+
+ elif selected == "Bert finetuned squad":
+     st.markdown('<p class="big-font">QnA for Swinburne\'s Bachelor of Computer Science program</p>', unsafe_allow_html=True)
+     st.write("- ", selected)
+     text1 = st.text_area("Type a question (e.g. What is the duration of the Bachelor of Computer Science program?): ")
+     if text1:
+         # Feed the question to the BERT pipeline
+         ans, score = QnAfunction(text1, question_answerer1)
+         if score > 0.5:
+             st.write("Answer: ", ans)
+             st.write("Score: ", score)
+         else:
+             st.write(ans)
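
As a quick sanity check of the pipelines defined in app.py, one could call a question-answering pipeline directly on a single context string, bypassing the FAISS retrieval step. This is a minimal sketch and not part of the committed file: the sample question and context below are illustrative placeholders, and it assumes the model files under ./models load successfully.

# Minimal sanity check (illustrative question and context, not from the committed app)
sample_context = "The Bachelor of Computer Science is offered as a three-year full-time program."  # hypothetical text
result = question_answerer0(
    question="How long is the Bachelor of Computer Science program?",
    context=sample_context,
)
print(result["answer"], result["score"])  # the pipeline returns the extracted span and a confidence score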