Krish234 committed on
Commit
7a8c186
·
verified ·
1 Parent(s): 8cdf699

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +155 -0
  2. requirements.txt +10 -0
app.py ADDED
@@ -0,0 +1,155 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import tempfile
3
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
4
+ from langchain.document_loaders import PyPDFLoader
5
+ from langchain.docstore.document import Document
6
+ from langchain.chains.summarize import load_summarize_chain
7
+ from langchain.chains import RetrievalQA
8
+ from langchain.embeddings import HuggingFaceEmbeddings
9
+ from langchain.vectorstores import Chroma
10
+ from langchain.llms import LlamaCpp
11
+ from langchain.prompts import PromptTemplate
12
+ import os
13
+ import pandas as pd
14
+
15
+
16
# Prompt used for the FIRST chunk of the document in the "refine" chain.
# The chain substitutes the chunk's text for the `{text}` placeholder and
# the model is expected to continue after the trailing "QUESTIONS:" marker.
prompt_template_questions = """
You are an expert in creating practice questions based on study material.
Your goal is to prepare a student for their exam. You do this by asking questions about the text below:

------------
{text}
------------

Create questions that will prepare the student for their exam. Make sure not to lose any important information.

QUESTIONS:
"""

# Wrap the raw template so the chain knows `text` is its only input variable.
PROMPT_QUESTIONS = PromptTemplate(
    input_variables=["text"],
    template=prompt_template_questions,
)
30
+
31
# Prompt used for every chunk AFTER the first one in the "refine" chain.
# `{existing_answer}` receives the questions produced so far and `{text}`
# receives the next chunk, so the model can refine or extend the list.
refine_template_questions = """
You are an expert in creating practice questions based on study material.
Your goal is to help a student prepare for an exam.
We have received some practice questions to a certain extent: {existing_answer}.
We have the option to refine the existing questions or add new ones.
(only if necessary) with some more context below.
------------
{text}
------------

Given the new context, refine the original questions in English.
If the context is not helpful, please provide the original questions.

QUESTIONS:
"""

# Both placeholders above must be declared as input variables.
REFINE_PROMPT_QUESTIONS = PromptTemplate(
    template=refine_template_questions,
    input_variables=["existing_answer", "text"],
)
51
+
52
# ---------------------------------------------------------------------------
# Streamlit app: upload a PDF, generate practice questions from its text with
# a local Zephyr-7B model, answer every question with a retrieval-QA chain
# over the same text, and offer the question/answer pairs as a CSV download.
# ---------------------------------------------------------------------------

MODEL_PATH = "zephyr-7b-alpha.Q4_K_M.gguf"
EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
CSV_FILE_NAME = "questions_and_answers.csv"


def extract_questions(raw_output):
    """Return the non-empty, stripped lines of the model's raw output.

    The question-generation chain returns one string with one question per
    line. Blank or whitespace-only lines are dropped so they are never sent
    to the answer chain as if they were questions.

    Args:
        raw_output: Text returned by the question-generation chain
            (may be empty or ``None``).

    Returns:
        A list of question strings, in their original order.
    """
    if not raw_output:
        return []
    return [line.strip() for line in raw_output.split("\n") if line.strip()]


def save_upload_to_temp_pdf(uploaded_file):
    """Write the uploaded PDF to a temporary file and return its path.

    ``getvalue()`` is used instead of ``read()`` so the full content is
    obtained regardless of the buffer's current read position (Streamlit
    re-runs the script on every interaction).

    The caller is responsible for deleting the returned file.
    """
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
        temp_file.write(uploaded_file.getvalue())
        return temp_file.name


def load_pdf_chunks(pdf_path):
    """Load a PDF and return its text as a list of chunked ``Document`` objects."""
    pages = PyPDFLoader(pdf_path).load()

    # Merge all pages into one string so chunks may span page boundaries.
    full_text = "".join(page.page_content for page in pages)

    # NOTE(review): chunk_size is measured in characters while the model's
    # n_ctx (4096) is measured in tokens; large chunks plus the prompt may
    # exceed the context window. Values are kept as in the original.
    splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=50)
    return [Document(page_content=chunk) for chunk in splitter.split_text(full_text)]


def load_llm():
    """Create the local LlamaCpp model used for both questions and answers."""
    return LlamaCpp(
        streaming=True,
        model_path=MODEL_PATH,
        temperature=0.75,
        top_p=1,
        verbose=True,
        n_ctx=4096,
    )


def generate_questions(llm, docs):
    """Run the refine chain over ``docs`` and return the list of questions."""
    question_gen_chain = load_summarize_chain(
        llm=llm,
        chain_type="refine",
        verbose=True,
        question_prompt=PROMPT_QUESTIONS,
        refine_prompt=REFINE_PROMPT_QUESTIONS,
    )
    return extract_questions(question_gen_chain.run(docs))


def build_answer_chain(llm, docs):
    """Index ``docs`` in a Chroma vector store and return a RetrievalQA chain."""
    embeddings = HuggingFaceEmbeddings(
        model_name=EMBEDDING_MODEL,
        model_kwargs={"device": "cpu"},
    )
    vector_store = Chroma.from_documents(docs, embeddings)

    # The number of retrieved chunks must be passed through `search_kwargs`;
    # a bare `k=2` keyword is not a retriever setting.
    retriever = vector_store.as_retriever(search_kwargs={"k": 2})
    return RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever)


def main():
    """Render the app and, once a PDF is uploaded, produce the Q/A pairs."""
    st.title('Question-Answer Pair Generator with Zephyr-7B')
    st.markdown('<style>h1{color: orange; text-align: center;}</style>', unsafe_allow_html=True)

    uploaded_file = st.sidebar.file_uploader("Upload a PDF file", type=["pdf"])
    if not uploaded_file:
        return

    # The temporary copy is only needed while the loader parses it; remove it
    # even if parsing fails so no stray files accumulate.
    pdf_path = save_upload_to_temp_pdf(uploaded_file)
    try:
        docs = load_pdf_chunks(pdf_path)
    finally:
        os.remove(pdf_path)

    if not docs:
        # E.g. a scanned PDF without a text layer: nothing to ask about.
        st.warning("No extractable text was found in the uploaded PDF.")
        return

    # One model instance serves both chains; the original loaded the same
    # GGUF file twice with identical settings.
    llm = load_llm()
    questions = generate_questions(llm, docs)
    answer_gen_chain = build_answer_chain(llm, docs)

    question_answer_pairs = []
    for question in questions:
        st.write("Question: ", question)
        answer = answer_gen_chain.run(question)
        question_answer_pairs.append([question, answer])
        st.write("Answer: ", answer)
        st.write("--------------------------------------------------\n\n")

    qa_df = pd.DataFrame(question_answer_pairs, columns=["Question", "Answer"])

    # Hand the CSV *content* to the download button. Passing a file path
    # would make the browser download the path text itself. Building the CSV
    # in memory also avoids a shared, fixed-name temp file across sessions.
    csv_bytes = qa_df.to_csv(index=False).encode("utf-8")
    st.markdown('### Download Questions and Answers in CSV')
    st.download_button(
        "Download Questions and Answers (CSV)",
        data=csv_bytes,
        file_name=CSV_FILE_NAME,
        mime="text/csv",
    )


# `streamlit run` executes the script with __name__ set to "__main__", so the
# app still starts normally while the module stays importable without side
# effects.
if __name__ == "__main__":
    main()
155
+
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
langchain
streamlit
huggingface_hub
chromadb
pypdf
sentence-transformers
torch
accelerate
llama-cpp-python
# app.py imports pandas directly, so declare it rather than rely on a
# transitive install.
pandas
10
+