Spaces:
Sleeping
Sleeping
aryarishit
committed on
Commit
•
dad4fc7
1
Parent(s):
75e431a
Upload app.py
Browse files
app.py
ADDED
@@ -0,0 +1,97 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import time
|
2 |
+
import streamlit as st
|
3 |
+
from langchain_community.llms import CTransformers
|
4 |
+
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
|
5 |
+
from llama_index.core import Settings, SimpleDirectoryReader, VectorStoreIndex
|
6 |
+
from llama_index.core.retrievers import VectorIndexRetriever
|
7 |
+
from llama_index.core.query_engine import RetrieverQueryEngine
|
8 |
+
from llama_index.core.postprocessor import SimilarityPostprocessor
|
9 |
+
|
10 |
+
def load_llm(model_name='aryarishit/phi3-unsloth-resumebot-GGUF',
             max_new_tokens=128, temperature=0.5):
    """Load the GGUF-quantized chat model through CTransformers.

    Previously the checkpoint and sampling settings were hard-coded; they are
    now parameters with the original values as defaults, so existing no-arg
    callers behave identically.

    Args:
        model_name: Hugging Face repo id of the GGUF model to load.
        max_new_tokens: Generation cap passed to CTransformers.
        temperature: Sampling temperature passed to CTransformers.

    Returns:
        A configured ``CTransformers`` LLM instance.
    """
    llm = CTransformers(
        model=model_name,
        max_new_tokens=max_new_tokens,
        temperature=temperature,
    )
    return llm
|
19 |
+
|
20 |
+
def get_index(docs_dir='Info_docs', chunk_size=84, chunk_overlap=25):
    """Build an in-memory vector index over the documents in *docs_dir*.

    Previously the directory and chunking settings were hard-coded; they are
    now parameters with the original values as defaults (backward compatible).

    Args:
        docs_dir: Directory read by ``SimpleDirectoryReader``.
        chunk_size: Token chunk size applied via global ``Settings``.
        chunk_overlap: Overlap between consecutive chunks.

    Returns:
        A ``VectorStoreIndex`` built from the loaded documents.
    """
    # Embeddings via sentence-transformers; no LLM is attached to the index —
    # this app only uses it for retrieval (see get_context).
    Settings.embed_model = HuggingFaceEmbedding(
        model_name="sentence-transformers/all-mpnet-base-v2"
    )
    Settings.llm = None
    # Chunk settings must be in place before from_documents() splits the docs.
    Settings.chunk_size = chunk_size
    Settings.chunk_overlap = chunk_overlap

    docs = SimpleDirectoryReader(docs_dir).load_data()
    return VectorStoreIndex.from_documents(docs)
|
29 |
+
|
30 |
+
def get_context(index, query, top_k=2, similarity_cutoff=0.2):
    """Retrieve the *top_k* most similar chunks for *query* from *index*.

    Fixes: removed the no-op self-assignments (``top_k = top_k``,
    ``query = query``) and replaced the quadratic string-concatenation loop
    with a single ``join``. The similarity cutoff (previously a hard-coded
    0.2) is now a parameter with the same default.

    Args:
        index: A llama_index ``VectorStoreIndex`` to retrieve from.
        query: The user question used for similarity search.
        top_k: Number of chunks to retrieve.
        similarity_cutoff: Nodes scoring below this are dropped.

    Returns:
        The retrieved chunk texts concatenated, each followed by a blank line.
    """
    # configure retriever
    retriever = VectorIndexRetriever(
        index=index,
        similarity_top_k=top_k,
    )

    # assemble query engine; the postprocessor filters low-score nodes
    query_engine = RetrieverQueryEngine(
        retriever=retriever,
        node_postprocessors=[
            SimilarityPostprocessor(similarity_cutoff=similarity_cutoff)
        ],
    )

    # query documents and concatenate the retrieved chunk texts
    response = query_engine.query(query)
    return "".join(node.text + "\n\n" for node in response.source_nodes)
|
54 |
+
|
55 |
+
def get_alpaca_prompt(context, query):
    """Fill the model's instruction template with retrieved context and the question.

    The template text is reproduced verbatim (including its spelling and tag
    quirks) because the fine-tuned model was presumably trained against this
    exact prompt format.

    Args:
        context: Retrieved document text inserted after ``Context:``.
        query: User question inserted after ``Question:``.

    Returns:
        The fully formatted prompt string ending in ``Answer:``.
    """
    template = '''[INST]Consider you are assistant to Rishit Arya, and answers on behalf of him, Given the following context and a question, generate an answer based on the given context only. If the answer to the question is not found in the context, strictly state "I don't know." only, don't try to make up an answer.Answer pricesly to what is asked it as if you are answering to Rishit's potential client. \nContext:{}
Question:{}[\INST] \nAnswer:'''
    return template.format(context, query)
|
65 |
+
|
66 |
+
# --- Streamlit UI ---------------------------------------------------------
st.title("Ask-Rishit")

# Seed session-state slots so reruns can distinguish "never loaded" (absent)
# from "not loaded yet" (None).
for _key in ("llm", "embeddings"):
    if _key not in st.session_state:
        st.session_state[_key] = None

# Load the LLM once per session; subsequent reruns reuse the cached instance.
if st.session_state['llm'] is None:
    with st.spinner('Loading the model'):
        st.session_state['llm'] = load_llm()

# Build the vector index once per session.
if st.session_state['embeddings'] is None:
    st.session_state['embeddings'] = get_index()

query = st.text_input('Enter your Question')

ready = (st.session_state['llm'] is not None
         and st.session_state['embeddings'] is not None)
if st.button('Generate') and ready:
    with st.spinner('Generating.......'):
        llm = st.session_state['llm']
        index = st.session_state['embeddings']

        # Retrieve supporting context, show it, and build the final prompt.
        context = get_context(index, query)
        st.write(f"Context: {context}")
        prompt = get_alpaca_prompt(context, query)

        # Time only the generation call.
        started = time.time()
        answer = llm.invoke(prompt)
        elapsed = round(time.time() - started, 2)

        st.write(answer)
        st.caption(f'Time taken:{elapsed}')
|