Update app.py
app.py CHANGED
@@ -14,8 +14,10 @@ import re
 from llama_index.llms.cohere import Cohere
 from llama_index.embeddings.cohere import CohereEmbedding
 
-
-from llama_index.core.
+
+from llama_index.core.memory import ChatMemoryBuffer
+from llama_index.core.chat_engine import CondensePlusContextChatEngine
+
 import gradio as gr
 import uuid
 
@@ -31,11 +33,8 @@ embedding_model = CohereEmbedding(
     input_type="search_document",
     embedding_type="int8",)
 
-query_model = CohereEmbedding(
-    api_key=api_key,
-    model_name="embed-multilingual-v3.0",
-    input_type="search_query",)
 
+memory = ChatMemoryBuffer.from_defaults(token_limit=3900)
 
 # Set Global settings
 Settings.llm = llm
@@ -43,7 +42,9 @@ Settings.embed_model=embedding_model
 # set context window
 Settings.context_window = 4096
 # set number of output tokens
-Settings.num_output =
+Settings.num_output = 512
+
+
 
 db_path=""
 
@@ -95,15 +96,7 @@ def infer(message:str, history: list):
     messages = []
     files_list = message["files"]
 
-
-    for prompt,answer in history:
-        if prompt is tuple:
-            files_list += prompt[0]
-        else:
-            messages.append(ChatMessage(role= "user", content = prompt))
-            messages.append(ChatMessage(role= "assistant", content = answer))
-
-
+
     if files_list:
         documents, option = extract_doc(files_list)
         db_path = create_col(documents)
@@ -112,7 +105,7 @@ def infer(message:str, history: list):
         documents, option = extract_web(message["text"])
         db_path = create_col(documents)
     elif not message["text"].startswith("http://") and not message["text"].startswith("https://") and len(history) == 0:
-        gr.Error("Please input an url or upload file at first.")
+        return gr.Error("Please input an url or upload file at first.")
 
 
     # Load from disk
@@ -129,23 +122,26 @@ def infer(message:str, history: list):
         vector_store,
     )
 
-
-    template = (
-    """ You are an assistant for question-answering tasks.
-    Use the following context to answer the question.
-    If you don't know the answer, just say that you don't know.
-    Use five sentences maximum and keep the answer concise.\n
-    Question: {query_str} \nContext: {context_str} \nAnswer:"""
-    )
-    llm_prompt = PromptTemplate(template)
-    print(llm_prompt)
-
     if option == "web" and len(history) == 0:
         response = "Get the web data! You can ask it."
     else:
         question = message['text']
-
-
+
+        chat_engine = CondensePlusContextChatEngine.from_defaults(
+            index.as_retriever(),
+            memory=memory,
+            context_prompt=(
+                "You are a chatbot, able to have normal interactions, as well as talk"
+                " about the Kendrick and Drake beef."
+                "Here are the relevant documents for the context:\n"
+                "{context_str}"
+                "\nInstruction: Use the previous chat history, or the context above, to interact and help the user."
+            ),
+            verbose=True,
+        )
+        response = chat_engine.chat(
+            question
+        )
 
     print(type(response))
     print(f'response: {response}')
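
Taken together, the change replaces the one-shot PromptTemplate QA path and the hand-rolled ChatMessage history loop with a condense-plus-context chat engine: each turn is condensed against the stored chat history into a standalone question, context is retrieved for that question, and the conversation lives in a token-limited memory buffer instead of being rebuilt from Gradio's history list on every call. A minimal, self-contained sketch of the new wiring, using mock models so it runs offline (the app itself wires up Cohere for both the LLM and the embeddings, and loads its index from a Chroma collection):

    from llama_index.core import Document, MockEmbedding, Settings, VectorStoreIndex
    from llama_index.core.chat_engine import CondensePlusContextChatEngine
    from llama_index.core.llms import MockLLM
    from llama_index.core.memory import ChatMemoryBuffer

    # Stand-ins so the sketch runs offline; the app uses Cohere + CohereEmbedding.
    Settings.llm = MockLLM()
    Settings.embed_model = MockEmbedding(embed_dim=8)

    # Toy index; the app loads its VectorStoreIndex from a Chroma collection.
    index = VectorStoreIndex.from_documents([Document(text="Some ingested page text.")])

    # Rolling chat history capped at ~3900 tokens; this replaces the removed
    # loop that rebuilt ChatMessage objects from Gradio's `history` by hand.
    memory = ChatMemoryBuffer.from_defaults(token_limit=3900)

    chat_engine = CondensePlusContextChatEngine.from_defaults(
        index.as_retriever(),  # condensed question -> retrieval -> answer
        memory=memory,
        context_prompt=(
            "Here are the relevant documents for the context:\n"
            "{context_str}"
            "\nInstruction: Use the previous chat history, or the context above, "
            "to interact and help the user."
        ),
        verbose=True,  # prints the condensed question and retrieved context
    )

    # chat() returns an AgentChatResponse; str(response) is the reply text,
    # which is what the print(f'response: {response}') debug line renders.
    response = chat_engine.chat("What does the ingested page say?")
    print(type(response), str(response))

Two things worth keeping in mind: the committed context_prompt still carries the "Kendrick and Drake beef" wording from the LlamaIndex docs example, which presumably wants replacing with text about the user's own documents; and `memory` is created at module level in app.py, so every Gradio session shares one buffer. Per-session memory would have to be created inside `infer` (for instance keyed by the session's uuid) if isolation matters.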
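One small nit on the error path: `gr.Error` is an exception class in Gradio, and the error alert only appears in the UI when it is raised. The new `return gr.Error(...)` does stop `infer` from falling through to retrieval, which the old bare call did not, but the message itself will not be displayed. A sketch of the raising variant (with the message's grammar tidied up):

    elif not message["text"].startswith(("http://", "https://")) and len(history) == 0:
        raise gr.Error("Please input a URL or upload a file first.")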