Artteiv committed on
Commit
75df934
·
verified ·
1 Parent(s): 88cdf09

Upload 39 files

Browse files
Files changed (39) hide show
  1. README.md +12 -12
  2. chat/__init__.py +9 -0
  3. chat/__pycache__/__init__.cpython-310.pyc +0 -0
  4. chat/__pycache__/admin.cpython-310.pyc +0 -0
  5. chat/__pycache__/apps.cpython-310.pyc +0 -0
  6. chat/__pycache__/consumers.cpython-310.pyc +0 -0
  7. chat/__pycache__/model_manage.cpython-310.pyc +0 -0
  8. chat/__pycache__/models.cpython-310.pyc +0 -0
  9. chat/__pycache__/routing.cpython-310.pyc +0 -0
  10. chat/__pycache__/urls.cpython-310.pyc +0 -0
  11. chat/__pycache__/views.cpython-310.pyc +0 -0
  12. chat/apps.py +11 -0
  13. chat/arxiv_bot/__pycache__/arxiv_bot_utils.cpython-310.pyc +0 -0
  14. chat/arxiv_bot/arxiv_bot_utils.py +276 -0
  15. chat/arxiv_bot/prebuild.ipynb +354 -0
  16. chat/consumers.py +18 -0
  17. chat/migrations/0001_initial.py +22 -0
  18. chat/migrations/__init__.py +0 -0
  19. chat/migrations/__pycache__/0001_initial.cpython-310.pyc +0 -0
  20. chat/migrations/__pycache__/__init__.cpython-310.pyc +0 -0
  21. chat/model_manage.py +169 -0
  22. chat/routing.py +9 -0
  23. chat/templates/index.html +229 -0
  24. chat/templates/myfirst.html +9 -0
  25. chat/urls.py +7 -0
  26. chat/views.py +10 -0
  27. chatbot_django/__init__.py +0 -0
  28. chatbot_django/__pycache__/__init__.cpython-310.pyc +0 -0
  29. chatbot_django/__pycache__/asgi.cpython-310.pyc +0 -0
  30. chatbot_django/__pycache__/settings.cpython-310.pyc +0 -0
  31. chatbot_django/__pycache__/urls.cpython-310.pyc +0 -0
  32. chatbot_django/__pycache__/wsgi.cpython-310.pyc +0 -0
  33. chatbot_django/asgi.py +28 -0
  34. chatbot_django/settings.py +126 -0
  35. chatbot_django/urls.py +23 -0
  36. chatbot_django/wsgi.py +16 -0
  37. manage.py +21 -0
  38. oldindex.html +259 -0
  39. topic_descriptions.txt +6 -0
README.md CHANGED
@@ -1,12 +1,12 @@
1
- ---
2
- title: Arxiv Chatbot
3
- emoji: 🐢
4
- colorFrom: gray
5
- colorTo: gray
6
- sdk: streamlit
7
- sdk_version: 1.34.0
8
- app_file: app.py
9
- pinned: false
10
- ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
+ ---
2
+ title: Arxiv Chatbot
3
+ emoji: 🐢
4
+ colorFrom: gray
5
+ colorTo: gray
6
+ sdk: streamlit
7
+ sdk_version: 1.34.0
8
+ app_file: app.py
9
+ pinned: false
10
+ ---
11
+
12
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
chat/__init__.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import chat.arxiv_bot.arxiv_bot_utils as utils
3
+ import os
4
+ from getpass import getpass
5
+ import json
6
+ from .model_manage import get_model
7
+
8
# Build the model once, when the `chat` package is first imported.
# NOTE(review): get_model() comes from chat/model_manage.py, which is not shown
# here -- confirm what it loads before relying on import-time construction.
model = get_model()
9
+
chat/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (394 Bytes). View file
 
chat/__pycache__/admin.cpython-310.pyc ADDED
Binary file (225 Bytes). View file
 
chat/__pycache__/apps.cpython-310.pyc ADDED
Binary file (515 Bytes). View file
 
chat/__pycache__/consumers.cpython-310.pyc ADDED
Binary file (1.1 kB). View file
 
chat/__pycache__/model_manage.cpython-310.pyc ADDED
Binary file (5.37 kB). View file
 
chat/__pycache__/models.cpython-310.pyc ADDED
Binary file (472 Bytes). View file
 
chat/__pycache__/routing.cpython-310.pyc ADDED
Binary file (385 Bytes). View file
 
chat/__pycache__/urls.cpython-310.pyc ADDED
Binary file (357 Bytes). View file
 
chat/__pycache__/views.cpython-310.pyc ADDED
Binary file (494 Bytes). View file
 
chat/apps.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from django.apps import AppConfig
2
+ import google.generativeai as genai
3
+
4
+
5
+
6
+
7
class MembersConfig(AppConfig):
    """Django application configuration for the ``chat`` app."""

    # Primary-key field type used for models that do not declare one.
    default_auto_field = 'django.db.models.BigAutoField'
    # Dotted path of the application this configuration applies to.
    name = 'chat'
10
+
11
+
chat/arxiv_bot/__pycache__/arxiv_bot_utils.cpython-310.pyc ADDED
Binary file (9.78 kB). View file
 
chat/arxiv_bot/arxiv_bot_utils.py ADDED
@@ -0,0 +1,276 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import chromadb
2
+ from chromadb import Documents, EmbeddingFunction, Embeddings
3
+ from transformers import AutoModel
4
+ import json
5
+ from numpy.linalg import norm
6
+ import sqlite3
7
+ import urllib
8
+ from django.conf import settings
9
+
10
+
11
+ # This module acts as a singleton: its model and database instances are created once, at import time.
12
+
13
class JinaAIEmbeddingFunction(EmbeddingFunction):
    """Chroma embedding function that delegates to a model exposing ``encode``."""

    def __init__(self, model):
        """Store ``model``; every embedding request is forwarded to it."""
        super().__init__()
        self.model = model

    def __call__(self, input: Documents) -> Embeddings:
        """Encode ``input`` with the wrapped model and return plain lists."""
        # The parameter keeps the name ``input`` because it is part of the
        # callable's interface.
        return self.model.encode(input).tolist()
21
+
22
# Embedding model used by the whole module; downloaded weights are cached in
# the "models" directory.
embedding_model = AutoModel.from_pretrained('jinaai/jina-embeddings-v2-base-en',
                                            trust_remote_code=True,
                                            cache_dir='models')

# Chroma-compatible wrapper around the embedding model.
ef = JinaAIEmbeddingFunction(embedding_model)

# Topic names mapped to their descriptions. The file holds JSON despite its
# .txt extension; it is opened in a ``with`` block so the handle is closed.
with open("topic_descriptions.txt", encoding="utf-8") as topic_file:
    topic_descriptions = json.load(topic_file)
topics = list(topic_descriptions)
# One embedding per topic description, in the same order as ``topics``.
embeddings = [embedding_model.encode(description)
              for description in topic_descriptions.values()]


def cos_sim(a, b):
    """Return the cosine similarity between vectors ``a`` and ``b``."""
    return (a @ b.T) / (norm(a) * norm(b))
35
+
36
def choose_topic(summary):
    """Return the topic whose description embedding is most similar to ``summary``.

    The summary is embedded with the module's embedding model and compared, by
    cosine similarity, against every pre-computed topic embedding. An empty
    string is returned when no topic scores above zero.
    """
    summary_vec = embedding_model.encode(summary)
    best_topic, best_score = "", 0.
    for name, topic_vec in zip(topics, embeddings):
        score = cos_sim(summary_vec, topic_vec)
        # Strict comparison: on a tie the earlier topic is kept.
        if score > best_score:
            best_topic, best_score = name, score
    return best_topic
46
+
47
def authors_list_to_str(authors):
    """Join a list of author names into a single ", "-separated string.

    Args:
        authors: Iterable of author name strings.
    Returns:
        The names joined with ", "; an empty string for an empty input.
    """
    # str.join adds the separator only between names, so nothing has to be
    # trimmed afterwards (the previous trim cut the last letter of the final
    # author's name).
    return ", ".join(authors)
53
+
54
def authors_str_to_list(string):
    """Split a string of authors separated by the word 'and' into a list.

    Args:
        string: Author names separated by the standalone word "and",
            e.g. "Ann Lee and Bob Ray". May be empty or None.
    Returns:
        A list of stripped author names. Empty pieces and a bare "et al."
        entry are dropped.
    """
    import re

    if not string:
        return []
    authors = []
    # Split only on "and" surrounded by whitespace, so names that merely
    # contain the letters (e.g. "Alexander", "Brandon") stay intact.
    for part in re.split(r"\s+and\s+", string.strip()):
        name = part.strip()
        if name and name != "et al.":
            authors.append(name)
    return authors
62
+
63
def chunk_texts(text, max_char=400):
    """
    Chunk a long text into several chunks of at most ``max_char`` characters,
    making sure no word is cut in half and no word is lost.
    Args:
        text: The long text to be chunked.
        max_char: The maximum number of characters per chunk (default: 400).
    Returns:
        A list of whitespace-normalised chunks. A single word longer than
        ``max_char`` becomes a chunk of its own; empty or whitespace-only
        text gives an empty list.
    """
    chunks = []
    current_chunk = ""
    for word in text.split():
        if not current_chunk:
            current_chunk = word
        elif len(current_chunk) + 1 + len(word) > max_char:
            # The word does not fit: close the chunk and start the next one
            # WITH this word (it used to be silently dropped).
            chunks.append(current_chunk)
            current_chunk = word
        else:
            current_chunk += " " + word
    if current_chunk:
        chunks.append(current_chunk)
    return chunks
84
+
85
def trimming(txt):
    """Return the outermost ``{...}`` span of ``txt`` with newlines turned into spaces.

    Used to cut a JSON object out of a longer piece of text. Returns an empty
    string when ``txt`` has no "{" or no "}" after it.
    """
    start = txt.find("{")
    end = txt.rfind("}")
    # Without this guard a missing "{" (start == -1) slices from the last
    # character and can return a stray "}".
    if start == -1 or end < start:
        return ""
    return txt[start:end + 1].replace("\n", " ")
89
+
90
+ # crawl data
91
+
92
def extract_tag(txt, tagname):
    """Return the text between the first ``<tagname>`` and its closing tag.

    Args:
        txt: Markup to search.
        tagname: Tag name without angle brackets; only attribute-free
            opening tags (``<tagname>``) are matched.
    Returns:
        The enclosed text, or an empty string when the opening tag or a
        closing tag after it is not present.
    """
    open_tag = "<" + tagname + ">"
    close_tag = "</" + tagname + ">"
    start = txt.find(open_tag)
    if start == -1:
        return ""
    start += len(open_tag)
    # Look for the closing tag only after the opening one.
    end = txt.find(close_tag, start)
    if end == -1:
        return ""
    return txt[start:end]
94
+
95
def get_record(extract):
    """Parse the inner text of one ``<entry>`` element into a record.

    Args:
        extract: The text found between ``<entry>`` and ``</entry>``.
    Returns:
        ``[id, updated, published, title, authors, link, summary]`` where
        ``authors`` is a list of names and ``link`` is the pdf URL, or an
        empty string when the entry has no pdf link.
    """
    paper_id = extract_tag(extract, "id")
    updated = extract_tag(extract, "updated")
    published = extract_tag(extract, "published")
    title = extract_tag(extract, "title").replace("\n ", "").strip()
    # Replace line breaks with a space so the words on either side of a
    # wrapped line are not glued together.
    summary = extract_tag(extract, "summary").replace("\n", " ").strip()

    authors = []
    author_close = "</author>"
    while extract.find("<author>") != -1:
        authors.append(extract_tag(extract, "name"))
        # Drop everything up to the end of this author so that the next
        # iteration sees the next <author> element.
        extract = extract[extract.find(author_close) + len(author_close):]

    # The pdf link is looked up in what remains after the last author.
    pattern = '<link title="pdf" href="'
    link = ""
    link_start = extract.find(pattern)
    if link_start != -1:
        href_start = link_start + len(pattern)
        href_end = extract.find('"', href_start)
        if href_end != -1:
            link = extract[href_start:href_end]
    return [paper_id, updated, published, title, authors, link, summary]
110
+
111
def crawl_exact_paper(title, author, max_results=3):
    """Query the arXiv API for papers matching a title and a list of authors.

    Args:
        title: Title (or part of it) to search in the ``ti:`` field.
        author: List of author names, searched in the ``au:`` field.
        max_results: Maximum number of entries requested (default: 3).
    Returns:
        A list of records
        ``[topic, id, updated, published, title, authors, link, summary]``,
        or a string starting with "Error: " when the request or the parsing
        fails (callers tell the two apart by type).
    """
    # Import the submodules explicitly: a bare ``import urllib`` does not
    # guarantee that ``urllib.request`` is loaded.
    import urllib.parse
    import urllib.request

    authors = authors_list_to_str(author)
    url = (
        'http://export.arxiv.org/api/query?search_query=ti:{title}+AND+au:{author}'
        '&max_results={max_results}'
    ).format(
        # Percent-encode the user-supplied values, not just their spaces.
        title=urllib.parse.quote(title),
        author=urllib.parse.quote(authors),
        max_results=max_results,
    )
    records = []
    try:
        with urllib.request.urlopen(url, timeout=100) as response:
            xml = response.read().decode("utf-8")
        start = xml.find("<entry>")
        while start != -1:
            end = xml.find("</entry>", start)
            if end == -1:
                break
            fields = get_record(xml[start + len("<entry>"):end])
            # fields[6] is the summary; it decides the topic of the paper.
            records.append([choose_topic(fields[6]), *fields])
            start = xml.find("<entry>", end)
        return records
    except Exception as e:
        # Deliberately broad: callers expect an error string, not an exception.
        return "Error: " + str(e)
128
+
129
def crawl_arxiv(keyword_list, max_results=100):
    """Query the arXiv API for papers matching any of the given keywords.

    Args:
        keyword_list: List of keyword strings; they are OR-ed together in the
            ``all:`` field.
        max_results: Maximum number of entries requested (default: 100).
    Returns:
        A list of records
        ``[topic, id, updated, published, title, authors, link, summary]``
        (empty when ``keyword_list`` is empty), or a string starting with
        "Error: " when the request or the parsing fails (callers tell the
        two apart by type).
    """
    # Import the submodules explicitly: a bare ``import urllib`` does not
    # guarantee that ``urllib.request`` is loaded.
    import urllib.parse
    import urllib.request

    # Nothing to search for: previously this raised UnboundLocalError.
    if not keyword_list:
        return []

    query = "+OR+".join(
        "all:" + urllib.parse.quote(keyword) for keyword in keyword_list
    )
    url = "http://export.arxiv.org/api/query?search_query={}&max_results={}".format(
        query, max_results
    )
    records = []
    try:
        with urllib.request.urlopen(url, timeout=100) as response:
            xml = response.read().decode("utf-8")
        start = xml.find("<entry>")
        while start != -1:
            end = xml.find("</entry>", start)
            if end == -1:
                break
            fields = get_record(xml[start + len("<entry>"):end])
            # fields[6] is the summary; it decides the topic of the paper.
            records.append([choose_topic(fields[6]), *fields])
            start = xml.find("<entry>", end)
        return records
    except Exception as e:
        # Deliberately broad: callers expect an error string, not an exception.
        return "Error: " + str(e)
151
+
152
class ArxivSQL:
    """Small wrapper around the SQLite table that stores arXiv paper metadata.

    ``add`` writes rows in the column order
    ``(id, topic, title, authors, updated, published, link)``.
    All values are passed to SQLite as bound parameters; only the table name
    (set by the caller of ``__init__``) is formatted into the statements.
    """

    def __init__(self, table="arxivsql", name="db.sqlite3"):
        """Open (or create) the database file ``name`` and remember ``table``."""
        self.con = sqlite3.connect(name)
        self.cur = self.con.cursor()
        self.table = table

    def query(self, title="", author=None):
        """Return rows whose title contains ``title`` and whose authors match.

        Args:
            title: Substring to look for in the title; empty means no filter.
            author: List of name fragments; a row matches when its authors
                column contains ANY of them. Empty/None means no filter.
        Returns:
            A list of row tuples.
        """
        conditions = []
        params = []
        if title:
            conditions.append("title like ?")
            params.append("%{}%".format(title))
        if author:
            # Every alternative needs its own "authors like ?", and the group
            # is parenthesised so it combines correctly with the title filter.
            conditions.append(
                "(" + " or ".join("authors like ?" for _ in author) + ")"
            )
            params.extend("%{}%".format(auth) for auth in author)
        query = "select * from {}".format(self.table)
        if conditions:
            query += " where " + " and ".join(conditions)
        return self.cur.execute(query, params).fetchall()

    def query_id(self, ids=None):
        """Return the rows whose id is in ``ids``.

        Returns None when ``ids`` is empty or when the database reports an
        error (the error and the statement are printed).
        """
        if not ids:
            return None
        placeholders = ",".join("?" for _ in ids)
        query = "select * from {} where id in ({})".format(self.table, placeholders)
        try:
            return self.cur.execute(query, list(ids)).fetchall()
        except sqlite3.Error as e:
            print(e)
            print("Error query: ", query)
            return None

    def add(self, crawl_records):
        """
        Add crawl_records (list) obtained from arxiv_crawlers
        A record is a list of 8 columns:
        [topic, id, updated, published, title, author, link, summary]
        Every record is attempted; a failing record does not stop the others.
        Return a string with the collected error messages (empty when every
        record was inserted).
        """
        results = ""
        query = "insert into {} values(?,?,?,?,?,?,?)".format(self.table)
        for record in crawl_records:
            try:
                values = (
                    # 21 == len("http://arxiv.org/abs/"): keep only what
                    # follows that many leading characters of the id.
                    record[1][21:],
                    record[0],
                    # Bound parameters need no quote escaping, so the title
                    # is stored unchanged.
                    record[4],
                    ", ".join(record[5]),
                    record[2][:10],  # first 10 characters of `updated`
                    record[3][:10],  # first 10 characters of `published`
                    record[6],
                )
                self.cur.execute(query, values)
                self.con.commit()
            except (sqlite3.Error, IndexError, TypeError) as e:
                results += str(e)
                results += "\n" + query + "\n"
        return results
214
+
215
# Module-level ArxivSQL instance, created at import time with the default
# table name ("arxivsql") and database file ("db.sqlite3").
sqldb = ArxivSQL()
217
+
218
class ArxivChroma:
    """
    Create an interface to arxivdb, which only supports query and addition.
    This interface does not support edit and delete operations.
    """

    def __init__(self, table="arxiv_records", name="arxivdb/"):
        """Open the persistent Chroma store at ``name`` and get/create ``table``."""
        self.client = chromadb.PersistentClient(name)
        self.model = embedding_model
        self.collection = self.client.get_or_create_collection(
            table,
            embedding_function=JinaAIEmbeddingFunction(model=self.model),
        )

    def query_relevant(self, keywords, query_texts, n_results=3):
        """
        Query the collection with ``query_texts``, keeping only documents
        that contain at least one of ``keywords`` (list of str).
        With an empty keyword list no document filter is applied.
        """
        # NOTE(review): keywords are lower-cased here but documents are stored
        # with their original casing in add(); confirm whether "$contains" is
        # case-sensitive for this Chroma version.
        contains = [{"$contains": keyword.lower()} for keyword in keywords]
        query_args = {"query_texts": query_texts, "n_results": n_results}
        # "$or" needs at least two clauses, so a single keyword is sent as a
        # bare "$contains" filter and an empty list sends no filter at all.
        if len(contains) == 1:
            query_args["where_document"] = contains[0]
        elif contains:
            query_args["where_document"] = {"$or": contains}
        return self.collection.query(**query_args)

    def query_exact(self, id):
        """Fetch the chunks ``<id>_0`` .. ``<id>_9`` of one paper."""
        ids = ["{}_{}".format(id, j) for j in range(0, 10)]
        return self.collection.get(ids=ids)

    def add(self, crawl_records):
        """
        Add crawl_records (list) obtained from arxiv_crawlers
        A record is a list of 8 columns:
        [topic, id, updated, published, title, author, link, summary]
        Return the number of items in the collection after the additions
        """
        for record in crawl_records:
            # 21 == len("http://arxiv.org/abs/"): keep only what follows
            # that many leading characters of the id.
            paper_id = record[1][21:]
            embed_text = """
            Topic: {},
            Title: {},
            Summary: {}
            """.format(record[0], record[4], record[7])
            chunks = chunk_texts(embed_text)
            ids = [paper_id + "_" + str(j) for j in range(len(chunks))]
            paper_ids = [{"paper_id": paper_id} for _ in range(len(chunks))]
            self.collection.add(
                documents=chunks,
                metadatas=paper_ids,
                ids=ids,
            )
        return self.collection.count()
273
+
274
# Module-level ArxivChroma instance, created at import time with the default
# collection name ("arxiv_records") and storage path ("arxivdb/").
db = ArxivChroma()
276
+
chat/arxiv_bot/prebuild.ipynb ADDED
@@ -0,0 +1,354 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [
8
+ {
9
+ "name": "stderr",
10
+ "output_type": "stream",
11
+ "text": [
12
+ "d:\\Program\\Anaconda\\envs\\python_project\\lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
13
+ " from .autonotebook import tqdm as notebook_tqdm\n",
14
+ "d:\\Program\\Anaconda\\envs\\python_project\\lib\\site-packages\\huggingface_hub\\file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n",
15
+ " warnings.warn(\n",
16
+ "d:\\Program\\Anaconda\\envs\\python_project\\lib\\site-packages\\huggingface_hub\\file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n",
17
+ " warnings.warn(\n"
18
+ ]
19
+ }
20
+ ],
21
+ "source": [
22
+ "import google.generativeai as genai\n",
23
+ "import arxiv_bot_utils as utils\n",
24
+ "import os\n",
25
+ "from getpass import getpass\n",
26
+ "import json\n",
27
+ "#chỉ là import một cách bình thường\n",
28
+ "#nội dung là "
29
+ ]
30
+ },
31
+ {
32
+ "cell_type": "code",
33
+ "execution_count": 2,
34
+ "metadata": {},
35
+ "outputs": [
36
+ {
37
+ "name": "stdout",
38
+ "output_type": "stream",
39
+ "text": [
40
+ "models/gemini-1.0-pro\n",
41
+ "models/gemini-1.0-pro-001\n",
42
+ "models/gemini-1.0-pro-latest\n",
43
+ "models/gemini-1.0-pro-vision-latest\n",
44
+ "models/gemini-1.5-pro-latest\n",
45
+ "models/gemini-pro\n",
46
+ "models/gemini-pro-vision\n"
47
+ ]
48
+ }
49
+ ],
50
+ "source": [
51
+ "os.environ['GEMINI_API_KEY'] = getpass(\"Input your API key: \")\n",
52
+ "# gán biến môi trường luôn\n",
53
+ "gemini_api_key = os.getenv(\"GEMINI_API_KEY\") # string trong môi trường\n",
54
+ "if not gemini_api_key:\n",
55
+ " raise ValueError(\n",
56
+ " \"Gemini API Key not provided. Please provide GEMINI_API_KEY as an environment variable\"\n",
57
+ " )\n",
58
+ "genai.configure(api_key=gemini_api_key)\n",
59
+ "for m in genai.list_models():\n",
60
+ " if 'generateContent' in m.supported_generation_methods:\n",
61
+ " print(m.name)\n",
62
+ " #models nằm trên máy chủ\n"
63
+ ]
64
+ },
65
+ {
66
+ "cell_type": "code",
67
+ "execution_count": 3,
68
+ "metadata": {},
69
+ "outputs": [],
70
+ "source": [
71
+ "config = genai.GenerationConfig(max_output_tokens=2048,\n",
72
+ " temperature=0.7)\n",
73
+ "safety_settings = [\n",
74
+ " {\n",
75
+ " \"category\": \"HARM_CATEGORY_DANGEROUS\",\n",
76
+ " \"threshold\": \"BLOCK_NONE\",\n",
77
+ " },\n",
78
+ " {\n",
79
+ " \"category\": \"HARM_CATEGORY_HARASSMENT\",\n",
80
+ " \"threshold\": \"BLOCK_NONE\",\n",
81
+ " },\n",
82
+ " {\n",
83
+ " \"category\": \"HARM_CATEGORY_HATE_SPEECH\",\n",
84
+ " \"threshold\": \"BLOCK_NONE\",\n",
85
+ " },\n",
86
+ " {\n",
87
+ " \"category\": \"HARM_CATEGORY_SEXUALLY_EXPLICIT\",\n",
88
+ " \"threshold\": \"BLOCK_NONE\",\n",
89
+ " },\n",
90
+ " {\n",
91
+ " \"category\": \"HARM_CATEGORY_DANGEROUS_CONTENT\",\n",
92
+ " \"threshold\": \"BLOCK_NONE\",\n",
93
+ " },\n",
94
+ "]\n",
95
+ "model = genai.GenerativeModel(\"gemini-pro\",\n",
96
+ " generation_config=config,\n",
97
+ " safety_settings=safety_settings)"
98
+ ]
99
+ },
100
+ {
101
+ "cell_type": "code",
102
+ "execution_count": 4,
103
+ "metadata": {},
104
+ "outputs": [],
105
+ "source": [
106
+ "def extract_keyword_prompt(query):\n",
107
+ " \"\"\"A prompt that return a JSON block as arguments for querying database\"\"\"\n",
108
+ "\n",
109
+ " prompt = (\n",
110
+ " \"\"\"[INST] SYSTEM: You are an assistant that choose only one action below based on guest question.\n",
111
+ " 1. If the guest question is asking for a single specific document or article with explicit title, you need to respond the information in JSON format with 2 keys \"title\", \"author\" if found any above. The authors are separated with the word 'and'. \n",
112
+ " 2. If the guest question is asking for relevant informations about a topic, you need to respond the information in JSON format with 2 keys \"keywords\", \"description\", include a list of keywords represent the main academic topic, \\\n",
113
+ " and a description about the main topic. You may paraphrase the keywords to add more. \\\n",
114
+ " 3. If the guest is not asking for any informations or documents, you need to respond with a polite answer in JSON format with 1 key \"answer\".\n",
115
+ " QUESTION: '{query}'\n",
116
+ " [/INST]\n",
117
+ " ANSWER: \n",
118
+ " \"\"\"\n",
119
+ " ).format(query=query)\n",
120
+ "\n",
121
+ " return prompt\n",
122
+ "\n",
123
+ "def make_answer_prompt(input, contexts):\n",
124
+ " \"\"\"A prompt that return the final answer, based on the queried context\"\"\"\n",
125
+ "\n",
126
+ " prompt = (\n",
127
+ " \"\"\"[INST] You are a library assistant that help to search articles and documents based on user's question.\n",
128
+ " From guest's question, you have found some records and documents that may help. Now you need to answer the guest with the information found.\n",
129
+ " If no information found in the database, you may generate some other recommendation related to user's question using your own knowledge. Each article or paper must have a link to the pdf download page.\n",
130
+ " You should answer in a conversational form politely.\n",
131
+ " QUESTION: '{input}'\n",
132
+ " INFORMATION: '{contexts}'\n",
133
+ " [/INST]\n",
134
+ " ANSWER:\n",
135
+ " \"\"\"\n",
136
+ " ).format(input=input, contexts=contexts)\n",
137
+ "\n",
138
+ " return prompt"
139
+ ]
140
+ },
141
+ {
142
+ "cell_type": "code",
143
+ "execution_count": 5,
144
+ "metadata": {},
145
+ "outputs": [],
146
+ "source": [
147
+ "def response(args):\n",
148
+ " \"\"\"Create response context, based on input arguments\"\"\"\n",
149
+ " keys = list(dict.keys(args))\n",
150
+ " if \"answer\" in keys:\n",
151
+ " return args['answer'], None # trả lời trực tiếp\n",
152
+ " \n",
153
+ " if \"keywords\" in keys:\n",
154
+ " # perform query\n",
155
+ " query_texts = args[\"description\"]\n",
156
+ " keywords = args[\"keywords\"]\n",
157
+ " results = utils.db.query_relevant(keywords=keywords, query_texts=query_texts)\n",
158
+ " # print(results)\n",
159
+ " ids = results['metadatas'][0]\n",
160
+ " if len(ids) == 0:\n",
161
+ " # go crawl some\n",
162
+ " new_records = utils.crawl_arxiv(keyword_list=keywords, max_results=10)\n",
163
+ " print(\"Got new records: \",len(new_records))\n",
164
+ " if type(new_records) == str:\n",
165
+ " return \"Error occured, information not found\", new_records\n",
166
+ " utils.db.add(new_records)\n",
167
+ " utils.sqldb.add(new_records)\n",
168
+ " results = utils.db.query_relevant(keywords=keywords, query_texts=query_texts)\n",
169
+ " ids = results['metadatas'][0]\n",
170
+ " print(\"Re-queried on chromadb, results: \",ids)\n",
171
+ " paper_id = [id['paper_id'] for id in ids]\n",
172
+ " paper_info = utils.sqldb.query_id(paper_id)\n",
173
+ " print(paper_info)\n",
174
+ " records = [] # get title (2), author (3), link (6)\n",
175
+ " result_string = \"\"\n",
176
+ " if paper_info:\n",
177
+ " for i in range(len(paper_info)):\n",
178
+ " result_string += \"Title: {}, Author: {}, Link: {}\".format(paper_info[i][2],paper_info[i][3],paper_info[i][6])\n",
179
+ " records.append([paper_info[i][2],paper_info[i][3],paper_info[i][6]])\n",
180
+ " return result_string, records\n",
181
+ " else:\n",
182
+ " return \"Information not found\", \"Information not found\"\n",
183
+ " # invoke llm and return result\n",
184
+ "\n",
185
+ " if \"title\" in keys:\n",
186
+ " title = args['title']\n",
187
+ " authors = utils.authors_str_to_list(args['author'])\n",
188
+ " paper_info = utils.sqldb.query(title = title,author = authors)\n",
189
+ " # if query not found then go crawl brh\n",
190
+ " # print(paper_info)\n",
191
+ "\n",
192
+ " if len(paper_info) == 0:\n",
193
+ " new_records = utils.crawl_exact_paper(title=title,author=authors)\n",
194
+ " print(\"Got new records: \",len(new_records))\n",
195
+ " if type(new_records) == str:\n",
196
+ " # print(new_records)\n",
197
+ " return \"Error occured, information not found\", \"Information not found\"\n",
198
+ " utils.db.add(new_records)\n",
199
+ " utils.sqldb.add(new_records)\n",
200
+ " paper_info = utils.sqldb.query(title = title,author = authors)\n",
201
+ " print(\"Re-queried on chromadb, results: \",paper_info)\n",
202
+ " # -------------------------------------\n",
203
+ " records = [] # get title (2), author (3), link (6)\n",
204
+ " result_string = \"\"\n",
205
+ " for i in range(len(paper_info)):\n",
206
+ " result_string += \"Title: {}, Author: {}, Link: {}\".format(paper_info[i][2],paper_info[i][3],paper_info[i][6])\n",
207
+ " records.append([paper_info[i][2],paper_info[i][3],paper_info[i][6]])\n",
208
+ " # process results:\n",
209
+ " if len(result_string) == 0:\n",
210
+ " return \"Information not found\", \"Information not found\"\n",
211
+ " return result_string, records\n",
212
+ " # invoke llm and return result"
213
+ ]
214
+ },
215
+ {
216
+ "cell_type": "code",
217
+ "execution_count": 6,
218
+ "metadata": {},
219
+ "outputs": [],
220
+ "source": [
221
+ "def full_chain_single_question(input_prompt):\n",
222
+ " try:\n",
223
+ " first_prompt = extract_keyword_prompt(input_prompt)\n",
224
+ " temp_answer = model.generate_content(first_prompt).text\n",
225
+ "\n",
226
+ " args = json.loads(utils.trimming(temp_answer))\n",
227
+ " contexts, results = response(args)\n",
228
+ " if not results:\n",
229
+ " print(contexts)\n",
230
+ " else:\n",
231
+ " output_prompt = make_answer_prompt(input_prompt,contexts)\n",
232
+ " answer = model.generate_content(output_prompt).text\n",
233
+ " return temp_answer, answer\n",
234
+ " except Exception as e:\n",
235
+ " print(e)\n",
236
+ " return temp_answer, \"Error occured: \" + str(e)"
237
+ ]
238
+ },
239
+ {
240
+ "cell_type": "code",
241
+ "execution_count": 27,
242
+ "metadata": {},
243
+ "outputs": [
244
+ {
245
+ "name": "stdout",
246
+ "output_type": "stream",
247
+ "text": [
248
+ "[('1903.04824v1', 'computer science', 'Proceedings of the Fifth International Conference on Cloud and Robotics (ICCR2018)', ' Huaxi, Zhang, Jacques Malenfan', '2019-03-12', '2019-03-12', 'http://arxiv.org/pdf/1903.04824v1'), ('1709.07597v1', 'economics', 'Inverse Reinforcement Learning with Conditional Choice Probabilities', 'Mohit Sharma, Kris M. Kitani, Joachim Groege', '2017-09-22', '2017-09-22', 'http://arxiv.org/pdf/1709.07597v1')]\n",
249
+ "Sure, here are some key papers on model predictive control for nonlinear systems:\n",
250
+ "\n",
251
+ "* **Nonlinear Model Predictive Control: A Survey** by Garcia, P.D., Prett, D.M., and Morari, M. (1989)\n",
252
+ "* **Model Predictive Control for Nonlinear Systems** by Camacho, E.F. and Bordons, C. (1999)\n",
253
+ "* **Nonlinear Model Predictive Control** by Rawlings, J.B. and Mayne, D.Q. (2009)\n",
254
+ "\n",
255
+ "As for recent reviews on the application of control theory to robotics, here are a few:\n",
256
+ "\n",
257
+ "* **Control of Robot Manipulators** by Spong, M.W., Hutchinson, S., and Vidyasagar, M. (2006)\n",
258
+ "* **Robotics: Modelling, Planning and Control** by Siciliano, B., Sciavicco, L., Villani, L., and Oriolo, G. (2010)\n",
259
+ "* **Control of Robot Arms** by Featherstone, R. (2014)\n",
260
+ "\n",
261
+ "I hope this information is helpful. Please let me know if you have any other questions.\n"
262
+ ]
263
+ }
264
+ ],
265
+ "source": [
266
+ "# test response, second step\n",
267
+ "input_prompt = \"Can you suggest some key papers on model predictive control for nonlinear systems, and are there any recent reviews on the application of control theory to robotics?\"\n",
268
+ "args = \"{\\n \\\"keywords\\\": [\\\"Model predictive control\\\", \\\"Nonlinear systems\\\", \\\"Robotics\\\", \\\"Control theory\\\"],\\n \\\"description\\\": \\\"Model predictive control (MPC) is a control algorithm that uses a model of the system to predict future behavior and optimize the control inputs. MPC is particularly well-suited for nonlinear systems, as it can handle the complex dynamics of these systems. In recent years, MPC has been increasingly applied to robotics, as it can improve the performance and safety of robotic systems. Control theory is a branch of mathematics that deals with the analysis and design of control systems. Control theory has been applied to a wide range of problems in robotics, including motion planning, trajectory tracking, and force control.\\\"\\n}\"\n",
269
+ "args = json.loads(args)\n",
270
+ "contexts, results = response(args)\n",
271
+ "if not results:\n",
272
+ " # direct answer\n",
273
+ " print(contexts)\n",
274
+ "else:\n",
275
+ " output_prompt = make_answer_prompt(input_prompt,contexts)\n",
276
+ " answer = model.generate_content(output_prompt).text\n",
277
+ " print(answer)"
278
+ ]
279
+ },
280
+ {
281
+ "cell_type": "code",
282
+ "execution_count": 7,
283
+ "metadata": {},
284
+ "outputs": [
285
+ {
286
+ "name": "stdout",
287
+ "output_type": "stream",
288
+ "text": [
289
+ "{'desired': 'Natural Language Processing (Computer Science)', 'question': 'What are some recent papers on deep learning architectures for text classification, and can you recommend any surveys or reviews on the topic?'}\n",
290
+ "0\n",
291
+ "[('1808.08121v1', 'computer science', 'An Improvement of Data Classification Using Random Multimodel Deep Learning (RMDL)', 'Mojtaba Heidarysafa, Kamran Kowsari, Donald E. Brown, Kiana Jafari Meimandi, Laura E. Barne', '2018-08-23', '2018-08-23', 'http://arxiv.org/pdf/1808.08121v1'), ('1904.08067v5', 'computer science', 'Text Classification Algorithms: A Survey', 'Kamran Kowsari, Kiana Jafari Meimandi, Mojtaba Heidarysafa, Sanjana Mendu, Laura E. Barnes, Donald E. Brow', '2020-05-20', '2019-04-17', 'http://arxiv.org/pdf/1904.08067v5'), ('2202.09144v1', 'computer science', 'Modelling the semantics of text in complex document layouts using graph transformer networks', 'Thomas Roland Barillot, Jacob Saks, Polena Lilyanova, Edward Torgas, Yachen Hu, Yuanqing Liu, Varun Balupuri, Paul Gaskel', '2022-02-18', '2022-02-18', 'http://arxiv.org/pdf/2202.09144v1')]\n",
292
+ "1\n",
293
+ "[('1601.04187v1', 'computer science', 'Conversion of Artificial Recurrent Neural Networks to Spiking Neural Networks for Low-power Neuromorphic Hardware', 'Peter U. Diehl, Guido Zarrella, Andrew Cassidy, Bruno U. Pedroni, Emre Neftc', '2016-01-16', '2016-01-16', 'http://arxiv.org/pdf/1601.04187v1'), ('1801.01093v3', 'economics', 'Comparing the Forecasting Performances of Linear Models for Electricity Prices with High RES Penetration', 'Angelica Gianfreda, Francesco Ravazzolo, Luca Rossin', '2019-11-12', '2018-01-03', 'http://arxiv.org/pdf/1801.01093v3'), ('2302.11093v1', 'electrical engineering and system science', 'Use Cases for Time-Frequency Image Representations and Deep Learning Techniques for Improved Signal Classification', 'Mehmet Parla', '2023-02-22', '2023-02-22', 'http://arxiv.org/pdf/2302.11093v1')]\n",
294
+ "2\n",
295
+ "[('1505.07907v4', 'economics', 'Linking Economic Complexity, Institutions and Income Inequality', 'D. Hartmann, M. R. Guevara, C. Jara-Figueroa, M. Aristaran, C. A. Hidalg', '2017-01-04', '2015-05-29', 'http://arxiv.org/pdf/1505.07907v4'), ('2107.06855v2', 'economics', 'Comparing Intellectual property policy in the Global North and South -- A one-size-fits-all policy for economic prosperity?', 'S Sidhartha Narayan, Malavika Ranjan, Madhumitha Raghurama', '2021-08-10', '2021-07-14', 'http://arxiv.org/pdf/2107.06855v2'), ('1910.11780v1', 'economics', 'Inequality in Turkey: Looking Beyond Growth', 'Bayram Cakir, Ipek Ergu', '2019-10-25', '2019-10-25', 'http://arxiv.org/pdf/1910.11780v1')]\n",
296
+ "3\n",
297
+ "[('1607.06583v2', 'computer science', \"Classification of Alzheimer's Disease Structural MRI Data by Deep Learning Convolutional Neural Networks\", 'Saman Sarraf, Ghassem Tofigh', '2017-05-19', '2016-07-22', 'http://arxiv.org/pdf/1607.06583v2'), ('2101.10265v1', 'computer science', 'Superiorities of Deep Extreme Learning Machines against Convolutional Neural Networks', 'Gokhan Altan, Yakup Kutl', '2021-01-21', '2021-01-21', 'http://arxiv.org/pdf/2101.10265v1'), ('2208.03143v1', 'computer science', 'Deep Learning and Health Informatics for Smart Monitoring and Diagnosis', 'Amin Gasm', '2022-08-05', '2022-08-05', 'http://arxiv.org/pdf/2208.03143v1')]\n",
298
+ "4\n",
299
+ "[('2302.06584v3', 'computer science', 'Thermodynamic AI and the fluctuation frontier', 'Patrick J. Coles, Collin Szczepanski, Denis Melanson, Kaelan Donatella, Antonio J. Martinez, Faris Sbah', '2023-06-13', '2023-02-09', 'http://arxiv.org/pdf/2302.06584v3'), ('2307.12298v1', 'computer science', 'Stabilization and Dissipative Information Transfer of a Superconducting Kerr-Cat Qubit', 'Ufuk Korkmaz, Deniz Türkpenç', '2023-07-23', '2023-07-23', 'http://arxiv.org/pdf/2307.12298v1'), ('2106.10421v1', 'computer science', 'QFCNN: Quantum Fourier Convolutional Neural Network', 'Feihong Shen, Jun Li', '2021-06-19', '2021-06-19', 'http://arxiv.org/pdf/2106.10421v1')]\n",
300
+ "5\n",
301
+ "[('2308.16539v2', 'computer science', 'On a Connection between Differential Games, Optimal Control, and Energy-based Models for Multi-Agent Interactions', 'Christopher Diehl, Tobias Klosek, Martin Krüger, Nils Murzyn, Torsten Bertra', '2023-10-16', '2023-08-31', 'http://arxiv.org/pdf/2308.16539v2'), ('2404.12474v1', 'computer science', 'Learning a Stable, Safe, Distributed Feedback Controller for a Heterogeneous Platoon of Vehicles', 'Michael H. Shaham, Taskin Padi', '2024-04-18', '2024-04-18', 'http://arxiv.org/pdf/2404.12474v1'), ('2008.13221v1', 'computer science', 'Human-in-the-Loop Methods for Data-Driven and Reinforcement Learning Systems', 'Vinicius G. Goeck', '2020-08-30', '2020-08-30', 'http://arxiv.org/pdf/2008.13221v1')]\n",
302
+ "6\n",
303
+ "[('1911.06206v3', 'economics', 'Bayesian state-space modeling for analyzing heterogeneous network effects of US monetary policy', 'Niko Hauzenberger, Michael Pfarrhofe', '2020-09-10', '2019-11-14', 'http://arxiv.org/pdf/1911.06206v3'), ('2302.14114v1', 'economics', 'Econometric assessment of the monetary policy shocks in Morocco: Evidence from a Bayesian Factor-Augmented VAR', 'Marouane Daou', '2023-02-27', '2023-02-27', 'http://arxiv.org/pdf/2302.14114v1'), ('2311.11858v1', 'economics', 'Theory coherent shrinkage of Time-Varying Parameters in VARs', 'Andrea Renzett', '2023-11-20', '2023-11-20', 'http://arxiv.org/pdf/2311.11858v1')]\n",
304
+ "7\n",
305
+ "[('2310.03365v2', 'computer science', 'Swin-Tempo: Temporal-Aware Lung Nodule Detection in CT Scans as Video Sequences Using Swin Transformer-Enhanced UNet', 'Hossein Jafari, Karim Faez, Hamidreza Amindava', '2023-10-14', '2023-10-05', 'http://arxiv.org/pdf/2310.03365v2'), ('1808.08531v1', 'computer science', 'DeepTracker: Visualizing the Training Process of Convolutional Neural Networks', 'Dongyu Liu, Weiwei Cui, Kai Jin, Yuxiao Guo, Huamin Q', '2018-08-26', '2018-08-26', 'http://arxiv.org/pdf/1808.08531v1'), ('2105.10448v1', 'computer science', 'Distinguishing artefacts: evaluating the saturation point of convolutional neural networks', 'Ric Real, James Gopsill, David Jones, Chris Snider, Ben Hick', '2021-05-21', '2021-05-21', 'http://arxiv.org/pdf/2105.10448v1')]\n",
306
+ "8\n",
307
+ "Got new records: 10\n",
308
+ "Re-queried on chromadb, results: []\n",
309
+ "None\n",
310
+ "9\n",
311
+ "[('2403.07017v1', 'computer science', 'Mathematics of multi-agent learning systems at the interface of game theory and artificial intelligence', 'Long Wang, Feng Fu, Xingru Che', '2024-03-09', '2024-03-09', 'http://arxiv.org/pdf/2403.07017v1'), ('2210.02205v1', 'computer science', 'Game Theoretic Rating in N-player general-sum games with Equilibria', 'Luke Marris, Marc Lanctot, Ian Gemp, Shayegan Omidshafiei, Stephen McAleer, Jerome Connor, Karl Tuyls, Thore Graepe', '2022-10-05', '2022-10-05', 'http://arxiv.org/pdf/2210.02205v1'), ('2212.05357v3', 'economics', 'On Blockchain We Cooperate: An Evolutionary Game Perspective', 'Luyao Zhang, Xinyu Tia', '2023-01-19', '2022-12-10', 'http://arxiv.org/pdf/2212.05357v3')]\n"
312
+ ]
313
+ }
314
+ ],
315
+ "source": [
316
+ "with open(\"test_questions.txt\",\"r\") as infile:\n",
317
+ " data = json.load(infile)\n",
318
+ "print(data[0])\n",
319
+ "\n",
320
+ "test_log = []\n",
321
+ "for i,t in enumerate(data):\n",
322
+ " print(i)\n",
323
+ " temp_answer, answer = full_chain_single_question(t['question'])\n",
324
+ " test_log.append({'desired topic':t['desired'],\n",
325
+ " 'question':t['question'],\n",
326
+ " 'first answer':temp_answer,\n",
327
+ " 'final answer':answer})\n",
328
+ "with open(\"test_results.json\",\"w\") as outfile:\n",
329
+ " json.dump(test_log,outfile)"
330
+ ]
331
+ }
332
+ ],
333
+ "metadata": {
334
+ "kernelspec": {
335
+ "display_name": "Python 3",
336
+ "language": "python",
337
+ "name": "python3"
338
+ },
339
+ "language_info": {
340
+ "codemirror_mode": {
341
+ "name": "ipython",
342
+ "version": 3
343
+ },
344
+ "file_extension": ".py",
345
+ "mimetype": "text/x-python",
346
+ "name": "python",
347
+ "nbconvert_exporter": "python",
348
+ "pygments_lexer": "ipython3",
349
+ "version": "3.10.12"
350
+ }
351
+ },
352
+ "nbformat": 4,
353
+ "nbformat_minor": 2
354
+ }
chat/consumers.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ from . import model_manage as md
3
+ from channels.generic.websocket import WebsocketConsumer
4
+
5
+
6
class ChatConsumer(WebsocketConsumer):
    """WebSocket consumer that forwards each chat message to the bot pipeline.

    Every text frame is expected to be a JSON object with a ``"message"``
    string. The reply is sent back as a JSON object with the same key.
    """

    def connect(self):
        """Accept every incoming WebSocket connection."""
        self.accept()

    def disconnect(self, close_code):
        """Nothing to release: the consumer keeps no per-connection state."""
        pass

    def receive(self, text_data=None, bytes_data=None):
        """Answer one chat message.

        Args:
            text_data: JSON text frame, e.g. ``{"message": "..."}``.
            bytes_data: Binary frame payload. Accepted so that binary frames
                do not raise a TypeError, but binary input is not supported.
        """
        # Parse defensively: a malformed frame should produce an error reply,
        # not an unhandled exception inside the consumer.
        try:
            payload = json.loads(text_data) if text_data is not None else None
        except (TypeError, ValueError):
            payload = None

        message = payload.get("message") if isinstance(payload, dict) else None
        if not isinstance(message, str) or not message.strip():
            self.send(text_data=json.dumps(
                {"message": "Invalid request: expected a JSON object with a non-empty \"message\" string."}
            ))
            return

        print(message)
        record, reply = md.full_chain_single_question(message)
        print("First answer: ", record)
        self.send(text_data=json.dumps({"message": reply}))
18
+
chat/migrations/0001_initial.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Generated by Django 4.1 on 2024-05-04 14:53
2
+
3
+ from django.db import migrations, models
4
+
5
+
6
+ class Migration(migrations.Migration):
7
+
8
+ initial = True
9
+
10
+ dependencies = [
11
+ ]
12
+
13
+ operations = [
14
+ migrations.CreateModel(
15
+ name='Member',
16
+ fields=[
17
+ ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
18
+ ('first_name', models.CharField(max_length=24)),
19
+ ('last_name', models.CharField(max_length=24)),
20
+ ],
21
+ ),
22
+ ]
chat/migrations/__init__.py ADDED
File without changes
chat/migrations/__pycache__/0001_initial.cpython-310.pyc ADDED
Binary file (722 Bytes). View file
 
chat/migrations/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (195 Bytes). View file
 
chat/model_manage.py ADDED
@@ -0,0 +1,169 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # chat/model_manage.py
2
+ import google.generativeai as genai
3
+ import chat.arxiv_bot.arxiv_bot_utils as utils
4
+ import json
5
+
6
+ model = None
7
+
8
def create_model():
    """Configure the Gemini client and build the module's shared model.

    Reads the API key from the first line of ``apikey.txt`` (a path relative
    to the current working directory), configures ``google.generativeai``
    with it, prints the name of every model that supports
    ``generateContent``, and stores the new model in the module-level
    ``model`` variable.

    Returns:
        The ``genai.GenerativeModel`` that was created.
    """
    global model

    with open("apikey.txt", "r") as apikey:
        # readline() keeps the line terminator; strip it so the key is not
        # handed to the API with a trailing newline.
        key = apikey.readline().strip()
    genai.configure(api_key=key)

    for m in genai.list_models():
        if 'generateContent' in m.supported_generation_methods:
            print(m.name)
    print("He was there")

    config = genai.GenerationConfig(max_output_tokens=2048,
                                    temperature=0.7)
    # Every listed harm category is configured with the BLOCK_NONE threshold.
    harm_categories = (
        "HARM_CATEGORY_DANGEROUS",
        "HARM_CATEGORY_HARASSMENT",
        "HARM_CATEGORY_HATE_SPEECH",
        "HARM_CATEGORY_SEXUALLY_EXPLICIT",
        "HARM_CATEGORY_DANGEROUS_CONTENT",
    )
    safety_settings = [
        {"category": category, "threshold": "BLOCK_NONE"}
        for category in harm_categories
    ]

    model = genai.GenerativeModel("gemini-pro",
                                  generation_config=config,
                                  safety_settings=safety_settings)
    return model
45
+
46
def get_model():
    """Return the shared model, building it on first use."""
    global model
    if model is not None:
        return model
    # Initialise the model here; create_model is this module's factory.
    model = create_model()
    return model
52
+
53
def extract_keyword_prompt(query):
    """Build the first-stage prompt that asks the LLM for query arguments.

    The prompt offers the model three actions and asks for a JSON reply whose
    keys identify the chosen one: ``title``/``author`` for one specific
    paper, ``keywords``/``description`` for a topic search, or ``answer``
    when the guest is not asking for documents.

    Args:
        query: The guest's question; inserted verbatim into the prompt.

    Returns:
        The formatted prompt string.
    """
    # NOTE: option 3 now starts on its own line. A trailing backslash after
    # "add more." previously joined it onto option 2's line.
    prompt = (
        """[INST] SYSTEM: You are an assistant that choose only one action below based on guest question.
    1. If the guest question is asking for a single specific document or article with explicit title, you need to respond the information in JSON format with 2 keys "title", "author" if found any above. The authors are separated with the word 'and'.
    2. If the guest question is asking for relevant informations about a topic, you need to respond the information in JSON format with 2 keys "keywords", "description", include a list of keywords represent the main academic topic, \
    and a description about the main topic. You may paraphrase the keywords to add more.
    3. If the guest is not asking for any informations or documents, you need to respond with a polite answer in JSON format with 1 key "answer".
    QUESTION: '{query}'
    [/INST]
    ANSWER:
    """
    ).format(query=query)

    return prompt
69
+
70
def make_answer_prompt(input, contexts):
    """Build the second-stage prompt that turns retrieved records into a reply.

    Args:
        input: The guest's original question.
        contexts: The retrieved information to present to the model.

    Returns:
        The formatted prompt string.
    """
    template = """[INST] You are a library assistant that help to search articles and documents based on user's question.
    From guest's question, you have found some records and documents that may help. Now you need to answer the guest with the information found.
    If no information found in the database, you may generate some other recommendation related to user's question using your own knowledge. Each article or paper must have a link to the pdf download page.
    You should answer in a conversational form politely.
    QUESTION: '{input}'
    INFORMATION: '{contexts}'
    [/INST]
    ANSWER:
    """
    return template.format(input=input, contexts=contexts)
86
+
87
_NOT_FOUND = "Information not found"


def _format_records(paper_info):
    """Turn SQL rows into a context string plus ``[title, author, link]`` records."""
    # Column positions read from each row: title (2), author (3), link (6).
    records = [[row[2], row[3], row[6]] for row in paper_info]
    # One record per line so consecutive papers do not run together.
    result_string = "\n".join(
        "Title: {}, Author: {}, Link: {}".format(*record) for record in records
    )
    return result_string, records


def _respond_to_topic(args):
    """Handle a topic request: query the vector store, crawling arXiv if it is empty."""
    query_texts = args["description"]
    keywords = args["keywords"]
    results = utils.db.query_relevant(keywords=keywords, query_texts=query_texts)
    ids = results['metadatas'][0]
    if not ids:
        # Nothing stored for this topic yet: crawl, store, then query again.
        new_records = utils.crawl_arxiv(keyword_list=keywords, max_results=10)
        if isinstance(new_records, str):
            # A str result is treated as a crawl failure and passed back as-is.
            return "Error occured, information not found", new_records
        print("Got new records: ", len(new_records))
        utils.db.add(new_records)
        utils.sqldb.add(new_records)
        results = utils.db.query_relevant(keywords=keywords, query_texts=query_texts)
        ids = results['metadatas'][0]
        print("Re-queried on chromadb, results: ", ids)
    paper_ids = [metadata['paper_id'] for metadata in ids]
    paper_info = utils.sqldb.query_id(paper_ids)
    print(paper_info)
    if not paper_info:
        return _NOT_FOUND, _NOT_FOUND
    return _format_records(paper_info)


def _respond_to_title(args):
    """Handle a request for one specific paper: SQL lookup, crawling arXiv if missing."""
    title = args['title']
    # The extraction prompt lets the model leave "author" out, so do not
    # require the key.
    # NOTE(review): assumes authors_str_to_list accepts an empty string — confirm.
    authors = utils.authors_str_to_list(args.get('author') or "")
    paper_info = utils.sqldb.query(title=title, author=authors)
    if not paper_info:
        new_records = utils.crawl_exact_paper(title=title, author=authors)
        if isinstance(new_records, str):
            # A str result is treated as a crawl failure.
            return "Error occured, information not found", _NOT_FOUND
        print("Got new records: ", len(new_records))
        utils.db.add(new_records)
        utils.sqldb.add(new_records)
        paper_info = utils.sqldb.query(title=title, author=authors)
        print("Re-queried on chromadb, results: ", paper_info)
    result_string, records = _format_records(paper_info or [])
    if not result_string:
        return _NOT_FOUND, _NOT_FOUND
    return result_string, records


def response(args):
    """Create the response context for the arguments extracted by the LLM.

    Args:
        args: Parsed JSON from the first LLM call. The key present selects
            the action, checked in this order: ``"answer"`` (direct reply),
            ``"keywords"`` (topic search), ``"title"`` (specific paper).

    Returns:
        A ``(contexts, results)`` pair. ``results`` is ``None`` for a direct
        answer; otherwise it is a list of ``[title, author, link]`` records,
        or a string when nothing was found or crawling failed.
    """
    # Anything that is not a JSON object, or has none of the expected keys,
    # is reported as "not found" rather than falling through and returning
    # None, which the caller cannot unpack.
    if not isinstance(args, dict):
        return _NOT_FOUND, _NOT_FOUND

    if "answer" in args:
        return args['answer'], None  # direct reply, no lookup needed

    if "keywords" in args:
        return _respond_to_topic(args)

    if "title" in args:
        return _respond_to_title(args)

    return _NOT_FOUND, _NOT_FOUND
152
+ # invoke llm and return result
153
def full_chain_single_question(input_prompt):
    """Run the whole pipeline for one question.

    Steps: ask the LLM to extract query arguments, look the papers up via
    ``response``, then ask the LLM to phrase the final answer.

    Args:
        input_prompt: The guest's question.

    Returns:
        A ``(first_answer, final_answer)`` pair of strings. For direct
        answers the first element is ``"Random question, direct return"``.
        On any failure the second element starts with ``"Error occured: "``
        and the first is whatever the first LLM call returned (``""`` if the
        failure happened before it completed).
    """
    # Bound before the try block so the except path can always return it.
    temp_answer = ""
    try:
        # get_model() builds the model on first use instead of relying on
        # the module global having been set already.
        llm = get_model()
        first_prompt = extract_keyword_prompt(input_prompt)
        temp_answer = llm.generate_content(first_prompt).text

        args = json.loads(utils.trimming(temp_answer))
        contexts, results = response(args)
        if not results:
            return "Random question, direct return", contexts

        output_prompt = make_answer_prompt(input_prompt, contexts)
        answer = llm.generate_content(output_prompt).text
        return temp_answer, answer
    except Exception as e:
        # Top-level boundary for the chat handler: report instead of raising.
        return temp_answer, "Error occured: " + str(e)
chat/routing.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ # chat/routing.py
2
+ from django.urls import re_path
3
+
4
+ from . import consumers
5
+
6
+ websocket_urlpatterns = [
7
+ re_path(r"ws/chat/(?P<room_name>\w+)/$", consumers.ChatConsumer.as_asgi()),
8
+ re_path("ws/chat",consumers.ChatConsumer.as_asgi()),
9
+ ]
chat/templates/index.html ADDED
@@ -0,0 +1,229 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!doctype html>
2
+ <html>
3
+
4
+ <head>
5
+ <title>Python Project</title>
6
+ <meta charset="UTF-8">
7
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
8
+ <script src="https://cdn.tailwindcss.com"></script>
9
+ </head>
10
+ <script>
11
function textAreaAdjust(element) {
    // Collapse first so scrollHeight reflects the content, then grow to the
    // content height plus 25px.
    const style = element.style;
    style.height = "1px";
    style.height = `${25 + element.scrollHeight}px`;
}
15
+ </script>
16
+ <body class="bg-[#2A2A2A] p-0 m-0 text-white">
17
+ <div id="app" class="md:container flex flex-col md:mx-auto mx-0 bg-[#2E2E2E] w-full md:w-[728px] h-screen">
18
+
19
+ <div class="flex flex-col h-full">
20
+ <!-- head -->
21
+ <div class="flex flex-row justify-between items-center p-4 select-none">
22
+ <h1 class="text-2xl font-medium text-center">
23
+ TinyChat {{
24
+ showConfig ? "Config" : ""
25
+ }}
26
+ </h1>
27
+ <div>
28
+ <svg width="24" height="24" viewBox="0 0 24 24" fill="none" xmlns="http://www.w3.org/2000/svg"
29
+ class="cursor-pointer" :class="{'text-white':!showConfig,'text-[#919191]':showConfig}"
30
+ @click="showConfig=!showConfig">
31
+ <path
32
+ d="M11.6439 3C10.9352 3 10.2794 3.37508 9.92001 3.98596L9.49643 4.70605C8.96183 5.61487 7.98937 6.17632 6.935 6.18489L6.09966 6.19168C5.39095 6.19744 4.73822 6.57783 4.38385 7.19161L4.02775 7.80841C3.67338 8.42219 3.67031 9.17767 4.01968 9.7943L4.4315 10.5212C4.95126 11.4386 4.95126 12.5615 4.4315 13.4788L4.01968 14.2057C3.67031 14.8224 3.67338 15.5778 4.02775 16.1916L4.38385 16.8084C4.73822 17.4222 5.39095 17.8026 6.09965 17.8083L6.93501 17.8151C7.98938 17.8237 8.96184 18.3851 9.49644 19.294L9.92001 20.014C10.2794 20.6249 10.9352 21 11.6439 21H12.3561C13.0648 21 13.7206 20.6249 14.08 20.014L14.5035 19.294C15.0381 18.3851 16.0106 17.8237 17.065 17.8151L17.9004 17.8083C18.6091 17.8026 19.2618 17.4222 19.6162 16.8084L19.9723 16.1916C20.3267 15.5778 20.3298 14.8224 19.9804 14.2057L19.5686 13.4788C19.0488 12.5615 19.0488 11.4386 19.5686 10.5212L19.9804 9.7943C20.3298 9.17767 20.3267 8.42219 19.9723 7.80841L19.6162 7.19161C19.2618 6.57783 18.6091 6.19744 17.9004 6.19168L17.065 6.18489C16.0106 6.17632 15.0382 5.61487 14.5036 4.70605L14.08 3.98596C13.7206 3.37508 13.0648 3 12.3561 3H11.6439Z"
33
+ stroke="white" stroke-width="2" stroke-linejoin="round" />
34
+ <path
35
+ d="M12 14.5C13.3807 14.5 14.5 13.3807 14.5 12C14.5 10.6193 13.3807 9.5 12 9.5C10.6193 9.5 9.5 10.6193 9.5 12C9.5 13.3807 10.6193 14.5 12 14.5Z"
36
+ stroke="white" stroke-width="2" />
37
+ </svg>
38
+
39
+ </div>
40
+ </div>
41
+ <!-- config -->
42
+ <div v-show="showConfig" class="flex flex-col gap-4 p-4 select-none">
43
+ <!-- render configFields -->
44
+ <div v-for="field in configFields" :key="field.name" class="flex flex-col gap-2">
45
+ <label class="text-sm text-gray-400">
46
+ {{ field.label }}
47
+ </label>
48
+ <input v-if="field.type=='text'" type="text"
49
+ class="bg-transparent border border-[#919191] rounded-lg p-2" v-model="config[field.name]" />
50
+ <input v-if="field.type=='number'" type="number"
51
+ class="bg-transparent border border-[#919191] rounded-lg p-2" v-model="config[field.name]" />
52
+ </div>
53
+ </div>
54
+ <!-- messages -->
55
+ <div class="flex flex-col overflow-y-auto scroll-smooth" id="messages">
56
+ <div v-for="(message,index) in messages" :key="message.id"
57
+ class="flex flex-col odd:bg-[#F7F7F7]/10 group">
58
+ <div class="p-4 flex flex-col justify-between gap-4">
59
+ <div class="flex flex-row gap-4 capitalize">
60
+ <div class="text-gray-400 text-sm" >
61
+ <textarea
62
+ class="text-gray-400 text-sm size-full outline-0 bg-transparent border-none text-white rounded-lg resize-none"
63
+ :rows="message.role" v-model="message.role"></textarea>
64
+ </div>
65
+ <div class="flex flex-col w-full">
66
+ <textarea
67
+ onkeyup="textAreaAdjust(this)" style="overflow:hidden"
68
+ class="size-full outline-0 bg-transparent border-none text-white rounded-lg resize-y"
69
+ :rows="message.content.split('\n').length" v-model="message.content"></textarea>
70
+ <div class="mt-4" v-if="index==messages.length-1 && message.role=='assistant'">
71
+ <svg stroke="currentColor" fill="none" stroke-width="1.5" viewBox="0 0 24 24"
72
+ stroke-linecap="round" stroke-linejoin="round" height="1em" width="1em"
73
+ xmlns="http://www.w3.org/2000/svg" v-if="index==messages.length-1"
74
+ class="group-hover:block cursor-pointer h-3 w-3" @click="refresh"
75
+ :class="{'animate-spin':isLoading}">
76
+ >
77
+ >
78
+ >
79
+ <polyline points="1 4 1 10 7 10"></polyline>
80
+ <polyline points="23 20 23 14 17 14"></polyline>
81
+ <path d="M20.49 9A9 9 0 0 0 5.64 5.64L1 10m22 4l-4.64 4.36A9 9 0 0 1 3.51 15">
82
+ </path>
83
+
84
+ </svg>
85
+ </div>
86
+ </div>
87
+ </div>
88
+ </div>
89
+ </div>
90
+ </div>
91
+ </div>
92
+ <!-- send message -->
93
+ <div class="flex flex-row justify-between items-center p-4 sticky bottom-0 bg-[#2E2E2E]" v-if="!showConfig">
94
+ <div class="w-full flex flex-row border border-1 border-[#919191] rounded-xl px-4 py-6 items-center">
95
+ <textarea
96
+ class="w-full h-full outline-0 bg-transparent border-none text-white resize-none text-md max-h-[100px]"
97
+ v-on:keyup.enter="sendMessage" placeholder="Type a message" :rows="message.split('\n').length"
98
+ v-model="message"></textarea>
99
+ <div class="flex flex-col justify-end gap-4 text-[#2E2E2E] rounded-md p-1"
100
+ :class="{'bg-white':message!='','bg-white/10':message==''}">
101
+ <button @click="sendMessage">
102
+ <svg class="w-6 h-6" fill="none" stroke="currentColor" viewBox="0 0 24 24"
103
+ xmlns="http://www.w3.org/2000/svg">
104
+ <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2"
105
+ d="M12 19l9 2-9-18-9 18 9-2zm0 0v-8" />
106
+ </svg>
107
+ </button>
108
+ </div>
109
+ </div>
110
+ </div>
111
+
112
+ </div>
113
+ <script type="module">
114
+ import { createApp } from 'https://unpkg.com/vue@3/dist/vue.esm-browser.js'
115
+ import { SSE } from "https://cdn.jsdelivr.net/npm/[email protected]/lib/sse.min.js"
116
+
117
createApp({
    data() {
        // Match the page's scheme: an https page must use wss://, because
        // browsers refuse insecure ws:// connections from secure pages.
        const scheme = window.location.protocol === 'https:' ? 'wss://' : 'ws://';
        return {
            showConfig: false,
            configFields: [
                { name: "endPoint", type: "text", label: "End Point" },
                { name: "apiKey", type: "text", label: "API Key" },
                { name: "model", type: "text", label: "Model" },
                { name: "temperature", type: "number", label: "Temperature" },
                { name: "presence_penalty", type: "number", label: "Presence Penalty" },
                { name: "top_p", type: "number", label: "Top P" },
                { name: "frequency_penalty", type: "number", label: "Frequency Penalty" },
            ],
            isLoading: false,
            // Updated by the socket handlers in connect(); declared here so
            // it is part of the component state.
            connectionStatus: 'Connecting',
            message: '',
            messages: [],
            chatSocket: new WebSocket(scheme + window.location.host + '/ws/chat'),
            config: {
                endPoint: "",
                apiKey: "",
                model: "gpt-3.5-turbo",
                temperature: 1,
                presence_penalty: 0,
                top_p: 1,
                frequency_penalty: 0,
                stream: true
            }
        }
    },
    created() {
        this.connect();
    },
    methods: {
        // Send `text` plus the current history over the socket.
        // Returns false (and logs) when the socket is not open.
        transmit(text) {
            if (this.chatSocket.readyState !== WebSocket.OPEN) {
                console.error('WebSocket is not open; message not sent');
                return false;
            }
            this.chatSocket.send(JSON.stringify({
                'message': text,
                'messages': this.messages,
            }));
            return true;
        },
        // Ask the most recent user question again.
        refresh() {
            for (let i = this.messages.length - 1; i >= 0; i--) {
                if (this.messages[i].role === "user") {
                    this.transmit(this.messages[i].content);
                    return;
                }
            }
        },
        sendMessage() {
            // Pressing Enter leaves a newline in the textarea; trim it and
            // ignore blank messages.
            const text = this.message.trim();
            if (text === "") {
                this.message = "";
                return;
            }
            if (this.chatSocket.readyState !== WebSocket.OPEN) {
                // Keep the typed text so the user can retry once connected.
                console.error('WebSocket is not open; message not sent');
                return;
            }
            this.messages.push({
                role: "user",
                content: text
            });
            this.transmit(text);
            console.log(JSON.stringify({
                'messages': this.messages
            }))
            this.message = "";
        },
        connect() {
            this.chatSocket.onopen = () => {
                this.connectionStatus = 'Connected';
                console.log('WebSocket connected');
            };

            this.chatSocket.onmessage = (event) => {
                const mes = JSON.parse(event.data)
                this.messages.push({
                    role: "Chat bot",
                    content: mes.message
                });
                console.log('Message received: ', event.data);
            };

            this.chatSocket.onclose = () => {
                this.connectionStatus = 'Disconnected';
                console.log('WebSocket disconnected');
            };

            this.chatSocket.onerror = (error) => {
                console.error('WebSocket Error: ', error);
            };
        }
    },
}).mount('#app')
226
+ </script>
227
+ </body>
228
+
229
+ </html>
chat/templates/myfirst.html ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html>
3
+ <body>
4
+
5
+ <h1>Hello World!</h1>
6
+ <p>Welcome {{my_name}} to my first Django project!</p>
7
+
8
+ </body>
9
+ </html>
chat/urls.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ from django.urls import path
2
+ from . import views
3
+
4
+ urlpatterns = [
5
+ path('', views.index, name='index'),
6
+ path("<str:room_name>/", views.room, name="room"),
7
+ ]
chat/views.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ # chat/views.py
2
+ from django.shortcuts import render
3
+
4
+
5
def index(request):
    """Serve the chat page."""
    template_name = "index.html"
    return render(request, template_name)
7
+
8
+
9
def room(request, room_name):
    """Serve the chat page for a named room.

    The app's templates are ``index.html`` and ``myfirst.html``; there is no
    ``chat/room.html``, so the chat page itself is rendered, with
    ``room_name`` available in the template context.
    """
    return render(request, "index.html", {"room_name": room_name})
chatbot_django/__init__.py ADDED
File without changes
chatbot_django/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (191 Bytes). View file
 
chatbot_django/__pycache__/asgi.cpython-310.pyc ADDED
Binary file (955 Bytes). View file
 
chatbot_django/__pycache__/settings.cpython-310.pyc ADDED
Binary file (2.41 kB). View file
 
chatbot_django/__pycache__/urls.cpython-310.pyc ADDED
Binary file (1.04 kB). View file
 
chatbot_django/__pycache__/wsgi.cpython-310.pyc ADDED
Binary file (608 Bytes). View file
 
chatbot_django/asgi.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ ASGI config for chatbot_django project.
3
+
4
+ It exposes the ASGI callable as a module-level variable named ``application``.
5
+
6
+ For more information on this file, see
7
+ https://docs.djangoproject.com/en/5.0/howto/deployment/asgi/
8
+ """
9
+
10
+ import os
11
+
12
+ from channels.auth import AuthMiddlewareStack
13
+ from channels.routing import ProtocolTypeRouter, URLRouter
14
+ from channels.security.websocket import AllowedHostsOriginValidator
15
+ from django.core.asgi import get_asgi_application
16
+ from chat.routing import websocket_urlpatterns
17
+
18
+ os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'chatbot_django.settings')
19
+
20
+ django_asgi_app = get_asgi_application()
21
+
22
+ application = ProtocolTypeRouter({
23
+ "http": django_asgi_app,
24
+ # WebSocket connections are origin-checked, then routed to the chat consumers.
25
+ "websocket": AllowedHostsOriginValidator(
26
+ AuthMiddlewareStack(URLRouter(websocket_urlpatterns))
27
+ ),
28
+ })
chatbot_django/settings.py ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Django settings for chatbot_django project.
3
+
4
+ Generated by 'django-admin startproject' using Django 5.0.4.
5
+
6
+ For more information on this file, see
7
+ https://docs.djangoproject.com/en/5.0/topics/settings/
8
+
9
+ For the full list of settings and their values, see
10
+ https://docs.djangoproject.com/en/5.0/ref/settings/
11
+ """
12
+
13
+ from pathlib import Path
14
+
15
+ # Build paths inside the project like this: BASE_DIR / 'subdir'.
16
+ BASE_DIR = Path(__file__).resolve().parent.parent
17
+
18
+
19
+ # Quick-start development settings - unsuitable for production
20
+ # See https://docs.djangoproject.com/en/5.0/howto/deployment/checklist/
21
+
22
import os

# SECURITY WARNING: keep the secret key used in production secret!
# Set DJANGO_SECRET_KEY in the environment for any real deployment; the
# literal below is only the development fallback.
SECRET_KEY = os.environ.get(
    'DJANGO_SECRET_KEY',
    'django-insecure-l39x6jgx075p9bw8^px-c=pys%&$bzdo_sz_oabf#5bv@^isj9',
)

# SECURITY WARNING: don't run with debug turned on in production!
# DJANGO_DEBUG accepts 1/true/yes/on (case-insensitive); unset keeps DEBUG on.
DEBUG = os.environ.get('DJANGO_DEBUG', 'true').strip().lower() in ('1', 'true', 'yes', 'on')

# Comma-separated host names, e.g. DJANGO_ALLOWED_HOSTS="example.com,.example.org".
# Unset keeps the empty list.
ALLOWED_HOSTS = [
    host.strip()
    for host in os.environ.get('DJANGO_ALLOWED_HOSTS', '').split(',')
    if host.strip()
]
29
+
30
+
31
+ # Application definition
32
+
33
+ INSTALLED_APPS = [
34
+ 'daphne',
35
+ 'django.contrib.admin',
36
+ 'django.contrib.auth',
37
+ 'django.contrib.contenttypes',
38
+ 'django.contrib.sessions',
39
+ 'django.contrib.messages',
40
+ 'django.contrib.staticfiles',
41
+ 'chat'
42
+ ]
43
+
44
+ MIDDLEWARE = [
45
+ 'django.middleware.security.SecurityMiddleware',
46
+ 'django.contrib.sessions.middleware.SessionMiddleware',
47
+ 'django.middleware.common.CommonMiddleware',
48
+ 'django.middleware.csrf.CsrfViewMiddleware',
49
+ 'django.contrib.auth.middleware.AuthenticationMiddleware',
50
+ 'django.contrib.messages.middleware.MessageMiddleware',
51
+ 'django.middleware.clickjacking.XFrameOptionsMiddleware',
52
+ ]
53
+
54
+ ROOT_URLCONF = 'chatbot_django.urls'
55
+
56
+ TEMPLATES = [
57
+ {
58
+ 'BACKEND': 'django.template.backends.django.DjangoTemplates',
59
+ 'DIRS': [],
60
+ 'APP_DIRS': True,
61
+ 'OPTIONS': {
62
+ 'context_processors': [
63
+ 'django.template.context_processors.debug',
64
+ 'django.template.context_processors.request',
65
+ 'django.contrib.auth.context_processors.auth',
66
+ 'django.contrib.messages.context_processors.messages',
67
+ ],
68
+ },
69
+ },
70
+ ]
71
+
72
+ WSGI_APPLICATION = 'chatbot_django.wsgi.application'
73
+ ASGI_APPLICATION = "chatbot_django.asgi.application"
74
+
75
+
76
+ # Database
77
+ # https://docs.djangoproject.com/en/5.0/ref/settings/#databases
78
+
79
+ DATABASES = {
80
+ 'default': {
81
+ 'ENGINE': 'django.db.backends.sqlite3',
82
+ 'NAME': BASE_DIR / 'db.sqlite3',
83
+ }
84
+ }
85
+
86
+
87
+ # Password validation
88
+ # https://docs.djangoproject.com/en/5.0/ref/settings/#auth-password-validators
89
+
90
+ AUTH_PASSWORD_VALIDATORS = [
91
+ {
92
+ 'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator',
93
+ },
94
+ {
95
+ 'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator',
96
+ },
97
+ {
98
+ 'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator',
99
+ },
100
+ {
101
+ 'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator',
102
+ },
103
+ ]
104
+
105
+
106
+ # Internationalization
107
+ # https://docs.djangoproject.com/en/5.0/topics/i18n/
108
+
109
+ LANGUAGE_CODE = 'en-us'
110
+
111
+ TIME_ZONE = 'UTC'
112
+
113
+ USE_I18N = True
114
+
115
+ USE_TZ = True
116
+
117
+
118
+ # Static files (CSS, JavaScript, Images)
119
+ # https://docs.djangoproject.com/en/5.0/howto/static-files/
120
+
121
+ STATIC_URL = 'static/'
122
+
123
+ # Default primary key field type
124
+ # https://docs.djangoproject.com/en/5.0/ref/settings/#default-auto-field
125
+
126
+ DEFAULT_AUTO_FIELD = 'django.db.models.BigAutoField'
chatbot_django/urls.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ URL configuration for chatbot_django project.
3
+
4
+ The `urlpatterns` list routes URLs to views. For more information please see:
5
+ https://docs.djangoproject.com/en/5.0/topics/http/urls/
6
+ Examples:
7
+ Function views
8
+ 1. Add an import: from my_app import views
9
+ 2. Add a URL to urlpatterns: path('', views.home, name='home')
10
+ Class-based views
11
+ 1. Add an import: from other_app.views import Home
12
+ 2. Add a URL to urlpatterns: path('', Home.as_view(), name='home')
13
+ Including another URLconf
14
+ 1. Import the include() function: from django.urls import include, path
15
+ 2. Add a URL to urlpatterns: path('blog/', include('blog.urls'))
16
+ """
17
+ from django.contrib import admin
18
+ from django.urls import path,include
19
+
20
+ urlpatterns = [
21
+ path('chat/',include('chat.urls')),
22
+ path('admin/', admin.site.urls),
23
+ ]
chatbot_django/wsgi.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ WSGI config for chatbot_django project.
3
+
4
+ It exposes the WSGI callable as a module-level variable named ``application``.
5
+
6
+ For more information on this file, see
7
+ https://docs.djangoproject.com/en/5.0/howto/deployment/wsgi/
8
+ """
9
+
10
+ import os
11
+
12
+ from django.core.wsgi import get_wsgi_application
13
+
14
+ os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'chatbot_django.settings')
15
+
16
+ application = get_wsgi_application()
manage.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+ """Django's command-line utility for administrative tasks."""
3
+ import os
4
+ import sys
5
+
6
def main():
    """Run administrative tasks."""
    os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'chatbot_django.settings')
    try:
        from django.core import management
    except ImportError as exc:
        # Re-raise with a hint about the usual causes, keeping the original
        # error as the cause.
        hint = (
            "Couldn't import Django. Are you sure it's installed and "
            "available on your PYTHONPATH environment variable? Did you "
            "forget to activate a virtual environment?"
        )
        raise ImportError(hint) from exc
    management.execute_from_command_line(sys.argv)
18
+
19
+
20
+ if __name__ == '__main__':
21
+ main()
oldindex.html ADDED
@@ -0,0 +1,259 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!doctype html>
2
+ <html>
3
+
4
+ <head>
5
+ <title>Python Project</title>
6
+ <meta charset="UTF-8">
7
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
8
+ <script src="https://cdn.tailwindcss.com"></script>
9
+ </head>
10
+
11
+ <body class="bg-[#2A2A2A] p-0 m-0 text-white">
12
+ <div id="app" class="md:container flex flex-col md:mx-auto mx-0 bg-[#2E2E2E] w-full md:w-[728px] h-screen">
13
+
14
+ <div class="flex flex-col h-full">
15
+ <!-- head -->
16
+ <div class="flex flex-row justify-between items-center p-4 select-none">
17
+ <h1 class="text-2xl font-medium text-center">
18
+ TinyChat {{
19
+ showConfig ? "Config" : ""
20
+ }}
21
+ </h1>
22
+ <div>
23
+ <svg width="24" height="24" viewBox="0 0 24 24" fill="none" xmlns="http://www.w3.org/2000/svg"
24
+ class="cursor-pointer" :class="{'text-white':!showConfig,'text-[#919191]':showConfig}"
25
+ @click="showConfig=!showConfig">
26
+ <path
27
+ d="M11.6439 3C10.9352 3 10.2794 3.37508 9.92001 3.98596L9.49643 4.70605C8.96183 5.61487 7.98937 6.17632 6.935 6.18489L6.09966 6.19168C5.39095 6.19744 4.73822 6.57783 4.38385 7.19161L4.02775 7.80841C3.67338 8.42219 3.67031 9.17767 4.01968 9.7943L4.4315 10.5212C4.95126 11.4386 4.95126 12.5615 4.4315 13.4788L4.01968 14.2057C3.67031 14.8224 3.67338 15.5778 4.02775 16.1916L4.38385 16.8084C4.73822 17.4222 5.39095 17.8026 6.09965 17.8083L6.93501 17.8151C7.98938 17.8237 8.96184 18.3851 9.49644 19.294L9.92001 20.014C10.2794 20.6249 10.9352 21 11.6439 21H12.3561C13.0648 21 13.7206 20.6249 14.08 20.014L14.5035 19.294C15.0381 18.3851 16.0106 17.8237 17.065 17.8151L17.9004 17.8083C18.6091 17.8026 19.2618 17.4222 19.6162 16.8084L19.9723 16.1916C20.3267 15.5778 20.3298 14.8224 19.9804 14.2057L19.5686 13.4788C19.0488 12.5615 19.0488 11.4386 19.5686 10.5212L19.9804 9.7943C20.3298 9.17767 20.3267 8.42219 19.9723 7.80841L19.6162 7.19161C19.2618 6.57783 18.6091 6.19744 17.9004 6.19168L17.065 6.18489C16.0106 6.17632 15.0382 5.61487 14.5036 4.70605L14.08 3.98596C13.7206 3.37508 13.0648 3 12.3561 3H11.6439Z"
28
+ stroke="white" stroke-width="2" stroke-linejoin="round" />
29
+ <path
30
+ d="M12 14.5C13.3807 14.5 14.5 13.3807 14.5 12C14.5 10.6193 13.3807 9.5 12 9.5C10.6193 9.5 9.5 10.6193 9.5 12C9.5 13.3807 10.6193 14.5 12 14.5Z"
31
+ stroke="white" stroke-width="2" />
32
+ </svg>
33
+
34
+ </div>
35
+ </div>
36
+ <!-- config -->
37
+ <div v-show="showConfig" class="flex flex-col gap-4 p-4 select-none">
38
+ <!-- render configFields -->
39
+ <div v-for="field in configFields" :key="field.name" class="flex flex-col gap-2">
40
+ <label class="text-sm text-gray-400">
41
+ {{ field.label }}
42
+ </label>
43
+ <input v-if="field.type=='text'" type="text"
44
+ class="bg-transparent border border-[#919191] rounded-lg p-2" v-model="config[field.name]" />
45
+ <input v-if="field.type=='number'" type="number"
46
+ class="bg-transparent border border-[#919191] rounded-lg p-2" v-model="config[field.name]" />
47
+ </div>
48
+ </div>
49
+ <!-- messages -->
50
+ <div class="flex flex-col overflow-y-auto scroll-smooth" id="messages">
51
+ <div v-for="(message,index) in messages" :key="message.id"
52
+ class="flex flex-col odd:bg-[#F7F7F7]/10 group">
53
+ <div class="p-4 flex flex-col justify-between gap-4">
54
+ <div class="flex flex-row gap-4 capitalize">
55
+ <div class="text-gray-400 text-sm" >
56
+ <!-- Editable role label: one row; rows takes a number, not the role string. --><textarea
57
+ class="text-gray-400 text-sm size-full outline-0 bg-transparent border-none text-white rounded-lg resize-none"
58
+ rows="1" v-model="message.role"></textarea>
59
+ </div>
60
+ <div class="flex flex-col w-full">
61
+ <textarea
62
+ class="size-full outline-0 bg-transparent border-none text-white rounded-lg resize-none"
63
+ :rows="message.content.split('\n').length" v-model="message.content"></textarea>
64
+ <div class="mt-4" v-if="index==messages.length-1 && message.role=='assistant'">
65
+ <svg stroke="currentColor" fill="none" stroke-width="1.5" viewBox="0 0 24 24"
66
+ stroke-linecap="round" stroke-linejoin="round" height="1em" width="1em"
67
+ xmlns="http://www.w3.org/2000/svg"
68
+ class="group-hover:block cursor-pointer h-3 w-3" @click="refresh"
69
+ :class="{'animate-spin':isLoading}">
70
+ <!-- Regenerate icon: a click calls refresh(); the icon spins while isLoading
71
+ is true. Whether it is rendered at all is decided by the v-if on the
72
+ enclosing div (last message in the list, role 'assistant'). -->
73
+ <polyline points="1 4 1 10 7 10"></polyline>
74
+ <polyline points="23 20 23 14 17 14"></polyline>
75
+ <path d="M20.49 9A9 9 0 0 0 5.64 5.64L1 10m22 4l-4.64 4.36A9 9 0 0 1 3.51 15">
76
+ </path>
77
+
78
+ </svg>
79
+ </div>
80
+ </div>
81
+ </div>
82
+ </div>
83
+ </div>
84
+ </div>
85
+ </div>
86
+ <!-- send message -->
87
+ <div class="flex flex-row justify-between items-center p-4 sticky bottom-0 bg-[#2E2E2E]" v-if="!showConfig">
88
+ <div class="w-full flex flex-row border border-1 border-[#919191] rounded-xl px-4 py-6 items-center">
89
+ <textarea
90
+ class="w-full h-full outline-0 bg-transparent border-none text-white resize-none text-md max-h-[100px]"
91
+ v-on:keyup.enter="sendMessage" placeholder="Type a message" :rows="message.split('\n').length"
92
+ v-model="message"></textarea>
93
+ <div class="flex flex-col justify-end gap-4 text-[#2E2E2E] rounded-md p-1"
94
+ :class="{'bg-white':message!='','bg-white/10':message==''}">
95
+ <button @click="sendMessage">
96
+ <svg class="w-6 h-6" fill="none" stroke="currentColor" viewBox="0 0 24 24"
97
+ xmlns="http://www.w3.org/2000/svg">
98
+ <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2"
99
+ d="M12 19l9 2-9-18-9 18 9-2zm0 0v-8" />
100
+ </svg>
101
+ </button>
102
+ </div>
103
+ </div>
104
+ </div>
105
+
106
+ </div>
107
+ <script type="module">
108
+ import { createApp } from 'https://unpkg.com/vue@3/dist/vue.esm-browser.js'
109
+ import { SSE } from "https://cdn.jsdelivr.net/npm/[email protected]/lib/sse.min.js"
110
+
111
+ createApp({
112
+ data() {
113
+ return {
114
+ showConfig: false,
115
+ configFields: [
116
+ {
117
+ name: "endPoint",
118
+ type: "text",
119
+ label: "End Point"
120
+ },
121
+ {
122
+ name: "apiKey",
123
+ type: "text",
124
+ label: "API Key"
125
+ },
126
+ {
127
+ name: "model",
128
+ type: "text",
129
+ label: "Model"
130
+ },
131
+ {
132
+ name: "temperature",
133
+ type: "number",
134
+ label: "Temperature"
135
+ },
136
+ {
137
+ name: "presence_penalty",
138
+ type: "number",
139
+ label: "Presence Penalty"
140
+ },
141
+ {
142
+ name: "top_p",
143
+ type: "number",
144
+ label: "Top P"
145
+ },
146
+ {
147
+ name: "frequency_penalty",
148
+ type: "number",
149
+ label: "Frequency Penalty"
150
+ },
151
+ ],
152
+ isLoading: false,
153
+ message: '',
154
+ messages: [],
155
+ chatSocket : new WebSocket(
156
+ 'ws://'
157
+ + window.location.host
158
+ + '/chat'
159
+ ),
160
+ config: {
161
+ endPoint: "",
162
+ apiKey: "",
163
+ model: "gpt-3.5-turbo",
164
+ temperature: 1,
165
+ presence_penalty: 0,
166
+ top_p: 1,
167
+ frequency_penalty: 0,
168
+ stream: true
169
+ }
170
+ }
171
+
172
+ },
173
+ methods: {
174
+ refresh() { // click handler of the regenerate icon in the template
175
+ this.send(true); // true is passed as send()'s refresh flag
176
+ },
177
+ sendMessage() {
178
+ this.messages.push({
179
+ role: "user",
180
+ content: this.message
181
+ });
182
+ // this.send();
183
+ this.chatSocket.send(JSON.stringify({
184
+ 'message': this.message
185
+ }));
186
+ this.message = "";
187
+ },
188
+ send(refresh = false) { // refresh is only read by the commented-out SSE code below
189
+ this.isLoading = true; // NOTE(review): never reset while the SSE code below stays commented out, so the refresh icon keeps spinning
190
+ // let data = {
191
+ // "messages": this.messages,
192
+ // ...this.config
193
+ // }
194
+
195
+ // if (refresh) {
196
+ // this.messages.pop();
197
+ // this.messages.push({
198
+ // role: "assistant",
199
+ // content: ""
200
+ // });
201
+ // }
202
+ // let addedMsg = false;
203
+ // var source = new SSE(this.config.endPoint, {
204
+ // headers: {
205
+ // "accept": "*/*",
206
+ // 'content-type': 'application/json',
207
+ // authorization: 'Bearer ' + this.config.apiKey,
208
+ // },
209
+ // payload: JSON.stringify(data),
210
+ // method: 'POST'
211
+ // });
212
+ // source.addEventListener('message', (e) => {
213
+ // if (!addedMsg && !refresh) {
214
+ // this.messages.push({
215
+ // role: "assistant",
216
+ // content: ""
217
+ // });
218
+ // addedMsg = true;
219
+ // }
220
+ // try {
221
+ // var payload = JSON.parse(e.data);
222
+ // if (payload.choices[0].finish_reason == "stop") {
223
+ // this.isLoading = false;
224
+ // return;
225
+ // }
226
+ // const message = payload.choices[0].delta.content;
227
+ // if (message) {
228
+ // this.messages[this.messages.length - 1].content += message;
229
+ // }
230
+ // } catch (e) {
231
+ // }
232
+ // });
233
+ // source.addEventListener('error', (error) => {
234
+ // console.error('Error:', error);
235
+ // source.close();
236
+ // this.isLoading = false;
237
+ // });
238
+ }
239
+ },
240
+ // watch: {
241
+ // config: {
242
+ // deep: true,
243
+ // handler(newConfig) {
244
+ // if (newConfig) {
245
+ // localStorage.setItem("config", JSON.stringify(newConfig));
246
+ // }
247
+ // }
248
+ // }
249
+ // },
250
+ // mounted() {
251
+ // if (localStorage.getItem("config")) {
252
+ // this.config = JSON.parse(localStorage.getItem("config"));
253
+ // }
254
+ // },
255
+ }).mount('#app')
256
+ </script>
257
+ </body>
258
+
259
+ </html>
topic_descriptions.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "computer science":"Computer Science: The Language of Machines. Focus: This field deals with the theory, design, development, and application of software and hardware systems. It delves into the creation of algorithms, data structures, and programming languages that enable computers to process information and solve problems. Core Concepts: Algorithms: Step-by-step instructions for computers to solve problems. Data Structures: Organized ways to store and access data efficiently. Programming Languages: Tools to communicate instructions to computers. Artificial Intelligence: Machines mimicking human-like intelligence. Machine Learning: Algorithms that learn from data without explicit programming. Applications: From designing websites and mobile apps to developing complex operating systems and artificial intelligence, computer science underpins the digital world we interact with daily.",
3
+ "economics":"Economics: The Science of Choice and Exchange. Focus: Economics examines how individuals, societies, and governments make choices in a world with scarce resources. It analyzes production, distribution, and consumption of goods and services, studying factors like market behavior, prices, and money. Core Concepts: Scarcity: Limited resources versus unlimited wants and needs. Supply and Demand: Forces that determine market prices. Rational Choice Theory: Individuals making choices based on their preferences and constraints. Macroeconomics: Economy-wide phenomena like inflation, unemployment, and economic growth. Microeconomics: Individual and firm behavior in markets. Applications: Economic principles are used to understand market trends, formulate effective government policies, design financial systems, and make informed investment decisions.",
4
+ "electrical engineering and system science":"Electrical Engineering & System Science: Bridging Power and Complexity. Focus: This interdisciplinary field combines electrical engineering, which deals with electricity, electronics, and electromagnetism, with system science, which focuses on analyzing and designing complex systems. It involves designing and developing electrical circuits, devices, and systems for power generation, transmission, control, and communication. Core Concepts: Circuits: Networks of electrical components like resistors, capacitors, and transistors. Electromagnetism: Interaction of electric and magnetic fields. Power Systems: Networks for generating, transmitting, and distributing electricity. Control Systems: Systems that regulate the behavior of other systems. Signals and Systems: Analysis of how information is represented and processed in electrical systems. Applications: Electrical engineers and system scientists create a vast array of technologies, from power grids and communication systems to medical devices and embedded systems in everyday gadgets.",
5
+ "mathematics":"Mathematics: The Universal Language of Science. Focus: Mathematics is the abstract study of numbers, quantity, relationships, structures, and patterns. It provides a foundation for many other disciplines by offering a language and tools to express ideas, model problems, and derive solutions. Core Concepts: Calculus: Branch of mathematics dealing with rates of change and accumulation. Algebra: Studying relationships between variables using symbols and operations. Geometry: Properties of shapes and spaces. Probability and Statistics: Analyzing randomness and drawing meaningful conclusions from data. Logic: Reasoning and making valid deductions from given statements. Applications: Mathematics is an indispensable tool in all scientific fields, from physics and engineering to economics and computer science. It is also used in finance, cryptography, and game theory."
6
+ }