tosanoob committed on
Commit
e3b816c
·
verified ·
1 Parent(s): f93a31b

Read the API key from the API_KEY environment variable when apikey.txt is absent

Browse files
Files changed (1) hide show
  1. chat/model_manage.py +176 -173
chat/model_manage.py CHANGED
@@ -1,174 +1,177 @@
1
- import chat.arxiv_bot.arxiv_bot_utils as utils
2
- import google.generativeai as genai
3
- import json
4
- import os
5
- from google.generativeai.types import content_types
6
- from collections.abc import Iterable
7
- from IPython import display
8
- from IPython.display import Markdown
9
-
10
- # ----------------------- define instructions -----------------------
11
- system_instruction = """You are a library chatbot that help people to find relevant articles about a topic, or find a specific article with given title and authors.
12
- Your job is to analyze the user question, generate enough parameters based on the user question and use the tools that are given to you.
13
- Also, after the function call is done, you must post-process the results in a more conversational form, providing some explanation about the paper based on its summary to avoid recitation.
14
- You must provide the link to its Arxiv pdf page."""
15
-
16
- # --------------------------- define tools --------------------------
17
- def search_for_relevant_article(keywords: list['str'], topic_description: str) -> str:
18
- """This tool is used to search for articles from the database which is relevant to a topic, using a list of more than 3 keywords and a long sentence topic description.
19
- If there is not enough 3 keywords from the question, the model must generate more keywords related to the topic.
20
- If there is no description about the topic, the model must generate a description for the function call.
21
- \nThe result is a string describe the records found from the database: 'Record no. - Title: <title>, Author: <authors>, Link: <link to the pdf file>, Summary: <summary of the article>'. There can be many records.
22
- \nIf the result is 'Information not found' it means some error has occured, or the database has no relevant article"""
23
-
24
- print('Keywords: {}, description: {}'.format(keywords,topic_description))
25
-
26
- results = utils.ArxivChroma.query_relevant(keywords=keywords, query_texts=topic_description)
27
- # print(results)
28
- ids = results['metadatas'][0]
29
- if len(ids) == 0:
30
- # go crawl some
31
- new_records = utils.crawl_arxiv(keyword_list=keywords, max_results=10)
32
- # print("Got new records: ",len(new_records))
33
- if type(new_records) == str:
34
- return "Information not found"
35
-
36
- utils.ArxivChroma.add(new_records)
37
- utils.ArxivSQL.add(new_records)
38
- results = utils.ArxivChroma.query_relevant(keywords=keywords, query_texts=topic_description)
39
- ids = results['metadatas'][0]
40
- # print("Re-queried on chromadb, results: ",ids)
41
-
42
- paper_id = [id['paper_id'] for id in ids]
43
- paper_info = utils.ArxivSQL.query_id(paper_id)
44
- # print(paper_info)
45
- records = [] # get title (2), author (3), link (6)
46
- result_string = ""
47
- if paper_info:
48
- for i in range(len(paper_info)):
49
- result_string += "Record no.{} - Title: {}, Author: {}, Link: {}, ".format(i+1,paper_info[i][2],paper_info[i][3],paper_info[i][6])
50
- id = paper_info[i][0]
51
- selected_document = utils.ArxivChroma.query_exact(id)["documents"]
52
- doc_str = "Summary:"
53
- for doc in selected_document:
54
- doc_str+= doc + " "
55
- result_string += doc_str
56
- records.append([paper_info[i][2],paper_info[i][3],paper_info[i][6]])
57
- return result_string
58
- else:
59
- return "Information not found"
60
-
61
- def search_for_specific_article(title: str, authors: list['str']) -> str:
62
- """This tool is used to search for a specific article from the database, with its name and authors given.
63
- \nThe result is a string describe the records found from the database: 'Record no. - Title: <title>, Author: <authors>, Link: <link to the pdf file>, Summary: <summary of the article>'. There can be many records.
64
- \nIf the result is 'Information not found' it means some error has occured, or the database has no relevant article"""
65
-
66
- print('Keywords: {}, description: {}'.format(title,authors))
67
-
68
- paper_info = utils.ArxivSQL.query(title = title,author = authors)
69
- if paper_info:
70
- new_records = utils.crawl_exact_paper(title=title,author=authors)
71
- # print("Got new records: ",len(new_records))
72
- if type(new_records) == str:
73
- # print(new_records)
74
- return "Information not found"
75
- utils.ArxivChroma.add(new_records)
76
- utils.ArxivSQL.add(new_records)
77
- paper_info = utils.ArxivSQL.query(title = title,author = authors)
78
- # print("Re-queried on chromadb, results: ",paper_info)
79
- # -------------------------------------
80
- records = [] # get title (2), author (3), link (6)
81
- result_string = ""
82
- if paper_info:
83
- for i in range(len(paper_info)):
84
- result_string += "Record no.{} - Title: {}, Author: {}, Link: {}, ".format(i+1,paper_info[i][2],paper_info[i][3],paper_info[i][6])
85
- id = paper_info[i][0]
86
- selected_document = utils.ArxivChroma.query_exact(id)["documents"]
87
- doc_str = "Summary:"
88
- for doc in selected_document:
89
- doc_str+= doc + " "
90
- result_string += doc_str
91
- records.append([paper_info[i][2],paper_info[i][3],paper_info[i][6]])
92
- # process results:
93
- if len(result_string) == 0:
94
- return "Information not found"
95
- return result_string
96
-
97
- def answer_others_questions(question: str) -> str:
98
- """This tool is the default option for other questions that are not related to article or paper request. The model will response the question with its own answer."""
99
- return question
100
-
101
- tools = [search_for_relevant_article, search_for_specific_article, answer_others_questions]
102
- tools_name = ['search_for_relevant_article', 'search_for_specific_article', 'answer_others_questions']
103
-
104
- # load key, prepare config ------------------------
105
- with open("apikey.txt","r") as apikey:
106
- key = apikey.readline()
107
- genai.configure(api_key=key)
108
- generation_config = {
109
- "temperature": 1,
110
- "top_p": 1,
111
- "top_k": 0,
112
- "max_output_tokens": 2048,
113
- "response_mime_type": "text/plain",
114
- }
115
- safety_settings = [
116
- {
117
- "category": "HARM_CATEGORY_DANGEROUS",
118
- "threshold": "BLOCK_NONE",
119
- },
120
- {
121
- "category": "HARM_CATEGORY_HARASSMENT",
122
- "threshold": "BLOCK_NONE",
123
- },
124
- {
125
- "category": "HARM_CATEGORY_HATE_SPEECH",
126
- "threshold": "BLOCK_NONE",
127
- },
128
- {
129
- "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
130
- "threshold": "BLOCK_NONE",
131
- },
132
- {
133
- "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
134
- "threshold": "BLOCK_NONE",
135
- },
136
- ]
137
- # this function return a tool_config with mode 'none', 'any', 'auto'
138
- def tool_config_from_mode(mode: str, fns: Iterable[str] = ()):
139
- """Create a tool config with the specified function calling mode."""
140
- return content_types.to_tool_config(
141
- {"function_calling_config": {"mode": mode, "allowed_function_names": fns}}
142
- )
143
-
144
- def init_model(mode = "auto"):
145
- # return an instance of a model, holding its own ChatSession
146
- # every socket session holds its own model
147
- # this function must be called upon socket init, also start_chat() to begin chat
148
- model = genai.GenerativeModel(model_name="gemini-1.5-flash-latest",
149
- safety_settings=safety_settings,
150
- generation_config=generation_config,
151
- tools=tools,
152
- tool_config=tool_config_from_mode(mode),
153
- system_instruction=system_instruction)
154
- chat_instance = model.start_chat(enable_automatic_function_calling=True)
155
- return model, chat_instance
156
-
157
- # handle tool call and chatsession
158
- def full_chain_history_question(user_input, chat_instance: genai.ChatSession, mode="auto"):
159
- try:
160
- response = chat_instance.send_message(user_input,tool_config=tool_config_from_mode(mode)).text
161
- return response, chat_instance.history
162
- except Exception as e:
163
- print(e)
164
- return f'Error occured during call: {e}', chat_instance.history
165
-
166
- # for printing log session
167
- def print_history(history):
168
- for content in history:
169
- part = content.parts[0]
170
- print(content.role, "->", type(part).to_dict(part))
171
- print('-'*80)
172
-
173
- utils.ArxivChroma.connect()
 
 
 
174
  utils.ArxivSQL.connect()
 
1
+ import chat.arxiv_bot.arxiv_bot_utils as utils
2
+ import google.generativeai as genai
3
+ import json
4
+ import os
5
+ from google.generativeai.types import content_types
6
+ from collections.abc import Iterable
7
+ from IPython import display
8
+ from IPython.display import Markdown
9
+
10
+ # ----------------------- define instructions -----------------------
11
# System prompt handed to the model in init_model(); the four sentences are
# joined with newlines into a single instruction string.
system_instruction = "\n".join(
    (
        "You are a library chatbot that help people to find relevant articles about a topic, or find a specific article with given title and authors.",
        "Your job is to analyze the user question, generate enough parameters based on the user question and use the tools that are given to you.",
        "Also, after the function call is done, you must post-process the results in a more conversational form, providing some explanation about the paper based on its summary to avoid recitation.",
        "You must provide the link to its Arxiv pdf page.",
    )
)
15
+
16
+ # --------------------------- define tools --------------------------
17
def search_for_relevant_article(keywords: list['str'], topic_description: str) -> str:
    """This tool is used to search for articles from the database which is relevant to a topic, using a list of more than 3 keywords and a long sentence topic description.
    If there is not enough 3 keywords from the question, the model must generate more keywords related to the topic.
    If there is no description about the topic, the model must generate a description for the function call.
    \nThe result is a string describe the records found from the database: 'Record no. - Title: <title>, Author: <authors>, Link: <link to the pdf file>, Summary: <summary of the article>'. There can be many records.
    \nIf the result is 'Information not found' it means some error has occured, or the database has no relevant article"""
    # NOTE(review): this function is registered in `tools`, so the docstring
    # above is presumably forwarded to the model as the tool description;
    # its wording is intentionally left unchanged.

    print('Keywords: {}, description: {}'.format(keywords, topic_description))

    results = utils.ArxivChroma.query_relevant(keywords=keywords, query_texts=topic_description)
    matches = results['metadatas'][0]
    if not matches:
        # Nothing relevant stored yet: crawl arXiv, persist the new records in
        # both stores, then query again.
        new_records = utils.crawl_arxiv(keyword_list=keywords, max_results=10)
        if isinstance(new_records, str):
            # The crawler's string return value is treated as a failure.
            return "Information not found"

        utils.ArxivChroma.add(new_records)
        utils.ArxivSQL.add(new_records)
        results = utils.ArxivChroma.query_relevant(keywords=keywords, query_texts=topic_description)
        matches = results['metadatas'][0]
        if not matches:
            # Still nothing after crawling; avoid an SQL lookup with no ids.
            return "Information not found"

    paper_ids = [meta['paper_id'] for meta in matches]
    paper_info = utils.ArxivSQL.query_id(paper_ids)
    if not paper_info:
        return "Information not found"

    # Row columns used below: id (0), title (2), author (3), link (6).
    pieces = []
    for number, row in enumerate(paper_info, start=1):
        pieces.append(
            "Record no.{} - Title: {}, Author: {}, Link: {}, ".format(number, row[2], row[3], row[6])
        )
        # The summary is the concatenation of every stored document chunk.
        chunks = utils.ArxivChroma.query_exact(row[0])["documents"]
        pieces.append("Summary:" + "".join(chunk + " " for chunk in chunks))
    return "".join(pieces)
60
+
61
def search_for_specific_article(title: str, authors: list['str']) -> str:
    """This tool is used to search for a specific article from the database, with its name and authors given.
    \nThe result is a string describe the records found from the database: 'Record no. - Title: <title>, Author: <authors>, Link: <link to the pdf file>, Summary: <summary of the article>'. There can be many records.
    \nIf the result is 'Information not found' it means some error has occured, or the database has no relevant article"""
    # NOTE(review): this function is registered in `tools`, so the docstring
    # above is presumably forwarded to the model as the tool description;
    # its wording is intentionally left unchanged.

    # The log label previously said "Keywords/description" (copied from the
    # sibling tool); it now names the values actually printed.
    print('Title: {}, authors: {}'.format(title, authors))

    paper_info = utils.ArxivSQL.query(title=title, author=authors)
    if not paper_info:
        # Fix: crawl only when the database has NO match. The previous
        # condition was inverted and crawled only for papers already stored.
        new_records = utils.crawl_exact_paper(title=title, author=authors)
        if isinstance(new_records, str):
            # The crawler's string return value is treated as a failure.
            return "Information not found"
        utils.ArxivChroma.add(new_records)
        utils.ArxivSQL.add(new_records)
        paper_info = utils.ArxivSQL.query(title=title, author=authors)

    if not paper_info:
        return "Information not found"

    # Row columns used below: id (0), title (2), author (3), link (6).
    pieces = []
    for number, row in enumerate(paper_info, start=1):
        pieces.append(
            "Record no.{} - Title: {}, Author: {}, Link: {}, ".format(number, row[2], row[3], row[6])
        )
        # The summary is the concatenation of every stored document chunk.
        chunks = utils.ArxivChroma.query_exact(row[0])["documents"]
        pieces.append("Summary:" + "".join(chunk + " " for chunk in chunks))
    return "".join(pieces)
96
+
97
# Fallback tool: it performs no lookup and simply hands the question back.
# The docstring is kept verbatim because the function is registered in `tools`.
def answer_others_questions(question: str) -> str:
    """This tool is the default option for other questions that are not related to article or paper request. The model will response the question with its own answer."""
    passthrough = question
    return passthrough
100
+
101
# Callables offered to the model as tools (passed to GenerativeModel in init_model).
tools = [
    search_for_relevant_article,
    search_for_specific_article,
    answer_others_questions,
]
# The same tools by name, in registration order.
tools_name = [tool.__name__ for tool in tools]
103
+
104
+ # load key, prepare config ------------------------
105
# Resolve the API key: a local apikey.txt wins when present, otherwise fall
# back to the API_KEY environment variable (None when that is unset too).
if os.path.exists('apikey.txt'):  # fix: os.path.exist does not exist
    with open("apikey.txt", "r") as apikey:
        # readline() keeps the trailing newline; strip it so the key is clean.
        key = apikey.readline().strip()
else:
    key = os.environ.get('API_KEY')
110
# Hand the resolved key to the SDK before any model object is created.
genai.configure(
    api_key=key,
)
111
# Generation settings passed to the model constructor in init_model().
generation_config = dict(
    temperature=1,
    top_p=1,
    top_k=0,
    max_output_tokens=2048,
    response_mime_type="text/plain",
)
# Every listed harm category gets the same BLOCK_NONE threshold.
safety_settings = [
    {"category": category, "threshold": "BLOCK_NONE"}
    for category in (
        "HARM_CATEGORY_DANGEROUS",
        "HARM_CATEGORY_HARASSMENT",
        "HARM_CATEGORY_HATE_SPEECH",
        "HARM_CATEGORY_SEXUALLY_EXPLICIT",
        "HARM_CATEGORY_DANGEROUS_CONTENT",
    )
]
140
+ # this function returns a tool_config for mode 'none', 'any', or 'auto'
141
def tool_config_from_mode(mode: str, fns: Iterable[str] = ()):
    """Build a tool config for the requested function-calling mode.

    Args:
        mode: Function-calling mode string placed under "mode".
        fns: Function names placed under "allowed_function_names";
            empty by default.

    Returns:
        The result of ``content_types.to_tool_config`` for the assembled dict.
    """
    calling_config = {"mode": mode, "allowed_function_names": fns}
    return content_types.to_tool_config({"function_calling_config": calling_config})
146
+
147
def init_model(mode = "auto"):
    """Create a model together with its own chat session.

    Per the original notes, every socket session holds its own model, so this
    is meant to be called when a socket is initialised.

    Args:
        mode: Function-calling mode forwarded to ``tool_config_from_mode``.

    Returns:
        A ``(model, chat_instance)`` tuple; the chat is started with
        automatic function calling enabled.
    """
    model_kwargs = dict(
        model_name="gemini-1.5-flash-latest",
        safety_settings=safety_settings,
        generation_config=generation_config,
        tools=tools,
        tool_config=tool_config_from_mode(mode),
        system_instruction=system_instruction,
    )
    model = genai.GenerativeModel(**model_kwargs)
    return model, model.start_chat(enable_automatic_function_calling=True)
159
+
160
+ # handle tool call and chatsession
161
def full_chain_history_question(user_input, chat_instance: genai.ChatSession, mode="auto"):
    """Send one user message through the chat session and return the reply.

    Args:
        user_input: Message forwarded to ``chat_instance.send_message``.
        chat_instance: The chat session to talk to.
        mode: Function-calling mode used to build the per-message tool config.

    Returns:
        A ``(text, history)`` tuple. On any exception the error is printed
        and ``text`` is an error message instead of the model's reply.
    """
    try:
        reply = chat_instance.send_message(
            user_input,
            tool_config=tool_config_from_mode(mode),
        )
        answer = reply.text
        return answer, chat_instance.history
    except Exception as e:
        # Best-effort boundary: report the failure to the caller as text.
        print(e)
        failure = f'Error occured during call: {e}'
        return failure, chat_instance.history
168
+
169
+ # for printing log session
170
def print_history(history):
    """Print each history entry's role and first part, then a separator line.

    Args:
        history: Iterable of entries exposing ``role`` and ``parts``; the
            first part's type must provide ``to_dict``.
    """
    separator = '-' * 80
    for entry in history:
        first_part = entry.parts[0]
        part_type = type(first_part)
        print(entry.role, "->", part_type.to_dict(first_part))
        print(separator)
175
+
176
# Connect both article stores when this module is imported (Chroma first, then SQL).
for _store in (utils.ArxivChroma, utils.ArxivSQL):
    _store.connect()