Arbazkhan-cs committed on
Commit
7765fa9
1 Parent(s): 7db5fb1

Update utils.py

Browse files
Files changed (1) hide show
  1. utils.py +69 -69
utils.py CHANGED
@@ -1,69 +1,69 @@
1
- import os
2
- from langchain_groq import ChatGroq
3
- from langchain_community.vectorstores.faiss import FAISS
4
- from langchain_community.document_loaders import PyPDFDirectoryLoader
5
- from langchain.text_splitter import RecursiveCharacterTextSplitter
6
- from langchain_community.embeddings import HuggingFaceHubEmbeddings
7
- from langchain.tools.retriever import create_retriever_tool
8
- from langchain_community.tools import ArxivQueryRun
9
- from langchain_community.utilities import ArxivAPIWrapper
10
- from langchain.agents import tool
11
- from langchain.prompts import SystemMessagePromptTemplate, HumanMessagePromptTemplate, PromptTemplate, MessagesPlaceholder
12
- from langchain import hub
13
- from googlesearch import search
14
- import requests
15
- from tqdm import tqdm
16
- from bs4 import BeautifulSoup
17
-
18
- def create_retriever_tool_agent(pdf_dir="./Pdfs"):
19
- loader = PyPDFDirectoryLoader(pdf_dir)
20
- documents = loader.load()
21
- doc_splitter = RecursiveCharacterTextSplitter(
22
- chunk_size=1000,
23
- chunk_overlap=200
24
- )
25
- split_docs = doc_splitter.split_documents(documents)
26
- embedding = HuggingFaceHubEmbeddings(model="sentence-transformers/all-MiniLM-L6-v2")
27
- db = FAISS.from_documents(split_docs, embedding)
28
- return create_retriever_tool(retriever=db.as_retriever(), name="Pdf_search", description="Use to search information from the PDFs. If information is not found, then use other tools.")
29
-
30
- def create_arxiv_tool_agent():
31
- arxiv_api = ArxivAPIWrapper(top_k_results=1, doc_content_chars_max=200)
32
- return ArxivQueryRun(api_wrapper=arxiv_api)
33
-
34
- @tool
35
- def google_search(input: str, num_results: int = 5):
36
- """The Google Search tool enables the AI to fetch information from the web using Google's search engine. It is used when the AI's internal database does not have specific information requested by the user."""
37
- search_results = [url for url in tqdm(search(input, num_results=num_results))]
38
- return fetch_content(search_results)
39
-
40
- def fetch_content(urls: list):
41
- text = []
42
- for link in tqdm(urls):
43
- try:
44
- response = requests.get(link)
45
- response.raise_for_status()
46
- soup = BeautifulSoup(response.text, 'html.parser')
47
- paragraphs = ' '.join([p.text for p in soup.find_all('p')])
48
- paragraphs = paragraphs[:1000]
49
- if paragraphs:
50
- text.append(paragraphs)
51
- except Exception:
52
- continue
53
- return " ".join(text)
54
-
55
- def get_prompt():
56
- prompt = hub.pull("hwchase17/openai-functions-agent")
57
- prompt.messages = [
58
- SystemMessagePromptTemplate(prompt=PromptTemplate(
59
- input_variables=[],
60
- template="You are a helpful AI that answer from his database and if information is not found, then use a tool. You can use one tool at a time to search for information. Once a tool is used, do not switch to another tool."
61
- )),
62
- MessagesPlaceholder(variable_name='chat_history', optional=True),
63
- HumanMessagePromptTemplate(prompt=PromptTemplate(
64
- input_variables=['input'],
65
- template='{input}'
66
- )),
67
- MessagesPlaceholder(variable_name='agent_scratchpad')
68
- ]
69
- return prompt
 
1
+ import os
2
+ from langchain_groq import ChatGroq
3
+ from langchain_community.vectorstores.faiss import FAISS
4
+ from langchain_community.document_loaders import PyPDFDirectoryLoader
5
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
6
+ from langchain_community.embeddings import HuggingFaceHubEmbeddings
7
+ from langchain.tools.retriever import create_retriever_tool
8
+ from langchain_community.tools import ArxivQueryRun
9
+ from langchain_community.utilities import ArxivAPIWrapper
10
+ from langchain.agents import tool
11
+ from langchain.prompts import SystemMessagePromptTemplate, HumanMessagePromptTemplate, PromptTemplate, MessagesPlaceholder
12
+ from langchain import hub
13
+ from googlesearch import search
14
+ import requests
15
+ from tqdm import tqdm
16
+ from bs4 import BeautifulSoup
17
+
18
def create_retriever_tool_agent(pdf_dir="./Pdfs"):
    """Build a retriever tool backed by a FAISS index of the PDFs in *pdf_dir*.

    Loads every PDF in the directory, splits the pages into overlapping
    chunks, embeds them with a HuggingFace Hub sentence-transformer, and
    wraps the resulting FAISS retriever as a LangChain tool named
    "Pdf_search".
    """
    # Load and chunk the source documents (1000-char windows, 200-char overlap).
    pages = PyPDFDirectoryLoader(pdf_dir).load()
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    chunks = splitter.split_documents(pages)
    # Embed the chunks and index them in an in-memory FAISS store.
    embeddings = HuggingFaceHubEmbeddings(model="sentence-transformers/all-MiniLM-L6-v2")
    vector_store = FAISS.from_documents(chunks, embeddings)
    return create_retriever_tool(
        retriever=vector_store.as_retriever(),
        name="Pdf_search",
        description="Use to search information from the PDFs first. If information is not found, then use other tools.",
    )
29
+
30
def create_arxiv_tool_agent():
    """Return an Arxiv search tool limited to 1 result of at most 200 chars."""
    wrapper = ArxivAPIWrapper(top_k_results=1, doc_content_chars_max=200)
    return ArxivQueryRun(api_wrapper=wrapper)
33
+
34
@tool
def google_search(input: str, num_results: int = 5):
    """The Google Search tool enables the AI to fetch information from the web using Google's search engine. It is used when the AI's internal database does not have specific information requested by the user."""
    # Collect the result URLs (tqdm shows progress), then scrape their text.
    # NOTE: the docstring above doubles as the tool description for the agent.
    urls = list(tqdm(search(input, num_results=num_results)))
    return fetch_content(urls)
39
+
40
def fetch_content(urls: list):
    """Download each URL and return the concatenated paragraph text (best effort).

    For every URL the page is fetched, all ``<p>`` text is extracted with
    BeautifulSoup, and the result is truncated to the first 1000 characters
    per page. Any URL that fails to download or parse is silently skipped,
    so one bad link cannot break the whole search result.

    Args:
        urls: List of URL strings to scrape.

    Returns:
        A single space-joined string of page excerpts; empty string if
        nothing could be fetched.
    """
    text = []
    for link in tqdm(urls):
        try:
            # Fix: a timeout stops one unresponsive host from hanging the
            # whole tool indefinitely (requests has no default timeout).
            response = requests.get(link, timeout=10)
            response.raise_for_status()
            soup = BeautifulSoup(response.text, 'html.parser')
            paragraphs = ' '.join([p.text for p in soup.find_all('p')])
            # Cap each page's contribution to bound the prompt size.
            paragraphs = paragraphs[:1000]
            if paragraphs:
                text.append(paragraphs)
        except Exception:
            # Deliberate best-effort: skip any page that errors out.
            continue
    return " ".join(text)
54
+
55
def get_prompt():
    """Return the hub openai-functions-agent prompt with a custom system message.

    Pulls the standard template from the LangChain hub, then replaces its
    message list with: a system instruction (answer from own knowledge
    first, one tool at a time), an optional chat-history placeholder, the
    human input, and the agent scratchpad placeholder.
    """
    base = hub.pull("hwchase17/openai-functions-agent")
    system_msg = SystemMessagePromptTemplate(
        prompt=PromptTemplate(
            input_variables=[],
            template="You are a helpful AI that uses tools to answer human questions. First try to answer from your database, and if information is not found, then use a tool. You can use one tool at a time to search for information. Once a tool is used, do not switch to another tool."
        )
    )
    human_msg = HumanMessagePromptTemplate(
        prompt=PromptTemplate(input_variables=['input'], template='{input}')
    )
    base.messages = [
        system_msg,
        MessagesPlaceholder(variable_name='chat_history', optional=True),
        human_msg,
        MessagesPlaceholder(variable_name='agent_scratchpad'),
    ]
    return base