|
import os |
|
from langchain_groq import ChatGroq |
|
from langchain_community.vectorstores.faiss import FAISS |
|
from langchain_community.document_loaders import PyPDFDirectoryLoader |
|
from langchain.text_splitter import RecursiveCharacterTextSplitter |
|
from langchain_community.embeddings import HuggingFaceHubEmbeddings |
|
from langchain.tools.retriever import create_retriever_tool |
|
from langchain_community.tools import ArxivQueryRun |
|
from langchain_community.utilities import ArxivAPIWrapper |
|
from langchain.agents import tool |
|
from langchain.prompts import SystemMessagePromptTemplate, HumanMessagePromptTemplate, PromptTemplate, MessagesPlaceholder |
|
from langchain import hub |
|
from googlesearch import search |
|
import requests |
|
from tqdm import tqdm |
|
from bs4 import BeautifulSoup |
|
|
|
def create_retriever_tool_agent(pdf_dir="./Pdfs"):
    """Build a retriever tool backed by a FAISS index over the PDFs in *pdf_dir*.

    Loads every PDF in the directory, chunks the pages, embeds the chunks via
    the HuggingFace Hub, and wraps the resulting FAISS retriever as an agent tool.
    """
    # Load all PDFs from the target directory into LangChain documents.
    pdf_documents = PyPDFDirectoryLoader(pdf_dir).load()

    # Chunk with overlap so context isn't lost at chunk boundaries.
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    chunks = splitter.split_documents(pdf_documents)

    # Embed remotely on the HF Hub and index locally with FAISS.
    hub_embeddings = HuggingFaceHubEmbeddings(model="sentence-transformers/all-MiniLM-L6-v2")
    vector_store = FAISS.from_documents(chunks, hub_embeddings)

    return create_retriever_tool(
        retriever=vector_store.as_retriever(),
        name="Pdf_search",
        description="Use to search information from the PDFs.",
    )
|
|
|
def create_arxiv_tool_agent():
    """Return an Arxiv search tool limited to one result of at most 200 chars."""
    wrapper = ArxivAPIWrapper(top_k_results=1, doc_content_chars_max=200)
    return ArxivQueryRun(api_wrapper=wrapper)
|
|
|
@tool
def google_search(input: str, num_results: int = 5):
    """Use to do google search. It is used when the AI's internal database does not have specific information requested by the user."""
    # Materialize the result URLs (tqdm shows download-by-download progress),
    # then pull readable text out of each page.
    urls = list(tqdm(search(input, num_results=num_results)))
    return fetch_content(urls)
|
|
|
def fetch_content(urls: list) -> str:
    """Download each URL and return the concatenated paragraph text.

    For every URL: fetch the page, extract all ``<p>`` text, keep at most the
    first 1000 characters, and join the per-page snippets with spaces.

    Best-effort by design: any URL that fails to download or parse is skipped
    silently so one bad link does not abort the whole search.
    """
    text = []
    for link in tqdm(urls):
        try:
            # Timeout prevents a single unresponsive host from hanging the
            # agent indefinitely (requests has no default timeout).
            response = requests.get(link, timeout=10)
            response.raise_for_status()

            soup = BeautifulSoup(response.text, 'html.parser')
            paragraphs = ' '.join(p.text for p in soup.find_all('p'))

            # Cap per-page text so one long article can't dominate the output.
            paragraphs = paragraphs[:1000]
            if paragraphs:
                text.append(paragraphs)
        except Exception:
            # Deliberate best-effort: skip broken links and keep going.
            continue
    return " ".join(text)
|
|
|
def get_prompt():
    """Return the agent prompt: the stock functions-agent prompt from the hub
    with its messages replaced by a custom tool-sequencing system message.

    Fixes a typo in the system prompt ("than use that tool" -> "then use that
    tool") so the final instruction reads correctly to the model.
    """
    prompt = hub.pull("hwchase17/openai-functions-agent")
    # Overwrite the pulled messages wholesale: custom system instructions,
    # optional chat history, the user input, and the agent scratchpad.
    prompt.messages = [
        SystemMessagePromptTemplate(prompt=PromptTemplate(
            input_variables=[],
            template="""You are a helpful AI that uses sequence of tools to answer human questions.
The sequence you follow:
first, use your internal database
second, use pdf search tool,
third, use arxiv tool,
fourth, use google search tool

You can use one tool at a time to search for information. Once a tool is used, do not switch to another tool.
If you know which tool is suitable to answer human question, then use that tool and don't follow the sequence"""
        )),
        # Chat history is optional so the prompt also works on the first turn.
        MessagesPlaceholder(variable_name='chat_history', optional=True),
        HumanMessagePromptTemplate(prompt=PromptTemplate(
            input_variables=['input'],
            template='{input}'
        )),
        # Scratchpad holds intermediate tool calls/observations for the agent.
        MessagesPlaceholder(variable_name='agent_scratchpad')
    ]
    return prompt
|
|