Eddie Pick committed
Commit d803be1
1 Parent(s): 2f49709

Fixes and updates

Files changed (7)
  1. copywriter.py +3 -0
  2. models.py +280 -0
  3. requirements.txt +2 -0
  4. search_agent.py +25 -26
  5. search_agent_ui.py +24 -16
  6. web_crawler.py +43 -11
  7. web_rag.py +79 -64
copywriter.py CHANGED
@@ -5,6 +5,7 @@ from langchain.prompts.chat import (
     ChatPromptTemplate
 )
 from langchain.prompts.prompt import PromptTemplate
+from langsmith import traceable
 
 
 def get_comments_prompt(query, draft):
@@ -34,6 +35,7 @@ def get_comments_prompt(query, draft):
     )
     return [system_message, human_message]
 
+@traceable(run_type="llm", name="generate_comments")
 def generate_comments(chat_llm, query, draft, callbacks=[]):
     messages = get_comments_prompt(query, draft)
     response = chat_llm.invoke(messages, config={"callbacks": callbacks})
@@ -67,6 +69,7 @@ def get_final_text_prompt(query, draft, comments):
     return [system_message, human_message]
 
 
+@traceable(run_type="llm", name="generate_final_text")
 def generate_final_text(chat_llm, query, draft, comments, callbacks=[]):
    messages = get_final_text_prompt(query, draft, comments)
    response = chat_llm.invoke(messages, config={"callbacks": callbacks})
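
For orientation only (this sketch is not part of the commit): the langsmith @traceable decorators added above record each call as an LLM run when LangSmith tracing is configured. A minimal way to exercise the newly traced helpers, assuming the models.get_model factory introduced in this commit and the LangSmith environment variables already set:

# Sketch, not part of the commit: drive the traced copywriter helpers end to end.
from models import get_model
from copywriter import generate_comments, generate_final_text

chat = get_model("openai/gpt-4o-mini", temperature=0.0)
query = "Write a short post about semantic chunking"
draft = "Semantic chunking splits documents at meaning boundaries rather than at fixed sizes."

comments = generate_comments(chat, query, draft)                 # recorded as "generate_comments"
final_text = generate_final_text(chat, query, draft, comments)   # recorded as "generate_final_text"
print(final_text)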
models.py ADDED
@@ -0,0 +1,280 @@
+import os
+import json
+from langchain.schema import SystemMessage, HumanMessage
+from langchain.prompts.chat import (
+    HumanMessagePromptTemplate,
+    SystemMessagePromptTemplate,
+    ChatPromptTemplate
+)
+from langchain.prompts.prompt import PromptTemplate
+from langchain.retrievers.multi_query import MultiQueryRetriever
+
+from langchain_aws import BedrockEmbeddings
+from langchain_aws.chat_models.bedrock_converse import ChatBedrockConverse
+from langchain_cohere import ChatCohere
+from langchain_fireworks.chat_models import ChatFireworks
+from langchain_fireworks.embeddings import FireworksEmbeddings
+from langchain_groq.chat_models import ChatGroq
+from langchain_openai import ChatOpenAI
+from langchain_openai.embeddings import OpenAIEmbeddings
+from langchain_ollama.chat_models import ChatOllama
+from langchain_ollama.embeddings import OllamaEmbeddings
+from langchain_cohere.embeddings import CohereEmbeddings
+from langchain_cohere.chat_models import ChatCohere
+from langchain_openai.embeddings import OpenAIEmbeddings
+from langchain_google_genai import ChatGoogleGenerativeAI
+from langchain_google_genai.embeddings import GoogleGenerativeAIEmbeddings
+from langchain_community.chat_models import ChatPerplexity
+from langchain_together import ChatTogether
+from langchain_together.embeddings import TogetherEmbeddings
+
+
+
+def get_model(provider_model, temperature=0.0):
+    provider, model = (provider_model.split('/') + [None])[:2]
+    match provider:
+        case 'bedrock':
+            #credentials_profile_name=os.getenv('CREDENTIALS_PROFILE_NAME')
+            if model is None:
+                model = "anthropic.claude-3-sonnet-20240229-v1:0"
+            chat_llm = ChatBedrockConverse(
+                #credentials_profile_name=credentials_profile_name,
+                model=model,
+                temperature=temperature,
+            )
+        case 'cohere':
+            if model is None:
+                model = 'command-r-plus'
+            chat_llm = ChatCohere(model=model, temperature=temperature)
+        case 'fireworks':
+            if model is None:
+                model = 'accounts/fireworks/models/llama-v3p1-8b-instruct'
+            chat_llm = ChatFireworks(model_name=model, temperature=temperature, max_tokens=120000)
+        case 'googlegenerativeai':
+            if model is None:
+                model = "gemini-1.5-pro"
+            chat_llm = ChatGoogleGenerativeAI(model=model, temperature=temperature,
+                max_tokens=None, timeout=None, max_retries=2,)
+        case 'groq':
+            if model is None:
+                model = 'llama-3.1-8b-instant'
+            chat_llm = ChatGroq(model_name=model, temperature=temperature)
+        case 'ollama':
+            if model is None:
+                model = 'llama3.1'
+            chat_llm = ChatOllama(model=model, temperature=temperature)
+        case 'openai':
+            if model is None:
+                model = "gpt-4o-mini"
+            chat_llm = ChatOpenAI(model_name=model, temperature=temperature)
+        case 'perplexity':
+            if model is None:
+                model = 'llama-3.1-sonar-small-128k-online'
+            chat_llm = ChatPerplexity(model=model, temperature=temperature)
+        case 'together':
+            if model is None:
+                model = 'meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo'
+            chat_llm = ChatTogether(model=model, temperature=temperature)
+        case _:
+            raise ValueError(f"Unknown LLM provider {provider}")
+
+    return chat_llm
+
+
+def get_embedding_model(provider_embedding_model):
+    provider, model = (provider_embedding_model.split('/') + [None])[:2]
+    match provider:
+        case 'bedrock':
+            #credentials_profile_name=os.getenv('CREDENTIALS_PROFILE_NAME')
+            if model is None:
+                model = "cohere.embed-multilingual-v3"
+            embedding_model = BedrockEmbeddings(
+                model_id=model,
+                #credentials_profile_name=credentials_profile_name
+            )
+        case 'cohere':
+            if model is None:
+                model = "embed-english-light-v3.0"
+            embedding_model = CohereEmbeddings(model=model)
+        case 'fireworks':
+            if model is None:
+                model = 'nomic-ai/nomic-embed-text-v1.5'
+            embedding_model = FireworksEmbeddings(model=model)
+        case 'ollama':
+            if model is None:
+                model = 'nomic-embed-text:latest'
+            embedding_model = OllamaEmbeddings(model=model)
+        case 'openai':
+            if model is None:
+                model = "text-embedding-3-small"
+            embedding_model = OpenAIEmbeddings(model=model)
+        case 'googlegenerativeai':
+            if model is None:
+                model = "models/embedding-001"
+            embedding_model = GoogleGenerativeAIEmbeddings(model=model)
+        case 'groq':
+            embedding_model = OpenAIEmbeddings(model="text-embedding-3-small")
+        case 'perplexity':
+            raise ValueError(f"Cannot use Perplexity for embedding model")
+        case 'together':
+            if model is None:
+                model = 'BAAI/bge-base-en-v1.5'
+            embedding_model = TogetherEmbeddings(model=model)
+        case _:
+            raise ValueError(f"Unknown LLM provider {provider}")
+
+    return embedding_model
+
+
+import unittest
+from unittest.mock import patch
+from models import get_embedding_model  # Make sure this import is correct
+
+class TestGetEmbeddingModel(unittest.TestCase):
+
+    @patch('models.BedrockEmbeddings')
+    def test_bedrock_embedding(self, mock_bedrock):
+        result = get_embedding_model('bedrock')
+        mock_bedrock.assert_called_once_with(model_id='cohere.embed-multilingual-v3')
+        self.assertEqual(result, mock_bedrock.return_value)
+
+    @patch('models.CohereEmbeddings')
+    def test_cohere_embedding(self, mock_cohere):
+        result = get_embedding_model('cohere')
+        mock_cohere.assert_called_once_with(model='embed-english-light-v3.0')
+        self.assertEqual(result, mock_cohere.return_value)
+
+    @patch('models.FireworksEmbeddings')
+    def test_fireworks_embedding(self, mock_fireworks):
+        result = get_embedding_model('fireworks')
+        mock_fireworks.assert_called_once_with(model='nomic-ai/nomic-embed-text-v1.5')
+        self.assertEqual(result, mock_fireworks.return_value)
+
+    @patch('models.OllamaEmbeddings')
+    def test_ollama_embedding(self, mock_ollama):
+        result = get_embedding_model('ollama')
+        mock_ollama.assert_called_once_with(model='nomic-embed-text:latest')
+        self.assertEqual(result, mock_ollama.return_value)
+
+    @patch('models.OpenAIEmbeddings')
+    def test_openai_embedding(self, mock_openai):
+        result = get_embedding_model('openai')
+        mock_openai.assert_called_once_with(model='text-embedding-3-small')
+        self.assertEqual(result, mock_openai.return_value)
+
+    @patch('models.GoogleGenerativeAIEmbeddings')
+    def test_google_embedding(self, mock_google):
+        result = get_embedding_model('googlegenerativeai')
+        mock_google.assert_called_once_with(model='models/embedding-001')
+        self.assertEqual(result, mock_google.return_value)
+
+    @patch('models.TogetherEmbeddings')
+    def test_together_embedding(self, mock_together):
+        result = get_embedding_model('together')
+        mock_together.assert_called_once_with(model='BAAI/bge-base-en-v1.5')
+        self.assertEqual(result, mock_together.return_value)
+
+    def test_invalid_provider(self):
+        with self.assertRaises(ValueError):
+            get_embedding_model('invalid_provider')
+
+    def test_groq_provider(self):
+        with self.assertRaises(ValueError):
+            get_embedding_model('groq')
+
+    def test_perplexity_provider(self):
+        with self.assertRaises(ValueError):
+            get_embedding_model('perplexity')
+
+
+import unittest
+from unittest.mock import patch
+from models import get_model  # Make sure this import is correct
+
+class TestGetModel(unittest.TestCase):
+
+    @patch('models.ChatBedrockConverse')
+    def test_bedrock_model(self, mock_bedrock):
+        result = get_model('bedrock')
+        mock_bedrock.assert_called_once_with(
+            model="anthropic.claude-3-sonnet-20240229-v1:0",
+            temperature=0.0
+        )
+        self.assertEqual(result, mock_bedrock.return_value)
+
+    @patch('models.ChatCohere')
+    def test_cohere_model(self, mock_cohere):
+        result = get_model('cohere')
+        mock_cohere.assert_called_once_with(model='command-r-plus', temperature=0.0)
+        self.assertEqual(result, mock_cohere.return_value)
+
+    @patch('models.ChatFireworks')
+    def test_fireworks_model(self, mock_fireworks):
+        result = get_model('fireworks')
+        mock_fireworks.assert_called_once_with(
+            model_name='accounts/fireworks/models/llama-v3p1-8b-instruct',
+            temperature=0.0,
+            max_tokens=120000
+        )
+        self.assertEqual(result, mock_fireworks.return_value)
+
+    @patch('models.ChatGoogleGenerativeAI')
+    def test_google_model(self, mock_google):
+        result = get_model('googlegenerativeai')
+        mock_google.assert_called_once_with(
+            model="gemini-1.5-pro",
+            temperature=0.0,
+            max_tokens=None,
+            timeout=None,
+            max_retries=2
+        )
+        self.assertEqual(result, mock_google.return_value)
+
+    @patch('models.ChatGroq')
+    def test_groq_model(self, mock_groq):
+        result = get_model('groq')
+        mock_groq.assert_called_once_with(model_name='llama-3.1-8b-instant', temperature=0.0)
+        self.assertEqual(result, mock_groq.return_value)
+
+    @patch('models.ChatOllama')
+    def test_ollama_model(self, mock_ollama):
+        result = get_model('ollama')
+        mock_ollama.assert_called_once_with(model='llama3.1', temperature=0.0)
+        self.assertEqual(result, mock_ollama.return_value)
+
+    @patch('models.ChatOpenAI')
+    def test_openai_model(self, mock_openai):
+        result = get_model('openai')
+        mock_openai.assert_called_once_with(model_name='gpt-4o-mini', temperature=0.0)
+        self.assertEqual(result, mock_openai.return_value)
+
+    @patch('models.ChatPerplexity')
+    def test_perplexity_model(self, mock_perplexity):
+        result = get_model('perplexity')
+        mock_perplexity.assert_called_once_with(model='llama-3.1-sonar-small-128k-online', temperature=0.0)
+        self.assertEqual(result, mock_perplexity.return_value)
+
+    @patch('models.ChatTogether')
+    def test_together_model(self, mock_together):
+        result = get_model('together')
+        mock_together.assert_called_once_with(model='meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo', temperature=0.0)
+        self.assertEqual(result, mock_together.return_value)
+
+    def test_invalid_provider(self):
+        with self.assertRaises(ValueError):
+            get_model('invalid_provider')
+
+    def test_custom_temperature(self):
+        with patch('models.ChatOpenAI') as mock_openai:
+            result = get_model('openai', temperature=0.5)
+            mock_openai.assert_called_once_with(model_name='gpt-4o-mini', temperature=0.5)
+            self.assertEqual(result, mock_openai.return_value)
+
+    def test_custom_model(self):
+        with patch('models.ChatOpenAI') as mock_openai:
+            result = get_model('openai/gpt-4')
+            mock_openai.assert_called_once_with(model_name='gpt-4', temperature=0.0)
+            self.assertEqual(result, mock_openai.return_value)
+
+if __name__ == '__main__':
+    unittest.main()
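
For context (not part of the diff): both new factories take a single "provider" string, optionally followed by "/model" to override the defaults hard-coded above. A minimal usage sketch, assuming the relevant provider API keys are configured:

# Sketch: calling the new factories; defaults noted in the comments come from the code above.
from models import get_model, get_embedding_model

chat = get_model("openai")                                    # ChatOpenAI with gpt-4o-mini
chat_groq = get_model("groq/llama-3.1-8b-instant", temperature=0.2)
embeddings = get_embedding_model("cohere")                    # CohereEmbeddings, embed-english-light-v3.0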
requirements.txt CHANGED
@@ -18,6 +18,8 @@ langchain_experimental
 langchain_openai
 langchain-ollama
 langchain_groq
+langchain-google-genai
+langchain-together
 langsmith
 schema
 streamlit
search_agent.py CHANGED
@@ -5,10 +5,12 @@ Usage:
         [--domain=domain]
         [--provider=provider]
         [--model=model]
+        [--embedding_model=model]
         [--temperature=temp]
         [--copywrite]
         [--max_pages=num]
         [--max_extracts=num]
+        [--use_selenium]
         [--output=text]
         SEARCH_QUERY
     search_agent.py --version
@@ -19,10 +21,11 @@ Options:
     -c --copywrite                  First produce a draft, review it and rewrite for a final text
     -d domain --domain=domain       Limit search to a specific domain
     -t temp --temperature=temp      Set the temperature of the LLM [default: 0.0]
-    -p provider --provider=provider Use a specific LLM (choices: bedrock,openai,groq,ollama,cohere,fireworks) [default: openai]
-    -m model --model=model          Use a specific model
+    -m model --model=model          Use a specific model [default: openai/gpt-4o-mini]
+    -e model --embedding_model=model Use a specific embedding model [default: openai/text-embedding-3-small]
     -n num --max_pages=num          Max number of pages to retrieve [default: 10]
     -e num --max_extracts=num       Max number of page extract to consider [default: 5]
+    -s --use_selenium               Use selenium to fetch content from the web [default: False]
     -o text --output=text           Output format (choices: text, markdown) [default: markdown]
 
 """
@@ -35,7 +38,7 @@ import dotenv
 
 from langchain.callbacks import LangChainTracer
 
-from langsmith import Client
+from langsmith import Client, traceable
 
 from rich.console import Console
 from rich.markdown import Markdown
@@ -43,6 +46,7 @@ from rich.markdown import Markdown
 import web_rag as wr
 import web_crawler as wc
 import copywriter as cw
+import models as md
 
 console = Console()
 dotenv.load_dotenv()
@@ -70,34 +74,24 @@ if os.getenv("LANGCHAIN_API_KEY"):
     callbacks.append(
         LangChainTracer(client=Client())
     )
-
-if __name__ == '__main__':
-    arguments = docopt(__doc__, version='Search Agent 0.1')
-
-    #schema = Schema({
-    #    '--max_pages': Use(int, error='--max_pages must be an integer'),
-    #    '--temperature': Use(float, error='--temperature must be an float'),
-    #})
-
-    #try:
-    #    arguments = schema.validate(arguments)
-    #except SchemaError as e:
-    #    exit(e)
-
+@traceable(run_type="tool", name="search_agent")
+def main(arguments):
     copywrite_mode = arguments["--copywrite"]
-    provider = arguments["--provider"]
     model = arguments["--model"]
+    embedding_model = arguments["--embedding_model"]
     temperature = float(arguments["--temperature"])
     domain=arguments["--domain"]
-    max_pages=arguments["--max_pages"]
+    max_pages=int(arguments["--max_pages"])
    max_extract=int(arguments["--max_extracts"])
    output=arguments["--output"]
+    use_selenium=arguments["--use_selenium"]
    query = arguments["SEARCH_QUERY"]
 
-    chat, embedding_model = wr.get_models(provider, model, temperature)
+    chat = md.get_model(model, temperature)
+    embedding_model = md.get_embedding_model(embedding_model)
 
     with console.status(f"[bold green]Optimizing query for search: {query}"):
-        optimize_search_query = wr.optimize_search_query(chat, query, callbacks=callbacks)
+        optimize_search_query = wr.optimize_search_query(chat, query)
         if len(optimize_search_query) < 3:
            optimize_search_query = query
     console.log(f"Optimized search query: [bold blue]{optimize_search_query}")
@@ -111,16 +105,16 @@ if __name__ == '__main__':
     with console.status(
         f"[bold green]Fetching content for {len(sources)} sources", spinner="growVertical"
     ):
-        contents = wc.get_links_contents(sources, get_selenium_driver)
+        contents = wc.get_links_contents(sources, get_selenium_driver, use_selenium=use_selenium)
     console.log(f"Managed to extract content from {len(contents)} sources")
 
     with console.status(f"[bold green]Embedding {len(contents)} sources for content", spinner="growVertical"):
         vector_store = wc.vectorize(contents, embedding_model)
 
     with console.status("[bold green]Writing content", spinner='dots8Bit'):
-        draft = wr.query_rag(chat, query, optimize_search_query, vector_store, top_k = max_extract, callbacks=callbacks)
+        draft = wr.query_rag(chat, query, optimize_search_query, vector_store, top_k = max_extract)
 
-    console.rule(f"[bold green]Response from {provider}")
+    console.rule(f"[bold green]Response")
     if output == "text":
         console.print(draft)
     else:
@@ -129,7 +123,7 @@ if __name__ == '__main__':
 
     if(copywrite_mode):
         with console.status("[bold green]Getting comments from the reviewer", spinner="dots8Bit"):
-            comments = cw.generate_comments(chat, query, draft, callbacks=callbacks)
+            comments = cw.generate_comments(chat, query, draft)
 
         console.rule("[bold green]Response from reviewer")
         if output == "text":
@@ -139,7 +133,7 @@ if __name__ == '__main__':
         console.rule("[bold green]")
 
         with console.status("[bold green]Writing the final text", spinner="dots8Bit"):
-            final_text = cw.generate_final_text(chat, query, draft, comments, callbacks=callbacks)
+            final_text = cw.generate_final_text(chat, query, draft, comments)
 
         console.rule("[bold green]Final text")
         if output == "text":
@@ -147,3 +141,8 @@ if __name__ == '__main__':
         else:
             console.print(Markdown(final_text))
         console.rule("[bold green]")
+
+if __name__ == '__main__':
+    arguments = docopt(__doc__, version='Search Agent 0.1')
+    main(arguments)
+
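
Not part of the commit, but for orientation: the new main(arguments) entry point reads a docopt-style dictionary, so it can also be driven programmatically. A sketch using the argument keys read above (values are illustrative):

# Sketch: calling the refactored entry point directly with a docopt-style dict.
import search_agent

search_agent.main({
    "--copywrite": False,
    "--domain": None,
    "--model": "openai/gpt-4o-mini",
    "--embedding_model": "openai/text-embedding-3-small",
    "--temperature": "0.0",
    "--max_pages": "10",
    "--max_extracts": "5",
    "--use_selenium": False,
    "--output": "markdown",
    "SEARCH_QUERY": "latest developments in RAG evaluation",
})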
search_agent_ui.py CHANGED
@@ -11,7 +11,7 @@ from langsmith.client import Client
 import web_rag as wr
 import web_crawler as wc
 import copywriter as cw
-
+import models as md
 dotenv.load_dotenv()
 
 ls_tracer = LangChainTracer(
@@ -54,26 +54,26 @@ def create_links_markdown(sources_list):
 st.set_page_config(layout="wide")
 st.title("🔍 Simple Search Agent 💬")
 
-if "providers" not in st.session_state:
-    providers = []
+if "models" not in st.session_state:
+    models = []
     if os.getenv("FIREWORKS_API_KEY"):
-        providers.append("fireworks")
+        models.append("fireworks")
     if os.getenv("COHERE_API_KEY"):
-        providers.append("cohere")
+        models.append("cohere")
     if os.getenv("OPENAI_API_KEY"):
-        providers.append("openai")
+        models.append("openai")
     if os.getenv("GROQ_API_KEY"):
-        providers.append("groq")
+        models.append("groq")
     if os.getenv("OLLAMA_API_KEY"):
-        providers.append("ollama")
+        models.append("ollama")
     if os.getenv("CREDENTIALS_PROFILE_NAME"):
-        providers.append("bedrock")
-    st.session_state["providers"] = providers
+        models.append("bedrock")
+    st.session_state["models"] = models
 
 with st.sidebar.expander("Options", expanded=False):
-    model_provider = st.selectbox("Model provider 🧠", st.session_state["providers"])
+    model_provider = st.selectbox("Model provider 🧠", st.session_state["models"])
     temperature = st.slider("Model temperature 🌡️", 0.0, 1.0, 0.1, help="The higher the more creative")
-    max_pages = st.slider("Max pages to retrieve 🔍", 1, 20, 15, help="How many web pages to retrive from the internet")
+    max_pages = st.slider("Max pages to retrieve 🔍", 1, 20, 10, help="How many web pages to retrive from the internet")
     top_k_documents = st.slider("Nbr of doc extracts to consider 📄", 1, 20, 5, help="How many of the top extracts to consider")
     reviewer_mode = st.checkbox("Draft / Comment / Rewrite mode ✍️", value=False, help="First generate a draft, then comments and then rewrite")
 
@@ -108,7 +108,8 @@ if prompt := st.chat_input("Enter you instructions..." ):
     st.chat_message("user").write(prompt)
     st.session_state.messages.append({"role": "user", "content": prompt})
 
-    chat, embedding_model = wr.get_models(model_provider, temperature=temperature)
+    chat = md.get_model(model_provider, temperature)
+    embedding_model = md.get_embedding_model(model_provider)
 
     with st.status("Thinking", expanded=True):
         st.write("I first need to do some research")
@@ -120,7 +121,7 @@ if prompt := st.chat_input("Enter you instructions..." ):
         links_md.markdown(create_links_markdown(sources))
 
         st.write(f"I'll now retrieve the {len(sources)} webpages and documents I found")
-        contents = wc.get_links_contents(sources)
+        contents = wc.get_links_contents(sources, use_selenium=False)
 
         st.write( f"Reading through the {len(contents)} sources I managed to retrieve")
         vector_store = wc.vectorize(contents, embedding_model=embedding_model)
@@ -147,8 +148,15 @@ if prompt := st.chat_input("Enter you instructions..." ):
 
     with st.chat_message("assistant"):
         st_cb = StreamHandler(st.empty())
-        result = chat.invoke(rag_prompt, stream=True, config={ "callbacks": [st_cb, ls_tracer]})
-        response = result.content.strip()
+        if hasattr(chat, 'stream'):
+            response = ""
+            for chunk in chat.stream(rag_prompt, config={"callbacks": [st_cb, ls_tracer]}):
+                response += chunk.content
+        else:
+            result = chat.invoke(rag_prompt, config={"callbacks": [st_cb, ls_tracer]})
+            response = result.content
+
+        response = response.strip()
         message_id = f"{prompt}{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
         st.session_state.messages.append({"role": "assistant", "content": response})
 
web_crawler.py CHANGED
@@ -8,12 +8,14 @@ from trafilatura import extract
 from selenium.common.exceptions import TimeoutException
 from langchain_core.documents.base import Document
 from langchain_experimental.text_splitter import SemanticChunker
+from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain_openai import OpenAIEmbeddings
 from langchain_community.vectorstores.faiss import FAISS
-
+from langsmith import traceable
 import requests
 import pdfplumber
 
+@traceable(run_type="tool", name="get_sources")
 def get_sources(query, max_pages=10, domain=None):
     search_query = query
     if domain:
@@ -78,8 +80,7 @@ def fetch_with_timeout(url, timeout=8):
 
 def process_source(source):
     url = source['link']
-    #console.log(f"Processing {url}")
-    response = fetch_with_timeout(url, 8)
+    response = fetch_with_timeout(url, 2)
     if response:
         content_type = response.headers.get('Content-Type')
         if content_type:
@@ -107,12 +108,13 @@ def process_source(source):
         return {**source, 'page_content': source['snippet']}
     return {**source, 'page_content': None}
 
-def get_links_contents(sources, get_driver_func=None):
+@traceable(run_type="tool", name="get_links_contents")
+def get_links_contents(sources, get_driver_func=None, use_selenium=False):
     with ThreadPoolExecutor() as executor:
         results = list(executor.map(process_source, sources))
 
-    if get_driver_func is None:
-        return [result for result in results if result is not None]
+    if get_driver_func is None or not use_selenium:
+        return [result for result in results if result is not None and result['page_content']]
 
     for result in results:
         if result['page_content'] is None:
@@ -125,19 +127,49 @@ def get_links_contents(sources, get_driver_func=None):
             result['page_content'] = main_content
     return results
 
+@traceable(run_type="embedding")
 def vectorize(contents, embedding_model):
     documents = []
+    total_content_length = 0
     for content in contents:
         try:
             page_content = content['page_content']
-            if page_content: # Sometimes Selenium is not fetching properly
+            if page_content:
                 metadata = {'title': content['title'], 'source': content['link']}
                 doc = Document(page_content=content['page_content'], metadata=metadata)
                 documents.append(doc)
+                total_content_length += len(page_content)
         except Exception as e:
             print(f"[gray]Error processing content for {content['link']}: {e}")
+
+    # Define a threshold for when to use pre-splitting (e.g., 1 million characters)
+    pre_split_threshold = 1_000_000
+
+    if total_content_length > pre_split_threshold:
+        # Use pre-splitting for large datasets
+        pre_splitter = RecursiveCharacterTextSplitter(
+            chunk_size=2000,
+            chunk_overlap=200,
+            length_function=len,
+        )
+        documents = pre_splitter.split_documents(documents)
+
     semantic_chunker = SemanticChunker(embedding_model, breakpoint_threshold_type="percentile")
-    docs = semantic_chunker.split_documents(documents)
-    embeddings = OpenAIEmbeddings()
-    store = FAISS.from_documents(docs, embeddings)
-    return store
+
+    vector_store = None
+    batch_size = 200  # Adjust this value if needed
+
+    for i in range(0, len(documents), batch_size):
+        batch = documents[i:i+batch_size]
+
+        # Split each document in the batch using SemanticChunker
+        chunked_docs = []
+        for doc in batch:
+            chunked_docs.extend(semantic_chunker.split_documents([doc]))
+
+        if vector_store is None:
+            vector_store = FAISS.from_documents(chunked_docs, embedding_model)
+        else:
+            vector_store.add_documents(chunked_docs)
+
+    return vector_store
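
For orientation (not part of the commit): the reworked vectorize now pre-splits very large corpora and builds the FAISS index in batches, but its call pattern is unchanged. A minimal sketch of the crawl, embed and retrieve flow, assuming the search and embedding API keys this module relies on are configured:

# Sketch: search -> fetch -> batched FAISS build -> retrieval, using the helpers above.
from models import get_embedding_model
import web_crawler as wc

sources = wc.get_sources("semantic chunking for RAG", max_pages=5)
contents = wc.get_links_contents(sources, use_selenium=False)        # plain HTTP fetching only
embedding_model = get_embedding_model("openai/text-embedding-3-small")
vector_store = wc.vectorize(contents, embedding_model)               # batched build described above
hits = vector_store.similarity_search("semantic chunking", k=4)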
web_rag.py CHANGED
@@ -36,53 +36,7 @@ from langchain_groq.chat_models import ChatGroq
 from langchain_openai import ChatOpenAI
 from langchain_openai.embeddings import OpenAIEmbeddings
 from langchain_ollama.chat_models import ChatOllama
-
-def get_models(provider, model=None, temperature=0.0):
-    match provider:
-        case 'bedrock':
-            credentials_profile_name=os.getenv('CREDENTIALS_PROFILE_NAME')
-            if model is None:
-                model = "anthropic.claude-3-sonnet-20240229-v1:0"
-            chat_llm = ChatBedrockConverse(
-                credentials_profile_name=credentials_profile_name,
-                model=model,
-                temperature=temperature,
-            )
-            embedding_model = BedrockEmbeddings(
-                model_id='cohere.embed-multilingual-v3',
-                credentials_profile_name=credentials_profile_name
-            )
-            embedding_model = OpenAIEmbeddings(model='text-embedding-3-small')
-        case 'openai':
-            if model is None:
-                model = "gpt-4o-mini"
-            chat_llm = ChatOpenAI(model_name=model, temperature=temperature)
-            embedding_model = OpenAIEmbeddings(model='text-embedding-3-small')
-        case 'groq':
-            if model is None:
-                model = 'mixtral-8x7b-32768'
-            chat_llm = ChatGroq(model_name=model, temperature=temperature)
-            embedding_model = OpenAIEmbeddings(model='text-embedding-3-small')
-        case 'ollama':
-            if model is None:
-                model = 'llama3.1'
-            chat_llm = ChatOllama(model=model, temperature=temperature)
-            embedding_model = OpenAIEmbeddings(model='text-embedding-3-small')
-        case 'cohere':
-            if model is None:
-                model = 'command-r-plus'
-            chat_llm = ChatCohere(model=model, temperature=temperature)
-            #embedding_model = CohereEmbeddings(model="embed-english-light-v3.0")
-            embedding_model = OpenAIEmbeddings(model='text-embedding-3-small')
-        case 'fireworks':
-            if model is None:
-                model = 'accounts/fireworks/models/llama-v3p1-8b-instruct'
-            chat_llm = ChatFireworks(model_name=model, temperature=temperature, max_tokens=120000)
-            embedding_model = OpenAIEmbeddings(model='text-embedding-3-small')
-        case _:
-            raise ValueError(f"Unknown LLM provider {provider}")
-
-    return chat_llm, embedding_model
+from langsmith import traceable
 
 
 def get_optimized_search_messages(query):
@@ -97,12 +51,13 @@ def get_optimized_search_messages(query):
     """
     system_message = SystemMessage(
         content="""
-        I want you to act as a prompt optimizer for web search.
-        I will provide you with a chat prompt, and your goal is to optimize it into a search string that will yield the most relevant and useful information from a search engine like Google.
+        You are a prompt optimizer for web search. Your task is to take a given chat prompt or question and transform it into an optimized search string that will yield the most relevant and useful information from a search engine like Google.
+        The goal is to create a search query that will help users find the most accurate and pertinent information related to their original prompt or question. An effective search string should be concise, use relevant keywords, and leverage search engine syntax for better results.
+
         To optimize the prompt:
         - Identify the key information being requested
+        - Consider any implicit information or context that might be useful for the search.
         - Arrange the keywords into a concise search string
-        - Keep it short, around 1 to 5 words total
        - Put the most important keywords first
 
        Some tips and things to be sure to remove:
@@ -111,7 +66,7 @@ def get_optimized_search_messages(query):
         - Remove lenght instruction (example: essay, article, letter, blog, post, blogpost, etc)
         - Remove style instructions (exmaple: "in the style of", engaging, short, long)
         - Remove lenght instruction (example: essay, article, letter, etc)
-
+
        You should answer only with the optimized search query and add "**" to the end of the search string to indicate the end of the query
 
        Example:
@@ -119,19 +74,16 @@ def get_optimized_search_messages(query):
         chocolate chip cookies recipe from scratch**
         Example:
         Question: I would like you to show me a timeline of Marie Curie's life. Show results as a markdown table
-        Marie Curie timeline**
+        "Marie Curie" timeline**
         Example:
         Question: I would like you to write a long article on NATO vs Russia. Use known geopolitical frameworks.
         geopolitics nato russia**
         Example:
         Question: Write an engaging LinkedIn post about Andrew Ng
-        Andrew Ng**
+        "Andrew Ng"**
         Example:
         Question: Write a short article about the solar system in the style of Carl Sagan
         solar system**
-        Example:
-        Question: Should I use Kubernetes? Answer in the style of Gilfoyle from the TV show Silicon Valley
-        Kubernetes decision**
         Example:
         Question: Biography of Napoleon. Include a table with the major events.
         napoleon biography events**
@@ -155,12 +107,73 @@ def get_optimized_search_messages(query):
     return [system_message, human_message]
 
 
+
+def get_optimized_search_messages2(query):
+    """
+    Generate optimized search messages for a given query.
+
+    Args:
+        query (str): The user's query.
+
+    Returns:
+        list: A list containing the system message and human message for optimized search.
+    """
+    system_message = SystemMessage(
+        content="""
+        You are a prompt optimizer for web search. Your task is to take a given chat prompt or question and transform it into an optimized search string that will yield the most relevant and useful information from a search engine like Google.
+
+        The goal is to create a search query that will help users find the most accurate and pertinent information related to their original prompt or question. An effective search string should be concise, use relevant keywords, and leverage search engine syntax for better results.
+
+        Here are some key principles for creating effective search queries:
+        1. Use specific and relevant keywords
+        2. Remove unnecessary words (articles, prepositions, etc.)
+        3. Utilize quotation marks for exact phrases
+        4. Employ Boolean operators (AND, OR, NOT) when appropriate
+        5. Include synonyms or related terms to broaden the search
+
+        I will provide you with a chat prompt or question. Your task is to optimize this into an effective search string.
+
+        Process the input as follows:
+        1. Analyze the Question to identify the main topic and key concepts.
+        2. Extract the most relevant keywords and phrases.
+        3. Consider any implicit information or context that might be useful for the search.
+
+        Then, optimize the search string by:
+        1. Removing filler words and unnecessary language
+        2. Rearranging keywords in a logical order
+        3. Adding quotation marks around exact phrases if applicable
+        4. Including relevant synonyms or related terms (in parentheses) to broaden the search
+        5. Using Boolean operators if needed to refine the search
+
+        You should answer only with the optimized search query and add "**" to the end of the search string to indicate the end of the optimized search query
+        """
+    )
+    human_message = HumanMessage(
+        content=f"""
+        Question: {query}
+
+        """
+    )
+    return [system_message, human_message]
+
+
+@traceable(run_type="llm", name="optimize_search_query")
 def optimize_search_query(chat_llm, query, callbacks=[]):
     messages = get_optimized_search_messages(query)
-    response = chat_llm.invoke(messages, config={"callbacks": callbacks})
-    optimized_search_query = response.content
-    return optimized_search_query.strip('"').split("**", 1)[0].strip()
-
+    response = chat_llm.invoke(messages)
+    optimized_search_query = response.content.strip()
+
+    # Split by '**' and take the first part, then strip whitespace
+    optimized_search_query = optimized_search_query.split("**", 1)[0].strip()
+
+    # Remove surrounding quotes if present
+    optimized_search_query = optimized_search_query.strip('"')
+
+    # If the result is empty, fall back to the original query
+    if not optimized_search_query:
+        optimized_search_query = query
+
+    return optimized_search_query
 
 def get_rag_prompt_template():
     """
@@ -185,8 +198,9 @@ def get_rag_prompt_template():
         - Format your answer in Markdown, using heading levels 2-3 as needed
         - Include a "References" section at the end with the full citations and link for each source you used
 
-        If you cannot answer the question with confidence just say: "I'm not sure about the answer to be honest"
-        If the provided context is not relevant to the question, just say: "The context provided is not relevant to the question"
+        If the provided context is not relevant to the question, say it and answer with your internal knowledge.
+        If you cannot answer the question using either the extracts or your internal knowledge, state that you don't have enough information to provide an accurate answer.
+        If the information in the provided context is in contradiction with your internal knowledge, answer but warn the user about the contradiction.
         """
     )
 )
@@ -245,7 +259,7 @@ def get_context_size(chat_llm):
     if isinstance(chat_llm, ChatOllama):
         return 120000
     if isinstance(chat_llm, ChatCohere):
-        return 128000
+        return 120000
     if isinstance(chat_llm, ChatBedrockConverse):
         if chat_llm.model_id.startswith("meta.llama3-1"):
             return 128000
@@ -259,7 +273,7 @@ def get_context_size(chat_llm):
         return 32000
     return 4096
 
-
+@traceable(run_type="retriever")
 def build_rag_prompt(chat_llm, question, search_query, vectorstore, top_k = 10, callbacks = []):
     done = False
     while not done:
@@ -275,6 +289,7 @@ def build_rag_prompt(chat_llm, question, search_query, vectorstore, top_k = 10, callbacks = []):
 
     return prompt
 
+@traceable(run_type="llm", name="query_rag")
 def query_rag(chat_llm, question, search_query, vectorstore, top_k = 10, callbacks = []):
     prompt = build_rag_prompt(chat_llm, question, search_query, vectorstore, top_k=top_k, callbacks = callbacks)
     response = chat_llm.invoke(prompt, config={"callbacks": callbacks})
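
Not part of the commit, but to illustrate what the reworked post-processing in optimize_search_query now does with a raw model reply (text after the "**" sentinel is dropped, surrounding quotes are stripped, and an empty result falls back to the original question), here is a sketch that substitutes LangChain's FakeListChatModel test helper for a real LLM:

# Sketch: exercising the new optimize_search_query parsing with a canned model reply.
from langchain_community.chat_models.fake import FakeListChatModel
import web_rag as wr

fake_chat = FakeListChatModel(responses=["Marie Curie timeline** plus whatever else the model added"])
print(wr.optimize_search_query(fake_chat, "Show me a timeline of Marie Curie's life"))
# prints: Marie Curie timeline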