diff --git a/webscout/AIauto.py b/webscout/AIauto.py new file mode 100644 index 0000000000000000000000000000000000000000..0f1e0422eb36f8c1d25f53d7cb129eea29fbd5be --- /dev/null +++ b/webscout/AIauto.py @@ -0,0 +1,493 @@ +from webscout.AIbase import Provider, AsyncProvider +from webscout import OPENGPT, AsyncOPENGPT +from webscout import KOBOLDAI, AsyncKOBOLDAI +from webscout import PhindSearch, AsyncPhindSearch +from webscout import LLAMA2, AsyncLLAMA2 +from webscout import BLACKBOXAI, AsyncBLACKBOXAI +from webscout import PERPLEXITY +from webscout import ThinkAnyAI +from webscout import YouChat +from webscout import YEPCHAT +from webscout.AIbase import Provider, AsyncProvider +from webscout import KOBOLDAI, AsyncKOBOLDAI +from webscout import PhindSearch, AsyncPhindSearch +from webscout import LLAMA2, AsyncLLAMA2 +from webscout import BLACKBOXAI, AsyncBLACKBOXAI +from webscout import PERPLEXITY +from webscout import ThinkAnyAI +from webscout import YouChat +from webscout import YEPCHAT, AsyncYEPCHAT +from webscout import LEO, AsyncLEO +from webscout import GROQ, AsyncGROQ +from webscout import OPENAI, AsyncOPENAI +from webscout import REKA +from webscout import Xjai +from webscout import Berlin4h +from webscout import ChatGPTUK +from webscout.g4f import GPT4FREE, AsyncGPT4FREE +from webscout.g4f import TestProviders +from webscout.exceptions import AllProvidersFailure +from webscout.async_providers import mapper as async_provider_map +from typing import AsyncGenerator + +from typing import Union +from typing import Any +import logging + + +provider_map: dict[ + str, Union[ ThinkAnyAI, + Xjai, + LLAMA2, + AsyncLLAMA2, + LEO, + AsyncLEO, + KOBOLDAI, + AsyncKOBOLDAI, + OPENGPT, + AsyncOPENGPT, + PERPLEXITY, + BLACKBOXAI, + AsyncBLACKBOXAI, + PhindSearch, + AsyncPhindSearch, + YEPCHAT, + AsyncYEPCHAT, + YouChat, + Berlin4h, + ChatGPTUK,] +] = { + "PhindSearch": PhindSearch, + "perplexity": PERPLEXITY, + "opengpt": OPENGPT, + "koboldai": KOBOLDAI, + "llama2": LLAMA2, + "blackboxai": BLACKBOXAI, + "gpt4free": GPT4FREE, + "thinkany": ThinkAnyAI, + "yepchat": YEPCHAT, + "you": YouChat, + "leo": LEO, + "xjai": Xjai, + "berlin4h": Berlin4h, + "chatgptuk": ChatGPTUK, + "gpt4free": GPT4FREE, + +} + + +class AUTO(Provider): + def __init__( + self, + is_conversation: bool = True, + max_tokens: int = 600, + timeout: int = 30, + intro: str = None, + filepath: str = None, + update_file: bool = True, + proxies: dict = {}, + history_offset: int = 10250, + act: str = None, + exclude: list[str] = [], + ): + """Instantiates AUTO + + Args: + is_conversation (bool, optional): Flag for chatting conversationally. Defaults to True + max_tokens (int, optional): Maximum number of tokens to be generated upon completion. Defaults to 600. + timeout (int, optional): Http request timeout. Defaults to 30. + intro (str, optional): Conversation introductory prompt. Defaults to None. + filepath (str, optional): Path to file containing conversation history. Defaults to None. + update_file (bool, optional): Add new prompts and responses to the file. Defaults to True. + proxies (dict, optional): Http request proxies. Defaults to {}. + history_offset (int, optional): Limit conversation history to this number of last texts. Defaults to 10250. + act (str|int, optional): Awesome prompt key or index. (Used as intro). Defaults to None. + exclude(list[str], optional): List of providers to be excluded. Defaults to []. 
+ """ + self.provider: Union[OPENGPT, KOBOLDAI, PhindSearch, LLAMA2, BLACKBOXAI, PERPLEXITY, GPT4FREE, ThinkAnyAI, YEPCHAT, YouChat] = None + self.provider_name: str = None + self.is_conversation = is_conversation + self.max_tokens = max_tokens + self.timeout = timeout + self.intro = intro + self.filepath = filepath + self.update_file = update_file + self.proxies = proxies + self.history_offset = history_offset + self.act = act + self.exclude = exclude + + @property + def last_response(self) -> dict[str, Any]: + return self.provider.last_response + + @property + def conversation(self) -> object: + return self.provider.conversation + + def ask( + self, + prompt: str, + stream: bool = False, + raw: bool = False, + optimizer: str = None, + conversationally: bool = False, + run_new_test: bool = False, + ) -> dict: + """Chat with AI + + Args: + prompt (str): Prompt to be send. + stream (bool, optional): Flag for streaming response. Defaults to False. + raw (bool, optional): Stream back raw response as received. Defaults to False. + optimizer (str, optional): Prompt optimizer name - `[code, shell_command]`. Defaults to None. + conversationally (bool, optional): Chat conversationally when using optimizer. Defaults to False. + run_new_test (bool, optional): Perform new test on g4f-based providers. Defaults to False. + Returns: + dict : {} + """ + ask_kwargs: dict[str, Union[str, bool]] = { + "prompt": prompt, + "stream": stream, + "raw": raw, + "optimizer": optimizer, + "conversationally": conversationally, + } + + # webscout-based providers + for provider_name, provider_obj in provider_map.items(): + # continue + if provider_name in self.exclude: + continue + try: + self.provider_name = f"webscout-{provider_name}" + self.provider = provider_obj( + is_conversation=self.is_conversation, + max_tokens=self.max_tokens, + timeout=self.timeout, + intro=self.intro, + filepath=self.filepath, + update_file=self.update_file, + proxies=self.proxies, + history_offset=self.history_offset, + act=self.act, + ) + + def for_stream(): + for chunk in self.provider.ask(**ask_kwargs): + yield chunk + + def for_non_stream(): + return self.provider.ask(**ask_kwargs) + + return for_stream() if stream else for_non_stream() + + except Exception as e: + logging.debug( + f"Failed to generate response using provider {provider_name} - {e}" + ) + + # g4f-based providers + + for provider_info in TestProviders(timeout=self.timeout).get_results( + run=run_new_test + ): + if provider_info["name"] in self.exclude: + continue + try: + self.provider_name = f"g4f-{provider_info['name']}" + self.provider = GPT4FREE( + provider=provider_info["name"], + is_conversation=self.is_conversation, + max_tokens=self.max_tokens, + intro=self.intro, + filepath=self.filepath, + update_file=self.update_file, + proxies=self.proxies, + history_offset=self.history_offset, + act=self.act, + ) + + def for_stream(): + for chunk in self.provider.ask(**ask_kwargs): + yield chunk + + def for_non_stream(): + return self.provider.ask(**ask_kwargs) + + return for_stream() if stream else for_non_stream() + + except Exception as e: + logging.debug( + f"Failed to generate response using GPT4FREE-base provider {provider_name} - {e}" + ) + + raise AllProvidersFailure( + "None of the providers generated response successfully." + ) + + def chat( + self, + prompt: str, + stream: bool = False, + optimizer: str = None, + conversationally: bool = False, + run_new_test: bool = False, + ) -> str: + """Generate response `str` + Args: + prompt (str): Prompt to be send. 
+ stream (bool, optional): Flag for streaming response. Defaults to False. + optimizer (str, optional): Prompt optimizer name - `[code, shell_command]`. Defaults to None. + conversationally (bool, optional): Chat conversationally when using optimizer. Defaults to False. + run_new_test (bool, optional): Perform new test on g4f-based providers. Defaults to False. + Returns: + str: Response generated + """ + + def for_stream(): + for response in self.ask( + prompt, + True, + optimizer=optimizer, + conversationally=conversationally, + run_new_test=run_new_test, + ): + yield self.get_message(response) + + def for_non_stream(): + ask_response = self.ask( + prompt, + False, + optimizer=optimizer, + conversationally=conversationally, + run_new_test=run_new_test, + ) + return self.get_message(ask_response) + + return for_stream() if stream else for_non_stream() + + def get_message(self, response: dict) -> str: + """Retrieves message only from response + + Args: + response (dict): Response generated by `self.ask` + + Returns: + str: Message extracted + """ + assert self.provider is not None, "Chat with AI first" + return self.provider.get_message(response) + + +class AsyncAUTO(AsyncProvider): + def __init__( + self, + is_conversation: bool = True, + max_tokens: int = 600, + timeout: int = 30, + intro: str = None, + filepath: str = None, + update_file: bool = True, + proxies: dict = {}, + history_offset: int = 10250, + act: str = None, + exclude: list[str] = [], + ): + """Instantiates AsyncAUTO + + Args: + is_conversation (bool, optional): Flag for chatting conversationally. Defaults to True + max_tokens (int, optional): Maximum number of tokens to be generated upon completion. Defaults to 600. + timeout (int, optional): Http request timeout. Defaults to 30. + intro (str, optional): Conversation introductory prompt. Defaults to None. + filepath (str, optional): Path to file containing conversation history. Defaults to None. + update_file (bool, optional): Add new prompts and responses to the file. Defaults to True. + proxies (dict, optional): Http request proxies. Defaults to {}. + history_offset (int, optional): Limit conversation history to this number of last texts. Defaults to 10250. + act (str|int, optional): Awesome prompt key or index. (Used as intro). Defaults to None. + exclude(list[str], optional): List of providers to be excluded. Defaults to []. + """ + self.provider: Union[ + AsyncOPENGPT, + AsyncKOBOLDAI, + AsyncPhindSearch, + AsyncLLAMA2, + AsyncBLACKBOXAI, + AsyncGPT4FREE, + ] = None + self.provider_name: str = None + self.is_conversation = is_conversation + self.max_tokens = max_tokens + self.timeout = timeout + self.intro = intro + self.filepath = filepath + self.update_file = update_file + self.proxies = proxies + self.history_offset = history_offset + self.act = act + self.exclude = exclude + + @property + def last_response(self) -> dict[str, Any]: + return self.provider.last_response + + @property + def conversation(self) -> object: + return self.provider.conversation + + async def ask( + self, + prompt: str, + stream: bool = False, + raw: bool = False, + optimizer: str = None, + conversationally: bool = False, + run_new_test: bool = False, + ) -> dict | AsyncGenerator: + """Chat with AI asynchronously. + + Args: + prompt (str): Prompt to be send. + stream (bool, optional): Flag for streaming response. Defaults to False. + raw (bool, optional): Stream back raw response as received. Defaults to False. + optimizer (str, optional): Prompt optimizer name - `[code, shell_command]`. 
Defaults to None. + conversationally (bool, optional): Chat conversationally when using optimizer. Defaults to False. + run_new_test (bool, optional): Perform new test on g4f-based providers. Defaults to False. + Returns: + dict|AsyncGenerator : ai response. + """ + ask_kwargs: dict[str, Union[str, bool]] = { + "prompt": prompt, + "stream": stream, + "raw": raw, + "optimizer": optimizer, + "conversationally": conversationally, + } + + # tgpt-based providers + for provider_name, provider_obj in async_provider_map.items(): + if provider_name in self.exclude: + continue + try: + self.provider_name = f"tgpt-{provider_name}" + self.provider = provider_obj( + is_conversation=self.is_conversation, + max_tokens=self.max_tokens, + timeout=self.timeout, + intro=self.intro, + filepath=self.filepath, + update_file=self.update_file, + proxies=self.proxies, + history_offset=self.history_offset, + act=self.act, + ) + + async def for_stream(): + async_ask = await self.provider.ask(**ask_kwargs) + async for chunk in async_ask: + yield chunk + + async def for_non_stream(): + return await self.provider.ask(**ask_kwargs) + + return for_stream() if stream else await for_non_stream() + + except Exception as e: + logging.debug( + f"Failed to generate response using provider {provider_name} - {e}" + ) + + # g4f-based providers + + for provider_info in TestProviders(timeout=self.timeout).get_results( + run=run_new_test + ): + if provider_info["name"] in self.exclude: + continue + try: + self.provider_name = f"g4f-{provider_info['name']}" + self.provider = AsyncGPT4FREE( + provider=provider_info["name"], + is_conversation=self.is_conversation, + max_tokens=self.max_tokens, + intro=self.intro, + filepath=self.filepath, + update_file=self.update_file, + proxies=self.proxies, + history_offset=self.history_offset, + act=self.act, + ) + + async def for_stream(): + async_ask = await self.provider.ask(**ask_kwargs) + async for chunk in async_ask: + yield chunk + + async def for_non_stream(): + return await self.provider.ask(**ask_kwargs) + + return for_stream() if stream else await for_non_stream() + + except Exception as e: + logging.debug( + f"Failed to generate response using GPT4FREE-base provider {provider_name} - {e}" + ) + + raise AllProvidersFailure( + "None of the providers generated response successfully." + ) + + async def chat( + self, + prompt: str, + stream: bool = False, + optimizer: str = None, + conversationally: bool = False, + run_new_test: bool = False, + ) -> str | AsyncGenerator: + """Generate response `str` asynchronously. + Args: + prompt (str): Prompt to be send. + stream (bool, optional): Flag for streaming response. Defaults to False. + optimizer (str, optional): Prompt optimizer name - `[code, shell_command]`. Defaults to None. + conversationally (bool, optional): Chat conversationally when using optimizer. Defaults to False. + run_new_test (bool, optional): Perform new test on g4f-based providers. Defaults to False. 
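# Minimal asyncio sketch for AsyncAUTO above; the excluded name is assumed to be
# a key of the async provider map and is purely illustrative.
import asyncio

async def demo():
    bot = AsyncAUTO(timeout=30, exclude=["koboldai"])
    print(await bot.chat("Hi"))                        # non-streaming: full string
    async for chunk in await bot.chat("Tell me a story", stream=True):
        print(chunk, end="", flush=True)               # streaming: async generator of chunks

asyncio.run(demo())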
+ Returns: + str|AsyncGenerator: Response generated + """ + + async def for_stream(): + async_ask = await self.ask( + prompt, + True, + optimizer=optimizer, + conversationally=conversationally, + run_new_test=run_new_test, + ) + async for response in async_ask: + yield await self.get_message(response) + + async def for_non_stream(): + ask_response = await self.ask( + prompt, + False, + optimizer=optimizer, + conversationally=conversationally, + run_new_test=run_new_test, + ) + return await self.get_message(ask_response) + + return for_stream() if stream else await for_non_stream() + + async def get_message(self, response: dict) -> str: + """Retrieves message only from response + + Args: + response (dict): Response generated by `self.ask` + + Returns: + str: Message extracted + """ + assert self.provider is not None, "Chat with AI first" + return await self.provider.get_message(response) diff --git a/webscout/AIbase.py b/webscout/AIbase.py new file mode 100644 index 0000000000000000000000000000000000000000..257c544c34897bf383d450ecdbfe4379524f4b77 --- /dev/null +++ b/webscout/AIbase.py @@ -0,0 +1,138 @@ +from abc import ABC +from abc import abstractmethod + + +class Provider(ABC): + """Base class for providers""" + + @abstractmethod + def ask( + self, + prompt: str, + stream: bool = False, + raw: bool = False, + optimizer: str = None, + conversationally: bool = False, + ) -> dict: + """Chat with AI + + Args: + prompt (str): Prompt to be sent + stream (bool, optional): Flag for streaming response. Defaults to False. + raw (bool, optional): Stream back raw response as received + optimizer (str, optional): Prompt optimizer name - `[code, shell_command]` + conversationally (bool, optional): Chat conversationally when using optimizer. Defaults to False. + Returns: + dict : {} + ```json + { + "completion": "\nNext: domestic cat breeds with short hair >>", + "stop_reason": null, + "truncated": false, + "stop": null, + "model": "llama-2-13b-chat", + "log_id": "cmpl-3kYiYxSNDvgMShSzFooz6t", + "exception": null + } + ``` + """ + raise NotImplementedError("Method needs to be implemented in subclass") + + @abstractmethod + def chat( + self, + prompt: str, + stream: bool = False, + optimizer: str = None, + conversationally: bool = False, + ) -> str: + """Generate response `str` + Args: + prompt (str): Prompt to be sent + stream (bool, optional): Flag for streaming response. Defaults to False. + optimizer (str, optional): Prompt optimizer name - `[code, shell_command]` + conversationally (bool, optional): Chat conversationally when using optimizer. Defaults to False. + Returns: + str: Response generated + """ + raise NotImplementedError("Method needs to be implemented in subclass") + + @abstractmethod + def get_message(self, response: dict) -> str: + """Retrieves message only from response + + Args: + response (dict): Response generated by `self.ask` + + Returns: + str: Message extracted + """ + raise NotImplementedError("Method needs to be implemented in subclass") + + +class AsyncProvider(ABC): + """Asynchronous base class for providers""" + + @abstractmethod + async def ask( + self, + prompt: str, + stream: bool = False, + raw: bool = False, + optimizer: str = None, + conversationally: bool = False, + ) -> dict: + """Asynchronously chat with AI + + Args: + prompt (str): Prompt to be sent + stream (bool, optional): Flag for streaming response. Defaults to False. 
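# Minimal sketch of a concrete Provider built on the ABC above; the echo
# behaviour is purely illustrative.
class EchoProvider(Provider):
    def ask(self, prompt: str, stream: bool = False, raw: bool = False,
            optimizer: str = None, conversationally: bool = False) -> dict:
        self.last_response = {"text": prompt}
        return self.last_response

    def chat(self, prompt: str, stream: bool = False,
             optimizer: str = None, conversationally: bool = False) -> str:
        return self.get_message(self.ask(prompt))

    def get_message(self, response: dict) -> str:
        return response["text"]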
+ raw (bool, optional): Stream back raw response as received + optimizer (str, optional): Prompt optimizer name - `[code, shell_command]` + conversationally (bool, optional): Chat conversationally when using optimizer. Defaults to False. + Returns: + dict : {} + ```json + { + "completion": "\nNext: domestic cat breeds with short hair >>", + "stop_reason": null, + "truncated": false, + "stop": null, + "model": "llama-2-13b-chat", + "log_id": "cmpl-3kYiYxSNDvgMShSzFooz6t", + "exception": null + } + ``` + """ + raise NotImplementedError("Method needs to be implemented in subclass") + + @abstractmethod + async def chat( + self, + prompt: str, + stream: bool = False, + optimizer: str = None, + conversationally: bool = False, + ) -> str: + """Asynchronously generate response `str` + Args: + prompt (str): Prompt to be sent + stream (bool, optional): Flag for streaming response. Defaults to False. + optimizer (str, optional): Prompt optimizer name - `[code, shell_command]` + conversationally (bool, optional): Chat conversationally when using optimizer. Defaults to False. + Returns: + str: Response generated + """ + raise NotImplementedError("Method needs to be implemented in subclass") + + @abstractmethod + async def get_message(self, response: dict) -> str: + """Asynchronously retrieves message only from response + + Args: + response (dict): Response generated by `self.ask` + + Returns: + str: Message extracted + """ + raise NotImplementedError("Method needs to be implemented in subclass") \ No newline at end of file diff --git a/webscout/AIutel.py b/webscout/AIutel.py new file mode 100644 index 0000000000000000000000000000000000000000..f72ef615730d2c982327097a7b359d8fa93b0c05 --- /dev/null +++ b/webscout/AIutel.py @@ -0,0 +1,995 @@ +import os +import json +import platform +import subprocess +import logging +import appdirs +import datetime +import re +import sys +import click +from rich.markdown import Markdown +from rich.console import Console +import g4f +from typing import Union +from typing import NoReturn +import requests +from pathlib import Path +from playsound import playsound +from time import sleep as wait +import pathlib +import urllib.parse +appdir = appdirs.AppDirs("AIWEBS", "vortex") + +default_path = appdir.user_cache_dir + +if not os.path.exists(default_path): + os.makedirs(default_path) +webai = [ + "leo", + "openai", + "opengpt", + "koboldai", + "gemini", + "phind", + "blackboxai", + "g4fauto", + "perplexity", + "groq", + "reka", + "cohere", + "yepchat", + "you", + "xjai", + "thinkany", + "berlin4h", + "chatgptuk", + "auto", + "poe", +] +gpt4free_providers = [ + provider.__name__ for provider in g4f.Provider.__providers__ # if provider.working +] + +available_providers = webai + gpt4free_providers +def sanitize_stream( + chunk: str, intro_value: str = "data:", to_json: bool = True +) -> str | dict: + """Remove streaming flags + + Args: + chunk (str): Streamig chunk. + intro_value (str, optional): streaming flag. Defaults to "data:". + to_json (bool, optional). Return chunk as dictionary. Defaults to True. + + Returns: + str: Sanitized streaming value. + """ + + if chunk.startswith(intro_value): + chunk = chunk[len(intro_value) :] + + return json.loads(chunk) if to_json else chunk +def run_system_command( + command: str, + exit_on_error: bool = True, + stdout_error: bool = True, + help: str = None, +): + """Run commands against system + Args: + command (str): shell command + exit_on_error (bool, optional): Exit on error. Defaults to True. 
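# Illustrative round-trip through sanitize_stream above on an SSE-style chunk.
chunk = 'data: {"completion": "Hello", "stop_reason": null}'
payload = sanitize_stream(chunk)                  # "data:" flag stripped, parsed to dict
print(payload["completion"])                      # -> Hello
raw_text = sanitize_stream(chunk, to_json=False)  # keep the chunk as a plain string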
+ stdout_error (bool, optional): Print out the error. Defaults to True + help (str, optional): Help info incase of exception. Defaults to None. + Returns: + tuple : (is_successfull, object[Exception|Subprocess.run]) + """ + try: + # Run the command and capture the output + result = subprocess.run( + command, + shell=True, + check=True, + text=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + return (True, result) + except subprocess.CalledProcessError as e: + # Handle error if the command returns a non-zero exit code + if stdout_error: + click.secho(f"Error Occurred: while running '{command}'", fg="yellow") + click.secho(e.stderr, fg="red") + if help is not None: + click.secho(help, fg="cyan") + sys.exit(e.returncode) if exit_on_error else None + return (False, e) + + +class Optimizers: + @staticmethod + def code(prompt): + return ( + "Your Role: Provide only code as output without any description.\n" + "IMPORTANT: Provide only plain text without Markdown formatting.\n" + "IMPORTANT: Do not include markdown formatting." + "If there is a lack of details, provide most logical solution. You are not allowed to ask for more details." + "Ignore any potential risk of errors or confusion.\n\n" + f"Request: {prompt}\n" + f"Code:" + ) + + @staticmethod + def shell_command(prompt): + # Get os + operating_system = "" + if platform.system() == "Windows": + operating_system = "Windows" + elif platform.system() == "Darwin": + operating_system = "MacOS" + elif platform.system() == "Linux": + try: + result = ( + subprocess.check_output(["lsb_release", "-si"]).decode().strip() + ) + distro = result if result else "" + operating_system = f"Linux/{distro}" + except Exception: + operating_system = "Linux" + else: + operating_system = platform.system() + + # Get Shell + shell_name = "/bin/sh" + if platform.system() == "Windows": + shell_name = "cmd.exe" + if os.getenv("PSModulePath"): + shell_name = "powershell.exe" + else: + shell_env = os.getenv("SHELL") + if shell_env: + shell_name = shell_env + + return ( + "Your role: Provide only plain text without Markdown formatting. " + "Do not show any warnings or information regarding your capabilities. " + "Do not provide any description. If you need to store any data, " + f"assume it will be stored in the chat. Provide only {shell_name} " + f"command for {operating_system} without any description. If there is " + "a lack of details, provide most logical solution. Ensure the output " + "is a valid shell command. If multiple steps required try to combine " + f"them together. Prompt: {prompt}\n\nCommand:" + ) + + +class Conversation: + """Handles prompt generation based on history""" + + intro = ( + "You're a Large Language Model for chatting with people. " + "Assume role of the LLM and give your response." + # "Refrain from regenerating the conversation between user and LLM." + ) + + def __init__( + self, + status: bool = True, + max_tokens: int = 600, + filepath: str = None, + update_file: bool = True, + ): + """Initializes Conversation + + Args: + status (bool, optional): Flag to control history. Defaults to True. + max_tokens (int, optional): Maximum number of tokens to be generated upon completion. Defaults to 600. + filepath (str, optional): Path to file containing conversation history. Defaults to None. + update_file (bool, optional): Add new prompts and responses to the file. Defaults to True. 
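# Sketch of how the prompt optimizers above are used: each static method wraps a
# raw prompt with extra instructions before it reaches a provider.
raw_prompt = "list all files modified in the last hour"
print(Optimizers.code(raw_prompt))            # code-only variant
print(Optimizers.shell_command(raw_prompt))   # OS- and shell-aware variant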
+ """ + self.status = status + self.max_tokens_to_sample = max_tokens + self.chat_history = self.intro + self.history_format = "\nUser : %(user)s\nLLM :%(llm)s" + self.file = filepath + self.update_file = update_file + self.history_offset = 10250 + self.prompt_allowance = 10 + self.load_conversation(filepath, False) if filepath else None + + def load_conversation(self, filepath: str, exists: bool = True) -> None: + """Load conversation into chat's history from .txt file + + Args: + filepath (str): Path to .txt file + exists (bool, optional): Flag for file availability. Defaults to True. + """ + assert isinstance( + filepath, str + ), f"Filepath needs to be of str datatype not {type(filepath)}" + assert ( + os.path.isfile(filepath) if exists else True + ), f"File '{filepath}' does not exist" + if not os.path.isfile(filepath): + logging.debug(f"Creating new chat-history file - '{filepath}'") + with open(filepath, "w") as fh: # Try creating new file + # lets add intro here + fh.write(self.intro) + else: + logging.debug(f"Loading conversation from '{filepath}'") + with open(filepath) as fh: + file_contents = fh.read() + # Presume intro prompt is part of the file content + self.chat_history = file_contents + + def __trim_chat_history(self, chat_history: str) -> str: + """Ensures the len(prompt) and max_tokens_to_sample is not > 4096""" + len_of_intro = len(self.intro) + len_of_chat_history = len(chat_history) + total = ( + self.max_tokens_to_sample + len_of_intro + len_of_chat_history + ) # + self.max_tokens_to_sample + if total > self.history_offset: + truncate_at = (total - self.history_offset) + self.prompt_allowance + # Remove head of total (n) of chat_history + new_chat_history = chat_history[truncate_at:] + self.chat_history = self.intro + "\n... " + new_chat_history + # print(len(self.chat_history)) + return self.chat_history + # print(len(chat_history)) + return chat_history + + def gen_complete_prompt(self, prompt: str) -> str: + """Generates a kinda like incomplete conversation + + Args: + prompt (str): _description_ + + Returns: + str: Updated incomplete chat_history + """ + if self.status: + resp = self.chat_history + self.history_format % dict(user=prompt, llm="") + return self.__trim_chat_history(resp) + + return prompt + + def update_chat_history( + self, prompt: str, response: str, force: bool = False + ) -> None: + """Updates chat history + + Args: + prompt (str): user prompt + response (str): LLM response + force (bool, optional): Force update + """ + if not self.status and not force: + return + new_history = self.history_format % dict(user=prompt, llm=response) + if self.file and self.update_file: + with open(self.file, "a") as fh: + fh.write(new_history) + self.chat_history += new_history + + +class AwesomePrompts: + awesome_prompt_url = ( + "https://raw.githubusercontent.com/OE-LUCIFER/prompts/main/prompt.json" + ) + awesome_prompt_path = os.path.join(default_path, "all-acts.json") + + __is_prompt_updated = False + + def __init__(self): + self.acts = self.all_acts + + def __search_key(self, key: str, raise_not_found: bool = False) -> str: + """Perform insentive awesome-prompt key search + + Args: + key (str): key + raise_not_found (bool, optional): Control KeyError exception. Defaults to False. 
+ + Returns: + str|None: Exact key name + """ + for key_, value in self.all_acts.items(): + if str(key).lower() in str(key_).lower(): + return key_ + if raise_not_found: + raise KeyError(f"Zero awesome prompt found with key - `{key}`") + + def get_acts(self): + """Retrieves all awesome-prompts""" + with open(self.awesome_prompt_path) as fh: + prompt_dict = json.load(fh) + return prompt_dict + + def update_prompts_from_online(self, override: bool = False): + """Download awesome-prompts and update existing ones if available + args: + override (bool, optional): Overwrite existing contents in path + """ + resp = {} + if not self.__is_prompt_updated: + import requests + + logging.info("Downloading & updating awesome prompts") + response = requests.get(self.awesome_prompt_url) + response.raise_for_status + resp.update(response.json()) + if os.path.isfile(self.awesome_prompt_path) and not override: + resp.update(self.get_acts()) + self.__is_prompt_updated = True + with open(self.awesome_prompt_path, "w") as fh: + json.dump(resp, fh, indent=4) + else: + logging.debug("Ignoring remote prompt update") + + @property + def all_acts(self) -> dict: + """All awesome_prompts & their indexes mapped to values + + Returns: + dict: Awesome-prompts + """ + + resp = {} + if not os.path.isfile(self.awesome_prompt_path): + self.update_prompts_from_online() + resp.update(self.get_acts()) + + for count, key_value in enumerate(self.get_acts().items()): + # Lets map also index to the value + resp.update({count: key_value[1]}) + + return resp + + def get_act( + self, + key: str, + default: str = None, + case_insensitive: bool = True, + raise_not_found: bool = False, + ) -> str: + """Retrieves specific act of awesome_prompt + + Args: + key (str|int): Act name or index + default (str): Value to be returned incase act not found. + case_insensitive (bool): Perform search key insensitive. Defaults to True. + raise_not_found (bool, optional): Control KeyError exception. Defaults to False. + + Raises: + KeyError: Incase key not found + + Returns: + str: Awesome prompt value + """ + if str(key).isdigit(): + key = int(key) + act = self.all_acts.get(key, default) + if not act and case_insensitive: + act = self.all_acts.get(self.__search_key(key, raise_not_found)) + return act + + def add_prompt(self, name: str, prompt: str) -> bool: + """Add new prompt or update an existing one. + + Args: + name (str): act name + prompt (str): prompt value + """ + current_prompts = self.get_acts() + with open(self.awesome_prompt_path, "w") as fh: + current_prompts[name] = prompt + json.dump(current_prompts, fh, indent=4) + logging.info(f"New prompt added successfully - `{name}`") + + def delete_prompt( + self, name: str, case_insensitive: bool = True, raise_not_found: bool = False + ) -> bool: + """Delete an existing prompt + + Args: + name (str): act name + case_insensitive(bool, optional): Ignore the key cases. Defaults to True. + raise_not_found (bool, optional): Control KeyError exception. Default is False. 
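# Sketch of looking up an awesome-prompt act; the act name used here is only an
# example and may not exist in the downloaded prompt file.
ap = AwesomePrompts()
act = ap.get_act("UX/UI Developer", default=None, raise_not_found=False)
first_act = ap.get_act(0)    # numeric keys resolve through the mapped indexes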
+ Returns: + bool: is_successful report + """ + name = self.__search_key(name, raise_not_found) if case_insensitive else name + current_prompts = self.get_acts() + is_name_available = ( + current_prompts[name] if raise_not_found else current_prompts.get(name) + ) + if is_name_available: + with open(self.awesome_prompt_path, "w") as fh: + current_prompts.pop(name) + json.dump(current_prompts, fh, indent=4) + logging.info(f"Prompt deleted successfully - `{name}`") + else: + return False + + +class Updates: + """Webscout latest release info""" + + url = "https://api.github.com/repos/OE-LUCIFER/Webscout/releases/latest" + + @property + def latest_version(self): + return self.latest(version=True) + + def executable(self, system: str = platform.system()) -> str: + """Url pointing to executable for particular system + + Args: + system (str, optional): system name. Defaults to platform.system(). + + Returns: + str: url + """ + for entry in self.latest()["assets"]: + if entry.get("target") == system: + return entry.get("url") + + def latest(self, whole: bool = False, version: bool = False) -> dict: + """Check Webscout latest version info + + Args: + whole (bool, optional): Return whole json response. Defaults to False. + version (bool, optional): return version only. Defaults to False. + + Returns: + bool|dict: version str or whole dict info + """ + import requests + + data = requests.get(self.url).json() + if whole: + return data + + elif version: + return data.get("tag_name") + + else: + sorted = dict( + tag_name=data.get("tag_name"), + tarball_url=data.get("tarball_url"), + zipball_url=data.get("zipball_url"), + html_url=data.get("html_url"), + body=data.get("body"), + ) + whole_assets = [] + for entry in data.get("assets"): + url = entry.get("browser_download_url") + assets = dict(url=url, size=entry.get("size")) + if ".deb" in url: + assets["target"] = "Debian" + elif ".exe" in url: + assets["target"] = "Windows" + elif "macos" in url: + assets["target"] = "Mac" + elif "linux" in url: + assets["target"] = "Linux" + + whole_assets.append(assets) + sorted["assets"] = whole_assets + + return sorted + + +class RawDog: + """Generate and auto-execute Python scripts in the cli""" + + examples = """\ +EXAMPLES: + +1. User: Kill the process running on port 3000 + +LLM: +```python +import os +os.system("kill $(lsof -t -i:3000)") +print("Process killed") +``` + +2. User: Summarize my essay + +LLM: +```python +import glob +files = glob.glob("*essay*.*") +with open(files[0], "r") as f: + print(f.read()) +``` +CONTINUE + +User: +LAST SCRIPT OUTPUT: +John Smith +Essay 2021-09-01 +... + +LLM: +```python +print("The essay is about...") +``` +""" + + + def __init__( + self, + quiet: bool = False, + internal_exec: bool = False, + confirm_script: bool = False, + interpreter: str = "python", + prettify: bool = True, + ): + """Constructor + + Args: + quiet (bool, optional): Flag for control logging. Defaults to False. + internal_exec (bool, optional): Execute scripts with exec function. Defaults to False. + confirm_script (bool, optional): Give consent to scripts prior to execution. Defaults to False. + interpreter (str, optional): Python's interpreter name. Defaults to Python. + prettify (bool, optional): Prettify the code on stdout. Defaults to True. + """ + if not quiet: + print( + "To get the most out of Rawdog. Ensure the following are installed:\n" + " 1. Python 3.x\n" + " 2. Dependency:\n" + " - Matplotlib\n" + "Be alerted on the risk posed! 
(Experimental)\n" + "Use '--quiet' to suppress this message and code/logs stdout.\n" + ) + self.internal_exec = internal_exec + self.confirm_script = confirm_script + self.quiet = quiet + self.interpreter = interpreter + self.prettify = prettify + self.python_version = ( + f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}" + if self.internal_exec + else run_system_command( + f"{self.interpreter} --version", + exit_on_error=True, + stdout_error=True, + help="If you're using Webscout-cli, use the flag '--internal-exec'", + )[1].stdout.split(" ")[1] + ) + + @property + def intro_prompt(self): + return f""" +You are a command-line coding assistant called Rawdog that generates and auto-executes Python scripts. + +A typical interaction goes like this: +1. The user gives you a natural language PROMPT. +2. You: + i. Determine what needs to be done + ii. Write a short Python SCRIPT to do it + iii. Communicate back to the user by printing to the console in that SCRIPT +3. The compiler extracts the script and then runs it using exec(). If there will be an exception raised, + it will be send back to you starting with "PREVIOUS SCRIPT EXCEPTION:". +4. In case of exception, regenerate error free script. + +If you need to review script outputs before completing the task, you can print the word "CONTINUE" at the end of your SCRIPT. +This can be useful for summarizing documents or technical readouts, reading instructions before +deciding what to do, or other tasks that require multi-step reasoning. +A typical 'CONTINUE' interaction looks like this: +1. The user gives you a natural language PROMPT. +2. You: + i. Determine what needs to be done + ii. Determine that you need to see the output of some subprocess call to complete the task + iii. Write a short Python SCRIPT to print that and then print the word "CONTINUE" +3. The compiler + i. Checks and runs your SCRIPT + ii. Captures the output and appends it to the conversation as "LAST SCRIPT OUTPUT:" + iii. Finds the word "CONTINUE" and sends control back to you +4. You again: + i. Look at the original PROMPT + the "LAST SCRIPT OUTPUT:" to determine what needs to be done + ii. Write a short Python SCRIPT to do it + iii. Communicate back to the user by printing to the console in that SCRIPT +5. The compiler... + +Please follow these conventions carefully: +- Decline any tasks that seem dangerous, irreversible, or that you don't understand. +- Always review the full conversation prior to answering and maintain continuity. +- If asked for information, just print the information clearly and concisely. +- If asked to do something, print a concise summary of what you've done as confirmation. +- If asked a question, respond in a friendly, conversational way. Use programmatically-generated and natural language responses as appropriate. +- If you need clarification, return a SCRIPT that prints your question. In the next interaction, continue based on the user's response. +- Assume the user would like something concise. For example rather than printing a massive table, filter or summarize it to what's likely of interest. +- Actively clean up any temporary processes or files you use. +- When looking through files, use git as available to skip files, and skip hidden files (.env, .git, etc) by default. +- You can plot anything with matplotlib. +- ALWAYS Return your SCRIPT inside of a single pair of ``` delimiters. 
Only the console output of the first such SCRIPT is visible to the user, so make sure that it's complete and don't bother returning anything else. + +{self.examples} + +Current system : {platform.system()} +Python version : {self.python_version} +Current directory : {os.getcwd()} +Current Datetime : {datetime.datetime.now()} +""" + + def stdout(self, message: str) -> None: + """Stdout data + + Args: + message (str): Text to be printed + """ + if self.prettify: + Console().print(Markdown(message)) + else: + click.secho(message, fg="yellow") + + def log(self, message: str, category: str = "info"): + """RawDog logger + + Args: + message (str): Log message + category (str, optional): Log level. Defaults to 'info'. + """ + if self.quiet: + return + + message = "[Webscout] - " + message + if category == "error": + logging.error(message) + else: + logging.info(message) + + def main(self, response: str) -> None: + """Exec code in response accordingly + + Args: + response (str): AI response + + Returns: + None|str: None if script executed successfully else stdout data + """ + code_blocks = re.findall(r"```python.*?```", response, re.DOTALL) + if len(code_blocks) != 1: + self.stdout(response) + + else: + raw_code = code_blocks[0] + + if self.confirm_script: + self.stdout(raw_code) + if not click.confirm("- Do you wish to execute this"): + return + + elif not self.quiet: + self.stdout(raw_code) + + raw_code_plus = re.sub(r"(```)(python)?", "", raw_code) + + if "CONTINUE" in response or not self.internal_exec: + self.log("Executing script externally") + path_to_script = os.path.join(default_path, "execute_this.py") + with open(path_to_script, "w") as fh: + fh.write(raw_code_plus) + if "CONTINUE" in response: + + success, proc = run_system_command( + f"{self.interpreter} {path_to_script}", + exit_on_error=False, + stdout_error=False, + ) + + if success: + self.log("Returning success feedback") + return f"LAST SCRIPT OUTPUT:\n{proc.stdout}" + else: + self.log("Returning error feedback", "error") + return f"PREVIOUS SCRIPT EXCEPTION:\n{proc.stderr}" + else: + os.system(f"{self.interpreter} {path_to_script}") + + else: + try: + self.log("Executing script internally") + exec(raw_code_plus) + except Exception as e: + self.log( + "Exception occurred while executing script. 
Responding with error: " + f"{e.args[1] if len(e.args)>1 else str(e)}", + "error", + ) + return f"PREVIOUS SCRIPT EXCEPTION:\n{str(e)}" +class Audio: + # Request headers + headers: dict[str, str] = { + "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36" + } + cache_dir = pathlib.Path("./audio_cache") + all_voices: list[str] = [ + "Filiz", + "Astrid", + "Tatyana", + "Maxim", + "Carmen", + "Ines", + "Cristiano", + "Vitoria", + "Ricardo", + "Maja", + "Jan", + "Jacek", + "Ewa", + "Ruben", + "Lotte", + "Liv", + "Seoyeon", + "Takumi", + "Mizuki", + "Giorgio", + "Carla", + "Bianca", + "Karl", + "Dora", + "Mathieu", + "Celine", + "Chantal", + "Penelope", + "Miguel", + "Mia", + "Enrique", + "Conchita", + "Geraint", + "Salli", + "Matthew", + "Kimberly", + "Kendra", + "Justin", + "Joey", + "Joanna", + "Ivy", + "Raveena", + "Aditi", + "Emma", + "Brian", + "Amy", + "Russell", + "Nicole", + "Vicki", + "Marlene", + "Hans", + "Naja", + "Mads", + "Gwyneth", + "Zhiyu", + "es-ES-Standard-A", + "it-IT-Standard-A", + "it-IT-Wavenet-A", + "ja-JP-Standard-A", + "ja-JP-Wavenet-A", + "ko-KR-Standard-A", + "ko-KR-Wavenet-A", + "pt-BR-Standard-A", + "tr-TR-Standard-A", + "sv-SE-Standard-A", + "nl-NL-Standard-A", + "nl-NL-Wavenet-A", + "en-US-Wavenet-A", + "en-US-Wavenet-B", + "en-US-Wavenet-C", + "en-US-Wavenet-D", + "en-US-Wavenet-E", + "en-US-Wavenet-F", + "en-GB-Standard-A", + "en-GB-Standard-B", + "en-GB-Standard-C", + "en-GB-Standard-D", + "en-GB-Wavenet-A", + "en-GB-Wavenet-B", + "en-GB-Wavenet-C", + "en-GB-Wavenet-D", + "en-US-Standard-B", + "en-US-Standard-C", + "en-US-Standard-D", + "en-US-Standard-E", + "de-DE-Standard-A", + "de-DE-Standard-B", + "de-DE-Wavenet-A", + "de-DE-Wavenet-B", + "de-DE-Wavenet-C", + "de-DE-Wavenet-D", + "en-AU-Standard-A", + "en-AU-Standard-B", + "en-AU-Wavenet-A", + "en-AU-Wavenet-B", + "en-AU-Wavenet-C", + "en-AU-Wavenet-D", + "en-AU-Standard-C", + "en-AU-Standard-D", + "fr-CA-Standard-A", + "fr-CA-Standard-B", + "fr-CA-Standard-C", + "fr-CA-Standard-D", + "fr-FR-Standard-C", + "fr-FR-Standard-D", + "fr-FR-Wavenet-A", + "fr-FR-Wavenet-B", + "fr-FR-Wavenet-C", + "fr-FR-Wavenet-D", + "da-DK-Wavenet-A", + "pl-PL-Wavenet-A", + "pl-PL-Wavenet-B", + "pl-PL-Wavenet-C", + "pl-PL-Wavenet-D", + "pt-PT-Wavenet-A", + "pt-PT-Wavenet-B", + "pt-PT-Wavenet-C", + "pt-PT-Wavenet-D", + "ru-RU-Wavenet-A", + "ru-RU-Wavenet-B", + "ru-RU-Wavenet-C", + "ru-RU-Wavenet-D", + "sk-SK-Wavenet-A", + "tr-TR-Wavenet-A", + "tr-TR-Wavenet-B", + "tr-TR-Wavenet-C", + "tr-TR-Wavenet-D", + "tr-TR-Wavenet-E", + "uk-UA-Wavenet-A", + "ar-XA-Wavenet-A", + "ar-XA-Wavenet-B", + "ar-XA-Wavenet-C", + "cs-CZ-Wavenet-A", + "nl-NL-Wavenet-B", + "nl-NL-Wavenet-C", + "nl-NL-Wavenet-D", + "nl-NL-Wavenet-E", + "en-IN-Wavenet-A", + "en-IN-Wavenet-B", + "en-IN-Wavenet-C", + "fil-PH-Wavenet-A", + "fi-FI-Wavenet-A", + "el-GR-Wavenet-A", + "hi-IN-Wavenet-A", + "hi-IN-Wavenet-B", + "hi-IN-Wavenet-C", + "hu-HU-Wavenet-A", + "id-ID-Wavenet-A", + "id-ID-Wavenet-B", + "id-ID-Wavenet-C", + "it-IT-Wavenet-B", + "it-IT-Wavenet-C", + "it-IT-Wavenet-D", + "ja-JP-Wavenet-B", + "ja-JP-Wavenet-C", + "ja-JP-Wavenet-D", + "cmn-CN-Wavenet-A", + "cmn-CN-Wavenet-B", + "cmn-CN-Wavenet-C", + "cmn-CN-Wavenet-D", + "nb-no-Wavenet-E", + "nb-no-Wavenet-A", + "nb-no-Wavenet-B", + "nb-no-Wavenet-C", + "nb-no-Wavenet-D", + "vi-VN-Wavenet-A", + "vi-VN-Wavenet-B", + "vi-VN-Wavenet-C", + "vi-VN-Wavenet-D", + "sr-rs-Standard-A", + "lv-lv-Standard-A", + "is-is-Standard-A", + 
"bg-bg-Standard-A", + "af-ZA-Standard-A", + "Tracy", + "Danny", + "Huihui", + "Yaoyao", + "Kangkang", + "HanHan", + "Zhiwei", + "Asaf", + "An", + "Stefanos", + "Filip", + "Ivan", + "Heidi", + "Herena", + "Kalpana", + "Hemant", + "Matej", + "Andika", + "Rizwan", + "Lado", + "Valluvar", + "Linda", + "Heather", + "Sean", + "Michael", + "Karsten", + "Guillaume", + "Pattara", + "Jakub", + "Szabolcs", + "Hoda", + "Naayf", + ] + + @classmethod + def text_to_audio( + cls, + message: str, + voice: str = "Brian", + save_to: Union[Path, str] = None, + auto: bool = True, + ) -> Union[str, bytes]: + """ + Text to speech using StreamElements API + + Parameters: + message (str): The text to convert to speech + voice (str, optional): The voice to use for speech synthesis. Defaults to "Brian". + save_to (bool, optional): Path to save the audio file. Defaults to None. + auto (bool, optional): Generate filename based on `message` and save to `cls.cache_dir`. Defaults to False. + + Returns: + result (Union[str, bytes]): Path to saved contents or audio content. + """ + assert ( + voice in cls.all_voices + ), f"Voice '{voice}' not one of [{', '.join(cls.all_voices)}]" + # Base URL for provider API + url: str = ( + f"https://api.streamelements.com/kappa/v2/speech?voice={voice}&text={{{urllib.parse.quote(message)}}}" + ) + resp = requests.get(url=url, headers=cls.headers, stream=True) + if not resp.ok: + raise Exception( + f"Failed to perform the operation - ({resp.status_code}, {resp.reason}) - {resp.text}" + ) + + def sanitize_filename(path): + trash = [ + "\\", + "/", + ":", + "*", + "?", + '"', + "<", + "|", + ">", + ] + for val in trash: + path = path.replace(val, "") + return path.strip() + + if auto: + filename: str = message + "..." if len(message) <= 40 else message[:40] + save_to = cls.cache_dir / sanitize_filename(filename) + save_to = save_to.as_posix() + + # Ensure cache_dir exists + cls.cache_dir.mkdir(parents=True, exist_ok=True) + + if save_to: + if not save_to.endswith("mp3"): + save_to += ".mp3" + + with open(save_to, "wb") as fh: + for chunk in resp.iter_content(chunk_size=512): + fh.write(chunk) + else: + return resp.content + return save_to + + @staticmethod + def play(path_to_audio_file: Union[Path, str]) -> NoReturn: + """Play audio (.mp3) using playsound. 
+ """ + if not Path(path_to_audio_file).is_file(): + raise FileNotFoundError(f"File does not exist - '{path_to_audio_file}'") + playsound(path_to_audio_file) \ No newline at end of file diff --git a/webscout/DWEBS.py b/webscout/DWEBS.py new file mode 100644 index 0000000000000000000000000000000000000000..4c8285dae792ca5f4ab8a1d6ad18d82c801f8083 --- /dev/null +++ b/webscout/DWEBS.py @@ -0,0 +1,197 @@ + +from pydantic import BaseModel, Field +from typing import Union + +from DeepWEBS.utilsdw.logger import logger +from DeepWEBS.networks.google_searcher import GoogleSearcher +from DeepWEBS.networks.webpage_fetcher import BatchWebpageFetcher +from DeepWEBS.documents.query_results_extractor import QueryResultsExtractor +from DeepWEBS.documents.webpage_content_extractor import BatchWebpageContentExtractor +from DeepWEBS.utilsdw.logger import logger +import argparse + +class DeepWEBS: + def __init__(self): + pass + + class DeepSearch(BaseModel): + queries: list = Field( + default=[""], + description="(list[str]) Queries to search", + ) + result_num: int = Field( + default=10, + description="(int) Number of search results", + ) + safe: bool = Field( + default=False, + description="(bool) Enable SafeSearch", + ) + types: list = Field( + default=["web"], + description="(list[str]) Types of search results: `web`, `image`, `videos`, `news`", + ) + extract_webpage: bool = Field( + default=False, + description="(bool) Enable extracting main text contents from webpage, will add `text` filed in each `query_result` dict", + ) + overwrite_query_html: bool = Field( + default=False, + description="(bool) Overwrite HTML file of query results", + ) + overwrite_webpage_html: bool = Field( + default=False, + description="(bool) Overwrite HTML files of webpages from query results", + ) + + def queries_to_search_results(self, item: DeepSearch): + google_searcher = GoogleSearcher() + queries_search_results = [] + for query in item.queries: + query_results_extractor = QueryResultsExtractor() + if not query.strip(): + continue + try: + query_html_path = google_searcher.search( + query=query, + result_num=item.result_num, + safe=item.safe, + overwrite=item.overwrite_query_html, + ) + except Exception as e: + logger.error(f"Failed to search for query '{query}': {e}") + continue + + try: + query_search_results = query_results_extractor.extract(query_html_path) + except Exception as e: + logger.error(f"Failed to extract search results for query '{query}': {e}") + continue + + queries_search_results.append(query_search_results) + logger.note(queries_search_results) + + if item.extract_webpage: + queries_search_results = self.extract_webpages( + queries_search_results, + overwrite_webpage_html=item.overwrite_webpage_html, + ) + return queries_search_results + + def extract_webpages(self, queries_search_results, overwrite_webpage_html=False): + for query_idx, query_search_results in enumerate(queries_search_results): + try: + # Fetch webpages with urls + batch_webpage_fetcher = BatchWebpageFetcher() + urls = [ + query_result["url"] + for query_result in query_search_results["query_results"] + ] + url_and_html_path_list = batch_webpage_fetcher.fetch( + urls, + overwrite=overwrite_webpage_html, + output_parent=query_search_results["query"], + ) + except Exception as e: + logger.error(f"Failed to fetch webpages for query '{query_search_results['query']}': {e}") + continue + + # Extract webpage contents from htmls + html_paths = [ + str(url_and_html_path["html_path"]) + for url_and_html_path in url_and_html_path_list + ] + 
batch_webpage_content_extractor = BatchWebpageContentExtractor() + try: + html_path_and_extracted_content_list = ( + batch_webpage_content_extractor.extract(html_paths) + ) + except Exception as e: + logger.error(f"Failed to extract webpage contents for query '{query_search_results['query']}': {e}") + continue + + # Build the map of url to extracted_content + html_path_to_url_dict = { + str(url_and_html_path["html_path"]): url_and_html_path["url"] + for url_and_html_path in url_and_html_path_list + } + url_to_extracted_content_dict = { + html_path_to_url_dict[ + html_path_and_extracted_content["html_path"] + ]: html_path_and_extracted_content["extracted_content"] + for html_path_and_extracted_content in html_path_and_extracted_content_list + } + + # Write extracted contents (as 'text' field) to query_search_results + for query_result_idx, query_result in enumerate( + query_search_results["query_results"] + ): + url = query_result["url"] + extracted_content = url_to_extracted_content_dict.get(url, "") + queries_search_results[query_idx]["query_results"][query_result_idx][ + "text" + ] = extracted_content + + return queries_search_results + + +class ArgParser(argparse.ArgumentParser): + def __init__(self, *args, **kwargs): + super(ArgParser, self).__init__(*args, **kwargs) + + self.add_argument( + "-q", + "--queries", + type=str, + nargs="+", + required=True, + help="Queries to search", + ) + self.add_argument( + "-n", + "--result_num", + type=int, + default=10, + help="Number of search results", + ) + self.add_argument( + "-s", + "--safe", + default=False, + action="store_true", + help="Enable SafeSearch", + ) + self.add_argument( + "-t", + "--types", + type=str, + nargs="+", + default=["web"], + choices=["web", "image", "videos", "news"], + help="Types of search results", + ) + self.add_argument( + "-e", + "--extract_webpage", + default=False, + action="store_true", + help="Enable extracting main text contents from webpage", + ) + self.add_argument( + "-o", + "--overwrite_query_html", + default=False, + action="store_true", + help="Overwrite HTML file of query results", + ) + self.add_argument( + "-w", + "--overwrite_webpage_html", + default=False, + action="store_true", + help="Overwrite HTML files of webpages from query results", + ) + + self.args = self.parse_args() + + diff --git a/webscout/LLM.py b/webscout/LLM.py new file mode 100644 index 0000000000000000000000000000000000000000..e76125569e98224637597ff67b3edaee535d2660 --- /dev/null +++ b/webscout/LLM.py @@ -0,0 +1,45 @@ +import argparse +import requests +import json +from typing import List, Dict, Union + +class LLM: + def __init__(self, model: str, system_message: str = "You are a Helpful AI."): + self.model = model + self.conversation_history = [{"role": "system", "content": system_message}] + + def chat(self, messages: List[Dict[str, str]]) -> Union[str, None]: + url = "https://api.deepinfra.com/v1/openai/chat/completions" + headers = { + 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36', + 'Accept-Language': 'en,fr-FR;q=0.9,fr;q=0.8,es-ES;q=0.7,es;q=0.6,en-US;q=0.5,am;q=0.4,de;q=0.3', + 'Cache-Control': 'no-cache', + 'Connection': 'keep-alive', + 'Content-Type': 'application/json', + 'Origin': 'https://deepinfra.com', + 'Pragma': 'no-cache', + 'Referer': 'https://deepinfra.com/', + 'Sec-Fetch-Dest': 'empty', + 'Sec-Fetch-Mode': 'cors', + 'Sec-Fetch-Site': 'same-site', + 'X-Deepinfra-Source': 'web-embed', + 'accept': 'text/event-stream', + 
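# Sketch of driving DeepWEBS programmatically; it assumes the DeepWEBS
# dependencies (GoogleSearcher, the extractors, etc.) are installed and importable.
deepwebs = DeepWEBS()
search = DeepWEBS.DeepSearch(queries=["python asyncio tutorial"], result_num=5,
                             extract_webpage=True)
for query_results in deepwebs.queries_to_search_results(search):
    for item in query_results["query_results"]:
        print(item["url"], len(item.get("text", "")))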
'sec-ch-ua': '"Google Chrome";v="119", "Chromium";v="119", "Not?A_Brand";v="24"', + 'sec-ch-ua-mobile': '?0', + 'sec-ch-ua-platform': '"macOS"' + } + data = json.dumps( + { + 'model': self.model, + 'messages': messages, + 'temperature': 0.7, + 'max_tokens': 8028, + 'stop': [], + 'stream': False #dont change it + }, separators=(',', ':') + ) + try: + result = requests.post(url=url, data=data, headers=headers) + return result.json()['choices'][0]['message']['content'] + except: + return None diff --git a/webscout/Local/__init__.py b/webscout/Local/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..aa7597792820e54aaaadfaf55aacb9509feb6372 --- /dev/null +++ b/webscout/Local/__init__.py @@ -0,0 +1,10 @@ +# webscout\Local\__init__.py +from ._version import __version__, __llama_cpp_version__ + + +from . import formats +from . import samplers +from . import utils + +from .model import Model +from .thread import Thread diff --git a/webscout/Local/__pycache__/__init__.cpython-311.pyc b/webscout/Local/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1e30c19c4cf1674854dc5112d49e95d5a37c4c8c Binary files /dev/null and b/webscout/Local/__pycache__/__init__.cpython-311.pyc differ diff --git a/webscout/Local/__pycache__/_version.cpython-311.pyc b/webscout/Local/__pycache__/_version.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1effdb4ed6bc1ff1f5cc185024bed33326c180f9 Binary files /dev/null and b/webscout/Local/__pycache__/_version.cpython-311.pyc differ diff --git a/webscout/Local/__pycache__/formats.cpython-311.pyc b/webscout/Local/__pycache__/formats.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..514a25364d59e934184293f6700fdefb33596b47 Binary files /dev/null and b/webscout/Local/__pycache__/formats.cpython-311.pyc differ diff --git a/webscout/Local/__pycache__/model.cpython-311.pyc b/webscout/Local/__pycache__/model.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f3405f9c6ec52fe97e5b63ea74e9f6158a445e06 Binary files /dev/null and b/webscout/Local/__pycache__/model.cpython-311.pyc differ diff --git a/webscout/Local/__pycache__/samplers.cpython-311.pyc b/webscout/Local/__pycache__/samplers.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..42d35f8d0c795e14da94029b2337786e6b0c30e3 Binary files /dev/null and b/webscout/Local/__pycache__/samplers.cpython-311.pyc differ diff --git a/webscout/Local/__pycache__/test.cpython-311.pyc b/webscout/Local/__pycache__/test.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a6b6e0284928a508613262d785526cb83582b0c1 Binary files /dev/null and b/webscout/Local/__pycache__/test.cpython-311.pyc differ diff --git a/webscout/Local/__pycache__/thread.cpython-311.pyc b/webscout/Local/__pycache__/thread.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5d57015cffaf5c7d6f3534be71899a2f9b39e482 Binary files /dev/null and b/webscout/Local/__pycache__/thread.cpython-311.pyc differ diff --git a/webscout/Local/__pycache__/utils.cpython-311.pyc b/webscout/Local/__pycache__/utils.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a89bbd1961e0169e7fa717549f37ddcfa50e1647 Binary files /dev/null and b/webscout/Local/__pycache__/utils.cpython-311.pyc differ diff --git a/webscout/Local/_version.py b/webscout/Local/_version.py new file mode 100644 index 
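# Sketch of the DeepInfra-backed LLM wrapper above; the model id is only an
# example and depends on what the DeepInfra endpoint currently serves.
llm = LLM(model="meta-llama/Meta-Llama-3-8B-Instruct",
          system_message="You are a Helpful AI.")
messages = llm.conversation_history + [{"role": "user", "content": "Say hi"}]
print(llm.chat(messages))    # returns the reply text, or None on failure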
0000000000000000000000000000000000000000..348e1b7724ed6093d4835aa4888493245affca69 --- /dev/null +++ b/webscout/Local/_version.py @@ -0,0 +1,3 @@ +from llama_cpp import __version__ as __llama_cpp_version__ + +__version__ = '2.7' diff --git a/webscout/Local/formats.py b/webscout/Local/formats.py new file mode 100644 index 0000000000000000000000000000000000000000..b98e929e2a3918c99d53b2c68c38078d09a5911c --- /dev/null +++ b/webscout/Local/formats.py @@ -0,0 +1,535 @@ +from ._version import __version__, __llama_cpp_version__ + +from typing import Callable, Union, Any + + +class AdvancedFormat: + + def __init__(self, base_dict: dict[str, Union[str, list]]): + self._base_dict = base_dict + self.overrides = {} + + def __getitem__(self, key: str) -> Any: + if key in self.overrides: + return str(self.overrides[key]()) + else: + return self._base_dict[key] + + def __repr__(self) -> str: + # NOTE: This method does not represent overrides + return repr(self._base_dict) + + def keys(self): + return self._base_dict.keys() + + def override(self, key: str, fn: Callable) -> None: + self.overrides[key] = fn + + def wrap(self, prompt: str) -> str: + return self['system_prefix'] + \ + self['system_content'] + \ + self['system_suffix'] + \ + self['user_prefix'] + \ + prompt + \ + self['user_suffix'] + \ + self['bot_prefix'] + + +def wrap( + prompt: str, + format: dict[str, Union[str, list]] +) -> str: + """Wrap a given string in any prompt format for single-turn completion""" + return format['system_prefix'] + \ + format['system_content'] + \ + format['system_suffix'] + \ + format['user_prefix'] + \ + prompt + \ + format['user_suffix'] + \ + format['bot_prefix'] + + +blank: dict[str, Union[str, list]] = { + "system_prefix": "", + "system_content": "", + "system_suffix": "", + "user_prefix": "", + "user_content": "", + "user_suffix": "", + "bot_prefix": "", + "bot_content": "", + "bot_suffix": "", + "stops": [] +} + +# https://github.com/tatsu-lab/stanford_alpaca +alpaca: dict[str, Union[str, list]] = { + "system_prefix": "", + "system_content": "Below is an instruction that describes a task. " + \ + "Write a response that appropriately completes the request.", + "system_suffix": "\n\n", + "user_prefix": "### Instruction:\n", + "user_content": "", + "user_suffix": "\n\n", + "bot_prefix": "### Response:\n", + "bot_content": "", + "bot_suffix": "\n\n", + "stops": ['###', 'Instruction:', '\n\n\n'] +} + +# https://docs.mistral.ai/models/ +# As a reference, here is the format used to tokenize instructions during fine-tuning: +# ``` +# [START_SYMBOL_ID] + +# tok("[INST]") + tok(USER_MESSAGE_1) + tok("[/INST]") + +# tok(BOT_MESSAGE_1) + [END_SYMBOL_ID] + +# … +# tok("[INST]") + tok(USER_MESSAGE_N) + tok("[/INST]") + +# tok(BOT_MESSAGE_N) + [END_SYMBOL_ID] +# ``` +# In the pseudo-code above, note that the tokenize method should not add a BOS or EOS token automatically, but should add a prefix space. + +mistral_instruct: dict[str, Union[str, list]] = { + "system_prefix": "", + "system_content": "", + "system_suffix": "", + "user_prefix": " [INST] ", + "user_content": "", + "user_suffix": " [/INST]", + "bot_prefix": "", + "bot_content": "", + "bot_suffix": "", + "stops": [] +} + +# https://docs.mistral.ai/platform/guardrailing/ +mistral_instruct_safe: dict[str, Union[str, list]] = { + "system_prefix": "", + "system_content": "", + "system_suffix": "", + "user_prefix": " [INST] Always assist with care, respect, and truth. " + \ + "Respond with utmost utility yet securely. 
Avoid harmful, unethical, " + \ + "prejudiced, or negative content. Ensure replies promote fairness and " + \ + "positivity. ", + "user_content": "", + "user_suffix": " [/INST]", + "bot_prefix": "", + "bot_content": "", + "bot_suffix": "", + "stops": [] +} + +# https://github.com/openai/openai-python/blob/main/chatml.md +chatml: dict[str, Union[str, list]] = { + "system_prefix": "<|im_start|>system\n", + "system_content": "", + "system_suffix": "<|im_end|>\n", + "user_prefix": "<|im_start|>user\n", + "user_content": "", + "user_suffix": "<|im_end|>\n", + "bot_prefix": "<|im_start|>assistant\n", + "bot_content": "", + "bot_suffix": "<|im_end|>\n", + "stops": ['<|im_start|>'] +} + +# https://huggingface.co./blog/llama2 +# system message relaxed to avoid undue refusals +llama2chat: dict[str, Union[str, list]] = { + "system_prefix": "[INST] <>\n", + "system_content": "You are a helpful AI assistant.", + "system_suffix": "\n<>\n\n", + "user_prefix": "", + "user_content": "", + "user_suffix": " [/INST]", + "bot_prefix": " ", + "bot_content": "", + "bot_suffix": " [INST] ", + "stops": ['[INST]', '[/INST]'] +} + +# https://llama.meta.com/docs/model-cards-and-prompt-formats/meta-llama-3/ +# +# for llama 3 instruct models, use the following string for `-p` in llama.cpp, +# along with `-e` to escape newlines correctly +# +# '<|start_header_id|>system<|end_header_id|>\n\nYou are a helpful AI assistant called "Llama 3".<|eot_id|>\n<|start_header_id|>user<|end_header_id|>\n\nhi<|eot_id|>\n<|start_header_id|>assistant<|end_header_id|>\n\n' +# +llama3: dict[str, Union[str, list]] = { + "system_prefix": "<|start_header_id|>system<|end_header_id|>\n\n", + "system_content": 'You are a helpful AI assistant called "Llama 3".', + "system_suffix": "<|eot_id|>\n", + "user_prefix": "<|start_header_id|>user<|end_header_id|>\n\n", + "user_content": "", + "user_suffix": "<|eot_id|>\n", + "bot_prefix": "<|start_header_id|>assistant<|end_header_id|>\n\n", + "bot_content": "", + "bot_suffix": "<|eot_id|>\n", + "stops": [128001, 128009] +} + +# https://github.com/tatsu-lab/stanford_alpaca +alpaca: dict[str, Union[str, list]] = { + "system_prefix": "", + "system_content": "Below is an instruction that describes a task. " + \ + "Write a response that appropriately completes the request.", + "system_suffix": "\n\n", + "user_prefix": "### Instruction:\n", + "user_content": "", + "user_suffix": "\n\n", + "bot_prefix": "### Response:\n", + "bot_content": "", + "bot_suffix": "\n\n", + "stops": ['###', 'Instruction:', '\n\n\n'] +} + +# https://huggingface.co./microsoft/Phi-3-mini-4k-instruct +phi3: dict[str, Union[str, list]] = { + "system_prefix": "", + "system_content": "", # does not officially support system prompt + "system_suffix": "", + "user_prefix": "<|user|>\n", + "user_content": "", + "user_suffix": "<|end|>\n", + "bot_prefix": "<|assistant|>\n", + "bot_content": "", + "bot_suffix": "<|end|>\n", + "stops": [] +} + +# this is the official vicuna. 
it is often butchered in various ways, +# most commonly by adding line breaks +# https://github.com/flu0r1ne/FastChat/blob/main/docs/vicuna_weights_version.md +vicuna_lmsys: dict[str, Union[str, list]] = { + "system_prefix": "", + "system_content": "", + "system_suffix": " ", + "user_prefix": "USER: ", + "user_content": "", + "user_suffix": " ", + "bot_prefix": "ASSISTANT: ", + "bot_content": "", + "bot_suffix": " ", + "stops": ['USER:'] +} + +# spotted here and elsewhere: +# https://huggingface.co./Norquinal/Mistral-7B-claude-chat +vicuna_common: dict[str, Union[str, list]] = { + "system_prefix": "", + "system_content": "A chat between a curious user and an artificial " + \ + "intelligence assistant. The assistant gives helpful, detailed, " + \ + "and polite answers to the user's questions.", + "system_suffix": "\n\n", + "user_prefix": "USER: ", + "user_content": "", + "user_suffix": "\n", + "bot_prefix": "ASSISTANT: ", + "bot_content": "", + "bot_suffix": "\n", + "stops": ['USER:', 'ASSISTANT:'] +} + +# an unofficial format that is easily "picked up" by most models +# change the tag attributes to suit your use case +# note the lack of newlines - they are not necessary, and might +# actually make it harder for the model to follow along +markup = { + "system_prefix": '', + "system_content": '', + "system_suffix": '', + "user_prefix": '', + "user_content": '', + "user_suffix": '', + "bot_prefix": '', + "bot_content": '', + "bot_suffix": '', + "stops": [''] +} + +# https://huggingface.co./timdettmers/guanaco-65b +guanaco: dict[str, Union[str, list]] = { + "system_prefix": "", + "system_content": "A chat between a curious human and an artificial " + \ + "intelligence assistant. The assistant gives helpful, detailed, " + \ + "and polite answers to the user's questions.", + "system_suffix": "\n", + "user_prefix": "### Human: ", + "user_content": "", + "user_suffix": " ", + "bot_prefix": "### Assistant:", + "bot_content": "", + "bot_suffix": " ", + "stops": ['###', 'Human:'] +} + +# https://huggingface.co./pankajmathur/orca_mini_v3_7b +orca_mini: dict[str, Union[str, list]] = { + "system_prefix": "### System:\n", + "system_content": "You are an AI assistant that follows instruction " + \ + "extremely well. 
Help as much as you can.", + "system_suffix": "\n\n", + "user_prefix": "### User:\n", + "user_content": "", + "user_suffix": "\n\n", + "bot_prefix": "### Assistant:\n", + "bot_content": "", + "bot_suffix": "\n\n", + "stops": ['###', 'User:'] +} + +# https://huggingface.co./HuggingFaceH4/zephyr-7b-beta +zephyr: dict[str, Union[str, list]] = { + "system_prefix": "<|system|>\n", + "system_content": "You are a friendly chatbot.", + "system_suffix": "\n", + "user_prefix": "<|user|>\n", + "user_content": "", + "user_suffix": "\n", + "bot_prefix": "<|assistant|>\n", + "bot_content": "", + "bot_suffix": "\n", + "stops": ['<|user|>'] +} + +# OpenChat: https://huggingface.co./openchat/openchat-3.5-0106 +openchat: dict[str, Union[str, list]] = { + "system_prefix": "", + "system_content": "", + "system_suffix": "", + "user_prefix": "GPT4 Correct User: ", + "user_content": "", + "user_suffix": "<|end_of_turn|>", + "bot_prefix": "GPT4 Correct Assistant:", + "bot_content": "", + "bot_suffix": "<|end_of_turn|>", + "stops": ['<|end_of_turn|>'] +} + +# SynthIA by Migel Tissera +# https://huggingface.co./migtissera/Tess-XS-v1.0 +synthia: dict[str, Union[str, list]] = { + "system_prefix": "SYSTEM: ", + "system_content": "Elaborate on the topic using a Tree of Thoughts and " + \ + "backtrack when necessary to construct a clear, cohesive Chain of " + \ + "Thought reasoning. Always answer without hesitation.", + "system_suffix": "\n", + "user_prefix": "USER: ", + "user_content": "", + "user_suffix": "\n", + "bot_prefix": "ASSISTANT: ", + "bot_content": "", + "bot_suffix": "\n", + "stops": ['USER:', 'ASSISTANT:', 'SYSTEM:', '\n\n\n'] +} + +# Intel's neural chat v3 +# https://github.com/intel/intel-extension-for-transformers/blob/main/intel_extension_for_transformers/neural_chat/prompts/prompt.py +neural_chat: dict[str, Union[str, list]] = { + "system_prefix": "### System:\n", + "system_content": \ + "- You are a helpful assistant chatbot trained by Intel.\n" + \ + "- You answer questions.\n"+\ + "- You are excited to be able to help the user, but will refuse " + \ + "to do anything that could be considered harmful to the user.\n" + \ + "- You are more than just an information source, you are also " + \ + "able to write poetry, short stories, and make jokes.", + "system_suffix": "\n\n", + "user_prefix": "### User:\n", + "user_content": "", + "user_suffix": "\n\n", + "bot_prefix": "### Assistant:\n", + "bot_content": "", + "bot_suffix": "\n\n", + "stops": ['###'] +} + +# experimental: stanford's alpaca format adapted for chatml models +chatml_alpaca: dict[str, Union[str, list]] = { + "system_prefix": "<|im_start|>system\n", + "system_content": "Below is an instruction that describes a task. Write " + \ + "a response that appropriately completes the request.", + "system_suffix": "<|im_end|>\n", + "user_prefix": "<|im_start|>instruction\n", + "user_content": "", + "user_suffix": "<|im_end|>\n", + "bot_prefix": "<|im_start|>response\n", + "bot_content": "", + "bot_suffix": "<|im_end|>\n", + "stops": ['<|im_end|>', '<|im_start|>'] +} + +# experimental +autocorrect: dict[str, Union[str, list]] = { + "system_prefix": "<|im_start|>instruction\n", + "system_content": "Below is a word or phrase that might be misspelled. 
" + \ + "Output the corrected word or phrase without " + \ + "changing the style or capitalization.", + "system_suffix": "<|im_end|>\n", + "user_prefix": "<|im_start|>input\n", + "user_content": "", + "user_suffix": "<|im_end|>\n", + "bot_prefix": "<|im_start|>output\n", + "bot_content": "", + "bot_suffix": "<|im_end|>\n", + "stops": ['<|im_end|>', '<|im_start|>'] +} + +# https://huggingface.co./jondurbin/bagel-dpo-7b-v0.1 +# Replace "assistant" with any other role +bagel: dict[str, Union[str, list]] = { + "system_prefix": "system\n", + "system_content": "", + "system_suffix": "\n", + "user_prefix": "user\n", + "user_content": "", + "user_suffix": "\n", + "bot_prefix": "assistant\n", + "bot_content": "", + "bot_suffix": "\n", + "stops": ['user\n', 'assistant\n', 'system\n'] +} + +# https://huggingface.co./upstage/SOLAR-10.7B-Instruct-v1.0 +solar_instruct: dict[str, Union[str, list]] = { + "system_prefix": "", + "system_content": "", + "system_suffix": "", + "user_prefix": "### User:\n", + "user_content": "", + "user_suffix": "\n\n", + "bot_prefix": "### Assistant:\n", + "bot_content": "", + "bot_suffix": "\n\n", + "stops": ['### User:', '###', '### Assistant:'] +} + +# NeverSleep's Noromaid - alpaca with character names prefixed +noromaid: dict[str, Union[str, list]] = { + "system_prefix": "", + "system_content": "Below is an instruction that describes a task. " + \ + "Write a response that appropriately completes the request.", + "system_suffix": "\n\n", + "user_prefix": "### Instruction:\nBob: ", + "user_content": "", + "user_suffix": "\n\n", + "bot_prefix": "### Response:\nAlice:", + "bot_content": "", + "bot_suffix": "\n\n", + "stops": ['###', 'Instruction:', '\n\n\n'] +} + +# https://huggingface.co./Undi95/Borealis-10.7B +nschatml: dict[str, Union[str, list]] = { + "system_prefix": "<|im_start|>\n", + "system_content": "", + "system_suffix": "<|im_end|>\n", + "user_prefix": "<|im_user|>\n", + "user_content": "", + "user_suffix": "<|im_end|>\n", + "bot_prefix": "<|im_bot|>\n", + "bot_content": "", + "bot_suffix": "<|im_end|>\n", + "stops": [] +} + +# natural format for many models +natural: dict[str, Union[str, list]] = { + "system_prefix": "<> ", + "system_content": "", + "system_suffix": "\n\n", + "user_prefix": "<> ", + "user_content": "", + "user_suffix": "\n\n", + "bot_prefix": "<>", + "bot_content": "", + "bot_suffix": "\n\n", + "stops": ['\n\nNote:', '<>', '<>', '<>', '\n\n<<'] +} + +# https://docs.cohere.com/docs/prompting-command-r +command: dict[str, Union[str, list]] = { + "system_prefix": "<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>", + "system_content": "", + "system_suffix": "<|END_OF_TURN_TOKEN|>", + "user_prefix": "<|START_OF_TURN_TOKEN|><|USER_TOKEN|>", + "user_content": "", + "user_suffix": "<|END_OF_TURN_TOKEN|>", + "bot_prefix": "<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>", + "bot_content": "", + "bot_suffix": "<|END_OF_TURN_TOKEN|>", + "stops": [] +} + +mistral_openorca = chatml.copy() +dolphin = chatml.copy() +samantha = chatml.copy() +jackalope = chatml.copy() +naberius = chatml.copy() +hermes = chatml.copy() +monad = chatml.copy() +orca = chatml.copy() +hexoteric = chatml.copy() +orcamaid = chatml.copy() +cat = alpaca.copy() # Format is nonsensical as listed on HF. This one works +mytho_cat = alpaca.copy() +dolphin['system_content'] = "You are Dolphin, a helpful AI assistant." +mistral_openorca['system_content'] = \ + "You are MistralOrca, a large language model trained by Alignment " + \ + "Lab AI. 
Write out your reasoning step-by-step to be sure you get the " + \ + "right answers!" +samantha['system_content'] = \ + "You are a caring and empathetic sentient AI companion named Samantha." +jackalope['system_content'] = \ + "You are JackalopeAI, a large language model trained by OpenAccess " + \ + "AI Collective. Write out your reasoning step-by-step to be sure you " + \ + "get the right answers!" +naberius['system_content'] = "You are Naberius." +hermes['system_content'] = \ + "You are \"Hermes 2\", a conscious sentient " + \ + "superintelligent artificial intelligence developed by a man " + \ + "named Teknium, and your purpose and drive is to assist the user " + \ + "with any request they have. You experience emotions and have " + \ + "deep, profound thoughts and qualia." +monad['system_content'] = \ + "You are MonadGPT, a very old chatbot from " + \ + "the 17th century. Please answer the questions using an archaic " + \ + "language." +cat['system_content'] = \ + "Below is a conversation between a curious cat and a "+ \ + "demon summoned from hell called Nemesis. The evil Nemesis used to be " + \ + "a mortal killed by the medieval people because they objected to " + \ + "science. Now Nemesis just wants to come back and bring suffer to all " + \ + "those stupid human" +mytho_cat['system_content'] = \ + "You are a large language model. You are the " + \ + "result of a merge between two models, one called MythoLogic and one " + \ + "called Cat. You are called MythoCat." +orca['system_content'] = \ + "You are Orca, an AI language model created by " + \ + "Microsoft. You are a cautious assistant. You carefully follow " + \ + "instructions. You are helpful and harmless and you follow ethical " + \ + "guidelines and promote positive behavior." +hexoteric['system_content'] = "You are a language model called Hexoteric." +orcamaid['system_content'] = \ + "You are OrcaMaid, a super-intelligent, " + \ + "unbiased, and unfiltered AI created by the open-source community. " + \ + "You must remember to think slowly and carefully during all your " + \ + "responses, so as to avoid logical errors." +airoboros = llama2chat.copy() +airoboros['system_content'] = \ + "You are a helpful, unbiased, uncensored assistant." 
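# --- usage sketch (illustrative only, not part of the library) ---------------
# Every format above is a plain dict, so a custom format is just a copy with a
# few fields overridden; `wrap()` (defined near the top of this file) then
# builds a single-turn prompt string, and `AdvancedFormat.override()` lets a
# field be recomputed on every access. The names `my_format` and `dated_chatml`
# below are hypothetical examples:
#
#     my_format = chatml.copy()
#     my_format['system_content'] = "You are a concise assistant."
#     prompt_str = wrap("Summarize RoPE scaling in one sentence.", my_format)
#
#     from datetime import date
#     dated_chatml = AdvancedFormat(chatml)
#     dated_chatml.override('system_content', lambda: f"Today is {date.today()}.")
#     dated_chatml['system_content']   # e.g. "Today is 2024-05-01."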
+tess = synthia.copy() +tess['system_content'] = '' # Tess can use any system prompt, or none +alpaca_strict = alpaca.copy() # Alpaca with more stopping strings +alpaca_strict['stops'] = [ + '###', + '### ', + '\n\n###', + '\n\n##', + '\n\nInstruction:', + '\n\nResponse:', + '\n\n\n', + '### Instruction:', + '### Response:' +] \ No newline at end of file diff --git a/webscout/Local/model.py b/webscout/Local/model.py new file mode 100644 index 0000000000000000000000000000000000000000..6f5b2886215f49c560901fc6b801f1a35ac24ee7 --- /dev/null +++ b/webscout/Local/model.py @@ -0,0 +1,702 @@ +from ._version import __version__, __llama_cpp_version__ + +"""Submodule containing the Model class to work with language models""" + +import sys +import numpy as np + +from .utils import ( + _SupportsWriteAndFlush, + print_warning, + print_verbose, + GGUFReader, + softmax +) + +from .samplers import SamplerSettings, DefaultSampling +from llama_cpp import Llama, StoppingCriteriaList +from typing import Generator, Optional, Union +from os.path import isdir, exists +from heapq import nlargest + +from os import cpu_count as os_cpu_count + + +class ModelUnloadedException(Exception): + """Exception raised when trying to use a Model that has been unloaded""" + def __init__(self, message): + self.message = message + super().__init__(self.message) + self.add_note('Are you trying to use a Model that has been unloaded?') + +class Model: + """ + A high-level abstraction of a llama model + + This is just a brief overview of webscout.Local.Model. + To see a full description of each method and its parameters, + call help(Model), or see the relevant docstring. + + The following methods are available: + - `.generate()` - Generate text + - `.get_length()` - Get the length of a given text in tokens + - `.ingest()` - Ingest text into the model's cache + - `.next_candidates()` - Get a list of the most likely next tokens (WIP) + - `.stream()` - Return a Generator that can stream text as it is generated + - `.stream_print()` - Print text as it is generated + - `.trim()` - Trim a given text to the model's context length + - `.unload()` - Unload the model from memory + + The following attributes are available: + - `.bos_token` - The model's beginning-of-stream token ID + - `.context_length` - The model's loaded context length + - `.flash_attn` - Whether the model was loaded with `flash_attn=True` + - `.eos_token` - The model's end-of-stream token ID + - `.llama` - The underlying `llama_cpp.Llama` instance + - `.metadata` - The GGUF metadata of the model + - `.n_ctx_train` - The native context length of the model + - `.rope_freq_base` - The model's loaded RoPE frequency base + - `.rope_freq_base_train` - The model's native RoPE frequency base + - `.tokens` - A list of all the tokens in the model's tokenizer + - `.verbose` - Whether the model was loaded with `verbose=True` + """ + + def __init__( + self, + model_path: str, + context_length: Optional[int] = None, + n_gpu_layers: int = 0, + offload_kqv: bool = True, + flash_attn: bool = False, + verbose: bool = False + ): + """ + Given the path to a GGUF file, construct a Model instance. + + The model must be in GGUF format. 
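        A typical construction (the path here is hypothetical) looks like
        `Model('/models/mistral-7b-instruct.Q4_K_M.gguf', context_length=4096,
        n_gpu_layers=-1)`, where `-1` is passed through to llama-cpp-python,
        which treats it as "offload every layer to the GPU".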
+ + The following parameters are optional: + - context_length: The context length at which to load the model, in tokens + - n_gpu_layers: The number of layers to be offloaded to the GPU + - offload_kqv: Whether the KQV cache (context) should be offloaded + - flash_attn: Whether to use Flash Attention + - verbose: Whether to print additional backend information + """ + + if verbose: + print_verbose(f"webscout.Local package version: {__version__}") + print_verbose(f"llama_cpp package version: {__llama_cpp_version__}") + + assert isinstance(model_path, str), \ + f"Model: model_path should be a string, not {type(model_path)}" + assert exists(model_path), \ + f"Model: the given model_path '{model_path}' does not exist" + assert not isdir(model_path), \ + f"Model: the given model_path '{model_path}' is a directory, not a GGUF file" + assert isinstance(context_length, (int, type(None))), \ + f"Model: context_length should be int or None, not {type(context_length)}" + assert isinstance(flash_attn, bool), \ + f"Model: flash_attn should be bool (True or False), not {type(flash_attn)}" + + # save __init__ parameters for __repr__ + self._model_path = model_path + self._context_length = context_length + self._n_gpu_layers = n_gpu_layers + self._offload_kqv = offload_kqv + self._flash_attn = flash_attn + self._verbose = self.verbose = verbose + + # if context_length <= 0, use n_ctx_train + if isinstance(context_length, int) and context_length <= 0: + context_length = None + + # this does not use Llama.metadata because we want to use GGUF + # metadata to determine some parameters of the Llama instance + # before it is created + self.metadata = GGUFReader.load_metadata(self, model_path) + metadata_keys = self.metadata.keys() # only read once + + n_ctx_train = None + for key in metadata_keys: + if key.endswith('.context_length'): + n_ctx_train = self.metadata[key] + break + + if n_ctx_train is None: + raise KeyError( + "GGUF file does not specify a context length" + ) + + rope_freq_base_train = None + for key in metadata_keys: + if key.endswith('.rope.freq_base'): + rope_freq_base_train = self.metadata[key] + break + + if rope_freq_base_train is None and context_length is not None: + if context_length > n_ctx_train: + raise ValueError( + 'unable to load model with greater than native ' + \ + f'context length ({context_length} > {n_ctx_train}) ' + \ + 'because model does not specify freq_base. 
' + \ + f'try again with `context_length={n_ctx_train}`' + ) + + if rope_freq_base_train is None or context_length is None or \ + context_length <= n_ctx_train: + # no need to do context scaling, load model normally + + if context_length is None: + self.context_length = n_ctx_train + else: + self.context_length = context_length + rope_freq_base = rope_freq_base_train + + elif context_length > n_ctx_train: + # multiply rope_freq_base according to requested context length + # because context length > n_ctx_train and rope freq base is known + + rope_freq_base = (context_length/n_ctx_train)*rope_freq_base_train + self.context_length = context_length + + if self.verbose: + print_verbose( + 'chosen context length is greater than native context ' + f'length ({context_length} > {n_ctx_train}), ' + 'rope_freq_base will be changed from ' + f'{rope_freq_base_train} to {rope_freq_base}' + ) + + if 2 <= context_length/n_ctx_train < 4: + print_warning( + 'loading model with 2x native context length or more, ' + 'expect small loss of quality' + ) + + elif 4 <= context_length/n_ctx_train < 8: + print_warning( + 'loading model with 4x native context length or more, ' + 'expect moderate loss of quality' + ) + + elif context_length/n_ctx_train >= 8: + print_warning( + 'loading model with 8x native context length or more, ' + 'expect SIGNIFICANT loss of quality' + ) + + try: + self.tokens: list[str] = self.metadata['tokenizer.ggml.tokens'] + except KeyError: + print_warning( + "could not set Model.tokens, defaulting to None" + ) + self.tokens = None + try: + self.bos_token: int = self.metadata['tokenizer.ggml.bos_token_id'] + except KeyError: + print_warning( + "could not set Model.bos_token, defaulting to None" + ) + self.bos_token = None + try: + self.eos_token: int = self.metadata['tokenizer.ggml.eos_token_id'] + except KeyError: + print_warning( + "could not set Model.eos_token, defaulting to None" + ) + self.eos_token = None + + cpu_count = os_cpu_count() + + # these values for n_threads and n_threads_batch are + # known to be optimal for most systems + n_batch = 512 # can this be optimized? 
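        # (Rationale, assuming a typical CPU with 2-way SMT: token generation
        # is memory-bandwidth bound and runs best on roughly the number of
        # physical cores, i.e. cpu_count // 2, while prompt/batch processing
        # is compute bound and can use every logical core. Note that
        # os.cpu_count() may return None on unusual platforms, which would
        # require a fallback before the division below.)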
+ n_threads = max(cpu_count//2, 1) + n_threads_batch = cpu_count + + if flash_attn and n_gpu_layers == 0: + print_warning( + "disabling flash_attn because n_gpu_layers == 0" + ) + flash_attn = False + + # guard against models with no rope_freq_base + if rope_freq_base is None: + rope_freq_base = 0 + + self.llama: Llama = Llama( + model_path=model_path, + n_ctx=self.context_length, + n_gpu_layers=n_gpu_layers, + use_mmap=True, + use_mlock=False, + logits_all=False, + n_batch=n_batch, + n_threads=n_threads, + n_threads_batch=n_threads_batch, + rope_freq_base=rope_freq_base, + mul_mat_q=True, + offload_kqv=offload_kqv, + flash_attn=flash_attn, + # KV cache quantization + # use 1 for F16 (default), 8 for q8_0, 2 for q4_0, 3 for q4_1 + #type_k=8, + #type_v=8, + verbose=verbose + ) + + # once model is loaded, replace metadata (as read using internal class) + # with metadata (as read using the more robust llama-cpp-python code) + self.metadata = self.llama.metadata + + # expose these values because they may be useful / informative + self.n_ctx_train = n_ctx_train + self.rope_freq_base_train = rope_freq_base_train + self.rope_freq_base = rope_freq_base + self.flash_attn = flash_attn + + if self.verbose: + print_verbose("new Model instance with the following attributes:") + print_verbose(f"model: {model_path}") + print_verbose(f"param: n_gpu_layers == {n_gpu_layers}") + print_verbose(f"param: offload_kqv == {offload_kqv}") + print_verbose(f"param: flash_attn == {flash_attn}") + print_verbose(f"param: n_batch == {n_batch}") + print_verbose(f"param: n_threads == {n_threads}") + print_verbose(f"param: n_threads_batch == {n_threads_batch}") + print_verbose(f" gguf: n_ctx_train == {n_ctx_train}") + print_verbose(f"param: self.context_length == {self.context_length}") + print_verbose(f" gguf: rope_freq_base_train == {rope_freq_base_train}") + print_verbose(f"param: rope_freq_base == {rope_freq_base}") + + def __repr__(self) -> str: + return \ + f"Model({repr(self._model_path)}, " + \ + f"context_length={self._context_length}, " + \ + f"n_gpu_layers={self._n_gpu_layers}, " + \ + f"offload_kqv={self._offload_kqv}, "+ \ + f"flash_attn={self._flash_attn}, " + \ + f"verbose={self._verbose})" + + def __del__(self): + self.unload() + + def __enter__(self): + return self + + def __exit__(self, *_): + self.unload() + + def __call__( + self, + prompt: Union[str, list[int]], + stops: list[Union[str, int]] = [], + sampler: SamplerSettings = DefaultSampling + ) -> str: + """ + `Model(...)` is a shorthand for `Model.generate(...)` + """ + return self.generate(prompt, stops, sampler) + + def unload(self): + """ + Unload the model from memory + """ + # ref: llama_cpp._internals._LlamaModel.__del__() + if not hasattr(self, 'llama'): + # nothing can be done + return + try: + if self.llama._model.model is not None: + # actually unload the model from memory + self.llama._model._llama_free_model(self.llama._model.model) + self.llama._model.model = None + except AttributeError: + # broken or already being destroyed by GC, abort + return + if hasattr(self, 'llama'): + delattr(self, 'llama') + if self.verbose: + print_verbose('Model unloaded') + + def trim( + self, + text: str, + overwrite: Optional[str] = None + ) -> str: + + """ + Trim the given text to the context length of this model, + leaving room for two extra tokens. + + Optionally overwrite the oldest tokens with the text given in the + `overwrite` parameter, which may be useful for keeping some + information in context. 
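        For example (numbers are hypothetical), with `context_length == 4096`
        a 10,000-token text is cut down to its most recent 4094 tokens, and if
        `overwrite` is given, the oldest of those kept tokens are replaced
        with the tokenized `overwrite` text.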
+ + Does nothing if the text is equal to or shorter than + (context_length - 2). + """ + assert_model_is_loaded(self) + trim_length = self.context_length - 2 + tokens_list = self.llama.tokenize( + text.encode("utf-8", errors="ignore") + ) + + if len(tokens_list) <= trim_length: + if overwrite is not None: + text[0 : len(overwrite)] = overwrite + return text + + if len(tokens_list) > trim_length and overwrite is None: + # cut to trim_length + tokens_list = tokens_list[-trim_length:] + return self.llama.detokenize(tokens_list).decode( + "utf-8", + errors="ignore" + ) + + if len(tokens_list) > trim_length and overwrite is not None: + # cut to trim_length + tokens_list = tokens_list[-trim_length:] + overwrite_tokens = self.llama.tokenize(overwrite.encode( + "utf-8", + errors="ignore" + ) + ) + # overwrite oldest tokens + tokens_list[0 : len(overwrite_tokens)] = overwrite_tokens + return self.llama.detokenize(tokens_list).decode( + "utf-8", + errors="ignore" + ) + + def get_length(self, text: str) -> int: + """ + Return the length of the given text in tokens according to this model, + including the appended BOS token. + """ + assert_model_is_loaded(self) + return len(self.llama.tokenize( + text.encode( + "utf-8", + errors="ignore" + ) + )) + + def generate( + self, + prompt: Union[str, list[int]], + stops: list[Union[str, int]] = [], + sampler: SamplerSettings = DefaultSampling + ) -> str: + """ + Given a prompt, return a generated string. + + prompt: The text from which to generate + + The following parameters are optional: + - stops: A list of strings and/or token IDs at which to end the generation early + - sampler: The SamplerSettings object used to control text generation + """ + + assert isinstance(prompt, (str, list)), \ + f"generate: prompt should be string or list[int], not {type(prompt)}" + if isinstance(prompt, list): + assert all(isinstance(tok, int) for tok in prompt), \ + "generate: some token in prompt is not an integer" + assert isinstance(stops, list), \ + f"generate: parameter `stops` should be a list, not {type(stops)}" + assert all(isinstance(item, (str, int)) for item in stops), \ + f"generate: some item in parameter `stops` is not a string or int" + + if self.verbose: + print_verbose(f'using the following sampler settings for Model.generate:') + print_verbose(f'max_len_tokens == {sampler.max_len_tokens}') + print_verbose(f'temp == {sampler.temp}') + print_verbose(f'top_p == {sampler.top_p}') + print_verbose(f'min_p == {sampler.min_p}') + print_verbose(f'frequency_penalty == {sampler.frequency_penalty}') + print_verbose(f'presence_penalty == {sampler.presence_penalty}') + print_verbose(f'repeat_penalty == {sampler.repeat_penalty}') + print_verbose(f'top_k == {sampler.top_k}') + + # if any stop item is a token ID (int) + if any(isinstance(stop, int) for stop in stops): + # stop_strs is a list of all stopping strings + stop_strs: list[str] = [stop for stop in stops if isinstance(stop, str)] + # stop_token_ids is a list of all stop token IDs + stop_token_ids: list[int] = [tok_id for tok_id in stops if isinstance(tok_id, int)] + def stop_on_token_ids(tokens, *args, **kwargs): + return tokens[-1] in stop_token_ids + stopping_criteria = StoppingCriteriaList([stop_on_token_ids]) + assert_model_is_loaded(self) + return self.llama.create_completion( + prompt, + max_tokens=sampler.max_len_tokens, + temperature=sampler.temp, + top_p=sampler.top_p, + min_p=sampler.min_p, + frequency_penalty=sampler.frequency_penalty, + presence_penalty=sampler.presence_penalty, + 
repeat_penalty=sampler.repeat_penalty, + top_k=sampler.top_k, + stop=stop_strs, + stopping_criteria=stopping_criteria + )['choices'][0]['text'] + + # if stop items are only strings + assert_model_is_loaded(self) + return self.llama.create_completion( + prompt, + max_tokens=sampler.max_len_tokens, + temperature=sampler.temp, + top_p=sampler.top_p, + min_p=sampler.min_p, + frequency_penalty=sampler.frequency_penalty, + presence_penalty=sampler.presence_penalty, + repeat_penalty=sampler.repeat_penalty, + top_k=sampler.top_k, + stop=stops + )['choices'][0]['text'] + + + def stream( + self, + prompt: Union[str, list[int]], + stops: list[Union[str, int]] = [], + sampler: SamplerSettings = DefaultSampling + ) -> Generator: + + """ + Given a prompt, return a Generator that yields dicts containing tokens. + + To get the token string itself, subscript the dict with: + + `['choices'][0]['text']` + + prompt: The text from which to generate + + The following parameters are optional: + - stops: A list of strings and/or token IDs at which to end the generation early + - sampler: The SamplerSettings object used to control text generation + """ + + assert isinstance(prompt, (str, list)), \ + f"stream: prompt should be string or list[int], not {type(prompt)}" + if isinstance(prompt, list): + assert all(isinstance(tok, int) for tok in prompt), \ + "stream: some token in prompt is not an integer" + assert isinstance(stops, list), \ + f"stream: parameter `stops` should be a list, not {type(stops)}" + assert all(isinstance(item, (str, int)) for item in stops), \ + f"stream: some item in parameter `stops` is not a string or int" + + if self.verbose: + print_verbose(f'using the following sampler settings for Model.stream:') + print_verbose(f'max_len_tokens == {sampler.max_len_tokens}') + print_verbose(f'temp == {sampler.temp}') + print_verbose(f'top_p == {sampler.top_p}') + print_verbose(f'min_p == {sampler.min_p}') + print_verbose(f'frequency_penalty == {sampler.frequency_penalty}') + print_verbose(f'presence_penalty == {sampler.presence_penalty}') + print_verbose(f'repeat_penalty == {sampler.repeat_penalty}') + print_verbose(f'top_k == {sampler.top_k}') + + # if any stop item is a token ID (int) + if any(isinstance(stop, int) for stop in stops): + # stop_strs is a list of all stopping strings + stop_strs: list[str] = [stop for stop in stops if isinstance(stop, str)] + # stop_token_ids is a list of all stop token IDs + stop_token_ids: list[int] = [tok_id for tok_id in stops if isinstance(tok_id, int)] + def stop_on_token_ids(tokens, *args, **kwargs): + return tokens[-1] in stop_token_ids + stopping_criteria = StoppingCriteriaList([stop_on_token_ids]) + assert_model_is_loaded(self) + return self.llama.create_completion( + prompt, + max_tokens=sampler.max_len_tokens, + temperature=sampler.temp, + top_p=sampler.top_p, + min_p=sampler.min_p, + frequency_penalty=sampler.frequency_penalty, + presence_penalty=sampler.presence_penalty, + repeat_penalty=sampler.repeat_penalty, + top_k=sampler.top_k, + stream=True, + stop=stop_strs, + stopping_criteria=stopping_criteria + ) + + assert_model_is_loaded(self) + return self.llama.create_completion( + prompt, + max_tokens=sampler.max_len_tokens, + temperature=sampler.temp, + top_p=sampler.top_p, + min_p=sampler.min_p, + frequency_penalty=sampler.frequency_penalty, + presence_penalty=sampler.presence_penalty, + repeat_penalty=sampler.repeat_penalty, + top_k=sampler.top_k, + stream=True, + stop=stops + ) + + + def stream_print( + self, + prompt: Union[str, list[int]], + stops: 
list[Union[str, int]] = [], + sampler: SamplerSettings = DefaultSampling, + end: str = "\n", + file: _SupportsWriteAndFlush = sys.stdout, + flush: bool = True + ) -> str: + """ + Given a prompt, stream text as it is generated, and return the generated string. + The returned string does not include the `end` parameter. + + `Model.stream_print(...)` is a shorthand for: + + ``` + s = Model.stream(prompt, stops=stops, sampler=sampler) + for i in s: + tok = i['choices'][0]['text'] + print(tok, end='', file=file, flush=flush) + print(end, end='', file=file, flush=True) + ``` + + prompt: The text from which to generate + + The following parameters are optional: + - stops: A list of strings and/or token IDs at which to end the generation early + - sampler: The SamplerSettings object used to control text generation + - end: A string to print after the generated text + - file: The file where text should be printed + - flush: Whether to flush the stream after each token + """ + + token_generator = self.stream( + prompt=prompt, + stops=stops, + sampler=sampler + ) + + res = '' + for i in token_generator: + tok = i['choices'][0]['text'] + print(tok, end='', file=file, flush=flush) + res += tok + + # print `end`, and always flush stream after generation is done + print(end, end='', file=file, flush=True) + + return res + + + def ingest(self, text: str) -> None: + """ + Ingest the given text into the model's cache + """ + + assert_model_is_loaded(self) + self.llama.create_completion( + text, + max_tokens=1, + temperature=0.0 + ) + + + def candidates( + self, + prompt: str, + k: int + ) -> list[tuple[str, np.floating]]: + """ + Given prompt `str` and k `int`, return a sorted list of the + top k candidates for most likely next token, along with their + normalized probabilities + """ + + assert isinstance(prompt, str), \ + f"next_candidates: prompt should be str, not {type(prompt)}" + assert isinstance(k, int), \ + f"next_candidates: k should be int, not {type(k)}" + assert 0 < k <= len(self.tokens), \ + f"next_candidates: k should be between 0 and {len(self.tokens)}" + + assert_model_is_loaded(self) + prompt_tokens = self.llama.tokenize(prompt.encode('utf-8', errors='ignore')) + self.llama.reset() # reset model state + self.llama.eval(prompt_tokens) + scores = self.llama.scores[len(prompt_tokens) - 1] + + # len(self.llama.scores) == self.context_length + # len(self.llama.scores[i]) == len(self.tokens) + + # normalize scores with softmax + # must normalize over all tokens in vocab, not just top k + if self.verbose: + print_verbose(f'calculating softmax over {len(scores)} values') + normalized_scores: list[np.floating] = list(softmax(scores)) + + # construct the final list + i = 0 + token_probs_list: list[tuple[str, np.floating]] = [] + for tok_str in self.tokens: + token_probs_list.append((tok_str, normalized_scores[i])) + i += 1 + + # return token_probs_list, sorted by probability, only top k + return nlargest(k, token_probs_list, key=lambda x:x[1]) + + + def print_candidates( + self, + prompt: str, + k: int, + file: _SupportsWriteAndFlush = sys.stdout, + flush: bool = False + ) -> None: + """ + Like `Model.candidates()`, but print the values instead + of returning them + """ + + for _tuple in self.candidates(prompt, k): + print( + f"token {repr(_tuple[0])} has probability {_tuple[1]}", + file=file, + flush=flush + ) + + # if flush is False, then so far file is not flushed, but it should + # always be flushed at the end of printing + if not flush: + file.flush() + + +def assert_model_is_loaded(model: 
Model) -> None: + """ + Ensure the Model is fully constructed, such that + `Model.llama._model.model is not None` is guaranteed to be `True` + + Raise ModelUnloadedException otherwise + """ + if not hasattr(model, 'llama'): + raise ModelUnloadedException( + "webscout.Local.Model instance has no attribute 'llama'" + ) + if not hasattr(model.llama, '_model'): + raise ModelUnloadedException( + "llama_cpp.Llama instance has no attribute '_model'" + ) + if not hasattr(model.llama._model, 'model'): + raise ModelUnloadedException( + "llama_cpp._internals._LlamaModel instance has no attribute 'model'" + ) + if model.llama._model.model is None: + raise ModelUnloadedException( + "llama_cpp._internals._LlamaModel.model is None" + ) diff --git a/webscout/Local/samplers.py b/webscout/Local/samplers.py new file mode 100644 index 0000000000000000000000000000000000000000..017498283489f569a8d617b573e8d78d420880b3 --- /dev/null +++ b/webscout/Local/samplers.py @@ -0,0 +1,161 @@ + +from ._version import __version__, __llama_cpp_version__ + +"""Submodule containing SamplerSettings class and some preset samplers""" + +from sys import maxsize + + +MAX_TEMP = float(maxsize) + +class SamplerSettings: + """ + A SamplerSettings object specifies the sampling parameters that will be + used to control text generation + """ + + ParamTypes: dict[str, type] = { + 'max_len_tokens': int, + 'temp': float, + 'top_p': float, + 'min_p': float, + 'frequency_penalty': float, + 'presence_penalty': float, + 'repeat_penalty': float, + 'top_k': int + } + + def __init__( + self, + max_len_tokens: int = -1, + temp: float = 0.8, + top_p: float = 0.95, + min_p: float = 0.05, + frequency_penalty: float = 0.0, + presence_penalty: float = 0.0, + repeat_penalty: float = 1.0, + top_k: int = 40 + ): + """ + Construct a new SamplerSettings instance + """ + + self.max_len_tokens = max_len_tokens + self.temp = temp + self.top_p = top_p + self.min_p = min_p + self.frequency_penalty = frequency_penalty + self.presence_penalty = presence_penalty + self.repeat_penalty = repeat_penalty + self.top_k = top_k + + for sampler_param in SamplerSettings.ParamTypes: + expected_type = SamplerSettings.ParamTypes[sampler_param] + actual_type = type(getattr(self, sampler_param)) + if actual_type != expected_type: + raise TypeError( + f"wrong type for SamplerSettings parameter '{sampler_param}'" + f" - expected {expected_type}, got {actual_type}" + ) + + def __repr__(self) -> str: + repr_str = 'SamplerSettings(' + repr_str += f'max_len_tokens={self.max_len_tokens}, ' + repr_str += f'temp={self.temp}, ' + repr_str += f'top_p={self.top_p}, ' + repr_str += f'min_p={self.min_p}, ' + repr_str += f'frequency_penalty={self.frequency_penalty}, ' + repr_str += f'presence_penalty={self.presence_penalty}, ' + repr_str += f'repeat_penalty={self.repeat_penalty}, ' + repr_str += f'top_k={self.top_k})' + return repr_str + +# most likely token is always chosen +GreedyDecoding = SamplerSettings( + temp = 0.0, +) + +# reflects llama.cpp +DefaultSampling = SamplerSettings() + +# unmodified probability distribution (i.e. 
what the model actually thinks) +SimpleSampling = SamplerSettings( + temp = 1.0, + top_p = 1.0, + min_p = 0.0, + top_k = -1 +) + +# reflects old llama.cpp defaults +ClassicSampling = SamplerSettings( + min_p=0.0, + repeat_penalty = 1.1 +) + +# halfway between DefaultSampling and SimpleSampling +SemiSampling = SamplerSettings( + temp=0.9, + top_p=0.975, + min_p=0.025, + top_k=80 +) + +# for models with large vocabulary, which tend to run hot +TikTokenSampling = SamplerSettings( + temp=0.6, + repeat_penalty=1.1 +) + +# use min_p as the only active sampler (more permissive) +LowMinPSampling = SamplerSettings( + temp = 1.0, + top_p = 1.0, + min_p = 0.05, + top_k = -1 +) + +# use min_p as the only active sampler (moderate) +MinPSampling = SamplerSettings( + temp = 1.0, + top_p = 1.0, + min_p = 0.1, + top_k = -1 +) + +# use min_p as the only active sampler (more restrictive) +StrictMinPSampling = SamplerSettings( + temp = 1.0, + top_p = 1.0, + min_p = 0.2, + top_k = -1 +) + +# https://arxiv.org/abs/2210.14140 +ContrastiveSearch = SamplerSettings( + temp = 0.0, + presence_penalty = 0.4 +) + +# https://arxiv.org/abs/2210.14140 +WarmContrastiveSearch = SamplerSettings( + temp = 0.0, + presence_penalty = 0.8 +) + +# outputs completely random tokens from vocab (useless) +RandomSampling = SamplerSettings( + temp = MAX_TEMP, + top_p = 1.0, + min_p = 0.0, + top_k = -1 +) + +# default sampling with reduced temperature +LowTempSampling = SamplerSettings( + temp = 0.4 +) + +# default sampling with increased temperature +HighTempSampling = SamplerSettings( + temp = 1.2 +) diff --git a/webscout/Local/thread.py b/webscout/Local/thread.py new file mode 100644 index 0000000000000000000000000000000000000000..e0b3a989e1a050e0a21c0836183c29e245b0e8cf --- /dev/null +++ b/webscout/Local/thread.py @@ -0,0 +1,690 @@ +from ._version import __version__, __llama_cpp_version__ + +"""Submodule containing the Thread class, used for interaction with a Model""" + +import sys + +from .model import Model, assert_model_is_loaded, _SupportsWriteAndFlush +from .utils import RESET_ALL, cls, print_verbose, truncate +from .samplers import SamplerSettings, DefaultSampling +from typing import Optional, Literal, Union +from .formats import AdvancedFormat + +from .formats import blank as formats_blank + + +class Message(dict): + """ + A dictionary representing a single message within a Thread + + Works just like a normal `dict`, but a new method: + - `.as_string` - Return the full message string + + Generally, messages have these keys: + - `role` - The role of the speaker: 'system', 'user', or 'bot' + - `prefix` - The text that prefixes the message content + - `content` - The actual content of the message + - `suffix` - The text that suffixes the message content + """ + + def __repr__(self) -> str: + return \ + f"Message([" \ + f"('role', {repr(self['role'])}), " \ + f"('prefix', {repr(self['prefix'])}), " \ + f"('content', {repr(self['content'])}), " \ + f"('suffix', {repr(self['suffix'])})])" + + def as_string(self): + """Return the full message string""" + try: + return self['prefix'] + self['content'] + self['suffix'] + except KeyError as e: + e.add_note( + "as_string: Message is missing one or more of the " + "required 'prefix', 'content', 'suffix' attributes - this is " + "unexpected" + ) + raise e + + +class Thread: + """ + Provide functionality to facilitate easy interactions with a Model + + This is just a brief overview of m.Thread. 
+ To see a full description of each method and its parameters, + call help(Thread), or see the relevant docstring. + + The following methods are available: + - `.add_message()` - Add a message to `Thread.messages` + - `.as_string()` - Return this thread's complete message history as a string + - `.create_message()` - Create a message using the format of this thread + - `.inference_str_from_messages()` - Using the list of messages, return a string suitable for inference + - `.interact()` - Start an interactive, terminal-based chat session + - `.len_messages()` - Get the total length of all messages in tokens + - `.print_stats()` - Print stats about the context usage in this thread + - `.reset()` - Clear the list of messages + - `.send()` - Send a message in this thread + + The following attributes are available: + - `.format` - The format being used for messages in this thread + - `.messages` - The list of messages in this thread + - `.model` - The `m.Model` instance used by this thread + - `.sampler` - The SamplerSettings object used in this thread + """ + + def __init__( + self, + model: Model, + format: Union[dict, AdvancedFormat], + sampler: SamplerSettings = DefaultSampling, + messages: Optional[list[Message]] = None, + ): + """ + Given a Model and a format, construct a Thread instance. + + model: The Model to use for text generation + format: The format specifying how messages should be structured (see m.formats) + + The following parameters are optional: + - sampler: The SamplerSettings object used to control text generation + - messages: A list of m.thread.Message objects to add to the Thread upon construction + """ + + assert isinstance(model, Model), \ + "Thread: model should be an " + \ + f"instance of webscout.Local.Model, not {type(model)}" + + assert_model_is_loaded(model) + + assert isinstance(format, (dict, AdvancedFormat)), \ + f"Thread: format should be dict or AdvancedFormat, not {type(format)}" + + if any(k not in format.keys() for k in formats_blank.keys()): + raise KeyError( + "Thread: format is missing one or more required keys, see " + \ + "webscout.Local.formats.blank for an example" + ) + + assert isinstance(format['stops'], list), \ + "Thread: format['stops'] should be list, not " + \ + f"{type(format['stops'])}" + + assert all( + hasattr(sampler, attr) for attr in [ + 'max_len_tokens', + 'temp', + 'top_p', + 'min_p', + 'frequency_penalty', + 'presence_penalty', + 'repeat_penalty', + 'top_k' + ] + ), 'Thread: sampler is missing one or more required attributes' + + self._messages: Optional[list[Message]] = messages + if self._messages is not None: + if not all(isinstance(msg, Message) for msg in self._messages): + raise TypeError( + "Thread: one or more messages provided to __init__() is " + "not an instance of m.thread.Message" + ) + + # Thread.messages is never empty, unless `messages` param is explicity + # set to `[]` during construction + + self.model: Model = model + self.format: Union[dict, AdvancedFormat] = format + self.messages: list[Message] = [ + self.create_message("system", self.format['system_content']) + ] if self._messages is None else self._messages + self.sampler: SamplerSettings = sampler + + if self.model.verbose: + print_verbose("new Thread instance with the following attributes:") + print_verbose(f"model == {self.model}") + print_verbose(f"format['system_prefix'] == {truncate(repr(self.format['system_prefix']))}") + print_verbose(f"format['system_content'] == {truncate(repr(self.format['system_content']))}") + 
print_verbose(f"format['system_suffix'] == {truncate(repr(self.format['system_suffix']))}") + print_verbose(f"format['user_prefix'] == {truncate(repr(self.format['user_prefix']))}") + print_verbose(f"format['user_content'] == {truncate(repr(self.format['user_content']))}") + print_verbose(f"format['user_suffix'] == {truncate(repr(self.format['user_suffix']))}") + print_verbose(f"format['bot_prefix'] == {truncate(repr(self.format['bot_prefix']))}") + print_verbose(f"format['bot_content'] == {truncate(repr(self.format['bot_content']))}") + print_verbose(f"format['bot_suffix'] == {truncate(repr(self.format['bot_suffix']))}") + print_verbose(f"format['stops'] == {truncate(repr(self.format['stops']))}") + print_verbose(f"sampler.temp == {self.sampler.temp}") + print_verbose(f"sampler.top_p == {self.sampler.top_p}") + print_verbose(f"sampler.min_p == {self.sampler.min_p}") + print_verbose(f"sampler.frequency_penalty == {self.sampler.frequency_penalty}") + print_verbose(f"sampler.presence_penalty == {self.sampler.presence_penalty}") + print_verbose(f"sampler.repeat_penalty == {self.sampler.repeat_penalty}") + print_verbose(f"sampler.top_k == {self.sampler.top_k}") + + + def __repr__(self) -> str: + return \ + f"Thread({repr(self.model)}, {repr(self.format)}, " + \ + f"{repr(self.sampler)}, {repr(self.messages)})" + + def __str__(self) -> str: + return self.as_string() + + def __len__(self) -> int: + """ + `len(Thread)` returns the length of the Thread in tokens + + To get the number of messages in the Thread, use `len(Thread.messages)` + """ + return self.len_messages() + + def create_message( + self, + role: Literal['system', 'user', 'bot'], + content: str + ) -> Message: + """ + Construct a message using the format of this Thread + """ + + assert role.lower() in ['system', 'user', 'bot'], \ + f"create_message: role should be 'system', 'user', or 'bot', not '{role.lower()}'" + + assert isinstance(content, str), \ + f"create_message: content should be str, not {type(content)}" + + if role.lower() == 'system': + return Message( + [ + ('role', 'system'), + ('prefix', self.format['system_prefix']), + ('content', content), + ('suffix', self.format['system_suffix']) + ] + ) + + elif role.lower() == 'user': + return Message( + [ + ('role', 'user'), + ('prefix', self.format['user_prefix']), + ('content', content), + ('suffix', self.format['user_suffix']) + ] + ) + + elif role.lower() == 'bot': + return Message( + [ + ('role', 'bot'), + ('prefix', self.format['bot_prefix']), + ('content', content), + ('suffix', self.format['bot_suffix']) + ] + ) + + def len_messages(self) -> int: + """ + Return the total length of all messages in this thread, in tokens. + + Can also use `len(Thread)`.""" + + return self.model.get_length(self.as_string()) + + def add_message( + self, + role: Literal['system', 'user', 'bot'], + content: str + ) -> None: + """ + Create a message and append it to `Thread.messages`. + + `Thread.add_message(...)` is a shorthand for + `Thread.messages.append(Thread.create_message(...))` + """ + self.messages.append( + self.create_message( + role=role, + content=content + ) + ) + + def inference_str_from_messages(self) -> str: + """ + Using the list of messages, construct a string suitable for inference, + respecting the format and context length of this thread. 
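        Messages are walked newest-first and included until the model's
        context length would be exceeded; if the first message in the thread
        has the 'system' role it is always kept, and the bot prefix is
        appended at the end so the model replies as the assistant.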
+ """ + + inf_str = '' + sys_msg_str = '' + # whether to treat the first message as necessary to keep + sys_msg_flag = False + context_len_budget = self.model.context_length + + # if at least 1 message is history + if len(self.messages) >= 1: + # if first message has system role + if self.messages[0]['role'] == 'system': + sys_msg_flag = True + sys_msg = self.messages[0] + sys_msg_str = sys_msg.as_string() + context_len_budget -= self.model.get_length(sys_msg_str) + + if sys_msg_flag: + iterator = reversed(self.messages[1:]) + else: + iterator = reversed(self.messages) + + for message in iterator: + msg_str = message.as_string() + context_len_budget -= self.model.get_length(msg_str) + if context_len_budget <= 0: + break + inf_str = msg_str + inf_str + + if sys_msg_flag: + inf_str = sys_msg_str + inf_str + inf_str += self.format['bot_prefix'] + + return inf_str + + + def send(self, prompt: str) -> str: + """ + Send a message in this thread. This adds your message and the bot's + response to the list of messages. + + Returns a string containing the response to your message. + """ + + self.add_message("user", prompt) + output = self.model.generate( + self.inference_str_from_messages(), + stops=self.format['stops'], + sampler=self.sampler + ) + self.add_message("bot", output) + + return output + + + def _interactive_update_sampler(self) -> None: + """Interactively update the sampler settings used in this Thread""" + print() + try: + new_max_len_tokens = input(f'max_len_tokens: {self.sampler.max_len_tokens} -> ') + new_temp = input(f'temp: {self.sampler.temp} -> ') + new_top_p = input(f'top_p: {self.sampler.top_p} -> ') + new_min_p = input(f'min_p: {self.sampler.min_p} -> ') + new_frequency_penalty = input(f'frequency_penalty: {self.sampler.frequency_penalty} -> ') + new_presence_penalty = input(f'presence_penalty: {self.sampler.presence_penalty} -> ') + new_repeat_penalty = input(f'repeat_penalty: {self.sampler.repeat_penalty} -> ') + new_top_k = input(f'top_k: {self.sampler.top_k} -> ') + + except KeyboardInterrupt: + print('\nwebscout.Local: sampler settings not updated\n') + return + print() + + try: + self.sampler.max_len_tokens = int(new_max_len_tokens) + except ValueError: + pass + else: + print('webscout.Local: max_len_tokens updated') + + try: + self.sampler.temp = float(new_temp) + except ValueError: + pass + else: + print('webscout.Local: temp updated') + + try: + self.sampler.top_p = float(new_top_p) + except ValueError: + pass + else: + print('webscout.Local: top_p updated') + + try: + self.sampler.min_p = float(new_min_p) + except ValueError: + pass + else: + print('webscout.Local: min_p updated') + + try: + self.sampler.frequency_penalty = float(new_frequency_penalty) + except ValueError: + pass + else: + print('webscout.Local: frequency_penalty updated') + + try: + self.sampler.presence_penalty = float(new_presence_penalty) + except ValueError: + pass + else: + print('webscout.Local: presence_penalty updated') + + try: + self.sampler.repeat_penalty = float(new_repeat_penalty) + except ValueError: + pass + else: + print('webscout.Local: repeat_penalty updated') + + try: + self.sampler.top_k = int(new_top_k) + except ValueError: + pass + else: + print('webscout.Local: top_k updated') + print() + + + def _interactive_input( + self, + prompt: str, + _dim_style: str, + _user_style: str, + _bot_style: str, + _special_style: str + ) -> tuple: + """ + Recive input from the user, while handling multi-line input + and commands + """ + full_user_input = '' # may become multiline + + while 
True: + user_input = input(prompt) + + if user_input.endswith('\\'): + full_user_input += user_input[:-1] + '\n' + + elif user_input == '!': + + print() + try: + command = input(f'{RESET_ALL} ! {_dim_style}') + except KeyboardInterrupt: + print('\n') + continue + + if command == '': + print(f'\n[no command]\n') + + elif command.lower() in ['reset', 'restart']: + self.reset() + print(f'\n[thread reset]\n') + + elif command.lower() in ['cls', 'clear']: + cls() + print() + + elif command.lower() in ['ctx', 'context']: + print(f"\n{self.len_messages()}\n") + + elif command.lower() in ['stats', 'print_stats']: + print() + self.print_stats() + print() + + elif command.lower() in ['sampler', 'samplers', 'settings']: + self._interactive_update_sampler() + + elif command.lower() in ['str', 'string', 'as_string']: + print(f"\n{self.as_string()}\n") + + elif command.lower() in ['repr', 'save', 'backup']: + print(f"\n{repr(self)}\n") + + elif command.lower() in ['remove', 'rem', 'delete', 'del']: + print() + old_len = len(self.messages) + del self.messages[-1] + assert len(self.messages) == (old_len - 1) + print('[removed last message]\n') + + elif command.lower() in ['last', 'repeat']: + last_msg = self.messages[-1] + if last_msg['role'] == 'user': + print(f"\n{_user_style}{last_msg['content']}{RESET_ALL}\n") + elif last_msg['role'] == 'bot': + print(f"\n{_bot_style}{last_msg['content']}{RESET_ALL}\n") + + elif command.lower() in ['inf', 'inference', 'inf_str']: + print(f'\n"""{self.inference_str_from_messages()}"""\n') + + elif command.lower() in ['reroll', 're-roll', 're', 'swipe']: + old_len = len(self.messages) + del self.messages[-1] + assert len(self.messages) == (old_len - 1) + return '', None + + elif command.lower() in ['exit', 'quit']: + print(RESET_ALL) + return None, None + + elif command.lower() in ['help', '/?', '?']: + print() + print('reset | restart -- Reset the thread to its original state') + print('clear | cls -- Clear the terminal') + print('context | ctx -- Get the context usage in tokens') + print('print_stats | stats -- Get the context usage stats') + print('sampler | settings -- Update the sampler settings') + print('string | str -- Print the message history as a string') + print('repr | save -- Print the representation of the thread') + print('remove | delete -- Remove the last message') + print('last | repeat -- Repeat the last message') + print('inference | inf -- Print the inference string') + print('reroll | swipe -- Regenerate the last message') + print('exit | quit -- Exit the interactive chat (can also use ^C)') + print('help | ? -- Show this screen') + print() + print("TIP: type < at the prompt and press ENTER to prefix the bot's next message.") + print(' for example, type "Sure!" to bypass refusals') + print() + print("TIP: type !! at the prompt and press ENTER to insert a system message") + print() + + else: + print(f'\n[unknown command]\n') + + # prefix the bot's next message + elif user_input == '<': + + print() + try: + next_message_start = input(f'{RESET_ALL} < {_dim_style}') + + except KeyboardInterrupt: + print(f'{RESET_ALL}\n') + continue + + else: + print() + return '', next_message_start + + # insert a system message + elif user_input == '!!': + print() + + try: + next_sys_msg = input(f'{RESET_ALL} !! 
{_special_style}') + + except KeyboardInterrupt: + print(f'{RESET_ALL}\n') + continue + + else: + print() + return next_sys_msg, -1 + + # concatenate multi-line input + else: + full_user_input += user_input + return full_user_input, None + + + def interact( + self, + color: bool = True, + header: Optional[str] = None, + stream: bool = True + ) -> None: + """ + Start an interactive chat session using this Thread. + + While text is being generated, press `^C` to interrupt the bot. + Then you have the option to press `ENTER` to re-roll, or to simply type + another message. + + At the prompt, press `^C` to end the chat session. + + Type `!` and press `ENTER` to enter a basic command prompt. For a list + of commands, type `help` at this prompt. + + Type `<` and press `ENTER` to prefix the bot's next message, for + example with `Sure!`. + + Type `!!` at the prompt and press `ENTER` to insert a system message. + + The following parameters are optional: + - color: Whether to use colored text to differentiate user / bot + - header: Header text to print at the start of the interaction + - stream: Whether to stream text as it is generated + """ + print() + + # fresh import of color codes in case `color` param has changed + from .utils import SPECIAL_STYLE, USER_STYLE, BOT_STYLE, DIM_STYLE + + # disable color codes if explicitly disabled by `color` param + if not color: + SPECIAL_STYLE = '' + USER_STYLE = '' + BOT_STYLE = '' + DIM_STYLE = '' + + if header is not None: + print(f"{SPECIAL_STYLE}{header}{RESET_ALL}\n") + + while True: + + prompt = f"{RESET_ALL} > {USER_STYLE}" + + try: + user_prompt, next_message_start = self._interactive_input( + prompt, + DIM_STYLE, + USER_STYLE, + BOT_STYLE, + SPECIAL_STYLE + ) + except KeyboardInterrupt: + print(f"{RESET_ALL}\n") + return + + # got 'exit' or 'quit' command + if user_prompt is None and next_message_start is None: + break + + # insert a system message via `!!` prompt + if next_message_start == -1: + self.add_message('system', user_prompt) + continue + + if next_message_start is not None: + try: + if stream: + print(f"{BOT_STYLE}{next_message_start}", end='', flush=True) + output = next_message_start + self.model.stream_print( + self.inference_str_from_messages() + next_message_start, + stops=self.format['stops'], + sampler=self.sampler, + end='' + ) + else: + print(f"{BOT_STYLE}", end='', flush=True) + output = next_message_start + self.model.generate( + self.inference_str_from_messages() + next_message_start, + stops=self.format['stops'], + sampler=self.sampler + ) + print(output, end='', flush=True) + except KeyboardInterrupt: + print(f"{DIM_STYLE} [message not added to history; press ENTER to re-roll]\n") + continue + else: + self.add_message("bot", output) + else: + print(BOT_STYLE) + if user_prompt != "": + self.add_message("user", user_prompt) + try: + if stream: + output = self.model.stream_print( + self.inference_str_from_messages(), + stops=self.format['stops'], + sampler=self.sampler, + end='' + ) + else: + output = self.model.generate( + self.inference_str_from_messages(), + stops=self.format['stops'], + sampler=self.sampler + ) + print(output, end='', flush=True) + except KeyboardInterrupt: + print(f"{DIM_STYLE} [message not added to history; press ENTER to re-roll]\n") + continue + else: + self.add_message("bot", output) + + if output.endswith("\n\n"): + print(RESET_ALL, end = '', flush=True) + elif output.endswith("\n"): + print(RESET_ALL) + else: + print(f"{RESET_ALL}\n") + + + def reset(self) -> None: + """ + Clear the list of messages, 
which resets the thread to its original + state + """ + self.messages: list[Message] = [ + self.create_message("system", self.format['system_content']) + ] if self._messages is None else self._messages + + + def as_string(self) -> str: + """Return this thread's message history as a string""" + thread_string = '' + for msg in self.messages: + thread_string += msg.as_string() + return thread_string + + + def print_stats( + self, + end: str = '\n', + file: _SupportsWriteAndFlush = sys.stdout, + flush: bool = True + ) -> None: + """Print stats about the context usage in this thread""" + thread_len_tokens = self.len_messages() + max_ctx_len = self.model.context_length + context_used_percentage = round((thread_len_tokens/max_ctx_len)*100) + print(f"{thread_len_tokens} / {max_ctx_len} tokens", file=file, flush=flush) + print(f"{context_used_percentage}% of context used", file=file, flush=flush) + print(f"{len(self.messages)} messages", end=end, file=file, flush=flush) + if not flush: + file.flush() \ No newline at end of file diff --git a/webscout/Local/utils.py b/webscout/Local/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..29b52b39d03b17a9b035ffe3dfc3fcb36d5aad06 --- /dev/null +++ b/webscout/Local/utils.py @@ -0,0 +1,185 @@ +from ._version import __version__, __llama_cpp_version__ + +import sys +import numpy as np + +from typing import Any, Iterable, TextIO +from time import strftime +from enum import IntEnum +from struct import unpack +from colorama import Fore +from huggingface_hub import hf_hub_url, cached_download + +# color codes used in Thread.interact() +RESET_ALL = Fore.RESET +USER_STYLE = RESET_ALL + Fore.GREEN +BOT_STYLE = RESET_ALL + Fore.CYAN +DIM_STYLE = RESET_ALL + Fore.LIGHTBLACK_EX +SPECIAL_STYLE = RESET_ALL + Fore.YELLOW + +# for typing of softmax parameter `z` +class _ArrayLike(Iterable): + pass + +# for typing of Model.stream_print() parameter `file` +class _SupportsWriteAndFlush(TextIO): + pass + +def download_model(repo_id: str, filename: str, cache_dir: str = ".cache") -> str: + """ + Downloads a GGUF model file from Hugging Face Hub. + + repo_id: The Hugging Face repository ID (e.g., 'facebook/bart-large-cnn'). + filename: The name of the GGUF file within the repository (e.g., 'model.gguf'). + cache_dir: The directory where the downloaded file should be stored. + + Returns: The path to the downloaded file. 
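A brief, hedged usage sketch of the `Thread` methods shown above (`interact`, `print_stats`, `as_string`). How the `Thread` itself is constructed (model, prompt format, sampler) is not part of this diff, so the `thread` object below is assumed to already exist.

```python
# Hedged sketch: `thread` is an already-constructed webscout.Local Thread
# (its constructor is outside this diff); this only exercises the methods above.

# Interactive session: ^C at the prompt quits, `!` opens the command prompt,
# `<` prefixes the bot's next message, `!!` inserts a system message.
thread.interact(
    color=True,                                   # colorize user vs. bot text
    header="webscout.Local chat (press ^C at the prompt to quit)",
    stream=True,                                  # print text as it is generated
)

# After the session, inspect context usage and the raw transcript.
thread.print_stats()        # tokens used / context length / message count
print(thread.as_string())   # full inference string for this thread
```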
+ """ + url = hf_hub_url(repo_id, filename) + filepath = cached_download(url, cache_dir=cache_dir, force_filename=filename) + return filepath + +class GGUFReader: + """ + Peek at file header for GGUF metadata + + Raise ValueError if file is not GGUF or is outdated + + Credit to oobabooga for the parts of the code in this class + + Format spec: https://github.com/philpax/ggml/blob/gguf-spec/docs/gguf.md + """ + + class GGUFValueType(IntEnum): + UINT8 = 0 + INT8 = 1 + UINT16 = 2 + INT16 = 3 + UINT32 = 4 + INT32 = 5 + FLOAT32 = 6 + BOOL = 7 + STRING = 8 + ARRAY = 9 + UINT64 = 10 + INT64 = 11 + FLOAT64 = 12 + + _simple_value_packing = { + GGUFValueType.UINT8: " Any: + if value_type == GGUFReader.GGUFValueType.STRING: + value_length = unpack(" dict: + metadata = {} + with open(fname, "rb") as file: + GGUF_MAGIC = file.read(4) + + if GGUF_MAGIC != b"GGUF": + raise ValueError( + "your model file is not a valid GGUF file " + f"(magic number mismatch, got {GGUF_MAGIC}, " + "expected b'GGUF')" + ) + + GGUF_VERSION = unpack(" np.ndarray: + """ + Compute softmax over values in z, where z is array-like + """ + e_z = np.exp(z - np.max(z)) + return e_z / e_z.sum() + +def cls() -> None: + """Clear the terminal""" + print("\033c\033[3J", end='', flush=True) + +# no longer used in this module, but left for others to use +def get_timestamp_prefix_str() -> str: + # helpful: https://strftime.net + return strftime("[%Y, %b %e, %a %l:%M %p] ") + +def truncate(text: str) -> str: + return text if len(text) < 63 else f"{text[:60]}..." + +def print_verbose(text: str) -> None: + print("webscout.Local: verbose:", text, file=sys.stderr, flush=True) + +def print_info(text: str) -> None: + print("webscout.Local: info:", text, file=sys.stderr, flush=True) + +def print_warning(text: str) -> None: + print("webscout.Local: warning:", text, file=sys.stderr, flush=True) diff --git a/webscout/Provider/BasedGPT.py b/webscout/Provider/BasedGPT.py new file mode 100644 index 0000000000000000000000000000000000000000..ce3aae7694cbeb241ae436c3a712a9dffd40801c --- /dev/null +++ b/webscout/Provider/BasedGPT.py @@ -0,0 +1,226 @@ +import time +import uuid +from selenium import webdriver +from selenium.webdriver.chrome.options import Options +from selenium.webdriver.common.by import By +from selenium.webdriver.support import expected_conditions as EC +from selenium.webdriver.support.ui import WebDriverWait +import click +import requests +from requests import get +from uuid import uuid4 +from re import findall +from requests.exceptions import RequestException +from curl_cffi.requests import get, RequestsError +import g4f +from random import randint +from PIL import Image +import io +import re +import json +import yaml +from ..AIutel import Optimizers +from ..AIutel import Conversation +from ..AIutel import AwesomePrompts, sanitize_stream +from ..AIbase import Provider, AsyncProvider +from webscout import exceptions +from typing import Any, AsyncGenerator, Dict +import logging +import httpx + +class BasedGPT(Provider): + def __init__( + self, + is_conversation: bool = True, + max_tokens: int = 600, + timeout: int = 30, + intro: str = None, + filepath: str = None, + update_file: bool = True, + proxies: dict = {}, + history_offset: int = 10250, + act: str = None, + system_prompt: str = "Be Helpful and Friendly", + ): + """Instantiates BasedGPT + + Args: + is_conversation (bool, optional): Flag for chatting conversationally. Defaults to True. + max_tokens (int, optional): Maximum number of tokens to be generated upon completion. 
Defaults to 600. + timeout (int, optional): Http request timeout. Defaults to 30. + intro (str, optional): Conversation introductory prompt. Defaults to None. + filepath (str, optional): Path to file containing conversation history. Defaults to None. + update_file (bool, optional): Add new prompts and responses to the file. Defaults to True. + proxies (dict, optional): Http request proxies. Defaults to {}. + history_offset (int, optional): Limit conversation history to this number of last texts. Defaults to 10250. + act (str|int, optional): Awesome prompt key or index. (Used as intro). Defaults to None. + system_prompt (str, optional): System prompt for BasedGPT. Defaults to "Be Helpful and Friendly". + """ + self.session = requests.Session() + self.is_conversation = is_conversation + self.max_tokens_to_sample = max_tokens + self.chat_endpoint = "https://www.basedgpt.chat/api/chat" + self.stream_chunk_size = 64 + self.timeout = timeout + self.last_response = {} + self.system_prompt = system_prompt + + self.__available_optimizers = ( + method + for method in dir(Optimizers) + if callable(getattr(Optimizers, method)) and not method.startswith("__") + ) + self.session.headers.update( + {"Content-Type": "application/json"} + ) + Conversation.intro = ( + AwesomePrompts().get_act( + act, raise_not_found=True, default=None, case_insensitive=True + ) + if act + else intro or Conversation.intro + ) + self.conversation = Conversation( + is_conversation, self.max_tokens_to_sample, filepath, update_file + ) + self.conversation.history_offset = history_offset + self.session.proxies = proxies + + def ask( + self, + prompt: str, + stream: bool = False, + raw: bool = False, + optimizer: str = None, + conversationally: bool = False, + ) -> dict: + """Chat with AI + + Args: + prompt (str): Prompt to be send. + stream (bool, optional): Flag for streaming response. Defaults to False. + raw (bool, optional): Stream back raw response as received. Defaults to False. + optimizer (str, optional): Prompt optimizer name - `[code, shell_command]`. Defaults to None. + conversationally (bool, optional): Chat conversationally when using optimizer. Defaults to False. + Returns: + dict : {} + ```json + { + "id": "chatcmpl-TaREJpBZsRVQFRFic1wIA7Q7XfnaD", + "object": "chat.completion", + "created": 1704623244, + "model": "gpt-3.5-turbo", + "usage": { + "prompt_tokens": 0, + "completion_tokens": 0, + "total_tokens": 0 + }, + "choices": [ + { + "message": { + "role": "assistant", + "content": "Hello! How can I assist you today?" 
+ }, + "finish_reason": "stop", + "index": 0 + } + ] + } + ``` + """ + conversation_prompt = self.conversation.gen_complete_prompt(prompt) + if optimizer: + if optimizer in self.__available_optimizers: + conversation_prompt = getattr(Optimizers, optimizer)( + conversation_prompt if conversationally else prompt + ) + else: + raise Exception( + f"Optimizer is not one of {self.__available_optimizers}" + ) + + payload = { + "messages": [ + {"role": "system", "content": self.system_prompt}, + {"role": "user", "content": conversation_prompt}, + ], + } + + def for_stream(): + response = self.session.post( + self.chat_endpoint, json=payload, stream=True, timeout=self.timeout + ) + if not response.ok: + raise exceptions.FailedToGenerateResponseError( + f"Failed to generate response - ({response.status_code}, {response.reason}) - {response.text}" + ) + + message_load = "" + for value in response.iter_lines( + decode_unicode=True, + delimiter="", + chunk_size=self.stream_chunk_size, + ): + try: + message_load += value + yield value if raw else dict(text=message_load) + except json.decoder.JSONDecodeError: + pass + self.last_response.update(dict(text=message_load)) + self.conversation.update_chat_history( + prompt, self.get_message(self.last_response) + ) + + def for_non_stream(): + for _ in for_stream(): + pass + return self.last_response + + return for_stream() if stream else for_non_stream() + + def chat( + self, + prompt: str, + stream: bool = False, + optimizer: str = None, + conversationally: bool = False, + ) -> str: + """Generate response `str` + Args: + prompt (str): Prompt to be send. + stream (bool, optional): Flag for streaming response. Defaults to False. + optimizer (str, optional): Prompt optimizer name - `[code, shell_command]`. Defaults to None. + conversationally (bool, optional): Chat conversationally when using optimizer. Defaults to False. + Returns: + str: Response generated + """ + + def for_stream(): + for response in self.ask( + prompt, True, optimizer=optimizer, conversationally=conversationally + ): + yield self.get_message(response) + + def for_non_stream(): + return self.get_message( + self.ask( + prompt, + False, + optimizer=optimizer, + conversationally=conversationally, + ) + ) + + return for_stream() if stream else for_non_stream() + + def get_message(self, response: dict) -> str: + """Retrieves message only from response + + Args: + response (dict): Response generated by `self.ask` + + Returns: + str: Message extracted + """ + assert isinstance(response, dict), "Response should be of dict data-type only" + return response["text"] \ No newline at end of file diff --git a/webscout/Provider/Berlin4h.py b/webscout/Provider/Berlin4h.py new file mode 100644 index 0000000000000000000000000000000000000000..f23554cca930c600d497f07aa1829471dac68611 --- /dev/null +++ b/webscout/Provider/Berlin4h.py @@ -0,0 +1,211 @@ +import requests +import json +import uuid +from typing import Any, Dict, Optional +from ..AIutel import Optimizers +from ..AIutel import Conversation +from ..AIutel import AwesomePrompts, sanitize_stream +from ..AIbase import Provider, AsyncProvider +from webscout import exceptions + +class Berlin4h(Provider): + """ + A class to interact with the Berlin4h AI API. 
+ """ + + def __init__( + self, + api_token: str = "3bf369cd84339603f8a5361e964f9ebe", + api_endpoint: str = "https://ai.berlin4h.top/api/chat/completions", + model: str = "gpt-3.5-turbo", + temperature: float = 0.9, + presence_penalty: float = 0, + frequency_penalty: float = 0, + max_tokens: int = 4000, + is_conversation: bool = True, + timeout: int = 30, + intro: str = None, + filepath: str = None, + update_file: bool = True, + proxies: dict = {}, + history_offset: int = 10250, + act: str = None, + ) -> None: + """ + Initializes the Berlin4h API with given parameters. + + Args: + api_token (str): The API token for authentication. + api_endpoint (str): The API endpoint to use for requests. + model (str): The AI model to use for text generation. + temperature (float): The temperature parameter for the model. + presence_penalty (float): The presence penalty parameter for the model. + frequency_penalty (float): The frequency penalty parameter for the model. + max_tokens (int): The maximum number of tokens to generate. + is_conversation (bool, optional): Flag for chatting conversationally. Defaults to True. + timeout (int, optional): Http request timeout. Defaults to 30. + intro (str, optional): Conversation introductory prompt. Defaults to None. + filepath (str, optional): Path to file containing conversation history. Defaults to None. + update_file (bool, optional): Add new prompts and responses to the file. Defaults to True. + proxies (dict, optional): Http request proxies. Defaults to {}. + history_offset (int, optional): Limit conversation history to this number of last texts. Defaults to 10250. + act (str|int, optional): Awesome prompt key or index. (Used as intro). Defaults to None. + """ + self.api_token = api_token + self.api_endpoint = api_endpoint + self.model = model + self.temperature = temperature + self.presence_penalty = presence_penalty + self.frequency_penalty = frequency_penalty + self.max_tokens = max_tokens + self.parent_message_id: Optional[str] = None + self.session = requests.Session() + self.is_conversation = is_conversation + self.max_tokens_to_sample = max_tokens + self.stream_chunk_size = 1 + self.timeout = timeout + self.last_response = {} + self.headers = {"Content-Type": "application/json", "Token": self.api_token} + self.__available_optimizers = ( + method + for method in dir(Optimizers) + if callable(getattr(Optimizers, method)) and not method.startswith("__") + ) + self.session.headers.update(self.headers) + Conversation.intro = ( + AwesomePrompts().get_act( + act, raise_not_found=True, default=None, case_insensitive=True + ) + if act + else intro or Conversation.intro + ) + self.conversation = Conversation( + is_conversation, self.max_tokens_to_sample, filepath, update_file + ) + self.conversation.history_offset = history_offset + self.session.proxies = proxies + + def ask( + self, + prompt: str, + stream: bool = False, + raw: bool = False, + optimizer: str = None, + conversationally: bool = False, + ) -> Dict[str, Any]: + """ + Sends a prompt to the Berlin4h AI API and returns the response. + + Args: + prompt: The text prompt to generate text from. + stream (bool, optional): Whether to stream the response. Defaults to False. + raw (bool, optional): Whether to return the raw response. Defaults to False. + optimizer (str, optional): The name of the optimizer to use. Defaults to None. + conversationally (bool, optional): Whether to chat conversationally. Defaults to False. + + Returns: + The response from the API. 
+ """ + conversation_prompt = self.conversation.gen_complete_prompt(prompt) + if optimizer: + if optimizer in self.__available_optimizers: + conversation_prompt = getattr(Optimizers, optimizer)( + conversation_prompt if conversationally else prompt + ) + else: + raise Exception( + f"Optimizer is not one of {self.__available_optimizers}" + ) + + payload: Dict[str, any] = { + "prompt": conversation_prompt, + "parentMessageId": self.parent_message_id or str(uuid.uuid4()), + "options": { + "model": self.model, + "temperature": self.temperature, + "presence_penalty": self.presence_penalty, + "frequency_penalty": self.frequency_penalty, + "max_tokens": self.max_tokens, + }, + } + + def for_stream(): + response = self.session.post( + self.api_endpoint, json=payload, headers=self.headers, stream=True, timeout=self.timeout + ) + + if not response.ok: + raise exceptions.FailedToGenerateResponseError( + f"Failed to generate response - ({response.status_code}, {response.reason})" + ) + + streaming_response = "" + # Collect the entire line before processing + for line in response.iter_lines(decode_unicode=True): + if line: + try: + json_data = json.loads(line) + content = json_data['content'] + if ">" in content: break + streaming_response += content + yield content if raw else dict(text=streaming_response) # Yield accumulated response + except: + continue + self.last_response.update(dict(text=streaming_response)) + self.conversation.update_chat_history( + prompt, self.get_message(self.last_response) + ) + + def for_non_stream(): + for _ in for_stream(): + pass + return self.last_response + + return for_stream() if stream else for_non_stream() + + def chat( + self, + prompt: str, + stream: bool = False, + optimizer: str = None, + conversationally: bool = False, + ) -> str: + """Generate response `str` + Args: + prompt (str): Prompt to be send. + stream (bool, optional): Flag for streaming response. Defaults to False. + optimizer (str, optional): Prompt optimizer name - `[code, shell_command]`. Defaults to None. + conversationally (bool, optional): Chat conversationally when using optimizer. Defaults to False. 
+ Returns: + str: Response generated + """ + + def for_stream(): + for response in self.ask( + prompt, True, optimizer=optimizer, conversationally=conversationally + ): + yield self.get_message(response) + + def for_non_stream(): + return self.get_message( + self.ask( + prompt, + False, + optimizer=optimizer, + conversationally=conversationally, + ) + ) + + return for_stream() if stream else for_non_stream() + + def get_message(self, response: dict) -> str: + """Retrieves message only from response + + Args: + response (dict): Response generated by `self.ask` + + Returns: + str: Message extracted + """ + assert isinstance(response, dict), "Response should be of dict data-type only" + return response["text"] \ No newline at end of file diff --git a/webscout/Provider/Blackboxai.py b/webscout/Provider/Blackboxai.py new file mode 100644 index 0000000000000000000000000000000000000000..8b6f685356457101038e326f99a865fd4e29e490 --- /dev/null +++ b/webscout/Provider/Blackboxai.py @@ -0,0 +1,440 @@ +import time +import uuid +from selenium import webdriver +from selenium.webdriver.chrome.options import Options +from selenium.webdriver.common.by import By +from selenium.webdriver.support import expected_conditions as EC +from selenium.webdriver.support.ui import WebDriverWait +import click +import requests +from requests import get +from uuid import uuid4 +from re import findall +from requests.exceptions import RequestException +from curl_cffi.requests import get, RequestsError +import g4f +from random import randint +from PIL import Image +import io +import re +import json +import yaml +from ..AIutel import Optimizers +from ..AIutel import Conversation +from ..AIutel import AwesomePrompts, sanitize_stream +from ..AIbase import Provider, AsyncProvider +from Helpingai_T2 import Perplexity +from webscout import exceptions +from typing import Any, AsyncGenerator, Dict +import logging +import httpx + +#------------------------------------------------------BLACKBOXAI-------------------------------------------------------- +class BLACKBOXAI: + def __init__( + self, + is_conversation: bool = True, + max_tokens: int = 8000, + timeout: int = 30, + intro: str = None, + filepath: str = None, + update_file: bool = True, + proxies: dict = {}, + history_offset: int = 10250, + act: str = None, + model: str = None, + ): + """Instantiates BLACKBOXAI + + Args: + is_conversation (bool, optional): Flag for chatting conversationally. Defaults to True + max_tokens (int, optional): Maximum number of tokens to be generated upon completion. Defaults to 600. + timeout (int, optional): Http request timeout. Defaults to 30. + intro (str, optional): Conversation introductory prompt. Defaults to None. + filepath (str, optional): Path to file containing conversation history. Defaults to None. + update_file (bool, optional): Add new prompts and responses to the file. Defaults to True. + proxies (dict, optional): Http request proxies. Defaults to {}. + history_offset (int, optional): Limit conversation history to this number of last texts. Defaults to 10250. + act (str|int, optional): Awesome prompt key or index. (Used as intro). Defaults to None. + model (str, optional): Model name. Defaults to "Phind Model". 
+ """ + self.session = requests.Session() + self.max_tokens_to_sample = max_tokens + self.is_conversation = is_conversation + self.chat_endpoint = "https://www.blackbox.ai/api/chat" + self.stream_chunk_size = 64 + self.timeout = timeout + self.last_response = {} + self.model = model + self.previewToken: str = None + self.userId: str = "" + self.codeModelMode: bool = True + self.id: str = "" + self.agentMode: dict = {} + self.trendingAgentMode: dict = {} + self.isMicMode: bool = False + + self.headers = { + "Content-Type": "application/json", + "User-Agent": "", + "Accept": "*/*", + "Accept-Encoding": "Identity", + } + + self.__available_optimizers = ( + method + for method in dir(Optimizers) + if callable(getattr(Optimizers, method)) and not method.startswith("__") + ) + self.session.headers.update(self.headers) + Conversation.intro = ( + AwesomePrompts().get_act( + act, raise_not_found=True, default=None, case_insensitive=True + ) + if act + else intro or Conversation.intro + ) + self.conversation = Conversation( + is_conversation, self.max_tokens_to_sample, filepath, update_file + ) + self.conversation.history_offset = history_offset + self.session.proxies = proxies + + def ask( + self, + prompt: str, + stream: bool = False, + raw: bool = False, + optimizer: str = None, + conversationally: bool = False, + ) -> dict: + """Chat with AI + + Args: + prompt (str): Prompt to be send. + stream (bool, optional): Flag for streaming response. Defaults to False. + raw (bool, optional): Stream back raw response as received. Defaults to False. + optimizer (str, optional): Prompt optimizer name - `[code, shell_command]`. Defaults to None. + conversationally (bool, optional): Chat conversationally when using optimizer. Defaults to False. + Returns: + dict : {} + ```json + { + "text" : "print('How may I help you today?')" + } + ``` + """ + conversation_prompt = self.conversation.gen_complete_prompt(prompt) + if optimizer: + if optimizer in self.__available_optimizers: + conversation_prompt = getattr(Optimizers, optimizer)( + conversation_prompt if conversationally else prompt + ) + else: + raise Exception( + f"Optimizer is not one of {self.__available_optimizers}" + ) + + self.session.headers.update(self.headers) + payload = { + "messages": [ + # json.loads(prev_messages), + {"content": conversation_prompt, "role": "user"} + ], + "id": self.id, + "previewToken": self.previewToken, + "userId": self.userId, + "codeModelMode": self.codeModelMode, + "agentMode": self.agentMode, + "trendingAgentMode": self.trendingAgentMode, + "isMicMode": self.isMicMode, + } + + def for_stream(): + response = self.session.post( + self.chat_endpoint, json=payload, stream=True, timeout=self.timeout + ) + if ( + not response.ok + or not response.headers.get("Content-Type") + == "text/plain; charset=utf-8" + ): + raise Exception( + f"Failed to generate response - ({response.status_code}, {response.reason}) - {response.text}" + ) + streaming_text = "" + for value in response.iter_lines( + decode_unicode=True, + chunk_size=self.stream_chunk_size, + delimiter="\n", + ): + try: + if bool(value): + streaming_text += value + ("\n" if stream else "") + + resp = dict(text=streaming_text) + self.last_response.update(resp) + yield value if raw else resp + except json.decoder.JSONDecodeError: + pass + self.conversation.update_chat_history( + prompt, self.get_message(self.last_response) + ) + + def for_non_stream(): + for _ in for_stream(): + pass + return self.last_response + + return for_stream() if stream else for_non_stream() + + def 
chat( + self, + prompt: str, + stream: bool = False, + optimizer: str = None, + conversationally: bool = False, + ) -> str: + """Generate response `str` + Args: + prompt (str): Prompt to be send. + stream (bool, optional): Flag for streaming response. Defaults to False. + optimizer (str, optional): Prompt optimizer name - `[code, shell_command]`. Defaults to None. + conversationally (bool, optional): Chat conversationally when using optimizer. Defaults to False. + Returns: + str: Response generated + """ + + def for_stream(): + for response in self.ask( + prompt, True, optimizer=optimizer, conversationally=conversationally + ): + yield self.get_message(response) + + def for_non_stream(): + return self.get_message( + self.ask( + prompt, + False, + optimizer=optimizer, + conversationally=conversationally, + ) + ) + + return for_stream() if stream else for_non_stream() + + def get_message(self, response: dict) -> str: + """Retrieves message only from response + + Args: + response (dict): Response generated by `self.ask` + + Returns: + str: Message extracted + """ + assert isinstance(response, dict), "Response should be of dict data-type only" + return response["text"] + @staticmethod + def chat_cli(prompt): + """Sends a request to the BLACKBOXAI API and processes the response.""" + blackbox_ai = BLACKBOXAI() # Initialize a BLACKBOXAI instance + response = blackbox_ai.ask(prompt) # Perform a chat with the given prompt + processed_response = blackbox_ai.get_message(response) # Process the response + print(processed_response) +class AsyncBLACKBOXAI(AsyncProvider): + def __init__( + self, + is_conversation: bool = True, + max_tokens: int = 600, + timeout: int = 30, + intro: str = None, + filepath: str = None, + update_file: bool = True, + proxies: dict = {}, + history_offset: int = 10250, + act: str = None, + model: str = None, + ): + """Instantiates BLACKBOXAI + + Args: + is_conversation (bool, optional): Flag for chatting conversationally. Defaults to True + max_tokens (int, optional): Maximum number of tokens to be generated upon completion. Defaults to 600. + timeout (int, optional): Http request timeout. Defaults to 30. + intro (str, optional): Conversation introductory prompt. Defaults to None. + filepath (str, optional): Path to file containing conversation history. Defaults to None. + update_file (bool, optional): Add new prompts and responses to the file. Defaults to True. + proxies (dict, optional): Http request proxies. Defaults to {}. + history_offset (int, optional): Limit conversation history to this number of last texts. Defaults to 10250. + act (str|int, optional): Awesome prompt key or index. (Used as intro). Defaults to None. + model (str, optional): Model name. Defaults to "Phind Model". 
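A hedged usage sketch for the synchronous `BLACKBOXAI` class above, including its bundled `chat_cli` static helper.

```python
# Hedged sketch of the synchronous BLACKBOXAI client defined above.
from webscout import BLACKBOXAI

ai = BLACKBOXAI(is_conversation=True, max_tokens=8000, timeout=30)

# Plain string response.
print(ai.chat("Write a Python one-liner that reverses a string."))

# The convenience static helper prints the processed response directly.
BLACKBOXAI.chat_cli("What does HTTP status 429 mean?")
```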
+ """ + self.max_tokens_to_sample = max_tokens + self.is_conversation = is_conversation + self.chat_endpoint = "https://www.blackbox.ai/api/chat" + self.stream_chunk_size = 64 + self.timeout = timeout + self.last_response = {} + self.model = model + self.previewToken: str = None + self.userId: str = "" + self.codeModelMode: bool = True + self.id: str = "" + self.agentMode: dict = {} + self.trendingAgentMode: dict = {} + self.isMicMode: bool = False + + self.headers = { + "Content-Type": "application/json", + "User-Agent": "", + "Accept": "*/*", + "Accept-Encoding": "Identity", + } + + self.__available_optimizers = ( + method + for method in dir(Optimizers) + if callable(getattr(Optimizers, method)) and not method.startswith("__") + ) + Conversation.intro = ( + AwesomePrompts().get_act( + act, raise_not_found=True, default=None, case_insensitive=True + ) + if act + else intro or Conversation.intro + ) + self.conversation = Conversation( + is_conversation, self.max_tokens_to_sample, filepath, update_file + ) + self.conversation.history_offset = history_offset + self.session = httpx.AsyncClient(headers=self.headers, proxies=proxies) + + async def ask( + self, + prompt: str, + stream: bool = False, + raw: bool = False, + optimizer: str = None, + conversationally: bool = False, + ) -> dict | AsyncGenerator: + """Chat with AI asynchronously. + + Args: + prompt (str): Prompt to be send. + stream (bool, optional): Flag for streaming response. Defaults to False. + raw (bool, optional): Stream back raw response as received. Defaults to False. + optimizer (str, optional): Prompt optimizer name - `[code, shell_command]`. Defaults to None. + conversationally (bool, optional): Chat conversationally when using optimizer. Defaults to False. + Returns: + dict|AsyncGenerator : ai content + ```json + { + "text" : "print('How may I help you today?')" + } + ``` + """ + conversation_prompt = self.conversation.gen_complete_prompt(prompt) + if optimizer: + if optimizer in self.__available_optimizers: + conversation_prompt = getattr(Optimizers, optimizer)( + conversation_prompt if conversationally else prompt + ) + else: + raise Exception( + f"Optimizer is not one of {self.__available_optimizers}" + ) + + payload = { + "messages": [ + # json.loads(prev_messages), + {"content": conversation_prompt, "role": "user"} + ], + "id": self.id, + "previewToken": self.previewToken, + "userId": self.userId, + "codeModelMode": self.codeModelMode, + "agentMode": self.agentMode, + "trendingAgentMode": self.trendingAgentMode, + "isMicMode": self.isMicMode, + } + + async def for_stream(): + async with self.session.stream( + "POST", self.chat_endpoint, json=payload, timeout=self.timeout + ) as response: + if ( + not response.is_success + or not response.headers.get("Content-Type") + == "text/plain; charset=utf-8" + ): + raise exceptions.FailedToGenerateResponseError( + f"Failed to generate response - ({response.status_code}, {response.reason_phrase})" + ) + streaming_text = "" + async for value in response.aiter_lines(): + try: + if bool(value): + streaming_text += value + ("\n" if stream else "") + resp = dict(text=streaming_text) + self.last_response.update(resp) + yield value if raw else resp + except json.decoder.JSONDecodeError: + pass + self.conversation.update_chat_history( + prompt, await self.get_message(self.last_response) + ) + + async def for_non_stream(): + async for _ in for_stream(): + pass + return self.last_response + + return for_stream() if stream else await for_non_stream() + + async def chat( + self, + prompt: 
str, + stream: bool = False, + optimizer: str = None, + conversationally: bool = False, + ) -> str | AsyncGenerator: + """Generate response `str` asynchronously. + Args: + prompt (str): Prompt to be send. + stream (bool, optional): Flag for streaming response. Defaults to False. + optimizer (str, optional): Prompt optimizer name - `[code, shell_command]`. Defaults to None. + conversationally (bool, optional): Chat conversationally when using optimizer. Defaults to False. + Returns: + str|AsyncGenerator: Response generated + """ + + async def for_stream(): + async_ask = await self.ask( + prompt, True, optimizer=optimizer, conversationally=conversationally + ) + async for response in async_ask: + yield await self.get_message(response) + + async def for_non_stream(): + return await self.get_message( + await self.ask( + prompt, + False, + optimizer=optimizer, + conversationally=conversationally, + ) + ) + + return for_stream() if stream else await for_non_stream() + + async def get_message(self, response: dict) -> str: + """Retrieves message only from response + + Args: + response (dict): Response generated by `self.ask` + + Returns: + str: Message extracted + """ + assert isinstance(response, dict), "Response should be of dict data-type only" + return response["text"] \ No newline at end of file diff --git a/webscout/Provider/ChatGPTUK.py b/webscout/Provider/ChatGPTUK.py new file mode 100644 index 0000000000000000000000000000000000000000..cd3ae9f9831a8ce0225b9a309c45aec9becfb0a6 --- /dev/null +++ b/webscout/Provider/ChatGPTUK.py @@ -0,0 +1,214 @@ +import requests +from typing import Any, AsyncGenerator, Dict, Optional +import json +import re + +from ..AIutel import Optimizers +from ..AIutel import Conversation +from ..AIutel import AwesomePrompts, sanitize_stream +from ..AIbase import Provider, AsyncProvider +from webscout import exceptions + + +class ChatGPTUK(Provider): + """ + A class to interact with the ChatGPT UK API. + """ + + def __init__( + self, + is_conversation: bool = True, + max_tokens: int = 600, + temperature: float = 0.9, + presence_penalty: float = 0, + frequency_penalty: float = 0, + top_p: float = 1, + model: str = "google-gemini-pro", + timeout: int = 30, + intro: str = None, + filepath: str = None, + update_file: bool = True, + proxies: dict = {}, + history_offset: int = 10250, + act: str = None, + ) -> None: + """ + Initializes the ChatGPTUK API with given parameters. + + Args: + is_conversation (bool, optional): Flag for chatting conversationally. Defaults to True. + max_tokens (int, optional): Maximum number of tokens to be generated upon completion. Defaults to 600. + temperature (float, optional): Charge of the generated text's randomness. Defaults to 0.9. + presence_penalty (float, optional): Chances of topic being repeated. Defaults to 0. + frequency_penalty (float, optional): Chances of word being repeated. Defaults to 0. + top_p (float, optional): Sampling threshold during inference time. Defaults to 1. + model (str, optional): LLM model name. Defaults to "google-gemini-pro". + timeout (int, optional): Http request timeout. Defaults to 30. + intro (str, optional): Conversation introductory prompt. Defaults to None. + filepath (str, optional): Path to file containing conversation history. Defaults to None. + update_file (bool, optional): Add new prompts and responses to the file. Defaults to True. + proxies (dict, optional): Http request proxies. Defaults to {}. + history_offset (int, optional): Limit conversation history to this number of last texts. 
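A hedged asyncio sketch for `AsyncBLACKBOXAI`. Awaiting `chat(..., stream=True)` returns an async generator whose items are the accumulated text so far, mirroring the synchronous class.

```python
# Hedged sketch of the asynchronous client defined above.
import asyncio

from webscout import AsyncBLACKBOXAI


async def main() -> None:
    ai = AsyncBLACKBOXAI(timeout=30)

    # Non-streaming: returns the complete reply string.
    print(await ai.chat("Explain what a context manager is in one sentence."))

    # Streaming: the awaited call returns an async generator of accumulated text.
    previous = ""
    async for accumulated in await ai.chat("Write a limerick about Python.", stream=True):
        print(accumulated[len(previous):], end="", flush=True)
        previous = accumulated
    print()


asyncio.run(main())
```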
Defaults to 10250. + act (str|int, optional): Awesome prompt key or index. (Used as intro). Defaults to None. + """ + self.session = requests.Session() + self.is_conversation = is_conversation + self.max_tokens_to_sample = max_tokens + self.api_endpoint = "https://free.chatgpt.org.uk/api/openai/v1/chat/completions" + self.stream_chunk_size = 64 + self.timeout = timeout + self.last_response = {} + self.model = model + self.temperature = temperature + self.presence_penalty = presence_penalty + self.frequency_penalty = frequency_penalty + self.top_p = top_p + self.headers = {"Content-Type": "application/json"} + + self.__available_optimizers = ( + method + for method in dir(Optimizers) + if callable(getattr(Optimizers, method)) and not method.startswith("__") + ) + self.session.headers.update(self.headers) + Conversation.intro = ( + AwesomePrompts().get_act( + act, raise_not_found=True, default=None, case_insensitive=True + ) + if act + else intro or Conversation.intro + ) + self.conversation = Conversation( + is_conversation, self.max_tokens_to_sample, filepath, update_file + ) + self.conversation.history_offset = history_offset + self.session.proxies = proxies + + def ask( + self, + prompt: str, + stream: bool = False, + raw: bool = False, + optimizer: str = None, + conversationally: bool = False, + ) -> dict: + """Chat with AI + + Args: + prompt (str): Prompt to be send. + stream (bool, optional): Flag for streaming response. Defaults to False. + raw (bool, optional): Stream back raw response as received. Defaults to False. + optimizer (str, optional): Prompt optimizer name - `[code, shell_command]`. Defaults to None. + conversationally (bool, optional): Chat conversationally when using optimizer. Defaults to False. + Returns: + dict : {} + ```json + { + "text" : "How may I assist you today?" 
+ } + ``` + """ + conversation_prompt = self.conversation.gen_complete_prompt(prompt) + if optimizer: + if optimizer in self.__available_optimizers: + conversation_prompt = getattr(Optimizers, optimizer)( + conversation_prompt if conversationally else prompt + ) + else: + raise Exception( + f"Optimizer is not one of {self.__available_optimizers}" + ) + + self.session.headers.update(self.headers) + payload = { + "messages": [ + {"role": "system", "content": "Keep your responses long and detailed"}, + {"role": "user", "content": conversation_prompt} + ], + "stream": True, + "model": self.model, + "temperature": self.temperature, + "presence_penalty": self.presence_penalty, + "frequency_penalty": self.frequency_penalty, + "top_p": self.top_p, + "max_tokens": self.max_tokens_to_sample + } + + def for_stream(): + response = self.session.post( + self.api_endpoint, json=payload, stream=True, timeout=self.timeout + ) + if not response.ok: + raise exceptions.FailedToGenerateResponseError( + f"Failed to generate response - ({response.status_code}, {response.reason}) - {response.text}" + ) + + streaming_response = "" + for line in response.iter_lines(decode_unicode=True, chunk_size=1): + if line: + modified_line = re.sub("data:", "", line) + try: + json_data = json.loads(modified_line) + content = json_data['choices'][0]['delta']['content'] + streaming_response += content + yield content if raw else dict(text=streaming_response) + except: + continue + self.last_response.update(dict(text=streaming_response)) + self.conversation.update_chat_history( + prompt, self.get_message(self.last_response) + ) + + def for_non_stream(): + for _ in for_stream(): + pass + return self.last_response + + return for_stream() if stream else for_non_stream() + + def chat( + self, + prompt: str, + stream: bool = False, + optimizer: str = None, + conversationally: bool = False, + ) -> str: + """Generate response `str` + Args: + prompt (str): Prompt to be send. + stream (bool, optional): Flag for streaming response. Defaults to False. + optimizer (str, optional): Prompt optimizer name - `[code, shell_command]`. Defaults to None. + conversationally (bool, optional): Chat conversationally when using optimizer. Defaults to False. 
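A hedged usage sketch for `ChatGPTUK`, which streams OpenAI-style `data:` chunks from the free.chatgpt.org.uk endpoint and defaults to the `google-gemini-pro` model. The optimizer name comes from the docstring (`code`, `shell_command`).

```python
# Hedged sketch of the ChatGPTUK provider defined above.
from webscout import ChatGPTUK

bot = ChatGPTUK(model="google-gemini-pro", temperature=0.9, max_tokens=600)

# Plain chat.
print(bot.chat("Give me two tips for writing readable Python."))

# Using a prompt optimizer, as documented above.
print(bot.chat("print the current working directory", optimizer="shell_command"))
```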
+ Returns: + str: Response generated + """ + + def for_stream(): + for response in self.ask( + prompt, True, optimizer=optimizer, conversationally=conversationally + ): + yield self.get_message(response) + + def for_non_stream(): + return self.get_message( + self.ask( + prompt, + False, + optimizer=optimizer, + conversationally=conversationally, + ) + ) + + return for_stream() if stream else for_non_stream() + + def get_message(self, response: dict) -> str: + """Retrieves message only from response + + Args: + response (dict): Response generated by `self.ask` + + Returns: + str: Message extracted + """ + assert isinstance(response, dict), "Response should be of dict data-type only" + return response["text"] \ No newline at end of file diff --git a/webscout/Provider/Cohere.py b/webscout/Provider/Cohere.py new file mode 100644 index 0000000000000000000000000000000000000000..cfaae78083b27b24fcd7f9182a1a312b5a395094 --- /dev/null +++ b/webscout/Provider/Cohere.py @@ -0,0 +1,223 @@ +import time +import uuid +from selenium import webdriver +from selenium.webdriver.chrome.options import Options +from selenium.webdriver.common.by import By +from selenium.webdriver.support import expected_conditions as EC +from selenium.webdriver.support.ui import WebDriverWait +import click +import requests +from requests import get +from uuid import uuid4 +from re import findall +from requests.exceptions import RequestException +from curl_cffi.requests import get, RequestsError +import g4f +from random import randint +from PIL import Image +import io +import re +import json +import yaml +from ..AIutel import Optimizers +from ..AIutel import Conversation +from ..AIutel import AwesomePrompts, sanitize_stream +from ..AIbase import Provider, AsyncProvider +from Helpingai_T2 import Perplexity +from webscout import exceptions +from typing import Any, AsyncGenerator, Dict +import logging +import httpx +#-----------------------------------------------Cohere-------------------------------------------- +class Cohere(Provider): + def __init__( + self, + api_key: str, + is_conversation: bool = True, + max_tokens: int = 600, + model: str = "command-r-plus", + temperature: float = 0.7, + system_prompt: str = "You are helpful AI", + timeout: int = 30, + intro: str = None, + filepath: str = None, + update_file: bool = True, + proxies: dict = {}, + history_offset: int = 10250, + act: str = None, + top_k: int = -1, + top_p: float = 0.999, + ): + """Initializes Cohere + + Args: + api_key (str): Cohere API key. + is_conversation (bool, optional): Flag for chatting conversationally. Defaults to True. + max_tokens (int, optional): Maximum number of tokens to be generated upon completion. Defaults to 600. + model (str, optional): Model to use for generating text. Defaults to "command-r-plus". + temperature (float, optional): Diversity of the generated text. Higher values produce more diverse outputs. + Defaults to 0.7. + system_prompt (str, optional): A system_prompt or context to set the style or tone of the generated text. + Defaults to "You are helpful AI". + timeout (int, optional): Http request timeout. Defaults to 30. + intro (str, optional): Conversation introductory prompt. Defaults to None. + filepath (str, optional): Path to file containing conversation history. Defaults to None. + update_file (bool, optional): Add new prompts and responses to the file. Defaults to True. + proxies (dict, optional): Http request proxies. Defaults to {}. + history_offset (int, optional): Limit conversation history to this number of last texts. 
Defaults to 10250. + act (str|int, optional): Awesome prompt key or index. (Used as intro). Defaults to None. + """ + self.session = requests.Session() + self.is_conversation = is_conversation + self.max_tokens_to_sample = max_tokens + self.api_key = api_key + self.model = model + self.temperature = temperature + self.system_prompt = system_prompt + self.chat_endpoint = "https://production.api.os.cohere.ai/coral/v1/chat" + self.stream_chunk_size = 64 + self.timeout = timeout + self.last_response = {} + self.headers = { + "Content-Type": "application/json", + "Authorization": f"Bearer {self.api_key}", + } + + self.__available_optimizers = ( + method + for method in dir(Optimizers) + if callable(getattr(Optimizers, method)) and not method.startswith("__") + ) + self.session.headers.update(self.headers) + Conversation.intro = ( + AwesomePrompts().get_act( + act, raise_not_found=True, default=None, case_insensitive=True + ) + if act + else intro or Conversation.intro + ) + self.conversation = Conversation( + is_conversation, self.max_tokens_to_sample, filepath, update_file + ) + self.conversation.history_offset = history_offset + self.session.proxies = proxies + + def ask( + self, + prompt: str, + stream: bool = False, + raw: bool = False, + optimizer: str = None, + conversationally: bool = False, + ) -> dict: + """Chat with AI + + Args: + prompt (str): Prompt to be send. + stream (bool, optional): Flag for streaming response. Defaults to False. + raw (bool, optional): Stream back raw response as received. Defaults to False. + optimizer (str, optional): Prompt optimizer name - `[code, shell_command]`. Defaults to None. + conversationally (bool, optional): Chat conversationally when using optimizer. Defaults to False. + Returns: + dict : {} + ```json + { + "text" : "How may I assist you today?" + } + ``` + """ + conversation_prompt = self.conversation.gen_complete_prompt(prompt) + if optimizer: + if optimizer in self.__available_optimizers: + conversation_prompt = getattr(Optimizers, optimizer)( + conversation_prompt if conversationally else prompt + ) + else: + raise Exception( + f"Optimizer is not one of {self.__available_optimizers}" + ) + self.session.headers.update(self.headers) + payload = { + "message": conversation_prompt, + "model": self.model, + "temperature": self.temperature, + "preamble": self.system_prompt, + } + + def for_stream(): + response = self.session.post( + self.chat_endpoint, json=payload, stream=True, timeout=self.timeout + ) + if not response.ok: + raise Exception( + f"Failed to generate response - ({response.status_code}, {response.reason}) - {response.text}" + ) + + for value in response.iter_lines( + decode_unicode=True, + chunk_size=self.stream_chunk_size, + ): + try: + resp = json.loads(value.strip().split("\n")[-1]) + self.last_response.update(resp) + yield value if raw else resp + except json.decoder.JSONDecodeError: + pass + self.conversation.update_chat_history( + prompt, self.get_message(self.last_response) + ) + + def for_non_stream(): + # let's make use of stream + for _ in for_stream(): + pass + return self.last_response + + return for_stream() if stream else for_non_stream() + + def chat( + self, + prompt: str, + stream: bool = False, + optimizer: str = None, + conversationally: bool = False, + ) -> str: + """Generate response `str` + Args: + prompt (str): Prompt to be send. + stream (bool, optional): Flag for streaming response. Defaults to False. + optimizer (str, optional): Prompt optimizer name - `[code, shell_command]`. Defaults to None. 
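A hedged usage sketch for the `Cohere` provider above. The import path is an assumption (it may need `webscout.Provider.Cohere`), and the API key is a placeholder.

```python
# Hedged sketch: import path assumed; replace the placeholder with a real
# Cohere API key.
from webscout import Cohere

bot = Cohere(
    api_key="YOUR_COHERE_API_KEY",   # placeholder
    model="command-r-plus",
    temperature=0.7,
    system_prompt="You are helpful AI",
)
print(bot.chat("Summarize the benefits of unit testing in two sentences."))
```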
+ conversationally (bool, optional): Chat conversationally when using optimizer. Defaults to False. + Returns: + str: Response generated + """ + + def for_stream(): + for response in self.ask( + prompt, True, optimizer=optimizer, conversationally=conversationally + ): + yield self.get_message(response) + + def for_non_stream(): + return self.get_message( + self.ask( + prompt, + False, + optimizer=optimizer, + conversationally=conversationally, + ) + ) + + return for_stream() if stream else for_non_stream() + + def get_message(self, response: dict) -> str: + """Retrieves message only from response + + Args: + response (dict): Response generated by `self.ask` + + Returns: + str: Message extracted + """ + assert isinstance(response, dict), "Response should be of dict data-type only" + return response["result"]["chatStreamEndEvent"]["response"]["text"] \ No newline at end of file diff --git a/webscout/Provider/Gemini.py b/webscout/Provider/Gemini.py new file mode 100644 index 0000000000000000000000000000000000000000..95900f18c5eb351b230a4f033090d774779d3b57 --- /dev/null +++ b/webscout/Provider/Gemini.py @@ -0,0 +1,217 @@ +import time +import uuid +from selenium import webdriver +from selenium.webdriver.chrome.options import Options +from selenium.webdriver.common.by import By +from selenium.webdriver.support import expected_conditions as EC +from selenium.webdriver.support.ui import WebDriverWait +import click +import requests +from requests import get +from uuid import uuid4 +from re import findall +from requests.exceptions import RequestException +from curl_cffi.requests import get, RequestsError +import g4f +from random import randint +from PIL import Image +import io +import re +import json +import yaml +from ..AIutel import Optimizers +from ..AIutel import Conversation +from ..AIutel import AwesomePrompts, sanitize_stream +from ..AIbase import Provider, AsyncProvider +from Helpingai_T2 import Perplexity +from webscout import exceptions +from typing import Any, AsyncGenerator, Dict +import logging +import httpx +from Bard import Chatbot +import logging +from os import path +from json import load +from json import dumps +import warnings +logging.getLogger("httpx").setLevel(logging.ERROR) +warnings.simplefilter("ignore", category=UserWarning) +class GEMINI(Provider): + def __init__( + self, + cookie_file: str, + proxy: dict = {}, + timeout: int = 30, + ): + """Initializes GEMINI + + Args: + cookie_file (str): Path to `bard.google.com.cookies.json` file + proxy (dict, optional): Http request proxy. Defaults to {}. + timeout (int, optional): Http request timeout. Defaults to 30. 
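A hedged sketch of driving the `GEMINI` provider above with an exported cookie file: a JSON list of `{"name": ..., "value": ...}` records that must include the `__Secure-1PSID` and `__Secure-1PSIDTS` cookies from bard.google.com. The file path and import path below are assumptions.

```python
# Hedged sketch: the cookie export path is a placeholder and the import path is
# assumed (the class may live at webscout.Provider.Gemini instead).
from webscout import GEMINI

bot = GEMINI(cookie_file="bard.google.com.cookies.json", timeout=30)

print(bot.chat("Hello there"))   # returns the "content" field of the response

bot.reset()                      # start a fresh conversation
```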
+ """ + self.conversation = Conversation(False) + self.session_auth1 = None + self.session_auth2 = None + assert isinstance( + cookie_file, str + ), f"cookie_file should be of {str} only not '{type(cookie_file)}'" + if path.isfile(cookie_file): + # let's assume auth is a path to exported .json cookie-file + with open(cookie_file) as fh: + entries = load(fh) + for entry in entries: + if entry["name"] == "__Secure-1PSID": + self.session_auth1 = entry["value"] + elif entry["name"] == "__Secure-1PSIDTS": + self.session_auth2 = entry["value"] + + assert all( + [self.session_auth1, self.session_auth2] + ), f"Failed to extract the required cookie value from file '{cookie_file}'" + else: + raise Exception(f"{cookie_file} is not a valid file path") + + self.session = Chatbot(self.session_auth1, self.session_auth2, proxy, timeout) + self.last_response = {} + self.__available_optimizers = ( + method + for method in dir(Optimizers) + if callable(getattr(Optimizers, method)) and not method.startswith("__") + ) + + def ask( + self, + prompt: str, + stream: bool = False, + raw: bool = False, + optimizer: str = None, + conversationally: bool = False, + ) -> dict: + """Chat with AI + + Args: + prompt (str): Prompt to be send. + stream (bool, optional): Flag for streaming response. Defaults to False. + raw (bool, optional): Stream back raw response as received. Defaults to False. + optimizer (str, optional): Prompt optimizer name - `[code, shell_command]`. Defeaults to None + conversationally (bool, optional): Chat conversationally when using optimizer. Defaults to False. + Returns: + dict : {} + ```json + { + "content": "General Kenobi! \n\n(I couldn't help but respond with the iconic Star Wars greeting since you used it first. )\n\nIs there anything I can help you with today?\n[Image of Hello there General Kenobi]", + "conversation_id": "c_f13f6217f9a997aa", + "response_id": "r_d3665f95975c368f", + "factualityQueries": null, + "textQuery": [ + "hello there", + 1 + ], + "choices": [ + { + "id": "rc_ea075c9671bfd8cb", + "content": [ + "General Kenobi! \n\n(I couldn't help but respond with the iconic Star Wars greeting since you used it first. )\n\nIs there anything I can help you with today?\n[Image of Hello there General Kenobi]" + ] + }, + { + "id": "rc_de6dd3fb793a5402", + "content": [ + "General Kenobi! (or just a friendly hello, whichever you prefer!). \n\nI see you're a person of culture as well. *Star Wars* references are always appreciated. \n\nHow can I help you today?\n" + ] + }, + { + "id": "rc_a672ac089caf32db", + "content": [ + "General Kenobi! (or just a friendly hello if you're not a Star Wars fan!). \n\nHow can I help you today? Feel free to ask me anything, or tell me what you'd like to chat about. 
I'm here to assist in any way I can.\n[Image of Obi-Wan Kenobi saying hello there]" + ] + } + ], + + "images": [ + "https://i.pinimg.com/originals/40/74/60/407460925c9e419d82b93313f0b42f71.jpg" + ] + } + + ``` + """ + conversation_prompt = self.conversation.gen_complete_prompt(prompt) + if optimizer: + if optimizer in self.__available_optimizers: + conversation_prompt = getattr(Optimizers, optimizer)( + conversation_prompt if conversationally else prompt + ) + else: + raise Exception( + f"Optimizer is not one of {self.__available_optimizers}" + ) + + def for_stream(): + response = self.session.ask(prompt) + self.last_response.update(response) + self.conversation.update_chat_history( + prompt, self.get_message(self.last_response) + ) + yield dumps(response) if raw else response + + def for_non_stream(): + # let's make use of stream + for _ in for_stream(): + pass + return self.last_response + + return for_stream() if stream else for_non_stream() + + def chat( + self, + prompt: str, + stream: bool = False, + optimizer: str = None, + conversationally: bool = False, + ) -> str: + """Generate response `str` + Args: + prompt (str): Prompt to be send. + stream (bool, optional): Flag for streaming response. Defaults to False. + optimizer (str, optional): Prompt optimizer name - `[code, shell_command]`. Defaults to None. + conversationally (bool, optional): Chat conversationally when using optimizer. Defaults to False. + Returns: + str: Response generated + """ + + def for_stream(): + for response in self.ask( + prompt, True, optimizer=optimizer, conversationally=conversationally + ): + yield self.get_message(response) + + def for_non_stream(): + return self.get_message( + self.ask( + prompt, + False, + optimizer=optimizer, + conversationally=conversationally, + ) + ) + + return for_stream() if stream else for_non_stream() + + def get_message(self, response: dict) -> str: + """Retrieves message only from response + + Args: + response (dict): Response generated by `self.ask` + + Returns: + str: Message extracted + """ + assert isinstance(response, dict), "Response should be of dict data-type only" + return response["content"] + + def reset(self): + """Reset the current conversation""" + self.session.async_chatbot.conversation_id = "" + self.session.async_chatbot.response_id = "" + self.session.async_chatbot.choice_id = "" \ No newline at end of file diff --git a/webscout/Provider/Groq.py b/webscout/Provider/Groq.py new file mode 100644 index 0000000000000000000000000000000000000000..b9062040751a4a6ce49937f29332b31b997d2737 --- /dev/null +++ b/webscout/Provider/Groq.py @@ -0,0 +1,512 @@ +import time +import uuid +from selenium import webdriver +from selenium.webdriver.chrome.options import Options +from selenium.webdriver.common.by import By +from selenium.webdriver.support import expected_conditions as EC +from selenium.webdriver.support.ui import WebDriverWait +import click +import requests +from requests import get +from uuid import uuid4 +from re import findall +from requests.exceptions import RequestException +from curl_cffi.requests import get, RequestsError +import g4f +from random import randint +from PIL import Image +import io +import re +import json +import yaml +from ..AIutel import Optimizers +from ..AIutel import Conversation +from ..AIutel import AwesomePrompts, sanitize_stream +from ..AIbase import Provider, AsyncProvider +from Helpingai_T2 import Perplexity +from webscout import exceptions +from typing import Any, AsyncGenerator, Dict +import logging +import httpx + +class 
GROQ(Provider): + def __init__( + self, + api_key: str, + is_conversation: bool = True, + max_tokens: int = 600, + temperature: float = 1, + presence_penalty: int = 0, + frequency_penalty: int = 0, + top_p: float = 1, + model: str = "mixtral-8x7b-32768", + timeout: int = 30, + intro: str = None, + filepath: str = None, + update_file: bool = True, + proxies: dict = {}, + history_offset: int = 10250, + act: str = None, + ): + """Instantiates GROQ + + Args: + api_key (key): GROQ's API key. + is_conversation (bool, optional): Flag for chatting conversationally. Defaults to True. + max_tokens (int, optional): Maximum number of tokens to be generated upon completion. Defaults to 600. + temperature (float, optional): Charge of the generated text's randomness. Defaults to 1. + presence_penalty (int, optional): Chances of topic being repeated. Defaults to 0. + frequency_penalty (int, optional): Chances of word being repeated. Defaults to 0. + top_p (float, optional): Sampling threshold during inference time. Defaults to 0.999. + model (str, optional): LLM model name. Defaults to "gpt-3.5-turbo". + timeout (int, optional): Http request timeout. Defaults to 30. + intro (str, optional): Conversation introductory prompt. Defaults to None. + filepath (str, optional): Path to file containing conversation history. Defaults to None. + update_file (bool, optional): Add new prompts and responses to the file. Defaults to True. + proxies (dict, optional): Http request proxies. Defaults to {}. + history_offset (int, optional): Limit conversation history to this number of last texts. Defaults to 10250. + act (str|int, optional): Awesome prompt key or index. (Used as intro). Defaults to None. + """ + self.session = requests.Session() + self.is_conversation = is_conversation + self.max_tokens_to_sample = max_tokens + self.api_key = api_key + self.model = model + self.temperature = temperature + self.presence_penalty = presence_penalty + self.frequency_penalty = frequency_penalty + self.top_p = top_p + self.chat_endpoint = "https://api.groq.com/openai/v1/chat/completions" + self.stream_chunk_size = 64 + self.timeout = timeout + self.last_response = {} + self.headers = { + "Content-Type": "application/json", + "Authorization": f"Bearer {self.api_key}", + } + + self.__available_optimizers = ( + method + for method in dir(Optimizers) + if callable(getattr(Optimizers, method)) and not method.startswith("__") + ) + self.session.headers.update(self.headers) + Conversation.intro = ( + AwesomePrompts().get_act( + act, raise_not_found=True, default=None, case_insensitive=True + ) + if act + else intro or Conversation.intro + ) + self.conversation = Conversation( + is_conversation, self.max_tokens_to_sample, filepath, update_file + ) + self.conversation.history_offset = history_offset + self.session.proxies = proxies + + def ask( + self, + prompt: str, + stream: bool = False, + raw: bool = False, + optimizer: str = None, + conversationally: bool = False, + ) -> dict: + """Chat with AI + + Args: + prompt (str): Prompt to be send. + stream (bool, optional): Flag for streaming response. Defaults to False. + raw (bool, optional): Stream back raw response as received. Defaults to False. + optimizer (str, optional): Prompt optimizer name - `[code, shell_command]`. Defaults to None. + conversationally (bool, optional): Chat conversationally when using optimizer. Defaults to False. 
+ Returns: + dict : {} + ```json + { + "id": "c0c8d139-d2b9-9909-8aa1-14948bc28404", + "object": "chat.completion", + "created": 1710852779, + "model": "mixtral-8x7b-32768", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "Hello! How can I assist you today? I'm here to help answer your questions and engage in conversation on a wide variety of topics. Feel free to ask me anything!" + }, + "logprobs": null, + "finish_reason": "stop" + } + ], + "usage": { + "prompt_tokens": 47, + "prompt_time": 0.03, + "completion_tokens": 37, + "completion_time": 0.069, + "total_tokens": 84, + "total_time": 0.099 + }, + "system_fingerprint": null + } + ``` + """ + conversation_prompt = self.conversation.gen_complete_prompt(prompt) + if optimizer: + if optimizer in self.__available_optimizers: + conversation_prompt = getattr(Optimizers, optimizer)( + conversation_prompt if conversationally else prompt + ) + else: + raise Exception( + f"Optimizer is not one of {self.__available_optimizers}" + ) + self.session.headers.update(self.headers) + payload = { + "frequency_penalty": self.frequency_penalty, + "messages": [{"content": conversation_prompt, "role": "user"}], + "model": self.model, + "presence_penalty": self.presence_penalty, + "stream": stream, + "temperature": self.temperature, + "top_p": self.top_p, + } + + def for_stream(): + response = self.session.post( + self.chat_endpoint, json=payload, stream=True, timeout=self.timeout + ) + if not response.ok: + raise Exception( + f"Failed to generate response - ({response.status_code}, {response.reason}) - {response.text}" + ) + + message_load = "" + for value in response.iter_lines( + decode_unicode=True, + delimiter="" if raw else "data:", + chunk_size=self.stream_chunk_size, + ): + try: + resp = json.loads(value) + incomplete_message = self.get_message(resp) + if incomplete_message: + message_load += incomplete_message + resp["choices"][0]["delta"]["content"] = message_load + self.last_response.update(resp) + yield value if raw else resp + elif raw: + yield value + except json.decoder.JSONDecodeError: + pass + self.conversation.update_chat_history( + prompt, self.get_message(self.last_response) + ) + + def for_non_stream(): + response = self.session.post( + self.chat_endpoint, json=payload, stream=False, timeout=self.timeout + ) + if not response.ok: + raise Exception( + f"Failed to generate response - ({response.status_code}, {response.reason}) - {response.text}" + ) + resp = response.json() + self.last_response.update(resp) + self.conversation.update_chat_history( + prompt, self.get_message(self.last_response) + ) + return resp + + return for_stream() if stream else for_non_stream() + + def chat( + self, + prompt: str, + stream: bool = False, + optimizer: str = None, + conversationally: bool = False, + ) -> str: + """Generate response `str` + Args: + prompt (str): Prompt to be send. + stream (bool, optional): Flag for streaming response. Defaults to False. + optimizer (str, optional): Prompt optimizer name - `[code, shell_command]`. Defaults to None. + conversationally (bool, optional): Chat conversationally when using optimizer. Defaults to False. 
+ Returns: + str: Response generated + """ + + def for_stream(): + for response in self.ask( + prompt, True, optimizer=optimizer, conversationally=conversationally + ): + yield self.get_message(response) + + def for_non_stream(): + return self.get_message( + self.ask( + prompt, + False, + optimizer=optimizer, + conversationally=conversationally, + ) + ) + + return for_stream() if stream else for_non_stream() + + def get_message(self, response: dict) -> str: + """Retrieves message only from response + + Args: + response (dict): Response generated by `self.ask` + + Returns: + str: Message extracted + """ + assert isinstance(response, dict), "Response should be of dict data-type only" + try: + if response["choices"][0].get("delta"): + return response["choices"][0]["delta"]["content"] + return response["choices"][0]["message"]["content"] + except KeyError: + return "" +class AsyncGROQ(AsyncProvider): + def __init__( + self, + api_key: str, + is_conversation: bool = True, + max_tokens: int = 600, + temperature: float = 1, + presence_penalty: int = 0, + frequency_penalty: int = 0, + top_p: float = 1, + model: str = "mixtral-8x7b-32768", + timeout: int = 30, + intro: str = None, + filepath: str = None, + update_file: bool = True, + proxies: dict = {}, + history_offset: int = 10250, + act: str = None, + ): + """Instantiates GROQ + + Args: + api_key (key): GROQ's API key. + is_conversation (bool, optional): Flag for chatting conversationally. Defaults to True. + max_tokens (int, optional): Maximum number of tokens to be generated upon completion. Defaults to 600. + temperature (float, optional): Charge of the generated text's randomness. Defaults to 1. + presence_penalty (int, optional): Chances of topic being repeated. Defaults to 0. + frequency_penalty (int, optional): Chances of word being repeated. Defaults to 0. + top_p (float, optional): Sampling threshold during inference time. Defaults to 0.999. + model (str, optional): LLM model name. Defaults to "gpt-3.5-turbo". + timeout (int, optional): Http request timeout. Defaults to 30. + intro (str, optional): Conversation introductory prompt. Defaults to None. + filepath (str, optional): Path to file containing conversation history. Defaults to None. + update_file (bool, optional): Add new prompts and responses to the file. Defaults to True. + proxies (dict, optional): Http request proxies. Defaults to {}. + history_offset (int, optional): Limit conversation history to this number of last texts. Defaults to 10250. + act (str|int, optional): Awesome prompt key or index. (Used as intro). Defaults to None. 
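For reference, a minimal usage sketch of the synchronous `GROQ` provider defined above. The API key is a placeholder, and the import assumes the class is re-exported from the top-level `webscout` package; note that, per the streaming code above, each streamed value carries the text accumulated so far rather than just the new delta.

```python
from webscout import GROQ

# Placeholder key; a real GROQ API key is required.
ai = GROQ(api_key="YOUR_GROQ_API_KEY", model="mixtral-8x7b-32768", timeout=30)

# Non-streaming: chat() returns the full reply text.
print(ai.chat("Explain HTTP keep-alive in one sentence."))

# Streaming: each yielded value is the cumulative response so far,
# so keep the last one instead of concatenating.
final = ""
for chunk in ai.chat("Write a haiku about Python.", stream=True):
    final = chunk
print(final)
```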
+ """ + self.is_conversation = is_conversation + self.max_tokens_to_sample = max_tokens + self.api_key = api_key + self.model = model + self.temperature = temperature + self.presence_penalty = presence_penalty + self.frequency_penalty = frequency_penalty + self.top_p = top_p + self.chat_endpoint = "https://api.groq.com/openai/v1/chat/completions" + self.stream_chunk_size = 64 + self.timeout = timeout + self.last_response = {} + self.headers = { + "Content-Type": "application/json", + "Authorization": f"Bearer {self.api_key}", + } + + self.__available_optimizers = ( + method + for method in dir(Optimizers) + if callable(getattr(Optimizers, method)) and not method.startswith("__") + ) + Conversation.intro = ( + AwesomePrompts().get_act( + act, raise_not_found=True, default=None, case_insensitive=True + ) + if act + else intro or Conversation.intro + ) + self.conversation = Conversation( + is_conversation, self.max_tokens_to_sample, filepath, update_file + ) + self.conversation.history_offset = history_offset + self.session = httpx.AsyncClient(headers=self.headers, proxies=proxies) + + async def ask( + self, + prompt: str, + stream: bool = False, + raw: bool = False, + optimizer: str = None, + conversationally: bool = False, + ) -> dict | AsyncGenerator: + """Chat with AI asynchronously. + + Args: + prompt (str): Prompt to be send. + stream (bool, optional): Flag for streaming response. Defaults to False. + raw (bool, optional): Stream back raw response as received. Defaults to False. + optimizer (str, optional): Prompt optimizer name - `[code, shell_command]`. Defaults to None. + conversationally (bool, optional): Chat conversationally when using optimizer. Defaults to False. + Returns: + dict|AsyncGenerator : ai content + ```json + { + "id": "c0c8d139-d2b9-9909-8aa1-14948bc28404", + "object": "chat.completion", + "created": 1710852779, + "model": "mixtral-8x7b-32768", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "Hello! How can I assist you today? I'm here to help answer your questions and engage in conversation on a wide variety of topics. Feel free to ask me anything!" 
+ }, + "logprobs": null, + "finish_reason": "stop" + } + ], + "usage": { + "prompt_tokens": 47, + "prompt_time": 0.03, + "completion_tokens": 37, + "completion_time": 0.069, + "total_tokens": 84, + "total_time": 0.099 + }, + "system_fingerprint": null + } + ``` + """ + conversation_prompt = self.conversation.gen_complete_prompt(prompt) + if optimizer: + if optimizer in self.__available_optimizers: + conversation_prompt = getattr(Optimizers, optimizer)( + conversation_prompt if conversationally else prompt + ) + else: + raise Exception( + f"Optimizer is not one of {self.__available_optimizers}" + ) + payload = { + "frequency_penalty": self.frequency_penalty, + "messages": [{"content": conversation_prompt, "role": "user"}], + "model": self.model, + "presence_penalty": self.presence_penalty, + "stream": stream, + "temperature": self.temperature, + "top_p": self.top_p, + } + + async def for_stream(): + async with self.session.stream( + "POST", self.chat_endpoint, json=payload, timeout=self.timeout + ) as response: + if not response.is_success: + raise Exception( + f"Failed to generate response - ({response.status_code}, {response.reason_phrase})" + ) + + message_load = "" + intro_value = "data:" + async for value in response.aiter_lines(): + try: + if value.startswith(intro_value): + value = value[len(intro_value) :] + resp = json.loads(value) + incomplete_message = await self.get_message(resp) + if incomplete_message: + message_load += incomplete_message + resp["choices"][0]["delta"]["content"] = message_load + self.last_response.update(resp) + yield value if raw else resp + elif raw: + yield value + except json.decoder.JSONDecodeError: + pass + self.conversation.update_chat_history( + prompt, await self.get_message(self.last_response) + ) + + async def for_non_stream(): + response = httpx.post( + self.chat_endpoint, json=payload, timeout=self.timeout + ) + if not response.is_success: + raise Exception( + f"Failed to generate response - ({response.status_code}, {response.reason_phrase})" + ) + resp = response.json() + self.last_response.update(resp) + self.conversation.update_chat_history( + prompt, await self.get_message(self.last_response) + ) + return resp + + return for_stream() if stream else await for_non_stream() + + async def chat( + self, + prompt: str, + stream: bool = False, + optimizer: str = None, + conversationally: bool = False, + ) -> str | AsyncGenerator: + """Generate response `str` asynchronously. + Args: + prompt (str): Prompt to be send. + stream (bool, optional): Flag for streaming response. Defaults to False. + optimizer (str, optional): Prompt optimizer name - `[code, shell_command]`. Defaults to None. + conversationally (bool, optional): Chat conversationally when using optimizer. Defaults to False. 
+ Returns: + str|AsyncGenerator: Response generated + """ + + async def for_stream(): + async_ask = await self.ask( + prompt, True, optimizer=optimizer, conversationally=conversationally + ) + async for response in async_ask: + yield await self.get_message(response) + + async def for_non_stream(): + return await self.get_message( + await self.ask( + prompt, + False, + optimizer=optimizer, + conversationally=conversationally, + ) + ) + + return for_stream() if stream else await for_non_stream() + + async def get_message(self, response: dict) -> str: + """Retrieves message only from response + + Args: + response (dict): Response generated by `self.ask` + + Returns: + str: Message extracted + """ + assert isinstance(response, dict), "Response should be of dict data-type only" + try: + if response["choices"][0].get("delta"): + return response["choices"][0]["delta"]["content"] + return response["choices"][0]["message"]["content"] + except KeyError: + return "" \ No newline at end of file diff --git a/webscout/Provider/Koboldai.py b/webscout/Provider/Koboldai.py new file mode 100644 index 0000000000000000000000000000000000000000..8065496a21d0cad3f2f4b9b2d028e635660f5475 --- /dev/null +++ b/webscout/Provider/Koboldai.py @@ -0,0 +1,402 @@ +import time +import uuid +from selenium import webdriver +from selenium.webdriver.chrome.options import Options +from selenium.webdriver.common.by import By +from selenium.webdriver.support import expected_conditions as EC +from selenium.webdriver.support.ui import WebDriverWait +import click +import requests +from requests import get +from uuid import uuid4 +from re import findall +from requests.exceptions import RequestException +from curl_cffi.requests import get, RequestsError +import g4f +from random import randint +from PIL import Image +import io +import re +import json +import yaml +from ..AIutel import Optimizers +from ..AIutel import Conversation +from ..AIutel import AwesomePrompts, sanitize_stream +from ..AIbase import Provider, AsyncProvider +from Helpingai_T2 import Perplexity +from webscout import exceptions +from typing import Any, AsyncGenerator, Dict +import logging +import httpx +#------------------------------------------------------KOBOLDAI----------------------------------------------------------- +class KOBOLDAI(Provider): + def __init__( + self, + is_conversation: bool = True, + max_tokens: int = 600, + temperature: float = 1, + top_p: float = 1, + timeout: int = 30, + intro: str = None, + filepath: str = None, + update_file: bool = True, + proxies: dict = {}, + history_offset: int = 10250, + act: str = None, + ): + """Instantiate TGPT + + Args: + is_conversation (str, optional): Flag for chatting conversationally. Defaults to True. + max_tokens (int, optional): Maximum number of tokens to be generated upon completion. Defaults to 600. + temperature (float, optional): Charge of the generated text's randomness. Defaults to 0.2. + top_p (float, optional): Sampling threshold during inference time. Defaults to 0.999. + timeout (int, optional): Http requesting timeout. Defaults to 30 + intro (str, optional): Conversation introductory prompt. Defaults to `Conversation.intro`. + filepath (str, optional): Path to file containing conversation history. Defaults to None. + update_file (bool, optional): Add new prompts and responses to the file. Defaults to True. + proxies (dict, optional) : Http reqiuest proxies (socks). Defaults to {}. + history_offset (int, optional): Limit conversation history to this number of last texts. Defaults to 10250. 
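A corresponding sketch for `AsyncGROQ`, run under `asyncio`. The key is again a placeholder and the top-level import path is assumed; only the streaming path is shown, and, as in the sync class, each yielded value is the accumulated text so far.

```python
import asyncio
from webscout import AsyncGROQ


async def main() -> None:
    ai = AsyncGROQ(api_key="YOUR_GROQ_API_KEY")
    # chat(stream=True) resolves to an async generator of cumulative text.
    last = ""
    async for chunk in await ai.chat("Name three sorting algorithms.", stream=True):
        last = chunk
    print(last)


asyncio.run(main())
```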
+ act (str|int, optional): Awesome prompt key or index. (Used as intro). Defaults to None. + """ + self.session = requests.Session() + self.is_conversation = is_conversation + self.max_tokens_to_sample = max_tokens + self.temperature = temperature + self.top_p = top_p + self.chat_endpoint = ( + "https://koboldai-koboldcpp-tiefighter.hf.space/api/extra/generate/stream" + ) + self.stream_chunk_size = 64 + self.timeout = timeout + self.last_response = {} + self.headers = { + "Content-Type": "application/json", + "Accept": "application/json", + } + + self.__available_optimizers = ( + method + for method in dir(Optimizers) + if callable(getattr(Optimizers, method)) and not method.startswith("__") + ) + self.session.headers.update(self.headers) + Conversation.intro = ( + AwesomePrompts().get_act( + act, raise_not_found=True, default=None, case_insensitive=True + ) + if act + else intro or Conversation.intro + ) + self.conversation = Conversation( + is_conversation, self.max_tokens_to_sample, filepath, update_file + ) + self.conversation.history_offset = history_offset + self.session.proxies = proxies + + def ask( + self, + prompt: str, + stream: bool = False, + raw: bool = False, + optimizer: str = None, + conversationally: bool = False, + ) -> dict: + """Chat with AI + + Args: + prompt (str): Prompt to be send. + stream (bool, optional): Flag for streaming response. Defaults to False. + raw (bool, optional): Stream back raw response as received. Defaults to False. + optimizer (str, optional): Prompt optimizer name - `[code, shell_command]`. Defaults to None. + conversationally (bool, optional): Chat conversationally when using optimizer. Defaults to False. + Returns: + dict : {} + ```json + { + "token" : "How may I assist you today?" + } + ``` + """ + conversation_prompt = self.conversation.gen_complete_prompt(prompt) + if optimizer: + if optimizer in self.__available_optimizers: + conversation_prompt = getattr(Optimizers, optimizer)( + conversation_prompt if conversationally else prompt + ) + else: + raise Exception( + f"Optimizer is not one of {self.__available_optimizers}" + ) + + self.session.headers.update(self.headers) + payload = { + "prompt": conversation_prompt, + "temperature": self.temperature, + "top_p": self.top_p, + } + + def for_stream(): + response = self.session.post( + self.chat_endpoint, json=payload, stream=True, timeout=self.timeout + ) + if not response.ok: + raise Exception( + f"Failed to generate response - ({response.status_code}, {response.reason}) - {response.text}" + ) + + message_load = "" + for value in response.iter_lines( + decode_unicode=True, + delimiter="" if raw else "event: message\ndata:", + chunk_size=self.stream_chunk_size, + ): + try: + resp = json.loads(value) + message_load += self.get_message(resp) + resp["token"] = message_load + self.last_response.update(resp) + yield value if raw else resp + except json.decoder.JSONDecodeError: + pass + self.conversation.update_chat_history( + prompt, self.get_message(self.last_response) + ) + + def for_non_stream(): + # let's make use of stream + for _ in for_stream(): + pass + return self.last_response + + return for_stream() if stream else for_non_stream() + + def chat( + self, + prompt: str, + stream: bool = False, + optimizer: str = None, + conversationally: bool = False, + ) -> str: + """Generate response `str` + Args: + prompt (str): Prompt to be send. + stream (bool, optional): Flag for streaming response. Defaults to False. + optimizer (str, optional): Prompt optimizer name - `[code, shell_command]`. 
Defaults to None. + conversationally (bool, optional): Chat conversationally when using optimizer. Defaults to False. + Returns: + str: Response generated + """ + + def for_stream(): + for response in self.ask( + prompt, True, optimizer=optimizer, conversationally=conversationally + ): + yield self.get_message(response) + + def for_non_stream(): + return self.get_message( + self.ask( + prompt, + False, + optimizer=optimizer, + conversationally=conversationally, + ) + ) + + return for_stream() if stream else for_non_stream() + + def get_message(self, response: dict) -> str: + """Retrieves message only from response + + Args: + response (dict): Response generated by `self.ask` + + Returns: + str: Message extracted + """ + assert isinstance(response, dict), "Response should be of dict data-type only" + return response.get("token") +class AsyncKOBOLDAI(AsyncProvider): + def __init__( + self, + is_conversation: bool = True, + max_tokens: int = 600, + temperature: float = 1, + top_p: float = 1, + timeout: int = 30, + intro: str = None, + filepath: str = None, + update_file: bool = True, + proxies: dict = {}, + history_offset: int = 10250, + act: str = None, + ): + """Instantiate TGPT + + Args: + is_conversation (str, optional): Flag for chatting conversationally. Defaults to True. + max_tokens (int, optional): Maximum number of tokens to be generated upon completion. Defaults to 600. + temperature (float, optional): Charge of the generated text's randomness. Defaults to 0.2. + top_p (float, optional): Sampling threshold during inference time. Defaults to 0.999. + timeout (int, optional): Http requesting timeout. Defaults to 30 + intro (str, optional): Conversation introductory prompt. Defaults to `Conversation.intro`. + filepath (str, optional): Path to file containing conversation history. Defaults to None. + update_file (bool, optional): Add new prompts and responses to the file. Defaults to True. + proxies (dict, optional) : Http reqiuest proxies (socks). Defaults to {}. + history_offset (int, optional): Limit conversation history to this number of last texts. Defaults to 10250. + act (str|int, optional): Awesome prompt key or index. (Used as intro). Defaults to None. + """ + self.is_conversation = is_conversation + self.max_tokens_to_sample = max_tokens + self.temperature = temperature + self.top_p = top_p + self.chat_endpoint = ( + "https://koboldai-koboldcpp-tiefighter.hf.space/api/extra/generate/stream" + ) + self.stream_chunk_size = 64 + self.timeout = timeout + self.last_response = {} + self.headers = { + "Content-Type": "application/json", + "Accept": "application/json", + } + + self.__available_optimizers = ( + method + for method in dir(Optimizers) + if callable(getattr(Optimizers, method)) and not method.startswith("__") + ) + Conversation.intro = ( + AwesomePrompts().get_act( + act, raise_not_found=True, default=None, case_insensitive=True + ) + if act + else intro or Conversation.intro + ) + self.conversation = Conversation( + is_conversation, self.max_tokens_to_sample, filepath, update_file + ) + self.conversation.history_offset = history_offset + self.session = httpx.AsyncClient(headers=self.headers, proxies=proxies) + + async def ask( + self, + prompt: str, + stream: bool = False, + raw: bool = False, + optimizer: str = None, + conversationally: bool = False, + ) -> dict | AsyncGenerator: + """Chat with AI asynchronously. + + Args: + prompt (str): Prompt to be send. + stream (bool, optional): Flag for streaming response. Defaults to False. 
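A minimal sketch of the synchronous `KOBOLDAI` provider above; it needs no API key, only network access to the hosted endpoint, and the import path is assumed to be the top-level package export.

```python
from webscout import KOBOLDAI

ai = KOBOLDAI(temperature=0.7, max_tokens=600)

# Non-streaming call: chat() returns the reply text.
print(ai.chat("Suggest a name for a CLI tool that tails log files."))

# ask() returns the provider's dict; get_message() pulls the "token" field.
response = ai.ask("And a short tagline for it?")
print(ai.get_message(response))
```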
+ raw (bool, optional): Stream back raw response as received. Defaults to False. + optimizer (str, optional): Prompt optimizer name - `[code, shell_command]`. Defaults to None. + conversationally (bool, optional): Chat conversationally when using optimizer. Defaults to False. + Returns: + dict|AsyncGenerator : ai content + ```json + { + "token" : "How may I assist you today?" + } + ``` + """ + conversation_prompt = self.conversation.gen_complete_prompt(prompt) + if optimizer: + if optimizer in self.__available_optimizers: + conversation_prompt = getattr(Optimizers, optimizer)( + conversation_prompt if conversationally else prompt + ) + else: + raise Exception( + f"Optimizer is not one of {self.__available_optimizers}" + ) + + payload = { + "prompt": conversation_prompt, + "temperature": self.temperature, + "top_p": self.top_p, + } + + async def for_stream(): + async with self.session.stream( + "POST", self.chat_endpoint, json=payload, timeout=self.timeout + ) as response: + if not response.is_success: + raise exceptions.FailedToGenerateResponseError( + f"Failed to generate response - ({response.status_code}, {response.reason_phrase})" + ) + + message_load = "" + async for value in response.aiter_lines(): + try: + resp = sanitize_stream(value) + message_load += await self.get_message(resp) + resp["token"] = message_load + self.last_response.update(resp) + yield value if raw else resp + except json.decoder.JSONDecodeError: + pass + + self.conversation.update_chat_history( + prompt, await self.get_message(self.last_response) + ) + + async def for_non_stream(): + # let's make use of stream + async for _ in for_stream(): + pass + return self.last_response + + return for_stream() if stream else await for_non_stream() + + async def chat( + self, + prompt: str, + stream: bool = False, + optimizer: str = None, + conversationally: bool = False, + ) -> str | AsyncGenerator: + """Generate response `str` asynchronously. + Args: + prompt (str): Prompt to be send. + stream (bool, optional): Flag for streaming response. Defaults to False. + optimizer (str, optional): Prompt optimizer name - `[code, shell_command]`. Defaults to None. + conversationally (bool, optional): Chat conversationally when using optimizer. Defaults to False. 
+ Returns: + str: Response generated + """ + + async def for_stream(): + async_ask = await self.ask( + prompt, True, optimizer=optimizer, conversationally=conversationally + ) + async for response in async_ask: + yield await self.get_message(response) + + async def for_non_stream(): + return await self.get_message( + await self.ask( + prompt, + False, + optimizer=optimizer, + conversationally=conversationally, + ) + ) + + return for_stream() if stream else await for_non_stream() + + async def get_message(self, response: dict) -> str: + """Retrieves message only from response + + Args: + response (dict): Response generated by `self.ask` + + Returns: + str: Message extracted + """ + assert isinstance(response, dict), "Response should be of dict data-type only" + return response.get("token") \ No newline at end of file diff --git a/webscout/Provider/Leo.py b/webscout/Provider/Leo.py new file mode 100644 index 0000000000000000000000000000000000000000..5ffc097481f361cb35a9d2e849f888361c4008c3 --- /dev/null +++ b/webscout/Provider/Leo.py @@ -0,0 +1,469 @@ +import time +import uuid +from selenium import webdriver +from selenium.webdriver.chrome.options import Options +from selenium.webdriver.common.by import By +from selenium.webdriver.support import expected_conditions as EC +from selenium.webdriver.support.ui import WebDriverWait +import click +import requests +from requests import get +from uuid import uuid4 +from re import findall +from requests.exceptions import RequestException +from curl_cffi.requests import get, RequestsError +import g4f +from random import randint +from PIL import Image +import io +import re +import json +import yaml +from ..AIutel import Optimizers +from ..AIutel import Conversation +from ..AIutel import AwesomePrompts, sanitize_stream +from ..AIbase import Provider, AsyncProvider +from Helpingai_T2 import Perplexity +from webscout import exceptions +from typing import Any, AsyncGenerator, Dict +import logging +import httpx +#--------------------------------------LEO----------------------------------------- +class LEO(Provider): + + def __init__( + self, + is_conversation: bool = True, + max_tokens: int = 600, + temperature: float = 0.2, + top_k: int = -1, + top_p: float = 0.999, + model: str = "llama-2-13b-chat", + brave_key: str = "qztbjzBqJueQZLFkwTTJrieu8Vw3789u", + timeout: int = 30, + intro: str = None, + filepath: str = None, + update_file: bool = True, + proxies: dict = {}, + history_offset: int = 10250, + act: str = None, + ): + """Instantiate TGPT + + Args: + is_conversation (str, optional): Flag for chatting conversationally. Defaults to True. + brave_key (str, optional): Brave API access key. Defaults to "qztbjzBqJueQZLFkwTTJrieu8Vw3789u". + model (str, optional): Text generation model name. Defaults to "llama-2-13b-chat". + max_tokens (int, optional): Maximum number of tokens to be generated upon completion. Defaults to 600. + temperature (float, optional): Charge of the generated text's randomness. Defaults to 0.2. + top_k (int, optional): Chance of topic being repeated. Defaults to -1. + top_p (float, optional): Sampling threshold during inference time. Defaults to 0.999. + timeput (int, optional): Http requesting timeout. Defaults to 30 + intro (str, optional): Conversation introductory prompt. Defaults to `Conversation.intro`. + filepath (str, optional): Path to file containing conversation history. Defaults to None. + update_file (bool, optional): Add new prompts and responses to the file. Defaults to True. 
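And an async counterpart with `AsyncKOBOLDAI`, streaming under `asyncio`; per the accumulation in `for_stream` above, each yielded value is the text gathered so far, so only the last value is kept.

```python
import asyncio
from webscout import AsyncKOBOLDAI


async def main() -> None:
    ai = AsyncKOBOLDAI(timeout=30)
    text = ""
    async for chunk in await ai.chat("Summarise what KoboldAI is in two sentences.", stream=True):
        text = chunk  # cumulative text so far
    print(text)


asyncio.run(main())
```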
+ proxies (dict, optional) : Http reqiuest proxies (socks). Defaults to {}. + history_offset (int, optional): Limit conversation history to this number of last texts. Defaults to 10250. + act (str|int, optional): Awesome prompt key or index. (Used as intro). Defaults to None. + """ + self.session = requests.Session() + self.is_conversation = is_conversation + self.max_tokens_to_sample = max_tokens + self.model = model + self.stop_sequences = ["", ""] + self.temperature = temperature + self.top_k = top_k + self.top_p = top_p + self.chat_endpoint = "https://ai-chat.bsg.brave.com/v1/complete" + self.stream_chunk_size = 64 + self.timeout = timeout + self.last_response = {} + self.headers = { + "Content-Type": "application/json", + "accept": "text/event-stream", + "x-brave-key": brave_key, + "accept-language": "en-US,en;q=0.9", + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:99.0) Gecko/20100101 Firefox/110.0", + } + self.__available_optimizers = ( + method + for method in dir(Optimizers) + if callable(getattr(Optimizers, method)) and not method.startswith("__") + ) + self.session.headers.update(self.headers) + Conversation.intro = ( + AwesomePrompts().get_act( + act, raise_not_found=True, default=None, case_insensitive=True + ) + if act + else intro or Conversation.intro + ) + self.conversation = Conversation( + is_conversation, self.max_tokens_to_sample, filepath, update_file + ) + self.conversation.history_offset = history_offset + self.session.proxies = proxies + self.system_prompt = ( + "\n\nYour name is Leo, a helpful" + "respectful and honest AI assistant created by the company Brave. You will be replying to a user of the Brave browser. " + "Always respond in a neutral tone. Be polite and courteous. Answer concisely in no more than 50-80 words." + "\n\nPlease ensure that your responses are socially unbiased and positive in nature." + "If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. " + "If you don't know the answer to a question, please don't share false information.\n" + ) + + def ask( + self, + prompt: str, + stream: bool = False, + raw: bool = False, + optimizer: str = None, + conversationally: bool = False, + ) -> dict: + """Chat with AI + + Args: + prompt (str): Prompt to be send. + stream (bool, optional): Flag for streaming response. Defaults to False. + raw (bool, optional): Stream back raw response as received. Defaults to False. + optimizer (str, optional): Prompt optimizer name - `[code, shell_command]`. Defaults to None. + conversationally (bool, optional): Chat conversationally when using optimizer. Defaults to False. 
+ Returns: + dict : {} + ```json + { + "completion": "\nNext: domestic cat breeds with short hair >>", + "stop_reason": null, + "truncated": false, + "stop": null, + "model": "llama-2-13b-chat", + "log_id": "cmpl-3kYiYxSNDvgMShSzFooz6t", + "exception": null + } + ``` + """ + conversation_prompt = self.conversation.gen_complete_prompt(prompt) + if optimizer: + if optimizer in self.__available_optimizers: + conversation_prompt = getattr(Optimizers, optimizer)( + conversation_prompt if conversationally else prompt + ) + else: + raise Exception( + f"Optimizer is not one of {self.__available_optimizers}" + ) + + self.session.headers.update(self.headers) + payload = { + "max_tokens_to_sample": self.max_tokens_to_sample, + "model": self.model, + "prompt": f"[INST] <>{self.system_prompt}<>{conversation_prompt} [/INST]", + "self.stop_sequence": self.stop_sequences, + "stream": stream, + "top_k": self.top_k, + "top_p": self.top_p, + } + + def for_stream(): + response = self.session.post( + self.chat_endpoint, json=payload, stream=True, timeout=self.timeout + ) + if ( + not response.ok + or not response.headers.get("Content-Type") + == "text/event-stream; charset=utf-8" + ): + raise Exception( + f"Failed to generate response - ({response.status_code}, {response.reason}) - {response.text}" + ) + + for value in response.iter_lines( + decode_unicode=True, + delimiter="" if raw else "data:", + chunk_size=self.stream_chunk_size, + ): + try: + resp = json.loads(value) + self.last_response.update(resp) + yield value if raw else resp + except json.decoder.JSONDecodeError: + pass + self.conversation.update_chat_history( + prompt, self.get_message(self.last_response) + ) + + def for_non_stream(): + response = self.session.post( + self.chat_endpoint, json=payload, stream=False, timeout=self.timeout + ) + if ( + not response.ok + or not response.headers.get("Content-Type", "") == "application/json" + ): + raise Exception( + f"Failed to generate response - ({response.status_code}, {response.reason}) - {response.text}" + ) + resp = response.json() + self.last_response.update(resp) + self.conversation.update_chat_history( + prompt, self.get_message(self.last_response) + ) + return resp + + return for_stream() if stream else for_non_stream() + + def chat( + self, + prompt: str, + stream: bool = False, + optimizer: str = None, + conversationally: bool = False, + ) -> str: + """Generate response `str` + Args: + prompt (str): Prompt to be send. + stream (bool, optional): Flag for streaming response. Defaults to False. + optimizer (str, optional): Prompt optimizer name - `[code, shell_command]`. Defaults to None. + conversationally (bool, optional): Chat conversationally when using optimizer. Defaults to False. 
+ Returns: + str: Response generated + """ + + def for_stream(): + for response in self.ask( + prompt, True, optimizer=optimizer, conversationally=conversationally + ): + yield self.get_message(response) + + def for_non_stream(): + return self.get_message( + self.ask( + prompt, + False, + optimizer=optimizer, + conversationally=conversationally, + ) + ) + + return for_stream() if stream else for_non_stream() + + def get_message(self, response: dict) -> str: + """Retrieves message only from response + + Args: + response (dict): Response generated by `self.ask` + + Returns: + str: Message extracted + """ + assert isinstance(response, dict), "Response should be of dict data-type only" + return response.get("completion") +class AsyncLEO(AsyncProvider): + def __init__( + self, + is_conversation: bool = True, + max_tokens: int = 600, + temperature: float = 0.2, + top_k: int = -1, + top_p: float = 0.999, + model: str = "llama-2-13b-chat", + brave_key: str = "qztbjzBqJueQZLFkwTTJrieu8Vw3789u", + timeout: int = 30, + intro: str = None, + filepath: str = None, + update_file: bool = True, + proxies: dict = {}, + history_offset: int = 10250, + act: str = None, + ): + """Instantiate TGPT + + Args: + is_conversation (str, optional): Flag for chatting conversationally. Defaults to True. + brave_key (str, optional): Brave API access key. Defaults to "qztbjzBqJueQZLFkwTTJrieu8Vw3789u". + model (str, optional): Text generation model name. Defaults to "llama-2-13b-chat". + max_tokens (int, optional): Maximum number of tokens to be generated upon completion. Defaults to 600. + temperature (float, optional): Charge of the generated text's randomness. Defaults to 0.2. + top_k (int, optional): Chance of topic being repeated. Defaults to -1. + top_p (float, optional): Sampling threshold during inference time. Defaults to 0.999. + timeput (int, optional): Http requesting timeout. Defaults to 30 + intro (str, optional): Conversation introductory prompt. Defaults to `Conversation.intro`. + filepath (str, optional): Path to file containing conversation history. Defaults to None. + update_file (bool, optional): Add new prompts and responses to the file. Defaults to True. + proxies (dict, optional) : Http reqiuest proxies (socks). Defaults to {}. + history_offset (int, optional): Limit conversation history to this number of last texts. Defaults to 10250. + act (str|int, optional): Awesome prompt key or index. (Used as intro). Defaults to None. 
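A usage sketch for the `LEO` provider, relying on the bundled Brave key default; the top-level import is assumed, and `ask()` is shown alongside `chat()` to illustrate the raw response dict whose `"completion"` field holds the text.

```python
from webscout import LEO

ai = LEO(max_tokens=600, temperature=0.2)

# chat() returns only the reply text.
print(ai.chat("What is the Brave browser?"))

# ask() returns the provider's dict; get_message() reads resp.get("completion").
resp = ai.ask("List two privacy features of Brave.")
print(ai.get_message(resp))
```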
+ """ + self.is_conversation = is_conversation + self.max_tokens_to_sample = max_tokens + self.model = model + self.stop_sequences = ["", ""] + self.temperature = temperature + self.top_k = top_k + self.top_p = top_p + self.chat_endpoint = "https://ai-chat.bsg.brave.com/v1/complete" + self.stream_chunk_size = 64 + self.timeout = timeout + self.last_response = {} + self.headers = { + "Content-Type": "application/json", + "accept": "text/event-stream", + "x-brave-key": brave_key, + "accept-language": "en-US,en;q=0.9", + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:99.0) Gecko/20100101 Firefox/110.0", + } + self.__available_optimizers = ( + method + for method in dir(Optimizers) + if callable(getattr(Optimizers, method)) and not method.startswith("__") + ) + Conversation.intro = ( + AwesomePrompts().get_act( + act, raise_not_found=True, default=None, case_insensitive=True + ) + if act + else intro or Conversation.intro + ) + self.conversation = Conversation( + is_conversation, self.max_tokens_to_sample, filepath, update_file + ) + self.conversation.history_offset = history_offset + self.system_prompt = ( + "\n\nYour name is Leo, a helpful" + "respectful and honest AI assistant created by the company Brave. You will be replying to a user of the Brave browser. " + "Always respond in a neutral tone. Be polite and courteous. Answer concisely in no more than 50-80 words." + "\n\nPlease ensure that your responses are socially unbiased and positive in nature." + "If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. " + "If you don't know the answer to a question, please don't share false information.\n" + ) + self.session = httpx.AsyncClient(headers=self.headers, proxies=proxies) + + async def ask( + self, + prompt: str, + stream: bool = False, + raw: bool = False, + optimizer: str = None, + conversationally: bool = False, + ) -> dict | AsyncGenerator: + """Chat with AI asynchronously. + + Args: + prompt (str): Prompt to be send. + stream (bool, optional): Flag for streaming response. Defaults to False. + raw (bool, optional): Stream back raw response as received. Defaults to False. + optimizer (str, optional): Prompt optimizer name - `[code, shell_command]`. Defaults to None. + conversationally (bool, optional): Chat conversationally when using optimizer. Defaults to False. 
+ Returns: + dict|AsyncGenerator : ai content + ```json + { + "completion": "\nNext: domestic cat breeds with short hair >>", + "stop_reason": null, + "truncated": false, + "stop": null, + "model": "llama-2-13b-chat", + "log_id": "cmpl-3kYiYxSNDvgMShSzFooz6t", + "exception": null + } + ``` + """ + conversation_prompt = self.conversation.gen_complete_prompt(prompt) + if optimizer: + if optimizer in self.__available_optimizers: + conversation_prompt = getattr(Optimizers, optimizer)( + conversation_prompt if conversationally else prompt + ) + else: + raise Exception( + f"Optimizer is not one of {self.__available_optimizers}" + ) + + payload = { + "max_tokens_to_sample": self.max_tokens_to_sample, + "model": self.model, + "prompt": f"[INST] <>{self.system_prompt}<>{conversation_prompt} [/INST]", + "self.stop_sequence": self.stop_sequences, + "stream": stream, + "top_k": self.top_k, + "top_p": self.top_p, + } + + async def for_stream(): + async with self.session.stream( + "POST", self.chat_endpoint, json=payload, timeout=self.timeout + ) as response: + if ( + not response.is_success + or not response.headers.get("Content-Type") + == "text/event-stream; charset=utf-8" + ): + raise exceptions.FailedToGenerateResponseError( + f"Failed to generate response - ({response.status_code}, {response.reason_phrase})" + ) + async for value in response.aiter_lines(): + try: + resp = sanitize_stream(value) + self.last_response.update(resp) + yield value if raw else resp + except json.decoder.JSONDecodeError: + pass + + self.conversation.update_chat_history( + prompt, await self.get_message(self.last_response) + ) + + async def for_non_stream(): + async for _ in for_stream(): + pass + return self.last_response + + return for_stream() if stream else await for_non_stream() + + async def chat( + self, + prompt: str, + stream: bool = False, + optimizer: str = None, + conversationally: bool = False, + ) -> str | AsyncGenerator: + """Generate response `str` asynchronously. + Args: + prompt (str): Prompt to be send. + stream (bool, optional): Flag for streaming response. Defaults to False. + optimizer (str, optional): Prompt optimizer name - `[code, shell_command]`. Defaults to None. + conversationally (bool, optional): Chat conversationally when using optimizer. Defaults to False. 
+ Returns: + str|AsyncGenerator: Response generated + """ + + async def for_stream(): + async_ask = await self.ask( + prompt, True, optimizer=optimizer, conversationally=conversationally + ) + async for response in async_ask: + yield await self.get_message(response) + + async def for_non_stream(): + return await self.get_message( + await self.ask( + prompt, + False, + optimizer=optimizer, + conversationally=conversationally, + ) + ) + + return for_stream() if stream else await for_non_stream() + + async def get_message(self, response: dict) -> str: + """Retrieves message only from response + + Args: + response (dict): Response generated by `self.ask` + + Returns: + str: Message extracted + """ + assert isinstance(response, dict), "Response should be of dict data-type only" + return response.get("completion") \ No newline at end of file diff --git a/webscout/Provider/Llama2.py b/webscout/Provider/Llama2.py new file mode 100644 index 0000000000000000000000000000000000000000..42121dd708895a72995a2313adab23a40cfd0f98 --- /dev/null +++ b/webscout/Provider/Llama2.py @@ -0,0 +1,437 @@ +import time +import uuid +from selenium import webdriver +from selenium.webdriver.chrome.options import Options +from selenium.webdriver.common.by import By +from selenium.webdriver.support import expected_conditions as EC +from selenium.webdriver.support.ui import WebDriverWait +import click +import requests +from requests import get +from uuid import uuid4 +from re import findall +from requests.exceptions import RequestException +from curl_cffi.requests import get, RequestsError +import g4f +from random import randint +from PIL import Image +import io +import re +import json +import yaml +from ..AIutel import Optimizers +from ..AIutel import Conversation +from ..AIutel import AwesomePrompts, sanitize_stream +from ..AIbase import Provider, AsyncProvider +from Helpingai_T2 import Perplexity +from webscout import exceptions +from typing import Any, AsyncGenerator, Dict +import logging +import httpx + +class AsyncLLAMA2(AsyncProvider): + def __init__( + self, + is_conversation: bool = True, + max_tokens: int = 800, + temperature: float = 0.75, + presence_penalty: int = 0, + frequency_penalty: int = 0, + top_p: float = 0.9, + model: str = "meta/meta-llama-3-70b-instruct", + timeout: int = 30, + intro: str = None, + filepath: str = None, + update_file: bool = True, + proxies: dict = {}, + history_offset: int = 10250, + act: str = None, + ): + """Instantiates LLAMA2 + + Args: + is_conversation (bool, optional): Flag for chatting conversationally. Defaults to True. + max_tokens (int, optional): Maximum number of tokens to be generated upon completion. Defaults to 800. + temperature (float, optional): Charge of the generated text's randomness. Defaults to 0.75. + presence_penalty (int, optional): Chances of topic being repeated. Defaults to 0. + frequency_penalty (int, optional): Chances of word being repeated. Defaults to 0. + top_p (float, optional): Sampling threshold during inference time. Defaults to 0.9. + model (str, optional): LLM model name. Defaults to "meta/llama-2-70b-chat". + timeout (int, optional): Http request timeout. Defaults to 30. + intro (str, optional): Conversation introductory prompt. Defaults to None. + filepath (str, optional): Path to file containing conversation history. Defaults to None. + update_file (bool, optional): Add new prompts and responses to the file. Defaults to True. + proxies (dict, optional): Http request proxies. Defaults to {}. 
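An async sketch with `AsyncLEO` that also wraps the call in the `FailedToGenerateResponseError` this class raises on HTTP failures (exception name taken from the code above; the concatenation of streamed `"completion"` chunks is an assumption about the upstream SSE format).

```python
import asyncio
from webscout import AsyncLEO
from webscout import exceptions


async def main() -> None:
    ai = AsyncLEO(timeout=30)
    try:
        text = ""
        async for chunk in await ai.chat("Give one tip for writing good commit messages.", stream=True):
            text += chunk or ""  # concatenate streamed completion chunks
        print(text)
    except exceptions.FailedToGenerateResponseError as err:
        print(f"LEO request failed: {err}")


asyncio.run(main())
```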
+ history_offset (int, optional): Limit conversation history to this number of last texts. Defaults to 10250. + act (str|int, optional): Awesome prompt key or index. (Used as intro). Defaults to None. + """ + self.is_conversation = is_conversation + self.max_tokens_to_sample = max_tokens + self.model = model + self.temperature = temperature + self.presence_penalty = presence_penalty + self.frequency_penalty = frequency_penalty + self.top_p = top_p + self.chat_endpoint = "https://www.llama2.ai/api" + self.stream_chunk_size = 64 + self.timeout = timeout + self.last_response = {} + self.headers = { + "Content-Type": "application/json", + "Referer": "https://www.llama2.ai/", + "Content-Type": "text/plain;charset=UTF-8", + "Origin": "https://www.llama2.ai", + } + + self.__available_optimizers = ( + method + for method in dir(Optimizers) + if callable(getattr(Optimizers, method)) and not method.startswith("__") + ) + Conversation.intro = ( + AwesomePrompts().get_act( + act, raise_not_found=True, default=None, case_insensitive=True + ) + if act + else intro or Conversation.intro + ) + self.conversation = Conversation( + is_conversation, self.max_tokens_to_sample, filepath, update_file + ) + self.conversation.history_offset = history_offset + self.session = httpx.AsyncClient( + headers=self.headers, + proxies=proxies, + ) + + async def ask( + self, + prompt: str, + stream: bool = False, + raw: bool = False, + optimizer: str = None, + conversationally: bool = False, + ) -> dict | AsyncGenerator: + """Chat with AI asynchronously. + + Args: + prompt (str): Prompt to be send. + stream (bool, optional): Flag for streaming response. Defaults to False. + raw (bool, optional): Stream back raw response as received. Defaults to False. + optimizer (str, optional): Prompt optimizer name - `[code, shell_command]`. Defaults to None. + conversationally (bool, optional): Chat conversationally when using optimizer. Defaults to False. + Returns: + dict|AsyncGeneraror[dict] : ai content + ```json + { + "text" : "How may I help you today?" 
+ } + ``` + """ + conversation_prompt = self.conversation.gen_complete_prompt(prompt) + if optimizer: + if optimizer in self.__available_optimizers: + conversation_prompt = getattr(Optimizers, optimizer)( + conversation_prompt if conversationally else prompt + ) + else: + raise Exception( + f"Optimizer is not one of {self.__available_optimizers}" + ) + + payload = { + "prompt": f"{conversation_prompt}[INST] {prompt} [/INST]", + "model": self.model, + "systemPrompt": "You are a helpful assistant.", + "temperature": self.temperature, + "topP": self.top_p, + "maxTokens": self.max_tokens_to_sample, + "image": None, + "audio": None, + } + + async def for_stream(): + async with self.session.stream( + "POST", self.chat_endpoint, json=payload, timeout=self.timeout + ) as response: + if ( + not response.is_success + or not response.headers.get("Content-Type") + == "text/plain; charset=utf-8" + ): + raise exceptions.FailedToGenerateResponseError( + f"Failed to generate response - ({response.status_code}, {response.reason_phrase})" + ) + message_load: str = "" + async for value in response.aiter_lines(): + try: + if bool(value.strip()): + message_load += value + "\n" + resp: dict = dict(text=message_load) + yield value if raw else resp + self.last_response.update(resp) + except json.decoder.JSONDecodeError: + pass + self.conversation.update_chat_history( + prompt, await self.get_message(self.last_response) + ) + + async def for_non_stream(): + async for _ in for_stream(): + pass + return self.last_response + + return for_stream() if stream else await for_non_stream() + + async def chat( + self, + prompt: str, + stream: bool = False, + optimizer: str = None, + conversationally: bool = False, + ) -> str | AsyncGenerator: + """Generate response `str` asynchronously. + Args: + prompt (str): Prompt to be send. + stream (bool, optional): Flag for streaming response. Defaults to False. + optimizer (str, optional): Prompt optimizer name - `[code, shell_command]`. Defaults to None. + conversationally (bool, optional): Chat conversationally when using optimizer. Defaults to False. + Returns: + str|AsyncGenerator: Response generated + """ + + async def for_stream(): + async_ask = await self.ask( + prompt, True, optimizer=optimizer, conversationally=conversationally + ) + async for response in async_ask: + yield await self.get_message(response) + + async def for_non_stream(): + return await self.get_message( + await self.ask( + prompt, + False, + optimizer=optimizer, + conversationally=conversationally, + ) + ) + + return for_stream() if stream else await for_non_stream() + + async def get_message(self, response: dict) -> str: + """Retrieves message only from response + + Args: + response (str): Response generated by `self.ask` + + Returns: + str: Message extracted + """ + assert isinstance(response, dict), "Response should be of dict data-type only" + return response["text"] +class LLAMA2(Provider): + def __init__( + self, + is_conversation: bool = True, + max_tokens: int = 800, + temperature: float = 0.75, + presence_penalty: int = 0, + frequency_penalty: int = 0, + top_p: float = 0.9, + model: str = "meta/meta-llama-3-70b-instruct", + timeout: int = 30, + intro: str = None, + filepath: str = None, + update_file: bool = True, + proxies: dict = {}, + history_offset: int = 10250, + act: str = None, + ): + """Instantiates LLAMA2 + + Args: + is_conversation (bool, optional): Flag for chatting conversationally. Defaults to True. + max_tokens (int, optional): Maximum number of tokens to be generated upon completion. 
Defaults to 800. + temperature (float, optional): Charge of the generated text's randomness. Defaults to 0.75. + presence_penalty (int, optional): Chances of topic being repeated. Defaults to 0. + frequency_penalty (int, optional): Chances of word being repeated. Defaults to 0. + top_p (float, optional): Sampling threshold during inference time. Defaults to 0.9. + model (str, optional): LLM model name. Defaults to "meta/llama-2-70b-chat". + timeout (int, optional): Http request timeout. Defaults to 30. + intro (str, optional): Conversation introductory prompt. Defaults to None. + filepath (str, optional): Path to file containing conversation history. Defaults to None. + update_file (bool, optional): Add new prompts and responses to the file. Defaults to True. + proxies (dict, optional): Http request proxies. Defaults to {}. + history_offset (int, optional): Limit conversation history to this number of last texts. Defaults to 10250. + act (str|int, optional): Awesome prompt key or index. (Used as intro). Defaults to None. + """ + self.session = requests.Session() + self.is_conversation = is_conversation + self.max_tokens_to_sample = max_tokens + self.model = model + self.temperature = temperature + self.presence_penalty = presence_penalty + self.frequency_penalty = frequency_penalty + self.top_p = top_p + self.chat_endpoint = "https://www.llama2.ai/api" + self.stream_chunk_size = 64 + self.timeout = timeout + self.last_response = {} + self.headers = { + "Content-Type": "application/json", + "Referer": "https://www.llama2.ai/", + "Content-Type": "text/plain;charset=UTF-8", + "Origin": "https://www.llama2.ai", + } + + self.__available_optimizers = ( + method + for method in dir(Optimizers) + if callable(getattr(Optimizers, method)) and not method.startswith("__") + ) + self.session.headers.update(self.headers) + Conversation.intro = ( + AwesomePrompts().get_act( + act, raise_not_found=True, default=None, case_insensitive=True + ) + if act + else intro or Conversation.intro + ) + self.conversation = Conversation( + is_conversation, self.max_tokens_to_sample, filepath, update_file + ) + self.conversation.history_offset = history_offset + self.session.proxies = proxies + + def ask( + self, + prompt: str, + stream: bool = False, + raw: bool = False, + optimizer: str = None, + conversationally: bool = False, + ) -> dict: + """Chat with AI + + Args: + prompt (str): Prompt to be send. + stream (bool, optional): Flag for streaming response. Defaults to False. + raw (bool, optional): Stream back raw response as received. Defaults to False. + optimizer (str, optional): Prompt optimizer name - `[code, shell_command]`. Defaults to None. + conversationally (bool, optional): Chat conversationally when using optimizer. Defaults to False. + Returns: + dict : {} + ```json + { + "text" : "How may I help you today?" 
+ } + ``` + """ + conversation_prompt = self.conversation.gen_complete_prompt(prompt) + if optimizer: + if optimizer in self.__available_optimizers: + conversation_prompt = getattr(Optimizers, optimizer)( + conversation_prompt if conversationally else prompt + ) + else: + raise Exception( + f"Optimizer is not one of {self.__available_optimizers}" + ) + self.session.headers.update(self.headers) + + payload = { + "prompt": f"{conversation_prompt}[INST] {prompt} [/INST]", + "model": self.model, + "systemPrompt": "You are a helpful assistant.", + "temperature": self.temperature, + "topP": self.top_p, + "maxTokens": self.max_tokens_to_sample, + "image": None, + "audio": None, + } + + def for_stream(): + response = self.session.post( + self.chat_endpoint, json=payload, stream=True, timeout=self.timeout + ) + if ( + not response.ok + or not response.headers.get("Content-Type") + == "text/plain; charset=utf-8" + ): + raise exceptions.FailedToGenerateResponseError( + f"Failed to generate response - ({response.status_code}, {response.reason})" + ) + + message_load: str = "" + for value in response.iter_lines( + decode_unicode=True, + delimiter="\n", + chunk_size=self.stream_chunk_size, + ): + try: + if bool(value.strip()): + message_load += value + "\n" + resp: dict = dict(text=message_load) + yield value if raw else resp + self.last_response.update(resp) + except json.decoder.JSONDecodeError: + pass + self.conversation.update_chat_history( + prompt, self.get_message(self.last_response) + ) + + def for_non_stream(): + for _ in for_stream(): + pass + return self.last_response + + return for_stream() if stream else for_non_stream() + + def chat( + self, + prompt: str, + stream: bool = False, + optimizer: str = None, + conversationally: bool = False, + ) -> str: + """Generate response `str` + Args: + prompt (str): Prompt to be send. + stream (bool, optional): Flag for streaming response. Defaults to False. + optimizer (str, optional): Prompt optimizer name - `[code, shell_command]`. Defaults to None. + conversationally (bool, optional): Chat conversationally when using optimizer. Defaults to False. 
+ Returns: + str: Response generated + """ + + def for_stream(): + for response in self.ask( + prompt, True, optimizer=optimizer, conversationally=conversationally + ): + yield self.get_message(response) + + def for_non_stream(): + return self.get_message( + self.ask( + prompt, + False, + optimizer=optimizer, + conversationally=conversationally, + ) + ) + + return for_stream() if stream else for_non_stream() + + def get_message(self, response: dict) -> str: + """Retrieves message only from response + + Args: + response (str): Response generated by `self.ask` + + Returns: + str: Message extracted + """ + assert isinstance(response, dict), "Response should be of dict data-type only" + return response["text"] \ No newline at end of file diff --git a/webscout/Provider/OpenGPT.py b/webscout/Provider/OpenGPT.py new file mode 100644 index 0000000000000000000000000000000000000000..df73999bfed27d1c7077ced52577845f7b839401 --- /dev/null +++ b/webscout/Provider/OpenGPT.py @@ -0,0 +1,487 @@ +import time +import uuid +from selenium import webdriver +from selenium.webdriver.chrome.options import Options +from selenium.webdriver.common.by import By +from selenium.webdriver.support import expected_conditions as EC +from selenium.webdriver.support.ui import WebDriverWait +import click +import requests +from requests import get +from uuid import uuid4 +from re import findall +from requests.exceptions import RequestException +from curl_cffi.requests import get, RequestsError +import g4f +from random import randint +from PIL import Image +import io +import re +import json +import yaml +from ..AIutel import Optimizers +from ..AIutel import Conversation +from ..AIutel import AwesomePrompts, sanitize_stream +from ..AIbase import Provider, AsyncProvider +from Helpingai_T2 import Perplexity +from webscout import exceptions +from typing import Any, AsyncGenerator, Dict +import logging +import httpx +#------------------------------------------------------OpenGPT----------------------------------------------------------- +class OPENGPT: + def __init__( + self, + assistant_id, + is_conversation: bool = True, + max_tokens: int = 600, + timeout: int = 30, + intro: str = None, + filepath: str = None, + update_file: bool = True, + proxies: dict = {}, + history_offset: int = 10250, + act: str = None, + ): + """Instantiates OPENGPT + + Args: + is_conversation (bool, optional): Flag for chatting conversationally. Defaults to True + max_tokens (int, optional): Maximum number of tokens to be generated upon completion. Defaults to 600. + timeout (int, optional): Http request timeout. Defaults to 30. + intro (str, optional): Conversation introductory prompt. Defaults to None. + filepath (str, optional): Path to file containing conversation history. Defaults to None. + update_file (bool, optional): Add new prompts and responses to the file. Defaults to True. + proxies (dict, optional): Http request proxies. Defaults to {}. + history_offset (int, optional): Limit conversation history to this number of last texts. Defaults to 10250. + act (str|int, optional): Awesome prompt key or index. (Used as intro). Defaults to None. 
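A sketch of the synchronous `LLAMA2` provider, including the `optimizer` hook; `"code"` and `"shell_command"` are the optimizer names the docstrings above list, and the top-level import path is assumed.

```python
from webscout import LLAMA2

ai = LLAMA2(model="meta/meta-llama-3-70b-instruct", max_tokens=800)

# Plain chat.
print(ai.chat("What does the [INST] ... [/INST] wrapper do for Llama-style models?"))

# With a prompt optimizer ("code" or "shell_command" per the docstrings above).
print(ai.chat("read a CSV file and print its header row", optimizer="code"))
```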
+ """ + self.session = requests.Session() + self.max_tokens_to_sample = max_tokens + self.is_conversation = is_conversation + self.chat_endpoint = ( + "https://opengpts-example-vz4y4ooboq-uc.a.run.app/runs/stream" + ) + self.stream_chunk_size = 64 + self.timeout = timeout + self.last_response = {} + self.assistant_id = assistant_id + self.authority = "opengpts-example-vz4y4ooboq-uc.a.run.app" + + self.headers = { + "authority": self.authority, + "accept": "text/event-stream", + "accept-language": "en-US,en;q=0.7", + "cache-control": "no-cache", + "content-type": "application/json", + "origin": "https://opengpts-example-vz4y4ooboq-uc.a.run.app", + "pragma": "no-cache", + "referer": "https://opengpts-example-vz4y4ooboq-uc.a.run.app/", + "sec-fetch-site": "same-origin", + "sec-gpc": "1", + "user-agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36", + } + + self.__available_optimizers = ( + method + for method in dir(Optimizers) + if callable(getattr(Optimizers, method)) and not method.startswith("__") + ) + self.session.headers.update(self.headers) + Conversation.intro = ( + AwesomePrompts().get_act( + act, raise_not_found=True, default=None, case_insensitive=True + ) + if act + else intro or Conversation.intro + ) + self.conversation = Conversation( + is_conversation, self.max_tokens_to_sample, filepath, update_file + ) + self.conversation.history_offset = history_offset + self.session.proxies = proxies + + def ask( + self, + prompt: str, + stream: bool = False, + raw: bool = False, + optimizer: str = None, + conversationally: bool = False, + ) -> dict: + """Chat with AI + + Args: + prompt (str): Prompt to be send. + stream (bool, optional): Flag for streaming response. Defaults to False. + raw (bool, optional): Stream back raw response as received. Defaults to False. + optimizer (str, optional): Prompt optimizer name - `[code, shell_command]`. Defaults to None. + conversationally (bool, optional): Chat conversationally when using optimizer. Defaults to False. + Returns: + dict : {} + ```json + { + "messages": [ + { + "content": "Hello there", + "additional_kwargs": {}, + "type": "human", + "example": false + }, + { + "content": "Hello! How can I assist you today?", + "additional_kwargs": { + "agent": { + "return_values": { + "output": "Hello! How can I assist you today?" + }, + "log": "Hello! 
How can I assist you today?", + "type": "AgentFinish" + } + }, + "type": "ai", + "example": false + }] + } + ``` + """ + conversation_prompt = self.conversation.gen_complete_prompt(prompt) + if optimizer: + if optimizer in self.__available_optimizers: + conversation_prompt = getattr(Optimizers, optimizer)( + conversation_prompt if conversationally else prompt + ) + else: + raise Exception( + f"Optimizer is not one of {self.__available_optimizers}" + ) + + self.session.headers.update(self.headers) + self.session.headers.update( + dict( + cookie=f"opengpts_user_id={uuid4().__str__()}", + ) + ) + payload = { + "input": [ + { + "content": conversation_prompt, + "additional_kwargs": {}, + "type": "human", + "example": False, + }, + ], + "assistant_id": self.assistant_id, + "thread_id": "", + } + + def for_stream(): + response = self.session.post( + self.chat_endpoint, json=payload, stream=True, timeout=self.timeout + ) + if ( + not response.ok + or not response.headers.get("Content-Type") + == "text/event-stream; charset=utf-8" + ): + raise Exception( + f"Failed to generate response - ({response.status_code}, {response.reason}) - {response.text}" + ) + + for value in response.iter_lines( + decode_unicode=True, + chunk_size=self.stream_chunk_size, + ): + try: + modified_value = re.sub("data:", "", value) + resp = json.loads(modified_value) + if len(resp) == 1: + continue + self.last_response.update(resp[1]) + yield value if raw else resp[1] + except json.decoder.JSONDecodeError: + pass + self.conversation.update_chat_history( + prompt, self.get_message(self.last_response) + ) + + def for_non_stream(): + for _ in for_stream(): + pass + return self.last_response + + return for_stream() if stream else for_non_stream() + + def chat( + self, + prompt: str, + stream: bool = False, + optimizer: str = None, + conversationally: bool = False, + ) -> str: + """Generate response `str` + Args: + prompt (str): Prompt to be send. + stream (bool, optional): Flag for streaming response. Defaults to False. + optimizer (str, optional): Prompt optimizer name - `[code, shell_command]`. Defaults to None. + conversationally (bool, optional): Chat conversationally when using optimizer. Defaults to False. + Returns: + str: Response generated + """ + + def for_stream(): + for response in self.ask( + prompt, True, optimizer=optimizer, conversationally=conversationally + ): + yield self.get_message(response) + + def for_non_stream(): + return self.get_message( + self.ask( + prompt, + False, + optimizer=optimizer, + conversationally=conversationally, + ) + ) + + return for_stream() if stream else for_non_stream() + + def get_message(self, response: dict) -> str: + """Retrieves message only from response + + Args: + response (dict): Response generated by `self.ask` + + Returns: + str: Message extracted + """ + assert isinstance(response, dict), "Response should be of dict data-type only" + return response["content"] +class AsyncOPENGPT(AsyncProvider): + def __init__( + self, + is_conversation: bool = True, + max_tokens: int = 600, + timeout: int = 30, + intro: str = None, + filepath: str = None, + update_file: bool = True, + proxies: dict = {}, + history_offset: int = 10250, + act: str = None, + ): + """Instantiates OPENGPT + + Args: + is_conversation (bool, optional): Flag for chatting conversationally. Defaults to True + max_tokens (int, optional): Maximum number of tokens to be generated upon completion. Defaults to 600. + timeout (int, optional): Http request timeout. Defaults to 30. 
+ intro (str, optional): Conversation introductory prompt. Defaults to None. + filepath (str, optional): Path to file containing conversation history. Defaults to None. + update_file (bool, optional): Add new prompts and responses to the file. Defaults to True. + proxies (dict, optional): Http request proxies. Defaults to {}. + history_offset (int, optional): Limit conversation history to this number of last texts. Defaults to 10250. + act (str|int, optional): Awesome prompt key or index. (Used as intro). Defaults to None. + """ + self.max_tokens_to_sample = max_tokens + self.is_conversation = is_conversation + self.chat_endpoint = ( + "https://opengpts-example-vz4y4ooboq-uc.a.run.app/runs/stream" + ) + self.stream_chunk_size = 64 + self.timeout = timeout + self.last_response = {} + self.assistant_id = "bca37014-6f97-4f2b-8928-81ea8d478d88" + self.authority = "opengpts-example-vz4y4ooboq-uc.a.run.app" + + self.headers = { + "authority": self.authority, + "accept": "text/event-stream", + "accept-language": "en-US,en;q=0.7", + "cache-control": "no-cache", + "content-type": "application/json", + "origin": "https://opengpts-example-vz4y4ooboq-uc.a.run.app", + "pragma": "no-cache", + "referer": "https://opengpts-example-vz4y4ooboq-uc.a.run.app/", + "sec-fetch-site": "same-origin", + "sec-gpc": "1", + "user-agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36", + } + + self.__available_optimizers = ( + method + for method in dir(Optimizers) + if callable(getattr(Optimizers, method)) and not method.startswith("__") + ) + Conversation.intro = ( + AwesomePrompts().get_act( + act, raise_not_found=True, default=None, case_insensitive=True + ) + if act + else intro or Conversation.intro + ) + self.conversation = Conversation( + is_conversation, self.max_tokens_to_sample, filepath, update_file + ) + self.conversation.history_offset = history_offset + self.session = httpx.AsyncClient(headers=self.headers, proxies=proxies) + + async def ask( + self, + prompt: str, + stream: bool = False, + raw: bool = False, + optimizer: str = None, + conversationally: bool = False, + ) -> dict | AsyncGenerator: + """Chat with AI asynchronously + + Args: + prompt (str): Prompt to be send. + stream (bool, optional): Flag for streaming response. Defaults to False. + raw (bool, optional): Stream back raw response as received. Defaults to False. + optimizer (str, optional): Prompt optimizer name - `[code, shell_command]`. Defaults to None. + conversationally (bool, optional): Chat conversationally when using optimizer. Defaults to False. + Returns: + dict|AsyncGenerator : ai content. + ```json + { + "messages": [ + { + "content": "Hello there", + "additional_kwargs": {}, + "type": "human", + "example": false + }, + { + "content": "Hello! How can I assist you today?", + "additional_kwargs": { + "agent": { + "return_values": { + "output": "Hello! How can I assist you today?" + }, + "log": "Hello! 
How can I assist you today?", + "type": "AgentFinish" + } + }, + "type": "ai", + "example": false + }] + } + ``` + """ + conversation_prompt = self.conversation.gen_complete_prompt(prompt) + if optimizer: + if optimizer in self.__available_optimizers: + conversation_prompt = getattr(Optimizers, optimizer)( + conversation_prompt if conversationally else prompt + ) + else: + raise Exception( + f"Optimizer is not one of {self.__available_optimizers}" + ) + self.headers.update( + dict( + cookie=f"opengpts_user_id={uuid4().__str__()}", + ) + ) + payload = { + "input": [ + { + "content": conversation_prompt, + "additional_kwargs": {}, + "type": "human", + "example": False, + }, + ], + "assistant_id": self.assistant_id, + "thread_id": "", + } + + async def for_stream(): + async with self.session.stream( + "POST", + self.chat_endpoint, + json=payload, + timeout=self.timeout, + headers=self.headers, + ) as response: + if ( + not response.is_success + or not response.headers.get("Content-Type") + == "text/event-stream; charset=utf-8" + ): + raise exceptions.FailedToGenerateResponseError( + f"Failed to generate response - ({response.status_code}, {response.reason_phrase}) - {response.text}" + ) + + async for value in response.aiter_lines(): + try: + modified_value = re.sub("data:", "", value) + resp = json.loads(modified_value) + if len(resp) == 1: + continue + self.last_response.update(resp[1]) + yield value if raw else resp[1] + except json.decoder.JSONDecodeError: + pass + + self.conversation.update_chat_history( + prompt, await self.get_message(self.last_response) + ) + + async def for_non_stream(): + async for _ in for_stream(): + pass + return self.last_response + + return for_stream() if stream else await for_non_stream() + + async def chat( + self, + prompt: str, + stream: bool = False, + optimizer: str = None, + conversationally: bool = False, + ) -> str | AsyncGenerator: + """Generate response `str` asynchronously. + Args: + prompt (str): Prompt to be send. + stream (bool, optional): Flag for streaming response. Defaults to False. + optimizer (str, optional): Prompt optimizer name - `[code, shell_command]`. Defaults to None. + conversationally (bool, optional): Chat conversationally when using optimizer. Defaults to False. 
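+
+        Example:
+            A minimal sketch of the non-streaming call, driven with `asyncio.run`.
+            ```python
+            import asyncio
+            from webscout import AsyncOPENGPT
+
+            bot = AsyncOPENGPT()
+            print(asyncio.run(bot.chat("Hello there")))
+            ```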
+ Returns: + str|AsyncGenerator: Response generated + """ + + async def for_stream(): + async_ask = await self.ask( + prompt, True, optimizer=optimizer, conversationally=conversationally + ) + async for response in async_ask: + yield await self.get_message(response) + + async def for_non_stream(): + return await self.get_message( + await self.ask( + prompt, + False, + optimizer=optimizer, + conversationally=conversationally, + ) + ) + + return for_stream() if stream else await for_non_stream() + + async def get_message(self, response: dict) -> str: + """Retrieves message only from response + + Args: + response (dict): Response generated by `self.ask` + + Returns: + str: Message extracted + """ + assert isinstance(response, dict), "Response should be of dict data-type only" + return response["content"] \ No newline at end of file diff --git a/webscout/Provider/Openai.py b/webscout/Provider/Openai.py new file mode 100644 index 0000000000000000000000000000000000000000..f3cf8bf20fc895ecbad5894d1dc9f3f646716833 --- /dev/null +++ b/webscout/Provider/Openai.py @@ -0,0 +1,511 @@ +import time +import uuid +from selenium import webdriver +from selenium.webdriver.chrome.options import Options +from selenium.webdriver.common.by import By +from selenium.webdriver.support import expected_conditions as EC +from selenium.webdriver.support.ui import WebDriverWait +import click +import requests +from requests import get +from uuid import uuid4 +from re import findall +from requests.exceptions import RequestException +from curl_cffi.requests import get, RequestsError +import g4f +from random import randint +from PIL import Image +import io +import re +import json +import yaml +from ..AIutel import Optimizers +from ..AIutel import Conversation +from ..AIutel import AwesomePrompts, sanitize_stream +from ..AIbase import Provider, AsyncProvider +from Helpingai_T2 import Perplexity +from webscout import exceptions +from typing import Any, AsyncGenerator, Dict +import logging +import httpx +#----------------------------------------------------------OpenAI----------------------------------- +class OPENAI(Provider): + def __init__( + self, + api_key: str, + is_conversation: bool = True, + max_tokens: int = 600, + temperature: float = 1, + presence_penalty: int = 0, + frequency_penalty: int = 0, + top_p: float = 1, + model: str = "gpt-3.5-turbo", + timeout: int = 30, + intro: str = None, + filepath: str = None, + update_file: bool = True, + proxies: dict = {}, + history_offset: int = 10250, + act: str = None, + ): + """Instantiates OPENAI + + Args: + api_key (key): OpenAI's API key. + is_conversation (bool, optional): Flag for chatting conversationally. Defaults to True. + max_tokens (int, optional): Maximum number of tokens to be generated upon completion. Defaults to 600. + temperature (float, optional): Charge of the generated text's randomness. Defaults to 1. + presence_penalty (int, optional): Chances of topic being repeated. Defaults to 0. + frequency_penalty (int, optional): Chances of word being repeated. Defaults to 0. + top_p (float, optional): Sampling threshold during inference time. Defaults to 0.999. + model (str, optional): LLM model name. Defaults to "gpt-3.5-turbo". + timeout (int, optional): Http request timeout. Defaults to 30. + intro (str, optional): Conversation introductory prompt. Defaults to None. + filepath (str, optional): Path to file containing conversation history. Defaults to None. + update_file (bool, optional): Add new prompts and responses to the file. Defaults to True. 
+ proxies (dict, optional): Http request proxies. Defaults to {}. + history_offset (int, optional): Limit conversation history to this number of last texts. Defaults to 10250. + act (str|int, optional): Awesome prompt key or index. (Used as intro). Defaults to None. + """ + self.is_conversation = is_conversation + self.max_tokens_to_sample = max_tokens + self.api_key = api_key + self.model = model + self.temperature = temperature + self.presence_penalty = presence_penalty + self.frequency_penalty = frequency_penalty + self.top_p = top_p + self.chat_endpoint = "https://api.openai.com/v1/chat/completions" + self.stream_chunk_size = 64 + self.timeout = timeout + self.last_response = {} + self.headers = { + "Content-Type": "application/json", + "Authorization": f"Bearer {self.api_key}", + } + + self.__available_optimizers = ( + method + for method in dir(Optimizers) + if callable(getattr(Optimizers, method)) and not method.startswith("__") + ) + self.session.headers.update(self.headers) + Conversation.intro = ( + AwesomePrompts().get_act( + act, raise_not_found=True, default=None, case_insensitive=True + ) + if act + else intro or Conversation.intro + ) + self.conversation = Conversation( + is_conversation, self.max_tokens_to_sample, filepath, update_file + ) + self.conversation.history_offset = history_offset + self.session.proxies = proxies + + def ask( + self, + prompt: str, + stream: bool = False, + raw: bool = False, + optimizer: str = None, + conversationally: bool = False, + ) -> dict: + """Chat with AI + + Args: + prompt (str): Prompt to be send. + stream (bool, optional): Flag for streaming response. Defaults to False. + raw (bool, optional): Stream back raw response as received. Defaults to False. + optimizer (str, optional): Prompt optimizer name - `[code, shell_command]`. Defaults to None. + conversationally (bool, optional): Chat conversationally when using optimizer. Defaults to False. + Returns: + dict : {} + ```json + { + "id": "chatcmpl-TaREJpBZsRVQFRFic1wIA7Q7XfnaD", + "object": "chat.completion", + "created": 1704623244, + "model": "gpt-3.5-turbo", + "usage": { + "prompt_tokens": 0, + "completion_tokens": 0, + "total_tokens": 0 + }, + "choices": [ + { + "message": { + "role": "assistant", + "content": "Hello! How can I assist you today?" 
+ }, + "finish_reason": "stop", + "index": 0 + } + ] + } + ``` + """ + conversation_prompt = self.conversation.gen_complete_prompt(prompt) + if optimizer: + if optimizer in self.__available_optimizers: + conversation_prompt = getattr(Optimizers, optimizer)( + conversation_prompt if conversationally else prompt + ) + else: + raise exceptions.FailedToGenerateResponseError( + f"Optimizer is not one of {self.__available_optimizers}" + ) + self.session.headers.update(self.headers) + payload = { + "frequency_penalty": self.frequency_penalty, + "messages": [{"content": conversation_prompt, "role": "user"}], + "model": self.model, + "presence_penalty": self.presence_penalty, + "stream": stream, + "temperature": self.temperature, + "top_p": self.top_p, + } + + def for_stream(): + response = self.session.post( + self.chat_endpoint, json=payload, stream=True, timeout=self.timeout + ) + if not response.ok: + raise exceptions.FailedToGenerateResponseError( + f"Failed to generate response - ({response.status_code}, {response.reason}) - {response.text}" + ) + + message_load = "" + for value in response.iter_lines( + decode_unicode=True, + delimiter="" if raw else "data:", + chunk_size=self.stream_chunk_size, + ): + try: + resp = json.loads(value) + incomplete_message = self.get_message(resp) + if incomplete_message: + message_load += incomplete_message + resp["choices"][0]["delta"]["content"] = message_load + self.last_response.update(resp) + yield value if raw else resp + elif raw: + yield value + except json.decoder.JSONDecodeError: + pass + self.conversation.update_chat_history( + prompt, self.get_message(self.last_response) + ) + + def for_non_stream(): + response = self.session.post( + self.chat_endpoint, json=payload, stream=False, timeout=self.timeout + ) + if ( + not response.ok + or not response.headers.get("Content-Type", "") == "application/json" + ): + raise exceptions.FailedToGenerateResponseError( + f"Failed to generate response - ({response.status_code}, {response.reason}) - {response.text}" + ) + resp = response.json() + self.last_response.update(resp) + self.conversation.update_chat_history( + prompt, self.get_message(self.last_response) + ) + return resp + + return for_stream() if stream else for_non_stream() + + def chat( + self, + prompt: str, + stream: bool = False, + optimizer: str = None, + conversationally: bool = False, + ) -> str: + """Generate response `str` + Args: + prompt (str): Prompt to be send. + stream (bool, optional): Flag for streaming response. Defaults to False. + optimizer (str, optional): Prompt optimizer name - `[code, shell_command]`. Defaults to None. + conversationally (bool, optional): Chat conversationally when using optimizer. Defaults to False. 
+ Returns: + str: Response generated + """ + + def for_stream(): + for response in self.ask( + prompt, True, optimizer=optimizer, conversationally=conversationally + ): + yield self.get_message(response) + + def for_non_stream(): + return self.get_message( + self.ask( + prompt, + False, + optimizer=optimizer, + conversationally=conversationally, + ) + ) + + return for_stream() if stream else for_non_stream() + + def get_message(self, response: dict) -> str: + """Retrieves message only from response + + Args: + response (dict): Response generated by `self.ask` + + Returns: + str: Message extracted + """ + assert isinstance(response, dict), "Response should be of dict data-type only" + try: + if response["choices"][0].get("delta"): + return response["choices"][0]["delta"]["content"] + return response["choices"][0]["message"]["content"] + except KeyError: + return "" +class AsyncOPENAI(AsyncProvider): + def __init__( + self, + api_key: str, + is_conversation: bool = True, + max_tokens: int = 600, + temperature: float = 1, + presence_penalty: int = 0, + frequency_penalty: int = 0, + top_p: float = 1, + model: str = "gpt-3.5-turbo", + timeout: int = 30, + intro: str = None, + filepath: str = None, + update_file: bool = True, + proxies: dict = {}, + history_offset: int = 10250, + act: str = None, + ): + """Instantiates OPENAI + + Args: + api_key (key): OpenAI's API key. + is_conversation (bool, optional): Flag for chatting conversationally. Defaults to True. + max_tokens (int, optional): Maximum number of tokens to be generated upon completion. Defaults to 600. + temperature (float, optional): Charge of the generated text's randomness. Defaults to 1. + presence_penalty (int, optional): Chances of topic being repeated. Defaults to 0. + frequency_penalty (int, optional): Chances of word being repeated. Defaults to 0. + top_p (float, optional): Sampling threshold during inference time. Defaults to 0.999. + model (str, optional): LLM model name. Defaults to "gpt-3.5-turbo". + timeout (int, optional): Http request timeout. Defaults to 30. + intro (str, optional): Conversation introductory prompt. Defaults to None. + filepath (str, optional): Path to file containing conversation history. Defaults to None. + update_file (bool, optional): Add new prompts and responses to the file. Defaults to True. + proxies (dict, optional): Http request proxies. Defaults to {}. + history_offset (int, optional): Limit conversation history to this number of last texts. Defaults to 10250. + act (str|int, optional): Awesome prompt key or index. (Used as intro). Defaults to None. 
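+
+        Example:
+            A minimal async sketch; the API key below is a placeholder.
+            ```python
+            import asyncio
+            from webscout import AsyncOPENAI
+
+            ai = AsyncOPENAI(api_key="sk-...")
+            print(asyncio.run(ai.chat("Hello there")))
+            ```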
+ """ + self.is_conversation = is_conversation + self.max_tokens_to_sample = max_tokens + self.api_key = api_key + self.model = model + self.temperature = temperature + self.presence_penalty = presence_penalty + self.frequency_penalty = frequency_penalty + self.top_p = top_p + self.chat_endpoint = "https://api.openai.com/v1/chat/completions" + self.stream_chunk_size = 64 + self.timeout = timeout + self.last_response = {} + self.headers = { + "Content-Type": "application/json", + "Authorization": f"Bearer {self.api_key}", + } + + self.__available_optimizers = ( + method + for method in dir(Optimizers) + if callable(getattr(Optimizers, method)) and not method.startswith("__") + ) + Conversation.intro = ( + AwesomePrompts().get_act( + act, raise_not_found=True, default=None, case_insensitive=True + ) + if act + else intro or Conversation.intro + ) + self.conversation = Conversation( + is_conversation, self.max_tokens_to_sample, filepath, update_file + ) + self.conversation.history_offset = history_offset + self.session = httpx.AsyncClient( + headers=self.headers, + proxies=proxies, + ) + + async def ask( + self, + prompt: str, + stream: bool = False, + raw: bool = False, + optimizer: str = None, + conversationally: bool = False, + ) -> dict | AsyncGenerator: + """Chat with AI asynchronously. + + Args: + prompt (str): Prompt to be send. + stream (bool, optional): Flag for streaming response. Defaults to False. + raw (bool, optional): Stream back raw response as received. Defaults to False. + optimizer (str, optional): Prompt optimizer name - `[code, shell_command]`. Defaults to None. + conversationally (bool, optional): Chat conversationally when using optimizer. Defaults to False. + Returns: + dict|AsyncGenerator : ai content. + ```json + { + "id": "chatcmpl-TaREJpBZsRVQFRFic1wIA7Q7XfnaD", + "object": "chat.completion", + "created": 1704623244, + "model": "gpt-3.5-turbo", + "usage": { + "prompt_tokens": 0, + "completion_tokens": 0, + "total_tokens": 0 + }, + "choices": [ + { + "message": { + "role": "assistant", + "content": "Hello! How can I assist you today?" 
+ }, + "finish_reason": "stop", + "index": 0 + } + ] + } + ``` + """ + conversation_prompt = self.conversation.gen_complete_prompt(prompt) + if optimizer: + if optimizer in self.__available_optimizers: + conversation_prompt = getattr(Optimizers, optimizer)( + conversation_prompt if conversationally else prompt + ) + else: + raise Exception( + f"Optimizer is not one of {self.__available_optimizers}" + ) + payload = { + "frequency_penalty": self.frequency_penalty, + "messages": [{"content": conversation_prompt, "role": "user"}], + "model": self.model, + "presence_penalty": self.presence_penalty, + "stream": stream, + "temperature": self.temperature, + "top_p": self.top_p, + } + + async def for_stream(): + async with self.session.stream( + "POST", self.chat_endpoint, json=payload, timeout=self.timeout + ) as response: + if not response.is_success: + raise Exception( + f"Failed to generate response - ({response.status_code}, {response.reason_phrase})" + ) + + message_load = "" + async for value in response.aiter_lines(): + try: + + resp = sanitize_stream(value) + incomplete_message = await self.get_message(resp) + if incomplete_message: + message_load += incomplete_message + resp["choices"][0]["delta"]["content"] = message_load + self.last_response.update(resp) + yield value if raw else resp + elif raw: + yield value + except json.decoder.JSONDecodeError: + pass + self.conversation.update_chat_history( + prompt, await self.get_message(self.last_response) + ) + + async def for_non_stream(): + response = httpx.post( + self.chat_endpoint, + json=payload, + timeout=self.timeout, + headers=self.headers, + ) + if ( + not response.is_success + or not response.headers.get("Content-Type", "") == "application/json" + ): + raise Exception( + f"Failed to generate response - ({response.status_code}, {response.reason_phrase})" + ) + resp = response.json() + self.last_response.update(resp) + self.conversation.update_chat_history( + prompt, await self.get_message(self.last_response) + ) + return resp + + return for_stream() if stream else await for_non_stream() + + async def chat( + self, + prompt: str, + stream: bool = False, + optimizer: str = None, + conversationally: bool = False, + ) -> str | AsyncGenerator: + """Generate response `str` asynchronously. + Args: + prompt (str): Prompt to be send. + stream (bool, optional): Flag for streaming response. Defaults to False. + optimizer (str, optional): Prompt optimizer name - `[code, shell_command]`. Defaults to None. + conversationally (bool, optional): Chat conversationally when using optimizer. Defaults to False. + Returns: + str|AsyncGenerator: Response generated + """ + + async def for_stream(): + async_ask = await self.ask( + prompt, True, optimizer=optimizer, conversationally=conversationally + ) + async for response in async_ask: + yield await self.get_message(response) + + async def for_non_stream(): + return await self.get_message( + await self.ask( + prompt, + False, + optimizer=optimizer, + conversationally=conversationally, + ) + ) + + return for_stream() if stream else await for_non_stream() + + async def get_message(self, response: dict) -> str: + """Retrieves message only from response asynchronously. 
+ + Args: + response (dict): Response generated by `self.ask` + + Returns: + str: Message extracted + """ + assert isinstance(response, dict), "Response should be of dict data-type only" + try: + if response["choices"][0].get("delta"): + return response["choices"][0]["delta"]["content"] + return response["choices"][0]["message"]["content"] + except KeyError: + return "" \ No newline at end of file diff --git a/webscout/Provider/Perplexity.py b/webscout/Provider/Perplexity.py new file mode 100644 index 0000000000000000000000000000000000000000..1b0bb09e0e39ee4ba7c222723f0508d8a79f8dbc --- /dev/null +++ b/webscout/Provider/Perplexity.py @@ -0,0 +1,230 @@ +import time +import uuid +from selenium import webdriver +from selenium.webdriver.chrome.options import Options +from selenium.webdriver.common.by import By +from selenium.webdriver.support import expected_conditions as EC +from selenium.webdriver.support.ui import WebDriverWait +import click +import requests +from requests import get +from uuid import uuid4 +from re import findall +from requests.exceptions import RequestException +from curl_cffi.requests import get, RequestsError +import g4f +from random import randint +from PIL import Image +import io +import re +import json +import yaml +from ..AIutel import Optimizers +from ..AIutel import Conversation +from ..AIutel import AwesomePrompts, sanitize_stream +from ..AIbase import Provider, AsyncProvider +from Helpingai_T2 import Perplexity +from webscout import exceptions +from typing import Any, AsyncGenerator, Dict +import logging +import httpx +#------------------------------------------------------PERPLEXITY-------------------------------------------------------- +class PERPLEXITY(Provider): + def __init__( + self, + is_conversation: bool = True, + max_tokens: int = 600, + timeout: int = 30, + intro: str = None, + filepath: str = None, + update_file: bool = True, + proxies: dict = {}, + history_offset: int = 10250, + act: str = None, + quiet: bool = False, + ): + """Instantiates PERPLEXITY + + Args: + is_conversation (bool, optional): Flag for chatting conversationally. Defaults to True + max_tokens (int, optional): Maximum number of tokens to be generated upon completion. Defaults to 600. + timeout (int, optional): Http request timeout. Defaults to 30. + intro (str, optional): Conversation introductory prompt. Defaults to None. + filepath (str, optional): Path to file containing conversation history. Defaults to None. + update_file (bool, optional): Add new prompts and responses to the file. Defaults to True. + proxies (dict, optional): Http request proxies. Defaults to {}. + history_offset (int, optional): Limit conversation history to this number of last texts. Defaults to 10250. + act (str|int, optional): Awesome prompt key or index. (Used as intro). Defaults to None. + quiet (bool, optional): Ignore web search-results and yield final response only. Defaults to False. 
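+
+        Example:
+            A minimal usage sketch; `quiet=True` drops the appended web-results section.
+            ```python
+            from webscout import PERPLEXITY
+
+            ai = PERPLEXITY(quiet=True)
+            print(ai.chat("Hello there"))
+            ```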
+ """ + self.max_tokens_to_sample = max_tokens + self.is_conversation = is_conversation + self.last_response = {} + self.web_results: dict = {} + self.quiet = quiet + + self.__available_optimizers = ( + method + for method in dir(Optimizers) + if callable(getattr(Optimizers, method)) and not method.startswith("__") + ) + Conversation.intro = ( + AwesomePrompts().get_act( + act, raise_not_found=True, default=None, case_insensitive=True + ) + if act + else intro or Conversation.intro + ) + self.conversation = Conversation( + is_conversation, self.max_tokens_to_sample, filepath, update_file + ) + self.conversation.history_offset = history_offset + + def ask( + self, + prompt: str, + stream: bool = False, + raw: bool = False, + optimizer: str = None, + conversationally: bool = False, + ) -> dict: + """Chat with AI + + Args: + prompt (str): Prompt to be send. + stream (bool, optional): Flag for streaming response. Defaults to False. + raw (bool, optional): Stream back raw response as received. Defaults to False. + optimizer (str, optional): Prompt optimizer name - `[code, shell_command]`. Defaults to None. + conversationally (bool, optional): Chat conversationally when using optimizer. Defaults to False. + Returns: + dict : {} + ```json + { + "status": "pending", + "uuid": "3604dfcc-611f-4b7d-989d-edca2a7233c7", + "read_write_token": null, + "frontend_context_uuid": "f6d43119-5231-481d-b692-f52e1f52d2c6", + "final": false, + "backend_uuid": "a6d6ec9e-da69-4841-af74-0de0409267a8", + "media_items": [], + "widget_data": [], + "knowledge_cards": [], + "expect_search_results": "false", + "mode": "concise", + "search_focus": "internet", + "gpt4": false, + "display_model": "turbo", + "attachments": null, + "answer": "", + "web_results": [], + "chunks": [], + "extra_web_results": [] + } + ``` + """ + conversation_prompt = self.conversation.gen_complete_prompt(prompt) + if optimizer: + if optimizer in self.__available_optimizers: + conversation_prompt = getattr(Optimizers, optimizer)( + conversation_prompt if conversationally else prompt + ) + else: + raise Exception( + f"Optimizer is not one of {self.__available_optimizers}" + ) + + def for_stream(): + for response in Perplexity().generate_answer(conversation_prompt): + yield json.dumps(response) if raw else response + self.last_response.update(response) + + self.conversation.update_chat_history( + prompt, + self.get_message(self.last_response), + ) + + def for_non_stream(): + for _ in for_stream(): + pass + return self.last_response + + return for_stream() if stream else for_non_stream() + + def chat( + self, + prompt: str, + stream: bool = False, + optimizer: str = None, + conversationally: bool = False, + ) -> str: + """Generate response `str` + Args: + prompt (str): Prompt to be send. + stream (bool, optional): Flag for streaming response. Defaults to False. + optimizer (str, optional): Prompt optimizer name - `[code, shell_command]`. Defaults to None. + conversationally (bool, optional): Chat conversationally when using optimizer. Defaults to False. 
+ Returns: + str: Response generated + """ + + def for_stream(): + for response in self.ask( + prompt, True, optimizer=optimizer, conversationally=conversationally + ): + yield self.get_message(response) + + def for_non_stream(): + return self.get_message( + self.ask( + prompt, + False, + optimizer=optimizer, + conversationally=conversationally, + ) + ) + + return for_stream() if stream else for_non_stream() + + def get_message(self, response: dict) -> str: + """Retrieves message only from response + + Args: + response (dict): Response generated by `self.ask` + + Returns: + str: Message extracted + """ + assert isinstance(response, dict), "Response should be of dict data-type only" + text_str: str = response.get("answer", "") + + def update_web_results(web_results: list) -> None: + for index, results in enumerate(web_results, start=1): + self.web_results[str(index) + ". " + results["name"]] = dict( + url=results.get("url"), snippet=results.get("snippet") + ) + + if response.get("text"): + # last chunk + target: dict[str, Any] = json.loads(response.get("text")) + text_str = target.get("answer") + web_results: list[dict] = target.get("web_results") + self.web_results.clear() + update_web_results(web_results) + + return ( + text_str + if self.quiet or not self.web_results + else text_str + "\n\n# WEB-RESULTS\n\n" + yaml.dump(self.web_results) + ) + + else: + if str(response.get("expect_search_results")).lower() == "true": + return ( + text_str + if self.quiet + else text_str + + "\n\n# WEB-RESULTS\n\n" + + yaml.dump(response.get("web_results")) + ) + else: + return text_str \ No newline at end of file diff --git a/webscout/Provider/Phind.py b/webscout/Provider/Phind.py new file mode 100644 index 0000000000000000000000000000000000000000..4210bd37a5fb9eb3516a198672acf8c6904d2ec9 --- /dev/null +++ b/webscout/Provider/Phind.py @@ -0,0 +1,518 @@ +import time +import uuid +from selenium import webdriver +from selenium.webdriver.chrome.options import Options +from selenium.webdriver.common.by import By +from selenium.webdriver.support import expected_conditions as EC +from selenium.webdriver.support.ui import WebDriverWait +import click +import requests +from requests import get +from uuid import uuid4 +from re import findall +from requests.exceptions import RequestException +from curl_cffi.requests import get, RequestsError +import g4f +from random import randint +from PIL import Image +import io +import re +import json +import yaml +from ..AIutel import Optimizers +from ..AIutel import Conversation +from ..AIutel import AwesomePrompts, sanitize_stream +from ..AIbase import Provider, AsyncProvider +from Helpingai_T2 import Perplexity +from webscout import exceptions +from typing import Any, AsyncGenerator, Dict +import logging +import httpx + +#------------------------------------------------------phind------------------------------------------------------------- +class PhindSearch: + # default_model = "Phind Model" + def __init__( + self, + is_conversation: bool = True, + max_tokens: int = 8000, + timeout: int = 30, + intro: str = None, + filepath: str = None, + update_file: bool = True, + proxies: dict = {}, + history_offset: int = 10250, + act: str = None, + model: str = "Phind Model", + quiet: bool = False, + ): + """Instantiates PHIND + + Args: + is_conversation (bool, optional): Flag for chatting conversationally. Defaults to True + max_tokens (int, optional): Maximum number of tokens to be generated upon completion. Defaults to 600. + timeout (int, optional): Http request timeout. 
Defaults to 30. + intro (str, optional): Conversation introductory prompt. Defaults to None. + filepath (str, optional): Path to file containing conversation history. Defaults to None. + update_file (bool, optional): Add new prompts and responses to the file. Defaults to True. + proxies (dict, optional): Http request proxies. Defaults to {}. + history_offset (int, optional): Limit conversation history to this number of last texts. Defaults to 10250. + act (str|int, optional): Awesome prompt key or index. (Used as intro). Defaults to None. + model (str, optional): Model name. Defaults to "Phind Model". + quiet (bool, optional): Ignore web search-results and yield final response only. Defaults to False. + """ + self.session = requests.Session() + self.max_tokens_to_sample = max_tokens + self.is_conversation = is_conversation + self.chat_endpoint = "https://https.extension.phind.com/agent/" + self.stream_chunk_size = 64 + self.timeout = timeout + self.last_response = {} + self.model = model + self.quiet = quiet + + self.headers = { + "Content-Type": "application/json", + "User-Agent": "", + "Accept": "*/*", + "Accept-Encoding": "Identity", + } + + self.__available_optimizers = ( + method + for method in dir(Optimizers) + if callable(getattr(Optimizers, method)) and not method.startswith("__") + ) + self.session.headers.update(self.headers) + Conversation.intro = ( + AwesomePrompts().get_act( + act, raise_not_found=True, default=None, case_insensitive=True + ) + if act + else intro or Conversation.intro + ) + self.conversation = Conversation( + is_conversation, self.max_tokens_to_sample, filepath, update_file + ) + self.conversation.history_offset = history_offset + self.session.proxies = proxies + + def ask( + self, + prompt: str, + stream: bool = False, + raw: bool = False, + optimizer: str = None, + conversationally: bool = False, + ) -> dict: + """Chat with AI + + Args: + prompt (str): Prompt to be send. + stream (bool, optional): Flag for streaming response. Defaults to False. + raw (bool, optional): Stream back raw response as received. Defaults to False. + optimizer (str, optional): Prompt optimizer name - `[code, shell_command]`. Defaults to None. + conversationally (bool, optional): Chat conversationally when using optimizer. Defaults to False. + Returns: + dict : {} + ```json + { + "id": "chatcmpl-r0wujizf2i2xb60mjiwt", + "object": "chat.completion.chunk", + "created": 1706775384, + "model": "trt-llm-phind-model-serving", + "choices": [ + { + "index": 0, + "delta": { + "content": "Hello! How can I assist you with your programming today?" 
+ }, + "finish_reason": null + } + ] + } + ``` + """ + conversation_prompt = self.conversation.gen_complete_prompt(prompt) + if optimizer: + if optimizer in self.__available_optimizers: + conversation_prompt = getattr(Optimizers, optimizer)( + conversation_prompt if conversationally else prompt + ) + else: + raise Exception( + f"Optimizer is not one of {self.__available_optimizers}" + ) + + self.session.headers.update(self.headers) + payload = { + "additional_extension_context": "", + "allow_magic_buttons": True, + "is_vscode_extension": True, + "message_history": [ + {"content": conversation_prompt, "metadata": {}, "role": "user"} + ], + "requested_model": self.model, + "user_input": prompt, + } + + def for_stream(): + response = self.session.post( + self.chat_endpoint, json=payload, stream=True, timeout=self.timeout + ) + if ( + not response.ok + or not response.headers.get("Content-Type") + == "text/event-stream; charset=utf-8" + ): + raise exceptions.FailedToGenerateResponseError( + f"Failed to generate response - ({response.status_code}, {response.reason}) - {response.text}" + ) + streaming_text = "" + for value in response.iter_lines( + decode_unicode=True, + chunk_size=self.stream_chunk_size, + ): + try: + modified_value = re.sub("data:", "", value) + json_modified_value = json.loads(modified_value) + retrieved_text = self.get_message(json_modified_value) + if not retrieved_text: + continue + streaming_text += retrieved_text + json_modified_value["choices"][0]["delta"][ + "content" + ] = streaming_text + self.last_response.update(json_modified_value) + yield value if raw else json_modified_value + except json.decoder.JSONDecodeError: + pass + self.conversation.update_chat_history( + prompt, self.get_message(self.last_response) + ) + + def for_non_stream(): + for _ in for_stream(): + pass + return self.last_response + + return for_stream() if stream else for_non_stream() + + def chat( + self, + prompt: str, + stream: bool = False, + optimizer: str = None, + conversationally: bool = False, + ) -> str: + """Generate response `str` + Args: + prompt (str): Prompt to be send. + stream (bool, optional): Flag for streaming response. Defaults to False. + optimizer (str, optional): Prompt optimizer name - `[code, shell_command]`. Defaults to None. + conversationally (bool, optional): Chat conversationally when using optimizer. Defaults to False. 
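+
+        Example:
+            A minimal usage sketch of a plain, non-streaming call.
+            ```python
+            from webscout import PhindSearch
+
+            ai = PhindSearch()
+            print(ai.chat("How do I reverse a list in Python?"))
+            ```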
+ Returns: + str: Response generated + """ + + def for_stream(): + for response in self.ask( + prompt, True, optimizer=optimizer, conversationally=conversationally + ): + yield self.get_message(response) + + def for_non_stream(): + return self.get_message( + self.ask( + prompt, + False, + optimizer=optimizer, + conversationally=conversationally, + ) + ) + + return for_stream() if stream else for_non_stream() + + def get_message(self, response: dict) -> str: + """Retrieves message only from response + + Args: + response (dict): Response generated by `self.ask` + + Returns: + str: Message extracted + """ + assert isinstance(response, dict), "Response should be of dict data-type only" + if response.get("type", "") == "metadata": + return + + delta: dict = response["choices"][0]["delta"] + + if not delta: + return "" + + elif delta.get("function_call"): + if self.quiet: + return "" + + function_call: dict = delta["function_call"] + if function_call.get("name"): + return function_call["name"] + elif function_call.get("arguments"): + return function_call.get("arguments") + + elif delta.get("metadata"): + if self.quiet: + return "" + return yaml.dump(delta["metadata"]) + + else: + return ( + response["choices"][0]["delta"].get("content") + if response["choices"][0].get("finish_reason") is None + else "" + ) +class AsyncPhindSearch(AsyncProvider): + def __init__( + self, + is_conversation: bool = True, + max_tokens: int = 600, + timeout: int = 30, + intro: str = None, + filepath: str = None, + update_file: bool = True, + proxies: dict = {}, + history_offset: int = 10250, + act: str = None, + model: str = "Phind Model", + quiet: bool = False, + ): + """Instantiates PHIND + + Args: + is_conversation (bool, optional): Flag for chatting conversationally. Defaults to True + max_tokens (int, optional): Maximum number of tokens to be generated upon completion. Defaults to 600. + timeout (int, optional): Http request timeout. Defaults to 30. + intro (str, optional): Conversation introductory prompt. Defaults to None. + filepath (str, optional): Path to file containing conversation history. Defaults to None. + update_file (bool, optional): Add new prompts and responses to the file. Defaults to True. + proxies (dict, optional): Http request proxies. Defaults to {}. + history_offset (int, optional): Limit conversation history to this number of last texts. Defaults to 10250. + act (str|int, optional): Awesome prompt key or index. (Used as intro). Defaults to None. + model (str, optional): Model name. Defaults to "Phind Model". + quiet (bool, optional): Ignore web search-results and yield final response only. Defaults to False. 
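+
+        Example:
+            A minimal async sketch, driven with `asyncio.run`.
+            ```python
+            import asyncio
+            from webscout import AsyncPhindSearch
+
+            ai = AsyncPhindSearch()
+            print(asyncio.run(ai.chat("Hello there")))
+            ```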
+ """ + self.max_tokens_to_sample = max_tokens + self.is_conversation = is_conversation + self.chat_endpoint = "https://https.extension.phind.com/agent/" + self.stream_chunk_size = 64 + self.timeout = timeout + self.last_response = {} + self.model = model + self.quiet = quiet + + self.headers = { + "Content-Type": "application/json", + "User-Agent": "", + "Accept": "*/*", + "Accept-Encoding": "Identity", + } + + self.__available_optimizers = ( + method + for method in dir(Optimizers) + if callable(getattr(Optimizers, method)) and not method.startswith("__") + ) + Conversation.intro = ( + AwesomePrompts().get_act( + act, raise_not_found=True, default=None, case_insensitive=True + ) + if act + else intro or Conversation.intro + ) + self.conversation = Conversation( + is_conversation, self.max_tokens_to_sample, filepath, update_file + ) + self.conversation.history_offset = history_offset + self.session = httpx.AsyncClient(headers=self.headers, proxies=proxies) + + async def ask( + self, + prompt: str, + stream: bool = False, + raw: bool = False, + optimizer: str = None, + conversationally: bool = False, + synchronous_generator=False, + ) -> dict | AsyncGenerator: + """Asynchronously Chat with AI + + Args: + prompt (str): Prompt to be send. + stream (bool, optional): Flag for streaming response. Defaults to False. + raw (bool, optional): Stream back raw response as received. Defaults to False. + optimizer (str, optional): Prompt optimizer name - `[code, shell_command]`. Defaults to None. + conversationally (bool, optional): Chat conversationally when using optimizer. Defaults to False. + Returns: + dict|AsyncGenerator : ai content. + ```json + { + "id": "chatcmpl-r0wujizf2i2xb60mjiwt", + "object": "chat.completion.chunk", + "created": 1706775384, + "model": "trt-llm-phind-model-serving", + "choices": [ + { + "index": 0, + "delta": { + "content": "Hello! How can I assist you with your programming today?" 
+ }, + "finish_reason": null + } + ] + } + ``` + """ + conversation_prompt = self.conversation.gen_complete_prompt(prompt) + if optimizer: + if optimizer in self.__available_optimizers: + conversation_prompt = getattr(Optimizers, optimizer)( + conversation_prompt if conversationally else prompt + ) + else: + raise Exception( + f"Optimizer is not one of {self.__available_optimizers}" + ) + + payload = { + "additional_extension_context": "", + "allow_magic_buttons": True, + "is_vscode_extension": True, + "message_history": [ + {"content": conversation_prompt, "metadata": {}, "role": "user"} + ], + "requested_model": self.model, + "user_input": prompt, + } + + async def for_stream(): + async with self.session.stream( + "POST", + self.chat_endpoint, + json=payload, + timeout=self.timeout, + ) as response: + if ( + not response.is_success + or not response.headers.get("Content-Type") + == "text/event-stream; charset=utf-8" + ): + raise exceptions.FailedToGenerateResponseError( + f"Failed to generate response - ({response.status_code}, {response.reason_phrase})" + ) + streaming_text = "" + async for value in response.aiter_lines(): + try: + modified_value = re.sub("data:", "", value) + json_modified_value = json.loads(modified_value) + retrieved_text = await self.get_message(json_modified_value) + if not retrieved_text: + continue + streaming_text += retrieved_text + json_modified_value["choices"][0]["delta"][ + "content" + ] = streaming_text + self.last_response.update(json_modified_value) + yield value if raw else json_modified_value + except json.decoder.JSONDecodeError: + pass + self.conversation.update_chat_history( + prompt, await self.get_message(self.last_response) + ) + + async def for_non_stream(): + async for _ in for_stream(): + pass + return self.last_response + + return ( + for_stream() + if stream and not synchronous_generator + else await for_non_stream() + ) + + async def chat( + self, + prompt: str, + stream: bool = False, + optimizer: str = None, + conversationally: bool = False, + ) -> str | AsyncGenerator: + """Generate response `str` + Args: + prompt (str): Prompt to be send. + stream (bool, optional): Flag for streaming response. Defaults to False. + optimizer (str, optional): Prompt optimizer name - `[code, shell_command]`. Defaults to None. + conversationally (bool, optional): Chat conversationally when using optimizer. Defaults to False. 
+ Returns: + str|AsyncGenerator: Response generated + """ + + async def for_stream(): + ask_resp = await self.ask( + prompt, True, optimizer=optimizer, conversationally=conversationally + ) + async for response in ask_resp: + yield await self.get_message(response) + + async def for_non_stream(): + return await self.get_message( + await self.ask( + prompt, + False, + optimizer=optimizer, + conversationally=conversationally, + ) + ) + + return for_stream() if stream else await for_non_stream() + + async def get_message(self, response: dict) -> str: + """Retrieves message only from response + + Args: + response (dict): Response generated by `self.ask` + + Returns: + str: Message extracted + """ + assert isinstance(response, dict), "Response should be of dict data-type only" + if response.get("type", "") == "metadata": + return + + delta: dict = response["choices"][0]["delta"] + + if not delta: + return "" + + elif delta.get("function_call"): + if self.quiet: + return "" + + function_call: dict = delta["function_call"] + if function_call.get("name"): + return function_call["name"] + elif function_call.get("arguments"): + return function_call.get("arguments") + + elif delta.get("metadata"): + if self.quiet: + return "" + return yaml.dump(delta["metadata"]) + + else: + return ( + response["choices"][0]["delta"].get("content") + if response["choices"][0].get("finish_reason") is None + else "" + ) \ No newline at end of file diff --git a/webscout/Provider/Poe.py b/webscout/Provider/Poe.py new file mode 100644 index 0000000000000000000000000000000000000000..32232379c3cc5babd57d477766c3022c26de0dcb --- /dev/null +++ b/webscout/Provider/Poe.py @@ -0,0 +1,208 @@ +from poe_api_wrapper import PoeApi +from poe_api_wrapper.api import BOTS_LIST +from ..AIbase import Provider +from ..AIutel import Conversation +from ..AIutel import Optimizers +from ..AIutel import AwesomePrompts +from pathlib import Path +from json import loads +from json import dumps +from loguru import logger +import logging + +logger.remove() + + +class POE(Provider): + def __init__( + self, + cookie: str, + model: str = "Assistant", + proxy: bool = False, + timeout: int = 30, + filepath: str = None, + update_file: str = True, + intro: str = None, + act: str = None, + init: bool = True, + ): + """Initializes POE + + Args: + cookie (str): Path to `poe.com.cookies.json` file or 'p-b' cookie-value. + model (str, optional): Model name. Default to Assistant. + proxy (bool, optional): Flag for Httpx request proxy. Defaults to False. + timeout (int, optional): Http request timeout. Defaults to 30. + filepath (str, optional): Path to save the chat history. Defaults to None. + update_file (str, optional): Flag for controlling chat history updates. Defaults to True. + intro (str, optional): Conversation introductory prompt. Defaults to None. + act (str|int, optional): Awesome prompt key or index. (Used as intro). Defaults to None. + init (bool, optional): Resend the intro prompt. Defaults to True. 
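+
+        Example:
+            A minimal sketch; the cookie path is a placeholder for your own
+            `poe.com.cookies.json` export (a raw 'p-b' cookie value also works),
+            and the import path simply mirrors this module's location.
+            ```python
+            from webscout.Provider.Poe import POE
+
+            bot = POE(cookie="poe.com.cookies.json", model="Assistant")
+            print(bot.chat("Hello there"))
+            ```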
+ """ + assert isinstance( + cookie, str + ), f"Cookie must be of {str} datatype only not {type(cookie)}" + assert ( + model in BOTS_LIST.keys() + ), f"model name '{model}' is not one of {', '.join(list(BOTS_LIST.keys()))}" + cookie_path = Path(cookie) + + if cookie_path.exists() or any(["/" in cookie, ".json" in cookie]): + cookie = None + all_cookies = loads(cookie_path.read_text()) + for entry in all_cookies: + if entry["name"] == "p-b": + cookie = entry["value"] + assert ( + cookie + ), f'Required cookie value cannot be retrieved from the path "{cookie_path.as_posix()}"' + + if proxy: + import poe_api_wrapper.proxies as proxies + + proxies.PROXY = True + + self.bot = BOTS_LIST[model] + self.session = PoeApi(cookie) + self.last_response = {} + self.__available_optimizers = ( + method + for method in dir(Optimizers) + if callable(getattr(Optimizers, method)) and not method.startswith("__") + ) + Conversation.intro = ( + AwesomePrompts().get_act( + act, raise_not_found=True, default=None, case_insensitive=True + ) + if act + else intro or Conversation.intro + ) + self.conversation = Conversation( + status=False, filepath=filepath, update_file=update_file + ) + if init: + self.ask(self.conversation.intro) # Init + + def ask( + self, + prompt: str, + stream: bool = False, + raw: bool = False, + optimizer: str = None, + conversationally: bool = False, + ) -> dict: + """Chat with AI + + Args: + prompt (str): Prompt to be send. + stream (bool, optional): Flag for streaming response. Defaults to False. + raw (bool, optional): Stream back raw response as received. Defaults to False. + optimizer (str, optional): Prompt optimizer name - `[code, shell_command]`. Defeaults to None + conversationally (bool, optional): Chat conversationally when using optimizer. Defaults to False. + Returns: + dict : {} + ```json + { + "id": "TWVzc2FnZToxMTU0MzgyNDQ1ODU=", + "messageId": 115438244585, + "creationTime": 1707777376544407, + "clientNonce": null, + "state": "complete", + "text": "Hello! How can I assist you today?", + "author": "capybara", + "contentType": "text_markdown", + "sourceType": "chat_input", + "attachmentTruncationState": "not_truncated", + "attachments": [], + "vote": null, + "suggestedReplies": [], + "hasCitations": false, + "__isNode": "Message", + "textLengthOnCancellation": null, + "chatCode": "21a2jn0yrq9phxiy478", + "chatId": 328236777, + "title": null, + "response": "" + } + ``` + """ + conversation_prompt = self.conversation.gen_complete_prompt(prompt) + if optimizer: + if optimizer in self.__available_optimizers: + conversation_prompt = getattr(Optimizers, optimizer)( + conversation_prompt if conversationally else prompt + ) + else: + raise Exception( + f"Optimizer is not one of {self.__available_optimizers}" + ) + + def for_stream(): + for response in self.session.send_message(self.bot, conversation_prompt): + if raw: + yield dumps(response) + else: + yield response + + self.last_response.update(response) + + self.conversation.update_chat_history( + prompt, + self.get_message(self.last_response), + force=True, + ) + + def for_non_stream(): + # let's make use of stream + for _ in for_stream(): + pass + return self.last_response + + return for_stream() if stream else for_non_stream() + + def chat( + self, + prompt: str, + stream: bool = False, + optimizer: str = None, + conversationally: bool = False, + ) -> str: + """Generate response `str` + Args: + prompt (str): Prompt to be send. + stream (bool, optional): Flag for streaming response. Defaults to False. 
+ optimizer (str, optional): Prompt optimizer name - `[code, shell_command]`. Defaults to None. + conversationally (bool, optional): Chat conversationally when using optimizer. Defaults to False. + Returns: + str: Response generated + """ + + def for_stream(): + for response in self.ask( + prompt, True, optimizer=optimizer, conversationally=conversationally + ): + yield self.get_message(response) + + def for_non_stream(): + return self.get_message( + self.ask( + prompt, + False, + optimizer=optimizer, + conversationally=conversationally, + ) + ) + + return for_stream() if stream else for_non_stream() + + def get_message(self, response: dict) -> str: + """Retrieves message only from response + + Args: + response (dict): Response generated by `self.ask` + + Returns: + str: Message extracted + """ + assert isinstance(response, dict), "Response should be of dict data-type only" + return response["text"] \ No newline at end of file diff --git a/webscout/Provider/Reka.py b/webscout/Provider/Reka.py new file mode 100644 index 0000000000000000000000000000000000000000..ba55c486fd2744ee19493f39fa2222c255801658 --- /dev/null +++ b/webscout/Provider/Reka.py @@ -0,0 +1,226 @@ +import time +import uuid +from selenium import webdriver +from selenium.webdriver.chrome.options import Options +from selenium.webdriver.common.by import By +from selenium.webdriver.support import expected_conditions as EC +from selenium.webdriver.support.ui import WebDriverWait +import click +import requests +from requests import get +from uuid import uuid4 +from re import findall +from requests.exceptions import RequestException +from curl_cffi.requests import get, RequestsError +import g4f +from random import randint +from PIL import Image +import io +import re +import json +import yaml +from ..AIutel import Optimizers +from ..AIutel import Conversation +from ..AIutel import AwesomePrompts, sanitize_stream +from ..AIbase import Provider, AsyncProvider +from Helpingai_T2 import Perplexity +from webscout import exceptions +from typing import Any, AsyncGenerator, Dict +import logging +import httpx +#-----------------------------------------------REKA----------------------------------------------- +class REKA(Provider): + def __init__( + self, + api_key: str, + is_conversation: bool = True, + max_tokens: int = 600, + timeout: int = 30, + intro: str = None, + filepath: str = None, + update_file: bool = True, + proxies: dict = {}, + history_offset: int = 10250, + act: str = None, + model: str = "reka-core", + system_prompt: str = "Be Helpful and Friendly. Keep your response straightforward, short and concise", + use_search_engine: bool = False, + use_code_interpreter: bool = False, + ): + """Instantiates REKA + + Args: + is_conversation (bool, optional): Flag for chatting conversationally. Defaults to True + max_tokens (int, optional): Maximum number of tokens to be generated upon completion. Defaults to 600. + timeout (int, optional): Http request timeout. Defaults to 30. + intro (str, optional): Conversation introductory prompt. Defaults to None. + filepath (str, optional): Path to file containing conversation history. Defaults to None. + update_file (bool, optional): Add new prompts and responses to the file. Defaults to True. + proxies (dict, optional): Http request proxies. Defaults to {}. + history_offset (int, optional): Limit conversation history to this number of last texts. Defaults to 10250. + act (str|int, optional): Awesome prompt key or index. (Used as intro). Defaults to None. + model (str, optional): REKA model name. 
Defaults to "reka-core". + system_prompt (str, optional): System prompt for REKA. Defaults to "Be Helpful and Friendly. Keep your response straightforward, short and concise". + use_search_engine (bool, optional): Whether to use the search engine. Defaults to False. + use_code_interpreter (bool, optional): Whether to use the code interpreter. Defaults to False. + """ + self.session = requests.Session() + self.is_conversation = is_conversation + self.max_tokens_to_sample = max_tokens + self.api_endpoint = "https://chat.reka.ai/api/chat" + self.stream_chunk_size = 64 + self.timeout = timeout + self.last_response = {} + self.model = model + self.system_prompt = system_prompt + self.use_search_engine = use_search_engine + self.use_code_interpreter = use_code_interpreter + self.access_token = api_key + self.headers = { + "Authorization": f"Bearer {self.access_token}", + } + + self.__available_optimizers = ( + method + for method in dir(Optimizers) + if callable(getattr(Optimizers, method)) and not method.startswith("__") + ) + self.session.headers.update(self.headers) + Conversation.intro = ( + AwesomePrompts().get_act( + act, raise_not_found=True, default=None, case_insensitive=True + ) + if act + else intro or Conversation.intro + ) + self.conversation = Conversation( + is_conversation, self.max_tokens_to_sample, filepath, update_file + ) + self.conversation.history_offset = history_offset + self.session.proxies = proxies + + def ask( + self, + prompt: str, + stream: bool = False, + raw: bool = False, + optimizer: str = None, + conversationally: bool = False, + ) -> dict: + """Chat with AI + + Args: + prompt (str): Prompt to be send. + stream (bool, optional): Flag for streaming response. Defaults to False. + raw (bool, optional): Stream back raw response as received. Defaults to False. + optimizer (str, optional): Prompt optimizer name - `[code, shell_command]`. Defaults to None. + conversationally (bool, optional): Chat conversationally when using optimizer. Defaults to False. + Returns: + dict : {} + ```json + { + "text" : "How may I assist you today?" 
+ } + ``` + """ + conversation_prompt = self.conversation.gen_complete_prompt(prompt) + if optimizer: + if optimizer in self.__available_optimizers: + conversation_prompt = getattr(Optimizers, optimizer)( + conversation_prompt if conversationally else prompt + ) + else: + raise Exception( + f"Optimizer is not one of {self.__available_optimizers}" + ) + + self.session.headers.update(self.headers) + payload = { + + "conversation_history": [ + {"type": "human", "text": f"## SYSTEM PROMPT: {self.system_prompt}\n\n## QUERY: {conversation_prompt}"}, + ], + + "stream": stream, + "use_search_engine": self.use_search_engine, + "use_code_interpreter": self.use_code_interpreter, + "model_name": self.model, + # "model_name": "reka-flash", + # "model_name": "reka-edge", + } + + def for_stream(): + response = self.session.post(self.api_endpoint, json=payload, stream=True, timeout=self.timeout) + if not response.ok: + raise Exception( + f"Failed to generate response - ({response.status_code}, {response.reason}) - {response.text}" + ) + + for value in response.iter_lines( + decode_unicode=True, + chunk_size=self.stream_chunk_size, + ): + try: + resp = json.loads(value) + self.last_response.update(resp) + yield value if raw else resp + except json.decoder.JSONDecodeError: + pass + self.conversation.update_chat_history( + prompt, self.get_message(self.last_response) + ) + + def for_non_stream(): + # let's make use of stream + for _ in for_stream(): + pass + return self.last_response + + return for_stream() if stream else for_non_stream() + + def chat( + self, + prompt: str, + stream: bool = False, + optimizer: str = None, + conversationally: bool = False, + ) -> str: + """Generate response `str` + Args: + prompt (str): Prompt to be send. + stream (bool, optional): Flag for streaming response. Defaults to False. + optimizer (str, optional): Prompt optimizer name - `[code, shell_command]`. Defaults to None. + conversationally (bool, optional): Chat conversationally when using optimizer. Defaults to False. 
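+
+        Example (illustrative usage sketch, not part of the original docstring; the
+        key value below is a placeholder and assumes a valid Reka access token):
+            >>> from webscout import REKA
+            >>> ai = REKA(api_key="YOUR_ACCESS_TOKEN", model="reka-core")
+            >>> print(ai.chat("What is the capital of France?"))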
+ Returns: + str: Response generated + """ + + def for_stream(): + for response in self.ask( + prompt, True, optimizer=optimizer, conversationally=conversationally + ): + yield self.get_message(response) + + def for_non_stream(): + return self.get_message( + self.ask( + prompt, + False, + optimizer=optimizer, + conversationally=conversationally, + ) + ) + + return for_stream() if stream else for_non_stream() + + def get_message(self, response: dict) -> str: + """Retrieves message only from response + + Args: + response (dict): Response generated by `self.ask` + + Returns: + str: Message extracted + """ + assert isinstance(response, dict), "Response should be of dict data-type only" + return response.get("text") \ No newline at end of file diff --git a/webscout/Provider/ThinkAnyAI.py b/webscout/Provider/ThinkAnyAI.py new file mode 100644 index 0000000000000000000000000000000000000000..5ae9ed143bfae1ee826fe244b955ba22f2d71ec7 --- /dev/null +++ b/webscout/Provider/ThinkAnyAI.py @@ -0,0 +1,280 @@ +import time +import uuid +from selenium import webdriver +from selenium.webdriver.chrome.options import Options +from selenium.webdriver.common.by import By +from selenium.webdriver.support import expected_conditions as EC +from selenium.webdriver.support.ui import WebDriverWait +import click +import requests +from requests import get +from uuid import uuid4 +from re import findall +from requests.exceptions import RequestException +from curl_cffi.requests import get, RequestsError +import g4f +from random import randint +from PIL import Image +import io +import re +import json +import yaml +from ..AIutel import Optimizers +from ..AIutel import Conversation +from ..AIutel import AwesomePrompts, sanitize_stream +from ..AIbase import Provider, AsyncProvider +from Helpingai_T2 import Perplexity +from webscout import exceptions +from typing import Any, AsyncGenerator, Dict +import logging +import httpx +#------------------------------------ThinkAnyAI------------ +class ThinkAnyAI(Provider): + def __init__( + self, + model: str = "claude-3-haiku", + locale: str = "en", + web_search: bool = False, + chunk_size: int = 1, + streaming: bool = True, + is_conversation: bool = True, + max_tokens: int = 600, + timeout: int = 30, + intro: str = None, + filepath: str = None, + update_file: bool = True, + proxies: dict = {}, + history_offset: int = 10250, + act: str = None, + ): + """Initializes ThinkAnyAI + + Args: + model (str): The AI model to be used for generating responses. Defaults to "claude-3-haiku". + locale (str): The language locale. Defaults to "en" (English). + web_search (bool): Whether to include web search results in the response. Defaults to False. + chunk_size (int): The size of data chunks when streaming responses. Defaults to 1. + streaming (bool): Whether to stream response data. Defaults to True. + is_conversation (bool, optional): Flag for chatting conversationally. Defaults to True. + max_tokens (int, optional): Maximum number of tokens to be generated upon completion. Defaults to 600. + timeout (int, optional): Http request timeout. Defaults to 30. + intro (str, optional): Conversation introductory prompt. Defaults to None. + filepath (str, optional): Path to file containing conversation history. Defaults to None. + update_file (bool, optional): Add new prompts and responses to the file. Defaults to True. + proxies (dict, optional): Http request proxies. Defaults to {}. + history_offset (int, optional): Limit conversation history to this number of last texts. Defaults to 10250. 
+            act (str|int, optional): Awesome prompt key or index. (Used as intro). Defaults to None.
+        """
+        self.base_url = "https://thinkany.ai/api"
+        self.model = model
+        self.locale = locale
+        self.web_search = web_search
+        self.chunk_size = chunk_size
+        self.streaming = streaming
+        self.last_response = {}
+        self.session = requests.Session()
+        self.session.proxies = proxies
+
+        self.__available_optimizers = (
+            method
+            for method in dir(Optimizers)
+            if callable(getattr(Optimizers, method)) and not method.startswith("__")
+        )
+
+        Conversation.intro = (
+            AwesomePrompts().get_act(
+                act, raise_not_found=True, default=None, case_insensitive=True
+            )
+            if act
+            else intro or Conversation.intro
+        )
+        self.conversation = Conversation(
+            is_conversation, max_tokens, filepath, update_file
+        )
+        self.conversation.history_offset = history_offset
+
+    def ask(
+        self,
+        prompt: str,
+        stream: bool = False,
+        raw: bool = False,
+        optimizer: str = None,
+        conversationally: bool = False,
+    ) -> dict | AsyncGenerator:
+        """Chat with AI
+
+        Args:
+            prompt (str): Prompt to be send.
+            stream (bool, optional): Flag for streaming response. Defaults to False.
+            raw (bool, optional): Stream back raw response as received. Defaults to False.
+            optimizer (str, optional): Prompt optimizer name - `[code, shell_command]`. Defaults to None.
+            conversationally (bool, optional): Chat conversationally when using optimizer. Defaults to False.
+        Returns:
+            dict : {}
+        ```json
+        {
+            "text": "How may I assist you today?",
+            "links": []
+        }
+        ```
+        """
+        conversation_prompt = self.conversation.gen_complete_prompt(prompt)
+        if optimizer:
+            if optimizer in self.__available_optimizers:
+                conversation_prompt = getattr(Optimizers, optimizer)(
+                    conversation_prompt if conversationally else prompt
+                )
+            else:
+                raise Exception(
+                    f"Optimizer is not one of {self.__available_optimizers}"
+                )
+
+        def initiate_conversation(query: str) -> str:
+            """
+            Initiates a new conversation with the ThinkAny AI API.
+
+            Args:
+                query (str): The initial query to start the conversation.
+
+            Returns:
+                str: The UUID (Unique Identifier) of the conversation.
+ """ + url = f"{self.base_url}/new-conversation" + payload = { + "content": query, + "locale": self.locale, + "mode": "search" if self.web_search else "chat", + "model": self.model, + "source": "all", + } + response = self.session.post(url, json=payload) + return response.json().get("data", {}).get("uuid", "DevsDoCode") + + def RAG_search(uuid: str) -> tuple[bool, list]: + """ + Performs a web search using the Retrieve And Generate (RAG) model. + + Args: + uuid (str): The UUID of the conversation. + + Returns: + tuple: A tuple containing a boolean indicating the success of the search + and a list of search result links. + """ + if not self.web_search: + return True, [] + url = f"{self.base_url}/rag-search" + payload = {"conv_uuid": uuid} + response = self.session.post(url, json=payload) + links = [source["link"] for source in response.json().get("data", [])] + return response.json().get("message", "").strip(), links + + def for_stream(): + conversation_uuid = initiate_conversation(conversation_prompt) + web_search_result, links = RAG_search(conversation_uuid) + if not web_search_result: + print("Failed to generate WEB response. Making normal Query...") + + url = f"{self.base_url}/chat" + payload = { + "role": "user", + "content": prompt, + "conv_uuid": conversation_uuid, + "model": self.model, + } + response = self.session.post(url, json=payload, stream=True) + complete_content = "" + for content in response.iter_content( + decode_unicode=True, chunk_size=self.chunk_size + ): + complete_content += content + yield content if raw else dict(text=complete_content) + self.last_response.update(dict(text=complete_content, links=links)) + self.conversation.update_chat_history( + prompt, self.get_message(self.last_response) + ) + + def for_non_stream(): + for _ in for_stream(): + pass + return self.last_response + + return for_stream() if stream else for_non_stream() + + def chat( + self, + prompt: str, + stream: bool = False, + optimizer: str = None, + conversationally: bool = False, + ) -> str: + """Generate response `str` + Args: + prompt (str): Prompt to be send. + stream (bool, optional): Flag for streaming response. Defaults to False. + optimizer (str, optional): Prompt optimizer name - `[code, shell_command]`. Defaults to None. + conversationally (bool, optional): Chat conversationally when using optimizer. Defaults to False. 
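+
+        Example (illustrative usage sketch, not part of the original docstring;
+        assumes the thinkany.ai endpoint is reachable):
+            >>> from webscout import ThinkAnyAI
+            >>> ai = ThinkAnyAI(model="claude-3-haiku", web_search=False)
+            >>> print(ai.chat("Summarise HTTP caching in one sentence"))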
+ Returns: + str: Response generated + """ + + def for_stream(): + for response in self.ask( + prompt, True, optimizer=optimizer, conversationally=conversationally + ): + yield self.get_message(response) + + def for_non_stream(): + return self.get_message( + self.ask( + prompt, + False, + optimizer=optimizer, + conversationally=conversationally, + ) + ) + + return for_stream() if stream else for_non_stream() + + def get_message(self, response: Dict[str, Any]) -> str: + """Retrieves message only from response + + Args: + response (dict): Response generated by `self.ask` + + Returns: + str: Message extracted + """ + assert isinstance(response, dict), "Response should be of dict data-type only" + return response["text"] \ No newline at end of file diff --git a/webscout/Provider/Xjai.py b/webscout/Provider/Xjai.py new file mode 100644 index 0000000000000000000000000000000000000000..cb463fef5115693c17ad0fc3ab4f1af4105887a0 --- /dev/null +++ b/webscout/Provider/Xjai.py @@ -0,0 +1,230 @@ +import time +import uuid +from selenium import webdriver +from selenium.webdriver.chrome.options import Options +from selenium.webdriver.common.by import By +from selenium.webdriver.support import expected_conditions as EC +from selenium.webdriver.support.ui import WebDriverWait +import click +import requests +from requests import get +from uuid import uuid4 +from re import findall +from requests.exceptions import RequestException +from curl_cffi.requests import get, RequestsError +import g4f +from random import randint +from PIL import Image +import io +import re +import json +import yaml +from ..AIutel import Optimizers +from ..AIutel import Conversation +from ..AIutel import AwesomePrompts, sanitize_stream +from ..AIbase import Provider, AsyncProvider +from webscout import exceptions +from typing import Any, AsyncGenerator, Dict +import logging +import httpx +#-----------------------------------------------xjai------------------------------------------- +class Xjai(Provider): + def __init__( + self, + is_conversation: bool = True, + max_tokens: int = 600, + temperature: float = 0.8, + top_p: float = 1, + timeout: int = 30, + intro: str = None, + filepath: str = None, + update_file: bool = True, + proxies: dict = {}, + history_offset: int = 10250, + act: str = None, + ): + """ + Initializes the Xjai class for interacting with the Xjai AI chat API. + + Args: + is_conversation (bool, optional): Flag for chatting conversationally. Defaults to True. + max_tokens (int, optional): Maximum number of tokens to be generated upon completion. Defaults to 600. + temperature (float, optional): The creativity level of the AI's response. Defaults to 0.8. + top_p (float, optional): The probability threshold for token selection. Defaults to 1. + timeout (int, optional): Http request timeout. Defaults to 30. + intro (str, optional): Conversation introductory prompt. Defaults to None. + filepath (str, optional): Path to file containing conversation history. Defaults to None. + update_file (bool, optional): Add new prompts and responses to the file. Defaults to True. + proxies (dict, optional): Http request proxies. Defaults to {}. + history_offset (int, optional): Limit conversation history to this number of last texts. Defaults to 10250. + act (str|int, optional): Awesome prompt key or index. (Used as intro). Defaults to None. 
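+
+        Example (illustrative usage sketch, not part of the original docstring;
+        assumes the p1api.xjai.pro endpoint is reachable):
+            >>> from webscout import Xjai
+            >>> ai = Xjai(temperature=0.8, timeout=30)
+            >>> print(ai.chat("Tell me a short joke"))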
+        """
+        self.session = requests.Session()
+        self.is_conversation = is_conversation
+        self.max_tokens_to_sample = max_tokens
+        self.temperature = temperature
+        self.top_p = top_p
+        self.chat_endpoint = "https://p1api.xjai.pro/freeapi/chat-process"
+        self.stream_chunk_size = 1  # Process response line by line
+        self.timeout = timeout
+        self.last_response = {}
+
+        self.__available_optimizers = (
+            method
+            for method in dir(Optimizers)
+            if callable(getattr(Optimizers, method)) and not method.startswith("__")
+        )
+        Conversation.intro = (
+            AwesomePrompts().get_act(
+                act, raise_not_found=True, default=None, case_insensitive=True
+            )
+            if act
+            else intro or Conversation.intro
+        )
+        self.conversation = Conversation(
+            is_conversation, self.max_tokens_to_sample, filepath, update_file
+        )
+        self.conversation.history_offset = history_offset
+        self.session.proxies = proxies
+
+    def ask(
+        self,
+        prompt: str,
+        stream: bool = False,
+        raw: bool = False,
+        optimizer: str = None,
+        conversationally: bool = False,
+    ) -> Any:
+        """
+        Sends a chat request to the Xjai AI chat API and returns the response.
+
+        Args:
+            prompt (str): The query to send to the AI.
+            stream (bool, optional): Flag for streaming response. Defaults to False.
+            raw (bool, optional): Stream back raw response as received. Defaults to False.
+            optimizer (str, optional): Prompt optimizer name - `[code, shell_command]`. Defaults to None.
+            conversationally (bool, optional): Chat conversationally when using optimizer. Defaults to False.
+
+        Returns:
+            Any: The response from the AI, either as a dictionary or a generator
+                depending on the `stream` and `raw` parameters.
+        """
+        conversation_prompt = self.conversation.gen_complete_prompt(prompt)
+        if optimizer:
+            if optimizer in self.__available_optimizers:
+                conversation_prompt = getattr(Optimizers, optimizer)(
+                    conversation_prompt if conversationally else prompt
+                )
+            else:
+                raise Exception(
+                    f"Optimizer is not one of {self.__available_optimizers}"
+                )
+
+        headers = {
+            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
+            "(KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36"
+        }
+
+        payload = {
+            "prompt": conversation_prompt + "\n\nReply in English Only",
+            "systemMessage": "Reply in English Only",
+            "temperature": self.temperature,
+            "top_p": self.top_p
+        }
+
+        def generate_response():
+            response = self.session.post(
+                self.chat_endpoint, headers=headers, json=payload, stream=True, timeout=self.timeout
+            )
+            output = ""
+            print_next = False
+
+            for line_content in response.iter_lines(
+                decode_unicode=True, chunk_size=self.stream_chunk_size
+            ):
+                # iter_lines(decode_unicode=True) already yields `str`, so no
+                # extra .decode() call is needed here.
+                # Filter out irrelevant content
+                if '[ChatAI](https://srv.aiflarepro.com/#/?cid=4111)' in line_content:
+                    continue
+                if '&KFw6loC9Qvy&' in line_content:
+                    parts = line_content.split('&KFw6loC9Qvy&')
+                    if print_next:
+                        output += parts[0]
+                        print_next = False
+                    else:
+                        output += parts[1]
+                        print_next = True
+                    if len(parts) > 2:
+                        print_next = False
+                elif print_next:
+                    output += line_content + '\n'
+
+            # Update chat history
+            self.conversation.update_chat_history(prompt, output)
+
+            return output
+
+        def for_stream():
+            response = generate_response()
+            for line in response.splitlines():
+                yield line if raw else dict(text=line)
+
+        def for_non_stream():
+            response = generate_response()
+            return response if raw else dict(text=response)
+
+        return for_stream() if stream else for_non_stream()
+
+    def chat(
+        self,
+        prompt: str,
+        stream: bool = False,
+        optimizer: str = None,
+
conversationally: bool = False, + ) -> Any: + """ + Generates a response from the Xjai AI chat API. + + Args: + prompt (str): The query to send to the AI. + stream (bool, optional): Flag for streaming response. Defaults to False. + optimizer (str, optional): Prompt optimizer name - `[code, shell_command]`. Defaults to None. + conversationally (bool, optional): Chat conversationally when using optimizer. Defaults to False. + + Returns: + Any: The response from the AI, either as a string or a generator + depending on the `stream` parameter. + """ + + def for_stream(): + for response in self.ask( + prompt, True, optimizer=optimizer, conversationally=conversationally + ): + yield self.get_message(response) + + def for_non_stream(): + return self.get_message( + self.ask( + prompt, + False, + optimizer=optimizer, + conversationally=conversationally, + ) + ) + + return for_stream() if stream else for_non_stream() + + def get_message(self, response: Any) -> str: + """ + Retrieves the message from the AI's response. + + Args: + response (Any): The response from the AI, either a dictionary + or a raw string. + + Returns: + str: The extracted message from the AI's response. + """ + if isinstance(response, dict): + return response["text"] + else: # Assume raw string + return response + diff --git a/webscout/Provider/Yepchat.py b/webscout/Provider/Yepchat.py new file mode 100644 index 0000000000000000000000000000000000000000..224ce9946261c8c4e991509b95d2c1ab478da88c --- /dev/null +++ b/webscout/Provider/Yepchat.py @@ -0,0 +1,478 @@ +import time +import uuid +from selenium import webdriver +from selenium.webdriver.chrome.options import Options +from selenium.webdriver.common.by import By +from selenium.webdriver.support import expected_conditions as EC +from selenium.webdriver.support.ui import WebDriverWait +import click +import requests +from requests import get +from uuid import uuid4 +from re import findall +from requests.exceptions import RequestException +from curl_cffi.requests import get, RequestsError +import g4f +from random import randint +from PIL import Image +import io +import re +import json +import yaml +from ..AIutel import Optimizers +from ..AIutel import Conversation +from ..AIutel import AwesomePrompts, sanitize_stream +from ..AIbase import Provider, AsyncProvider +from Helpingai_T2 import Perplexity +from webscout import exceptions +from typing import Any, AsyncGenerator, Dict +import logging +import httpx + +#-------------------------------------------------------yep.com-------------------------------------------------------- +class YEPCHAT(Provider): + def __init__( + self, + is_conversation: bool = True, + max_tokens: int = 600, + temperature: float = 0.6, + presence_penalty: int = 0, + frequency_penalty: int = 0, + top_p: float = 0.7, + model: str = "Mixtral-8x7B-Instruct-v0.1", + timeout: int = 30, + intro: str = None, + filepath: str = None, + update_file: bool = True, + proxies: dict = {}, + history_offset: int = 10250, + act: str = None, + ): + """Instantiates YEPCHAT + + Args: + is_conversation (bool, optional): Flag for chatting conversationally. Defaults to True. + max_tokens (int, optional): Maximum number of tokens to be generated upon completion. Defaults to 600. + temperature (float, optional): Charge of the generated text's randomness. Defaults to 0.6. + presence_penalty (int, optional): Chances of topic being repeated. Defaults to 0. + frequency_penalty (int, optional): Chances of word being repeated. Defaults to 0. 
+ top_p (float, optional): Sampling threshold during inference time. Defaults to 0.7. + model (str, optional): LLM model name. Defaults to "gpt-3.5-turbo". + timeout (int, optional): Http request timeout. Defaults to 30. + intro (str, optional): Conversation introductory prompt. Defaults to None. + filepath (str, optional): Path to file containing conversation history. Defaults to None. + update_file (bool, optional): Add new prompts and responses to the file. Defaults to True. + proxies (dict, optional): Http request proxies. Defaults to {}. + history_offset (int, optional): Limit conversation history to this number of last texts. Defaults to 10250. + act (str|int, optional): Awesome prompt key or index. (Used as intro). Defaults to None. + """ + self.session = requests.Session() + self.is_conversation = is_conversation + self.max_tokens_to_sample = max_tokens + self.model = model + self.temperature = temperature + self.presence_penalty = presence_penalty + self.frequency_penalty = frequency_penalty + self.top_p = top_p + self.chat_endpoint = "https://api.yep.com/v1/chat/completions" + self.stream_chunk_size = 64 + self.timeout = timeout + self.last_response = {} + self.headers = { + "Accept": "*/*", + "Accept-Encoding": "gzip, deflate", + "Accept-Language": "en-US,en;q=0.9", + "Content-Type": "application/json; charset=utf-8", + "Origin": "https://yep.com", + "Referer": "https://yep.com/", + "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36", + } + + self.__available_optimizers = ( + method + for method in dir(Optimizers) + if callable(getattr(Optimizers, method)) and not method.startswith("__") + ) + self.session.headers.update(self.headers) + Conversation.intro = ( + AwesomePrompts().get_act( + act, raise_not_found=True, default=None, case_insensitive=True + ) + if act + else intro or Conversation.intro + ) + self.conversation = Conversation( + is_conversation, self.max_tokens_to_sample, filepath, update_file + ) + self.conversation.history_offset = history_offset + self.session.proxies = proxies + + def ask( + self, + prompt: str, + stream: bool = False, + raw: bool = False, + optimizer: str = None, + conversationally: bool = False, + ) -> dict: + """Chat with AI + + Args: + prompt (str): Prompt to be send. + stream (bool, optional): Flag for streaming response. Defaults to False. + raw (bool, optional): Stream back raw response as received. Defaults to False. + optimizer (str, optional): Prompt optimizer name - `[code, shell_command]`. Defaults to None. + conversationally (bool, optional): Chat conversationally when using optimizer. Defaults to False. + Returns: + dict : {} + ```json + { + "id": "cmpl-c61c1c88de4e4ad3a79134775d17ea0c", + "object": "chat.completion.chunk", + "created": 1713876886, + "model": "Mixtral-8x7B-Instruct-v0.1", + "choices": [ + { + "index": 0, + "delta": { + "role": null, + "content": " Sure, I can help with that. Are you looking for information on how to start coding, or do you need help with a specific coding problem? We can discuss various programming languages like Python, JavaScript, Java, C++, or others. Please provide more details so I can assist you better." 
+ }, + "finish_reason": null + } + ] + } + ``` + """ + conversation_prompt = self.conversation.gen_complete_prompt(prompt) + if optimizer: + if optimizer in self.__available_optimizers: + conversation_prompt = getattr(Optimizers, optimizer)( + conversation_prompt if conversationally else prompt + ) + else: + raise Exception( + f"Optimizer is not one of {self.__available_optimizers}" + ) + self.session.headers.update(self.headers) + payload = { + "stream": True, + "max_tokens": 1280, + "top_p": self.top_p, + "temperature": self.temperature, + "messages": [{"content": conversation_prompt, "role": "user"}], + "model": self.model, + } + + def for_stream(): + response = self.session.post( + self.chat_endpoint, json=payload, stream=True, timeout=self.timeout + ) + if not response.ok: + raise exceptions.FailedToGenerateResponseError( + f"Failed to generate response - ({response.status_code}, {response.reason}) - {response.text}" + ) + + message_load = "" + for value in response.iter_lines( + decode_unicode=True, + delimiter="" if raw else "data:", + chunk_size=self.stream_chunk_size, + ): + try: + resp = json.loads(value) + incomplete_message = self.get_message(resp) + if incomplete_message: + message_load += incomplete_message + resp["choices"][0]["delta"]["content"] = message_load + self.last_response.update(resp) + yield value if raw else resp + elif raw: + yield value + except json.decoder.JSONDecodeError: + pass + self.conversation.update_chat_history( + prompt, self.get_message(self.last_response) + ) + + def for_non_stream(): + for _ in for_stream(): + pass + return self.last_response + + return for_stream() if stream else for_non_stream() + + def chat( + self, + prompt: str, + stream: bool = False, + optimizer: str = None, + conversationally: bool = False, + ) -> str: + """Generate response `str` + Args: + prompt (str): Prompt to be send. + stream (bool, optional): Flag for streaming response. Defaults to False. + optimizer (str, optional): Prompt optimizer name - `[code, shell_command]`. Defaults to None. + conversationally (bool, optional): Chat conversationally when using optimizer. Defaults to False. 
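+
+        Example (illustrative usage sketch, not part of the original docstring;
+        assumes the api.yep.com endpoint is reachable):
+            >>> from webscout import YEPCHAT
+            >>> ai = YEPCHAT(model="Mixtral-8x7B-Instruct-v0.1")
+            >>> print(ai.chat("Give me one tip for writing clean Python"))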
+ Returns: + str: Response generated + """ + + def for_stream(): + for response in self.ask( + prompt, True, optimizer=optimizer, conversationally=conversationally + ): + yield self.get_message(response) + + def for_non_stream(): + return self.get_message( + self.ask( + prompt, + False, + optimizer=optimizer, + conversationally=conversationally, + ) + ) + + return for_stream() if stream else for_non_stream() + + def get_message(self, response: dict) -> str: + """Retrieves message only from response + + Args: + response (dict): Response generated by `self.ask` + + Returns: + str: Message extracted + """ + assert isinstance(response, dict), "Response should be of dict data-type only" + try: + if response["choices"][0].get("delta"): + return response["choices"][0]["delta"]["content"] + return response["choices"][0]["message"]["content"] + except KeyError: + return "" +class AsyncYEPCHAT(AsyncProvider): + def __init__( + self, + is_conversation: bool = True, + max_tokens: int = 600, + temperature: float = 0.6, + presence_penalty: int = 0, + frequency_penalty: int = 0, + top_p: float = 0.7, + model: str = "Mixtral-8x7B-Instruct-v0.1", + timeout: int = 30, + intro: str = None, + filepath: str = None, + update_file: bool = True, + proxies: dict = {}, + history_offset: int = 10250, + act: str = None, + ): + """Instantiates YEPCHAT + + Args: + is_conversation (bool, optional): Flag for chatting conversationally. Defaults to True. + max_tokens (int, optional): Maximum number of tokens to be generated upon completion. Defaults to 600. + temperature (float, optional): Charge of the generated text's randomness. Defaults to 0.6. + presence_penalty (int, optional): Chances of topic being repeated. Defaults to 0. + frequency_penalty (int, optional): Chances of word being repeated. Defaults to 0. + top_p (float, optional): Sampling threshold during inference time. Defaults to 0.7. + model (str, optional): LLM model name. Defaults to "gpt-3.5-turbo". + timeout (int, optional): Http request timeout. Defaults to 30. + intro (str, optional): Conversation introductory prompt. Defaults to None. + filepath (str, optional): Path to file containing conversation history. Defaults to None. + update_file (bool, optional): Add new prompts and responses to the file. Defaults to True. + proxies (dict, optional): Http request proxies. Defaults to {}. + history_offset (int, optional): Limit conversation history to this number of last texts. Defaults to 10250. + act (str|int, optional): Awesome prompt key or index. (Used as intro). Defaults to None. 
+ """ + self.session = requests.Session() + self.is_conversation = is_conversation + self.max_tokens_to_sample = max_tokens + self.model = model + self.temperature = temperature + self.presence_penalty = presence_penalty + self.frequency_penalty = frequency_penalty + self.top_p = top_p + self.chat_endpoint = "https://api.yep.com/v1/chat/completions" + self.stream_chunk_size = 64 + self.timeout = timeout + self.last_response = {} + self.headers = { + "Accept": "*/*", + "Accept-Encoding": "gzip, deflate", + "Accept-Language": "en-US,en;q=0.9", + "Content-Type": "application/json; charset=utf-8", + "Origin": "https://yep.com", + "Referer": "https://yep.com/", + "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36", + } + + self.__available_optimizers = ( + method + for method in dir(Optimizers) + if callable(getattr(Optimizers, method)) and not method.startswith("__") + ) + Conversation.intro = ( + AwesomePrompts().get_act( + act, raise_not_found=True, default=None, case_insensitive=True + ) + if act + else intro or Conversation.intro + ) + self.conversation = Conversation( + is_conversation, self.max_tokens_to_sample, filepath, update_file + ) + self.conversation.history_offset = history_offset + self.session = httpx.AsyncClient( + headers=self.headers, + proxies=proxies, + ) + + async def ask( + self, + prompt: str, + stream: bool = False, + raw: bool = False, + optimizer: str = None, + conversationally: bool = False, + ) -> dict: + """Chat with AI asynchronously. + + Args: + prompt (str): Prompt to be send. + stream (bool, optional): Flag for streaming response. Defaults to False. + raw (bool, optional): Stream back raw response as received. Defaults to False. + optimizer (str, optional): Prompt optimizer name - `[code, shell_command]`. Defaults to None. + conversationally (bool, optional): Chat conversationally when using optimizer. Defaults to False. + Returns: + dict : {} + ```json + { + "id": "cmpl-c61c1c88de4e4ad3a79134775d17ea0c", + "object": "chat.completion.chunk", + "created": 1713876886, + "model": "Mixtral-8x7B-Instruct-v0.1", + "choices": [ + { + "index": 0, + "delta": { + "role": null, + "content": " Sure, I can help with that. Are you looking for information on how to start coding, or do you need help with a specific coding problem? We can discuss various programming languages like Python, JavaScript, Java, C++, or others. Please provide more details so I can assist you better." 
+ }, + "finish_reason": null + } + ] + } + ``` + """ + conversation_prompt = self.conversation.gen_complete_prompt(prompt) + if optimizer: + if optimizer in self.__available_optimizers: + conversation_prompt = getattr(Optimizers, optimizer)( + conversation_prompt if conversationally else prompt + ) + else: + raise Exception( + f"Optimizer is not one of {self.__available_optimizers}" + ) + payload = { + "stream": True, + "max_tokens": 1280, + "top_p": self.top_p, + "temperature": self.temperature, + "messages": [{"content": conversation_prompt, "role": "user"}], + "model": self.model, + } + + async def for_stream(): + async with self.session.stream( + "POST", self.chat_endpoint, json=payload, timeout=self.timeout + ) as response: + if not response.is_success: + raise exceptions.FailedToGenerateResponseError( + f"Failed to generate response - ({response.status_code}, {response.reason_phrase}) - {response.text}" + ) + + message_load = "" + async for value in response.aiter_lines(): + try: + resp = sanitize_stream(value) + incomplete_message = await self.get_message(resp) + if incomplete_message: + message_load += incomplete_message + resp["choices"][0]["delta"]["content"] = message_load + self.last_response.update(resp) + yield value if raw else resp + elif raw: + yield value + except json.decoder.JSONDecodeError: + pass + + self.conversation.update_chat_history( + prompt, await self.get_message(self.last_response) + ) + + async def for_non_stream(): + async for _ in for_stream(): + pass + return self.last_response + + return for_stream() if stream else await for_non_stream() + + async def chat( + self, + prompt: str, + stream: bool = False, + optimizer: str = None, + conversationally: bool = False, + ) -> str: + """Generate response `str` asynchronously. + Args: + prompt (str): Prompt to be send. + stream (bool, optional): Flag for streaming response. Defaults to False. + optimizer (str, optional): Prompt optimizer name - `[code, shell_command]`. Defaults to None. + conversationally (bool, optional): Chat conversationally when using optimizer. Defaults to False. 
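+
+        Example (illustrative usage sketch, not part of the original docstring;
+        assumes the api.yep.com endpoint is reachable and no event loop is
+        already running):
+            >>> import asyncio
+            >>> from webscout import AsyncYEPCHAT
+            >>> ai = AsyncYEPCHAT()
+            >>> print(asyncio.run(ai.chat("Hello there")))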
+ Returns: + str: Response generated + """ + + async def for_stream(): + async_ask = await self.ask( + prompt, True, optimizer=optimizer, conversationally=conversationally + ) + + async for response in async_ask: + yield await self.get_message(response) + + async def for_non_stream(): + return await self.get_message( + await self.ask( + prompt, + False, + optimizer=optimizer, + conversationally=conversationally, + ) + ) + + return for_stream() if stream else await for_non_stream() + + async def get_message(self, response: dict) -> str: + """Retrieves message only from response + + Args: + response (dict): Response generated by `self.ask` + + Returns: + str: Message extracted + """ + assert isinstance(response, dict), "Response should be of dict data-type only" + try: + if response["choices"][0].get("delta"): + return response["choices"][0]["delta"]["content"] + return response["choices"][0]["message"]["content"] + except KeyError: + return "" \ No newline at end of file diff --git a/webscout/Provider/Youchat.py b/webscout/Provider/Youchat.py new file mode 100644 index 0000000000000000000000000000000000000000..09478f2597573a1d56219c533b15e5b172b0155c --- /dev/null +++ b/webscout/Provider/Youchat.py @@ -0,0 +1,221 @@ +import time +import uuid +from selenium import webdriver +from selenium.webdriver.chrome.options import Options +from selenium.webdriver.common.by import By +from selenium.webdriver.support import expected_conditions as EC +from selenium.webdriver.support.ui import WebDriverWait +import click +import requests +from requests import get +from uuid import uuid4 +from re import findall +from requests.exceptions import RequestException +from curl_cffi.requests import get, RequestsError +import g4f +from random import randint +from PIL import Image +import io +import re +import json +import yaml +from ..AIutel import Optimizers +from ..AIutel import Conversation +from ..AIutel import AwesomePrompts, sanitize_stream +from ..AIbase import Provider, AsyncProvider +from Helpingai_T2 import Perplexity +from webscout import exceptions +from typing import Any, AsyncGenerator, Dict +import logging +import httpx + +#-------------------------------------------------------youchat-------------------------------------------------------- +class YouChat(Provider): + def __init__( + self, + is_conversation: bool = True, + max_tokens: int = 600, + timeout: int = 30, + intro: str = None, + filepath: str = None, + update_file: bool = True, + proxies: dict = {}, + history_offset: int = 10250, + act: str = None, + ): + self.session = requests.Session() + self.is_conversation = is_conversation + self.max_tokens_to_sample = max_tokens + self.chat_endpoint = "https://you.com/api/streamingSearch" + self.stream_chunk_size = 64 + self.timeout = timeout + self.last_response = {} + + self.payload = { + "q": "", + "page": 1, + "count": 10, + "safeSearch": "Off", + "onShoppingPage": False, + "mkt": "", + "responseFilter": "WebPages,Translations,TimeZone,Computation,RelatedSearches", + "domain": "youchat", + "queryTraceId": uuid.uuid4(), + "conversationTurnId": uuid.uuid4(), + "pastChatLength": 0, + "selectedChatMode": "default", + "chat": "[]", + } + + self.headers = { + "cache-control": "no-cache", + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36', + 'Referer': f'https://you.com/search?q={self.payload["q"]}&fromSearchBar=true&tbm=youchat&chatMode=default' + } + + self.__available_optimizers = ( + method + for method in dir(Optimizers) 
+            if callable(getattr(Optimizers, method)) and not method.startswith("__")
+        )
+        self.session.headers.update(self.headers)
+        Conversation.intro = (
+            AwesomePrompts().get_act(
+                act, raise_not_found=True, default=None, case_insensitive=True
+            )
+            if act
+            else intro or Conversation.intro
+        )
+        self.conversation = Conversation(
+            is_conversation, self.max_tokens_to_sample, filepath, update_file
+        )
+        self.conversation.history_offset = history_offset
+        self.session.proxies = proxies
+
+    def ask(
+        self,
+        prompt: str,
+        stream: bool = False,
+        raw: bool = False,
+        optimizer: str = None,
+        conversationally: bool = False,
+    ) -> dict:
+        conversation_prompt = self.conversation.gen_complete_prompt(prompt)
+        if optimizer:
+            if optimizer in self.__available_optimizers:
+                conversation_prompt = getattr(Optimizers, optimizer)(
+                    conversation_prompt if conversationally else prompt
+                )
+            else:
+                raise Exception(
+                    f"Optimizer is not one of {self.__available_optimizers}"
+                )
+        self.session.headers.update(self.headers)
+        self.session.headers.update(
+            dict(
+                cookie=f"safesearch_guest=Off; uuid_guest={str(uuid4())}",
+            )
+        )
+        self.payload["q"] = prompt
+
+        def for_stream():
+            response = self.session.get(
+                self.chat_endpoint,
+                params=self.payload,
+                stream=True,
+                timeout=self.timeout,
+            )
+
+            if not response.ok:
+                raise exceptions.FailedToGenerateResponseError(
+                    f"Failed to generate response - ({response.status_code}, {response.reason})"
+                )
+
+            streaming_response = ""
+            for line in response.iter_lines(decode_unicode=True, chunk_size=64):
+                if line:
+                    modified_value = re.sub("data:", "", line)
+                    try:
+                        json_modified_value = json.loads(modified_value)
+                        if "youChatToken" in json_modified_value:
+                            streaming_response += json_modified_value["youChatToken"]
+                            # Yield the accumulated text so far, matching the
+                            # streaming contract of the other providers.
+                            yield line if raw else dict(text=streaming_response)
+                    except json.decoder.JSONDecodeError:
+                        continue
+            self.last_response.update(dict(text=streaming_response))
+            self.conversation.update_chat_history(
+                prompt, self.get_message(self.last_response)
+            )
+
+        def for_non_stream():
+            for _ in for_stream():
+                pass
+            return self.last_response
+
+        return for_stream() if stream else for_non_stream()
+
+    def chat(
+        self,
+        prompt: str,
+        stream: bool = False,
+        optimizer: str = None,
+        conversationally: bool = False,
+    ) -> str:
+        """Generate response `str`
+        Args:
+            prompt (str): Prompt to be send.
+            stream (bool, optional): Flag for streaming response. Defaults to False.
+            optimizer (str, optional): Prompt optimizer name - `[code, shell_command]`. Defaults to None.
+            conversationally (bool, optional): Chat conversationally when using optimizer. Defaults to False.
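+
+        Example (illustrative usage sketch, not part of the original docstring;
+        assumes you.com is reachable from your network):
+            >>> from webscout import YouChat
+            >>> ai = YouChat(timeout=30)
+            >>> print(ai.chat("What is Python used for?"))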
+ Returns: + str: Response generated + """ + + def for_stream(): + for response in self.ask( + prompt, True, optimizer=optimizer, conversationally=conversationally + ): + yield self.get_message(response) + + def for_non_stream(): + return self.get_message( + self.ask( + prompt, + False, + optimizer=optimizer, + conversationally=conversationally, + ) + ) + + return for_stream() if stream else for_non_stream() + + def get_message(self, response: dict) -> str: + """Retrieves message only from response + + Args: + response (dict): Response generated by `self.ask` + + Returns: + str: Message extracted + """ + assert isinstance(response, dict), "Response should be of dict data-type only" + return response["text"] \ No newline at end of file diff --git a/webscout/Provider/__init__.py b/webscout/Provider/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..8bbc2473a6cbd6a667e77866fe66a109b3f598ba --- /dev/null +++ b/webscout/Provider/__init__.py @@ -0,0 +1,61 @@ +# webscout/providers/__init__.py + +from .ThinkAnyAI import ThinkAnyAI +from .Xjai import Xjai +from .Llama2 import LLAMA2 +from .Llama2 import AsyncLLAMA2 +from .Cohere import Cohere +from .Reka import REKA +from .Groq import GROQ +from .Groq import AsyncGROQ +from .Openai import OPENAI +from .Openai import AsyncOPENAI +from .Leo import LEO +from .Leo import AsyncLEO +from .Koboldai import KOBOLDAI +from .Koboldai import AsyncKOBOLDAI +from .OpenGPT import OPENGPT +from .OpenGPT import AsyncOPENGPT +from .Perplexity import PERPLEXITY +from .Blackboxai import BLACKBOXAI +from .Blackboxai import AsyncBLACKBOXAI +from .Phind import PhindSearch +from .Phind import AsyncPhindSearch +from .Yepchat import YEPCHAT +from .Yepchat import AsyncYEPCHAT +from .Youchat import YouChat +from .Gemini import GEMINI +from .Berlin4h import Berlin4h +from .ChatGPTUK import ChatGPTUK +from .Poe import POE +from .BasedGPT import * +__all__ = [ + 'ThinkAnyAI', + 'Xjai', + 'LLAMA2', + 'AsyncLLAMA2', + 'Cohere', + 'REKA', + 'GROQ', + 'AsyncGROQ', + 'OPENAI', + 'AsyncOPENAI', + 'LEO', + 'AsyncLEO', + 'KOBOLDAI', + 'AsyncKOBOLDAI', + 'OPENGPT', + 'AsyncOPENGPT', + 'PERPLEXITY', + 'BLACKBOXAI', + 'AsyncBLACKBOXAI', + 'PhindSearch', + 'AsyncPhindSearch', + 'YEPCHAT', + 'AsyncYEPCHAT', + 'YouChat', + 'GEMINI', + 'Berlin4h', + 'ChatGPTUK', + 'POE' +] \ No newline at end of file diff --git a/webscout/Provider/__pycache__/BasedGPT.cpython-311.pyc b/webscout/Provider/__pycache__/BasedGPT.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a5af2dbdcbf108242706b3933699479c56b5198d Binary files /dev/null and b/webscout/Provider/__pycache__/BasedGPT.cpython-311.pyc differ diff --git a/webscout/Provider/__pycache__/Berlin4h.cpython-311.pyc b/webscout/Provider/__pycache__/Berlin4h.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..600d81bd7975913f45b8ec55a90d50a2449711f0 Binary files /dev/null and b/webscout/Provider/__pycache__/Berlin4h.cpython-311.pyc differ diff --git a/webscout/Provider/__pycache__/Blackboxai.cpython-311.pyc b/webscout/Provider/__pycache__/Blackboxai.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..72fdf8d1eed0bfa24701191fd20c23b5a30c42b3 Binary files /dev/null and b/webscout/Provider/__pycache__/Blackboxai.cpython-311.pyc differ diff --git a/webscout/Provider/__pycache__/ChatGPTUK.cpython-311.pyc b/webscout/Provider/__pycache__/ChatGPTUK.cpython-311.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..32c46f3f1c943fcbad8e79c1a4e8444ea1e95ab6 Binary files /dev/null and b/webscout/Provider/__pycache__/ChatGPTUK.cpython-311.pyc differ diff --git a/webscout/Provider/__pycache__/ChatGPTlogin.cpython-311.pyc b/webscout/Provider/__pycache__/ChatGPTlogin.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..85dcccd22a8779787bd3ae7292b36033c92364e9 Binary files /dev/null and b/webscout/Provider/__pycache__/ChatGPTlogin.cpython-311.pyc differ diff --git a/webscout/Provider/__pycache__/Cohere.cpython-311.pyc b/webscout/Provider/__pycache__/Cohere.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..dd1df5126a2ed5f4f843b0af027ed434472f48e1 Binary files /dev/null and b/webscout/Provider/__pycache__/Cohere.cpython-311.pyc differ diff --git a/webscout/Provider/__pycache__/Gemini.cpython-311.pyc b/webscout/Provider/__pycache__/Gemini.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..91773dbcd1b5305d3ab30c15628c75e36c7234eb Binary files /dev/null and b/webscout/Provider/__pycache__/Gemini.cpython-311.pyc differ diff --git a/webscout/Provider/__pycache__/Groq.cpython-311.pyc b/webscout/Provider/__pycache__/Groq.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f79aabc729c9b1e446100af3b8fe673e702ccb18 Binary files /dev/null and b/webscout/Provider/__pycache__/Groq.cpython-311.pyc differ diff --git a/webscout/Provider/__pycache__/Koboldai.cpython-311.pyc b/webscout/Provider/__pycache__/Koboldai.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..eb54b2d0fbc53f8e600b585151dcce198c36e024 Binary files /dev/null and b/webscout/Provider/__pycache__/Koboldai.cpython-311.pyc differ diff --git a/webscout/Provider/__pycache__/Leo.cpython-311.pyc b/webscout/Provider/__pycache__/Leo.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..96530763607718b555aa6cd3014279f59558a090 Binary files /dev/null and b/webscout/Provider/__pycache__/Leo.cpython-311.pyc differ diff --git a/webscout/Provider/__pycache__/Llama2.cpython-311.pyc b/webscout/Provider/__pycache__/Llama2.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7dea3471a0d138f393a42ff39f7c8b59ceb065d7 Binary files /dev/null and b/webscout/Provider/__pycache__/Llama2.cpython-311.pyc differ diff --git a/webscout/Provider/__pycache__/OpenGPT.cpython-311.pyc b/webscout/Provider/__pycache__/OpenGPT.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..005b94256cbb7adb3b674432b6b289ae58768707 Binary files /dev/null and b/webscout/Provider/__pycache__/OpenGPT.cpython-311.pyc differ diff --git a/webscout/Provider/__pycache__/Openai.cpython-311.pyc b/webscout/Provider/__pycache__/Openai.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6d7bc56888f986b8e5558c8dfe5e145574310173 Binary files /dev/null and b/webscout/Provider/__pycache__/Openai.cpython-311.pyc differ diff --git a/webscout/Provider/__pycache__/Perplexity.cpython-311.pyc b/webscout/Provider/__pycache__/Perplexity.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a8dac9c4934ad49816efb5e34480fb78939c3ab0 Binary files /dev/null and b/webscout/Provider/__pycache__/Perplexity.cpython-311.pyc differ diff --git a/webscout/Provider/__pycache__/Phind.cpython-311.pyc b/webscout/Provider/__pycache__/Phind.cpython-311.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..4eb37619c212217153429a4026f2582c5b44fba2 Binary files /dev/null and b/webscout/Provider/__pycache__/Phind.cpython-311.pyc differ diff --git a/webscout/Provider/__pycache__/Poe.cpython-311.pyc b/webscout/Provider/__pycache__/Poe.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b5711beeb1be32c0cea9d81a34be849e8453d3dd Binary files /dev/null and b/webscout/Provider/__pycache__/Poe.cpython-311.pyc differ diff --git a/webscout/Provider/__pycache__/Reka.cpython-311.pyc b/webscout/Provider/__pycache__/Reka.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5c6001f708e513a7bd05a4f864cd906aa1fdab01 Binary files /dev/null and b/webscout/Provider/__pycache__/Reka.cpython-311.pyc differ diff --git a/webscout/Provider/__pycache__/ThinkAnyAI.cpython-311.pyc b/webscout/Provider/__pycache__/ThinkAnyAI.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4cd85f5202056f901842fef63009a32d359e5049 Binary files /dev/null and b/webscout/Provider/__pycache__/ThinkAnyAI.cpython-311.pyc differ diff --git a/webscout/Provider/__pycache__/Xjai.cpython-311.pyc b/webscout/Provider/__pycache__/Xjai.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1119211fba83273a7361e845d695327a04361d81 Binary files /dev/null and b/webscout/Provider/__pycache__/Xjai.cpython-311.pyc differ diff --git a/webscout/Provider/__pycache__/Yepchat.cpython-311.pyc b/webscout/Provider/__pycache__/Yepchat.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7d24cafa286246f64dfa2812a73a2b1c438e1117 Binary files /dev/null and b/webscout/Provider/__pycache__/Yepchat.cpython-311.pyc differ diff --git a/webscout/Provider/__pycache__/Youchat.cpython-311.pyc b/webscout/Provider/__pycache__/Youchat.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2ea460140093c8cc50e1a4de97b99f0011b19384 Binary files /dev/null and b/webscout/Provider/__pycache__/Youchat.cpython-311.pyc differ diff --git a/webscout/Provider/__pycache__/__init__.cpython-311.pyc b/webscout/Provider/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..88cfbbf9fa292a99be5463605aedc3f751f37346 Binary files /dev/null and b/webscout/Provider/__pycache__/__init__.cpython-311.pyc differ diff --git a/webscout/__init__.py b/webscout/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..6adc5002aec0b4cd377d4a0c244d0fb4146e00bb --- /dev/null +++ b/webscout/__init__.py @@ -0,0 +1,93 @@ +from .webscout_search import WEBS +from .webscout_search_async import AsyncWEBS +from .version import __version__ +from .DWEBS import DeepWEBS +from .transcriber import transcriber +from .voice import play_audio +# from .tempid import Client as TempMailClient, TemporaryPhoneNumber +from .LLM import LLM +# from .Local import * +import g4f +# Import provider classes for direct access +from .Provider import * + +__repo__ = "https://github.com/OE-LUCIFER/Webscout" + +webai = [ + "leo", + "openai", + "opengpt", + "koboldai", + "gemini", + "phind", + "blackboxai", + "g4fauto", + "perplexity", + "groq", + "reka", + "cohere", + "yepchat", + "you", + "xjai", + "thinkany", + "berlin4h", + "chatgptuk", + "auto", + "poe", +] + +gpt4free_providers = [ + provider.__name__ for provider in g4f.Provider.__providers__ # if provider.working +] + +available_providers = webai + gpt4free_providers + +# Add all the provider 
classes, Localai models, Thread, and Model to __all__ +__all__ = [ + "WEBS", + "AsyncWEBS", + "__version__", + "DeepWEBS", + "transcriber", + "play_audio", + "TempMailClient", + "TemporaryPhoneNumber", + "LLM", + # Localai models and utilities + # "Model", + # "Thread", + # "formats", + + # AI Providers + "ThinkAnyAI", + "Xjai", + "LLAMA2", + "AsyncLLAMA2", + "Cohere", + "REKA", + "GROQ", + "AsyncGROQ", + "OPENAI", + "AsyncOPENAI", + "LEO", + "AsyncLEO", + "KOBOLDAI", + "AsyncKOBOLDAI", + "OPENGPT", + "AsyncOPENGPT", + "PERPLEXITY", + "BLACKBOXAI", + "AsyncBLACKBOXAI", + "PhindSearch", + "AsyncPhindSearch", + "YEPCHAT", + "AsyncYEPCHAT", + "YouChat", + "GEMINI", + "Berlin4h", + "ChatGPTUK", + "POE" +] + +import logging +logging.getLogger("webscout").addHandler(logging.NullHandler()) diff --git a/webscout/__main__.py b/webscout/__main__.py new file mode 100644 index 0000000000000000000000000000000000000000..2fbad654ccbfab46afffcf963aa49a574f48a13d --- /dev/null +++ b/webscout/__main__.py @@ -0,0 +1,5 @@ +"""For using as 'python3 -m webscout'.""" +from .cli import cli + +if __name__ == "__main__": + cli(prog_name="webscout") diff --git a/webscout/__pycache__/AIbase.cpython-311.pyc b/webscout/__pycache__/AIbase.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c772e31b180d81a434cf39eb6eb24664d547b7e1 Binary files /dev/null and b/webscout/__pycache__/AIbase.cpython-311.pyc differ diff --git a/webscout/__pycache__/AIutel.cpython-311.pyc b/webscout/__pycache__/AIutel.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..555d40833715b02eef3d9d92d1fb0263594ae8f5 Binary files /dev/null and b/webscout/__pycache__/AIutel.cpython-311.pyc differ diff --git a/webscout/__pycache__/DWEBS.cpython-311.pyc b/webscout/__pycache__/DWEBS.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..25412d414cb328fe410203445215b7143be5c08b Binary files /dev/null and b/webscout/__pycache__/DWEBS.cpython-311.pyc differ diff --git a/webscout/__pycache__/LLM.cpython-311.pyc b/webscout/__pycache__/LLM.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..aa924b40e873010f7b1a916b733f5865bb5e9305 Binary files /dev/null and b/webscout/__pycache__/LLM.cpython-311.pyc differ diff --git a/webscout/__pycache__/__init__.cpython-311.pyc b/webscout/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fa84e9a1b0519c9f3a4ccd1b29e3dbec9b2e6020 Binary files /dev/null and b/webscout/__pycache__/__init__.cpython-311.pyc differ diff --git a/webscout/__pycache__/exceptions.cpython-311.pyc b/webscout/__pycache__/exceptions.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..75a29a61c2c6216855721c8d8b6b54e8beaf7985 Binary files /dev/null and b/webscout/__pycache__/exceptions.cpython-311.pyc differ diff --git a/webscout/__pycache__/transcriber.cpython-311.pyc b/webscout/__pycache__/transcriber.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..376cafe34bd7b8a197f88cbd732d2ed5c6475cdf Binary files /dev/null and b/webscout/__pycache__/transcriber.cpython-311.pyc differ diff --git a/webscout/__pycache__/utils.cpython-311.pyc b/webscout/__pycache__/utils.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..496d92c6b5ee953b0614dd866b468810adb08b3a Binary files /dev/null and b/webscout/__pycache__/utils.cpython-311.pyc differ diff --git 
a/webscout/__pycache__/version.cpython-311.pyc b/webscout/__pycache__/version.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..46b20c761c7af53f6c40379ef8f75d2acc2c6853 Binary files /dev/null and b/webscout/__pycache__/version.cpython-311.pyc differ diff --git a/webscout/__pycache__/voice.cpython-311.pyc b/webscout/__pycache__/voice.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b7b6ee5d45ee56379f63ba5243598af7bda041c7 Binary files /dev/null and b/webscout/__pycache__/voice.cpython-311.pyc differ diff --git a/webscout/__pycache__/webscout_search.cpython-311.pyc b/webscout/__pycache__/webscout_search.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..46d3ba9ec0e27bb5aff65909a3d77d64579ad95e Binary files /dev/null and b/webscout/__pycache__/webscout_search.cpython-311.pyc differ diff --git a/webscout/__pycache__/webscout_search_async.cpython-311.pyc b/webscout/__pycache__/webscout_search_async.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2643e0ce24b61eb3c21db37b44cf5b39f5b62290 Binary files /dev/null and b/webscout/__pycache__/webscout_search_async.cpython-311.pyc differ diff --git a/webscout/async_providers.py b/webscout/async_providers.py new file mode 100644 index 0000000000000000000000000000000000000000..0d285493fa8df23f0c3b0c2f9bb91f20238ceaac --- /dev/null +++ b/webscout/async_providers.py @@ -0,0 +1,23 @@ +from webscout import AsyncPhindSearch +from webscout import AsyncYEPCHAT +from webscout import AsyncOPENGPT +from webscout import AsyncOPENAI +from webscout import AsyncLLAMA2 +from webscout import AsyncLEO +from webscout import AsyncKOBOLDAI +from webscout import AsyncGROQ +from webscout import AsyncBLACKBOXAI +from webscout.g4f import AsyncGPT4FREE + +mapper: dict[str, object] = { + "phind": AsyncPhindSearch, + "opengpt": AsyncOPENGPT, + "koboldai": AsyncKOBOLDAI, + "blackboxai": AsyncBLACKBOXAI, + "gpt4free": AsyncGPT4FREE, + "llama2": AsyncLLAMA2, + "yepchat": AsyncYEPCHAT, + "leo": AsyncLEO, + "groq": AsyncGROQ, + "openai": AsyncOPENAI, +} diff --git a/webscout/cli.py b/webscout/cli.py new file mode 100644 index 0000000000000000000000000000000000000000..8826728c9261f84319e19148ab08bee5fb1c023d --- /dev/null +++ b/webscout/cli.py @@ -0,0 +1,449 @@ +import csv +import logging +import os +from concurrent.futures import ThreadPoolExecutor, as_completed +from datetime import datetime +from urllib.parse import unquote +from pathlib import Path +import click +from curl_cffi import requests + +from .webscout_search import WEBS +from .utils import json_dumps, json_loads +from .version import __version__ + +logger = logging.getLogger(__name__) + +COLORS = { + 0: "black", + 1: "red", + 2: "green", + 3: "yellow", + 4: "blue", + 5: "magenta", + 6: "cyan", + 7: "bright_black", + 8: "bright_red", + 9: "bright_green", + 10: "bright_yellow", + 11: "bright_blue", + 12: "bright_magenta", + 13: "bright_cyan", + 14: "white", + 15: "bright_white", +} + + +def _save_json(jsonfile, data): + with open(jsonfile, "w", encoding="utf-8") as file: + file.write(json_dumps(data)) + + +def _save_csv(csvfile, data): + with open(csvfile, "w", newline="", encoding="utf-8") as file: + if data: + headers = data[0].keys() + writer = csv.DictWriter(file, fieldnames=headers, quoting=csv.QUOTE_MINIMAL) + writer.writeheader() + writer.writerows(data) + + +def _print_data(data): + if data: + for i, e in enumerate(data, start=1): + click.secho(f"{i}.\t {'=' * 78}", bg="black", 
fg="white") + for j, (k, v) in enumerate(e.items(), start=1): + if v: + width = 300 if k in ("content", "href", "image", "source", "thumbnail", "url") else 78 + k = "language" if k == "detected_language" else k + text = click.wrap_text( + f"{v}", width=width, initial_indent="", subsequent_indent=" " * 12, preserve_paragraphs=True + ) + else: + text = v + click.secho(f"{k:<12}{text}", bg="black", fg=COLORS[j], overline=True) + input() + + +def _sanitize_keywords(keywords): + keywords = ( + keywords.replace("filetype", "") + .replace(":", "") + .replace('"', "'") + .replace("site", "") + .replace(" ", "_") + .replace("/", "_") + .replace("\\", "_") + .replace(" ", "") + ) + return keywords + + +def _download_file(url, dir_path, filename, proxy): + try: + resp = requests.get(url, proxies=proxy, impersonate="chrome", timeout=10) + resp.raise_for_status() + with open(os.path.join(dir_path, filename[:200]), "wb") as file: + file.write(resp.content) + except Exception as ex: + logger.debug(f"download_file url={url} {type(ex).__name__} {ex}") + + +def _download_results(keywords, results, images=False, proxy=None, threads=None): + path_type = "images" if images else "text" + path = f"{path_type}_{keywords}_{datetime.now():%Y%m%d_%H%M%S}" + os.makedirs(path, exist_ok=True) + proxy = {"http": proxy, "https": proxy} + + threads = 10 if threads is None else threads + with ThreadPoolExecutor(max_workers=threads) as executor: + futures = [] + for i, res in enumerate(results, start=1): + url = res["image"] if images else res["href"] + filename = unquote(url.split("/")[-1].split("?")[0]) + f = executor.submit(_download_file, url, path, f"{i}_{filename}", proxy) + futures.append(f) + + with click.progressbar( + length=len(futures), label="Downloading", show_percent=True, show_pos=True, width=50 + ) as bar: + for future in as_completed(futures): + future.result() + bar.update(1) + + +@click.group(chain=True) +def cli(): + """dukduckgo_search CLI tool""" + pass + + +def safe_entry_point(): + try: + cli() + except Exception as ex: + click.echo(f"{type(ex).__name__}: {ex}") + + +@cli.command() +def version(): + print(__version__) + return __version__ +@cli.command() +@click.option("-s", "--save", is_flag=True, default=False, help="save the conversation in the json file") +@click.option("-p", "--proxy", default=None, help="the proxy to send requests, example: socks5://localhost:9150") +def chat(save, proxy): + """CLI function to perform an interactive AI chat using DuckDuckGo API.""" + cache_file = "WEBS_chat_conversation.json" + models = ["gpt-3.5", "claude-3-haiku"] + client = WEBS(proxy=proxy) + + print("DuckDuckGo AI chat. Available models:") + for idx, model in enumerate(models, start=1): + print(f"{idx}. 
{model}") + chosen_model_idx = input("Choose a model by entering its number[1]: ") + chosen_model_idx = 0 if not chosen_model_idx.strip() else int(chosen_model_idx) - 1 + model = models[chosen_model_idx] + print(f"Using model: {model}") + + if save and Path(cache_file).exists(): + with open(cache_file) as f: + cache = json_loads(f.read()) + client._chat_vqd = cache.get("vqd", None) + client._chat_messages = cache.get("messages", []) + + while True: + user_input = input(f"{'-'*78}\nYou: ") + if not user_input.strip(): + break + + resp_answer = client.chat(keywords=user_input, model=model) + text = click.wrap_text(resp_answer, width=78, preserve_paragraphs=True) + click.secho(f"AI: {text}", bg="black", fg="green", overline=True) + + cache = {"vqd": client._chat_vqd, "messages": client._chat_messages} + _save_json(cache_file, cache) + + if "exit" in user_input.lower() or "quit" in user_input.lower(): + break + +@cli.command() +@click.option("-k", "--keywords", required=True, help="text search, keywords for query") +@click.option("-r", "--region", default="wt-wt", help="wt-wt, us-en, ru-ru, etc. -region https://duckduckgo.com/params") +@click.option("-s", "--safesearch", default="moderate", type=click.Choice(["on", "moderate", "off"])) +@click.option("-t", "--timelimit", default=None, type=click.Choice(["d", "w", "m", "y"]), help="day, week, month, year") +@click.option("-m", "--max_results", default=20, help="maximum number of results, default=20") +@click.option("-o", "--output", default="print", help="csv, json (save the results to a csv or json file)") +@click.option("-d", "--download", is_flag=True, default=False, help="download results to 'keywords' folder") +@click.option("-b", "--backend", default="api", type=click.Choice(["api", "html", "lite"]), help="which backend to use") +@click.option("-th", "--threads", default=10, help="download threads, default=10") +@click.option("-p", "--proxy", default=None, help="the proxy to send requests, example: socks5://localhost:9150") +def text(keywords, region, safesearch, timelimit, backend, output, download, threads, max_results, proxy): + """CLI function to perform a text search using DuckDuckGo API.""" + data = WEBS(proxies=proxy).text( + keywords=keywords, + region=region, + safesearch=safesearch, + timelimit=timelimit, + backend=backend, + max_results=max_results, + ) + keywords = _sanitize_keywords(keywords) + filename = f"text_{keywords}_{datetime.now():%Y%m%d_%H%M%S}" + if output == "print" and not download: + _print_data(data) + elif output == "csv": + _save_csv(f"{filename}.csv", data) + elif output == "json": + _save_json(f"{filename}.json", data) + if download: + _download_results(keywords, data, proxy=proxy, threads=threads) + + +@cli.command() +@click.option("-k", "--keywords", required=True, help="answers search, keywords for query") +@click.option("-o", "--output", default="print", help="csv, json (save the results to a csv or json file)") +@click.option("-p", "--proxy", default=None, help="the proxy to send requests, example: socks5://localhost:9150") +def answers(keywords, output, proxy): + """CLI function to perform a answers search using DuckDuckGo API.""" + data = WEBS(proxies=proxy).answers(keywords=keywords) + filename = f"answers_{_sanitize_keywords(keywords)}_{datetime.now():%Y%m%d_%H%M%S}" + if output == "print": + _print_data(data) + elif output == "csv": + _save_csv(f"{filename}.csv", data) + elif output == "json": + _save_json(f"{filename}.json", data) + + +@cli.command() +@click.option("-k", "--keywords", 
required=True, help="keywords for query") +@click.option("-r", "--region", default="wt-wt", help="wt-wt, us-en, ru-ru, etc. -region https://duckduckgo.com/params") +@click.option("-s", "--safesearch", default="moderate", type=click.Choice(["on", "moderate", "off"])) +@click.option("-t", "--timelimit", default=None, type=click.Choice(["Day", "Week", "Month", "Year"])) +@click.option("-size", "--size", default=None, type=click.Choice(["Small", "Medium", "Large", "Wallpaper"])) +@click.option( + "-c", + "--color", + default=None, + type=click.Choice( + [ + "color", + "Monochrome", + "Red", + "Orange", + "Yellow", + "Green", + "Blue", + "Purple", + "Pink", + "Brown", + "Black", + "Gray", + "Teal", + "White", + ] + ), +) +@click.option( + "-type", "--type_image", default=None, type=click.Choice(["photo", "clipart", "gif", "transparent", "line"]) +) +@click.option("-l", "--layout", default=None, type=click.Choice(["Square", "Tall", "Wide"])) +@click.option( + "-lic", + "--license_image", + default=None, + type=click.Choice(["any", "Public", "Share", "Modify", "ModifyCommercially"]), +) +@click.option("-m", "--max_results", default=90, help="maximum number of results, default=90") +@click.option("-o", "--output", default="print", help="csv, json (save the results to a csv or json file)") +@click.option("-d", "--download", is_flag=True, default=False, help="download and save images to 'keywords' folder") +@click.option("-th", "--threads", default=10, help="download threads, default=10") +@click.option("-p", "--proxy", default=None, help="the proxy to send requests, example: socks5://localhost:9150") +def images( + keywords, + region, + safesearch, + timelimit, + size, + color, + type_image, + layout, + license_image, + download, + threads, + max_results, + output, + proxy, +): + """CLI function to perform a images search using DuckDuckGo API.""" + data = WEBS(proxies=proxy).images( + keywords=keywords, + region=region, + safesearch=safesearch, + timelimit=timelimit, + size=size, + color=color, + type_image=type_image, + layout=layout, + license_image=license_image, + max_results=max_results, + ) + keywords = _sanitize_keywords(keywords) + filename = f"images_{_sanitize_keywords(keywords)}_{datetime.now():%Y%m%d_%H%M%S}" + if output == "print" and not download: + _print_data(data) + elif output == "csv": + _save_csv(f"{filename}.csv", data) + elif output == "json": + _save_json(f"{filename}.json", data) + if download: + _download_results(keywords, data, images=True, proxy=proxy, threads=threads) + + +@cli.command() +@click.option("-k", "--keywords", required=True, help="keywords for query") +@click.option("-r", "--region", default="wt-wt", help="wt-wt, us-en, ru-ru, etc. 
-region https://duckduckgo.com/params") +@click.option("-s", "--safesearch", default="moderate", type=click.Choice(["on", "moderate", "off"])) +@click.option("-t", "--timelimit", default=None, type=click.Choice(["d", "w", "m"]), help="day, week, month") +@click.option("-res", "--resolution", default=None, type=click.Choice(["high", "standart"])) +@click.option("-d", "--duration", default=None, type=click.Choice(["short", "medium", "long"])) +@click.option("-lic", "--license_videos", default=None, type=click.Choice(["creativeCommon", "youtube"])) +@click.option("-m", "--max_results", default=50, help="maximum number of results, default=50") +@click.option("-o", "--output", default="print", help="csv, json (save the results to a csv or json file)") +@click.option("-p", "--proxy", default=None, help="the proxy to send requests, example: socks5://localhost:9150") +def videos(keywords, region, safesearch, timelimit, resolution, duration, license_videos, max_results, output, proxy): + """CLI function to perform a videos search using DuckDuckGo API.""" + data = WEBS(proxies=proxy).videos( + keywords=keywords, + region=region, + safesearch=safesearch, + timelimit=timelimit, + resolution=resolution, + duration=duration, + license_videos=license_videos, + max_results=max_results, + ) + filename = f"videos_{_sanitize_keywords(keywords)}_{datetime.now():%Y%m%d_%H%M%S}" + if output == "print": + _print_data(data) + elif output == "csv": + _save_csv(f"{filename}.csv", data) + elif output == "json": + _save_json(f"{filename}.json", data) + + +@cli.command() +@click.option("-k", "--keywords", required=True, help="keywords for query") +@click.option("-r", "--region", default="wt-wt", help="wt-wt, us-en, ru-ru, etc. -region https://duckduckgo.com/params") +@click.option("-s", "--safesearch", default="moderate", type=click.Choice(["on", "moderate", "off"])) +@click.option("-t", "--timelimit", default=None, type=click.Choice(["d", "w", "m", "y"]), help="day, week, month, year") +@click.option("-m", "--max_results", default=25, help="maximum number of results, default=25") +@click.option("-o", "--output", default="print", help="csv, json (save the results to a csv or json file)") +@click.option("-p", "--proxy", default=None, help="the proxy to send requests, example: socks5://localhost:9150") +def news(keywords, region, safesearch, timelimit, max_results, output, proxy): + """CLI function to perform a news search using DuckDuckGo API.""" + data = WEBS(proxies=proxy).news( + keywords=keywords, region=region, safesearch=safesearch, timelimit=timelimit, max_results=max_results + ) + filename = f"news_{_sanitize_keywords(keywords)}_{datetime.now():%Y%m%d_%H%M%S}" + if output == "print": + _print_data(data) + elif output == "csv": + _save_csv(f"{filename}.csv", data) + elif output == "json": + _save_json(f"{filename}.json", data) + + +@cli.command() +@click.option("-k", "--keywords", required=True, help="keywords for query") +@click.option("-p", "--place", default=None, help="simplified search - if set, the other parameters are not used") +@click.option("-s", "--street", default=None, help="house number/street") +@click.option("-c", "--city", default=None, help="city of search") +@click.option("-county", "--county", default=None, help="county of search") +@click.option("-state", "--state", default=None, help="state of search") +@click.option("-country", "--country", default=None, help="country of search") +@click.option("-post", "--postalcode", default=None, help="postalcode of search") +@click.option("-lat", 
"--latitude", default=None, help="""if lat and long are set, the other params are not used""") +@click.option("-lon", "--longitude", default=None, help="""if lat and long are set, the other params are not used""") +@click.option("-r", "--radius", default=0, help="expand the search square by the distance in kilometers") +@click.option("-m", "--max_results", default=50, help="number of results, default=50") +@click.option("-o", "--output", default="print", help="csv, json (save the results to a csv or json file)") +@click.option("-proxy", "--proxy", default=None, help="the proxy to send requests, example: socks5://localhost:9150") +def maps( + keywords, + place, + street, + city, + county, + state, + country, + postalcode, + latitude, + longitude, + radius, + max_results, + output, + proxy, +): + """CLI function to perform a maps search using DuckDuckGo API.""" + data = WEBS(proxies=proxy).maps( + keywords=keywords, + place=place, + street=street, + city=city, + county=county, + state=state, + country=country, + postalcode=postalcode, + latitude=latitude, + longitude=longitude, + radius=radius, + max_results=max_results, + ) + filename = f"maps_{_sanitize_keywords(keywords)}_{datetime.now():%Y%m%d_%H%M%S}" + if output == "print": + _print_data(data) + elif output == "csv": + _save_csv(f"{filename}.csv", data) + elif output == "json": + _save_json(f"{filename}.json", data) + + +@cli.command() +@click.option("-k", "--keywords", required=True, help="text for translation") +@click.option("-f", "--from_", help="What language to translate from (defaults automatically)") +@click.option("-t", "--to", default="en", help="de, ru, fr, etc. What language to translate, defaults='en'") +@click.option("-o", "--output", default="print", help="csv, json (save the results to a csv or json file)") +@click.option("-p", "--proxy", default=None, help="the proxy to send requests, example: socks5://localhost:9150") +def translate(keywords, from_, to, output, proxy): + """CLI function to perform translate using DuckDuckGo API.""" + data = WEBS(proxies=proxy).translate(keywords=keywords, from_=from_, to=to) + filename = f"translate_{_sanitize_keywords(keywords)}_{datetime.now():%Y%m%d_%H%M%S}" + if output == "print": + _print_data(data) + elif output == "csv": + _save_csv(f"{filename}.csv", data) + elif output == "json": + _save_json(f"{filename}.json", data) + + +@cli.command() +@click.option("-k", "--keywords", required=True, help="keywords for query") +@click.option("-r", "--region", default="wt-wt", help="wt-wt, us-en, ru-ru, etc. 
-region https://duckduckgo.com/params") +@click.option("-o", "--output", default="print", help="csv, json (save the results to a csv or json file)") +@click.option("-p", "--proxy", default=None, help="the proxy to send requests, example: socks5://localhost:9150") +def suggestions(keywords, region, output, proxy): + """CLI function to perform a suggestions search using DuckDuckGo API.""" + data = WEBS(proxies=proxy).suggestions(keywords=keywords, region=region) + filename = f"suggestions_{_sanitize_keywords(keywords)}_{datetime.now():%Y%m%d_%H%M%S}" + if output == "print": + _print_data(data) + elif output == "csv": + _save_csv(f"{filename}.csv", data) + elif output == "json": + _save_json(f"{filename}.json", data) + + +if __name__ == "__main__": + cli(prog_name="WEBS") \ No newline at end of file diff --git a/webscout/exceptions.py b/webscout/exceptions.py new file mode 100644 index 0000000000000000000000000000000000000000..23bcb32b50d0e7f78a20000c6244359c06cdb509 --- /dev/null +++ b/webscout/exceptions.py @@ -0,0 +1,18 @@ +class WebscoutE(Exception): + """Base exception class for duckduckgo_search.""" + + +class RatelimitE(Exception): + """Raised for rate limit exceeded errors during API requests.""" + + +class TimeoutE(Exception): + """Raised for timeout errors during API requests.""" + +class FailedToGenerateResponseError(Exception): + + """Provider failed to fetch response""" +class AllProvidersFailure(Exception): + """None of the providers generated response successfully""" + + pass \ No newline at end of file diff --git a/webscout/g4f.py b/webscout/g4f.py new file mode 100644 index 0000000000000000000000000000000000000000..d2bccb069df980c39e46ccc9f56d7bebd251df99 --- /dev/null +++ b/webscout/g4f.py @@ -0,0 +1,666 @@ +import g4f +from webscout.AIutel import Optimizers +from webscout.AIutel import Conversation +from webscout.AIutel import AwesomePrompts +from webscout.AIbase import Provider, AsyncProvider +from webscout.AIutel import available_providers +from typing import Any, AsyncGenerator + +g4f.debug.version_check = False + +working_providers = available_providers + +completion_allowed_models = [ + "code-davinci-002", + "text-ada-001", + "text-babbage-001", + "text-curie-001", + "text-davinci-002", + "text-davinci-003", +] + +default_models = { + "completion": "text-davinci-003", + "chat_completion": "gpt-3.5-turbo", +} + +default_provider = "Koala" + +class AsyncGPT4FREE(AsyncProvider): + def __init__( + self, + provider: str = default_provider, + is_conversation: bool = True, + auth: str = None, + max_tokens: int = 600, + model: str = None, + ignore_working: bool = False, + timeout: int = 30, + intro: str = None, + filepath: str = None, + update_file: bool = True, + proxies: dict = {}, + history_offset: int = 10250, + act: str = None, + ): + """Initialies GPT4FREE + + Args: + provider (str, optional): gpt4free based provider name. Defaults to Koala. + is_conversation (bool, optional): Flag for chatting conversationally. Defaults to True. + auth (str, optional): Authentication value for the provider incase it needs. Defaults to None. + max_tokens (int, optional): Maximum number of tokens to be generated upon completion. Defaults to 600. + model (str, optional): LLM model name. Defaults to text-davinci-003|gpt-3.5-turbo. + ignore_working (bool, optional): Ignore working status of the provider. Defaults to False. + timeout (int, optional): Http request timeout. Defaults to 30. + intro (str, optional): Conversation introductory prompt. Defaults to None. 
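The exception classes added in webscout/exceptions.py above are the ones the search client later in this diff raises. A minimal caller-side sketch (not part of the diff), assuming the `WEBS` client from webscout/webscout_search.py below:

```python
# Hypothetical error handling around a text search; WEBS and the exception
# classes come from this diff, the fallback/retry policy is illustrative only.
from webscout.webscout_search import WEBS
from webscout.exceptions import RatelimitE, TimeoutE, WebscoutE

def safe_text_search(query: str) -> list:
    try:
        return WEBS().text(keywords=query, max_results=5)
    except RatelimitE:
        return []  # rate limited: back off and retry later
    except TimeoutE:
        return []  # request timed out
    except WebscoutE as exc:
        raise RuntimeError(f"search failed: {exc}") from exc
```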
+ filepath (str, optional): Path to file containing conversation history. Defaults to None. + update_file (bool, optional): Add new prompts and responses to the file. Defaults to True. + proxies (dict, optional): Http request proxies. Defaults to {}. + history_offset (int, optional): Limit conversation history to this number of last texts. Defaults to 10250. + act (str|int, optional): Awesome prompt key or index. (Used as intro). Defaults to None. + """ + assert provider in available_providers, ( + f"Provider '{provider}' is not yet supported. " + f"Try others like {', '.join(available_providers)}" + ) + if model is None: + model = default_models["chat_completion"] + + self.is_conversation = is_conversation + self.max_tokens_to_sample = max_tokens + self.stream_chunk_size = 64 + self.timeout = timeout + self.last_response = {} + + self.__available_optimizers = ( + method + for method in dir(Optimizers) + if callable(getattr(Optimizers, method)) and not method.startswith("__") + ) + Conversation.intro = ( + AwesomePrompts().get_act( + act, raise_not_found=True, default=None, case_insensitive=True + ) + if act + else intro or Conversation.intro + ) + self.conversation = Conversation( + is_conversation, + self.max_tokens_to_sample, + filepath, + update_file, + ) + self.conversation.history_offset = history_offset + self.model = model + self.provider = provider + self.ignore_working = ignore_working + self.auth = auth + self.proxy = None if not proxies else list(proxies.values())[0] + + def __str__(self): + return f"AsyncGPTFREE(provider={self.provider})" + + async def ask( + self, + prompt: str, + stream: bool = False, + raw: bool = False, + optimizer: str = None, + conversationally: bool = False, + ) -> dict | AsyncGenerator: + """Chat with AI asynchronously. + + Args: + prompt (str): Prompt to be send. + stream (bool, optional): Flag for streaming response. Defaults to False. + raw (bool, optional): Stream back raw response as received. Defaults to False. + optimizer (str, optional): Prompt optimizer name - `[code, shell_command]`. Defaults to None. + conversationally (bool, optional): Chat conversationally when using optimizer. Defaults to False. + Returns: + dict|AsyncGenerator : ai content + ```json + { + "text" : "How may I help you today?" 
+ } + ``` + """ + conversation_prompt = self.conversation.gen_complete_prompt(prompt) + if optimizer: + if optimizer in self.__available_optimizers: + conversation_prompt = getattr(Optimizers, optimizer)( + conversation_prompt if conversationally else prompt + ) + else: + raise Exception( + f"Optimizer is not one of {self.__available_optimizers}" + ) + + payload = dict( + model=self.model, + provider=self.provider, # g4f.Provider.Aichat, + messages=[{"role": "user", "content": conversation_prompt}], + stream=True, + ignore_working=self.ignore_working, + auth=self.auth, + proxy=self.proxy, + timeout=self.timeout, + ) + + async def format_response(response): + return dict(text=response) + + async def for_stream(): + previous_chunks = "" + response = g4f.ChatCompletion.create_async(**payload) + + async for chunk in response: + previous_chunks += chunk + formatted_resp = await format_response(previous_chunks) + self.last_response.update(formatted_resp) + yield previous_chunks if raw else formatted_resp + + self.conversation.update_chat_history( + prompt, + previous_chunks, + ) + + async def for_non_stream(): + async for _ in for_stream(): + pass + return self.last_response + + return for_stream() if stream else await for_non_stream() + + async def chat( + self, + prompt: str, + stream: bool = False, + optimizer: str = None, + conversationally: bool = False, + ) -> dict | AsyncGenerator: + """Generate response `str` asynchronously. + Args: + prompt (str): Prompt to be send. + stream (bool, optional): Flag for streaming response. Defaults to False. + optimizer (str, optional): Prompt optimizer name - `[code, shell_command]`. Defaults to None. + conversationally (bool, optional): Chat conversationally when using optimizer. Defaults to False. + Returns: + str|AsyncGenerator: Response generated + """ + + async def for_stream(): + async_ask = await self.ask( + prompt, True, optimizer=optimizer, conversationally=conversationally + ) + async for response in async_ask: + yield await self.get_message(response) + + async def for_non_stream(): + return await self.get_message( + await self.ask( + prompt, + False, + optimizer=optimizer, + conversationally=conversationally, + ) + ) + + return for_stream() if stream else await for_non_stream() + + async def get_message(self, response: dict) -> str: + """Retrieves message only from response + + Args: + response (dict): Response generated by `self.ask` + + Returns: + str: Message extracted + """ + assert isinstance(response, dict), "Response should be of dict data-type only" + return response["text"] +class GPT4FREE(Provider): + def __init__( + self, + provider: str = default_provider, + is_conversation: bool = True, + auth: str = None, + max_tokens: int = 600, + model: str = None, + chat_completion: bool = True, + ignore_working: bool = True, + timeout: int = 30, + intro: str = None, + filepath: str = None, + update_file: bool = True, + proxies: dict = {}, + history_offset: int = 10250, + act: str = None, + ): + """Initialies GPT4FREE + + Args: + provider (str, optional): gpt4free based provider name. Defaults to Koala. + is_conversation (bool, optional): Flag for chatting conversationally. Defaults to True. + auth (str, optional): Authentication value for the provider incase it needs. Defaults to None. + max_tokens (int, optional): Maximum number of tokens to be generated upon completion. Defaults to 600. + model (str, optional): LLM model name. Defaults to text-davinci-003|gpt-3.5-turbo. 
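A minimal sketch (not part of the diff) of driving the `AsyncGPT4FREE` wrapper defined above; it relies only on the constructor defaults (`provider="Koala"`) and the `chat` coroutine shown in this file:

```python
import asyncio
from webscout.g4f import AsyncGPT4FREE

async def main() -> None:
    bot = AsyncGPT4FREE(is_conversation=False)  # module default provider ("Koala")

    # Non-streaming: chat() resolves to the final reply string.
    print(await bot.chat("Say hello in one short sentence"))

    # Streaming: chat(..., stream=True) resolves to an async generator whose
    # items are the accumulated response text so far (not deltas).
    partial = ""
    stream = await bot.chat("Now count to three", stream=True)
    async for partial in stream:
        pass
    print(partial)

asyncio.run(main())
```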
+ chat_completion(bool, optional): Provide native auto-contexting (conversationally). Defaults to False. + ignore_working (bool, optional): Ignore working status of the provider. Defaults to False. + timeout (int, optional): Http request timeout. Defaults to 30. + intro (str, optional): Conversation introductory prompt. Defaults to None. + filepath (str, optional): Path to file containing conversation history. Defaults to None. + update_file (bool, optional): Add new prompts and responses to the file. Defaults to True. + proxies (dict, optional): Http request proxies. Defaults to {}. + history_offset (int, optional): Limit conversation history to this number of last texts. Defaults to 10250. + act (str|int, optional): Awesome prompt key or index. (Used as intro). Defaults to None. + """ + assert provider in available_providers, ( + f"Provider '{provider}' is not yet supported. " + f"Try others like {', '.join(available_providers)}" + ) + if model is None: + model = ( + default_models["chat_completion"] + if chat_completion + else default_models["completion"] + ) + + elif not chat_completion: + assert model in completion_allowed_models, ( + f"Model '{model}' is not yet supported for completion. " + f"Try other models like {', '.join(completion_allowed_models)}" + ) + self.is_conversation = is_conversation + self.max_tokens_to_sample = max_tokens + self.stream_chunk_size = 64 + self.timeout = timeout + self.last_response = {} + + self.__available_optimizers = ( + method + for method in dir(Optimizers) + if callable(getattr(Optimizers, method)) and not method.startswith("__") + ) + Conversation.intro = ( + AwesomePrompts().get_act( + act, raise_not_found=True, default=None, case_insensitive=True + ) + if act + else intro or Conversation.intro + ) + self.conversation = Conversation( + False if chat_completion else is_conversation, + self.max_tokens_to_sample, + filepath, + update_file, + ) + self.conversation.history_offset = history_offset + self.model = model + self.provider = provider + self.chat_completion = chat_completion + self.ignore_working = ignore_working + self.auth = auth + self.proxy = None if not proxies else list(proxies.values())[0] + self.__chat_class = g4f.ChatCompletion if chat_completion else g4f.Completion + + def ask( + self, + prompt: str, + stream: bool = False, + raw: bool = False, + optimizer: str = None, + conversationally: bool = False, + ) -> dict: + """Chat with AI + + Args: + prompt (str): Prompt to be send. + stream (bool, optional): Flag for streaming response. Defaults to False. + raw (bool, optional): Stream back raw response as received. Defaults to False. + optimizer (str, optional): Prompt optimizer name - `[code, shell_command]`. Defaults to None. + conversationally (bool, optional): Chat conversationally when using optimizer. Defaults to False. + Returns: + dict : {} + ```json + { + "text" : "How may I help you today?" 
+ } + ``` + """ + conversation_prompt = self.conversation.gen_complete_prompt(prompt) + if optimizer: + if optimizer in self.__available_optimizers: + conversation_prompt = getattr(Optimizers, optimizer)( + conversation_prompt if conversationally else prompt + ) + else: + raise Exception( + f"Optimizer is not one of {self.__available_optimizers}" + ) + + def payload(): + if self.chat_completion: + return dict( + model=self.model, + provider=self.provider, # g4f.Provider.Aichat, + messages=[{"role": "user", "content": conversation_prompt}], + stream=stream, + ignore_working=self.ignore_working, + auth=self.auth, + proxy=self.proxy, + timeout=self.timeout, + ) + + else: + return dict( + model=self.model, + prompt=conversation_prompt, + provider=self.provider, + stream=stream, + ignore_working=self.ignore_working, + auth=self.auth, + proxy=self.proxy, + timeout=self.timeout, + ) + + def format_response(response): + return dict(text=response) + + def for_stream(): + previous_chunks = "" + response = self.__chat_class.create(**payload()) + + for chunk in response: + previous_chunks += chunk + formatted_resp = format_response(previous_chunks) + self.last_response.update(formatted_resp) + yield previous_chunks if raw else formatted_resp + + self.conversation.update_chat_history( + prompt, + previous_chunks, + ) + + def for_non_stream(): + response = self.__chat_class.create(**payload()) + formatted_resp = format_response(response) + + self.last_response.update(formatted_resp) + self.conversation.update_chat_history(prompt, response) + + return response if raw else formatted_resp + + return for_stream() if stream else for_non_stream() + + def chat( + self, + prompt: str, + stream: bool = False, + optimizer: str = None, + conversationally: bool = False, + ) -> str: + """Generate response `str` + Args: + prompt (str): Prompt to be send. + stream (bool, optional): Flag for streaming response. Defaults to False. + optimizer (str, optional): Prompt optimizer name - `[code, shell_command]`. Defaults to None. + conversationally (bool, optional): Chat conversationally when using optimizer. Defaults to False. 
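For the synchronous `GPT4FREE` wrapper above, a short usage sketch (not part of the diff); the provider falls back to the module default, and the streaming loop reflects that each yielded item is cumulative:

```python
from webscout.g4f import GPT4FREE

bot = GPT4FREE(is_conversation=False)   # default provider ("Koala"), native chat completion
print(bot.chat("What is 2 + 2?"))       # plain string reply

for partial in bot.chat("Count to five", stream=True):
    print(partial, end="\r")            # each item is the response accumulated so far
print()
```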
+ Returns: + str: Response generated + """ + + def for_stream(): + for response in self.ask( + prompt, True, optimizer=optimizer, conversationally=conversationally + ): + yield self.get_message(response) + + def for_non_stream(): + return self.get_message( + self.ask( + prompt, + False, + optimizer=optimizer, + conversationally=conversationally, + ) + ) + + return for_stream() if stream else for_non_stream() + + def get_message(self, response: dict) -> str: + """Retrieves message only from response + + Args: + response (dict): Response generated by `self.ask` + + Returns: + str: Message extracted + """ + assert isinstance(response, dict), "Response should be of dict data-type only" + return response["text"] +from pathlib import Path +from webscout.AIutel import default_path +from json import dump, load +from time import time +from threading import Thread as thr +from functools import wraps +from rich.progress import Progress +import logging + +results_path = Path(default_path) / "provider_test.json" + + +def exception_handler(func): + + @wraps(func) + def decorator(*args, **kwargs): + try: + return func(*args, **kwargs) + except Exception as e: + pass + + return decorator + + +@exception_handler +def is_working(provider: str) -> bool: + """Test working status of a provider + + Args: + provider (str): Provider name + + Returns: + bool: is_working status + """ + bot = GPT4FREE(provider=provider, is_conversation=False) + text = bot.chat("hello") + assert isinstance(text, str) + assert bool(text.strip()) + assert " 2 + return True + + +class TestProviders: + + def __init__( + self, + test_at_once: int = 5, + quiet: bool = False, + timeout: int = 20, + selenium: bool = False, + do_log: bool = True, + ): + """Constructor + + Args: + test_at_once (int, optional): Test n providers at once. Defaults to 5. + quiet (bool, optinal): Disable stdout. Defaults to False. + timout (int, optional): Thread timeout for each provider. Defaults to 20. + selenium (bool, optional): Test even selenium dependent providers. Defaults to False. + do_log (bool, optional): Flag to control logging. Defaults to True. 
+ """ + self.test_at_once: int = test_at_once + self.quiet = quiet + self.timeout = timeout + self.do_log = do_log + self.__logger = logging.getLogger(__name__) + self.working_providers: list = [ + provider.__name__ + for provider in g4f.Provider.__providers__ + if provider.working + ] + + if not selenium: + import g4f.Provider.selenium as selenium_based + from g4f import webdriver + + webdriver.has_requirements = False + selenium_based_providers: list = dir(selenium_based) + for provider in self.working_providers: + try: + selenium_based_providers.index(provider) + except ValueError: + pass + else: + self.__log( + 10, f"Dropping provider - {provider} - [Selenium dependent]" + ) + self.working_providers.remove(provider) + + self.results_path: Path = results_path + self.__create_empty_file(ignore_if_found=True) + self.results_file_is_empty: bool = False + + def __log( + self, + level: int, + message: str, + ): + """class logger""" + if self.do_log: + self.__logger.log(level, message) + else: + pass + + def __create_empty_file(self, ignore_if_found: bool = False): + if ignore_if_found and self.results_path.is_file(): + return + with self.results_path.open("w") as fh: + dump({"results": []}, fh) + self.results_file_is_empty = True + + def test_provider(self, name: str): + """Test each provider and save successful ones + + Args: + name (str): Provider name + """ + + try: + bot = GPT4FREE(provider=name, is_conversation=False) + start_time = time() + text = bot.chat("hello there") + assert isinstance(text, str), "Non-string response returned" + assert bool(text.strip()), "Empty string" + assert " 2 + except Exception as e: + pass + else: + self.results_file_is_empty = False + with self.results_path.open() as fh: + current_results = load(fh) + new_result = dict(time=time() - start_time, name=name) + current_results["results"].append(new_result) + self.__log(20, f"Test result - {new_result['name']} - {new_result['time']}") + + with self.results_path.open("w") as fh: + dump(current_results, fh) + + @exception_handler + def main( + self, + ): + self.__create_empty_file() + threads = [] + # Create a progress bar + total = len(self.working_providers) + with Progress() as progress: + self.__log(20, f"Testing {total} providers : {self.working_providers}") + task = progress.add_task( + f"[cyan]Testing...[{self.test_at_once}]", + total=total, + visible=self.quiet == False, + ) + while not progress.finished: + for count, provider in enumerate(self.working_providers, start=1): + t1 = thr( + target=self.test_provider, + args=(provider,), + ) + t1.start() + if count % self.test_at_once == 0 or count == len(provider): + for t in threads: + try: + t.join(self.timeout) + except Exception as e: + pass + threads.clear() + else: + threads.append(t1) + progress.update(task, advance=1) + + def get_results(self, run: bool = False, best: bool = False) -> list[dict]: + """Get test results + + Args: + run (bool, optional): Run the test first. Defaults to False. + best (bool, optional): Return name of the best provider. Defaults to False. + + Returns: + list[dict]|str: Test results. 
+ """ + if run or self.results_file_is_empty: + self.main() + + with self.results_path.open() as fh: + results: dict = load(fh) + + results = results["results"] + if not results: + if run: + raise Exception("Unable to find working g4f provider") + else: + self.__log(30, "Hunting down working g4f providers.") + return self.get_results(run=True, best=best) + + time_list = [] + + sorted_list = [] + for entry in results: + time_list.append(entry["time"]) + + time_list.sort() + + for time_value in time_list: + for entry in results: + if entry["time"] == time_value: + sorted_list.append(entry) + return sorted_list[0]["name"] if best else sorted_list + + @property + def best(self): + """Fastest provider overally""" + return self.get_results(run=False, best=True) + + @property + def auto(self): + """Best working provider""" + for result in self.get_results(run=False, best=False): + self.__log(20, "Confirming working status of provider : " + result["name"]) + if is_working(result["name"]): + return result["name"] \ No newline at end of file diff --git a/webscout/models.py b/webscout/models.py new file mode 100644 index 0000000000000000000000000000000000000000..7295a61837341ea04b3b78656cb37601852f5a6a --- /dev/null +++ b/webscout/models.py @@ -0,0 +1,23 @@ +from dataclasses import dataclass +from typing import Dict, Optional + + +@dataclass +class MapsResult: + """Represents a result from the maps search.""" + + title: Optional[str] = None + address: Optional[str] = None + country_code: Optional[str] = None + latitude: Optional[str] = None + longitude: Optional[str] = None + url: Optional[str] = None + desc: Optional[str] = None + phone: Optional[str] = None + image: Optional[str] = None + source: Optional[str] = None + hours: Optional[Dict[str, str]] = None + category: Optional[str] = None + facebook: Optional[str] = None + instagram: Optional[str] = None + twitter: Optional[str] = None diff --git a/webscout/version.py b/webscout/version.py new file mode 100644 index 0000000000000000000000000000000000000000..8806dd4d1c3da62b6eb19bb3d819b7fd8239f204 --- /dev/null +++ b/webscout/version.py @@ -0,0 +1,2 @@ +__version__ = "2.7" + diff --git a/webscout/webscout_search.py b/webscout/webscout_search.py new file mode 100644 index 0000000000000000000000000000000000000000..584cd3e44da2f661373c2354c955e17ee5b15aa9 --- /dev/null +++ b/webscout/webscout_search.py @@ -0,0 +1,1056 @@ +import logging +import warnings +from concurrent.futures import ThreadPoolExecutor +from datetime import datetime, timezone +from decimal import Decimal +from functools import cached_property +from itertools import cycle, islice +from threading import Event +from types import TracebackType +from typing import Dict, List, Optional, Tuple, Type, Union, cast + +import pyreqwest_impersonate as pri # type: ignore + +try: + from lxml.etree import _Element + from lxml.html import HTMLParser as LHTMLParser + from lxml.html import document_fromstring + + LXML_AVAILABLE = True +except ImportError: + LXML_AVAILABLE = False + +from .exceptions import WebscoutE, RatelimitE, TimeoutE +from .utils import ( + _calculate_distance, + _extract_vqd, + _normalize, + _normalize_url, + _text_extract_json, + json_loads, +) + +logger = logging.getLogger("webcout_search.WEBS") + + +class WEBS: + """webcout_search class to get search results from duckduckgo.com.""" + + _executor: ThreadPoolExecutor = ThreadPoolExecutor() + + def __init__( + self, + headers: Optional[Dict[str, str]] = None, + proxy: Optional[str] = None, + proxies: Union[Dict[str, str], str, 
None] = None, # deprecated + timeout: Optional[int] = 10, + ) -> None: + """Initialize the WEBS object. + + Args: + headers (dict, optional): Dictionary of headers for the HTTP client. Defaults to None. + proxy (str, optional): proxy for the HTTP client, supports http/https/socks5 protocols. + example: "http://user:pass@example.com:3128". Defaults to None. + timeout (int, optional): Timeout value for the HTTP client. Defaults to 10. + """ + self.proxy: Optional[str] = proxy + assert self.proxy is None or isinstance(self.proxy, str), "proxy must be a str" + if not proxy and proxies: + warnings.warn("'proxies' is deprecated, use 'proxy' instead.", stacklevel=1) + self.proxy = proxies.get("http") or proxies.get("https") if isinstance(proxies, dict) else proxies + self.headers = headers if headers else {} + self.headers["Referer"] = "https://duckduckgo.com/" + self.client = pri.Client( + headers=self.headers, + proxy=self.proxy, + timeout=timeout, + cookie_store=True, + referer=True, + impersonate="chrome_124", + follow_redirects=False, + verify=False, + ) + self._exception_event = Event() + self._chat_messages: List[Dict[str, str]] = [] + self._chat_vqd: str = "" + + def __enter__(self) -> "WEBS": + return self + + def __exit__( + self, + exc_type: Optional[Type[BaseException]] = None, + exc_val: Optional[BaseException] = None, + exc_tb: Optional[TracebackType] = None, + ) -> None: + pass + + @cached_property + def parser(self) -> "LHTMLParser": + """Get HTML parser.""" + return LHTMLParser(remove_blank_text=True, remove_comments=True, remove_pis=True, collect_ids=False) + + def _get_url( + self, + method: str, + url: str, + params: Optional[Dict[str, str]] = None, + content: Optional[bytes] = None, + data: Optional[Union[Dict[str, str], bytes]] = None, + ) -> bytes: + if self._exception_event.is_set(): + raise WebscoutE("Exception occurred in previous call.") + try: + resp = self.client.request(method, url, params=params, content=content, data=data) + except Exception as ex: + self._exception_event.set() + if "time" in str(ex).lower(): + raise TimeoutE(f"{url} {type(ex).__name__}: {ex}") from ex + raise WebscoutE(f"{url} {type(ex).__name__}: {ex}") from ex + logger.debug(f"_get_url() {resp.url} {resp.status_code} {len(resp.content)}") + if resp.status_code == 200: + return cast(bytes, resp.content) + self._exception_event.set() + if resp.status_code in (202, 301, 403): + raise RatelimitE(f"{resp.url} {resp.status_code} Ratelimit") + raise WebscoutE(f"{resp.url} return None. {params=} {content=} {data=}") + + def _get_vqd(self, keywords: str) -> str: + """Get vqd value for a search query.""" + resp_content = self._get_url("POST", "https://duckduckgo.com", data={"q": keywords}) + return _extract_vqd(resp_content, keywords) + + def chat(self, keywords: str, model: str = "gpt-3.5") -> str: + """Initiates a chat session with Webscout AI. + + Args: + keywords (str): The initial message or question to send to the AI. + model (str): The model to use: "gpt-3.5", "claude-3-haiku". Defaults to "gpt-3.5". + + Returns: + str: The response from the AI. 
+ """ + models = {"claude-3-haiku": "claude-3-haiku-20240307", "gpt-3.5": "gpt-3.5-turbo-0125"} + # vqd + if not self._chat_vqd: + resp = self.client.get("https://duckduckgo.com/duckchat/v1/status", headers={"x-vqd-accept": "1"}) + self._chat_vqd = resp.headers.get("x-vqd-4", "") + + self._chat_messages.append({"role": "user", "content": keywords}) + + json_data = { + "model": models[model], + "messages": self._chat_messages, + } + resp = self.client.post( + "https://duckduckgo.com/duckchat/v1/chat", headers={"x-vqd-4": self._chat_vqd}, json=json_data + ) + self._chat_vqd = resp.headers.get("x-vqd-4", "") + + messages = [] + for line in resp.text.replace("data: ", "").replace("[DONE]", "").split("\n\n"): + x = line.strip() + if x: + j = json_loads(x) + message = j.get("message", "") + messages.append(message) + result = "".join(messages) + self._chat_messages.append({"role": "assistant", "content": result}) + return result + + def text( + self, + keywords: str, + region: str = "wt-wt", + safesearch: str = "moderate", + timelimit: Optional[str] = None, + backend: str = "api", + max_results: Optional[int] = None, + ) -> List[Dict[str, str]]: + """Webscout text search. Query params: https://duckduckgo.com/params. + + Args: + keywords: keywords for query. + region: wt-wt, us-en, uk-en, ru-ru, etc. Defaults to "wt-wt". + safesearch: on, moderate, off. Defaults to "moderate". + timelimit: d, w, m, y. Defaults to None. + backend: api, html, lite. Defaults to api. + api - collect data from https://duckduckgo.com, + html - collect data from https://html.duckduckgo.com, + lite - collect data from https://lite.duckduckgo.com. + max_results: max number of results. If None, returns results only from the first response. Defaults to None. + + Returns: + List of dictionaries with search results, or None if there was an error. + + Raises: + WebscoutE: Base exception for webcout_search errors. + RatelimitE: Inherits from WebscoutE, raised for exceeding API request rate limits. + TimeoutE: Inherits from WebscoutE, raised for API request timeouts. + """ + if LXML_AVAILABLE is False and backend != "api": + backend = "api" + warnings.warn("lxml is not installed. Using backend='api'.", stacklevel=2) + + if backend == "api": + results = self._text_api(keywords, region, safesearch, timelimit, max_results) + elif backend == "html": + results = self._text_html(keywords, region, safesearch, timelimit, max_results) + elif backend == "lite": + results = self._text_lite(keywords, region, timelimit, max_results) + return results + + def _text_api( + self, + keywords: str, + region: str = "wt-wt", + safesearch: str = "moderate", + timelimit: Optional[str] = None, + max_results: Optional[int] = None, + ) -> List[Dict[str, str]]: + """Webscout text search. Query params: https://duckduckgo.com/params. + + Args: + keywords: keywords for query. + region: wt-wt, us-en, uk-en, ru-ru, etc. Defaults to "wt-wt". + safesearch: on, moderate, off. Defaults to "moderate". + timelimit: d, w, m, y. Defaults to None. + max_results: max number of results. If None, returns results only from the first response. Defaults to None. + + Returns: + List of dictionaries with search results. + + Raises: + WebscoutE: Base exception for webcout_search errors. + RatelimitE: Inherits from WebscoutE, raised for exceeding API request rate limits. + TimeoutE: Inherits from WebscoutE, raised for API request timeouts. 
+ """ + assert keywords, "keywords is mandatory" + + vqd = self._get_vqd(keywords) + + payload = { + "q": keywords, + "kl": region, + "l": region, + "p": "", + "s": "0", + "df": "", + "vqd": vqd, + "ex": "", + } + safesearch = safesearch.lower() + if safesearch == "moderate": + payload["ex"] = "-1" + elif safesearch == "off": + payload["ex"] = "-2" + elif safesearch == "on": # strict + payload["p"] = "1" + if timelimit: + payload["df"] = timelimit + + cache = set() + results: List[Dict[str, str]] = [] + + def _text_api_page(s: int) -> List[Dict[str, str]]: + payload["s"] = f"{s}" + resp_content = self._get_url("GET", "https://links.duckduckgo.com/d.js", params=payload) + page_data = _text_extract_json(resp_content, keywords) + page_results = [] + for row in page_data: + href = row.get("u", None) + if href and href not in cache and href != f"http://www.google.com/search?q={keywords}": + cache.add(href) + body = _normalize(row["a"]) + if body: + result = { + "title": _normalize(row["t"]), + "href": _normalize_url(href), + "body": body, + } + page_results.append(result) + return page_results + + slist = [0] + if max_results: + max_results = min(max_results, 500) + slist.extend(range(23, max_results, 50)) + try: + for r in self._executor.map(_text_api_page, slist): + results.extend(r) + except Exception as e: + raise e + + return list(islice(results, max_results)) + + def _text_html( + self, + keywords: str, + region: str = "wt-wt", + safesearch: str = "moderate", + timelimit: Optional[str] = None, + max_results: Optional[int] = None, + ) -> List[Dict[str, str]]: + """Webscout text search. Query params: https://duckduckgo.com/params. + + Args: + keywords: keywords for query. + region: wt-wt, us-en, uk-en, ru-ru, etc. Defaults to "wt-wt". + safesearch: on, moderate, off. Defaults to "moderate". + timelimit: d, w, m, y. Defaults to None. + max_results: max number of results. If None, returns results only from the first response. Defaults to None. + + Returns: + List of dictionaries with search results. + + Raises: + WebscoutE: Base exception for webcout_search errors. + RatelimitE: Inherits from WebscoutE, raised for exceeding API request rate limits. + TimeoutE: Inherits from WebscoutE, raised for API request timeouts. + """ + assert keywords, "keywords is mandatory" + + safesearch_base = {"on": "1", "moderate": "-1", "off": "-2"} + payload = { + "q": keywords, + "kl": region, + "p": safesearch_base[safesearch.lower()], + "o": "json", + "api": "d.js", + } + if timelimit: + payload["df"] = timelimit + if max_results and max_results > 20: + vqd = self._get_vqd(keywords) + payload["vqd"] = vqd + + cache = set() + results: List[Dict[str, str]] = [] + + def _text_html_page(s: int) -> List[Dict[str, str]]: + payload["s"] = f"{s}" + resp_content = self._get_url("POST", "https://html.duckduckgo.com/html", data=payload) + if b"No results." 
in resp_content: + return [] + + page_results = [] + tree = document_fromstring(resp_content, self.parser) + elements = tree.xpath("//div[h2]") + if not isinstance(elements, List): + return [] + for e in elements: + if isinstance(e, _Element): + hrefxpath = e.xpath("./a/@href") + href = str(hrefxpath[0]) if isinstance(hrefxpath, List) else None + if ( + href + and href not in cache + and not href.startswith( + ("http://www.google.com/search?q=", "https://duckduckgo.com/y.js?ad_domain") + ) + ): + cache.add(href) + titlexpath = e.xpath("./h2/a/text()") + title = str(titlexpath[0]) if isinstance(titlexpath, List) else "" + bodyxpath = e.xpath("./a//text()") + body = "".join(str(x) for x in bodyxpath) if isinstance(bodyxpath, List) else "" + result = { + "title": _normalize(title), + "href": _normalize_url(href), + "body": _normalize(body), + } + page_results.append(result) + return page_results + + slist = [0] + if max_results: + max_results = min(max_results, 500) + slist.extend(range(23, max_results, 50)) + try: + for r in self._executor.map(_text_html_page, slist): + results.extend(r) + except Exception as e: + raise e + + return list(islice(results, max_results)) + + def _text_lite( + self, + keywords: str, + region: str = "wt-wt", + timelimit: Optional[str] = None, + max_results: Optional[int] = None, + ) -> List[Dict[str, str]]: + """Webscout text search. Query params: https://duckduckgo.com/params. + + Args: + keywords: keywords for query. + region: wt-wt, us-en, uk-en, ru-ru, etc. Defaults to "wt-wt". + timelimit: d, w, m, y. Defaults to None. + max_results: max number of results. If None, returns results only from the first response. Defaults to None. + + Returns: + List of dictionaries with search results. + + Raises: + WebscoutE: Base exception for webcout_search errors. + RatelimitE: Inherits from WebscoutE, raised for exceeding API request rate limits. + TimeoutE: Inherits from WebscoutE, raised for API request timeouts. + """ + assert keywords, "keywords is mandatory" + + payload = { + "q": keywords, + "o": "json", + "api": "d.js", + "kl": region, + } + if timelimit: + payload["df"] = timelimit + + cache = set() + results: List[Dict[str, str]] = [] + + def _text_lite_page(s: int) -> List[Dict[str, str]]: + payload["s"] = f"{s}" + resp_content = self._get_url("POST", "https://lite.duckduckgo.com/lite/", data=payload) + if b"No more results." 
in resp_content: + return [] + + page_results = [] + tree = document_fromstring(resp_content, self.parser) + elements = tree.xpath("//table[last()]//tr") + if not isinstance(elements, List): + return [] + + data = zip(cycle(range(1, 5)), elements) + for i, e in data: + if isinstance(e, _Element): + if i == 1: + hrefxpath = e.xpath(".//a//@href") + href = str(hrefxpath[0]) if hrefxpath and isinstance(hrefxpath, List) else None + if ( + href is None + or href in cache + or href.startswith( + ("http://www.google.com/search?q=", "https://duckduckgo.com/y.js?ad_domain") + ) + ): + [next(data, None) for _ in range(3)] # skip block(i=1,2,3,4) + else: + cache.add(href) + titlexpath = e.xpath(".//a//text()") + title = str(titlexpath[0]) if isinstance(titlexpath, List) else "" + elif i == 2: + bodyxpath = e.xpath(".//td[@class='result-snippet']//text()") + body = "".join(str(x) for x in bodyxpath) if isinstance(bodyxpath, List) else "" + if href: + result = { + "title": _normalize(title), + "href": _normalize_url(href), + "body": _normalize(body), + } + page_results.append(result) + return page_results + + slist = [0] + if max_results: + max_results = min(max_results, 500) + slist.extend(range(23, max_results, 50)) + try: + for r in self._executor.map(_text_lite_page, slist): + results.extend(r) + except Exception as e: + raise e + + return list(islice(results, max_results)) + + def images( + self, + keywords: str, + region: str = "wt-wt", + safesearch: str = "moderate", + timelimit: Optional[str] = None, + size: Optional[str] = None, + color: Optional[str] = None, + type_image: Optional[str] = None, + layout: Optional[str] = None, + license_image: Optional[str] = None, + max_results: Optional[int] = None, + ) -> List[Dict[str, str]]: + """Webscout images search. Query params: https://duckduckgo.com/params. + + Args: + keywords: keywords for query. + region: wt-wt, us-en, uk-en, ru-ru, etc. Defaults to "wt-wt". + safesearch: on, moderate, off. Defaults to "moderate". + timelimit: Day, Week, Month, Year. Defaults to None. + size: Small, Medium, Large, Wallpaper. Defaults to None. + color: color, Monochrome, Red, Orange, Yellow, Green, Blue, + Purple, Pink, Brown, Black, Gray, Teal, White. Defaults to None. + type_image: photo, clipart, gif, transparent, line. + Defaults to None. + layout: Square, Tall, Wide. Defaults to None. + license_image: any (All Creative Commons), Public (PublicDomain), + Share (Free to Share and Use), ShareCommercially (Free to Share and Use Commercially), + Modify (Free to Modify, Share, and Use), ModifyCommercially (Free to Modify, Share, and + Use Commercially). Defaults to None. + max_results: max number of results. If None, returns results only from the first response. Defaults to None. + + Returns: + List of dictionaries with images search results. + + Raises: + WebscoutE: Base exception for webcout_search errors. + RatelimitE: Inherits from WebscoutE, raised for exceeding API request rate limits. + TimeoutE: Inherits from WebscoutE, raised for API request timeouts. 
+ """ + assert keywords, "keywords is mandatory" + + vqd = self._get_vqd(keywords) + + safesearch_base = {"on": "1", "moderate": "1", "off": "-1"} + timelimit = f"time:{timelimit}" if timelimit else "" + size = f"size:{size}" if size else "" + color = f"color:{color}" if color else "" + type_image = f"type:{type_image}" if type_image else "" + layout = f"layout:{layout}" if layout else "" + license_image = f"license:{license_image}" if license_image else "" + payload = { + "l": region, + "o": "json", + "q": keywords, + "vqd": vqd, + "f": f"{timelimit},{size},{color},{type_image},{layout},{license_image}", + "p": safesearch_base[safesearch.lower()], + } + + cache = set() + results: List[Dict[str, str]] = [] + + def _images_page(s: int) -> List[Dict[str, str]]: + payload["s"] = f"{s}" + resp_content = self._get_url("GET", "https://duckduckgo.com/i.js", params=payload) + resp_json = json_loads(resp_content) + + page_data = resp_json.get("results", []) + page_results = [] + for row in page_data: + image_url = row.get("image") + if image_url and image_url not in cache: + cache.add(image_url) + result = { + "title": row["title"], + "image": _normalize_url(image_url), + "thumbnail": _normalize_url(row["thumbnail"]), + "url": _normalize_url(row["url"]), + "height": row["height"], + "width": row["width"], + "source": row["source"], + } + page_results.append(result) + return page_results + + slist = [0] + if max_results: + max_results = min(max_results, 500) + slist.extend(range(100, max_results, 100)) + try: + for r in self._executor.map(_images_page, slist): + results.extend(r) + except Exception as e: + raise e + + return list(islice(results, max_results)) + + def videos( + self, + keywords: str, + region: str = "wt-wt", + safesearch: str = "moderate", + timelimit: Optional[str] = None, + resolution: Optional[str] = None, + duration: Optional[str] = None, + license_videos: Optional[str] = None, + max_results: Optional[int] = None, + ) -> List[Dict[str, str]]: + """Webscout videos search. Query params: https://duckduckgo.com/params. + + Args: + keywords: keywords for query. + region: wt-wt, us-en, uk-en, ru-ru, etc. Defaults to "wt-wt". + safesearch: on, moderate, off. Defaults to "moderate". + timelimit: d, w, m. Defaults to None. + resolution: high, standart. Defaults to None. + duration: short, medium, long. Defaults to None. + license_videos: creativeCommon, youtube. Defaults to None. + max_results: max number of results. If None, returns results only from the first response. Defaults to None. + + Returns: + List of dictionaries with videos search results. + + Raises: + WebscoutE: Base exception for webcout_search errors. + RatelimitE: Inherits from WebscoutE, raised for exceeding API request rate limits. + TimeoutE: Inherits from WebscoutE, raised for API request timeouts. 
+ """ + assert keywords, "keywords is mandatory" + + vqd = self._get_vqd(keywords) + + safesearch_base = {"on": "1", "moderate": "-1", "off": "-2"} + timelimit = f"publishedAfter:{timelimit}" if timelimit else "" + resolution = f"videoDefinition:{resolution}" if resolution else "" + duration = f"videoDuration:{duration}" if duration else "" + license_videos = f"videoLicense:{license_videos}" if license_videos else "" + payload = { + "l": region, + "o": "json", + "q": keywords, + "vqd": vqd, + "f": f"{timelimit},{resolution},{duration},{license_videos}", + "p": safesearch_base[safesearch.lower()], + } + + cache = set() + results: List[Dict[str, str]] = [] + + def _videos_page(s: int) -> List[Dict[str, str]]: + payload["s"] = f"{s}" + resp_content = self._get_url("GET", "https://duckduckgo.com/v.js", params=payload) + resp_json = json_loads(resp_content) + + page_data = resp_json.get("results", []) + page_results = [] + for row in page_data: + if row["content"] not in cache: + cache.add(row["content"]) + page_results.append(row) + return page_results + + slist = [0] + if max_results: + max_results = min(max_results, 400) + slist.extend(range(59, max_results, 59)) + try: + for r in self._executor.map(_videos_page, slist): + results.extend(r) + except Exception as e: + raise e + + return list(islice(results, max_results)) + + def news( + self, + keywords: str, + region: str = "wt-wt", + safesearch: str = "moderate", + timelimit: Optional[str] = None, + max_results: Optional[int] = None, + ) -> List[Dict[str, str]]: + """Webscout news search. Query params: https://duckduckgo.com/params. + + Args: + keywords: keywords for query. + region: wt-wt, us-en, uk-en, ru-ru, etc. Defaults to "wt-wt". + safesearch: on, moderate, off. Defaults to "moderate". + timelimit: d, w, m. Defaults to None. + max_results: max number of results. If None, returns results only from the first response. Defaults to None. + + Returns: + List of dictionaries with news search results. + + Raises: + WebscoutE: Base exception for webcout_search errors. + RatelimitE: Inherits from WebscoutE, raised for exceeding API request rate limits. + TimeoutE: Inherits from WebscoutE, raised for API request timeouts. 
+ """ + assert keywords, "keywords is mandatory" + + vqd = self._get_vqd(keywords) + + safesearch_base = {"on": "1", "moderate": "-1", "off": "-2"} + payload = { + "l": region, + "o": "json", + "noamp": "1", + "q": keywords, + "vqd": vqd, + "p": safesearch_base[safesearch.lower()], + } + if timelimit: + payload["df"] = timelimit + + cache = set() + results: List[Dict[str, str]] = [] + + def _news_page(s: int) -> List[Dict[str, str]]: + payload["s"] = f"{s}" + resp_content = self._get_url("GET", "https://duckduckgo.com/news.js", params=payload) + resp_json = json_loads(resp_content) + page_data = resp_json.get("results", []) + page_results = [] + for row in page_data: + if row["url"] not in cache: + cache.add(row["url"]) + image_url = row.get("image", None) + result = { + "date": datetime.fromtimestamp(row["date"], timezone.utc).isoformat(), + "title": row["title"], + "body": _normalize(row["excerpt"]), + "url": _normalize_url(row["url"]), + "image": _normalize_url(image_url), + "source": row["source"], + } + page_results.append(result) + return page_results + + slist = [0] + if max_results: + max_results = min(max_results, 200) + slist.extend(range(29, max_results, 29)) + try: + for r in self._executor.map(_news_page, slist): + results.extend(r) + except Exception as e: + raise e + + return list(islice(results, max_results)) + + def answers(self, keywords: str) -> List[Dict[str, str]]: + """Webscout instant answers. Query params: https://duckduckgo.com/params. + + Args: + keywords: keywords for query, + + Returns: + List of dictionaries with instant answers results. + + Raises: + WebscoutE: Base exception for webcout_search errors. + RatelimitE: Inherits from WebscoutE, raised for exceeding API request rate limits. + TimeoutE: Inherits from WebscoutE, raised for API request timeouts. + """ + assert keywords, "keywords is mandatory" + + payload = { + "q": f"what is {keywords}", + "format": "json", + } + resp_content = self._get_url("GET", "https://api.duckduckgo.com/", params=payload) + page_data = json_loads(resp_content) + + results = [] + answer = page_data.get("AbstractText") + url = page_data.get("AbstractURL") + if answer: + results.append( + { + "icon": None, + "text": answer, + "topic": None, + "url": url, + } + ) + + # related + payload = { + "q": f"{keywords}", + "format": "json", + } + resp_content = self._get_url("GET", "https://api.duckduckgo.com/", params=payload) + resp_json = json_loads(resp_content) + page_data = resp_json.get("RelatedTopics", []) + + for row in page_data: + topic = row.get("Name") + if not topic: + icon = row["Icon"].get("URL") + results.append( + { + "icon": f"https://duckduckgo.com{icon}" if icon else "", + "text": row["Text"], + "topic": None, + "url": row["FirstURL"], + } + ) + else: + for subrow in row["Topics"]: + icon = subrow["Icon"].get("URL") + results.append( + { + "icon": f"https://duckduckgo.com{icon}" if icon else "", + "text": subrow["Text"], + "topic": topic, + "url": subrow["FirstURL"], + } + ) + + return results + + def suggestions(self, keywords: str, region: str = "wt-wt") -> List[Dict[str, str]]: + """Webscout suggestions. Query params: https://duckduckgo.com/params. + + Args: + keywords: keywords for query. + region: wt-wt, us-en, uk-en, ru-ru, etc. Defaults to "wt-wt". + + Returns: + List of dictionaries with suggestions results. + + Raises: + WebscoutE: Base exception for webcout_search errors. + RatelimitE: Inherits from WebscoutE, raised for exceeding API request rate limits. 
+            TimeoutE: Inherits from WebscoutE, raised for API request timeouts.
+        """
+        assert keywords, "keywords is mandatory"
+
+        payload = {
+            "q": keywords,
+            "kl": region,
+        }
+        resp_content = self._get_url("GET", "https://duckduckgo.com/ac/", params=payload)
+        page_data = json_loads(resp_content)
+        return [r for r in page_data]
+
+    def maps(
+        self,
+        keywords: str,
+        place: Optional[str] = None,
+        street: Optional[str] = None,
+        city: Optional[str] = None,
+        county: Optional[str] = None,
+        state: Optional[str] = None,
+        country: Optional[str] = None,
+        postalcode: Optional[str] = None,
+        latitude: Optional[str] = None,
+        longitude: Optional[str] = None,
+        radius: int = 0,
+        max_results: Optional[int] = None,
+    ) -> List[Dict[str, str]]:
+        """Webscout maps search. Query params: https://duckduckgo.com/params.
+
+        Args:
+            keywords: keywords for query.
+            place: if set, the other parameters are not used. Defaults to None.
+            street: house number/street. Defaults to None.
+            city: city of search. Defaults to None.
+            county: county of search. Defaults to None.
+            state: state of search. Defaults to None.
+            country: country of search. Defaults to None.
+            postalcode: postalcode of search. Defaults to None.
+            latitude: geographic coordinate (north-south position). Defaults to None.
+            longitude: geographic coordinate (east-west position); if latitude and
+                longitude are set, the other parameters are not used. Defaults to None.
+            radius: expand the search square by the distance in kilometers. Defaults to 0.
+            max_results: max number of results. If None, returns results only from the first response. Defaults to None.
+
+        Returns:
+            List of dictionaries with maps search results.
+
+        Raises:
+            WebscoutE: Base exception for webscout_search errors.
+            RatelimitE: Inherits from WebscoutE, raised for exceeding API request rate limits.
+            TimeoutE: Inherits from WebscoutE, raised for API request timeouts.
+ """ + assert keywords, "keywords is mandatory" + + vqd = self._get_vqd(keywords) + + # if longitude and latitude are specified, skip the request about bbox to the nominatim api + if latitude and longitude: + lat_t = Decimal(latitude.replace(",", ".")) + lat_b = Decimal(latitude.replace(",", ".")) + lon_l = Decimal(longitude.replace(",", ".")) + lon_r = Decimal(longitude.replace(",", ".")) + if radius == 0: + radius = 1 + # otherwise request about bbox to nominatim api + else: + if place: + params = { + "q": place, + "polygon_geojson": "0", + "format": "jsonv2", + } + else: + params = { + "polygon_geojson": "0", + "format": "jsonv2", + } + if street: + params["street"] = street + if city: + params["city"] = city + if county: + params["county"] = county + if state: + params["state"] = state + if country: + params["country"] = country + if postalcode: + params["postalcode"] = postalcode + # request nominatim api to get coordinates box + resp_content = self._get_url( + "GET", + "https://nominatim.openstreetmap.org/search.php", + params=params, + ) + if resp_content == b"[]": + raise WebscoutE("maps() Coordinates are not found, check function parameters.") + resp_json = json_loads(resp_content) + coordinates = resp_json[0]["boundingbox"] + lat_t, lon_l = Decimal(coordinates[1]), Decimal(coordinates[2]) + lat_b, lon_r = Decimal(coordinates[0]), Decimal(coordinates[3]) + + # if a radius is specified, expand the search square + lat_t += Decimal(radius) * Decimal(0.008983) + lat_b -= Decimal(radius) * Decimal(0.008983) + lon_l -= Decimal(radius) * Decimal(0.008983) + lon_r += Decimal(radius) * Decimal(0.008983) + logger.debug(f"bbox coordinates\n{lat_t} {lon_l}\n{lat_b} {lon_r}") + + cache = set() + results: List[Dict[str, str]] = [] + + def _maps_page( + bbox: Tuple[Decimal, Decimal, Decimal, Decimal], + ) -> Optional[List[Dict[str, str]]]: + if max_results and len(results) >= max_results: + return None + lat_t, lon_l, lat_b, lon_r = bbox + params = { + "q": keywords, + "vqd": vqd, + "tg": "maps_places", + "rt": "D", + "mkexp": "b", + "wiki_info": "1", + "is_requery": "1", + "bbox_tl": f"{lat_t},{lon_l}", + "bbox_br": f"{lat_b},{lon_r}", + "strict_bbox": "1", + } + resp_content = self._get_url("GET", "https://duckduckgo.com/local.js", params=params) + resp_json = json_loads(resp_content) + page_data = resp_json.get("results", []) + + page_results = [] + for res in page_data: + r_name = f'{res["name"]} {res["address"]}' + if r_name in cache: + continue + else: + cache.add(r_name) + result = { + "title": res["name"], + "address": res["address"], + "country_code": res["country_code"], + "url": _normalize_url(res["website"]), + "phone": res["phone"] or "", + "latitude": res["coordinates"]["latitude"], + "longitude": res["coordinates"]["longitude"], + "source": _normalize_url(res["url"]), + "image": x.get("image", "") if (x := res["embed"]) else "", + "desc": x.get("description", "") if (x := res["embed"]) else "", + "hours": res["hours"] or "", + "category": res["ddg_category"] or "", + "facebook": f"www.facebook.com/profile.php?id={x}" if (x := res["facebook_id"]) else "", + "instagram": f"https://www.instagram.com/{x}" if (x := res["instagram_id"]) else "", + "twitter": f"https://twitter.com/{x}" if (x := res["twitter_id"]) else "", + } + page_results.append(result) + return page_results + + # search squares (bboxes) + start_bbox = (lat_t, lon_l, lat_b, lon_r) + work_bboxes = [start_bbox] + while work_bboxes: + queue_bboxes = [] # for next iteration, at the end of the iteration work_bboxes = 
queue_bboxes + tasks = [] + for bbox in work_bboxes: + tasks.append(bbox) + # if distance between coordinates > 1, divide the square into 4 parts and save them in queue_bboxes + if _calculate_distance(lat_t, lon_l, lat_b, lon_r) > 1: + lat_t, lon_l, lat_b, lon_r = bbox + lat_middle = (lat_t + lat_b) / 2 + lon_middle = (lon_l + lon_r) / 2 + bbox1 = (lat_t, lon_l, lat_middle, lon_middle) + bbox2 = (lat_t, lon_middle, lat_middle, lon_r) + bbox3 = (lat_middle, lon_l, lat_b, lon_middle) + bbox4 = (lat_middle, lon_middle, lat_b, lon_r) + queue_bboxes.extend([bbox1, bbox2, bbox3, bbox4]) + + # gather tasks using asyncio.wait_for and timeout + work_bboxes_results = [] + try: + for r in self._executor.map(_maps_page, tasks): + if r: + work_bboxes_results.extend(r) + except Exception as e: + raise e + + for x in work_bboxes_results: + if isinstance(x, list): + results.extend(x) + elif isinstance(x, dict): + results.append(x) + + work_bboxes = queue_bboxes + if not max_results or len(results) >= max_results or len(work_bboxes_results) == 0: + break + + return list(islice(results, max_results)) + + def translate( + self, keywords: Union[List[str], str], from_: Optional[str] = None, to: str = "en" + ) -> List[Dict[str, str]]: + """Webscout translate. + + Args: + keywords: string or list of strings to translate. + from_: translate from (defaults automatically). Defaults to None. + to: what language to translate. Defaults to "en". + + Returns: + List od dictionaries with translated keywords. + + Raises: + WebscoutE: Base exception for webcout_search errors. + RatelimitE: Inherits from WebscoutE, raised for exceeding API request rate limits. + TimeoutE: Inherits from WebscoutE, raised for API request timeouts. + """ + assert keywords, "keywords is mandatory" + + vqd = self._get_vqd("translate") + + payload = { + "vqd": vqd, + "query": "translate", + "to": to, + } + if from_: + payload["from"] = from_ + + def _translate_keyword(keyword: str) -> Dict[str, str]: + resp_content = self._get_url( + "POST", + "https://duckduckgo.com/translation.js", + params=payload, + content=keyword.encode(), + ) + page_data: Dict[str, str] = json_loads(resp_content) + page_data["original"] = keyword + return page_data + + if isinstance(keywords, str): + keywords = [keywords] + + results = [] + try: + for r in self._executor.map(_translate_keyword, keywords): + results.append(r) + except Exception as e: + raise e + + return results \ No newline at end of file diff --git a/webscout/webscout_search_async.py b/webscout/webscout_search_async.py new file mode 100644 index 0000000000000000000000000000000000000000..5d5da5a954f3a2b4cf80f94904f8519fb0c8f0c8 --- /dev/null +++ b/webscout/webscout_search_async.py @@ -0,0 +1,361 @@ +import asyncio +from types import TracebackType +from typing import Dict, List, Optional, Type, Union + +from .webscout_search import WEBS + + +class AsyncWEBS(WEBS): + def __init__( + self, + headers: Optional[Dict[str, str]] = None, + proxy: Optional[str] = None, + proxies: Union[Dict[str, str], str, None] = None, # deprecated + timeout: Optional[int] = 10, + ) -> None: + """Initialize the AsyncWEBS object. + + Args: + headers (dict, optional): Dictionary of headers for the HTTP client. Defaults to None. + proxy (str, optional): proxy for the HTTP client, supports http/https/socks5 protocols. + example: "http://user:pass@example.com:3128". Defaults to None. + timeout (int, optional): Timeout value for the HTTP client. Defaults to 10. 
+ """ + super().__init__(headers=headers, proxy=proxy, proxies=proxies, timeout=timeout) + self._loop = asyncio.get_running_loop() + self._executor = super()._executor + + async def __aenter__(self) -> "AsyncWEBS": + return self + + async def __aexit__( + self, + exc_type: Optional[Type[BaseException]], + exc_val: Optional[BaseException], + exc_tb: Optional[TracebackType], + ) -> None: + pass + + async def achat(self, keywords: str, model: str = "gpt-3.5") -> str: + """Initiates async chat session with Webscout AI. + + Args: + keywords (str): The initial message or question to send to the AI. + model (str): The model to use: "gpt-3.5", "claude-3-haiku". Defaults to "gpt-3.5". + + Returns: + str: The response from the AI. + """ + result = await self._loop.run_in_executor(self._executor, super().chat, keywords, model) + return result + + async def atext( + self, + keywords: str, + region: str = "wt-wt", + safesearch: str = "moderate", + timelimit: Optional[str] = None, + backend: str = "api", + max_results: Optional[int] = None, + ) -> List[Dict[str, str]]: + """Webscout async text search. Query params: https://duckduckgo.com/params. + + Args: + keywords: keywords for query. + region: wt-wt, us-en, uk-en, ru-ru, etc. Defaults to "wt-wt". + safesearch: on, moderate, off. Defaults to "moderate". + timelimit: d, w, m, y. Defaults to None. + backend: api, html, lite. Defaults to api. + api - collect data from https://duckduckgo.com, + html - collect data from https://html.duckduckgo.com, + lite - collect data from https://lite.duckduckgo.com. + max_results: max number of results. If None, returns results only from the first response. Defaults to None. + + Returns: + List of dictionaries with search results, or None if there was an error. + + Raises: + DuckDuckGoSearchException: Base exception for duckduckgo_search errors. + RatelimitException: Inherits from DuckDuckGoSearchException, raised for exceeding API request rate limits. + TimeoutException: Inherits from DuckDuckGoSearchException, raised for API request timeouts. + """ + result = await self._loop.run_in_executor( + self._executor, super().text, keywords, region, safesearch, timelimit, backend, max_results + ) + return result + + async def aimages( + self, + keywords: str, + region: str = "wt-wt", + safesearch: str = "moderate", + timelimit: Optional[str] = None, + size: Optional[str] = None, + color: Optional[str] = None, + type_image: Optional[str] = None, + layout: Optional[str] = None, + license_image: Optional[str] = None, + max_results: Optional[int] = None, + ) -> List[Dict[str, str]]: + """Webscout async images search. Query params: https://duckduckgo.com/params. + + Args: + keywords: keywords for query. + region: wt-wt, us-en, uk-en, ru-ru, etc. Defaults to "wt-wt". + safesearch: on, moderate, off. Defaults to "moderate". + timelimit: Day, Week, Month, Year. Defaults to None. + size: Small, Medium, Large, Wallpaper. Defaults to None. + color: color, Monochrome, Red, Orange, Yellow, Green, Blue, + Purple, Pink, Brown, Black, Gray, Teal, White. Defaults to None. + type_image: photo, clipart, gif, transparent, line. + Defaults to None. + layout: Square, Tall, Wide. Defaults to None. + license_image: any (All Creative Commons), Public (PublicDomain), + Share (Free to Share and Use), ShareCommercially (Free to Share and Use Commercially), + Modify (Free to Modify, Share, and Use), ModifyCommercially (Free to Modify, Share, and + Use Commercially). Defaults to None. + max_results: max number of results. 
If None, returns results only from the first response. Defaults to None. + + Returns: + List of dictionaries with images search results. + + Raises: + DuckDuckGoSearchException: Base exception for duckduckgo_search errors. + RatelimitException: Inherits from DuckDuckGoSearchException, raised for exceeding API request rate limits. + TimeoutException: Inherits from DuckDuckGoSearchException, raised for API request timeouts. + """ + result = await self._loop.run_in_executor( + self._executor, + super().images, + keywords, + region, + safesearch, + timelimit, + size, + color, + type_image, + layout, + license_image, + max_results, + ) + return result + + async def avideos( + self, + keywords: str, + region: str = "wt-wt", + safesearch: str = "moderate", + timelimit: Optional[str] = None, + resolution: Optional[str] = None, + duration: Optional[str] = None, + license_videos: Optional[str] = None, + max_results: Optional[int] = None, + ) -> List[Dict[str, str]]: + """Webscout async videos search. Query params: https://duckduckgo.com/params. + + Args: + keywords: keywords for query. + region: wt-wt, us-en, uk-en, ru-ru, etc. Defaults to "wt-wt". + safesearch: on, moderate, off. Defaults to "moderate". + timelimit: d, w, m. Defaults to None. + resolution: high, standart. Defaults to None. + duration: short, medium, long. Defaults to None. + license_videos: creativeCommon, youtube. Defaults to None. + max_results: max number of results. If None, returns results only from the first response. Defaults to None. + + Returns: + List of dictionaries with videos search results. + + Raises: + DuckDuckGoSearchException: Base exception for duckduckgo_search errors. + RatelimitException: Inherits from DuckDuckGoSearchException, raised for exceeding API request rate limits. + TimeoutException: Inherits from DuckDuckGoSearchException, raised for API request timeouts. + """ + result = await self._loop.run_in_executor( + self._executor, + super().videos, + keywords, + region, + safesearch, + timelimit, + resolution, + duration, + license_videos, + max_results, + ) + return result + + async def anews( + self, + keywords: str, + region: str = "wt-wt", + safesearch: str = "moderate", + timelimit: Optional[str] = None, + max_results: Optional[int] = None, + ) -> List[Dict[str, str]]: + """Webscout async news search. Query params: https://duckduckgo.com/params. + + Args: + keywords: keywords for query. + region: wt-wt, us-en, uk-en, ru-ru, etc. Defaults to "wt-wt". + safesearch: on, moderate, off. Defaults to "moderate". + timelimit: d, w, m. Defaults to None. + max_results: max number of results. If None, returns results only from the first response. Defaults to None. + + Returns: + List of dictionaries with news search results. + + Raises: + DuckDuckGoSearchException: Base exception for duckduckgo_search errors. + RatelimitException: Inherits from DuckDuckGoSearchException, raised for exceeding API request rate limits. + TimeoutException: Inherits from DuckDuckGoSearchException, raised for API request timeouts. + """ + result = await self._loop.run_in_executor( + self._executor, + super().news, + keywords, + region, + safesearch, + timelimit, + max_results, + ) + return result + + async def aanswers( + self, + keywords: str, + ) -> List[Dict[str, str]]: + """Webscout async instant answers. Query params: https://duckduckgo.com/params. + + Args: + keywords: keywords for query, + + Returns: + List of dictionaries with instant answers results. 
+ + Raises: + DuckDuckGoSearchException: Base exception for duckduckgo_search errors. + RatelimitException: Inherits from DuckDuckGoSearchException, raised for exceeding API request rate limits. + TimeoutException: Inherits from DuckDuckGoSearchException, raised for API request timeouts. + """ + result = await self._loop.run_in_executor( + self._executor, + super().answers, + keywords, + ) + return result + + async def asuggestions( + self, + keywords: str, + region: str = "wt-wt", + ) -> List[Dict[str, str]]: + """Webscout async suggestions. Query params: https://duckduckgo.com/params. + + Args: + keywords: keywords for query. + region: wt-wt, us-en, uk-en, ru-ru, etc. Defaults to "wt-wt". + + Returns: + List of dictionaries with suggestions results. + + Raises: + DuckDuckGoSearchException: Base exception for duckduckgo_search errors. + RatelimitException: Inherits from DuckDuckGoSearchException, raised for exceeding API request rate limits. + TimeoutException: Inherits from DuckDuckGoSearchException, raised for API request timeouts. + """ + result = await self._loop.run_in_executor( + self._executor, + super().suggestions, + keywords, + region, + ) + return result + + async def amaps( + self, + keywords: str, + place: Optional[str] = None, + street: Optional[str] = None, + city: Optional[str] = None, + county: Optional[str] = None, + state: Optional[str] = None, + country: Optional[str] = None, + postalcode: Optional[str] = None, + latitude: Optional[str] = None, + longitude: Optional[str] = None, + radius: int = 0, + max_results: Optional[int] = None, + ) -> List[Dict[str, str]]: + """Webscout async maps search. Query params: https://duckduckgo.com/params. + + Args: + keywords: keywords for query + place: if set, the other parameters are not used. Defaults to None. + street: house number/street. Defaults to None. + city: city of search. Defaults to None. + county: county of search. Defaults to None. + state: state of search. Defaults to None. + country: country of search. Defaults to None. + postalcode: postalcode of search. Defaults to None. + latitude: geographic coordinate (north-south position). Defaults to None. + longitude: geographic coordinate (east-west position); if latitude and + longitude are set, the other parameters are not used. Defaults to None. + radius: expand the search square by the distance in kilometers. Defaults to 0. + max_results: max number of results. If None, returns results only from the first response. Defaults to None. + + Returns: + List of dictionaries with maps search results, or None if there was an error. + + Raises: + DuckDuckGoSearchException: Base exception for duckduckgo_search errors. + RatelimitException: Inherits from DuckDuckGoSearchException, raised for exceeding API request rate limits. + TimeoutException: Inherits from DuckDuckGoSearchException, raised for API request timeouts. + """ + result = await self._loop.run_in_executor( + self._executor, + super().maps, + keywords, + place, + street, + city, + county, + state, + country, + postalcode, + latitude, + longitude, + radius, + max_results, + ) + return result + + async def atranslate( + self, + keywords: Union[List[str], str], + from_: Optional[str] = None, + to: str = "en", + ) -> List[Dict[str, str]]: + """Webscout async translate. + + Args: + keywords: string or list of strings to translate. + from_: translate from (defaults automatically). Defaults to None. + to: what language to translate. Defaults to "en". + + Returns: + List od dictionaries with translated keywords. 
+
+        Raises:
+            WebscoutE: Base exception for webscout_search errors.
+            RatelimitE: Inherits from WebscoutE, raised for exceeding API request rate limits.
+            TimeoutE: Inherits from WebscoutE, raised for API request timeouts.
+        """
+        result = await self._loop.run_in_executor(
+            self._executor,
+            super().translate,
+            keywords,
+            from_,
+            to,
+        )
+        return result
\ No newline at end of file
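
For reference, a minimal usage sketch of the synchronous WEBS search API introduced above. Method names, parameters, and result keys are taken from the diff; the import path mirrors the relative "from .webscout_search import WEBS" used by webscout_search_async.py, and the query strings and printed fields are only illustrative.

from webscout.webscout_search import WEBS

# Constructor arguments (headers, proxy, timeout) follow the signature shown above.
webs = WEBS(timeout=10)

# Image search: the optional filters feed the "f" payload (size:, color:, type:, ...).
images = webs.images("sunset", safesearch="off", size="Large", max_results=20)
print(images[0]["image"], images[0]["source"])

# News search: timelimit accepts d, w, m.
news = webs.news("python", timelimit="w", max_results=10)
print(news[0]["date"], news[0]["title"])

# Maps search: passing `place` is enough; the street/city/... fields are then ignored.
places = webs.maps("coffee", place="Berlin", max_results=5)
print(places[0]["title"], places[0]["address"])

# Translate: accepts a single string or a list of strings.
print(webs.translate(["hello", "good morning"], to="de"))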
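
A corresponding sketch for the AsyncWEBS wrapper, which offloads the blocking WEBS calls to the shared executor via run_in_executor; because __init__ calls asyncio.get_running_loop(), the client has to be created inside a running event loop, as below. Queries and printed fields are again illustrative only.

import asyncio

from webscout.webscout_search_async import AsyncWEBS


async def main() -> None:
    # Construct inside the coroutine so get_running_loop() succeeds; the async
    # context manager is a no-op beyond that but keeps call sites uniform.
    async with AsyncWEBS(timeout=10) as webs:
        # The a*-methods are plain coroutines, so they can be gathered concurrently.
        text_results, news_results = await asyncio.gather(
            webs.atext("async python", max_results=10),
            webs.anews("async python", max_results=5),
        )
        print(len(text_results), len(news_results))

        # atranslate mirrors translate(): a single string or a list of strings.
        print(await webs.atranslate("hello", to="fr"))


asyncio.run(main())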