import csv import logging import os from concurrent.futures import ThreadPoolExecutor, as_completed from datetime import datetime from urllib.parse import unquote from pathlib import Path import click from curl_cffi import requests from .webscout_search import WEBS from .utils import json_dumps, json_loads from .version import __version__ logger = logging.getLogger(__name__) COLORS = { 0: "black", 1: "red", 2: "green", 3: "yellow", 4: "blue", 5: "magenta", 6: "cyan", 7: "bright_black", 8: "bright_red", 9: "bright_green", 10: "bright_yellow", 11: "bright_blue", 12: "bright_magenta", 13: "bright_cyan", 14: "white", 15: "bright_white", } def _save_json(jsonfile, data): with open(jsonfile, "w", encoding="utf-8") as file: file.write(json_dumps(data)) def _save_csv(csvfile, data): with open(csvfile, "w", newline="", encoding="utf-8") as file: if data: headers = data[0].keys() writer = csv.DictWriter(file, fieldnames=headers, quoting=csv.QUOTE_MINIMAL) writer.writeheader() writer.writerows(data) def _print_data(data): if data: for i, e in enumerate(data, start=1): click.secho(f"{i}.\t {'=' * 78}", bg="black", fg="white") for j, (k, v) in enumerate(e.items(), start=1): if v: width = 300 if k in ("content", "href", "image", "source", "thumbnail", "url") else 78 k = "language" if k == "detected_language" else k text = click.wrap_text( f"{v}", width=width, initial_indent="", subsequent_indent=" " * 12, preserve_paragraphs=True ) else: text = v click.secho(f"{k:<12}{text}", bg="black", fg=COLORS[j], overline=True) input() def _sanitize_keywords(keywords): keywords = ( keywords.replace("filetype", "") .replace(":", "") .replace('"', "'") .replace("site", "") .replace(" ", "_") .replace("/", "_") .replace("\\", "_") .replace(" ", "") ) return keywords def _download_file(url, dir_path, filename, proxy): try: resp = requests.get(url, proxies=proxy, impersonate="chrome", timeout=10) resp.raise_for_status() with open(os.path.join(dir_path, filename[:200]), "wb") as file: file.write(resp.content) except Exception as ex: logger.debug(f"download_file url={url} {type(ex).__name__} {ex}") def _download_results(keywords, results, images=False, proxy=None, threads=None): path_type = "images" if images else "text" path = f"{path_type}_{keywords}_{datetime.now():%Y%m%d_%H%M%S}" os.makedirs(path, exist_ok=True) proxy = {"http": proxy, "https": proxy} threads = 10 if threads is None else threads with ThreadPoolExecutor(max_workers=threads) as executor: futures = [] for i, res in enumerate(results, start=1): url = res["image"] if images else res["href"] filename = unquote(url.split("/")[-1].split("?")[0]) f = executor.submit(_download_file, url, path, f"{i}_{filename}", proxy) futures.append(f) with click.progressbar( length=len(futures), label="Downloading", show_percent=True, show_pos=True, width=50 ) as bar: for future in as_completed(futures): future.result() bar.update(1) @click.group(chain=True) def cli(): """dukduckgo_search CLI tool""" pass def safe_entry_point(): try: cli() except Exception as ex: click.echo(f"{type(ex).__name__}: {ex}") @cli.command() def version(): print(__version__) return __version__ @cli.command() @click.option("-s", "--save", is_flag=True, default=False, help="save the conversation in the json file") @click.option("-p", "--proxy", default=None, help="the proxy to send requests, example: socks5://localhost:9150") def chat(save, proxy): """CLI function to perform an interactive AI chat using DuckDuckGo API.""" cache_file = "WEBS_chat_conversation.json" models = ["gpt-3.5", "claude-3-haiku"] client = WEBS(proxy=proxy) print("DuckDuckGo AI chat. Available models:") for idx, model in enumerate(models, start=1): print(f"{idx}. {model}") chosen_model_idx = input("Choose a model by entering its number[1]: ") chosen_model_idx = 0 if not chosen_model_idx.strip() else int(chosen_model_idx) - 1 model = models[chosen_model_idx] print(f"Using model: {model}") if save and Path(cache_file).exists(): with open(cache_file) as f: cache = json_loads(f.read()) client._chat_vqd = cache.get("vqd", None) client._chat_messages = cache.get("messages", []) while True: user_input = input(f"{'-'*78}\nYou: ") if not user_input.strip(): break resp_answer = client.chat(keywords=user_input, model=model) text = click.wrap_text(resp_answer, width=78, preserve_paragraphs=True) click.secho(f"AI: {text}", bg="black", fg="green", overline=True) cache = {"vqd": client._chat_vqd, "messages": client._chat_messages} _save_json(cache_file, cache) if "exit" in user_input.lower() or "quit" in user_input.lower(): break @cli.command() @click.option("-k", "--keywords", required=True, help="text search, keywords for query") @click.option("-r", "--region", default="wt-wt", help="wt-wt, us-en, ru-ru, etc. -region https://duckduckgo.com/params") @click.option("-s", "--safesearch", default="moderate", type=click.Choice(["on", "moderate", "off"])) @click.option("-t", "--timelimit", default=None, type=click.Choice(["d", "w", "m", "y"]), help="day, week, month, year") @click.option("-m", "--max_results", default=20, help="maximum number of results, default=20") @click.option("-o", "--output", default="print", help="csv, json (save the results to a csv or json file)") @click.option("-d", "--download", is_flag=True, default=False, help="download results to 'keywords' folder") @click.option("-b", "--backend", default="api", type=click.Choice(["api", "html", "lite"]), help="which backend to use") @click.option("-th", "--threads", default=10, help="download threads, default=10") @click.option("-p", "--proxy", default=None, help="the proxy to send requests, example: socks5://localhost:9150") def text(keywords, region, safesearch, timelimit, backend, output, download, threads, max_results, proxy): """CLI function to perform a text search using DuckDuckGo API.""" data = WEBS(proxies=proxy).text( keywords=keywords, region=region, safesearch=safesearch, timelimit=timelimit, backend=backend, max_results=max_results, ) keywords = _sanitize_keywords(keywords) filename = f"text_{keywords}_{datetime.now():%Y%m%d_%H%M%S}" if output == "print" and not download: _print_data(data) elif output == "csv": _save_csv(f"{filename}.csv", data) elif output == "json": _save_json(f"{filename}.json", data) if download: _download_results(keywords, data, proxy=proxy, threads=threads) @cli.command() @click.option("-k", "--keywords", required=True, help="answers search, keywords for query") @click.option("-o", "--output", default="print", help="csv, json (save the results to a csv or json file)") @click.option("-p", "--proxy", default=None, help="the proxy to send requests, example: socks5://localhost:9150") def answers(keywords, output, proxy): """CLI function to perform a answers search using DuckDuckGo API.""" data = WEBS(proxies=proxy).answers(keywords=keywords) filename = f"answers_{_sanitize_keywords(keywords)}_{datetime.now():%Y%m%d_%H%M%S}" if output == "print": _print_data(data) elif output == "csv": _save_csv(f"{filename}.csv", data) elif output == "json": _save_json(f"{filename}.json", data) @cli.command() @click.option("-k", "--keywords", required=True, help="keywords for query") @click.option("-r", "--region", default="wt-wt", help="wt-wt, us-en, ru-ru, etc. -region https://duckduckgo.com/params") @click.option("-s", "--safesearch", default="moderate", type=click.Choice(["on", "moderate", "off"])) @click.option("-t", "--timelimit", default=None, type=click.Choice(["Day", "Week", "Month", "Year"])) @click.option("-size", "--size", default=None, type=click.Choice(["Small", "Medium", "Large", "Wallpaper"])) @click.option( "-c", "--color", default=None, type=click.Choice( [ "color", "Monochrome", "Red", "Orange", "Yellow", "Green", "Blue", "Purple", "Pink", "Brown", "Black", "Gray", "Teal", "White", ] ), ) @click.option( "-type", "--type_image", default=None, type=click.Choice(["photo", "clipart", "gif", "transparent", "line"]) ) @click.option("-l", "--layout", default=None, type=click.Choice(["Square", "Tall", "Wide"])) @click.option( "-lic", "--license_image", default=None, type=click.Choice(["any", "Public", "Share", "Modify", "ModifyCommercially"]), ) @click.option("-m", "--max_results", default=90, help="maximum number of results, default=90") @click.option("-o", "--output", default="print", help="csv, json (save the results to a csv or json file)") @click.option("-d", "--download", is_flag=True, default=False, help="download and save images to 'keywords' folder") @click.option("-th", "--threads", default=10, help="download threads, default=10") @click.option("-p", "--proxy", default=None, help="the proxy to send requests, example: socks5://localhost:9150") def images( keywords, region, safesearch, timelimit, size, color, type_image, layout, license_image, download, threads, max_results, output, proxy, ): """CLI function to perform a images search using DuckDuckGo API.""" data = WEBS(proxies=proxy).images( keywords=keywords, region=region, safesearch=safesearch, timelimit=timelimit, size=size, color=color, type_image=type_image, layout=layout, license_image=license_image, max_results=max_results, ) keywords = _sanitize_keywords(keywords) filename = f"images_{_sanitize_keywords(keywords)}_{datetime.now():%Y%m%d_%H%M%S}" if output == "print" and not download: _print_data(data) elif output == "csv": _save_csv(f"{filename}.csv", data) elif output == "json": _save_json(f"{filename}.json", data) if download: _download_results(keywords, data, images=True, proxy=proxy, threads=threads) @cli.command() @click.option("-k", "--keywords", required=True, help="keywords for query") @click.option("-r", "--region", default="wt-wt", help="wt-wt, us-en, ru-ru, etc. -region https://duckduckgo.com/params") @click.option("-s", "--safesearch", default="moderate", type=click.Choice(["on", "moderate", "off"])) @click.option("-t", "--timelimit", default=None, type=click.Choice(["d", "w", "m"]), help="day, week, month") @click.option("-res", "--resolution", default=None, type=click.Choice(["high", "standart"])) @click.option("-d", "--duration", default=None, type=click.Choice(["short", "medium", "long"])) @click.option("-lic", "--license_videos", default=None, type=click.Choice(["creativeCommon", "youtube"])) @click.option("-m", "--max_results", default=50, help="maximum number of results, default=50") @click.option("-o", "--output", default="print", help="csv, json (save the results to a csv or json file)") @click.option("-p", "--proxy", default=None, help="the proxy to send requests, example: socks5://localhost:9150") def videos(keywords, region, safesearch, timelimit, resolution, duration, license_videos, max_results, output, proxy): """CLI function to perform a videos search using DuckDuckGo API.""" data = WEBS(proxies=proxy).videos( keywords=keywords, region=region, safesearch=safesearch, timelimit=timelimit, resolution=resolution, duration=duration, license_videos=license_videos, max_results=max_results, ) filename = f"videos_{_sanitize_keywords(keywords)}_{datetime.now():%Y%m%d_%H%M%S}" if output == "print": _print_data(data) elif output == "csv": _save_csv(f"{filename}.csv", data) elif output == "json": _save_json(f"{filename}.json", data) @cli.command() @click.option("-k", "--keywords", required=True, help="keywords for query") @click.option("-r", "--region", default="wt-wt", help="wt-wt, us-en, ru-ru, etc. -region https://duckduckgo.com/params") @click.option("-s", "--safesearch", default="moderate", type=click.Choice(["on", "moderate", "off"])) @click.option("-t", "--timelimit", default=None, type=click.Choice(["d", "w", "m", "y"]), help="day, week, month, year") @click.option("-m", "--max_results", default=25, help="maximum number of results, default=25") @click.option("-o", "--output", default="print", help="csv, json (save the results to a csv or json file)") @click.option("-p", "--proxy", default=None, help="the proxy to send requests, example: socks5://localhost:9150") def news(keywords, region, safesearch, timelimit, max_results, output, proxy): """CLI function to perform a news search using DuckDuckGo API.""" data = WEBS(proxies=proxy).news( keywords=keywords, region=region, safesearch=safesearch, timelimit=timelimit, max_results=max_results ) filename = f"news_{_sanitize_keywords(keywords)}_{datetime.now():%Y%m%d_%H%M%S}" if output == "print": _print_data(data) elif output == "csv": _save_csv(f"{filename}.csv", data) elif output == "json": _save_json(f"{filename}.json", data) @cli.command() @click.option("-k", "--keywords", required=True, help="keywords for query") @click.option("-p", "--place", default=None, help="simplified search - if set, the other parameters are not used") @click.option("-s", "--street", default=None, help="house number/street") @click.option("-c", "--city", default=None, help="city of search") @click.option("-county", "--county", default=None, help="county of search") @click.option("-state", "--state", default=None, help="state of search") @click.option("-country", "--country", default=None, help="country of search") @click.option("-post", "--postalcode", default=None, help="postalcode of search") @click.option("-lat", "--latitude", default=None, help="""if lat and long are set, the other params are not used""") @click.option("-lon", "--longitude", default=None, help="""if lat and long are set, the other params are not used""") @click.option("-r", "--radius", default=0, help="expand the search square by the distance in kilometers") @click.option("-m", "--max_results", default=50, help="number of results, default=50") @click.option("-o", "--output", default="print", help="csv, json (save the results to a csv or json file)") @click.option("-proxy", "--proxy", default=None, help="the proxy to send requests, example: socks5://localhost:9150") def maps( keywords, place, street, city, county, state, country, postalcode, latitude, longitude, radius, max_results, output, proxy, ): """CLI function to perform a maps search using DuckDuckGo API.""" data = WEBS(proxies=proxy).maps( keywords=keywords, place=place, street=street, city=city, county=county, state=state, country=country, postalcode=postalcode, latitude=latitude, longitude=longitude, radius=radius, max_results=max_results, ) filename = f"maps_{_sanitize_keywords(keywords)}_{datetime.now():%Y%m%d_%H%M%S}" if output == "print": _print_data(data) elif output == "csv": _save_csv(f"{filename}.csv", data) elif output == "json": _save_json(f"{filename}.json", data) @cli.command() @click.option("-k", "--keywords", required=True, help="text for translation") @click.option("-f", "--from_", help="What language to translate from (defaults automatically)") @click.option("-t", "--to", default="en", help="de, ru, fr, etc. What language to translate, defaults='en'") @click.option("-o", "--output", default="print", help="csv, json (save the results to a csv or json file)") @click.option("-p", "--proxy", default=None, help="the proxy to send requests, example: socks5://localhost:9150") def translate(keywords, from_, to, output, proxy): """CLI function to perform translate using DuckDuckGo API.""" data = WEBS(proxies=proxy).translate(keywords=keywords, from_=from_, to=to) filename = f"translate_{_sanitize_keywords(keywords)}_{datetime.now():%Y%m%d_%H%M%S}" if output == "print": _print_data(data) elif output == "csv": _save_csv(f"{filename}.csv", data) elif output == "json": _save_json(f"{filename}.json", data) @cli.command() @click.option("-k", "--keywords", required=True, help="keywords for query") @click.option("-r", "--region", default="wt-wt", help="wt-wt, us-en, ru-ru, etc. -region https://duckduckgo.com/params") @click.option("-o", "--output", default="print", help="csv, json (save the results to a csv or json file)") @click.option("-p", "--proxy", default=None, help="the proxy to send requests, example: socks5://localhost:9150") def suggestions(keywords, region, output, proxy): """CLI function to perform a suggestions search using DuckDuckGo API.""" data = WEBS(proxies=proxy).suggestions(keywords=keywords, region=region) filename = f"suggestions_{_sanitize_keywords(keywords)}_{datetime.now():%Y%m%d_%H%M%S}" if output == "print": _print_data(data) elif output == "csv": _save_csv(f"{filename}.csv", data) elif output == "json": _save_json(f"{filename}.json", data) if __name__ == "__main__": cli(prog_name="WEBS")