//! This module provides the error enum to handle different errors associated while requesting data from //! the upstream search engines with the search query provided by the user. use super::aggregation_models::SearchResult; use error_stack::{Report, Result, ResultExt}; use reqwest::Client; use std::{collections::HashMap, fmt}; /// A custom error type used for handle engine associated errors. #[derive(Debug)] pub enum EngineError { /// No matching engine found NoSuchEngineFound(String), /// This variant handles all request related errors like forbidden, not found, /// etc. EmptyResultSet, /// This variant handles the not results found error provide by the upstream /// search engines. RequestError, /// This variant handles all the errors which are unexpected or occur rarely /// and are errors mostly related to failure in initialization of HeaderMap, /// Selector errors and all other errors occurring within the code handling /// the `upstream search engines`. UnexpectedError, } impl fmt::Display for EngineError { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { EngineError::NoSuchEngineFound(engine) => { write!(f, "No such engine with the name '{engine}' found") } EngineError::EmptyResultSet => { write!(f, "The upstream search engine returned an empty result set") } EngineError::RequestError => { write!( f, "Error occurred while requesting data from upstream search engine" ) } EngineError::UnexpectedError => { write!(f, "An unexpected error occurred while processing the data") } } } } impl error_stack::Context for EngineError {} /// A trait to define common behavior for all search engines. #[async_trait::async_trait] pub trait SearchEngine: Sync + Send { /// This helper function fetches/requests the search results from the upstream search engine in /// an html form. /// /// # Arguments /// /// * `url` - It takes the url of the upstream search engine with the user requested search /// query appended in the search parameters. /// * `header_map` - It takes the http request headers to be sent to the upstream engine in /// order to prevent being detected as a bot. It takes the header as a HeaderMap type. /// * `request_timeout` - It takes the request timeout value as seconds which is used to limit /// the amount of time for each request to remain connected when until the results can be provided /// by the upstream engine. /// /// # Error /// /// It returns the html data as a string if the upstream engine provides the data as expected /// otherwise it returns a custom `EngineError`. async fn fetch_html_from_upstream( &self, url: &str, header_map: reqwest::header::HeaderMap, client: &Client, ) -> Result { // fetch the html from upstream search engine Ok(client .get(url) .headers(header_map) // add spoofed headers to emulate human behavior .send() .await .change_context(EngineError::RequestError)? .text() .await .change_context(EngineError::RequestError)?) } /// This helper function fetches/requests the json search results from the upstream search engine as a vector of bytes. /// /// # Arguments /// /// * `url` - It takes the url of the upstream search engine with the user requested search /// query appended in the search parameters. /// * `header_map` - It takes the http request headers to be sent to the upstream engine in /// order to prevent being detected as a bot. It takes the header as a HeaderMap type. /// * `request_timeout` - It takes the request timeout value as seconds which is used to limit /// the amount of time for each request to remain connected when until the results can be provided /// by the upstream engine. /// /// # Error /// /// It returns the html data as a vector of bytes if the upstream engine provides the data as expected /// otherwise it returns a custom `EngineError`. async fn fetch_json_as_bytes_from_upstream( &self, url: &str, header_map: reqwest::header::HeaderMap, client: &Client, ) -> Result, EngineError> { // fetch the json response from upstream search engine Ok(client .get(url) .headers(header_map) // add spoofed headers to emulate human behavior .send() .await .change_context(EngineError::RequestError)? .bytes() .await .change_context(EngineError::RequestError)? .to_vec()) } /// This function scrapes results from the upstream engine and puts all the scraped results like /// title, visiting_url (href in html),engine (from which engine it was fetched from) and description /// in a RawSearchResult and then adds that to HashMap whose keys are url and values are RawSearchResult /// struct and then returns it within a Result enum. /// /// # Arguments /// /// * `query` - Takes the user provided query to query to the upstream search engine with. /// * `page` - Takes an u32 as an argument. /// * `user_agent` - Takes a random user agent string as an argument. /// * `request_timeout` - Takes a time (secs) as a value which controls the server request timeout. /// /// # Errors /// /// Returns an `EngineErrorKind` if the user is not connected to the internet or if their is failure to /// reach the above `upstream search engine` page or if the `upstream search engine` is unable to /// provide results for the requested search query and also returns error if the scraping selector /// or HeaderMap fails to initialize. async fn results( &self, query: &str, page: u32, user_agent: &str, client: &Client, safe_search: u8, ) -> Result, EngineError>; } /// A named struct which stores the engine struct with the name of the associated engine. pub struct EngineHandler { /// It stores the engine struct wrapped in a box smart pointer as the engine struct implements /// the `SearchEngine` trait. engine: Box, /// It stores the name of the engine to which the struct is associated to. name: &'static str, } impl Clone for EngineHandler { fn clone(&self) -> Self { Self::new(self.name).unwrap() } } impl EngineHandler { /// Parses an engine name into an engine handler. /// /// # Arguments /// /// * `engine_name` - It takes the name of the engine to which the struct was associated to. /// /// # Returns /// /// It returns an option either containing the value or a none if the engine is unknown pub fn new(engine_name: &str) -> Result { let engine: (&'static str, Box) = match engine_name.to_lowercase().as_str() { "duckduckgo" => { let engine = crate::engines::duckduckgo::DuckDuckGo::new()?; ("duckduckgo", Box::new(engine)) } "searx" => { let engine = crate::engines::searx::Searx::new()?; ("searx", Box::new(engine)) } "brave" => { let engine = crate::engines::brave::Brave::new()?; ("brave", Box::new(engine)) } "startpage" => { let engine = crate::engines::startpage::Startpage::new()?; ("startpage", Box::new(engine)) } "librex" => { let engine = crate::engines::librex::LibreX::new()?; ("librex", Box::new(engine)) } "mojeek" => { let engine = crate::engines::mojeek::Mojeek::new()?; ("mojeek", Box::new(engine)) } "bing" => { let engine = crate::engines::bing::Bing::new()?; ("bing", Box::new(engine)) } _ => { return Err(Report::from(EngineError::NoSuchEngineFound( engine_name.to_string(), ))) } }; Ok(Self { engine: engine.1, name: engine.0, }) } /// This function converts the EngineHandler type into a tuple containing the engine name and /// the associated engine struct. pub fn into_name_engine(self) -> (&'static str, Box) { (self.name, self.engine) } }