//! This module handles the search route of the search engine website. use crate::{ cache::cacher::SharedCache, config::parser::Config, handler::{file_path, FileType}, models::{ aggregation_models::SearchResults, engine_models::{EngineError, EngineHandler}, server_models::{Cookie, SearchParams}, }, results::aggregator::aggregate, }; use actix_web::{get, web, HttpRequest, HttpResponse}; use regex::Regex; use std::{ fs::File, io::{BufRead, BufReader, Read}, }; use tokio::join; /// Handles the route of search page of the `websurfx` meta search engine website and it takes /// two search url parameters `q` and `page` where `page` parameter is optional. /// /// # Example /// /// ```bash /// curl "http://127.0.0.1:8080/search?q=sweden&page=1" /// ``` /// /// Or /// /// ```bash /// curl "http://127.0.0.1:8080/search?q=sweden" /// ``` #[get("/search")] pub async fn search( req: HttpRequest, config: web::Data, cache: web::Data, ) -> Result> { let params = web::Query::::from_query(req.query_string())?; match ¶ms.q { Some(query) => { if query.trim().is_empty() { return Ok(HttpResponse::TemporaryRedirect() .insert_header(("location", "/")) .finish()); } let get_results = |page| { results( &config, &cache, query, page, req.clone(), ¶ms.safesearch, ) }; // .max(1) makes sure that the page > 0. let page = params.page.unwrap_or(1).max(1); let (_, results, _) = join!( get_results(page - 1), get_results(page), get_results(page + 1) ); Ok(HttpResponse::Ok().body( crate::templates::views::search::search( &config.style.colorscheme, &config.style.theme, query, &results?, ) .0, )) } None => Ok(HttpResponse::TemporaryRedirect() .insert_header(("location", "/")) .finish()), } } /// Fetches the results for a query and page. It First checks the redis cache, if that /// fails it gets proper results by requesting from the upstream search engines. /// /// # Arguments /// /// * `url` - It takes the url of the current page that requested the search results for a /// particular search query. 
/// * `config` - It takes a parsed config struct. /// * `query` - It takes the page number as u32 value. /// * `req` - It takes the `HttpRequest` struct as a value. /// /// # Error /// /// It returns the `SearchResults` struct if the search results could be successfully fetched from /// the cache or from the upstream search engines otherwise it returns an appropriate error. async fn results( config: &Config, cache: &web::Data, query: &str, page: u32, req: HttpRequest, safe_search: &Option, ) -> Result> { // eagerly parse cookie value to evaluate safe search level let cookie_value = req.cookie("appCookie"); let cookie_value: Option> = cookie_value .as_ref() .and_then(|cv| serde_json::from_str(cv.name_value().1).ok()); let safe_search_level = get_safesearch_level( safe_search, &cookie_value.as_ref().map(|cv| cv.safe_search_level), config.safe_search, ); let cache_key = format!( "http://{}:{}/search?q={}&page={}&safesearch={}", config.binding_ip, config.port, query, page, safe_search_level ); // fetch the cached results json. let cached_results = cache.cached_results(&cache_key).await; // check if fetched cache results was indeed fetched or it was an error and if so // handle the data accordingly. match cached_results { Ok(results) => Ok(results), Err(_) => { if safe_search_level == 4 { let mut results: SearchResults = SearchResults::default(); let flag: bool = !is_match_from_filter_list(file_path(FileType::BlockList)?, query)?; // Return early when query contains disallowed words, if flag { results.set_disallowed(); cache.cache_results(&results, &cache_key).await?; results.set_safe_search_level(safe_search_level); return Ok(results); } } // check if the cookie value is empty or not if it is empty then use the // default selected upstream search engines from the config file otherwise // parse the non-empty cookie and grab the user selected engines from the // UI and use that. 
let mut results: SearchResults = match cookie_value { Some(cookie_value) => { let engines: Vec = cookie_value .engines .iter() .filter_map(|name| EngineHandler::new(name).ok()) .collect(); match engines.is_empty() { false => { aggregate( query, page, config.aggregator.random_delay, config.debug, &engines, config.request_timeout, safe_search_level, ) .await? } true => { let mut search_results = SearchResults::default(); search_results.set_no_engines_selected(); search_results } } } None => aggregate( query, page, config.aggregator.random_delay, config.debug, &config .upstream_search_engines .clone() .into_iter() .filter_map(|(key, value)| value.then_some(key)) .map(|engine| EngineHandler::new(&engine)) .collect::, error_stack::Report>>( )?, config.request_timeout, safe_search_level, ) .await?, }; if results.engine_errors_info().is_empty() && results.results().is_empty() && !results.no_engines_selected() { results.set_filtered(); } cache.cache_results(&results, &cache_key).await?; results.set_safe_search_level(safe_search_level); Ok(results) } } } /// A helper function which checks whether the search query contains any keywords which should be /// disallowed/allowed based on the regex based rules present in the blocklist and allowlist files. /// /// # Arguments /// /// * `file_path` - It takes the file path of the list as the argument. /// * `query` - It takes the search query to be checked against the list as an argument. /// /// # Error /// /// Returns a bool indicating whether the results were found in the list or not on success /// otherwise returns a standard error type on a failure. fn is_match_from_filter_list( file_path: &str, query: &str, ) -> Result> { let mut reader = BufReader::new(File::open(file_path)?); for line in reader.by_ref().lines() { let re = Regex::new(&line?)?; if re.is_match(query) { return Ok(true); } } Ok(false) } /// A helper function which returns the safe search level based on the url params /// and cookie value. 
///
/// # Arguments
///
/// * `safe_search` - Safe search level from the url.
/// * `cookie` - Safe search level taken from the user's cookie.
/// * `default` - Safe search level to fall back to.
///
/// # Returns
///
/// The url level when it is present and below 3, `default` when the url level is 3 or above,
/// and otherwise the cookie level, falling back to `default` when there is no cookie level.
fn get_safesearch_level(safe_search: &Option<u8>, cookie: &Option<u8>, default: u8) -> u8 {
    match *safe_search {
        // Levels below 3 may be chosen freely through the url.
        Some(level) if level < 3 => level,
        // Anything higher requested through the url is replaced by the fallback level.
        Some(_) => default,
        // The cookie level is only consulted when the url carries no level at all.
        None => cookie.unwrap_or(default),
    }
}