Spaces:

alamin655
/

spacex

Runtime error

App Files Files Community

neon_arch committed on Jul 15, 2023

Commit

94ef62e

•

1 Parent(s): 8133de1

✨ feat: add documentation to code

Browse files

Files changed (7) hide show

src/config_parser/parser.rs +9 -1
src/engines/duckduckgo.rs +19 -18
src/engines/engine_models.rs +2 -1
src/engines/searx.rs +20 -18
src/search_results_handler/aggregation_models.rs +10 -0
src/search_results_handler/aggregator.rs +16 -6
src/server/routes.rs +15 -1

src/config_parser/parser.rs CHANGED Viewed

@@ -18,6 +18,10 @@ static CONFIG_FILE_NAME: &str = "config.lua";
 /// * `style` - It stores the theming options for the website.
 /// * `redis_connection_url` - It stores the redis connection url address on which the redis
 /// client should connect.
 #[derive(Clone)]
 pub struct Config {
     pub port: u16,
@@ -31,9 +35,13 @@ pub struct Config {
 }
 /// Configuration options for the aggregator.
 #[derive(Clone)]
 pub struct AggreatorConfig {
-    /// Whether to introduce a random delay before sending the request to the search engine.
     pub random_delay: bool,
 }

 /// * `style` - It stores the theming options for the website.
 /// * `redis_connection_url` - It stores the redis connection url address on which the redis
 /// client should connect.
+/// * `aggregator` - It stores the configuration options for the aggregator.
+/// * `logging` - It stores whether logs are enabled or disabled.
+/// * `debug` - It stores whether debug mode is enabled or disabled.
+/// * `upstream_search_engines` - It stores all the engine names that were enabled by the user.
 #[derive(Clone)]
 pub struct Config {
     pub port: u16,
 }
 /// Configuration options for the aggregator.
+///
+/// # Fields
+///
+/// * `random_delay` - It stores whether random delays between requests are enabled or
+/// disabled.
 #[derive(Clone)]
 pub struct AggreatorConfig {
     pub random_delay: bool,
 }

src/engines/duckduckgo.rs CHANGED Viewed

@@ -13,28 +13,29 @@ use super::engine_models::{EngineError, SearchEngine};
 use error_stack::{IntoReport, Report, Result, ResultExt};
-/// This function scrapes results from the upstream engine duckduckgo and puts all the scraped
-/// results like title, visiting_url (href in html),engine (from which engine it was fetched from)
-/// and description in a RawSearchResult and then adds that to HashMap whose keys are url and
-/// values are RawSearchResult struct and then returns it within a Result enum.
-///
-/// # Arguments
-///
-/// * `query` - Takes the user provided query to query to the upstream search engine with.
-/// * `page` - Takes an u32 as an argument.
-/// * `user_agent` - Takes a random user agent string as an argument.
-///
-/// # Errors
-///
-/// Returns an `EngineErrorKind` if the user is not connected to the internet or if their is failure to
-/// reach the above `upstream search engine` page or if the `upstream search engine` is unable to
-/// provide results for the requested search query and also returns error if the scraping selector
-/// or HeaderMap fails to initialize.
 pub struct DuckDuckGo;
 #[async_trait::async_trait]
 impl SearchEngine for DuckDuckGo {
     async fn results(
         &self,
         query: String,

 use error_stack::{IntoReport, Report, Result, ResultExt};
+/// A new DuckDuckGo engine type defined in order to implement the `SearchEngine` trait, which
+/// reduces code duplication and makes it easy to create a vector of different search engines.
 pub struct DuckDuckGo;
 #[async_trait::async_trait]
 impl SearchEngine for DuckDuckGo {
+    /// This function scrapes results from the upstream engine duckduckgo and puts all the scraped
+    /// results like title, visiting_url (href in html),engine (from which engine it was fetched from)
+    /// and description in a RawSearchResult and then adds that to HashMap whose keys are url and
+    /// values are RawSearchResult struct and then returns it within a Result enum.
+    ///
+    /// # Arguments
+    ///
+    /// * `query` - Takes the user provided query to query the upstream search engine with.
+    /// * `page` - Takes a u32 page number as an argument.
+    /// * `user_agent` - Takes a random user agent string as an argument.
+    ///
+    /// # Errors
+    ///
+    /// Returns an `EngineErrorKind` if the user is not connected to the internet, if there is a failure
+    /// to reach the above `upstream search engine` page, or if the `upstream search engine` is unable
+    /// to provide results for the requested search query. It also returns an error if the scraping
+    /// selector or HeaderMap fails to initialize.
     async fn results(
         &self,
         query: String,

src/engines/engine_models.rs CHANGED Viewed

@@ -43,6 +43,7 @@ impl fmt::Display for EngineError {
 impl error_stack::Context for EngineError {}
 #[async_trait::async_trait]
 pub trait SearchEngine {
     async fn fetch_html_from_upstream(
@@ -53,7 +54,7 @@ pub trait SearchEngine {
         // fetch the html from upstream search engine
         Ok(reqwest::Client::new()
             .get(url)
-            .timeout(Duration::from_secs(30))
             .headers(header_map) // add spoofed headers to emulate human behaviour
             .send()
             .await

 impl error_stack::Context for EngineError {}
+/// A trait to define common behaviour for all search engines.
 #[async_trait::async_trait]
 pub trait SearchEngine {
     async fn fetch_html_from_upstream(
         // fetch the html from upstream search engine
         Ok(reqwest::Client::new()
             .get(url)
+            .timeout(Duration::from_secs(30)) // Add timeout to request to avoid DDOSing the server
             .headers(header_map) // add spoofed headers to emulate human behaviour
             .send()
             .await

src/engines/searx.rs CHANGED Viewed

@@ -11,28 +11,30 @@ use crate::search_results_handler::aggregation_models::RawSearchResult;
 use super::engine_models::{EngineError, SearchEngine};
 use error_stack::{IntoReport, Report, Result, ResultExt};
-/// This function scrapes results from the upstream engine duckduckgo and puts all the scraped
-/// results like title, visiting_url (href in html),engine (from which engine it was fetched from)
-/// and description in a RawSearchResult and then adds that to HashMap whose keys are url and
-/// values are RawSearchResult struct and then returns it within a Result enum.
-///
-/// # Arguments
-///
-/// * `query` - Takes the user provided query to query to the upstream search engine with.
-/// * `page` - Takes an u32 as an argument.
-/// * `user_agent` - Takes a random user agent string as an argument.
-///
-/// # Errors
-///
-/// Returns an `EngineErrorKind` if the user is not connected to the internet or if their is failure to
-/// reach the above `upstream search engine` page or if the `upstream search engine` is unable to
-/// provide results for the requested search query and also returns error if the scraping selector
-/// or HeaderMap fails to initialize.
 pub struct Searx;
 #[async_trait::async_trait]
 impl SearchEngine for Searx {
     async fn results(
         &self,
         query: String,

 use super::engine_models::{EngineError, SearchEngine};
 use error_stack::{IntoReport, Report, Result, ResultExt};
+/// A new Searx engine type defined in order to implement the `SearchEngine` trait, which
+/// reduces code duplication and makes it easy to create a vector of different search engines.
 pub struct Searx;
 #[async_trait::async_trait]
 impl SearchEngine for Searx {
+    /// This function scrapes results from the upstream engine searx and puts all the scraped
+    /// results like title, visiting_url (href in html),engine (from which engine it was fetched from)
+    /// and description in a RawSearchResult and then adds that to HashMap whose keys are url and
+    /// values are RawSearchResult struct and then returns it within a Result enum.
+    ///
+    /// # Arguments
+    ///
+    /// * `query` - Takes the user provided query to query the upstream search engine with.
+    /// * `page` - Takes a u32 page number as an argument.
+    /// * `user_agent` - Takes a random user agent string as an argument.
+    ///
+    /// # Errors
+    ///
+    /// Returns an `EngineErrorKind` if the user is not connected to the internet, if there is a failure
+    /// to reach the above `upstream search engine` page, or if the `upstream search engine` is unable
+    /// to provide results for the requested search query. It also returns an error if the scraping
+    /// selector or HeaderMap fails to initialize.
     async fn results(
         &self,
         query: String,

src/search_results_handler/aggregation_models.rs CHANGED Viewed

@@ -143,6 +143,11 @@ impl EngineErrorInfo {
 /// * `results` - Stores the individual serializable `SearchResult` struct into a vector of
 /// `SearchResult` structs.
 /// * `page_query` - Stores the current pages search query `q` provided in the search url.
 #[derive(Serialize, Deserialize)]
 #[serde(rename_all = "camelCase")]
 pub struct SearchResults {
@@ -162,6 +167,8 @@ impl SearchResults {
     /// and stores it into a vector of `SearchResult` structs.
     /// * `page_query` - Takes an argument of current page's search query `q` provided in
     /// the search url.
     pub fn new(
         results: Vec<SearchResult>,
         page_query: String,
@@ -176,14 +183,17 @@ impl SearchResults {
         }
     }
     pub fn add_style(&mut self, style: Style) {
         self.style = style;
     }
     pub fn is_empty_result_set(&self) -> bool {
         self.results.is_empty()
     }
     pub fn set_empty_result_set(&mut self) {
         self.empty_result_set = true;
     }

 /// * `results` - Stores the individual serializable `SearchResult` struct into a vector of
 /// `SearchResult` structs.
 /// * `page_query` - Stores the current pages search query `q` provided in the search url.
+/// * `style` - Stores the theming options for the website.
+/// * `engine_errors_info` - Stores the information on which engines failed with their engine name
+/// and the type of error that caused it.
+/// * `empty_result_set` - Stores a boolean which indicates that no engines gave a result for the
+/// given search query.
 #[derive(Serialize, Deserialize)]
 #[serde(rename_all = "camelCase")]
 pub struct SearchResults {
     /// and stores it into a vector of `SearchResult` structs.
     /// * `page_query` - Takes an argument of current page's search query `q` provided in
     /// the search url.
+    /// * `empty_result_set` - Takes a boolean which indicates that no engines gave a result for the
+    /// given search query.
     pub fn new(
         results: Vec<SearchResult>,
         page_query: String,
         }
     }
+    /// A setter function to add website style to the returned search results.
     pub fn add_style(&mut self, style: Style) {
         self.style = style;
     }
+    /// A function which checks whether the results stored are empty or not.
     pub fn is_empty_result_set(&self) -> bool {
         self.results.is_empty()
     }
+    /// A setter function which sets the empty_result_set to true.
     pub fn set_empty_result_set(&mut self) {
         self.empty_result_set = true;
     }

src/search_results_handler/aggregator.rs CHANGED Viewed

@@ -18,14 +18,21 @@ use crate::engines::{
     searx,
 };
 type FutureVec = Vec<JoinHandle<Result<HashMap<String, RawSearchResult>, Report<EngineError>>>>;
-/// A function that aggregates all the scraped results from the above upstream engines and
-/// then removes duplicate results and if two results are found to be from two or more engines
-/// then puts their names together to show the results are fetched from these upstream engines
-/// and then removes all data from the HashMap and puts into a struct of all results aggregated
-/// into a vector and also adds the query used into the struct this is neccessory because
-/// otherwise the search bar in search remains empty if searched from the query url
 ///
 /// # Example:
 ///
@@ -37,6 +44,9 @@ type FutureVec = Vec<JoinHandle<Result<HashMap<String, RawSearchResult>, Report<
 /// * `query` - Accepts a string to query with the above upstream search engines.
 /// * `page` - Accepts an u32 page number.
 /// * `random_delay` - Accepts a boolean value to add a random delay before making the request.
 ///
 /// # Error
 ///

     searx,
 };
+/// Aliases for long type annotations
 type FutureVec = Vec<JoinHandle<Result<HashMap<String, RawSearchResult>, Report<EngineError>>>>;
+/// A function that aggregates all the scraped results from the user selected upstream search
+/// engines, which are selected either from the UI or from the config file. The code matches over
+/// the selected search engines and adds the selected ones to a vector, which is then used to
+/// create a vector of async tasks with `tokio::spawn`. Each task returns a future which is then
+/// awaited on in another loop, and all the collected results are then filtered into errors and
+/// proper results. If an error is found, it is sent to the UI with the engine name and the error
+/// type that caused it by finally putting them in the returned `SearchResults` struct. The same
+/// process also removes duplicate results, and if a result is found to be from two or more
+/// engines then it puts their names together to show that the result was fetched from these
+/// upstream engines. It then removes all data from the HashMap and puts it into a struct of all
+/// results aggregated into a vector, and also adds the query used into the struct. This is
+/// necessary because otherwise the search bar in the search page remains empty if searched from
+/// the query url.
 ///
 /// # Example:
 ///
 /// * `query` - Accepts a string to query with the above upstream search engines.
 /// * `page` - Accepts an u32 page number.
 /// * `random_delay` - Accepts a boolean value to add a random delay before making the request.
+/// * `debug` - Accepts a boolean value to enable or disable debug mode option.
+/// * `upstream_search_engines` - Accepts a vector of search engine names which was selected by the
+/// user through the UI or the config file.
 ///
 /// # Error
 ///

src/server/routes.rs CHANGED Viewed

@@ -51,6 +51,13 @@ pub async fn not_found(
         .body(page_content))
 }
 #[allow(dead_code)]
 #[derive(Deserialize)]
 struct Cookie {
@@ -126,7 +133,7 @@ pub async fn search(
                 // fetch the cached results json.
                 let cached_results_json = redis_cache.cached_results_json(&page_url);
-                // check if fetched results was indeed fetched or it was an error and if so
                 // handle the data accordingly.
                 match cached_results_json {
                     Ok(results_json) => {
@@ -135,6 +142,10 @@ pub async fn search(
                         Ok(HttpResponse::Ok().body(page_content))
                     }
                     Err(_) => {
                         let mut results_json: crate::search_results_handler::aggregation_models::SearchResults = match req.cookie("appCookie") {
                             Some(cookie_value) => {
                                     let cookie_value:Cookie = serde_json::from_str(cookie_value.name_value().1)?;
@@ -143,6 +154,9 @@ pub async fn search(
                             None => aggregate(query.clone(), page, config.aggregator.random_delay, config.debug, config.upstream_search_engines.clone()).await?,
                         };
                         results_json.add_style(config.style.clone());
                         if results_json.is_empty_result_set() {
                             results_json.set_empty_result_set();
                         }

         .body(page_content))
 }
+/// A named struct which is used to deserialize the cookies fetched from the client side.
+///
+/// # Fields
+///
+/// * `theme` - It stores the theme name used in the website.
+/// * `colorscheme` - It stores the colorscheme name used for the website theme.
+/// * `engines` - It stores the user selected upstream search engines selected from the UI.
 #[allow(dead_code)]
 #[derive(Deserialize)]
 struct Cookie {
                 // fetch the cached results json.
                 let cached_results_json = redis_cache.cached_results_json(&page_url);
+                // check whether the cached results were indeed fetched or it was an error and if so
                 // handle the data accordingly.
                 match cached_results_json {
                     Ok(results_json) => {
                         Ok(HttpResponse::Ok().body(page_content))
                     }
                     Err(_) => {
+                        // check whether the cookie value is empty or not. If it is empty then use
+                        // the default selected upstream search engines from the config file,
+                        // otherwise parse the non-empty cookie, grab the user selected engines
+                        // from the UI and use those.
                         let mut results_json: crate::search_results_handler::aggregation_models::SearchResults = match req.cookie("appCookie") {
                             Some(cookie_value) => {
                                     let cookie_value:Cookie = serde_json::from_str(cookie_value.name_value().1)?;
                             None => aggregate(query.clone(), page, config.aggregator.random_delay, config.debug, config.upstream_search_engines.clone()).await?,
                         };
                         results_json.add_style(config.style.clone());
+                        // check whether the results grabbed from the upstream engines are empty or
+                        // not. If they are empty then set the empty_result_set option to true in
+                        // the result json.
                         if results_json.is_empty_result_set() {
                             results_json.set_empty_result_set();
                         }