neon_arch committed on
Commit
94ef62e
1 Parent(s): 8133de1

✨ feat: add documentation to code

src/config_parser/parser.rs CHANGED
@@ -18,6 +18,10 @@ static CONFIG_FILE_NAME: &str = "config.lua";
/// * `style` - It stores the theming options for the website.
/// * `redis_connection_url` - It stores the redis connection url address on which the redis
/// client should connect.
+ /// * `aggregator` - It stores the option to enable or disable production use.
+ /// * `logging` - It stores the option to enable or disable logs.
+ /// * `debug` - It stores the option to enable or disable debug mode.
+ /// * `upstream_search_engines` - It stores all the engine names that were enabled by the user.
#[derive(Clone)]
pub struct Config {
pub port: u16,
@@ -31,9 +35,13 @@ pub struct Config {
}

/// Configuration options for the aggregator.
+ ///
+ /// # Fields
+ ///
+ /// * `random_delay` - It stores the option to enable or disable random delays between
+ /// requests.
#[derive(Clone)]
pub struct AggreatorConfig {
- /// Whether to introduce a random delay before sending the request to the search engine.
pub random_delay: bool,
}
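
For orientation, here is a minimal, self-contained sketch (not part of the commit) of `Config` as the new doc comments describe it; every field type except `port` is a guess, and `Style` is a hypothetical stand-in for the crate's real theming type.

// Not part of the commit: field types other than `port` are guesses taken from
// the doc comments above, and `Style` is a hypothetical stand-in type.
#[derive(Clone)]
pub struct Style {
    pub theme: String,
    pub colorscheme: String,
}

#[derive(Clone)]
pub struct AggreatorConfig {
    pub random_delay: bool,
}

#[derive(Clone)]
pub struct Config {
    pub port: u16,
    pub style: Style,                         // theming options for the website
    pub redis_connection_url: String,         // redis url the client should connect to
    pub aggregator: AggreatorConfig,          // production-use (aggregator) options
    pub logging: bool,                        // enable or disable logs
    pub debug: bool,                          // enable or disable debug mode
    pub upstream_search_engines: Vec<String>, // engine names enabled by the user
}

fn main() {
    let config = Config {
        port: 8080,
        style: Style { theme: "simple".into(), colorscheme: "catppuccin".into() },
        redis_connection_url: "redis://127.0.0.1:8082".into(),
        aggregator: AggreatorConfig { random_delay: true },
        logging: true,
        debug: false,
        upstream_search_engines: vec!["duckduckgo".into(), "searx".into()],
    };
    println!("serving on port {}", config.port);
}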
 
src/engines/duckduckgo.rs CHANGED
@@ -13,28 +13,29 @@ use super::engine_models::{EngineError, SearchEngine};

use error_stack::{IntoReport, Report, Result, ResultExt};

- /// This function scrapes results from the upstream engine duckduckgo and puts all the scraped
- /// results like title, visiting_url (href in html),engine (from which engine it was fetched from)
- /// and description in a RawSearchResult and then adds that to HashMap whose keys are url and
- /// values are RawSearchResult struct and then returns it within a Result enum.
- ///
- /// # Arguments
- ///
- /// * `query` - Takes the user provided query to query to the upstream search engine with.
- /// * `page` - Takes an u32 as an argument.
- /// * `user_agent` - Takes a random user agent string as an argument.
- ///
- /// # Errors
- ///
- /// Returns an `EngineErrorKind` if the user is not connected to the internet or if their is failure to
- /// reach the above `upstream search engine` page or if the `upstream search engine` is unable to
- /// provide results for the requested search query and also returns error if the scraping selector
- /// or HeaderMap fails to initialize.
-
+ /// A new DuckDuckGo engine type defined in order to implement the `SearchEngine` trait, which
+ /// helps reduce code duplication and makes it easy to create a vector of different search engines.
pub struct DuckDuckGo;

#[async_trait::async_trait]
impl SearchEngine for DuckDuckGo {
+ /// This function scrapes results from the upstream engine duckduckgo and puts all the scraped
+ /// results like title, visiting_url (href in html), engine (from which engine it was fetched)
+ /// and description in a RawSearchResult, then adds that to a HashMap whose keys are urls and
+ /// whose values are RawSearchResult structs, and finally returns it within a Result enum.
+ ///
+ /// # Arguments
+ ///
+ /// * `query` - Takes the user provided query to query the upstream search engine with.
+ /// * `page` - Takes an u32 as an argument.
+ /// * `user_agent` - Takes a random user agent string as an argument.
+ ///
+ /// # Errors
+ ///
+ /// Returns an `EngineErrorKind` if the user is not connected to the internet, if there is a
+ /// failure to reach the `upstream search engine` page, if the `upstream search engine` is
+ /// unable to provide results for the requested search query, or if the scraping selector
+ /// or HeaderMap fails to initialize.
async fn results(
&self,
query: String,
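
To illustrate the pattern the new type-level doc comment describes — a unit struct per engine implementing a shared trait — here is a self-contained sketch; the trait is deliberately simplified (synchronous, `String` values) and is not the crate's actual async `SearchEngine` trait.

// Not part of the commit: a simplified, synchronous stand-in for the trait.
use std::collections::HashMap;

trait SearchEngine {
    fn results(&self, query: &str, page: u32) -> HashMap<String, String>;
}

struct DuckDuckGo;

impl SearchEngine for DuckDuckGo {
    fn results(&self, query: &str, page: u32) -> HashMap<String, String> {
        // The real implementation fetches and scrapes HTML; here we fake one
        // result, keyed by url like the RawSearchResult HashMap described above.
        let url = format!("https://duckduckgo.com/html/?q={query}&s={page}");
        HashMap::from([(url, String::from("duckduckgo"))])
    }
}

fn main() {
    for (url, engine) in DuckDuckGo.results("rust", 1) {
        println!("{url} (from {engine})");
    }
}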
src/engines/engine_models.rs CHANGED
@@ -43,6 +43,7 @@ impl fmt::Display for EngineError {

impl error_stack::Context for EngineError {}

+ /// A trait to define common behaviour for all search engines.
#[async_trait::async_trait]
pub trait SearchEngine {
async fn fetch_html_from_upstream(
@@ -53,7 +54,7 @@ pub trait SearchEngine {
// fetch the html from upstream search engine
Ok(reqwest::Client::new()
.get(url)
- .timeout(Duration::from_secs(30))
+ .timeout(Duration::from_secs(30)) // add a timeout to the request to avoid DDoSing the server
.headers(header_map) // add spoofed headers to emulate human behaviour
.send()
.await
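
The per-request timeout that the new inline comment explains can be exercised in isolation. A runnable sketch, assuming the async `reqwest` and `tokio` crates the codebase already uses:

// Not part of the commit: a minimal demonstration of the same builder chain.
use std::time::Duration;

#[tokio::main]
async fn main() -> Result<(), reqwest::Error> {
    let html = reqwest::Client::new()
        .get("https://example.com/")
        .timeout(Duration::from_secs(30)) // per-request timeout, as in the diff above
        .send()
        .await?
        .text()
        .await?;
    println!("fetched {} bytes of html", html.len());
    Ok(())
}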
src/engines/searx.rs CHANGED
@@ -11,28 +11,30 @@ use crate::search_results_handler::aggregation_models::RawSearchResult;
use super::engine_models::{EngineError, SearchEngine};
use error_stack::{IntoReport, Report, Result, ResultExt};

- /// This function scrapes results from the upstream engine duckduckgo and puts all the scraped
- /// results like title, visiting_url (href in html),engine (from which engine it was fetched from)
- /// and description in a RawSearchResult and then adds that to HashMap whose keys are url and
- /// values are RawSearchResult struct and then returns it within a Result enum.
- ///
- /// # Arguments
- ///
- /// * `query` - Takes the user provided query to query to the upstream search engine with.
- /// * `page` - Takes an u32 as an argument.
- /// * `user_agent` - Takes a random user agent string as an argument.
- ///
- /// # Errors
- ///
- /// Returns an `EngineErrorKind` if the user is not connected to the internet or if their is failure to
- /// reach the above `upstream search engine` page or if the `upstream search engine` is unable to
- /// provide results for the requested search query and also returns error if the scraping selector
- /// or HeaderMap fails to initialize.
-
+ /// A new Searx engine type defined in order to implement the `SearchEngine` trait, which
+ /// helps reduce code duplication and makes it easy to create a vector of different search engines.
pub struct Searx;

#[async_trait::async_trait]
impl SearchEngine for Searx {
+ /// This function scrapes results from the upstream engine searx and puts all the scraped
+ /// results like title, visiting_url (href in html), engine (from which engine it was fetched)
+ /// and description in a RawSearchResult, then adds that to a HashMap whose keys are urls and
+ /// whose values are RawSearchResult structs, and finally returns it within a Result enum.
+ ///
+ /// # Arguments
+ ///
+ /// * `query` - Takes the user provided query to query the upstream search engine with.
+ /// * `page` - Takes an u32 as an argument.
+ /// * `user_agent` - Takes a random user agent string as an argument.
+ ///
+ /// # Errors
+ ///
+ /// Returns an `EngineErrorKind` if the user is not connected to the internet, if there is a
+ /// failure to reach the `upstream search engine` page, if the `upstream search engine` is
+ /// unable to provide results for the requested search query, or if the scraping selector
+ /// or HeaderMap fails to initialize.
+
async fn results(
&self,
query: String,
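
The "vector of different search engines" that both new type-level doc comments mention works through trait objects. A sketch under that assumption, again with a simplified stand-in trait:

// Not part of the commit: both unit structs share one trait object type, so they
// fit in one vector and can be selected at runtime from the config or the UI.
trait SearchEngine {
    fn name(&self) -> &'static str;
}

struct DuckDuckGo;
struct Searx;

impl SearchEngine for DuckDuckGo {
    fn name(&self) -> &'static str { "duckduckgo" }
}

impl SearchEngine for Searx {
    fn name(&self) -> &'static str { "searx" }
}

fn main() {
    let engines: Vec<Box<dyn SearchEngine>> = vec![Box::new(DuckDuckGo), Box::new(Searx)];
    for engine in &engines {
        println!("querying {}", engine.name());
    }
}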
src/search_results_handler/aggregation_models.rs CHANGED
@@ -143,6 +143,11 @@ impl EngineErrorInfo {
/// * `results` - Stores the individual serializable `SearchResult` struct into a vector of
/// `SearchResult` structs.
/// * `page_query` - Stores the current pages search query `q` provided in the search url.
+ /// * `style` - Stores the theming options for the website.
+ /// * `engine_errors_info` - Stores the information on which engines failed, with their engine name
+ /// and the type of error that caused it.
+ /// * `empty_result_set` - Stores a boolean which indicates that no engines gave a result for the
+ /// given search query.
#[derive(Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct SearchResults {
@@ -162,6 +167,8 @@ impl SearchResults {
/// and stores it into a vector of `SearchResult` structs.
/// * `page_query` - Takes an argument of current page`s search query `q` provided in
/// the search url.
+ /// * `empty_result_set` - Takes a boolean which indicates that no engines gave a result for the
+ /// given search query.
pub fn new(
results: Vec<SearchResult>,
page_query: String,
@@ -176,14 +183,17 @@ impl SearchResults {
}
}

+ /// A setter function to add website style to the returned search results.
pub fn add_style(&mut self, style: Style) {
self.style = style;
}

+ /// A function which checks whether the stored results are empty or not.
pub fn is_empty_result_set(&self) -> bool {
self.results.is_empty()
}

+ /// A setter function which sets empty_result_set to true.
pub fn set_empty_result_set(&mut self) {
self.empty_result_set = true;
}
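
A sketch (not part of the commit) of how the documented methods chain together, with stand-in types — `String` instead of the real `SearchResult`, and a hypothetical two-field `Style`:

// Not part of the commit: stand-in types mirroring the documented API shape.
#[derive(Default)]
struct Style {
    theme: String,
    colorscheme: String,
}

#[derive(Default)]
struct SearchResults {
    results: Vec<String>,
    page_query: String,
    style: Style,
    empty_result_set: bool,
}

impl SearchResults {
    fn new(results: Vec<String>, page_query: String) -> Self {
        Self { results, page_query, ..Default::default() }
    }
    // Setter mirroring `add_style` above.
    fn add_style(&mut self, style: Style) {
        self.style = style;
    }
    // Mirrors `is_empty_result_set` above.
    fn is_empty_result_set(&self) -> bool {
        self.results.is_empty()
    }
    // Mirrors `set_empty_result_set` above.
    fn set_empty_result_set(&mut self) {
        self.empty_result_set = true;
    }
}

fn main() {
    let mut results = SearchResults::new(Vec::new(), String::from("weather"));
    results.add_style(Style { theme: "simple".into(), colorscheme: "catppuccin".into() });
    // The flow used by the search route: flag an empty result set explicitly.
    if results.is_empty_result_set() {
        results.set_empty_result_set();
    }
    println!("'{}' returned no results: {}", results.page_query, results.empty_result_set);
}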
src/search_results_handler/aggregator.rs CHANGED
@@ -18,14 +18,21 @@ use crate::engines::{
searx,
};

+ /// Alias for a long type annotation.
type FutureVec = Vec<JoinHandle<Result<HashMap<String, RawSearchResult>, Report<EngineError>>>>;

- /// A function that aggregates all the scraped results from the above upstream engines and
- /// then removes duplicate results and if two results are found to be from two or more engines
- /// then puts their names together to show the results are fetched from these upstream engines
- /// and then removes all data from the HashMap and puts into a struct of all results aggregated
- /// into a vector and also adds the query used into the struct this is neccessory because
- /// otherwise the search bar in search remains empty if searched from the query url
+ /// A function that aggregates all the scraped results from the user selected upstream search
+ /// engines, chosen either from the UI or from the config file. It matches over the selected
+ /// search engines, adds the selected ones to a vector, and uses that vector to spawn an async
+ /// task per engine with `tokio::spawn`; each task returns a future which is then awaited in
+ /// another loop. All the collected results are filtered into errors and proper results; if an
+ /// error is found it is reported to the UI with the engine name and the error type that caused
+ /// it, by placing them in the returned `SearchResults` struct. The same process also removes
+ /// duplicate results, and if two results are found to be from two or more engines it puts
+ /// their names together to show which upstream engines the results were fetched from. It then
+ /// moves all data from the HashMap into a struct of all results aggregated into a vector, and
+ /// also adds the query used into the struct; this is necessary because otherwise the search
+ /// bar remains empty if the page is opened from the query url.
///
/// # Example:
///
@@ -37,6 +44,9 @@ type FutureVec = Vec<JoinHandle<Result<HashMap<String, RawSearchResult>, Report<
/// * `query` - Accepts a string to query with the above upstream search engines.
/// * `page` - Accepts an u32 page number.
/// * `random_delay` - Accepts a boolean value to add a random delay before making the request.
+ /// * `debug` - Accepts a boolean value to enable or disable the debug mode option.
+ /// * `upstream_search_engines` - Accepts a vector of search engine names which were selected by the
+ /// user through the UI or the config file.
///
/// # Error
///
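
The spawn-then-await flow in the rewritten doc comment can be sketched on its own. Assuming only `tokio`, with a fake fetch function standing in for the real engine calls:

// Not part of the commit: spawn one task per selected engine, await the join
// handles in a second loop, and split outcomes into results and errors.
use std::collections::HashMap;
use tokio::task::JoinHandle;

async fn fake_fetch(engine: String, query: String) -> Result<HashMap<String, String>, String> {
    if engine == "broken" {
        Err(format!("{engine}: failed to reach the upstream page"))
    } else {
        Ok(HashMap::from([(format!("https://{engine}.example/?q={query}"), engine)]))
    }
}

#[tokio::main]
async fn main() {
    let selected = vec!["duckduckgo".to_string(), "searx".to_string(), "broken".to_string()];
    let tasks: Vec<JoinHandle<Result<HashMap<String, String>, String>>> = selected
        .into_iter()
        .map(|engine| tokio::spawn(fake_fetch(engine, "rust".to_string())))
        .collect();

    let mut aggregated: HashMap<String, String> = HashMap::new();
    let mut errors: Vec<String> = Vec::new();
    for task in tasks {
        match task.await.expect("task panicked") {
            // Results are keyed by url, so duplicates from different engines collapse here.
            Ok(results) => aggregated.extend(results),
            // In the real code these are reported to the UI via `SearchResults`.
            Err(error) => errors.push(error),
        }
    }
    println!("{} results aggregated, {} engine errors", aggregated.len(), errors.len());
}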
src/server/routes.rs CHANGED
@@ -51,6 +51,13 @@ pub async fn not_found(
.body(page_content))
}

+ /// A named struct which is used to deserialize the cookies fetched from the client side.
+ ///
+ /// # Fields
+ ///
+ /// * `theme` - It stores the theme name used on the website.
+ /// * `colorscheme` - It stores the colorscheme name used for the website theme.
+ /// * `engines` - It stores the user selected upstream search engines selected from the UI.
#[allow(dead_code)]
#[derive(Deserialize)]
struct Cookie {
@@ -126,7 +133,7 @@ pub async fn search(

// fetch the cached results json.
let cached_results_json = redis_cache.cached_results_json(&page_url);
- // check if fetched results was indeed fetched or it was an error and if so
+ // check if the cached results were indeed fetched or there was an error, and if so
// handle the data accordingly.
match cached_results_json {
Ok(results_json) => {
@@ -135,6 +142,10 @@ pub async fn search(
Ok(HttpResponse::Ok().body(page_content))
}
Err(_) => {
+ // check if the cookie value is empty or not; if it is empty then use the
+ // default selected upstream search engines from the config file, otherwise
+ // parse the non-empty cookie and grab the user selected engines from the
+ // UI and use those.
let mut results_json: crate::search_results_handler::aggregation_models::SearchResults = match req.cookie("appCookie") {
Some(cookie_value) => {
let cookie_value:Cookie = serde_json::from_str(cookie_value.name_value().1)?;
@@ -143,6 +154,9 @@ pub async fn search(
None => aggregate(query.clone(), page, config.aggregator.random_delay, config.debug, config.upstream_search_engines.clone()).await?,
};
results_json.add_style(config.style.clone());
+ // check whether the results grabbed from the upstream engines are empty or
+ // not; if they are empty then set the empty_result_set option to true in
+ // the result json.
if results_json.is_empty_result_set() {
results_json.set_empty_result_set();
}
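
A sketch of the cookie-or-config fallback the new comments describe, assuming `serde` and `serde_json`; the helper name is hypothetical, and the lenient `.ok()` parse differs from the real handler, which propagates the parse error with `?`:

// Not part of the commit: `Cookie` fields follow the new doc comment above.
use serde::Deserialize;

#[allow(dead_code)]
#[derive(Deserialize)]
struct Cookie {
    theme: String,
    colorscheme: String,
    engines: Vec<String>,
}

fn selected_engines(raw_cookie: Option<&str>, config_engines: &[String]) -> Vec<String> {
    match raw_cookie.and_then(|value| serde_json::from_str::<Cookie>(value).ok()) {
        // non-empty, parseable cookie: use the engines picked in the UI
        Some(cookie) => cookie.engines,
        // no cookie: fall back to the defaults from the config file
        None => config_engines.to_vec(),
    }
}

fn main() {
    let defaults = vec!["duckduckgo".to_string(), "searx".to_string()];
    let raw = r#"{"theme":"simple","colorscheme":"catppuccin","engines":["searx"]}"#;
    println!("{:?}", selected_engines(Some(raw), &defaults)); // ["searx"]
    println!("{:?}", selected_engines(None, &defaults));      // config defaults
}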