alamin655 committed
Commit 2a04e64 • 2 parents: f20ac50 e19038b

Merge branch 'rolling' into fix-gitpod-setup
Cargo.lock CHANGED
@@ -57,6 +57,18 @@ dependencies = [
  "pin-project-lite",
 ]
 
+[[package]]
+name = "actix-governor"
+version = "0.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "46ff2d40f2bc627b8054c5e20fa6b0b0cf9428699b54bd41634e9ae3098ad555"
+dependencies = [
+ "actix-http",
+ "actix-web",
+ "futures 0.3.28",
+ "governor",
+]
+
 [[package]]
 name = "actix-http"
 version = "3.4.0"
@@ -590,7 +602,7 @@ version = "0.12.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "888604f00b3db336d2af898ec3c1d5d0ddf5e6d462220f2ededc33a87ac4bbd5"
 dependencies = [
- "time 0.1.45",
+ "time 0.1.43",
  "url 1.7.2",
 ]
 
@@ -618,7 +630,7 @@ dependencies = [
  "publicsuffix",
  "serde",
  "serde_json",
- "time 0.1.45",
+ "time 0.1.43",
  "try_from",
  "url 1.7.2",
 ]
@@ -817,6 +829,19 @@ dependencies = [
  "syn 2.0.32",
 ]
 
+[[package]]
+name = "dashmap"
+version = "5.5.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "978747c1d849a7d2ee5e8adc0159961c48fb7e5db2f06af6723b80123bb53856"
+dependencies = [
+ "cfg-if 1.0.0",
+ "hashbrown 0.14.0",
+ "lock_api 0.4.10",
+ "once_cell",
+ "parking_lot_core 0.9.8",
+]
+
 [[package]]
 name = "deranged"
 version = "0.3.8"
@@ -1162,6 +1187,12 @@ version = "0.3.28"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "76d3d132be6c0e6aa1534069c705a74a5997a356c0dc2f86a47765e5617c5b65"
 
+[[package]]
+name = "futures-timer"
+version = "3.0.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e64b03909df88034c26dc1547e8970b91f98bdb65165d6a4e9110d94263dbb2c"
+
 [[package]]
 name = "futures-util"
 version = "0.3.28"
@@ -1225,6 +1256,24 @@ version = "0.28.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "6fb8d784f27acf97159b40fc4db5ecd8aa23b9ad5ef69cdd136d3bc80665f0c0"
 
+[[package]]
+name = "governor"
+version = "0.5.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c390a940a5d157878dd057c78680a33ce3415bcd05b4799509ea44210914b4d5"
+dependencies = [
+ "cfg-if 1.0.0",
+ "dashmap",
+ "futures 0.3.28",
+ "futures-timer",
+ "no-std-compat",
+ "nonzero_ext",
+ "parking_lot 0.12.1",
+ "quanta",
+ "rand 0.8.5",
+ "smallvec 1.11.0",
+]
+
 [[package]]
 name = "h2"
 version = "0.1.26"
@@ -1289,6 +1338,12 @@ version = "0.12.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
 
+[[package]]
+name = "hashbrown"
+version = "0.14.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2c6201b9ff9fd90a5a3bac2e56a830d0caa509576f0e503818ee82c181b3437a"
+
 [[package]]
 name = "hermit-abi"
 version = "0.3.2"
@@ -1410,7 +1465,7 @@ dependencies = [
  "log",
  "net2",
  "rustc_version 0.2.3",
- "time 0.1.45",
+ "time 0.1.43",
  "tokio 0.1.22",
  "tokio-buf",
  "tokio-executor",
@@ -1511,7 +1566,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99"
 dependencies = [
  "autocfg 1.1.0",
- "hashbrown",
+ "hashbrown 0.12.3",
 ]
 
 [[package]]
@@ -1672,6 +1727,15 @@ version = "0.1.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4"
 
+[[package]]
+name = "mach"
+version = "0.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b823e83b2affd8f40a9ee8c29dbc56404c1e34cd2710921f2801e2cf29527afa"
+dependencies = [
+ "libc",
+]
+
 [[package]]
 name = "markup5ever"
 version = "0.8.1"
@@ -1887,6 +1951,18 @@ version = "0.5.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ab250442c86f1850815b5d268639dff018c0627022bc1940eb2d642ca1ce12f0"
 
+[[package]]
+name = "no-std-compat"
+version = "0.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b93853da6d84c2e3c7d730d6473e8817692dd89be387eb01b94d7f108ecb5b8c"
+
+[[package]]
+name = "nonzero_ext"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "38bf9645c8b145698bb0b18a4637dcacbc421ea49bef2317e4fd8065a387cf21"
+
 [[package]]
 name = "num-traits"
 version = "0.2.16"
@@ -2307,6 +2383,22 @@ dependencies = [
  "url 2.4.1",
 ]
 
+[[package]]
+name = "quanta"
+version = "0.9.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "20afe714292d5e879d8b12740aa223c6a88f118af41870e8b6196e39a02238a8"
+dependencies = [
+ "crossbeam-utils 0.8.16",
+ "libc",
+ "mach",
+ "once_cell",
+ "raw-cpuid",
+ "wasi 0.10.2+wasi-snapshot-preview1",
+ "web-sys",
+ "winapi 0.3.9",
+]
+
 [[package]]
 name = "quote"
 version = "0.6.13"
@@ -2461,6 +2553,15 @@ dependencies = [
  "rand_core 0.3.1",
 ]
 
+[[package]]
+name = "raw-cpuid"
+version = "10.7.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6c297679cb867470fa8c9f67dbba74a78d78e3e98d7cf2b08d6d71540f797332"
+dependencies = [
+ "bitflags 1.3.2",
+]
+
 [[package]]
 name = "rayon"
 version = "1.7.0"
@@ -2583,7 +2684,7 @@ dependencies = [
  "serde",
  "serde_json",
  "serde_urlencoded 0.5.5",
- "time 0.1.45",
+ "time 0.1.43",
  "tokio 0.1.22",
  "tokio-executor",
  "tokio-io",
@@ -3157,12 +3258,11 @@ checksum = "3bf63baf9f5039dadc247375c29eb13706706cfde997d0330d05aa63a77d8820"
 
 [[package]]
 name = "time"
-version = "0.1.45"
+version = "0.1.43"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1b797afad3f312d1c66a56d11d0316f916356d11bd158fbc6ca6389ff6bf805a"
+checksum = "ca8a50ef2360fbd1eeb0ecd46795a87a19024eb4b53c5dc916ca1fd95fe62438"
 dependencies = [
  "libc",
- "wasi 0.10.0+wasi-snapshot-preview1",
  "winapi 0.3.9",
 ]
 
@@ -3609,9 +3709,9 @@ dependencies = [
 
 [[package]]
 name = "wasi"
-version = "0.10.0+wasi-snapshot-preview1"
+version = "0.10.2+wasi-snapshot-preview1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f"
+checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6"
 
 [[package]]
 name = "wasi"
@@ -3701,6 +3801,7 @@ version = "0.20.7"
 dependencies = [
  "actix-cors",
  "actix-files",
+ "actix-governor",
  "actix-web",
  "async-once-cell",
  "async-trait",
Cargo.toml CHANGED
@@ -32,6 +32,7 @@ futures = {version="0.3.28"}
 dhat = {version="0.3.2", optional = true}
 mimalloc = { version = "0.1.38", default-features = false }
 async-once-cell = {version="0.5.3"}
+actix-governor = {version="0.4.1"}
 
 [dev-dependencies]
 rusty-hook = "^0.11.2"
src/cache/cacher.rs CHANGED
@@ -10,17 +10,14 @@ use super::error::PoolError;
 
 /// A named struct which stores the redis Connection url address to which the client will
 /// connect to.
-///
-/// # Fields
-///
-/// * `connection_pool` - It stores a pool of connections ready to be used.
-/// * `pool_size` - It stores the size of the connection pool (in other words the number of
-/// connections that should be stored in the pool).
-/// * `current_connection` - It stores the index of which connection is being used at the moment.
 #[derive(Clone)]
 pub struct RedisCache {
+    /// It stores a pool of connections ready to be used.
     connection_pool: Vec<ConnectionManager>,
+    /// It stores the size of the connection pool (in other words the number of
+    /// connections that should be stored in the pool).
     pool_size: u8,
+    /// It stores the index of which connection is being used at the moment.
     current_connection: u8,
 }
 
src/cache/error.rs CHANGED
@@ -5,15 +5,12 @@ use std::fmt;
 use redis::RedisError;
 
 /// A custom error type used for handling redis async pool associated errors.
-///
-/// This enum provides variants three different categories of errors:
-/// * `RedisError` - This variant handles all errors related to `RedisError`,
-/// * `PoolExhaustionWithConnectionDropError` - This variant handles the error
-/// which occurs when all the connections in the connection pool return a connection
-/// dropped redis error.
 #[derive(Debug)]
 pub enum PoolError {
+    /// This variant handles all errors related to `RedisError`,
    RedisError(RedisError),
+    /// This variant handles the errors which occurs when all the connections
+    /// in the connection pool return a connection dropped redis error.
    PoolExhaustionWithConnectionDropError,
 }
 
src/cache/mod.rs CHANGED
@@ -1,2 +1,5 @@
+//! This module provides the modules which provide the functionality to cache the aggregated
+//! results fetched and aggregated from the upstream search engines in a json format.
+
 pub mod cacher;
 pub mod error;
src/config/mod.rs CHANGED
@@ -1,2 +1,4 @@
+//! This module provides the modules which handles the functionality to parse the lua config
+//! and convert the config options into rust readable form.
+
 pub mod parser;
-pub mod parser_models;
src/config/parser.rs CHANGED
@@ -3,52 +3,42 @@
 
 use crate::handler::paths::{file_path, FileType};
 
-use super::parser_models::Style;
+use crate::models::parser_models::{AggregatorConfig, RateLimiter, Style};
 use log::LevelFilter;
 use mlua::Lua;
 use std::{collections::HashMap, fs, thread::available_parallelism};
 
 /// A named struct which stores the parsed config file options.
-///
-/// # Fields
-//
-/// * `port` - It stores the parsed port number option on which the server should launch.
-/// * `binding_ip` - It stores the parsed ip address option on which the server should launch
-/// * `style` - It stores the theming options for the website.
-/// * `redis_url` - It stores the redis connection url address on which the redis
-/// client should connect.
-/// * `aggregator` - It stores the option to whether enable or disable production use.
-/// * `logging` - It stores the option to whether enable or disable logs.
-/// * `debug` - It stores the option to whether enable or disable debug mode.
-/// * `upstream_search_engines` - It stores all the engine names that were enabled by the user.
-/// * `request_timeout` - It stores the time (secs) which controls the server request timeout.
-/// * `threads` - It stores the number of threads which controls the app will use to run.
 #[derive(Clone)]
 pub struct Config {
+    /// It stores the parsed port number option on which the server should launch.
     pub port: u16,
+    /// It stores the parsed ip address option on which the server should launch
     pub binding_ip: String,
+    /// It stores the theming options for the website.
     pub style: Style,
+    /// It stores the redis connection url address on which the redis
+    /// client should connect.
     pub redis_url: String,
+    /// It stores the option to whether enable or disable production use.
     pub aggregator: AggregatorConfig,
+    /// It stores the option to whether enable or disable logs.
     pub logging: bool,
+    /// It stores the option to whether enable or disable debug mode.
     pub debug: bool,
-    pub upstream_search_engines: Vec<crate::engines::engine_models::EngineHandler>,
+    /// It stores all the engine names that were enabled by the user.
+    pub upstream_search_engines: Vec<crate::models::engine_models::EngineHandler>,
+    /// It stores the time (secs) which controls the server request timeout.
     pub request_timeout: u8,
+    /// It stores the number of threads which controls the app will use to run.
     pub threads: u8,
+    /// It stores configuration options for the ratelimiting middleware.
+    pub rate_limiter: RateLimiter,
+    /// It stores the level of safe search to be used for restricting content in the
+    /// search results.
     pub safe_search: u8,
 }
 
-/// Configuration options for the aggregator.
-///
-/// # Fields
-///
-/// * `random_delay` - It stores the option to whether enable or disable random delays between
-/// requests.
-#[derive(Clone)]
-pub struct AggregatorConfig {
-    pub random_delay: bool,
-}
-
 impl Config {
     /// A function which parses the config.lua file and puts all the parsed options in the newly
     /// constructed Config struct and returns it.
@@ -90,6 +80,8 @@ impl Config {
             parsed_threads
         };
 
+        let rate_limiter = globals.get::<_, HashMap<String, u8>>("rate_limiter")?;
+
         let parsed_safe_search: u8 = globals.get::<_, u8>("safe_search")?;
         let safe_search: u8 = match parsed_safe_search {
             0..=4 => parsed_safe_search,
@@ -117,16 +109,25 @@ impl Config {
                 .get::<_, HashMap<String, bool>>("upstream_search_engines")?
                 .into_iter()
                 .filter_map(|(key, value)| value.then_some(key))
-                .filter_map(|engine| crate::engines::engine_models::EngineHandler::new(&engine))
+                .filter_map(|engine| crate::models::engine_models::EngineHandler::new(&engine))
                 .collect(),
             request_timeout: globals.get::<_, u8>("request_timeout")?,
             threads,
+            rate_limiter: RateLimiter {
+                number_of_requests: rate_limiter["number_of_requests"],
+                time_limit: rate_limiter["time_limit"],
+            },
            safe_search,
        })
    }
 }
 
 /// a helper function that sets the proper logging level
+///
+/// # Arguments
+///
+/// * `debug` - It takes the option to whether enable or disable debug mode.
+/// * `logging` - It takes the option to whether enable or disable logs.
 fn set_logging_level(debug: bool, logging: bool) {
     if let Ok(pkg_env_var) = std::env::var("PKG_ENV") {
         if pkg_env_var.to_lowercase() == "dev" {
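
Note on the new `rate_limiter` option: it is read as a `HashMap<String, u8>` and then indexed directly, so a config.lua that omits either key panics at startup rather than returning a parse error. A minimal sketch of that parsing step in isolation, assuming mlua as in the commit (the table values here are illustrative, not defaults shipped by the project):

    use mlua::Lua;
    use std::collections::HashMap;

    fn main() -> mlua::Result<()> {
        let lua = Lua::new();
        // Stand-in for loading config.lua; the numbers are illustrative.
        lua.load(r#"rate_limiter = { number_of_requests = 20, time_limit = 3 }"#)
            .exec()?;

        let globals = lua.globals();
        let rate_limiter = globals.get::<_, HashMap<String, u8>>("rate_limiter")?;

        // Direct indexing, as in the commit: this panics if a key is missing.
        let number_of_requests = rate_limiter["number_of_requests"];
        let time_limit = rate_limiter["time_limit"];
        assert_eq!((number_of_requests, time_limit), (20, 3));
        Ok(())
    }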
src/engines/duckduckgo.rs CHANGED
@@ -7,9 +7,9 @@ use std::collections::HashMap;
 use reqwest::header::HeaderMap;
 use scraper::{Html, Selector};
 
-use crate::results::aggregation_models::SearchResult;
+use crate::models::aggregation_models::SearchResult;
 
-use super::engine_models::{EngineError, SearchEngine};
+use crate::models::engine_models::{EngineError, SearchEngine};
 
 use error_stack::{Report, Result, ResultExt};
 
@@ -19,24 +19,6 @@ pub struct DuckDuckGo;
 
 #[async_trait::async_trait]
 impl SearchEngine for DuckDuckGo {
-    /// This function scrapes results from the upstream engine duckduckgo and puts all the scraped
-    /// results like title, visiting_url (href in html),engine (from which engine it was fetched from)
-    /// and description in a RawSearchResult and then adds that to HashMap whose keys are url and
-    /// values are RawSearchResult struct and then returns it within a Result enum.
-    ///
-    /// # Arguments
-    ///
-    /// * `query` - Takes the user provided query to query to the upstream search engine with.
-    /// * `page` - Takes an u32 as an argument.
-    /// * `user_agent` - Takes a random user agent string as an argument.
-    /// * `request_timeout` - Takes a time (secs) as a value which controls the server request timeout.
-    ///
-    /// # Errors
-    ///
-    /// Returns an `EngineErrorKind` if the user is not connected to the internet or if their is failure to
-    /// reach the above `upstream search engine` page or if the `upstream search engine` is unable to
-    /// provide results for the requested search query and also returns error if the scraping selector
-    /// or HeaderMap fails to initialize.
     async fn results(
         &self,
         query: &str,
src/engines/mod.rs CHANGED
@@ -1,3 +1,7 @@
+//! This module provides different modules which handles the functionlity to fetch results from the
+//! upstream search engines based on user requested queries. Also provides different models to
+//! provide a standard functions to be implemented for all the upstream search engine handling
+//! code. Moreover, it also provides a custom error for the upstream search engine handling code.
+
 pub mod duckduckgo;
-pub mod engine_models;
 pub mod searx;
src/engines/searx.rs CHANGED
@@ -6,9 +6,8 @@ use reqwest::header::HeaderMap;
 use scraper::{Html, Selector};
 use std::collections::HashMap;
 
-use crate::results::aggregation_models::SearchResult;
-
-use super::engine_models::{EngineError, SearchEngine};
+use crate::models::aggregation_models::SearchResult;
+use crate::models::engine_models::{EngineError, SearchEngine};
 use error_stack::{Report, Result, ResultExt};
 
 /// A new Searx engine type defined in-order to implement the `SearchEngine` trait which allows to
@@ -17,25 +16,6 @@ pub struct Searx;
 
 #[async_trait::async_trait]
 impl SearchEngine for Searx {
-    /// This function scrapes results from the upstream engine duckduckgo and puts all the scraped
-    /// results like title, visiting_url (href in html),engine (from which engine it was fetched from)
-    /// and description in a RawSearchResult and then adds that to HashMap whose keys are url and
-    /// values are RawSearchResult struct and then returns it within a Result enum.
-    ///
-    /// # Arguments
-    ///
-    /// * `query` - Takes the user provided query to query to the upstream search engine with.
-    /// * `page` - Takes an u32 as an argument.
-    /// * `user_agent` - Takes a random user agent string as an argument.
-    /// * `request_timeout` - Takes a time (secs) as a value which controls the server request timeout.
-    ///
-    /// # Errors
-    ///
-    /// Returns an `EngineErrorKind` if the user is not connected to the internet or if their is failure to
-    /// reach the above `upstream search engine` page or if the `upstream search engine` is unable to
-    /// provide results for the requested search query and also returns error if the scraping selector
-    /// or HeaderMap fails to initialize.
-
     async fn results(
         &self,
         query: &str,
src/handler/mod.rs CHANGED
@@ -1 +1,5 @@
+//! This module provides modules which provide the functionality to handle paths for different
+//! files present on different paths and provide one appropriate path on which it is present and
+//! can be used.
+
 pub mod paths;
src/handler/paths.rs CHANGED
@@ -7,42 +7,46 @@ use std::path::Path;
 use std::sync::OnceLock;
 
 // ------- Constants --------
-static PUBLIC_DIRECTORY_NAME: &str = "public";
-static COMMON_DIRECTORY_NAME: &str = "websurfx";
-static CONFIG_FILE_NAME: &str = "config.lua";
-static ALLOWLIST_FILE_NAME: &str = "allowlist.txt";
-static BLOCKLIST_FILE_NAME: &str = "blocklist.txt";
+/// The constant holding the name of the theme folder.
+const PUBLIC_DIRECTORY_NAME: &str = "public";
+/// The constant holding the name of the common folder.
+const COMMON_DIRECTORY_NAME: &str = "websurfx";
+/// The constant holding the name of the config file.
+const CONFIG_FILE_NAME: &str = "config.lua";
+/// The constant holding the name of the AllowList text file.
+const ALLOWLIST_FILE_NAME: &str = "allowlist.txt";
+/// The constant holding the name of the BlockList text file.
+const BLOCKLIST_FILE_NAME: &str = "blocklist.txt";
 
+/// An enum type which provides different variants to handle paths for various files/folders.
 #[derive(Hash, PartialEq, Eq, Debug)]
 pub enum FileType {
+    /// This variant handles all the paths associated with the config file.
     Config,
+    /// This variant handles all the paths associated with the Allowlist text file.
     AllowList,
+    /// This variant handles all the paths associated with the BlockList text file.
     BlockList,
+    /// This variant handles all the paths associated with the public folder (Theme folder).
     Theme,
 }
 
+/// A static variable which stores the different filesystem paths for various file/folder types.
 static FILE_PATHS_FOR_DIFF_FILE_TYPES: OnceLock<HashMap<FileType, Vec<String>>> = OnceLock::new();
 
-/// A helper function which returns an appropriate config file path checking if the config
-/// file exists on that path.
+/// A function which returns an appropriate path for thr provided file type by checking if the path
+/// for the given file type exists on that path.
 ///
 /// # Error
 ///
-/// Returns a `config file not found!!` error if the config file is not present under following
-/// paths which are:
-/// 1. `~/.config/websurfx/` if it not present here then it fallbacks to the next one (2)
-/// 2. `/etc/xdg/websurfx/config.lua` if it is not present here then it fallbacks to the next
-/// one (3).
-/// 3. `websurfx/` (under project folder ( or codebase in other words)) if it is not present
-/// here then it returns an error as mentioned above.
-
-/// A function which returns an appropriate theme directory path checking if the theme
-/// directory exists on that path.
+/// Returns a `<File Name> folder/file not found!!` error if the give file_type folder/file is not
+/// present on the path on which it is being tested.
 ///
-/// # Error
+/// # Example
+///
+/// If this function is give the file_type of Theme variant then the theme folder is checked by the
+/// following steps:
 ///
-/// Returns a `Theme (public) folder not found!!` error if the theme folder is not present under following
-/// paths which are:
 /// 1. `/opt/websurfx` if it not present here then it fallbacks to the next one (2)
 /// 2. Under project folder ( or codebase in other words) if it is not present
 /// here then it returns an error as mentioned above.
@@ -110,6 +114,6 @@ pub fn file_path(file_type: FileType) -> Result<&'static str, Error> {
     // if no of the configs above exist, return error
     Err(Error::new(
         std::io::ErrorKind::NotFound,
-        format!("{:?} file not found!!", file_type),
+        format!("{:?} file/folder not found!!", file_type),
     ))
 }
src/lib.rs CHANGED
@@ -1,25 +1,26 @@
 //! This main library module provides the functionality to provide and handle the Tcp server
 //! and register all the routes for the `websurfx` meta search engine website.
 
+#![forbid(unsafe_code, clippy::panic)]
+#![deny(missing_docs, clippy::missing_docs_in_private_items, clippy::perf)]
+#![warn(clippy::cognitive_complexity, rust_2018_idioms)]
+
 pub mod cache;
 pub mod config;
 pub mod engines;
 pub mod handler;
+pub mod models;
 pub mod results;
 pub mod server;
 
 use std::net::TcpListener;
 
-use crate::server::routes;
+use crate::server::router;
 
 use actix_cors::Cors;
 use actix_files as fs;
-use actix_web::{
-    dev::Server,
-    http::header,
-    middleware::{Compress, Logger},
-    web, App, HttpServer,
-};
+use actix_governor::{Governor, GovernorConfigBuilder};
+use actix_web::{dev::Server, http::header, middleware::Logger, web, App, HttpServer};
 use config::parser::Config;
 use handlebars::Handlebars;
 use handler::paths::{file_path, FileType};
@@ -45,7 +46,7 @@ use handler::paths::{file_path, FileType};
 /// let server = run(listener,config).expect("Failed to start server");
 /// ```
 pub fn run(listener: TcpListener, config: Config) -> std::io::Result<Server> {
-    let mut handlebars: Handlebars = Handlebars::new();
+    let mut handlebars: Handlebars<'_> = Handlebars::new();
 
     let public_folder_path: &str = file_path(FileType::Theme)?;
 
@@ -53,7 +54,7 @@ pub fn run(listener: TcpListener, config: Config) -> std::io::Result<Server> {
         .register_templates_directory(".html", format!("{}/templates", public_folder_path))
         .unwrap();
 
-    let handlebars_ref: web::Data<Handlebars> = web::Data::new(handlebars);
+    let handlebars_ref: web::Data<Handlebars<'_>> = web::Data::new(handlebars);
 
     let cloned_config_threads_opt: u8 = config.threads;
 
@@ -69,11 +70,17 @@ pub fn run(listener: TcpListener, config: Config) -> std::io::Result<Server> {
         ]);
 
         App::new()
+            .wrap(Logger::default()) // added logging middleware for logging.
            .app_data(handlebars_ref.clone())
            .app_data(web::Data::new(config.clone()))
            .wrap(cors)
-            .wrap(Logger::default()) // added logging middleware for logging.
-            .wrap(Compress::default()) // compress request headers to reduce memory usage.
+            .wrap(Governor::new(
+                &GovernorConfigBuilder::default()
+                    .per_second(config.rate_limiter.time_limit as u64)
+                    .burst_size(config.rate_limiter.number_of_requests as u32)
+                    .finish()
+                    .unwrap(),
+            ))
             // Serve images and static files (css and js files).
            .service(
                fs::Files::new("/static", format!("{}/static", public_folder_path))
@@ -83,12 +90,12 @@ pub fn run(listener: TcpListener, config: Config) -> std::io::Result<Server> {
                fs::Files::new("/images", format!("{}/images", public_folder_path))
                    .show_files_listing(),
            )
-            .service(routes::robots_data) // robots.txt
-            .service(routes::index) // index page
-            .service(routes::search) // search page
-            .service(routes::about) // about page
-            .service(routes::settings) // settings page
-            .default_service(web::route().to(routes::not_found)) // error page
+            .service(router::robots_data) // robots.txt
+            .service(router::index) // index page
+            .service(server::routes::search::search) // search page
+            .service(router::about) // about page
+            .service(router::settings) // settings page
+            .default_service(web::route().to(router::not_found)) // error page
    })
    .workers(cloned_config_threads_opt as usize)
    // Start server on 127.0.0.1 with the user provided port number. for example 127.0.0.1:8080.
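
The hunks above swap the `Compress` middleware for `actix-governor`'s rate limiter. For context, here is a minimal, self-contained sketch of how that `Governor` wrapper behaves on its own, using only the builder calls seen in the diff (`per_second`, `burst_size`, `finish`); the route, port, and limits here are illustrative assumptions, not values from the commit:

    use actix_governor::{Governor, GovernorConfigBuilder};
    use actix_web::{web, App, HttpResponse, HttpServer};

    #[actix_web::main]
    async fn main() -> std::io::Result<()> {
        // Replenish one request slot every 3 seconds and allow bursts of up to
        // 20 requests, mirroring how `time_limit` and `number_of_requests` from
        // config.lua feed the builder in src/lib.rs.
        let governor_conf = GovernorConfigBuilder::default()
            .per_second(3)
            .burst_size(20)
            .finish()
            .unwrap();

        HttpServer::new(move || {
            App::new()
                .wrap(Governor::new(&governor_conf))
                .route("/", web::get().to(|| async { HttpResponse::Ok().body("ok") }))
        })
        .bind("127.0.0.1:8080")?
        .run()
        .await
    }

Once the burst allowance is exhausted, further requests from a client are rejected with HTTP 429 until the quota replenishes.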
src/{results → models}/aggregation_models.rs RENAMED
@@ -4,25 +4,22 @@
 use serde::{Deserialize, Serialize};
 use smallvec::SmallVec;
 
-use crate::{config::parser_models::Style, engines::engine_models::EngineError};
+use super::{engine_models::EngineError, parser_models::Style};
 
 /// A named struct to store the raw scraped search results scraped search results from the
 /// upstream search engines before aggregating it.It derives the Clone trait which is needed
 /// to write idiomatic rust using `Iterators`.
-///
-/// # Fields
-///
-/// * `title` - The title of the search result.
-/// * `url` - The url which is accessed when clicked on it
 /// (href url in html in simple words).
-/// * `description` - The description of the search result.
-/// * `engine` - The names of the upstream engines from which this results were provided.
-#[derive(Clone, Serialize, Deserialize, Debug)]
+#[derive(Clone, Serialize, Deserialize)]
 #[serde(rename_all = "camelCase")]
 pub struct SearchResult {
+    /// The title of the search result.
     pub title: String,
+    /// The url which is accessed when clicked on it
     pub url: String,
+    /// The description of the search result.
     pub description: String,
+    /// The names of the upstream engines from which this results were provided.
     pub engine: SmallVec<[String; 0]>,
 }
 
@@ -64,14 +61,27 @@ impl SearchResult {
     }
 }
 
+/// A named struct that stores the error info related to the upstream search engines.
 #[derive(Serialize, Deserialize, Clone)]
 pub struct EngineErrorInfo {
+    /// It stores the error type which occured while fetching the result from a particular search
+    /// engine.
     pub error: String,
+    /// It stores the name of the engine that failed to provide the requested search results.
     pub engine: String,
+    /// It stores the name of the color to indicate whether how severe the particular error is (In
+    /// other words it indicates the severity of the error/issue).
     pub severity_color: String,
 }
 
 impl EngineErrorInfo {
+    /// Constructs a new `SearchResult` with the given arguments needed for the struct.
+    ///
+    /// # Arguments
+    ///
+    /// * `error` - It takes the error type which occured while fetching the result from a particular
+    /// search engine.
+    /// * `engine` - It takes the name of the engine that failed to provide the requested search results.
     pub fn new(error: &EngineError, engine: &str) -> Self {
         Self {
             error: match error {
@@ -91,25 +101,26 @@ impl EngineErrorInfo {
 
 /// A named struct to store, serialize, deserialize the all the search results scraped and
 /// aggregated from the upstream search engines.
-///
-/// # Fields
-///
-/// * `results` - Stores the individual serializable `SearchResult` struct into a vector of
 /// `SearchResult` structs.
-/// * `page_query` - Stores the current pages search query `q` provided in the search url.
-/// * `style` - Stores the theming options for the website.
-/// * `engine_errors_info` - Stores the information on which engines failed with their engine name
-/// and the type of error that caused it.
-/// * `empty_result_set` - Stores a boolean which indicates that no engines gave a result for the
-/// given search query.
 #[derive(Serialize, Deserialize, Default)]
 #[serde(rename_all = "camelCase")]
 pub struct SearchResults {
+    /// Stores the individual serializable `SearchResult` struct into a vector of
     pub results: Vec<SearchResult>,
+    /// Stores the current pages search query `q` provided in the search url.
     pub page_query: String,
+    /// Stores the theming options for the website.
     pub style: Style,
+    /// Stores the information on which engines failed with their engine name
+    /// and the type of error that caused it.
     pub engine_errors_info: Vec<EngineErrorInfo>,
+    /// Stores the flag option which holds the check value that the following
+    /// search query was disallowed when the safe search level set to 4 and it
+    /// was present in the `Blocklist` file.
     pub disallowed: bool,
+    /// Stores the flag option which holds the check value that the following
+    /// search query was filtered when the safe search level set to 3 and it
+    /// was present in the `Blocklist` file.
     pub filtered: bool,
 }
 
@@ -122,9 +133,8 @@ impl SearchResults {
     /// and stores it into a vector of `SearchResult` structs.
     /// * `page_query` - Takes an argument of current page`s search query `q` provided in
     /// the search url.
-    /// * `empty_result_set` - Takes a boolean which indicates that no engines gave a result for the
-    /// given search query.
-    /// * ``
+    /// * `engine_errors_info` - Takes an array of structs which contains information regarding
+    /// which engines failed with their names, reason and their severity color name.
     pub fn new(
         results: Vec<SearchResult>,
         page_query: &str,
@@ -1,24 +1,23 @@
1
  //! This module provides the error enum to handle different errors associated while requesting data from
2
  //! the upstream search engines with the search query provided by the user.
3
 
4
- use crate::results::aggregation_models::SearchResult;
5
  use error_stack::{Result, ResultExt};
6
  use std::{collections::HashMap, fmt, time::Duration};
7
 
8
  /// A custom error type used for handle engine associated errors.
9
- ///
10
- /// This enum provides variants three different categories of errors:
11
- /// * `RequestError` - This variant handles all request related errors like forbidden, not found,
12
- /// etc.
13
- /// * `EmptyResultSet` - This variant handles the not results found error provide by the upstream
14
- /// search engines.
15
- /// * `UnexpectedError` - This variant handles all the errors which are unexpected or occur rarely
16
- /// and are errors mostly related to failure in initialization of HeaderMap, Selector errors and
17
- /// all other errors occurring within the code handling the `upstream search engines`.
18
  #[derive(Debug)]
19
  pub enum EngineError {
 
 
20
  EmptyResultSet,
 
 
21
  RequestError,
 
 
 
 
22
  UnexpectedError,
23
  }
24
 
@@ -46,6 +45,23 @@ impl error_stack::Context for EngineError {}
46
  /// A trait to define common behavior for all search engines.
47
  #[async_trait::async_trait]
48
  pub trait SearchEngine: Sync + Send {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  async fn fetch_html_from_upstream(
50
  &self,
51
  url: &str,
@@ -65,6 +81,24 @@ pub trait SearchEngine: Sync + Send {
65
  .change_context(EngineError::RequestError)?)
66
  }
67
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
  async fn results(
69
  &self,
70
  query: &str,
@@ -75,8 +109,12 @@ pub trait SearchEngine: Sync + Send {
75
  ) -> Result<HashMap<String, SearchResult>, EngineError>;
76
  }
77
 
 
78
  pub struct EngineHandler {
 
 
79
  engine: Box<dyn SearchEngine>,
 
80
  name: &'static str,
81
  }
82
 
@@ -87,12 +125,23 @@ impl Clone for EngineHandler {
87
  }
88
 
89
  impl EngineHandler {
90
- /// parses an engine name into an engine handler, returns none if the engine is unknown
 
 
 
 
 
 
 
 
91
  pub fn new(engine_name: &str) -> Option<Self> {
92
  let engine: (&'static str, Box<dyn SearchEngine>) =
93
  match engine_name.to_lowercase().as_str() {
94
- "duckduckgo" => ("duckduckgo", Box::new(super::duckduckgo::DuckDuckGo)),
95
- "searx" => ("searx", Box::new(super::searx::Searx)),
 
 
 
96
  _ => return None,
97
  };
98
 
@@ -102,6 +151,8 @@ impl EngineHandler {
102
  })
103
  }
104
 
 
 
105
  pub fn into_name_engine(self) -> (&'static str, Box<dyn SearchEngine>) {
106
  (self.name, self.engine)
107
  }
 
1
  //! This module provides the error enum to handle different errors associated while requesting data from
2
  //! the upstream search engines with the search query provided by the user.
3
 
4
+ use super::aggregation_models::SearchResult;
5
  use error_stack::{Result, ResultExt};
6
  use std::{collections::HashMap, fmt, time::Duration};
7
 
8
  /// A custom error type used for handle engine associated errors.
 
 
 
 
 
 
 
 
 
9
  #[derive(Debug)]
10
  pub enum EngineError {
11
+ /// This variant handles all request related errors like forbidden, not found,
12
+ /// etc.
13
  EmptyResultSet,
14
+ /// This variant handles the not results found error provide by the upstream
15
+ /// search engines.
16
  RequestError,
17
+ /// This variant handles all the errors which are unexpected or occur rarely
18
+ /// and are errors mostly related to failure in initialization of HeaderMap,
19
+ /// Selector errors and all other errors occurring within the code handling
20
+ /// the `upstream search engines`.
21
  UnexpectedError,
22
  }
23
 
 
45
  /// A trait to define common behavior for all search engines.
46
  #[async_trait::async_trait]
47
  pub trait SearchEngine: Sync + Send {
48
+ /// This helper function fetches/requests the search results from the upstream search engine in
49
+ /// an html form.
50
+ ///
51
+ /// # Arguments
52
+ ///
53
+ /// * `url` - It takes the url of the upstream search engine with the user requested search
54
+ /// query appended in the search parameters.
55
+ /// * `header_map` - It takes the http request headers to be sent to the upstream engine in
56
+ /// order to prevent being detected as a bot. It takes the header as a HeaderMap type.
57
+ /// * `request_timeout` - It takes the request timeout value as seconds which is used to limit
58
+ /// the amount of time for each request to remain connected when until the results can be provided
59
+ /// by the upstream engine.
60
+ ///
61
+ /// # Error
62
+ ///
63
+ /// It returns the html data as a string if the upstream engine provides the data as expected
64
+ /// otherwise it returns a custom `EngineError`.
65
  async fn fetch_html_from_upstream(
66
  &self,
67
  url: &str,
 
81
  .change_context(EngineError::RequestError)?)
82
  }
83
 
84
+ /// This function scrapes results from the upstream engine and puts all the scraped results like
85
+ /// title, visiting_url (href in html),engine (from which engine it was fetched from) and description
86
+ /// in a RawSearchResult and then adds that to HashMap whose keys are url and values are RawSearchResult
87
+ /// struct and then returns it within a Result enum.
88
+ ///
89
+ /// # Arguments
90
+ ///
91
+ /// * `query` - Takes the user provided query to query to the upstream search engine with.
92
+ /// * `page` - Takes an u32 as an argument.
93
+ /// * `user_agent` - Takes a random user agent string as an argument.
94
+ /// * `request_timeout` - Takes a time (secs) as a value which controls the server request timeout.
95
+ ///
96
+ /// # Errors
97
+ ///
98
+ /// Returns an `EngineErrorKind` if the user is not connected to the internet or if their is failure to
99
+ /// reach the above `upstream search engine` page or if the `upstream search engine` is unable to
100
+ /// provide results for the requested search query and also returns error if the scraping selector
101
+ /// or HeaderMap fails to initialize.
102
  async fn results(
103
  &self,
104
  query: &str,
 
109
  ) -> Result<HashMap<String, SearchResult>, EngineError>;
110
  }
111
 
112
+ /// A named struct which stores the engine struct with the name of the associated engine.
113
  pub struct EngineHandler {
114
+ /// It stores the engine struct wrapped in a box smart pointer as the engine struct implements
115
+ /// the `SearchEngine` trait.
116
  engine: Box<dyn SearchEngine>,
117
+ /// It stores the name of the engine to which the struct is associated to.
118
  name: &'static str,
119
  }
120
 
 
125
  }
126
 
127
  impl EngineHandler {
128
+ /// Parses an engine name into an engine handler.
129
+ ///
130
+ /// # Arguments
131
+ ///
132
+ /// * `engine_name` - It takes the name of the engine to which the struct was associated to.
133
+ ///
134
+ /// # Returns
135
+ ///
136
+ /// It returns an option either containing the value or a none if the engine is unknown
137
  pub fn new(engine_name: &str) -> Option<Self> {
138
  let engine: (&'static str, Box<dyn SearchEngine>) =
139
  match engine_name.to_lowercase().as_str() {
140
+ "duckduckgo" => (
141
+ "duckduckgo",
142
+ Box::new(crate::engines::duckduckgo::DuckDuckGo),
143
+ ),
144
+ "searx" => ("searx", Box::new(crate::engines::searx::Searx)),
145
  _ => return None,
146
  };
147
 
 
151
  })
152
  }
153
 
154
+ /// This function converts the EngineHandler type into a tuple containing the engine name and
155
+ /// the associated engine struct.
156
  pub fn into_name_engine(self) -> (&'static str, Box<dyn SearchEngine>) {
157
  (self.name, self.engine)
158
  }
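
The relocated `EngineHandler::new` still resolves engine names case-insensitively and yields `None` for unknown names, which the config parser relies on via `filter_map`. A usage sketch, assuming websurfx itself as a dependency (the engine list is illustrative):

    use websurfx::models::engine_models::EngineHandler;

    fn main() {
        let engines: Vec<EngineHandler> = ["DuckDuckGo", "searx", "unknown-engine"]
            .iter()
            .filter_map(|name| EngineHandler::new(name))
            .collect();
        // "unknown-engine" is dropped because `new` returned `None` for it.
        assert_eq!(engines.len(), 2);
    }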
src/models/mod.rs ADDED
@@ -0,0 +1,8 @@
+//! This module provides modules which in turn provides various models for aggregrating search
+//! results, parsing config file, providing trait to standardize search engine handling code,
+//! custom engine error for the search engine, etc.
+
+pub mod aggregation_models;
+pub mod engine_models;
+pub mod parser_models;
+pub mod server_models;
src/{config → models}/parser_models.rs RENAMED
@@ -12,15 +12,12 @@ use serde::{Deserialize, Serialize};
 /// order to allow the deserializing the json back to struct in aggregate function in
 /// aggregator.rs and create a new struct out of it and then serialize it back to json and pass
 /// it to the template files.
-///
-/// # Fields
-//
-/// * `theme` - It stores the parsed theme option used to set a theme for the website.
-/// * `colorscheme` - It stores the parsed colorscheme option used to set a colorscheme for the
-/// theme being used.
 #[derive(Serialize, Deserialize, Clone, Default)]
 pub struct Style {
+    /// It stores the parsed theme option used to set a theme for the website.
     pub theme: String,
+    /// It stores the parsed colorscheme option used to set a colorscheme for the
+    /// theme being used.
     pub colorscheme: String,
 }
 
@@ -36,3 +33,20 @@ impl Style {
         Style { theme, colorscheme }
     }
 }
+
+/// Configuration options for the aggregator.
+#[derive(Clone)]
+pub struct AggregatorConfig {
+    /// It stores the option to whether enable or disable random delays between
+    /// requests.
+    pub random_delay: bool,
+}
+
+/// Configuration options for the rate limiter middleware.
+#[derive(Clone)]
+pub struct RateLimiter {
+    /// The number of request that are allowed within a provided time limit.
+    pub number_of_requests: u8,
+    /// The time limit in which the quantity of requests that should be accepted.
+    pub time_limit: u8,
+}
src/models/server_models.rs ADDED
@@ -0,0 +1,26 @@
+//! This module provides the models to parse cookies and search parameters from the search
+//! engine website.
+use serde::Deserialize;
+
+/// A named struct which deserializes all the user provided search parameters and stores them.
+#[derive(Deserialize)]
+pub struct SearchParams {
+    /// It stores the search parameter option `q` (or query in simple words)
+    /// of the search url.
+    pub q: Option<String>,
+    /// It stores the search parameter `page` (or pageno in simple words)
+    /// of the search url.
+    pub page: Option<u32>,
+}
+
+/// A named struct which is used to deserialize the cookies fetched from the client side.
+#[allow(dead_code)]
+#[derive(Deserialize)]
+pub struct Cookie {
+    /// It stores the theme name used in the website.
+    pub theme: String,
+    /// It stores the colorscheme name used for the website theme.
+    pub colorscheme: String,
+    /// It stores the user selected upstream search engines selected from the UI.
+    pub engines: Vec<String>,
+}
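
Since `SearchParams` derives `Deserialize` with both fields optional, it can be exercised outside the server. A small sketch assuming websurfx and `serde_urlencoded` (the format that actix-web's `web::Query` extractor parses) as dependencies; the query strings are illustrative:

    use websurfx::models::server_models::SearchParams;

    fn main() {
        // Both parameters present in the search url's query string.
        let params: SearchParams =
            serde_urlencoded::from_str("q=rust+search&page=2").expect("valid query string");
        assert_eq!(params.q.as_deref(), Some("rust search"));
        assert_eq!(params.page, Some(2));

        // Both fields are Option, so an empty query string also deserializes.
        let empty: SearchParams = serde_urlencoded::from_str("").expect("valid query string");
        assert!(empty.q.is_none() && empty.page.is_none());
    }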
src/results/aggregator.rs CHANGED
@@ -1,27 +1,23 @@
 //! This module provides the functionality to scrape and gathers all the results from the upstream
 //! search engines and then removes duplicate results.
 
-use std::{
-    collections::HashMap,
-    io::{BufReader, Read},
-    time::Duration,
-};
-
-use super::{
+use super::user_agent::random_user_agent;
+use crate::handler::paths::{file_path, FileType};
+use crate::models::{
     aggregation_models::{EngineErrorInfo, SearchResult, SearchResults},
-    user_agent::random_user_agent,
+    engine_models::{EngineError, EngineHandler},
 };
 use error_stack::Report;
 use rand::Rng;
 use regex::Regex;
+use std::{
+    collections::HashMap,
+    io::{BufReader, Read},
+    time::Duration,
+};
 use std::{fs::File, io::BufRead};
 use tokio::task::JoinHandle;
 
-use crate::{
-    engines::engine_models::{EngineError, EngineHandler},
-    handler::paths::{file_path, FileType},
-};
-
 /// Aliases for long type annotations
 type FutureVec = Vec<JoinHandle<Result<HashMap<String, SearchResult>, Report<EngineError>>>>;
 
src/results/mod.rs CHANGED
@@ -1,3 +1,6 @@
-pub mod aggregation_models;
+//! This module provides modules that handle the functionality to aggregate the fetched search
+//! results from the upstream search engines and filters it if safe search is set to 3 or 4. Also,
+//! provides various models to aggregate search results into a standardized form.
+
 pub mod aggregator;
 pub mod user_agent;
src/results/user_agent.rs CHANGED
@@ -4,6 +4,8 @@ use std::sync::OnceLock;
 
  use fake_useragent::{Browsers, UserAgents, UserAgentsBuilder};
 
+ /// A static variable which stores the initially built `UserAgents` struct so that it can be
+ /// reused again and again without the need of reinitializing the `UserAgents` struct.
  static USER_AGENTS: OnceLock<UserAgents> = OnceLock::new();
 
  /// A function to generate random user agent to improve privacy of the user.
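As background on the `OnceLock` pattern documented above, a self-contained sketch using only the standard library (the `UserAgentList` type and the hard-coded agent string are stand-ins for the crate's `UserAgents` builder, not its real API):

    use std::sync::OnceLock;

    // Hypothetical stand-in for the `UserAgents` struct built by `UserAgentsBuilder`.
    struct UserAgentList(Vec<&'static str>);

    static USER_AGENTS: OnceLock<UserAgentList> = OnceLock::new();

    fn random_user_agent() -> &'static str {
        let agents = USER_AGENTS.get_or_init(|| {
            // This closure runs exactly once, on the first call; every later
            // call reuses the cached list without rebuilding it.
            UserAgentList(vec!["Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/118.0"])
        });
        agents.0[0] // the real function picks a random entry instead
    }

    fn main() {
        assert_eq!(random_user_agent(), random_user_agent());
    }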
src/server/mod.rs CHANGED
@@ -1 +1,7 @@
+ //! This module provides modules that handle the functionality of handling different routes/paths
+ //! for the `websurfx` search engine website. It also handles the parsing of search parameters in
+ //! the search route and caches the next, current and previous search results in the search
+ //! routes with the help of the redis server.
+
+ pub mod router;
  pub mod routes;
src/server/router.rs ADDED
@@ -0,0 +1,64 @@
+ //! This module provides the functionality to handle different routes of the `websurfx`
+ //! meta search engine website and provide appropriate response to each route/page
+ //! when requested.
+
+ use crate::{
+     config::parser::Config,
+     handler::paths::{file_path, FileType},
+ };
+ use actix_web::{get, web, HttpRequest, HttpResponse};
+ use handlebars::Handlebars;
+ use std::fs::read_to_string;
+
+ /// Handles the route of index page or main page of the `websurfx` meta search engine website.
+ #[get("/")]
+ pub async fn index(
+     hbs: web::Data<Handlebars<'_>>,
+     config: web::Data<Config>,
+ ) -> Result<HttpResponse, Box<dyn std::error::Error>> {
+     let page_content: String = hbs.render("index", &config.style).unwrap();
+     Ok(HttpResponse::Ok().body(page_content))
+ }
+
+ /// Handles the route of any other accessed route/page which is not provided by the
+ /// website, essentially the 404 error page.
+ pub async fn not_found(
+     hbs: web::Data<Handlebars<'_>>,
+     config: web::Data<Config>,
+ ) -> Result<HttpResponse, Box<dyn std::error::Error>> {
+     let page_content: String = hbs.render("404", &config.style)?;
+
+     Ok(HttpResponse::Ok()
+         .content_type("text/html; charset=utf-8")
+         .body(page_content))
+ }
+
+ /// Handles the route of robots.txt page of the `websurfx` meta search engine website.
+ #[get("/robots.txt")]
+ pub async fn robots_data(_req: HttpRequest) -> Result<HttpResponse, Box<dyn std::error::Error>> {
+     let page_content: String =
+         read_to_string(format!("{}/robots.txt", file_path(FileType::Theme)?))?;
+     Ok(HttpResponse::Ok()
+         .content_type("text/plain; charset=ascii")
+         .body(page_content))
+ }
+
+ /// Handles the route of about page of the `websurfx` meta search engine website.
+ #[get("/about")]
+ pub async fn about(
+     hbs: web::Data<Handlebars<'_>>,
+     config: web::Data<Config>,
+ ) -> Result<HttpResponse, Box<dyn std::error::Error>> {
+     let page_content: String = hbs.render("about", &config.style)?;
+     Ok(HttpResponse::Ok().body(page_content))
+ }
+
+ /// Handles the route of settings page of the `websurfx` meta search engine website.
+ #[get("/settings")]
+ pub async fn settings(
+     hbs: web::Data<Handlebars<'_>>,
+     config: web::Data<Config>,
+ ) -> Result<HttpResponse, Box<dyn std::error::Error>> {
+     let page_content: String = hbs.render("settings", &config.style)?;
+     Ok(HttpResponse::Ok().body(page_content))
+ }
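For orientation, a hedged sketch of how handlers like these are typically mounted on an actix-web `App`; the commit itself does not show this wiring, and the `Handlebars`/`Config` app data registration that the handlers expect is elided:

    use actix_web::{web, App, HttpServer};

    #[actix_web::main]
    async fn main() -> std::io::Result<()> {
        HttpServer::new(|| {
            App::new()
                // .app_data(...) calls for Handlebars<'_> and Config would go here.
                .service(router::index)
                .service(router::about)
                .service(router::settings)
                .service(router::robots_data)
                // `not_found` carries no #[get] attribute, so it is mounted as
                // the fallback for every unmatched path.
                .default_service(web::route().to(router::not_found))
        })
        .bind(("127.0.0.1", 8080))?
        .run()
        .await
    }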
src/server/routes/mod.rs ADDED
@@ -0,0 +1,3 @@
+ //! This module provides modules to handle various routes in the search engine website.
+
+ pub mod search;
src/server/{routes.rs β†’ routes/search.rs} RENAMED
@@ -1,23 +1,20 @@
- //! This module provides the functionality to handle different routes of the `websurfx`
- //! meta search engine website and provide appropriate response to each route/page
- //! when requested.
-
- use std::{
-     fs::{read_to_string, File},
-     io::{BufRead, BufReader, Read},
- };
+ //! This module handles the search route of the search engine website.
 
  use crate::{
      cache::cacher::RedisCache,
      config::parser::Config,
-     engines::engine_models::EngineHandler,
      handler::paths::{file_path, FileType},
-     results::{aggregation_models::SearchResults, aggregator::aggregate},
+     models::{aggregation_models::SearchResults, engine_models::EngineHandler},
+     results::aggregator::aggregate,
  };
  use actix_web::{get, web, HttpRequest, HttpResponse};
  use handlebars::Handlebars;
  use regex::Regex;
  use serde::Deserialize;
+ use std::{
+     fs::{read_to_string, File},
+     io::{BufRead, BufReader, Read},
+ };
  use tokio::join;
 
  // ---- Constants ----
@@ -25,17 +22,16 @@ use tokio::join;
  static REDIS_CACHE: async_once_cell::OnceCell<RedisCache> = async_once_cell::OnceCell::new();
 
  /// A named struct which deserializes all the user provided search parameters and stores them.
- ///
- /// # Fields
- ///
- /// * `q` - It stores the search parameter option `q` (or query in simple words)
- /// of the search url.
- /// * `page` - It stores the search parameter `page` (or pageno in simple words)
- /// of the search url.
  #[derive(Deserialize)]
- struct SearchParams {
+ pub struct SearchParams {
+     /// It stores the search parameter option `q` (or query in simple words)
+     /// of the search url.
      q: Option<String>,
+     /// It stores the search parameter `page` (or pageno in simple words)
+     /// of the search url.
      page: Option<u32>,
+     /// It stores the search parameter `safesearch` (or safe search level in simple words) of the
+     /// search url.
      safesearch: Option<u8>,
  }
 
@@ -63,17 +59,14 @@ pub async fn not_found(
  }
 
  /// A named struct which is used to deserialize the cookies fetched from the client side.
- ///
- /// # Fields
- ///
- /// * `theme` - It stores the theme name used in the website.
- /// * `colorscheme` - It stores the colorscheme name used for the website theme.
- /// * `engines` - It stores the user selected upstream search engines selected from the UI.
  #[allow(dead_code)]
  #[derive(Deserialize)]
  struct Cookie<'a> {
+     /// It stores the theme name used in the website.
      theme: &'a str,
+     /// It stores the colorscheme name used for the website theme.
      colorscheme: &'a str,
+     /// It stores the user selected upstream search engines selected from the UI.
      engines: Vec<&'a str>,
  }
 
@@ -174,8 +167,21 @@ pub async fn search(
      }
  }
 
- /// Fetches the results for a query and page.
- /// First checks the redis cache, if that fails it gets proper results
+ /// Fetches the results for a query and page. It first checks the redis cache, and if that
+ /// fails it gets proper results by requesting them from the upstream search engines.
+ ///
+ /// # Arguments
+ ///
+ /// * `url` - It takes the url of the current page that requested the search results for a
+ /// particular search query.
+ /// * `config` - It takes a parsed config struct.
+ /// * `query` - It takes the user provided search query.
+ /// * `req` - It takes the `HttpRequest` struct as a value.
+ ///
+ /// # Error
+ ///
+ /// It returns the `SearchResults` struct if the search results could be successfully fetched from
+ /// the cache or from the upstream search engines, otherwise it returns an appropriate error.
  async fn results(
      url: String,
      config: &Config,
@@ -184,6 +190,7 @@ async fn results(
      req: HttpRequest,
      safe_search: u8,
  ) -> Result<SearchResults, Box<dyn std::error::Error>> {
+     // Initialize redis cache connection struct
      let mut redis_cache: RedisCache = REDIS_CACHE
          .get_or_init(async {
              // Initialize redis cache connection pool only once and store it in the heap.
@@ -191,7 +198,6 @@ async fn results(
      })
      .await
      .clone();
-
      // fetch the cached results json.
      let cached_results_json: Result<String, error_stack::Report<crate::cache::error::PoolError>> =
          redis_cache.clone().cached_json(&url).await;
@@ -223,7 +229,8 @@ async fn results(
      // UI and use that.
      let mut results: SearchResults = match req.cookie("appCookie") {
          Some(cookie_value) => {
-             let cookie_value: Cookie = serde_json::from_str(cookie_value.name_value().1)?;
+             let cookie_value: Cookie<'_> =
+                 serde_json::from_str(cookie_value.name_value().1)?;
 
              let engines: Vec<EngineHandler> = cookie_value
                  .engines
@@ -267,6 +274,8 @@ async fn results(
      }
  }
 
+ /// A helper function which checks whether the search query contains any keywords which should be
+ /// disallowed/allowed based on the regex-based rules present in the blocklist and allowlist files.
  fn is_match_from_filter_list(
      file_path: &str,
      query: &str,
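One detail worth noting in the hunks above: `Cookie<'a>` borrows its strings straight out of the raw cookie text, which `serde_json` supports zero-copy. A small sketch of that deserialization (the cookie JSON and theme names here are invented for illustration):

    use serde::Deserialize;

    #[derive(Deserialize)]
    struct Cookie<'a> {
        theme: &'a str,
        colorscheme: &'a str,
        engines: Vec<&'a str>,
    }

    fn main() {
        let raw = r#"{"theme":"simple","colorscheme":"dark","engines":["DuckDuckGo"]}"#;
        // Borrowed &str fields point into `raw`; no string copies are made.
        let cookie: Cookie<'_> = serde_json::from_str(raw).unwrap();
        assert_eq!(cookie.theme, "simple");
        assert_eq!(cookie.colorscheme, "dark");
        assert_eq!(cookie.engines, vec!["DuckDuckGo"]);
    }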
websurfx/config.lua CHANGED
@@ -10,6 +10,10 @@ production_use = false -- whether to use production mode or not (in other words
  -- if production_use is set to true
  -- There will be a random delay before sending the request to the search engines, this is to prevent DDoSing the upstream search engines from a large number of simultaneous requests.
  request_timeout = 30 -- timeout for the search requests sent to the upstream search engines to be fetched (value in seconds).
+ rate_limiter = {
+     number_of_requests = 20, -- The number of requests that are allowed within the provided time limit.
+     time_limit = 3, -- The time limit within which the above number of requests should be accepted.
+ }
 
  -- ### Search ###
  -- Filter results based on different levels. The levels provided are:
@@ -45,4 +49,7 @@ theme = "simple" -- the theme name which should be used for the website
  redis_url = "redis://127.0.0.1:8082" -- redis connection url address on which the client should connect on.
 
  -- ### Search Engines ###
- upstream_search_engines = { DuckDuckGo = true, Searx = false } -- select the upstream search engines from which the results should be fetched.
+ upstream_search_engines = {
+     DuckDuckGo = true,
+     Searx = false,
+ } -- select the upstream search engines from which the results should be fetched.
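The commit does not show the Rust code that consumes this `rate_limiter` table, so the following is a rough illustration of the intended semantics only (at most 20 requests accepted per 3-second window); the real limiter may well use a different algorithm:

    use std::time::{Duration, Instant};

    // Illustrative fixed-window counter using the limits from config.lua above.
    struct RateLimiter {
        number_of_requests: u32, // 20 in the config above
        time_limit: Duration,    // 3 seconds in the config above
        window_start: Instant,
        seen: u32,
    }

    impl RateLimiter {
        fn allow(&mut self) -> bool {
            // Start a fresh window once the time limit has elapsed.
            if self.window_start.elapsed() > self.time_limit {
                self.window_start = Instant::now();
                self.seen = 0;
            }
            self.seen += 1;
            self.seen <= self.number_of_requests
        }
    }

    fn main() {
        let mut limiter = RateLimiter {
            number_of_requests: 20,
            time_limit: Duration::from_secs(3),
            window_start: Instant::now(),
            seen: 0,
        };
        // The 21st request inside the same 3-second window is rejected.
        let verdicts: Vec<bool> = (0..21).map(|_| limiter.allow()).collect();
        assert!(verdicts[..20].iter().all(|&v| v) && !verdicts[20]);
    }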