alamin655 committed
Commit 2a04e64 • 2 parents: f20ac50 e19038b

Merge branch 'rolling' into fix-gitpod-setup
Cargo.lock CHANGED
@@ -57,6 +57,18 @@ dependencies = [
  "pin-project-lite",
 ]
 
+[[package]]
+name = "actix-governor"
+version = "0.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "46ff2d40f2bc627b8054c5e20fa6b0b0cf9428699b54bd41634e9ae3098ad555"
+dependencies = [
+ "actix-http",
+ "actix-web",
+ "futures 0.3.28",
+ "governor",
+]
+
 [[package]]
 name = "actix-http"
 version = "3.4.0"
@@ -590,7 +602,7 @@ version = "0.12.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "888604f00b3db336d2af898ec3c1d5d0ddf5e6d462220f2ededc33a87ac4bbd5"
 dependencies = [
- "time 0.1.45",
+ "time 0.1.43",
  "url 1.7.2",
 ]
 
@@ -618,7 +630,7 @@ dependencies = [
  "publicsuffix",
  "serde",
  "serde_json",
- "time 0.1.45",
+ "time 0.1.43",
  "try_from",
  "url 1.7.2",
 ]
@@ -817,6 +829,19 @@ dependencies = [
  "syn 2.0.32",
 ]
 
+[[package]]
+name = "dashmap"
+version = "5.5.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "978747c1d849a7d2ee5e8adc0159961c48fb7e5db2f06af6723b80123bb53856"
+dependencies = [
+ "cfg-if 1.0.0",
+ "hashbrown 0.14.0",
+ "lock_api 0.4.10",
+ "once_cell",
+ "parking_lot_core 0.9.8",
+]
+
 [[package]]
 name = "deranged"
 version = "0.3.8"
@@ -1162,6 +1187,12 @@ version = "0.3.28"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "76d3d132be6c0e6aa1534069c705a74a5997a356c0dc2f86a47765e5617c5b65"
 
+[[package]]
+name = "futures-timer"
+version = "3.0.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e64b03909df88034c26dc1547e8970b91f98bdb65165d6a4e9110d94263dbb2c"
+
 [[package]]
 name = "futures-util"
 version = "0.3.28"
@@ -1225,6 +1256,24 @@ version = "0.28.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "6fb8d784f27acf97159b40fc4db5ecd8aa23b9ad5ef69cdd136d3bc80665f0c0"
 
+[[package]]
+name = "governor"
+version = "0.5.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c390a940a5d157878dd057c78680a33ce3415bcd05b4799509ea44210914b4d5"
+dependencies = [
+ "cfg-if 1.0.0",
+ "dashmap",
+ "futures 0.3.28",
+ "futures-timer",
+ "no-std-compat",
+ "nonzero_ext",
+ "parking_lot 0.12.1",
+ "quanta",
+ "rand 0.8.5",
+ "smallvec 1.11.0",
+]
+
 [[package]]
 name = "h2"
 version = "0.1.26"
@@ -1289,6 +1338,12 @@ version = "0.12.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
 
+[[package]]
+name = "hashbrown"
+version = "0.14.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2c6201b9ff9fd90a5a3bac2e56a830d0caa509576f0e503818ee82c181b3437a"
+
 [[package]]
 name = "hermit-abi"
 version = "0.3.2"
@@ -1410,7 +1465,7 @@ dependencies = [
  "log",
  "net2",
  "rustc_version 0.2.3",
- "time 0.1.45",
+ "time 0.1.43",
  "tokio 0.1.22",
  "tokio-buf",
  "tokio-executor",
@@ -1511,7 +1566,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99"
 dependencies = [
  "autocfg 1.1.0",
- "hashbrown",
+ "hashbrown 0.12.3",
 ]
 
 [[package]]
@@ -1672,6 +1727,15 @@ version = "0.1.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4"
 
+[[package]]
+name = "mach"
+version = "0.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b823e83b2affd8f40a9ee8c29dbc56404c1e34cd2710921f2801e2cf29527afa"
+dependencies = [
+ "libc",
+]
+
 [[package]]
 name = "markup5ever"
 version = "0.8.1"
@@ -1887,6 +1951,18 @@ version = "0.5.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ab250442c86f1850815b5d268639dff018c0627022bc1940eb2d642ca1ce12f0"
 
+[[package]]
+name = "no-std-compat"
+version = "0.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b93853da6d84c2e3c7d730d6473e8817692dd89be387eb01b94d7f108ecb5b8c"
+
+[[package]]
+name = "nonzero_ext"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "38bf9645c8b145698bb0b18a4637dcacbc421ea49bef2317e4fd8065a387cf21"
+
 [[package]]
 name = "num-traits"
 version = "0.2.16"
@@ -2307,6 +2383,22 @@ dependencies = [
  "url 2.4.1",
 ]
 
+[[package]]
+name = "quanta"
+version = "0.9.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "20afe714292d5e879d8b12740aa223c6a88f118af41870e8b6196e39a02238a8"
+dependencies = [
+ "crossbeam-utils 0.8.16",
+ "libc",
+ "mach",
+ "once_cell",
+ "raw-cpuid",
+ "wasi 0.10.2+wasi-snapshot-preview1",
+ "web-sys",
+ "winapi 0.3.9",
+]
+
 [[package]]
 name = "quote"
 version = "0.6.13"
@@ -2461,6 +2553,15 @@ dependencies = [
  "rand_core 0.3.1",
 ]
 
+[[package]]
+name = "raw-cpuid"
+version = "10.7.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6c297679cb867470fa8c9f67dbba74a78d78e3e98d7cf2b08d6d71540f797332"
+dependencies = [
+ "bitflags 1.3.2",
+]
+
 [[package]]
 name = "rayon"
 version = "1.7.0"
@@ -2583,7 +2684,7 @@ dependencies = [
  "serde",
  "serde_json",
  "serde_urlencoded 0.5.5",
- "time 0.1.45",
+ "time 0.1.43",
  "tokio 0.1.22",
  "tokio-executor",
  "tokio-io",
@@ -3157,12 +3258,11 @@ checksum = "3bf63baf9f5039dadc247375c29eb13706706cfde997d0330d05aa63a77d8820"
 
 [[package]]
 name = "time"
-version = "0.1.45"
+version = "0.1.43"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1b797afad3f312d1c66a56d11d0316f916356d11bd158fbc6ca6389ff6bf805a"
+checksum = "ca8a50ef2360fbd1eeb0ecd46795a87a19024eb4b53c5dc916ca1fd95fe62438"
 dependencies = [
  "libc",
- "wasi 0.10.0+wasi-snapshot-preview1",
  "winapi 0.3.9",
 ]
 
@@ -3609,9 +3709,9 @@ dependencies = [
 
 [[package]]
 name = "wasi"
-version = "0.10.0+wasi-snapshot-preview1"
+version = "0.10.2+wasi-snapshot-preview1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f"
+checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6"
 
 [[package]]
 name = "wasi"
@@ -3701,6 +3801,7 @@ version = "0.20.7"
 dependencies = [
  "actix-cors",
  "actix-files",
+ "actix-governor",
  "actix-web",
  "async-once-cell",
  "async-trait",
Cargo.toml CHANGED
@@ -32,6 +32,7 @@ futures = {version="0.3.28"}
 dhat = {version="0.3.2", optional = true}
 mimalloc = { version = "0.1.38", default-features = false }
 async-once-cell = {version="0.5.3"}
+actix-governor = {version="0.4.1"}
 
 [dev-dependencies]
 rusty-hook = "^0.11.2"
src/cache/cacher.rs CHANGED
@@ -10,17 +10,14 @@ use super::error::PoolError;
 
 /// A named struct which stores the redis Connection url address to which the client will
 /// connect to.
-///
-/// # Fields
-///
-/// * `connection_pool` - It stores a pool of connections ready to be used.
-/// * `pool_size` - It stores the size of the connection pool (in other words the number of
-/// connections that should be stored in the pool).
-/// * `current_connection` - It stores the index of which connection is being used at the moment.
 #[derive(Clone)]
 pub struct RedisCache {
+    /// It stores a pool of connections ready to be used.
     connection_pool: Vec<ConnectionManager>,
+    /// It stores the size of the connection pool (in other words the number of
+    /// connections that should be stored in the pool).
     pool_size: u8,
+    /// It stores the index of which connection is being used at the moment.
     current_connection: u8,
 }
 
src/cache/error.rs CHANGED
@@ -5,15 +5,12 @@ use std::fmt;
 use redis::RedisError;
 
 /// A custom error type used for handling redis async pool associated errors.
-///
-/// This enum provides variants three different categories of errors:
-/// * `RedisError` - This variant handles all errors related to `RedisError`,
-/// * `PoolExhaustionWithConnectionDropError` - This variant handles the error
-/// which occurs when all the connections in the connection pool return a connection
-/// dropped redis error.
 #[derive(Debug)]
 pub enum PoolError {
+    /// This variant handles all errors related to `RedisError`,
    RedisError(RedisError),
+    /// This variant handles the errors which occurs when all the connections
+    /// in the connection pool return a connection dropped redis error.
    PoolExhaustionWithConnectionDropError,
 }
 
src/cache/mod.rs CHANGED
@@ -1,2 +1,5 @@
+//! This module provides the modules which provide the functionality to cache the aggregated
+//! results fetched and aggregated from the upstream search engines in a json format.
+
 pub mod cacher;
 pub mod error;
src/config/mod.rs CHANGED
@@ -1,2 +1,4 @@
+//! This module provides the modules which handles the functionality to parse the lua config
+//! and convert the config options into rust readable form.
+
 pub mod parser;
-pub mod parser_models;
src/config/parser.rs CHANGED
@@ -3,52 +3,42 @@
 
 use crate::handler::paths::{file_path, FileType};
 
-use super::parser_models::Style;
+use crate::models::parser_models::{AggregatorConfig, RateLimiter, Style};
 use log::LevelFilter;
 use mlua::Lua;
 use std::{collections::HashMap, fs, thread::available_parallelism};
 
 /// A named struct which stores the parsed config file options.
-///
-/// # Fields
-//
-/// * `port` - It stores the parsed port number option on which the server should launch.
-/// * `binding_ip` - It stores the parsed ip address option on which the server should launch
-/// * `style` - It stores the theming options for the website.
-/// * `redis_url` - It stores the redis connection url address on which the redis
-/// client should connect.
-/// * `aggregator` - It stores the option to whether enable or disable production use.
-/// * `logging` - It stores the option to whether enable or disable logs.
-/// * `debug` - It stores the option to whether enable or disable debug mode.
-/// * `upstream_search_engines` - It stores all the engine names that were enabled by the user.
-/// * `request_timeout` - It stores the time (secs) which controls the server request timeout.
-/// * `threads` - It stores the number of threads which controls the app will use to run.
 #[derive(Clone)]
 pub struct Config {
+    /// It stores the parsed port number option on which the server should launch.
     pub port: u16,
+    /// It stores the parsed ip address option on which the server should launch
     pub binding_ip: String,
+    /// It stores the theming options for the website.
     pub style: Style,
+    /// It stores the redis connection url address on which the redis
+    /// client should connect.
     pub redis_url: String,
+    /// It stores the option to whether enable or disable production use.
     pub aggregator: AggregatorConfig,
+    /// It stores the option to whether enable or disable logs.
     pub logging: bool,
+    /// It stores the option to whether enable or disable debug mode.
     pub debug: bool,
-    pub upstream_search_engines: Vec<crate::engines::engine_models::EngineHandler>,
+    /// It stores all the engine names that were enabled by the user.
+    pub upstream_search_engines: Vec<crate::models::engine_models::EngineHandler>,
+    /// It stores the time (secs) which controls the server request timeout.
     pub request_timeout: u8,
+    /// It stores the number of threads which controls the app will use to run.
     pub threads: u8,
+    /// It stores configuration options for the ratelimiting middleware.
+    pub rate_limiter: RateLimiter,
+    /// It stores the level of safe search to be used for restricting content in the
+    /// search results.
     pub safe_search: u8,
 }
 
-/// Configuration options for the aggregator.
-///
-/// # Fields
-///
-/// * `random_delay` - It stores the option to whether enable or disable random delays between
-/// requests.
-#[derive(Clone)]
-pub struct AggregatorConfig {
-    pub random_delay: bool,
-}
-
 impl Config {
     /// A function which parses the config.lua file and puts all the parsed options in the newly
     /// constructed Config struct and returns it.
@@ -90,6 +80,8 @@ impl Config {
             parsed_threads
         };
 
+        let rate_limiter = globals.get::<_, HashMap<String, u8>>("rate_limiter")?;
+
         let parsed_safe_search: u8 = globals.get::<_, u8>("safe_search")?;
         let safe_search: u8 = match parsed_safe_search {
             0..=4 => parsed_safe_search,
@@ -117,16 +109,25 @@ impl Config {
                 .get::<_, HashMap<String, bool>>("upstream_search_engines")?
                 .into_iter()
                 .filter_map(|(key, value)| value.then_some(key))
-                .filter_map(|engine| crate::engines::engine_models::EngineHandler::new(&engine))
+                .filter_map(|engine| crate::models::engine_models::EngineHandler::new(&engine))
                 .collect(),
             request_timeout: globals.get::<_, u8>("request_timeout")?,
             threads,
+            rate_limiter: RateLimiter {
+                number_of_requests: rate_limiter["number_of_requests"],
+                time_limit: rate_limiter["time_limit"],
+            },
            safe_search,
        })
    }
 }
 
 /// a helper function that sets the proper logging level
+///
+/// # Arguments
+///
+/// * `debug` - It takes the option to whether enable or disable debug mode.
+/// * `logging` - It takes the option to whether enable or disable logs.
 fn set_logging_level(debug: bool, logging: bool) {
     if let Ok(pkg_env_var) = std::env::var("PKG_ENV") {
         if pkg_env_var.to_lowercase() == "dev" {
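
Note on the new `rate_limiter` option: it is read as a `HashMap<String, u8>` and then indexed directly, so a config.lua that omits either key panics at startup rather than returning a parse error. A minimal sketch of that parsing step in isolation, assuming mlua as in the commit (the table values here are illustrative, not defaults shipped by the project):

    use mlua::Lua;
    use std::collections::HashMap;

    fn main() -> mlua::Result<()> {
        let lua = Lua::new();
        // Stand-in for loading config.lua; the numbers are illustrative.
        lua.load(r#"rate_limiter = { number_of_requests = 20, time_limit = 3 }"#)
            .exec()?;

        let globals = lua.globals();
        let rate_limiter = globals.get::<_, HashMap<String, u8>>("rate_limiter")?;

        // Direct indexing, as in the commit: this panics if a key is missing.
        let number_of_requests = rate_limiter["number_of_requests"];
        let time_limit = rate_limiter["time_limit"];
        assert_eq!((number_of_requests, time_limit), (20, 3));
        Ok(())
    }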
src/engines/duckduckgo.rs CHANGED
@@ -7,9 +7,9 @@ use std::collections::HashMap;
 use reqwest::header::HeaderMap;
 use scraper::{Html, Selector};
 
-use crate::results::aggregation_models::SearchResult;
+use crate::models::aggregation_models::SearchResult;
 
-use super::engine_models::{EngineError, SearchEngine};
+use crate::models::engine_models::{EngineError, SearchEngine};
 
 use error_stack::{Report, Result, ResultExt};
 
@@ -19,24 +19,6 @@ pub struct DuckDuckGo;
 
 #[async_trait::async_trait]
 impl SearchEngine for DuckDuckGo {
-    /// This function scrapes results from the upstream engine duckduckgo and puts all the scraped
-    /// results like title, visiting_url (href in html),engine (from which engine it was fetched from)
-    /// and description in a RawSearchResult and then adds that to HashMap whose keys are url and
-    /// values are RawSearchResult struct and then returns it within a Result enum.
-    ///
-    /// # Arguments
-    ///
-    /// * `query` - Takes the user provided query to query to the upstream search engine with.
-    /// * `page` - Takes an u32 as an argument.
-    /// * `user_agent` - Takes a random user agent string as an argument.
-    /// * `request_timeout` - Takes a time (secs) as a value which controls the server request timeout.
-    ///
-    /// # Errors
-    ///
-    /// Returns an `EngineErrorKind` if the user is not connected to the internet or if their is failure to
-    /// reach the above `upstream search engine` page or if the `upstream search engine` is unable to
-    /// provide results for the requested search query and also returns error if the scraping selector
-    /// or HeaderMap fails to initialize.
     async fn results(
         &self,
         query: &str,
src/engines/mod.rs CHANGED
@@ -1,3 +1,7 @@
+//! This module provides different modules which handles the functionlity to fetch results from the
+//! upstream search engines based on user requested queries. Also provides different models to
+//! provide a standard functions to be implemented for all the upstream search engine handling
+//! code. Moreover, it also provides a custom error for the upstream search engine handling code.
+
 pub mod duckduckgo;
-pub mod engine_models;
 pub mod searx;
src/engines/searx.rs CHANGED
@@ -6,9 +6,8 @@ use reqwest::header::HeaderMap;
 use scraper::{Html, Selector};
 use std::collections::HashMap;
 
-use crate::results::aggregation_models::SearchResult;
-
-use super::engine_models::{EngineError, SearchEngine};
+use crate::models::aggregation_models::SearchResult;
+use crate::models::engine_models::{EngineError, SearchEngine};
 use error_stack::{Report, Result, ResultExt};
 
 /// A new Searx engine type defined in-order to implement the `SearchEngine` trait which allows to
@@ -17,25 +16,6 @@ pub struct Searx;
 
 #[async_trait::async_trait]
 impl SearchEngine for Searx {
-    /// This function scrapes results from the upstream engine duckduckgo and puts all the scraped
-    /// results like title, visiting_url (href in html),engine (from which engine it was fetched from)
-    /// and description in a RawSearchResult and then adds that to HashMap whose keys are url and
-    /// values are RawSearchResult struct and then returns it within a Result enum.
-    ///
-    /// # Arguments
-    ///
-    /// * `query` - Takes the user provided query to query to the upstream search engine with.
-    /// * `page` - Takes an u32 as an argument.
-    /// * `user_agent` - Takes a random user agent string as an argument.
-    /// * `request_timeout` - Takes a time (secs) as a value which controls the server request timeout.
-    ///
-    /// # Errors
-    ///
-    /// Returns an `EngineErrorKind` if the user is not connected to the internet or if their is failure to
-    /// reach the above `upstream search engine` page or if the `upstream search engine` is unable to
-    /// provide results for the requested search query and also returns error if the scraping selector
-    /// or HeaderMap fails to initialize.
-
     async fn results(
         &self,
         query: &str,
src/handler/mod.rs CHANGED
@@ -1 +1,5 @@
+//! This module provides modules which provide the functionality to handle paths for different
+//! files present on different paths and provide one appropriate path on which it is present and
+//! can be used.
+
 pub mod paths;
src/handler/paths.rs CHANGED
@@ -7,42 +7,46 @@ use std::path::Path;
 use std::sync::OnceLock;
 
 // ------- Constants --------
-static PUBLIC_DIRECTORY_NAME: &str = "public";
-static COMMON_DIRECTORY_NAME: &str = "websurfx";
-static CONFIG_FILE_NAME: &str = "config.lua";
-static ALLOWLIST_FILE_NAME: &str = "allowlist.txt";
-static BLOCKLIST_FILE_NAME: &str = "blocklist.txt";
+/// The constant holding the name of the theme folder.
+const PUBLIC_DIRECTORY_NAME: &str = "public";
+/// The constant holding the name of the common folder.
+const COMMON_DIRECTORY_NAME: &str = "websurfx";
+/// The constant holding the name of the config file.
+const CONFIG_FILE_NAME: &str = "config.lua";
+/// The constant holding the name of the AllowList text file.
+const ALLOWLIST_FILE_NAME: &str = "allowlist.txt";
+/// The constant holding the name of the BlockList text file.
+const BLOCKLIST_FILE_NAME: &str = "blocklist.txt";
 
+/// An enum type which provides different variants to handle paths for various files/folders.
 #[derive(Hash, PartialEq, Eq, Debug)]
 pub enum FileType {
+    /// This variant handles all the paths associated with the config file.
     Config,
+    /// This variant handles all the paths associated with the Allowlist text file.
     AllowList,
+    /// This variant handles all the paths associated with the BlockList text file.
     BlockList,
+    /// This variant handles all the paths associated with the public folder (Theme folder).
     Theme,
 }
 
+/// A static variable which stores the different filesystem paths for various file/folder types.
 static FILE_PATHS_FOR_DIFF_FILE_TYPES: OnceLock<HashMap<FileType, Vec<String>>> = OnceLock::new();
 
-/// A helper function which returns an appropriate config file path checking if the config
-/// file exists on that path.
+/// A function which returns an appropriate path for thr provided file type by checking if the path
+/// for the given file type exists on that path.
 ///
 /// # Error
 ///
-/// Returns a `config file not found!!` error if the config file is not present under following
-/// paths which are:
-/// 1. `~/.config/websurfx/` if it not present here then it fallbacks to the next one (2)
-/// 2. `/etc/xdg/websurfx/config.lua` if it is not present here then it fallbacks to the next
-/// one (3).
-/// 3. `websurfx/` (under project folder ( or codebase in other words)) if it is not present
-/// here then it returns an error as mentioned above.
-
-/// A function which returns an appropriate theme directory path checking if the theme
-/// directory exists on that path.
+/// Returns a `<File Name> folder/file not found!!` error if the give file_type folder/file is not
+/// present on the path on which it is being tested.
 ///
-/// # Error
+/// # Example
+///
+/// If this function is give the file_type of Theme variant then the theme folder is checked by the
+/// following steps:
 ///
-/// Returns a `Theme (public) folder not found!!` error if the theme folder is not present under following
-/// paths which are:
 /// 1. `/opt/websurfx` if it not present here then it fallbacks to the next one (2)
 /// 2. Under project folder ( or codebase in other words) if it is not present
 /// here then it returns an error as mentioned above.
@@ -110,6 +114,6 @@ pub fn file_path(file_type: FileType) -> Result<&'static str, Error> {
     // if no of the configs above exist, return error
     Err(Error::new(
         std::io::ErrorKind::NotFound,
-        format!("{:?} file not found!!", file_type),
+        format!("{:?} file/folder not found!!", file_type),
     ))
 }
src/lib.rs CHANGED
@@ -1,25 +1,26 @@
 //! This main library module provides the functionality to provide and handle the Tcp server
 //! and register all the routes for the `websurfx` meta search engine website.
 
+#![forbid(unsafe_code, clippy::panic)]
+#![deny(missing_docs, clippy::missing_docs_in_private_items, clippy::perf)]
+#![warn(clippy::cognitive_complexity, rust_2018_idioms)]
+
 pub mod cache;
 pub mod config;
 pub mod engines;
 pub mod handler;
+pub mod models;
 pub mod results;
 pub mod server;
 
 use std::net::TcpListener;
 
-use crate::server::routes;
+use crate::server::router;
 
 use actix_cors::Cors;
 use actix_files as fs;
-use actix_web::{
-    dev::Server,
-    http::header,
-    middleware::{Compress, Logger},
-    web, App, HttpServer,
-};
+use actix_governor::{Governor, GovernorConfigBuilder};
+use actix_web::{dev::Server, http::header, middleware::Logger, web, App, HttpServer};
 use config::parser::Config;
 use handlebars::Handlebars;
 use handler::paths::{file_path, FileType};
@@ -45,7 +46,7 @@ use handler::paths::{file_path, FileType};
 /// let server = run(listener,config).expect("Failed to start server");
 /// ```
 pub fn run(listener: TcpListener, config: Config) -> std::io::Result<Server> {
-    let mut handlebars: Handlebars = Handlebars::new();
+    let mut handlebars: Handlebars<'_> = Handlebars::new();
 
     let public_folder_path: &str = file_path(FileType::Theme)?;
 
@@ -53,7 +54,7 @@ pub fn run(listener: TcpListener, config: Config) -> std::io::Result<Server> {
         .register_templates_directory(".html", format!("{}/templates", public_folder_path))
         .unwrap();
 
-    let handlebars_ref: web::Data<Handlebars> = web::Data::new(handlebars);
+    let handlebars_ref: web::Data<Handlebars<'_>> = web::Data::new(handlebars);
 
     let cloned_config_threads_opt: u8 = config.threads;
 
@@ -69,11 +70,17 @@ pub fn run(listener: TcpListener, config: Config) -> std::io::Result<Server> {
         ]);
 
         App::new()
+            .wrap(Logger::default()) // added logging middleware for logging.
            .app_data(handlebars_ref.clone())
            .app_data(web::Data::new(config.clone()))
            .wrap(cors)
-            .wrap(Logger::default()) // added logging middleware for logging.
-            .wrap(Compress::default()) // compress request headers to reduce memory usage.
+            .wrap(Governor::new(
+                &GovernorConfigBuilder::default()
+                    .per_second(config.rate_limiter.time_limit as u64)
+                    .burst_size(config.rate_limiter.number_of_requests as u32)
+                    .finish()
+                    .unwrap(),
+            ))
             // Serve images and static files (css and js files).
            .service(
                fs::Files::new("/static", format!("{}/static", public_folder_path))
@@ -83,12 +90,12 @@ pub fn run(listener: TcpListener, config: Config) -> std::io::Result<Server> {
                fs::Files::new("/images", format!("{}/images", public_folder_path))
                    .show_files_listing(),
            )
-            .service(routes::robots_data) // robots.txt
-            .service(routes::index) // index page
-            .service(routes::search) // search page
-            .service(routes::about) // about page
-            .service(routes::settings) // settings page
-            .default_service(web::route().to(routes::not_found)) // error page
+            .service(router::robots_data) // robots.txt
+            .service(router::index) // index page
+            .service(server::routes::search::search) // search page
+            .service(router::about) // about page
+            .service(router::settings) // settings page
+            .default_service(web::route().to(router::not_found)) // error page
    })
    .workers(cloned_config_threads_opt as usize)
    // Start server on 127.0.0.1 with the user provided port number. for example 127.0.0.1:8080.
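
The hunks above swap the `Compress` middleware for `actix-governor`'s rate limiter. For context, here is a minimal, self-contained sketch of how that `Governor` wrapper behaves on its own, using only the builder calls seen in the diff (`per_second`, `burst_size`, `finish`); the route, port, and limits here are illustrative assumptions, not values from the commit:

    use actix_governor::{Governor, GovernorConfigBuilder};
    use actix_web::{web, App, HttpResponse, HttpServer};

    #[actix_web::main]
    async fn main() -> std::io::Result<()> {
        // Replenish one request slot every 3 seconds and allow bursts of up to
        // 20 requests, mirroring how `time_limit` and `number_of_requests` from
        // config.lua feed the builder in src/lib.rs.
        let governor_conf = GovernorConfigBuilder::default()
            .per_second(3)
            .burst_size(20)
            .finish()
            .unwrap();

        HttpServer::new(move || {
            App::new()
                .wrap(Governor::new(&governor_conf))
                .route("/", web::get().to(|| async { HttpResponse::Ok().body("ok") }))
        })
        .bind("127.0.0.1:8080")?
        .run()
        .await
    }

Once the burst allowance is exhausted, further requests from a client are rejected with HTTP 429 until the quota replenishes.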
src/{results → models}/aggregation_models.rs RENAMED
@@ -4,25 +4,22 @@
 use serde::{Deserialize, Serialize};
 use smallvec::SmallVec;
 
-use crate::{config::parser_models::Style, engines::engine_models::EngineError};
+use super::{engine_models::EngineError, parser_models::Style};
 
 /// A named struct to store the raw scraped search results scraped search results from the
 /// upstream search engines before aggregating it.It derives the Clone trait which is needed
 /// to write idiomatic rust using `Iterators`.
-///
-/// # Fields
-///
-/// * `title` - The title of the search result.
-/// * `url` - The url which is accessed when clicked on it
 /// (href url in html in simple words).
-/// * `description` - The description of the search result.
-/// * `engine` - The names of the upstream engines from which this results were provided.
-#[derive(Clone, Serialize, Deserialize, Debug)]
+#[derive(Clone, Serialize, Deserialize)]
 #[serde(rename_all = "camelCase")]
 pub struct SearchResult {
+    /// The title of the search result.
     pub title: String,
+    /// The url which is accessed when clicked on it
     pub url: String,
+    /// The description of the search result.
     pub description: String,
+    /// The names of the upstream engines from which this results were provided.
     pub engine: SmallVec<[String; 0]>,
 }
 
@@ -64,14 +61,27 @@ impl SearchResult {
     }
 }
 
+/// A named struct that stores the error info related to the upstream search engines.
 #[derive(Serialize, Deserialize, Clone)]
 pub struct EngineErrorInfo {
+    /// It stores the error type which occured while fetching the result from a particular search
+    /// engine.
     pub error: String,
+    /// It stores the name of the engine that failed to provide the requested search results.
     pub engine: String,
+    /// It stores the name of the color to indicate whether how severe the particular error is (In
+    /// other words it indicates the severity of the error/issue).
     pub severity_color: String,
 }
 
 impl EngineErrorInfo {
+    /// Constructs a new `SearchResult` with the given arguments needed for the struct.
+    ///
+    /// # Arguments
+    ///
+    /// * `error` - It takes the error type which occured while fetching the result from a particular
+    /// search engine.
+    /// * `engine` - It takes the name of the engine that failed to provide the requested search results.
     pub fn new(error: &EngineError, engine: &str) -> Self {
         Self {
             error: match error {
@@ -91,25 +101,26 @@ impl EngineErrorInfo {
 
 /// A named struct to store, serialize, deserialize the all the search results scraped and
 /// aggregated from the upstream search engines.
-///
-/// # Fields
-///
-/// * `results` - Stores the individual serializable `SearchResult` struct into a vector of
 /// `SearchResult` structs.
-/// * `page_query` - Stores the current pages search query `q` provided in the search url.
-/// * `style` - Stores the theming options for the website.
-/// * `engine_errors_info` - Stores the information on which engines failed with their engine name
-/// and the type of error that caused it.
-/// * `empty_result_set` - Stores a boolean which indicates that no engines gave a result for the
-/// given search query.
 #[derive(Serialize, Deserialize, Default)]
 #[serde(rename_all = "camelCase")]
 pub struct SearchResults {
+    /// Stores the individual serializable `SearchResult` struct into a vector of
     pub results: Vec<SearchResult>,
+    /// Stores the current pages search query `q` provided in the search url.
     pub page_query: String,
+    /// Stores the theming options for the website.
     pub style: Style,
+    /// Stores the information on which engines failed with their engine name
+    /// and the type of error that caused it.
     pub engine_errors_info: Vec<EngineErrorInfo>,
+    /// Stores the flag option which holds the check value that the following
+    /// search query was disallowed when the safe search level set to 4 and it
+    /// was present in the `Blocklist` file.
     pub disallowed: bool,
+    /// Stores the flag option which holds the check value that the following
+    /// search query was filtered when the safe search level set to 3 and it
+    /// was present in the `Blocklist` file.
     pub filtered: bool,
 }
 
@@ -122,9 +133,8 @@ impl SearchResults {
     /// and stores it into a vector of `SearchResult` structs.
     /// * `page_query` - Takes an argument of current page`s search query `q` provided in
     /// the search url.
-    /// * `empty_result_set` - Takes a boolean which indicates that no engines gave a result for the
-    /// given search query.
-    /// * ``
+    /// * `engine_errors_info` - Takes an array of structs which contains information regarding
+    /// which engines failed with their names, reason and their severity color name.
     pub fn new(
         results: Vec<SearchResult>,
         page_query: &str,
@@ -1,24 +1,23 @@
1
  //! This module provides the error enum to handle different errors associated while requesting data from
2
  //! the upstream search engines with the search query provided by the user.
3
 
4
- use crate::results::aggregation_models::SearchResult;
5
  use error_stack::{Result, ResultExt};
6
  use std::{collections::HashMap, fmt, time::Duration};
7
 
8
  /// A custom error type used for handle engine associated errors.
9
- ///
10
- /// This enum provides variants three different categories of errors:
11
- /// * `RequestError` - This variant handles all request related errors like forbidden, not found,
12
- /// etc.
13
- /// * `EmptyResultSet` - This variant handles the not results found error provide by the upstream
14
- /// search engines.
15
- /// * `UnexpectedError` - This variant handles all the errors which are unexpected or occur rarely
16
- /// and are errors mostly related to failure in initialization of HeaderMap, Selector errors and
17
- /// all other errors occurring within the code handling the `upstream search engines`.
18
  #[derive(Debug)]
19
  pub enum EngineError {
 
 
20
  EmptyResultSet,
 
 
21
  RequestError,
 
 
 
 
22
  UnexpectedError,
23
  }
24
 
@@ -46,6 +45,23 @@ impl error_stack::Context for EngineError {}
46
  /// A trait to define common behavior for all search engines.
47
  #[async_trait::async_trait]
48
  pub trait SearchEngine: Sync + Send {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  async fn fetch_html_from_upstream(
50
  &self,
51
  url: &str,
@@ -65,6 +81,24 @@ pub trait SearchEngine: Sync + Send {
65
  .change_context(EngineError::RequestError)?)
66
  }
67
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
  async fn results(
69
  &self,
70
  query: &str,
@@ -75,8 +109,12 @@ pub trait SearchEngine: Sync + Send {
75
  ) -> Result<HashMap<String, SearchResult>, EngineError>;
76
  }
77
 
 
78
  pub struct EngineHandler {
 
 
79
  engine: Box<dyn SearchEngine>,
 
80
  name: &'static str,
81
  }
82
 
@@ -87,12 +125,23 @@ impl Clone for EngineHandler {
87
  }
88
 
89
  impl EngineHandler {
90
- /// parses an engine name into an engine handler, returns none if the engine is unknown
 
 
 
 
 
 
 
 
91
  pub fn new(engine_name: &str) -> Option<Self> {
92
  let engine: (&'static str, Box<dyn SearchEngine>) =
93
  match engine_name.to_lowercase().as_str() {
94
- "duckduckgo" => ("duckduckgo", Box::new(super::duckduckgo::DuckDuckGo)),
95
- "searx" => ("searx", Box::new(super::searx::Searx)),
 
 
 
96
  _ => return None,
97
  };
98
 
@@ -102,6 +151,8 @@ impl EngineHandler {
102
  })
103
  }
104
 
 
 
105
  pub fn into_name_engine(self) -> (&'static str, Box<dyn SearchEngine>) {
106
  (self.name, self.engine)
107
  }
 
1
  //! This module provides the error enum to handle different errors associated while requesting data from
2
  //! the upstream search engines with the search query provided by the user.
3
 
4
+ use super::aggregation_models::SearchResult;
5
  use error_stack::{Result, ResultExt};
6
  use std::{collections::HashMap, fmt, time::Duration};
7
 
8
  /// A custom error type used for handle engine associated errors.
 
 
 
 
 
 
 
 
 
9
  #[derive(Debug)]
10
  pub enum EngineError {
11
+ /// This variant handles all request related errors like forbidden, not found,
12
+ /// etc.
13
  EmptyResultSet,
14
+ /// This variant handles the not results found error provide by the upstream
15
+ /// search engines.
16
  RequestError,
17
+ /// This variant handles all the errors which are unexpected or occur rarely
18
+ /// and are errors mostly related to failure in initialization of HeaderMap,
19
+ /// Selector errors and all other errors occurring within the code handling
20
+ /// the `upstream search engines`.
21
  UnexpectedError,
22
  }
23
 
 
45
  /// A trait to define common behavior for all search engines.
46
  #[async_trait::async_trait]
47
  pub trait SearchEngine: Sync + Send {
48
+ /// This helper function fetches/requests the search results from the upstream search engine in
49
+ /// an html form.
50
+ ///
51
+ /// # Arguments
52
+ ///
53
+ /// * `url` - It takes the url of the upstream search engine with the user requested search
54
+ /// query appended in the search parameters.
55
+ /// * `header_map` - It takes the http request headers to be sent to the upstream engine in
56
+ /// order to prevent being detected as a bot. It takes the header as a HeaderMap type.
57
+ /// * `request_timeout` - It takes the request timeout value as seconds which is used to limit
58
+ /// the amount of time for each request to remain connected when until the results can be provided
59
+ /// by the upstream engine.
60
+ ///
61
+ /// # Error
62
+ ///
63
+ /// It returns the html data as a string if the upstream engine provides the data as expected
64
+ /// otherwise it returns a custom `EngineError`.
65
  async fn fetch_html_from_upstream(
66
  &self,
67
  url: &str,
 
81
  .change_context(EngineError::RequestError)?)
82
  }
83
 
84
+ /// This function scrapes results from the upstream engine and puts all the scraped results like
85
+ /// title, visiting_url (href in html),engine (from which engine it was fetched from) and description
86
+ /// in a RawSearchResult and then adds that to HashMap whose keys are url and values are RawSearchResult
87
+ /// struct and then returns it within a Result enum.
88
+ ///
89
+ /// # Arguments
90
+ ///
91
+ /// * `query` - Takes the user provided query to query to the upstream search engine with.
92
+ /// * `page` - Takes an u32 as an argument.
93
+ /// * `user_agent` - Takes a random user agent string as an argument.
94
+ /// * `request_timeout` - Takes a time (secs) as a value which controls the server request timeout.
95
+ ///
96
+ /// # Errors
97
+ ///
98
+ /// Returns an `EngineErrorKind` if the user is not connected to the internet or if their is failure to
99
+ /// reach the above `upstream search engine` page or if the `upstream search engine` is unable to
100
+ /// provide results for the requested search query and also returns error if the scraping selector
101
+ /// or HeaderMap fails to initialize.
102
  async fn results(
103
  &self,
104
  query: &str,
 
109
  ) -> Result<HashMap<String, SearchResult>, EngineError>;
110
  }
111
 
112
+ /// A named struct which stores the engine struct with the name of the associated engine.
113
  pub struct EngineHandler {
114
+ /// It stores the engine struct wrapped in a box smart pointer as the engine struct implements
115
+ /// the `SearchEngine` trait.
116
  engine: Box<dyn SearchEngine>,
117
+ /// It stores the name of the engine to which the struct is associated to.
118
  name: &'static str,
119
  }
120
 
 
125
  }
126
 
127
  impl EngineHandler {
128
+ /// Parses an engine name into an engine handler.
129
+ ///
130
+ /// # Arguments
131
+ ///
132
+ /// * `engine_name` - It takes the name of the engine to which the struct was associated to.
133
+ ///
134
+ /// # Returns
135
+ ///
136
+ /// It returns an option either containing the value or a none if the engine is unknown
137
  pub fn new(engine_name: &str) -> Option<Self> {
138
  let engine: (&'static str, Box<dyn SearchEngine>) =
139
  match engine_name.to_lowercase().as_str() {
140
+ "duckduckgo" => (
141
+ "duckduckgo",
142
+ Box::new(crate::engines::duckduckgo::DuckDuckGo),
143
+ ),
144
+ "searx" => ("searx", Box::new(crate::engines::searx::Searx)),
145
  _ => return None,
146
  };
147
 
 
151
  })
152
  }
153
 
154
+ /// This function converts the EngineHandler type into a tuple containing the engine name and
155
+ /// the associated engine struct.
156
  pub fn into_name_engine(self) -> (&'static str, Box<dyn SearchEngine>) {
157
  (self.name, self.engine)
158
  }
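
The relocated `EngineHandler::new` still resolves engine names case-insensitively and yields `None` for unknown names, which the config parser relies on via `filter_map`. A usage sketch, assuming websurfx itself as a dependency (the engine list is illustrative):

    use websurfx::models::engine_models::EngineHandler;

    fn main() {
        let engines: Vec<EngineHandler> = ["DuckDuckGo", "searx", "unknown-engine"]
            .iter()
            .filter_map(|name| EngineHandler::new(name))
            .collect();
        // "unknown-engine" is dropped because `new` returned `None` for it.
        assert_eq!(engines.len(), 2);
    }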
src/models/mod.rs ADDED
@@ -0,0 +1,8 @@
+//! This module provides modules which in turn provides various models for aggregrating search
+//! results, parsing config file, providing trait to standardize search engine handling code,
+//! custom engine error for the search engine, etc.
+
+pub mod aggregation_models;
+pub mod engine_models;
+pub mod parser_models;
+pub mod server_models;
src/{config → models}/parser_models.rs RENAMED
@@ -12,15 +12,12 @@ use serde::{Deserialize, Serialize};
 /// order to allow the deserializing the json back to struct in aggregate function in
 /// aggregator.rs and create a new struct out of it and then serialize it back to json and pass
 /// it to the template files.
-///
-/// # Fields
-//
-/// * `theme` - It stores the parsed theme option used to set a theme for the website.
-/// * `colorscheme` - It stores the parsed colorscheme option used to set a colorscheme for the
-/// theme being used.
 #[derive(Serialize, Deserialize, Clone, Default)]
 pub struct Style {
+    /// It stores the parsed theme option used to set a theme for the website.
     pub theme: String,
+    /// It stores the parsed colorscheme option used to set a colorscheme for the
+    /// theme being used.
     pub colorscheme: String,
 }
 
@@ -36,3 +33,20 @@ impl Style {
         Style { theme, colorscheme }
     }
 }
+
+/// Configuration options for the aggregator.
+#[derive(Clone)]
+pub struct AggregatorConfig {
+    /// It stores the option to whether enable or disable random delays between
+    /// requests.
+    pub random_delay: bool,
+}
+
+/// Configuration options for the rate limiter middleware.
+#[derive(Clone)]
+pub struct RateLimiter {
+    /// The number of request that are allowed within a provided time limit.
+    pub number_of_requests: u8,
+    /// The time limit in which the quantity of requests that should be accepted.
+    pub time_limit: u8,
+}
src/models/server_models.rs ADDED
@@ -0,0 +1,26 @@
+//! This module provides the models to parse cookies and search parameters from the search
+//! engine website.
+use serde::Deserialize;
+
+/// A named struct which deserializes all the user provided search parameters and stores them.
+#[derive(Deserialize)]
+pub struct SearchParams {
+    /// It stores the search parameter option `q` (or query in simple words)
+    /// of the search url.
+    pub q: Option<String>,
+    /// It stores the search parameter `page` (or pageno in simple words)
+    /// of the search url.
+    pub page: Option<u32>,
+}
+
+/// A named struct which is used to deserialize the cookies fetched from the client side.
+#[allow(dead_code)]
+#[derive(Deserialize)]
+pub struct Cookie {
+    /// It stores the theme name used in the website.
+    pub theme: String,
+    /// It stores the colorscheme name used for the website theme.
+    pub colorscheme: String,
+    /// It stores the user selected upstream search engines selected from the UI.
+    pub engines: Vec<String>,
+}
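
Since `SearchParams` derives `Deserialize` with both fields optional, it can be exercised outside the server. A small sketch assuming websurfx and `serde_urlencoded` (the format that actix-web's `web::Query` extractor parses) as dependencies; the query strings are illustrative:

    use websurfx::models::server_models::SearchParams;

    fn main() {
        // Both parameters present in the search url's query string.
        let params: SearchParams =
            serde_urlencoded::from_str("q=rust+search&page=2").expect("valid query string");
        assert_eq!(params.q.as_deref(), Some("rust search"));
        assert_eq!(params.page, Some(2));

        // Both fields are Option, so an empty query string also deserializes.
        let empty: SearchParams = serde_urlencoded::from_str("").expect("valid query string");
        assert!(empty.q.is_none() && empty.page.is_none());
    }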
src/results/aggregator.rs CHANGED
@@ -1,27 +1,23 @@
 //! This module provides the functionality to scrape and gathers all the results from the upstream
 //! search engines and then removes duplicate results.
 
-use std::{
-    collections::HashMap,
-    io::{BufReader, Read},
-    time::Duration,
-};
-
-use super::{
+use super::user_agent::random_user_agent;
+use crate::handler::paths::{file_path, FileType};
+use crate::models::{
     aggregation_models::{EngineErrorInfo, SearchResult, SearchResults},
-    user_agent::random_user_agent,
+    engine_models::{EngineError, EngineHandler},
 };
 use error_stack::Report;
 use rand::Rng;
 use regex::Regex;
+use std::{
+    collections::HashMap,
+    io::{BufReader, Read},
+    time::Duration,
+};
 use std::{fs::File, io::BufRead};
 use tokio::task::JoinHandle;
 
-use crate::{
-    engines::engine_models::{EngineError, EngineHandler},
-    handler::paths::{file_path, FileType},
-};
-
 /// Aliases for long type annotations
 type FutureVec = Vec<JoinHandle<Result<HashMap<String, SearchResult>, Report<EngineError>>>>;
 
src/results/mod.rs CHANGED
@@ -1,3 +1,6 @@
-pub mod aggregation_models;
+//! This module provides modules that handle the functionality to aggregate the fetched search
+//! results from the upstream search engines and filters it if safe search is set to 3 or 4. Also,
+//! provides various models to aggregate search results into a standardized form.
+
 pub mod aggregator;
 pub mod user_agent;
src/results/user_agent.rs CHANGED
@@ -4,6 +4,8 @@ use std::sync::OnceLock;
 
  use fake_useragent::{Browsers, UserAgents, UserAgentsBuilder};
 
+ /// A static variable which stores the initially built `UserAgents` struct so that it can be
+ /// reused again and again without the need of reinitializing the `UserAgents` struct.
  static USER_AGENTS: OnceLock<UserAgents> = OnceLock::new();
 
  /// A function to generate random user agent to improve privacy of the user.
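As background on the `OnceLock` pattern documented above, a self-contained sketch using only the standard library (the `UserAgentList` type and the hard-coded agent string are stand-ins for the crate's `UserAgents` builder, not its real API):

    use std::sync::OnceLock;

    // Hypothetical stand-in for the `UserAgents` struct built by `UserAgentsBuilder`.
    struct UserAgentList(Vec<&'static str>);

    static USER_AGENTS: OnceLock<UserAgentList> = OnceLock::new();

    fn random_user_agent() -> &'static str {
        let agents = USER_AGENTS.get_or_init(|| {
            // This closure runs exactly once, on the first call; every later
            // call reuses the cached list without rebuilding it.
            UserAgentList(vec!["Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/118.0"])
        });
        agents.0[0] // the real function picks a random entry instead
    }

    fn main() {
        assert_eq!(random_user_agent(), random_user_agent());
    }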
src/server/mod.rs CHANGED
@@ -1 +1,7 @@
+ //! This module provides modules that handle the functionality of handling different routes/paths
+ //! for the `websurfx` search engine website. It also handles the parsing of search parameters in
+ //! the search route and caches the next, current and previous search results in the search
+ //! routes with the help of the redis server.
+
+ pub mod router;
  pub mod routes;
src/server/router.rs ADDED
@@ -0,0 +1,64 @@
+ //! This module provides the functionality to handle different routes of the `websurfx`
+ //! meta search engine website and provide appropriate response to each route/page
+ //! when requested.
+
+ use crate::{
+     config::parser::Config,
+     handler::paths::{file_path, FileType},
+ };
+ use actix_web::{get, web, HttpRequest, HttpResponse};
+ use handlebars::Handlebars;
+ use std::fs::read_to_string;
+
+ /// Handles the route of index page or main page of the `websurfx` meta search engine website.
+ #[get("/")]
+ pub async fn index(
+     hbs: web::Data<Handlebars<'_>>,
+     config: web::Data<Config>,
+ ) -> Result<HttpResponse, Box<dyn std::error::Error>> {
+     let page_content: String = hbs.render("index", &config.style).unwrap();
+     Ok(HttpResponse::Ok().body(page_content))
+ }
+
+ /// Handles the route of any other accessed route/page which is not provided by the
+ /// website, essentially the 404 error page.
+ pub async fn not_found(
+     hbs: web::Data<Handlebars<'_>>,
+     config: web::Data<Config>,
+ ) -> Result<HttpResponse, Box<dyn std::error::Error>> {
+     let page_content: String = hbs.render("404", &config.style)?;
+
+     Ok(HttpResponse::Ok()
+         .content_type("text/html; charset=utf-8")
+         .body(page_content))
+ }
+
+ /// Handles the route of robots.txt page of the `websurfx` meta search engine website.
+ #[get("/robots.txt")]
+ pub async fn robots_data(_req: HttpRequest) -> Result<HttpResponse, Box<dyn std::error::Error>> {
+     let page_content: String =
+         read_to_string(format!("{}/robots.txt", file_path(FileType::Theme)?))?;
+     Ok(HttpResponse::Ok()
+         .content_type("text/plain; charset=ascii")
+         .body(page_content))
+ }
+
+ /// Handles the route of about page of the `websurfx` meta search engine website.
+ #[get("/about")]
+ pub async fn about(
+     hbs: web::Data<Handlebars<'_>>,
+     config: web::Data<Config>,
+ ) -> Result<HttpResponse, Box<dyn std::error::Error>> {
+     let page_content: String = hbs.render("about", &config.style)?;
+     Ok(HttpResponse::Ok().body(page_content))
+ }
+
+ /// Handles the route of settings page of the `websurfx` meta search engine website.
+ #[get("/settings")]
+ pub async fn settings(
+     hbs: web::Data<Handlebars<'_>>,
+     config: web::Data<Config>,
+ ) -> Result<HttpResponse, Box<dyn std::error::Error>> {
+     let page_content: String = hbs.render("settings", &config.style)?;
+     Ok(HttpResponse::Ok().body(page_content))
+ }
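For orientation, a hedged sketch of how handlers like these are typically mounted on an actix-web `App`; the commit itself does not show this wiring, and the `Handlebars`/`Config` app data registration that the handlers expect is elided:

    use actix_web::{web, App, HttpServer};

    #[actix_web::main]
    async fn main() -> std::io::Result<()> {
        HttpServer::new(|| {
            App::new()
                // .app_data(...) calls for Handlebars<'_> and Config would go here.
                .service(router::index)
                .service(router::about)
                .service(router::settings)
                .service(router::robots_data)
                // `not_found` carries no #[get] attribute, so it is mounted as
                // the fallback for every unmatched path.
                .default_service(web::route().to(router::not_found))
        })
        .bind(("127.0.0.1", 8080))?
        .run()
        .await
    }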
src/server/routes/mod.rs ADDED
@@ -0,0 +1,3 @@
+ //! This module provides modules to handle various routes in the search engine website.
+
+ pub mod search;
src/server/{routes.rs β†’ routes/search.rs} RENAMED
@@ -1,23 +1,20 @@
- //! This module provides the functionality to handle different routes of the `websurfx`
- //! meta search engine website and provide appropriate response to each route/page
- //! when requested.
-
- use std::{
-     fs::{read_to_string, File},
-     io::{BufRead, BufReader, Read},
- };
+ //! This module handles the search route of the search engine website.
 
  use crate::{
      cache::cacher::RedisCache,
      config::parser::Config,
-     engines::engine_models::EngineHandler,
      handler::paths::{file_path, FileType},
-     results::{aggregation_models::SearchResults, aggregator::aggregate},
+     models::{aggregation_models::SearchResults, engine_models::EngineHandler},
+     results::aggregator::aggregate,
  };
  use actix_web::{get, web, HttpRequest, HttpResponse};
  use handlebars::Handlebars;
  use regex::Regex;
  use serde::Deserialize;
+ use std::{
+     fs::{read_to_string, File},
+     io::{BufRead, BufReader, Read},
+ };
  use tokio::join;
 
  // ---- Constants ----
@@ -25,17 +22,16 @@ use tokio::join;
  static REDIS_CACHE: async_once_cell::OnceCell<RedisCache> = async_once_cell::OnceCell::new();
 
  /// A named struct which deserializes all the user provided search parameters and stores them.
- ///
- /// # Fields
- ///
- /// * `q` - It stores the search parameter option `q` (or query in simple words)
- /// of the search url.
- /// * `page` - It stores the search parameter `page` (or pageno in simple words)
- /// of the search url.
  #[derive(Deserialize)]
- struct SearchParams {
+ pub struct SearchParams {
+     /// It stores the search parameter option `q` (or query in simple words)
+     /// of the search url.
      q: Option<String>,
+     /// It stores the search parameter `page` (or pageno in simple words)
+     /// of the search url.
      page: Option<u32>,
+     /// It stores the search parameter `safesearch` (or safe search level in simple words) of the
+     /// search url.
      safesearch: Option<u8>,
  }
 
@@ -63,17 +59,14 @@ pub async fn not_found(
  }
 
  /// A named struct which is used to deserialize the cookies fetched from the client side.
- ///
- /// # Fields
- ///
- /// * `theme` - It stores the theme name used in the website.
- /// * `colorscheme` - It stores the colorscheme name used for the website theme.
- /// * `engines` - It stores the user selected upstream search engines selected from the UI.
  #[allow(dead_code)]
  #[derive(Deserialize)]
  struct Cookie<'a> {
+     /// It stores the theme name used in the website.
      theme: &'a str,
+     /// It stores the colorscheme name used for the website theme.
      colorscheme: &'a str,
+     /// It stores the user selected upstream search engines selected from the UI.
      engines: Vec<&'a str>,
  }
 
@@ -174,8 +167,21 @@ pub async fn search(
      }
  }
 
- /// Fetches the results for a query and page.
- /// First checks the redis cache, if that fails it gets proper results
+ /// Fetches the results for a query and page. It first checks the redis cache, and if that
+ /// fails it gets proper results by requesting them from the upstream search engines.
+ ///
+ /// # Arguments
+ ///
+ /// * `url` - It takes the url of the current page that requested the search results for a
+ /// particular search query.
+ /// * `config` - It takes a parsed config struct.
+ /// * `query` - It takes the user provided search query.
+ /// * `req` - It takes the `HttpRequest` struct as a value.
+ ///
+ /// # Error
+ ///
+ /// It returns the `SearchResults` struct if the search results could be successfully fetched from
+ /// the cache or from the upstream search engines, otherwise it returns an appropriate error.
  async fn results(
      url: String,
      config: &Config,
@@ -184,6 +190,7 @@ async fn results(
      req: HttpRequest,
      safe_search: u8,
  ) -> Result<SearchResults, Box<dyn std::error::Error>> {
+     // Initialize redis cache connection struct
      let mut redis_cache: RedisCache = REDIS_CACHE
          .get_or_init(async {
              // Initialize redis cache connection pool only once and store it in the heap.
@@ -191,7 +198,6 @@ async fn results(
      })
      .await
      .clone();
-
      // fetch the cached results json.
      let cached_results_json: Result<String, error_stack::Report<crate::cache::error::PoolError>> =
          redis_cache.clone().cached_json(&url).await;
@@ -223,7 +229,8 @@ async fn results(
      // UI and use that.
      let mut results: SearchResults = match req.cookie("appCookie") {
          Some(cookie_value) => {
-             let cookie_value: Cookie = serde_json::from_str(cookie_value.name_value().1)?;
+             let cookie_value: Cookie<'_> =
+                 serde_json::from_str(cookie_value.name_value().1)?;
 
              let engines: Vec<EngineHandler> = cookie_value
                  .engines
@@ -267,6 +274,8 @@ async fn results(
      }
  }
 
+ /// A helper function which checks whether the search query contains any keywords which should be
+ /// disallowed/allowed based on the regex-based rules present in the blocklist and allowlist files.
  fn is_match_from_filter_list(
      file_path: &str,
      query: &str,
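One detail worth noting in the hunks above: `Cookie<'a>` borrows its strings straight out of the raw cookie text, which `serde_json` supports zero-copy. A small sketch of that deserialization (the cookie JSON and theme names here are invented for illustration):

    use serde::Deserialize;

    #[derive(Deserialize)]
    struct Cookie<'a> {
        theme: &'a str,
        colorscheme: &'a str,
        engines: Vec<&'a str>,
    }

    fn main() {
        let raw = r#"{"theme":"simple","colorscheme":"dark","engines":["DuckDuckGo"]}"#;
        // Borrowed &str fields point into `raw`; no string copies are made.
        let cookie: Cookie<'_> = serde_json::from_str(raw).unwrap();
        assert_eq!(cookie.theme, "simple");
        assert_eq!(cookie.colorscheme, "dark");
        assert_eq!(cookie.engines, vec!["DuckDuckGo"]);
    }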
websurfx/config.lua CHANGED
@@ -10,6 +10,10 @@ production_use = false -- whether to use production mode or not (in other words
  -- if production_use is set to true
  -- There will be a random delay before sending the request to the search engines, this is to prevent DDoSing the upstream search engines from a large number of simultaneous requests.
  request_timeout = 30 -- timeout for the search requests sent to the upstream search engines to be fetched (value in seconds).
+ rate_limiter = {
+     number_of_requests = 20, -- The number of requests that are allowed within the provided time limit.
+     time_limit = 3, -- The time limit within which the above number of requests should be accepted.
+ }
 
  -- ### Search ###
  -- Filter results based on different levels. The levels provided are:
@@ -45,4 +49,7 @@ theme = "simple" -- the theme name which should be used for the website
  redis_url = "redis://127.0.0.1:8082" -- redis connection url address on which the client should connect on.
 
  -- ### Search Engines ###
- upstream_search_engines = { DuckDuckGo = true, Searx = false } -- select the upstream search engines from which the results should be fetched.
+ upstream_search_engines = {
+     DuckDuckGo = true,
+     Searx = false,
+ } -- select the upstream search engines from which the results should be fetched.
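The commit does not show the Rust code that consumes this `rate_limiter` table, so the following is a rough illustration of the intended semantics only (at most 20 requests accepted per 3-second window); the real limiter may well use a different algorithm:

    use std::time::{Duration, Instant};

    // Illustrative fixed-window counter using the limits from config.lua above.
    struct RateLimiter {
        number_of_requests: u32, // 20 in the config above
        time_limit: Duration,    // 3 seconds in the config above
        window_start: Instant,
        seen: u32,
    }

    impl RateLimiter {
        fn allow(&mut self) -> bool {
            // Start a fresh window once the time limit has elapsed.
            if self.window_start.elapsed() > self.time_limit {
                self.window_start = Instant::now();
                self.seen = 0;
            }
            self.seen += 1;
            self.seen <= self.number_of_requests
        }
    }

    fn main() {
        let mut limiter = RateLimiter {
            number_of_requests: 20,
            time_limit: Duration::from_secs(3),
            window_start: Instant::now(),
            seen: 0,
        };
        // The 21st request inside the same 3-second window is rejected.
        let verdicts: Vec<bool> = (0..21).map(|_| limiter.allow()).collect();
        assert!(verdicts[..20].iter().all(|&v| v) && !verdicts[20]);
    }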