Merge branch 'rolling' into fix-gitpod-setup
- Cargo.lock +111 -10
- Cargo.toml +1 -0
- src/cache/cacher.rs +4 -7
- src/cache/error.rs +3 -6
- src/cache/mod.rs +3 -0
- src/config/mod.rs +3 -1
- src/config/parser.rs +29 -28
- src/engines/duckduckgo.rs +2 -20
- src/engines/mod.rs +5 -1
- src/engines/searx.rs +2 -22
- src/handler/mod.rs +4 -0
- src/handler/paths.rs +25 -21
- src/lib.rs +24 -17
- src/{results → models}/aggregation_models.rs +32 -22
- src/{engines → models}/engine_models.rs +64 -13
- src/models/mod.rs +8 -0
- src/{config → models}/parser_models.rs +20 -6
- src/models/server_models.rs +26 -0
- src/results/aggregator.rs +9 -13
- src/results/mod.rs +4 -1
- src/results/user_agent.rs +2 -0
- src/server/mod.rs +6 -0
- src/server/router.rs +64 -0
- src/server/routes/mod.rs +3 -0
- src/server/{routes.rs → routes/search.rs} +37 -28
- websurfx/config.lua +8 -1
Cargo.lock
CHANGED
@@ -57,6 +57,18 @@ dependencies = [
  "pin-project-lite",
 ]
 
+[[package]]
+name = "actix-governor"
+version = "0.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "46ff2d40f2bc627b8054c5e20fa6b0b0cf9428699b54bd41634e9ae3098ad555"
+dependencies = [
+ "actix-http",
+ "actix-web",
+ "futures 0.3.28",
+ "governor",
+]
+
 [[package]]
 name = "actix-http"
 version = "3.4.0"
@@ -590,7 +602,7 @@ version = "0.12.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "888604f00b3db336d2af898ec3c1d5d0ddf5e6d462220f2ededc33a87ac4bbd5"
 dependencies = [
- "time 0.1.…
+ "time 0.1.43",
  "url 1.7.2",
 ]
 
@@ -618,7 +630,7 @@ dependencies = [
  "publicsuffix",
  "serde",
  "serde_json",
- "time 0.1.…
+ "time 0.1.43",
  "try_from",
  "url 1.7.2",
 ]
@@ -817,6 +829,19 @@ dependencies = [
  "syn 2.0.32",
 ]
 
+[[package]]
+name = "dashmap"
+version = "5.5.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "978747c1d849a7d2ee5e8adc0159961c48fb7e5db2f06af6723b80123bb53856"
+dependencies = [
+ "cfg-if 1.0.0",
+ "hashbrown 0.14.0",
+ "lock_api 0.4.10",
+ "once_cell",
+ "parking_lot_core 0.9.8",
+]
+
 [[package]]
 name = "deranged"
 version = "0.3.8"
@@ -1162,6 +1187,12 @@ version = "0.3.28"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "76d3d132be6c0e6aa1534069c705a74a5997a356c0dc2f86a47765e5617c5b65"
 
+[[package]]
+name = "futures-timer"
+version = "3.0.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e64b03909df88034c26dc1547e8970b91f98bdb65165d6a4e9110d94263dbb2c"
+
 [[package]]
 name = "futures-util"
 version = "0.3.28"
@@ -1225,6 +1256,24 @@ version = "0.28.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "6fb8d784f27acf97159b40fc4db5ecd8aa23b9ad5ef69cdd136d3bc80665f0c0"
 
+[[package]]
+name = "governor"
+version = "0.5.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c390a940a5d157878dd057c78680a33ce3415bcd05b4799509ea44210914b4d5"
+dependencies = [
+ "cfg-if 1.0.0",
+ "dashmap",
+ "futures 0.3.28",
+ "futures-timer",
+ "no-std-compat",
+ "nonzero_ext",
+ "parking_lot 0.12.1",
+ "quanta",
+ "rand 0.8.5",
+ "smallvec 1.11.0",
+]
+
 [[package]]
 name = "h2"
 version = "0.1.26"
@@ -1289,6 +1338,12 @@ version = "0.12.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
 
+[[package]]
+name = "hashbrown"
+version = "0.14.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2c6201b9ff9fd90a5a3bac2e56a830d0caa509576f0e503818ee82c181b3437a"
+
 [[package]]
 name = "hermit-abi"
 version = "0.3.2"
@@ -1410,7 +1465,7 @@ dependencies = [
  "log",
  "net2",
  "rustc_version 0.2.3",
- "time 0.1.…
+ "time 0.1.43",
  "tokio 0.1.22",
  "tokio-buf",
  "tokio-executor",
@@ -1511,7 +1566,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99"
 dependencies = [
  "autocfg 1.1.0",
- "hashbrown",
+ "hashbrown 0.12.3",
 ]
 
 [[package]]
@@ -1672,6 +1727,15 @@ version = "0.1.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4"
 
+[[package]]
+name = "mach"
+version = "0.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b823e83b2affd8f40a9ee8c29dbc56404c1e34cd2710921f2801e2cf29527afa"
+dependencies = [
+ "libc",
+]
+
 [[package]]
 name = "markup5ever"
 version = "0.8.1"
@@ -1887,6 +1951,18 @@ version = "0.5.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ab250442c86f1850815b5d268639dff018c0627022bc1940eb2d642ca1ce12f0"
 
+[[package]]
+name = "no-std-compat"
+version = "0.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b93853da6d84c2e3c7d730d6473e8817692dd89be387eb01b94d7f108ecb5b8c"
+
+[[package]]
+name = "nonzero_ext"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "38bf9645c8b145698bb0b18a4637dcacbc421ea49bef2317e4fd8065a387cf21"
+
 [[package]]
 name = "num-traits"
 version = "0.2.16"
@@ -2307,6 +2383,22 @@ dependencies = [
  "url 2.4.1",
 ]
 
+[[package]]
+name = "quanta"
+version = "0.9.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "20afe714292d5e879d8b12740aa223c6a88f118af41870e8b6196e39a02238a8"
+dependencies = [
+ "crossbeam-utils 0.8.16",
+ "libc",
+ "mach",
+ "once_cell",
+ "raw-cpuid",
+ "wasi 0.10.2+wasi-snapshot-preview1",
+ "web-sys",
+ "winapi 0.3.9",
+]
+
 [[package]]
 name = "quote"
 version = "0.6.13"
@@ -2461,6 +2553,15 @@ dependencies = [
  "rand_core 0.3.1",
 ]
 
+[[package]]
+name = "raw-cpuid"
+version = "10.7.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6c297679cb867470fa8c9f67dbba74a78d78e3e98d7cf2b08d6d71540f797332"
+dependencies = [
+ "bitflags 1.3.2",
+]
+
 [[package]]
 name = "rayon"
 version = "1.7.0"
@@ -2583,7 +2684,7 @@ dependencies = [
  "serde",
  "serde_json",
  "serde_urlencoded 0.5.5",
- "time 0.1.…
+ "time 0.1.43",
  "tokio 0.1.22",
  "tokio-executor",
  "tokio-io",
@@ -3157,12 +3258,11 @@ checksum = "3bf63baf9f5039dadc247375c29eb13706706cfde997d0330d05aa63a77d8820"
 
 [[package]]
 name = "time"
-version = "0.1.…
+version = "0.1.43"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "…
+checksum = "ca8a50ef2360fbd1eeb0ecd46795a87a19024eb4b53c5dc916ca1fd95fe62438"
 dependencies = [
  "libc",
- "wasi 0.10.0+wasi-snapshot-preview1",
  "winapi 0.3.9",
 ]
 
@@ -3609,9 +3709,9 @@ dependencies = [
 
 [[package]]
 name = "wasi"
-version = "0.10.0+wasi-snapshot-preview1"
+version = "0.10.2+wasi-snapshot-preview1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "…
+checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6"
 
 [[package]]
 name = "wasi"
@@ -3701,6 +3801,7 @@ version = "0.20.7"
 dependencies = [
  "actix-cors",
  "actix-files",
+ "actix-governor",
  "actix-web",
  "async-once-cell",
  "async-trait",
Cargo.toml
CHANGED
@@ -32,6 +32,7 @@ futures = {version="0.3.28"}
 dhat = {version="0.3.2", optional = true}
 mimalloc = { version = "0.1.38", default-features = false }
 async-once-cell = {version="0.5.3"}
+actix-governor = {version="0.4.1"}
 
 [dev-dependencies]
 rusty-hook = "^0.11.2"
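The only manifest change is the new actix-governor dependency, which backs the rate-limiting middleware wired into the server in the src/lib.rs diff further down. As a quick orientation, here is a minimal, self-contained sketch of the same builder API; it is an illustration, not code from this commit, and the port and limit values are made up:

// Illustrative sketch of actix-governor 0.4 usage; the commit's real wiring
// lives in src/lib.rs below. Port and limit values here are arbitrary.
use actix_governor::{Governor, GovernorConfigBuilder};
use actix_web::{App, HttpServer};

#[actix_web::main]
async fn main() -> std::io::Result<()> {
    // Replenish one request permit every 3 seconds, allowing bursts of up to 20.
    let governor_conf = GovernorConfigBuilder::default()
        .per_second(3)
        .burst_size(20)
        .finish()
        .unwrap();

    HttpServer::new(move || App::new().wrap(Governor::new(&governor_conf)))
        .bind(("127.0.0.1", 8080))?
        .run()
        .await
}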
src/cache/cacher.rs
CHANGED
@@ -10,17 +10,14 @@ use super::error::PoolError;
 
 /// A named struct which stores the redis Connection url address to which the client will
 /// connect to.
-///
-/// # Fields
-///
-/// * `connection_pool` - It stores a pool of connections ready to be used.
-/// * `pool_size` - It stores the size of the connection pool (in other words the number of
-/// connections that should be stored in the pool).
-/// * `current_connection` - It stores the index of which connection is being used at the moment.
 #[derive(Clone)]
 pub struct RedisCache {
+    /// It stores a pool of connections ready to be used.
     connection_pool: Vec<ConnectionManager>,
+    /// It stores the size of the connection pool (in other words the number of
+    /// connections that should be stored in the pool).
     pool_size: u8,
+    /// It stores the index of which connection is being used at the moment.
     current_connection: u8,
 }
 
src/cache/error.rs
CHANGED
@@ -5,15 +5,12 @@ use std::fmt;
 use redis::RedisError;
 
 /// A custom error type used for handling redis async pool associated errors.
-///
-/// This enum provides variants three different categories of errors:
-/// * `RedisError` - This variant handles all errors related to `RedisError`,
-/// * `PoolExhaustionWithConnectionDropError` - This variant handles the error
-/// which occurs when all the connections in the connection pool return a connection
-/// dropped redis error.
 #[derive(Debug)]
 pub enum PoolError {
+    /// This variant handles all errors related to `RedisError`,
     RedisError(RedisError),
+    /// This variant handles the errors which occurs when all the connections
+    /// in the connection pool return a connection dropped redis error.
     PoolExhaustionWithConnectionDropError,
 }
 
src/cache/mod.rs
CHANGED
@@ -1,2 +1,5 @@
+//! This module provides the modules which provide the functionality to cache the aggregated
+//! results fetched and aggregated from the upstream search engines in a json format.
+
 pub mod cacher;
 pub mod error;
src/config/mod.rs
CHANGED
@@ -1,2 +1,4 @@
+//! This module provides the modules which handles the functionality to parse the lua config
+//! and convert the config options into rust readable form.
+
 pub mod parser;
-pub mod parser_models;
src/config/parser.rs
CHANGED
@@ -3,52 +3,42 @@
 
 use crate::handler::paths::{file_path, FileType};
 
-use …
+use crate::models::parser_models::{AggregatorConfig, RateLimiter, Style};
 use log::LevelFilter;
 use mlua::Lua;
 use std::{collections::HashMap, fs, thread::available_parallelism};
 
 /// A named struct which stores the parsed config file options.
-///
-/// # Fields
-//
-/// * `port` - It stores the parsed port number option on which the server should launch.
-/// * `binding_ip` - It stores the parsed ip address option on which the server should launch
-/// * `style` - It stores the theming options for the website.
-/// * `redis_url` - It stores the redis connection url address on which the redis
-/// client should connect.
-/// * `aggregator` - It stores the option to whether enable or disable production use.
-/// * `logging` - It stores the option to whether enable or disable logs.
-/// * `debug` - It stores the option to whether enable or disable debug mode.
-/// * `upstream_search_engines` - It stores all the engine names that were enabled by the user.
-/// * `request_timeout` - It stores the time (secs) which controls the server request timeout.
-/// * `threads` - It stores the number of threads which controls the app will use to run.
 #[derive(Clone)]
 pub struct Config {
+    /// It stores the parsed port number option on which the server should launch.
     pub port: u16,
+    /// It stores the parsed ip address option on which the server should launch
     pub binding_ip: String,
+    /// It stores the theming options for the website.
     pub style: Style,
+    /// It stores the redis connection url address on which the redis
+    /// client should connect.
     pub redis_url: String,
+    /// It stores the option to whether enable or disable production use.
    pub aggregator: AggregatorConfig,
+    /// It stores the option to whether enable or disable logs.
     pub logging: bool,
+    /// It stores the option to whether enable or disable debug mode.
     pub debug: bool,
-…
+    /// It stores all the engine names that were enabled by the user.
+    pub upstream_search_engines: Vec<crate::models::engine_models::EngineHandler>,
+    /// It stores the time (secs) which controls the server request timeout.
     pub request_timeout: u8,
+    /// It stores the number of threads which controls the app will use to run.
     pub threads: u8,
+    /// It stores configuration options for the ratelimiting middleware.
+    pub rate_limiter: RateLimiter,
+    /// It stores the level of safe search to be used for restricting content in the
+    /// search results.
     pub safe_search: u8,
 }
 
-/// Configuration options for the aggregator.
-///
-/// # Fields
-///
-/// * `random_delay` - It stores the option to whether enable or disable random delays between
-/// requests.
-#[derive(Clone)]
-pub struct AggregatorConfig {
-    pub random_delay: bool,
-}
-
 impl Config {
     /// A function which parses the config.lua file and puts all the parsed options in the newly
     /// constructed Config struct and returns it.
@@ -90,6 +80,8 @@ impl Config {
             parsed_threads
         };
 
+        let rate_limiter = globals.get::<_, HashMap<String, u8>>("rate_limiter")?;
+
         let parsed_safe_search: u8 = globals.get::<_, u8>("safe_search")?;
         let safe_search: u8 = match parsed_safe_search {
             0..=4 => parsed_safe_search,
@@ -117,16 +109,25 @@ impl Config {
                 .get::<_, HashMap<String, bool>>("upstream_search_engines")?
                 .into_iter()
                 .filter_map(|(key, value)| value.then_some(key))
-                .filter_map(|engine| crate::…
+                .filter_map(|engine| crate::models::engine_models::EngineHandler::new(&engine))
                 .collect(),
             request_timeout: globals.get::<_, u8>("request_timeout")?,
             threads,
+            rate_limiter: RateLimiter {
+                number_of_requests: rate_limiter["number_of_requests"],
+                time_limit: rate_limiter["time_limit"],
+            },
             safe_search,
         })
     }
 }
 
 /// a helper function that sets the proper logging level
+///
+/// # Arguments
+///
+/// * `debug` - It takes the option to whether enable or disable debug mode.
+/// * `logging` - It takes the option to whether enable or disable logs.
 fn set_logging_level(debug: bool, logging: bool) {
     if let Ok(pkg_env_var) = std::env::var("PKG_ENV") {
         if pkg_env_var.to_lowercase() == "dev" {
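The parser now expects a `rate_limiter` table in config.lua (that file's diff is not rendered on this page, so its exact contents are an assumption). A minimal sketch of the lookup the new code performs, with the Lua globals stubbed out as a plain HashMap and the two key names taken verbatim from the diff:

// Sketch only: the HashMap stands in for
// `globals.get::<_, HashMap<String, u8>>("rate_limiter")?` from the diff above.
use std::collections::HashMap;

struct RateLimiter {
    number_of_requests: u8,
    time_limit: u8,
}

fn main() {
    let rate_limiter: HashMap<String, u8> = HashMap::from([
        ("number_of_requests".to_owned(), 20),
        ("time_limit".to_owned(), 3),
    ]);

    // Indexing panics if a key is missing, just like the parser code above,
    // so a config without both keys fails loudly at startup.
    let parsed = RateLimiter {
        number_of_requests: rate_limiter["number_of_requests"],
        time_limit: rate_limiter["time_limit"],
    };
    assert_eq!(parsed.number_of_requests, 20);
    assert_eq!(parsed.time_limit, 3);
}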
src/engines/duckduckgo.rs
CHANGED
@@ -7,9 +7,9 @@ use std::collections::HashMap;
 use reqwest::header::HeaderMap;
 use scraper::{Html, Selector};
 
-use crate::…
+use crate::models::aggregation_models::SearchResult;
 
-use …
+use crate::models::engine_models::{EngineError, SearchEngine};
 
 use error_stack::{Report, Result, ResultExt};
 
@@ -19,24 +19,6 @@ pub struct DuckDuckGo;
 
 #[async_trait::async_trait]
 impl SearchEngine for DuckDuckGo {
-    /// This function scrapes results from the upstream engine duckduckgo and puts all the scraped
-    /// results like title, visiting_url (href in html),engine (from which engine it was fetched from)
-    /// and description in a RawSearchResult and then adds that to HashMap whose keys are url and
-    /// values are RawSearchResult struct and then returns it within a Result enum.
-    ///
-    /// # Arguments
-    ///
-    /// * `query` - Takes the user provided query to query to the upstream search engine with.
-    /// * `page` - Takes an u32 as an argument.
-    /// * `user_agent` - Takes a random user agent string as an argument.
-    /// * `request_timeout` - Takes a time (secs) as a value which controls the server request timeout.
-    ///
-    /// # Errors
-    ///
-    /// Returns an `EngineErrorKind` if the user is not connected to the internet or if their is failure to
-    /// reach the above `upstream search engine` page or if the `upstream search engine` is unable to
-    /// provide results for the requested search query and also returns error if the scraping selector
-    /// or HeaderMap fails to initialize.
     async fn results(
         &self,
         query: &str,
src/engines/mod.rs
CHANGED
@@ -1,3 +1,7 @@
+//! This module provides different modules which handles the functionlity to fetch results from the
+//! upstream search engines based on user requested queries. Also provides different models to
+//! provide a standard functions to be implemented for all the upstream search engine handling
+//! code. Moreover, it also provides a custom error for the upstream search engine handling code.
+
 pub mod duckduckgo;
-pub mod engine_models;
 pub mod searx;
src/engines/searx.rs
CHANGED
@@ -6,9 +6,8 @@ use reqwest::header::HeaderMap;
 use scraper::{Html, Selector};
 use std::collections::HashMap;
 
-use crate::…
-
-use super::engine_models::{EngineError, SearchEngine};
+use crate::models::aggregation_models::SearchResult;
+use crate::models::engine_models::{EngineError, SearchEngine};
 use error_stack::{Report, Result, ResultExt};
 
 /// A new Searx engine type defined in-order to implement the `SearchEngine` trait which allows to
@@ -17,25 +16,6 @@ pub struct Searx;
 
 #[async_trait::async_trait]
 impl SearchEngine for Searx {
-    /// This function scrapes results from the upstream engine duckduckgo and puts all the scraped
-    /// results like title, visiting_url (href in html),engine (from which engine it was fetched from)
-    /// and description in a RawSearchResult and then adds that to HashMap whose keys are url and
-    /// values are RawSearchResult struct and then returns it within a Result enum.
-    ///
-    /// # Arguments
-    ///
-    /// * `query` - Takes the user provided query to query to the upstream search engine with.
-    /// * `page` - Takes an u32 as an argument.
-    /// * `user_agent` - Takes a random user agent string as an argument.
-    /// * `request_timeout` - Takes a time (secs) as a value which controls the server request timeout.
-    ///
-    /// # Errors
-    ///
-    /// Returns an `EngineErrorKind` if the user is not connected to the internet or if their is failure to
-    /// reach the above `upstream search engine` page or if the `upstream search engine` is unable to
-    /// provide results for the requested search query and also returns error if the scraping selector
-    /// or HeaderMap fails to initialize.
-
     async fn results(
         &self,
         query: &str,
src/handler/mod.rs
CHANGED
@@ -1 +1,5 @@
+//! This module provides modules which provide the functionality to handle paths for different
+//! files present on different paths and provide one appropriate path on which it is present and
+//! can be used.
+
 pub mod paths;
src/handler/paths.rs
CHANGED
@@ -7,42 +7,46 @@ use std::path::Path;
 use std::sync::OnceLock;
 
 // ------- Constants --------
-…
-…
-…
-…
-…
+/// The constant holding the name of the theme folder.
+const PUBLIC_DIRECTORY_NAME: &str = "public";
+/// The constant holding the name of the common folder.
+const COMMON_DIRECTORY_NAME: &str = "websurfx";
+/// The constant holding the name of the config file.
+const CONFIG_FILE_NAME: &str = "config.lua";
+/// The constant holding the name of the AllowList text file.
+const ALLOWLIST_FILE_NAME: &str = "allowlist.txt";
+/// The constant holding the name of the BlockList text file.
+const BLOCKLIST_FILE_NAME: &str = "blocklist.txt";
 
+/// An enum type which provides different variants to handle paths for various files/folders.
 #[derive(Hash, PartialEq, Eq, Debug)]
 pub enum FileType {
+    /// This variant handles all the paths associated with the config file.
     Config,
+    /// This variant handles all the paths associated with the Allowlist text file.
     AllowList,
+    /// This variant handles all the paths associated with the BlockList text file.
     BlockList,
+    /// This variant handles all the paths associated with the public folder (Theme folder).
     Theme,
 }
 
+/// A static variable which stores the different filesystem paths for various file/folder types.
 static FILE_PATHS_FOR_DIFF_FILE_TYPES: OnceLock<HashMap<FileType, Vec<String>>> = OnceLock::new();
 
-/// A …
-/// file exists on that path.
+/// A function which returns an appropriate path for thr provided file type by checking if the path
+/// for the given file type exists on that path.
 ///
 /// # Error
 ///
-/// Returns a …
-///
-/// 1. `~/.config/websurfx/` if it not present here then it fallbacks to the next one (2)
-/// 2. `/etc/xdg/websurfx/config.lua` if it is not present here then it fallbacks to the next
-/// one (3).
-/// 3. `websurfx/` (under project folder ( or codebase in other words)) if it is not present
-/// here then it returns an error as mentioned above.
-
-/// A function which returns an appropriate theme directory path checking if the theme
-/// directory exists on that path.
+/// Returns a `<File Name> folder/file not found!!` error if the give file_type folder/file is not
+/// present on the path on which it is being tested.
 ///
-/// # …
+/// # Example
+///
+/// If this function is give the file_type of Theme variant then the theme folder is checked by the
+/// following steps:
 ///
-/// Returns a `Theme (public) folder not found!!` error if the theme folder is not present under following
-/// paths which are:
 /// 1. `/opt/websurfx` if it not present here then it fallbacks to the next one (2)
 /// 2. Under project folder ( or codebase in other words) if it is not present
 /// here then it returns an error as mentioned above.
@@ -110,6 +114,6 @@ pub fn file_path(file_type: FileType) -> Result<&'static str, Error> {
     // if no of the configs above exist, return error
     Err(Error::new(
         std::io::ErrorKind::NotFound,
-        format!("{:?} file not found!!", file_type),
+        format!("{:?} file/folder not found!!", file_type),
     ))
 }
src/lib.rs
CHANGED
@@ -1,25 +1,26 @@
 //! This main library module provides the functionality to provide and handle the Tcp server
 //! and register all the routes for the `websurfx` meta search engine website.
 
+#![forbid(unsafe_code, clippy::panic)]
+#![deny(missing_docs, clippy::missing_docs_in_private_items, clippy::perf)]
+#![warn(clippy::cognitive_complexity, rust_2018_idioms)]
+
 pub mod cache;
 pub mod config;
 pub mod engines;
 pub mod handler;
+pub mod models;
 pub mod results;
 pub mod server;
 
 use std::net::TcpListener;
 
-use crate::server::…
+use crate::server::router;
 
 use actix_cors::Cors;
 use actix_files as fs;
-use …
-…
-    http::header,
-    middleware::{Compress, Logger},
-    web, App, HttpServer,
-};
+use actix_governor::{Governor, GovernorConfigBuilder};
+use actix_web::{dev::Server, http::header, middleware::Logger, web, App, HttpServer};
 use config::parser::Config;
 use handlebars::Handlebars;
 use handler::paths::{file_path, FileType};
@@ -45,7 +46,7 @@ use handler::paths::{file_path, FileType};
 /// let server = run(listener,config).expect("Failed to start server");
 /// ```
 pub fn run(listener: TcpListener, config: Config) -> std::io::Result<Server> {
-    let mut handlebars: Handlebars = Handlebars::new();
+    let mut handlebars: Handlebars<'_> = Handlebars::new();
 
     let public_folder_path: &str = file_path(FileType::Theme)?;
 
@@ -53,7 +54,7 @@ pub fn run(listener: TcpListener, config: Config) -> std::io::Result<Server> {
         .register_templates_directory(".html", format!("{}/templates", public_folder_path))
         .unwrap();
 
-    let handlebars_ref: web::Data<Handlebars…
+    let handlebars_ref: web::Data<Handlebars<'_>> = web::Data::new(handlebars);
 
     let cloned_config_threads_opt: u8 = config.threads;
 
@@ -69,11 +70,17 @@ pub fn run(listener: TcpListener, config: Config) -> std::io::Result<Server> {
         ]);
 
         App::new()
+            .wrap(Logger::default()) // added logging middleware for logging.
            .app_data(handlebars_ref.clone())
            .app_data(web::Data::new(config.clone()))
            .wrap(cors)
-            .wrap(…
-…
+            .wrap(Governor::new(
+                &GovernorConfigBuilder::default()
+                    .per_second(config.rate_limiter.time_limit as u64)
+                    .burst_size(config.rate_limiter.number_of_requests as u32)
+                    .finish()
+                    .unwrap(),
+            ))
            // Serve images and static files (css and js files).
            .service(
                fs::Files::new("/static", format!("{}/static", public_folder_path))
@@ -83,12 +90,12 @@ pub fn run(listener: TcpListener, config: Config) -> std::io::Result<Server> {
                fs::Files::new("/images", format!("{}/images", public_folder_path))
                    .show_files_listing(),
            )
-            .service(…
-            .service(…
-            .service(routes::search) // search page
-            .service(…
-            .service(…
-            .default_service(web::route().to(…
+            .service(router::robots_data) // robots.txt
+            .service(router::index) // index page
+            .service(server::routes::search::search) // search page
+            .service(router::about) // about page
+            .service(router::settings) // settings page
+            .default_service(web::route().to(router::not_found)) // error page
        })
        .workers(cloned_config_threads_opt as usize)
        // Start server on 127.0.0.1 with the user provided port number. for example 127.0.0.1:8080.
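For callers, starting the server is unchanged by this refactor. A launch sketch under stated assumptions: only `run(listener, config)` is confirmed by the doc example quoted in the diff above; the `Config::parse(true)` signature and the runtime setup are assumptions:

// Hypothetical launch sketch; see the assumptions named in the text above.
use std::net::TcpListener;
use websurfx::{config::parser::Config, run};

fn main() -> std::io::Result<()> {
    let config = Config::parse(true).expect("Failed to parse config.lua");
    let listener = TcpListener::bind((config.binding_ip.as_str(), config.port))
        .expect("Failed to bind address");
    let server = run(listener, config)?;
    // `run` returns an actix `dev::Server` future; drive it on a runtime.
    actix_web::rt::System::new().block_on(server)
}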
src/{results → models}/aggregation_models.rs
RENAMED
@@ -4,25 +4,22 @@
 use serde::{Deserialize, Serialize};
 use smallvec::SmallVec;
 
-use …
+use super::{engine_models::EngineError, parser_models::Style};
 
 /// A named struct to store the raw scraped search results scraped search results from the
 /// upstream search engines before aggregating it.It derives the Clone trait which is needed
 /// to write idiomatic rust using `Iterators`.
-///
-/// # Fields
-///
-/// * `title` - The title of the search result.
-/// * `url` - The url which is accessed when clicked on it
 /// (href url in html in simple words).
-…
-/// * `engine` - The names of the upstream engines from which this results were provided.
-#[derive(Clone, Serialize, Deserialize, Debug)]
+#[derive(Clone, Serialize, Deserialize)]
 #[serde(rename_all = "camelCase")]
 pub struct SearchResult {
+    /// The title of the search result.
     pub title: String,
+    /// The url which is accessed when clicked on it
     pub url: String,
+    /// The description of the search result.
     pub description: String,
+    /// The names of the upstream engines from which this results were provided.
     pub engine: SmallVec<[String; 0]>,
 }
 
@@ -64,14 +61,27 @@ impl SearchResult {
     }
 }
 
+/// A named struct that stores the error info related to the upstream search engines.
 #[derive(Serialize, Deserialize, Clone)]
 pub struct EngineErrorInfo {
+    /// It stores the error type which occured while fetching the result from a particular search
+    /// engine.
     pub error: String,
+    /// It stores the name of the engine that failed to provide the requested search results.
     pub engine: String,
+    /// It stores the name of the color to indicate whether how severe the particular error is (In
+    /// other words it indicates the severity of the error/issue).
     pub severity_color: String,
 }
 
 impl EngineErrorInfo {
+    /// Constructs a new `SearchResult` with the given arguments needed for the struct.
+    ///
+    /// # Arguments
+    ///
+    /// * `error` - It takes the error type which occured while fetching the result from a particular
+    /// search engine.
+    /// * `engine` - It takes the name of the engine that failed to provide the requested search results.
     pub fn new(error: &EngineError, engine: &str) -> Self {
         Self {
             error: match error {
@@ -91,25 +101,26 @@ impl EngineErrorInfo {
 
 /// A named struct to store, serialize, deserialize the all the search results scraped and
 /// aggregated from the upstream search engines.
-///
-/// # Fields
-///
-/// * `results` - Stores the individual serializable `SearchResult` struct into a vector of
 /// `SearchResult` structs.
-/// * `page_query` - Stores the current pages search query `q` provided in the search url.
-/// * `style` - Stores the theming options for the website.
-/// * `engine_errors_info` - Stores the information on which engines failed with their engine name
-/// and the type of error that caused it.
-/// * `empty_result_set` - Stores a boolean which indicates that no engines gave a result for the
-/// given search query.
 #[derive(Serialize, Deserialize, Default)]
 #[serde(rename_all = "camelCase")]
 pub struct SearchResults {
+    /// Stores the individual serializable `SearchResult` struct into a vector of
     pub results: Vec<SearchResult>,
+    /// Stores the current pages search query `q` provided in the search url.
     pub page_query: String,
+    /// Stores the theming options for the website.
     pub style: Style,
+    /// Stores the information on which engines failed with their engine name
+    /// and the type of error that caused it.
     pub engine_errors_info: Vec<EngineErrorInfo>,
+    /// Stores the flag option which holds the check value that the following
+    /// search query was disallowed when the safe search level set to 4 and it
+    /// was present in the `Blocklist` file.
     pub disallowed: bool,
+    /// Stores the flag option which holds the check value that the following
+    /// search query was filtered when the safe search level set to 3 and it
+    /// was present in the `Blocklist` file.
     pub filtered: bool,
 }
 
@@ -122,9 +133,8 @@ impl SearchResults {
     /// and stores it into a vector of `SearchResult` structs.
     /// * `page_query` - Takes an argument of current page`s search query `q` provided in
     /// the search url.
-    /// * `…
-    ///
-    /// * `…
+    /// * `engine_errors_info` - Takes an array of structs which contains information regarding
+    /// which engines failed with their names, reason and their severity color name.
     pub fn new(
         results: Vec<SearchResult>,
         page_query: &str,
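A side note on the `#[serde(rename_all = "camelCase")]` attributes kept on these structs: multi-word fields such as `page_query` serialize as `pageQuery`, which is the shape the templates and cached JSON see. A self-contained check with simplified stand-in types (not the crate's own definitions):

// Stand-in types; only the rename behaviour is being demonstrated.
use serde::Serialize;

#[derive(Serialize)]
#[serde(rename_all = "camelCase")]
struct SearchResults {
    page_query: String,
    engine_errors_info: Vec<String>, // simplified stand-in for Vec<EngineErrorInfo>
}

fn main() {
    let results = SearchResults {
        page_query: "rust".to_owned(),
        engine_errors_info: vec![],
    };
    let json = serde_json::to_string(&results).unwrap();
    assert_eq!(json, r#"{"pageQuery":"rust","engineErrorsInfo":[]}"#);
}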
src/{engines → models}/engine_models.rs
RENAMED
@@ -1,24 +1,23 @@
 //! This module provides the error enum to handle different errors associated while requesting data from
 //! the upstream search engines with the search query provided by the user.
 
-use …
+use super::aggregation_models::SearchResult;
 use error_stack::{Result, ResultExt};
 use std::{collections::HashMap, fmt, time::Duration};
 
 /// A custom error type used for handle engine associated errors.
-///
-/// This enum provides variants three different categories of errors:
-/// * `RequestError` - This variant handles all request related errors like forbidden, not found,
-/// etc.
-/// * `EmptyResultSet` - This variant handles the not results found error provide by the upstream
-/// search engines.
-/// * `UnexpectedError` - This variant handles all the errors which are unexpected or occur rarely
-/// and are errors mostly related to failure in initialization of HeaderMap, Selector errors and
-/// all other errors occurring within the code handling the `upstream search engines`.
 #[derive(Debug)]
 pub enum EngineError {
+    /// This variant handles all request related errors like forbidden, not found,
+    /// etc.
     EmptyResultSet,
+    /// This variant handles the not results found error provide by the upstream
+    /// search engines.
     RequestError,
+    /// This variant handles all the errors which are unexpected or occur rarely
+    /// and are errors mostly related to failure in initialization of HeaderMap,
+    /// Selector errors and all other errors occurring within the code handling
+    /// the `upstream search engines`.
     UnexpectedError,
 }
 
@@ -46,6 +45,23 @@ impl error_stack::Context for EngineError {}
 /// A trait to define common behavior for all search engines.
 #[async_trait::async_trait]
 pub trait SearchEngine: Sync + Send {
+    /// This helper function fetches/requests the search results from the upstream search engine in
+    /// an html form.
+    ///
+    /// # Arguments
+    ///
+    /// * `url` - It takes the url of the upstream search engine with the user requested search
+    /// query appended in the search parameters.
+    /// * `header_map` - It takes the http request headers to be sent to the upstream engine in
+    /// order to prevent being detected as a bot. It takes the header as a HeaderMap type.
+    /// * `request_timeout` - It takes the request timeout value as seconds which is used to limit
+    /// the amount of time for each request to remain connected when until the results can be provided
+    /// by the upstream engine.
+    ///
+    /// # Error
+    ///
+    /// It returns the html data as a string if the upstream engine provides the data as expected
+    /// otherwise it returns a custom `EngineError`.
     async fn fetch_html_from_upstream(
         &self,
         url: &str,
@@ -65,6 +81,24 @@ pub trait SearchEngine: Sync + Send {
         .change_context(EngineError::RequestError)?)
     }
 
+    /// This function scrapes results from the upstream engine and puts all the scraped results like
+    /// title, visiting_url (href in html),engine (from which engine it was fetched from) and description
+    /// in a RawSearchResult and then adds that to HashMap whose keys are url and values are RawSearchResult
+    /// struct and then returns it within a Result enum.
+    ///
+    /// # Arguments
+    ///
+    /// * `query` - Takes the user provided query to query to the upstream search engine with.
+    /// * `page` - Takes an u32 as an argument.
+    /// * `user_agent` - Takes a random user agent string as an argument.
+    /// * `request_timeout` - Takes a time (secs) as a value which controls the server request timeout.
+    ///
+    /// # Errors
+    ///
+    /// Returns an `EngineErrorKind` if the user is not connected to the internet or if their is failure to
+    /// reach the above `upstream search engine` page or if the `upstream search engine` is unable to
+    /// provide results for the requested search query and also returns error if the scraping selector
+    /// or HeaderMap fails to initialize.
     async fn results(
         &self,
         query: &str,
@@ -75,8 +109,12 @@ pub trait SearchEngine: Sync + Send {
     ) -> Result<HashMap<String, SearchResult>, EngineError>;
 }
 
+/// A named struct which stores the engine struct with the name of the associated engine.
 pub struct EngineHandler {
+    /// It stores the engine struct wrapped in a box smart pointer as the engine struct implements
+    /// the `SearchEngine` trait.
     engine: Box<dyn SearchEngine>,
+    /// It stores the name of the engine to which the struct is associated to.
     name: &'static str,
 }
 
@@ -87,12 +125,23 @@ impl Clone for EngineHandler {
 }
 
 impl EngineHandler {
-    /// …
+    /// Parses an engine name into an engine handler.
+    ///
+    /// # Arguments
+    ///
+    /// * `engine_name` - It takes the name of the engine to which the struct was associated to.
+    ///
+    /// # Returns
+    ///
+    /// It returns an option either containing the value or a none if the engine is unknown
     pub fn new(engine_name: &str) -> Option<Self> {
         let engine: (&'static str, Box<dyn SearchEngine>) =
             match engine_name.to_lowercase().as_str() {
-                "duckduckgo" => (…
-…
+                "duckduckgo" => (
+                    "duckduckgo",
+                    Box::new(crate::engines::duckduckgo::DuckDuckGo),
+                ),
+                "searx" => ("searx", Box::new(crate::engines::searx::Searx)),
                 _ => return None,
             };
 
@@ -102,6 +151,8 @@ impl EngineHandler {
         })
     }
 
+    /// This function converts the EngineHandler type into a tuple containing the engine name and
+    /// the associated engine struct.
     pub fn into_name_engine(self) -> (&'static str, Box<dyn SearchEngine>) {
         (self.name, self.engine)
     }
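With `EngineHandler` relocated into `crate::models`, the parser's `filter_map` over engine names (see the src/config/parser.rs diff above) resolves names case-insensitively and silently drops unknown ones. A usage sketch; the `websurfx::models` path assumes the module layout this commit introduces, and "qwant" is just a deliberately unknown name:

// Usage sketch under the assumptions named above.
use websurfx::models::engine_models::EngineHandler;

fn main() {
    // Mirrors `.filter_map(|engine| EngineHandler::new(&engine))` from the
    // config parser: unknown names yield None and are skipped.
    let handlers: Vec<EngineHandler> = ["duckduckgo", "searx", "qwant"]
        .into_iter()
        .filter_map(EngineHandler::new)
        .collect();
    assert_eq!(handlers.len(), 2);
}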
src/models/mod.rs
ADDED
@@ -0,0 +1,8 @@
+//! This module provides modules which in turn provides various models for aggregrating search
+//! results, parsing config file, providing trait to standardize search engine handling code,
+//! custom engine error for the search engine, etc.
+
+pub mod aggregation_models;
+pub mod engine_models;
+pub mod parser_models;
+pub mod server_models;
src/{config → models}/parser_models.rs
RENAMED
@@ -12,15 +12,12 @@ use serde::{Deserialize, Serialize};
 /// order to allow the deserializing the json back to struct in aggregate function in
 /// aggregator.rs and create a new struct out of it and then serialize it back to json and pass
 /// it to the template files.
-///
-/// # Fields
-//
-/// * `theme` - It stores the parsed theme option used to set a theme for the website.
-/// * `colorscheme` - It stores the parsed colorscheme option used to set a colorscheme for the
-/// theme being used.
 #[derive(Serialize, Deserialize, Clone, Default)]
 pub struct Style {
+    /// It stores the parsed theme option used to set a theme for the website.
     pub theme: String,
+    /// It stores the parsed colorscheme option used to set a colorscheme for the
+    /// theme being used.
     pub colorscheme: String,
 }
 
@@ -36,3 +33,20 @@ impl Style {
         Style { theme, colorscheme }
     }
 }
+
+/// Configuration options for the aggregator.
+#[derive(Clone)]
+pub struct AggregatorConfig {
+    /// It stores the option to whether enable or disable random delays between
+    /// requests.
+    pub random_delay: bool,
+}
+
+/// Configuration options for the rate limiter middleware.
+#[derive(Clone)]
+pub struct RateLimiter {
+    /// The number of request that are allowed within a provided time limit.
+    pub number_of_requests: u8,
+    /// The time limit in which the quantity of requests that should be accepted.
+    pub time_limit: u8,
+}
src/models/server_models.rs
ADDED
@@ -0,0 +1,26 @@
+//! This module provides the models to parse cookies and search parameters from the search
+//! engine website.
+use serde::Deserialize;
+
+/// A named struct which deserializes all the user provided search parameters and stores them.
+#[derive(Deserialize)]
+pub struct SearchParams {
+    /// It stores the search parameter option `q` (or query in simple words)
+    /// of the search url.
+    pub q: Option<String>,
+    /// It stores the search parameter `page` (or page number in simple words)
+    /// of the search url.
+    pub page: Option<u32>,
+}
+
+/// A named struct which is used to deserialize the cookies fetched from the client side.
+#[allow(dead_code)]
+#[derive(Deserialize)]
+pub struct Cookie {
+    /// It stores the theme name used in the website.
+    pub theme: String,
+    /// It stores the colorscheme name used for the website theme.
+    pub colorscheme: String,
+    /// It stores the upstream search engines selected by the user from the UI.
+    pub engines: Vec<String>,
+}
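
Because `SearchParams` derives `Deserialize`, actix-web can build it directly from the query string with the `web::Query` extractor. A minimal, illustrative route follows; the real search route appears further down in this diff and does considerably more:

```rust
use actix_web::{get, web, HttpResponse};

use crate::models::server_models::SearchParams;

// Illustrative only: actix-web deserializes `?q=rust&page=2` into SearchParams.
#[get("/demo")]
async fn demo(params: web::Query<SearchParams>) -> HttpResponse {
    let query = params.q.clone().unwrap_or_default();
    let page = params.page.unwrap_or(1); // a missing `page` means the first page
    HttpResponse::Ok().body(format!("query: {query}, page: {page}"))
}
```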
src/results/aggregator.rs
CHANGED
@@ -1,27 +1,23 @@
 //! This module provides the functionality to scrape and gather all the results from the upstream
 //! search engines and then remove duplicate results.
 
-use std::{
-    collections::HashMap,
-    io::{BufReader, Read},
-    time::Duration,
-};
-
-use super::{
+use super::user_agent::random_user_agent;
+use crate::handler::paths::{file_path, FileType};
+use crate::models::{
     aggregation_models::{EngineErrorInfo, SearchResult, SearchResults},
-    user_agent::random_user_agent,
+    engine_models::{EngineError, EngineHandler},
 };
 use error_stack::Report;
 use rand::Rng;
 use regex::Regex;
+use std::{
+    collections::HashMap,
+    io::{BufReader, Read},
+    time::Duration,
+};
 use std::{fs::File, io::BufRead};
 use tokio::task::JoinHandle;
 
-use crate::{
-    engines::engine_models::{EngineError, EngineHandler},
-    handler::paths::{file_path, FileType},
-};
-
 /// Aliases for long type annotations
 type FutureVec = Vec<JoinHandle<Result<HashMap<String, SearchResult>, Report<EngineError>>>>;
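
The `FutureVec` alias exists because each upstream engine is queried in its own tokio task. Below is a self-contained sketch of the same spawn-then-join pattern, with the engine call and its `Report<EngineError>` error type replaced by a plain `HashMap` so the snippet stands alone:

```rust
use std::collections::HashMap;
use tokio::task::JoinHandle;

async fn demo_spawn_and_join() {
    let mut tasks: Vec<JoinHandle<HashMap<String, String>>> = Vec::new();
    for engine in ["duckduckgo", "searx"] {
        // One task per engine, exactly like the aggregator's FutureVec.
        tasks.push(tokio::spawn(async move {
            HashMap::from([(format!("{engine}://example"), engine.to_string())])
        }));
    }
    for task in tasks {
        // Awaiting a JoinHandle returns Err only if the task panicked.
        let results = task.await.expect("engine task panicked");
        println!("got {} result(s)", results.len());
    }
}
```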
src/results/mod.rs
CHANGED
@@ -1,3 +1,6 @@
-pub mod aggregation_models;
+//! This module provides modules that handle the functionality to aggregate the fetched search
+//! results from the upstream search engines and filter them if safe search is set to 3 or 4. It
+//! also provides various models to aggregate search results into a standardized form.
+
 pub mod aggregator;
 pub mod user_agent;
src/results/user_agent.rs
CHANGED
@@ -4,6 +4,8 @@ use std::sync::OnceLock;
 
 use fake_useragent::{Browsers, UserAgents, UserAgentsBuilder};
 
+/// A static variable which stores the initially built `UserAgents` struct, so that it can be
+/// reused again and again without the need of reinitializing the `UserAgents` struct.
 static USER_AGENTS: OnceLock<UserAgents> = OnceLock::new();
 
 /// A function to generate random user agent to improve privacy of the user.
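
The `OnceLock` above is the standard library's lazy one-time initializer: the first caller builds the `UserAgents` value, and every later caller gets the cached reference. The same pattern in a standalone form, with a `String` standing in for `UserAgents`:

```rust
use std::sync::OnceLock;

static GREETING: OnceLock<String> = OnceLock::new();

fn greeting() -> &'static str {
    // The closure runs at most once, even with concurrent callers;
    // afterwards the stored value is returned without re-running it.
    GREETING.get_or_init(|| "built exactly once".to_string())
}

fn main() {
    assert_eq!(greeting(), "built exactly once");
    assert_eq!(greeting(), greeting()); // second call reuses the cached value
}
```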
src/server/mod.rs
CHANGED
@@ -1 +1,7 @@
+//! This module provides modules that handle different routes/paths for the `websurfx` search
+//! engine website. It also handles the parsing of search parameters in the search route and
+//! caches the next, current and previous search results in the search routes with the help of
+//! the redis server.
+
+pub mod router;
 pub mod routes;
src/server/router.rs
ADDED
@@ -0,0 +1,64 @@
+//! This module provides the functionality to handle different routes of the `websurfx`
+//! meta search engine website and provide appropriate response to each route/page
+//! when requested.
+
+use crate::{
+    config::parser::Config,
+    handler::paths::{file_path, FileType},
+};
+use actix_web::{get, web, HttpRequest, HttpResponse};
+use handlebars::Handlebars;
+use std::fs::read_to_string;
+
+/// Handles the route of the index page or main page of the `websurfx` meta search engine website.
+#[get("/")]
+pub async fn index(
+    hbs: web::Data<Handlebars<'_>>,
+    config: web::Data<Config>,
+) -> Result<HttpResponse, Box<dyn std::error::Error>> {
+    let page_content: String = hbs.render("index", &config.style).unwrap();
+    Ok(HttpResponse::Ok().body(page_content))
+}
+
+/// Handles the route of any other accessed route/page which is not provided by the
+/// website, essentially the 404 error page.
+pub async fn not_found(
+    hbs: web::Data<Handlebars<'_>>,
+    config: web::Data<Config>,
+) -> Result<HttpResponse, Box<dyn std::error::Error>> {
+    let page_content: String = hbs.render("404", &config.style)?;
+
+    Ok(HttpResponse::Ok()
+        .content_type("text/html; charset=utf-8")
+        .body(page_content))
+}
+
+/// Handles the route of the robots.txt page of the `websurfx` meta search engine website.
+#[get("/robots.txt")]
+pub async fn robots_data(_req: HttpRequest) -> Result<HttpResponse, Box<dyn std::error::Error>> {
+    let page_content: String =
+        read_to_string(format!("{}/robots.txt", file_path(FileType::Theme)?))?;
+    Ok(HttpResponse::Ok()
+        .content_type("text/plain; charset=ascii")
+        .body(page_content))
+}
+
+/// Handles the route of the about page of the `websurfx` meta search engine website.
+#[get("/about")]
+pub async fn about(
+    hbs: web::Data<Handlebars<'_>>,
+    config: web::Data<Config>,
+) -> Result<HttpResponse, Box<dyn std::error::Error>> {
+    let page_content: String = hbs.render("about", &config.style)?;
+    Ok(HttpResponse::Ok().body(page_content))
+}
+
+/// Handles the route of the settings page of the `websurfx` meta search engine website.
+#[get("/settings")]
+pub async fn settings(
+    hbs: web::Data<Handlebars<'_>>,
+    config: web::Data<Config>,
+) -> Result<HttpResponse, Box<dyn std::error::Error>> {
+    let page_content: String = hbs.render("settings", &config.style)?;
+    Ok(HttpResponse::Ok().body(page_content))
+}
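
These handlers still have to be mounted on the application; the actual wiring lives in `src/lib.rs`, which this commit also changes. A minimal sketch of what that registration conventionally looks like in actix-web follows. The `Handlebars` and `Config` app data that the handlers extract are omitted here, so treat this as shape, not a drop-in:

```rust
use actix_web::{web, App, HttpServer};

use crate::server::router;

async fn demo_serve() -> std::io::Result<()> {
    HttpServer::new(|| {
        App::new()
            // `#[get(...)]` turned each handler into a mountable service.
            .service(router::index)
            .service(router::about)
            .service(router::settings)
            .service(router::robots_data)
            // Every unmatched path falls through to the 404 handler.
            .default_service(web::route().to(router::not_found))
    })
    .bind(("127.0.0.1", 8080))?
    .run()
    .await
}
```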
src/server/routes/mod.rs
ADDED
@@ -0,0 +1,3 @@
+//! This module provides modules to handle various routes in the search engine website.
+
+pub mod search;
src/server/{routes.rs → routes/search.rs}
RENAMED
@@ -1,23 +1,20 @@
-//! This module provides the functionality to handle different routes of the `websurfx`
-//! meta search engine website and provide appropriate response to each route/page
-//! when requested.
-
-use std::{
-    fs::{read_to_string, File},
-    io::{BufRead, BufReader, Read},
-};
+//! This module handles the search route of the search engine website.
 
 use crate::{
     cache::cacher::RedisCache,
     config::parser::Config,
-    engines::engine_models::EngineHandler,
     handler::paths::{file_path, FileType},
-    results::{aggregation_models::SearchResults, aggregator::aggregate},
+    models::{aggregation_models::SearchResults, engine_models::EngineHandler},
+    results::aggregator::aggregate,
 };
 use actix_web::{get, web, HttpRequest, HttpResponse};
 use handlebars::Handlebars;
 use regex::Regex;
 use serde::Deserialize;
+use std::{
+    fs::{read_to_string, File},
+    io::{BufRead, BufReader, Read},
+};
 use tokio::join;
 
 // ---- Constants ----
@@ -25,17 +22,16 @@ use tokio::join;
 static REDIS_CACHE: async_once_cell::OnceCell<RedisCache> = async_once_cell::OnceCell::new();
 
 /// A named struct which deserializes all the user provided search parameters and stores them.
-///
-/// # Fields
-///
-/// * `q` - It stores the search parameter option `q` (or query in simple words)
-/// of the search url.
-/// * `page` - It stores the search parameter `page` (or pageno in simple words)
-/// of the search url.
 #[derive(Deserialize)]
-struct SearchParams {
+pub struct SearchParams {
+    /// It stores the search parameter option `q` (or query in simple words)
+    /// of the search url.
     q: Option<String>,
+    /// It stores the search parameter `page` (or page number in simple words)
+    /// of the search url.
     page: Option<u32>,
+    /// It stores the search parameter `safesearch` (or safe search level in simple words) of the
+    /// search url.
     safesearch: Option<u8>,
 }
 
@@ -63,17 +59,14 @@ pub async fn not_found(
 }
 
 /// A named struct which is used to deserialize the cookies fetched from the client side.
-///
-/// # Fields
-///
-/// * `theme` - It stores the theme name used in the website.
-/// * `colorscheme` - It stores the colorscheme name used for the website theme.
-/// * `engines` - It stores the user selected upstream search engines selected from the UI.
 #[allow(dead_code)]
 #[derive(Deserialize)]
 struct Cookie<'a> {
+    /// It stores the theme name used in the website.
     theme: &'a str,
+    /// It stores the colorscheme name used for the website theme.
    colorscheme: &'a str,
+    /// It stores the upstream search engines selected by the user from the UI.
    engines: Vec<&'a str>,
 }
 
@@ -174,8 +167,21 @@ pub async fn search(
     }
 }
 
-/// Fetches the results for a query and page.
-///
+/// Fetches the results for a query and page. It first checks the redis cache, and if that
+/// fails it gets proper results by requesting from the upstream search engines.
+///
+/// # Arguments
+///
+/// * `url` - It takes the url of the current page that requested the search results for a
+///   particular search query.
+/// * `config` - It takes a parsed config struct.
+/// * `query` - It takes the user provided search query.
+/// * `req` - It takes the `HttpRequest` struct as a value.
+///
+/// # Error
+///
+/// It returns the `SearchResults` struct if the search results could be successfully fetched from
+/// the cache or from the upstream search engines, otherwise it returns an appropriate error.
 async fn results(
     url: String,
     config: &Config,
@@ -184,6 +190,7 @@ async fn results(
     req: HttpRequest,
     safe_search: u8,
 ) -> Result<SearchResults, Box<dyn std::error::Error>> {
+    // Initialize redis cache connection struct
     let mut redis_cache: RedisCache = REDIS_CACHE
         .get_or_init(async {
             // Initialize redis cache connection pool only once and store it in the heap.
@@ -191,7 +198,6 @@ async fn results(
         })
         .await
         .clone();
-
     // fetch the cached results json.
     let cached_results_json: Result<String, error_stack::Report<crate::cache::error::PoolError>> =
         redis_cache.clone().cached_json(&url).await;
@@ -223,7 +229,8 @@ async fn results(
     // UI and use that.
     let mut results: SearchResults = match req.cookie("appCookie") {
         Some(cookie_value) => {
-            let cookie_value: Cookie = serde_json::from_str(cookie_value.name_value().1)?;
+            let cookie_value: Cookie<'_> =
+                serde_json::from_str(cookie_value.name_value().1)?;
 
             let engines: Vec<EngineHandler> = cookie_value
                 .engines
@@ -267,6 +274,8 @@ async fn results(
     }
 }
 
+/// A helper function which checks whether the search query contains any keywords which should be
+/// disallowed/allowed based on the regex rules present in the blocklist and allowlist files.
 fn is_match_from_filter_list(
     file_path: &str,
     query: &str,
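
A note on the `Cookie<'_>` annotation added above: the struct borrows its strings from the raw cookie text, so `serde_json::from_str` deserializes it without copying. A standalone sketch of that borrowed-deserialization pattern, using a stand-in struct with the same shape and illustrative values:

```rust
use serde::Deserialize;

// Same field layout as the `Cookie<'a>` struct in this file.
#[derive(Deserialize)]
struct DemoCookie<'a> {
    theme: &'a str,
    colorscheme: &'a str,
    engines: Vec<&'a str>,
}

fn main() -> Result<(), serde_json::Error> {
    let raw = r#"{"theme":"simple","colorscheme":"one-dark","engines":["duckduckgo"]}"#;
    // The deserialized struct borrows from `raw`, so no string is copied;
    // this only works while `raw` outlives `cookie`.
    let cookie: DemoCookie<'_> = serde_json::from_str(raw)?;
    assert_eq!(cookie.theme, "simple");
    assert_eq!(cookie.engines, vec!["duckduckgo"]);
    Ok(())
}
```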
websurfx/config.lua
CHANGED
@@ -10,6 +10,10 @@ production_use = false -- whether to use production mode or not (in other words
 -- if production_use is set to true
 -- There will be a random delay before sending the request to the search engines, this is to prevent DDoSing the upstream search engines from a large number of simultaneous requests.
 request_timeout = 30 -- timeout for the search requests sent to the upstream search engines to be fetched (value in seconds).
+rate_limiter = {
+    number_of_requests = 20, -- The number of requests that are allowed within a provided time limit.
+    time_limit = 3, -- The time limit within which the given number of requests should be accepted.
+}
 
 -- ### Search ###
 -- Filter results based on different levels. The levels provided are:
@@ -45,4 +49,7 @@ theme = "simple" -- the theme name which should be used for the website
 redis_url = "redis://127.0.0.1:8082" -- redis connection url address on which the client should connect on.
 
 -- ### Search Engines ###
-upstream_search_engines = { DuckDuckGo = true, Searx = false } -- select the upstream search engines from which the results should be fetched.
+upstream_search_engines = {
+    DuckDuckGo = true,
+    Searx = false,
+} -- select the upstream search engines from which the results should be fetched.
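
For completeness, here is a sketch of how the `rate_limiter` table above could be read into the `RateLimiter` struct from `parser_models.rs`. The use of the `rlua` crate is an assumption; the project's actual Lua parsing lives in `src/config/parser.rs`, which this section does not show:

```rust
use rlua::Lua;

// Assumption: the config parser uses the `rlua` crate. This reads the same
// table shape as the `rate_limiter` entry in websurfx/config.lua.
fn read_rate_limiter(config_text: &str) -> rlua::Result<(u8, u8)> {
    let lua = Lua::new();
    lua.context(|ctx| {
        // Evaluate the config source, then pull the table out of the globals.
        ctx.load(config_text).exec()?;
        let table: rlua::Table = ctx.globals().get("rate_limiter")?;
        Ok((
            table.get("number_of_requests")?, // e.g. 20
            table.get("time_limit")?,         // e.g. 3
        ))
    })
}
```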