Merge branch 'rolling' into feat-rate-limiter-for-websurfx
Changed files:

- .gitignore (+1, -0)
- Cargo.lock (+146, -25)
- Cargo.toml (+8, -4)
- Dockerfile (+1, -1)
- README.md (+1, -1)
- docs/installation.md (+1, -1)
- src/bin/websurfx.rs (+14, -0)
- src/cache/cacher.rs (+104, -23)
- src/cache/error.rs (+40, -0)
- src/cache/mod.rs (+1, -0)
- src/config/parser.rs (+21, -20)
- src/config/parser_models.rs (+1, -1)
- src/engines/duckduckgo.rs (+20, -40)
- src/engines/engine_models.rs (+4, -6)
- src/engines/searx.rs (+16, -35)
- src/handler/paths.rs (+58, -54)
- src/lib.rs (+1, -1)
- src/results/aggregation_models.rs (+30, -30)
- src/results/aggregator.rs (+45, -46)
- src/results/user_agent.rs (+21, -17)
- src/server/routes.rs (+37, -24)
.gitignore
CHANGED

```diff
@@ -4,3 +4,4 @@ package-lock.json
 dump.rdb
 .vscode
 megalinter-reports/
+dhat-heap.json
```
Cargo.lock
CHANGED

```diff
@@ -300,12 +300,24 @@ version = "1.0.75"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "a4668cab20f66d8d020e1fbc0ebe47217433c1b6c8f2040faf858554e394ace6"
 
+[[package]]
+name = "arc-swap"
+version = "1.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bddcadddf5e9015d310179a59bb28c4d4b9920ad0f11e8e14dbadf654890c9a6"
+
 [[package]]
 name = "askama_escape"
 version = "0.10.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "619743e34b5ba4e9703bba34deac3427c72507c7159f5fd030aea8cac0cfe341"
 
+[[package]]
+name = "async-once-cell"
+version = "0.5.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9338790e78aa95a416786ec8389546c4b6a1dfc3dc36071ed9518a9413a542eb"
+
 [[package]]
 name = "async-trait"
 version = "0.1.73"
@@ -571,7 +583,11 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "35ed6e9d84f0b51a7f52daf1c7d71dd136fd7a3f41a8462b8cdb8c78d920fad4"
 dependencies = [
  "bytes 1.4.0",
+ "futures-core",
  "memchr",
+ "pin-project-lite",
+ "tokio 1.32.0",
+ "tokio-util",
 ]
 
 [[package]]
@@ -845,6 +861,22 @@ dependencies = [
  "syn 1.0.109",
 ]
 
+[[package]]
+name = "dhat"
+version = "0.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4f2aaf837aaf456f6706cb46386ba8dffd4013a757e36f4ea05c20dd46b209a3"
+dependencies = [
+ "backtrace",
+ "lazy_static",
+ "mintex",
+ "parking_lot 0.12.1",
+ "rustc-hash",
+ "serde",
+ "serde_json",
+ "thousands",
+]
+
 [[package]]
 name = "digest"
 version = "0.10.7"
@@ -1630,6 +1662,16 @@ version = "0.2.147"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3"
 
+[[package]]
+name = "libmimalloc-sys"
+version = "0.1.34"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "25d058a81af0d1c22d7a1c948576bee6d673f7af3c0f35564abd6c81122f513d"
+dependencies = [
+ "cc",
+ "libc",
+]
+
 [[package]]
 name = "linux-raw-sys"
 version = "0.4.5"
@@ -1767,6 +1809,15 @@ dependencies = [
 "autocfg 1.1.0",
 ]
 
+[[package]]
+name = "mimalloc"
+version = "0.1.38"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "972e5f23f6716f62665760b0f4cbf592576a80c7b879ba9beaafc0e558894127"
+dependencies = [
+ "libmimalloc-sys",
+]
+
 [[package]]
 name = "mime"
 version = "0.3.17"
@@ -1792,6 +1843,16 @@ dependencies = [
 "adler",
 ]
 
+[[package]]
+name = "mintex"
+version = "0.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fd7c5ba1c3b5a23418d7bbf98c71c3d4946a0125002129231da8d6b723d559cb"
+dependencies = [
+ "once_cell",
+ "sys-info",
+]
+
 [[package]]
 name = "mio"
 version = "0.6.23"
@@ -1835,6 +1896,20 @@ dependencies = [
 "ws2_32-sys",
 ]
 
+[[package]]
+name = "mlua"
+version = "0.8.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0bb37b0ba91f017aa7ca2b98ef99496827770cd635b4a932a6047c5b4bbe678e"
+dependencies = [
+ "bstr",
+ "cc",
+ "num-traits",
+ "once_cell",
+ "pkg-config",
+ "rustc-hash",
+]
+
 [[package]]
 name = "native-tls"
 version = "0.2.11"
@@ -2202,6 +2277,26 @@ dependencies = [
 "siphasher 0.3.11",
 ]
 
+[[package]]
+name = "pin-project"
+version = "1.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fda4ed1c6c173e3fc7a83629421152e01d7b1f9b7f65fb301e490e8cfc656422"
+dependencies = [
+ "pin-project-internal",
+]
+
+[[package]]
+name = "pin-project-internal"
+version = "1.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4359fd9c9171ec6e8c62926d6faaf553a8dc3f64e1507e76da7911b4f6a04405"
+dependencies = [
+ "proc-macro2 1.0.66",
+ "quote 1.0.33",
+ "syn 2.0.29",
+]
+
 [[package]]
 name = "pin-project-lite"
 version = "0.2.13"
@@ -2504,12 +2599,21 @@ version = "0.23.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "4f49cdc0bb3f412bf8e7d1bd90fe1d9eb10bc5c399ba90973c14662a27b3f8ba"
 dependencies = [
+ "arc-swap",
+ "async-trait",
+ "bytes 1.4.0",
  "combine",
+ "futures 0.3.28",
+ "futures-util",
  "itoa 1.0.9",
  "percent-encoding 2.3.0",
+ "pin-project-lite",
  "ryu",
  "sha1_smol",
  "socket2 0.4.9",
+ "tokio 1.32.0",
+ "tokio-retry",
+ "tokio-util",
  "url 2.4.1",
 ]
 
@@ -2628,36 +2732,18 @@ dependencies = [
 "winreg 0.50.0",
 ]
 
-[[package]]
-name = "rlua"
-version = "0.19.7"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5d33e5ba15c3d43178f283ed5863d4531e292fc0e56fb773f3bea45f18e3a42a"
-dependencies = [
- "bitflags 1.3.2",
- "bstr",
- "libc",
- "num-traits",
- "rlua-lua54-sys",
-]
-
-[[package]]
-name = "rlua-lua54-sys"
-version = "0.1.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7aafabafe1895cb4a2be81a56d7ff3d46bf4b5d2f9cfdbea2ed404cdabe96474"
-dependencies = [
- "cc",
- "libc",
- "pkg-config",
-]
-
 [[package]]
 name = "rustc-demangle"
 version = "0.1.23"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76"
 
+[[package]]
+name = "rustc-hash"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
+
 [[package]]
 name = "rustc_version"
 version = "0.2.3"
@@ -2957,6 +3043,9 @@ name = "smallvec"
 version = "1.11.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "62bb4feee49fdd9f707ef802e22365a35de4b7b299de4763d44bfea899442ff9"
+dependencies = [
+ "serde",
+]
 
 [[package]]
 name = "socket2"
@@ -3098,6 +3187,16 @@ dependencies = [
 "unicode-xid 0.2.4",
 ]
 
+[[package]]
+name = "sys-info"
+version = "0.9.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0b3a0d0aba8bf96a0e1ddfdc352fc53b3df7f39318c71854910c3c4b024ae52c"
+dependencies = [
+ "cc",
+ "libc",
+]
+
 [[package]]
 name = "tempfile"
 version = "3.8.0"
@@ -3151,6 +3250,12 @@ dependencies = [
 "syn 2.0.29",
 ]
 
+[[package]]
+name = "thousands"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3bf63baf9f5039dadc247375c29eb13706706cfde997d0330d05aa63a77d8820"
+
 [[package]]
 name = "time"
 version = "0.1.45"
@@ -3335,6 +3440,17 @@ dependencies = [
 "tokio-sync",
 ]
 
+[[package]]
+name = "tokio-retry"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7f57eb36ecbe0fc510036adff84824dd3c24bb781e21bfa67b69d556aa85214f"
+dependencies = [
+ "pin-project",
+ "rand 0.8.5",
+ "tokio 1.32.0",
+]
+
 [[package]]
 name = "tokio-sync"
 version = "0.1.8"
@@ -3688,24 +3804,29 @@ dependencies = [
  "actix-files",
  "actix-governor",
  "actix-web",
+ "async-once-cell",
  "async-trait",
  "criterion",
+ "dhat",
  "env_logger",
  "error-stack",
  "fake-useragent",
+ "futures 0.3.28",
  "handlebars",
  "log",
  "md5",
+ "mimalloc",
+ "mlua",
  "once_cell",
  "rand 0.8.5",
  "redis",
  "regex",
  "reqwest 0.11.20",
- "rlua",
  "rusty-hook",
  "scraper",
  "serde",
  "serde_json",
+ "smallvec 1.11.0",
  "tempfile",
  "tokio 1.32.0",
 ]
```
Cargo.toml
CHANGED

```diff
@@ -8,7 +8,7 @@ license = "AGPL-3.0"
 
 [dependencies]
 reqwest = {version="0.11.20",features=["json"]}
-tokio = {version="1.32.0",features=["
+tokio = {version="1.32.0",features=["rt-multi-thread","macros"]}
 serde = {version="1.0.188",features=["derive"]}
 handlebars = { version = "4.4.0", features = ["dir_source"] }
 scraper = {version="0.17.1"}
@@ -48,13 +48,17 @@ rpath = false
 
 [profile.release]
 opt-level = 3
-debug = false
+debug = false # This should only be commented when testing with dhat profiler
+# debug = 1 # This should only be uncommented when testing with dhat profiler
 split-debuginfo = '...'
 debug-assertions = false
 overflow-checks = false
-lto =
+lto = true
 panic = 'abort'
 incremental = false
-codegen-units =
+codegen-units = 1
 rpath = false
 strip = "debuginfo"
+
+[features]
+dhat-heap = ["dep:dhat"]
```
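Note on the new feature: the `dep:dhat` syntax only works when `dhat` is declared as an optional dependency; that `[dependencies]` entry sits outside the hunks shown above. A minimal sketch of the wiring this implies (the version pin is an assumption taken from the dhat 0.3.2 entry this commit adds to Cargo.lock):

```toml
# Minimal sketch, not the full manifest: dhat is assumed to be declared
# optional in [dependencies], so it is only compiled in when the
# `dhat-heap` feature is enabled (e.g. `cargo run --features dhat-heap`).
[dependencies]
dhat = { version = "0.3.2", optional = true }

[features]
# `dep:dhat` pulls in the optional dependency without exposing an
# implicit `dhat` feature of its own.
dhat-heap = ["dep:dhat"]
```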
Dockerfile
CHANGED

```diff
@@ -19,7 +19,7 @@ COPY . .
 RUN cargo install --path .
 
 # We do not need the Rust toolchain to run the binary!
-FROM gcr.io/distroless/cc-
+FROM gcr.io/distroless/cc-debian12
 COPY --from=builder /app/public/ /opt/websurfx/public/
 COPY --from=builder /app/websurfx/config.lua /etc/xdg/websurfx/config.lua
 COPY --from=builder /usr/local/cargo/bin/* /usr/local/bin/
```
README.md
CHANGED

```diff
@@ -5,7 +5,7 @@
 <b align="center"><a href="README.md">Readme</a></b> |
 <b><a href="https://discord.gg/SWnda7Mw5u">Discord</a></b> |
 <b><a href="https://github.com/neon-mmd/websurfx">GitHub</a></b> |
-<b><a href="
+<b><a href="../../tree/HEAD/docs/">Documentation</a></b>
 <br /><br />
 <a href="#">
 <img
```
docs/installation.md
CHANGED

```diff
@@ -109,7 +109,7 @@ colorscheme = "catppuccin-mocha" -- the colorscheme name which should be used fo
 theme = "simple" -- the theme name which should be used for the website
 
 -- ### Caching ###
-
+redis_url = "redis://redis:6379" -- redis connection url address on which the client should connect on.
 
 -- ### Search Engines ###
 upstream_search_engines = { DuckDuckGo = true, Searx = false } -- select the upstream search engines from which the results should be fetched.
```
src/bin/websurfx.rs
CHANGED

```diff
@@ -3,9 +3,19 @@
 //! This module contains the main function which handles the logging of the application to the
 //! stdout and handles the command line arguments provided and launches the `websurfx` server.
 
+use mimalloc::MiMalloc;
 use std::net::TcpListener;
 use websurfx::{config::parser::Config, run};
 
+/// A dhat heap memory profiler
+#[cfg(feature = "dhat-heap")]
+#[global_allocator]
+static ALLOC: dhat::Alloc = dhat::Alloc;
+
+#[cfg(not(feature = "dhat-heap"))]
+#[global_allocator]
+static GLOBAL: MiMalloc = MiMalloc;
+
 /// The function that launches the main server and registers all the routes of the website.
 ///
 /// # Error
@@ -14,6 +24,10 @@ use websurfx::{config::parser::Config, run};
 /// available for being used for other applications.
 #[actix_web::main]
 async fn main() -> std::io::Result<()> {
+    // A dhat heap profiler initialization.
+    #[cfg(feature = "dhat-heap")]
+    let _profiler = dhat::Profiler::new_heap();
+
     // Initialize the parsed config file.
     let config = Config::parse(false).unwrap();
 
```
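The allocator swap above follows the standard dhat pattern: the `#[global_allocator]` is chosen at compile time, and the profiler handle must stay alive for the whole run. A minimal self-contained sketch of the same pattern (an illustration of the wiring, independent of this repo):

```rust
// Sketch of the dhat-vs-mimalloc gating. Build the profiling variant with
// `cargo run --features dhat-heap`; dhat writes a dhat-heap.json report
// when the profiler is dropped, which is why this commit also adds that
// file to .gitignore.
use mimalloc::MiMalloc;

#[cfg(feature = "dhat-heap")]
#[global_allocator]
static ALLOC: dhat::Alloc = dhat::Alloc;

#[cfg(not(feature = "dhat-heap"))]
#[global_allocator]
static GLOBAL: MiMalloc = MiMalloc;

fn main() {
    // Must be bound to a named variable: `let _ = ...` would drop the
    // profiler immediately and produce an empty report.
    #[cfg(feature = "dhat-heap")]
    let _profiler = dhat::Profiler::new_heap();

    // ... run the application ...
}
```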
src/cache/cacher.rs
CHANGED

```diff
@@ -1,17 +1,27 @@
 //! This module provides the functionality to cache the aggregated results fetched and aggregated
 //! from the upstream search engines in a json format.
 
+use error_stack::Report;
+use futures::future::try_join_all;
 use md5::compute;
-use redis::{
+use redis::{aio::ConnectionManager, AsyncCommands, Client, RedisError};
+
+use super::error::PoolError;
 
 /// A named struct which stores the redis Connection url address to which the client will
 /// connect to.
 ///
 /// # Fields
 ///
-/// * `
+/// * `connection_pool` - It stores a pool of connections ready to be used.
+/// * `pool_size` - It stores the size of the connection pool (in other words the number of
+/// connections that should be stored in the pool).
+/// * `current_connection` - It stores the index of which connection is being used at the moment.
+#[derive(Clone)]
 pub struct RedisCache {
-
+    connection_pool: Vec<ConnectionManager>,
+    pool_size: u8,
+    current_connection: u8,
 }
 
 impl RedisCache {
@@ -19,11 +29,25 @@ impl RedisCache {
     ///
     /// # Arguments
     ///
-    /// * `redis_connection_url` - It
+    /// * `redis_connection_url` - It takes the redis Connection url address.
+    /// * `pool_size` - It takes the size of the connection pool (in other words the number of
+    /// connections that should be stored in the pool).
+    pub async fn new(
+        redis_connection_url: &str,
+        pool_size: u8,
+    ) -> Result<Self, Box<dyn std::error::Error>> {
         let client = Client::open(redis_connection_url)?;
-        let
+        let mut tasks: Vec<_> = Vec::new();
+
+        for _ in 0..pool_size {
+            tasks.push(client.get_tokio_connection_manager());
+        }
+
+        let redis_cache = RedisCache {
+            connection_pool: try_join_all(tasks).await?,
+            pool_size,
+            current_connection: Default::default(),
+        };
         Ok(redis_cache)
     }
 
@@ -32,7 +56,7 @@ impl RedisCache {
     /// # Arguments
     ///
     /// * `url` - It takes an url as string.
-    fn hash_url(url: &str) -> String {
+    fn hash_url(&self, url: &str) -> String {
         format!("{:?}", compute(url))
     }
 
@@ -41,9 +65,42 @@ impl RedisCache {
     /// # Arguments
     ///
     /// * `url` - It takes an url as a string.
-    pub fn cached_json(&mut self, url: &str) -> Result<String,
+    pub async fn cached_json(&mut self, url: &str) -> Result<String, Report<PoolError>> {
+        self.current_connection = Default::default();
+        let hashed_url_string: &str = &self.hash_url(url);
+
+        let mut result: Result<String, RedisError> = self.connection_pool
+            [self.current_connection as usize]
+            .get(hashed_url_string)
+            .await;
+
+        // Code to check whether the current connection being used is dropped with connection error
+        // or not. if it drops with the connection error then the current connection is replaced
+        // with a new connection from the pool which is then used to run the redis command then
+        // that connection is also checked whether it is dropped or not if it is not then the
+        // result is passed as a `Result` or else the same process repeats again and if all of the
+        // connections in the pool result in connection drop error then a custom pool error is
+        // returned.
+        loop {
+            match result {
+                Err(error) => match error.is_connection_dropped() {
+                    true => {
+                        self.current_connection += 1;
+                        if self.current_connection == self.pool_size {
+                            return Err(Report::new(
+                                PoolError::PoolExhaustionWithConnectionDropError,
+                            ));
+                        }
+                        result = self.connection_pool[self.current_connection as usize]
+                            .get(hashed_url_string)
+                            .await;
+                        continue;
+                    }
+                    false => return Err(Report::new(PoolError::RedisError(error))),
+                },
+                Ok(res) => return Ok(res),
+            }
+        }
     }
 
     /// A function which caches the results by using the hashed `url` as the key and
@@ -54,21 +111,45 @@ impl RedisCache {
     ///
     /// * `json_results` - It takes the json results string as an argument.
     /// * `url` - It takes the url as a String.
-    pub fn cache_results(
+    pub async fn cache_results(
         &mut self,
-        json_results:
+        json_results: &str,
         url: &str,
-    ) -> Result<(),
-        // put results_json into cache
-        self.connection.set(&hashed_url_string, json_results)?;
+    ) -> Result<(), Report<PoolError>> {
+        self.current_connection = Default::default();
+        let hashed_url_string: &str = &self.hash_url(url);
+
+        let mut result: Result<(), RedisError> = self.connection_pool
+            [self.current_connection as usize]
+            .set_ex(hashed_url_string, json_results, 60)
+            .await;
+
+        // Code to check whether the current connection being used is dropped with connection error
+        // or not. if it drops with the connection error then the current connection is replaced
+        // with a new connection from the pool which is then used to run the redis command then
+        // that connection is also checked whether it is dropped or not if it is not then the
+        // result is passed as a `Result` or else the same process repeats again and if all of the
+        // connections in the pool result in connection drop error then a custom pool error is
+        // returned.
+        loop {
+            match result {
+                Err(error) => match error.is_connection_dropped() {
+                    true => {
+                        self.current_connection += 1;
+                        if self.current_connection == self.pool_size {
+                            return Err(Report::new(
+                                PoolError::PoolExhaustionWithConnectionDropError,
+                            ));
+                        }
+                        result = self.connection_pool[self.current_connection as usize]
+                            .set_ex(hashed_url_string, json_results, 60)
+                            .await;
+                        continue;
+                    }
+                    false => return Err(Report::new(PoolError::RedisError(error))),
+                },
+                Ok(_) => return Ok(()),
+            }
+        }
     }
 }
```
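The failover logic above is duplicated between `cached_json` and `cache_results`. A generic sketch of the same retry-over-pool pattern, with hypothetical names (`run_on_pool` is an illustration of how the loop could be factored over any redis command, not code from this commit):

```rust
// Hypothetical refactor sketch: try each pooled connection in turn,
// failing over only on dropped connections, exactly as the diff's loop
// does for GET and SETEX.
use redis::{aio::ConnectionManager, RedisError};

async fn run_on_pool<T, F, Fut>(
    pool: &[ConnectionManager],
    mut command: F,
) -> Result<T, String>
where
    F: FnMut(ConnectionManager) -> Fut,
    Fut: std::future::Future<Output = Result<T, RedisError>>,
{
    for connection in pool.iter() {
        // ConnectionManager is cheap to clone; each attempt gets its own handle.
        match command(connection.clone()).await {
            Ok(value) => return Ok(value),
            // Only fail over when the connection itself dropped; any other
            // redis error is surfaced immediately.
            Err(error) if error.is_connection_dropped() => continue,
            Err(error) => return Err(error.to_string()),
        }
    }
    Err("all connections from the pool dropped with connection error".to_owned())
}
```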
src/cache/error.rs
ADDED

```diff
@@ -0,0 +1,40 @@
+//! This module provides the error enum to handle different errors associated while requesting data from
+//! the redis server using an async connection pool.
+use std::fmt;
+
+use redis::RedisError;
+
+/// A custom error type used for handling redis async pool associated errors.
+///
+/// This enum provides variants three different categories of errors:
+/// * `RedisError` - This variant handles all errors related to `RedisError`,
+/// * `PoolExhaustionWithConnectionDropError` - This variant handles the error
+/// which occurs when all the connections in the connection pool return a connection
+/// dropped redis error.
+#[derive(Debug)]
+pub enum PoolError {
+    RedisError(RedisError),
+    PoolExhaustionWithConnectionDropError,
+}
+
+impl fmt::Display for PoolError {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            PoolError::RedisError(redis_error) => {
+                if let Some(detail) = redis_error.detail() {
+                    write!(f, "{}", detail)
+                } else {
+                    write!(f, "")
+                }
+            }
+            PoolError::PoolExhaustionWithConnectionDropError => {
+                write!(
+                    f,
+                    "Error all connections from the pool dropped with connection error"
+                )
+            }
+        }
+    }
+}
+
+impl error_stack::Context for PoolError {}
```
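Implementing `error_stack::Context` (which requires `Debug + Display`) is what lets the cacher wrap these variants in a `Report`. A minimal sketch of how the type is consumed, based on the cacher diff above:

```rust
// Sketch: how a PoolError becomes an error_stack Report. Report::new
// captures the context; callers can layer extra detail with
// attach_printable (standard error_stack API).
use error_stack::Report;

fn exhausted() -> Report<PoolError> {
    Report::new(PoolError::PoolExhaustionWithConnectionDropError)
        .attach_printable("giving up after trying every pooled connection")
}
```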
src/cache/mod.rs
CHANGED

```diff
@@ -1 +1,2 @@
 pub mod cacher;
+pub mod error;
```
src/config/parser.rs
CHANGED

```diff
@@ -5,7 +5,7 @@ use crate::handler::paths::{file_path, FileType};
 
 use super::parser_models::{AggregatorConfig, RateLimiter, Style};
 use log::LevelFilter;
-use
+use mlua::Lua;
 use std::{collections::HashMap, fs, thread::available_parallelism};
 
 /// A named struct which stores the parsed config file options.
@@ -53,30 +53,31 @@ impl Config {
     /// or io error if the config.lua file doesn't exists otherwise it returns a newly constructed
     /// Config struct with all the parsed config options from the parsed config file.
     pub fn parse(logging_initialized: bool) -> Result<Self, Box<dyn std::error::Error>> {
-        Lua::new()
-            .exec()?;
-        }
+        let lua = Lua::new();
+        let globals = lua.globals();
 
+        lua.load(&fs::read_to_string(file_path(FileType::Config)?)?)
+            .exec()?;
 
+        let parsed_threads: u8 = globals.get::<_, u8>("threads")?;
 
+        let debug: bool = globals.get::<_, bool>("debug")?;
+        let logging: bool = globals.get::<_, bool>("logging")?;
 
+        if !logging_initialized {
+            set_logging_level(debug, logging);
+        }
 
+        let threads: u8 = if parsed_threads == 0 {
+            let total_num_of_threads: usize = available_parallelism()?.get() / 2;
+            log::error!(
+                "Config Error: The value of `threads` option should be a non zero positive integer"
+            );
+            log::error!("Falling back to using {} threads", total_num_of_threads);
+            total_num_of_threads as u8
+        } else {
+            parsed_threads
+        };
 
         let rate_limter = globals.get::<_,HashMap<String, u8>>("rate_limiter")?;
 
```
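The switch from `rlua` (removed from Cargo.lock above) to `mlua` keeps the same shape: build a `Lua` state, execute the config script, then read typed globals. A minimal self-contained sketch of that flow, with an inline script standing in for config.lua:

```rust
// Minimal mlua sketch mirroring Config::parse above: execute a Lua
// config, then pull typed values out of the globals table.
use mlua::Lua;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let lua = Lua::new();
    let globals = lua.globals();

    // Inline stand-in for fs::read_to_string(config.lua).
    lua.load("threads = 8\ndebug = false").exec()?;

    let threads: u8 = globals.get::<_, u8>("threads")?;
    let debug: bool = globals.get::<_, bool>("debug")?;
    println!("threads = {threads}, debug = {debug}");
    Ok(())
}
```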
src/config/parser_models.rs
CHANGED

```diff
@@ -18,7 +18,7 @@ use serde::{Deserialize, Serialize};
 /// * `theme` - It stores the parsed theme option used to set a theme for the website.
 /// * `colorscheme` - It stores the parsed colorscheme option used to set a colorscheme for the
 /// theme being used.
-#[derive(Serialize, Deserialize, Clone)]
+#[derive(Serialize, Deserialize, Clone, Default)]
 pub struct Style {
     pub theme: String,
     pub colorscheme: String,
```
src/engines/duckduckgo.rs
CHANGED

```diff
@@ -4,14 +4,14 @@
 
 use std::collections::HashMap;
 
-use reqwest::header::
+use reqwest::header::HeaderMap;
 use scraper::{Html, Selector};
 
 use crate::results::aggregation_models::SearchResult;
 
 use super::engine_models::{EngineError, SearchEngine};
 
-use error_stack::{
+use error_stack::{Report, Result, ResultExt};
 
 /// A new DuckDuckGo engine type defined in-order to implement the `SearchEngine` trait which allows to
 /// reduce code duplication as well as allows to create vector of different search engines easily.
@@ -39,9 +39,9 @@ impl SearchEngine for DuckDuckGo {
     /// or HeaderMap fails to initialize.
     async fn results(
         &self,
-        query:
+        query: &str,
         page: u32,
-        user_agent:
+        user_agent: &str,
         request_timeout: u8,
     ) -> Result<HashMap<String, SearchResult>, EngineError> {
         // Page number can be missing or empty string and so appropriate handling is required
@@ -61,38 +61,19 @@ impl SearchEngine for DuckDuckGo {
         };
 
         // initializing HeaderMap and adding appropriate headers.
-        let
-            "https://google.com/"
-                .parse()
-                .into_report()
-                .change_context(EngineError::UnexpectedError)?,
-        );
-        header_map.insert(
-            CONTENT_TYPE,
-            "application/x-www-form-urlencoded"
-                .parse()
-                .into_report()
-                .change_context(EngineError::UnexpectedError)?,
-        );
-        header_map.insert(
-            COOKIE,
-            "kl=wt-wt"
-                .parse()
-                .into_report()
-                .change_context(EngineError::UnexpectedError)?,
-        );
+        let header_map = HeaderMap::try_from(&HashMap::from([
+            ("USER_AGENT".to_string(), user_agent.to_string()),
+            ("REFERER".to_string(), "https://google.com/".to_string()),
+            (
+                "CONTENT_TYPE".to_string(),
+                "application/x-www-form-urlencoded".to_string(),
+            ),
+            ("COOKIE".to_string(), "kl=wt-wt".to_string()),
+        ]))
+        .change_context(EngineError::UnexpectedError)?;
 
         let document: Html = Html::parse_document(
-            &DuckDuckGo::fetch_html_from_upstream(self, url, header_map, request_timeout).await?,
+            &DuckDuckGo::fetch_html_from_upstream(self, &url, header_map, request_timeout).await?,
         );
 
         let no_result: Selector = Selector::parse(".no-results")
@@ -126,8 +107,7 @@ impl SearchEngine for DuckDuckGo {
                     .next()
                     .unwrap()
                     .inner_html()
-                    .trim()
-                    .to_string(),
+                    .trim(),
                 format!(
                     "https://{}",
                     result
@@ -136,15 +116,15 @@ impl SearchEngine for DuckDuckGo {
                         .unwrap()
                         .inner_html()
                         .trim()
-                )
+                )
+                .as_str(),
                 result
                     .select(&result_desc)
                     .next()
                     .unwrap()
                     .inner_html()
-                    .trim()
-
-                vec!["duckduckgo".to_string()],
+                    .trim(),
+                &["duckduckgo"],
             )
         })
         .map(|search_result| (search_result.url.clone(), search_result))
```
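Both engines now build their headers with `HeaderMap::try_from(&HashMap<String, String>)`, collapsing several fallible `insert(...parse()...)` calls into one fallible conversion with a single error site. A standalone sketch of that conversion (the header name strings here are the literal keys the diff uses):

```rust
// Sketch: building a reqwest HeaderMap from a plain HashMap in one step.
// An invalid header name or value surfaces as one conversion error
// instead of one error path per insert.
use reqwest::header::HeaderMap;
use std::collections::HashMap;

fn build_headers(user_agent: &str) -> Result<HeaderMap, Box<dyn std::error::Error>> {
    let header_map = HeaderMap::try_from(&HashMap::from([
        ("USER_AGENT".to_string(), user_agent.to_string()),
        ("REFERER".to_string(), "https://google.com/".to_string()),
        ("COOKIE".to_string(), "kl=wt-wt".to_string()),
    ]))?;
    Ok(header_map)
}
```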
src/engines/engine_models.rs
CHANGED

```diff
@@ -2,7 +2,7 @@
 //! the upstream search engines with the search query provided by the user.
 
 use crate::results::aggregation_models::SearchResult;
-use error_stack::{
+use error_stack::{Result, ResultExt};
 use std::{collections::HashMap, fmt, time::Duration};
 
 /// A custom error type used for handle engine associated errors.
@@ -48,7 +48,7 @@ impl error_stack::Context for EngineError {}
 pub trait SearchEngine: Sync + Send {
     async fn fetch_html_from_upstream(
         &self,
-        url:
+        url: &str,
         header_map: reqwest::header::HeaderMap,
         request_timeout: u8,
     ) -> Result<String, EngineError> {
@@ -59,19 +59,17 @@ pub trait SearchEngine: Sync + Send {
             .headers(header_map) // add spoofed headers to emulate human behavior
             .send()
             .await
-            .into_report()
             .change_context(EngineError::RequestError)?
             .text()
             .await
-            .into_report()
             .change_context(EngineError::RequestError)?)
     }
 
     async fn results(
         &self,
-        query:
+        query: &str,
         page: u32,
-        user_agent:
+        user_agent: &str,
         request_timeout: u8,
     ) -> Result<HashMap<String, SearchResult>, EngineError>;
 }
```
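Dropping the `.into_report()` calls tracks an error-stack API change: in newer error-stack releases, `ResultExt::change_context` is implemented directly for a plain `Result` whose error type is a valid context, so the explicit report-wrapping step disappears. A before/after sketch (assuming error-stack's current `ResultExt`; the `RequestError` type here is a stand-in, not the repo's enum):

```rust
// Sketch of the error-stack migration seen above: change_context now
// wraps a std error into a Report itself, so into_report() is gone.
use error_stack::{Result, ResultExt};

#[derive(Debug)]
struct RequestError;

impl std::fmt::Display for RequestError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str("upstream request failed")
    }
}

impl error_stack::Context for RequestError {}

fn parse_port(raw: &str) -> Result<u16, RequestError> {
    // Old style: raw.parse().into_report().change_context(RequestError)
    // New style: change_context directly on the std Result.
    raw.parse::<u16>().change_context(RequestError)
}
```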
src/engines/searx.rs
CHANGED

```diff
@@ -2,14 +2,14 @@
 //! by querying the upstream searx search engine instance with user provided query and with a page
 //! number if provided.
 
-use reqwest::header::
+use reqwest::header::HeaderMap;
 use scraper::{Html, Selector};
 use std::collections::HashMap;
 
 use crate::results::aggregation_models::SearchResult;
 
 use super::engine_models::{EngineError, SearchEngine};
-use error_stack::{
+use error_stack::{Report, Result, ResultExt};
 
 /// A new Searx engine type defined in-order to implement the `SearchEngine` trait which allows to
 /// reduce code duplication as well as allows to create vector of different search engines easily.
@@ -38,9 +38,9 @@ impl SearchEngine for Searx {
 
     async fn results(
         &self,
-        query:
+        query: &str,
         page: u32,
-        user_agent:
+        user_agent: &str,
         request_timeout: u8,
     ) -> Result<HashMap<String, SearchResult>, EngineError> {
         // Page number can be missing or empty string and so appropriate handling is required
@@ -51,32 +51,16 @@ impl SearchEngine for Searx {
         };
 
         // initializing headers and adding appropriate headers.
-        let
-        );
-        header_map.insert(
-            REFERER,
-            "https://google.com/"
-                .parse()
-                .into_report()
-                .change_context(EngineError::UnexpectedError)?,
-        );
-        header_map.insert(
-            CONTENT_TYPE,
-            "application/x-www-form-urlencoded"
-                .parse()
-                .into_report()
-                .change_context(EngineError::UnexpectedError)?,
-        );
-        header_map.insert(COOKIE, "categories=general; language=auto; locale=en; autocomplete=duckduckgo; image_proxy=1; method=POST; safesearch=2; theme=simple; results_on_new_tab=1; doi_resolver=oadoi.org; simple_style=auto; center_alignment=1; query_in_title=1; infinite_scroll=0; disabled_engines=; enabled_engines=\"archive is__general\\054yep__general\\054curlie__general\\054currency__general\\054ddg definitions__general\\054wikidata__general\\054duckduckgo__general\\054tineye__general\\054lingva__general\\054startpage__general\\054yahoo__general\\054wiby__general\\054marginalia__general\\054alexandria__general\\054wikibooks__general\\054wikiquote__general\\054wikisource__general\\054wikiversity__general\\054wikivoyage__general\\054dictzone__general\\054seznam__general\\054mojeek__general\\054naver__general\\054wikimini__general\\054brave__general\\054petalsearch__general\\054goo__general\"; disabled_plugins=; enabled_plugins=\"searx.plugins.hostname_replace\\054searx.plugins.oa_doi_rewrite\\054searx.plugins.vim_hotkeys\"; tokens=; maintab=on; enginetab=on".parse().into_report().change_context(EngineError::UnexpectedError)?);
+        let header_map = HeaderMap::try_from(&HashMap::from([
+            ("USER_AGENT".to_string(), user_agent.to_string()),
+            ("REFERER".to_string(), "https://google.com/".to_string()),
+            ("CONTENT_TYPE".to_string(), "application/x-www-form-urlencoded".to_string()),
+            ("COOKIE".to_string(), "categories=general; language=auto; locale=en; autocomplete=duckduckgo; image_proxy=1; method=POST; safesearch=2; theme=simple; results_on_new_tab=1; doi_resolver=oadoi.org; simple_style=auto; center_alignment=1; query_in_title=1; infinite_scroll=0; disabled_engines=; enabled_engines=\"archive is__general\\054yep__general\\054curlie__general\\054currency__general\\054ddg definitions__general\\054wikidata__general\\054duckduckgo__general\\054tineye__general\\054lingva__general\\054startpage__general\\054yahoo__general\\054wiby__general\\054marginalia__general\\054alexandria__general\\054wikibooks__general\\054wikiquote__general\\054wikisource__general\\054wikiversity__general\\054wikivoyage__general\\054dictzone__general\\054seznam__general\\054mojeek__general\\054naver__general\\054wikimini__general\\054brave__general\\054petalsearch__general\\054goo__general\"; disabled_plugins=; enabled_plugins=\"searx.plugins.hostname_replace\\054searx.plugins.oa_doi_rewrite\\054searx.plugins.vim_hotkeys\"; tokens=; maintab=on; enginetab=on".to_string())
+        ]))
+        .change_context(EngineError::UnexpectedError)?;
 
         let document: Html = Html::parse_document(
-            &Searx::fetch_html_from_upstream(self, url, header_map, request_timeout).await?,
+            &Searx::fetch_html_from_upstream(self, &url, header_map, request_timeout).await?,
         );
 
         let no_result: Selector = Selector::parse("#urls>.dialog-error>p")
@@ -117,24 +101,21 @@ impl SearchEngine for Searx {
                     .next()
                     .unwrap()
                     .inner_html()
-                    .trim()
-                    .to_string(),
+                    .trim(),
                 result
                     .select(&result_url)
                     .next()
                     .unwrap()
                     .value()
                     .attr("href")
-                    .unwrap()
-                    .to_string(),
+                    .unwrap(),
                 result
                     .select(&result_desc)
                     .next()
                     .unwrap()
                     .inner_html()
-                    .trim()
-
-                vec!["searx".to_string()],
+                    .trim(),
+                &["searx"],
             )
         })
        .map(|search_result| (search_result.url.clone(), search_result))
```
src/handler/paths.rs
CHANGED

```diff
@@ -4,6 +4,7 @@
 use std::collections::HashMap;
 use std::io::Error;
 use std::path::Path;
+use std::sync::OnceLock;
 
 // ------- Constants --------
 static PUBLIC_DIRECTORY_NAME: &str = "public";
@@ -20,57 +21,7 @@ pub enum FileType {
     Theme,
 }
 
-static FILE_PATHS_FOR_DIFF_FILE_TYPES:
-    once_cell::sync::Lazy::new(|| {
-        HashMap::from([
-            (
-                FileType::Config,
-                vec![
-                    format!(
-                        "{}/.config/{}/{}",
-                        std::env::var("HOME").unwrap(),
-                        COMMON_DIRECTORY_NAME,
-                        CONFIG_FILE_NAME
-                    ),
-                    format!("/etc/xdg/{}/{}", COMMON_DIRECTORY_NAME, CONFIG_FILE_NAME),
-                    format!("./{}/{}", COMMON_DIRECTORY_NAME, CONFIG_FILE_NAME),
-                ],
-            ),
-            (
-                FileType::Theme,
-                vec![
-                    format!("/opt/websurfx/{}/", PUBLIC_DIRECTORY_NAME),
-                    format!("./{}/", PUBLIC_DIRECTORY_NAME),
-                ],
-            ),
-            (
-                FileType::AllowList,
-                vec![
-                    format!(
-                        "{}/.config/{}/{}",
-                        std::env::var("HOME").unwrap(),
-                        COMMON_DIRECTORY_NAME,
-                        ALLOWLIST_FILE_NAME
-                    ),
-                    format!("/etc/xdg/{}/{}", COMMON_DIRECTORY_NAME, ALLOWLIST_FILE_NAME),
-                    format!("./{}/{}", COMMON_DIRECTORY_NAME, ALLOWLIST_FILE_NAME),
-                ],
-            ),
-            (
-                FileType::BlockList,
-                vec![
-                    format!(
-                        "{}/.config/{}/{}",
-                        std::env::var("HOME").unwrap(),
-                        COMMON_DIRECTORY_NAME,
-                        BLOCKLIST_FILE_NAME
-                    ),
-                    format!("/etc/xdg/{}/{}", COMMON_DIRECTORY_NAME, BLOCKLIST_FILE_NAME),
-                    format!("./{}/{}", COMMON_DIRECTORY_NAME, BLOCKLIST_FILE_NAME),
-                ],
-            ),
-        ])
-    });
+static FILE_PATHS_FOR_DIFF_FILE_TYPES: OnceLock<HashMap<FileType, Vec<String>>> = OnceLock::new();
 
 /// A helper function which returns an appropriate config file path checking if the config
 /// file exists on that path.
@@ -95,11 +46,64 @@ static FILE_PATHS_FOR_DIFF_FILE_TYPES: once_cell::sync::Lazy<HashMap<FileType, V
 /// 1. `/opt/websurfx` if it not present here then it fallbacks to the next one (2)
 /// 2. Under project folder ( or codebase in other words) if it is not present
 /// here then it returns an error as mentioned above.
-pub fn file_path(file_type: FileType) -> Result
-    let file_path = FILE_PATHS_FOR_DIFF_FILE_TYPES
+pub fn file_path(file_type: FileType) -> Result<&'static str, Error> {
+    let file_path: &Vec<String> = FILE_PATHS_FOR_DIFF_FILE_TYPES
+        .get_or_init(|| {
+            HashMap::from([
+                (
+                    FileType::Config,
+                    vec![
+                        format!(
+                            "{}/.config/{}/{}",
+                            std::env::var("HOME").unwrap(),
+                            COMMON_DIRECTORY_NAME,
+                            CONFIG_FILE_NAME
+                        ),
+                        format!("/etc/xdg/{}/{}", COMMON_DIRECTORY_NAME, CONFIG_FILE_NAME),
+                        format!("./{}/{}", COMMON_DIRECTORY_NAME, CONFIG_FILE_NAME),
+                    ],
+                ),
+                (
+                    FileType::Theme,
+                    vec![
+                        format!("/opt/websurfx/{}/", PUBLIC_DIRECTORY_NAME),
+                        format!("./{}/", PUBLIC_DIRECTORY_NAME),
+                    ],
+                ),
+                (
+                    FileType::AllowList,
+                    vec![
+                        format!(
+                            "{}/.config/{}/{}",
+                            std::env::var("HOME").unwrap(),
+                            COMMON_DIRECTORY_NAME,
+                            ALLOWLIST_FILE_NAME
+                        ),
+                        format!("/etc/xdg/{}/{}", COMMON_DIRECTORY_NAME, ALLOWLIST_FILE_NAME),
+                        format!("./{}/{}", COMMON_DIRECTORY_NAME, ALLOWLIST_FILE_NAME),
+                    ],
+                ),
+                (
+                    FileType::BlockList,
+                    vec![
+                        format!(
+                            "{}/.config/{}/{}",
+                            std::env::var("HOME").unwrap(),
+                            COMMON_DIRECTORY_NAME,
+                            BLOCKLIST_FILE_NAME
+                        ),
+                        format!("/etc/xdg/{}/{}", COMMON_DIRECTORY_NAME, BLOCKLIST_FILE_NAME),
+                        format!("./{}/{}", COMMON_DIRECTORY_NAME, BLOCKLIST_FILE_NAME),
+                    ],
+                ),
+            ])
+        })
+        .get(&file_type)
+        .unwrap();
+
     for (idx, _) in file_path.iter().enumerate() {
         if Path::new(file_path[idx].as_str()).exists() {
-            return Ok(file_path[idx]
+            return Ok(std::mem::take(&mut &*file_path[idx]));
        }
    }
 
```
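Replacing `once_cell::sync::Lazy` with `std::sync::OnceLock` (stable since Rust 1.70) removes the external crate for this use; the initializer simply moves from the static's declaration into `get_or_init` at the call site. A minimal sketch of the pattern:

```rust
// Sketch: lazy static initialization with std's OnceLock instead of
// once_cell::sync::Lazy. get_or_init runs the closure exactly once,
// even under concurrent first access, and returns a &'static reference.
use std::collections::HashMap;
use std::sync::OnceLock;

static PATHS: OnceLock<HashMap<&'static str, Vec<String>>> = OnceLock::new();

fn paths() -> &'static HashMap<&'static str, Vec<String>> {
    PATHS.get_or_init(|| {
        HashMap::from([("config", vec!["./websurfx/config.lua".to_string()])])
    })
}
```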
src/lib.rs
CHANGED

```diff
@@ -43,7 +43,7 @@ use handler::paths::{file_path, FileType};
 pub fn run(listener: TcpListener, config: Config) -> std::io::Result<Server> {
     let mut handlebars: Handlebars = Handlebars::new();
 
-    let public_folder_path:
+    let public_folder_path: &str = file_path(FileType::Theme)?;
 
     handlebars
         .register_templates_directory(".html", format!("{}/templates", public_folder_path))
```
src/results/aggregation_models.rs
CHANGED
@@ -2,6 +2,7 @@
 //! data scraped from the upstream search engines.

 use serde::{Deserialize, Serialize};
+use smallvec::SmallVec;

 use crate::{config::parser_models::Style, engines::engine_models::EngineError};

@@ -16,13 +17,13 @@ use crate::{config::parser_models::Style, engines::engine_models::EngineError};
 /// (href url in html in simple words).
 /// * `description` - The description of the search result.
 /// * `engine` - The names of the upstream engines from which this results were provided.
-#[derive(Clone, Serialize, Deserialize)]
+#[derive(Clone, Serialize, Deserialize, Debug)]
 #[serde(rename_all = "camelCase")]
 pub struct SearchResult {
     pub title: String,
     pub url: String,
     pub description: String,
-    pub engine:
+    pub engine: SmallVec<[String; 0]>,
 }

 impl SearchResult {
@@ -35,12 +36,12 @@ impl SearchResult {
     /// (href url in html in simple words).
     /// * `description` - The description of the search result.
     /// * `engine` - The names of the upstream engines from which this results were provided.
-    pub fn new(title:
+    pub fn new(title: &str, url: &str, description: &str, engine: &[&str]) -> Self {
         SearchResult {
-            title,
-            url,
-            description,
-            engine,
+            title: title.to_owned(),
+            url: url.to_owned(),
+            description: description.to_owned(),
+            engine: engine.iter().map(|name| name.to_string()).collect(),
         }
     }
@@ -49,8 +50,8 @@ impl SearchResult {
     /// # Arguments
     ///
     /// * `engine` - Takes an engine name provided as a String.
-    pub fn add_engines(&mut self, engine:
-        self.engine.push(engine)
+    pub fn add_engines(&mut self, engine: &str) {
+        self.engine.push(engine.to_owned())
     }

     /// A function which returns the engine name stored from the struct as a string.
@@ -58,13 +59,12 @@ impl SearchResult {
     /// # Returns
     ///
     /// An engine name stored as a string from the struct.
-    pub fn engine(self) -> String {
-        self.engine
+    pub fn engine(&mut self) -> String {
+        std::mem::take(&mut self.engine[0])
     }
 }

-
-#[derive(Serialize, Deserialize)]
+#[derive(Serialize, Deserialize, Clone)]
 pub struct EngineErrorInfo {
     pub error: String,
     pub engine: String,
@@ -72,18 +72,18 @@ pub struct EngineErrorInfo {
 }

 impl EngineErrorInfo {
-    pub fn new(error: &EngineError, engine:
+    pub fn new(error: &EngineError, engine: &str) -> Self {
         Self {
             error: match error {
-                EngineError::RequestError =>
-                EngineError::EmptyResultSet =>
-                EngineError::UnexpectedError =>
+                EngineError::RequestError => "RequestError".to_owned(),
+                EngineError::EmptyResultSet => "EmptyResultSet".to_owned(),
+                EngineError::UnexpectedError => "UnexpectedError".to_owned(),
             },
-            engine,
+            engine: engine.to_owned(),
             severity_color: match error {
-                EngineError::RequestError =>
-                EngineError::EmptyResultSet =>
-                EngineError::UnexpectedError =>
+                EngineError::RequestError => "green".to_owned(),
+                EngineError::EmptyResultSet => "blue".to_owned(),
+                EngineError::UnexpectedError => "red".to_owned(),
             },
         }
     }
@@ -108,7 +108,7 @@ pub struct SearchResults {
     pub results: Vec<SearchResult>,
     pub page_query: String,
     pub style: Style,
-    pub engine_errors_info:
+    pub engine_errors_info: SmallVec<[EngineErrorInfo; 0]>,
 }

 impl SearchResults {
@@ -124,19 +124,19 @@ impl SearchResults {
     /// given search query.
     pub fn new(
         results: Vec<SearchResult>,
-        page_query:
-        engine_errors_info:
+        page_query: &str,
+        engine_errors_info: &[EngineErrorInfo],
     ) -> Self {
-
+        Self {
             results,
-            page_query,
-            style: Style::
-            engine_errors_info,
+            page_query: page_query.to_owned(),
+            style: Style::default(),
+            engine_errors_info: SmallVec::from(engine_errors_info),
         }
     }

     /// A setter function to add website style to the return search results.
-    pub fn add_style(&mut self, style: Style) {
-        self.style = style;
+    pub fn add_style(&mut self, style: &Style) {
+        self.style = style.to_owned();
     }
 }
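Why `SmallVec<[String; 0]>` rather than `Vec<String>` for the `engine` field? A standalone sketch of the mechanics (toy values; requires the `smallvec` crate as the diff's Cargo.toml changes suggest):

// Standalone sketch, not websurfx code. With an inline capacity of zero the
// vector begins with no heap buffer at all and only allocates ("spills")
// when the first engine name is pushed.
use smallvec::{smallvec, SmallVec};

fn main() {
    let mut engines: SmallVec<[String; 0]> = smallvec![];
    assert!(!engines.spilled()); // no heap buffer yet

    engines.push("searx".to_owned()); // illustrative engine name
    assert!(engines.spilled()); // first push triggers the heap allocation
    assert_eq!(engines.len(), 1);
}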
src/results/aggregator.rs
CHANGED
@@ -64,14 +64,14 @@ type FutureVec = Vec<JoinHandle<Result<HashMap<String, SearchResult>, Report<Eng
 /// function in either `searx` or `duckduckgo` or both otherwise returns a `SearchResults struct`
 /// containing appropriate values.
 pub async fn aggregate(
-    query:
+    query: &str,
     page: u32,
     random_delay: bool,
     debug: bool,
-    upstream_search_engines:
+    upstream_search_engines: &[EngineHandler],
     request_timeout: u8,
 ) -> Result<SearchResults, Box<dyn std::error::Error>> {
-    let user_agent:
+    let user_agent: &str = random_user_agent();

     // Add a random delay before making the request.
     if random_delay || !debug {
@@ -80,19 +80,18 @@ pub async fn aggregate(
         tokio::time::sleep(Duration::from_secs(delay_secs)).await;
     }

-    let mut names: Vec<&str> =
+    let mut names: Vec<&str> = Vec::with_capacity(0);

     // create tasks for upstream result fetching
     let mut tasks: FutureVec = FutureVec::new();

     for engine_handler in upstream_search_engines {
-        let (name, search_engine) = engine_handler.into_name_engine();
+        let (name, search_engine) = engine_handler.to_owned().into_name_engine();
         names.push(name);
-        let query: String = query.
-        let user_agent: String = user_agent.clone();
+        let query: String = query.to_owned();
         tasks.push(tokio::spawn(async move {
             search_engine
-                .results(query, page, user_agent
+                .results(&query, page, user_agent, request_timeout)
                 .await
         }));
     }
@@ -110,7 +109,7 @@ pub async fn aggregate(
     let mut result_map: HashMap<String, SearchResult> = HashMap::new();
     let mut engine_errors_info: Vec<EngineErrorInfo> = Vec::new();

-    let mut handle_error = |error: Report<EngineError>, engine_name:
+    let mut handle_error = |error: &Report<EngineError>, engine_name: &'static str| {
         log::error!("Engine Error: {:?}", error);
         engine_errors_info.push(EngineErrorInfo::new(
             error.downcast_ref::<EngineError>().unwrap(),
@@ -120,7 +119,7 @@ pub async fn aggregate(
     for _ in 0..responses.len() {
         let response = responses.pop().unwrap();
-        let engine = names.pop().unwrap()
+        let engine = names.pop().unwrap();

         if result_map.is_empty() {
             match response {
@@ -128,7 +127,7 @@ pub async fn aggregate(
                     result_map = results.clone();
                 }
                 Err(error) => {
-                    handle_error(error, engine);
+                    handle_error(&error, engine);
                 }
             }
             continue;
@@ -140,13 +139,13 @@ pub async fn aggregate(
                     result_map
                         .entry(key)
                         .and_modify(|result| {
-                            result.add_engines(engine
+                            result.add_engines(engine);
                         })
                         .or_insert_with(|| -> SearchResult { value });
                 });
             }
             Err(error) => {
-                handle_error(error, engine);
+                handle_error(&error, engine);
             }
         }
     }
@@ -155,24 +154,20 @@ pub async fn aggregate(
     filter_with_lists(
         &mut result_map,
         &mut blacklist_map,
-
+        file_path(FileType::BlockList)?,
     )?;

     filter_with_lists(
         &mut blacklist_map,
         &mut result_map,
-
+        file_path(FileType::AllowList)?,
     )?;

     drop(blacklist_map);

     let results: Vec<SearchResult> = result_map.into_values().collect();

-    Ok(SearchResults::new(
-        results,
-        query.to_string(),
-        engine_errors_info,
-    ))
+    Ok(SearchResults::new(results, query, &engine_errors_info))
 }

 /// Filters a map of search results using a list of regex patterns.
@@ -203,7 +198,10 @@ pub fn filter_with_lists(
             || re.is_match(&search_result.description.to_lowercase())
         {
             // If the search result matches the regex pattern, move it from the original map to the resultant map
-            resultant_map.insert(
+            resultant_map.insert(
+                url.to_owned(),
+                map_to_be_filtered.remove(&url.to_owned()).unwrap(),
+            );
         }
     }
 }
@@ -214,6 +212,7 @@ pub fn filter_with_lists(
 #[cfg(test)]
 mod tests {
     use super::*;
+    use smallvec::smallvec;
     use std::collections::HashMap;
     use std::io::Write;
     use tempfile::NamedTempFile;
@@ -223,22 +222,22 @@ mod tests {
         // Create a map of search results to filter
         let mut map_to_be_filtered = HashMap::new();
         map_to_be_filtered.insert(
-            "https://www.example.com".
+            "https://www.example.com".to_owned(),
             SearchResult {
-                title: "Example Domain".
-                url: "https://www.example.com".
+                title: "Example Domain".to_owned(),
+                url: "https://www.example.com".to_owned(),
                 description: "This domain is for use in illustrative examples in documents."
-                    .
-                engine:
+                    .to_owned(),
+                engine: smallvec!["Google".to_owned(), "Bing".to_owned()],
             },
         );
         map_to_be_filtered.insert(
-            "https://www.rust-lang.org/".
+            "https://www.rust-lang.org/".to_owned(),
             SearchResult {
-                title: "Rust Programming Language".
-                url: "https://www.rust-lang.org/".
-                description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".
-                engine:
+                title: "Rust Programming Language".to_owned(),
+                url: "https://www.rust-lang.org/".to_owned(),
+                description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_owned(),
+                engine: smallvec!["Google".to_owned(), "DuckDuckGo".to_owned()],
             },
         );
@@ -267,22 +266,22 @@ mod tests {
     fn test_filter_with_lists_wildcard() -> Result<(), Box<dyn std::error::Error>> {
         let mut map_to_be_filtered = HashMap::new();
         map_to_be_filtered.insert(
-            "https://www.example.com".
+            "https://www.example.com".to_owned(),
             SearchResult {
-                title: "Example Domain".
-                url: "https://www.example.com".
+                title: "Example Domain".to_owned(),
+                url: "https://www.example.com".to_owned(),
                 description: "This domain is for use in illustrative examples in documents."
-                    .
-                engine:
+                    .to_owned(),
+                engine: smallvec!["Google".to_owned(), "Bing".to_owned()],
             },
         );
         map_to_be_filtered.insert(
-            "https://www.rust-lang.org/".
+            "https://www.rust-lang.org/".to_owned(),
             SearchResult {
-                title: "Rust Programming Language".
-                url: "https://www.rust-lang.org/".
-                description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".
-                engine:
+                title: "Rust Programming Language".to_owned(),
+                url: "https://www.rust-lang.org/".to_owned(),
+                description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_owned(),
+                engine: smallvec!["Google".to_owned(), "DuckDuckGo".to_owned()],
             },
         );
@@ -327,13 +326,13 @@ mod tests {
     fn test_filter_with_lists_invalid_regex() {
         let mut map_to_be_filtered = HashMap::new();
         map_to_be_filtered.insert(
-            "https://www.example.com".
+            "https://www.example.com".to_owned(),
             SearchResult {
-                title: "Example Domain".
-                url: "https://www.example.com".
+                title: "Example Domain".to_owned(),
+                url: "https://www.example.com".to_owned(),
                 description: "This domain is for use in illustrative examples in documents."
-                    .
-                engine:
+                    .to_owned(),
+                engine: smallvec!["Google".to_owned(), "Bing".to_owned()],
             },
         );
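The `entry`/`and_modify`/`or_insert_with` step above is the heart of the aggregator: a duplicate URL from a second engine appends that engine's name instead of overwriting the result. A standalone sketch with toy types (a plain `Vec<String>` standing in for `SearchResult`):

// Standalone sketch, not websurfx code.
use std::collections::HashMap;

fn main() {
    let mut results: HashMap<String, Vec<String>> = HashMap::new();
    results.insert("https://example.com".to_owned(), vec!["searx".to_owned()]);

    // a duplicate hit for the same URL arriving from a second engine
    let (url, engine) = ("https://example.com".to_owned(), "duckduckgo");
    results
        .entry(url)
        // URL already present: just record the extra engine name
        .and_modify(|engines| engines.push(engine.to_owned()))
        // URL not seen before: insert a fresh entry
        .or_insert_with(|| vec![engine.to_owned()]);

    assert_eq!(results["https://example.com"].len(), 2);
}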
src/results/user_agent.rs
CHANGED
@@ -1,28 +1,32 @@
 //! This module provides the functionality to generate random user agent string.

+use std::sync::OnceLock;
+
 use fake_useragent::{Browsers, UserAgents, UserAgentsBuilder};

-static USER_AGENTS:
-    UserAgentsBuilder::new()
-        .cache(false)
-        .dir("/tmp")
-        .thread(1)
-        .set_browsers(
-            Browsers::new()
-                .set_chrome()
-                .set_safari()
-                .set_edge()
-                .set_firefox()
-                .set_mozilla(),
-        )
-        .build()
-});
+static USER_AGENTS: OnceLock<UserAgents> = OnceLock::new();

 /// A function to generate random user agent to improve privacy of the user.
 ///
 /// # Returns
 ///
 /// A randomly generated user agent string.
-pub fn random_user_agent() ->
-    USER_AGENTS
+pub fn random_user_agent() -> &'static str {
+    USER_AGENTS
+        .get_or_init(|| {
+            UserAgentsBuilder::new()
+                .cache(false)
+                .dir("/tmp")
+                .thread(1)
+                .set_browsers(
+                    Browsers::new()
+                        .set_chrome()
+                        .set_safari()
+                        .set_edge()
+                        .set_firefox()
+                        .set_mozilla(),
+                )
+                .build()
+        })
+        .random()
 }
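The rewrite above leans on `std::sync::OnceLock` (stable since Rust 1.70): the user-agent pool is built exactly once, on first call, and every caller afterwards gets a reference into the same cached value. A minimal standalone sketch of the pattern, with a cheap stand-in for the `UserAgentsBuilder` chain:

// Standalone sketch, not websurfx code.
use std::sync::OnceLock;

static GREETING: OnceLock<String> = OnceLock::new();

fn greeting() -> &'static str {
    GREETING.get_or_init(|| {
        // stands in for the expensive builder work done in the real module
        "hello".to_owned()
    })
}

fn main() {
    assert_eq!(greeting(), "hello");
    // both calls return the very same allocation; the closure ran only once
    assert!(std::ptr::eq(greeting(), greeting()));
}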
src/server/routes.rs
CHANGED
@@ -16,6 +16,10 @@ use handlebars::Handlebars;
 use serde::Deserialize;
 use tokio::join;

+// ---- Constants ----
+/// Initialize redis cache connection once and store it on the heap.
+const REDIS_CACHE: async_once_cell::OnceCell<RedisCache> = async_once_cell::OnceCell::new();
+
 /// A named struct which deserializes all the user provided search parameters and stores them.
 ///
 /// # Fields
@@ -62,10 +66,10 @@ pub async fn not_found(
 /// * `engines` - It stores the user selected upstream search engines selected from the UI.
 #[allow(dead_code)]
 #[derive(Deserialize)]
-struct Cookie {
-    theme:
-    colorscheme:
-    engines: Vec
+struct Cookie<'a> {
+    theme: &'a str,
+    colorscheme: &'a str,
+    engines: Vec<&'a str>,
 }

 /// Handles the route of search page of the `websurfx` meta search engine website and it takes
@@ -111,9 +115,9 @@ pub async fn search(
                 page - 1
             ),
             &config,
-            query
+            query,
             page - 1,
-            req
+            &req,
         ),
         results(
             format!(
@@ -121,9 +125,9 @@ pub async fn search(
                 config.binding_ip, config.port, query, page
             ),
             &config,
-            query
+            query,
             page,
-            req
+            &req,
         ),
         results(
             format!(
@@ -134,9 +138,9 @@ pub async fn search(
                 page + 1
             ),
             &config,
-            query
+            query,
             page + 1,
-            req
+            &req,
         )
     );
@@ -154,30 +158,35 @@ pub async fn search(
 async fn results(
     url: String,
     config: &Config,
-    query:
+    query: &str,
     page: u32,
-    req: HttpRequest,
+    req: &HttpRequest,
 ) -> Result<SearchResults, Box<dyn std::error::Error>> {
-
-
+    let redis_cache: RedisCache = REDIS_CACHE
+        .get_or_init(async {
+            // Initialize redis cache connection pool only one and store it in the heap.
+            RedisCache::new(&config.redis_url, 5).await.unwrap()
+        })
+        .await
+        .clone();
+
     // fetch the cached results json.
-    let cached_results_json =
+    let cached_results_json: Result<String, error_stack::Report<crate::cache::error::PoolError>> =
+        redis_cache.clone().cached_json(&url).await;
     // check if fetched cache results was indeed fetched or it was an error and if so
     // handle the data accordingly.
     match cached_results_json {
-        Ok(results) => Ok(serde_json::from_str::<SearchResults>(&results)
+        Ok(results) => Ok(serde_json::from_str::<SearchResults>(&results)?),
         Err(_) => {
            // check if the cookie value is empty or not if it is empty then use the
            // default selected upstream search engines from the config file otherwise
            // parse the non-empty cookie and grab the user selected engines from the
            // UI and use that.
-            let mut results:
-                .cookie("appCookie")
-            {
+            let mut results: SearchResults = match req.cookie("appCookie") {
                Some(cookie_value) => {
                    let cookie_value: Cookie = serde_json::from_str(cookie_value.name_value().1)?;

-                    let engines = cookie_value
+                    let engines: Vec<EngineHandler> = cookie_value
                        .engines
                        .iter()
                        .filter_map(|name| EngineHandler::new(name))
@@ -188,7 +197,7 @@ async fn results(
                        page,
                        config.aggregator.random_delay,
                        config.debug,
-                        engines,
+                        &engines,
                        config.request_timeout,
                    )
                    .await?
@@ -199,14 +208,18 @@ async fn results(
                        page,
                        config.aggregator.random_delay,
                        config.debug,
-                        config.upstream_search_engines
+                        &config.upstream_search_engines,
                        config.request_timeout,
                    )
                    .await?
                }
            };
-
-
+
+            results.add_style(&config.style);
+            redis_cache
+                .clone()
+                .cache_results(&serde_json::to_string(&results)?, &url)
+                .await?;
            Ok(results)
        }
    }
 }
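The route side relies on `async_once_cell::OnceCell`, the async sibling of `OnceLock`: the init future is awaited by the first caller and the result is cached for everyone after. A standalone toy (assumed dependencies: `async-once-cell` and `tokio` with the `rt` and `macros` features); note the sketch keeps the cell in a `static`, so one cell lives for the whole process and the init future can run at most once:

// Standalone sketch, not websurfx code.
use async_once_cell::OnceCell;

static VALUE: OnceCell<u32> = OnceCell::new();

async fn value() -> u32 {
    *VALUE
        .get_or_init(async {
            // stands in for expensive async setup, such as opening a
            // connection pool to a cache backend
            42
        })
        .await
}

#[tokio::main]
async fn main() {
    assert_eq!(value().await, 42);
    assert_eq!(value().await, 42); // second call reuses the cached value
}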