Spaces:
Runtime error
Runtime error
Merge pull request #202 from neon-mmd/feat-disallow-user-to-search-via-lists
Browse files- Cargo.lock +11 -11
- Cargo.toml +4 -4
- public/images/barricade.png +0 -0
- public/images/filter.png +0 -0
- public/static/themes/simple.css +29 -0
- public/templates/search.html +62 -30
- src/config/parser.rs +12 -0
- src/engines/duckduckgo.rs +1 -0
- src/engines/engine_models.rs +1 -0
- src/engines/searx.rs +11 -2
- src/results/aggregation_models.rs +33 -4
- src/results/aggregator.rs +23 -14
- src/server/routes.rs +73 -15
- websurfx/config.lua +11 -0
Cargo.lock
CHANGED
@@ -532,18 +532,18 @@ dependencies = [
|
|
532 |
|
533 |
[[package]]
|
534 |
name = "clap"
|
535 |
-
version = "4.4.
|
536 |
source = "registry+https://github.com/rust-lang/crates.io-index"
|
537 |
-
checksum = "
|
538 |
dependencies = [
|
539 |
"clap_builder",
|
540 |
]
|
541 |
|
542 |
[[package]]
|
543 |
name = "clap_builder"
|
544 |
-
version = "4.4.
|
545 |
source = "registry+https://github.com/rust-lang/crates.io-index"
|
546 |
-
checksum = "
|
547 |
dependencies = [
|
548 |
"anstyle",
|
549 |
"clap_lex",
|
@@ -1270,9 +1270,9 @@ checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7"
|
|
1270 |
|
1271 |
[[package]]
|
1272 |
name = "handlebars"
|
1273 |
-
version = "4.
|
1274 |
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1275 |
-
checksum = "
|
1276 |
dependencies = [
|
1277 |
"log",
|
1278 |
"pest",
|
@@ -2494,9 +2494,9 @@ dependencies = [
|
|
2494 |
|
2495 |
[[package]]
|
2496 |
name = "redis"
|
2497 |
-
version = "0.23.
|
2498 |
source = "registry+https://github.com/rust-lang/crates.io-index"
|
2499 |
-
checksum = "
|
2500 |
dependencies = [
|
2501 |
"arc-swap",
|
2502 |
"async-trait",
|
@@ -2663,9 +2663,9 @@ dependencies = [
|
|
2663 |
|
2664 |
[[package]]
|
2665 |
name = "rustix"
|
2666 |
-
version = "0.38.
|
2667 |
source = "registry+https://github.com/rust-lang/crates.io-index"
|
2668 |
-
checksum = "
|
2669 |
dependencies = [
|
2670 |
"bitflags 2.4.0",
|
2671 |
"errno",
|
@@ -3697,7 +3697,7 @@ dependencies = [
|
|
3697 |
|
3698 |
[[package]]
|
3699 |
name = "websurfx"
|
3700 |
-
version = "0.
|
3701 |
dependencies = [
|
3702 |
"actix-cors",
|
3703 |
"actix-files",
|
|
|
532 |
|
533 |
[[package]]
|
534 |
name = "clap"
|
535 |
+
version = "4.4.2"
|
536 |
source = "registry+https://github.com/rust-lang/crates.io-index"
|
537 |
+
checksum = "6a13b88d2c62ff462f88e4a121f17a82c1af05693a2f192b5c38d14de73c19f6"
|
538 |
dependencies = [
|
539 |
"clap_builder",
|
540 |
]
|
541 |
|
542 |
[[package]]
|
543 |
name = "clap_builder"
|
544 |
+
version = "4.4.2"
|
545 |
source = "registry+https://github.com/rust-lang/crates.io-index"
|
546 |
+
checksum = "2bb9faaa7c2ef94b2743a21f5a29e6f0010dff4caa69ac8e9d6cf8b6fa74da08"
|
547 |
dependencies = [
|
548 |
"anstyle",
|
549 |
"clap_lex",
|
|
|
1270 |
|
1271 |
[[package]]
|
1272 |
name = "handlebars"
|
1273 |
+
version = "4.4.0"
|
1274 |
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1275 |
+
checksum = "c39b3bc2a8f715298032cf5087e58573809374b08160aa7d750582bdb82d2683"
|
1276 |
dependencies = [
|
1277 |
"log",
|
1278 |
"pest",
|
|
|
2494 |
|
2495 |
[[package]]
|
2496 |
name = "redis"
|
2497 |
+
version = "0.23.3"
|
2498 |
source = "registry+https://github.com/rust-lang/crates.io-index"
|
2499 |
+
checksum = "4f49cdc0bb3f412bf8e7d1bd90fe1d9eb10bc5c399ba90973c14662a27b3f8ba"
|
2500 |
dependencies = [
|
2501 |
"arc-swap",
|
2502 |
"async-trait",
|
|
|
2663 |
|
2664 |
[[package]]
|
2665 |
name = "rustix"
|
2666 |
+
version = "0.38.11"
|
2667 |
source = "registry+https://github.com/rust-lang/crates.io-index"
|
2668 |
+
checksum = "c0c3dde1fc030af041adc40e79c0e7fbcf431dd24870053d187d7c66e4b87453"
|
2669 |
dependencies = [
|
2670 |
"bitflags 2.4.0",
|
2671 |
"errno",
|
|
|
3697 |
|
3698 |
[[package]]
|
3699 |
name = "websurfx"
|
3700 |
+
version = "0.19.0"
|
3701 |
dependencies = [
|
3702 |
"actix-cors",
|
3703 |
"actix-files",
|
Cargo.toml
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
[package]
|
2 |
name = "websurfx"
|
3 |
-
version = "0.
|
4 |
edition = "2021"
|
5 |
description = "An open-source alternative to Searx that provides clean, ad-free, and organic results with incredible speed while keeping privacy and security in mind."
|
6 |
repository = "https://github.com/neon-mmd/websurfx"
|
@@ -10,7 +10,7 @@ license = "AGPL-3.0"
|
|
10 |
reqwest = {version="0.11.20",features=["json"]}
|
11 |
tokio = {version="1.32.0",features=["rt-multi-thread","macros"]}
|
12 |
serde = {version="1.0.188",features=["derive"]}
|
13 |
-
handlebars = { version = "4.
|
14 |
scraper = {version="0.17.1"}
|
15 |
actix-web = {version="4.4.0", features = ["cookies"]}
|
16 |
actix-files = {version="0.6.2"}
|
@@ -19,8 +19,8 @@ serde_json = {version="1.0.105"}
|
|
19 |
fake-useragent = {version="0.1.3"}
|
20 |
env_logger = {version="0.10.0"}
|
21 |
log = {version="0.4.20"}
|
22 |
-
mlua = {version="0.8.10",features=["luajit"]}
|
23 |
-
redis = {version="0.23.
|
24 |
md5 = {version="0.7.0"}
|
25 |
rand={version="0.8.5"}
|
26 |
once_cell = {version="1.18.0"}
|
|
|
1 |
[package]
|
2 |
name = "websurfx"
|
3 |
+
version = "0.19.0"
|
4 |
edition = "2021"
|
5 |
description = "An open-source alternative to Searx that provides clean, ad-free, and organic results with incredible speed while keeping privacy and security in mind."
|
6 |
repository = "https://github.com/neon-mmd/websurfx"
|
|
|
10 |
reqwest = {version="0.11.20",features=["json"]}
|
11 |
tokio = {version="1.32.0",features=["rt-multi-thread","macros"]}
|
12 |
serde = {version="1.0.188",features=["derive"]}
|
13 |
+
handlebars = { version = "4.4.0", features = ["dir_source"] }
|
14 |
scraper = {version="0.17.1"}
|
15 |
actix-web = {version="4.4.0", features = ["cookies"]}
|
16 |
actix-files = {version="0.6.2"}
|
|
|
19 |
fake-useragent = {version="0.1.3"}
|
20 |
env_logger = {version="0.10.0"}
|
21 |
log = {version="0.4.20"}
|
22 |
+
mlua = {version="0.8.10", features=["luajit"]}
|
23 |
+
redis = {version="0.23.3", features=["tokio-comp","connection-manager"]}
|
24 |
md5 = {version="0.7.0"}
|
25 |
rand={version="0.8.5"}
|
26 |
once_cell = {version="1.18.0"}
|
public/images/barricade.png
ADDED
public/images/filter.png
ADDED
public/static/themes/simple.css
CHANGED
@@ -132,6 +132,35 @@ body {
|
|
132 |
width: 1.2rem;
|
133 |
height: 1.2rem;
|
134 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
135 |
|
136 |
/* styles for the footer and header */
|
137 |
|
|
|
132 |
width: 1.2rem;
|
133 |
height: 1.2rem;
|
134 |
}
|
135 |
+
.results .result_disallowed,
|
136 |
+
.results .result_filtered {
|
137 |
+
display: flex;
|
138 |
+
justify-content: center;
|
139 |
+
align-items: center;
|
140 |
+
gap: 10rem;
|
141 |
+
font-size: 2rem;
|
142 |
+
color: var(--foreground-color);
|
143 |
+
margin: 0rem 7rem;
|
144 |
+
}
|
145 |
+
|
146 |
+
.results .result_disallowed .user_query,
|
147 |
+
.results .result_filtered .user_query {
|
148 |
+
color: var(--background-color);
|
149 |
+
font-weight: 300;
|
150 |
+
}
|
151 |
+
|
152 |
+
.results .result_disallowed img,
|
153 |
+
.results .result_filtered img {
|
154 |
+
width: 30rem;
|
155 |
+
}
|
156 |
+
|
157 |
+
.results .result_disallowed div,
|
158 |
+
.results .result_filtered div {
|
159 |
+
display: flex;
|
160 |
+
flex-direction: column;
|
161 |
+
gap: 1rem;
|
162 |
+
line-break: strict;
|
163 |
+
}
|
164 |
|
165 |
/* styles for the footer and header */
|
166 |
|
public/templates/search.html
CHANGED
@@ -1,37 +1,69 @@
|
|
1 |
{{>header this.style}}
|
2 |
<main class="results">
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
</div>
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
<li>Make sure that all words are spelled correctly.</li>
|
22 |
-
<li>Try different keywords.</li>
|
23 |
-
<li>Try more general keywords.</li>
|
24 |
-
</ul>
|
25 |
-
<img src="./images/no_results.gif" alt="Man fishing gif" />
|
26 |
</div>
|
27 |
-
{{/if}}
|
28 |
-
</div>
|
29 |
-
<div class="page_navigation">
|
30 |
-
<button type="button" onclick="navigate_backward()">
|
31 |
-
← previous
|
32 |
-
</button>
|
33 |
-
<button type="button" onclick="navigate_forward()">next →</button>
|
34 |
-
</div>
|
35 |
</main>
|
36 |
<script src="static/index.js"></script>
|
37 |
<script src="static/pagination.js"></script>
|
|
|
1 |
{{>header this.style}}
|
2 |
<main class="results">
|
3 |
+
{{>search_bar this}}
|
4 |
+
<div class="results_aggregated">
|
5 |
+
{{#if results}} {{#each results}}
|
6 |
+
<div class="result">
|
7 |
+
<h1><a href="{{{this.url}}}">{{{this.title}}}</a></h1>
|
8 |
+
<small>{{{this.url}}}</small>
|
9 |
+
<p>{{{this.description}}}</p>
|
10 |
+
<div class="upstream_engines">
|
11 |
+
{{#each engine}}
|
12 |
+
<span>{{{this}}}</span>
|
13 |
+
{{/each}}
|
14 |
+
</div>
|
15 |
+
</div>
|
16 |
+
{{/each}} {{else}} {{#if disallowed}}
|
17 |
+
<div class="result_disallowed">
|
18 |
+
<div class="description">
|
19 |
+
<p>
|
20 |
+
Your search - <span class="user_query">{{{this.pageQuery}}}</span> -
|
21 |
+
has been disallowed.
|
22 |
+
</p>
|
23 |
+
<p class="description_paragraph">Dear user,</p>
|
24 |
+
<p class="description_paragraph">
|
25 |
+
The query - <span class="user_query">{{{this.pageQuery}}}</span> - has
|
26 |
+
been blacklisted via server configuration and hence disallowed by the
|
27 |
+
server. Henceforth no results could be displayed for your query.
|
28 |
+
</p>
|
29 |
+
</div>
|
30 |
+
<img src="./images/barricade.png" alt="Image of a Barricade" />
|
31 |
+
</div>
|
32 |
+
{{else}} {{#if filtered}}
|
33 |
+
<div class="result_filtered">
|
34 |
+
<div class="description">
|
35 |
+
<p>
|
36 |
+
Your search - <span class="user_query">{{{this.pageQuery}}}</span> -
|
37 |
+
has been filtered.
|
38 |
+
</p>
|
39 |
+
<p class="description_paragraph">Dear user,</p>
|
40 |
+
<p class="description_paragraph">
|
41 |
+
All the search results contain results that has been configured to be
|
42 |
+
filtered out via server configuration and henceforth has been
|
43 |
+
completely filtered out.
|
44 |
+
</p>
|
45 |
+
</div>
|
46 |
+
<img src="./images/filter.png" alt="Image of a paper inside a funnel" />
|
47 |
+
</div>
|
48 |
+
{{else}}
|
49 |
+
<div class="result_not_found">
|
50 |
+
<p>Your search - {{{this.pageQuery}}} - did not match any documents.</p>
|
51 |
+
<p class="suggestions">Suggestions:</p>
|
52 |
+
<ul>
|
53 |
+
<li>Make sure that all words are spelled correctly.</li>
|
54 |
+
<li>Try different keywords.</li>
|
55 |
+
<li>Try more general keywords.</li>
|
56 |
+
</ul>
|
57 |
+
<img src="./images/no_results.gif" alt="Man fishing gif" />
|
58 |
+
</div>
|
59 |
+
{{/if}} {{/if}} {{/if}}
|
60 |
</div>
|
61 |
+
<div class="page_navigation">
|
62 |
+
<button type="button" onclick="navigate_backward()">
|
63 |
+
← previous
|
64 |
+
</button>
|
65 |
+
<button type="button" onclick="navigate_forward()">next →</button>
|
|
|
|
|
|
|
|
|
|
|
66 |
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
67 |
</main>
|
68 |
<script src="static/index.js"></script>
|
69 |
<script src="static/pagination.js"></script>
|
src/config/parser.rs
CHANGED
@@ -35,6 +35,7 @@ pub struct Config {
|
|
35 |
pub upstream_search_engines: Vec<crate::engines::engine_models::EngineHandler>,
|
36 |
pub request_timeout: u8,
|
37 |
pub threads: u8,
|
|
|
38 |
}
|
39 |
|
40 |
/// Configuration options for the aggregator.
|
@@ -89,6 +90,16 @@ impl Config {
|
|
89 |
parsed_threads
|
90 |
};
|
91 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
92 |
Ok(Config {
|
93 |
port: globals.get::<_, u16>("port")?,
|
94 |
binding_ip: globals.get::<_, String>("binding_ip")?,
|
@@ -110,6 +121,7 @@ impl Config {
|
|
110 |
.collect(),
|
111 |
request_timeout: globals.get::<_, u8>("request_timeout")?,
|
112 |
threads,
|
|
|
113 |
})
|
114 |
}
|
115 |
}
|
|
|
35 |
pub upstream_search_engines: Vec<crate::engines::engine_models::EngineHandler>,
|
36 |
pub request_timeout: u8,
|
37 |
pub threads: u8,
|
38 |
+
pub safe_search: u8,
|
39 |
}
|
40 |
|
41 |
/// Configuration options for the aggregator.
|
|
|
90 |
parsed_threads
|
91 |
};
|
92 |
|
93 |
+
let parsed_safe_search: u8 = globals.get::<_, u8>("safe_search")?;
|
94 |
+
let safe_search: u8 = match parsed_safe_search {
|
95 |
+
0..=4 => parsed_safe_search,
|
96 |
+
_ => {
|
97 |
+
log::error!("Config Error: The value of `safe_search` option should be a non zero positive integer from 0 to 4.");
|
98 |
+
log::error!("Falling back to using the value `1` for the option");
|
99 |
+
1
|
100 |
+
}
|
101 |
+
};
|
102 |
+
|
103 |
Ok(Config {
|
104 |
port: globals.get::<_, u16>("port")?,
|
105 |
binding_ip: globals.get::<_, String>("binding_ip")?,
|
|
|
121 |
.collect(),
|
122 |
request_timeout: globals.get::<_, u8>("request_timeout")?,
|
123 |
threads,
|
124 |
+
safe_search,
|
125 |
})
|
126 |
}
|
127 |
}
|
src/engines/duckduckgo.rs
CHANGED
@@ -43,6 +43,7 @@ impl SearchEngine for DuckDuckGo {
|
|
43 |
page: u32,
|
44 |
user_agent: &str,
|
45 |
request_timeout: u8,
|
|
|
46 |
) -> Result<HashMap<String, SearchResult>, EngineError> {
|
47 |
// Page number can be missing or empty string and so appropriate handling is required
|
48 |
// so that upstream server receives valid page number.
|
|
|
43 |
page: u32,
|
44 |
user_agent: &str,
|
45 |
request_timeout: u8,
|
46 |
+
_safe_search: u8,
|
47 |
) -> Result<HashMap<String, SearchResult>, EngineError> {
|
48 |
// Page number can be missing or empty string and so appropriate handling is required
|
49 |
// so that upstream server receives valid page number.
|
src/engines/engine_models.rs
CHANGED
@@ -71,6 +71,7 @@ pub trait SearchEngine: Sync + Send {
|
|
71 |
page: u32,
|
72 |
user_agent: &str,
|
73 |
request_timeout: u8,
|
|
|
74 |
) -> Result<HashMap<String, SearchResult>, EngineError>;
|
75 |
}
|
76 |
|
|
|
71 |
page: u32,
|
72 |
user_agent: &str,
|
73 |
request_timeout: u8,
|
74 |
+
safe_search: u8,
|
75 |
) -> Result<HashMap<String, SearchResult>, EngineError>;
|
76 |
}
|
77 |
|
src/engines/searx.rs
CHANGED
@@ -42,12 +42,21 @@ impl SearchEngine for Searx {
|
|
42 |
page: u32,
|
43 |
user_agent: &str,
|
44 |
request_timeout: u8,
|
|
|
45 |
) -> Result<HashMap<String, SearchResult>, EngineError> {
|
46 |
// Page number can be missing or empty string and so appropriate handling is required
|
47 |
// so that upstream server receives valid page number.
|
|
|
|
|
|
|
|
|
48 |
let url: String = match page {
|
49 |
-
0 | 1 =>
|
50 |
-
|
|
|
|
|
|
|
|
|
51 |
};
|
52 |
|
53 |
// initializing headers and adding appropriate headers.
|
|
|
42 |
page: u32,
|
43 |
user_agent: &str,
|
44 |
request_timeout: u8,
|
45 |
+
mut safe_search: u8,
|
46 |
) -> Result<HashMap<String, SearchResult>, EngineError> {
|
47 |
// Page number can be missing or empty string and so appropriate handling is required
|
48 |
// so that upstream server receives valid page number.
|
49 |
+
if safe_search == 3 {
|
50 |
+
safe_search = 2;
|
51 |
+
};
|
52 |
+
|
53 |
let url: String = match page {
|
54 |
+
0 | 1 => {
|
55 |
+
format!("https://searx.work/search?q={query}&pageno=1&safesearch={safe_search}")
|
56 |
+
}
|
57 |
+
_ => format!(
|
58 |
+
"https://searx.work/search?q={query}&pageno={page}&safesearch={safe_search}"
|
59 |
+
),
|
60 |
};
|
61 |
|
62 |
// initializing headers and adding appropriate headers.
|
src/results/aggregation_models.rs
CHANGED
@@ -102,13 +102,15 @@ impl EngineErrorInfo {
|
|
102 |
/// and the type of error that caused it.
|
103 |
/// * `empty_result_set` - Stores a boolean which indicates that no engines gave a result for the
|
104 |
/// given search query.
|
105 |
-
#[derive(Serialize, Deserialize)]
|
106 |
#[serde(rename_all = "camelCase")]
|
107 |
pub struct SearchResults {
|
108 |
pub results: Vec<SearchResult>,
|
109 |
pub page_query: String,
|
110 |
pub style: Style,
|
111 |
-
pub engine_errors_info:
|
|
|
|
|
112 |
}
|
113 |
|
114 |
impl SearchResults {
|
@@ -122,6 +124,7 @@ impl SearchResults {
|
|
122 |
/// the search url.
|
123 |
/// * `empty_result_set` - Takes a boolean which indicates that no engines gave a result for the
|
124 |
/// given search query.
|
|
|
125 |
pub fn new(
|
126 |
results: Vec<SearchResult>,
|
127 |
page_query: &str,
|
@@ -131,12 +134,38 @@ impl SearchResults {
|
|
131 |
results,
|
132 |
page_query: page_query.to_owned(),
|
133 |
style: Style::default(),
|
134 |
-
engine_errors_info:
|
|
|
|
|
135 |
}
|
136 |
}
|
137 |
|
138 |
/// A setter function to add website style to the return search results.
|
139 |
pub fn add_style(&mut self, style: &Style) {
|
140 |
-
self.style = style.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
141 |
}
|
142 |
}
|
|
|
102 |
/// and the type of error that caused it.
|
103 |
/// * `empty_result_set` - Stores a boolean which indicates that no engines gave a result for the
|
104 |
/// given search query.
|
105 |
+
#[derive(Serialize, Deserialize, Default)]
|
106 |
#[serde(rename_all = "camelCase")]
|
107 |
pub struct SearchResults {
|
108 |
pub results: Vec<SearchResult>,
|
109 |
pub page_query: String,
|
110 |
pub style: Style,
|
111 |
+
pub engine_errors_info: Vec<EngineErrorInfo>,
|
112 |
+
pub disallowed: bool,
|
113 |
+
pub filtered: bool,
|
114 |
}
|
115 |
|
116 |
impl SearchResults {
|
|
|
124 |
/// the search url.
|
125 |
/// * `empty_result_set` - Takes a boolean which indicates that no engines gave a result for the
|
126 |
/// given search query.
|
127 |
+
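/// * `engine_errors_info` - Takes the engine errors info which is copied into the `engine_errors_info` field.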
|
128 |
pub fn new(
|
129 |
results: Vec<SearchResult>,
|
130 |
page_query: &str,
|
|
|
134 |
results,
|
135 |
page_query: page_query.to_owned(),
|
136 |
style: Style::default(),
|
137 |
+
engine_errors_info: engine_errors_info.to_owned(),
|
138 |
+
disallowed: Default::default(),
|
139 |
+
filtered: Default::default(),
|
140 |
}
|
141 |
}
|
142 |
|
143 |
/// A setter function to add website style to the return search results.
|
144 |
pub fn add_style(&mut self, style: &Style) {
|
145 |
+
self.style = style.clone();
|
146 |
+
}
|
147 |
+
|
148 |
+
/// A setter function that sets disallowed to true.
|
149 |
+
pub fn set_disallowed(&mut self) {
|
150 |
+
self.disallowed = true;
|
151 |
+
}
|
152 |
+
|
153 |
+
/// A setter function to set the current page search query.
|
154 |
+
pub fn set_page_query(&mut self, page: &str) {
|
155 |
+
self.page_query = page.to_owned();
|
156 |
+
}
|
157 |
+
|
158 |
+
/// A setter function that sets `filtered` to true.
|
159 |
+
pub fn set_filtered(&mut self) {
|
160 |
+
self.filtered = true;
|
161 |
+
}
|
162 |
+
|
163 |
+
/// A getter function that takes the value of `engine_errors_info`, leaving an empty vector in its place.
|
164 |
+
pub fn engine_errors_info(&mut self) -> Vec<EngineErrorInfo> {
|
165 |
+
std::mem::take(&mut self.engine_errors_info)
|
166 |
+
}
|
167 |
+
/// A getter function that gets the value of `results`.
|
168 |
+
pub fn results(&mut self) -> Vec<SearchResult> {
|
169 |
+
self.results.clone()
|
170 |
}
|
171 |
}
|
src/results/aggregator.rs
CHANGED
@@ -70,6 +70,7 @@ pub async fn aggregate(
|
|
70 |
debug: bool,
|
71 |
upstream_search_engines: &[EngineHandler],
|
72 |
request_timeout: u8,
|
|
|
73 |
) -> Result<SearchResults, Box<dyn std::error::Error>> {
|
74 |
let user_agent: &str = random_user_agent();
|
75 |
|
@@ -91,7 +92,13 @@ pub async fn aggregate(
|
|
91 |
let query: String = query.to_owned();
|
92 |
tasks.push(tokio::spawn(async move {
|
93 |
search_engine
|
94 |
-
.results(
|
|
|
|
|
|
|
|
|
|
|
|
|
95 |
.await
|
96 |
}));
|
97 |
}
|
@@ -150,20 +157,22 @@ pub async fn aggregate(
|
|
150 |
}
|
151 |
}
|
152 |
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
|
|
159 |
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
|
165 |
|
166 |
-
|
|
|
167 |
|
168 |
let results: Vec<SearchResult> = result_map.into_values().collect();
|
169 |
|
@@ -189,7 +198,7 @@ pub fn filter_with_lists(
|
|
189 |
let mut reader = BufReader::new(File::open(file_path)?);
|
190 |
|
191 |
for line in reader.by_ref().lines() {
|
192 |
-
let re = Regex::new(
|
193 |
|
194 |
// Iterate over each search result in the map and check if it matches the regex pattern
|
195 |
for (url, search_result) in map_to_be_filtered.clone().into_iter() {
|
|
|
70 |
debug: bool,
|
71 |
upstream_search_engines: &[EngineHandler],
|
72 |
request_timeout: u8,
|
73 |
+
safe_search: u8,
|
74 |
) -> Result<SearchResults, Box<dyn std::error::Error>> {
|
75 |
let user_agent: &str = random_user_agent();
|
76 |
|
|
|
92 |
let query: String = query.to_owned();
|
93 |
tasks.push(tokio::spawn(async move {
|
94 |
search_engine
|
95 |
+
.results(
|
96 |
+
&query,
|
97 |
+
page,
|
98 |
+
user_agent.clone(),
|
99 |
+
request_timeout,
|
100 |
+
safe_search,
|
101 |
+
)
|
102 |
.await
|
103 |
}));
|
104 |
}
|
|
|
157 |
}
|
158 |
}
|
159 |
|
160 |
+
if safe_search >= 3 {
|
161 |
+
let mut blacklist_map: HashMap<String, SearchResult> = HashMap::new();
|
162 |
+
filter_with_lists(
|
163 |
+
&mut result_map,
|
164 |
+
&mut blacklist_map,
|
165 |
+
file_path(FileType::BlockList)?,
|
166 |
+
)?;
|
167 |
|
168 |
+
filter_with_lists(
|
169 |
+
&mut blacklist_map,
|
170 |
+
&mut result_map,
|
171 |
+
file_path(FileType::AllowList)?,
|
172 |
+
)?;
|
173 |
|
174 |
+
drop(blacklist_map);
|
175 |
+
}
|
176 |
|
177 |
let results: Vec<SearchResult> = result_map.into_values().collect();
|
178 |
|
|
|
198 |
let mut reader = BufReader::new(File::open(file_path)?);
|
199 |
|
200 |
for line in reader.by_ref().lines() {
|
201 |
+
let re = Regex::new(line?.trim())?;
|
202 |
|
203 |
// Iterate over each search result in the map and check if it matches the regex pattern
|
204 |
for (url, search_result) in map_to_be_filtered.clone().into_iter() {
|
src/server/routes.rs
CHANGED
@@ -2,7 +2,10 @@
|
|
2 |
//! meta search engine website and provide appropriate response to each route/page
|
3 |
//! when requested.
|
4 |
|
5 |
-
use std::
|
|
|
|
|
|
|
6 |
|
7 |
use crate::{
|
8 |
cache::cacher::RedisCache,
|
@@ -13,12 +16,13 @@ use crate::{
|
|
13 |
};
|
14 |
use actix_web::{get, web, HttpRequest, HttpResponse};
|
15 |
use handlebars::Handlebars;
|
|
|
16 |
use serde::Deserialize;
|
17 |
use tokio::join;
|
18 |
|
19 |
// ---- Constants ----
|
20 |
/// Initialize redis cache connection once and store it on the heap.
|
21 |
-
|
22 |
|
23 |
/// A named struct which deserializes all the user provided search parameters and stores them.
|
24 |
///
|
@@ -32,6 +36,7 @@ const REDIS_CACHE: async_once_cell::OnceCell<RedisCache> = async_once_cell::Once
|
|
32 |
struct SearchParams {
|
33 |
q: Option<String>,
|
34 |
page: Option<u32>,
|
|
|
35 |
}
|
36 |
|
37 |
/// Handles the route of index page or main page of the `websurfx` meta search engine website.
|
@@ -105,42 +110,58 @@ pub async fn search(
|
|
105 |
None => 1,
|
106 |
};
|
107 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
108 |
let (_, results, _) = join!(
|
109 |
results(
|
110 |
format!(
|
111 |
-
"http://{}:{}/search?q={}&page={}",
|
112 |
config.binding_ip,
|
113 |
config.port,
|
114 |
query,
|
115 |
-
page - 1
|
|
|
116 |
),
|
117 |
&config,
|
118 |
query,
|
119 |
page - 1,
|
120 |
-
|
|
|
121 |
),
|
122 |
results(
|
123 |
format!(
|
124 |
-
"http://{}:{}/search?q={}&page={}",
|
125 |
-
config.binding_ip, config.port, query, page
|
126 |
),
|
127 |
&config,
|
128 |
query,
|
129 |
page,
|
130 |
-
|
|
|
131 |
),
|
132 |
results(
|
133 |
format!(
|
134 |
-
"http://{}:{}/search?q={}&page={}",
|
135 |
config.binding_ip,
|
136 |
config.port,
|
137 |
query,
|
138 |
-
page + 1
|
|
|
139 |
),
|
140 |
&config,
|
141 |
query,
|
142 |
page + 1,
|
143 |
-
|
|
|
144 |
)
|
145 |
);
|
146 |
|
@@ -160,9 +181,10 @@ async fn results(
|
|
160 |
config: &Config,
|
161 |
query: &str,
|
162 |
page: u32,
|
163 |
-
req:
|
|
|
164 |
) -> Result<SearchResults, Box<dyn std::error::Error>> {
|
165 |
-
let redis_cache: RedisCache = REDIS_CACHE
|
166 |
.get_or_init(async {
|
167 |
// Initialize redis cache connection pool only once and store it in the heap.
|
168 |
RedisCache::new(&config.redis_url, 5).await.unwrap()
|
@@ -178,6 +200,23 @@ async fn results(
|
|
178 |
match cached_results_json {
|
179 |
Ok(results) => Ok(serde_json::from_str::<SearchResults>(&results)?),
|
180 |
Err(_) => {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
181 |
// check if the cookie value is empty or not if it is empty then use the
|
182 |
// default selected upstream search engines from the config file otherwise
|
183 |
// parse the non-empty cookie and grab the user selected engines from the
|
@@ -199,6 +238,7 @@ async fn results(
|
|
199 |
config.debug,
|
200 |
&engines,
|
201 |
config.request_timeout,
|
|
|
202 |
)
|
203 |
.await?
|
204 |
}
|
@@ -210,14 +250,16 @@ async fn results(
|
|
210 |
config.debug,
|
211 |
&config.upstream_search_engines,
|
212 |
config.request_timeout,
|
|
|
213 |
)
|
214 |
.await?
|
215 |
}
|
216 |
};
|
217 |
-
|
|
|
|
|
218 |
results.add_style(&config.style);
|
219 |
redis_cache
|
220 |
-
.clone()
|
221 |
.cache_results(&serde_json::to_string(&results)?, &url)
|
222 |
.await?;
|
223 |
Ok(results)
|
@@ -225,6 +267,22 @@ async fn results(
|
|
225 |
}
|
226 |
}
|
227 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
228 |
/// Handles the route of robots.txt page of the `websurfx` meta search engine website.
|
229 |
#[get("/robots.txt")]
|
230 |
pub async fn robots_data(_req: HttpRequest) -> Result<HttpResponse, Box<dyn std::error::Error>> {
|
|
|
2 |
//! meta search engine website and provide appropriate response to each route/page
|
3 |
//! when requested.
|
4 |
|
5 |
+
use std::{
|
6 |
+
fs::{read_to_string, File},
|
7 |
+
io::{BufRead, BufReader, Read},
|
8 |
+
};
|
9 |
|
10 |
use crate::{
|
11 |
cache::cacher::RedisCache,
|
|
|
16 |
};
|
17 |
use actix_web::{get, web, HttpRequest, HttpResponse};
|
18 |
use handlebars::Handlebars;
|
19 |
+
use regex::Regex;
|
20 |
use serde::Deserialize;
|
21 |
use tokio::join;
|
22 |
|
23 |
// ---- Constants ----
|
24 |
/// Initialize redis cache connection once and store it on the heap.
|
25 |
+
static REDIS_CACHE: async_once_cell::OnceCell<RedisCache> = async_once_cell::OnceCell::new();
|
26 |
|
27 |
/// A named struct which deserializes all the user provided search parameters and stores them.
|
28 |
///
|
|
|
36 |
struct SearchParams {
|
37 |
q: Option<String>,
|
38 |
page: Option<u32>,
|
39 |
+
safesearch: Option<u8>,
|
40 |
}
|
41 |
|
42 |
/// Handles the route of index page or main page of the `websurfx` meta search engine website.
|
|
|
110 |
None => 1,
|
111 |
};
|
112 |
|
113 |
+
let safe_search: u8 = match config.safe_search {
|
114 |
+
3..=4 => config.safe_search,
|
115 |
+
_ => match ¶ms.safesearch {
|
116 |
+
Some(safesearch) => match safesearch {
|
117 |
+
0..=2 => *safesearch,
|
118 |
+
_ => 1,
|
119 |
+
},
|
120 |
+
None => config.safe_search,
|
121 |
+
},
|
122 |
+
};
|
123 |
+
|
124 |
let (_, results, _) = join!(
|
125 |
results(
|
126 |
format!(
|
127 |
+
"http://{}:{}/search?q={}&page={}&safesearch={}",
|
128 |
config.binding_ip,
|
129 |
config.port,
|
130 |
query,
|
131 |
+
page - 1,
|
132 |
+
safe_search
|
133 |
),
|
134 |
&config,
|
135 |
query,
|
136 |
page - 1,
|
137 |
+
req.clone(),
|
138 |
+
safe_search
|
139 |
),
|
140 |
results(
|
141 |
format!(
|
142 |
+
"http://{}:{}/search?q={}&page={}&safesearch={}",
|
143 |
+
config.binding_ip, config.port, query, page, safe_search
|
144 |
),
|
145 |
&config,
|
146 |
query,
|
147 |
page,
|
148 |
+
req.clone(),
|
149 |
+
safe_search
|
150 |
),
|
151 |
results(
|
152 |
format!(
|
153 |
+
"http://{}:{}/search?q={}&page={}&safesearch={}",
|
154 |
config.binding_ip,
|
155 |
config.port,
|
156 |
query,
|
157 |
+
page + 1,
|
158 |
+
safe_search
|
159 |
),
|
160 |
&config,
|
161 |
query,
|
162 |
page + 1,
|
163 |
+
req.clone(),
|
164 |
+
safe_search
|
165 |
)
|
166 |
);
|
167 |
|
|
|
181 |
config: &Config,
|
182 |
query: &str,
|
183 |
page: u32,
|
184 |
+
req: HttpRequest,
|
185 |
+
safe_search: u8,
|
186 |
) -> Result<SearchResults, Box<dyn std::error::Error>> {
|
187 |
+
let mut redis_cache: RedisCache = REDIS_CACHE
|
188 |
.get_or_init(async {
|
189 |
// Initialize redis cache connection pool only once and store it in the heap.
|
190 |
RedisCache::new(&config.redis_url, 5).await.unwrap()
|
|
|
200 |
match cached_results_json {
|
201 |
Ok(results) => Ok(serde_json::from_str::<SearchResults>(&results)?),
|
202 |
Err(_) => {
|
203 |
+
if safe_search == 4 {
|
204 |
+
let mut results: SearchResults = SearchResults::default();
|
205 |
+
let mut _flag: bool =
|
206 |
+
is_match_from_filter_list(file_path(FileType::BlockList)?, query)?;
|
207 |
+
_flag = !is_match_from_filter_list(file_path(FileType::AllowList)?, query)?;
|
208 |
+
|
209 |
+
if _flag {
|
210 |
+
results.set_disallowed();
|
211 |
+
results.add_style(&config.style);
|
212 |
+
results.set_page_query(query);
|
213 |
+
redis_cache
|
214 |
+
.cache_results(&serde_json::to_string(&results)?, &url)
|
215 |
+
.await?;
|
216 |
+
return Ok(results);
|
217 |
+
}
|
218 |
+
}
|
219 |
+
|
220 |
// check if the cookie value is empty or not if it is empty then use the
|
221 |
// default selected upstream search engines from the config file otherwise
|
222 |
// parse the non-empty cookie and grab the user selected engines from the
|
|
|
238 |
config.debug,
|
239 |
&engines,
|
240 |
config.request_timeout,
|
241 |
+
safe_search,
|
242 |
)
|
243 |
.await?
|
244 |
}
|
|
|
250 |
config.debug,
|
251 |
&config.upstream_search_engines,
|
252 |
config.request_timeout,
|
253 |
+
safe_search,
|
254 |
)
|
255 |
.await?
|
256 |
}
|
257 |
};
|
258 |
+
if results.engine_errors_info().is_empty() && results.results().is_empty() {
|
259 |
+
results.set_filtered();
|
260 |
+
}
|
261 |
results.add_style(&config.style);
|
262 |
redis_cache
|
|
|
263 |
.cache_results(&serde_json::to_string(&results)?, &url)
|
264 |
.await?;
|
265 |
Ok(results)
|
|
|
267 |
}
|
268 |
}
|
269 |
|
270 |
+
fn is_match_from_filter_list(
|
271 |
+
file_path: &str,
|
272 |
+
query: &str,
|
273 |
+
) -> Result<bool, Box<dyn std::error::Error>> {
|
274 |
+
let mut flag = false;
|
275 |
+
let mut reader = BufReader::new(File::open(file_path)?);
|
276 |
+
for line in reader.by_ref().lines() {
|
277 |
+
let re = Regex::new(&line?)?;
|
278 |
+
if re.is_match(query) {
|
279 |
+
flag = true;
|
280 |
+
break;
|
281 |
+
}
|
282 |
+
}
|
283 |
+
Ok(flag)
|
284 |
+
}
|
285 |
+
|
286 |
/// Handles the route of robots.txt page of the `websurfx` meta search engine website.
|
287 |
#[get("/robots.txt")]
|
288 |
pub async fn robots_data(_req: HttpRequest) -> Result<HttpResponse, Box<dyn std::error::Error>> {
|
websurfx/config.lua
CHANGED
@@ -11,6 +11,17 @@ production_use = false -- whether to use production mode or not (in other words
|
|
11 |
-- There will be a random delay before sending the request to the search engines, this is to prevent DDoSing the upstream search engines from a large number of simultaneous requests.
|
12 |
request_timeout = 30 -- timeout for the search requests sent to the upstream search engines to be fetched (value in seconds).
|
13 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
-- ### Website ###
|
15 |
-- The different colorschemes provided are:
|
16 |
-- {{
|
|
|
11 |
-- There will be a random delay before sending the request to the search engines, this is to prevent DDoSing the upstream search engines from a large number of simultaneous requests.
|
12 |
request_timeout = 30 -- timeout for the search requests sent to the upstream search engines to be fetched (value in seconds).
|
13 |
|
14 |
+
-- ### Search ###
|
15 |
+
-- Filter results based on different levels. The levels provided are:
|
16 |
+
-- {{
|
17 |
+
-- 0 - None
|
18 |
+
-- 1 - Low
|
19 |
+
-- 2 - Moderate
|
20 |
+
-- 3 - High
|
21 |
+
-- 4 - Aggressive
|
22 |
+
-- }}
|
23 |
+
safe_search = 2
|
24 |
+
|
25 |
-- ### Website ###
|
26 |
-- The different colorschemes provided are:
|
27 |
-- {{
|