Spaces:

alamin655
/

spacex

Runtime error

App Files Files Community

alamin655 commited on Sep 11, 2023

Commit

867753a

•

2 Parent(s): d3a7435 6c94b92

Merge pull request #202 from neon-mmd/feat-disallow-user-to-search-via-lists

Browse files

Files changed (14) hide show

Cargo.lock +11 -11
Cargo.toml +4 -4
public/images/barricade.png +0 -0
public/images/filter.png +0 -0
public/static/themes/simple.css +29 -0
public/templates/search.html +62 -30
src/config/parser.rs +12 -0
src/engines/duckduckgo.rs +1 -0
src/engines/engine_models.rs +1 -0
src/engines/searx.rs +11 -2
src/results/aggregation_models.rs +33 -4
src/results/aggregator.rs +23 -14
src/server/routes.rs +73 -15
websurfx/config.lua +11 -0

Cargo.lock CHANGED Viewed

@@ -532,18 +532,18 @@ dependencies = [
 [[package]]
 name = "clap"
-version = "4.4.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7c8d502cbaec4595d2e7d5f61e318f05417bd2b66fdc3809498f0d3fdf0bea27"
 dependencies = [
  "clap_builder",
 ]
 [[package]]
 name = "clap_builder"
-version = "4.4.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5891c7bc0edb3e1c2204fc5e94009affabeb1821c9e5fdc3959536c5c0bb984d"
 dependencies = [
  "anstyle",
  "clap_lex",
@@ -1270,9 +1270,9 @@ checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7"
 [[package]]
 name = "handlebars"
-version = "4.3.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "83c3372087601b532857d332f5957cbae686da52bb7810bf038c3e3c3cc2fa0d"
 dependencies = [
  "log",
  "pest",
@@ -2494,9 +2494,9 @@ dependencies = [
 [[package]]
 name = "redis"
-version = "0.23.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ffd6543a7bc6428396845f6854ccf3d1ae8823816592e2cbe74f20f50f209d02"
 dependencies = [
  "arc-swap",
  "async-trait",
@@ -2663,9 +2663,9 @@ dependencies = [
 [[package]]
 name = "rustix"
-version = "0.38.10"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ed6248e1caa625eb708e266e06159f135e8c26f2bb7ceb72dc4b2766d0340964"
 dependencies = [
  "bitflags 2.4.0",
  "errno",
@@ -3697,7 +3697,7 @@ dependencies = [
 [[package]]
 name = "websurfx"
-version = "0.18.6"
 dependencies = [
  "actix-cors",
  "actix-files",

 [[package]]
 name = "clap"
+version = "4.4.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6a13b88d2c62ff462f88e4a121f17a82c1af05693a2f192b5c38d14de73c19f6"
 dependencies = [
  "clap_builder",
 ]
 [[package]]
 name = "clap_builder"
+version = "4.4.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2bb9faaa7c2ef94b2743a21f5a29e6f0010dff4caa69ac8e9d6cf8b6fa74da08"
 dependencies = [
  "anstyle",
  "clap_lex",
 [[package]]
 name = "handlebars"
+version = "4.4.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c39b3bc2a8f715298032cf5087e58573809374b08160aa7d750582bdb82d2683"
 dependencies = [
  "log",
  "pest",
 [[package]]
 name = "redis"
+version = "0.23.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4f49cdc0bb3f412bf8e7d1bd90fe1d9eb10bc5c399ba90973c14662a27b3f8ba"
 dependencies = [
  "arc-swap",
  "async-trait",
 [[package]]
 name = "rustix"
+version = "0.38.11"
 source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c0c3dde1fc030af041adc40e79c0e7fbcf431dd24870053d187d7c66e4b87453"
 dependencies = [
  "bitflags 2.4.0",
  "errno",
 [[package]]
 name = "websurfx"
+version = "0.19.0"
 dependencies = [
  "actix-cors",
  "actix-files",

Cargo.toml CHANGED Viewed

@@ -1,6 +1,6 @@
 [package]
 name = "websurfx"
-version = "0.18.6"
 edition = "2021"
 description = "An open-source alternative to Searx that provides clean, ad-free, and organic results with incredible speed while keeping privacy and security in mind."
 repository = "https://github.com/neon-mmd/websurfx"
@@ -10,7 +10,7 @@ license = "AGPL-3.0"
 reqwest = {version="0.11.20",features=["json"]}
 tokio = {version="1.32.0",features=["rt-multi-thread","macros"]}
 serde = {version="1.0.188",features=["derive"]}
-handlebars = { version = "4.3.7", features = ["dir_source"] }
 scraper = {version="0.17.1"}
 actix-web = {version="4.4.0", features = ["cookies"]}
 actix-files = {version="0.6.2"}
@@ -19,8 +19,8 @@ serde_json = {version="1.0.105"}
 fake-useragent = {version="0.1.3"}
 env_logger = {version="0.10.0"}
 log = {version="0.4.20"}
-mlua = {version="0.8.10",features=["luajit"]}
-redis = {version="0.23.2",features=["tokio-comp","connection-manager"]}
 md5 = {version="0.7.0"}
 rand={version="0.8.5"}
 once_cell = {version="1.18.0"}

 [package]
 name = "websurfx"
+version = "0.19.0"
 edition = "2021"
 description = "An open-source alternative to Searx that provides clean, ad-free, and organic results with incredible speed while keeping privacy and security in mind."
 repository = "https://github.com/neon-mmd/websurfx"
 reqwest = {version="0.11.20",features=["json"]}
 tokio = {version="1.32.0",features=["rt-multi-thread","macros"]}
 serde = {version="1.0.188",features=["derive"]}
+handlebars = { version = "4.4.0", features = ["dir_source"] }
 scraper = {version="0.17.1"}
 actix-web = {version="4.4.0", features = ["cookies"]}
 actix-files = {version="0.6.2"}
 fake-useragent = {version="0.1.3"}
 env_logger = {version="0.10.0"}
 log = {version="0.4.20"}
+mlua = {version="0.8.10", features=["luajit"]}
+redis = {version="0.23.3", features=["tokio-comp","connection-manager"]}
 md5 = {version="0.7.0"}
 rand={version="0.8.5"}
 once_cell = {version="1.18.0"}

public/images/barricade.png ADDED Viewed

public/images/filter.png ADDED Viewed

public/static/themes/simple.css CHANGED Viewed

@@ -132,6 +132,35 @@ body {
   width: 1.2rem;
   height: 1.2rem;
 }
 /* styles for the footer and header */

   width: 1.2rem;
   height: 1.2rem;
 }
+.results .result_disallowed,
+.results .result_filtered {
+  display: flex;
+  justify-content: center;
+  align-items: center;
+  gap: 10rem;
+  font-size: 2rem;
+  color: var(--foreground-color);
+  margin: 0rem 7rem;
+}
+.results .result_disallowed .user_query,
+.results .result_filtered .user_query {
+  color: var(--background-color);
+  font-weight: 300;
+}
+.results .result_disallowed img,
+.results .result_filtered img {
+  width: 30rem;
+}
+.results .result_disallowed div,
+.results .result_filtered div {
+  display: flex;
+  flex-direction: column;
+  gap: 1rem;
+  line-break: strict;
+}
 /* styles for the footer and header */

public/templates/search.html CHANGED Viewed

@@ -1,37 +1,69 @@
 {{>header this.style}}
 <main class="results">
-  {{>search_bar this}}
-  <div class="results_aggregated">
-    {{#if results}} {{#each results}}
-    <div class="result">
-      <h1><a href="{{{this.url}}}">{{{this.title}}}</a></h1>
-      <small>{{{this.url}}}</small>
-      <p>{{{this.description}}}</p>
-      <div class="upstream_engines">
-        {{#each engine}}
-        <span>{{{this}}}</span>
-        {{/each}}
-      </div>
     </div>
-    {{/each}} {{else}}
-    <div class="result_not_found">
-      <p>Your search - {{{this.pageQuery}}} - did not match any documents.</p>
-      <p class="suggestions">Suggestions:</p>
-      <ul>
-        <li>Make sure that all words are spelled correctly.</li>
-        <li>Try different keywords.</li>
-        <li>Try more general keywords.</li>
-      </ul>
-      <img src="./images/no_results.gif" alt="Man fishing gif" />
     </div>
-    {{/if}}
-  </div>
-  <div class="page_navigation">
-    <button type="button" onclick="navigate_backward()">
-      &#8592; previous
-    </button>
-    <button type="button" onclick="navigate_forward()">next &#8594;</button>
-  </div>
 </main>
 <script src="static/index.js"></script>
 <script src="static/pagination.js"></script>

 {{>header this.style}}
 <main class="results">
+    {{>search_bar this}}
+    <div class="results_aggregated">
+        {{#if results}} {{#each results}}
+        <div class="result">
+            <h1><a href="{{{this.url}}}">{{{this.title}}}</a></h1>
+            <small>{{{this.url}}}</small>
+            <p>{{{this.description}}}</p>
+            <div class="upstream_engines">
+                {{#each engine}}
+                <span>{{{this}}}</span>
+                {{/each}}
+            </div>
+        </div>
+        {{/each}} {{else}} {{#if disallowed}}
+        <div class="result_disallowed">
+            <div class="description">
+                <p>
+                    Your search - <span class="user_query">{{{this.pageQuery}}}</span> -
+                    has been disallowed.
+                </p>
+                <p class="description_paragraph">Dear user,</p>
+                <p class="description_paragraph">
+                    The query - <span class="user_query">{{{this.pageQuery}}}</span> - has
+                    been blacklisted via server configuration and hence disallowed by the
+                    server. Hence no results can be displayed for your query.
+                </p>
+            </div>
+            <img src="./images/barricade.png" alt="Image of a Barricade" />
+        </div>
+        {{else}} {{#if filtered}}
+        <div class="result_filtered">
+            <div class="description">
+                <p>
+                    Your search - <span class="user_query">{{{this.pageQuery}}}</span> -
+                    has been filtered.
+                </p>
+                <p class="description_paragraph">Dear user,</p>
+                <p class="description_paragraph">
+                    All the search results contain content that has been configured to be
+                    filtered out via server configuration and hence have been
+                    completely filtered out.
+                </p>
+            </div>
+            <img src="./images/filter.png" alt="Image of a paper inside a funnel" />
+        </div>
+        {{else}}
+        <div class="result_not_found">
+            <p>Your search - {{{this.pageQuery}}} - did not match any documents.</p>
+            <p class="suggestions">Suggestions:</p>
+            <ul>
+                <li>Make sure that all words are spelled correctly.</li>
+                <li>Try different keywords.</li>
+                <li>Try more general keywords.</li>
+            </ul>
+            <img src="./images/no_results.gif" alt="Man fishing gif" />
+        </div>
+        {{/if}} {{/if}} {{/if}}
     </div>
+    <div class="page_navigation">
+        <button type="button" onclick="navigate_backward()">
+            &#8592; previous
+        </button>
+        <button type="button" onclick="navigate_forward()">next &#8594;</button>
     </div>
 </main>
 <script src="static/index.js"></script>
 <script src="static/pagination.js"></script>

src/config/parser.rs CHANGED Viewed

@@ -35,6 +35,7 @@ pub struct Config {
     pub upstream_search_engines: Vec<crate::engines::engine_models::EngineHandler>,
     pub request_timeout: u8,
     pub threads: u8,
 }
 /// Configuration options for the aggregator.
@@ -89,6 +90,16 @@ impl Config {
             parsed_threads
         };
         Ok(Config {
             port: globals.get::<_, u16>("port")?,
             binding_ip: globals.get::<_, String>("binding_ip")?,
@@ -110,6 +121,7 @@ impl Config {
                 .collect(),
             request_timeout: globals.get::<_, u8>("request_timeout")?,
             threads,
         })
     }
 }

     pub upstream_search_engines: Vec<crate::engines::engine_models::EngineHandler>,
     pub request_timeout: u8,
     pub threads: u8,
+    pub safe_search: u8,
 }
 /// Configuration options for the aggregator.
             parsed_threads
         };
+        // Read the configured safe search level and validate it against the supported
+        // range (0 to 4); anything else is reported and replaced by the fallback level 1.
+        let parsed_safe_search: u8 = globals.get::<_, u8>("safe_search")?;
+        let safe_search: u8 = match parsed_safe_search {
+            0..=4 => parsed_safe_search,
+            _ => {
+                log::error!("Config Error: The value of `safe_search` option should be an integer from 0 to 4.");
+                log::error!("Falling back to using the value `1` for the option");
+                1
+            }
+        };
         Ok(Config {
             port: globals.get::<_, u16>("port")?,
             binding_ip: globals.get::<_, String>("binding_ip")?,
                 .collect(),
             request_timeout: globals.get::<_, u8>("request_timeout")?,
             threads,
+            safe_search,
         })
     }
 }

src/engines/duckduckgo.rs CHANGED Viewed

@@ -43,6 +43,7 @@ impl SearchEngine for DuckDuckGo {
         page: u32,
         user_agent: &str,
         request_timeout: u8,
     ) -> Result<HashMap<String, SearchResult>, EngineError> {
         // Page number can be missing or empty string and so appropriate handling is required
         // so that the upstream server receives a valid page number.

         page: u32,
         user_agent: &str,
         request_timeout: u8,
+        _safe_search: u8,
     ) -> Result<HashMap<String, SearchResult>, EngineError> {
         // Page number can be missing or empty string and so appropriate handling is required
         // so that the upstream server receives a valid page number.

src/engines/engine_models.rs CHANGED Viewed

@@ -71,6 +71,7 @@ pub trait SearchEngine: Sync + Send {
         page: u32,
         user_agent: &str,
         request_timeout: u8,
     ) -> Result<HashMap<String, SearchResult>, EngineError>;
 }

         page: u32,
         user_agent: &str,
         request_timeout: u8,
+        safe_search: u8,
     ) -> Result<HashMap<String, SearchResult>, EngineError>;
 }

src/engines/searx.rs CHANGED Viewed

@@ -42,12 +42,21 @@ impl SearchEngine for Searx {
         page: u32,
         user_agent: &str,
         request_timeout: u8,
     ) -> Result<HashMap<String, SearchResult>, EngineError> {
         // Page number can be missing or empty string and so appropriate handling is required
         // so that the upstream server receives a valid page number.
         let url: String = match page {
-            0 | 1 => format!("https://searx.work/search?q={query}&pageno=1"),
-            _ => format!("https://searx.work/search?q={query}&pageno={page}"),
         };
         // initializing headers and adding appropriate headers.

         page: u32,
         user_agent: &str,
         request_timeout: u8,
+        mut safe_search: u8,
     ) -> Result<HashMap<String, SearchResult>, EngineError> {
         // Page number can be missing or empty string and so appropriate handling is required
         // so that the upstream server receives a valid page number.
+        // Searx only accepts safesearch levels 0 to 2, so the stricter internal levels
+        // (3 and 4) are clamped down to its maximum before building the request url.
+        if safe_search >= 3 {
+            safe_search = 2;
+        };
         let url: String = match page {
+            0 | 1 => {
+                format!("https://searx.work/search?q={query}&pageno=1&safesearch={safe_search}")
+            }
+            _ => format!(
+                "https://searx.work/search?q={query}&pageno={page}&safesearch={safe_search}"
+            ),
         };
         // initializing headers and adding appropriate headers.

src/results/aggregation_models.rs CHANGED Viewed

@@ -102,13 +102,15 @@ impl EngineErrorInfo {
 /// and the type of error that caused it.
 /// * `empty_result_set` - Stores a boolean which indicates that no engines gave a result for the
 /// given search query.
-#[derive(Serialize, Deserialize)]
 #[serde(rename_all = "camelCase")]
 pub struct SearchResults {
     pub results: Vec<SearchResult>,
     pub page_query: String,
     pub style: Style,
-    pub engine_errors_info: SmallVec<[EngineErrorInfo; 0]>,
 }
 impl SearchResults {
@@ -122,6 +124,7 @@ impl SearchResults {
     /// the search url.
     /// * `empty_result_set` - Takes a boolean which indicates that no engines gave a result for the
     /// given search query.
     pub fn new(
         results: Vec<SearchResult>,
         page_query: &str,
@@ -131,12 +134,38 @@ impl SearchResults {
             results,
             page_query: page_query.to_owned(),
             style: Style::default(),
-            engine_errors_info: SmallVec::from(engine_errors_info),
         }
     }
     /// A setter function to add website style to the return search results.
     pub fn add_style(&mut self, style: &Style) {
-        self.style = style.to_owned();
     }
 }

 /// and the type of error that caused it.
 /// * `empty_result_set` - Stores a boolean which indicates that no engines gave a result for the
 /// given search query.
+#[derive(Serialize, Deserialize, Default)]
 #[serde(rename_all = "camelCase")]
 pub struct SearchResults {
     pub results: Vec<SearchResult>,
     pub page_query: String,
     pub style: Style,
+    pub engine_errors_info: Vec<EngineErrorInfo>,
+    pub disallowed: bool,
+    pub filtered: bool,
 }
 impl SearchResults {
     /// the search url.
     /// * `empty_result_set` - Takes a boolean which indicates that no engines gave a result for the
     /// given search query.
+    /// * ``
     pub fn new(
         results: Vec<SearchResult>,
         page_query: &str,
             results,
             page_query: page_query.to_owned(),
             style: Style::default(),
+            engine_errors_info: engine_errors_info.to_owned(),
+            disallowed: Default::default(),
+            filtered: Default::default(),
         }
     }
     /// Replaces the style stored on these search results with a copy of the given `style`.
     pub fn add_style(&mut self, style: &Style) {
+        self.style.clone_from(style);
+    }
+    /// A setter function that marks these results as disallowed by setting `disallowed` to `true`.
+    pub fn set_disallowed(&mut self) {
+        self.disallowed = true;
+    }
+    /// Overwrites the stored page search query with an owned copy of `page`.
+    pub fn set_page_query(&mut self, page: &str) {
+        self.page_query = String::from(page);
+    }
+    /// A setter function that marks these results as filtered by setting `filtered` to `true`.
+    pub fn set_filtered(&mut self) {
+        self.filtered = true;
+    }
+    /// A getter function that returns a copy of `engine_errors_info`.
+    ///
+    /// The stored list is left intact, so inspecting the errors does not remove them
+    /// from results that are serialized or returned afterwards.
+    pub fn engine_errors_info(&mut self) -> Vec<EngineErrorInfo> {
+        self.engine_errors_info.clone()
+    }
+    /// Returns a cloned copy of the stored `results`, leaving the original list in place.
+    pub fn results(&mut self) -> Vec<SearchResult> {
+        self.results.to_vec()
+    }
 }

src/results/aggregator.rs CHANGED Viewed

@@ -70,6 +70,7 @@ pub async fn aggregate(
     debug: bool,
     upstream_search_engines: &[EngineHandler],
     request_timeout: u8,
 ) -> Result<SearchResults, Box<dyn std::error::Error>> {
     let user_agent: &str = random_user_agent();
@@ -91,7 +92,13 @@ pub async fn aggregate(
         let query: String = query.to_owned();
         tasks.push(tokio::spawn(async move {
             search_engine
-                .results(&query, page, user_agent, request_timeout)
                 .await
         }));
     }
@@ -150,20 +157,22 @@ pub async fn aggregate(
         }
     }
-    let mut blacklist_map: HashMap<String, SearchResult> = HashMap::new();
-    filter_with_lists(
-        &mut result_map,
-        &mut blacklist_map,
-        file_path(FileType::BlockList)?,
-    )?;
-    filter_with_lists(
-        &mut blacklist_map,
-        &mut result_map,
-        file_path(FileType::AllowList)?,
-    )?;
-    drop(blacklist_map);
     let results: Vec<SearchResult> = result_map.into_values().collect();
@@ -189,7 +198,7 @@ pub fn filter_with_lists(
     let mut reader = BufReader::new(File::open(file_path)?);
     for line in reader.by_ref().lines() {
-        let re = Regex::new(&line?)?;
         // Iterate over each search result in the map and check if it matches the regex pattern
         for (url, search_result) in map_to_be_filtered.clone().into_iter() {

     debug: bool,
     upstream_search_engines: &[EngineHandler],
     request_timeout: u8,
+    safe_search: u8,
 ) -> Result<SearchResults, Box<dyn std::error::Error>> {
     let user_agent: &str = random_user_agent();
         let query: String = query.to_owned();
         tasks.push(tokio::spawn(async move {
             search_engine
+                .results(
+                    &query,
+                    page,
+                    user_agent.clone(),
+                    request_timeout,
+                    safe_search,
+                )
                 .await
         }));
     }
         }
     }
+    if safe_search >= 3 {
+        let mut blacklist_map: HashMap<String, SearchResult> = HashMap::new();
+        filter_with_lists(
+            &mut result_map,
+            &mut blacklist_map,
+            file_path(FileType::BlockList)?,
+        )?;
+        filter_with_lists(
+            &mut blacklist_map,
+            &mut result_map,
+            file_path(FileType::AllowList)?,
+        )?;
+        drop(blacklist_map);
+    }
     let results: Vec<SearchResult> = result_map.into_values().collect();
     let mut reader = BufReader::new(File::open(file_path)?);
     for line in reader.by_ref().lines() {
+        let re = Regex::new(line?.trim())?;
         // Iterate over each search result in the map and check if it matches the regex pattern
         for (url, search_result) in map_to_be_filtered.clone().into_iter() {

src/server/routes.rs CHANGED Viewed

@@ -2,7 +2,10 @@
 //! meta search engine website and provide appropriate response to each route/page
 //! when requested.
-use std::fs::read_to_string;
 use crate::{
     cache::cacher::RedisCache,
@@ -13,12 +16,13 @@ use crate::{
 };
 use actix_web::{get, web, HttpRequest, HttpResponse};
 use handlebars::Handlebars;
 use serde::Deserialize;
 use tokio::join;
 // ---- Constants ----
 /// Initialize redis cache connection once and store it on the heap.
-const REDIS_CACHE: async_once_cell::OnceCell<RedisCache> = async_once_cell::OnceCell::new();
 /// A named struct which deserializes all the user provided search parameters and stores them.
 ///
@@ -32,6 +36,7 @@ const REDIS_CACHE: async_once_cell::OnceCell<RedisCache> = async_once_cell::Once
 struct SearchParams {
     q: Option<String>,
     page: Option<u32>,
 }
 /// Handles the route of index page or main page of the `websurfx` meta search engine website.
@@ -105,42 +110,58 @@ pub async fn search(
                 None => 1,
             };
             let (_, results, _) = join!(
                 results(
                     format!(
-                        "http://{}:{}/search?q={}&page={}",
                         config.binding_ip,
                         config.port,
                         query,
-                        page - 1
                     ),
                     &config,
                     query,
                     page - 1,
-                    &req,
                 ),
                 results(
                     format!(
-                        "http://{}:{}/search?q={}&page={}",
-                        config.binding_ip, config.port, query, page
                     ),
                     &config,
                     query,
                     page,
-                    &req,
                 ),
                 results(
                     format!(
-                        "http://{}:{}/search?q={}&page={}",
                         config.binding_ip,
                         config.port,
                         query,
-                        page + 1
                     ),
                     &config,
                     query,
                     page + 1,
-                    &req,
                 )
             );
@@ -160,9 +181,10 @@ async fn results(
     config: &Config,
     query: &str,
     page: u32,
-    req: &HttpRequest,
 ) -> Result<SearchResults, Box<dyn std::error::Error>> {
-    let redis_cache: RedisCache = REDIS_CACHE
         .get_or_init(async {
             // Initialize the redis cache connection pool only once and store it on the heap.
             RedisCache::new(&config.redis_url, 5).await.unwrap()
@@ -178,6 +200,23 @@ async fn results(
     match cached_results_json {
         Ok(results) => Ok(serde_json::from_str::<SearchResults>(&results)?),
         Err(_) => {
             // check if the cookie value is empty or not if it is empty then use the
             // default selected upstream search engines from the config file otherwise
             // parse the non-empty cookie and grab the user selected engines from the
@@ -199,6 +238,7 @@ async fn results(
                         config.debug,
                         &engines,
                         config.request_timeout,
                     )
                     .await?
                 }
@@ -210,14 +250,16 @@ async fn results(
                         config.debug,
                         &config.upstream_search_engines,
                         config.request_timeout,
                     )
                     .await?
                 }
             };
             results.add_style(&config.style);
             redis_cache
-                .clone()
                 .cache_results(&serde_json::to_string(&results)?, &url)
                 .await?;
             Ok(results)
@@ -225,6 +267,22 @@ async fn results(
     }
 }
 /// Handles the route of robots.txt page of the `websurfx` meta search engine website.
 #[get("/robots.txt")]
 pub async fn robots_data(_req: HttpRequest) -> Result<HttpResponse, Box<dyn std::error::Error>> {

 //! meta search engine website and provide appropriate response to each route/page
 //! when requested.
+use std::{
+    fs::{read_to_string, File},
+    io::{BufRead, BufReader, Read},
+};
 use crate::{
     cache::cacher::RedisCache,
 };
 use actix_web::{get, web, HttpRequest, HttpResponse};
 use handlebars::Handlebars;
+use regex::Regex;
 use serde::Deserialize;
 use tokio::join;
 // ---- Constants ----
 /// Initialize redis cache connection once and store it on the heap.
+static REDIS_CACHE: async_once_cell::OnceCell<RedisCache> = async_once_cell::OnceCell::new();
 /// A named struct which deserializes all the user provided search parameters and stores them.
 ///
 struct SearchParams {
     q: Option<String>,
     page: Option<u32>,
+    safesearch: Option<u8>,
 }
 /// Handles the route of index page or main page of the `websurfx` meta search engine website.
                 None => 1,
             };
+            let safe_search: u8 = match config.safe_search {
+                3..=4 => config.safe_search,
+                _ => match &params.safesearch {
+                    Some(safesearch) => match safesearch {
+                        0..=2 => *safesearch,
+                        _ => 1,
+                    },
+                    None => config.safe_search,
+                },
+            };
             let (_, results, _) = join!(
                 results(
                     format!(
+                        "http://{}:{}/search?q={}&page={}&safesearch={}",
                         config.binding_ip,
                         config.port,
                         query,
+                        page - 1,
+                        safe_search
                     ),
                     &config,
                     query,
                     page - 1,
+                    req.clone(),
+                    safe_search
                 ),
                 results(
                     format!(
+                        "http://{}:{}/search?q={}&page={}&safesearch={}",
+                        config.binding_ip, config.port, query, page, safe_search
                     ),
                     &config,
                     query,
                     page,
+                    req.clone(),
+                    safe_search
                 ),
                 results(
                     format!(
+                        "http://{}:{}/search?q={}&page={}&safesearch={}",
                         config.binding_ip,
                         config.port,
                         query,
+                        page + 1,
+                        safe_search
                     ),
                     &config,
                     query,
                     page + 1,
+                    req.clone(),
+                    safe_search
                 )
             );
     config: &Config,
     query: &str,
     page: u32,
+    req: HttpRequest,
+    safe_search: u8,
 ) -> Result<SearchResults, Box<dyn std::error::Error>> {
+    let mut redis_cache: RedisCache = REDIS_CACHE
         .get_or_init(async {
             // Initialize the redis cache connection pool only once and store it on the heap.
             RedisCache::new(&config.redis_url, 5).await.unwrap()
     match cached_results_json {
         Ok(results) => Ok(serde_json::from_str::<SearchResults>(&results)?),
         Err(_) => {
+            if safe_search == 4 {
+                // A query is disallowed only when it matches the blocklist and is not
+                // explicitly permitted again by the allowlist. This mirrors how the
+                // aggregator applies the two lists to search results. The allowlist is
+                // only consulted when the blocklist actually matched.
+                let is_disallowed: bool =
+                    is_match_from_filter_list(file_path(FileType::BlockList)?, query)?
+                        && !is_match_from_filter_list(file_path(FileType::AllowList)?, query)?;
+                if is_disallowed {
+                    // Build an empty, styled result set flagged as disallowed, cache it
+                    // under the request url and return early without querying any engine.
+                    let mut results: SearchResults = SearchResults::default();
+                    results.set_disallowed();
+                    results.add_style(&config.style);
+                    results.set_page_query(query);
+                    redis_cache
+                        .cache_results(&serde_json::to_string(&results)?, &url)
+                        .await?;
+                    return Ok(results);
+                }
+            }
             // check if the cookie value is empty or not if it is empty then use the
             // default selected upstream search engines from the config file otherwise
             // parse the non-empty cookie and grab the user selected engines from the
                         config.debug,
                         &engines,
                         config.request_timeout,
+                        safe_search,
                     )
                     .await?
                 }
                         config.debug,
                         &config.upstream_search_engines,
                         config.request_timeout,
+                        safe_search,
                     )
                     .await?
                 }
             };
+            // Check the public fields by reference so that nothing is moved or cloned out
+            // of `results` before it is cached and returned. No engine errors together
+            // with no results is treated as "everything was filtered out".
+            if results.engine_errors_info.is_empty() && results.results.is_empty() {
+                results.set_filtered();
+            }
             results.add_style(&config.style);
             redis_cache
                 .cache_results(&serde_json::to_string(&results)?, &url)
                 .await?;
             Ok(results)
     }
 }
+/// Checks whether `query` matches any of the regex patterns listed in a filter list file.
+///
+/// The file at `file_path` is read line by line. Each line is trimmed of surrounding
+/// whitespace and compiled as a regular expression; blank lines are skipped.
+///
+/// # Arguments
+///
+/// * `file_path` - Path of the filter list file containing one regex pattern per line.
+/// * `query` - The user provided search query to test against the patterns.
+///
+/// # Errors
+///
+/// Returns an error if the file cannot be opened or read, or if a line is not a valid
+/// regular expression.
+fn is_match_from_filter_list(
+    file_path: &str,
+    query: &str,
+) -> Result<bool, Box<dyn std::error::Error>> {
+    let mut reader = BufReader::new(File::open(file_path)?);
+    for line in reader.by_ref().lines() {
+        let line = line?;
+        let pattern = line.trim();
+        // An empty pattern compiles to a regex that matches every query, so a stray blank
+        // line must not be treated as a filter rule.
+        if pattern.is_empty() {
+            continue;
+        }
+        if Regex::new(pattern)?.is_match(query) {
+            return Ok(true);
+        }
+    }
+    Ok(false)
+}
 /// Handles the route of robots.txt page of the `websurfx` meta search engine website.
 #[get("/robots.txt")]
 pub async fn robots_data(_req: HttpRequest) -> Result<HttpResponse, Box<dyn std::error::Error>> {

websurfx/config.lua CHANGED Viewed

@@ -11,6 +11,17 @@ production_use = false -- whether to use production mode or not (in other words
 -- There will be a random delay before sending the request to the search engines, this is to prevent DDoSing the upstream search engines from a large number of simultaneous requests.
 request_timeout = 30 -- timeout for the search requests sent to the upstream search engines to be fetched (value in seconds).
 -- ### Website ###
 -- The different colorschemes provided are:
 -- {{

 -- There will be a random delay before sending the request to the search engines, this is to prevent DDoSing the upstream search engines from a large number of simultaneous requests.
 request_timeout = 30 -- timeout for the search requests sent to the upstream search engines to be fetched (value in seconds).
+-- ### Search ###
+-- Filter results based on different levels. The levels provided are:
+-- {{
+-- 0 - None
+-- 1 - Low
+-- 2 - Moderate
+-- 3 - High
+-- 4 - Aggressive
+-- }}
+safe_search = 2
 -- ### Website ###
 -- The different colorschemes provided are:
 -- {{