alamin655 committed
Commit e19038b • 2 Parent(s): 7a64454 ca4447f

Merge pull request #208 from neon-mmd/reorganize-code-and-restructure-the-codebase

Cargo.lock CHANGED
@@ -3797,7 +3797,7 @@ dependencies = [
 
 [[package]]
 name = "websurfx"
-version = "0.20.1"
+version = "0.20.2"
 dependencies = [
  "actix-cors",
  "actix-files",
Cargo.toml CHANGED
@@ -1,6 +1,6 @@
 [package]
 name = "websurfx"
-version = "0.20.1"
+version = "0.20.2"
 edition = "2021"
 description = "An open-source alternative to Searx that provides clean, ad-free, and organic results with incredible speed while keeping privacy and security in mind."
 repository = "https://github.com/neon-mmd/websurfx"
src/config/mod.rs CHANGED
@@ -2,4 +2,3 @@
 //! and convert the config options into rust readable form.
 
 pub mod parser;
-pub mod parser_models;
src/config/parser.rs CHANGED
@@ -3,7 +3,7 @@
 
 use crate::handler::paths::{file_path, FileType};
 
-use super::parser_models::{AggregatorConfig, RateLimiter, Style};
+use crate::models::parser_models::{AggregatorConfig, RateLimiter, Style};
 use log::LevelFilter;
 use mlua::Lua;
 use std::{collections::HashMap, fs, thread::available_parallelism};
@@ -27,7 +27,7 @@ pub struct Config {
     /// It stores the option to whether enable or disable debug mode.
     pub debug: bool,
     /// It stores all the engine names that were enabled by the user.
-    pub upstream_search_engines: Vec<crate::engines::engine_models::EngineHandler>,
+    pub upstream_search_engines: Vec<crate::models::engine_models::EngineHandler>,
     /// It stores the time (secs) which controls the server request timeout.
    pub request_timeout: u8,
    /// It stores the number of threads which controls the app will use to run.
@@ -109,7 +109,7 @@ impl Config {
                 .get::<_, HashMap<String, bool>>("upstream_search_engines")?
                 .into_iter()
                 .filter_map(|(key, value)| value.then_some(key))
-                .filter_map(|engine| crate::engines::engine_models::EngineHandler::new(&engine))
+                .filter_map(|engine| crate::models::engine_models::EngineHandler::new(&engine))
                 .collect(),
             request_timeout: globals.get::<_, u8>("request_timeout")?,
             threads,
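
Note: `upstream_search_engines` arrives from the user's Lua config as a `HashMap<String, bool>`, and the `filter_map` chain above keeps only the names flagged `true` before resolving them through `EngineHandler::new`. A minimal, self-contained sketch of that filtering step (the map contents are illustrative, not websurfx defaults):

use std::collections::HashMap;

fn enabled_engines(engines: HashMap<String, bool>) -> Vec<String> {
    engines
        .into_iter()
        // `bool::then_some` yields Some(key) when the flag is true and None
        // otherwise, so disabled engines fall out of the filter_map.
        .filter_map(|(key, value)| value.then_some(key))
        .collect()
}

fn main() {
    let config = HashMap::from([
        ("duckduckgo".to_owned(), true),
        ("searx".to_owned(), false),
    ]);
    // Only the engine flagged `true` survives.
    assert_eq!(enabled_engines(config), vec!["duckduckgo".to_owned()]);
}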
src/engines/duckduckgo.rs CHANGED
@@ -7,9 +7,9 @@ use std::collections::HashMap;
 use reqwest::header::HeaderMap;
 use scraper::{Html, Selector};
 
-use crate::results::aggregation_models::SearchResult;
+use crate::models::aggregation_models::SearchResult;
 
-use super::engine_models::{EngineError, SearchEngine};
+use crate::models::engine_models::{EngineError, SearchEngine};
 
 use error_stack::{Report, Result, ResultExt};
 
src/engines/mod.rs CHANGED
@@ -4,5 +4,4 @@
 //! code. Moreover, it also provides a custom error for the upstream search engine handling code.
 
 pub mod duckduckgo;
-pub mod engine_models;
 pub mod searx;
src/engines/searx.rs CHANGED
@@ -6,9 +6,8 @@ use reqwest::header::HeaderMap;
 use scraper::{Html, Selector};
 use std::collections::HashMap;
 
-use crate::results::aggregation_models::SearchResult;
-
-use super::engine_models::{EngineError, SearchEngine};
+use crate::models::aggregation_models::SearchResult;
+use crate::models::engine_models::{EngineError, SearchEngine};
 use error_stack::{Report, Result, ResultExt};
 
 /// A new Searx engine type defined in-order to implement the `SearchEngine` trait which allows to
src/lib.rs CHANGED
@@ -9,12 +9,13 @@ pub mod cache;
 pub mod config;
 pub mod engines;
 pub mod handler;
+pub mod models;
 pub mod results;
 pub mod server;
 
 use std::net::TcpListener;
 
-use crate::server::routes;
+use crate::server::router;
 
 use actix_cors::Cors;
 use actix_files as fs;
@@ -89,12 +90,12 @@ pub fn run(listener: TcpListener, config: Config) -> std::io::Result<Server> {
             fs::Files::new("/images", format!("{}/images", public_folder_path))
                 .show_files_listing(),
         )
-        .service(routes::robots_data) // robots.txt
-        .service(routes::index) // index page
-        .service(routes::search) // search page
-        .service(routes::about) // about page
-        .service(routes::settings) // settings page
-        .default_service(web::route().to(routes::not_found)) // error page
+        .service(router::robots_data) // robots.txt
+        .service(router::index) // index page
+        .service(server::routes::search::search) // search page
+        .service(router::about) // about page
+        .service(router::settings) // settings page
+        .default_service(web::route().to(router::not_found)) // error page
     })
     .workers(cloned_config_threads_opt as usize)
     // Start server on 127.0.0.1 with the user provided port number. for example 127.0.0.1:8080.
src/{results → models}/aggregation_models.rs RENAMED
@@ -4,7 +4,7 @@
 use serde::{Deserialize, Serialize};
 use smallvec::SmallVec;
 
-use crate::{config::parser_models::Style, engines::engine_models::EngineError};
+use super::{engine_models::EngineError, parser_models::Style};
 
 /// A named struct to store the raw scraped search results scraped search results from the
 /// upstream search engines before aggregating it.It derives the Clone trait which is needed
src/{engines → models}/engine_models.rs RENAMED
@@ -1,7 +1,7 @@
 //! This module provides the error enum to handle different errors associated while requesting data from
 //! the upstream search engines with the search query provided by the user.
 
-use crate::results::aggregation_models::SearchResult;
+use super::aggregation_models::SearchResult;
 use error_stack::{Result, ResultExt};
 use std::{collections::HashMap, fmt, time::Duration};
 
@@ -137,8 +137,11 @@ impl EngineHandler {
     pub fn new(engine_name: &str) -> Option<Self> {
         let engine: (&'static str, Box<dyn SearchEngine>) =
             match engine_name.to_lowercase().as_str() {
-                "duckduckgo" => ("duckduckgo", Box::new(super::duckduckgo::DuckDuckGo)),
-                "searx" => ("searx", Box::new(super::searx::Searx)),
+                "duckduckgo" => (
+                    "duckduckgo",
+                    Box::new(crate::engines::duckduckgo::DuckDuckGo),
+                ),
+                "searx" => ("searx", Box::new(crate::engines::searx::Searx)),
                 _ => return None,
             };
 
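Note: after the move, `EngineHandler::new` lives in `crate::models::engine_models` while the engine structs stay in `crate::engines`, which is why the match arms switch from `super::` to fully-qualified `crate::engines::` paths. A minimal sketch of the same dispatch pattern (the trait and structs below are stand-ins, not the real websurfx definitions):

/// Stand-in for websurfx's SearchEngine trait.
trait SearchEngine {}

struct DuckDuckGo;
struct Searx;
impl SearchEngine for DuckDuckGo {}
impl SearchEngine for Searx {}

/// Maps a user-supplied engine name to a boxed handler; unknown names yield
/// None, which the config parser's filter_map silently drops.
fn resolve(engine_name: &str) -> Option<(&'static str, Box<dyn SearchEngine>)> {
    match engine_name.to_lowercase().as_str() {
        "duckduckgo" => Some(("duckduckgo", Box::new(DuckDuckGo))),
        "searx" => Some(("searx", Box::new(Searx))),
        _ => None,
    }
}

fn main() {
    assert!(resolve("DuckDuckGo").is_some()); // matching is case-insensitive
    assert!(resolve("google").is_none()); // unsupported engines are rejected
}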
src/models/mod.rs ADDED
@@ -0,0 +1,8 @@
+//! This module provides modules which in turn provides various models for aggregrating search
+//! results, parsing config file, providing trait to standardize search engine handling code,
+//! custom engine error for the search engine, etc.
+
+pub mod aggregation_models;
+pub mod engine_models;
+pub mod parser_models;
+pub mod server_models;
src/{config → models}/parser_models.rs RENAMED
File without changes
src/models/server_models.rs ADDED
@@ -0,0 +1,26 @@
+//! This module provides the models to parse cookies and search parameters from the search
+//! engine website.
+use serde::Deserialize;
+
+/// A named struct which deserializes all the user provided search parameters and stores them.
+#[derive(Deserialize)]
+pub struct SearchParams {
+    /// It stores the search parameter option `q` (or query in simple words)
+    /// of the search url.
+    pub q: Option<String>,
+    /// It stores the search parameter `page` (or pageno in simple words)
+    /// of the search url.
+    pub page: Option<u32>,
+}
+
+/// A named struct which is used to deserialize the cookies fetched from the client side.
+#[allow(dead_code)]
+#[derive(Deserialize)]
+pub struct Cookie {
+    /// It stores the theme name used in the website.
+    pub theme: String,
+    /// It stores the colorscheme name used for the website theme.
+    pub colorscheme: String,
+    /// It stores the user selected upstream search engines selected from the UI.
+    pub engines: Vec<String>,
+}
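
Note: both fields of `SearchParams` are `Option`, so a request with a missing `q` or `page` still deserializes instead of failing. A minimal sketch of the round trip using the serde_urlencoded crate (the same parser actix-web's `web::Query` extractor uses internally; it appears here only for illustration, not as a new websurfx dependency):

use serde::Deserialize;

#[derive(Deserialize)]
struct SearchParams {
    q: Option<String>,
    page: Option<u32>,
}

fn main() {
    // A full query string populates both fields.
    let params: SearchParams = serde_urlencoded::from_str("q=rust&page=2").unwrap();
    assert_eq!(params.q.as_deref(), Some("rust"));
    assert_eq!(params.page, Some(2));

    // An empty query string leaves both fields as None.
    let empty: SearchParams = serde_urlencoded::from_str("").unwrap();
    assert!(empty.q.is_none() && empty.page.is_none());
}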
src/results/aggregator.rs CHANGED
@@ -1,27 +1,23 @@
 //! This module provides the functionality to scrape and gathers all the results from the upstream
 //! search engines and then removes duplicate results.
 
-use std::{
-    collections::HashMap,
-    io::{BufReader, Read},
-    time::Duration,
-};
-
-use super::{
+use super::user_agent::random_user_agent;
+use crate::handler::paths::{file_path, FileType};
+use crate::models::{
     aggregation_models::{EngineErrorInfo, SearchResult, SearchResults},
-    user_agent::random_user_agent,
+    engine_models::{EngineError, EngineHandler},
 };
 use error_stack::Report;
 use rand::Rng;
 use regex::Regex;
+use std::{
+    collections::HashMap,
+    io::{BufReader, Read},
+    time::Duration,
+};
 use std::{fs::File, io::BufRead};
 use tokio::task::JoinHandle;
 
-use crate::{
-    engines::engine_models::{EngineError, EngineHandler},
-    handler::paths::{file_path, FileType},
-};
-
 /// Aliases for long type annotations
 type FutureVec = Vec<JoinHandle<Result<HashMap<String, SearchResult>, Report<EngineError>>>>;
 
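Note: the `FutureVec` alias names the collection of per-engine Tokio tasks the aggregator spawns and later awaits. A minimal sketch of that fan-out/merge shape, with `String` stand-ins for websurfx's `SearchResult` and `Report<EngineError>` (assumes the tokio crate with its rt-multi-thread and macros features):

use std::collections::HashMap;
use tokio::task::JoinHandle;

// Stand-in for the alias above: one spawned task per upstream engine, each
// resolving to a map of results keyed by URL.
type FutureVec = Vec<JoinHandle<Result<HashMap<String, String>, String>>>;

#[tokio::main]
async fn main() {
    let tasks: FutureVec = (0..2)
        .map(|i| {
            tokio::spawn(async move {
                let mut results = HashMap::new();
                results.insert(format!("https://example.com/{i}"), format!("result {i}"));
                Ok::<_, String>(results)
            })
        })
        .collect();

    // Await every task and merge the maps; duplicate URLs collapse into a
    // single entry, which is how result deduplication falls out of the merge.
    let mut merged: HashMap<String, String> = HashMap::new();
    for task in tasks {
        if let Ok(Ok(map)) = task.await {
            merged.extend(map);
        }
    }
    assert_eq!(merged.len(), 2);
}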
src/results/mod.rs CHANGED
@@ -2,6 +2,5 @@
 //! results from the upstream search engines and filters it if safe search is set to 3 or 4. Also,
 //! provides various models to aggregate search results into a standardized form.
 
-pub mod aggregation_models;
 pub mod aggregator;
 pub mod user_agent;
src/server/mod.rs CHANGED
@@ -3,4 +3,5 @@
 //! the search route. Also, caches the next, current and previous search results in the search
 //! routes with the help of the redis server.
 
+pub mod router;
 pub mod routes;
src/server/router.rs ADDED
@@ -0,0 +1,64 @@
+//! This module provides the functionality to handle different routes of the `websurfx`
+//! meta search engine website and provide appropriate response to each route/page
+//! when requested.
+
+use crate::{
+    config::parser::Config,
+    handler::paths::{file_path, FileType},
+};
+use actix_web::{get, web, HttpRequest, HttpResponse};
+use handlebars::Handlebars;
+use std::fs::read_to_string;
+
+/// Handles the route of index page or main page of the `websurfx` meta search engine website.
+#[get("/")]
+pub async fn index(
+    hbs: web::Data<Handlebars<'_>>,
+    config: web::Data<Config>,
+) -> Result<HttpResponse, Box<dyn std::error::Error>> {
+    let page_content: String = hbs.render("index", &config.style).unwrap();
+    Ok(HttpResponse::Ok().body(page_content))
+}
+
+/// Handles the route of any other accessed route/page which is not provided by the
+/// website essentially the 404 error page.
+pub async fn not_found(
+    hbs: web::Data<Handlebars<'_>>,
+    config: web::Data<Config>,
+) -> Result<HttpResponse, Box<dyn std::error::Error>> {
+    let page_content: String = hbs.render("404", &config.style)?;
+
+    Ok(HttpResponse::Ok()
+        .content_type("text/html; charset=utf-8")
+        .body(page_content))
+}
+
+/// Handles the route of robots.txt page of the `websurfx` meta search engine website.
+#[get("/robots.txt")]
+pub async fn robots_data(_req: HttpRequest) -> Result<HttpResponse, Box<dyn std::error::Error>> {
+    let page_content: String =
+        read_to_string(format!("{}/robots.txt", file_path(FileType::Theme)?))?;
+    Ok(HttpResponse::Ok()
+        .content_type("text/plain; charset=ascii")
+        .body(page_content))
+}
+
+/// Handles the route of about page of the `websurfx` meta search engine website.
+#[get("/about")]
+pub async fn about(
+    hbs: web::Data<Handlebars<'_>>,
+    config: web::Data<Config>,
+) -> Result<HttpResponse, Box<dyn std::error::Error>> {
+    let page_content: String = hbs.render("about", &config.style)?;
+    Ok(HttpResponse::Ok().body(page_content))
+}
+
+/// Handles the route of settings page of the `websurfx` meta search engine website.
+#[get("/settings")]
+pub async fn settings(
+    hbs: web::Data<Handlebars<'_>>,
+    config: web::Data<Config>,
+) -> Result<HttpResponse, Box<dyn std::error::Error>> {
+    let page_content: String = hbs.render("settings", &config.style)?;
+    Ok(HttpResponse::Ok().body(page_content))
+}
src/server/routes/mod.rs ADDED
@@ -0,0 +1,3 @@
+//! This module provides modules to handle various routes in the search engine website.
+
+pub mod search;
src/server/{routes.rs → routes/search.rs} RENAMED
@@ -1,23 +1,20 @@
-//! This module provides the functionality to handle different routes of the `websurfx`
-//! meta search engine website and provide appropriate response to each route/page
-//! when requested.
-
-use std::{
-    fs::{read_to_string, File},
-    io::{BufRead, BufReader, Read},
-};
+//! This module handles the search route of the search engine website.
 
 use crate::{
     cache::cacher::RedisCache,
     config::parser::Config,
-    engines::engine_models::EngineHandler,
     handler::paths::{file_path, FileType},
-    results::{aggregation_models::SearchResults, aggregator::aggregate},
+    models::{aggregation_models::SearchResults, engine_models::EngineHandler},
+    results::aggregator::aggregate,
 };
 use actix_web::{get, web, HttpRequest, HttpResponse};
 use handlebars::Handlebars;
 use regex::Regex;
 use serde::Deserialize;
+use std::{
+    fs::{read_to_string, File},
+    io::{BufRead, BufReader, Read},
+};
 use tokio::join;
 
 // ---- Constants ----
@@ -26,7 +23,7 @@ static REDIS_CACHE: async_once_cell::OnceCell<RedisCache> = async_once_cell::Onc
 
 /// A named struct which deserializes all the user provided search parameters and stores them.
 #[derive(Deserialize)]
-struct SearchParams {
+pub struct SearchParams {
     /// It stores the search parameter option `q` (or query in simple words)
     /// of the search url.
     q: Option<String>,