alamin655 committed
Commit 5761bca • 2 Parent(s): 8fc1b06 9c8b826

Merge branch 'neon-mmd:rolling' into check

public/templates/search.html CHANGED
@@ -4,12 +4,12 @@
 <div class="results_aggregated">
     {{#each results}}
     <div class="result">
-        <h1><a href="{{this.visitingUrl}}">{{{this.title}}}</a></h1>
-        <small>{{this.url}}</small>
+        <h1><a href="/{{{this.visitingUrl}}}">{{{this.title}}}</a></h1>
+        <small>{{{this.url}}}</small>
         <p>{{{this.description}}}</p>
         <div class="upstream_engines">
             {{#each engine}}
-            <span>{{this}}</span>
+            <span>{{{this}}}</span>
             {{/each}}
         </div>
     </div>
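
Note on the template change above: double-brace `{{...}}` expressions are HTML-escaped by Handlebars, while triple-brace `{{{...}}}` expressions are emitted verbatim, so the result URL and engine names are no longer entity-encoded in the rendered page. A minimal Rust sketch of that difference, using the `handlebars` and `serde_json` crates the project already depends on (the sample data is made up for illustration):

    use handlebars::Handlebars;
    use serde_json::json;

    fn main() -> Result<(), Box<dyn std::error::Error>> {
        let hbs = Handlebars::new();
        let data = json!({ "url": "https://example.com/?q=rust&page=0" });

        // {{url}} HTML-escapes the value: the '&' is rendered as '&amp;'.
        let escaped = hbs.render_template("<small>{{url}}</small>", &data)?;
        assert!(escaped.contains("&amp;"));

        // {{{url}}} emits the value verbatim, as search.html now does.
        let raw = hbs.render_template("<small>{{{url}}}</small>", &data)?;
        assert!(!raw.contains("&amp;"));
        Ok(())
    }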
src/bin/websurfx.rs CHANGED
@@ -5,7 +5,7 @@

 use std::net::TcpListener;

-use websurfx::{config_parser::parser::Config, run};
+use websurfx::{config::parser::Config, run};

 /// The function that launches the main server and registers all the routes of the website.
 ///
@@ -26,7 +26,7 @@ async fn main() -> std::io::Result<()> {

     log::info!("started server on port {}", config.port);

-    let listener = TcpListener::bind((config.binding_ip_addr.clone(), config.port))?;
+    let listener = TcpListener::bind((config.binding_ip.clone(), config.port))?;

     run(listener, config)?.await
 }
src/cache/cacher.rs CHANGED
@@ -32,7 +32,7 @@ impl RedisCache {
     /// # Arguments
     ///
     /// * `url` - It takes an url as string.
-    fn compute_url_hash(url: &str) -> String {
+    fn hash_url(url: &str) -> String {
         format!("{:?}", compute(url))
     }

@@ -41,8 +41,8 @@ impl RedisCache {
     /// # Arguments
     ///
     /// * `url` - It takes an url as a string.
-    pub fn cached_results_json(&mut self, url: &str) -> Result<String, Box<dyn std::error::Error>> {
-        let hashed_url_string = Self::compute_url_hash(url);
+    pub fn get_cached_json(&mut self, url: &str) -> Result<String, Box<dyn std::error::Error>> {
+        let hashed_url_string = Self::hash_url(url);
         Ok(self.connection.get(hashed_url_string)?)
     }

@@ -59,7 +59,7 @@ impl RedisCache {
         json_results: String,
         url: &str,
     ) -> Result<(), Box<dyn std::error::Error>> {
-        let hashed_url_string = Self::compute_url_hash(url);
+        let hashed_url_string = Self::hash_url(url);

         // put results_json into cache
         self.connection.set(&hashed_url_string, json_results)?;
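
For context on the rename above: `hash_url` keeps the old `compute_url_hash` body, turning the full search URL into a fixed-length digest that serves as the Redis key. A standalone sketch of what that helper does, assuming `compute` is `md5::compute` from the `md5` crate (the `use` line sits outside this hunk, so that is an assumption):

    // Hypothetical standalone copy of the renamed helper; assumes `compute`
    // is `md5::compute` (the import is not shown in this diff).
    use md5::compute;

    fn hash_url(url: &str) -> String {
        // md5::Digest's Debug impl prints the 16 digest bytes as 32 hex characters.
        format!("{:?}", compute(url))
    }

    fn main() {
        let key = hash_url("http://127.0.0.1:8080/search?q=rust&page=0");
        assert_eq!(key.len(), 32); // one stable Redis key per query + page URL
        println!("redis key: {key}");
    }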
src/{config_parser → config}/mod.rs RENAMED
File without changes
src/{config_parser → config}/parser.rs RENAMED
@@ -14,16 +14,16 @@ static CONFIG_FILE_NAME: &str = "config.lua";
 /// # Fields
 //
 /// * `port` - It stores the parsed port number option on which the server should launch.
-/// * `binding_ip_addr` - It stores the parsed ip address option on which the server should launch
+/// * `binding_ip` - It stores the parsed ip address option on which the server should launch
 /// * `style` - It stores the theming options for the website.
-/// * `redis_connection_url` - It stores the redis connection url address on which the redis
+/// * `redis_url` - It stores the redis connection url address on which the redis
 /// client should connect.
 #[derive(Clone)]
 pub struct Config {
     pub port: u16,
-    pub binding_ip_addr: String,
+    pub binding_ip: String,
     pub style: Style,
-    pub redis_connection_url: String,
+    pub redis_url: String,
     pub aggregator: AggregatorConfig,
     pub logging: bool,
     pub debug: bool,
@@ -55,12 +55,12 @@ impl Config {

         Ok(Config {
             port: globals.get::<_, u16>("port")?,
-            binding_ip_addr: globals.get::<_, String>("binding_ip_addr")?,
+            binding_ip: globals.get::<_, String>("binding_ip")?,
             style: Style::new(
                 globals.get::<_, String>("theme")?,
                 globals.get::<_, String>("colorscheme")?,
             ),
-            redis_connection_url: globals.get::<_, String>("redis_connection_url")?,
+            redis_url: globals.get::<_, String>("redis_url")?,
             aggregator: AggregatorConfig {
                 random_delay: globals.get::<_, bool>("production_use")?,
             },
src/{config_parser → config}/parser_models.rs RENAMED
File without changes
src/engines/duckduckgo.rs CHANGED
@@ -7,7 +7,7 @@ use std::{collections::HashMap, time::Duration};
 use reqwest::header::{HeaderMap, CONTENT_TYPE, COOKIE, REFERER, USER_AGENT};
 use scraper::{Html, Selector};

-use crate::search_results_handler::aggregation_models::RawSearchResult;
+use crate::results::aggregation_models::RawSearchResult;

 use super::engine_models::EngineError;

src/engines/searx.rs CHANGED
@@ -6,7 +6,7 @@ use reqwest::header::{HeaderMap, CONTENT_TYPE, COOKIE, REFERER, USER_AGENT};
 use scraper::{Html, Selector};
 use std::collections::HashMap;

-use crate::search_results_handler::aggregation_models::RawSearchResult;
+use crate::results::aggregation_models::RawSearchResult;

 use super::engine_models::EngineError;
 use error_stack::{IntoReport, Report, Result, ResultExt};
src/handler/mod.rs CHANGED
@@ -1 +1 @@
-pub mod public_path_handler;
+pub mod public_paths;
src/handler/{public_path_handler.rs → public_paths.rs} RENAMED
@@ -17,15 +17,17 @@ static PUBLIC_DIRECTORY_NAME: &str = "public";
 /// 1. `/opt/websurfx` if it not present here then it fallbacks to the next one (2)
 /// 2. Under project folder ( or codebase in other words) if it is not present
 /// here then it returns an error as mentioned above.
-pub fn handle_different_public_path() -> Result<String, Error> {
+pub fn get_public_path() -> Result<String, Error> {
     if Path::new(format!("/opt/websurfx/{}/", PUBLIC_DIRECTORY_NAME).as_str()).exists() {
-        Ok(format!("/opt/websurfx/{}", PUBLIC_DIRECTORY_NAME))
-    } else if Path::new(format!("./{}/", PUBLIC_DIRECTORY_NAME).as_str()).exists() {
-        Ok(format!("./{}", PUBLIC_DIRECTORY_NAME))
-    } else {
-        Err(Error::new(
-            std::io::ErrorKind::NotFound,
-            "Themes (public) folder not found!!",
-        ))
+        return Ok(format!("/opt/websurfx/{}", PUBLIC_DIRECTORY_NAME));
     }
+
+    if Path::new(format!("./{}/", PUBLIC_DIRECTORY_NAME).as_str()).exists() {
+        return Ok(format!("./{}", PUBLIC_DIRECTORY_NAME));
+    }
+
+    Err(Error::new(
+        std::io::ErrorKind::NotFound,
+        "Themes (public) folder not found!!",
+    ))
 }
src/lib.rs CHANGED
@@ -2,10 +2,10 @@
 //! and register all the routes for the `websurfx` meta search engine website.

 pub mod cache;
-pub mod config_parser;
+pub mod config;
 pub mod engines;
 pub mod handler;
-pub mod search_results_handler;
+pub mod results;
 pub mod server;

 use std::net::TcpListener;
@@ -14,9 +14,9 @@ use crate::server::routes;

 use actix_files as fs;
 use actix_web::{dev::Server, middleware::Logger, web, App, HttpServer};
-use config_parser::parser::Config;
+use config::parser::Config;
 use handlebars::Handlebars;
-use handler::public_path_handler::handle_different_public_path;
+use handler::public_paths::get_public_path;

 /// Runs the web server on the provided TCP listener and returns a `Server` instance.
 ///
@@ -32,7 +32,7 @@ use handler::public_path_handler::handle_different_public_path;
 ///
 /// ```rust
 /// use std::net::TcpListener;
-/// use websurfx::{config_parser::parser::Config, run};
+/// use websurfx::{config::parser::Config, run};
 ///
 /// let config = Config::parse().unwrap();
 /// let listener = TcpListener::bind("127.0.0.1:8080").expect("Failed to bind address");
@@ -41,7 +41,7 @@ use handler::public_path_handler::handle_different_public_path;
 pub fn run(listener: TcpListener, config: Config) -> std::io::Result<Server> {
     let mut handlebars: Handlebars = Handlebars::new();

-    let public_folder_path: String = handle_different_public_path()?;
+    let public_folder_path: String = get_public_path()?;

     handlebars
         .register_templates_directory(".html", format!("{}/templates", public_folder_path))
src/{search_results_handler → results}/aggregation_models.rs RENAMED
@@ -3,7 +3,7 @@

 use serde::{Deserialize, Serialize};

-use crate::config_parser::parser_models::Style;
+use crate::config::parser_models::Style;

 /// A named struct to store, serialize and deserializes the individual search result from all the
 /// scraped and aggregated search results from the upstream search engines.
src/{search_results_handler → results}/aggregator.rs RENAMED
File without changes
src/{search_results_handler → results}/mod.rs RENAMED
File without changes
src/{search_results_handler → results}/user_agent.rs RENAMED
File without changes
src/server/routes.rs CHANGED
@@ -6,9 +6,9 @@ use std::fs::read_to_string;

 use crate::{
     cache::cacher::RedisCache,
-    config_parser::parser::Config,
-    handler::public_path_handler::handle_different_public_path,
-    search_results_handler::{aggregation_models::SearchResults, aggregator::aggregate},
+    config::parser::Config,
+    handler::public_paths::get_public_path,
+    results::{aggregation_models::SearchResults, aggregator::aggregate},
 };
 use actix_web::{get, web, HttpRequest, HttpResponse};
 use handlebars::Handlebars;
@@ -73,46 +73,25 @@ pub async fn search(
 ) -> Result<HttpResponse, Box<dyn std::error::Error>> {
     let params = web::Query::<SearchParams>::from_query(req.query_string())?;

-    //Initialize redis cache connection struct
-    let mut redis_cache = RedisCache::new(config.redis_connection_url.clone())?;
     match &params.q {
         Some(query) => {
             if query.trim().is_empty() {
-                Ok(HttpResponse::Found()
+                return Ok(HttpResponse::Found()
                     .insert_header(("location", "/"))
-                    .finish())
-            } else {
-                let page = match &params.page {
-                    Some(page) => *page,
-                    None => 0,
-                };
-
-                let page_url = format!(
-                    "http://{}:{}/search?q={}&page={}",
-                    config.binding_ip_addr, config.port, query, page
-                );
-
-                // fetch the cached results json.
-                let cached_results_json = redis_cache.cached_results_json(&page_url);
-                // check if fetched results was indeed fetched or it was an error and if so
-                // handle the data accordingly.
-                match cached_results_json {
-                    Ok(results_json) => {
-                        let new_results_json: SearchResults = serde_json::from_str(&results_json)?;
-                        let page_content: String = hbs.render("search", &new_results_json)?;
-                        Ok(HttpResponse::Ok().body(page_content))
-                    }
-                    Err(_) => {
-                        let mut results_json: crate::search_results_handler::aggregation_models::SearchResults =
-                            aggregate(query, page, config.aggregator.random_delay, config.debug).await?;
-                        results_json.add_style(config.style.clone());
-                        redis_cache
-                            .cache_results(serde_json::to_string(&results_json)?, &page_url)?;
-                        let page_content: String = hbs.render("search", &results_json)?;
-                        Ok(HttpResponse::Ok().body(page_content))
-                    }
-                }
+                    .finish());
             }
+            let page = match &params.page {
+                Some(page) => *page,
+                None => 0,
+            };
+
+            let url = format!(
+                "http://{}:{}/search?q={}&page={}",
+                config.binding_ip, config.port, query, page
+            );
+            let results_json = get_results(url, &config, query, page).await?;
+            let page_content: String = hbs.render("search", &results_json)?;
+            Ok(HttpResponse::Ok().body(page_content))
         }
         None => Ok(HttpResponse::Found()
             .insert_header(("location", "/"))
@@ -120,11 +99,36 @@ pub async fn search(
     }
 }

+/// Fetches the results for a query and page.
+/// First checks the redis cache, if that fails it gets proper results
+async fn get_results(
+    url: String,
+    config: &Config,
+    query: &str,
+    page: u32,
+) -> Result<SearchResults, Box<dyn std::error::Error>> {
+    //Initialize redis cache connection struct
+    let mut redis_cache = RedisCache::new(config.redis_url.clone())?;
+    // fetch the cached results json.
+    let cached_results_json = redis_cache.get_cached_json(&url);
+    // check if fetched results was indeed fetched or it was an error and if so
+    // handle the data accordingly.
+    match cached_results_json {
+        Ok(results_json) => Ok(serde_json::from_str::<SearchResults>(&results_json).unwrap()),
+        Err(_) => {
+            let mut results_json: crate::results::aggregation_models::SearchResults =
+                aggregate(query, page, config.aggregator.random_delay, config.debug).await?;
+            results_json.add_style(config.style.clone());
+            redis_cache.cache_results(serde_json::to_string(&results_json)?, &url)?;
+            Ok(results_json)
+        }
+    }
+}
+
 /// Handles the route of robots.txt page of the `websurfx` meta search engine website.
 #[get("/robots.txt")]
 pub async fn robots_data(_req: HttpRequest) -> Result<HttpResponse, Box<dyn std::error::Error>> {
-    let page_content: String =
-        read_to_string(format!("{}/robots.txt", handle_different_public_path()?))?;
+    let page_content: String = read_to_string(format!("{}/robots.txt", get_public_path()?))?;
     Ok(HttpResponse::Ok()
         .content_type("text/plain; charset=ascii")
         .body(page_content))
tests/index.rs CHANGED
@@ -1,7 +1,7 @@
 use std::net::TcpListener;

 use handlebars::Handlebars;
-use websurfx::{config_parser::parser::Config, run};
+use websurfx::{config::parser::Config, run};

 // Starts a new instance of the HTTP server, bound to a random available port
 fn spawn_app() -> String {
@@ -41,5 +41,5 @@ async fn test_index() {
     assert_eq!(res.text().await.unwrap(), template);
 }

-// TODO: Write tests for tesing parameters for search function that if provided with something
+// TODO: Write tests for testing parameters for search function that if provided with something
 // other than u32 like alphabets and special characters than it should panic
websurfx/config.lua CHANGED
@@ -4,7 +4,7 @@ debug = false -- an option to enable or disable debug mode.

 -- ### Server ###
 port = "8080" -- port on which server should be launched
-binding_ip_addr = "127.0.0.1" --ip address on the which server should be launched.
+binding_ip = "127.0.0.1" --ip address on the which server should be launched.
 production_use = false -- whether to use production mode or not (in other words this option should be used if it is to be used to host it on the server to provide a service to a large number of users)
 -- if production_use is set to true
 -- There will be a random delay before sending the request to the search engines, this is to prevent DDoSing the upstream search engines from a large number of simultaneous requests.
@@ -25,4 +25,4 @@ colorscheme = "catppuccin-mocha" -- the colorscheme name which should be used fo
 theme = "simple" -- the theme name which should be used for the website

 -- ### Caching ###
-redis_connection_url = "redis://127.0.0.1:8082" -- redis connection url address on which the client should connect on.
+redis_url = "redis://127.0.0.1:8082" -- redis connection url address on which the client should connect on.
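
Taken together, the two renamed keys flow from config.lua into the call sites changed above: `binding_ip` is consumed when the listener is bound in src/bin/websurfx.rs, and `redis_url` when `RedisCache::new` is constructed inside `get_results`. A minimal sketch of that wiring, reusing only items shown in this diff; the `#[actix_web::main]` attribute is an assumption, since the attribute on `main` is outside the hunks shown:

    use std::net::TcpListener;
    use websurfx::{config::parser::Config, run};

    #[actix_web::main] // assumed; the real attribute on main() is not part of this diff
    async fn main() -> std::io::Result<()> {
        // Parses config.lua, including the renamed `binding_ip` and `redis_url` globals.
        let config = Config::parse().unwrap();

        // Same tuple form as src/bin/websurfx.rs: (String, u16) implements ToSocketAddrs.
        let listener = TcpListener::bind((config.binding_ip.clone(), config.port))?;

        // `config.redis_url` is read later, per request, when routes::get_results
        // calls RedisCache::new(config.redis_url.clone()).
        run(listener, config)?.await
    }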