alamin655 neon_arch committed on
Commit
c02006c
1 Parent(s): 1143846

✨ `LibreX` for the search engine (#429)

Browse files

* ✨ feat(engine): provide librex search engine (#318)

* ✨ feat(engine): provide librex search engine (#318)

* ✨ feat(engine): provide librex search engine (#318)

* 🔖 chore(release): bump the app version (#318)

* 🔖 chore(release): bump the app version (#318)

* ✨ feat(engine): provide librex search engine (#318)

* ✨ feat(engine): provide librex search engine (#429)

Co-authored-by: neon_arch <[email protected]>

* ✨ feat(engine): provide librex search engine (#429)

Co-authored-by: neon_arch <[email protected]>

* ✨ feat(engine): provide librex search engine (#429)

Co-authored-by: neon_arch <[email protected]>

* ✨ feat(engine): provide librex search engine (#429)

* ✨ feat(engine): provide librex search engine (#429)

* ✨ feat(engine): provide librex search engine (#429)

* ✨ feat(engine): provide librex search engine (#429)

Co-authored-by: neon_arch <[email protected]>

---------

Co-authored-by: neon_arch <[email protected]>

Cargo.lock CHANGED
@@ -4066,7 +4066,7 @@ checksum = "1778a42e8b3b90bff8d0f5032bf22250792889a5cdc752aa0020c84abe3aaf10"
4066
 
4067
  [[package]]
4068
  name = "websurfx"
4069
- version = "1.4.2"
4070
  dependencies = [
4071
  "actix-cors",
4072
  "actix-files",
 
4066
 
4067
  [[package]]
4068
  name = "websurfx"
4069
+ version = "1.5.0"
4070
  dependencies = [
4071
  "actix-cors",
4072
  "actix-files",
Cargo.toml CHANGED
@@ -1,6 +1,6 @@
1
  [package]
2
  name = "websurfx"
3
- version = "1.4.2"
4
  edition = "2021"
5
  description = "An open-source alternative to Searx that provides clean, ad-free, and organic results with incredible speed while keeping privacy and security in mind."
6
  repository = "https://github.com/neon-mmd/websurfx"
 
1
  [package]
2
  name = "websurfx"
3
+ version = "1.5.0"
4
  edition = "2021"
5
  description = "An open-source alternative to Searx that provides clean, ad-free, and organic results with incredible speed while keeping privacy and security in mind."
6
  repository = "https://github.com/neon-mmd/websurfx"
src/engines/librex.rs ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ //! The `librex` module contains the implementation of a search engine for LibreX using the reqwest and scraper libraries.
2
+ //! It includes a `SearchEngine` trait implementation for interacting with the search engine and retrieving search results.
3
+
4
+ use std::collections::HashMap;
5
+
6
+ use reqwest::header::HeaderMap;
7
+ use reqwest::Client;
8
+ use scraper::Html;
9
+
10
+ use crate::models::aggregation_models::SearchResult;
11
+ use crate::models::engine_models::{EngineError, SearchEngine};
12
+
13
+ use error_stack::{Report, Result, ResultExt};
14
+
15
+ use super::search_result_parser::SearchResultParser;
16
+
17
+ /// Represents the LibreX search engine.
18
+ pub struct LibreX {
19
+ /// The parser used to extract search results from HTML documents.
20
+ parser: SearchResultParser,
21
+ }
22
+
23
+ impl LibreX {
24
+ /// Creates a new instance of LibreX with a default configuration.
25
+ ///
26
+ /// # Returns
27
+ ///
28
+ /// Returns a `Result` containing `LibreX` if successful, otherwise an `EngineError`.
29
+ pub fn new() -> Result<Self, EngineError> {
30
+ Ok(Self {
31
+ parser: SearchResultParser::new(
32
+ ".text-result-container>p",
33
+ ".text-result-container",
34
+ ".text-result-wrapper>a>h2",
35
+ ".text-result-wrapper>a",
36
+ ".text-result-wrapper>span",
37
+ )?,
38
+ })
39
+ }
40
+ }
41
+
42
+ #[async_trait::async_trait]
43
+ impl SearchEngine for LibreX {
44
+ /// Retrieves search results from LibreX based on the provided query, page, user agent, and client.
45
+ ///
46
+ /// # Arguments
47
+ ///
48
+ /// * `query` - The search query.
49
+ /// * `page` - The page number for pagination.
50
+ /// * `user_agent` - The user agent string.
51
+ /// * `client` - The reqwest client for making HTTP requests.
52
+ /// * `_safe_search` - A parameter for safe search (not currently used).
53
+ ///
54
+ /// # Returns
55
+ ///
56
+ /// Returns a `Result` containing a `HashMap` of search results if successful, otherwise an `EngineError`.
57
+ /// The `Err` variant is explicit for better documentation.
58
+ async fn results(
59
+ &self,
60
+ query: &str,
61
+ page: u32,
62
+ user_agent: &str,
63
+ client: &Client,
64
+ _safe_search: u8,
65
+ ) -> Result<HashMap<String, SearchResult>, EngineError> {
66
+ // Page number can be missing or empty string and so appropriate handling is required
67
+ // so that upstream server recieves valid page number.
68
+ let url: String = match page {
69
+ 1 | 0 => {
70
+ format!("https://search.ahwx.org/search.php?q={query}&p=0&t=10")
71
+ }
72
+ _ => {
73
+ format!(
74
+ "https://search.ahwx.org/search.php?q={query}&p={}&t=10",
75
+ page * 10,
76
+ )
77
+ }
78
+ };
79
+
80
+ // initializing HeaderMap and adding appropriate headers.
81
+ let header_map = HeaderMap::try_from(&HashMap::from([
82
+ ("USER_AGENT".to_string(), user_agent.to_string()),
83
+ ("REFERER".to_string(), "https://google.com/".to_string()),
84
+ ("CONTENT_TYPE".to_string(), "application/x-www-form-urlencoded".to_string()),
85
+ (
86
+ "COOKIE".to_string(),
87
+ "theme=amoled; disable_special=on; disable_frontends=on; language=en; number_of_results=10; safe_search=on; save=1".to_string(),
88
+ ),
89
+ ]))
90
+ .change_context(EngineError::UnexpectedError)?;
91
+
92
+ let document: Html = Html::parse_document(
93
+ &LibreX::fetch_html_from_upstream(self, &url, header_map, client).await?,
94
+ );
95
+
96
+ if self.parser.parse_for_no_results(&document).next().is_some() {
97
+ return Err(Report::new(EngineError::EmptyResultSet));
98
+ }
99
+
100
+ // scrape all the results from the html
101
+ self.parser
102
+ .parse_for_results(&document, |title, url, desc| {
103
+ Some(SearchResult::new(
104
+ title.inner_html().trim(),
105
+ url.inner_html().trim(),
106
+ desc.inner_html().trim(),
107
+ &["librex"],
108
+ ))
109
+ })
110
+ }
111
+ }
src/engines/mod.rs CHANGED
@@ -5,6 +5,7 @@
5
 
6
  pub mod brave;
7
  pub mod duckduckgo;
 
8
  pub mod search_result_parser;
9
  pub mod searx;
10
  pub mod startpage;
 
5
 
6
  pub mod brave;
7
  pub mod duckduckgo;
8
+ pub mod librex;
9
  pub mod search_result_parser;
10
  pub mod searx;
11
  pub mod startpage;
src/models/engine_models.rs CHANGED
@@ -158,6 +158,10 @@ impl EngineHandler {
158
  let engine = crate::engines::startpage::Startpage::new()?;
159
  ("startpage", Box::new(engine))
160
  }
 
 
 
 
161
  _ => {
162
  return Err(Report::from(EngineError::NoSuchEngineFound(
163
  engine_name.to_string(),
 
158
  let engine = crate::engines::startpage::Startpage::new()?;
159
  ("startpage", Box::new(engine))
160
  }
161
+ "librex" => {
162
+ let engine = crate::engines::librex::LibreX::new()?;
163
+ ("librex", Box::new(engine))
164
+ }
165
  _ => {
166
  return Err(Report::from(EngineError::NoSuchEngineFound(
167
  engine_name.to_string(),
websurfx/config.lua CHANGED
@@ -54,4 +54,5 @@ upstream_search_engines = {
54
  Searx = false,
55
  Brave = false,
56
  Startpage = false,
 
57
  } -- select the upstream search engines from which the results should be fetched.
 
54
  Searx = false,
55
  Brave = false,
56
  Startpage = false,
57
+ LibreX = false,
58
  } -- select the upstream search engines from which the results should be fetched.