File size: 3,756 Bytes
5962cca
 
 
5aca5c0
b72af01
 
5962cca
 
 
 
 
 
 
 
 
 
dbe5b53
019b332
9a4cf94
5962cca
9a4cf94
 
5962cca
 
9a4cf94
 
5962cca
9a4cf94
5962cca
 
9a4cf94
ebb9e9e
 
 
 
 
 
 
c2280b7
5962cca
 
 
 
b72af01
 
15dfda6
b72af01
5aca5c0
b72af01
 
 
 
2d47e8d
b72af01
 
 
 
2d47e8d
15dfda6
b72af01
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2d47e8d
5aca5c0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b72af01
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
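//! This module provides the error enum used to handle the different errors that can occur while
//! requesting data from the upstream search engines with the search query provided by the user,
//! along with the `SearchEngine` trait and the `EngineHandler` type used to select an engine.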

use crate::results::aggregation_models::SearchResult;
use error_stack::{IntoReport, Result, ResultExt};
use std::{collections::HashMap, fmt, time::Duration};

/// A custom error type used for handling engine associated errors.
///
/// This enum provides variants for three different categories of errors:
/// * `RequestError` - This variant handles all request related errors like forbidden, not found,
/// etc.
/// * `EmptyResultSet` - This variant handles the no results found error provided by the upstream
/// search engines.
/// * `UnexpectedError` - This variant handles all the errors which are unexpected or occur rarely
/// and are errors mostly related to failure in initialization of HeaderMap, Selector errors and
/// all other errors occurring within the code handling the `upstream search engines`.
#[derive(Debug)]
pub enum EngineError {
    /// The upstream search engine returned an empty result set.
    EmptyResultSet,
    /// An error occurred while requesting data from the upstream search engine.
    RequestError,
    /// An unexpected error occurred while processing the data.
    UnexpectedError,
}

impl fmt::Display for EngineError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            EngineError::EmptyResultSet => {
                write!(f, "The upstream search engine returned an empty result set")
            }
            EngineError::RequestError => {
                write!(
                    f,
                    "Error occurred while requesting data from upstream search engine"
                )
            }
            EngineError::UnexpectedError => {
                write!(f, "An unexpected error occurred while processing the data")
            }
        }
    }
}

// Marks `EngineError` as an `error_stack` context so it can be used with `change_context`
// and as the error type of `error_stack::Result`, as `fetch_html_from_upstream` does.
impl error_stack::Context for EngineError {}

/// A trait to define common behavior for all search engines.
#[async_trait::async_trait]
pub trait SearchEngine: Sync + Send {
    /// Sends a GET request to `url` and returns the response body as text.
    ///
    /// # Arguments
    ///
    /// * `url` - The url the request is sent to.
    /// * `header_map` - The headers attached to the request.
    /// * `request_timeout` - The timeout applied to the request, in seconds.
    ///
    /// # Errors
    ///
    /// Returns `EngineError::RequestError` when sending the request fails or when the response
    /// body cannot be read as text.
    async fn fetch_html_from_upstream(
        &self,
        url: String,
        header_map: reqwest::header::HeaderMap,
        request_timeout: u8,
    ) -> Result<String, EngineError> {
        // Add timeout to request to avoid DDOSing the server
        let timeout = Duration::from_secs(u64::from(request_timeout));

        // fetch the html from upstream search engine
        let response = reqwest::Client::new()
            .get(url)
            .timeout(timeout)
            .headers(header_map) // add spoofed headers to emulate human behavior
            .send()
            .await
            .into_report()
            .change_context(EngineError::RequestError)?;

        let body = response
            .text()
            .await
            .into_report()
            .change_context(EngineError::RequestError)?;

        Ok(body)
    }

    /// Produces the search results of this engine for the given `query`.
    ///
    /// Every engine has to provide its own implementation of this method.
    ///
    /// # Arguments
    ///
    /// * `query` - The search query.
    /// * `page` - The page number of the results (presumably; confirm against the implementors).
    /// * `user_agent` - The user agent string (presumably sent with the upstream request; confirm
    /// against the implementors).
    /// * `request_timeout` - The request timeout; `fetch_html_from_upstream` interprets the value
    /// of the same name as seconds.
    ///
    /// # Errors
    ///
    /// Returns an `EngineError` when the results could not be produced.
    async fn results(
        &self,
        query: String,
        page: u32,
        user_agent: String,
        request_timeout: u8,
    ) -> Result<HashMap<String, SearchResult>, EngineError>;
}

/// Pairs a boxed `SearchEngine` implementation with the static name identifying it.
pub struct EngineHandler {
    /// The boxed engine implementation.
    engine: Box<dyn SearchEngine>,
    /// The lowercase engine name, as matched in `EngineHandler::new`.
    name: &'static str,
}

impl Clone for EngineHandler {
    /// Creates a new handler for the same engine by resolving the stored name again.
    ///
    /// # Panics
    ///
    /// Panics if the stored name is not accepted by `EngineHandler::new`. That would be a bug:
    /// in this file the private `name` field is only set by `new`, which stores one of the
    /// names it matches on.
    fn clone(&self) -> Self {
        // `Box<dyn SearchEngine>` cannot be cloned directly, so the engine is rebuilt by name.
        Self::new(self.name)
            .expect("EngineHandler name is set by `new` and must resolve to a known engine")
    }
}

impl EngineHandler {
    /// Builds an engine handler from an engine name.
    ///
    /// The name is lowercased before matching, so the lookup is case-insensitive.
    /// Returns `None` if the engine is unknown.
    pub fn new(engine_name: &str) -> Option<Self> {
        let normalized = engine_name.to_lowercase();

        let (name, engine): (&'static str, Box<dyn SearchEngine>) = match normalized.as_str() {
            "duckduckgo" => ("duckduckgo", Box::new(super::duckduckgo::DuckDuckGo)),
            "searx" => ("searx", Box::new(super::searx::Searx)),
            _ => return None,
        };

        Some(Self { engine, name })
    }

    /// Consumes the handler and returns its name together with the boxed engine.
    pub fn into_name_engine(self) -> (&'static str, Box<dyn SearchEngine>) {
        let Self { engine, name } = self;
        (name, engine)
    }
}