//! This modules provides helper functionalities for parsing a html document into internal SearchResult. use std::collections::HashMap; use crate::models::{aggregation_models::SearchResult, engine_models::EngineError}; use error_stack::{Report, Result}; use scraper::{html::Select, ElementRef, Html, Selector}; /// A html search result parser, based on a predefined CSS selectors. pub struct SearchResultParser { /// selector to locate the element which is displayed, if there were nothing found. no_result: Selector, /// selector to locate the element which contains one item from the search result. results: Selector, /// selector to locate the title relative to the search result item. result_title: Selector, /// selector to locate the url relative to the search result item. result_url: Selector, /// selector to locate the description relative to the search result item. result_desc: Selector, } impl SearchResultParser { /// Creates a new parser, if all the selectors are valid, otherwise it returns an EngineError pub fn new( no_result_selector: &str, results_selector: &str, result_title_selector: &str, result_url_selector: &str, result_desc_selector: &str, ) -> Result { Ok(SearchResultParser { no_result: new_selector(no_result_selector)?, results: new_selector(results_selector)?, result_title: new_selector(result_title_selector)?, result_url: new_selector(result_url_selector)?, result_desc: new_selector(result_desc_selector)?, }) } /// Parse the html and returns element representing the 'no result found' response. pub fn parse_for_no_results<'a>(&'a self, document: &'a Html) -> Select<'a, 'a> { document.select(&self.no_result) } /// Parse the html, and convert the results to SearchResult with the help of the builder function pub fn parse_for_results( &self, document: &Html, builder: impl Fn(&ElementRef<'_>, &ElementRef<'_>, &ElementRef<'_>) -> Option, ) -> Result, EngineError> { let res = document .select(&self.results) .filter_map(|result| { let title = result.select(&self.result_title).next(); let url = result.select(&self.result_url).next(); let desc = result.select(&self.result_desc).next(); match (title, url, desc) { (Some(ref t), Some(ref u), Some(ref d)) => builder(t, u, d), _ => None, } }) .map(|search_result| (search_result.url.clone(), search_result)) .collect(); Ok(res) } } /// Create a Selector struct, if the given parameter is a valid css expression, otherwise convert it into an EngineError. fn new_selector(selector: &str) -> Result { Selector::parse(selector).map_err(|err| { Report::new(EngineError::UnexpectedError).attach_printable(format!( "invalid CSS selector: {}, err: {:?}", selector, err )) }) }