Spaces:
Runtime error
Runtime error
add some comments to filter_with_lists and add a basic test
Browse files- Cargo.lock +1 -0
- Cargo.toml +1 -0
- src/results/aggregator.rs +65 -1
Cargo.lock
CHANGED
@@ -3566,6 +3566,7 @@ dependencies = [
|
|
3566 |
"scraper",
|
3567 |
"serde",
|
3568 |
"serde_json",
|
|
|
3569 |
"tokio 1.32.0",
|
3570 |
]
|
3571 |
|
|
|
3566 |
"scraper",
|
3567 |
"serde",
|
3568 |
"serde_json",
|
3569 |
+
"tempfile",
|
3570 |
"tokio 1.32.0",
|
3571 |
]
|
3572 |
|
Cargo.toml
CHANGED
@@ -27,6 +27,7 @@ once_cell = {version="1.18.0"}
|
|
27 |
error-stack = {version="0.3.1"}
|
28 |
async-trait = {version="0.1.73"}
|
29 |
regex = {version="1.9.3", features=["perf"]}
|
|
|
30 |
|
31 |
[dev-dependencies]
|
32 |
rusty-hook = "^0.11.2"
|
|
|
27 |
error-stack = {version="0.3.1"}
|
28 |
async-trait = {version="0.1.73"}
|
29 |
regex = {version="1.9.3", features=["perf"]}
|
30 |
+
tempfile = "3.8.0"
|
31 |
|
32 |
[dev-dependencies]
|
33 |
rusty-hook = "^0.11.2"
|
src/results/aggregator.rs
CHANGED
@@ -175,22 +175,86 @@ pub async fn aggregate(
|
|
175 |
))
|
176 |
}
|
177 |
|
178 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
179 |
map_to_be_filtered: &mut HashMap<String, SearchResult>,
|
180 |
resultant_map: &mut HashMap<String, SearchResult>,
|
181 |
file_path: &str,
|
182 |
) -> Result<(), Box<dyn std::error::Error>> {
|
183 |
let mut reader = BufReader::new(File::open(file_path)?);
|
|
|
184 |
for line in reader.by_ref().lines() {
|
185 |
let re = Regex::new(&line?)?;
|
|
|
|
|
186 |
for (url, search_result) in map_to_be_filtered.clone().into_iter() {
|
187 |
if re.is_match(&url.to_lowercase())
|
188 |
|| re.is_match(&search_result.title.to_lowercase())
|
189 |
|| re.is_match(&search_result.description.to_lowercase())
|
190 |
{
|
|
|
191 |
resultant_map.insert(url.clone(), map_to_be_filtered.remove(&url).unwrap());
|
192 |
}
|
193 |
}
|
194 |
}
|
|
|
195 |
Ok(())
|
196 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
175 |
))
|
176 |
}
|
177 |
|
178 |
+
/// Filters a map of search results using a list of regex patterns.
|
179 |
+
///
|
180 |
+
/// # Arguments
|
181 |
+
///
|
182 |
+
/// * `map_to_be_filtered` - A mutable reference to a `HashMap` of search results to filter, where the filtered results will be removed from.
|
183 |
+
/// * `resultant_map` - A mutable reference to a `HashMap` to hold the filtered results.
|
184 |
+
/// * `file_path` - A `&str` representing the path to a file containing regex patterns to use for filtering.
|
185 |
+
///
|
186 |
+
/// # Errors
|
187 |
+
///
|
188 |
+
/// Returns an error if the file at `file_path` cannot be opened or read, or if a regex pattern is invalid.
|
189 |
+
pub fn filter_with_lists(
|
190 |
map_to_be_filtered: &mut HashMap<String, SearchResult>,
|
191 |
resultant_map: &mut HashMap<String, SearchResult>,
|
192 |
file_path: &str,
|
193 |
) -> Result<(), Box<dyn std::error::Error>> {
|
194 |
let mut reader = BufReader::new(File::open(file_path)?);
|
195 |
+
|
196 |
for line in reader.by_ref().lines() {
|
197 |
let re = Regex::new(&line?)?;
|
198 |
+
|
199 |
+
// Iterate over each search result in the map and check if it matches the regex pattern
|
200 |
for (url, search_result) in map_to_be_filtered.clone().into_iter() {
|
201 |
if re.is_match(&url.to_lowercase())
|
202 |
|| re.is_match(&search_result.title.to_lowercase())
|
203 |
|| re.is_match(&search_result.description.to_lowercase())
|
204 |
{
|
205 |
+
// If the search result matches the regex pattern, move it from the original map to the resultant map
|
206 |
resultant_map.insert(url.clone(), map_to_be_filtered.remove(&url).unwrap());
|
207 |
}
|
208 |
}
|
209 |
}
|
210 |
+
|
211 |
Ok(())
|
212 |
}
|
213 |
+
|
214 |
+
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashMap;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Every result matched by a pattern in the list file must leave the source map and
    /// end up in the resultant map.
    #[test]
    fn test_filter_with_lists() -> Result<(), Box<dyn std::error::Error>> {
        // Two results, each of which is matched by exactly one of the patterns below.
        let example_result = SearchResult {
            title: "Example Domain".to_string(),
            url: "https://www.example.com".to_string(),
            description: "This domain is for use in illustrative examples in documents."
                .to_string(),
            engine: vec!["Google".to_string(), "Bing".to_string()],
        };
        let rust_result = SearchResult {
            title: "Rust Programming Language".to_string(),
            url: "https://www.rust-lang.org/".to_string(),
            description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_string(),
            engine: vec!["Google".to_string(), "DuckDuckGo".to_string()],
        };

        let mut map_to_be_filtered = HashMap::from([
            ("https://www.example.com".to_string(), example_result),
            ("https://www.rust-lang.org/".to_string(), rust_result),
        ]);

        // Write one pattern per line to a temporary list file and flush it so the
        // function under test sees the full contents when it opens the path.
        let mut file = NamedTempFile::new()?;
        for pattern in ["example", "rust"] {
            writeln!(file, "{}", pattern)?;
        }
        file.flush()?;

        let list_path = file.path().to_str().unwrap();
        let mut resultant_map = HashMap::new();
        filter_with_lists(&mut map_to_be_filtered, &mut resultant_map, list_path)?;

        // Both results were moved across and nothing is left behind.
        assert_eq!(resultant_map.len(), 2);
        assert!(resultant_map.contains_key("https://www.example.com"));
        assert!(resultant_map.contains_key("https://www.rust-lang.org/"));
        assert_eq!(map_to_be_filtered.len(), 0);

        Ok(())
    }
}
|