Spaces:
Runtime error
Runtime error
:recycle: Refactor cache system (#399)
Browse files* :recycle: Refactor cache system
* :bug: Fix cache not getting set
This patch also makes it so that cookies are eagerly evaluated. This is
done to figure out the safe search level set by the user. The
performance hit wouldn't be much of a deal, as the cookie is a small
JSON string.
* 🔖 chore: bump the app version (#399)
* 🔖 chore: bump the app version (#399)
---------
Co-authored-by: alamin655 <[email protected]>
- Cargo.lock +1 -1
- Cargo.toml +1 -1
- src/bin/websurfx.rs +2 -2
- src/cache/cacher.rs +176 -158
- src/cache/redis_cacher.rs +12 -24
- src/lib.rs +14 -7
- src/server/routes/search.rs +42 -38
- tests/index.rs +4 -9
Cargo.lock
CHANGED
@@ -4066,7 +4066,7 @@ checksum = "1778a42e8b3b90bff8d0f5032bf22250792889a5cdc752aa0020c84abe3aaf10"
|
|
4066 |
|
4067 |
[[package]]
|
4068 |
name = "websurfx"
|
4069 |
-
version = "1.2.
|
4070 |
dependencies = [
|
4071 |
"actix-cors",
|
4072 |
"actix-files",
|
|
|
4066 |
|
4067 |
[[package]]
|
4068 |
name = "websurfx"
|
4069 |
+
version = "1.2.34"
|
4070 |
dependencies = [
|
4071 |
"actix-cors",
|
4072 |
"actix-files",
|
Cargo.toml
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
[package]
|
2 |
name = "websurfx"
|
3 |
-
version = "1.2.
|
4 |
edition = "2021"
|
5 |
description = "An open-source alternative to Searx that provides clean, ad-free, and organic results with incredible speed while keeping privacy and security in mind."
|
6 |
repository = "https://github.com/neon-mmd/websurfx"
|
|
|
1 |
[package]
|
2 |
name = "websurfx"
|
3 |
+
version = "1.2.34"
|
4 |
edition = "2021"
|
5 |
description = "An open-source alternative to Searx that provides clean, ad-free, and organic results with incredible speed while keeping privacy and security in mind."
|
6 |
repository = "https://github.com/neon-mmd/websurfx"
|
src/bin/websurfx.rs
CHANGED
@@ -5,7 +5,7 @@
|
|
5 |
|
6 |
use mimalloc::MiMalloc;
|
7 |
use std::net::TcpListener;
|
8 |
-
use websurfx::{cache::cacher::
|
9 |
|
10 |
/// A dhat heap memory profiler
|
11 |
#[cfg(feature = "dhat-heap")]
|
@@ -31,7 +31,7 @@ async fn main() -> std::io::Result<()> {
|
|
31 |
// Initialize the parsed config file.
|
32 |
let config = Config::parse(false).unwrap();
|
33 |
|
34 |
-
let cache =
|
35 |
|
36 |
log::info!(
|
37 |
"started server on port {} and IP {}",
|
|
|
5 |
|
6 |
use mimalloc::MiMalloc;
|
7 |
use std::net::TcpListener;
|
8 |
+
use websurfx::{cache::cacher::create_cache, config::parser::Config, run};
|
9 |
|
10 |
/// A dhat heap memory profiler
|
11 |
#[cfg(feature = "dhat-heap")]
|
|
|
31 |
// Initialize the parsed config file.
|
32 |
let config = Config::parse(false).unwrap();
|
33 |
|
34 |
+
let cache = create_cache(&config).await;
|
35 |
|
36 |
log::info!(
|
37 |
"started server on port {} and IP {}",
|
src/cache/cacher.rs
CHANGED
@@ -14,24 +14,10 @@ use super::error::CacheError;
|
|
14 |
#[cfg(feature = "redis-cache")]
|
15 |
use super::redis_cacher::RedisCache;
|
16 |
|
17 |
-
///
|
18 |
-
#[
|
19 |
-
pub
|
20 |
-
|
21 |
-
Disabled,
|
22 |
-
#[cfg(all(feature = "redis-cache", not(feature = "memory-cache")))]
|
23 |
-
/// Encapsulates the Redis based cache
|
24 |
-
Redis(RedisCache),
|
25 |
-
#[cfg(all(feature = "memory-cache", not(feature = "redis-cache")))]
|
26 |
-
/// Contains the in-memory cache.
|
27 |
-
InMemory(MokaCache<String, SearchResults>),
|
28 |
-
#[cfg(all(feature = "redis-cache", feature = "memory-cache"))]
|
29 |
-
/// Contains both the in-memory cache and Redis based cache
|
30 |
-
Hybrid(RedisCache, MokaCache<String, SearchResults>),
|
31 |
-
}
|
32 |
-
|
33 |
-
impl Cache {
|
34 |
-
/// A function that builds the cache from the given configuration.
|
35 |
///
|
36 |
/// # Arguments
|
37 |
///
|
@@ -39,89 +25,10 @@ impl Cache {
|
|
39 |
///
|
40 |
/// # Returns
|
41 |
///
|
42 |
-
/// It returns a newly initialized
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
log::info!("Using a hybrid cache");
|
47 |
-
Cache::new_hybrid(
|
48 |
-
RedisCache::new(&_config.redis_url, 5)
|
49 |
-
.await
|
50 |
-
.expect("Redis cache configured"),
|
51 |
-
)
|
52 |
-
}
|
53 |
-
#[cfg(all(feature = "redis-cache", not(feature = "memory-cache")))]
|
54 |
-
{
|
55 |
-
log::info!("Listening redis server on {}", &_config.redis_url);
|
56 |
-
Cache::new(
|
57 |
-
RedisCache::new(&_config.redis_url, 5)
|
58 |
-
.await
|
59 |
-
.expect("Redis cache configured"),
|
60 |
-
)
|
61 |
-
}
|
62 |
-
#[cfg(all(feature = "memory-cache", not(feature = "redis-cache")))]
|
63 |
-
{
|
64 |
-
log::info!("Using an in-memory cache");
|
65 |
-
Cache::new_in_memory()
|
66 |
-
}
|
67 |
-
#[cfg(not(any(feature = "memory-cache", feature = "redis-cache")))]
|
68 |
-
{
|
69 |
-
log::info!("Caching is disabled");
|
70 |
-
Cache::Disabled
|
71 |
-
}
|
72 |
-
}
|
73 |
-
|
74 |
-
/// A function that initializes a new connection pool struct.
|
75 |
-
///
|
76 |
-
/// # Arguments
|
77 |
-
///
|
78 |
-
/// * `redis_cache` - It takes the newly initialized connection pool struct as an argument.
|
79 |
-
///
|
80 |
-
/// # Returns
|
81 |
-
///
|
82 |
-
/// It returns a `Redis` variant with the newly initialized connection pool struct.
|
83 |
-
#[cfg(all(feature = "redis-cache", not(feature = "memory-cache")))]
|
84 |
-
pub fn new(redis_cache: RedisCache) -> Self {
|
85 |
-
Cache::Redis(redis_cache)
|
86 |
-
}
|
87 |
-
|
88 |
-
/// A function that initializes the `in memory` cache which is used to cache the results in
|
89 |
-
/// memory with the search engine thus improving performance by making retrieval and caching of
|
90 |
-
/// results faster.
|
91 |
-
///
|
92 |
-
/// # Returns
|
93 |
-
///
|
94 |
-
/// It returns a `InMemory` variant with the newly initialized in memory cache type.
|
95 |
-
#[cfg(all(feature = "memory-cache", not(feature = "redis-cache")))]
|
96 |
-
pub fn new_in_memory() -> Self {
|
97 |
-
let cache = MokaCache::builder()
|
98 |
-
.max_capacity(1000)
|
99 |
-
.time_to_live(Duration::from_secs(60))
|
100 |
-
.build();
|
101 |
-
Cache::InMemory(cache)
|
102 |
-
}
|
103 |
-
|
104 |
-
/// A function that initializes both in memory cache and redis client connection for being used
|
105 |
-
/// for managing hybrid cache which increases resiliancy of the search engine by allowing the
|
106 |
-
/// cache to switch to `in memory` caching if the `redis` cache server is temporarily
|
107 |
-
/// unavailable.
|
108 |
-
///
|
109 |
-
/// # Arguments
|
110 |
-
///
|
111 |
-
/// * `redis_cache` - It takes `redis` client connection struct as an argument.
|
112 |
-
///
|
113 |
-
/// # Returns
|
114 |
-
///
|
115 |
-
/// It returns a tuple variant `Hybrid` storing both the in-memory cache type and the `redis`
|
116 |
-
/// client connection struct.
|
117 |
-
#[cfg(all(feature = "redis-cache", feature = "memory-cache"))]
|
118 |
-
pub fn new_hybrid(redis_cache: RedisCache) -> Self {
|
119 |
-
let cache = MokaCache::builder()
|
120 |
-
.max_capacity(1000)
|
121 |
-
.time_to_live(Duration::from_secs(60))
|
122 |
-
.build();
|
123 |
-
Cache::Hybrid(redis_cache, cache)
|
124 |
-
}
|
125 |
|
126 |
/// A function which fetches the cached json results as json string.
|
127 |
///
|
@@ -133,31 +40,7 @@ impl Cache {
|
|
133 |
///
|
134 |
/// Returns the `SearchResults` from the cache if the program executes normally otherwise
|
135 |
/// returns a `CacheError` if the results cannot be retrieved from the cache.
|
136 |
-
|
137 |
-
match self {
|
138 |
-
Cache::Disabled => Err(Report::new(CacheError::MissingValue)),
|
139 |
-
#[cfg(all(feature = "redis-cache", not(feature = "memory-cache")))]
|
140 |
-
Cache::Redis(redis_cache) => {
|
141 |
-
let json = redis_cache.cached_json(_url).await?;
|
142 |
-
Ok(serde_json::from_str::<SearchResults>(&json)
|
143 |
-
.map_err(|_| CacheError::SerializationError)?)
|
144 |
-
}
|
145 |
-
#[cfg(all(feature = "memory-cache", not(feature = "redis-cache")))]
|
146 |
-
Cache::InMemory(in_memory) => match in_memory.get(&_url.to_string()) {
|
147 |
-
Some(res) => Ok(res),
|
148 |
-
None => Err(Report::new(CacheError::MissingValue)),
|
149 |
-
},
|
150 |
-
#[cfg(all(feature = "redis-cache", feature = "memory-cache"))]
|
151 |
-
Cache::Hybrid(redis_cache, in_memory) => match redis_cache.cached_json(_url).await {
|
152 |
-
Ok(res) => Ok(serde_json::from_str::<SearchResults>(&res)
|
153 |
-
.map_err(|_| CacheError::SerializationError)?),
|
154 |
-
Err(_) => match in_memory.get(&_url.to_string()) {
|
155 |
-
Some(res) => Ok(res),
|
156 |
-
None => Err(Report::new(CacheError::MissingValue)),
|
157 |
-
},
|
158 |
-
},
|
159 |
-
}
|
160 |
-
}
|
161 |
|
162 |
/// A function which caches the results by using the `url` as the key and
|
163 |
/// `json results` as the value and stores it in the cache
|
@@ -172,44 +55,164 @@ impl Cache {
|
|
172 |
/// Returns a unit type if the program caches the given search results without a failure
|
173 |
/// otherwise it returns a `CacheError` if the search results cannot be cached due to a
|
174 |
/// failure.
|
175 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
176 |
&mut self,
|
177 |
_search_results: &SearchResults,
|
178 |
_url: &str,
|
179 |
) -> Result<(), Report<CacheError>> {
|
180 |
-
|
181 |
-
Cache::Disabled => Ok(()),
|
182 |
-
#[cfg(all(feature = "redis-cache", not(feature = "memory-cache")))]
|
183 |
-
Cache::Redis(redis_cache) => {
|
184 |
-
let json = serde_json::to_string(_search_results)
|
185 |
-
.map_err(|_| CacheError::SerializationError)?;
|
186 |
-
redis_cache.cache_results(&json, _url).await
|
187 |
-
}
|
188 |
-
#[cfg(all(feature = "memory-cache", not(feature = "redis-cache")))]
|
189 |
-
Cache::InMemory(cache) => {
|
190 |
-
cache.insert(_url.to_string(), _search_results.clone());
|
191 |
-
Ok(())
|
192 |
-
}
|
193 |
-
#[cfg(all(feature = "memory-cache", feature = "redis-cache"))]
|
194 |
-
Cache::Hybrid(redis_cache, cache) => {
|
195 |
-
let json = serde_json::to_string(_search_results)
|
196 |
-
.map_err(|_| CacheError::SerializationError)?;
|
197 |
-
match redis_cache.cache_results(&json, _url).await {
|
198 |
-
Ok(_) => Ok(()),
|
199 |
-
Err(_) => {
|
200 |
-
cache.insert(_url.to_string(), _search_results.clone());
|
201 |
-
Ok(())
|
202 |
-
}
|
203 |
-
}
|
204 |
-
}
|
205 |
-
}
|
206 |
}
|
207 |
}
|
208 |
|
209 |
/// A structure to efficiently share the cache between threads - as it is protected by a Mutex.
|
210 |
pub struct SharedCache {
|
211 |
/// The internal cache protected from concurrent access by a mutex
|
212 |
-
cache: Mutex<
|
213 |
}
|
214 |
|
215 |
impl SharedCache {
|
@@ -220,9 +223,9 @@ impl SharedCache {
|
|
220 |
/// * `cache` - It takes the `Cache` enum variant as an argument with the prefered cache type.
|
221 |
///
|
222 |
/// Returns a newly constructed `SharedCache` struct.
|
223 |
-
pub fn new(cache:
|
224 |
Self {
|
225 |
-
cache: Mutex::new(cache),
|
226 |
}
|
227 |
}
|
228 |
|
@@ -237,9 +240,9 @@ impl SharedCache {
|
|
237 |
///
|
238 |
/// Returns a `SearchResults` struct containing the search results from the cache if nothing
|
239 |
/// goes wrong otherwise returns a `CacheError`.
|
240 |
-
pub async fn
|
241 |
let mut mut_cache = self.cache.lock().await;
|
242 |
-
mut_cache.
|
243 |
}
|
244 |
|
245 |
/// A setter function which caches the results by using the `url` as the key and
|
@@ -265,3 +268,18 @@ impl SharedCache {
|
|
265 |
mut_cache.cache_results(search_results, url).await
|
266 |
}
|
267 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
#[cfg(feature = "redis-cache")]
|
15 |
use super::redis_cacher::RedisCache;
|
16 |
|
17 |
+
/// Abstraction trait for common methods provided by a cache backend.
|
18 |
+
#[async_trait::async_trait]
|
19 |
+
pub trait Cacher: Send + Sync {
|
20 |
+
// A function that builds the cache from the given configuration.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
21 |
///
|
22 |
/// # Arguments
|
23 |
///
|
|
|
25 |
///
|
26 |
/// # Returns
|
27 |
///
|
28 |
+
/// It returns a newly initialized backend based on the feature enabled by the user.
|
29 |
+
async fn build(config: &Config) -> Self
|
30 |
+
where
|
31 |
+
Self: Sized;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
32 |
|
33 |
/// A function which fetches the cached json results as json string.
|
34 |
///
|
|
|
40 |
///
|
41 |
/// Returns the `SearchResults` from the cache if the program executes normally otherwise
|
42 |
/// returns a `CacheError` if the results cannot be retrieved from the cache.
|
43 |
+
async fn cached_results(&mut self, url: &str) -> Result<SearchResults, Report<CacheError>>;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
44 |
|
45 |
/// A function which caches the results by using the `url` as the key and
|
46 |
/// `json results` as the value and stores it in the cache
|
|
|
55 |
/// Returns a unit type if the program caches the given search results without a failure
|
56 |
/// otherwise it returns a `CacheError` if the search results cannot be cached due to a
|
57 |
/// failure.
|
58 |
+
async fn cache_results(
|
59 |
+
&mut self,
|
60 |
+
search_results: &SearchResults,
|
61 |
+
url: &str,
|
62 |
+
) -> Result<(), Report<CacheError>>;
|
63 |
+
|
64 |
+
/// A helper function which computes the hash of the url and formats and returns it as string.
|
65 |
+
///
|
66 |
+
/// # Arguments
|
67 |
+
///
|
68 |
+
/// * `url` - It takes an url as string.
|
69 |
+
fn hash_url(&self, url: &str) -> String {
|
70 |
+
blake3::hash(url.as_bytes()).to_string()
|
71 |
+
}
|
72 |
+
}
|
73 |
+
|
74 |
+
#[cfg(feature = "redis-cache")]
|
75 |
+
#[async_trait::async_trait]
|
76 |
+
impl Cacher for RedisCache {
|
77 |
+
async fn build(config: &Config) -> Self {
|
78 |
+
log::info!(
|
79 |
+
"Initialising redis cache. Listening to {}",
|
80 |
+
&config.redis_url
|
81 |
+
);
|
82 |
+
RedisCache::new(&config.redis_url, 5)
|
83 |
+
.await
|
84 |
+
.expect("Redis cache configured")
|
85 |
+
}
|
86 |
+
|
87 |
+
async fn cached_results(&mut self, url: &str) -> Result<SearchResults, Report<CacheError>> {
|
88 |
+
let hashed_url_string: &str = &self.hash_url(url);
|
89 |
+
let json = self.cached_json(hashed_url_string).await?;
|
90 |
+
Ok(serde_json::from_str::<SearchResults>(&json)
|
91 |
+
.map_err(|_| CacheError::SerializationError)?)
|
92 |
+
}
|
93 |
+
|
94 |
+
async fn cache_results(
|
95 |
+
&mut self,
|
96 |
+
search_results: &SearchResults,
|
97 |
+
url: &str,
|
98 |
+
) -> Result<(), Report<CacheError>> {
|
99 |
+
let json =
|
100 |
+
serde_json::to_string(search_results).map_err(|_| CacheError::SerializationError)?;
|
101 |
+
let hashed_url_string = self.hash_url(url);
|
102 |
+
self.cache_json(&json, &hashed_url_string).await
|
103 |
+
}
|
104 |
+
}
|
105 |
+
|
106 |
+
/// Memory based cache backend.
|
107 |
+
#[cfg(feature = "memory-cache")]
|
108 |
+
pub struct InMemoryCache {
|
109 |
+
/// The backend cache which stores data.
|
110 |
+
cache: MokaCache<String, SearchResults>,
|
111 |
+
}
|
112 |
+
|
113 |
+
#[cfg(feature = "memory-cache")]
|
114 |
+
#[async_trait::async_trait]
|
115 |
+
impl Cacher for InMemoryCache {
|
116 |
+
async fn build(_config: &Config) -> Self {
|
117 |
+
log::info!("Initialising in-memory cache");
|
118 |
+
|
119 |
+
InMemoryCache {
|
120 |
+
cache: MokaCache::builder()
|
121 |
+
.max_capacity(1000)
|
122 |
+
.time_to_live(Duration::from_secs(60))
|
123 |
+
.build(),
|
124 |
+
}
|
125 |
+
}
|
126 |
+
|
127 |
+
async fn cached_results(&mut self, url: &str) -> Result<SearchResults, Report<CacheError>> {
|
128 |
+
let hashed_url_string = self.hash_url(url);
|
129 |
+
match self.cache.get(&hashed_url_string) {
|
130 |
+
Some(res) => Ok(res),
|
131 |
+
None => Err(Report::new(CacheError::MissingValue)),
|
132 |
+
}
|
133 |
+
}
|
134 |
+
|
135 |
+
async fn cache_results(
|
136 |
+
&mut self,
|
137 |
+
search_results: &SearchResults,
|
138 |
+
url: &str,
|
139 |
+
) -> Result<(), Report<CacheError>> {
|
140 |
+
let hashed_url_string = self.hash_url(url);
|
141 |
+
self.cache.insert(hashed_url_string, search_results.clone());
|
142 |
+
Ok(())
|
143 |
+
}
|
144 |
+
}
|
145 |
+
|
146 |
+
/// Cache backend which utilises both memory and redis based caches.
|
147 |
+
///
|
148 |
+
/// The hybrid cache system uses both the types of cache to ensure maximum availability.
|
149 |
+
/// The set method sets the key, value pair in both the caches. Therefore in a case where redis
|
150 |
+
/// cache becomes unavailable, the backend will retreive the value from in-memory cache.
|
151 |
+
#[cfg(all(feature = "memory-cache", feature = "redis-cache"))]
|
152 |
+
pub struct HybridCache {
|
153 |
+
/// The in-memory backend cache which stores data.
|
154 |
+
memory_cache: InMemoryCache,
|
155 |
+
/// The redis backend cache which stores data.
|
156 |
+
redis_cache: RedisCache,
|
157 |
+
}
|
158 |
+
|
159 |
+
#[cfg(all(feature = "memory-cache", feature = "redis-cache"))]
|
160 |
+
#[async_trait::async_trait]
|
161 |
+
impl Cacher for HybridCache {
|
162 |
+
async fn build(config: &Config) -> Self {
|
163 |
+
log::info!("Initialising hybrid cache");
|
164 |
+
HybridCache {
|
165 |
+
memory_cache: InMemoryCache::build(config).await,
|
166 |
+
redis_cache: RedisCache::build(config).await,
|
167 |
+
}
|
168 |
+
}
|
169 |
+
|
170 |
+
async fn cached_results(&mut self, url: &str) -> Result<SearchResults, Report<CacheError>> {
|
171 |
+
match self.redis_cache.cached_results(url).await {
|
172 |
+
Ok(res) => Ok(res),
|
173 |
+
Err(_) => self.memory_cache.cached_results(url).await,
|
174 |
+
}
|
175 |
+
}
|
176 |
+
|
177 |
+
async fn cache_results(
|
178 |
+
&mut self,
|
179 |
+
search_results: &SearchResults,
|
180 |
+
url: &str,
|
181 |
+
) -> Result<(), Report<CacheError>> {
|
182 |
+
self.redis_cache.cache_results(search_results, url).await?;
|
183 |
+
self.memory_cache.cache_results(search_results, url).await?;
|
184 |
+
|
185 |
+
Ok(())
|
186 |
+
}
|
187 |
+
}
|
188 |
+
|
189 |
+
/// Dummy cache backend
|
190 |
+
pub struct DisabledCache;
|
191 |
+
|
192 |
+
#[async_trait::async_trait]
|
193 |
+
impl Cacher for DisabledCache {
|
194 |
+
async fn build(_config: &Config) -> Self {
|
195 |
+
log::info!("Caching is disabled");
|
196 |
+
DisabledCache
|
197 |
+
}
|
198 |
+
|
199 |
+
async fn cached_results(&mut self, _url: &str) -> Result<SearchResults, Report<CacheError>> {
|
200 |
+
Err(Report::new(CacheError::MissingValue))
|
201 |
+
}
|
202 |
+
|
203 |
+
async fn cache_results(
|
204 |
&mut self,
|
205 |
_search_results: &SearchResults,
|
206 |
_url: &str,
|
207 |
) -> Result<(), Report<CacheError>> {
|
208 |
+
Ok(())
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
209 |
}
|
210 |
}
|
211 |
|
212 |
/// A structure to efficiently share the cache between threads - as it is protected by a Mutex.
|
213 |
pub struct SharedCache {
|
214 |
/// The internal cache protected from concurrent access by a mutex
|
215 |
+
cache: Mutex<Box<dyn Cacher>>,
|
216 |
}
|
217 |
|
218 |
impl SharedCache {
|
|
|
223 |
/// * `cache` - It takes the `Cache` enum variant as an argument with the prefered cache type.
|
224 |
///
|
225 |
/// Returns a newly constructed `SharedCache` struct.
|
226 |
+
pub fn new(cache: impl Cacher + 'static) -> Self {
|
227 |
Self {
|
228 |
+
cache: Mutex::new(Box::new(cache)),
|
229 |
}
|
230 |
}
|
231 |
|
|
|
240 |
///
|
241 |
/// Returns a `SearchResults` struct containing the search results from the cache if nothing
|
242 |
/// goes wrong otherwise returns a `CacheError`.
|
243 |
+
pub async fn cached_results(&self, url: &str) -> Result<SearchResults, Report<CacheError>> {
|
244 |
let mut mut_cache = self.cache.lock().await;
|
245 |
+
mut_cache.cached_results(url).await
|
246 |
}
|
247 |
|
248 |
/// A setter function which caches the results by using the `url` as the key and
|
|
|
268 |
mut_cache.cache_results(search_results, url).await
|
269 |
}
|
270 |
}
|
271 |
+
|
272 |
+
/// A function to initialise the cache backend.
|
273 |
+
pub async fn create_cache(config: &Config) -> impl Cacher {
|
274 |
+
#[cfg(all(feature = "redis-cache", feature = "memory-cache"))]
|
275 |
+
return HybridCache::build(config).await;
|
276 |
+
|
277 |
+
#[cfg(all(feature = "memory-cache", not(feature = "redis-cache")))]
|
278 |
+
return InMemoryCache::build(config).await;
|
279 |
+
|
280 |
+
#[cfg(all(feature = "redis-cache", not(feature = "memory-cache")))]
|
281 |
+
return RedisCache::build(config).await;
|
282 |
+
|
283 |
+
#[cfg(not(any(feature = "memory-cache", feature = "redis-cache")))]
|
284 |
+
return DisabledCache::build(config).await;
|
285 |
+
}
|
src/cache/redis_cacher.rs
CHANGED
@@ -1,7 +1,6 @@
|
|
1 |
//! This module provides the functionality to cache the aggregated results fetched and aggregated
|
2 |
//! from the upstream search engines in a json format.
|
3 |
|
4 |
-
use blake3::hash;
|
5 |
use error_stack::Report;
|
6 |
use futures::future::try_join_all;
|
7 |
use redis::{aio::ConnectionManager, AsyncCommands, Client, RedisError};
|
@@ -53,32 +52,22 @@ impl RedisCache {
|
|
53 |
Ok(redis_cache)
|
54 |
}
|
55 |
|
56 |
-
/// A
|
57 |
///
|
58 |
/// # Arguments
|
59 |
///
|
60 |
-
/// * `
|
61 |
-
fn hash_url(&self, url: &str) -> String {
|
62 |
-
format!("{:?}", blake3::hash(url.as_bytes()))
|
63 |
-
}
|
64 |
-
|
65 |
-
/// A function which fetches the cached json results as json string from the redis server.
|
66 |
-
///
|
67 |
-
/// # Arguments
|
68 |
-
///
|
69 |
-
/// * `url` - It takes an url as a string.
|
70 |
///
|
71 |
/// # Error
|
72 |
///
|
73 |
-
/// Returns the
|
74 |
/// on a failure.
|
75 |
-
pub async fn cached_json(&mut self,
|
76 |
self.current_connection = Default::default();
|
77 |
-
let hashed_url_string: &str = &self.hash_url(url);
|
78 |
|
79 |
let mut result: Result<String, RedisError> = self.connection_pool
|
80 |
[self.current_connection as usize]
|
81 |
-
.get(
|
82 |
.await;
|
83 |
|
84 |
// Code to check whether the current connection being used is dropped with connection error
|
@@ -99,7 +88,7 @@ impl RedisCache {
|
|
99 |
));
|
100 |
}
|
101 |
result = self.connection_pool[self.current_connection as usize]
|
102 |
-
.get(
|
103 |
.await;
|
104 |
continue;
|
105 |
}
|
@@ -110,30 +99,29 @@ impl RedisCache {
|
|
110 |
}
|
111 |
}
|
112 |
|
113 |
-
/// A function which caches the
|
114 |
/// `json results` as the value and stores it in redis server with ttl(time to live)
|
115 |
/// set to 60 seconds.
|
116 |
///
|
117 |
/// # Arguments
|
118 |
///
|
119 |
/// * `json_results` - It takes the json results string as an argument.
|
120 |
-
/// * `
|
121 |
///
|
122 |
/// # Error
|
123 |
///
|
124 |
/// Returns an unit type if the results are cached succesfully otherwise returns a `CacheError`
|
125 |
/// on a failure.
|
126 |
-
pub async fn
|
127 |
&mut self,
|
128 |
json_results: &str,
|
129 |
-
|
130 |
) -> Result<(), Report<CacheError>> {
|
131 |
self.current_connection = Default::default();
|
132 |
-
let hashed_url_string: &str = &self.hash_url(url);
|
133 |
|
134 |
let mut result: Result<(), RedisError> = self.connection_pool
|
135 |
[self.current_connection as usize]
|
136 |
-
.set_ex(
|
137 |
.await;
|
138 |
|
139 |
// Code to check whether the current connection being used is dropped with connection error
|
@@ -154,7 +142,7 @@ impl RedisCache {
|
|
154 |
));
|
155 |
}
|
156 |
result = self.connection_pool[self.current_connection as usize]
|
157 |
-
.set_ex(
|
158 |
.await;
|
159 |
continue;
|
160 |
}
|
|
|
1 |
//! This module provides the functionality to cache the aggregated results fetched and aggregated
|
2 |
//! from the upstream search engines in a json format.
|
3 |
|
|
|
4 |
use error_stack::Report;
|
5 |
use futures::future::try_join_all;
|
6 |
use redis::{aio::ConnectionManager, AsyncCommands, Client, RedisError};
|
|
|
52 |
Ok(redis_cache)
|
53 |
}
|
54 |
|
55 |
+
/// A function which fetches the cached json as json string from the redis server.
|
56 |
///
|
57 |
/// # Arguments
|
58 |
///
|
59 |
+
/// * `key` - It takes a string as key.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
60 |
///
|
61 |
/// # Error
|
62 |
///
|
63 |
+
/// Returns the json as a String from the cache on success otherwise returns a `CacheError`
|
64 |
/// on a failure.
|
65 |
+
pub async fn cached_json(&mut self, key: &str) -> Result<String, Report<CacheError>> {
|
66 |
self.current_connection = Default::default();
|
|
|
67 |
|
68 |
let mut result: Result<String, RedisError> = self.connection_pool
|
69 |
[self.current_connection as usize]
|
70 |
+
.get(key)
|
71 |
.await;
|
72 |
|
73 |
// Code to check whether the current connection being used is dropped with connection error
|
|
|
88 |
));
|
89 |
}
|
90 |
result = self.connection_pool[self.current_connection as usize]
|
91 |
+
.get(key)
|
92 |
.await;
|
93 |
continue;
|
94 |
}
|
|
|
99 |
}
|
100 |
}
|
101 |
|
102 |
+
/// A function which caches the json by using the key and
|
103 |
/// `json results` as the value and stores it in redis server with ttl(time to live)
|
104 |
/// set to 60 seconds.
|
105 |
///
|
106 |
/// # Arguments
|
107 |
///
|
108 |
/// * `json_results` - It takes the json results string as an argument.
|
109 |
+
/// * `key` - It takes the key as a String.
|
110 |
///
|
111 |
/// # Error
|
112 |
///
|
113 |
/// Returns an unit type if the results are cached succesfully otherwise returns a `CacheError`
|
114 |
/// on a failure.
|
115 |
+
pub async fn cache_json(
|
116 |
&mut self,
|
117 |
json_results: &str,
|
118 |
+
key: &str,
|
119 |
) -> Result<(), Report<CacheError>> {
|
120 |
self.current_connection = Default::default();
|
|
|
121 |
|
122 |
let mut result: Result<(), RedisError> = self.connection_pool
|
123 |
[self.current_connection as usize]
|
124 |
+
.set_ex(key, json_results, 600)
|
125 |
.await;
|
126 |
|
127 |
// Code to check whether the current connection being used is dropped with connection error
|
|
|
142 |
));
|
143 |
}
|
144 |
result = self.connection_pool[self.current_connection as usize]
|
145 |
+
.set_ex(key, json_results, 60)
|
146 |
.await;
|
147 |
continue;
|
148 |
}
|
src/lib.rs
CHANGED
@@ -22,7 +22,7 @@ use actix_cors::Cors;
|
|
22 |
use actix_files as fs;
|
23 |
use actix_governor::{Governor, GovernorConfigBuilder};
|
24 |
use actix_web::{dev::Server, http::header, middleware::Logger, web, App, HttpServer};
|
25 |
-
use cache::cacher::{
|
26 |
use config::parser::Config;
|
27 |
use handler::{file_path, FileType};
|
28 |
|
@@ -40,14 +40,21 @@ use handler::{file_path, FileType};
|
|
40 |
///
|
41 |
/// ```rust
|
42 |
/// use std::net::TcpListener;
|
43 |
-
/// use websurfx::{config::parser::Config, run, cache::cacher::
|
44 |
///
|
45 |
-
///
|
46 |
-
///
|
47 |
-
///
|
48 |
-
///
|
|
|
|
|
|
|
49 |
/// ```
|
50 |
-
pub fn run(
|
|
|
|
|
|
|
|
|
51 |
let public_folder_path: &str = file_path(FileType::Theme)?;
|
52 |
|
53 |
let cloned_config_threads_opt: u8 = config.threads;
|
|
|
22 |
use actix_files as fs;
|
23 |
use actix_governor::{Governor, GovernorConfigBuilder};
|
24 |
use actix_web::{dev::Server, http::header, middleware::Logger, web, App, HttpServer};
|
25 |
+
use cache::cacher::{Cacher, SharedCache};
|
26 |
use config::parser::Config;
|
27 |
use handler::{file_path, FileType};
|
28 |
|
|
|
40 |
///
|
41 |
/// ```rust
|
42 |
/// use std::net::TcpListener;
|
43 |
+
/// use websurfx::{config::parser::Config, run, cache::cacher::create_cache};
|
44 |
///
|
45 |
+
/// #[tokio::main]
|
46 |
+
/// async fn main(){
|
47 |
+
/// let config = Config::parse(true).unwrap();
|
48 |
+
/// let listener = TcpListener::bind("127.0.0.1:8080").expect("Failed to bind address");
|
49 |
+
/// let cache = create_cache(&config).await;
|
50 |
+
/// let server = run(listener,config,cache).expect("Failed to start server");
|
51 |
+
/// }
|
52 |
/// ```
|
53 |
+
pub fn run(
|
54 |
+
listener: TcpListener,
|
55 |
+
config: Config,
|
56 |
+
cache: impl Cacher + 'static,
|
57 |
+
) -> std::io::Result<Server> {
|
58 |
let public_folder_path: &str = file_path(FileType::Theme)?;
|
59 |
|
60 |
let cloned_config_threads_opt: u8 = config.threads;
|
src/server/routes/search.rs
CHANGED
@@ -107,41 +107,40 @@ async fn results(
|
|
107 |
req: HttpRequest,
|
108 |
safe_search: &Option<u8>,
|
109 |
) -> Result<SearchResults, Box<dyn std::error::Error>> {
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
116 |
);
|
117 |
|
118 |
// fetch the cached results json.
|
119 |
-
let cached_results = cache.
|
120 |
// check if fetched cache results was indeed fetched or it was an error and if so
|
121 |
// handle the data accordingly.
|
122 |
match cached_results {
|
123 |
Ok(results) => Ok(results),
|
124 |
Err(_) => {
|
125 |
-
let mut safe_search_level: u8 = match config.safe_search {
|
126 |
-
3..=4 => config.safe_search,
|
127 |
-
_ => match safe_search {
|
128 |
-
Some(safesearch) => match safesearch {
|
129 |
-
0..=2 => *safesearch,
|
130 |
-
_ => config.safe_search,
|
131 |
-
},
|
132 |
-
None => config.safe_search,
|
133 |
-
},
|
134 |
-
};
|
135 |
-
|
136 |
if safe_search_level == 4 {
|
137 |
let mut results: SearchResults = SearchResults::default();
|
138 |
|
139 |
let flag: bool =
|
140 |
!is_match_from_filter_list(file_path(FileType::BlockList)?, query)?;
|
141 |
-
|
142 |
if flag {
|
143 |
results.set_disallowed();
|
144 |
-
cache.cache_results(&results, &
|
145 |
results.set_safe_search_level(safe_search_level);
|
146 |
return Ok(results);
|
147 |
}
|
@@ -151,28 +150,14 @@ async fn results(
|
|
151 |
// default selected upstream search engines from the config file otherwise
|
152 |
// parse the non-empty cookie and grab the user selected engines from the
|
153 |
// UI and use that.
|
154 |
-
let mut results: SearchResults = match
|
155 |
Some(cookie_value) => {
|
156 |
-
let cookie_value: Cookie<'_> =
|
157 |
-
serde_json::from_str(cookie_value.name_value().1)?;
|
158 |
-
|
159 |
let engines: Vec<EngineHandler> = cookie_value
|
160 |
.engines
|
161 |
.iter()
|
162 |
.filter_map(|name| EngineHandler::new(name).ok())
|
163 |
.collect();
|
164 |
|
165 |
-
safe_search_level = match config.safe_search {
|
166 |
-
3..=4 => config.safe_search,
|
167 |
-
_ => match safe_search {
|
168 |
-
Some(safesearch) => match safesearch {
|
169 |
-
0..=2 => *safesearch,
|
170 |
-
_ => config.safe_search,
|
171 |
-
},
|
172 |
-
None => cookie_value.safe_search_level,
|
173 |
-
},
|
174 |
-
};
|
175 |
-
|
176 |
match engines.is_empty() {
|
177 |
false => {
|
178 |
aggregate(
|
@@ -217,9 +202,7 @@ async fn results(
|
|
217 |
{
|
218 |
results.set_filtered();
|
219 |
}
|
220 |
-
cache
|
221 |
-
.cache_results(&results, &(format!("{url}{safe_search_level}")))
|
222 |
-
.await?;
|
223 |
results.set_safe_search_level(safe_search_level);
|
224 |
Ok(results)
|
225 |
}
|
@@ -252,3 +235,24 @@ fn is_match_from_filter_list(
|
|
252 |
|
253 |
Ok(false)
|
254 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
107 |
req: HttpRequest,
|
108 |
safe_search: &Option<u8>,
|
109 |
) -> Result<SearchResults, Box<dyn std::error::Error>> {
|
110 |
+
// eagerly parse cookie value to evaluate safe search level
|
111 |
+
let cookie_value = req.cookie("appCookie");
|
112 |
+
|
113 |
+
let cookie_value: Option<Cookie<'_>> = cookie_value
|
114 |
+
.as_ref()
|
115 |
+
.and_then(|cv| serde_json::from_str(cv.name_value().1).ok());
|
116 |
+
|
117 |
+
let safe_search_level = get_safesearch_level(
|
118 |
+
safe_search,
|
119 |
+
&cookie_value.as_ref().map(|cv| cv.safe_search_level),
|
120 |
+
config.safe_search,
|
121 |
+
);
|
122 |
+
|
123 |
+
let cache_key = format!(
|
124 |
+
"http://{}:{}/search?q={}&page={}&safesearch={}",
|
125 |
+
config.binding_ip, config.port, query, page, safe_search_level
|
126 |
);
|
127 |
|
128 |
// fetch the cached results json.
|
129 |
+
let cached_results = cache.cached_results(&cache_key).await;
|
130 |
// check if fetched cache results was indeed fetched or it was an error and if so
|
131 |
// handle the data accordingly.
|
132 |
match cached_results {
|
133 |
Ok(results) => Ok(results),
|
134 |
Err(_) => {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
135 |
if safe_search_level == 4 {
|
136 |
let mut results: SearchResults = SearchResults::default();
|
137 |
|
138 |
let flag: bool =
|
139 |
!is_match_from_filter_list(file_path(FileType::BlockList)?, query)?;
|
140 |
+
// Return early when the query contains disallowed words.
|
141 |
if flag {
|
142 |
results.set_disallowed();
|
143 |
+
cache.cache_results(&results, &cache_key).await?;
|
144 |
results.set_safe_search_level(safe_search_level);
|
145 |
return Ok(results);
|
146 |
}
|
|
|
150 |
// default selected upstream search engines from the config file otherwise
|
151 |
// parse the non-empty cookie and grab the user selected engines from the
|
152 |
// UI and use that.
|
153 |
+
let mut results: SearchResults = match cookie_value {
|
154 |
Some(cookie_value) => {
|
|
|
|
|
|
|
155 |
let engines: Vec<EngineHandler> = cookie_value
|
156 |
.engines
|
157 |
.iter()
|
158 |
.filter_map(|name| EngineHandler::new(name).ok())
|
159 |
.collect();
|
160 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
161 |
match engines.is_empty() {
|
162 |
false => {
|
163 |
aggregate(
|
|
|
202 |
{
|
203 |
results.set_filtered();
|
204 |
}
|
205 |
+
cache.cache_results(&results, &cache_key).await?;
|
|
|
|
|
206 |
results.set_safe_search_level(safe_search_level);
|
207 |
Ok(results)
|
208 |
}
|
|
|
235 |
|
236 |
Ok(false)
|
237 |
}
|
238 |
+
|
239 |
+
/// A helper function which returns the safe search level based on the url params
|
240 |
+
/// and cookie value.
|
241 |
+
///
|
242 |
+
/// # Arguments
|
243 |
+
///
|
244 |
+
/// * `safe_search` - Safe search level from the url.
|
245 |
+
/// * `cookie` - User's cookie
|
246 |
+
/// * `default` - Safe search level to fall back to
|
247 |
+
fn get_safesearch_level(safe_search: &Option<u8>, cookie: &Option<u8>, default: u8) -> u8 {
|
248 |
+
match safe_search {
|
249 |
+
Some(ss) => {
|
250 |
+
if *ss >= 3 {
|
251 |
+
default
|
252 |
+
} else {
|
253 |
+
*ss
|
254 |
+
}
|
255 |
+
}
|
256 |
+
None => cookie.unwrap_or(default),
|
257 |
+
}
|
258 |
+
}
|
tests/index.rs
CHANGED
@@ -3,18 +3,13 @@ use std::net::TcpListener;
|
|
3 |
use websurfx::{config::parser::Config, run, templates::views};
|
4 |
|
5 |
// Starts a new instance of the HTTP server, bound to a random available port
|
6 |
-
fn spawn_app() -> String {
|
7 |
// Binding to port 0 will trigger the OS to assign a port for us.
|
8 |
let listener = TcpListener::bind("127.0.0.1:0").expect("Failed to bind random port");
|
9 |
let port = listener.local_addr().unwrap().port();
|
10 |
let config = Config::parse(false).unwrap();
|
11 |
-
let
|
12 |
-
|
13 |
-
config,
|
14 |
-
#[cfg(all(feature = "memory-cache", not(feature = "redis-cache")))]
|
15 |
-
websurfx::cache::cacher::Cache::new_in_memory(),
|
16 |
-
)
|
17 |
-
.expect("Failed to bind address");
|
18 |
|
19 |
tokio::spawn(server);
|
20 |
format!("http://127.0.0.1:{}/", port)
|
@@ -22,7 +17,7 @@ fn spawn_app() -> String {
|
|
22 |
|
23 |
#[tokio::test]
|
24 |
async fn test_index() {
|
25 |
-
let address = spawn_app();
|
26 |
|
27 |
let client = reqwest::Client::new();
|
28 |
let res = client.get(address).send().await.unwrap();
|
|
|
3 |
use websurfx::{config::parser::Config, run, templates::views};
|
4 |
|
5 |
// Starts a new instance of the HTTP server, bound to a random available port
|
6 |
+
async fn spawn_app() -> String {
|
7 |
// Binding to port 0 will trigger the OS to assign a port for us.
|
8 |
let listener = TcpListener::bind("127.0.0.1:0").expect("Failed to bind random port");
|
9 |
let port = listener.local_addr().unwrap().port();
|
10 |
let config = Config::parse(false).unwrap();
|
11 |
+
let cache = websurfx::cache::cacher::create_cache(&config).await;
|
12 |
+
let server = run(listener, config, cache).expect("Failed to bind address");
|
|
|
|
|
|
|
|
|
|
|
13 |
|
14 |
tokio::spawn(server);
|
15 |
format!("http://127.0.0.1:{}/", port)
|
|
|
17 |
|
18 |
#[tokio::test]
|
19 |
async fn test_index() {
|
20 |
+
let address = spawn_app().await;
|
21 |
|
22 |
let client = reqwest::Client::new();
|
23 |
let res = client.get(address).send().await.unwrap();
|