ashwin123 alamin655 committed on
Commit
e704c26
β€’
1 Parent(s): 90f0103

:recycle: Refactor cache system (#399)

Browse files

* :recycle: Refactor cache system

* :bug: Fix cache not getting set

This patch also makes it so that cookies are eagerly evaluated. This is
done to figure out the safe search level set by the user. The
performance hit wouldn't be much of a deal as the cookie is a small
json string

* πŸ”– chore: bump the app version (#399)

* πŸ”– chore: bump the app version (#399)

---------

Co-authored-by: alamin655 <[email protected]>

Cargo.lock CHANGED
@@ -4066,7 +4066,7 @@ checksum = "1778a42e8b3b90bff8d0f5032bf22250792889a5cdc752aa0020c84abe3aaf10"
4066
 
4067
  [[package]]
4068
  name = "websurfx"
4069
- version = "1.2.30"
4070
  dependencies = [
4071
  "actix-cors",
4072
  "actix-files",
 
4066
 
4067
  [[package]]
4068
  name = "websurfx"
4069
+ version = "1.2.34"
4070
  dependencies = [
4071
  "actix-cors",
4072
  "actix-files",
Cargo.toml CHANGED
@@ -1,6 +1,6 @@
1
  [package]
2
  name = "websurfx"
3
- version = "1.2.30"
4
  edition = "2021"
5
  description = "An open-source alternative to Searx that provides clean, ad-free, and organic results with incredible speed while keeping privacy and security in mind."
6
  repository = "https://github.com/neon-mmd/websurfx"
 
1
  [package]
2
  name = "websurfx"
3
+ version = "1.2.34"
4
  edition = "2021"
5
  description = "An open-source alternative to Searx that provides clean, ad-free, and organic results with incredible speed while keeping privacy and security in mind."
6
  repository = "https://github.com/neon-mmd/websurfx"
src/bin/websurfx.rs CHANGED
@@ -5,7 +5,7 @@
5
 
6
  use mimalloc::MiMalloc;
7
  use std::net::TcpListener;
8
- use websurfx::{cache::cacher::Cache, config::parser::Config, run};
9
 
10
  /// A dhat heap memory profiler
11
  #[cfg(feature = "dhat-heap")]
@@ -31,7 +31,7 @@ async fn main() -> std::io::Result<()> {
31
  // Initialize the parsed config file.
32
  let config = Config::parse(false).unwrap();
33
 
34
- let cache = Cache::build(&config).await;
35
 
36
  log::info!(
37
  "started server on port {} and IP {}",
 
5
 
6
  use mimalloc::MiMalloc;
7
  use std::net::TcpListener;
8
+ use websurfx::{cache::cacher::create_cache, config::parser::Config, run};
9
 
10
  /// A dhat heap memory profiler
11
  #[cfg(feature = "dhat-heap")]
 
31
  // Initialize the parsed config file.
32
  let config = Config::parse(false).unwrap();
33
 
34
+ let cache = create_cache(&config).await;
35
 
36
  log::info!(
37
  "started server on port {} and IP {}",
src/cache/cacher.rs CHANGED
@@ -14,24 +14,10 @@ use super::error::CacheError;
14
  #[cfg(feature = "redis-cache")]
15
  use super::redis_cacher::RedisCache;
16
 
17
- /// Different implementations for caching, currently it is possible to cache in-memory or in Redis.
18
- #[derive(Clone)]
19
- pub enum Cache {
20
- /// Caching is disabled
21
- Disabled,
22
- #[cfg(all(feature = "redis-cache", not(feature = "memory-cache")))]
23
- /// Encapsulates the Redis based cache
24
- Redis(RedisCache),
25
- #[cfg(all(feature = "memory-cache", not(feature = "redis-cache")))]
26
- /// Contains the in-memory cache.
27
- InMemory(MokaCache<String, SearchResults>),
28
- #[cfg(all(feature = "redis-cache", feature = "memory-cache"))]
29
- /// Contains both the in-memory cache and Redis based cache
30
- Hybrid(RedisCache, MokaCache<String, SearchResults>),
31
- }
32
-
33
- impl Cache {
34
- /// A function that builds the cache from the given configuration.
35
  ///
36
  /// # Arguments
37
  ///
@@ -39,89 +25,10 @@ impl Cache {
39
  ///
40
  /// # Returns
41
  ///
42
- /// It returns a newly initialized variant based on the feature enabled by the user.
43
- pub async fn build(_config: &Config) -> Self {
44
- #[cfg(all(feature = "redis-cache", feature = "memory-cache"))]
45
- {
46
- log::info!("Using a hybrid cache");
47
- Cache::new_hybrid(
48
- RedisCache::new(&_config.redis_url, 5)
49
- .await
50
- .expect("Redis cache configured"),
51
- )
52
- }
53
- #[cfg(all(feature = "redis-cache", not(feature = "memory-cache")))]
54
- {
55
- log::info!("Listening redis server on {}", &_config.redis_url);
56
- Cache::new(
57
- RedisCache::new(&_config.redis_url, 5)
58
- .await
59
- .expect("Redis cache configured"),
60
- )
61
- }
62
- #[cfg(all(feature = "memory-cache", not(feature = "redis-cache")))]
63
- {
64
- log::info!("Using an in-memory cache");
65
- Cache::new_in_memory()
66
- }
67
- #[cfg(not(any(feature = "memory-cache", feature = "redis-cache")))]
68
- {
69
- log::info!("Caching is disabled");
70
- Cache::Disabled
71
- }
72
- }
73
-
74
- /// A function that initializes a new connection pool struct.
75
- ///
76
- /// # Arguments
77
- ///
78
- /// * `redis_cache` - It takes the newly initialized connection pool struct as an argument.
79
- ///
80
- /// # Returns
81
- ///
82
- /// It returns a `Redis` variant with the newly initialized connection pool struct.
83
- #[cfg(all(feature = "redis-cache", not(feature = "memory-cache")))]
84
- pub fn new(redis_cache: RedisCache) -> Self {
85
- Cache::Redis(redis_cache)
86
- }
87
-
88
- /// A function that initializes the `in memory` cache which is used to cache the results in
89
- /// memory with the search engine thus improving performance by making retrieval and caching of
90
- /// results faster.
91
- ///
92
- /// # Returns
93
- ///
94
- /// It returns a `InMemory` variant with the newly initialized in memory cache type.
95
- #[cfg(all(feature = "memory-cache", not(feature = "redis-cache")))]
96
- pub fn new_in_memory() -> Self {
97
- let cache = MokaCache::builder()
98
- .max_capacity(1000)
99
- .time_to_live(Duration::from_secs(60))
100
- .build();
101
- Cache::InMemory(cache)
102
- }
103
-
104
- /// A function that initializes both in memory cache and redis client connection for being used
105
- /// for managing hybrid cache which increases resiliancy of the search engine by allowing the
106
- /// cache to switch to `in memory` caching if the `redis` cache server is temporarily
107
- /// unavailable.
108
- ///
109
- /// # Arguments
110
- ///
111
- /// * `redis_cache` - It takes `redis` client connection struct as an argument.
112
- ///
113
- /// # Returns
114
- ///
115
- /// It returns a tuple variant `Hybrid` storing both the in-memory cache type and the `redis`
116
- /// client connection struct.
117
- #[cfg(all(feature = "redis-cache", feature = "memory-cache"))]
118
- pub fn new_hybrid(redis_cache: RedisCache) -> Self {
119
- let cache = MokaCache::builder()
120
- .max_capacity(1000)
121
- .time_to_live(Duration::from_secs(60))
122
- .build();
123
- Cache::Hybrid(redis_cache, cache)
124
- }
125
 
126
  /// A function which fetches the cached json results as json string.
127
  ///
@@ -133,31 +40,7 @@ impl Cache {
133
  ///
134
  /// Returns the `SearchResults` from the cache if the program executes normally otherwise
135
  /// returns a `CacheError` if the results cannot be retrieved from the cache.
136
- pub async fn cached_json(&mut self, _url: &str) -> Result<SearchResults, Report<CacheError>> {
137
- match self {
138
- Cache::Disabled => Err(Report::new(CacheError::MissingValue)),
139
- #[cfg(all(feature = "redis-cache", not(feature = "memory-cache")))]
140
- Cache::Redis(redis_cache) => {
141
- let json = redis_cache.cached_json(_url).await?;
142
- Ok(serde_json::from_str::<SearchResults>(&json)
143
- .map_err(|_| CacheError::SerializationError)?)
144
- }
145
- #[cfg(all(feature = "memory-cache", not(feature = "redis-cache")))]
146
- Cache::InMemory(in_memory) => match in_memory.get(&_url.to_string()) {
147
- Some(res) => Ok(res),
148
- None => Err(Report::new(CacheError::MissingValue)),
149
- },
150
- #[cfg(all(feature = "redis-cache", feature = "memory-cache"))]
151
- Cache::Hybrid(redis_cache, in_memory) => match redis_cache.cached_json(_url).await {
152
- Ok(res) => Ok(serde_json::from_str::<SearchResults>(&res)
153
- .map_err(|_| CacheError::SerializationError)?),
154
- Err(_) => match in_memory.get(&_url.to_string()) {
155
- Some(res) => Ok(res),
156
- None => Err(Report::new(CacheError::MissingValue)),
157
- },
158
- },
159
- }
160
- }
161
 
162
  /// A function which caches the results by using the `url` as the key and
163
  /// `json results` as the value and stores it in the cache
@@ -172,44 +55,164 @@ impl Cache {
172
  /// Returns a unit type if the program caches the given search results without a failure
173
  /// otherwise it returns a `CacheError` if the search results cannot be cached due to a
174
  /// failure.
175
- pub async fn cache_results(
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
176
  &mut self,
177
  _search_results: &SearchResults,
178
  _url: &str,
179
  ) -> Result<(), Report<CacheError>> {
180
- match self {
181
- Cache::Disabled => Ok(()),
182
- #[cfg(all(feature = "redis-cache", not(feature = "memory-cache")))]
183
- Cache::Redis(redis_cache) => {
184
- let json = serde_json::to_string(_search_results)
185
- .map_err(|_| CacheError::SerializationError)?;
186
- redis_cache.cache_results(&json, _url).await
187
- }
188
- #[cfg(all(feature = "memory-cache", not(feature = "redis-cache")))]
189
- Cache::InMemory(cache) => {
190
- cache.insert(_url.to_string(), _search_results.clone());
191
- Ok(())
192
- }
193
- #[cfg(all(feature = "memory-cache", feature = "redis-cache"))]
194
- Cache::Hybrid(redis_cache, cache) => {
195
- let json = serde_json::to_string(_search_results)
196
- .map_err(|_| CacheError::SerializationError)?;
197
- match redis_cache.cache_results(&json, _url).await {
198
- Ok(_) => Ok(()),
199
- Err(_) => {
200
- cache.insert(_url.to_string(), _search_results.clone());
201
- Ok(())
202
- }
203
- }
204
- }
205
- }
206
  }
207
  }
208
 
209
  /// A structure to efficiently share the cache between threads - as it is protected by a Mutex.
210
  pub struct SharedCache {
211
  /// The internal cache protected from concurrent access by a mutex
212
- cache: Mutex<Cache>,
213
  }
214
 
215
  impl SharedCache {
@@ -220,9 +223,9 @@ impl SharedCache {
220
 /// * `cache` - It takes the `Cache` enum variant as an argument with the preferred cache type.
221
  ///
222
  /// Returns a newly constructed `SharedCache` struct.
223
- pub fn new(cache: Cache) -> Self {
224
  Self {
225
- cache: Mutex::new(cache),
226
  }
227
  }
228
 
@@ -237,9 +240,9 @@ impl SharedCache {
237
  ///
238
  /// Returns a `SearchResults` struct containing the search results from the cache if nothing
239
  /// goes wrong otherwise returns a `CacheError`.
240
- pub async fn cached_json(&self, url: &str) -> Result<SearchResults, Report<CacheError>> {
241
  let mut mut_cache = self.cache.lock().await;
242
- mut_cache.cached_json(url).await
243
  }
244
 
245
  /// A setter function which caches the results by using the `url` as the key and
@@ -265,3 +268,18 @@ impl SharedCache {
265
  mut_cache.cache_results(search_results, url).await
266
  }
267
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  #[cfg(feature = "redis-cache")]
15
  use super::redis_cacher::RedisCache;
16
 
17
+ /// Abstraction trait for common methods provided by a cache backend.
18
+ #[async_trait::async_trait]
19
+ pub trait Cacher: Send + Sync {
20
+ /// A function that builds the cache from the given configuration.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  ///
22
  /// # Arguments
23
  ///
 
25
  ///
26
  /// # Returns
27
  ///
28
+ /// It returns a newly initialized backend based on the feature enabled by the user.
29
+ async fn build(config: &Config) -> Self
30
+ where
31
+ Self: Sized;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
 
33
  /// A function which fetches the cached json results as json string.
34
  ///
 
40
  ///
41
  /// Returns the `SearchResults` from the cache if the program executes normally otherwise
42
  /// returns a `CacheError` if the results cannot be retrieved from the cache.
43
+ async fn cached_results(&mut self, url: &str) -> Result<SearchResults, Report<CacheError>>;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
 
45
  /// A function which caches the results by using the `url` as the key and
46
  /// `json results` as the value and stores it in the cache
 
55
  /// Returns a unit type if the program caches the given search results without a failure
56
  /// otherwise it returns a `CacheError` if the search results cannot be cached due to a
57
  /// failure.
58
+ async fn cache_results(
59
+ &mut self,
60
+ search_results: &SearchResults,
61
+ url: &str,
62
+ ) -> Result<(), Report<CacheError>>;
63
+
64
+ /// A helper function which computes the hash of the url and formats and returns it as string.
65
+ ///
66
+ /// # Arguments
67
+ ///
68
+ /// * `url` - It takes an url as string.
69
+ fn hash_url(&self, url: &str) -> String {
70
+ blake3::hash(url.as_bytes()).to_string()
71
+ }
72
+ }
73
+
74
+ #[cfg(feature = "redis-cache")]
75
+ #[async_trait::async_trait]
76
+ impl Cacher for RedisCache {
77
+ async fn build(config: &Config) -> Self {
78
+ log::info!(
79
+ "Initialising redis cache. Listening to {}",
80
+ &config.redis_url
81
+ );
82
+ RedisCache::new(&config.redis_url, 5)
83
+ .await
84
+ .expect("Redis cache configured")
85
+ }
86
+
87
+ async fn cached_results(&mut self, url: &str) -> Result<SearchResults, Report<CacheError>> {
88
+ let hashed_url_string: &str = &self.hash_url(url);
89
+ let json = self.cached_json(hashed_url_string).await?;
90
+ Ok(serde_json::from_str::<SearchResults>(&json)
91
+ .map_err(|_| CacheError::SerializationError)?)
92
+ }
93
+
94
+ async fn cache_results(
95
+ &mut self,
96
+ search_results: &SearchResults,
97
+ url: &str,
98
+ ) -> Result<(), Report<CacheError>> {
99
+ let json =
100
+ serde_json::to_string(search_results).map_err(|_| CacheError::SerializationError)?;
101
+ let hashed_url_string = self.hash_url(url);
102
+ self.cache_json(&json, &hashed_url_string).await
103
+ }
104
+ }
105
+
106
+ /// Memory based cache backend.
107
+ #[cfg(feature = "memory-cache")]
108
+ pub struct InMemoryCache {
109
+ /// The backend cache which stores data.
110
+ cache: MokaCache<String, SearchResults>,
111
+ }
112
+
113
+ #[cfg(feature = "memory-cache")]
114
+ #[async_trait::async_trait]
115
+ impl Cacher for InMemoryCache {
116
+ async fn build(_config: &Config) -> Self {
117
+ log::info!("Initialising in-memory cache");
118
+
119
+ InMemoryCache {
120
+ cache: MokaCache::builder()
121
+ .max_capacity(1000)
122
+ .time_to_live(Duration::from_secs(60))
123
+ .build(),
124
+ }
125
+ }
126
+
127
+ async fn cached_results(&mut self, url: &str) -> Result<SearchResults, Report<CacheError>> {
128
+ let hashed_url_string = self.hash_url(url);
129
+ match self.cache.get(&hashed_url_string) {
130
+ Some(res) => Ok(res),
131
+ None => Err(Report::new(CacheError::MissingValue)),
132
+ }
133
+ }
134
+
135
+ async fn cache_results(
136
+ &mut self,
137
+ search_results: &SearchResults,
138
+ url: &str,
139
+ ) -> Result<(), Report<CacheError>> {
140
+ let hashed_url_string = self.hash_url(url);
141
+ self.cache.insert(hashed_url_string, search_results.clone());
142
+ Ok(())
143
+ }
144
+ }
145
+
146
+ /// Cache backend which utilises both memory and redis based caches.
147
+ ///
148
+ /// The hybrid cache system uses both the types of cache to ensure maximum availability.
149
+ /// The set method sets the key, value pair in both the caches. Therefore in a case where redis
150
+ /// cache becomes unavailable, the backend will retrieve the value from in-memory cache.
151
+ #[cfg(all(feature = "memory-cache", feature = "redis-cache"))]
152
+ pub struct HybridCache {
153
+ /// The in-memory backend cache which stores data.
154
+ memory_cache: InMemoryCache,
155
+ /// The redis backend cache which stores data.
156
+ redis_cache: RedisCache,
157
+ }
158
+
159
+ #[cfg(all(feature = "memory-cache", feature = "redis-cache"))]
160
+ #[async_trait::async_trait]
161
+ impl Cacher for HybridCache {
162
+ async fn build(config: &Config) -> Self {
163
+ log::info!("Initialising hybrid cache");
164
+ HybridCache {
165
+ memory_cache: InMemoryCache::build(config).await,
166
+ redis_cache: RedisCache::build(config).await,
167
+ }
168
+ }
169
+
170
+ async fn cached_results(&mut self, url: &str) -> Result<SearchResults, Report<CacheError>> {
171
+ match self.redis_cache.cached_results(url).await {
172
+ Ok(res) => Ok(res),
173
+ Err(_) => self.memory_cache.cached_results(url).await,
174
+ }
175
+ }
176
+
177
+ async fn cache_results(
178
+ &mut self,
179
+ search_results: &SearchResults,
180
+ url: &str,
181
+ ) -> Result<(), Report<CacheError>> {
182
+ self.redis_cache.cache_results(search_results, url).await?;
183
+ self.memory_cache.cache_results(search_results, url).await?;
184
+
185
+ Ok(())
186
+ }
187
+ }
188
+
189
+ /// Dummy cache backend
190
+ pub struct DisabledCache;
191
+
192
+ #[async_trait::async_trait]
193
+ impl Cacher for DisabledCache {
194
+ async fn build(_config: &Config) -> Self {
195
+ log::info!("Caching is disabled");
196
+ DisabledCache
197
+ }
198
+
199
+ async fn cached_results(&mut self, _url: &str) -> Result<SearchResults, Report<CacheError>> {
200
+ Err(Report::new(CacheError::MissingValue))
201
+ }
202
+
203
+ async fn cache_results(
204
  &mut self,
205
  _search_results: &SearchResults,
206
  _url: &str,
207
  ) -> Result<(), Report<CacheError>> {
208
+ Ok(())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
209
  }
210
  }
211
 
212
  /// A structure to efficiently share the cache between threads - as it is protected by a Mutex.
213
  pub struct SharedCache {
214
  /// The internal cache protected from concurrent access by a mutex
215
+ cache: Mutex<Box<dyn Cacher>>,
216
  }
217
 
218
  impl SharedCache {
 
223
 /// * `cache` - It takes the `Cache` enum variant as an argument with the preferred cache type.
224
  ///
225
  /// Returns a newly constructed `SharedCache` struct.
226
+ pub fn new(cache: impl Cacher + 'static) -> Self {
227
  Self {
228
+ cache: Mutex::new(Box::new(cache)),
229
  }
230
  }
231
 
 
240
  ///
241
  /// Returns a `SearchResults` struct containing the search results from the cache if nothing
242
  /// goes wrong otherwise returns a `CacheError`.
243
+ pub async fn cached_results(&self, url: &str) -> Result<SearchResults, Report<CacheError>> {
244
  let mut mut_cache = self.cache.lock().await;
245
+ mut_cache.cached_results(url).await
246
  }
247
 
248
  /// A setter function which caches the results by using the `url` as the key and
 
268
  mut_cache.cache_results(search_results, url).await
269
  }
270
  }
271
+
272
+ /// A function to initialise the cache backend.
273
+ pub async fn create_cache(config: &Config) -> impl Cacher {
274
+ #[cfg(all(feature = "redis-cache", feature = "memory-cache"))]
275
+ return HybridCache::build(config).await;
276
+
277
+ #[cfg(all(feature = "memory-cache", not(feature = "redis-cache")))]
278
+ return InMemoryCache::build(config).await;
279
+
280
+ #[cfg(all(feature = "redis-cache", not(feature = "memory-cache")))]
281
+ return RedisCache::build(config).await;
282
+
283
+ #[cfg(not(any(feature = "memory-cache", feature = "redis-cache")))]
284
+ return DisabledCache::build(config).await;
285
+ }
src/cache/redis_cacher.rs CHANGED
@@ -1,7 +1,6 @@
1
  //! This module provides the functionality to cache the aggregated results fetched and aggregated
2
  //! from the upstream search engines in a json format.
3
 
4
- use blake3::hash;
5
  use error_stack::Report;
6
  use futures::future::try_join_all;
7
  use redis::{aio::ConnectionManager, AsyncCommands, Client, RedisError};
@@ -53,32 +52,22 @@ impl RedisCache {
53
  Ok(redis_cache)
54
  }
55
 
56
- /// A helper function which computes the hash of the url and formats and returns it as string.
57
  ///
58
  /// # Arguments
59
  ///
60
- /// * `url` - It takes an url as string.
61
- fn hash_url(&self, url: &str) -> String {
62
- format!("{:?}", blake3::hash(url.as_bytes()))
63
- }
64
-
65
- /// A function which fetches the cached json results as json string from the redis server.
66
- ///
67
- /// # Arguments
68
- ///
69
- /// * `url` - It takes an url as a string.
70
  ///
71
  /// # Error
72
  ///
73
- /// Returns the results as a String from the cache on success otherwise returns a `CacheError`
74
  /// on a failure.
75
- pub async fn cached_json(&mut self, url: &str) -> Result<String, Report<CacheError>> {
76
  self.current_connection = Default::default();
77
- let hashed_url_string: &str = &self.hash_url(url);
78
 
79
  let mut result: Result<String, RedisError> = self.connection_pool
80
  [self.current_connection as usize]
81
- .get(hashed_url_string)
82
  .await;
83
 
84
  // Code to check whether the current connection being used is dropped with connection error
@@ -99,7 +88,7 @@ impl RedisCache {
99
  ));
100
  }
101
  result = self.connection_pool[self.current_connection as usize]
102
- .get(hashed_url_string)
103
  .await;
104
  continue;
105
  }
@@ -110,30 +99,29 @@ impl RedisCache {
110
  }
111
  }
112
 
113
- /// A function which caches the results by using the hashed `url` as the key and
114
  /// `json results` as the value and stores it in redis server with ttl(time to live)
115
  /// set to 60 seconds.
116
  ///
117
  /// # Arguments
118
  ///
119
  /// * `json_results` - It takes the json results string as an argument.
120
- /// * `url` - It takes the url as a String.
121
  ///
122
  /// # Error
123
  ///
124
 /// Returns a unit type if the results are cached successfully, otherwise returns a `CacheError`
125
  /// on a failure.
126
- pub async fn cache_results(
127
  &mut self,
128
  json_results: &str,
129
- url: &str,
130
  ) -> Result<(), Report<CacheError>> {
131
  self.current_connection = Default::default();
132
- let hashed_url_string: &str = &self.hash_url(url);
133
 
134
  let mut result: Result<(), RedisError> = self.connection_pool
135
  [self.current_connection as usize]
136
- .set_ex(hashed_url_string, json_results, 60)
137
  .await;
138
 
139
  // Code to check whether the current connection being used is dropped with connection error
@@ -154,7 +142,7 @@ impl RedisCache {
154
  ));
155
  }
156
  result = self.connection_pool[self.current_connection as usize]
157
- .set_ex(hashed_url_string, json_results, 60)
158
  .await;
159
  continue;
160
  }
 
1
  //! This module provides the functionality to cache the aggregated results fetched and aggregated
2
  //! from the upstream search engines in a json format.
3
 
 
4
  use error_stack::Report;
5
  use futures::future::try_join_all;
6
  use redis::{aio::ConnectionManager, AsyncCommands, Client, RedisError};
 
52
  Ok(redis_cache)
53
  }
54
 
55
+ /// A function which fetches the cached json as json string from the redis server.
56
  ///
57
  /// # Arguments
58
  ///
59
+ /// * `key` - It takes a string as key.
 
 
 
 
 
 
 
 
 
60
  ///
61
  /// # Error
62
  ///
63
+ /// Returns the json as a String from the cache on success otherwise returns a `CacheError`
64
  /// on a failure.
65
+ pub async fn cached_json(&mut self, key: &str) -> Result<String, Report<CacheError>> {
66
  self.current_connection = Default::default();
 
67
 
68
  let mut result: Result<String, RedisError> = self.connection_pool
69
  [self.current_connection as usize]
70
+ .get(key)
71
  .await;
72
 
73
  // Code to check whether the current connection being used is dropped with connection error
 
88
  ));
89
  }
90
  result = self.connection_pool[self.current_connection as usize]
91
+ .get(key)
92
  .await;
93
  continue;
94
  }
 
99
  }
100
  }
101
 
102
+ /// A function which caches the json by using the key and
103
  /// `json results` as the value and stores it in redis server with ttl(time to live)
104
  /// set to 60 seconds.
105
  ///
106
  /// # Arguments
107
  ///
108
  /// * `json_results` - It takes the json results string as an argument.
109
+ /// * `key` - It takes the key as a String.
110
  ///
111
  /// # Error
112
  ///
113
 /// Returns a unit type if the results are cached successfully, otherwise returns a `CacheError`
114
  /// on a failure.
115
+ pub async fn cache_json(
116
  &mut self,
117
  json_results: &str,
118
+ key: &str,
119
  ) -> Result<(), Report<CacheError>> {
120
  self.current_connection = Default::default();
 
121
 
122
  let mut result: Result<(), RedisError> = self.connection_pool
123
  [self.current_connection as usize]
124
+ .set_ex(key, json_results, 600)
125
  .await;
126
 
127
  // Code to check whether the current connection being used is dropped with connection error
 
142
  ));
143
  }
144
  result = self.connection_pool[self.current_connection as usize]
145
+ .set_ex(key, json_results, 60)
146
  .await;
147
  continue;
148
  }
src/lib.rs CHANGED
@@ -22,7 +22,7 @@ use actix_cors::Cors;
22
  use actix_files as fs;
23
  use actix_governor::{Governor, GovernorConfigBuilder};
24
  use actix_web::{dev::Server, http::header, middleware::Logger, web, App, HttpServer};
25
- use cache::cacher::{Cache, SharedCache};
26
  use config::parser::Config;
27
  use handler::{file_path, FileType};
28
 
@@ -40,14 +40,21 @@ use handler::{file_path, FileType};
40
  ///
41
  /// ```rust
42
  /// use std::net::TcpListener;
43
- /// use websurfx::{config::parser::Config, run, cache::cacher::Cache};
44
  ///
45
- /// let config = Config::parse(true).unwrap();
46
- /// let listener = TcpListener::bind("127.0.0.1:8080").expect("Failed to bind address");
47
- /// let cache = Cache::new_in_memory();
48
- /// let server = run(listener,config,cache).expect("Failed to start server");
 
 
 
49
  /// ```
50
- pub fn run(listener: TcpListener, config: Config, cache: Cache) -> std::io::Result<Server> {
 
 
 
 
51
  let public_folder_path: &str = file_path(FileType::Theme)?;
52
 
53
  let cloned_config_threads_opt: u8 = config.threads;
 
22
  use actix_files as fs;
23
  use actix_governor::{Governor, GovernorConfigBuilder};
24
  use actix_web::{dev::Server, http::header, middleware::Logger, web, App, HttpServer};
25
+ use cache::cacher::{Cacher, SharedCache};
26
  use config::parser::Config;
27
  use handler::{file_path, FileType};
28
 
 
40
  ///
41
  /// ```rust
42
  /// use std::net::TcpListener;
43
+ /// use websurfx::{config::parser::Config, run, cache::cacher::create_cache};
44
  ///
45
+ /// #[tokio::main]
46
+ /// async fn main(){
47
+ /// let config = Config::parse(true).unwrap();
48
+ /// let listener = TcpListener::bind("127.0.0.1:8080").expect("Failed to bind address");
49
+ /// let cache = create_cache(&config).await;
50
+ /// let server = run(listener,config,cache).expect("Failed to start server");
51
+ /// }
52
  /// ```
53
+ pub fn run(
54
+ listener: TcpListener,
55
+ config: Config,
56
+ cache: impl Cacher + 'static,
57
+ ) -> std::io::Result<Server> {
58
  let public_folder_path: &str = file_path(FileType::Theme)?;
59
 
60
  let cloned_config_threads_opt: u8 = config.threads;
src/server/routes/search.rs CHANGED
@@ -107,41 +107,40 @@ async fn results(
107
  req: HttpRequest,
108
  safe_search: &Option<u8>,
109
  ) -> Result<SearchResults, Box<dyn std::error::Error>> {
110
- let url = format!(
111
- "http://{}:{}/search?q={}&page={}&safesearch=",
112
- config.binding_ip,
113
- config.port,
114
- query,
115
- page - 1,
 
 
 
 
 
 
 
 
 
 
116
  );
117
 
118
  // fetch the cached results json.
119
- let cached_results = cache.cached_json(&url).await;
120
  // check if fetched cache results was indeed fetched or it was an error and if so
121
  // handle the data accordingly.
122
  match cached_results {
123
  Ok(results) => Ok(results),
124
  Err(_) => {
125
- let mut safe_search_level: u8 = match config.safe_search {
126
- 3..=4 => config.safe_search,
127
- _ => match safe_search {
128
- Some(safesearch) => match safesearch {
129
- 0..=2 => *safesearch,
130
- _ => config.safe_search,
131
- },
132
- None => config.safe_search,
133
- },
134
- };
135
-
136
  if safe_search_level == 4 {
137
  let mut results: SearchResults = SearchResults::default();
138
 
139
  let flag: bool =
140
  !is_match_from_filter_list(file_path(FileType::BlockList)?, query)?;
141
-
142
  if flag {
143
  results.set_disallowed();
144
- cache.cache_results(&results, &url).await?;
145
  results.set_safe_search_level(safe_search_level);
146
  return Ok(results);
147
  }
@@ -151,28 +150,14 @@ async fn results(
151
  // default selected upstream search engines from the config file otherwise
152
  // parse the non-empty cookie and grab the user selected engines from the
153
  // UI and use that.
154
- let mut results: SearchResults = match req.cookie("appCookie") {
155
  Some(cookie_value) => {
156
- let cookie_value: Cookie<'_> =
157
- serde_json::from_str(cookie_value.name_value().1)?;
158
-
159
  let engines: Vec<EngineHandler> = cookie_value
160
  .engines
161
  .iter()
162
  .filter_map(|name| EngineHandler::new(name).ok())
163
  .collect();
164
 
165
- safe_search_level = match config.safe_search {
166
- 3..=4 => config.safe_search,
167
- _ => match safe_search {
168
- Some(safesearch) => match safesearch {
169
- 0..=2 => *safesearch,
170
- _ => config.safe_search,
171
- },
172
- None => cookie_value.safe_search_level,
173
- },
174
- };
175
-
176
  match engines.is_empty() {
177
  false => {
178
  aggregate(
@@ -217,9 +202,7 @@ async fn results(
217
  {
218
  results.set_filtered();
219
  }
220
- cache
221
- .cache_results(&results, &(format!("{url}{safe_search_level}")))
222
- .await?;
223
  results.set_safe_search_level(safe_search_level);
224
  Ok(results)
225
  }
@@ -252,3 +235,24 @@ fn is_match_from_filter_list(
252
 
253
  Ok(false)
254
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
  req: HttpRequest,
108
  safe_search: &Option<u8>,
109
  ) -> Result<SearchResults, Box<dyn std::error::Error>> {
110
+ // eagerly parse cookie value to evaluate safe search level
111
+ let cookie_value = req.cookie("appCookie");
112
+
113
+ let cookie_value: Option<Cookie<'_>> = cookie_value
114
+ .as_ref()
115
+ .and_then(|cv| serde_json::from_str(cv.name_value().1).ok());
116
+
117
+ let safe_search_level = get_safesearch_level(
118
+ safe_search,
119
+ &cookie_value.as_ref().map(|cv| cv.safe_search_level),
120
+ config.safe_search,
121
+ );
122
+
123
+ let cache_key = format!(
124
+ "http://{}:{}/search?q={}&page={}&safesearch={}",
125
+ config.binding_ip, config.port, query, page, safe_search_level
126
  );
127
 
128
  // fetch the cached results json.
129
+ let cached_results = cache.cached_results(&cache_key).await;
130
  // check if fetched cache results was indeed fetched or it was an error and if so
131
  // handle the data accordingly.
132
  match cached_results {
133
  Ok(results) => Ok(results),
134
  Err(_) => {
 
 
 
 
 
 
 
 
 
 
 
135
  if safe_search_level == 4 {
136
  let mut results: SearchResults = SearchResults::default();
137
 
138
  let flag: bool =
139
  !is_match_from_filter_list(file_path(FileType::BlockList)?, query)?;
140
+ // Return early when query contains disallowed words,
141
  if flag {
142
  results.set_disallowed();
143
+ cache.cache_results(&results, &cache_key).await?;
144
  results.set_safe_search_level(safe_search_level);
145
  return Ok(results);
146
  }
 
150
  // default selected upstream search engines from the config file otherwise
151
  // parse the non-empty cookie and grab the user selected engines from the
152
  // UI and use that.
153
+ let mut results: SearchResults = match cookie_value {
154
  Some(cookie_value) => {
 
 
 
155
  let engines: Vec<EngineHandler> = cookie_value
156
  .engines
157
  .iter()
158
  .filter_map(|name| EngineHandler::new(name).ok())
159
  .collect();
160
 
 
 
 
 
 
 
 
 
 
 
 
161
  match engines.is_empty() {
162
  false => {
163
  aggregate(
 
202
  {
203
  results.set_filtered();
204
  }
205
+ cache.cache_results(&results, &cache_key).await?;
 
 
206
  results.set_safe_search_level(safe_search_level);
207
  Ok(results)
208
  }
 
235
 
236
  Ok(false)
237
  }
238
+
239
+ /// A helper function which returns the safe search level based on the url params
240
+ /// and cookie value.
241
+ ///
242
+ /// # Arguments
243
+ ///
244
+ /// * `safe_search` - Safe search level from the url.
245
+ /// * `cookie` - User's cookie
246
+ /// * `default` - Safe search level to fall back to
247
+ fn get_safesearch_level(safe_search: &Option<u8>, cookie: &Option<u8>, default: u8) -> u8 {
248
+ match safe_search {
249
+ Some(ss) => {
250
+ if *ss >= 3 {
251
+ default
252
+ } else {
253
+ *ss
254
+ }
255
+ }
256
+ None => cookie.unwrap_or(default),
257
+ }
258
+ }
tests/index.rs CHANGED
@@ -3,18 +3,13 @@ use std::net::TcpListener;
3
  use websurfx::{config::parser::Config, run, templates::views};
4
 
5
  // Starts a new instance of the HTTP server, bound to a random available port
6
- fn spawn_app() -> String {
7
  // Binding to port 0 will trigger the OS to assign a port for us.
8
  let listener = TcpListener::bind("127.0.0.1:0").expect("Failed to bind random port");
9
  let port = listener.local_addr().unwrap().port();
10
  let config = Config::parse(false).unwrap();
11
- let server = run(
12
- listener,
13
- config,
14
- #[cfg(all(feature = "memory-cache", not(feature = "redis-cache")))]
15
- websurfx::cache::cacher::Cache::new_in_memory(),
16
- )
17
- .expect("Failed to bind address");
18
 
19
  tokio::spawn(server);
20
  format!("http://127.0.0.1:{}/", port)
@@ -22,7 +17,7 @@ fn spawn_app() -> String {
22
 
23
  #[tokio::test]
24
  async fn test_index() {
25
- let address = spawn_app();
26
 
27
  let client = reqwest::Client::new();
28
  let res = client.get(address).send().await.unwrap();
 
3
  use websurfx::{config::parser::Config, run, templates::views};
4
 
5
  // Starts a new instance of the HTTP server, bound to a random available port
6
+ async fn spawn_app() -> String {
7
  // Binding to port 0 will trigger the OS to assign a port for us.
8
  let listener = TcpListener::bind("127.0.0.1:0").expect("Failed to bind random port");
9
  let port = listener.local_addr().unwrap().port();
10
  let config = Config::parse(false).unwrap();
11
+ let cache = websurfx::cache::cacher::create_cache(&config).await;
12
+ let server = run(listener, config, cache).expect("Failed to bind address");
 
 
 
 
 
13
 
14
  tokio::spawn(server);
15
  format!("http://127.0.0.1:{}/", port)
 
17
 
18
  #[tokio::test]
19
  async fn test_index() {
20
+ let address = spawn_app().await;
21
 
22
  let client = reqwest::Client::new();
23
  let res = client.get(address).send().await.unwrap();