//! This module provides the functionality to cache the aggregated results fetched and aggregated //! from the upstream search engines in a json format. use error_stack::Report; #[cfg(feature = "memory-cache")] use mini_moka::sync::Cache as MokaCache; #[cfg(feature = "memory-cache")] use mini_moka::sync::ConcurrentCacheExt; #[cfg(feature = "memory-cache")] use std::time::Duration; use tokio::sync::Mutex; use crate::{config::parser::Config, models::aggregation_models::SearchResults}; use super::error::CacheError; #[cfg(feature = "redis-cache")] use super::redis_cacher::RedisCache; #[cfg(any(feature = "encrypt-cache-results", feature = "cec-cache-results"))] use super::encryption::*; /// Abstraction trait for common methods provided by a cache backend. #[async_trait::async_trait] pub trait Cacher: Send + Sync { // A function that builds the cache from the given configuration. /// /// # Arguments /// /// * `config` - It takes the config struct as an argument. /// /// # Returns /// /// It returns a newly initialized backend based on the feature enabled by the user. async fn build(config: &Config) -> Self where Self: Sized; /// A function which fetches the cached json results as json string. /// /// # Arguments /// /// * `url` - It takes an url as a string. /// /// # Error /// /// Returns the `SearchResults` from the cache if the program executes normally otherwise /// returns a `CacheError` if the results cannot be retrieved from the cache. async fn cached_results(&mut self, url: &str) -> Result>; /// A function which caches the results by using the `url` as the key and /// `json results` as the value and stores it in the cache /// /// # Arguments /// /// * `json_results` - It takes the json results string as an argument. /// * `url` - It takes the url as a String. /// /// # Error /// /// Returns a unit type if the program caches the given search results without a failure /// otherwise it returns a `CacheError` if the search results cannot be cached due to a /// failure. async fn cache_results( &mut self, search_results: &[SearchResults], urls: &[String], ) -> Result<(), Report>; /// A helper function which computes the hash of the url and formats and returns it as string. /// /// # Arguments /// /// * `url` - It takes an url as string. fn hash_url(&self, url: &str) -> String { blake3::hash(url.as_bytes()).to_string() } /// A helper function that returns either encrypted or decrypted results. /// Feature flags (**encrypt-cache-results or cec-cache-results**) are required for this to work. /// /// # Arguments /// /// * `bytes` - It takes a slice of bytes as an argument. /// * `encrypt` - A boolean to choose whether to encrypt or decrypt the bytes /// /// # Error /// Returns either encrypted or decrypted bytes on success otherwise it returns a CacheError /// on failure. #[cfg(any( // feature = "compress-cache-results", feature = "encrypt-cache-results", feature = "cec-cache-results" ))] fn encrypt_or_decrypt_results( &mut self, mut bytes: Vec, encrypt: bool, ) -> Result, Report> { use chacha20poly1305::{ aead::{Aead, AeadCore, KeyInit, OsRng}, ChaCha20Poly1305, }; let cipher = CIPHER.get_or_init(|| { let key = ChaCha20Poly1305::generate_key(&mut OsRng); ChaCha20Poly1305::new(&key) }); let encryption_key = ENCRYPTION_KEY.get_or_init( || ChaCha20Poly1305::generate_nonce(&mut OsRng), // 96-bits; unique per message ); bytes = if encrypt { cipher .encrypt(encryption_key, bytes.as_ref()) .map_err(|_| CacheError::EncryptionError)? } else { cipher .decrypt(encryption_key, bytes.as_ref()) .map_err(|_| CacheError::EncryptionError)? }; Ok(bytes) } /// A helper function that returns compressed results. /// Feature flags (**compress-cache-results or cec-cache-results**) are required for this to work. /// /// # Arguments /// /// * `bytes` - It takes a slice of bytes as an argument. /// /// # Error /// Returns the compressed bytes on success otherwise it returns a CacheError /// on failure. #[cfg(any(feature = "compress-cache-results", feature = "cec-cache-results"))] fn compress_results(&mut self, mut bytes: Vec) -> Result, Report> { use std::io::Write; let mut writer = brotli::CompressorWriter::new(Vec::new(), 4096, 11, 22); writer .write_all(&bytes) .map_err(|_| CacheError::CompressionError)?; bytes = writer.into_inner(); Ok(bytes) } /// A helper function that returns compressed-encrypted results. /// Feature flag (**cec-cache-results**) is required for this to work. /// /// # Arguments /// /// * `bytes` - It takes a slice of bytes as an argument. /// /// # Error /// Returns the compressed and encrypted bytes on success otherwise it returns a CacheError /// on failure. #[cfg(feature = "cec-cache-results")] fn compress_encrypt_compress_results( &mut self, mut bytes: Vec, ) -> Result, Report> { // compress first bytes = self.compress_results(bytes)?; // encrypt bytes = self.encrypt_or_decrypt_results(bytes, true)?; // compress again; bytes = self.compress_results(bytes)?; Ok(bytes) } /// A helper function that returns compressed results. /// Feature flags (**compress-cache-results or cec-cache-results**) are required for this to work. /// If bytes where /// # Arguments /// /// * `bytes` - It takes a slice of bytes as an argument. /// /// # Error /// Returns the uncompressed bytes on success otherwise it returns a CacheError /// on failure. #[cfg(any(feature = "compress-cache-results", feature = "cec-cache-results"))] fn decompress_results(&mut self, bytes: &[u8]) -> Result, Report> { cfg_if::cfg_if! { if #[cfg(feature = "compress-cache-results")] { decompress_util(bytes) } else if #[cfg(feature = "cec-cache-results")] { let decompressed = decompress_util(bytes)?; let decrypted = self.encrypt_or_decrypt_results(decompressed, false)?; decompress_util(&decrypted) } } } /// A helper function that compresses or encrypts search results before they're inserted into a cache store /// # Arguments /// /// * `search_results` - A reference to the search_Results to process. /// /// /// # Error /// Returns a Vec of compressed or encrypted bytes on success otherwise it returns a CacheError /// on failure. fn pre_process_search_results( &mut self, search_results: &SearchResults, ) -> Result, Report> { #[allow(unused_mut)] // needs to be mutable when any of the features is enabled let mut bytes: Vec = search_results.try_into()?; #[cfg(feature = "compress-cache-results")] { let compressed = self.compress_results(bytes)?; bytes = compressed; } #[cfg(feature = "encrypt-cache-results")] { let encrypted = self.encrypt_or_decrypt_results(bytes, true)?; bytes = encrypted; } #[cfg(feature = "cec-cache-results")] { let compressed_encrypted_compressed = self.compress_encrypt_compress_results(bytes)?; bytes = compressed_encrypted_compressed; } Ok(bytes) } /// A helper function that decompresses or decrypts search results after they're fetched from the cache-store /// # Arguments /// /// * `bytes` - A Vec of bytes stores in the cache. /// /// /// # Error /// Returns the SearchResults struct on success otherwise it returns a CacheError /// on failure. #[allow(unused_mut)] // needs to be mutable when any of the features is enabled fn post_process_search_results( &mut self, mut bytes: Vec, ) -> Result> { #[cfg(feature = "compress-cache-results")] { let decompressed = self.decompress_results(&bytes)?; bytes = decompressed } #[cfg(feature = "encrypt-cache-results")] { let decrypted = self.encrypt_or_decrypt_results(bytes, false)?; bytes = decrypted } #[cfg(feature = "cec-cache-results")] { let decompressed_decrypted = self.decompress_results(&bytes)?; bytes = decompressed_decrypted; } Ok(bytes.try_into()?) } } /// A helper function that returns compressed results. /// Feature flags (**compress-cache-results or cec-cache-results**) are required for this to work. /// If bytes where /// # Arguments /// /// * `bytes` - It takes a slice of bytes as an argument. /// /// # Error /// Returns the uncompressed bytes on success otherwise it returns a CacheError /// on failure. #[cfg(any(feature = "compress-cache-results", feature = "cec-cache-results"))] fn decompress_util(input: &[u8]) -> Result, Report> { use std::io::Write; let mut writer = brotli::DecompressorWriter::new(Vec::new(), 4096); writer .write_all(input) .map_err(|_| CacheError::CompressionError)?; let bytes = writer .into_inner() .map_err(|_| CacheError::CompressionError)?; Ok(bytes) } #[cfg(feature = "redis-cache")] #[async_trait::async_trait] impl Cacher for RedisCache { async fn build(config: &Config) -> Self { log::info!( "Initialising redis cache. Listening to {}", &config.redis_url ); RedisCache::new(&config.redis_url, 5, config.cache_expiry_time) .await .expect("Redis cache configured") } async fn cached_results(&mut self, url: &str) -> Result> { use base64::Engine; let hashed_url_string: &str = &self.hash_url(url); let base64_string = self.cached_json(hashed_url_string).await?; let bytes = base64::engine::general_purpose::STANDARD_NO_PAD .decode(base64_string) .map_err(|_| CacheError::Base64DecodingOrEncodingError)?; self.post_process_search_results(bytes) } async fn cache_results( &mut self, search_results: &[SearchResults], urls: &[String], ) -> Result<(), Report> { use base64::Engine; // size of search_results is expected to be equal to size of urls -> key/value pairs for cache; let search_results_len = search_results.len(); let mut bytes = Vec::with_capacity(search_results_len); for result in search_results { let processed = self.pre_process_search_results(result)?; bytes.push(processed); } let base64_strings = bytes .iter() .map(|bytes_vec| base64::engine::general_purpose::STANDARD_NO_PAD.encode(bytes_vec)); let mut hashed_url_strings = Vec::with_capacity(search_results_len); for url in urls { let hash = self.hash_url(url); hashed_url_strings.push(hash); } self.cache_json(base64_strings, hashed_url_strings.into_iter()) .await } } /// TryInto implementation for SearchResults from Vec use std::convert::TryInto; impl TryInto for Vec { type Error = CacheError; fn try_into(self) -> Result { serde_json::from_slice(&self).map_err(|_| CacheError::SerializationError) } } impl TryInto> for &SearchResults { type Error = CacheError; fn try_into(self) -> Result, Self::Error> { serde_json::to_vec(self).map_err(|_| CacheError::SerializationError) } } /// Memory based cache backend. #[cfg(feature = "memory-cache")] pub struct InMemoryCache { /// The backend cache which stores data. cache: MokaCache>, } #[cfg(feature = "memory-cache")] #[async_trait::async_trait] impl Cacher for InMemoryCache { async fn build(config: &Config) -> Self { log::info!("Initialising in-memory cache"); InMemoryCache { cache: MokaCache::builder() .time_to_live(Duration::from_secs(config.cache_expiry_time.into())) .build(), } } async fn cached_results(&mut self, url: &str) -> Result> { let hashed_url_string = self.hash_url(url); match self.cache.get(&hashed_url_string) { Some(res) => self.post_process_search_results(res), None => Err(Report::new(CacheError::MissingValue)), } } async fn cache_results( &mut self, search_results: &[SearchResults], urls: &[String], ) -> Result<(), Report> { for (url, search_result) in urls.iter().zip(search_results.iter()) { let hashed_url_string = self.hash_url(url); let bytes = self.pre_process_search_results(search_result)?; self.cache.insert(hashed_url_string, bytes); } self.cache.sync(); Ok(()) } } /// Cache backend which utilises both memory and redis based caches. /// /// The hybrid cache system uses both the types of cache to ensure maximum availability. /// The set method sets the key, value pair in both the caches. Therefore in a case where redis /// cache becomes unavailable, the backend will retreive the value from in-memory cache. #[cfg(all(feature = "memory-cache", feature = "redis-cache"))] pub struct HybridCache { /// The in-memory backend cache which stores data. memory_cache: InMemoryCache, /// The redis backend cache which stores data. redis_cache: RedisCache, } #[cfg(all(feature = "memory-cache", feature = "redis-cache"))] #[async_trait::async_trait] impl Cacher for HybridCache { async fn build(config: &Config) -> Self { log::info!("Initialising hybrid cache"); HybridCache { memory_cache: InMemoryCache::build(config).await, redis_cache: RedisCache::build(config).await, } } async fn cached_results(&mut self, url: &str) -> Result> { match self.redis_cache.cached_results(url).await { Ok(res) => Ok(res), Err(_) => self.memory_cache.cached_results(url).await, } } async fn cache_results( &mut self, search_results: &[SearchResults], urls: &[String], ) -> Result<(), Report> { self.redis_cache.cache_results(search_results, urls).await?; self.memory_cache .cache_results(search_results, urls) .await?; Ok(()) } } /// Dummy cache backend pub struct DisabledCache; #[async_trait::async_trait] impl Cacher for DisabledCache { async fn build(_config: &Config) -> Self { log::info!("Caching is disabled"); DisabledCache } async fn cached_results(&mut self, _url: &str) -> Result> { Err(Report::new(CacheError::MissingValue)) } async fn cache_results( &mut self, _search_results: &[SearchResults], _urls: &[String], ) -> Result<(), Report> { Ok(()) } } /// A structure to efficiently share the cache between threads - as it is protected by a Mutex. pub struct SharedCache { /// The internal cache protected from concurrent access by a mutex cache: Mutex>, } impl SharedCache { /// A function that creates a new `SharedCache` from a Cache implementation. /// /// # Arguments /// /// * `cache` - It takes the `Cache` enum variant as an argument with the prefered cache type. /// /// Returns a newly constructed `SharedCache` struct. pub fn new(cache: impl Cacher + 'static) -> Self { Self { cache: Mutex::new(Box::new(cache)), } } /// A getter function which retrieves the cached SearchResulsts from the internal cache. /// /// # Arguments /// /// * `url` - It takes the search url as an argument which will be used as the key to fetch the /// cached results from the cache. /// /// # Error /// /// Returns a `SearchResults` struct containing the search results from the cache if nothing /// goes wrong otherwise returns a `CacheError`. pub async fn cached_results(&self, url: &str) -> Result> { let mut mut_cache = self.cache.lock().await; mut_cache.cached_results(url).await } /// A setter function which caches the results by using the `url` as the key and /// `SearchResults` as the value. /// /// # Arguments /// /// * `search_results` - It takes the `SearchResults` as an argument which are results that /// needs to be cached. /// * `url` - It takes the search url as an argument which will be used as the key for storing /// results in the cache. /// /// # Error /// /// Returns an unit type if the results are cached succesfully otherwise returns a `CacheError` /// on a failure. pub async fn cache_results( &self, search_results: &[SearchResults], urls: &[String], ) -> Result<(), Report> { let mut mut_cache = self.cache.lock().await; mut_cache.cache_results(search_results, urls).await } } /// A function to initialise the cache backend. pub async fn create_cache(config: &Config) -> impl Cacher { #[cfg(all(feature = "redis-cache", feature = "memory-cache"))] return HybridCache::build(config).await; #[cfg(all(feature = "memory-cache", not(feature = "redis-cache")))] return InMemoryCache::build(config).await; #[cfg(all(feature = "redis-cache", not(feature = "memory-cache")))] return RedisCache::build(config).await; #[cfg(not(any(feature = "memory-cache", feature = "redis-cache")))] return DisabledCache::build(config).await; } //#[cfg(feature = "Compress-cache-results")]