diff --git a/native/native_rust_library/Cargo.lock b/native/native_rust_library/Cargo.lock --- a/native/native_rust_library/Cargo.lock +++ b/native/native_rust_library/Cargo.lock @@ -19,9 +19,9 @@ [[package]] name = "aho-corasick" -version = "0.7.19" +version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4f55bd91a0978cbfd91c457a164bab8b4001c833b7f323132c0a4e1922dd44e" +checksum = "b2969dcb958b36655471fc61f7e416fa76033bdd4bfed0678d8fee1e2d07a1f0" dependencies = [ "memchr", ] @@ -1004,7 +1004,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8263075bb86c5a1b1427b5ae862e8889656f126e9f77c484496e8b47cf5c5558" dependencies = [ - "regex-automata", + "regex-automata 0.1.10", ] [[package]] @@ -1015,9 +1015,9 @@ [[package]] name = "memchr" -version = "2.5.0" +version = "2.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" +checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149" [[package]] name = "mime" @@ -1421,13 +1421,14 @@ [[package]] name = "regex" -version = "1.6.0" +version = "1.10.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c4eb3267174b8c6c2f654116623910a0fef09c4753f8dd83db29c48a0df988b" +checksum = "380b951a9c5e80ddfd6136919eef32310721aa4aacd4889a8d39124b026ab343" dependencies = [ "aho-corasick", "memchr", - "regex-syntax", + "regex-automata 0.4.3", + "regex-syntax 0.8.2", ] [[package]] @@ -1436,7 +1437,18 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" dependencies = [ - "regex-syntax", + "regex-syntax 0.6.27", +] + +[[package]] +name = "regex-automata" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f804c7828047e88b2d32e2d7fe5a105da8ee3264f01902f796c8e067dc2483f" +dependencies = [ + 
"aho-corasick", + "memchr", + "regex-syntax 0.8.2", ] [[package]] @@ -1445,6 +1457,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a3f87b73ce11b1619a3c6332f45341e0047173771e8b8b73f87bfeefb7b56244" +[[package]] +name = "regex-syntax" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" + [[package]] name = "remove_dir_all" version = "0.5.3" diff --git a/native/native_rust_library/Cargo.toml b/native/native_rust_library/Cargo.toml --- a/native/native_rust_library/Cargo.toml +++ b/native/native_rust_library/Cargo.toml @@ -17,6 +17,7 @@ argon2 = { version = "0.5.1", features = ["std"] } grpc_clients = { path = "../../shared/grpc_clients" } base64 = "0.21" +regex = "1.10" [target.'cfg(target_os = "android")'.dependencies] backup_client = { path = "../../shared/backup_client", default-features = false, features = [ diff --git a/native/native_rust_library/src/backup.rs b/native/native_rust_library/src/backup.rs --- a/native/native_rust_library/src/backup.rs +++ b/native/native_rust_library/src/backup.rs @@ -1,7 +1,6 @@ +mod file_info; mod upload_handler; -use std::error::Error; - use crate::argon2_tools::{compute_backup_key, compute_backup_key_str}; use crate::constants::{aes, secure_store}; use crate::ffi::secure_store_get; @@ -15,6 +14,7 @@ }; use serde::{Deserialize, Serialize}; use serde_json::json; +use std::error::Error; pub mod ffi { use super::*; diff --git a/native/native_rust_library/src/backup/file_info.rs b/native/native_rust_library/src/backup/file_info.rs new file mode 100644 --- /dev/null +++ b/native/native_rust_library/src/backup/file_info.rs @@ -0,0 +1,69 @@ +use lazy_static::lazy_static; +use regex::Regex; +use std::path::PathBuf; + +lazy_static! 
{ + static ref BACKUP_DATA_FILE_REGEX: Regex = Regex::new( + r"^backup-(?[^-]*)(?:-log-(?\d*))?(?-userkeys|-attachments)?$" + ) + .expect("Regex compilation failed"); +} + +#[derive(Debug)] +pub struct BackupFileInfo { + pub backup_id: String, + pub log_id: Option, + pub additional_data: Option, +} + +impl TryFrom for BackupFileInfo { + type Error = (); + + fn try_from(value: PathBuf) -> Result { + let Some(file_name) = value.file_name() else { + return Err(()); + }; + let file_name = file_name.to_string_lossy(); + + let Some(captures) = BACKUP_DATA_FILE_REGEX.captures(&file_name) else { + return Err(()); + }; + + let Some(backup_id) = captures + .name("backup_id") + .map(|re_match| re_match.as_str().to_string()) + else { + // Should never happen because the regex matched the filename + println!( + "Couldn't parse 'backup_id' from backup filename: {file_name:?}" + ); + return Err(()); + }; + + let log_id = match captures + .name("log_id") + .map(|re_match| re_match.as_str().parse::()) + { + None => None, + Some(Ok(log_id)) => Some(log_id), + Some(Err(err)) => { + // Reachable when the `\d*` match is empty or otherwise fails to parse + println!( + "Couldn't parse 'log_id' from backup filename: {file_name:?}. 
\ + Error: {err:?}" + ); + return Err(()); + } + }; + + let additional_data = captures + .name("additional_data") + .map(|m| m.as_str().to_string()); + + Ok(Self { + backup_id, + log_id, + additional_data, + }) + } +} diff --git a/native/native_rust_library/src/backup/upload_handler.rs b/native/native_rust_library/src/backup/upload_handler.rs --- a/native/native_rust_library/src/backup/upload_handler.rs +++ b/native/native_rust_library/src/backup/upload_handler.rs @@ -1,7 +1,14 @@ +use super::file_info::BackupFileInfo; use super::get_user_identity_from_secure_store; use crate::constants::BACKUP_SERVICE_CONNECTION_RETRY_DELAY; +use crate::ffi::{ + get_backup_directory_path, get_backup_file_path, + get_backup_user_keys_file_path, +}; use crate::BACKUP_SOCKET_ADDR; use crate::RUNTIME; +use backup_client::BackupData; +use backup_client::UserIdentity; use backup_client::{ BackupClient, Error as BackupError, LogUploadConfirmation, Stream, StreamExt, WSError, @@ -9,7 +16,10 @@ use lazy_static::lazy_static; use std::convert::Infallible; use std::error::Error; -use std::future::{self, Future}; +use std::future::Future; +use std::io::BufRead; +use std::io::ErrorKind; +use std::path::PathBuf; use std::pin::Pin; use std::sync::{Arc, Mutex}; use tokio::sync::Notify; @@ -19,6 +29,9 @@ pub static ref UPLOAD_HANDLER: Arc>>> = Arc::new(Mutex::new(None)); static ref TRIGGER_BACKUP_FILE_UPLOAD: Arc = Arc::new(Notify::new()); + static ref BACKUP_FOLDER_PATH: PathBuf = PathBuf::from( + get_backup_directory_path().expect("Getting backup directory path failed") + ); } pub mod ffi { @@ -74,12 +87,21 @@ let mut _tx = Box::pin(tx); let mut rx = Box::pin(rx); - let err = tokio::select! { - Err(err) = watch_and_upload_files() => err, - Err(err) = delete_confirmed_logs(&mut rx) => err, - }; - - println!("Backup handler error: '{err:?}'"); + loop { + let err = tokio::select! 
{ + Err(err) = watch_and_upload_files(&backup_client, &user_identity) => err, + Err(err) = delete_confirmed_logs(&mut rx) => err, + }; + + println!("Backup handler error: '{err:?}'"); + match err { + BackupHandlerError::BackupError(_) + | BackupHandlerError::BackupWSError(_) + | BackupHandlerError::WSClosed => break, + BackupHandlerError::IoError(_) + | BackupHandlerError::CxxException(_) => continue, + } + } tokio::time::sleep(BACKUP_SERVICE_CONNECTION_RETRY_DELAY).await; println!("Retrying backup log upload"); @@ -87,9 +109,48 @@ }) } -async fn watch_and_upload_files() -> Result { +async fn watch_and_upload_files( + backup_client: &BackupClient, + user_identity: &UserIdentity, +) -> Result { loop { - let () = future::pending().await; + let mut file_stream = match tokio::fs::read_dir(&*BACKUP_FOLDER_PATH).await + { + Ok(file_stream) => file_stream, + Err(err) if err.kind() == ErrorKind::NotFound => { + TRIGGER_BACKUP_FILE_UPLOAD.notified().await; + continue; + } + Err(err) => return Err(err.into()), + }; + + while let Some(file) = file_stream.next_entry().await? { + let path = file.path(); + + let Ok(BackupFileInfo { + backup_id, + log_id, + additional_data, + }) = path.try_into() + else { + continue; + }; + + // Skip additional data files (attachments, user keys). 
They will be + // handled when we iterate over the corresponding files with the + // main content + if additional_data.is_some() { + continue; + } + + if let Some(_) = log_id { + } else { + compaction::upload_files(backup_client, user_identity, backup_id) + .await?; + } + } + + TRIGGER_BACKUP_FILE_UPLOAD.notified().await; } } @@ -101,11 +162,70 @@ Err(BackupHandlerError::WSClosed) } +mod compaction { + use super::*; + + pub async fn upload_files( + backup_client: &BackupClient, + user_identity: &UserIdentity, + backup_id: String, + ) -> Result<(), BackupHandlerError> { + let user_data_path = get_backup_file_path(&backup_id, false)?; + let user_data = tokio::fs::read(&user_data_path).await?; + + let user_keys_path = get_backup_user_keys_file_path(&backup_id)?; + let user_keys = tokio::fs::read(&user_keys_path).await?; + + let attachments_path = get_backup_file_path(&backup_id, true)?; + let attachments = match tokio::fs::read(&attachments_path).await { + Ok(data) => data.lines().collect::>()?, + Err(err) if err.kind() == ErrorKind::NotFound => Vec::new(), + Err(err) => return Err(err.into()), + }; + + let backup_data = BackupData { + backup_id: backup_id.clone(), + user_data, + user_keys, + attachments, + }; + + backup_client + .upload_backup(user_identity, backup_data) + .await?; + + tokio::spawn(cleanup_files(backup_id)); + + Ok(()) + } + + pub async fn cleanup_files(backup_id: String) { + let backup_files_cleanup = async { + let user_data_path = get_backup_file_path(&backup_id, false)?; + tokio::fs::remove_file(&user_data_path).await?; + let user_keys_path = get_backup_user_keys_file_path(&backup_id)?; + tokio::fs::remove_file(&user_keys_path).await?; + let attachments_path = get_backup_file_path(&backup_id, true)?; + match tokio::fs::remove_file(&attachments_path).await { + Ok(()) => Result::<_, Box>::Ok(()), + Err(err) if err.kind() == ErrorKind::NotFound => Ok(()), + Err(err) => Err(err.into()), + } + }; + + if let Err(err) = backup_files_cleanup.await { + 
println!("Error when cleaning up the backup files: {err:?}"); + } + } +} + #[derive( Debug, derive_more::Display, derive_more::From, derive_more::Error, )] -enum BackupHandlerError { +pub enum BackupHandlerError { BackupError(BackupError), BackupWSError(WSError), WSClosed, + IoError(std::io::Error), + CxxException(cxx::Exception), }