diff --git a/services/backup/src/database/mod.rs b/services/backup/src/database/mod.rs
--- a/services/backup/src/database/mod.rs
+++ b/services/backup/src/database/mod.rs
@@ -4,10 +4,11 @@
 use std::collections::HashMap;
 
 use aws_sdk_dynamodb::{
-  operation::get_item::GetItemOutput, types::AttributeValue,
+  operation::get_item::GetItemOutput,
+  types::{AttributeValue, ReturnValue},
 };
 use comm_services_lib::database::Error;
-use tracing::error;
+use tracing::{error, trace, warn};
 
 use crate::constants::{
   BACKUP_TABLE_FIELD_BACKUP_ID, BACKUP_TABLE_FIELD_USER_ID,
@@ -61,16 +62,7 @@
     user_id: &str,
     backup_id: &str,
   ) -> Result<Option<BackupItem>, Error> {
-    let item_key = HashMap::from([
-      (
-        BACKUP_TABLE_FIELD_USER_ID.to_string(),
-        AttributeValue::S(user_id.to_string()),
-      ),
-      (
-        BACKUP_TABLE_FIELD_BACKUP_ID.to_string(),
-        AttributeValue::S(backup_id.to_string()),
-      ),
-    ]);
+    let item_key = Self::get_item_key(user_id, backup_id);
 
     let output = self
       .client
@@ -127,15 +119,19 @@
     }
   }
 
-  pub async fn remove_backup_item(&self, backup_id: &str) -> Result<(), Error> {
-    self
+  pub async fn remove_backup_item(
+    &self,
+    user_id: &str,
+    backup_id: &str,
+  ) -> Result<Option<BackupItem>, Error> {
+    let item_key = Self::get_item_key(user_id, backup_id);
+
+    let response = self
       .client
       .delete_item()
       .table_name(BACKUP_TABLE_NAME)
-      .key(
-        BACKUP_TABLE_FIELD_BACKUP_ID,
-        AttributeValue::S(backup_id.to_string()),
-      )
+      .set_key(Some(item_key))
+      .return_values(ReturnValue::AllOld)
       .send()
       .await
       .map_err(|e| {
@@ -143,7 +139,95 @@
         Error::AwsSdk(e.into())
       })?;
 
-    Ok(())
+    response
+      .attributes
+      .map(BackupItem::try_from)
+      .transpose()
+      .map_err(Error::from)
+  }
+
+  /// For the purposes of the initial backup version this function
+  /// removes all backups except for the latest one
+  pub async fn remove_old_backups(
+    &self,
+    user_id: &str,
+  ) -> Result<Vec<BackupItem>, Error> {
+    let response = self
+      .client
+      .query()
+      .table_name(BACKUP_TABLE_NAME)
+      .index_name(BACKUP_TABLE_INDEX_USERID_CREATED)
+      .key_condition_expression("#userID = :valueToMatch")
+      .expression_attribute_names("#userID", BACKUP_TABLE_FIELD_USER_ID)
+      .expression_attribute_values(
+        ":valueToMatch",
+        AttributeValue::S(user_id.to_string()),
+      )
+      .scan_index_forward(false)
+      .send()
+      .await
+      .map_err(|e| {
+        error!("DynamoDB client failed to fetch backups");
+        Error::AwsSdk(e.into())
+      })?;
+
+    if response.last_evaluated_key().is_some() {
+      // In the initial version of the backup service this function will be run
+      // for every new backup (each user only has one backup), so this shouldn't
+      // happen
+      warn!("Not all old backups have been cleaned up");
+    }
+
+    let items = response
+      .items
+      .unwrap_or_default()
+      .into_iter()
+      .map(OrderedBackupItem::try_from)
+      .collect::<Result<Vec<_>, _>>()?;
+
+    let mut removed_backups = vec![];
+
+    let Some(latest) = items.iter().map(|item| item.created).max() else {
+      return Ok(removed_backups);
+    };
+
+    for item in items {
+      if item.created == latest {
+        trace!(
+          "Skipping removal of the latest backup item: {}",
+          item.backup_id
+        );
+        continue;
+      }
+
+      trace!("Removing backup item: {item:?}");
+
+      if let Some(backup) =
+        self.remove_backup_item(user_id, &item.backup_id).await?
+      {
+        removed_backups.push(backup);
+      } else {
+        warn!("Backup was found during query, but wasn't found when deleting")
+      };
+    }
+
+    Ok(removed_backups)
+  }
+
+  fn get_item_key(
+    user_id: &str,
+    backup_id: &str,
+  ) -> HashMap<String, AttributeValue> {
+    HashMap::from([
+      (
+        BACKUP_TABLE_FIELD_USER_ID.to_string(),
+        AttributeValue::S(user_id.to_string()),
+      ),
+      (
+        BACKUP_TABLE_FIELD_BACKUP_ID.to_string(),
+        AttributeValue::S(backup_id.to_string()),
+      ),
+    ])
   }
 
   // log item
diff --git a/services/backup/src/http/handlers/backup.rs b/services/backup/src/http/handlers/backup.rs
--- a/services/backup/src/http/handlers/backup.rs
+++ b/services/backup/src/http/handlers/backup.rs
@@ -66,7 +66,7 @@
   };
 
   let item = BackupItem::new(
-    user.user_id,
+    user.user_id.clone(),
     backup_id,
     user_keys_blob_info,
     user_data_blob_info,
@@ -81,6 +81,22 @@
   user_keys_revoke.cancel();
   user_data_revoke.cancel();
 
+  for backup in db_client
+    .remove_old_backups(&user.user_id)
+    .await
+    .map_err(BackupError::from)?
+  {
+    blob_client.schedule_revoke_holder(
+      backup.user_keys.blob_hash,
+      backup.user_keys.holder,
+    );
+
+    blob_client.schedule_revoke_holder(
+      backup.user_data.blob_hash,
+      backup.user_data.holder,
+    );
+  }
+
   Ok(HttpResponse::Ok().finish())
 }
diff --git a/services/commtest/tests/backup_integration_test.rs b/services/commtest/tests/backup_integration_test.rs
--- a/services/commtest/tests/backup_integration_test.rs
+++ b/services/commtest/tests/backup_integration_test.rs
@@ -8,6 +8,7 @@
   },
   tools::{generate_stable_nbytes, Error},
 };
+use reqwest::StatusCode;
 use std::env;
 
 #[tokio::test]
@@ -106,5 +107,22 @@
   .await?;
   assert_eq!(user_keys, backup_datas[1].user_keys);
 
+  // Test cleanup
+  let first_backup_descriptor = BackupDescriptor::BackupID {
+    backup_id: backup_datas[0].backup_id.clone(),
+    user_identity: user_identity.clone(),
+  };
+
+  let response = pull_backup::run(
+    url.clone(),
+    first_backup_descriptor.clone(),
+    RequestedData::UserKeys,
+  )
+  .await;
+  assert!(
+    matches!(response, Err(Error::HttpStatus(StatusCode::NOT_FOUND))),
+    "First backup should have been removed, instead got response: {response:?}"
+  );
+
   Ok(())
 }
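A note on the `last_evaluated_key()` check in `remove_old_backups`: a single DynamoDB `Query` call returns at most 1 MB of data, so a result set that spans more than one page comes back with a `last_evaluated_key`. The patch deliberately only warns in that case, because in the initial version each user is expected to have at most one old backup. For reference, a fully paginated variant of the same query could look roughly like the sketch below. It is not part of the patch; the helper name `query_all_user_backups` and its table/index/field parameters are illustrative stand-ins for the constants in `crate::constants`.

```rust
// Illustrative pagination sketch, not part of the patch above.
use std::collections::HashMap;

use aws_sdk_dynamodb::{types::AttributeValue, Client};

async fn query_all_user_backups(
  client: &Client,
  table_name: &str,
  index_name: &str,
  user_id_field: &str,
  user_id: &str,
) -> Result<Vec<HashMap<String, AttributeValue>>, aws_sdk_dynamodb::Error> {
  let mut items = Vec::new();
  let mut exclusive_start_key = None;

  loop {
    let response = client
      .query()
      .table_name(table_name)
      .index_name(index_name)
      .key_condition_expression("#userID = :valueToMatch")
      .expression_attribute_names("#userID", user_id_field)
      .expression_attribute_values(
        ":valueToMatch",
        AttributeValue::S(user_id.to_string()),
      )
      .scan_index_forward(false)
      // Resume from the previous page, if there was one
      .set_exclusive_start_key(exclusive_start_key.clone())
      .send()
      .await?;

    // A present `last_evaluated_key` means DynamoDB has more pages to return
    exclusive_start_key = response.last_evaluated_key.clone();
    items.extend(response.items.unwrap_or_default());

    if exclusive_start_key.is_none() {
      return Ok(items);
    }
  }
}
```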