diff --git a/services/reports/src/database/item.rs b/services/reports/src/database/item.rs
--- a/services/reports/src/database/item.rs
+++ b/services/reports/src/database/item.rs
@@ -5,8 +5,8 @@
   constants::DDB_ITEM_SIZE_LIMIT,
   crypto::aes256::EncryptionKey,
   database::{
-    self, blob::BlobOrDBContent, AttributeExtractor, AttributeMap, DBItemError,
-    TryFromAttribute,
+    self, blob::BlobOrDBContent, calculate_size_in_db, AttributeExtractor,
+    AttributeMap, DBItemError, TryFromAttribute,
   },
 };
 use num_traits::FromPrimitive;
@@ -33,20 +33,6 @@
   pub encryption_key: Option<EncryptionKey>,
 }

-/// contains some redundancy as not all keys are always present
-static REPORT_ITEM_KEYS_SIZE: usize = {
-  let mut size: usize = 0;
-  size += ATTR_REPORT_ID.as_bytes().len();
-  size += ATTR_REPORT_TYPE.as_bytes().len();
-  size += ATTR_USER_ID.as_bytes().len();
-  size += ATTR_PLATFORM.as_bytes().len();
-  size += ATTR_CREATION_TIME.as_bytes().len();
-  size += ATTR_ENCRYPTION_KEY.as_bytes().len();
-  size += ATTR_BLOB_INFO.as_bytes().len();
-  size += ATTR_REPORT_CONTENT.as_bytes().len();
-  size
-};
-
 impl ReportItem {
   pub fn into_attrs(self) -> AttributeMap {
     let creation_time = self
@@ -79,9 +65,11 @@
     &mut self,
     blob_client: &BlobServiceClient,
   ) -> Result<(), BlobServiceError> {
-    if self.total_size() < DDB_ITEM_SIZE_LIMIT {
-      return Ok(());
-    };
+    if let Ok(size) = calculate_size_in_db(&self.clone().into_attrs()) {
+      if size < DDB_ITEM_SIZE_LIMIT {
+        return Ok(());
+      };
+    }

     debug!(
       report_id = ?self.id,
@@ -89,29 +77,6 @@
     );
     self.content.move_to_blob(blob_client).await
   }
-
-  fn total_size(&self) -> usize {
-    let mut size = REPORT_ITEM_KEYS_SIZE;
-    size += self.id.as_bytes().len();
-    size += self.user_id.as_bytes().len();
-    size += self.platform.to_string().as_bytes().len();
-    size += (self.report_type as u8).to_string().as_bytes().len();
-    size += match &self.content {
-      BlobOrDBContent::Database(data) => data.len(),
-      BlobOrDBContent::Blob(info) => {
-        let mut blob_size = 0;
-        blob_size += "holder".as_bytes().len();
-        blob_size += "blob_hash".as_bytes().len();
-        blob_size += info.holder.as_bytes().len();
-        blob_size += info.blob_hash.as_bytes().len();
-        blob_size
-      }
-    };
-    if let Some(key) = self.encryption_key.as_ref() {
-      size += key.as_ref().len();
-    }
-    size
-  }
 }

 impl TryFrom<AttributeMap> for ReportItem {
diff --git a/shared/comm-lib/src/database.rs b/shared/comm-lib/src/database.rs
--- a/shared/comm-lib/src/database.rs
+++ b/shared/comm-lib/src/database.rs
@@ -724,6 +724,55 @@
   }
 }

+#[derive(Debug, Clone, Copy, derive_more::Display, derive_more::Error)]
+pub struct UnknownAttributeTypeError;
+
+fn calculate_attr_value_size_in_db(
+  value: &AttributeValue,
+) -> Result<usize, UnknownAttributeTypeError> {
+  const ELEMENT_BYTE_OVERHEAD: usize = 1;
+  const CONTAINER_BYTE_OVERHEAD: usize = 3;
+  /// AWS doesn't provide an exact algorithm for calculating number size in
+  /// bytes, in case they change the internal representation. We know that a
+  /// number can use between 2 and 21 bytes, so we use the maximum value as
+  /// the byte size.
+  const NUMBER_BYTE_SIZE: usize = 21;
+
+  let result = match value {
+    AttributeValue::B(blob) => blob.as_ref().len(),
+    AttributeValue::L(list) => {
+      CONTAINER_BYTE_OVERHEAD
+        + list.len() * ELEMENT_BYTE_OVERHEAD
+        + list
+          .iter()
+          .try_fold(0, |a, v| Ok(a + calculate_attr_value_size_in_db(v)?))?
+    }
+    AttributeValue::M(map) => {
+      CONTAINER_BYTE_OVERHEAD
+        + map.len() * ELEMENT_BYTE_OVERHEAD
+        + calculate_size_in_db(map)?
+    }
+    AttributeValue::Bool(_) | AttributeValue::Null(_) => 1,
+    AttributeValue::Bs(set) => set.len(),
+    AttributeValue::N(_) => NUMBER_BYTE_SIZE,
+    AttributeValue::Ns(set) => set.len() * NUMBER_BYTE_SIZE,
+    AttributeValue::S(string) => string.as_bytes().len(),
+    AttributeValue::Ss(set) => {
+      set.iter().map(|string| string.as_bytes().len()).sum()
+    }
+    _ => return Err(UnknownAttributeTypeError),
+  };
+
+  Ok(result)
+}
+
+pub fn calculate_size_in_db(
+  value: &AttributeMap,
+) -> Result<usize, UnknownAttributeTypeError> {
+  value.iter().try_fold(0, |a, (attr, value)| {
+    Ok(a + attr.as_bytes().len() + calculate_attr_value_size_in_db(value)?)
+  })
+}
+
 #[cfg(test)]
 mod tests {
   use super::*;
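
For reference, below is a minimal sketch (not part of the diff) of a unit test that could sit in the existing mod tests block in shared/comm-lib/src/database.rs. It assumes AttributeMap is the crate's HashMap<String, AttributeValue> alias and that use super::*; brings AttributeValue and calculate_size_in_db into scope; the attribute names and the expected total are illustrative only.

  #[test]
  fn calculate_size_in_db_sums_keys_and_values() {
    // Hypothetical attributes; any map of known AttributeValue variants works.
    let mut attrs = AttributeMap::new();
    attrs.insert(
      "reportID".to_string(),
      AttributeValue::S("abc123".to_string()),
    );
    attrs.insert(
      "creationTime".to_string(),
      AttributeValue::N("1700000000".to_string()),
    );

    let size = calculate_size_in_db(&attrs)
      .expect("all attribute types are known");
    // Keys and string values count by byte length, numbers as the 21-byte
    // maximum: "reportID" (8) + "abc123" (6) + "creationTime" (12) + 21 = 47.
    assert_eq!(size, 47);
  }

Note the fallback in ensure_size_constraints: if the estimate cannot be computed (an unknown attribute type), the report content is moved to blob storage anyway, which errs on the safe side of the DDB_ITEM_SIZE_LIMIT check.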