diff --git a/services/backup/CMakeLists.txt b/services/backup/CMakeLists.txt index 4256dea7b..802b734f0 100644 --- a/services/backup/CMakeLists.txt +++ b/services/backup/CMakeLists.txt @@ -1,117 +1,110 @@ PROJECT(backup C CXX) cmake_minimum_required(VERSION 3.16) set(CMAKE_RUNTIME_OUTPUT_DIRECTORY bin) if(COMMAND cmake_policy) cmake_policy(SET CMP0003 NEW) endif(COMMAND cmake_policy) set(CMAKE_CXX_STANDARD 17) set(BUILD_TESTING OFF CACHE BOOL "Turn off tests" FORCE) set(WITH_GTEST "Use Google Test" OFF) # FIND LIBS include(./cmake-components/grpc.cmake) include(./cmake-components/folly.cmake) add_subdirectory(./lib/glog) find_package(AWSSDK REQUIRED COMPONENTS core dynamodb) find_package(Boost 1.40 COMPONENTS program_options REQUIRED) # FIND FILES file(GLOB DOUBLE_CONVERSION_SOURCES "./lib/double-conversion/double-conversion/*.cc") -if ($ENV{COMM_TEST_SERVICES} MATCHES 1) - add_compile_definitions(COMM_TEST_SERVICES) -endif() - file(GLOB GENERATED_CODE "./_generated/*.cc") -if ($ENV{COMM_SERVICES_DEV_MODE} MATCHES 1) - add_compile_definitions(COMM_SERVICES_DEV_MODE) -endif() file(GLOB_RECURSE SOURCE_CODE "./src/*.cpp") include_directories( ./src ./src/grpc-client ./src/DatabaseEntities ./src/Reactors ./src/Reactors/server ./src/Reactors/server/base-reactors ./src/Reactors/client ./src/Reactors/client/blob ./src/Reactors/client/base-reactors ./_generated ${FOLLY_INCLUDES} ./lib/double-conversion ${Boost_INCLUDE_DIR} ) # SERVER add_executable( backup ${GENERATED_CODE} ${DOUBLE_CONVERSION_SOURCES} ${FOLLY_SOURCES} ${SOURCE_CODE} ) set( LIBS ${GRPC_LIBS} ${AWSSDK_LINK_LIBRARIES} ${Boost_LIBRARIES} glog::glog ) target_link_libraries( backup ${LIBS} ) install( TARGETS backup RUNTIME DESTINATION bin/ ) # TEST if ($ENV{COMM_TEST_SERVICES} MATCHES 1) file(GLOB TEST_CODE "./test/*.cpp") list(FILTER SOURCE_CODE EXCLUDE REGEX "./src/server.cpp") enable_testing() find_package(GTest REQUIRED) include_directories( ${GTEST_INCLUDE_DIR} ./test ) add_executable( runTests ${GENERATED_CODE} ${DOUBLE_CONVERSION_SOURCES} ${FOLLY_SOURCES} ${SOURCE_CODE} ${TEST_CODE} ) target_link_libraries( runTests ${LIBS} gtest gtest_main ) add_test( NAME runTests COMMAND runTests ) endif() diff --git a/services/backup/src/AwsTools.cpp b/services/backup/src/AwsTools.cpp index 575a33e37..04b553381 100644 --- a/services/backup/src/AwsTools.cpp +++ b/services/backup/src/AwsTools.cpp @@ -1,18 +1,21 @@ #include "AwsTools.h" #include "Constants.h" +#include "Tools.h" + +#include namespace comm { namespace network { std::unique_ptr getDynamoDBClient() { Aws::Client::ClientConfiguration config; config.region = AWS_REGION; -#ifdef COMM_SERVICES_DEV_MODE - config.endpointOverride = Aws::String("localstack:4566"); - config.scheme = Aws::Http::Scheme::HTTP; -#endif + if (isDevMode()) { + config.endpointOverride = Aws::String("localstack:4566"); + config.scheme = Aws::Http::Scheme::HTTP; + } return std::make_unique(config); } } // namespace network } // namespace comm diff --git a/services/backup/src/Constants.h b/services/backup/src/Constants.h index 84b04092b..3f57908b0 100644 --- a/services/backup/src/Constants.h +++ b/services/backup/src/Constants.h @@ -1,41 +1,39 @@ #pragma once +#include "Tools.h" + #include namespace comm { namespace network { // 4MB limit // WARNING: use keeping in mind that grpc adds its own headers to messages // https://github.com/grpc/grpc/blob/master/doc/PROTOCOL-HTTP2.md // so the message that actually is being sent over the network looks like this // [Compressed-Flag] [Message-Length] [Message] // [Compressed-Flag] 1 byte - added by grpc // [Message-Length] 4 bytes - added by grpc // [Message] N bytes - actual data // so for every message we get 5 additional bytes of data // as mentioned here // https://github.com/grpc/grpc/issues/15734#issuecomment-396962671 // grpc stream may contain more than one message const size_t GRPC_CHUNK_SIZE_LIMIT = 4 * 1024 * 1024; const size_t GRPC_METADATA_SIZE_PER_MESSAGE = 5; const std::string AWS_REGION = "us-east-2"; -#ifdef COMM_TEST_SERVICES -const std::string LOG_TABLE_NAME = "backup-service-log-test"; -const std::string BACKUP_TABLE_NAME = "backup-service-backup-test"; -#else -const std::string LOG_TABLE_NAME = "backup-service-log"; -const std::string BACKUP_TABLE_NAME = "backup-service-backup"; -#endif +const std::string LOG_TABLE_NAME = decorateTableName("backup-service-log"); +const std::string BACKUP_TABLE_NAME = + decorateTableName("backup-service-backup"); // This has to be smaller than GRPC_CHUNK_SIZE_LIMIT because we need to // recognize if we may receive multiple chunks or just one. If it was larger // than the chunk limit, once we get the amount of data of size equal to the // limit, we wouldn't know if we should put this in the database right away or // wait for more data. const size_t LOG_DATA_SIZE_DATABASE_LIMIT = 1 * 1024 * 1024; } // namespace network } // namespace comm diff --git a/services/backup/src/Tools.cpp b/services/backup/src/Tools.cpp index d3e19808c..467cca9f4 100644 --- a/services/backup/src/Tools.cpp +++ b/services/backup/src/Tools.cpp @@ -1,28 +1,45 @@ #include "Tools.h" #include +#include #include namespace comm { namespace network { std::string generateRandomString(std::size_t length) { const std::string CHARACTERS = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; thread_local std::random_device generator; std::uniform_int_distribution<> distribution(0, CHARACTERS.size() - 1); std::string random_string; for (std::size_t i = 0; i < length; ++i) { random_string += CHARACTERS[distribution(generator)]; } return random_string; } uint64_t getCurrentTimestamp() { using namespace std::chrono; return duration_cast(system_clock::now().time_since_epoch()) .count(); } +std::string decorateTableName(const std::string &baseName) { + std::string suffix = ""; + if (std::getenv("COMM_TEST_SERVICES") != nullptr && + std::string(std::getenv("COMM_TEST_SERVICES")) == "1") { + suffix = "-test"; + } + return baseName + suffix; +} + +bool isDevMode() { + if (std::getenv("COMM_SERVICES_DEV_MODE") == nullptr) { + return false; + } + return std::string(std::getenv("COMM_SERVICES_DEV_MODE")) == "1"; +} + } // namespace network } // namespace comm diff --git a/services/backup/src/Tools.h b/services/backup/src/Tools.h index fd25bbb15..e057a7a92 100644 --- a/services/backup/src/Tools.h +++ b/services/backup/src/Tools.h @@ -1,13 +1,15 @@ #pragma once #include #include namespace comm { namespace network { std::string generateRandomString(std::size_t length = 20); uint64_t getCurrentTimestamp(); +std::string decorateTableName(const std::string &baseName); +bool isDevMode(); } // namespace network } // namespace comm diff --git a/services/blob/CMakeLists.txt b/services/blob/CMakeLists.txt index c254808dc..8b2ed0de0 100644 --- a/services/blob/CMakeLists.txt +++ b/services/blob/CMakeLists.txt @@ -1,116 +1,108 @@ PROJECT(blob C CXX) cmake_minimum_required(VERSION 3.16) set(CMAKE_RUNTIME_OUTPUT_DIRECTORY bin) if(COMMAND cmake_policy) cmake_policy(SET CMP0003 NEW) endif(COMMAND cmake_policy) set(CMAKE_CXX_STANDARD 17) set(BUILD_TESTING OFF CACHE BOOL "Turn off tests" FORCE) set(WITH_GTEST "Use Google Test" OFF) # FIND LIBS include(./cmake-components/grpc.cmake) include(./cmake-components/folly.cmake) add_subdirectory(./lib/glog) find_package(AWSSDK REQUIRED COMPONENTS s3 core dynamodb) find_package(Boost 1.40 COMPONENTS program_options REQUIRED) find_package(OpenSSL REQUIRED) # FIND FILES file(GLOB DOUBLE_CONVERSION_SOURCES "./lib/double-conversion/double-conversion/*.cc") - -if ($ENV{COMM_TEST_SERVICES} MATCHES 1) - add_compile_definitions(COMM_TEST_SERVICES) -endif() - file(GLOB GENERATED_CODE "./_generated/*.cc") -if ($ENV{COMM_SERVICES_DEV_MODE} MATCHES 1) - add_compile_definitions(COMM_SERVICES_DEV_MODE) -endif() file(GLOB SOURCE_CODE "./src/*.cpp" "./src/**/*.cpp") include_directories( ./src ./src/DatabaseEntities ./src/Reactors/ ./src/Reactors/server ./src/Reactors/server/base-reactors ./_generated ${FOLLY_INCLUDES} ./lib/double-conversion ${Boost_INCLUDE_DIR} ) # SERVER add_executable( blob ${GENERATED_CODE} ${DOUBLE_CONVERSION_SOURCES} ${FOLLY_SOURCES} ${SOURCE_CODE} ) set( LIBS ${GRPC_LIBS} ${AWSSDK_LINK_LIBRARIES} ${Boost_LIBRARIES} OpenSSL::SSL glog::glog ) target_link_libraries( blob ${LIBS} ) install( TARGETS blob RUNTIME DESTINATION bin/ ) # TEST if ($ENV{COMM_TEST_SERVICES} MATCHES 1) file(GLOB TEST_CODE "./test/*.cpp") list(FILTER SOURCE_CODE EXCLUDE REGEX "./src/server.cpp") enable_testing() find_package(GTest REQUIRED) include_directories( ${GTEST_INCLUDE_DIR} ./test ) add_executable( runTests ${GENERATED_CODE} ${DOUBLE_CONVERSION_SOURCES} ${FOLLY_SOURCES} ${SOURCE_CODE} ${TEST_CODE} ) target_link_libraries( runTests ${LIBS} gtest gtest_main ) add_test( NAME runTests COMMAND runTests ) endif() diff --git a/services/blob/src/AwsTools.cpp b/services/blob/src/AwsTools.cpp index 8d5737a89..71c6af137 100644 --- a/services/blob/src/AwsTools.cpp +++ b/services/blob/src/AwsTools.cpp @@ -1,52 +1,55 @@ #include "AwsTools.h" #include "Constants.h" #include "Tools.h" #include +#include + namespace comm { namespace network { AwsS3Bucket getBucket(const std::string &bucketName) { return AwsS3Bucket(bucketName); } std::vector listBuckets() { Aws::S3::Model::ListBucketsOutcome outcome = getS3Client()->ListBuckets(); std::vector result; if (!outcome.IsSuccess()) { throw std::runtime_error(outcome.GetError().GetMessage()); } Aws::Vector buckets = outcome.GetResult().GetBuckets(); for (Aws::S3::Model::Bucket &bucket : buckets) { result.push_back(bucket.GetName()); } return result; } std::unique_ptr getDynamoDBClient() { Aws::Client::ClientConfiguration config; config.region = AWS_REGION; -#ifdef COMM_SERVICES_DEV_MODE - config.endpointOverride = Aws::String("localstack:4566"); - config.scheme = Aws::Http::Scheme::HTTP; -#endif + if (isDevMode()) { + config.endpointOverride = Aws::String("localstack:4566"); + config.scheme = Aws::Http::Scheme::HTTP; + } return std::make_unique(config); } std::unique_ptr getS3Client() { Aws::Client::ClientConfiguration config; config.region = AWS_REGION; -#ifdef COMM_SERVICES_DEV_MODE - config.endpointOverride = Aws::String("localstack:4566"); - config.scheme = Aws::Http::Scheme::HTTP; - return std::make_unique( - config, Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy::Never, false); -#else + if (isDevMode()) { + config.endpointOverride = Aws::String("localstack:4566"); + config.scheme = Aws::Http::Scheme::HTTP; + return std::make_unique( + config, + Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy::Never, + false); + } return std::make_unique(config); -#endif } } // namespace network } // namespace comm diff --git a/services/blob/src/Constants.h b/services/blob/src/Constants.h index f0110eed6..daaeb5251 100644 --- a/services/blob/src/Constants.h +++ b/services/blob/src/Constants.h @@ -1,37 +1,35 @@ #pragma once +#include "Tools.h" + #include namespace comm { namespace network { // 4MB limit // WARNING: use keeping in mind that grpc adds its own headers to messages // https://github.com/grpc/grpc/blob/master/doc/PROTOCOL-HTTP2.md // so the message that actually is being sent over the network looks like this // [Compressed-Flag] [Message-Length] [Message] // [Compressed-Flag] 1 byte - added by grpc // [Message-Length] 4 bytes - added by grpc // [Message] N bytes - actual data // so for every message we get 5 additional bytes of data // as mentioned here // https://github.com/grpc/grpc/issues/15734#issuecomment-396962671 // grpc stream may contain more than one message const size_t GRPC_CHUNK_SIZE_LIMIT = 4 * 1024 * 1024; const size_t GRPC_METADATA_SIZE_PER_MESSAGE = 5; // 5MB limit const size_t AWS_MULTIPART_UPLOAD_MINIMUM_CHUNK_SIZE = 5 * 1024 * 1024; const std::string AWS_REGION = "us-east-2"; const std::string BLOB_BUCKET_NAME = "commapp-blob"; -#ifdef COMM_TEST_SERVICES -const std::string BLOB_TABLE_NAME = "blob-service-blob-test"; -const std::string REVERSE_INDEX_TABLE_NAME = "blob-service-reverse-index-test"; -#else -const std::string BLOB_TABLE_NAME = "blob-service-blob"; -const std::string REVERSE_INDEX_TABLE_NAME = "blob-service-reverse-index"; -#endif +const std::string BLOB_TABLE_NAME = decorateTableName("blob-service-blob"); +const std::string REVERSE_INDEX_TABLE_NAME = + decorateTableName("blob-service-reverse-index"); } // namespace network } // namespace comm diff --git a/services/blob/src/Tools.cpp b/services/blob/src/Tools.cpp index 6c246b5c4..1e644188c 100644 --- a/services/blob/src/Tools.cpp +++ b/services/blob/src/Tools.cpp @@ -1,76 +1,94 @@ #include "Tools.h" #include "AwsTools.h" +#include "Constants.h" #include "DatabaseEntitiesTools.h" #include "DatabaseManager.h" #include #include +#include #include #include namespace comm { namespace network { database::S3Path generateS3Path(const std::string &bucketName, const std::string &blobHash) { return database::S3Path(bucketName, blobHash); } std::string computeHashForFile(const database::S3Path &s3Path) { SHA512_CTX ctx; SHA512_Init(&ctx); const std::function callback = [&ctx](const std::string &chunk) { SHA512_Update(&ctx, chunk.data(), chunk.size()); }; getBucket(s3Path.getBucketName()) .getObjectDataChunks( s3Path.getObjectName(), callback, GRPC_CHUNK_SIZE_LIMIT); unsigned char hash[SHA512_DIGEST_LENGTH]; SHA512_Final(hash, &ctx); std::ostringstream hashStream; for (int i = 0; i < SHA512_DIGEST_LENGTH; i++) { hashStream << std::hex << std::setfill('0') << std::setw(2) << std::nouppercase << (int)hash[i]; } return hashStream.str(); } database::S3Path findS3Path(const std::string &holder) { std::shared_ptr reverseIndexItem = database::DatabaseManager::getInstance().findReverseIndexItemByHolder( holder); if (reverseIndexItem == nullptr) { throw std::runtime_error( "provided holder: [" + holder + "] has not been found in the database"); } return findS3Path(*reverseIndexItem); } database::S3Path findS3Path(const database::ReverseIndexItem &reverseIndexItem) { std::shared_ptr blobItem = database::DatabaseManager::getInstance().findBlobItem( reverseIndexItem.getBlobHash()); if (blobItem == nullptr) { throw std::runtime_error( "no blob found for blobHash: [" + reverseIndexItem.getBlobHash() + "]"); } database::S3Path result = blobItem->getS3Path(); return result; } uint64_t getCurrentTimestamp() { using namespace std::chrono; return duration_cast(system_clock::now().time_since_epoch()) .count(); } +std::string decorateTableName(const std::string &baseName) { + std::string suffix = ""; + if (std::getenv("COMM_TEST_SERVICES") != nullptr && + std::string(std::getenv("COMM_TEST_SERVICES")) == "1") { + suffix = "-test"; + } + return baseName + suffix; +} + +bool isDevMode() { + if (std::getenv("COMM_SERVICES_DEV_MODE") == nullptr) { + return false; + } + return std::string(std::getenv("COMM_SERVICES_DEV_MODE")) == "1"; +} + } // namespace network } // namespace comm diff --git a/services/blob/src/Tools.h b/services/blob/src/Tools.h index 4ed72265c..71fb53d2e 100644 --- a/services/blob/src/Tools.h +++ b/services/blob/src/Tools.h @@ -1,29 +1,32 @@ #pragma once -#include "Constants.h" #include "DatabaseEntitiesTools.h" #include "S3Path.h" namespace comm { namespace network { database::S3Path generateS3Path(const std::string &bucketName, const std::string &blobHash); std::string computeHashForFile(const database::S3Path &s3Path); database::S3Path findS3Path(const std::string &holder); database::S3Path findS3Path(const database::ReverseIndexItem &reverseIndexItem); uint64_t getCurrentTimestamp(); +std::string decorateTableName(const std::string &baseName); + +bool isDevMode(); + class invalid_argument_error : public std::runtime_error { public: invalid_argument_error(std::string errorMessage) : std::runtime_error(errorMessage) { } }; } // namespace network } // namespace comm