Page Menu | Home | Phabricator

D13395.diff
No OneTemporary

D13395.diff

diff --git a/services/terraform/remote/aws_cloudwatch_alarms.tf b/services/terraform/remote/aws_cloudwatch_alarms.tf
--- a/services/terraform/remote/aws_cloudwatch_alarms.tf
+++ b/services/terraform/remote/aws_cloudwatch_alarms.tf
@@ -14,6 +14,13 @@
Http = { name = "HTTP", pattern = "HTTP Error" }
}
+ blob_error_patterns = { # Blob error categories: `name` is interpolated into metric/alarm names, `pattern` is compared with $.fields.errorType
+ S3 = { name = "S3", pattern = "S3 Error" },
+ DDB = { name = "DDB", pattern = "DDB Error" },
+ HTTP = { name = "HTTP", pattern = "HTTP Error" },
+ Other = { name = "Other", pattern = "Other Error" },
+ }
+
service_log_groups = {
Backup = { name = "Backup", log_group_name = "/ecs/backup-service-task-def" },
Blob = { name = "Blob", log_group_name = "/ecs/blob-service-task-def" },
@@ -128,6 +135,7 @@
}
resource "aws_cloudwatch_event_target" "ecs_task_stop" {
+ # Delivers events matched by the ecs_task_stop event rule to the ecs_task_stop log group.
rule = aws_cloudwatch_event_rule.ecs_task_stop.name
arn = aws_cloudwatch_log_group.ecs_task_stop.arn
}
@@ -199,3 +207,44 @@
alarm_actions = [aws_sns_topic.service_connection_error_topic.arn]
}
+
+resource "aws_sns_topic" "blob_error_topic" {
+ name = "blob-error-topic" # referenced by alarm_actions of aws_cloudwatch_metric_alarm.blob_error_alarms
+}
+# Email subscription: messages published to blob_error_topic go to local.error_reports_subscribed_email.
+resource "aws_sns_topic_subscription" "blob_email_subscription" {
+ topic_arn = aws_sns_topic.blob_error_topic.arn
+ protocol = "email"
+ endpoint = local.error_reports_subscribed_email
+}
+
+resource "aws_cloudwatch_log_metric_filter" "blob_error_filters" {
+ for_each = local.blob_error_patterns
+ # One filter per Blob error category; every JSON log event with level ERROR and a matching errorType adds 1 to its metric.
+ name = "Blob${each.value.name}ErrorCount"
+ pattern = "{ $.level = \"ERROR\" && $.fields.errorType = \"${each.value.pattern}\" }"
+ log_group_name = local.service_log_groups.Blob.log_group_name
+ # The metric name and namespace below are what aws_cloudwatch_metric_alarm.blob_error_alarms reads; keep them in sync.
+ metric_transformation {
+ name = "Blob${each.value.name}ErrorCount"
+ namespace = "BlobServiceMetricFilters"
+ value = "1"
+ }
+}
+
+resource "aws_cloudwatch_metric_alarm" "blob_error_alarms" {
+ for_each = local.blob_error_patterns
+ # One alarm per Blob error category: triggers when the Sum of the category's metric over one 300-second period is >= 1.
+ alarm_name = "Blob${local.is_staging ? "Staging" : "Production"}${each.value.name}ErrorAlarm"
+ alarm_description = "Alarm when Blob ${each.value.name} errors exceed threshold"
+ namespace = "BlobServiceMetricFilters"
+ metric_name = "Blob${each.value.name}ErrorCount"
+ statistic = "Sum"
+ period = 300
+ evaluation_periods = 1
+ comparison_operator = "GreaterThanOrEqualToThreshold"
+ threshold = 1
+ actions_enabled = true
+ alarm_actions = [aws_sns_topic.blob_error_topic.arn]
+}
+
diff --git a/services/terraform/remote/main.tf b/services/terraform/remote/main.tf
--- a/services/terraform/remote/main.tf
+++ b/services/terraform/remote/main.tf
@@ -23,6 +23,8 @@
target_account_id = lookup(local.secrets.accountIDs, local.environment)
terraform_role_arn = "arn:aws:iam::${local.target_account_id}:role/Terraform"
+
+ comm_services_use_json_logs = "true"
}
provider "aws" {
diff --git a/services/terraform/remote/service_identity.tf b/services/terraform/remote/service_identity.tf
--- a/services/terraform/remote/service_identity.tf
+++ b/services/terraform/remote/service_identity.tf
@@ -34,8 +34,6 @@
http://localhost:3009
EOT
production_allow_origin_list = "https://web.comm.app"
-
- comm_services_use_json_logs = "true"
}
data "aws_secretsmanager_secret" "identity_server_setup" {

File Metadata

Mime Type
text/plain
Expires
Fri, Sep 20, 10:14 PM (28 m, 29 s)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
2154519
Default Alt Text
D13395.diff (3 KB)

Event Timeline