Page MenuHomePhabricator

D12579.diff
No OneTemporary

D12579.diff

diff --git a/services/terraform/remote/aws_cloudwatch_alarms.tf b/services/terraform/remote/aws_cloudwatch_alarms.tf
--- a/services/terraform/remote/aws_cloudwatch_alarms.tf
+++ b/services/terraform/remote/aws_cloudwatch_alarms.tf
@@ -13,6 +13,16 @@
Tunnelbroker = { name = "Tunnelbroker", pattern = "Tunnelbroker Error" }
Http = { name = "HTTP", pattern = "HTTP Error" }
}
+
+  # Services whose logs are scanned for connection errors, keyed by service.
+  # `name` is the prefix used when building per-service metric and alarm
+  # names; `log_group_name` is the CloudWatch log group the filter attaches to.
+  service_log_groups = {
+    Backup = {
+      name           = "Backup"
+      log_group_name = "/ecs/backup-service-task-def"
+    }
+    Blob = {
+      name           = "Blob"
+      log_group_name = "/ecs/blob-service-task-def"
+    }
+    ElectronUpdate = {
+      name           = "ElectronUpdate"
+      log_group_name = "/ecs/electron-update-task-def"
+    }
+    FeatureFlags = {
+      name           = "FeatureFlags"
+      log_group_name = "/ecs/feature-flags-task-def"
+    }
+    Identity = {
+      name           = "Identity"
+      log_group_name = "/ecs/identity-service-task-def"
+    }
+    Reports = {
+      name           = "Reports"
+      log_group_name = "/ecs/reports-service-task-def"
+    }
+    Tunnelbroker = {
+      name           = "Tunnelbroker"
+      log_group_name = "/ecs/tunnelbroker-task-def"
+    }
+  }
}
resource "aws_sns_topic" "lambda_alarm_topic" {
@@ -126,3 +136,44 @@
actions_enabled = true
alarm_actions = [aws_sns_topic.ecs_task_stop_topic.arn]
}
+
+# SNS topic used as the alarm action target for the per-service
+# connection error alarms.
+resource "aws_sns_topic" "service_connection_error_topic" {
+  name = "service-connection-error-topic"
+}
+
+# Email subscription on the connection error topic; the recipient address
+# comes from local.error_reports_subscribed_email.
+resource "aws_sns_topic_subscription" "service_connection_error_email_subscription" {
+  protocol  = "email"
+  endpoint  = local.error_reports_subscribed_email
+  topic_arn = aws_sns_topic.service_connection_error_topic.arn
+}
+
+# One log metric filter per entry in local.service_log_groups. Every log
+# event matching the pattern publishes the value 1 to the
+# "<name>ConnectionErrorCount" metric in the ServiceConnectionMetricFilters
+# namespace.
+resource "aws_cloudwatch_log_metric_filter" "service_connection_error_filters" {
+  for_each = local.service_log_groups
+
+  log_group_name = each.value.log_group_name
+  name           = "${each.value.name}ConnectionErrorCount"
+
+  # NOTE(review): as an unquoted multi-term filter pattern this presumably
+  # matches events containing both "dns" and "error" rather than the exact
+  # phrase -- confirm that is the intended match.
+  pattern = "dns error"
+
+  metric_transformation {
+    namespace = "ServiceConnectionMetricFilters"
+    name      = "${each.value.name}ConnectionErrorCount"
+    value     = "1"
+  }
+}
+
+# One alarm per entry in local.service_log_groups. Each alarm watches the
+# metric emitted by the service_connection_error_filters instance with the
+# same key and notifies service_connection_error_topic when the Sum over a
+# single 300-second period is >= 1.
+resource "aws_cloudwatch_metric_alarm" "service_connection_error_alarms" {
+  for_each = local.service_log_groups
+
+  alarm_name        = "${each.value.name}ConnectionErrorAlarm"
+  alarm_description = "Alarm when ${each.value.name} connection errors exceed threshold"
+
+  # Take the metric identity from the filter resource instead of repeating
+  # the string literals: the alarm cannot drift from the metric the filter
+  # actually emits, and Terraform gets an explicit dependency on the filter.
+  metric_name = aws_cloudwatch_log_metric_filter.service_connection_error_filters[each.key].metric_transformation[0].name
+  namespace   = aws_cloudwatch_log_metric_filter.service_connection_error_filters[each.key].metric_transformation[0].namespace
+
+  statistic           = "Sum"
+  period              = 300
+  evaluation_periods  = 1
+  comparison_operator = "GreaterThanOrEqualToThreshold"
+  threshold           = 1
+
+  actions_enabled = true
+  alarm_actions   = [aws_sns_topic.service_connection_error_topic.arn]
+}
+

File Metadata

Mime Type
text/plain
Expires
Sun, Dec 1, 4:10 AM (19 h, 38 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
2603618
Default Alt Text
D12579.diff (2 KB)

Event Timeline