Page Menu | Home | Phabricator

D13002.diff
No One | Temporary

D13002.diff

diff --git a/services/terraform/remote/aws_cloudwatch_alarms.tf b/services/terraform/remote/aws_cloudwatch_alarms.tf
--- a/services/terraform/remote/aws_cloudwatch_alarms.tf
+++ b/services/terraform/remote/aws_cloudwatch_alarms.tf
@@ -62,6 +62,12 @@
endpoint = local.error_reports_subscribed_email
}
+resource "aws_sns_topic_subscription" "ecs_task_stop_subscription" {
+ topic_arn = aws_sns_topic.ecs_task_stop_topic.arn
+ protocol = "email"
+ endpoint = local.error_reports_subscribed_email
+}
+
resource "aws_cloudwatch_log_metric_filter" "identity_error_filters" {
for_each = local.identity_error_patterns
@@ -92,49 +98,65 @@
alarm_actions = [aws_sns_topic.identity_error_topic.arn]
}
+resource "aws_sns_topic" "ecs_task_stop_topic" {
+ name = "ecs-task-stop-topic"
+}
+
resource "aws_cloudwatch_event_rule" "ecs_task_stop" {
name = "ecs-task-stop-rule"
- description = "Trigger when ECS tasks are stopped"
+ description = "Filters for ecs task stop events"
event_pattern = jsonencode({
- source = ["aws.ecs"],
- detail-type = ["ECS Task State Change"],
+ source = ["aws.ecs"],
detail = {
lastStatus = ["STOPPED"],
- clusterArn = [aws_ecs_cluster.comm_services.arn]
- group = ["service:${aws_ecs_service.identity_service.name}"]
+ clusterArn = [aws_ecs_cluster.comm_services.arn],
+ containers = {
+ exitCode = [
+ {
+ anything-but = 0
+ }
+ ]
+ }
}
})
}
-resource "aws_sns_topic" "ecs_task_stop_topic" {
- name = "ecs-task-stopped-topic"
+resource "aws_cloudwatch_log_group" "ecs_task_stop" {
+ name = "/aws/events/ecs_task_stop"
+ retention_in_days = 1
}
-resource "aws_sns_topic_subscription" "ecs_task_stop_subscription" {
- topic_arn = aws_sns_topic.ecs_task_stop_topic.arn
- protocol = "email"
- endpoint = local.error_reports_subscribed_email
+resource "aws_cloudwatch_event_target" "ecs_task_stop" {
+ rule = aws_cloudwatch_event_rule.ecs_task_stop.name
+ arn = aws_cloudwatch_log_group.ecs_task_stop.arn
}
-resource "aws_cloudwatch_metric_alarm" "identity_ecs_task_stop" {
- alarm_name = "IdentityECSTaskStop"
+resource "aws_cloudwatch_log_metric_filter" "ecs_task_stop" {
+ name = "ECSTaskStopCount"
+ log_group_name = aws_cloudwatch_log_group.ecs_task_stop.name
+ pattern = "{ $.detail.stopCode = \"EssentialContainerExited\" }"
+
+ metric_transformation {
+ name = "ECSTaskStopCount"
+ namespace = "ECSMetrics"
+ value = "1"
+ default_value = 0
+ }
+}
+
+resource "aws_cloudwatch_metric_alarm" "ecs_task_stop" {
+ alarm_name = "ECSTaskStop"
comparison_operator = "GreaterThanOrEqualToThreshold"
evaluation_periods = "1"
- metric_name = "TaskStop"
- namespace = "AWS/ECS"
- period = "60"
- statistic = "SampleCount"
+ metric_name = "ECSTaskStopCount"
+ namespace = "ECSMetrics"
+ period = "300"
+ statistic = "Sum"
threshold = "1"
alarm_description = "This metric monitors ECS tasks stops"
-
- dimensions = {
- ClusterName = aws_ecs_cluster.comm_services.name
- ServiceName = aws_ecs_service.identity_service.name
- }
-
- actions_enabled = true
- alarm_actions = [aws_sns_topic.ecs_task_stop_topic.arn]
+ actions_enabled = true
+ alarm_actions = [aws_sns_topic.ecs_task_stop_topic.arn]
}
resource "aws_sns_topic" "service_connection_error_topic" {

File Metadata

Mime Type
text/plain
Expires
Fri, Sep 20, 3:25 PM (21 h, 18 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
2150977
Default Alt Text
D13002.diff (3 KB)

Event Timeline