Page MenuHomePhabricator

D12731.diff
No OneTemporary

D12731.diff

diff --git a/services/terraform/self-host/aws-deploy.sh b/services/terraform/self-host/aws-deploy.sh
new file mode 100755
--- /dev/null
+++ b/services/terraform/self-host/aws-deploy.sh
@@ -0,0 +1,145 @@
+#!/usr/bin/env bash
+
+# Disable AWS cli command pager outputs
+export AWS_PAGER=""
+
+# The names below are read-only: each one must match a Terraform resource name
+# Do not change without replacing keyserver_cluster name in aws_ecs.tf
+readonly cluster_name="keyserver-cluster"
+
+# Do not change without replacing keyserver_primary_service
+# name in keyserver_primary.tf
+readonly primary_service_name="keyserver-primary-service"
+
+# Do not change without replacing keyserver_secondary_service
+# name in keyserver_secondary.tf
+readonly secondary_service_name="keyserver-secondary-service"
+
+# Grab user configuration variables from terraform.tfvars.json
+health_check_domain=$(echo "var.domain_name" | terraform console -var-file terraform.tfvars.json | tr -d '"')
+health_check_url="https://${health_check_domain}/health"
+num_desired_secondary_nodes=$(echo "var.desired_secondary_nodes" | terraform console -var-file terraform.tfvars.json)
+[[ -n "$health_check_domain" && "$num_desired_secondary_nodes" =~ ^[0-9]+$ ]] || { echo "Error: could not read domain_name and desired_secondary_nodes from terraform.tfvars.json." >&2; exit 1; }
+if [[ -z "${AWS_ACCESS_KEY_ID:-}" || -z "${AWS_SECRET_ACCESS_KEY:-}" ]]; then
+  echo "Error: AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY environment variables must be set to run migration." >&2
+  exit 1
+fi
+# This also fails when the aws CLI is not installed, hence the nix hint below
+if ! aws sts get-caller-identity > /dev/null; then
+  echo "Error: Invalid AWS credentials or not in nix shell. Please check your AWS configuration and/or run nix develop." >&2
+  exit 1
+fi
+
+convert_seconds() {
+  # Print $1 (whole seconds) as "M minute(s) and S seconds", or "S seconds" under a minute
+  local total_seconds="$1"
+  local minutes="$((total_seconds / 60))"
+  local seconds="$((total_seconds % 60))"
+  if (( minutes > 0 )); then
+    echo "${minutes} minute(s) and ${seconds} seconds"
+  else
+    echo "${seconds} seconds"
+  fi
+}
+
+check_health() {
+  # Poll $health_check_url every retry_interval seconds until it answers HTTP 200.
+  # Logs every other status with the time waited so far; loops until success, then returns 0.
+  local retry_interval=10 total_elapsed_time=0
+  local http_code converted_time
+  while true; do
+    http_code=$(curl -s -o /dev/null -w "%{http_code}" "$health_check_url")
+    if [[ "$http_code" -eq 200 ]]; then
+      echo "Health check returned status 200 OK. Primary keyserver node ready"
+      return 0
+    fi
+    total_elapsed_time="$(( total_elapsed_time + retry_interval ))"
+    converted_time="$(convert_seconds "$total_elapsed_time")"
+    echo "Health check returned status $http_code. Elapsed time: ${converted_time}."
+    sleep "$retry_interval"
+  done
+}
+
+disable_lb_traffic() {
+  # Revoke the 0.0.0.0/0 ingress rule for TCP port 443 on the security group named "lb-sg"
+  local lb_sg_id
+  lb_sg_id="$(aws ec2 describe-security-groups --filters "Name=group-name,Values=lb-sg" --query "SecurityGroups[0].GroupId" --output text)"
+  aws ec2 revoke-security-group-ingress --group-id "$lb_sg_id" \
+    --protocol tcp --port 443 --cidr 0.0.0.0/0 > /dev/null
+}
+
+enable_lb_traffic() {
+  # Authorize 0.0.0.0/0 ingress for TCP port 443 on the security group named "lb-sg"
+  local lb_sg_id
+  lb_sg_id="$(aws ec2 describe-security-groups --filters "Name=group-name,Values=lb-sg" --query "SecurityGroups[0].GroupId" --output text)"
+  aws ec2 authorize-security-group-ingress --group-id "$lb_sg_id" \
+    --protocol tcp --port 443 --cidr 0.0.0.0/0 > /dev/null
+}
+
+# Stop all primary and secondary tasks and disable traffic to load balancer
+echo "Disabling traffic to load balancer"
+disable_lb_traffic
+
+http_code=$(curl -s -o /dev/null -w "%{http_code}" --max-time 5 "$health_check_url")
+if [[ "$http_code" -eq 000 ]]; then
+ echo "Error: Health check timed out trying to access keyserver domain at ${health_check_url}."
+ echo "Ensure terraform variable allowed_ip is properly configured and run terraform apply"
+
+ echo "Re-enabling traffic to load balancer until domain is accessible and migration script is rerun"
+ enable_lb_traffic
+ exit 1
+fi
+
+echo "Set desired count of secondary service to 0"
+aws ecs update-service --cluster "$cluster_name" --service "$secondary_service_name" --desired-count 0 > /dev/null
+
+echo "Taking down all secondary nodes in $cluster_name"
+
+task_arns=$(aws ecs list-tasks --cluster "$cluster_name" --service-name "$secondary_service_name" --query 'taskArns[*]' --output text)
+
+for task_arn in $task_arns; do
+ echo "Stopping secondary node running on task $task_arn"
+ aws ecs stop-task --cluster "$cluster_name" --task "$task_arn" > /dev/null
+done
+
+echo "Set desired count of primary service to 0"
+aws ecs update-service --cluster "$cluster_name" --service "$primary_service_name" --desired-count 0 > /dev/null
+
+echo "Taking down primary node in $cluster_name"
+primary_task_arns=$(aws ecs list-tasks --cluster "$cluster_name" --service-name "$primary_service_name" --query 'taskArns[*]' --output text)
+
+for primary_task_arn in $primary_task_arns; do
+ echo "Stopping primary node running on task $primary_task_arn"
+ aws ecs stop-task --cluster "$cluster_name" --task "$primary_task_arn" > /dev/null
+done
+
+echo "Waiting until primary and secondary nodes have been shutdown"
+total_elapsed_time=0
+retry_interval=10
+while true; do
+ http_code=$(curl -s -o /dev/null -w "%{http_code}" "$health_check_url")
+
+ echo "Health check returned status $http_code. Elapsed time: $(convert_seconds $total_elapsed_time)"
+
+ if [[ "$http_code" -ne 200 ]]; then
+ echo "Stopping primary and secondary nodes was successful. Continuing with migration."
+ break
+ fi
+
+ total_elapsed_time=$(( total_elapsed_time + retry_interval ))
+ sleep $retry_interval
+done
+
+# Restart the primary alone and wait for a 200 health check before reopening access
+echo "Redeploying primary service in $cluster_name"
+aws ecs update-service --cluster "$cluster_name" --service "$primary_service_name" \
+  --desired-count 1 --force-new-deployment > /dev/null
+echo "Waiting for health check at $health_check_domain to return status 200 OK"
+check_health
+
+echo "Primary node successfully running. Re-enabling public access to load balancer"
+enable_lb_traffic
+echo "Setting desired count of secondary service to ${num_desired_secondary_nodes}."
+aws ecs update-service --cluster "$cluster_name" --service "$secondary_service_name" \
+  --force-new-deployment --desired-count "$num_desired_secondary_nodes" > /dev/null
+echo "Successfully ran migration"
diff --git a/services/terraform/self-host/aws_ecs.tf b/services/terraform/self-host/aws_ecs.tf
--- a/services/terraform/self-host/aws_ecs.tf
+++ b/services/terraform/self-host/aws_ecs.tf
@@ -1,4 +1,5 @@
resource "aws_ecs_cluster" "keyserver_cluster" {
+ # Do not change without replacing cluster_name in aws-deploy.sh
name = "keyserver-cluster"
configuration {
diff --git a/services/terraform/self-host/aws_lb.tf b/services/terraform/self-host/aws_lb.tf
--- a/services/terraform/self-host/aws_lb.tf
+++ b/services/terraform/self-host/aws_lb.tf
@@ -10,6 +10,13 @@
cidr_blocks = ["0.0.0.0/0"]
}
+ ingress {
+ from_port = 443
+ to_port = 443
+ protocol = "tcp"
+ cidr_blocks = ["${var.allowed_ip}/32"]
+ }
+
egress {
from_port = 0
to_port = 0
diff --git a/services/terraform/self-host/keyserver_primary.tf b/services/terraform/self-host/keyserver_primary.tf
--- a/services/terraform/self-host/keyserver_primary.tf
+++ b/services/terraform/self-host/keyserver_primary.tf
@@ -136,6 +136,7 @@
resource "aws_ecs_service" "keyserver_primary_service" {
depends_on = [null_resource.create_comm_database]
+ # Do not change name without replacing primary_service_name in aws-deploy.sh
name = "keyserver-primary-service"
cluster = aws_ecs_cluster.keyserver_cluster.id
task_definition = aws_ecs_task_definition.keyserver_primary_service.arn
diff --git a/services/terraform/self-host/keyserver_secondary.tf b/services/terraform/self-host/keyserver_secondary.tf
--- a/services/terraform/self-host/keyserver_secondary.tf
+++ b/services/terraform/self-host/keyserver_secondary.tf
@@ -147,6 +147,7 @@
resource "aws_ecs_service" "keyserver_secondary_service" {
depends_on = [aws_ecs_service.keyserver_primary_service]
+ # Do not change name without replacing secondary_service_name in aws-deploy.sh
name = "keyserver-secondary-service"
cluster = aws_ecs_cluster.keyserver_cluster.id
task_definition = aws_ecs_task_definition.keyserver_secondary_service.arn

File Metadata

Mime Type
text/plain
Expires
Sat, Nov 9, 4:51 AM (19 h, 57 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
2448773
Default Alt Text
D12731.diff (7 KB)

Event Timeline