diff --git a/services/terraform/self-host/aws-deploy.sh b/services/terraform/self-host/aws-deploy.sh
index 72554faca..a85f6bf0f 100755
--- a/services/terraform/self-host/aws-deploy.sh
+++ b/services/terraform/self-host/aws-deploy.sh
@@ -1,149 +1,181 @@
 #!/bin/bash
 
 # Disable AWS cli command pager outputs
 export AWS_PAGER=""
 
 # Do not change without replacing keyserver_cluster name in aws_ecs.tf
 cluster_name="keyserver-cluster"
 
 # Do not change without replacing keyserver_primary_service
 # name in keyserver_primary.tf
 primary_service_name="keyserver-primary-service"
 
 # Do not change without replacing keyserver_secondary_service
 # name in keyserver_secondary.tf
 secondary_service_name="keyserver-secondary-service"
 
 # Grab user configuration variables from terraform.tfvars
 health_check_domain=$(echo "var.keyserver_domain_name" | terraform console -var-file terraform.tfvars.json | tr -d '"')
 health_check_url="https://${health_check_domain}/health"
 num_desired_secondary_nodes=$(echo "var.desired_secondary_nodes" | terraform console -var-file terraform.tfvars.json)
 aws_region=$(echo "var.region" | terraform console -var-file terraform.tfvars.json | tr -d '"')
 
 # Set aws-cli region to aws region self-hosted keyserver is deployed on
 export AWS_REGION=$aws_region
 
 if [[ -z "${AWS_ACCESS_KEY_ID}" || -z "${AWS_SECRET_ACCESS_KEY}" ]]; then
   echo "Error: AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY environment variables must be set to run migration."
   exit 1
 fi
 
 if ! aws sts get-caller-identity > /dev/null; then
   echo "Error: Invalid AWS credentials or not in nix shell. Please check your AWS configuration and/or run nix develop."
   exit 1
 fi
 
+# Get the current public IPv4 address over HTTPS. --fail and --max-time keep an
+# HTTP error page or a stalled lookup from ever being used as the ingress CIDR
+ip_address="$(curl -sf --max-time 10 https://ipv4.wtfismyip.com/text)"
+if [[ ! "$ip_address" =~ ^[0-9]{1,3}(\.[0-9]{1,3}){3}$ ]]; then
+  echo "Failed to retrieve IP address. Exiting."
+  exit 1
+fi
+
+# Grab resource info from AWS
+keyserver_lb_sg_id="$(aws ec2 describe-security-groups --filters "Name=group-name,Values=lb-sg" --query "SecurityGroups[0].GroupId" --output text)"
+# With --output text a missing security group is printed as the literal "None"
+if [[ -z "$keyserver_lb_sg_id" || "$keyserver_lb_sg_id" == "None" ]]; then
+  echo "Error: Could not find load balancer security group lb-sg. Exiting."
+  exit 1
+fi
+
 convert_seconds() {
   total_seconds="$1"
   minutes="$((total_seconds / 60))"
   seconds="$((total_seconds % 60))"
 
   if (( minutes > 0 )); then
     echo "${minutes} minute(s) and ${seconds} seconds"
   else
     echo "${seconds} seconds"
   fi
 }
 
 check_health() {
   local retry_interval=10
   local total_elapsed_time=0
 
   while true; do
     http_code=$(curl -s -o /dev/null -w "%{http_code}" "$health_check_url")
 
     if [[ "$http_code" -eq 200 ]]; then
       echo "Health check returned status 200 OK $http_code. Primary keyserver node ready"
       return 0
     fi
 
     total_elapsed_time="$(( total_elapsed_time + retry_interval ))"
     converted_time="$(convert_seconds $total_elapsed_time)"
 
     echo "Health check returned status $http_code. Elapsed time: ${converted_time}."
     sleep $retry_interval
   done
 }
 
-disable_lb_traffic() {
+# Restricts port 443 on the load balancer security group to the deployer's IP
+disable_general_lb_traffic() {
+  # disables general ip access
   aws ec2 revoke-security-group-ingress \
-    --group-id "$(aws ec2 describe-security-groups --filters "Name=group-name,Values=lb-sg" --query "SecurityGroups[0].GroupId" --output text)" \
+    --group-id "$keyserver_lb_sg_id" \
     --protocol tcp \
     --port 443 \
     --cidr 0.0.0.0/0 > /dev/null
+
+  # enables traffic only for ip calling aws deploy script
+  aws ec2 authorize-security-group-ingress \
+    --group-id "$keyserver_lb_sg_id" \
+    --protocol tcp \
+    --port 443 \
+    --cidr "${ip_address}/32" > /dev/null
 }
 
+# Reopens port 443 to all addresses and drops the deployer-only ingress rule
 enable_lb_traffic() {
   aws ec2 authorize-security-group-ingress \
-    --group-id "$(aws ec2 describe-security-groups --filters "Name=group-name,Values=lb-sg" --query "SecurityGroups[0].GroupId" --output text)" \
+    --group-id "$keyserver_lb_sg_id" \
     --protocol tcp \
     --port 443 \
     --cidr 0.0.0.0/0 > /dev/null
+
+  # disables personal ip address ingress rule as no longer necessary
+  aws ec2 revoke-security-group-ingress \
+    --group-id "$keyserver_lb_sg_id" \
+    --protocol tcp \
+    --port 443 \
+    --cidr "${ip_address}/32" > /dev/null
 }
 
 # Stop all primary and secondary tasks and disable traffic to load balancer
 echo "Disabling traffic to load balancer"
-disable_lb_traffic
+disable_general_lb_traffic
 
 http_code=$(curl -s -o /dev/null -w "%{http_code}" --max-time 5 "$health_check_url")
 if [[ "$http_code" -eq 000 ]]; then
   echo "Error: Health check timed out trying to access keyserver domain at ${health_check_url}."
-  echo "Ensure terraform variable allowed_ip is properly configured and run terraform apply"
   echo "Re-enabling traffic to load balancer until domain is accessible and migration script is rerun"
   enable_lb_traffic
   exit 1
 fi
 
 echo "Set desired count of secondary service to 0"
 aws ecs update-service --cluster "$cluster_name" --service "$secondary_service_name" --desired-count 0 > /dev/null
 
 echo "Taking down all secondary nodes in $cluster_name"
 task_arns=$(aws ecs list-tasks --cluster "$cluster_name" --service-name "$secondary_service_name" --query 'taskArns[*]' --output text)
 
 for task_arn in $task_arns; do
   echo "Stopping secondary node running on task $task_arn"
   aws ecs stop-task --cluster "$cluster_name" --task "$task_arn" > /dev/null
 done
 
 echo "Set desired count of primary service to 0"
 aws ecs update-service --cluster "$cluster_name" --service "$primary_service_name" --desired-count 0 > /dev/null
 
 echo "Taking down primary node in $cluster_name"
 primary_task_arns=$(aws ecs list-tasks --cluster "$cluster_name" --service-name "$primary_service_name" --query 'taskArns[*]' --output text)
 
 for primary_task_arn in $primary_task_arns; do
   echo "Stopping primary node running on task $primary_task_arn"
   aws ecs stop-task --cluster "$cluster_name" --task "$primary_task_arn" > /dev/null
 done
 
 echo "Waiting until primary and secondary nodes have been shutdown"
 total_elapsed_time=0
 retry_interval=10
 
 while true; do
   http_code=$(curl -s -o /dev/null -w "%{http_code}" "$health_check_url")
 
   echo "Health check returned status $http_code. Elapsed time: $(convert_seconds $total_elapsed_time)"
 
   if [[ "$http_code" -ne 200 ]]; then
     echo "Stopping primary and secondary nodes was successful. Continuing with migration."
     break
   fi
 
   total_elapsed_time=$(( total_elapsed_time + retry_interval ))
   sleep $retry_interval
 done
 
 echo "Redeploying primary service in $cluster_name"
 aws ecs update-service --cluster "$cluster_name" --service "$primary_service_name" --force-new-deployment --desired-count 1 > /dev/null
 
 echo "Waiting for health check at $health_check_domain to return status 200 OK"
 check_health
 
 echo "Primary node successfully running. Re-enabling public access to load balancer"
 enable_lb_traffic
 
 echo "Setting desired count of secondary service to $num_desired_secondary_nodes".
 aws ecs update-service --cluster "$cluster_name" --service "$secondary_service_name" --desired-count "$num_desired_secondary_nodes" --force-new-deployment > /dev/null
 
 echo "Successfully ran migration"
diff --git a/services/terraform/self-host/aws_lb.tf b/services/terraform/self-host/aws_lb.tf
index 2ece8a81d..18f78bcf2 100644
--- a/services/terraform/self-host/aws_lb.tf
+++ b/services/terraform/self-host/aws_lb.tf
@@ -1,84 +1,77 @@
 resource "aws_security_group" "lb_sg" {
   name        = "lb-sg"
   description = "Security group for keyserver load balancer"
   vpc_id      = local.vpc_id
 
   ingress {
     from_port   = 443
     to_port     = 443
     protocol    = "tcp"
     cidr_blocks = ["0.0.0.0/0"]
   }
 
-  ingress {
-    from_port   = 443
-    to_port     = 443
-    protocol    = "tcp"
-    cidr_blocks = [for ip in var.allowed_ips : "${ip}/32"]
-  }
-
   egress {
     from_port   = 0
     to_port     = 0
     protocol    = "-1"
     cidr_blocks = ["0.0.0.0/0"]
   }
 }
 
 resource "aws_lb_target_group" "keyserver_service" {
   name     = "keyserver-service-ecs-tg"
   port     = 3000
   protocol = "HTTP"
   vpc_id   = local.vpc_id
 
   # "awsvpc" network mode requires target type set to ip
   target_type = "ip"
 
   stickiness {
     type            = "lb_cookie"
     cookie_duration = 86500
     enabled         = true
   }
 
   health_check {
     enabled             = true
     healthy_threshold   = 2
     unhealthy_threshold = 3
 
     protocol = "HTTP"
     path     = "/health"
     matcher  = "200-299"
   }
 }
 
 resource "aws_lb" "keyserver_service" {
   load_balancer_type = "application"
   name               = "keyserver-service-lb"
   security_groups    = [aws_security_group.lb_sg.id]
 
   internal = false
   subnets  = local.vpc_subnets
 }
 
 resource "aws_lb_listener" "keyserver_service" {
   load_balancer_arn = aws_lb.keyserver_service.arn
   port              = "443"
   protocol          = "HTTPS"
   ssl_policy        = "ELBSecurityPolicy-2016-08"
   certificate_arn   = data.aws_acm_certificate.keyserver_service.arn
 
   default_action {
     type             = "forward"
     target_group_arn = aws_lb_target_group.keyserver_service.arn
   }
 
   lifecycle {
     ignore_changes       = [default_action[0].forward[0].stickiness[0].duration]
     replace_triggered_by = [aws_lb_target_group.keyserver_service]
   }
 }
 
 data "aws_acm_certificate" "keyserver_service" {
   domain   = var.keyserver_domain_name
   statuses = ["ISSUED"]
 }