そのままでは動かないので参考までに。
DatadogでAPIの外形監視をして、応答がなければDatadog -> Lambda -> EC2(SSM)経由でEC2のNginxを再起動します。
もくじ
Datadog側 Terraform
terraform.tfvars
# Datadog credentials passed to the "datadog" provider
datadog_api_key = "xxxxx"
datadog_app_key = "yyyyy"

# SampleApp health-check URL
health_check_url_sample_app = "https://example.net/login"

# Slack notification target
slack_channel_name = "slack-datadog通知"
variables.tf
# URL probed by the Datadog Synthetics API test.
variable "health_check_url_sample_app" {
  type = string
}

# Handle mentioned (as "@<value>") in the Synthetics test notification message.
variable "slack_channel_name" {
  type = string
}

# Datadog API key read by the "datadog" provider block.
# Declared here because terraform.tfvars sets it and the provider references it;
# without the declaration Terraform fails on an undeclared input variable.
variable "datadog_api_key" {
  type      = string
  sensitive = true
}

# Datadog application key read by the "datadog" provider block.
variable "datadog_app_key" {
  type      = string
  sensitive = true
}
provider.tf
terraform {
  required_version = "= 1.1.1"

  required_providers {
    aws = {
      source  = "hashicorp/aws"
      version = "4.10.0"
    }
    datadog = {
      source = "DataDog/datadog"
    }
  }

  # State is stored in an S3 backend.
  backend "s3" {
    bucket  = "sampleapp-terraform-xxxxx"
    region  = "ap-northeast-1"
    profile = "terraform-local-deployer"
    # The key must be unique per environment.
    key = "datadog/terraform.tfstate"
  }
}

# Datadog provider, authenticated with the keys supplied as input variables.
provider "datadog" {
  api_key = var.datadog_api_key
  app_key = var.datadog_app_key
}
synthetics_test.tf
# Synthetics HTTP API test against the SampleApp health-check URL.
# Passes when the response status is 200 and the response time is below 2000.
resource "datadog_synthetics_test" "sample_app" {
  name    = "An API test on ${var.health_check_url_sample_app} [SampleApp]"
  type    = "api"
  subtype = "http"
  status  = "live"
  tags    = ["env:production", "service:proxy"]

  locations = ["aws:ap-northeast-1"]

  request_definition {
    method = "GET"
    url    = var.health_check_url_sample_app
  }

  assertion {
    type     = "statusCode"
    operator = "is"
    target   = "200"
  }

  assertion {
    type     = "responseTime"
    operator = "lessThan"
    # Quoted for consistency with the statusCode assertion above.
    target = "2000"
  }

  options_list {
    # Run the check every minute.
    tick_every = 60

    retry {
      count    = 2
      interval = 300
    }

    monitor_options {
      renotify_interval = 60
    }
  }

  # The Slack handle is notified on every state change. The webhook handle is
  # wrapped in {{#is_alert}} so the restart Lambda is only invoked while the
  # test is alerting; without the guard it would also fire on the recovery
  # notification and restart Nginx right after it came back.
  message = "API TEST on ${var.health_check_url_sample_app} @${var.slack_channel_name} {{#is_alert}}@webhook-${datadog_webhook.restart_nginx_ec2.name}{{/is_alert}} [SampleApp]"
}
webhook.tf
# Datadog webhook that POSTs a JSON payload to the Lambda function URL.
# Notification messages trigger it with "@webhook-<name>".
resource "datadog_webhook" "restart_nginx_ec2" {
  name      = "restart-nginx-ec2-production"
  url       = "https://xxxxx.lambda-url.ap-northeast-1.on.aws/"
  encode_as = "json"

  # The $NAME placeholders are Datadog webhook variables, not Terraform
  # interpolations, so they are passed through as-is.
  payload = <<EOF
{
  "body": "$EVENT_MSG",
  "last_updated": "$LAST_UPDATED",
  "event_type": "$EVENT_TYPE",
  "title": "$EVENT_TITLE",
  "date": "$DATE",
  "org": {
    "id": "$ORG_ID",
    "name": "$ORG_NAME"
  },
  "id": "$ID"
}
EOF
}
AWS側 Terraform
# ---------------------------------------------------------------------------
# Role for the Lambda function that operates EC2 through SSM
# ---------------------------------------------------------------------------
resource "aws_iam_role" "lambda_ssm_role" {
  name               = "lambda-ssm-role-${var.ENV_VALUE_ENVIRONMENT}"
  assume_role_policy = file("${path.module}/policy/assume-lambda.json")
}

resource "aws_iam_policy" "lambda_ssm_role_policy" {
  name   = "lambda-ssm-policy-${var.ENV_VALUE_ENVIRONMENT}"
  policy = file("${path.module}/policy/lambda-ssm.json")
}

resource "aws_iam_role_policy_attachment" "lambda_ssm_role_attach_policy" {
  role       = aws_iam_role.lambda_ssm_role.name
  policy_arn = aws_iam_policy.lambda_ssm_role_policy.arn
}

# ---------------------------------------------------------------------------
# Role for the EC2 instance
# ---------------------------------------------------------------------------
resource "aws_iam_role" "sample_app_ec2_role" {
  name               = "sample_app-ec2-role-${var.ENV_VALUE_ENVIRONMENT}"
  assume_role_policy = file("${path.module}/policy/assume-ec2.json")
}

resource "aws_iam_policy" "sample_app_ec2_role_policy" {
  name   = "sample_app-ec2-policy-${var.ENV_VALUE_ENVIRONMENT}"
  policy = file("${path.module}/policy/sample-app-ec2.json")
}

resource "aws_iam_role_policy_attachment" "sample_app_ec2_role_attach_policy" {
  role       = aws_iam_role.sample_app_ec2_role.name
  policy_arn = aws_iam_policy.sample_app_ec2_role_policy.arn
}

# EC2 instance profile wrapping the EC2 role
resource "aws_iam_instance_profile" "ec2_profile" {
  name = "sample_app-ec2-profile-${var.ENV_VALUE_ENVIRONMENT}"
  role = aws_iam_role.sample_app_ec2_role.name
}
assume-lambda.json
{ "Version": "2012-10-17", "Statement": [ { "Effect": "Allow", "Principal": { "Service": "lambda.amazonaws.com" }, "Action": "sts:AssumeRole" } ] }
lambda-ssm.json
{ "Version": "2012-10-17", "Statement": [ { "Effect": "Allow", "Action": [ "cloudwatch:PutMetricData", "ds:CreateComputer", "ds:DescribeDirectories", "ec2:DescribeInstanceStatus", "logs:*", "ssm:*", "ec2messages:*" ], "Resource": "*" }, { "Effect": "Allow", "Action": "iam:CreateServiceLinkedRole", "Resource": "arn:aws:iam::*:role/aws-service-role/ssm.amazonaws.com/AWSServiceRoleForAmazonSSM*", "Condition": { "StringLike": { "iam:AWSServiceName": "ssm.amazonaws.com" } } }, { "Effect": "Allow", "Action": [ "iam:DeleteServiceLinkedRole", "iam:GetServiceLinkedRoleDeletionStatus" ], "Resource": "arn:aws:iam::*:role/aws-service-role/ssm.amazonaws.com/AWSServiceRoleForAmazonSSM*" }, { "Effect": "Allow", "Action": [ "ssmmessages:CreateControlChannel", "ssmmessages:CreateDataChannel", "ssmmessages:OpenControlChannel", "ssmmessages:OpenDataChannel" ], "Resource": "*" } ] }
assume-ec2.json
{ "Version": "2012-10-17", "Statement": [ { "Effect": "Allow", "Principal": { "Service": "ec2.amazonaws.com" }, "Action": "sts:AssumeRole" } ] }
sample-app-ec2.json
{ "Version": "2012-10-17", "Statement": [ { "Action": [ "logs:*" ], "Effect": "Allow", "Resource": "*" }, { "Effect": "Allow", "Action": [ "ssm:DescribeAssociation", "ssm:GetDeployablePatchSnapshotForInstance", "ssm:GetDocument", "ssm:DescribeDocument", "ssm:GetManifest", "ssm:GetParameters", "ssm:ListAssociations", "ssm:ListInstanceAssociations", "ssm:PutInventory", "ssm:PutComplianceItems", "ssm:PutConfigurePackageResult", "ssm:UpdateAssociationStatus", "ssm:UpdateInstanceAssociationStatus", "ssm:UpdateInstanceInformation" ], "Resource": "*" }, { "Effect": "Allow", "Action": [ "ssmmessages:CreateControlChannel", "ssmmessages:CreateDataChannel", "ssmmessages:OpenControlChannel", "ssmmessages:OpenDataChannel" ], "Resource": "*" }, { "Effect": "Allow", "Action": [ "ec2messages:AcknowledgeMessage", "ec2messages:DeleteMessage", "ec2messages:FailMessage", "ec2messages:GetEndpoint", "ec2messages:GetMessages", "ec2messages:SendReply" ], "Resource": "*" }, { "Effect": "Allow", "Action": [ "cloudwatch:PutMetricData" ], "Resource": "*" }, { "Effect": "Allow", "Action": [ "ec2:DescribeInstanceStatus" ], "Resource": "*" } ] }
lambda.tf
# Lambda function that restarts Nginx on the EC2 instance.

# Zip the handler source directory so it can be uploaded as the function package.
data "archive_file" "restart_nginx_ec2" {
  type        = "zip"
  source_dir  = "${path.module}/../../src/${var.ENV_VALUE_ENVIRONMENT}/${var.lambda_repository_name}/lambda/functions/restartNginxEc2"
  output_path = "${path.module}/../../upload/${var.ENV_VALUE_ENVIRONMENT}/${var.lambda_repository_name}/lambda/functions/restartNginxEc2.zip"
}

resource "aws_lambda_function" "restart_nginx_ec2" {
  function_name = "restart-nginx-ec2-${var.ENV_VALUE_ENVIRONMENT}"

  # Bare references instead of interpolation-only strings ("${...}").
  filename         = data.archive_file.restart_nginx_ec2.output_path
  source_code_hash = data.archive_file.restart_nginx_ec2.output_base64sha256
  role             = var.lambda_ssm_role_arn

  handler     = "function.lambda_handler"
  runtime     = "python3.9"
  memory_size = 128
  timeout     = 60

  environment {
    variables = {
      # ID of the EC2 instance to operate on.
      EC2_INSTANCE_ID = var.lambda_restart_nginx_ec2_target_ec2_id
    }
  }

  tags = {
    is_datadog_enable = (var.ENV_VALUE_ENVIRONMENT != "develop")
    role              = var.TAG_ROLE_LAMBDA
  }
}

# Public HTTPS endpoint for the function; the Datadog webhook calls this URL.
resource "aws_lambda_function_url" "restart_nginx_ec2" {
  function_name = aws_lambda_function.restart_nginx_ec2.function_name

  # NOTE(review): "NONE" means the URL is callable without authentication, so
  # anyone who learns the URL can trigger an Nginx restart. Consider verifying
  # a shared secret (e.g. a custom header sent by the webhook) in the handler.
  authorization_type = "NONE"
}
Lambdaリポジトリ
{リポジトリ名}/lambda/functions/restartNginxEc2/function.py
"""Lambda handler that restarts Nginx on an EC2 instance via SSM Run Command."""
import logging
import os

import boto3

logger = logging.getLogger()
logger.setLevel(logging.INFO)

# ID of the EC2 instance to operate on, read from the Lambda environment.
EC2_INSTANCE_ID = os.environ['EC2_INSTANCE_ID']


def lambda_handler(event, context):
    """Submit ``systemctl restart nginx`` to the target instance through SSM.

    The command is sent with the ``AWS-RunShellScript`` document. The handler
    only submits the command; it does not poll for the command's outcome.

    Args:
        event: Invocation payload. Not inspected.
        context: Lambda context object. Not used.

    Returns:
        A dict with a single ``message`` key. It is returned regardless of the
        HTTP status reported for the ``send_command`` call.
    """
    ssm = boto3.client('ssm')
    res = ssm.send_command(
        InstanceIds=[EC2_INSTANCE_ID],
        DocumentName="AWS-RunShellScript",
        Parameters={
            "commands": [
                "systemctl restart nginx"
            ],
            "executionTimeout": ["60"]
        },
    )

    # Report through the module logger (configured above) rather than print(),
    # so the outcome carries a log level.
    if res['ResponseMetadata']['HTTPStatusCode'] != 200:
        logger.error('FAILED to execute ssm.send_command().')
    else:
        logger.info(
            'ssm.send_command() accepted. CommandId=%s',
            res.get('Command', {}).get('CommandId'),
        )

    return {
        'message': "script completed."
    }