DNS管理 dns-management

本技能涉及使用Route53、Azure DNS或CloudFlare等工具进行DNS记录管理、路由策略配置、故障转移和高可用性设置,关键词包括DNS、故障转移、高可用性、路由策略。

云安全 0 次安装 0 次浏览 更新于 3/3/2026

DNS管理

概述

实现DNS管理策略,用于流量路由、故障转移、地理路由和高可用性,使用Route53、Azure DNS或CloudFlare。

何时使用

  • 域名管理和路由
  • 故障转移和灾难恢复
  • 地理负载均衡
  • 多区域部署
  • 基于DNS的流量管理
  • CDN集成
  • 健康检查路由
  • 零停机迁移

实施示例

1. AWS Route53配置

# route53-setup.yaml
# ConfigMap carrying setup-dns.sh, which creates an HTTPS health check for the
# primary endpoint and upserts PRIMARY/SECONDARY failover alias records.
apiVersion: v1
kind: ConfigMap
metadata:
  name: route53-config
  namespace: operations
data:
  setup-dns.sh: |
    #!/bin/bash
    set -euo pipefail

    DOMAIN="myapp.com"
    HOSTED_ZONE_ID="Z1234567890ABC"
    PRIMARY_ENDPOINT="myapp-primary.example.com"
    SECONDARY_ENDPOINT="myapp-secondary.example.com"
    # Hosted zone ID of the alias targets (this is not the zone of $DOMAIN).
    # NOTE(review): confirm it matches the service/region that owns the endpoints.
    ALIAS_TARGET_ZONE_ID="Z35SXDOTRQ7X7K"

    echo "Setting up Route53 DNS for $DOMAIN"

    # Create health check for primary.
    # --caller-reference is mandatory for create-health-check; the timestamp
    # keeps the reference unique for each run of this script.
    PRIMARY_HEALTH=$(aws route53 create-health-check \
      --caller-reference "${DOMAIN}-primary-$(date +%s)" \
      --health-check-config '{
        "Type": "HTTPS",
        "ResourcePath": "/health",
        "FullyQualifiedDomainName": "'"${PRIMARY_ENDPOINT}"'",
        "Port": 443,
        "RequestInterval": 30,
        "FailureThreshold": 3
      }' --query 'HealthCheck.Id' --output text)

    echo "Created health check: $PRIMARY_HEALTH"

    # Create failover record for primary.
    # Alias records must not carry a TTL: Route 53 uses the target's TTL and
    # rejects a change batch that sets both "TTL" and "AliasTarget".
    aws route53 change-resource-record-sets \
      --hosted-zone-id "$HOSTED_ZONE_ID" \
      --change-batch '{
        "Changes": [{
          "Action": "UPSERT",
          "ResourceRecordSet": {
            "Name": "'"${DOMAIN}"'",
            "Type": "A",
            "SetIdentifier": "Primary",
            "Failover": "PRIMARY",
            "AliasTarget": {
              "HostedZoneId": "'"${ALIAS_TARGET_ZONE_ID}"'",
              "DNSName": "'"${PRIMARY_ENDPOINT}"'",
              "EvaluateTargetHealth": true
            },
            "HealthCheckId": "'"${PRIMARY_HEALTH}"'"
          }
        }]
      }'

    # Create failover record for secondary (no health check attached; it is
    # the record answered when the primary is considered unhealthy).
    aws route53 change-resource-record-sets \
      --hosted-zone-id "$HOSTED_ZONE_ID" \
      --change-batch '{
        "Changes": [{
          "Action": "UPSERT",
          "ResourceRecordSet": {
            "Name": "'"${DOMAIN}"'",
            "Type": "A",
            "SetIdentifier": "Secondary",
            "Failover": "SECONDARY",
            "AliasTarget": {
              "HostedZoneId": "'"${ALIAS_TARGET_ZONE_ID}"'",
              "DNSName": "'"${SECONDARY_ENDPOINT}"'",
              "EvaluateTargetHealth": false
            }
          }
        }]
      }'

    echo "DNS failover configured"

---
# Terraform Route53 configuration
# Hosted zone for myapp.com; the record resources in this file reference it
# through aws_route53_zone.myapp.zone_id.
resource "aws_route53_zone" "myapp" {
  name = "myapp.com"
  tags = { Name = "myapp-zone" }
}

# Health check for primary region: HTTPS GET /health on port 443, probed every
# 30 seconds and marked unhealthy after 3 consecutive failures.
resource "aws_route53_health_check" "primary" {
  # aws_lb exports dns_name but no IP address attribute, so the check has to
  # target the load balancer by fully qualified domain name.
  fqdn          = aws_lb.primary.dns_name
  port          = 443
  type          = "HTTPS"
  resource_path = "/health"

  failure_threshold = 3
  request_interval  = 30

  tags = {
    Name = "primary-health-check"
  }
}

# Primary failover record: alias to the primary load balancer, guarded by the
# primary health check.
resource "aws_route53_record" "primary" {
  zone_id        = aws_route53_zone.myapp.zone_id
  name           = "myapp.com"
  type           = "A"
  set_identifier = "Primary"
  # No ttl here: ttl conflicts with the alias block (alias records take the
  # TTL of their target).

  failover_routing_policy {
    type = "PRIMARY"
  }

  alias {
    name                   = aws_lb.primary.dns_name
    zone_id                = aws_lb.primary.zone_id
    evaluate_target_health = true
  }

  health_check_id = aws_route53_health_check.primary.id
}

# Secondary failover record: alias to the secondary load balancer, with no
# health evaluation of its own.
resource "aws_route53_record" "secondary" {
  zone_id        = aws_route53_zone.myapp.zone_id
  name           = "myapp.com"
  type           = "A"
  set_identifier = "Secondary"
  # No ttl here: ttl conflicts with the alias block (alias records take the
  # TTL of their target).

  failover_routing_policy {
    type = "SECONDARY"
  }

  alias {
    name                   = aws_lb.secondary.dns_name
    zone_id                = aws_lb.secondary.zone_id
    evaluate_target_health = false
  }
}

# Weighted routing for canary deployments: this record carries weight 10 for
# api.myapp.com and aliases the canary load balancer.
# NOTE(review): only the canary weight is defined here; a sibling weighted
# record for the stable target is presumably declared elsewhere - confirm.
resource "aws_route53_record" "canary" {
  zone_id        = aws_route53_zone.myapp.zone_id
  name           = "api.myapp.com"
  type           = "A"
  set_identifier = "Canary"
  # No ttl here: ttl conflicts with the alias block (alias records take the
  # TTL of their target).

  weighted_routing_policy {
    weight = 10
  }

  alias {
    name                   = aws_lb.canary.dns_name
    zone_id                = aws_lb.canary.zone_id
    evaluate_target_health = true
  }
}

# Geolocation routing: queries located in the US are answered with the
# us_east load balancer.
# NOTE(review): Route 53 expects all record sets sharing a name and type to
# use one routing policy; this file also declares failover A records for
# myapp.com - confirm the two examples are not applied to the same zone.
resource "aws_route53_record" "geo_us" {
  zone_id        = aws_route53_zone.myapp.zone_id
  name           = "myapp.com"
  type           = "A"
  set_identifier = "US"
  # No ttl here: ttl conflicts with the alias block (alias records take the
  # TTL of their target).

  geolocation_routing_policy {
    country = "US"
  }

  alias {
    name                   = aws_lb.us_east.dns_name
    zone_id                = aws_lb.us_east.zone_id
    evaluate_target_health = true
  }
}

# Geolocation routing: queries located on the EU continent are answered with
# the eu_west load balancer.
# NOTE(review): Route 53 expects all record sets sharing a name and type to
# use one routing policy; this file also declares failover A records for
# myapp.com - confirm the two examples are not applied to the same zone.
resource "aws_route53_record" "geo_eu" {
  zone_id        = aws_route53_zone.myapp.zone_id
  name           = "myapp.com"
  type           = "A"
  set_identifier = "EU"
  # No ttl here: ttl conflicts with the alias block (alias records take the
  # TTL of their target).

  geolocation_routing_policy {
    continent = "EU"
  }

  alias {
    name                   = aws_lb.eu_west.dns_name
    zone_id                = aws_lb.eu_west.zone_id
    evaluate_target_health = true
  }
}

2. DNS故障转移脚本

#!/bin/bash
# dns-failover.sh - manage DNS failover
#
# Usage: dns-failover.sh <domain> <hosted-zone-id> <new-primary-endpoint> [alias-target-zone-id]
#
# Checks https://<new-primary-endpoint>/health and, if it responds
# successfully, upserts the "Primary" failover alias record of <domain> so
# that it targets the new endpoint. Exits 1 on missing arguments or when the
# new endpoint fails its health probe.

set -euo pipefail

DOMAIN="${1:-myapp.com}"
HOSTED_ZONE_ID="${2:-Z1234567890ABC}"
NEW_PRIMARY="${3:-}"
# Hosted zone ID of the alias target (this is not the zone of $DOMAIN).
# NOTE(review): the default only fits endpoints owned by one service/region;
# pass the fourth argument when the new primary lives elsewhere.
ALIAS_TARGET_ZONE_ID="${4:-Z35SXDOTRQ7X7K}"

if [[ -z "$NEW_PRIMARY" ]]; then
    echo "Usage: $0 <domain> <hosted-zone-id> <new-primary-endpoint> [alias-target-zone-id]" >&2
    exit 1
fi

echo "Initiating DNS failover for $DOMAIN"

# Get current primary (record names are stored with a trailing dot)
CURRENT_PRIMARY=$(aws route53 list-resource-record-sets \
    --hosted-zone-id "$HOSTED_ZONE_ID" \
    --query "ResourceRecordSets[?Name=='$DOMAIN.' && SetIdentifier=='Primary'].AliasTarget.DNSName" \
    --output text)

echo "Current primary: $CURRENT_PRIMARY"
echo "New primary: $NEW_PRIMARY"

# Verify new endpoint is healthy before any DNS change is made
echo "Verifying new endpoint health..."
if ! curl -sf --max-time 5 "https://${NEW_PRIMARY}/health" > /dev/null; then
    echo "ERROR: New endpoint is not healthy" >&2
    exit 1
fi

# Update primary record.
# Alias records must not carry a TTL: Route 53 rejects a change batch that
# sets both "TTL" and "AliasTarget".
# NOTE(review): the upsert does not carry a HealthCheckId, so any health check
# previously attached to the Primary record is no longer referenced by it.
aws route53 change-resource-record-sets \
    --hosted-zone-id "$HOSTED_ZONE_ID" \
    --change-batch '{
        "Changes": [{
            "Action": "UPSERT",
            "ResourceRecordSet": {
                "Name": "'"${DOMAIN}"'",
                "Type": "A",
                "SetIdentifier": "Primary",
                "Failover": "PRIMARY",
                "AliasTarget": {
                    "HostedZoneId": "'"${ALIAS_TARGET_ZONE_ID}"'",
                    "DNSName": "'"${NEW_PRIMARY}"'",
                    "EvaluateTargetHealth": true
                }
            }
        }]
    }'

echo "DNS failover completed: $NEW_PRIMARY is now primary"

3. CloudFlare DNS配置

#!/bin/bash
# cloudflare-dns.sh - CloudFlare DNS management
#
# Usage: cloudflare-dns.sh [domain] [zone-id]
# Requires CF_EMAIL and CF_API_KEY in the environment. When no zone ID is
# given it is looked up by domain name through the zones API.

set -euo pipefail

# Fail early, with a readable message, when a credential is unset or empty.
: "${CF_EMAIL:?CF_EMAIL must be set}"
: "${CF_API_KEY:?CF_API_KEY must be set}"
DOMAIN="${1:-myapp.com}"
ZONE_ID="${2:-}"

# Get zone ID
if [[ -z "$ZONE_ID" ]]; then
    # "// empty" turns a missing result into an empty string instead of the
    # literal text "null", so the check below can catch a failed lookup.
    ZONE_ID=$(curl -s -X GET "https://api.cloudflare.com/client/v4/zones?name=$DOMAIN" \
        -H "X-Auth-Email: $CF_EMAIL" \
        -H "X-Auth-Key: $CF_API_KEY" \
        -H "Content-Type: application/json" \
        | jq -r '.result[0].id // empty')

    if [[ -z "$ZONE_ID" ]]; then
        echo "ERROR: no CloudFlare zone found for $DOMAIN" >&2
        exit 1
    fi
fi

echo "Zone ID: $ZONE_ID"

#######################################
# Create a proxied DNS record in the zone identified by $ZONE_ID.
# Globals:   ZONE_ID, CF_EMAIL, CF_API_KEY (read)
# Arguments: $1 - record type
#            $2 - record name
#            $3 - record content
#            $4 - TTL in seconds (optional, default 3600)
# Outputs:   the API response, formatted by jq, on stdout
# Returns:   2 when the TTL is not a non-negative integer (no request is
#            sent); otherwise the status of the curl | jq pipeline
#######################################
create_record() {
    local type="$1"
    local name="$2"
    local content="$3"
    local ttl="${4:-3600}"
    local payload

    if ! [[ "$ttl" =~ ^[0-9]+$ ]]; then
        printf 'create_record: TTL must be a non-negative integer, got: %s\n' "$ttl" >&2
        return 2
    fi

    # Let jq assemble the body so values containing spaces, quotes or
    # backslashes (e.g. TXT content) are escaped instead of breaking the JSON
    # or being word-split by the shell.
    payload=$(jq -n \
        --arg type "$type" \
        --arg name "$name" \
        --arg content "$content" \
        --arg ttl "$ttl" \
        '{type: $type, name: $name, content: $content, ttl: ($ttl | tonumber), proxied: true}') || return

    curl -s -X POST "https://api.cloudflare.com/client/v4/zones/$ZONE_ID/dns_records" \
        -H "X-Auth-Email: $CF_EMAIL" \
        -H "X-Auth-Key: $CF_API_KEY" \
        -H "Content-Type: application/json" \
        --data "$payload" | jq '.'
}

# List records: query the dns_records endpoint of $ZONE_ID and print the
# id, type, name and content of each entry in the response's result array.
# Reads ZONE_ID, CF_EMAIL and CF_API_KEY from the enclosing script.
list_records() {
    local endpoint="https://api.cloudflare.com/client/v4/zones/$ZONE_ID/dns_records"
    local -a auth_headers=(
        -H "X-Auth-Email: $CF_EMAIL"
        -H "X-Auth-Key: $CF_API_KEY"
        -H "Content-Type: application/json"
    )

    curl -s -X GET "$endpoint" "${auth_headers[@]}" \
        | jq '.result[] | {id, type, name, content}'
}

# Default action when the script is run: print the records of the zone.
list_records

4. DNS监控和验证

# dns-monitoring.yaml
# CronJob that resolves the domain and probes /health on the resolved address.
apiVersion: batch/v1
kind: CronJob
metadata:
  name: dns-health-check
  namespace: operations
spec:
  schedule: "*/5 * * * *"  # Every 5 minutes
  jobTemplate:
    spec:
      template:
        spec:
          containers:
            - name: health-check
              # NOTE(review): consider pinning a specific tag instead of latest.
              image: curlimages/curl:latest
              command:
                - sh
                - -c
                - |
                  DOMAIN="myapp.com"
                  # Only read "Address:" lines that follow a "Name:" line, so
                  # the resolver's own address is never mistaken for the
                  # answer when the lookup fails. Values containing ':'
                  # (IPv6, or host:port) are skipped.
                  PRIMARY_IP=$(nslookup "$DOMAIN" | awk '/^Name:/ {answer=1; next} answer && /^Address:/ && $2 !~ /:/ {ip=$2} END {print ip}')

                  echo "Checking DNS resolution for $DOMAIN"
                  echo "Resolved to: $PRIMARY_IP"

                  if [ -z "$PRIMARY_IP" ]; then
                    echo "FAIL: $DOMAIN did not resolve to an IPv4 address"
                    exit 1
                  fi

                  # Verify connectivity. --resolve pins the connection to the
                  # resolved address while the URL keeps the hostname, so SNI
                  # and certificate verification are done against $DOMAIN
                  # rather than a bare IP.
                  if curl -sf --max-time 10 --resolve "$DOMAIN:443:$PRIMARY_IP" "https://$DOMAIN/health" > /dev/null 2>&1; then
                    echo "PASS: Primary endpoint is healthy"
                    exit 0
                  else
                    echo "FAIL: Primary endpoint is unreachable"
                    exit 1
                  fi
          restartPolicy: OnFailure

最佳实践

✅ 应该做

  • 使用健康检查进行故障转移
  • 设置适当的TTL值
  • 实施地理路由
  • 使用加权路由进行金丝雀部署
  • 监控DNS解析
  • 记录DNS变更
  • 测试故障转移程序
  • 启用DNSSEC

❌ 不应该做

  • 将TTL设置为0
  • 指向单个端点
  • 忘记健康检查
  • 混淆DNS和应用故障转移
  • 在事件期间更改DNS
  • 忽略DNS传播时间
  • 使用通用名称
  • 跳过DNS监控

DNS路由策略

  • 简单:单个资源
  • 加权:按百分比分配
  • 基于延迟:路由到最低延迟
  • 故障转移:主动/被动故障转移
  • 地理位置:按地理位置路由
  • 多值:多个资源和健康检查

资源