cloud-migration-planning cloud-migration-planning

云迁移规划工具,包含评估、数据库迁移、应用重构和切换策略,支持AWS、Azure和GCP平台,旨在实现平滑云迁移。

云迁移 0 次安装 0 次浏览 更新于 3/3/2026

云迁移规划

概述

云迁移规划包括评估现有基础设施、设计迁移策略、以尽量缩短停机时间的方式执行迁移,并验证迁移结果。支持直接迁移(Lift and Shift)、平台重构(Replatform)和应用重构(Refactor)等方式,以实现平滑上云。

何时使用

  • 从本地迁移到云
  • 云平台整合
  • 遗留系统现代化
  • 降低数据中心成本
  • 提高可扩展性和可用性
  • 满足合规性要求
  • 增强灾难恢复
  • 技术更新换代计划

实施示例

1. 迁移评估和规划

# 云迁移评估工具
from enum import Enum
from typing import Dict, List, Tuple
from dataclasses import dataclass

class MigrationStrategy(Enum):
    """Possible migration approaches for an application."""

    LIFT_AND_SHIFT = "lift_and_shift"  # Rehost
    REPLATFORM = "replatform"          # Rehost and optimize
    REFACTOR = "refactor"              # Rebuild for the cloud
    REPURCHASE = "repurchase"          # Switch to SaaS
    RETIRE = "retire"                  # Decommission

class ApplicationComplexity(Enum):
    """Complexity level of an application; the numeric value grows with complexity."""

    LOW = 1
    MEDIUM = 2
    HIGH = 3

@dataclass
class ApplicationAssessment:
    """Assessment record for one application considered for migration."""

    name: str  # Identifier; other assessments refer to it in `dependencies`
    complexity: ApplicationComplexity
    dependencies: List[str]  # Names of applications that must be migrated first
    estimated_effort: int  # days
    business_criticality: int  # 1-10
    current_costs: float  # annual
    cloud_costs_estimate: float  # annual

class CloudMigrationPlanner:
    """Collect application assessments and derive strategies, waves and ROI.

    Applications are registered with :meth:`add_application`; the remaining
    methods are read-only computations over ``self.applications``.

    Attributes:
        applications: Assessments in the order they were added.
        total_effort: Running sum of ``estimated_effort`` (days) of every
            application added through :meth:`add_application`.
        total_cost_savings: Running sum of ``current_costs -
            cloud_costs_estimate`` (annual) of the same applications.
    """

    # Cost per effort-day used when judging whether a refactor pays back.
    REFACTOR_DAILY_COST = 500
    # Cost per effort-day used when estimating the overall migration cost.
    MIGRATION_DAILY_COST = 250
    # A medium-complexity app is refactored only if payback is faster than this.
    REFACTOR_PAYBACK_MONTHS = 6
    # Minimum criticality at which a high-complexity app is refactored
    # instead of being proposed for retirement.
    HIGH_COMPLEXITY_REFACTOR_CRITICALITY = 8

    def __init__(self):
        self.applications: List[ApplicationAssessment] = []
        self.total_effort = 0
        self.total_cost_savings = 0

    def add_application(self, app: "ApplicationAssessment"):
        """Register an application and update the running totals."""
        self.applications.append(app)
        self.total_effort += app.estimated_effort
        self.total_cost_savings += app.current_costs - app.cloud_costs_estimate

    def recommend_migration_strategy(self, app: "ApplicationAssessment") -> "MigrationStrategy":
        """Recommend a migration strategy from the application's characteristics.

        * LOW complexity: lift and shift.
        * MEDIUM complexity: refactor when the refactoring cost is paid back
          by the annual savings in under ``REFACTOR_PAYBACK_MONTHS`` months,
          otherwise replatform.
        * Anything else (HIGH): refactor when business criticality is at least
          ``HIGH_COMPLEXITY_REFACTOR_CRITICALITY``, otherwise retire.
        """
        if app.complexity == ApplicationComplexity.LOW:
            return MigrationStrategy.LIFT_AND_SHIFT

        if app.complexity == ApplicationComplexity.MEDIUM:
            annual_savings = app.current_costs - app.cloud_costs_estimate
            if annual_savings <= 0:
                # No savings means the refactor never pays back.
                return MigrationStrategy.REPLATFORM
            refactor_cost = app.estimated_effort * self.REFACTOR_DAILY_COST
            payback_months = (refactor_cost / annual_savings) * 12
            if payback_months < self.REFACTOR_PAYBACK_MONTHS:
                return MigrationStrategy.REFACTOR
            return MigrationStrategy.REPLATFORM

        # High complexity: modernize only when the application is critical.
        if app.business_criticality >= self.HIGH_COMPLEXITY_REFACTOR_CRITICALITY:
            return MigrationStrategy.REFACTOR
        return MigrationStrategy.RETIRE  # consider retiring

    @staticmethod
    def _base_wave(app: "ApplicationAssessment") -> int:
        """Return the earliest wave (1-3) an app may join, from complexity alone."""
        if app.complexity == ApplicationComplexity.LOW:
            return 1
        if app.complexity == ApplicationComplexity.MEDIUM:
            return 2
        return 3

    def create_migration_wave_plan(self) -> Dict:
        """Create a phased migration plan that respects dependencies.

        Applications are visited by ascending dependency count, then by
        descending business criticality. An application is scheduled once all
        of its dependencies are scheduled; passes repeat until nothing more
        can be placed, so dependency chains are resolved regardless of the
        sort order. An application never lands in an earlier wave than any of
        its dependencies, and within a wave dependencies are listed first.

        Returns:
            A dict with ``waves`` (``wave_1``..``wave_3`` lists of names),
            ``total_applications``, ``migrated_count``, ``total_effort_days``
            and ``unscheduled`` (names whose dependencies are unknown or
            cyclic and therefore could not be placed).
        """
        pending = sorted(
            self.applications,
            key=lambda x: (len(x.dependencies), -x.business_criticality),
        )

        waves = {
            'wave_1': [],  # low risk, few dependencies
            'wave_2': [],  # medium risk
            'wave_3': [],  # high risk or critical
        }
        wave_of: Dict[str, int] = {}  # scheduled application name -> wave number

        while pending:
            deferred = []
            for app in pending:
                if any(dep not in wave_of for dep in app.dependencies):
                    deferred.append(app)
                    continue

                # Never schedule an app ahead of something it depends on.
                wave = max(
                    [self._base_wave(app)]
                    + [wave_of[dep] for dep in app.dependencies]
                )
                waves[f'wave_{wave}'].append(app.name)
                wave_of[app.name] = wave

            if len(deferred) == len(pending):
                break  # no progress: remaining deps are missing or cyclic
            pending = deferred

        return {
            'waves': waves,
            'total_applications': len(self.applications),
            'migrated_count': len(wave_of),
            'total_effort_days': sum(app.estimated_effort for app in self.applications),
            'unscheduled': [app.name for app in pending],
        }

    def calculate_roi(self) -> Dict:
        """Compute migration ROI figures over all registered applications.

        ``payback_months`` is ``float('inf')`` when there are no positive
        annual savings.
        """
        total_current_costs = sum(app.current_costs for app in self.applications)
        total_cloud_costs = sum(app.cloud_costs_estimate for app in self.applications)
        annual_savings = total_current_costs - total_cloud_costs

        # Estimated one-off migration cost.
        total_effort_days = sum(app.estimated_effort for app in self.applications)
        migration_cost = total_effort_days * self.MIGRATION_DAILY_COST

        if annual_savings > 0:
            payback_months = (migration_cost / annual_savings) * 12
        else:
            payback_months = float('inf')

        return {
            'total_current_costs': total_current_costs,
            'total_cloud_costs': total_cloud_costs,
            'annual_savings': annual_savings,
            'migration_cost': migration_cost,
            'payback_months': payback_months,
            'year1_savings': annual_savings - migration_cost,
            'year3_savings': (annual_savings * 3) - migration_cost,
        }

# Usage: assess three sample applications, then print the wave plan and ROI.
planner = CloudMigrationPlanner()

app1 = ApplicationAssessment(
    name="Web Frontend",
    complexity=ApplicationComplexity.LOW,
    dependencies=[],
    estimated_effort=5,
    business_criticality=7,
    current_costs=50000,
    cloud_costs_estimate=30000
)

# Depends on "Database", so it can only be scheduled once that app is.
app2 = ApplicationAssessment(
    name="API Backend",
    complexity=ApplicationComplexity.MEDIUM,
    dependencies=["Database"],
    estimated_effort=20,
    business_criticality=9,
    current_costs=80000,
    cloud_costs_estimate=40000
)

app3 = ApplicationAssessment(
    name="Database",
    complexity=ApplicationComplexity.HIGH,
    dependencies=[],
    estimated_effort=30,
    business_criticality=10,
    current_costs=120000,
    cloud_costs_estimate=80000
)

planner.add_application(app1)
planner.add_application(app2)
planner.add_application(app3)

print("迁移波计划:")
print(planner.create_migration_wave_plan())

# The leading "\n" must stay an escape sequence: a raw line break inside a
# single-quoted string literal is a SyntaxError.
print("\nROI分析:")
print(planner.calculate_roi())

2. 数据库迁移策略

# AWS Database Migration Service (DMS): create the replication instance
aws dms create-replication-instance \
  --replication-instance-identifier my-replication-instance \
  --replication-instance-class dms.t3.large \
  --allocated-storage 100 \
  --vpc-security-group-ids sg-12345

# Create the source endpoint
# NOTE: passwords given on the command line end up in shell history; the
# values below are placeholders only.
aws dms create-endpoint \
  --endpoint-identifier source-db \
  --endpoint-type source \
  --engine-name postgres \
  --server-name source-db.example.com \
  --port 5432 \
  --username sourceadmin \
  --password sourcepassword \
  --database-name sourcedb

# Create the target endpoint
aws dms create-endpoint \
  --endpoint-identifier target-rds \
  --endpoint-type target \
  --engine-name postgres \
  --server-name my-db.xyz.us-east-1.rds.amazonaws.com \
  --port 5432 \
  --username targetadmin \
  --password targetpassword \
  --database-name targetdb

# Create the migration task
# --migration-type accepts full-load | cdc | full-load-and-cdc.
aws dms create-replication-task \
  --replication-task-identifier postgres-migration \
  --source-endpoint-arn arn:aws:dms:region:account:endpoint/source-db \
  --target-endpoint-arn arn:aws:dms:region:account:endpoint/target-rds \
  --replication-instance-arn arn:aws:dms:region:account:rep:my-replication-instance \
  --migration-type full-load \
  --table-mappings file://mappings.json

# Monitor the migration task (check its status)
aws dms describe-replication-tasks \
  --filters Name=replication-task-arn,Values=arn:aws:dms:region:account:task:task-id

# Start the migration
aws dms start-replication-task \
  --replication-task-arn arn:aws:dms:region:account:task:postgres-migration \
  --start-replication-task-type start-replication

3. Terraform迁移基础设施

# migration.tf
# Provider requirements. The random provider is declared explicitly because
# this configuration creates a random_password resource.
terraform {
  required_providers {
    aws = {
      source  = "hashicorp/aws"
      version = "~> 5.0"
    }
    random = {
      source  = "hashicorp/random"
      version = "~> 3.0"
    }
  }
}

# AWS provider; the region is read from var.aws_region (the variable
# declaration is not part of this snippet).
provider "aws" {
  region = var.aws_region
}

# VPC for the migration infrastructure
resource "aws_vpc" "migration" {
  cidr_block           = "10.100.0.0/16"
  enable_dns_hostnames = true

  tags = { Name = "migration-vpc" }
}

# Subnets for DMS: two /24 subnets (10.100.0.0/24 and 10.100.1.0/24), each
# placed in the availability zone with the matching index.
resource "aws_subnet" "migration" {
  count             = 2
  vpc_id            = aws_vpc.migration.id
  cidr_block        = "10.100.${count.index}.0/24"
  availability_zone = data.aws_availability_zones.available.names[count.index]

  tags = { Name = "migration-subnet-${count.index}" }
}

# Replication subnet group spanning every migration subnet
resource "aws_dms_replication_subnet_group" "migration" {
  replication_subnet_group_description = "Migration subnet group"
  replication_subnet_group_id          = "migration-subnet-group"
  subnet_ids                           = aws_subnet.migration[*].id
}

# Replication instance: Multi-AZ, not publicly accessible, placed in the
# migration subnet group.
# NOTE(review): engine_version is pinned; confirm this DMS version is still
# offered before applying.
# NOTE(review): no vpc_security_group_ids are set in this block; confirm the
# default security group allows the required traffic.
resource "aws_dms_replication_instance" "migration" {
  allocated_storage           = 100
  apply_immediately           = true
  auto_minor_version_upgrade  = true
  engine_version              = "3.4.5"
  multi_az                    = true
  publicly_accessible         = false
  replication_instance_class  = "dms.c5.2xlarge"
  replication_instance_id     = "migration-instance"
  replication_subnet_group_id = aws_dms_replication_subnet_group.migration.id

  tags = { Name = "migration-instance" }
}

# Source database endpoint (PostgreSQL). Host, user, password and database
# name come from variables declared outside this snippet.
resource "aws_dms_endpoint" "source" {
  endpoint_type   = "source"
  engine_name     = "postgres"
  server_name     = var.source_db_host
  port            = 5432
  username        = var.source_db_user
  password        = var.source_db_password
  database_name   = var.source_db_name
  endpoint_id     = "source-postgres"

  # Require SSL on the connection to the source database.
  ssl_mode = "require"

  tags = { Name = "source-endpoint" }
}

# Target RDS endpoint (PostgreSQL), pointing at aws_db_instance.target.
resource "aws_dms_endpoint" "target" {
  endpoint_type = "target"
  engine_name   = "postgres"
  # Use `address` (hostname only); the `endpoint` attribute is "host:port"
  # and is not a valid server name.
  server_name   = aws_db_instance.target.address
  port          = 5432
  username      = aws_db_instance.target.username
  # Must be the same password the RDS instance is created with.
  password      = random_password.db.result
  database_name = aws_db_instance.target.db_name
  endpoint_id   = "target-rds"

  tags = { Name = "target-endpoint" }
}

# Target RDS instance (PostgreSQL): Multi-AZ, not publicly accessible.
resource "aws_db_instance" "target" {
  identifier          = "migration-target-db"
  allocated_storage   = 100
  engine              = "postgres"
  engine_version      = "15.2"
  instance_class      = "db.r5.2xlarge"
  username            = "postgres"
  # Master password is generated by random_password.db.
  password            = random_password.db.result
  db_name             = "targetdb"
  multi_az            = true
  publicly_accessible = false

  # Keep automated backups for 30 days; backup window is 03:00-04:00.
  backup_retention_period = 30
  backup_window          = "03:00-04:00"

  # A final snapshot with the identifier below is requested when the
  # instance is destroyed.
  skip_final_snapshot = false
  final_snapshot_identifier = "migration-target-final-snapshot"
}

# Replication task: full load followed by ongoing change data capture (CDC)
# from the source endpoint to the target endpoint.
resource "aws_dms_replication_task" "migration" {
  migration_type           = "full-load-and-cdc"
  replication_instance_arn = aws_dms_replication_instance.migration.replication_instance_arn
  replication_task_id      = "postgres-full-migration"
  source_endpoint_arn      = aws_dms_endpoint.source.endpoint_arn
  target_endpoint_arn      = aws_dms_endpoint.target.endpoint_arn

  # Selection rule: include every table ("%") in every schema ("%").
  table_mappings = jsonencode({
    rules = [
      {
        rule_type   = "selection"
        rule_id     = "1"
        rule_action = "include"
        object_locator = {
          schema_name = "%"
          table_name  = "%"
        }
      }
    ]
  })

  # Task settings passed to DMS as a JSON document.
  replication_task_settings = jsonencode({
    TargetMetadata = {
      TargetSchema        = "public"
      SupportLobs         = true
      FullLobMode         = false
      LobChunkSize        = 64
      LobMaxSize          = 32
    }
    FullLoadSettings = {
      # Target tables are dropped and re-created before the full load.
      TargetPrepMode             = "DROP_AND_CREATE"
      CreatePkAfterFullLoad      = false
      StopTaskCachedSourceNotApplied = false
    }
    # NOTE(review): confirm the LogType/Id values against the DMS task
    # settings logging reference; debug severity produces verbose logs.
    Logging = {
      EnableLogging = true
      LogComponents = [
        {
          LogType = "SOURCE_UNSPECIFIED"
          Id      = "%COMMON_MESSAGES%"
          Severity = "LOGGER_SEVERITY_DEBUG"
        }
      ]
    }
  })

  tags = { Name = "postgres-migration" }

  # Explicit ordering; the ARN references above already imply these
  # dependencies.
  depends_on = [
    aws_dms_endpoint.source,
    aws_dms_endpoint.target,
    aws_dms_replication_instance.migration
  ]
}

# Secrets Manager secret for the migration credentials; the final name is
# generated from the "migration/" prefix.
resource "aws_secretsmanager_secret" "migration_creds" {
  name_prefix = "migration/"
}

# Secret payload: JSON document holding the source and target database
# passwords.
resource "aws_secretsmanager_secret_version" "migration_creds" {
  secret_id = aws_secretsmanager_secret.migration_creds.id
  secret_string = jsonencode({
    source_db_password = var.source_db_password
    # Store the password the target RDS instance is actually created with.
    target_db_password = random_password.db.result
  })
}

# CloudWatch monitoring: log group with 7-day retention.
# NOTE(review): nothing else in this file references this log group; confirm
# that DMS task logs are actually delivered here.
resource "aws_cloudwatch_log_group" "dms" {
  name              = "/aws/dms/migration"
  retention_in_days = 7
}

# Alarm that fires when the sum of FailureCount over one 300-second period
# is at least 1.
# NOTE(review): no dimensions or alarm_actions are set in this block, and it
# should be confirmed that AWS/DMS publishes a metric named FailureCount.
resource "aws_cloudwatch_metric_alarm" "migration_failed" {
  alarm_name          = "dms-migration-failed"
  comparison_operator = "GreaterThanOrEqualToThreshold"
  evaluation_periods  = 1
  metric_name         = "FailureCount"
  namespace           = "AWS/DMS"
  period              = 300
  statistic           = "Sum"
  threshold           = 1
  alarm_description   = "DMS迁移失败时发出警报"
}

# Random master password for the target RDS instance.
resource "random_password" "db" {
  length  = 16
  special = true
  # RDS rejects '/', '@', '"' and spaces in the master password; the default
  # special-character set contains '@', so restrict it explicitly.
  override_special = "!#$%&*()-_=+[]{}<>:?"
}

# Availability-zone data source, limited to zones in the "available" state;
# used to place the migration subnets.
data "aws_availability_zones" "available" {
  state = "available"
}

# Outputs
# Identifier of the DMS replication instance.
output "dms_instance_id" {
  value = aws_dms_replication_instance.migration.replication_instance_id
}

# Connection endpoint of the target RDS instance.
output "target_db_endpoint" {
  value = aws_db_instance.target.endpoint
}

4. 切换验证清单

# cutover-validation.yaml
# Cutover checklist in four sections: checks before the cutover, the cutover
# steps themselves, post-cutover tasks, and the overall acceptance criteria.

# Checks to complete before starting the cutover.
pre_cutover:
  # Source database health check
  - name: "源数据库健康检查"
    steps:
      - command: "SELECT COUNT(*) FROM pg_stat_replication;"
      - validate: "复制延迟<1秒"
      - expected: "所有副本同步"

  # Target database readiness
  - name: "目标数据库准备情况"
    steps:
      - command: "SELECT datname, pg_size_pretty(pg_database_size(datname)) FROM pg_database;"
      - validate: "目标数据库大小与源匹配"
      - expected: "完全匹配"

  # Network connectivity from source to target on port 5432
  - name: "网络连接性"
    steps:
      - test: "源到目标的连接性"
      - command: "nc -zv target-db.rds.amazonaws.com 5432"
      - expected: "连接成功"

  # Backup verification, including a test restore
  - name: "备份验证"
    steps:
      - verify: "存在最近的备份"
      - test: "恢复到测试实例"
      - expected: "恢复成功"

# Steps performed during the cutover.
cutover:
  # Pre-switch tasks
  - name: "切换前任务"
    steps:
      - "通知利益相关者"
      - "停止应用程序写入"
      - "验证复制延迟<1秒"
      - "从源捕获最终指标"

  # DNS switch
  - name: "DNS切换"
    steps:
      - "更新DNS指向目标"
      - "验证DNS传播"
      - "测试测试客户端的连接性"

  # Application failover
  - name: "应用程序故障转移"
    steps:
      - "更新连接字符串"
      - "重新启动应用程序服务器"
      - "验证应用程序健康"
      - "运行烟雾测试"

# Tasks after the cutover.
post_cutover:
  # Validation
  - name: "验证"
    steps:
      - "在生产上运行测试套件"
      - "验证数据完整性"
      - "检查应用程序日志"
      - "监控错误率"

  # Cleanup
  - name: "清理"
    steps:
      - "记录最终指标"
      - "归档源数据库"
      - "更新文档"
      - "安排迁移后审查"

# Acceptance criteria for the cutover.
validation_criteria:
  - "零数据丢失"
  - "应用程序响应时间<200ms"
  - "错误率<0.1%"
  - "所有用户旅程通过"
  - "数据库复制成功"

最佳实践

✅ DO

  • 进行全面的发现和评估
  • 过渡期间运行并行系统
  • 在切换前彻底测试
  • 准备回滚计划
  • 迁移后密切监控
  • 记录所有更改
  • 培训运营团队
  • 暂时保留旧系统

❌ DON’T

  • 未计划就匆忙迁移
  • 未测试就迁移
  • 忘记回滚程序
  • 忽略依赖关系
  • 跳过利益相关者沟通
  • 一次性迁移所有内容
  • 忘记更新文档

迁移阶段

  1. 评估(2-4周):发现、评估、计划
  2. 试点(2-8周):迁移非关键应用程序
  3. 波次迁移(8-16周):按优先级迁移
  4. 优化(4+周):微调云资源
  5. 收尾(1-2周):停用源系统

资源