NoSQL数据库设计

概览

为MongoDB（文档型）和DynamoDB（键值型）设计可扩展的NoSQL模式。涵盖数据建模模式、反规范化策略和NoSQL系统的查询优化。

使用场景

MongoDB集合设计
DynamoDB表和索引设计
文档结构建模
嵌入与引用决策
查询模式优化
NoSQL索引策略
数据反规范化规划

MongoDB模式设计

文档结构设计

MongoDB - 嵌入式文档：

// Embedded model: one user document carries its address and its orders inline.
db.createCollection("users");

db.users.insertOne({
  _id: ObjectId("..."),
  email: "john@example.com",
  name: "John Doe",
  createdAt: new Date(),

  // Address stored as an embedded sub-document.
  address: { street: "123 Main St", city: "New York", state: "NY", zipCode: "10001" },

  // Orders stored as an embedded array of sub-documents.
  orders: [
    { orderId: ObjectId("..."), date: new Date(), total: 149.99 },
    { orderId: ObjectId("..."), date: new Date(), total: 89.99 },
  ],
});

MongoDB - 引用文档：

// Referenced model: users and orders live in separate collections and each
// order points back at its user through userId.
db.createCollection("users");
db.createCollection("orders");

db.users.insertOne({
  _id: ObjectId("..."),
  email: "john@example.com",
  name: "John Doe",
});

// The timestamp field is named createdAt so that the indexes defined on
// orders ({ createdAt: -1 } and { userId: 1, createdAt: -1 }) actually cover
// these documents; an orderDate field would not be indexed by them.
db.orders.insertMany([
  {
    _id: ObjectId("..."),
    userId: ObjectId("..."), // reference to the owning user
    createdAt: new Date(),
    total: 149.99,
  },
  {
    _id: ObjectId("..."),
    userId: ObjectId("..."),
    createdAt: new Date(),
    total: 89.99,
  },
]);

// JOIN-style read: pick one user's orders, then attach the matching users
// document(s) to each order under the "user" array field.
db.orders.aggregate([
  { $match: { userId: ObjectId("...") } },
  { $lookup: { from: "users", localField: "userId", foreignField: "_id", as: "user" } },
]);

MongoDB中的索引

// Single-field indexes (1 = ascending, -1 = descending).
db.users.createIndex({ email: 1 });
db.orders.createIndex({ createdAt: -1 });

// Compound index: userId first, then createdAt descending.
db.orders.createIndex({ userId: 1, createdAt: -1 });

// Text index used for search over name and description.
db.products.createIndex({ name: "text", description: "text" });

// Geospatial index on the location field.
db.stores.createIndex({ location: "2dsphere" });

// TTL index: documents expire 3600 seconds after their createdAt value.
db.sessions.createIndex({ createdAt: 1 }, { expireAfterSeconds: 3600 });

// Sparse index: only documents that contain the phone field are indexed.
db.users.createIndex({ phone: 1 }, { sparse: true });

// Inspect index usage statistics for the users collection.
db.users.aggregate([{ $indexStats: {} }]);

模式验证

// Create the products collection with a $jsonSchema validator.
// name, price and category are mandatory; the remaining properties are
// type-checked only when present.
db.createCollection("products", {
  validator: {
    $jsonSchema: {
      bsonType: "object",
      required: ["name", "price", "category"],
      properties: {
        _id: { bsonType: "objectId" },
        name: {
          bsonType: "string",
          description: "产品名称（必填）"
        },
        price: {
          // "decimal" is Decimal128: a plain JS number (double) such as 9.99
          // fails validation, so writers must pass NumberDecimal("9.99").
          bsonType: "decimal",
          // minimum: 0 accepts zero, so the message states "not negative"
          // rather than "positive" to match what is actually enforced.
          minimum: 0,
          description: "价格不能为负数"
        },
        category: {
          enum: ["electronics", "clothing", "food"],
          description: "类别必须是列出的值之一"
        },
        tags: {
          bsonType: "array",
          items: { bsonType: "string" }
        },
        createdAt: {
          bsonType: "date"
        }
      }
    }
  }
})

DynamoDB模式设计

表结构

// DynamoDB table keyed by a single partition (HASH) key.
const TableName = "users";
const params = {
  TableName: TableName,
  KeySchema: [{ AttributeName: "userId", KeyType: "HASH" }], // partition key
  AttributeDefinitions: [{ AttributeName: "userId", AttributeType: "S" }], // "S" = string
  BillingMode: "PAY_PER_REQUEST", // on-demand billing
};

// DynamoDB table with a composite primary key:
// userId is the partition (HASH) key, orderId the sort (RANGE) key.
const ordersParams = {
  TableName: "orders",
  KeySchema: [
    { AttributeName: "userId", KeyType: "HASH" },
    { AttributeName: "orderId", KeyType: "RANGE" },
  ],
  AttributeDefinitions: [
    { AttributeName: "userId", AttributeType: "S" },
    { AttributeName: "orderId", AttributeType: "S" },
  ],
  BillingMode: "PAY_PER_REQUEST",
};

全局二级索引（GSI）

// Global secondary index that lets the users table be queried by email.
//
// BillingMode is a table-level setting and is not a member of a
// GlobalSecondaryIndexes entry, so it is not set on the index; an index on a
// PAY_PER_REQUEST table needs no throughput settings of its own.
//
// NOTE(review): this shape is the CreateTable form (merge it into the table
// request, which also carries the table's own KeySchema). Adding an index to
// an existing table goes through UpdateTable with
// GlobalSecondaryIndexUpdates: [{ Create: { ... } }] — confirm which is meant.
const gsiParams = {
  TableName: "users",
  AttributeDefinitions: [
    { AttributeName: "email", AttributeType: "S" }
  ],
  GlobalSecondaryIndexes: [
    {
      IndexName: "emailIndex",
      KeySchema: [
        { AttributeName: "email", KeyType: "HASH" }
      ],
      Projection: {
        ProjectionType: "ALL"  // project every attribute into the index
      }
    }
  ]
}

// GSI with a composite key (userId partition key, createdAt sort key) for
// time-ordered queries per user.
//
// BillingMode is a table-level setting and is not a member of a
// GlobalSecondaryIndexes entry, so it is not set on the index.
//
// NOTE(review): the request that carries this index must also name the table
// and declare userId and createdAt in AttributeDefinitions — neither is shown
// in this fragment.
const timeIndexParams = {
  GlobalSecondaryIndexes: [
    {
      IndexName: "userCreatedIndex",
      KeySchema: [
        { AttributeName: "userId", KeyType: "HASH" },
        { AttributeName: "createdAt", KeyType: "RANGE" }
      ],
      Projection: { ProjectionType: "ALL" }
    }
  ]
}

DynamoDB项目操作

// PutItem request (insert or overwrite) for one user.
// Values use DynamoDB's typed attribute format: S = string, N = number
// (sent as a string), M = map.
const putParams = {
  TableName: "users",
  Item: {
    userId: { S: "user-123" },
    email: { S: "john@example.com" },
    name: { S: "John Doe" },
    createdAt: { N: Date.now().toString() },
    metadata: {
      M: { joinDate: { N: Date.now().toString() }, source: { S: "web" } },
    },
  },
};

// Query the users table through the emailIndex GSI for one exact email.
const queryParams = {
  TableName: "users",
  IndexName: "emailIndex",
  KeyConditionExpression: "email = :email",
  // :email is the placeholder bound in the key condition above.
  ExpressionAttributeValues: { ":email": { S: "john@example.com" } },
};

// BatchGetItem request: fetch two users by primary key in one call.
const batchGetParams = {
  RequestItems: {
    users: {
      Keys: [{ userId: { S: "user-123" } }, { userId: { S: "user-456" } }],
    },
  },
};

反规范化模式

MongoDB - 为性能而嵌入：

// Denormalized order: frequently read user and product fields are copied
// into the order so reads do not need a lookup.
db.orders.insertOne({
  _id: ObjectId("..."),
  userId: ObjectId("..."),
  userEmail: "john@example.com", // copied from the user
  userName: "John Doe", // copied from the user
  createdAt: new Date(),
  items: [
    {
      productId: ObjectId("..."),
      productName: "Laptop", // copied from the product
      productPrice: 999.99, // copied from the product
      quantity: 1,
    },
  ],
});

DynamoDB - 一致性反规范化：

// Denormalized order item: related data is stored inside the same item so it
// is written together with the order.
//
// Named orderSnapshotParams rather than params: the single-key users table
// example already declares `const params`, and a second declaration of the
// same name is a SyntaxError when the snippets share one scope. The name also
// follows the sibling examples (putParams, ordersParams, commentParams).
const orderSnapshotParams = {
  TableName: "orders",
  Item: {
    userId: { S: "user-123" },
    orderId: { S: "order-456" },
    orderDate: { N: Date.now().toString() },

    // Snapshot of the user's data as it was when the order was placed.
    userSnapshot: {
      M: {
        email: { S: "john@example.com" },
        address: { S: "123 Main St" }
      }
    },

    // Line items, each carrying its own copy of the product information.
    items: {
      L: [
        {
          M: {
            productId: { S: "prod-789" },
            name: { S: "Laptop" },
            price: { N: "999.99" },
            quantity: { N: "1" }
          }
        }
      ]
    }
  }
}

设计模式

MongoDB - 时间序列模式：

// Time-series storage: readings are grouped into a measurements array on a
// document identified by sensorId and date (presumably one document per
// sensor per day — confirm against the writer).
db.sensor_data.insertOne({
  _id: ObjectId("..."),
  sensorId: "sensor-123",
  date: ISODate("2024-01-15"),
  measurements: [
    { time: "12:00", temperature: 72.5, humidity: 45 },
    { time: "12:01", temperature: 72.6, humidity: 45.2 },
    { time: "12:02", temperature: 72.4, humidity: 44.8 },
  ],
});

// Compound index supporting queries by sensor with newest dates first.
db.sensor_data.createIndex({ sensorId: 1, date: -1 });

DynamoDB - 一对多关系：

// One-to-many in DynamoDB: a user's comments share the userId partition key
// and are told apart by the commentId sort key.
const commentParams = {
  TableName: "comments",
  Item: {
    userId: { S: "user-123" }, // partition key
    commentId: { S: "comment-789" }, // sort key
    postId: { S: "post-456" },
    content: { S: "Great article!" },
    createdAt: { N: Date.now().toString() },
  },
};

容量规划

MongoDB - 水平扩展：

// Shard a large collection on a hashed userId key.
// NOTE(review): on MongoDB versions before 6.0 the database must first be
// enabled with sh.enableSharding("ecommerce") — confirm the target version.
sh.shardCollection("ecommerce.orders", { userId: "hashed" })

// Monitor shard distribution: reports data size and document counts per
// shard. Assumes the current database is ecommerce.
db.orders.getShardDistribution()

// Check shard-key skew: document count per userId, largest first. This shows
// which key values dominate, not how chunks are placed across shards.
db.orders.aggregate([
  { $group: { _id: "$userId", count: { $sum: 1 } } },
  { $sort: { count: -1 } }
])

DynamoDB - 分区键设计：

// 好：跨多个键分布
// 分区键："USER#123"（跨分区分布）
// 排序键："ORDER#2024-01"

// 坏：热分区
// 分区键："ADMIN"（所有管理员操作击中同一分区）

// 解决方案：添加时间戳或随机后缀
// 分区键："ADMIN#20240115#random"

迁移考虑因素

规划数据迁移策略
考虑一致性要求
在最终确定模式前测试查询模式
部署后监控性能
文档记录关系和访问模式
计划模式演变

技能：NoSQL数据库设计（nosql-database-design）