NoSQL数据库设计
概览
为MongoDB(文档型)和DynamoDB(键值型)设计可扩展的NoSQL模式。涵盖数据建模模式、反规范化策略和NoSQL系统的查询优化。
使用场景
- MongoDB集合设计
- DynamoDB表和索引设计
- 文档结构建模
- 嵌入与引用决策
- 查询模式优化
- NoSQL索引策略
- 数据反规范化规划
MongoDB模式设计
文档结构设计
MongoDB - 嵌入式文档:
// Embedded model: a single user document carries its address and its orders inline.
// ObjectId("...") values are placeholders to be replaced with real ids.
db.createCollection("users");
db.users.insertOne({
  _id: ObjectId("..."),
  email: "john@example.com",
  name: "John Doe",
  createdAt: new Date(),
  // Address kept as a nested sub-document
  address: {
    street: "123 Main St",
    city: "New York",
    state: "NY",
    zipCode: "10001",
  },
  // Orders kept as an array of sub-documents
  orders: [
    { orderId: ObjectId("..."), date: new Date(), total: 149.99 },
    { orderId: ObjectId("..."), date: new Date(), total: 89.99 },
  ],
});
MongoDB - 引用文档:
// Referenced model: users and orders live in separate collections, and each
// order points at its owner through userId.
// ObjectId("...") values are placeholders to be replaced with real ids.
db.createCollection("users");
db.createCollection("orders");
db.users.insertOne({
  _id: ObjectId("..."),
  email: "john@example.com",
  name: "John Doe",
});
db.orders.insertMany([
  {
    _id: ObjectId("..."),
    userId: ObjectId("..."), // reference to users._id
    // Named createdAt (not orderDate) so these documents are covered by the
    // orders indexes on { createdAt: -1 } and { userId: 1, createdAt: -1 }.
    createdAt: new Date(),
    total: 149.99,
  },
  {
    _id: ObjectId("..."),
    userId: ObjectId("..."),
    createdAt: new Date(),
    total: 89.99,
  },
]);
// Join each matching order with its user; $lookup stores the matched users
// in the "user" array field of every output document.
db.orders.aggregate([
  { $match: { userId: ObjectId("...") } },
  {
    $lookup: {
      from: "users",
      localField: "userId",
      foreignField: "_id",
      as: "user",
    },
  },
]);
MongoDB中的索引
// Single-field indexes
db.users.createIndex({ email: 1 });
db.orders.createIndex({ createdAt: -1 });

// Compound index (userId ascending, then createdAt descending)
db.orders.createIndex({ userId: 1, createdAt: -1 });

// Text index for search across name and description
db.products.createIndex({ name: "text", description: "text" });

// Geospatial index
db.stores.createIndex({ location: "2dsphere" });

// TTL index: documents expire 3600 seconds after createdAt
db.sessions.createIndex({ createdAt: 1 }, { expireAfterSeconds: 3600 });

// Sparse index: only documents that contain the phone field are indexed
db.users.createIndex({ phone: 1 }, { sparse: true });

// Inspect index usage statistics
db.users.aggregate([{ $indexStats: {} }]);
模式验证
// Create the products collection with a $jsonSchema validator.
// name, price and category are required; tags and createdAt are optional
// but type-checked when present.
db.createCollection("products", {
  validator: {
    $jsonSchema: {
      bsonType: "object",
      required: ["name", "price", "category"],
      properties: {
        _id: { bsonType: "objectId" },
        name: {
          bsonType: "string",
          description: "产品名称(必填)",
        },
        price: {
          // "decimal" matches Decimal128 only; a plain shell number is stored
          // as a double and would be rejected, so insert NumberDecimal("...").
          bsonType: "decimal",
          // minimum: 0 is inclusive, so a price of 0 is accepted; the
          // description below says "non-negative" to match that.
          minimum: 0,
          description: "价格必须为非负数",
        },
        category: {
          enum: ["electronics", "clothing", "food"],
          description: "类别必须是列出的值之一",
        },
        tags: {
          bsonType: "array",
          items: { bsonType: "string" },
        },
        createdAt: {
          bsonType: "date",
        },
      },
    },
  },
});
DynamoDB模式设计
表结构
// Table definition with a simple primary key (partition key only).
const TableName = "users";
const params = {
  TableName,
  KeySchema: [
    { AttributeName: "userId", KeyType: "HASH" }, // partition key
  ],
  AttributeDefinitions: [
    { AttributeName: "userId", AttributeType: "S" }, // string
  ],
  BillingMode: "PAY_PER_REQUEST", // on-demand billing
};

// Table definition with a composite primary key (partition key + sort key).
const ordersParams = {
  TableName: "orders",
  KeySchema: [
    { AttributeName: "userId", KeyType: "HASH" }, // partition key
    { AttributeName: "orderId", KeyType: "RANGE" }, // sort key
  ],
  AttributeDefinitions: [
    { AttributeName: "userId", AttributeType: "S" },
    { AttributeName: "orderId", AttributeType: "S" },
  ],
  BillingMode: "PAY_PER_REQUEST",
};
全局二级索引(GSI)
// Add a GSI to the existing "users" table so items can be queried by email.
// Shaped as an UpdateTable request: an index added to an existing table goes
// under GlobalSecondaryIndexUpdates[].Create (GlobalSecondaryIndexes is only
// accepted by CreateTable). UpdateTable creates one GSI per call.
const gsiParams = {
  TableName: "users",
  // Every attribute used in the new index key must be declared here.
  AttributeDefinitions: [
    { AttributeName: "email", AttributeType: "S" },
  ],
  GlobalSecondaryIndexUpdates: [
    {
      Create: {
        IndexName: "emailIndex",
        KeySchema: [
          { AttributeName: "email", KeyType: "HASH" },
        ],
        Projection: {
          ProjectionType: "ALL", // project every attribute into the index
        },
        // No BillingMode here: it is a table-level setting, and an index on
        // a PAY_PER_REQUEST table needs no ProvisionedThroughput.
      },
    },
  ],
};

// GSI with a composite key (partition + sort) for time-ordered queries.
const timeIndexParams = {
  // NOTE(review): the original omitted TableName; "comments" is assumed
  // because its items carry both userId and createdAt - confirm the target.
  TableName: "comments",
  AttributeDefinitions: [
    { AttributeName: "userId", AttributeType: "S" },
    // createdAt is written as a number (epoch milliseconds) by the put examples.
    { AttributeName: "createdAt", AttributeType: "N" },
  ],
  GlobalSecondaryIndexUpdates: [
    {
      Create: {
        IndexName: "userCreatedIndex",
        KeySchema: [
          { AttributeName: "userId", KeyType: "HASH" },
          { AttributeName: "createdAt", KeyType: "RANGE" },
        ],
        Projection: { ProjectionType: "ALL" },
      },
    },
  ],
};
DynamoDB项目操作
// PutItem request: creates the item, or replaces an existing one with the same key.
const putParams = {
  TableName: "users",
  Item: {
    userId: { S: "user-123" },
    email: { S: "john@example.com" },
    name: { S: "John Doe" },
    // Number attributes are sent as strings in the low-level attribute format
    createdAt: { N: Date.now().toString() },
    metadata: {
      M: {
        joinDate: { N: Date.now().toString() },
        source: { S: "web" },
      },
    },
  },
};

// Query request that reads through the emailIndex GSI.
const queryParams = {
  TableName: "users",
  IndexName: "emailIndex",
  KeyConditionExpression: "email = :email",
  ExpressionAttributeValues: {
    ":email": { S: "john@example.com" },
  },
};

// BatchGetItem request: fetch several users by primary key in one call.
const batchGetParams = {
  RequestItems: {
    users: {
      Keys: [
        { userId: { S: "user-123" } },
        { userId: { S: "user-456" } },
      ],
    },
  },
};
反规范化模式
MongoDB - 为性能而嵌入:
// Copy frequently read user and product fields onto the order so that
// reading an order needs no lookup into other collections.
db.orders.insertOne({
  _id: ObjectId("..."),
  userId: ObjectId("..."),
  userEmail: "john@example.com", // denormalized copy
  userName: "John Doe", // denormalized copy
  createdAt: new Date(),
  items: [
    {
      productId: ObjectId("..."),
      productName: "Laptop", // denormalized copy
      productPrice: 999.99, // denormalized copy
      quantity: 1,
    },
  ],
});
DynamoDB - 一致性反规范化:
// PutItem request that stores an order together with the related data it
// needs, so the whole order is written and read as one item.
// Named orderPutParams (not params) so it does not redeclare the `params`
// constant of the users table definition when the snippets share one scope.
const orderPutParams = {
  TableName: "orders",
  Item: {
    userId: { S: "user-123" }, // partition key of the orders table
    orderId: { S: "order-456" }, // sort key of the orders table
    orderDate: { N: Date.now().toString() },
    // Snapshot of the user's data as it was when the order was placed
    userSnapshot: {
      M: {
        email: { S: "john@example.com" },
        address: { S: "123 Main St" },
      },
    },
    // Line items with the product details copied in
    items: {
      L: [
        {
          M: {
            productId: { S: "prod-789" },
            name: { S: "Laptop" },
            price: { N: "999.99" },
            quantity: { N: "1" },
          },
        },
      ],
    },
  },
};
设计模式
MongoDB - 时间序列模式:
// Time-series storage: a sensor's readings for one date are grouped into a
// single document, with each reading an entry in the measurements array.
db.sensor_data.insertOne({
  _id: ObjectId("..."),
  sensorId: "sensor-123",
  date: ISODate("2024-01-15"),
  measurements: [
    { time: "12:00", temperature: 72.5, humidity: 45 },
    { time: "12:01", temperature: 72.6, humidity: 45.2 },
    { time: "12:02", temperature: 72.4, humidity: 44.8 },
  ],
});

// Compound index (sensorId ascending, date descending) for efficient queries
db.sensor_data.createIndex({ sensorId: 1, date: -1 });
DynamoDB - 一对多关系:
// One-to-many relationship: one user, many comments.
// userId is the partition key and commentId the sort key, so all of a
// user's comments share a partition key value.
const commentParams = {
  TableName: "comments",
  Item: {
    userId: { S: "user-123" }, // partition key
    commentId: { S: "comment-789" }, // sort key
    postId: { S: "post-456" },
    content: { S: "Great article!" },
    createdAt: { N: Date.now().toString() },
  },
};
容量规划
MongoDB - 水平扩展:
// Shard the large orders collection on a hashed userId shard key.
sh.shardCollection("ecommerce.orders", { userId: "hashed" });

// Report how the collection's data and documents are spread across shards.
// NOTE(review): db.orders assumes the shell's current database is "ecommerce".
db.orders.getShardDistribution();

// Count orders per shard-key value, largest first. This shows skewed
// userIds (potential hot keys), not the per-shard distribution.
db.orders.aggregate([
  { $group: { _id: "$userId", count: { $sum: 1 } } },
  { $sort: { count: -1 } },
]);
DynamoDB - 分区键设计:
// 好:跨多个键分布
// 分区键:"USER#123"(跨分区分布)
// 排序键:"ORDER#2024-01"
// 坏:热分区
// 分区键:"ADMIN"(所有管理员操作击中同一分区)
// 解决方案:在分区键中加入日期和有界的分片后缀(例如 0–9);读取时遍历所有后缀并合并结果
// 分区键:"ADMIN#20240115#<0-9>"
迁移考虑因素
- 规划数据迁移策略
- 考虑一致性要求
- 在最终确定模式前测试查询模式
- 部署后监控性能
- 文档记录关系和访问模式
- 计划模式演变