名称: 性能测试 描述: 性能测试和负载测试专业知识,包括k6、locust、JMeter、Gatling、artillery、API负载测试、数据库查询优化、基准测试策略、分析技术、指标分析(p95/p99延迟、吞吐量、RPS)、性能预算和瓶颈识别。用于实现负载测试、压力测试、峰值测试、浸泡测试,分析系统在并发用户下的行为,测量饱和点,或优化负载下的应用程序性能。触发关键词:性能测试、负载测试、压力测试、k6、locust、JMeter、Gatling、artillery、基准、基准测试、分析、延迟、吞吐量、RPS、每秒请求数、并发用户、虚拟用户、百分位数、p95、p99、p50、中位数延迟、饱和、瓶颈、性能预算、API负载测试、数据库性能、查询优化、慢查询、可扩展性测试、容量规划、响应时间、错误率、apdex。
性能测试
概述
性能测试验证应用程序在各种负载条件下满足速度、可扩展性和稳定性要求。此技能提供负载测试工具(k6、locust、JMeter、Gatling)、API和数据库性能测试、基准测试策略、分析技术以及识别和解决性能瓶颈的系统方法的全面专业知识。
何时使用此技能
当您需要执行以下任务时,请使用此技能:
- 实现负载测试、压力测试、峰值测试或浸泡测试
- 测量API端点在并发用户下的性能
- 优化数据库查询并识别慢查询
- 分析延迟百分位数(p50、p95、p99)和吞吐量(RPS)
- 设置性能监控和警报
- 识别系统饱和点和瓶颈
- 建立性能预算和SLOs
- 进行容量规划和可扩展性分析
测试类型和模式
负载测试类型
| 测试类型 | 目的 | 模式 | 何时使用 |
|---|---|---|---|
| 负载测试 | 验证预期负载下的性能 | 随时间恒定VUs | 建立基线性能 |
| 压力测试 | 找到崩溃点 | 逐步增加直到失败 | 确定系统限制 |
| 峰值测试 | 测试突然流量激增 | 快速增加到高负载 | 验证自动扩展、缓存 |
| 浸泡测试 | 检测内存泄漏、退化 | 中等负载延长时间(小时) | 生产就绪准备 |
| 断点测试 | 找到最大容量 | 增量负载增加 | 容量规划 |
k6 测试模式
模式:基线负载测试
export const options = {
vus: 50,
duration: "5m",
thresholds: {
http_req_duration: ["p(95)<500"],
},
};
模式:压力测试(找到崩溃点)
export const options = {
stages: [
{ duration: "2m", target: 100 },
{ duration: "5m", target: 100 },
{ duration: "2m", target: 200 },
{ duration: "5m", target: 200 },
{ duration: "2m", target: 300 },
{ duration: "5m", target: 300 },
{ duration: "5m", target: 0 },
],
};
模式:峰值测试
export const options = {
stages: [
{ duration: "30s", target: 50 }, // 正常负载
{ duration: "10s", target: 500 }, // 峰值!
{ duration: "1m", target: 500 }, // 保持峰值
{ duration: "10s", target: 50 }, // 下降
{ duration: "1m", target: 50 }, // 恢复
],
};
模式:浸泡测试(内存泄漏)
export const options = {
vus: 100,
duration: "4h", // 延长时间
thresholds: {
http_req_duration: ["p(95)<500"],
http_req_failed: ["rate<0.01"],
},
};
指令
1. 使用现代工具进行负载测试
k6(推荐用于API负载测试):
# Install on macOS with Homebrew
brew install k6
# Or use the official Docker image. k6 is a standalone Go binary and is not
# distributed through npm, so `npm install -g k6` does not provide the CLI.
docker pull grafana/k6
// load-test.js
import http from "k6/http";
import { check, sleep } from "k6";
import { Rate, Trend } from "k6/metrics";
// textSummary is not a k6 built-in; it is published in the k6-summary jslib module.
import { textSummary } from "https://jslib.k6.io/k6-summary/0.0.1/index.js";

// Custom metrics
const errorRate = new Rate("errors");
const latencyTrend = new Trend("latency");

export const options = {
  stages: [
    { duration: "2m", target: 100 }, // ramp up
    { duration: "5m", target: 100 }, // steady state
    { duration: "2m", target: 200 }, // ramp to peak
    { duration: "5m", target: 200 }, // sustained peak
    { duration: "2m", target: 0 }, // ramp down
  ],
  thresholds: {
    http_req_duration: ["p(95)<500", "p(99)<1000"],
    errors: ["rate<0.01"],
    http_req_failed: ["rate<0.01"],
  },
};

/**
 * Parses a response body as JSON without throwing.
 *
 * Error responses are frequently HTML or empty. An exception thrown inside a
 * check callback would abort the whole iteration instead of recording a
 * failed check, so a parse failure is reported as `null`.
 *
 * @param {object} response - k6 HTTP response.
 * @returns {object|null} Parsed body, or null when the body is not valid JSON.
 */
function parseJsonBody(response) {
  try {
    return JSON.parse(response.body);
  } catch (err) {
    return null;
  }
}

/**
 * VU iteration: POSTs one JSON payload, records the custom metrics and
 * validates the response, then pauses for one second.
 */
export default function () {
  const payload = JSON.stringify({
    username: `user_${__VU}_${__ITER}`,
    action: "test",
  });
  const params = {
    headers: {
      "Content-Type": "application/json",
      Authorization: `Bearer ${__ENV.API_TOKEN}`,
    },
  };
  const response = http.post(
    "https://api.example.com/endpoint",
    payload,
    params,
  );

  latencyTrend.add(response.timings.duration);
  errorRate.add(response.status !== 200);

  check(response, {
    "状态是200": (r) => r.status === 200,
    "响应时间 < 500ms": (r) => r.timings.duration < 500,
    "有必需字段": (r) => {
      const body = parseJsonBody(r);
      // Coerce to a strict boolean so the check result is unambiguous.
      return Boolean(body && body.id && body.status);
    },
  });

  sleep(1);
}

/**
 * End-of-test hook: writes the raw summary to summary.json and prints the
 * human-readable text summary to stdout.
 *
 * @param {object} data - Summary data passed in by k6.
 * @returns {object} Map of output target to content.
 */
export function handleSummary(data) {
  return {
    "summary.json": JSON.stringify(data),
    stdout: textSummary(data, { indent: " ", enableColors: true }),
  };
}
运行 k6 测试:
# 基本运行
k6 run load-test.js
# 带环境变量
k6 run -e API_TOKEN=xxx load-test.js
# 云执行
k6 cloud load-test.js
# 输出到InfluxDB用于Grafana
k6 run --out influxdb=http://localhost:8086/k6 load-test.js
Locust(基于Python的负载测试):
# locustfile.py
from locust import HttpUser, task, between
from locust import events
import time
class WebsiteUser(HttpUser):
    """Simulated shopper: logs in once, then browses products and occasionally adds to the cart."""

    wait_time = between(1, 3)
    # Locust spawns user classes in proportion to `weight`. AdminUser has
    # weight 1, so 10 here gives roughly ten shoppers per admin; without an
    # explicit weight both classes default to 1 and spawn 1:1.
    weight = 10

    def on_start(self):
        """Log in when the simulated user starts and keep the returned token."""
        response = self.client.post("/login", json={
            "username": "testuser",
            "password": "testpass"
        })
        self.token = response.json().get("token")

    @task(3)
    def view_products(self):
        """Most common action."""
        self.client.get("/api/products", headers={
            "Authorization": f"Bearer {self.token}"
        })

    @task(2)
    def view_product_detail(self):
        """View a single product."""
        self.client.get("/api/products/1", headers={
            "Authorization": f"Bearer {self.token}"
        })

    @task(1)
    def add_to_cart(self):
        """Less common action."""
        self.client.post("/api/cart", json={
            "product_id": 1,
            "quantity": 1
        }, headers={
            "Authorization": f"Bearer {self.token}"
        })
class AdminUser(HttpUser):
    """Simulated administrator that only requests the dashboard."""

    wait_time = between(2, 5)
    # Relative spawn weight. The admin-to-shopper ratio is this value divided
    # by WebsiteUser.weight, so a 1:10 ratio needs WebsiteUser.weight == 10
    # (a class without an explicit weight defaults to 1).
    weight = 1

    @task
    def view_dashboard(self):
        """Request the admin dashboard."""
        self.client.get("/admin/dashboard")
# Custom request hook
@events.request.add_listener
def on_request(request_type, name, response_time, response_length, exception, **kwargs):
    """Print one line for every request that finished with an exception."""
    if not exception:
        return
    print(f"请求失败: {name} - {exception}")
运行 Locust:
# Web UI模式
locust -f locustfile.py --host=https://api.example.com
# 无头模式
locust -f locustfile.py --headless -u 100 -r 10 --run-time 5m --host=https://api.example.com
# 分布式模式
locust -f locustfile.py --master
locust -f locustfile.py --worker --master-host=192.168.1.1
Artillery(基于YAML的负载测试):
# artillery-config.yml
config:
  target: "https://api.example.com"
  # Load phases run in order: warm-up, ramp, then sustained load.
  phases:
    - duration: 60
      arrivalRate: 5
      name: "热身"
    - duration: 120
      arrivalRate: 20
      rampTo: 50
      name: "增加"
    - duration: 300
      arrivalRate: 50
      name: "持续负载"
  defaults:
    headers:
      Content-Type: "application/json"
  plugins:
    # Enables the per-request `expect` assertions used in the scenario below.
    expect: {}
  # Fail the run when p95 latency exceeds 500 ms or the error rate exceeds 1%.
  ensure:
    p95: 500
    maxErrorRate: 1
scenarios:
  - name: "用户旅程"
    flow:
      - post:
          url: "/auth/login"
          json:
            username: "{{ $randomString() }}"
            password: "password123"
          # Capture the token so later steps can authenticate.
          capture:
            - json: "$.token"
              as: "authToken"
          expect:
            - statusCode: 200
            - hasProperty: "token"
      - get:
          url: "/api/products"
          headers:
            Authorization: "Bearer {{ authToken }}"
          expect:
            - statusCode: 200
            - contentType: "application/json"
      - think: 2
      - post:
          url: "/api/cart"
          headers:
            Authorization: "Bearer {{ authToken }}"
          json:
            productId: "{{ $randomNumber(1, 100) }}"
            quantity: 1
          expect:
            - statusCode: 201
运行 Artillery:
# 运行测试
artillery run artillery-config.yml
# 生成报告
artillery run artillery-config.yml --output report.json
artillery report report.json --output report.html
2. API负载测试模式
REST API负载测试:
// k6 REST API load test with authentication and varied data
import http from "k6/http";
import { check, sleep } from "k6";
import { SharedArray } from "k6/data";
// randomIntBetween is provided by the k6-utils jslib module; "k6/x/util" is
// not a module that k6 ships, so importing from it fails at script load.
import { randomIntBetween } from "https://jslib.k6.io/k6-utils/1.2.0/index.js";

// Load the test users from a JSON file. SharedArray runs the loader function
// and exposes the resulting array to the VUs.
const testData = new SharedArray("users", function () {
  return JSON.parse(open("./test-data.json"));
});
export const options = {
scenarios: {
// 读取重工作负载(70%读取)
reads: {
executor: "constant-arrival-rate",
rate: 700,
timeUnit: "1s",
duration: "5m",
preAllocatedVUs: 50,
maxVUs: 200,
exec: "readScenario",
},
// 写入工作负载(30%写入)
writes: {
executor: "constant-arrival-rate",
rate: 300,
timeUnit: "1s",
duration: "5m",
preAllocatedVUs: 30,
maxVUs: 100,
exec: "writeScenario",
},
},
thresholds: {
"http_req_duration{scenario:reads}": ["p(95)<200", "p(99)<500"],
"http_req_duration{scenario:writes}": ["p(95)<500", "p(99)<1000"],
http_req_failed: ["rate<0.01"],
},
};
let authToken;
/**
 * Logs in once before the test starts and hands the token to every scenario.
 *
 * @returns {{token: string}} Data object passed to the scenario functions.
 * @throws {Error} When the login fails or returns no token. Throwing from
 *   setup() aborts the run instead of load-testing with an invalid token.
 */
export function setup() {
  const loginRes = http.post(
    `${__ENV.API_URL}/auth/login`,
    JSON.stringify({
      email: "loadtest@example.com",
      password: "test123",
    }),
    {
      headers: { "Content-Type": "application/json" },
    },
  );

  if (loginRes.status < 200 || loginRes.status >= 300) {
    throw new Error(`setup(): login failed with HTTP ${loginRes.status}`);
  }

  const token = loginRes.json("token");
  if (!token) {
    throw new Error("setup(): login response did not contain a token");
  }
  return { token };
}
/**
 * Read workload: fetches a random user and validates the response.
 *
 * @param {{token: string}} data - Value returned by setup().
 */
export function readScenario(data) {
  const headers = {
    Authorization: `Bearer ${data.token}`,
    "Content-Type": "application/json",
  };

  // GET a random user by id
  const userId = randomIntBetween(1, 10000);
  const res = http.get(`${__ENV.API_URL}/api/users/${userId}`, {
    headers,
    tags: { name: "GetUser" },
  });

  check(res, {
    "状态是200": (r) => r.status === 200,
    "有用户数据": (r) => r.json("id") === userId,
    "响应时间OK": (r) => r.timings.duration < 200,
  });

  // No sleep() here: this function runs under a constant-arrival-rate
  // executor, which paces iterations itself. A 0.5s sleep at 700 iterations/s
  // would need 350+ VUs, more than maxVUs (200), so iterations would be
  // dropped and the target rate never reached.
}
/**
 * Write workload: creates an order for a random test user.
 *
 * @param {{token: string}} data - Value returned by setup().
 */
export function writeScenario(data) {
  const headers = {
    Authorization: `Bearer ${data.token}`,
    "Content-Type": "application/json",
  };

  // POST with a dynamic payload built from the shared test data
  const user = testData[Math.floor(Math.random() * testData.length)];
  const res = http.post(
    `${__ENV.API_URL}/api/orders`,
    JSON.stringify({
      userId: user.id,
      items: [{ productId: randomIntBetween(1, 100), quantity: 1 }],
      timestamp: new Date().toISOString(),
    }),
    { headers, tags: { name: "CreateOrder" } },
  );

  check(res, {
    "状态是201": (r) => r.status === 201,
    "订单已创建": (r) => r.json("id") !== undefined,
  });

  // No sleep() here: the constant-arrival-rate executor controls pacing.
  // A 1s sleep at 300 iterations/s would need 300+ VUs, more than maxVUs
  // (100), so the configured rate could not be sustained.
}
GraphQL API负载测试:
import http from "k6/http";
import { check } from "k6";
export default function () {
const query = `
query GetUserWithOrders($userId: ID!) {
user(id: $userId) {
id
name
orders(limit: 10) {
id
total
items {
productId
quantity
}
}
}
}
`;
const variables = {
userId: `${__VU}`,
};
const res = http.post(
"https://api.example.com/graphql",
JSON.stringify({
query,
variables,
}),
{
headers: {
"Content-Type": "application/json",
Authorization: `Bearer ${__ENV.TOKEN}`,
},
},
);
check(res, {
"无GraphQL错误": (r) => !r.json("errors"),
"用户数据存在": (r) => r.json("data.user.id") === variables.userId,
});
}
WebSocket负载测试:
import ws from "k6/ws";
import { check } from "k6";
export default function () {
const url = "wss://api.example.com/ws";
const params = { tags: { my_tag: "websocket" } };
const res = ws.connect(url, params, function (socket) {
socket.on("open", () => {
console.log("已连接");
socket.send(JSON.stringify({ type: "subscribe", channel: "updates" }));
});
socket.on("message", (data) => {
const msg = JSON.parse(data);
check(msg, {
"有效消息": (m) => m.type !== undefined,
});
});
socket.on("error", (e) => {
console.log("错误:", e.error());
});
socket.setTimeout(() => {
socket.close();
}, 60000);
});
check(res, { "状态是101": (r) => r && r.status === 101 });
}
3. 数据库性能测试
查询性能测试:
// database-perf-test.ts
import { performance } from "perf_hooks";
import { Pool } from "pg";
interface QueryBenchmark {
query: string;
params?: any[];
iterations: number;
results: {
min: number;
max: number;
avg: number;
p50: number;
p95: number;
p99: number;
};
}
/**
 * Nearest-rank percentile of an ascending-sorted, non-empty array.
 *
 * Uses an integer percentage so the rank is computed without floating-point
 * drift. For 100 samples p95 is the 95th value (index 94) and p99 the 99th
 * (index 98), rather than the 96th value and the maximum.
 */
function nearestRankPercentile(sorted: number[], percent: number): number {
  const rank = Math.ceil((percent * sorted.length) / 100);
  return sorted[Math.max(0, rank - 1)];
}

/**
 * Runs a query repeatedly and reports latency statistics in milliseconds.
 *
 * @param pool - pg connection pool used to execute the query.
 * @param query - SQL text; only the first 100 characters are echoed back.
 * @param params - Positional query parameters.
 * @param iterations - Number of timed executions (after 10 warm-up runs).
 * @returns The query, its parameters and min/max/avg/p50/p95/p99 timings.
 * @throws RangeError when `iterations` is not a positive integer, because
 *   statistics over zero samples would be NaN/undefined.
 */
async function benchmarkQuery(
  pool: Pool,
  query: string,
  params: any[] = [],
  iterations: number = 100,
): Promise<QueryBenchmark> {
  if (!Number.isInteger(iterations) || iterations < 1) {
    throw new RangeError("iterations must be a positive integer");
  }

  // Warm-up: executed but not timed
  for (let i = 0; i < 10; i++) {
    await pool.query(query, params);
  }

  // Timed runs
  const timings: number[] = [];
  for (let i = 0; i < iterations; i++) {
    const start = performance.now();
    await pool.query(query, params);
    timings.push(performance.now() - start);
  }
  timings.sort((a, b) => a - b);

  const total = timings.reduce((sum, t) => sum + t, 0);
  return {
    query: query.substring(0, 100),
    params,
    iterations,
    results: {
      min: timings[0],
      max: timings[timings.length - 1],
      avg: total / timings.length,
      p50: nearestRankPercentile(timings, 50),
      p95: nearestRankPercentile(timings, 95),
      p99: nearestRankPercentile(timings, 99),
    },
  };
}
/**
 * Benchmarks a fixed set of example queries one after another and prints a
 * result table for each. The pool is always closed, even when a benchmark
 * fails, so a failing query does not leave connections open.
 */
async function compareQueries() {
  const pool = new Pool({
    /* configuration */
  });
  const queries = [
    {
      name: "无索引",
      sql: "SELECT * FROM users WHERE email = $1",
      params: ["test@example.com"],
    },
    {
      name: "有索引",
      sql: "SELECT * FROM users WHERE id = $1",
      params: [1],
    },
    {
      name: "复杂连接",
      sql: `
SELECT u.*, COUNT(o.id) as order_count
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
WHERE u.created_at > $1
GROUP BY u.id
LIMIT 100
`,
      params: ["2024-01-01"],
    },
  ];

  try {
    for (const { name, sql, params } of queries) {
      const result = await benchmarkQuery(pool, sql, params);
      console.log(`\n${name}:`);
      console.table(result.results);
    }
  } finally {
    await pool.end();
  }
}
连接池负载测试:
// connection-pool-test.ts
import { Pool } from "pg";
import { performance } from "perf_hooks";
/**
 * Runs `SELECT 1` in a loop from several concurrent workers against a pool
 * of a given size and reports throughput and latency.
 *
 * @param poolSize - Maximum number of pooled connections.
 * @param concurrentQueries - Number of concurrent worker loops.
 * @param duration - Test length in milliseconds.
 * @returns The inputs, the raw counters and timings, and the derived
 *   avgQueryTime / p95QueryTime (ms) and qps. When no query succeeds the
 *   derived latencies are 0 rather than NaN/undefined.
 */
async function testConnectionPool(
  poolSize: number,
  concurrentQueries: number,
  duration: number,
) {
  const pool = new Pool({
    max: poolSize,
    idleTimeoutMillis: 30000,
    connectionTimeoutMillis: 2000,
  });
  const stats = {
    totalQueries: 0,
    successfulQueries: 0,
    failedQueries: 0,
    timeouts: 0,
    queryTimes: [] as number[],
  };
  const startTime = Date.now();

  // One worker loop; every error is counted here, so the loop never rejects.
  const runWorker = async (): Promise<void> => {
    while (Date.now() - startTime < duration) {
      try {
        const start = performance.now();
        await pool.query("SELECT 1");
        stats.queryTimes.push(performance.now() - start);
        stats.successfulQueries++;
      } catch (err) {
        stats.failedQueries++;
        // A catch variable may be `unknown` and need not be an Error.
        const message = err instanceof Error ? err.message : String(err);
        if (message.includes("timeout")) {
          stats.timeouts++;
        }
      }
      stats.totalQueries++;
    }
  };

  const workers: Promise<void>[] = [];
  for (let i = 0; i < concurrentQueries; i++) {
    workers.push(runWorker());
  }
  await Promise.all(workers);
  await pool.end();

  stats.queryTimes.sort((a, b) => a - b);
  const sampleCount = stats.queryTimes.length;
  const totalTime = stats.queryTimes.reduce((a, b) => a + b, 0);
  return {
    poolSize,
    concurrentQueries,
    duration,
    ...stats,
    avgQueryTime: sampleCount > 0 ? totalTime / sampleCount : 0,
    p95QueryTime:
      sampleCount > 0 ? stats.queryTimes[Math.floor(sampleCount * 0.95)] : 0,
    qps: stats.successfulQueries / (duration / 1000),
  };
}
/**
 * Runs the connection-pool test for several pool sizes, one at a time, with
 * 100 concurrent workers for 30 seconds each, then prints all results.
 */
async function findOptimalPoolSize() {
  const candidateSizes = [5, 10, 20, 50, 100];
  const results = [];
  for (let index = 0; index < candidateSizes.length; index++) {
    const poolSize = candidateSizes[index];
    console.log(`测试池大小: ${poolSize}`);
    results.push(await testConnectionPool(poolSize, 100, 30000));
  }
  console.table(results);
}
N+1查询检测:
// n-plus-one-detector.ts
/**
 * Counts how often each normalised SQL statement is executed so that
 * repeated statements (a typical N+1 symptom) can be spotted.
 */
class QueryTracker {
  private queries: Map<string, number> = new Map();
  private startTime: number = 0;

  /** Clears all counters and restarts the clock. */
  start() {
    this.queries.clear();
    this.startTime = Date.now();
  }

  /** Records one execution of `sql` under its normalised form. */
  track(sql: string) {
    const normalized = this.normalizeSql(sql);
    this.queries.set(normalized, (this.queries.get(normalized) || 0) + 1);
  }

  /**
   * Returns the statements executed more than `threshold` times, most
   * frequent first.
   */
  detectNPlusOne(
    threshold: number = 10,
  ): Array<{ query: string; count: number }> {
    const suspicious: Array<{ query: string; count: number }> = [];
    for (const [query, count] of this.queries.entries()) {
      if (count > threshold) {
        suspicious.push({ query, count });
      }
    }
    return suspicious.sort((a, b) => b.count - a.count);
  }

  /**
   * Replaces literals with `?` so statements differing only in their values
   * compare equal. String literals are replaced first; numbers are matched
   * only as standalone tokens so identifiers such as `orders2` stay intact.
   */
  private normalizeSql(sql: string): string {
    return sql
      .replace(/'[^']*'/g, "?")
      .replace(/\b\d+(\.\d+)?\b/g, "?")
      .replace(/\s+/g, " ")
      .trim();
  }

  /** Prints totals and any suspected N+1 statements. */
  report() {
    const duration = Date.now() - this.startTime;
    const nPlusOne = this.detectNPlusOne();
    let totalExecutions = 0;
    for (const count of this.queries.values()) {
      totalExecutions += count;
    }

    console.log(`\n查询分析 (${duration}ms):`);
    console.log(`总唯一查询数: ${this.queries.size}`);
    console.log(`总查询执行次数: ${totalExecutions}`);
    if (nPlusOne.length > 0) {
      // The newline must be an escape: a raw line break inside a
      // double-quoted string literal is a syntax error.
      console.log("\n潜在N+1查询:");
      console.table(nPlusOne);
    }
  }
}
4. 基准测试策略
微基准测试(函数级别):
// Node.js 使用 benchmark.js
import Benchmark from "benchmark";
const suite = new Benchmark.Suite();
const data = Array.from({ length: 10000 }, (_, i) => i);
suite
.add("for循环", function () {
let sum = 0;
for (let i = 0; i < data.length; i++) {
sum += data[i];
}
return sum;
})
.add("forEach", function () {
let sum = 0;
data.forEach((n) => {
sum += n;
});
return sum;
})
.add("reduce", function () {
return data.reduce((sum, n) => sum + n, 0);
})
.on("cycle", function (event: Benchmark.Event) {
console.log(String(event.target));
})
.on("complete", function (this: Benchmark.Suite) {
console.log("最快的是 " + this.filter("fastest").map("name"));
})
.run({ async: true });
数据库查询基准测试:
// benchmark-queries.ts
import { performance } from "perf_hooks";
interface BenchmarkResult {
query: string;
avgTime: number;
minTime: number;
maxTime: number;
p95: number;
iterations: number;
}
/**
 * Times an async query function and reports latency statistics in
 * milliseconds.
 *
 * @param name - Label echoed in the result.
 * @param queryFn - Function that executes the query once.
 * @param iterations - Number of timed executions (after 10 warm-up runs).
 * @returns avg/min/max/p95 timings together with the label and iteration count.
 * @throws RangeError when `iterations` is not a positive integer, because
 *   no statistics can be computed from zero samples.
 */
async function benchmarkQuery(
  name: string,
  queryFn: () => Promise<any>,
  iterations: number = 100,
): Promise<BenchmarkResult> {
  if (!Number.isInteger(iterations) || iterations < 1) {
    throw new RangeError("iterations must be a positive integer");
  }

  // Warm-up: executed but not timed
  for (let i = 0; i < 10; i++) {
    await queryFn();
  }

  // Timed runs
  const times: number[] = [];
  for (let i = 0; i < iterations; i++) {
    const start = performance.now();
    await queryFn();
    times.push(performance.now() - start);
  }
  times.sort((a, b) => a - b);

  // Nearest-rank p95 with integer arithmetic: for 100 samples this is the
  // 95th value (index 94), not the 96th.
  const p95Index = Math.max(0, Math.ceil((95 * times.length) / 100) - 1);
  return {
    query: name,
    avgTime: times.reduce((sum, t) => sum + t, 0) / times.length,
    minTime: times[0],
    maxTime: times[times.length - 1],
    p95: times[p95Index],
    iterations,
  };
}
// Usage: the benchmarks run strictly one after another. Starting them
// together with Promise.all would make them compete for the same database
// connections and CPU, inflating every measured latency.
const results = [
  await benchmarkQuery("findUserById", () => db.users.findById(1)),
  await benchmarkQuery("findUserWithJoin", () =>
    db.users.findById(1).include("orders"),
  ),
  await benchmarkQuery("complexAggregation", () =>
    db.orders.aggregate([
      /* pipeline */
    ]),
  ),
];
console.table(results);
HTTP端点基准测试:
# 使用 wrk
wrk -t12 -c400 -d30s --latency https://api.example.com/endpoint
# 使用 autocannon (Node.js)
npx autocannon -c 100 -d 30 -p 10 https://api.example.com/endpoint
# 使用 hey
hey -n 10000 -c 100 https://api.example.com/endpoint
5. 分析技术
Node.js CPU分析:
// 启用内置分析器
// node --prof app.js
// node --prof-process isolate-*.log > processed.txt
// 程序化分析
import { Session } from "inspector";
import { writeFileSync } from "fs";
/**
 * Records a CPU profile while `fn` runs and writes it to
 * `profile.cpuprofile` in the current working directory.
 *
 * The profiler is stopped and the inspector session disconnected even when
 * `fn` rejects; the rejection is then propagated to the caller.
 *
 * @param fn - Async workload to profile.
 */
async function profileFunction(fn: () => Promise<any>) {
  const session = new Session();
  session.connect();

  // Promise wrapper around the callback-style session.post(); rejects on
  // error instead of destructuring an undefined result.
  const send = (method: string) =>
    new Promise<any>((resolve, reject) => {
      session.post(method, (err: Error | null, result?: object) => {
        if (err) {
          reject(err);
        } else {
          resolve(result);
        }
      });
    });

  try {
    await send("Profiler.enable");
    await send("Profiler.start");
    try {
      await fn();
    } finally {
      const { profile } = await send("Profiler.stop");
      writeFileSync("profile.cpuprofile", JSON.stringify(profile));
    }
  } finally {
    session.disconnect();
  }
}
// 使用
await profileFunction(async () => {
// 要分析的代码
await heavyComputation();
});
// 在Chrome DevTools中打开profile.cpuprofile
内存分析:
// 内存快照
import v8 from "v8";
import { writeFileSync } from "fs";
/**
 * Writes a V8 heap snapshot to `filename` and logs the value that
 * v8.writeHeapSnapshot() returns.
 */
function takeHeapSnapshot(filename: string) {
  const writtenPath = v8.writeHeapSnapshot(filename);
  console.log(`堆快照写入到 ${writtenPath}`);
}
/**
 * Logs the process memory figures (heap used, heap total, external, RSS)
 * rounded to whole megabytes, prefixed with `label`.
 */
function logMemoryUsage(label: string) {
  const toMegabytes = (bytes: number) => `${Math.round(bytes / 1024 / 1024)}MB`;
  const { heapUsed, heapTotal, external, rss } = process.memoryUsage();
  const formatted = {
    heapUsed: toMegabytes(heapUsed),
    heapTotal: toMegabytes(heapTotal),
    external: toMegabytes(external),
    rss: toMegabytes(rss),
  };
  console.log(`内存 [${label}]:`, formatted);
}
/**
 * Samples heap usage on a timer and warns when the heap has grown by more
 * than 10% across the sampling window.
 */
class MemoryLeakDetector {
  private samples: number[] = [];
  // ReturnType<typeof setInterval> matches what setInterval actually returns
  // and is accepted by clearInterval, unlike the deprecated NodeJS.Timer.
  private interval: ReturnType<typeof setInterval> | null = null;

  /**
   * Starts sampling. Calling start() again restarts the detector instead of
   * leaving the previous timer running.
   *
   * @param sampleInterval - Milliseconds between samples. The window holds
   *   60 samples, i.e. one minute at the default of 1000 ms.
   */
  start(sampleInterval: number = 1000) {
    this.stop();
    this.samples = [];
    this.interval = setInterval(() => {
      this.samples.push(process.memoryUsage().heapUsed);
      if (this.samples.length > 60) {
        const trend = this.calculateTrend();
        if (trend > 0.1) {
          // More than 10% growth between the oldest and newest sample
          console.warn("检测到潜在内存泄漏!");
        }
        // Slide the window forward by one sample
        this.samples.shift();
      }
    }, sampleInterval);
  }

  /** Relative heap growth between the oldest and the newest sample. */
  private calculateTrend(): number {
    if (this.samples.length < 2) return 0;
    const first = this.samples[0];
    const last = this.samples[this.samples.length - 1];
    return (last - first) / first;
  }

  /** Stops sampling; safe to call when the detector is not running. */
  stop() {
    if (this.interval) {
      clearInterval(this.interval);
      this.interval = null;
    }
  }
}
数据库查询分析:
-- PostgreSQL:启用查询日志
SET log_statement = 'all';
SET log_duration = on;
-- 分析查询计划
EXPLAIN ANALYZE SELECT * FROM users
WHERE created_at > '2024-01-01'
ORDER BY created_at DESC
LIMIT 100;
-- Find the statements that consume the most total execution time
-- (requires the pg_stat_statements extension).
-- PostgreSQL 13+ names these columns total_exec_time / mean_exec_time;
-- on PostgreSQL 12 and older they are total_time / mean_time.
-- Both are in milliseconds, hence the division by 1000.
SELECT
  query,
  calls,
  total_exec_time / 1000 AS total_seconds,
  mean_exec_time / 1000 AS mean_seconds,
  rows
FROM pg_stat_statements
ORDER BY total_exec_time DESC
LIMIT 20;
// 应用级查询分析
import { performance } from "perf_hooks";
/**
 * In-memory query log with a slow-query warning and summary statistics.
 * Durations are in milliseconds. Note that `queries` grows without bound.
 */
const queryLogger = {
  queries: [] as Array<{ sql: string; duration: number; timestamp: Date }>,

  /** Records one executed query and warns when it took longer than 100 ms. */
  log(sql: string, duration: number) {
    this.queries.push({ sql, duration, timestamp: new Date() });
    if (duration > 100) {
      console.warn(`慢查询 (${duration}ms): ${sql.substring(0, 100)}...`);
    }
  },

  /** Returns the logged queries slower than `threshold` milliseconds. */
  getSlowQueries(threshold: number = 100) {
    return this.queries.filter((q) => q.duration > threshold);
  },

  /**
   * Returns count, average, maximum and nearest-rank p95 of the logged
   * durations. With no logged queries every value is 0 instead of
   * NaN / -Infinity / undefined.
   */
  getStats() {
    const durations = this.queries
      .map((q) => q.duration)
      .sort((a, b) => a - b);
    const count = durations.length;
    if (count === 0) {
      return { count: 0, avg: 0, max: 0, p95: 0 };
    }
    const total = durations.reduce((a, b) => a + b, 0);
    const p95Index = Math.max(0, Math.ceil((95 * count) / 100) - 1);
    return {
      count,
      avg: total / count,
      // The array is sorted ascending, so the last element is the maximum;
      // this also avoids spreading a large array into Math.max().
      max: durations[count - 1],
      p95: durations[p95Index],
    };
  },
};
6. 跟踪关键指标
基本性能指标:
| 指标 | 描述 | 目标 | 警报阈值 |
|---|---|---|---|
| 延迟 (p50) | 中位数响应时间 | <100ms | >200ms |
| 延迟 (p95) | 95百分位数 | <500ms | >1000ms |
| 延迟 (p99) | 99百分位数 | <1000ms | >2000ms |
| 吞吐量 | 每秒请求数 | >1000 RPS | <500 RPS |
| 错误率 | 失败请求百分比 | <0.1% | >1% |
| 饱和 | 资源利用率 | <70% | >85% |
| Apdex | 用户满意度得分 | >0.9 | <0.7 |
实现指标收集:
// metrics.ts
import { Counter, Histogram, Gauge, Registry } from "prom-client";
const register = new Registry();
// 请求指标
const httpRequestDuration = new Histogram({
name: "http_request_duration_seconds",
help: "HTTP请求持续时间(秒)",
labelNames: ["method", "route", "status_code"],
buckets: [0.01, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10],
registers: [register],
});
const httpRequestTotal = new Counter({
name: "http_requests_total",
help: "HTTP请求总数",
labelNames: ["method", "route", "status_code"],
registers: [register],
});
const activeConnections = new Gauge({
name: "active_connections",
help: "活动连接数",
registers: [register],
});
/**
 * Express middleware that records request duration, request count and the
 * number of in-flight requests.
 *
 * Duration and count are recorded on "finish" (response fully sent). The
 * in-flight gauge is decremented on "close", which is emitted both after a
 * normal finish and when the client disconnects early; decrementing only on
 * "finish" would let the gauge drift upwards with every aborted request.
 */
export function metricsMiddleware(
  req: Request,
  res: Response,
  next: NextFunction,
) {
  const start = process.hrtime.bigint();
  activeConnections.inc();

  res.on("finish", () => {
    // hrtime is in nanoseconds; the histogram is in seconds
    const duration = Number(process.hrtime.bigint() - start) / 1e9;
    // NOTE(review): falling back to req.path for unmatched routes can create
    // a label value per distinct URL; confirm this cardinality is acceptable.
    const route = req.route?.path || req.path;
    const labels = { method: req.method, route, status_code: res.statusCode };
    httpRequestDuration.observe(labels, duration);
    httpRequestTotal.inc(labels);
  });

  res.once("close", () => {
    activeConnections.dec();
  });

  next();
}
// 指标端点
app.get("/metrics", async (req, res) => {
res.set("Content-Type", register.contentType);
res.end(await register.metrics());
});
自定义业务指标:
// business-metrics.ts
const orderProcessingTime = new Histogram({
name: "order_processing_duration_seconds",
help: "处理订单的时间",
labelNames: ["payment_method", "status"],
buckets: [0.5, 1, 2, 5, 10, 30, 60],
});
const cartValue = new Histogram({
name: "cart_value_dollars",
help: "结账时的购物车价值",
buckets: [10, 25, 50, 100, 250, 500, 1000],
});
const concurrentUsers = new Gauge({
name: "concurrent_authenticated_users",
help: "当前认证用户数",
});
7. 定义性能预算
Web性能预算:
// performance-budget.ts
interface PerformanceBudget {
metric: string;
budget: number;
unit: string;
}
const webBudgets: PerformanceBudget[] = [
// 时序指标
{ metric: "First Contentful Paint", budget: 1800, unit: "ms" },
{ metric: "Largest Contentful Paint", budget: 2500, unit: "ms" },
{ metric: "Time to Interactive", budget: 3800, unit: "ms" },
{ metric: "Total Blocking Time", budget: 300, unit: "ms" },
{ metric: "Cumulative Layout Shift", budget: 0.1, unit: "" },
// 资源预算
{ metric: "JavaScript包大小", budget: 300, unit: "KB" },
{ metric: "CSS包大小", budget: 100, unit: "KB" },
{ metric: "总页面重量", budget: 1500, unit: "KB" },
{ metric: "图像重量", budget: 500, unit: "KB" },
// 请求预算
{ metric: "总请求数", budget: 50, unit: "requests" },
{ metric: "第三方请求数", budget: 10, unit: "requests" },
];
Lighthouse CI配置:
// lighthouserc.js
module.exports = {
ci: {
collect: {
url: ["http://localhost:3000/", "http://localhost:3000/products"],
numberOfRuns: 3,
},
assert: {
assertions: {
"first-contentful-paint": ["error", { maxNumericValue: 1800 }],
"largest-contentful-paint": ["error", { maxNumericValue: 2500 }],
interactive: ["error", { maxNumericValue: 3800 }],
"total-blocking-time": ["error", { maxNumericValue: 300 }],
"cumulative-layout-shift": ["error", { maxNumericValue: 0.1 }],
"resource-summary:script:size": ["error", { maxNumericValue: 300000 }],
"resource-summary:total:size": ["error", { maxNumericValue: 1500000 }],
},
},
upload: {
target: "temporary-public-storage",
},
},
};
API性能预算:
# api-budgets.yml
endpoints:
GET /api/products:
p50_latency_ms: 50
p95_latency_ms: 200
p99_latency_ms: 500
error_rate_percent: 0.1
throughput_rps: 1000
POST /api/orders:
p50_latency_ms: 200
p95_latency_ms: 800
p99_latency_ms: 2000
error_rate_percent: 0.01
throughput_rps: 100
GET /api/search:
p50_latency_ms: 100
p95_latency_ms: 500
p99_latency_ms: 1500
error_rate_percent: 0.5
throughput_rps: 500
8. 识别和解决瓶颈
系统性瓶颈分析:
// bottleneck-analyzer.ts
interface BottleneckReport {
category: "cpu" | "memory" | "io" | "network" | "database";
severity: "low" | "medium" | "high" | "critical";
description: string;
recommendation: string;
metrics: Record<string, number>;
}
/**
 * Takes a quick snapshot of CPU, heap and event-loop health for the current
 * process and returns a report for every threshold that is exceeded.
 *
 * @param sampleWindowMs - Length of the CPU sampling window in milliseconds.
 * @returns Zero or more bottleneck reports.
 */
async function analyzeBottlenecks(
  sampleWindowMs: number = 100,
): Promise<BottleneckReport[]> {
  const reports: BottleneckReport[] = [];

  // CPU: process.cpuUsage() returns cumulative CPU time in microseconds, not
  // a percentage. Utilisation is therefore the CPU time consumed during a
  // sampling window divided by the wall-clock length of that window.
  const cpuStart = process.cpuUsage();
  const wallStart = process.hrtime.bigint();
  await new Promise((resolve) => setTimeout(resolve, sampleWindowMs));
  const cpuDelta = process.cpuUsage(cpuStart);
  const wallMicros = Number(process.hrtime.bigint() - wallStart) / 1000;
  const userCpuPercent = (cpuDelta.user / wallMicros) * 100;
  const systemCpuPercent = (cpuDelta.system / wallMicros) * 100;
  if (userCpuPercent + systemCpuPercent > 80) {
    reports.push({
      category: "cpu",
      severity: "high",
      description: "检测到高CPU利用率",
      recommendation: "分析CPU使用情况,优化热点路径,考虑缓存",
      metrics: { userCpuPercent, systemCpuPercent },
    });
  }

  // Memory: share of the currently allocated heap that is in use
  const memUsage = process.memoryUsage();
  const heapUsedPercent = (memUsage.heapUsed / memUsage.heapTotal) * 100;
  if (heapUsedPercent > 85) {
    reports.push({
      category: "memory",
      severity: "high",
      description: "检测到高内存压力",
      recommendation: "检查内存泄漏,减少对象保留",
      metrics: { heapUsedPercent, heapUsedMB: memUsage.heapUsed / 1024 / 1024 },
    });
  }

  // Event-loop lag: time until a setImmediate callback actually runs
  const lagStart = Date.now();
  await new Promise((resolve) => setImmediate(resolve));
  const eventLoopLag = Date.now() - lagStart;
  if (eventLoopLag > 100) {
    reports.push({
      category: "cpu",
      severity: "medium",
      description: "检测到事件循环阻塞",
      recommendation: "将CPU密集型工作移动到工作线程",
      metrics: { eventLoopLagMs: eventLoopLag },
    });
  }

  return reports;
}
常见瓶颈和解决方案:
| 瓶颈 | 症状 | 诊断 | 解决方案 |
|---|---|---|---|
| N+1 查询 | 延迟线性增加 | 查询日志 | 急切加载、批处理 |
| 缺少索引 | 大表上的慢查询 | EXPLAIN ANALYZE | 添加适当索引 |
| 连接池 | 负载下超时 | 池指标 | 增加池大小、添加队列 |
| 同步I/O | 高事件循环延迟 | 分析 | 使用异步操作 |
| 内存泄漏 | 随时间堆增长 | 堆快照 | 修复对象保留 |
| 未优化的JSON | 序列化时高CPU | CPU分析 | 流解析、模式验证 |
| 大有效负载 | 高网络延迟 | 响应大小监控 | 分页、压缩 |
数据库优化清单:
-- Find tables that are read mostly by sequential scans (candidates for a
-- missing index). pg_stat_user_tables exposes the table name as "relname".
-- idx_scan is NULL for tables without any index, so it is coalesced to 0;
-- otherwise exactly those tables would be filtered out.
SELECT
  schemaname, relname AS tablename,
  seq_scan, seq_tup_read,
  idx_scan, idx_tup_fetch
FROM pg_stat_user_tables
WHERE seq_scan > COALESCE(idx_scan, 0)
ORDER BY seq_tup_read DESC;
-- Find the statements with the highest average execution time
-- (requires the pg_stat_statements extension).
-- PostgreSQL 13+ column names; on PostgreSQL 12 and older use
-- total_time / mean_time instead.
SELECT query, calls, total_exec_time, mean_exec_time, rows
FROM pg_stat_statements
ORDER BY mean_exec_time DESC
LIMIT 20;
-- Find lock contention: each blocked session together with the sessions
-- blocking it. pg_blocking_pids() (PostgreSQL 9.6+) resolves the blockers,
-- which a join on pg_locks.locktype alone cannot do.
SELECT blocked.pid AS blocked_pid,
  blocking.pid AS blocking_pid,
  blocked.query AS blocked_query
FROM pg_catalog.pg_stat_activity blocked
JOIN pg_catalog.pg_stat_activity blocking
  ON blocking.pid = ANY (pg_blocking_pids(blocked.pid));
9. 测试数据生成和现实负载模式
生成现实测试数据:
// test-data-generator.ts
import { faker } from "@faker-js/faker";
interface TestUser {
id: number;
email: string;
name: string;
createdAt: Date;
preferences: Record<string, any>;
}
/**
 * Builds `count` fake users with sequential ids starting at 1.
 */
function generateUsers(count: number): TestUser[] {
  const buildUser = (position: number): TestUser => ({
    id: position + 1,
    email: faker.internet.email(),
    name: faker.person.fullName(),
    createdAt: faker.date.past({ years: 2 }),
    preferences: {
      theme: faker.helpers.arrayElement(["light", "dark"]),
      notifications: faker.datatype.boolean(),
      language: faker.helpers.arrayElement(["en", "es", "fr", "de"]),
    },
  });
  return Array.from({ length: count }).map((_, position) => buildUser(position));
}
/**
 * Generates users and orders following an 80/20 distribution: the first 20%
 * of users ("power users") receive 80% of the orders.
 *
 * For very small user counts one of the two groups would be empty, and
 * picking a random element from an empty array fails. At least one power
 * user is therefore always selected, and the regular group falls back to
 * all users when it would otherwise be empty.
 *
 * @param userCount - Number of users to generate.
 * @param orderCount - Total number of orders to generate.
 * @returns The generated users and orders.
 * @throws RangeError when orders are requested but there are no users.
 */
function generateOrdersWithDistribution(userCount: number, orderCount: number) {
  const users = generateUsers(userCount);
  const orders: ReturnType<typeof generateOrder>[] = [];
  if (orderCount <= 0) {
    return { users, orders };
  }
  if (users.length === 0) {
    throw new RangeError("userCount must be at least 1 when orderCount > 0");
  }

  // 80/20 rule: 20% of the users generate 80% of the orders
  const powerUserCount = Math.max(1, Math.floor(users.length * 0.2));
  const powerUsers = users.slice(0, powerUserCount);
  const remainingUsers = users.slice(powerUserCount);
  const regularUsers = remainingUsers.length > 0 ? remainingUsers : users;

  const powerUserOrders = Math.floor(orderCount * 0.8);
  const regularUserOrders = orderCount - powerUserOrders;

  // Power users
  for (let i = 0; i < powerUserOrders; i++) {
    const user = faker.helpers.arrayElement(powerUsers);
    orders.push(generateOrder(user.id, i + 1));
  }
  // Regular users; order ids continue after the power-user orders
  for (let i = 0; i < regularUserOrders; i++) {
    const user = faker.helpers.arrayElement(regularUsers);
    orders.push(generateOrder(user.id, powerUserOrders + i + 1));
  }
  return { users, orders };
}
/**
 * Builds one fake order with 1-10 line items for the given user.
 */
function generateOrder(userId: number, orderId: number) {
  const itemCount = faker.number.int({ min: 1, max: 10 });
  const buildItem = () => ({
    productId: faker.number.int({ min: 1, max: 1000 }),
    quantity: faker.number.int({ min: 1, max: 5 }),
    price: parseFloat(faker.commerce.price()),
  });
  const items = Array.from({ length: itemCount }).map(() => buildItem());
  const status = faker.helpers.arrayElement([
    "pending",
    "processing",
    "shipped",
    "delivered",
  ]);
  const createdAt = faker.date.recent({ days: 90 });
  return { id: orderId, userId, items, status, createdAt };
}
现实流量模式:
// k6 现实流量模式
import http from "k6/http";
import { sleep } from "k6";
export const options = {
scenarios: {
// 上午流量峰值(9am)
morning_spike: {
executor: "ramping-arrival-rate",
startRate: 10,
timeUnit: "1s",
preAllocatedVUs: 50,
maxVUs: 200,
stages: [
{ duration: "5m", target: 50 }, // 增加
{ duration: "10m", target: 50 }, // 持续
{ duration: "5m", target: 10 }, // 下降
],
startTime: "0s",
},
// 午餐流量(12pm)
lunch_traffic: {
executor: "constant-arrival-rate",
rate: 30,
timeUnit: "1s",
duration: "30m",
preAllocatedVUs: 100,
maxVUs: 150,
startTime: "20m",
},
// 晚上峰值(6pm)
evening_spike: {
executor: "ramping-arrival-rate",
startRate: 10,
timeUnit: "1s",
preAllocatedVUs: 50,
maxVUs: 300,
stages: [
{ duration: "5m", target: 100 },
{ duration: "15m", target: 100 },
{ duration: "5m", target: 10 },
],
startTime: "50m",
},
// 后台作业(恒定低负载)
background_jobs: {
executor: "constant-vus",
vus: 5,
duration: "2h",
exec: "backgroundJob",
},
},
};
/**
 * VU iteration that picks one of three user behaviours with a single random
 * roll: 60% quick browsing, 30% regular use, 10% order placement.
 */
export default function () {
  const api = __ENV.API_URL;
  const roll = Math.random();

  if (roll >= 0.9) {
    // 10% - power users (complex operation)
    const body = JSON.stringify({
      items: [{ productId: 1, quantity: 1 }],
    });
    http.post(`${api}/api/orders`, body, {
      headers: { "Content-Type": "application/json" },
    });
    sleep(5);
    return;
  }

  if (roll >= 0.6) {
    // 30% - regular users (medium pace)
    http.get(`${api}/api/products`);
    sleep(2);
    http.get(`${api}/api/cart`);
    sleep(3);
    return;
  }

  // 60% - browsing users (fast, several requests)
  http.get(`${api}/api/products`);
  sleep(0.5);
  const productId = Math.floor(Math.random() * 100);
  http.get(`${api}/api/products/${productId}`);
  sleep(0.5);
}
/**
 * Simulates a cron-style background worker: submits one batch with a random
 * id, then waits a minute before the next iteration.
 */
export function backgroundJob() {
  const endpoint = `${__ENV.API_URL}/internal/process-batch`;
  const batchId = Math.floor(Math.random() * 1000);
  const requestOptions = {
    headers: { "Content-Type": "application/json" },
  };
  http.post(endpoint, JSON.stringify({ batchId }), requestOptions);
  sleep(60); // once per minute
}
思考时间和用户行为:
import http from "k6/http";
import { sleep } from "k6";
import { randomIntBetween } from "https://jslib.k6.io/k6-utils/1.2.0/index.js";
/**
 * Sleeps for a human-like think time drawn from an approximately normal
 * distribution, never shorter than 0.5 seconds.
 *
 * The sum of three uniform(0, 1) samples has mean 1.5 and standard deviation
 * 0.5 (variance 3/12). It must be centred and divided by 0.5 to become a
 * unit-variance value; without that division the effective standard
 * deviation is only half of `stdDev`.
 *
 * @param {number} [mean=2] - Mean think time in seconds.
 * @param {number} [stdDev=0.5] - Standard deviation in seconds.
 */
function thinkTime(mean = 2, stdDev = 0.5) {
  const uniformSum = Math.random() + Math.random() + Math.random();
  const standardScore = (uniformSum - 1.5) / 0.5;
  const seconds = mean + stdDev * standardScore;
  sleep(Math.max(0.5, seconds));
}
/**
 * Simulates one user session: login, browsing 3-7 product pages, an
 * optional add-to-cart and checkout, and an occasional explicit logout.
 */
export default function () {
  const api = __ENV.API_URL;

  // Login
  http.post(`${api}/auth/login` /* ... */);
  thinkTime();

  // Browse products (3-7 pages)
  const pageViews = randomIntBetween(3, 7);
  let page = 0;
  while (page < pageViews) {
    http.get(`${api}/api/products?page=${page}`);
    thinkTime();
    page += 1;
  }

  // 30% of users add something to the cart
  const addsToCart = Math.random() < 0.3;
  if (addsToCart) {
    http.post(`${api}/api/cart` /* ... */);
    thinkTime();
    // 50% of the users who added to the cart complete the checkout
    const checksOut = Math.random() < 0.5;
    if (checksOut) {
      http.post(`${api}/api/orders` /* ... */);
      sleep(3); // checkout takes longer
    }
  }

  // Logout (20% of users log out explicitly)
  const logsOut = Math.random() < 0.2;
  if (logsOut) {
    http.post(`${api}/auth/logout`);
  }
}
边缘情况和错误场景:
import http from "k6/http";
import { check } from "k6";
/**
 * VU iteration that runs exactly one scenario, chosen by a weighted random
 * roll. Each entry carries the cumulative upper bound of its probability
 * range, so the first entry whose bound exceeds the roll is executed.
 */
export default function () {
  const api = __ENV.API_URL;

  const weightedScenarios = [
    {
      // Happy path (70%)
      upperBound: 0.7,
      run: () => {
        const res = http.get(`${api}/api/products`);
        check(res, { "状态是200": (r) => r.status === 200 });
      },
    },
    {
      // Large payload (10%)
      upperBound: 0.8,
      run: () => {
        const res = http.get(`${api}/api/products?limit=1000`);
        check(res, { "处理大响应": (r) => r.status === 200 });
      },
    },
    {
      // Invalid input (10%)
      upperBound: 0.9,
      run: () => {
        const res = http.get(`${api}/api/products/-1`);
        check(res, { "处理无效ID": (r) => r.status === 400 });
      },
    },
    {
      // Not found (5%)
      upperBound: 0.95,
      run: () => {
        const res = http.get(`${api}/api/products/999999`);
        check(res, { "处理未找到": (r) => r.status === 404 });
      },
    },
    {
      // Timeout scenario (3%)
      upperBound: 0.98,
      run: () => {
        const res = http.get(`${api}/api/slow-endpoint`, {
          timeout: "5s",
        });
        check(res, { "处理超时": (r) => r.status === 200 || r.error });
      },
    },
    {
      // Unauthorized (2%)
      upperBound: 1.0,
      run: () => {
        const res = http.get(`${api}/api/admin/users`);
        check(res, {
          "强制执行认证": (r) => r.status === 401 || r.status === 403,
        });
      },
    },
  ];

  // Weighted random scenario selection
  const roll = Math.random();
  const selected = weightedScenarios.find((entry) => roll < entry.upperBound);
  if (selected) {
    selected.run();
  }
}
最佳实践
测试环境和设置
-
在类似生产环境中测试
- 匹配硬件规格(CPU、RAM、磁盘I/O)
- 使用现实数据量(生产规模数据库)
- 模拟实际流量模式和用户分布
- 如果测试分布式系统,包括网络延迟
- 使用类似生产配置测试(缓存、CDN、负载均衡器)
-
首先建立基线
- 在任何优化前测量当前性能
- 记录所有关键端点的基线指标
- 随时间跟踪变化以检测退化
- 创建性能基线报告用于利益相关者沟通
-
使用现实测试数据
- 数量应匹配生产规模(不仅仅是小样本)
- 包括边缘情况(大记录、Unicode、特殊字符、畸形数据)
- 分别在冷缓存和热缓存状态下测试,以衡量这两种场景
- 应用现实数据分布(80/20规则、帕累托原理)
- 包括时区、区域设置和国际化变化
测试执行策略
-
尽早测试,经常测试
- 在CI/CD流水线中包括性能测试
- 每次提交运行冒烟测试(快速基线检查)
- 每晚或在拉取请求上运行完整负载测试
- 在生产部署前捕获退化,而不是在生产中
- 监控趋势,不仅仅是通过/失败阈值
-
实施渐进式负载测试
- 从冒烟测试开始(最小负载,验证功能)
- 进展到负载测试(预期正常负载)
- 执行压力测试(找到崩溃点)
- 运行峰值测试(验证自动扩展和恢复)
- 最后进行浸泡测试(检测内存泄漏和随时间退化)
-
测试关键用户旅程
- 识别前3-5个最重要的用户流
- 优先测试创收路径
- 包括认证、支付和结账流
- 单独测试管理员和特权操作
- 验证部分故障下的优雅降级
分析和报告
-
分析百分位数,而不是平均值
- 始终报告p50、p95、p99延迟(不仅仅是平均)
- 使用p95/p99用于SLOs和警报阈值
- 理解异常值对用户体验很重要
- 跟踪最大延迟以识别最坏情况场景
-
在生产中监控
- 性能测试是对生产监控的补充,而不是替代
- 使用APM工具(Datadog、New Relic等)获取真实用户指标
- 对关键路径实施综合监控
- 在用户抱怨前警报性能退化
- 将负载测试结果与生产行为关联
-
记录和分享结果
- 在版本控制中保留性能测试报告
- 创建趋势分析的可视化仪表板
- 在定期评审中与团队分享发现
- 跟踪优化改进与前后指标
- 记录基础设施变化及其性能影响
优化和迭代
-
遵循科学方法
- 在优化前形成假设(“我认为X慢是因为Y”)
- 一次更改一个变量
- 使用负载测试测量前后影响
- 记录失败的尝试(哪些做法无效以及原因)
- 验证优化不会破坏功能
-
设置和执行性能预算
- 为每个端点定义可接受的延迟目标
- 设置吞吐量要求(RPS、并发用户)
- 建立错误率阈值
- 阻止违反预算的部署
- 根据业务需求季度评审和调整预算
-
单独测试数据库性能
- 将数据库瓶颈与应用问题隔离
- 在负载下基准测试查询(不仅仅在开发中)
- 测试连接池耗尽场景
- 使用生产数据量验证索引有效性
- 在负载测试期间监控慢查询日志
示例
示例:完整的k6负载测试套件
// k6/scenarios/api-load-test.js
import http from "k6/http";
import { check, group, sleep } from "k6";
import { Rate, Trend, Counter } from "k6/metrics";
// 自定义指标
const errorRate = new Rate("errors");
const orderLatency = new Trend("order_latency");
const ordersCreated = new Counter("orders_created");
// 测试配置
export const options = {
scenarios: {
// 恒定负载用于基线
baseline: {
executor: "constant-vus",
vus: 10,
duration: "5m",
tags: { scenario: "baseline" },
},
// 增加负载用于压力测试
stress: {
executor: "ramping-vus",
startVUs: 0,
stages: [
{ duration: "2m", target: 50 },
{ duration: "5m", target: 50 },
{ duration: "2m", target: 100 },
{ duration: "5m", target: 100 },
{ duration: "2m", target: 0 },
],
startTime: "5m",
tags: { scenario: "stress" },
},
// 峰值测试
spike: {
executor: "ramping-vus",
startVUs: 0,
stages: [
{ duration: "10s", target: 200 },
{ duration: "1m", target: 200 },
{ duration: "10s", target: 0 },
],
startTime: "20m",
tags: { scenario: "spike" },
},
},
thresholds: {
http_req_duration: ["p(95)<500", "p(99)<1500"],
errors: ["rate<0.01"],
order_latency: ["p(95)<2000"],
},
};
const BASE_URL = __ENV.BASE_URL || "http://localhost:3000";
/**
 * Logs in once before the test and shares the auth token with every VU.
 *
 * @returns {{token: string}} Data object passed to the default function and
 *   to teardown().
 * @throws {Error} When the login fails or returns no token. Throwing from
 *   setup() aborts the run, so the test does not measure a stream of
 *   unauthorised requests.
 */
export function setup() {
  // Log in and obtain the auth token
  const loginRes = http.post(
    `${BASE_URL}/api/auth/login`,
    JSON.stringify({
      email: "loadtest@example.com",
      password: "loadtest123",
    }),
    {
      headers: { "Content-Type": "application/json" },
    },
  );

  if (loginRes.status < 200 || loginRes.status >= 300) {
    throw new Error(`setup(): login failed with HTTP ${loginRes.status}`);
  }

  const token = loginRes.json("token");
  if (!token) {
    throw new Error("setup(): login response did not contain a token");
  }
  return { token };
}
/**
 * VU iteration: browse the product list, view one product, create an order.
 *
 * @param {{token: string}} data - Value returned by setup().
 */
export default function (data) {
  const headers = {
    "Content-Type": "application/json",
    Authorization: `Bearer ${data.token}`,
  };

  group("浏览产品", () => {
    const productsRes = http.get(`${BASE_URL}/api/products`, { headers });
    check(productsRes, {
      "产品状态200": (r) => r.status === 200,
      "产品返回": (r) => {
        // Only inspect the body of successful responses, and never read
        // .length from a missing "data" field: an exception here would
        // abort the iteration before the error rate is recorded.
        const items = r.status === 200 ? r.json("data") : null;
        return Array.isArray(items) && items.length > 0;
      },
    });
    errorRate.add(productsRes.status !== 200);
    sleep(1);
  });

  group("查看产品详情", () => {
    const productRes = http.get(`${BASE_URL}/api/products/1`, { headers });
    check(productRes, {
      "产品状态200": (r) => r.status === 200,
    });
    errorRate.add(productRes.status !== 200);
    sleep(0.5);
  });

  group("创建订单", () => {
    const orderRes = http.post(
      `${BASE_URL}/api/orders`,
      JSON.stringify({
        items: [{ productId: 1, quantity: 1 }],
      }),
      { headers },
    );
    // Use k6's own request timing rather than Date.now() deltas, which have
    // millisecond resolution and include script overhead.
    orderLatency.add(orderRes.timings.duration);

    const success = check(orderRes, {
      "订单状态201": (r) => r.status === 201,
      "订单有id": (r) => r.status === 201 && r.json("id") !== undefined,
    });
    if (success) ordersCreated.add(1);
    errorRate.add(!success);
    sleep(2);
  });
}
export function teardown(data) {
// 如果需要,清理测试数据
console.log("测试完成");
}
示例:性能监控仪表板配置
# grafana/dashboards/performance.json (simplified; shown here as YAML)
panels:
  - title: "请求延迟 (p95)"
    type: graph
    targets:
      # Aggregate the buckets across all label sets before taking the quantile.
      - expr: histogram_quantile(0.95, sum by (le) (rate(http_request_duration_seconds_bucket[5m])))
  - title: "吞吐量 (RPS)"
    type: graph
    targets:
      - expr: sum(rate(http_requests_total[1m]))
  - title: "错误率"
    type: graph
    targets:
      # Both sides are summed so the division yields one overall ratio.
      # Without sum(), series are matched label-by-label and every 5xx
      # series is divided by itself, which always gives 1.
      - expr: sum(rate(http_requests_total{status_code=~"5.."}[5m])) / sum(rate(http_requests_total[5m]))
  - title: "活动连接"
    type: gauge
    targets:
      - expr: active_connections
alerts:
  - name: 高延迟
    # Threshold is in seconds: p95 above 500 ms for 5 minutes.
    expr: histogram_quantile(0.95, sum by (le) (rate(http_request_duration_seconds_bucket[5m]))) > 0.5
    for: 5m
    labels:
      # Label values are matched by alert routing rules, so the conventional
      # English identifiers are used.
      severity: warning
  - name: 高错误率
    expr: sum(rate(http_requests_total{status_code=~"5.."}[5m])) / sum(rate(http_requests_total[5m])) > 0.01
    for: 2m
    labels:
      severity: critical