名称: 性能测试
描述: 性能测试和负载测试专业知识，包括k6、locust、JMeter、Gatling、artillery、API负载测试、数据库查询优化、基准测试策略、分析技术、指标分析（p95/p99延迟、吞吐量、RPS）、性能预算和瓶颈识别。用于实现负载测试、压力测试、峰值测试、浸泡测试，分析系统在并发用户下的行为，测量饱和点，或优化负载下的应用程序性能。触发关键词：性能测试、负载测试、压力测试、k6、locust、JMeter、Gatling、artillery、基准、基准测试、分析、延迟、吞吐量、RPS、每秒请求数、并发用户、虚拟用户、百分位数、p95、p99、p50、中位数延迟、饱和、瓶颈、性能预算、API负载测试、数据库性能、查询优化、慢查询、可扩展性测试、容量规划、响应时间、错误率、apdex。

性能测试

概述

性能测试验证应用程序在各种负载条件下满足速度、可扩展性和稳定性要求。此技能提供负载测试工具（k6、locust、JMeter、Gatling）、API和数据库性能测试、基准测试策略、分析技术以及识别和解决性能瓶颈的系统方法的全面专业知识。

何时使用此技能

当您需要完成以下工作时，请使用此技能：

实现负载测试、压力测试、峰值测试或浸泡测试
测量API端点在并发用户下的性能
优化数据库查询并识别慢查询
分析延迟百分位数（p50、p95、p99）和吞吐量（RPS）
设置性能监控和警报
识别系统饱和点和瓶颈
建立性能预算和SLOs
进行容量规划和可扩展性分析

测试类型和模式

负载测试类型

测试类型	目的	模式	何时使用
负载测试	验证预期负载下的性能	随时间恒定VUs	建立基线性能
压力测试	找到崩溃点	逐步增加直到失败	确定系统限制
峰值测试	测试突然流量激增	快速增加到高负载	验证自动扩展、缓存
浸泡测试	检测内存泄漏、退化	中等负载延长时间（小时）	生产就绪准备
断点测试	找到最大容量	增量负载增加	容量规划

k6 测试模式

模式：基线负载测试

// Baseline load profile: a constant pool of 50 virtual users for five minutes.
// The threshold requires the 95th-percentile request duration to stay under 500 ms.
const baselineThresholds = {
  http_req_duration: ["p(95)<500"],
};

export const options = {
  duration: "5m",
  vus: 50,
  thresholds: baselineThresholds,
};

模式：压力测试（找到崩溃点）

// Stress profile: climb in 100-VU steps (2 min ramp, 5 min hold per level)
// up to 300 VUs, then ramp back down to zero over five minutes.
const stressLevels = [100, 200, 300];

export const options = {
  stages: [
    ...stressLevels.flatMap((target) => [
      { duration: "2m", target },
      { duration: "5m", target },
    ]),
    { duration: "5m", target: 0 },
  ],
};

模式：峰值测试

// Spike profile: jump from the normal load to ten times as many VUs within
// ten seconds, hold the spike for a minute, then return to normal load.
const NORMAL_VUS = 50;
const SPIKE_VUS = 500;

export const options = {
  stages: [
    { target: NORMAL_VUS, duration: "30s" }, // normal load
    { target: SPIKE_VUS, duration: "10s" }, // spike
    { target: SPIKE_VUS, duration: "1m" }, // hold the spike
    { target: NORMAL_VUS, duration: "10s" }, // ramp down
    { target: NORMAL_VUS, duration: "1m" }, // recovery
  ],
};

模式：浸泡测试（内存泄漏）

// Soak profile: a moderate, constant 100 VUs held for four hours.
// Thresholds: p95 latency under 500 ms and fewer than 1% failed requests.
const SOAK_DURATION = "4h"; // extended duration

export const options = {
  duration: SOAK_DURATION,
  vus: 100,
  thresholds: {
    http_req_failed: ["rate<0.01"],
    http_req_duration: ["p(95)<500"],
  },
};

指令

1. 使用现代工具进行负载测试

k6（推荐用于API负载测试）：

# Install (macOS, Homebrew)
brew install k6
# Or use the official Docker image. k6 is a standalone Go binary and is not
# distributed through npm, so `npm install -g k6` does not install the CLI.
docker pull grafana/k6

// load-test.js
import http from "k6/http";
import { check, sleep } from "k6";
import { Rate, Trend } from "k6/metrics";
// handleSummary() in this script calls textSummary(), which k6 does not
// provide as a global; it has to be imported from the k6-summary jslib.
import { textSummary } from "https://jslib.k6.io/k6-summary/0.0.1/index.js";

// Custom metrics
const errorRate = new Rate("errors");
const latencyTrend = new Trend("latency");

// Staged profile expressed as [duration, target VUs] pairs.
const rampPlan = [
  ["2m", 100], // ramp up
  ["5m", 100], // steady state
  ["2m", 200], // peak
  ["5m", 200], // sustained peak
  ["2m", 0], // ramp down
];

// Thresholds cover latency (p95/p99), the custom "errors" rate and k6's
// built-in failed-request rate.
export const options = {
  stages: rampPlan.map(([duration, target]) => ({ duration, target })),
  thresholds: {
    http_req_failed: ["rate<0.01"],
    errors: ["rate<0.01"],
    http_req_duration: ["p(95)<500", "p(99)<1000"],
  },
};

/**
 * One VU iteration: POST a JSON payload, feed the custom metrics, validate the
 * response, then pause for one second.
 */
export default function () {
  // __VU / __ITER make the username unique per virtual user and iteration.
  const payload = JSON.stringify({
    username: `user_${__VU}_${__ITER}`,
    action: "test",
  });

  const params = {
    headers: {
      "Content-Type": "application/json",
      Authorization: `Bearer ${__ENV.API_TOKEN}`,
    },
  };

  const response = http.post(
    "https://api.example.com/endpoint",
    payload,
    params,
  );

  latencyTrend.add(response.timings.duration);
  errorRate.add(response.status !== 200);

  check(response, {
    "状态是200": (r) => r.status === 200,
    "响应时间 < 500ms": (r) => r.timings.duration < 500,
    "有必需字段": (r) => {
      // Error responses are frequently not JSON (HTML error pages, empty
      // bodies). An exception thrown here would abort the iteration before
      // sleep(1) runs, so an unparsable body counts as a failed check instead.
      let body;
      try {
        body = JSON.parse(r.body);
      } catch (_err) {
        return false;
      }
      return Boolean(body && body.id && body.status);
    },
  });

  sleep(1);
}

/**
 * End-of-test hook: writes the raw summary data to summary.json and prints a
 * coloured text summary to stdout.
 */
export function handleSummary(data) {
  const jsonReport = JSON.stringify(data);
  const consoleReport = textSummary(data, { indent: " ", enableColors: true });

  const outputs = {};
  outputs["summary.json"] = jsonReport;
  outputs.stdout = consoleReport;
  return outputs;
}

运行 k6 测试：

# Basic run
k6 run load-test.js

# With an environment variable (the script reads it as __ENV.API_TOKEN)
k6 run -e API_TOKEN=xxx load-test.js

# Cloud execution
k6 cloud load-test.js

# Stream results to InfluxDB for Grafana dashboards
k6 run --out influxdb=http://localhost:8086/k6 load-test.js

Locust（基于Python的负载测试）：

# locustfile.py
from locust import HttpUser, task, between
from locust import events
import time

class WebsiteUser(HttpUser):
    """Regular shopper: logs in once, then browses products and sometimes adds to the cart."""

    # Locust spawns user classes in proportion to `weight`. AdminUser declares
    # weight = 1 and documents a ratio of 1 admin per 10 regular users, so this
    # class needs an explicit weight of 10 (the default weight is 1, i.e. 1:1).
    weight = 10
    wait_time = between(1, 3)

    def on_start(self):
        """Log in when the simulated user starts and keep the returned token."""
        response = self.client.post("/login", json={
            "username": "testuser",
            "password": "testpass"
        })
        self.token = response.json().get("token")

    @task(3)
    def view_products(self):
        """Most common action (task weight 3)."""
        self.client.get("/api/products", headers={
            "Authorization": f"Bearer {self.token}"
        })

    @task(2)
    def view_product_detail(self):
        """View a single product (task weight 2)."""
        self.client.get("/api/products/1", headers={
            "Authorization": f"Bearer {self.token}"
        })

    @task(1)
    def add_to_cart(self):
        """Less common action (task weight 1)."""
        self.client.post("/api/cart", json={
            "product_id": 1,
            "quantity": 1
        }, headers={
            "Authorization": f"Bearer {self.token}"
        })

class AdminUser(HttpUser):
    """Simulated admin that only requests the dashboard, waiting 2-5 seconds between tasks."""
    wait_time = between(2, 5)
    # Intended ratio: 1 admin per 10 regular users.
    # NOTE(review): this only holds if WebsiteUser declares weight = 10 — confirm.
    weight = 1

    @task
    def view_dashboard(self):
        # NOTE(review): no Authorization header is sent here, unlike the
        # WebsiteUser tasks — confirm the endpoint is reachable without one.
        self.client.get("/admin/dashboard")

# Request hook: report failed requests on stdout
@events.request.add_listener
def on_request(request_type, name, response_time, response_length, exception, **kwargs):
    """Print one line for every request that finished with an exception."""
    if not exception:
        return
    print(f"请求失败: {name} - {exception}")

运行 Locust：

# Web UI mode
locust -f locustfile.py --host=https://api.example.com

# Headless mode: 100 users, spawned at 10 per second, for 5 minutes
locust -f locustfile.py --headless -u 100 -r 10 --run-time 5m --host=https://api.example.com

# Distributed mode: one master process plus workers that connect to it
locust -f locustfile.py --master
locust -f locustfile.py --worker --master-host=192.168.1.1

Artillery（基于YAML的负载测试）：

# artillery-config.yml
config:
  target: "https://api.example.com"
  # Load phases run in order: warm-up, ramp, sustained load.
  phases:
    - duration: 60
      arrivalRate: 5
      name: "热身"
    - duration: 120
      arrivalRate: 20
      rampTo: 50
      name: "增加"
    - duration: 300
      arrivalRate: 50
      name: "持续负载"
  defaults:
    headers:
      Content-Type: "application/json"
  # The expect plugin enables the `expect:` assertions used in the scenario below.
  plugins:
    expect: {}
  # Pass/fail criteria for the whole run.
  ensure:
    p95: 500
    maxErrorRate: 1

scenarios:
  - name: "用户旅程"
    flow:
      # Step 1: log in and capture the token from the JSON response.
      - post:
          url: "/auth/login"
          json:
            username: "{{ $randomString() }}"
            password: "password123"
          capture:
            - json: "$.token"
              as: "authToken"
          expect:
            - statusCode: 200
            - hasProperty: "token"

      # Step 2: list products using the captured token.
      - get:
          url: "/api/products"
          headers:
            Authorization: "Bearer {{ authToken }}"
          expect:
            - statusCode: 200
            - contentType: "application/json"

      # Step 3: think time before the next action.
      - think: 2

      # Step 4: add a random product to the cart.
      - post:
          url: "/api/cart"
          headers:
            Authorization: "Bearer {{ authToken }}"
          json:
            productId: "{{ $randomNumber(1, 100) }}"
            quantity: 1
          expect:
            - statusCode: 201

运行 Artillery：

# Run the test
artillery run artillery-config.yml

# Save the raw results, then render them as an HTML report
artillery run artillery-config.yml --output report.json
artillery report report.json --output report.html

2. API负载测试模式

REST API负载测试：

// k6 API load test with authentication and varied data
import http from "k6/http";
import { check, sleep } from "k6";
import { SharedArray } from "k6/data";
// randomIntBetween ships in the k6-utils jslib; "k6/x/..." import paths are
// used by xk6 extensions compiled into a custom k6 binary.
import { randomIntBetween } from "https://jslib.k6.io/k6-utils/1.2.0/index.js";

// Load test data from a JSON file; SharedArray shares one copy across VUs
const testData = new SharedArray("users", function () {
  return JSON.parse(open("./test-data.json"));
});

// Builds a five-minute constant-arrival-rate scenario (iterations per second).
const arrivalRateScenario = (rate, preAllocatedVUs, maxVUs, exec) => ({
  executor: "constant-arrival-rate",
  rate,
  timeUnit: "1s",
  duration: "5m",
  preAllocatedVUs,
  maxVUs,
  exec,
});

// 70/30 read/write mix with separate latency thresholds per scenario.
export const options = {
  scenarios: {
    // Read-heavy workload (70% reads)
    reads: arrivalRateScenario(700, 50, 200, "readScenario"),
    // Write workload (30% writes)
    writes: arrivalRateScenario(300, 30, 100, "writeScenario"),
  },
  thresholds: {
    http_req_failed: ["rate<0.01"],
    "http_req_duration{scenario:reads}": ["p(95)<200", "p(99)<500"],
    "http_req_duration{scenario:writes}": ["p(95)<500", "p(99)<1000"],
  },
};

/**
 * Runs once before the load starts: logs in and returns the token, which k6
 * passes to every scenario function as its `data` argument.
 * Throws when the login fails so the run aborts instead of measuring a stream
 * of unauthenticated requests.
 */
export function setup() {
  const loginRes = http.post(
    `${__ENV.API_URL}/auth/login`,
    JSON.stringify({
      email: "loadtest@example.com",
      password: "test123",
    }),
    {
      headers: { "Content-Type": "application/json" },
    },
  );

  if (loginRes.status < 200 || loginRes.status >= 300) {
    throw new Error(`Login failed in setup(): HTTP ${loginRes.status}`);
  }

  const token = loginRes.json("token");
  if (!token) {
    throw new Error("Login response in setup() did not contain a token");
  }

  return { token };
}

/**
 * Read scenario: fetch a random user and validate the response.
 * @param {{ token: string }} data - value returned by setup()
 */
export function readScenario(data) {
  const headers = {
    Authorization: `Bearer ${data.token}`,
    "Content-Type": "application/json",
  };

  // GET request for a random user id
  const userId = randomIntBetween(1, 10000);
  const res = http.get(`${__ENV.API_URL}/api/users/${userId}`, {
    headers,
    tags: { name: "GetUser" },
  });

  check(res, {
    "状态是200": (r) => r.status === 200,
    "有用户数据": (r) => r.json("id") === userId,
    "响应时间OK": (r) => r.timings.duration < 200,
  });

  // No sleep() here: the constant-arrival-rate executor paces iterations
  // itself. A 0.5 s pause would cap each VU at 2 iterations/s, so the 200
  // maxVUs could deliver at most ~400 iterations/s instead of the configured 700.
}

/**
 * Write scenario: create an order for a random test user.
 * @param {{ token: string }} data - value returned by setup()
 */
export function writeScenario(data) {
  const headers = {
    Authorization: `Bearer ${data.token}`,
    "Content-Type": "application/json",
  };

  // POST request with a dynamic payload
  const user = testData[Math.floor(Math.random() * testData.length)];
  const res = http.post(
    `${__ENV.API_URL}/api/orders`,
    JSON.stringify({
      userId: user.id,
      items: [{ productId: randomIntBetween(1, 100), quantity: 1 }],
      timestamp: new Date().toISOString(),
    }),
    { headers, tags: { name: "CreateOrder" } },
  );

  check(res, {
    "状态是201": (r) => r.status === 201,
    "订单已创建": (r) => r.json("id") !== undefined,
  });

  // No sleep() here: the constant-arrival-rate executor paces iterations
  // itself. A 1 s pause would cap each VU at 1 iteration/s, so the 100 maxVUs
  // could deliver at most ~100 iterations/s instead of the configured 300.
}

GraphQL API负载测试：

import http from "k6/http";
import { check } from "k6";

/**
 * One VU iteration: query a user and their orders over GraphQL, using the VU
 * number as the user id, then check the response.
 */
export default function () {
  const userId = `${__VU}`;

  const gqlQuery = `
    query GetUserWithOrders($userId: ID!) {
      user(id: $userId) {
        id
        name
        orders(limit: 10) {
          id
          total
          items {
            productId
            quantity
          }
        }
      }
    }
  `;

  const requestBody = JSON.stringify({
    query: gqlQuery,
    variables: { userId },
  });

  const requestParams = {
    headers: {
      "Content-Type": "application/json",
      Authorization: `Bearer ${__ENV.TOKEN}`,
    },
  };

  const res = http.post(
    "https://api.example.com/graphql",
    requestBody,
    requestParams,
  );

  check(res, {
    "无GraphQL错误": (r) => !r.json("errors"),
    "用户数据存在": (r) => r.json("data.user.id") === userId,
  });
}

WebSocket负载测试：

import ws from "k6/ws";
import { check } from "k6";

/**
 * One VU iteration: open a WebSocket, subscribe to the "updates" channel,
 * validate every incoming message and close the socket after 60 seconds.
 */
export default function () {
  const endpoint = "wss://api.example.com/ws";
  const connectParams = { tags: { my_tag: "websocket" } };

  const runSession = (socket) => {
    socket.on("open", function handleOpen() {
      console.log("已连接");
      const subscription = { type: "subscribe", channel: "updates" };
      socket.send(JSON.stringify(subscription));
    });

    socket.on("message", function handleMessage(raw) {
      const parsed = JSON.parse(raw);
      check(parsed, {
        "有效消息": (m) => m.type !== undefined,
      });
    });

    socket.on("error", function handleError(e) {
      console.log("错误:", e.error());
    });

    socket.setTimeout(function closeSocket() {
      socket.close();
    }, 60000);
  };

  const res = ws.connect(endpoint, connectParams, runSession);

  check(res, { "状态是101": (r) => r && r.status === 101 });
}

3. 数据库性能测试

查询性能测试：

// database-perf-test.ts
import { performance } from "perf_hooks";
import { Pool } from "pg";

/** Result of benchmarking one SQL statement. */
interface QueryBenchmark {
  // Benchmarked SQL text (benchmarkQuery stores only its first 100 characters).
  query: string;
  // Bind parameters passed with each execution.
  params?: any[];
  // Number of timed executions.
  iterations: number;
  // Timing statistics over the timed executions, as performance.now() deltas (milliseconds).
  results: {
    min: number;
    max: number;
    avg: number;
    p50: number;
    p95: number;
    p99: number;
  };
}

/**
 * Times repeated executions of one query and summarises the latencies.
 *
 * @param pool       pg connection pool used to run the query
 * @param query      SQL text
 * @param params     bind parameters for the query
 * @param iterations number of timed executions (10 untimed warm-up runs come first)
 * @returns min/max/avg and p50/p95/p99 in milliseconds
 */
async function benchmarkQuery(
  pool: Pool,
  query: string,
  params: any[] = [],
  iterations: number = 100,
): Promise<QueryBenchmark> {
  const timings: number[] = [];

  // Warm-up runs are executed but not timed.
  for (let i = 0; i < 10; i++) {
    await pool.query(query, params);
  }

  // Timed runs
  for (let i = 0; i < iterations; i++) {
    const start = performance.now();
    await pool.query(query, params);
    timings.push(performance.now() - start);
  }

  timings.sort((a, b) => a - b);

  // Nearest-rank percentile: the smallest sample that has at least `percent`%
  // of all samples at or below it. Indexing with floor(n * p) is one rank too
  // high; with 100 samples it reports the maximum as "p99".
  const percentile = (percent: number): number =>
    timings[Math.max(0, Math.ceil((timings.length * percent) / 100) - 1)];

  return {
    query: query.substring(0, 100),
    params,
    iterations,
    results: {
      min: timings[0],
      max: timings[timings.length - 1],
      avg: timings.reduce((a, b) => a + b, 0) / timings.length,
      p50: percentile(50),
      p95: percentile(95),
      p99: percentile(99),
    },
  };
}

/**
 * Benchmarks a set of example queries one after another and prints a result
 * table for each. The pool is closed even when a benchmark throws.
 */
async function compareQueries() {
  const pool = new Pool({
    /* configuration */
  });

  const queries = [
    {
      name: "无索引",
      sql: "SELECT * FROM users WHERE email = $1",
      params: ["test@example.com"],
    },
    {
      name: "有索引",
      sql: "SELECT * FROM users WHERE id = $1",
      params: [1],
    },
    {
      name: "复杂连接",
      sql: `
        SELECT u.*, COUNT(o.id) as order_count
        FROM users u
        LEFT JOIN orders o ON u.id = o.user_id
        WHERE u.created_at > $1
        GROUP BY u.id
        LIMIT 100
      `,
      params: ["2024-01-01"],
    },
  ];

  try {
    for (const { name, sql, params } of queries) {
      const result = await benchmarkQuery(pool, sql, params);
      console.log(`\n${name}:`);
      console.table(result.results);
    }
  } finally {
    // Release the connections even if one of the queries fails.
    await pool.end();
  }
}

连接池负载测试：

// connection-pool-test.ts
import { Pool } from "pg";
import { performance } from "perf_hooks";

/**
 * Hammers a pg pool with `concurrentQueries` looping workers for `duration`
 * milliseconds and reports success/failure counts and latency statistics.
 *
 * @param poolSize          maximum number of pooled connections
 * @param concurrentQueries number of concurrent worker loops
 * @param duration          test length in milliseconds
 */
async function testConnectionPool(
  poolSize: number,
  concurrentQueries: number,
  duration: number,
) {
  const pool = new Pool({
    max: poolSize,
    idleTimeoutMillis: 30000,
    connectionTimeoutMillis: 2000,
  });

  const stats = {
    totalQueries: 0,
    successfulQueries: 0,
    failedQueries: 0,
    timeouts: 0,
    queryTimes: [] as number[],
  };

  const startTime = Date.now();

  const worker = async (): Promise<void> => {
    while (Date.now() - startTime < duration) {
      try {
        const start = performance.now();
        await pool.query("SELECT 1");
        stats.queryTimes.push(performance.now() - start);
        stats.successfulQueries++;
      } catch (err) {
        stats.failedQueries++;
        // A thrown value is not guaranteed to be an Error; reading .message
        // off it unguarded could throw from inside this handler.
        const message = err instanceof Error ? err.message : String(err);
        if (message.includes("timeout")) {
          stats.timeouts++;
        }
      }
      stats.totalQueries++;
    }
  };

  try {
    await Promise.all(Array.from({ length: concurrentQueries }, () => worker()));
  } finally {
    // Close the pool even if a worker rejects.
    await pool.end();
  }

  stats.queryTimes.sort((a, b) => a - b);
  const sampleCount = stats.queryTimes.length;

  return {
    poolSize,
    concurrentQueries,
    duration,
    ...stats,
    // With no successful query there are no samples; report 0 rather than NaN/undefined.
    avgQueryTime:
      sampleCount === 0
        ? 0
        : stats.queryTimes.reduce((a, b) => a + b, 0) / sampleCount,
    p95QueryTime: stats.queryTimes[Math.floor(sampleCount * 0.95)] ?? 0,
    qps: stats.successfulQueries / (duration / 1000),
  };
}

/**
 * Runs the pool test for several pool sizes in turn (100 concurrent workers,
 * 30 seconds each) and prints all results as one table.
 */
async function findOptimalPoolSize() {
  const candidateSizes = [5, 10, 20, 50, 100];
  const results = [];

  for (let i = 0; i < candidateSizes.length; i += 1) {
    const poolSize = candidateSizes[i];
    console.log(`测试池大小: ${poolSize}`);
    results.push(await testConnectionPool(poolSize, 100, 30000));
  }

  console.table(results);
}

N+1查询检测：

// n-plus-one-detector.ts
/**
 * Counts how often each normalised SQL statement is executed so that N+1
 * patterns (the same query repeated many times) can be spotted.
 */
class QueryTracker {
  private queries: Map<string, number> = new Map();
  private startTime: number = 0;

  /** Resets the counters and starts a new measurement window. */
  start() {
    this.queries.clear();
    this.startTime = Date.now();
  }

  /** Records one execution of `sql`. */
  track(sql: string) {
    const normalized = this.normalizeSql(sql);
    this.queries.set(normalized, (this.queries.get(normalized) || 0) + 1);
  }

  /**
   * Returns the normalised queries executed more than `threshold` times,
   * most frequent first.
   */
  detectNPlusOne(
    threshold: number = 10,
  ): Array<{ query: string; count: number }> {
    return Array.from(this.queries, ([query, count]) => ({ query, count }))
      .filter((entry) => entry.count > threshold)
      .sort((a, b) => b.count - a.count);
  }

  private normalizeSql(sql: string): string {
    // Replace literals with placeholders so executions that differ only in
    // their values compare equal. Numbers are matched as whole tokens so that
    // digits inside identifiers (users2, address_1) are left alone and
    // distinct tables/columns are not merged.
    return sql
      .replace(/'[^']*'/g, "?")
      .replace(/\b\d+(?:\.\d+)?\b/g, "?")
      .replace(/\s+/g, " ")
      .trim();
  }

  /** Prints totals for the current window and any suspected N+1 queries. */
  report() {
    const duration = Date.now() - this.startTime;
    const nPlusOne = this.detectNPlusOne();

    console.log(`\n查询分析 (${duration}ms):`);
    console.log(`总唯一查询数: ${this.queries.size}`);
    console.log(
      `总查询执行次数: ${Array.from(this.queries.values()).reduce((a, b) => a + b, 0)}`,
    );

    if (nPlusOne.length > 0) {
      // The line break must be written as an escape: a raw newline inside a
      // double-quoted string is a syntax error.
      console.log("\n潜在N+1查询:");
      console.table(nPlusOne);
    }
  }
}

4. 基准测试策略

微基准测试（函数级别）：

// Node.js 使用 benchmark.js
import Benchmark from "benchmark";

// Micro-benchmark comparing three ways of summing the same array.
const suite = new Benchmark.Suite();

// Shared input: the integers 0..9999.
const data = Array.from({ length: 10000 }, (_, i) => i);

suite
  // Case 1: indexed for loop
  .add("for循环", function () {
    let sum = 0;
    for (let i = 0; i < data.length; i++) {
      sum += data[i];
    }
    return sum;
  })
  // Case 2: Array.prototype.forEach with a closure accumulator
  .add("forEach", function () {
    let sum = 0;
    data.forEach((n) => {
      sum += n;
    });
    return sum;
  })
  // Case 3: Array.prototype.reduce
  .add("reduce", function () {
    return data.reduce((sum, n) => sum + n, 0);
  })
  // Print each case's result as it completes.
  .on("cycle", function (event: Benchmark.Event) {
    console.log(String(event.target));
  })
  // When all cases are done, print the name(s) of the fastest.
  .on("complete", function (this: Benchmark.Suite) {
    console.log("最快的是 " + this.filter("fastest").map("name"));
  })
  .run({ async: true });

数据库查询基准测试：

// benchmark-queries.ts
import { performance } from "perf_hooks";

/** Summary of one benchmarked operation; times are performance.now() deltas (milliseconds). */
interface BenchmarkResult {
  // Label of the benchmarked operation.
  query: string;
  avgTime: number;
  minTime: number;
  maxTime: number;
  p95: number;
  // Number of timed executions.
  iterations: number;
}

/**
 * Times repeated calls of an async operation.
 *
 * @param name       label stored in the result
 * @param queryFn    operation to benchmark
 * @param iterations number of timed calls (10 untimed warm-up calls come first)
 * @returns average, min, max and p95 in milliseconds
 */
async function benchmarkQuery(
  name: string,
  queryFn: () => Promise<any>,
  iterations: number = 100,
): Promise<BenchmarkResult> {
  const times: number[] = [];

  // Warm-up calls are executed but not timed.
  for (let i = 0; i < 10; i++) {
    await queryFn();
  }

  // Timed calls
  for (let i = 0; i < iterations; i++) {
    const start = performance.now();
    await queryFn();
    times.push(performance.now() - start);
  }

  times.sort((a, b) => a - b);

  // Nearest-rank p95; floor(n * 0.95) would pick one rank too high.
  const p95Index = Math.max(0, Math.ceil((times.length * 95) / 100) - 1);

  return {
    query: name,
    // The initial value keeps reduce() from throwing when there are no samples
    // (iterations = 0); the average is then NaN instead of a TypeError.
    avgTime: times.reduce((a, b) => a + b, 0) / times.length,
    minTime: times[0],
    maxTime: times[times.length - 1],
    p95: times[p95Index],
    iterations,
  };
}

// Usage
// Run the benchmarks one after another. Under Promise.all the three workloads
// would execute concurrently and compete for the same resources, so every
// timing would include the load generated by the other two.
const results = [
  await benchmarkQuery("findUserById", () => db.users.findById(1)),
  await benchmarkQuery("findUserWithJoin", () =>
    db.users.findById(1).include("orders"),
  ),
  await benchmarkQuery("complexAggregation", () =>
    db.orders.aggregate([
      /* pipeline */
    ]),
  ),
];

console.table(results);

HTTP端点基准测试：

# Using wrk: 12 threads, 400 connections, 30 seconds, with latency statistics
wrk -t12 -c400 -d30s --latency https://api.example.com/endpoint

# Using autocannon (Node.js): 100 connections, 30 seconds, 10 pipelined requests
npx autocannon -c 100 -d 30 -p 10 https://api.example.com/endpoint

# Using hey: 10000 requests in total, 100 concurrent
hey -n 10000 -c 100 https://api.example.com/endpoint

5. 分析技术

Node.js CPU分析：

// 启用内置分析器
// node --prof app.js
// node --prof-process isolate-*.log > processed.txt

// 程序化分析
import { Session } from "inspector";
import { writeFileSync } from "fs";

/**
 * Runs `fn` under the V8 CPU profiler and writes the profile to
 * profile.cpuprofile in the current working directory.
 *
 * Rejects if a profiler command fails or if `fn` rejects; the inspector
 * session is disconnected in every case.
 */
async function profileFunction(fn: () => Promise<any>) {
  const session = new Session();
  session.connect();

  // Promise wrapper around the callback-style session.post() so that command
  // errors are surfaced instead of being ignored.
  const post = (method: string): Promise<any> =>
    new Promise((resolve, reject) => {
      session.post(method, (err: Error | null, result?: any) => {
        if (err) {
          reject(err);
        } else {
          resolve(result);
        }
      });
    });

  try {
    await post("Profiler.enable");
    await post("Profiler.start");

    await fn();

    const { profile } = await post("Profiler.stop");
    writeFileSync("profile.cpuprofile", JSON.stringify(profile));
  } finally {
    // Always release the session, including when fn() or a command fails.
    session.disconnect();
  }
}

// Usage
await profileFunction(async () => {
  // Code to profile
  await heavyComputation();
});
// Open profile.cpuprofile in Chrome DevTools

内存分析：

// 内存快照
import v8 from "v8";
import { writeFileSync } from "fs";

/** Writes a V8 heap snapshot to `filename` and logs the value v8 returns for it. */
function takeHeapSnapshot(filename: string) {
  const writtenPath = v8.writeHeapSnapshot(filename);
  const message = `堆快照写入到 ${writtenPath}`;
  console.log(message);
}

/** Logs the process memory figures, rounded to whole megabytes, under `label`. */
function logMemoryUsage(label: string) {
  const toMegabytes = (bytes: number) => `${Math.round(bytes / 1024 / 1024)}MB`;
  const { heapUsed, heapTotal, external, rss } = process.memoryUsage();

  console.log(`内存 [${label}]:`, {
    heapUsed: toMegabytes(heapUsed),
    heapTotal: toMegabytes(heapTotal),
    external: toMegabytes(external),
    rss: toMegabytes(rss),
  });
}

/**
 * Samples heapUsed on a timer and warns when the heap grew by more than 10%
 * across the sliding window of the last 60 intervals.
 */
class MemoryLeakDetector {
  private samples: number[] = [];
  private interval: ReturnType<typeof setInterval> | null = null;

  /**
   * Starts sampling every `sampleInterval` milliseconds. Calling start() while
   * already running restarts the detector instead of leaking the old timer.
   */
  start(sampleInterval: number = 1000) {
    this.stop();

    this.interval = setInterval(() => {
      this.samples.push(process.memoryUsage().heapUsed);

      if (this.samples.length > 60) {
        const trend = this.calculateTrend();
        if (trend > 0.1) {
          // More than 10% growth over the window (one minute at the default
          // 1000 ms sample interval).
          console.warn("检测到潜在内存泄漏！");
        }
        this.samples.shift();
      }
    }, sampleInterval);
  }

  /** Relative growth between the oldest and newest sample in the window. */
  private calculateTrend(): number {
    if (this.samples.length < 2) return 0;
    const first = this.samples[0];
    const last = this.samples[this.samples.length - 1];
    // Guard the division: a zero baseline would yield Infinity/NaN.
    if (first === 0) return 0;
    return (last - first) / first;
  }

  /** Stops sampling and discards the collected samples. */
  stop() {
    if (this.interval) clearInterval(this.interval);
    this.interval = null;
    this.samples = [];
  }
}

数据库查询分析：

-- PostgreSQL: enable query logging
SET log_statement = 'all';
SET log_duration = on;

-- Analyze the query plan (EXPLAIN ANALYZE actually executes the statement)
EXPLAIN ANALYZE SELECT * FROM users
WHERE created_at > '2024-01-01'
ORDER BY created_at DESC
LIMIT 100;

-- Find slow queries (requires the pg_stat_statements extension).
-- The times are stored in milliseconds, hence the division by 1000.
-- NOTE(review): PostgreSQL 13 renamed total_time/mean_time to
-- total_exec_time/mean_exec_time — confirm the target server version.
SELECT
  query,
  calls,
  total_time / 1000 as total_seconds,
  mean_time / 1000 as mean_seconds,
  rows
FROM pg_stat_statements
ORDER BY total_time DESC
LIMIT 20;

// 应用级查询分析
import { performance } from "perf_hooks";

/**
 * In-memory query log: records every query with its duration and exposes
 * slow-query and summary helpers. Durations are in milliseconds.
 */
const queryLogger = {
  queries: [] as Array<{ sql: string; duration: number; timestamp: Date }>,

  /** Records one query and warns when it took longer than 100 ms. */
  log(sql: string, duration: number) {
    this.queries.push({ sql, duration, timestamp: new Date() });
    if (duration > 100) {
      console.warn(`慢查询 (${duration}ms): ${sql.substring(0, 100)}...`);
    }
  },

  /** Returns the recorded queries slower than `threshold` milliseconds. */
  getSlowQueries(threshold: number = 100) {
    return this.queries.filter((q) => q.duration > threshold);
  },

  /** Returns count, average, maximum and nearest-rank p95 of the recorded durations. */
  getStats() {
    const durations = this.queries.map((q) => q.duration).sort((a, b) => a - b);
    const count = durations.length;

    // An empty log would otherwise yield NaN / -Infinity / undefined.
    if (count === 0) {
      return { count: 0, avg: 0, max: 0, p95: 0 };
    }

    return {
      count,
      avg: durations.reduce((a, b) => a + b, 0) / count,
      // The array is sorted, so the maximum is the last element; this also
      // avoids spreading a potentially huge array into Math.max().
      max: durations[count - 1],
      p95: durations[Math.max(0, Math.ceil((count * 95) / 100) - 1)],
    };
  },
};

6. 跟踪关键指标

基本性能指标：

指标	描述	目标	警报阈值
延迟 (p50)	中位数响应时间	<100ms	>200ms
延迟 (p95)	95百分位数	<500ms	>1000ms
延迟 (p99)	99百分位数	<1000ms	>2000ms
吞吐量	每秒请求数	>1000 RPS	<500 RPS
错误率	失败请求百分比	<0.1%	>1%
饱和	资源利用率	<70%	>85%
Apdex	用户满意度得分	>0.9	<0.7

实现指标收集：

// metrics.ts
import { Counter, Histogram, Gauge, Registry } from "prom-client";

// Dedicated registry that the metrics below are registered with.
const register = new Registry();

// Latency histogram, labelled by method, route and status code.
const httpRequestDuration = new Histogram({
  registers: [register],
  name: "http_request_duration_seconds",
  help: "HTTP请求持续时间（秒）",
  buckets: [0.01, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10],
  labelNames: ["method", "route", "status_code"],
});

// Request counter with the same label set as the histogram.
const httpRequestTotal = new Counter({
  registers: [register],
  name: "http_requests_total",
  help: "HTTP请求总数",
  labelNames: ["method", "route", "status_code"],
});

// Gauge that the Express middleware increments and decrements per request.
const activeConnections = new Gauge({
  registers: [register],
  name: "active_connections",
  help: "活动连接数",
});

/**
 * Express middleware that records request duration and count and tracks the
 * number of in-flight requests.
 */
export function metricsMiddleware(
  req: Request,
  res: Response,
  next: NextFunction,
) {
  const start = process.hrtime.bigint();

  activeConnections.inc();

  // Decrement the gauge exactly once, whichever event arrives first.
  let released = false;
  const release = () => {
    if (released) return;
    released = true;
    activeConnections.dec();
  };

  res.once("finish", () => {
    // hrtime is in nanoseconds; the histogram is in seconds.
    const duration = Number(process.hrtime.bigint() - start) / 1e9;
    const route = req.route?.path || req.path;

    httpRequestDuration.observe(
      { method: req.method, route, status_code: res.statusCode },
      duration,
    );

    httpRequestTotal.inc({
      method: req.method,
      route,
      status_code: res.statusCode,
    });

    release();
  });

  // "finish" never fires when the client disconnects before the response is
  // complete; "close" still does, so the gauge cannot drift upwards.
  res.once("close", release);

  next();
}

// Metrics endpoint scraped by Prometheus
app.get("/metrics", async (req, res) => {
  try {
    const body = await register.metrics();
    res.set("Content-Type", register.contentType);
    res.end(body);
  } catch (err) {
    // Express 4 does not forward rejections from async handlers; without this
    // a failing collector would leave the request hanging.
    res.status(500).end(err instanceof Error ? err.message : String(err));
  }
});

自定义业务指标：

// business-metrics.ts

// Order processing time, labelled by payment method and status; buckets from 0.5 to 60.
const orderProcessingTime = new Histogram({
  name: "order_processing_duration_seconds",
  help: "处理订单的时间",
  buckets: [0.5, 1, 2, 5, 10, 30, 60],
  labelNames: ["payment_method", "status"],
});

// Cart value at checkout; buckets from 10 to 1000.
const cartValue = new Histogram({
  name: "cart_value_dollars",
  help: "结账时的购物车价值",
  buckets: [10, 25, 50, 100, 250, 500, 1000],
});

// Number of currently authenticated users.
const concurrentUsers = new Gauge({
  name: "concurrent_authenticated_users",
  help: "当前认证用户数",
});

7. 定义性能预算

Web性能预算：

// performance-budget.ts
/** One budget line: a metric name, its upper limit and the unit of that limit. */
interface PerformanceBudget {
  metric: string;
  budget: number;
  // Unit of `budget`; an empty string is used for unitless metrics.
  unit: string;
}

// Budget table as [metric, budget, unit] rows, converted into objects below.
const budgetRows: Array<[string, number, string]> = [
  // Timing metrics
  ["First Contentful Paint", 1800, "ms"],
  ["Largest Contentful Paint", 2500, "ms"],
  ["Time to Interactive", 3800, "ms"],
  ["Total Blocking Time", 300, "ms"],
  ["Cumulative Layout Shift", 0.1, ""],

  // Resource budgets
  ["JavaScript包大小", 300, "KB"],
  ["CSS包大小", 100, "KB"],
  ["总页面重量", 1500, "KB"],
  ["图像重量", 500, "KB"],

  // Request budgets
  ["总请求数", 50, "requests"],
  ["第三方请求数", 10, "requests"],
];

const webBudgets: PerformanceBudget[] = budgetRows.map(
  ([metric, budget, unit]) => ({ metric, budget, unit }),
);

Lighthouse CI配置：

// lighthouserc.js
module.exports = {
  ci: {
    collect: {
      url: ["http://localhost:3000/", "http://localhost:3000/products"],
      numberOfRuns: 3,
    },
    assert: {
      assertions: {
        "first-contentful-paint": ["error", { maxNumericValue: 1800 }],
        "largest-contentful-paint": ["error", { maxNumericValue: 2500 }],
        interactive: ["error", { maxNumericValue: 3800 }],
        "total-blocking-time": ["error", { maxNumericValue: 300 }],
        "cumulative-layout-shift": ["error", { maxNumericValue: 0.1 }],
        "resource-summary:script:size": ["error", { maxNumericValue: 300000 }],
        "resource-summary:total:size": ["error", { maxNumericValue: 1500000 }],
      },
    },
    upload: {
      target: "temporary-public-storage",
    },
  },
};

API性能预算：

# api-budgets.yml
# Per-endpoint budgets: latency percentiles in milliseconds, error rate in
# percent, throughput in requests per second.
endpoints:
  # Catalogue listing
  GET /api/products:
    p50_latency_ms: 50
    p95_latency_ms: 200
    p99_latency_ms: 500
    error_rate_percent: 0.1
    throughput_rps: 1000

  # Order creation
  POST /api/orders:
    p50_latency_ms: 200
    p95_latency_ms: 800
    p99_latency_ms: 2000
    error_rate_percent: 0.01
    throughput_rps: 100

  # Search
  GET /api/search:
    p50_latency_ms: 100
    p95_latency_ms: 500
    p99_latency_ms: 1500
    error_rate_percent: 0.5
    throughput_rps: 500

8. 识别和解决瓶颈

系统性瓶颈分析：

// bottleneck-analyzer.ts
/** One finding produced by the bottleneck analysis. */
interface BottleneckReport {
  // Resource area the finding belongs to.
  category: "cpu" | "memory" | "io" | "network" | "database";
  severity: "low" | "medium" | "high" | "critical";
  // What was observed.
  description: string;
  // Suggested follow-up action.
  recommendation: string;
  // Measurements backing the finding, keyed by metric name.
  metrics: Record<string, number>;
}

/**
 * Takes a quick snapshot of CPU, heap and event-loop health for the current
 * process and returns one report per threshold that was exceeded.
 * The CPU check samples for about 100 ms, so the call takes at least that long.
 */
async function analyzeBottlenecks(): Promise<BottleneckReport[]> {
  const reports: BottleneckReport[] = [];

  // CPU analysis. process.cpuUsage() returns cumulative microseconds since
  // process start, not a percentage, so utilisation has to be derived from the
  // CPU time spent during a known wall-clock window.
  const cpuSampleMs = 100;
  const cpuBefore = process.cpuUsage();
  const wallBefore = process.hrtime.bigint();
  await new Promise((resolve) => setTimeout(resolve, cpuSampleMs));
  const cpuDelta = process.cpuUsage(cpuBefore);
  const wallMicros = Number(process.hrtime.bigint() - wallBefore) / 1000;
  const userCpuPercent = (cpuDelta.user / wallMicros) * 100;
  if (userCpuPercent > 80) {
    reports.push({
      category: "cpu",
      severity: "high",
      description: "检测到高CPU利用率",
      recommendation: "分析CPU使用情况，优化热点路径，考虑缓存",
      metrics: { userCpuPercent },
    });
  }

  // Memory analysis: share of the allocated heap that is in use.
  const memUsage = process.memoryUsage();
  const heapUsedPercent = (memUsage.heapUsed / memUsage.heapTotal) * 100;
  if (heapUsedPercent > 85) {
    reports.push({
      category: "memory",
      severity: "high",
      description: "检测到高内存压力",
      recommendation: "检查内存泄漏，减少对象保留",
      metrics: { heapUsedPercent, heapUsedMB: memUsage.heapUsed / 1024 / 1024 },
    });
  }

  // Event-loop lag: how long a setImmediate callback waits before running.
  const lagStart = Date.now();
  await new Promise((resolve) => setImmediate(resolve));
  const eventLoopLag = Date.now() - lagStart;
  if (eventLoopLag > 100) {
    reports.push({
      category: "cpu",
      severity: "medium",
      description: "检测到事件循环阻塞",
      recommendation: "将CPU密集型工作移动到工作线程",
      metrics: { eventLoopLagMs: eventLoopLag },
    });
  }

  return reports;
}

常见瓶颈和解决方案：

瓶颈	症状	诊断	解决方案
N+1 查询	延迟线性增加	查询日志	急切加载、批处理
缺少索引	大表上的慢查询	EXPLAIN ANALYZE	添加适当索引
连接池	负载下超时	池指标	增加池大小、添加队列
同步I/O	高事件循环延迟	分析	使用异步操作
内存泄漏	随时间堆增长	堆快照	修复对象保留
未优化的JSON	序列化时高CPU	CPU分析	流解析、模式验证
大有效负载	高网络延迟	响应大小监控	分页、压缩

数据库优化清单：

-- Check for missing indexes: tables that are sequentially scanned more often
-- than index-scanned. pg_stat_user_tables exposes the table name as "relname"
-- (there is no "tablename" column), and idx_scan is NULL for tables without
-- any index, so it is coalesced to 0 to keep those tables in the result.
SELECT
  schemaname, relname AS tablename,
  seq_scan, seq_tup_read,
  idx_scan, idx_tup_fetch
FROM pg_stat_user_tables
WHERE seq_scan > COALESCE(idx_scan, 0)
ORDER BY seq_tup_read DESC;

-- Check for slow queries (requires the pg_stat_statements extension).
-- NOTE(review): PostgreSQL 13 renamed total_time/mean_time to
-- total_exec_time/mean_exec_time — confirm the target server version.
SELECT query, calls, total_time, mean_time, rows
FROM pg_stat_statements
ORDER BY mean_time DESC
LIMIT 20;

-- Check for lock contention: one row per (blocked session, blocking session).
-- pg_blocking_pids() (PostgreSQL 9.6+) resolves the blockers; the previous
-- self-join of pg_locks on locktype alone paired unrelated locks and
-- referenced a blocked_activity relation that was never joined.
SELECT blocked.pid AS blocked_pid,
       blocking.pid AS blocking_pid,
       blocked.query AS blocked_query
FROM pg_stat_activity AS blocked
CROSS JOIN LATERAL unnest(pg_blocking_pids(blocked.pid)) AS blocker(pid)
JOIN pg_stat_activity AS blocking
  ON blocking.pid = blocker.pid;

9. 测试数据生成和现实负载模式

生成现实测试数据：

// test-data-generator.ts
import { faker } from "@faker-js/faker";

/** Shape of a generated test user. */
interface TestUser {
  // Sequential id, starting at 1 in generateUsers.
  id: number;
  email: string;
  name: string;
  createdAt: Date;
  // Free-form settings; generateUsers fills theme, notifications and language.
  preferences: Record<string, any>;
}

/** Generates `count` fake users with sequential ids starting at 1. */
function generateUsers(count: number): TestUser[] {
  const generated: TestUser[] = [];

  for (let index = 0; index < count; index += 1) {
    generated.push({
      id: index + 1,
      email: faker.internet.email(),
      name: faker.person.fullName(),
      createdAt: faker.date.past({ years: 2 }),
      preferences: {
        theme: faker.helpers.arrayElement(["light", "dark"]),
        notifications: faker.datatype.boolean(),
        language: faker.helpers.arrayElement(["en", "es", "fr", "de"]),
      },
    });
  }

  return generated;
}

/**
 * Generates users and orders with an 80/20 distribution: the first 20% of the
 * users ("power users") receive 80% of the orders.
 *
 * @param userCount  number of users to generate
 * @param orderCount number of orders to generate
 * @throws RangeError when orders are requested but there are no users
 */
function generateOrdersWithDistribution(userCount: number, orderCount: number) {
  const users = generateUsers(userCount);
  const orders = [];

  if (users.length === 0 && orderCount > 0) {
    throw new RangeError("Cannot generate orders without any users");
  }

  // 80/20 rule: 20% of the users produce 80% of the orders.
  const splitIndex = Math.floor(userCount * 0.2);
  const powerUsers = users.slice(0, splitIndex);
  const regularUsers = users.slice(splitIndex);

  // With very few users one of the groups can be empty (fewer than 5 users
  // means no power users). Picking from an empty list would fail, so fall back
  // to the whole population for that group.
  const powerPool = powerUsers.length > 0 ? powerUsers : users;
  const regularPool = regularUsers.length > 0 ? regularUsers : users;

  const powerUserOrders = Math.floor(orderCount * 0.8);
  const regularUserOrders = orderCount - powerUserOrders;

  // Power users
  for (let i = 0; i < powerUserOrders; i++) {
    const user = faker.helpers.arrayElement(powerPool);
    orders.push(generateOrder(user.id, i + 1));
  }

  // Regular users; order ids continue after the power users' orders.
  for (let i = 0; i < regularUserOrders; i++) {
    const user = faker.helpers.arrayElement(regularPool);
    orders.push(generateOrder(user.id, powerUserOrders + i + 1));
  }

  return { users, orders };
}

/** Builds one fake order with 1-10 line items for the given user. */
function generateOrder(userId: number, orderId: number) {
  const itemCount = faker.number.int({ min: 1, max: 10 });

  const items = [];
  for (let n = 0; n < itemCount; n += 1) {
    items.push({
      productId: faker.number.int({ min: 1, max: 1000 }),
      quantity: faker.number.int({ min: 1, max: 5 }),
      price: parseFloat(faker.commerce.price()),
    });
  }

  const orderStatuses = ["pending", "processing", "shipped", "delivered"];
  const status = faker.helpers.arrayElement(orderStatuses);
  const createdAt = faker.date.recent({ days: 90 });

  return { id: orderId, userId, items, status, createdAt };
}

现实流量模式：

// k6 现实流量模式
import http from "k6/http";
import { sleep } from "k6";

// Builds a ramping-arrival-rate scenario: 5 min up to the peak rate, hold,
// then 5 min back down to 10 iterations per second.
const rampingArrivals = ({ peakRate, holdFor, maxVUs, startTime }) => ({
  executor: "ramping-arrival-rate",
  startRate: 10,
  timeUnit: "1s",
  preAllocatedVUs: 50,
  maxVUs,
  stages: [
    { duration: "5m", target: peakRate }, // ramp up
    { duration: holdFor, target: peakRate }, // sustain
    { duration: "5m", target: 10 }, // ramp down
  ],
  startTime,
});

export const options = {
  scenarios: {
    // Morning traffic peak (9am)
    morning_spike: rampingArrivals({
      peakRate: 50,
      holdFor: "10m",
      maxVUs: 200,
      startTime: "0s",
    }),
    // Lunch traffic (12pm)
    lunch_traffic: {
      executor: "constant-arrival-rate",
      rate: 30,
      timeUnit: "1s",
      duration: "30m",
      preAllocatedVUs: 100,
      maxVUs: 150,
      startTime: "20m",
    },
    // Evening peak (6pm)
    evening_spike: rampingArrivals({
      peakRate: 100,
      holdFor: "15m",
      maxVUs: 300,
      startTime: "50m",
    }),
    // Background jobs (constant low load)
    background_jobs: {
      executor: "constant-vus",
      vus: 5,
      duration: "2h",
      exec: "backgroundJob",
    },
  },
};

/**
 * One iteration of a randomly chosen user profile: 60% quick browsing, 30%
 * normal browsing with a cart view, 10% order placement.
 */
export default function () {
  // A single random draw selects the behaviour profile for this iteration.
  const roll = Math.random();

  if (roll < 0.6) {
    // 60% - browsing users (fast, several requests)
    http.get(`${__ENV.API_URL}/api/products`);
    sleep(0.5);
    const productUrl = `${__ENV.API_URL}/api/products/${Math.floor(Math.random() * 100)}`;
    http.get(productUrl);
    sleep(0.5);
    return;
  }

  if (roll < 0.9) {
    // 30% - regular users (medium pace)
    http.get(`${__ENV.API_URL}/api/products`);
    sleep(2);
    http.get(`${__ENV.API_URL}/api/cart`);
    sleep(3);
    return;
  }

  // 10% - power users (complex operation)
  const orderBody = JSON.stringify({
    items: [{ productId: 1, quantity: 1 }],
  });
  const orderParams = {
    headers: { "Content-Type": "application/json" },
  };
  http.post(`${__ENV.API_URL}/api/orders`, orderBody, orderParams);
  sleep(5);
}

/**
 * Scenario entry point that mimics a cron job / worker: posts one batch
 * request with a random batch id (0-999), then waits a minute.
 */
export function backgroundJob() {
  const endpoint = `${__ENV.API_URL}/internal/process-batch`;
  const batchId = Math.floor(Math.random() * 1000);
  const body = JSON.stringify({ batchId });
  const params = { headers: { "Content-Type": "application/json" } };

  http.post(endpoint, body, params);
  sleep(60); // once per minute
}

思考时间和用户行为：

import http from "k6/http";
import { sleep } from "k6";
import { randomIntBetween } from "https://jslib.k6.io/k6-utils/1.2.0/index.js";

/**
 * Pause the virtual user for a human-like, randomized think time.
 *
 * The delay is drawn from an approximately normal distribution: three
 * uniform samples are summed (Irwin-Hall) and normalized to zero mean and
 * unit variance before being scaled, so `stdDev` is the real standard
 * deviation of the delay in seconds.
 *
 * @param {number} [mean=2] - Centre of the distribution, in seconds.
 * @param {number} [stdDev=0.5] - Standard deviation, in seconds.
 * @param {number} [minimum=0.5] - Lower bound for the delay, in seconds.
 */
function thinkTime(mean = 2, stdDev = 0.5, minimum = 0.5) {
  const UNIFORM_SAMPLES = 3;

  let uniformSum = 0;
  for (let i = 0; i < UNIFORM_SAMPLES; i++) {
    uniformSum += Math.random();
  }

  // A sum of n uniform(0, 1) samples has mean n/2 and variance n/12.
  // Dividing by sqrt(n/12) is what makes `stdDev` mean what it says; without
  // it the actual spread is only half of the configured value.
  const standardScore =
    (uniformSum - UNIFORM_SAMPLES / 2) / Math.sqrt(UNIFORM_SAMPLES / 12);

  sleep(Math.max(minimum, mean + stdDev * standardScore));
}

/**
 * Simulates one user session: log in, browse a few product pages, maybe add
 * to the cart and check out, and occasionally log out explicitly.
 */
export default function () {
  // Log in
  http.post(`${__ENV.API_URL}/auth/login` /* ... */);
  thinkTime();

  // Browse products (3-7 pages)
  const pagesToView = randomIntBetween(3, 7);
  let page = 0;
  while (page < pagesToView) {
    http.get(`${__ENV.API_URL}/api/products?page=${page}`);
    thinkTime();
    page += 1;
  }

  // 30% of sessions add something to the cart
  const addsToCart = Math.random() < 0.3;
  if (addsToCart) {
    http.post(`${__ENV.API_URL}/api/cart` /* ... */);
    thinkTime();

    // 50% of the users who added to the cart complete checkout
    const completesCheckout = Math.random() < 0.5;
    if (completesCheckout) {
      http.post(`${__ENV.API_URL}/api/orders` /* ... */);
      sleep(3); // checkout takes longer
    }
  }

  // Log out (only 20% of users log out explicitly)
  const logsOut = Math.random() < 0.2;
  if (logsOut) {
    http.post(`${__ENV.API_URL}/auth/logout`);
  }
}

边缘情况和错误场景：

import http from "k6/http";
import { check } from "k6";

/**
 * Runs exactly one request scenario per iteration, picked by a weighted
 * random draw. Each entry carries the cumulative probability (`upTo`) at
 * which it stops being selected, plus the request/check to run.
 */
export default function () {
  const weightedCases = [
    {
      // Happy path (70%)
      upTo: 0.7,
      run: () => {
        const res = http.get(`${__ENV.API_URL}/api/products`);
        check(res, { "状态是200": (r) => r.status === 200 });
      },
    },
    {
      // Large payload (10%)
      upTo: 0.8,
      run: () => {
        const res = http.get(`${__ENV.API_URL}/api/products?limit=1000`);
        check(res, { "处理大响应": (r) => r.status === 200 });
      },
    },
    {
      // Invalid input (10%)
      upTo: 0.9,
      run: () => {
        const res = http.get(`${__ENV.API_URL}/api/products/-1`);
        check(res, { "处理无效ID": (r) => r.status === 400 });
      },
    },
    {
      // Not found (5%)
      upTo: 0.95,
      run: () => {
        const res = http.get(`${__ENV.API_URL}/api/products/999999`);
        check(res, { "处理未找到": (r) => r.status === 404 });
      },
    },
    {
      // Timeout scenario (3%): passes on a 200 or when the response has an error set
      upTo: 0.98,
      run: () => {
        const res = http.get(`${__ENV.API_URL}/api/slow-endpoint`, {
          timeout: "5s",
        });
        check(res, { "处理超时": (r) => r.status === 200 || r.error });
      },
    },
    {
      // Unauthorized (2%)
      upTo: 1.0,
      run: () => {
        const res = http.get(`${__ENV.API_URL}/api/admin/users`);
        check(res, {
          "强制执行认证": (r) => r.status === 401 || r.status === 403,
        });
      },
    },
  ];

  // Weighted random selection: the first entry whose cumulative bound
  // exceeds the roll is the one that runs.
  const roll = Math.random();
  const selected = weightedCases.find((entry) => roll < entry.upTo);
  if (selected !== undefined) {
    selected.run();
  }
}

最佳实践

测试环境和设置

在类似生产环境中测试
- 匹配硬件规格（CPU、RAM、磁盘I/O）
- 使用现实数据量（生产规模数据库）
- 模拟实际流量模式和用户分布
- 如果测试分布式系统，包括网络延迟
- 使用类似生产配置测试（缓存、CDN、负载均衡器）
首先建立基线
- 在任何优化前测量当前性能
- 记录所有关键端点的基线指标
- 随时间跟踪变化以检测退化
- 创建性能基线报告用于利益相关者沟通
使用现实测试数据
- 数量应匹配生产规模（不仅仅是小样本）
- 包括边缘情况（大记录、Unicode、特殊字符、畸形数据）
- 分别在冷缓存和热缓存状态下测试，以测量两种场景
- 应用现实数据分布（80/20规则、帕累托原理）
- 包括时区、区域设置和国际化变化

测试执行策略

早期和经常测试
- 在CI/CD流水线中包括性能测试
- 每次提交运行冒烟测试（快速基线检查）
- 每晚或在拉取请求上运行完整负载测试
- 在生产部署前捕获退化，而不是在生产中
- 监控趋势，不仅仅是通过/失败阈值
实施渐进式负载测试
- 从冒烟测试开始（最小负载，验证功能）
- 进展到负载测试（预期正常负载）
- 执行压力测试（找到崩溃点）
- 运行峰值测试（验证自动扩展和恢复）
- 最后进行浸泡测试（检测内存泄漏和随时间退化）
测试关键用户旅程
- 识别前3-5个最重要的用户流
- 优先测试创收路径
- 包括认证、支付和结账流
- 单独测试管理员和特权操作
- 验证部分故障下的优雅降级

分析和报告

分析百分位数，而不是平均值
- 始终报告p50、p95、p99延迟（不仅仅是平均）
- 使用p95/p99用于SLOs和警报阈值
- 理解异常值对用户体验很重要
- 跟踪最大延迟以识别最坏情况场景
在生产中监控
- 性能测试是生产监控的补充，而不能取代生产监控
- 使用APM工具（Datadog、New Relic等）获取真实用户指标
- 对关键路径实施合成监控（synthetic monitoring）
- 在用户抱怨之前就对性能退化发出警报
- 将负载测试结果与生产行为关联
记录和分享结果
- 在版本控制中保留性能测试报告
- 创建趋势分析的可视化仪表板
- 在定期评审中与团队分享发现
- 用优化前后的指标对比来跟踪改进效果
- 记录基础设施变化及其性能影响

优化和迭代

遵循科学方法
- 在优化前形成假设（“我认为X慢是因为Y”）
- 一次更改一个变量
- 使用负载测试测量前后影响
- 记录失败的尝试（哪些做法无效及其原因）
- 验证优化不会破坏功能
设置和执行性能预算
- 为每个端点定义可接受的延迟目标
- 设置吞吐量要求（RPS、并发用户）
- 建立错误率阈值
- 阻止违反预算的部署
- 根据业务需求季度评审和调整预算
单独测试数据库性能
- 将数据库瓶颈与应用问题隔离
- 在负载下基准测试查询（不仅仅在开发中）
- 测试连接池耗尽场景
- 使用生产数据量验证索引有效性
- 在负载测试期间监控慢查询日志

示例

示例：完整的k6负载测试套件

// k6/scenarios/api-load-test.js
import http from "k6/http";
import { check, group, sleep } from "k6";
import { Rate, Trend, Counter } from "k6/metrics";

// Custom metrics
// Rate metric "errors": fed with a boolean per request/check outcome below
// and gated by the `errors` threshold in options.
const errorRate = new Rate("errors");
// Trend metric "order_latency": receives the elapsed milliseconds measured
// around the order-creation request.
const orderLatency = new Trend("order_latency");
// Counter metric "orders_created": incremented once per order whose checks pass.
const ordersCreated = new Counter("orders_created");

// Test configuration.
//
// The three scenarios are meant to run back to back, so every startTime must
// be at or after the end of the previous scenario:
//   baseline: 0m  -> 5m
//   stress:   5m  -> 21m (stages add up to 16m)
//   spike:    21m -> 22m20s
export const options = {
  scenarios: {
    // Constant load to establish a baseline
    baseline: {
      executor: "constant-vus",
      vus: 10,
      duration: "5m",
      tags: { scenario: "baseline" },
    },
    // Stepwise increasing load for stress testing
    stress: {
      executor: "ramping-vus",
      startVUs: 0,
      stages: [
        { duration: "2m", target: 50 },
        { duration: "5m", target: 50 },
        { duration: "2m", target: 100 },
        { duration: "5m", target: 100 },
        { duration: "2m", target: 0 },
      ],
      startTime: "5m",
      tags: { scenario: "stress" },
    },
    // Spike test
    spike: {
      executor: "ramping-vus",
      startVUs: 0,
      stages: [
        { duration: "10s", target: 200 },
        { duration: "1m", target: 200 },
        { duration: "10s", target: 0 },
      ],
      // Starts once the stress ramp-down has finished (5m + 16m); an earlier
      // start would overlap the two scenarios and blur their metrics.
      startTime: "21m",
      tags: { scenario: "spike" },
    },
  },
  thresholds: {
    http_req_duration: ["p(95)<500", "p(99)<1500"],
    errors: ["rate<0.01"],
    order_latency: ["p(95)<2000"],
  },
};

// Base URL of the system under test: taken from the BASE_URL environment
// variable, falling back to http://localhost:3000 when it is unset or empty.
const BASE_URL = __ENV.BASE_URL || "http://localhost:3000";

/**
 * k6 setup stage: logs in once and hands the auth token to the VU code.
 *
 * Fails fast when the login does not succeed. Otherwise every later request
 * would run with a missing token and the results would measure
 * authentication failures rather than the application.
 *
 * @returns {{ token: string }} Data passed to the default function and teardown.
 * @throws {Error} When the login request is not 2xx or returns no token.
 */
export function setup() {
  // Log in and obtain an auth token
  const loginRes = http.post(
    `${BASE_URL}/api/auth/login`,
    JSON.stringify({
      email: "loadtest@example.com",
      password: "loadtest123",
    }),
    {
      headers: { "Content-Type": "application/json" },
    },
  );

  // Check the status before touching the body: an error response may not be JSON.
  if (loginRes.status < 200 || loginRes.status >= 300) {
    throw new Error(`Load-test login failed with status ${loginRes.status}`);
  }

  const token = loginRes.json("token");
  if (!token) {
    throw new Error("Load-test login response did not contain a token");
  }

  return { token };
}

/**
 * Read a JSON field from a response without throwing.
 *
 * Error responses are often not JSON, and an exception raised inside a check
 * callback would abort the iteration before the failure is recorded.
 * Returning undefined lets the check fail normally, so nothing is swallowed.
 */
function jsonField(res, selector) {
  try {
    return res.json(selector);
  } catch (err) {
    return undefined;
  }
}

/**
 * Main VU iteration: browse the product list, view one product, and create
 * an order, recording errors and order latency in the custom metrics.
 *
 * @param {{ token: string }} data - Value returned by setup().
 */
export default function (data) {
  const headers = {
    "Content-Type": "application/json",
    Authorization: `Bearer ${data.token}`,
  };

  group("浏览产品", () => {
    const productsRes = http.get(`${BASE_URL}/api/products`, { headers });
    check(productsRes, {
      "产品状态200": (r) => r.status === 200,
      "产品返回": (r) => {
        const items = jsonField(r, "data");
        return items !== undefined && items !== null && items.length > 0;
      },
    });
    errorRate.add(productsRes.status !== 200);
    sleep(1);
  });

  group("查看产品详情", () => {
    const productRes = http.get(`${BASE_URL}/api/products/1`, { headers });
    check(productRes, {
      "产品状态200": (r) => r.status === 200,
    });
    errorRate.add(productRes.status !== 200);
    sleep(0.5);
  });

  group("创建订单", () => {
    // Wall-clock time around the request, in milliseconds
    const start = Date.now();
    const orderRes = http.post(
      `${BASE_URL}/api/orders`,
      JSON.stringify({
        items: [{ productId: 1, quantity: 1 }],
      }),
      { headers },
    );

    orderLatency.add(Date.now() - start);

    const success = check(orderRes, {
      "订单状态201": (r) => r.status === 201,
      "订单有id": (r) => jsonField(r, "id") !== undefined,
    });

    // Count only orders that passed every check; anything else is an error.
    if (success) ordersCreated.add(1);
    errorRate.add(!success);
    sleep(2);
  });
}

/**
 * k6 teardown stage. Receives the setup() data but only logs a completion
 * message; add test-data cleanup here if it becomes necessary.
 */
export function teardown(data) {
  const completionMessage = "测试完成";
  console.log(completionMessage);
}

示例：性能监控仪表板配置

# grafana/dashboards/performance.json (简化)
# Dashboard panels (simplified): p95 latency, throughput, error ratio and
# active connections.
panels:
  - title: "请求延迟 (p95)"
    type: graph
    targets:
      - expr: histogram_quantile(0.95, rate(http_request_duration_seconds_bucket[5m]))

  - title: "吞吐量 (RPS)"
    type: graph
    targets:
      - expr: rate(http_requests_total[1m])

  - title: "错误率"
    type: graph
    targets:
      # Both sides are aggregated with sum() so the division yields one overall
      # ratio. Without it, PromQL pairs series by identical label sets and each
      # 5xx series is only divided by itself.
      - expr: sum(rate(http_requests_total{status_code=~"5.."}[5m])) / sum(rate(http_requests_total[5m]))

  - title: "活动连接"
    type: gauge
    targets:
      - expr: active_connections

# Alert rules: sustained high p95 latency and sustained high 5xx error ratio.
alerts:
  - name: 高延迟
    # Fires when p95 request latency stays above 0.5 s for 5 minutes.
    expr: histogram_quantile(0.95, rate(http_request_duration_seconds_bucket[5m])) > 0.5
    for: 5m
    labels:
      severity: 警告

  - name: 高错误率
    # Fires when more than 1% of all requests return 5xx for 2 minutes.
    # sum() on both sides is required: an unaggregated division matches series
    # by identical labels, so every 5xx series would be divided by itself.
    expr: sum(rate(http_requests_total{status_code=~"5.."}[5m])) / sum(rate(http_requests_total[5m])) > 0.01
    for: 2m
    labels:
      severity: 严重