TimescaleDB (时间序列数据库) 模式
1. TimescaleDB 设置
安装
-- 安装 TimescaleDB 扩展
CREATE EXTENSION IF NOT EXISTS timescaledb;
-- 检查 TimescaleDB 版本
SELECT extversion FROM pg_extension WHERE extname = 'timescaledb';
数据库初始化
-- 创建数据库
CREATE DATABASE timeseries_db;
-- 连接到数据库
\c timeseries_db;
-- 启用 TimescaleDB
CREATE EXTENSION IF NOT EXISTS timescaledb;
-- 验证安装
SELECT * FROM timescaledb_information.hypertables;
2. 超表创建
基本超表
-- 创建一个基本的时间序列表
CREATE TABLE sensor_data (
time TIMESTAMPTZ NOT NULL,
sensor_id INTEGER NOT NULL,
temperature DOUBLE PRECISION,
humidity DOUBLE PRECISION,
pressure DOUBLE PRECISION
);
-- 转换为超表
SELECT create_hypertable('sensor_data', 'time');
-- 验证超表
SELECT * FROM timescaledb_information.hypertables WHERE hypertable_name = 'sensor_data';
多维度超表
-- 创建具有多个维度的表
CREATE TABLE iot_metrics (
time TIMESTAMPTZ NOT NULL,
device_id INTEGER NOT NULL,
location_id INTEGER NOT NULL,
metric_type TEXT NOT NULL,
value DOUBLE PRECISION,
metadata JSONB
);
-- 创建具有时间和空间分区的超表
SELECT create_hypertable(
'iot_metrics',
'time',
partitioning_column => 'device_id',
number_partitions => 4
);
自定义块大小的超表
-- 创建具有自定义块间隔(1天)的超表
SELECT create_hypertable(
'sensor_data',
'time',
chunk_time_interval => interval '1 day'
);
-- 创建具有自定义块间隔(1小时,用于高频数据)的超表
SELECT create_hypertable(
'high_frequency_data',
'time',
chunk_time_interval => interval '1 hour'
);
3. 基于时间的分区
自动分区
-- TimescaleDB 根据 chunk_time_interval 自动分区数据
-- 查看超表的块
SELECT *
FROM timescaledb_information.chunks
WHERE hypertable_name = 'sensor_data'
ORDER BY range_start;
-- 查看块大小
SELECT
chunk_schema,
chunk_name,
range_start,
range_end,
pg_size_pretty(pg_total_relation_size(chunk_schema || '.' || chunk_name)) as size
FROM timescaledb_information.chunks
WHERE hypertable_name = 'sensor_data'
ORDER BY range_start;
手动块管理
-- 显示块
SELECT show_chunks('sensor_data');
-- 显示特定时间范围的块
SELECT show_chunks('sensor_data', NOW() - INTERVAL '7 days');
-- 为未来数据创建块
SELECT create_chunk('sensor_data', NOW() + INTERVAL '1 day');
-- 删除旧块
SELECT drop_chunks('sensor_data', INTERVAL '30 days');
4. 连续聚合
基本连续聚合
-- 为每小时平均值创建连续聚合
CREATE MATERIALIZED VIEW sensor_hourly_avg
WITH (timescaledb.continuous) AS
SELECT
time_bucket('1 hour', time) AS bucket,
sensor_id,
AVG(temperature) AS avg_temperature,
AVG(humidity) AS avg_humidity,
AVG(pressure) AS avg_pressure,
COUNT(*) AS count
FROM sensor_data
GROUP BY bucket, sensor_id;
-- 刷新连续聚合
CALL refresh_continuous_aggregate('sensor_hourly_avg', NULL, NULL);
多时间桶连续聚合
-- 日聚合
CREATE MATERIALIZED VIEW sensor_daily_avg
WITH (timescaledb.continuous) AS
SELECT
time_bucket('1 day', time) AS bucket,
sensor_id,
AVG(temperature) AS avg_temperature,
MIN(temperature) AS min_temperature,
MAX(temperature) AS max_temperature,
STDDEV(temperature) AS stddev_temperature,
COUNT(*) AS count
FROM sensor_data
GROUP BY bucket, sensor_id;
-- 周聚合
CREATE MATERIALIZED VIEW sensor_weekly_avg
WITH (timescaledb.continuous) AS
SELECT
time_bucket('1 week', time) AS bucket,
sensor_id,
AVG(temperature) AS avg_temperature,
MIN(temperature) AS min_temperature,
MAX(temperature) AS max_temperature,
COUNT(*) AS count
FROM sensor_data
GROUP BY bucket, sensor_id;
实时聚合连续聚合
-- 创建具有实时聚合的连续聚合
CREATE MATERIALIZED VIEW sensor_hourly_avg_realtime
WITH (timescaledb.continuous, timescaledb.materialized_only = false) AS
SELECT
time_bucket('1 hour', time) AS bucket,
sensor_id,
AVG(temperature) AS avg_temperature,
AVG(humidity) AS avg_humidity,
COUNT(*) AS count
FROM sensor_data
GROUP BY bucket, sensor_id;
-- 使用实时数据查询
SELECT * FROM sensor_hourly_avg_realtime
WHERE bucket >= NOW() - INTERVAL '1 day';
自定义刷新策略的连续聚合
-- 设置刷新策略
SELECT add_continuous_aggregate_policy(
'sensor_hourly_avg',
start_offset => INTERVAL '1 hour',
end_offset => INTERVAL '1 minute',
schedule_interval => INTERVAL '5 minutes'
);
-- 查看连续聚合策略
SELECT * FROM timescaledb_information.jobs
WHERE hypertable_name = 'sensor_hourly_avg';
-- 移除策略
SELECT remove_continuous_aggregate_policy('sensor_hourly_avg');
5. 数据保留策略
删除块策略
-- 创建保留数据30天的删除块策略
SELECT add_drop_chunks_policy(
'sensor_data',
INTERVAL '30 days',
schedule_interval => INTERVAL '1 day'
);
-- 查看删除块策略
SELECT * FROM timescaledb_information.jobs
WHERE hypertable_name = 'sensor_data';
-- 移除删除块策略
SELECT remove_drop_chunks_policy('sensor_data');
压缩块策略
-- 在超表上启用压缩
ALTER TABLE sensor_data SET (
timescaledb.compress,
timescaledb.compress_segmentby = 'sensor_id'
);
-- 创建压缩块策略
SELECT add_compression_policy(
'sensor_data',
INTERVAL '7 days',
schedule_interval => INTERVAL '1 day'
);
-- 查看压缩策略
SELECT * FROM timescaledb_information.jobs
WHERE hypertable_name = 'sensor_data';
-- 移除压缩策略
SELECT remove_compression_policy('sensor_data');
组合保留策略
-- 创建组合策略:7天后压缩,90天后删除
SELECT add_compression_policy(
'sensor_data',
INTERVAL '7 days',
schedule_interval => INTERVAL '1 day'
);
SELECT add_drop_chunks_policy(
'sensor_data',
INTERVAL '90 days',
schedule_interval => INTERVAL '1 day'
);
6. 时间序列查询优化
时间桶查询
-- 基本时间桶
SELECT
time_bucket('5 minutes', time) AS bucket,
sensor_id,
AVG(temperature) AS avg_temperature
FROM sensor_data
WHERE time >= NOW() - INTERVAL '1 day'
GROUP BY bucket, sensor_id
ORDER BY bucket;
-- 带偏移的时间桶
SELECT
time_bucket('1 hour', time, TIMESTAMP '2020-01-01 00:00:00') AS bucket,
sensor_id,
AVG(temperature) AS avg_temperature
FROM sensor_data
WHERE time >= NOW() - INTERVAL '1 day'
GROUP BY bucket, sensor_id
ORDER BY bucket;
时间桶间隙填充
-- 用 NULL 填充缺失的时间桶
SELECT
time_bucket('5 minutes', time) AS bucket,
sensor_id,
AVG(temperature) AS avg_temperature
FROM sensor_data
WHERE time >= NOW() - INTERVAL '1 hour'
GROUP BY bucket, sensor_id
ORDER BY bucket;
-- 用前一个值填充缺失的时间桶(LOCF)
SELECT
time_bucket('5 minutes', time) AS bucket,
sensor_id,
locf(AVG(temperature)) AS avg_temperature
FROM sensor_data
WHERE time >= NOW() - INTERVAL '1 hour'
GROUP BY bucket, sensor_id
ORDER BY bucket;
-- 用线性插值填充缺失的时间桶
SELECT
time_bucket('5 minutes', time) AS bucket,
sensor_id,
interpolate(AVG(temperature)) AS avg_temperature
FROM sensor_data
WHERE time >= NOW() - INTERVAL '1 hour'
GROUP BY bucket, sensor_id
ORDER BY bucket;
最新数据查询
-- 获取每个传感器的最新数据
SELECT DISTINCT ON (sensor_id)
sensor_id,
time,
temperature,
humidity,
pressure
FROM sensor_data
ORDER BY sensor_id, time DESC;
-- 使用 LAST() 函数
SELECT
sensor_id,
last(temperature, time) AS latest_temperature,
last(humidity, time) AS latest_humidity,
last(pressure, time) AS latest_pressure
FROM sensor_data
WHERE time >= NOW() - INTERVAL '1 day'
GROUP BY sensor_id;
第一/最后数据查询
-- 获取时间范围内的第一个数据点
SELECT
sensor_id,
first(temperature, time) AS first_temperature,
first(humidity, time) AS first_humidity
FROM sensor_data
WHERE time >= NOW() - INTERVAL '1 day'
GROUP BY sensor_id;
-- 获取时间范围内的最后一个数据点
SELECT
sensor_id,
last(temperature, time) AS last_temperature,
last(humidity, time) AS last_humidity
FROM sensor_data
WHERE time >= NOW() - INTERVAL '1 day'
GROUP BY sensor_id;
增量和速率计算
-- 计算连续值之间的增量
SELECT
time_bucket('1 hour', time) AS bucket,
sensor_id,
AVG(temperature) AS avg_temperature,
delta(AVG(temperature)) OVER (
PARTITION BY sensor_id
ORDER BY bucket
) AS temperature_delta
FROM sensor_data
WHERE time >= NOW() - INTERVAL '1 day'
GROUP BY bucket, sensor_id
ORDER BY sensor_id, bucket;
-- 计算变化率
SELECT
time_bucket('1 hour', time) AS bucket,
sensor_id,
AVG(temperature) AS avg_temperature,
rate(AVG(temperature), bucket) OVER (
PARTITION BY sensor_id
ORDER BY bucket
) AS temperature_rate
FROM sensor_data
WHERE time >= NOW() - INTERVAL '1 day'
GROUP BY bucket, sensor_id
ORDER BY sensor_id, bucket;
7. 压缩
启用压缩
-- 在超表上启用压缩
ALTER TABLE sensor_data SET (
timescaledb.compress,
timescaledb.compress_segmentby = 'sensor_id',
timescaledb.compress_orderby = 'time'
);
-- 查看压缩设置
SELECT *
FROM timescaledb_information.compression_settings
WHERE hypertable_name = 'sensor_data';
手动压缩
-- 压缩特定块
SELECT compress_chunk(
(SELECT chunk_name FROM timescaledb_information.chunks
WHERE hypertable_name = 'sensor_data'
AND range_start < NOW() - INTERVAL '7 days'
LIMIT 1)
);
-- 解压缩块
SELECT decompress_chunk(
(SELECT chunk_name FROM timescaledb_information.chunks
WHERE hypertable_name = 'sensor_data'
AND range_start < NOW() - INTERVAL '7 days'
LIMIT 1)
);
压缩统计
-- 查看压缩统计
SELECT
chunk_schema,
chunk_name,
range_start,
range_end,
is_compressed,
pg_size_pretty(pg_total_relation_size(chunk_schema || '.' || chunk_name)) as size,
pg_size_pretty(pg_total_relation_size(chunk_schema || '.' || chunk_name || '_compressed')) as compressed_size
FROM timescaledb_information.chunks
WHERE hypertable_name = 'sensor_data'
ORDER BY range_start;
-- 查看压缩比率
SELECT
hypertable_name,
COUNT(*) as total_chunks,
SUM(CASE WHEN is_compressed THEN 1 ELSE 0 END) as compressed_chunks,
ROUND(100.0 * SUM(CASE WHEN is_compressed THEN 1 ELSE 0 END) / COUNT(*), 2) as compression_percentage
FROM timescaledb_information.chunks
WHERE hypertable_name = 'sensor_data'
GROUP BY hypertable_name;
8. 常见时间序列查询
时间上的聚合
-- 每小时聚合
SELECT
time_bucket('1 hour', time) AS bucket,
sensor_id,
AVG(temperature) AS avg_temperature,
MIN(temperature) AS min_temperature,
MAX(temperature) AS max_temperature,
STDDEV(temperature) AS stddev_temperature,
COUNT(*) AS count
FROM sensor_data
WHERE time >= NOW() - INTERVAL '1 day'
GROUP BY bucket, sensor_id
ORDER BY bucket;
-- 每日聚合
SELECT
time_bucket('1 day', time) AS bucket,
sensor_id,
AVG(temperature) AS avg_temperature,
MIN(temperature) AS min_temperature,
MAX(temperature) AS max_temperature,
COUNT(*) AS count
FROM sensor_data
WHERE time >= NOW() - INTERVAL '30 days'
GROUP BY bucket, sensor_id
ORDER BY bucket;
滚动聚合
-- 使用窗口函数的滚动平均
SELECT
time,
sensor_id,
temperature,
AVG(temperature) OVER (
PARTITION BY sensor_id
ORDER BY time
RANGE BETWEEN INTERVAL '1 hour' PRECEDING AND CURRENT ROW
) AS rolling_avg_1h
FROM sensor_data
WHERE time >= NOW() - INTERVAL '1 day'
ORDER BY sensor_id, time;
-- 滚动总和
SELECT
time_bucket('1 hour', time) AS bucket,
sensor_id,
SUM(temperature) OVER (
PARTITION BY sensor_id
ORDER BY bucket
RANGE BETWEEN INTERVAL '24 hours' PRECEDING AND CURRENT ROW
) AS rolling_sum_24h
FROM sensor_data
WHERE time >= NOW() - INTERVAL '7 days'
GROUP BY bucket, sensor_id
ORDER BY sensor_id, bucket;
百分位计算
-- 使用 percentile_cont
SELECT
time_bucket('1 hour', time) AS bucket,
sensor_id,
percentile_cont(0.5) WITHIN GROUP (ORDER BY temperature) AS median_temperature,
percentile_cont(0.95) WITHIN GROUP (ORDER BY temperature) AS p95_temperature,
percentile_cont(0.99) WITHIN GROUP (ORDER BY temperature) AS p99_temperature
FROM sensor_data
WHERE time >= NOW() - INTERVAL '1 day'
GROUP BY bucket, sensor_id
ORDER BY bucket;
-- 使用 percentile_disc
SELECT
time_bucket('1 day', time) AS bucket,
sensor_id,
percentile_disc(0.5) WITHIN GROUP (ORDER BY temperature) AS median_temperature,
percentile_disc(0.95) WITHIN GROUP (ORDER BY temperature) AS p95_temperature
FROM sensor_data
WHERE time >= NOW() - INTERVAL '30 days'
GROUP BY bucket, sensor_id
ORDER BY bucket;
时间序列连接
-- 将传感器数据与传感器元数据连接
SELECT
sd.time,
sd.sensor_id,
sm.name AS sensor_name,
sm.location,
sd.temperature,
sd.humidity
FROM sensor_data sd
JOIN sensor_metadata sm ON sd.sensor_id = sm.id
WHERE sd.time >= NOW() - INTERVAL '1 day'
ORDER BY sd.time;
-- 连接时时间桶对齐
SELECT
time_bucket('1 hour', sd.time) AS bucket,
sd.sensor_id,
sm.name AS sensor_name,
AVG(sd.temperature) AS avg_temperature,
AVG(sd.humidity) AS avg_humidity
FROM sensor_data sd
JOIN sensor_metadata sm ON sd.sensor_id = sm.id
WHERE sd.time >= NOW() - INTERVAL '1 day'
GROUP BY bucket, sd.sensor_id, sm.name
ORDER BY bucket;
异常检测
-- 使用 z-score 的简单异常检测
WITH stats AS (
SELECT
sensor_id,
AVG(temperature) AS avg_temp,
STDDEV(temperature) AS stddev_temp
FROM sensor_data
WHERE time >= NOW() - INTERVAL '7 days'
GROUP BY sensor_id
)
SELECT
sd.time,
sd.sensor_id,
sd.temperature,
s.avg_temp,
s.stddev_temp,
(sd.temperature - s.avg_temp) / s.stddev_temp AS z_score
FROM sensor_data sd
JOIN stats s ON sd.sensor_id = s.sensor_id
WHERE sd.time >= NOW() - INTERVAL '1 day'
AND ABS((sd.temperature - s.avg_temp) / s.stddev_temp) > 3
ORDER BY sd.time;
9. 下采样策略
基本下采样
-- 创建下采样表
CREATE TABLE sensor_data_downsampled (
time TIMESTAMPTZ NOT NULL,
sensor_id INTEGER NOT NULL,
avg_temperature DOUBLE PRECISION,
min_temperature DOUBLE PRECISION,
max_temperature DOUBLE PRECISION,
avg_humidity DOUBLE PRECISION,
min_humidity DOUBLE PRECISION,
max_humidity DOUBLE PRECISION,
count BIGINT
);
-- 创建超表
SELECT create_hypertable('sensor_data_downsampled', 'time');
-- 插入下采样数据
INSERT INTO sensor_data_downsampled
SELECT
time_bucket('1 hour', time) AS time,
sensor_id,
AVG(temperature) AS avg_temperature,
MIN(temperature) AS min_temperature,
MAX(temperature) AS max_temperature,
AVG(humidity) AS avg_humidity,
MIN(humidity) AS min_humidity,
MAX(humidity) AS max_humidity,
COUNT(*) AS count
FROM sensor_data
WHERE time >= NOW() - INTERVAL '1 day'
GROUP BY time, sensor_id;
使用连续聚合的下采样
-- 为下采样创建连续聚合
CREATE MATERIALIZED VIEW sensor_data_hourly
WITH (timescaledb.continuous) AS
SELECT
time_bucket('1 hour', time) AS time,
sensor_id,
AVG(temperature) AS avg_temperature,
MIN(temperature) AS min_temperature,
MAX(temperature) AS max_temperature,
STDDEV(temperature) AS stddev_temperature,
AVG(humidity) AS avg_humidity,
MIN(humidity) AS min_humidity,
MAX(humidity) AS max_humidity,
COUNT(*) AS count
FROM sensor_data
GROUP BY time, sensor_id;
-- 设置刷新策略
SELECT add_continuous_aggregate_policy(
'sensor_data_hourly',
start_offset => INTERVAL '1 hour',
end_offset => INTERVAL '1 minute',
schedule_interval => INTERVAL '5 minutes'
);
多级下采样
-- 小时级别
CREATE MATERIALIZED VIEW sensor_data_hourly
WITH (timescaledb.continuous) AS
SELECT
time_bucket('1 hour', time) AS time,
sensor_id,
AVG(temperature) AS avg_temperature,
MIN(temperature) AS min_temperature,
MAX(temperature) AS max_temperature,
COUNT(*) AS count
FROM sensor_data
GROUP BY time, sensor_id;
-- 日级别(基于小时级别)
CREATE MATERIALIZED VIEW sensor_data_daily
WITH (timescaledb.continuous) AS
SELECT
time_bucket('1 day', time) AS time,
sensor_id,
AVG(avg_temperature) AS avg_temperature,
MIN(min_temperature) AS min_temperature,
MAX(max_temperature) AS max_temperature,
SUM(count) AS count
FROM sensor_data_hourly
GROUP BY time, sensor_id;
-- 周级别(基于日级别)
CREATE MATERIALIZED VIEW sensor_data_weekly
WITH (timescaledb.continuous) AS
SELECT
time_bucket('1 week', time) AS time,
sensor_id,
AVG(avg_temperature) AS avg_temperature,
MIN(min_temperature) AS min_temperature,
MAX(max_temperature) AS max_temperature,
SUM(count) AS count
FROM sensor_data_daily
GROUP BY time, sensor_id;
10. 监控和告警
查询性能监控
-- 启用查询统计
ALTER DATABASE timeseries_db SET timescaledb.enable_telemetry = on;
-- 查看查询统计
SELECT * FROM pg_stat_statements WHERE query LIKE '%sensor_data%';
-- 查看超表统计
SELECT * FROM timescaledb_information.hypertable_size;
-- 查看块统计
SELECT
chunk_schema,
chunk_name,
range_start,
range_end,
pg_size_pretty(pg_total_relation_size(chunk_schema || '.' || chunk_name)) as size
FROM timescaledb_information.chunks
WHERE hypertable_name = 'sensor_data'
ORDER BY range_start;
数据采集监控
-- 监控数据采集速率
SELECT
time_bucket('1 minute', time) AS bucket,
COUNT(*) AS rows_per_minute
FROM sensor_data
WHERE time >= NOW() - INTERVAL '1 hour'
GROUP BY bucket
ORDER BY bucket;
-- 按传感器监控数据
SELECT
sensor_id,
COUNT(*) AS row_count,
MIN(time) AS first_seen,
MAX(time) AS last_seen
FROM sensor_data
WHERE time >= NOW() - INTERVAL '1 day'
GROUP BY sensor_id
ORDER BY row_count DESC;
存储监控
-- 查看超表的存储使用情况
SELECT
hypertable_name,
pg_size_pretty(pg_total_relation_size(hypertable_schema || '.' || hypertable_name)) AS total_size,
pg_size_pretty(pg_total_relation_size(hypertable_schema || '.' || hypertable_name || '_compressed')) AS compressed_size
FROM timescaledb_information.hypertables;
-- 查看块的存储使用情况
SELECT
chunk_schema,
chunk_name,
range_start,
range_end,
is_compressed,
pg_size_pretty(pg_total_relation_size(chunk_schema || '.' || chunk_name)) AS size
FROM timescaledb_information.chunks
WHERE hypertable_name = 'sensor_data'
ORDER BY range_start DESC
LIMIT 10;
告警查询
-- 缺失数据告警
SELECT
sensor_id,
MAX(time) AS last_data_time,
NOW() - MAX(time) AS time_since_last_data
FROM sensor_data
GROUP BY sensor_id
HAVING NOW() - MAX(time) > INTERVAL '1 hour'
ORDER BY time_since_last_data DESC;
-- 异常值告警
WITH sensor_stats AS (
SELECT
sensor_id,
AVG(temperature) AS avg_temp,
STDDEV(temperature) AS stddev_temp
FROM sensor_data
WHERE time >= NOW() - INTERVAL '1 day'
GROUP BY sensor_id
)
SELECT
sd.time,
sd.sensor_id,
sd.temperature,
ss.avg_temp,
ss.stddev_temp,
(sd.temperature - ss.avg_temp) / ss.stddev_temp AS z_score
FROM sensor_data sd
JOIN sensor_stats ss ON sd.sensor_id = ss.sensor_id
WHERE sd.time >= NOW() - INTERVAL '1 hour'
AND ABS((sd.temperature - ss.avg_temp) / ss.stddev_temp) > 3
ORDER BY sd.time;
11. 与 Grafana 集成
TimescaleDB 数据源配置
// Grafana 数据源配置
{
"name": "TimescaleDB",
"type": "postgres",
"url": "localhost:5432",
"database": "timeseries_db",
"user": "postgres",
"password": "password",
"sslMode": "disable",
"timescaledb": true
}
Grafana 查询示例
-- Grafana 的时间序列查询
SELECT
time_bucket('$__interval', time) AS time,
sensor_id,
AVG(temperature) AS avg_temperature
FROM sensor_data
WHERE time >= $__timeFrom() AND time <= $__timeTo()
AND sensor_id = $sensor_id
GROUP BY time, sensor_id
ORDER BY time;
-- 多传感器查询
SELECT
time_bucket('$__interval', time) AS time,
sensor_id,
AVG(temperature) AS avg_temperature
FROM sensor_data
WHERE time >= $__timeFrom() AND time <= $__timeTo()
AND sensor_id IN ($sensor_ids)
GROUP BY time, sensor_id
ORDER BY time;
12. 最佳实践
1. 选择合适的块间隔
-- 好:块间隔与数据保留匹配
SELECT create_hypertable(
'sensor_data',
'time',
chunk_time_interval => interval '1 day' -- 用于30天保留
);
-- 坏:块间隔太小导致开销
SELECT create_hypertable(
'sensor_data',
'time',
chunk_time_interval => interval '1 minute' -- 太小
);
2. 使用连续聚合预计算数据
-- 好:预计算聚合
CREATE MATERIALIZED VIEW sensor_hourly_avg
WITH (timescaledb.continuous) AS
SELECT
time_bucket('1 hour', time) AS bucket,
sensor_id,
AVG(temperature) AS avg_temperature
FROM sensor_data
GROUP BY bucket, sensor_id;
-- 坏:每次查询都计算聚合
SELECT
time_bucket('1 hour', time) AS bucket,
sensor_id,
AVG(temperature) AS avg_temperature
FROM sensor_data
WHERE time >= NOW() - INTERVAL '30 days'
GROUP BY bucket, sensor_id;
3. 对历史数据使用压缩
-- 好:压缩旧数据
SELECT add_compression_policy(
'sensor_data',
INTERVAL '7 days',
schedule_interval => INTERVAL '1 day'
);
-- 坏:保持所有数据未压缩
4. 使用适当的时间桶
-- 好:为查询使用适当的桶大小
SELECT
time_bucket('1 hour', time) AS bucket,
AVG(temperature) AS avg_temperature
FROM sensor_data
WHERE time >= NOW() - INTERVAL '1 day'
GROUP BY bucket;
-- 坏:对于大时间范围使用太小的桶
SELECT
time_bucket('1 second', time) AS bucket,
AVG(temperature) AS avg_temperature
FROM sensor_data
WHERE time >= NOW() - INTERVAL '30 days'
GROUP BY bucket;
5. 明智地使用索引
-- 好:在频繁查询的列上创建索引
CREATE INDEX idx_sensor_data_sensor_id ON sensor_data(sensor_id);
CREATE INDEX idx_sensor_data_time ON sensor_data(time DESC);
-- 坏:创建太多索引
6. 监控性能
-- 定期监控查询性能
SELECT * FROM pg_stat_statements
WHERE query LIKE '%sensor_data%'
ORDER BY total_time DESC
LIMIT 10;
7. 对大型数据集使用分区
-- 好:使用多个维度进行分区
SELECT create_hypertable(
'iot_metrics',
'time',
partitioning_column => 'device_id',
number_partitions => 4
);
-- 坏:对于多租户数据仅使用时间分区