数据库 | timescaledb时序表使用注意事项
官方文档:Timescale documentation
注意事项
- timescaledb本质上是基于PostgreSQL数据库的扩展,PostgreSQL的语法都支持.
- 建表,和普通关系数据库建表的差异点
a. 时序表必须要有TIMESTAMP列,用于分区(分区列会默认建索引)
示例:
begin;
--建普通表
CREATE table if not exists tpt_warning_record (warning_time TIMESTAMP NOT NULL,tag_name VARCHAR(200) NOT NULL ,info TEXT DEFAULT NULL
);
COMMENT ON COLUMN tpt_warning_record.warning_time IS '预警时间';
COMMENT ON COLUMN tpt_warning_record.tag_name IS '位号';
COMMENT ON COLUMN tpt_warning_record.info IS '预警信息';--转时序表,同时会自动创建索引
SELECT create_hypertable('tpt_warning_record', 'warning_time', chunk_time_interval => interval '1 day');--建其他索引,比如位号
CREATE INDEX idx_tpt_warning_record_time_name ON tpt_warning_record (tag_name,warning_time desc);--设置分区保留时间
SELECT add_retention_policy('tpt_warning_record', INTERVAL '30 days');--数据是否支持压缩
ALTER TABLE dm_tag_value SET (timescaledb.compress);end;
b. 非分区列要建索引的话,需要跟分区列建复合索引
c. 分区大小规范:为了提高性能最好是整个分区能完全加载到内存,建议每个分区存储数据量大小是内存的四分之一,比如内存是4G那分区不要超过1G,具体多少条记录根据每行数据大小估算,以此来确定是7天还是1天或几个小时一个分区。
d. 建索引的时候可以选择正序或倒序,根据实际业务查询选择,比如使用的时候是时间倒序那么索引语句可以这样,如果查询排序和索引排序方向不一致很影响性能。
CREATE INDEX idx_tpt_warning_record_time_name ON tpt_warning_record (tag_name,warning_time desc);
- 写入,尽量用批量写入提高性能
- 查询
a. 查询都要带上索引字段(建议都带上时间范围),不走索引的全量扫描的性能和普通数据库没有区别,表数据量大了就很慢。
b. 因为数据库内存有限,查询结果大的情况使用分页查询
c. 查询不要随意order by,尽量走默认排序(order by 和索引顺序一致)
d. 时间过滤时不要用时间函数,会影响性能
比如:
--错误如下:
SELECT app_time,tag_name,tag_value,quality FROM dm_tag_value WHERE DATE(app_time) = '2024-06-19'--正确如下:
SELECT app_time,tag_name,tag_value,quality FROM dm_tag_value WHERE app_time >= '2024-06-19' and app_time < '2024-06-20'
e. 时序数据库有一些特有函数可以大大提高性能。
比如:按时间窗口聚合
SELECT time_bucket('1 day', time_column) AS day, AVG(value_column)
FROM table_name
GROUP BY day;
比如:用物理视图做连续预聚合,适合要对大时间范围做聚合的场景
CREATE MATERIALIZED VIEW continuous_aggregate_view WITH (timescaledb.continuous) AS
SELECT time_bucket('1 day', time_column) AS day, AVG(value_column)
FROM table_name
GROUP BY day;SELECT add_continuous_aggregate_policy('continuous_aggregate_view', start_offset => INTERVAL '1 month', end_offset => INTERVAL '1 day', schedule_interval => INTERVAL '1 hour');
其他一些高级查询:Timescale Documentation | Perform advanced analytic queries
一些自定义函数
其他一些方便查询的自定义函数,供参考
查询指定时间最近一条记录(opt可取:interpolate 插值、before 前值、after 后值)
CREATE OR REPLACE FUNCTION get_latest_records(tag_names text[], t timestamp DEFAULT now(), opt text DEFAULT 'before'
)
RETURNS TABLE(tag_name varchar(200), app_time timestamp, tag_value varchar(1000)) AS $$
DECLAREtag text;query text;
BEGINFOR tag IN SELECT unnest(tag_names)LOOPIF opt = 'after' THENquery := format('SELECT dm_tag_value.tag_name, dm_tag_value.app_time, dm_tag_value.tag_valueFROM dm_tag_valueWHERE dm_tag_value.tag_name = %L AND dm_tag_value.app_time >= %LORDER BY dm_tag_value.app_time ASCLIMIT 1', tag, t);ELSIF opt = 'interpolate' THENquery := format('WITH closest_before AS (SELECT dm_tag_value.tag_name, dm_tag_value.app_time, dm_tag_value.tag_valueFROM dm_tag_valueWHERE dm_tag_value.tag_name = %L AND dm_tag_value.app_time <= %LORDER BY app_time DESCLIMIT 1),closest_after AS (SELECT dm_tag_value.tag_name, dm_tag_value.app_time, dm_tag_value.tag_valueFROM dm_tag_valueWHERE dm_tag_value.tag_name = %L AND dm_tag_value.app_time > %LORDER BY app_time ASCLIMIT 1),combined AS (SELECT b.tag_name AS name,EXTRACT(EPOCH FROM b.app_time) AS time_before, CAST(b.tag_value AS DOUBLE PRECISION) AS value_before, EXTRACT(EPOCH FROM a.app_time) AS time_after, CAST(a.tag_value AS DOUBLE PRECISION) AS value_afterFROM closest_before bFULL JOIN closest_after a ON 1=1)SELECT COALESCE(combined.name,ca.tag_name, cb.tag_name) AS tag_name, CAST(%L AS TIMESTAMP) AS app_time,CASEWHEN cb.app_time IS NOT NULL AND ca.app_time IS NOT NULL THENCAST((value_before + (value_after - value_before) * (EXTRACT(EPOCH FROM TIMESTAMP %L) - time_before) / (time_after - time_before)) AS VARCHAR(1000))WHEN cb.app_time IS NOT NULL THENCAST(cb.tag_value AS VARCHAR(1000))WHEN ca.app_time IS NOT NULL THENCAST(ca.tag_value AS VARCHAR(1000))ELSENULLEND AS tag_valueFROM combined LEFT JOIN closest_before cb ON 1=1 LEFT JOIN closest_after ca ON 1=1', tag, t , tag,t ,t, t);else query := format('SELECT dm_tag_value.tag_name, dm_tag_value.app_time, dm_tag_value.tag_value FROM dm_tag_valueWHERE dm_tag_value.tag_name = %L AND dm_tag_value.app_time <= %LORDER BY dm_tag_value.app_time DESCLIMIT 1', tag, t);END IF;RETURN QUERY EXECUTE query;END LOOP;
END;
$$ LANGUAGE plpgsql;SELECT *
FROM get_latest_records(ARRAY['TAG8710_ISYS7.1', 'TAG8728_ISYS7.1', 'TAG8727_ISYS7.1'], '2024-07-18','interpolate');
行转列
CREATE EXTENSION IF NOT EXISTS tablefunc;SELECT *
FROM crosstab('SELECT app_time, tag_name, tag_value FROM dm_tag_value where tag_name in (''TAG41'',''TAG42'',''TAG43'') and app_time between ''2024-06-04 00:00:00.000'' and ''2024-06-04 23:59:59.000'' ORDER BY 1, 2','SELECT DISTINCT tag_name FROM dm_tag_value where tag_name in (''TAG41'',''TAG42'',''TAG43'') ORDER BY 1'
) AS ct (app_time TIMESTAMPTZ,tag_v1 NUMERIC,tag_v2 numeric,tag_v3 NUMERIC
);
间隔采样
--按间隔取位号数据
CREATE OR REPLACE FUNCTION get_one_tag_with_interval_seconds(tag_names TEXT[],interval_seconds INT,duration_minutes INT,delay_seconds INT
)
RETURNS TABLE (tag_name VARCHAR, app_time timestamp,tag_value TEXT) AS $$
BEGINRETURN QUERYSELECTdm_tag_value.tag_name,time_bucket(INTERVAL '1 second' * interval_seconds, dm_tag_value.app_time) AS interval_time,min(concat(cast(dm_tag_value.app_time as VARCHAR),'|',dm_tag_value.tag_value,'|',dm_tag_value.quality)) as app_time_tag_valueFROMdm_tag_valueWHEREdm_tag_value.tag_name = ANY(tag_names)AND dm_tag_value.app_time >= NOW() - INTERVAL '1 minute' * duration_minutes - INTERVAL '1 second' * delay_secondsAND dm_tag_value.app_time < NOW() - INTERVAL '1 second' * delay_secondsGROUP BYdm_tag_value.tag_name, interval_time;
END;
$$ LANGUAGE plpgsql;
其他一些常用命令
查看分区空间占用:
SELECT relname AS table_name,pg_size_pretty(pg_total_relation_size(relid)) AS total_size,pg_size_pretty(pg_relation_size(relid)) AS table_size,pg_size_pretty(pg_indexes_size(relid)) AS indexes_size
FROM pg_stat_user_tables
ORDER BY pg_total_relation_size(relid) DESC;
查看分区
SELECT show_chunks('dm_tag_value');SELECT *
FROM timescaledb_information.chunks
WHERE hypertable_name = 'dm_tag_value';
删除分区
SELECT drop_chunks('dm_tag_value','2024-05-30'::timestamp);--删除小于等于指定日期的分区
压缩分区
SELECT compress_chunk('_timescaledb_internal._hyper_1_3_chunk', if_not_compressed => true);