DWD完成

This commit is contained in:
Neo
2025-12-09 04:57:05 +08:00
parent f301cc1fd5
commit 561c640700
46 changed files with 26181 additions and 3540 deletions

View File

@@ -0,0 +1,105 @@
-- 文件说明etl_admin 调度元数据 DDL独立文件便于初始化任务单独执行
-- 包含任务注册表、游标表、运行记录表;字段注释使用中文。
CREATE SCHEMA IF NOT EXISTS etl_admin;
CREATE TABLE IF NOT EXISTS etl_admin.etl_task (
task_id BIGSERIAL PRIMARY KEY,
task_code TEXT NOT NULL,
store_id BIGINT NOT NULL,
enabled BOOLEAN DEFAULT TRUE,
cursor_field TEXT,
window_minutes_default INT DEFAULT 30,
overlap_seconds INT DEFAULT 120,
page_size INT DEFAULT 200,
retry_max INT DEFAULT 3,
params JSONB DEFAULT '{}'::jsonb,
created_at TIMESTAMPTZ DEFAULT now(),
updated_at TIMESTAMPTZ DEFAULT now(),
UNIQUE (task_code, store_id)
);
COMMENT ON TABLE etl_admin.etl_task IS '任务注册表:调度依据的任务清单(与 task_registry 中的任务码对应)。';
COMMENT ON COLUMN etl_admin.etl_task.task_code IS '任务编码,需与代码中的任务码一致。';
COMMENT ON COLUMN etl_admin.etl_task.store_id IS '门店/租户粒度,区分多门店执行。';
COMMENT ON COLUMN etl_admin.etl_task.enabled IS '是否启用此任务。';
COMMENT ON COLUMN etl_admin.etl_task.cursor_field IS '增量游标字段名(可选)。';
COMMENT ON COLUMN etl_admin.etl_task.window_minutes_default IS '默认时间窗口(分钟)。';
COMMENT ON COLUMN etl_admin.etl_task.overlap_seconds IS '窗口重叠秒数,用于防止遗漏。';
COMMENT ON COLUMN etl_admin.etl_task.page_size IS '默认分页大小。';
COMMENT ON COLUMN etl_admin.etl_task.retry_max IS 'API重试次数上限。';
COMMENT ON COLUMN etl_admin.etl_task.params IS '任务级自定义参数 JSON。';
COMMENT ON COLUMN etl_admin.etl_task.created_at IS '创建时间。';
COMMENT ON COLUMN etl_admin.etl_task.updated_at IS '更新时间。';
CREATE TABLE IF NOT EXISTS etl_admin.etl_cursor (
cursor_id BIGSERIAL PRIMARY KEY,
task_id BIGINT NOT NULL REFERENCES etl_admin.etl_task(task_id) ON DELETE CASCADE,
store_id BIGINT NOT NULL,
last_start TIMESTAMPTZ,
last_end TIMESTAMPTZ,
last_id BIGINT,
last_run_id BIGINT,
extra JSONB DEFAULT '{}'::jsonb,
created_at TIMESTAMPTZ DEFAULT now(),
updated_at TIMESTAMPTZ DEFAULT now(),
UNIQUE (task_id, store_id)
);
COMMENT ON TABLE etl_admin.etl_cursor IS '任务游标表:记录每个任务/门店的增量窗口及最后 run。';
COMMENT ON COLUMN etl_admin.etl_cursor.task_id IS '关联 etl_task.task_id。';
COMMENT ON COLUMN etl_admin.etl_cursor.store_id IS '门店/租户粒度。';
COMMENT ON COLUMN etl_admin.etl_cursor.last_start IS '上次窗口开始时间(含重叠偏移)。';
COMMENT ON COLUMN etl_admin.etl_cursor.last_end IS '上次窗口结束时间。';
COMMENT ON COLUMN etl_admin.etl_cursor.last_id IS '上次处理的最大主键/游标值(可选)。';
COMMENT ON COLUMN etl_admin.etl_cursor.last_run_id IS '上次运行ID对应 etl_run.run_id。';
COMMENT ON COLUMN etl_admin.etl_cursor.extra IS '附加游标信息 JSON。';
COMMENT ON COLUMN etl_admin.etl_cursor.created_at IS '创建时间。';
COMMENT ON COLUMN etl_admin.etl_cursor.updated_at IS '更新时间。';
CREATE TABLE IF NOT EXISTS etl_admin.etl_run (
run_id BIGSERIAL PRIMARY KEY,
run_uuid TEXT NOT NULL,
task_id BIGINT NOT NULL REFERENCES etl_admin.etl_task(task_id) ON DELETE CASCADE,
store_id BIGINT NOT NULL,
status TEXT NOT NULL,
started_at TIMESTAMPTZ DEFAULT now(),
ended_at TIMESTAMPTZ,
window_start TIMESTAMPTZ,
window_end TIMESTAMPTZ,
window_minutes INT,
overlap_seconds INT,
fetched_count INT DEFAULT 0,
loaded_count INT DEFAULT 0,
updated_count INT DEFAULT 0,
skipped_count INT DEFAULT 0,
error_count INT DEFAULT 0,
unknown_fields INT DEFAULT 0,
export_dir TEXT,
log_path TEXT,
request_params JSONB DEFAULT '{}'::jsonb,
manifest JSONB DEFAULT '{}'::jsonb,
error_message TEXT,
extra JSONB DEFAULT '{}'::jsonb
);
COMMENT ON TABLE etl_admin.etl_run IS '运行记录表:记录每次任务执行的窗口、状态、计数与日志路径。';
COMMENT ON COLUMN etl_admin.etl_run.run_uuid IS '本次调度的唯一标识。';
COMMENT ON COLUMN etl_admin.etl_run.task_id IS '关联 etl_task.task_id。';
COMMENT ON COLUMN etl_admin.etl_run.store_id IS '门店/租户粒度。';
COMMENT ON COLUMN etl_admin.etl_run.status IS '运行状态SUCC/FAIL/PARTIAL 等)。';
COMMENT ON COLUMN etl_admin.etl_run.started_at IS '开始时间。';
COMMENT ON COLUMN etl_admin.etl_run.ended_at IS '结束时间。';
COMMENT ON COLUMN etl_admin.etl_run.window_start IS '本次窗口开始时间。';
COMMENT ON COLUMN etl_admin.etl_run.window_end IS '本次窗口结束时间。';
COMMENT ON COLUMN etl_admin.etl_run.window_minutes IS '窗口跨度(分钟)。';
COMMENT ON COLUMN etl_admin.etl_run.overlap_seconds IS '窗口重叠秒数。';
COMMENT ON COLUMN etl_admin.etl_run.fetched_count IS '抓取/读取的记录数。';
COMMENT ON COLUMN etl_admin.etl_run.loaded_count IS '插入的记录数。';
COMMENT ON COLUMN etl_admin.etl_run.updated_count IS '更新的记录数。';
COMMENT ON COLUMN etl_admin.etl_run.skipped_count IS '跳过的记录数。';
COMMENT ON COLUMN etl_admin.etl_run.error_count IS '错误记录数。';
COMMENT ON COLUMN etl_admin.etl_run.unknown_fields IS '未知字段计数(清洗阶段)。';
COMMENT ON COLUMN etl_admin.etl_run.export_dir IS '抓取/导出目录。';
COMMENT ON COLUMN etl_admin.etl_run.log_path IS '日志路径。';
COMMENT ON COLUMN etl_admin.etl_run.request_params IS '请求参数 JSON。';
COMMENT ON COLUMN etl_admin.etl_run.manifest IS '运行产出清单/统计 JSON。';
COMMENT ON COLUMN etl_admin.etl_run.error_message IS '错误信息(若失败)。';
COMMENT ON COLUMN etl_admin.etl_run.extra IS '附加字段,保留扩展。';