合并
This commit is contained in:
@@ -25,46 +25,54 @@ class DatabaseOperations:
|
||||
|
||||
use_returning = "RETURNING" in sql.upper()
|
||||
|
||||
with self.conn.cursor() as c:
|
||||
if not use_returning:
|
||||
# 不带 RETURNING:直接批量执行即可
|
||||
if not use_returning:
|
||||
with self.conn.cursor() as c:
|
||||
psycopg2.extras.execute_batch(c, sql, rows, page_size=page_size)
|
||||
return (0, 0)
|
||||
|
||||
# 尝试向量化执行
|
||||
return (0, 0)
|
||||
|
||||
# 尝试向量化执行(execute_values + fetch returning)
|
||||
vectorized_failed = False
|
||||
m = re.search(r"VALUES\s*\((.*?)\)", sql, flags=re.IGNORECASE | re.DOTALL)
|
||||
if m:
|
||||
tpl = "(" + m.group(1) + ")"
|
||||
base_sql = sql[:m.start()] + "VALUES %s" + sql[m.end():]
|
||||
try:
|
||||
m = re.search(r"VALUES\s*\((.*?)\)", sql, flags=re.IGNORECASE | re.DOTALL)
|
||||
if m:
|
||||
tpl = "(" + m.group(1) + ")"
|
||||
base_sql = sql[:m.start()] + "VALUES %s" + sql[m.end():]
|
||||
|
||||
with self.conn.cursor() as c:
|
||||
ret = psycopg2.extras.execute_values(
|
||||
c, base_sql, rows, template=tpl, page_size=page_size, fetch=True
|
||||
)
|
||||
|
||||
if not ret:
|
||||
return (0, 0)
|
||||
|
||||
inserted = sum(1 for rec in ret if self._is_inserted(rec))
|
||||
return (inserted, len(ret) - inserted)
|
||||
if not ret:
|
||||
return (0, 0)
|
||||
inserted = sum(1 for rec in ret if self._is_inserted(rec))
|
||||
return (inserted, len(ret) - inserted)
|
||||
except Exception:
|
||||
# 向量化失败后,事务通常处于 aborted 状态,需要先 rollback 才能继续执行。
|
||||
vectorized_failed = True
|
||||
|
||||
if vectorized_failed:
|
||||
try:
|
||||
self.conn.rollback()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# 回退:逐行执行
|
||||
inserted = 0
|
||||
updated = 0
|
||||
|
||||
# 回退:逐行执行
|
||||
inserted = 0
|
||||
updated = 0
|
||||
with self.conn.cursor() as c:
|
||||
for r in rows:
|
||||
c.execute(sql, r)
|
||||
try:
|
||||
rec = c.fetchone()
|
||||
except Exception:
|
||||
rec = None
|
||||
|
||||
|
||||
if self._is_inserted(rec):
|
||||
inserted += 1
|
||||
else:
|
||||
updated += 1
|
||||
|
||||
return (inserted, updated)
|
||||
|
||||
return (inserted, updated)
|
||||
|
||||
@staticmethod
|
||||
def _is_inserted(rec) -> bool:
|
||||
|
||||
1945
etl_billiards/database/schema_ODS_doc.sql
Normal file
1945
etl_billiards/database/schema_ODS_doc.sql
Normal file
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
50
etl_billiards/database/schema_dws.sql
Normal file
50
etl_billiards/database/schema_dws.sql
Normal file
@@ -0,0 +1,50 @@
|
||||
-- DWS schema for aggregated / serving tables.
|
||||
CREATE SCHEMA IF NOT EXISTS billiards_dws;
|
||||
|
||||
-- Order-level summary table in the DWS schema.
-- The primary key allows at most one row per (site_id, order_settle_id).
-- All amount/flow columns are plain NUMERIC with no declared precision/scale.
CREATE TABLE IF NOT EXISTS billiards_dws.dws_order_summary (
    -- Identity and dimensions
    site_id                       bigint  NOT NULL,
    order_settle_id               bigint  NOT NULL,
    order_trade_no                text,
    order_date                    date,
    tenant_id                     bigint,
    member_id                     bigint,
    member_flag                   boolean,
    recharge_order_flag           boolean,

    -- Item counters
    item_count                    integer,
    total_item_quantity           numeric,

    -- Amounts by category
    table_fee_amount              numeric,
    assistant_service_amount      numeric,
    goods_amount                  numeric,
    group_amount                  numeric,

    -- Discounts and order totals
    total_coupon_deduction        numeric,
    member_discount_amount        numeric,
    manual_discount_amount        numeric,
    order_original_amount         numeric,
    order_final_amount            numeric,

    -- Payments
    stored_card_deduct            numeric,
    external_paid_amount          numeric,
    total_paid_amount             numeric,

    -- Booked flow figures (NOTE(review): exact accounting meaning is not
    -- defined in this file; confirm against the DWS build task)
    book_table_flow               numeric,
    book_assistant_flow           numeric,
    book_goods_flow               numeric,
    book_group_flow               numeric,
    book_order_flow               numeric,

    -- Effective cash/flow, refunds and net income
    order_effective_consume_cash  numeric,
    order_effective_recharge_cash numeric,
    order_effective_flow          numeric,
    refund_amount                 numeric,
    net_income                    numeric,

    -- Row bookkeeping; both default to the insert transaction's timestamp
    created_at                    timestamp with time zone DEFAULT now(),
    updated_at                    timestamp with time zone DEFAULT now(),

    PRIMARY KEY (site_id, order_settle_id)
);
|
||||
|
||||
-- Secondary indexes on dws_order_summary: one on order_date alone, and two
-- composite indexes that lead with tenant_id / member_id followed by order_date.
CREATE INDEX IF NOT EXISTS idx_dws_order_summary_order_date
    ON billiards_dws.dws_order_summary
    USING btree (order_date);

CREATE INDEX IF NOT EXISTS idx_dws_order_summary_tenant_date
    ON billiards_dws.dws_order_summary
    USING btree (tenant_id, order_date);

CREATE INDEX IF NOT EXISTS idx_dws_order_summary_member_date
    ON billiards_dws.dws_order_summary
    USING btree (member_id, order_date);
|
||||
|
||||
105
etl_billiards/database/schema_etl_admin.sql
Normal file
105
etl_billiards/database/schema_etl_admin.sql
Normal file
@@ -0,0 +1,105 @@
|
||||
-- 文件说明:etl_admin 调度元数据 DDL(独立文件,便于初始化任务单独执行)。
|
||||
-- 包含任务注册表、游标表、运行记录表;字段注释使用中文。
|
||||
|
||||
CREATE SCHEMA IF NOT EXISTS etl_admin;
|
||||
|
||||
-- Task registry: one row per (task_code, store_id), enforced by the UNIQUE
-- constraint. Tuning columns carry defaults so a row can be seeded with only
-- task_code and store_id.
CREATE TABLE IF NOT EXISTS etl_admin.etl_task (
    task_id                bigserial,
    task_code              text    NOT NULL,
    store_id               bigint  NOT NULL,
    enabled                boolean DEFAULT TRUE,
    cursor_field           text,
    window_minutes_default integer DEFAULT 30,
    overlap_seconds        integer DEFAULT 120,
    page_size              integer DEFAULT 200,
    retry_max              integer DEFAULT 3,
    params                 jsonb   DEFAULT '{}'::jsonb,
    created_at             timestamp with time zone DEFAULT now(),
    updated_at             timestamp with time zone DEFAULT now(),
    PRIMARY KEY (task_id),
    UNIQUE (task_code, store_id)
);

-- Catalog comments (stored in the database; text kept exactly as authored).
COMMENT ON TABLE etl_admin.etl_task
    IS '任务注册表:调度依据的任务清单(与 task_registry 中的任务码对应)。';
COMMENT ON COLUMN etl_admin.etl_task.task_code
    IS '任务编码,需与代码中的任务码一致。';
COMMENT ON COLUMN etl_admin.etl_task.store_id
    IS '门店/租户粒度,区分多门店执行。';
COMMENT ON COLUMN etl_admin.etl_task.enabled
    IS '是否启用此任务。';
COMMENT ON COLUMN etl_admin.etl_task.cursor_field
    IS '增量游标字段名(可选)。';
COMMENT ON COLUMN etl_admin.etl_task.window_minutes_default
    IS '默认时间窗口(分钟)。';
COMMENT ON COLUMN etl_admin.etl_task.overlap_seconds
    IS '窗口重叠秒数,用于防止遗漏。';
COMMENT ON COLUMN etl_admin.etl_task.page_size
    IS '默认分页大小。';
COMMENT ON COLUMN etl_admin.etl_task.retry_max
    IS 'API重试次数上限。';
COMMENT ON COLUMN etl_admin.etl_task.params
    IS '任务级自定义参数 JSON。';
COMMENT ON COLUMN etl_admin.etl_task.created_at
    IS '创建时间。';
COMMENT ON COLUMN etl_admin.etl_task.updated_at
    IS '更新时间。';
|
||||
|
||||
-- Per-task cursor state: at most one row per (task_id, store_id), enforced by
-- the UNIQUE constraint. Rows are removed automatically when the referenced
-- etl_task row is deleted (ON DELETE CASCADE).
-- NOTE(review): last_run_id is documented as pointing at etl_run.run_id but
-- is not declared as a foreign key here.
CREATE TABLE IF NOT EXISTS etl_admin.etl_cursor (
    cursor_id   bigserial,
    task_id     bigint NOT NULL,
    store_id    bigint NOT NULL,
    last_start  timestamp with time zone,
    last_end    timestamp with time zone,
    last_id     bigint,
    last_run_id bigint,
    extra       jsonb DEFAULT '{}'::jsonb,
    created_at  timestamp with time zone DEFAULT now(),
    updated_at  timestamp with time zone DEFAULT now(),
    PRIMARY KEY (cursor_id),
    FOREIGN KEY (task_id)
        REFERENCES etl_admin.etl_task (task_id)
        ON DELETE CASCADE,
    UNIQUE (task_id, store_id)
);

-- Catalog comments (stored in the database; text kept exactly as authored).
COMMENT ON TABLE etl_admin.etl_cursor
    IS '任务游标表:记录每个任务/门店的增量窗口及最后 run。';
COMMENT ON COLUMN etl_admin.etl_cursor.task_id
    IS '关联 etl_task.task_id。';
COMMENT ON COLUMN etl_admin.etl_cursor.store_id
    IS '门店/租户粒度。';
COMMENT ON COLUMN etl_admin.etl_cursor.last_start
    IS '上次窗口开始时间(含重叠偏移)。';
COMMENT ON COLUMN etl_admin.etl_cursor.last_end
    IS '上次窗口结束时间。';
COMMENT ON COLUMN etl_admin.etl_cursor.last_id
    IS '上次处理的最大主键/游标值(可选)。';
COMMENT ON COLUMN etl_admin.etl_cursor.last_run_id
    IS '上次运行ID,对应 etl_run.run_id。';
COMMENT ON COLUMN etl_admin.etl_cursor.extra
    IS '附加游标信息 JSON。';
COMMENT ON COLUMN etl_admin.etl_cursor.created_at
    IS '创建时间。';
COMMENT ON COLUMN etl_admin.etl_cursor.updated_at
    IS '更新时间。';
|
||||
|
||||
-- Run log: one row per task execution. Rows are removed automatically when
-- the referenced etl_task row is deleted (ON DELETE CASCADE). All counters
-- default to 0 and the JSONB columns default to an empty object.
-- NOTE(review): run_uuid is NOT NULL but carries no UNIQUE constraint.
CREATE TABLE IF NOT EXISTS etl_admin.etl_run (
    -- Identity
    run_id          bigserial,
    run_uuid        text   NOT NULL,
    task_id         bigint NOT NULL,
    store_id        bigint NOT NULL,
    status          text   NOT NULL,

    -- Timing and processing window
    started_at      timestamp with time zone DEFAULT now(),
    ended_at        timestamp with time zone,
    window_start    timestamp with time zone,
    window_end      timestamp with time zone,
    window_minutes  integer,
    overlap_seconds integer,

    -- Counters
    fetched_count   integer DEFAULT 0,
    loaded_count    integer DEFAULT 0,
    updated_count   integer DEFAULT 0,
    skipped_count   integer DEFAULT 0,
    error_count     integer DEFAULT 0,
    unknown_fields  integer DEFAULT 0,

    -- Artifacts and diagnostics
    export_dir      text,
    log_path        text,
    request_params  jsonb DEFAULT '{}'::jsonb,
    manifest        jsonb DEFAULT '{}'::jsonb,
    error_message   text,
    extra           jsonb DEFAULT '{}'::jsonb,

    PRIMARY KEY (run_id),
    FOREIGN KEY (task_id)
        REFERENCES etl_admin.etl_task (task_id)
        ON DELETE CASCADE
);

-- Catalog comments (stored in the database; text kept exactly as authored).
COMMENT ON TABLE etl_admin.etl_run
    IS '运行记录表:记录每次任务执行的窗口、状态、计数与日志路径。';
COMMENT ON COLUMN etl_admin.etl_run.run_uuid
    IS '本次调度的唯一标识。';
COMMENT ON COLUMN etl_admin.etl_run.task_id
    IS '关联 etl_task.task_id。';
COMMENT ON COLUMN etl_admin.etl_run.store_id
    IS '门店/租户粒度。';
COMMENT ON COLUMN etl_admin.etl_run.status
    IS '运行状态(SUCC/FAIL/PARTIAL 等)。';
COMMENT ON COLUMN etl_admin.etl_run.started_at
    IS '开始时间。';
COMMENT ON COLUMN etl_admin.etl_run.ended_at
    IS '结束时间。';
COMMENT ON COLUMN etl_admin.etl_run.window_start
    IS '本次窗口开始时间。';
COMMENT ON COLUMN etl_admin.etl_run.window_end
    IS '本次窗口结束时间。';
COMMENT ON COLUMN etl_admin.etl_run.window_minutes
    IS '窗口跨度(分钟)。';
COMMENT ON COLUMN etl_admin.etl_run.overlap_seconds
    IS '窗口重叠秒数。';
COMMENT ON COLUMN etl_admin.etl_run.fetched_count
    IS '抓取/读取的记录数。';
COMMENT ON COLUMN etl_admin.etl_run.loaded_count
    IS '插入的记录数。';
COMMENT ON COLUMN etl_admin.etl_run.updated_count
    IS '更新的记录数。';
COMMENT ON COLUMN etl_admin.etl_run.skipped_count
    IS '跳过的记录数。';
COMMENT ON COLUMN etl_admin.etl_run.error_count
    IS '错误记录数。';
COMMENT ON COLUMN etl_admin.etl_run.unknown_fields
    IS '未知字段计数(清洗阶段)。';
COMMENT ON COLUMN etl_admin.etl_run.export_dir
    IS '抓取/导出目录。';
COMMENT ON COLUMN etl_admin.etl_run.log_path
    IS '日志路径。';
COMMENT ON COLUMN etl_admin.etl_run.request_params
    IS '请求参数 JSON。';
COMMENT ON COLUMN etl_admin.etl_run.manifest
    IS '运行产出清单/统计 JSON。';
COMMENT ON COLUMN etl_admin.etl_run.error_message
    IS '错误信息(若失败)。';
COMMENT ON COLUMN etl_admin.etl_run.extra
    IS '附加字段,保留扩展。';
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,34 +1,37 @@
|
||||
-- 将新的 ODS 任务注册到 etl_admin.etl_task(根据需要替换 store_id)
|
||||
-- 将新的 ODS 任务注册到 etl_admin.etl_task(按需替换 store_id)。
|
||||
-- 使用方式(示例):
|
||||
-- psql "$PG_DSN" -f etl_billiards/database/seed_ods_tasks.sql
|
||||
-- 或者在 psql 中执行本文件内容。
|
||||
-- 或在 psql 中直接执行本文件内容。
|
||||
|
||||
WITH target_store AS (
|
||||
SELECT 2790685415443269::bigint AS store_id -- TODO: 替换为实际 store_id
|
||||
),
|
||||
task_codes AS (
|
||||
SELECT unnest(ARRAY[
|
||||
'ODS_ASSISTANT_ACCOUNTS',
|
||||
-- Must match tasks/ods_tasks.py (ENABLED_ODS_CODES)
|
||||
'ODS_ASSISTANT_ACCOUNT',
|
||||
'ODS_ASSISTANT_LEDGER',
|
||||
'ODS_ASSISTANT_ABOLISH',
|
||||
'ODS_INVENTORY_CHANGE',
|
||||
'ODS_INVENTORY_STOCK',
|
||||
'ODS_PACKAGE',
|
||||
'ODS_GROUP_BUY_REDEMPTION',
|
||||
'ODS_MEMBER',
|
||||
'ODS_MEMBER_BALANCE',
|
||||
'ODS_MEMBER_CARD',
|
||||
'ODS_SETTLEMENT_RECORDS',
|
||||
'ODS_TABLE_USE',
|
||||
'ODS_PAYMENT',
|
||||
'ODS_REFUND',
|
||||
'ODS_COUPON_VERIFY',
|
||||
'ODS_PLATFORM_COUPON',
|
||||
'ODS_MEMBER',
|
||||
'ODS_MEMBER_CARD',
|
||||
'ODS_MEMBER_BALANCE',
|
||||
'ODS_RECHARGE_SETTLE',
|
||||
'ODS_GROUP_PACKAGE',
|
||||
'ODS_GROUP_BUY_REDEMPTION',
|
||||
'ODS_INVENTORY_STOCK',
|
||||
'ODS_INVENTORY_CHANGE',
|
||||
'ODS_TABLES',
|
||||
'ODS_GOODS_CATEGORY',
|
||||
'ODS_STORE_GOODS',
|
||||
'ODS_TABLE_DISCOUNT',
|
||||
'ODS_STORE_GOODS_SALES',
|
||||
'ODS_TABLE_FEE_DISCOUNT',
|
||||
'ODS_TENANT_GOODS',
|
||||
'ODS_SETTLEMENT_TICKET',
|
||||
'ODS_ORDER_SETTLE'
|
||||
'ODS_SETTLEMENT_TICKET'
|
||||
]) AS task_code
|
||||
)
|
||||
INSERT INTO etl_admin.etl_task (task_code, store_id, enabled)
|
||||
@@ -36,4 +39,3 @@ SELECT t.task_code, s.store_id, TRUE
|
||||
FROM task_codes t CROSS JOIN target_store s
|
||||
ON CONFLICT (task_code, store_id) DO UPDATE
|
||||
SET enabled = EXCLUDED.enabled;
|
||||
|
||||
|
||||
50
etl_billiards/database/seed_scheduler_tasks.sql
Normal file
50
etl_billiards/database/seed_scheduler_tasks.sql
Normal file
@@ -0,0 +1,50 @@
|
||||
-- Seed scheduler-compatible tasks into etl_admin.etl_task.
|
||||
--
|
||||
-- Notes:
|
||||
-- - These task_code values must match orchestration/task_registry.py.
|
||||
-- - ODS_* tasks are intentionally excluded here because they don't follow the
|
||||
-- BaseTask(cursor_data) scheduler interface in this repo version.
|
||||
--
|
||||
-- Usage (example):
|
||||
-- psql "%PG_DSN%" -f etl_billiards/database/seed_scheduler_tasks.sql
|
||||
--
|
||||
-- Register every scheduler task code for a single store and mark it enabled.
-- Safe to re-run: when a (task_code, store_id) row already exists, the upsert
-- sets enabled back to TRUE and refreshes updated_at instead of failing on
-- the unique constraint.
WITH target_store (store_id) AS (
    SELECT 2790685415443269::bigint  -- TODO: replace with your store_id
),
task_codes (task_code) AS (
    VALUES
        ('ASSISTANT_ABOLISH'),
        ('ASSISTANTS'),
        ('COUPON_USAGE'),
        ('CHECK_CUTOFF'),
        ('DATA_INTEGRITY_CHECK'),
        ('DWD_LOAD_FROM_ODS'),
        ('DWD_QUALITY_CHECK'),
        ('INIT_DWD_SCHEMA'),
        ('INIT_DWS_SCHEMA'),
        ('INIT_ODS_SCHEMA'),
        ('INVENTORY_CHANGE'),
        ('LEDGER'),
        ('MANUAL_INGEST'),
        ('MEMBERS'),
        ('MEMBERS_DWD'),
        ('ODS_JSON_ARCHIVE'),
        ('ORDERS'),
        ('PACKAGES_DEF'),
        ('PAYMENTS'),
        ('PAYMENTS_DWD'),
        ('PRODUCTS'),
        ('REFUNDS'),
        ('TABLE_DISCOUNT'),
        ('TABLES'),
        ('TICKET_DWD'),
        ('TOPUPS'),
        ('DWS_BUILD_ORDER_SUMMARY')
)
INSERT INTO etl_admin.etl_task (task_code, store_id, enabled)
SELECT codes.task_code, store.store_id, TRUE
FROM task_codes AS codes
CROSS JOIN target_store AS store
ON CONFLICT (task_code, store_id) DO UPDATE
    SET enabled    = EXCLUDED.enabled,
        updated_at = now();
|
||||
Reference in New Issue
Block a user