ETL 完成

This commit is contained in:
Neo
2026-01-18 22:37:38 +08:00
parent 8da6cb6563
commit 7ca19a4a2c
159 changed files with 31225 additions and 467 deletions

View File

@@ -25,46 +25,54 @@ class DatabaseOperations:
use_returning = "RETURNING" in sql.upper()
with self.conn.cursor() as c:
if not use_returning:
# 不带 RETURNING直接批量执行即可
if not use_returning:
with self.conn.cursor() as c:
psycopg2.extras.execute_batch(c, sql, rows, page_size=page_size)
return (0, 0)
# 尝试向量化执行
return (0, 0)
# 尝试向量化执行execute_values + fetch returning
vectorized_failed = False
m = re.search(r"VALUES\s*\((.*?)\)", sql, flags=re.IGNORECASE | re.DOTALL)
if m:
tpl = "(" + m.group(1) + ")"
base_sql = sql[:m.start()] + "VALUES %s" + sql[m.end():]
try:
m = re.search(r"VALUES\s*\((.*?)\)", sql, flags=re.IGNORECASE | re.DOTALL)
if m:
tpl = "(" + m.group(1) + ")"
base_sql = sql[:m.start()] + "VALUES %s" + sql[m.end():]
with self.conn.cursor() as c:
ret = psycopg2.extras.execute_values(
c, base_sql, rows, template=tpl, page_size=page_size, fetch=True
)
if not ret:
return (0, 0)
inserted = sum(1 for rec in ret if self._is_inserted(rec))
return (inserted, len(ret) - inserted)
if not ret:
return (0, 0)
inserted = sum(1 for rec in ret if self._is_inserted(rec))
return (inserted, len(ret) - inserted)
except Exception:
# 向量化失败后,事务通常处于 aborted 状态,需要先 rollback 才能继续执行。
vectorized_failed = True
if vectorized_failed:
try:
self.conn.rollback()
except Exception:
pass
# 回退:逐行执行
inserted = 0
updated = 0
# 回退:逐行执行
inserted = 0
updated = 0
with self.conn.cursor() as c:
for r in rows:
c.execute(sql, r)
try:
rec = c.fetchone()
except Exception:
rec = None
if self._is_inserted(rec):
inserted += 1
else:
updated += 1
return (inserted, updated)
return (inserted, updated)
@staticmethod
def _is_inserted(rec) -> bool:

View File

@@ -0,0 +1,50 @@
-- DWS schema for aggregated / serving tables.
--
-- Every statement below uses IF NOT EXISTS, so re-running the script skips
-- objects that already exist (an existing definition is NOT altered to match).
CREATE SCHEMA IF NOT EXISTS billiards_dws;
-- Order-level summary table: one row per (site_id, order_settle_id), enforced
-- by the composite primary key at the end of the column list.
CREATE TABLE IF NOT EXISTS billiards_dws.dws_order_summary (
-- Key columns (the only NOT NULL columns in the table).
site_id BIGINT NOT NULL,
order_settle_id BIGINT NOT NULL,
-- Order attributes; all nullable.
order_trade_no TEXT,
order_date DATE,
tenant_id BIGINT,
member_id BIGINT,
member_flag BOOLEAN,
recharge_order_flag BOOLEAN,
-- Item counters.
item_count INT,
total_item_quantity NUMERIC,
-- Monetary / quantity measures. All are NUMERIC with no declared
-- precision or scale, and all are nullable.
-- NOTE(review): the exact business meaning of each measure (e.g. the
-- book_*_flow and order_effective_* families) is defined by the job that
-- populates this table, not by this script — confirm against that job.
table_fee_amount NUMERIC,
assistant_service_amount NUMERIC,
goods_amount NUMERIC,
group_amount NUMERIC,
total_coupon_deduction NUMERIC,
member_discount_amount NUMERIC,
manual_discount_amount NUMERIC,
order_original_amount NUMERIC,
order_final_amount NUMERIC,
stored_card_deduct NUMERIC,
external_paid_amount NUMERIC,
total_paid_amount NUMERIC,
book_table_flow NUMERIC,
book_assistant_flow NUMERIC,
book_goods_flow NUMERIC,
book_group_flow NUMERIC,
book_order_flow NUMERIC,
order_effective_consume_cash NUMERIC,
order_effective_recharge_cash NUMERIC,
order_effective_flow NUMERIC,
refund_amount NUMERIC,
net_income NUMERIC,
-- Row timestamps; both default to now() on insert. This script defines no
-- trigger, so updated_at must be set explicitly by whoever updates a row.
created_at TIMESTAMPTZ DEFAULT now(),
updated_at TIMESTAMPTZ DEFAULT now(),
PRIMARY KEY (site_id, order_settle_id)
);
-- Secondary indexes: order_date alone, and order_date prefixed by tenant_id
-- or member_id.
CREATE INDEX IF NOT EXISTS idx_dws_order_summary_order_date
ON billiards_dws.dws_order_summary (order_date);
CREATE INDEX IF NOT EXISTS idx_dws_order_summary_tenant_date
ON billiards_dws.dws_order_summary (tenant_id, order_date);
CREATE INDEX IF NOT EXISTS idx_dws_order_summary_member_date
ON billiards_dws.dws_order_summary (member_id, order_date);

View File

@@ -8,28 +8,30 @@ WITH target_store AS (
),
task_codes AS (
SELECT unnest(ARRAY[
'assistant_accounts_masterS',
'assistant_service_records',
'assistant_cancellation_records',
'goods_stock_movements',
'ODS_INVENTORY_STOCK',
'ODS_PACKAGE',
'ODS_GROUP_BUY_REDEMPTION',
'ODS_MEMBER',
'ODS_MEMBER_BALANCE',
'member_stored_value_cards',
-- Must match tasks/ods_tasks.py (ENABLED_ODS_CODES)
'ODS_ASSISTANT_ACCOUNT',
'ODS_ASSISTANT_LEDGER',
'ODS_ASSISTANT_ABOLISH',
'ODS_SETTLEMENT_RECORDS',
'ODS_TABLE_USE',
'ODS_PAYMENT',
'ODS_REFUND',
'platform_coupon_redemption_records',
'recharge_settlements',
'ODS_PLATFORM_COUPON',
'ODS_MEMBER',
'ODS_MEMBER_CARD',
'ODS_MEMBER_BALANCE',
'ODS_RECHARGE_SETTLE',
'ODS_GROUP_PACKAGE',
'ODS_GROUP_BUY_REDEMPTION',
'ODS_INVENTORY_STOCK',
'ODS_INVENTORY_CHANGE',
'ODS_TABLES',
'ODS_GOODS_CATEGORY',
'ODS_STORE_GOODS',
'table_fee_discount_records',
'ODS_STORE_GOODS_SALES',
'ODS_TABLE_FEE_DISCOUNT',
'ODS_TENANT_GOODS',
'ODS_SETTLEMENT_TICKET',
'settlement_records',
'INIT_ODS_SCHEMA'
'ODS_SETTLEMENT_TICKET'
]) AS task_code
)
INSERT INTO etl_admin.etl_task (task_code, store_id, enabled)

View File

@@ -0,0 +1,49 @@
-- Seed the scheduler-compatible task rows in etl_admin.etl_task.
--
-- For the single store configured in target_store, one row per task code is
-- inserted with enabled = TRUE. Rows that already exist for the same
-- (task_code, store_id) are re-enabled and get updated_at refreshed, so the
-- script is safe to run repeatedly.
--
-- Notes:
--   - Each task_code must match an entry in orchestration/task_registry.py.
--   - ODS_* tasks are deliberately left out: in this repo version they do not
--     follow the BaseTask(cursor_data) scheduler interface.
--   - NOTE(review): ODS_JSON_ARCHIVE is seeded below despite its ODS_ prefix;
--     confirm it is meant to be exempt from the exclusion above.
--
-- Usage (example):
--   psql "%PG_DSN%" -f etl_billiards/database/seed_scheduler_tasks.sql
--
WITH target_store (store_id) AS (
    VALUES (2790685415443269::bigint)  -- TODO: replace with your store_id
),
task_codes (task_code) AS (
    VALUES
        ('ASSISTANT_ABOLISH'),
        ('ASSISTANTS'),
        ('COUPON_USAGE'),
        ('CHECK_CUTOFF'),
        ('DWD_LOAD_FROM_ODS'),
        ('DWD_QUALITY_CHECK'),
        ('INIT_DWD_SCHEMA'),
        ('INIT_DWS_SCHEMA'),
        ('INIT_ODS_SCHEMA'),
        ('INVENTORY_CHANGE'),
        ('LEDGER'),
        ('MANUAL_INGEST'),
        ('MEMBERS'),
        ('MEMBERS_DWD'),
        ('ODS_JSON_ARCHIVE'),
        ('ORDERS'),
        ('PACKAGES_DEF'),
        ('PAYMENTS'),
        ('PAYMENTS_DWD'),
        ('PRODUCTS'),
        ('REFUNDS'),
        ('TABLE_DISCOUNT'),
        ('TABLES'),
        ('TICKET_DWD'),
        ('TOPUPS'),
        ('DWS_BUILD_ORDER_SUMMARY')
)
INSERT INTO etl_admin.etl_task (task_code, store_id, enabled)
SELECT codes.task_code, store.store_id, TRUE
FROM task_codes AS codes, target_store AS store
-- Upsert: an existing (task_code, store_id) row is switched back on.
ON CONFLICT (task_code, store_id) DO UPDATE
    SET enabled = EXCLUDED.enabled,
        updated_at = now();