DWD completed

This commit is contained in:
Neo
2025-12-09 04:57:05 +08:00
parent f301cc1fd5
commit 561c640700
46 changed files with 26181 additions and 3540 deletions


@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+# -*- coding: utf-8 -*-
"""Populate PRD DWD tables from ODS payload snapshots."""
from __future__ import annotations
@@ -16,9 +16,9 @@ SQL_STEPS: list[tuple[str, str]] = [
INSERT INTO billiards_dwd.dim_tenant (tenant_id, tenant_name, status)
SELECT DISTINCT tenant_id, 'default' AS tenant_name, 'active' AS status
FROM (
-SELECT tenant_id FROM billiards_ods.ods_order_settle
+SELECT tenant_id FROM billiards_ods.settlement_records
UNION SELECT tenant_id FROM billiards_ods.ods_order_receipt_detail
-UNION SELECT tenant_id FROM billiards_ods.ods_member_profile
+UNION SELECT tenant_id FROM billiards_ods.member_profiles
) s
WHERE tenant_id IS NOT NULL
ON CONFLICT (tenant_id) DO UPDATE SET updated_at = now();
@@ -30,7 +30,7 @@ SQL_STEPS: list[tuple[str, str]] = [
INSERT INTO billiards_dwd.dim_site (site_id, tenant_id, site_name, status)
SELECT DISTINCT site_id, MAX(tenant_id) AS tenant_id, 'default' AS site_name, 'active' AS status
FROM (
-SELECT site_id, tenant_id FROM billiards_ods.ods_order_settle
+SELECT site_id, tenant_id FROM billiards_ods.settlement_records
UNION SELECT site_id, tenant_id FROM billiards_ods.ods_order_receipt_detail
UNION SELECT site_id, tenant_id FROM billiards_ods.ods_table_info
) s
@@ -84,7 +84,7 @@ SQL_STEPS: list[tuple[str, str]] = [
"""
INSERT INTO billiards_dwd.dim_member_card_type (card_type_id, card_type_name, discount_rate)
SELECT DISTINCT card_type_id, card_type_name, discount_rate
-FROM billiards_ods.ods_member_card
+FROM billiards_ods.member_stored_value_cards
WHERE card_type_id IS NOT NULL
ON CONFLICT (card_type_id) DO UPDATE SET
card_type_name = EXCLUDED.card_type_name,
@@ -119,10 +119,10 @@ SQL_STEPS: list[tuple[str, str]] = [
prof.wechat_id,
prof.alipay_id,
prof.remarks
-FROM billiards_ods.ods_member_profile prof
+FROM billiards_ods.member_profiles prof
LEFT JOIN (
SELECT DISTINCT site_id, member_id, card_type_id AS member_type_id, card_type_name AS member_type_name
-FROM billiards_ods.ods_member_card
+FROM billiards_ods.member_stored_value_cards
) card
ON prof.site_id = card.site_id AND prof.member_id = card.member_id
WHERE prof.member_id IS NOT NULL
@@ -167,7 +167,7 @@ SQL_STEPS: list[tuple[str, str]] = [
"""
INSERT INTO billiards_dwd.dim_assistant (assistant_id, assistant_name, mobile, status)
SELECT DISTINCT assistant_id, assistant_name, mobile, status
-FROM billiards_ods.ods_assistant_account
+FROM billiards_ods.assistant_accounts_master
WHERE assistant_id IS NOT NULL
ON CONFLICT (assistant_id) DO UPDATE SET
assistant_name = EXCLUDED.assistant_name,
@@ -181,7 +181,7 @@ SQL_STEPS: list[tuple[str, str]] = [
"""
INSERT INTO billiards_dwd.dim_pay_method (pay_method_code, pay_method_name, is_stored_value, status)
SELECT DISTINCT pay_method_code, pay_method_name, FALSE AS is_stored_value, 'active' AS status
-FROM billiards_ods.ods_payment_record
+FROM billiards_ods.payment_transactions
WHERE pay_method_code IS NOT NULL
ON CONFLICT (pay_method_code) DO UPDATE SET
pay_method_name = EXCLUDED.pay_method_name,
@@ -250,7 +250,7 @@ SQL_STEPS: list[tuple[str, str]] = [
final_table_fee,
FALSE AS is_canceled,
NULL::TIMESTAMPTZ AS cancel_time
-FROM billiards_ods.ods_table_use_log
+FROM billiards_ods.table_fee_transactions_log
ON CONFLICT (site_id, ledger_id) DO NOTHING;
""",
),
@@ -325,7 +325,7 @@ SQL_STEPS: list[tuple[str, str]] = [
pay_time,
relate_type,
relate_id
-FROM billiards_ods.ods_payment_record
+FROM billiards_ods.payment_transactions
ON CONFLICT (site_id, pay_id) DO NOTHING;
""",
),
@@ -346,7 +346,7 @@ SQL_STEPS: list[tuple[str, str]] = [
refund_amount,
refund_time,
status
-FROM billiards_ods.ods_refund_record
+FROM billiards_ods.refund_transactions
ON CONFLICT (site_id, refund_id) DO NOTHING;
""",
),
@@ -369,7 +369,7 @@ SQL_STEPS: list[tuple[str, str]] = [
balance_before,
balance_after,
change_time
-FROM billiards_ods.ods_balance_change
+FROM billiards_ods.member_balance_changes
ON CONFLICT (site_id, change_id) DO NOTHING;
""",
),
@@ -423,3 +423,4 @@ def main() -> int:
if __name__ == "__main__":
raise SystemExit(main())
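The hunks above only touch the SQL_STEPS entries; the runner that executes them sits outside the diff. As a minimal sketch of how a (name, sql) step list like this is typically applied — the function name, logging, and connection handling below are illustrative assumptions, not this file's actual runner:

import psycopg2

def run_steps(dsn: str, steps: list[tuple[str, str]]) -> None:
    # Execute each named SQL step and commit after every statement,
    # so a failing step does not roll back the ones already applied.
    conn = psycopg2.connect(dsn)
    try:
        for name, sql in steps:
            with conn.cursor() as cur:
                cur.execute(sql)
            conn.commit()
            print(f"step applied: {name}")
    finally:
        conn.close()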


@@ -0,0 +1,117 @@
# -*- coding: utf-8 -*-
"""
ODS JSON 字段核对脚本:对照当前数据库中的 ODS 表字段,检查示例 JSON默认目录 C:\\dev\\LLTQ\\export\\test-json-doc
是否包含同名键,并输出每表未命中的字段,便于补充映射或确认确实无源字段。
使用方法:
set PG_DSN=postgresql://... # 如 .env 中配置
python -m etl_billiards.scripts.check_ods_json_vs_table
"""
from __future__ import annotations

import json
import os
import pathlib
from typing import Dict, Iterable, Set, Tuple

import psycopg2

from etl_billiards.tasks.manual_ingest_task import ManualIngestTask


def _flatten_keys(obj, prefix: str = "") -> Set[str]:
    """Recursively expand all JSON key paths into a set such as data.assistantInfos.id.
    List indices are not kept; list items are simply expanded further."""
    keys: Set[str] = set()
    if isinstance(obj, dict):
        for k, v in obj.items():
            new_prefix = f"{prefix}.{k}" if prefix else k
            keys.add(new_prefix)
            keys |= _flatten_keys(v, new_prefix)
    elif isinstance(obj, list):
        for item in obj:
            keys |= _flatten_keys(item, prefix)
    return keys


def _load_json_keys(path: pathlib.Path) -> Tuple[Set[str], dict[str, Set[str]]]:
    """Read a single JSON file and return the expanded key paths plus a mapping from the
    lowercased last segment to the full paths; return empty collections if the file is
    missing or cannot be parsed."""
    if not path.exists():
        return set(), {}
    try:
        data = json.loads(path.read_text(encoding="utf-8"))
    except (ValueError, OSError):
        # Unreadable or invalid JSON: treat it like a missing file, as the docstring promises.
        return set(), {}
    paths = _flatten_keys(data)
    last_map: dict[str, Set[str]] = {}
    for p in paths:
        last = p.split(".")[-1].lower()
        last_map.setdefault(last, set()).add(p)
    return paths, last_map


def _load_ods_columns(dsn: str) -> Dict[str, Set[str]]:
    """Read the column-name sets of billiards_ods.* from the database, keyed by table."""
    conn = psycopg2.connect(dsn)
    cur = conn.cursor()
    cur.execute(
        """
        SELECT table_name, column_name
        FROM information_schema.columns
        WHERE table_schema='billiards_ods'
        ORDER BY table_name, ordinal_position
        """
    )
    result: Dict[str, Set[str]] = {}
    for table, col in cur.fetchall():
        result.setdefault(table, set()).add(col.lower())
    cur.close()
    conn.close()
    return result


def main() -> None:
    """Main flow: iterate over the ODS tables in FILE_MAPPING, check JSON key coverage, and print a report."""
    dsn = os.environ.get("PG_DSN")
    if not dsn:
        raise SystemExit("PG_DSN is not set; see the module docstring for usage.")
    json_dir = pathlib.Path(os.environ.get("JSON_DOC_DIR", r"C:\dev\LLTQ\export\test-json-doc"))
    ods_cols_map = _load_ods_columns(dsn)
    print(f"Using JSON directory: {json_dir}")
    print(f"Connection DSN: {dsn}")
    print("=" * 80)
    for keywords, ods_table in ManualIngestTask.FILE_MAPPING:
        table = ods_table.split(".")[-1]
        cols = ods_cols_map.get(table, set())
        file_name = f"{keywords[0]}.json"
        file_path = json_dir / file_name
        keys_full, path_map = _load_json_keys(file_path)
        key_last_parts = set(path_map.keys())
        missing: Set[str] = set()
        extra_keys: Set[str] = set()
        present: Set[str] = set()
        for col in sorted(cols):
            if col in key_last_parts:
                present.add(col)
            else:
                missing.add(col)
        for k in key_last_parts:
            if k not in cols:
                extra_keys.add(k)
        print(f"[{table}] file={file_name} columns={len(cols)} JSON-key (last-segment) coverage={len(present)}/{len(cols)}")
        if missing:
            print("  columns not matched:", ", ".join(sorted(missing)))
        else:
            print("  columns not matched: none")
        if extra_keys:
            extras = []
            for k in sorted(extra_keys):
                paths = ", ".join(sorted(path_map.get(k, [])))
                extras.append(f"{k} ({paths})")
            print("  JSON-only keys (no matching column):", "; ".join(extras))
        else:
            print("  JSON-only keys (no matching column): none")
        print("-" * 80)


if __name__ == "__main__":
main()