DWD完成
This commit is contained in:
@@ -1,4 +1,4 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# -*- coding: utf-8 -*-
|
||||
"""Populate PRD DWD tables from ODS payload snapshots."""
|
||||
from __future__ import annotations
|
||||
|
||||
@@ -16,9 +16,9 @@ SQL_STEPS: list[tuple[str, str]] = [
|
||||
INSERT INTO billiards_dwd.dim_tenant (tenant_id, tenant_name, status)
|
||||
SELECT DISTINCT tenant_id, 'default' AS tenant_name, 'active' AS status
|
||||
FROM (
|
||||
SELECT tenant_id FROM billiards_ods.ods_order_settle
|
||||
SELECT tenant_id FROM billiards_ods.settlement_records
|
||||
UNION SELECT tenant_id FROM billiards_ods.ods_order_receipt_detail
|
||||
UNION SELECT tenant_id FROM billiards_ods.ods_member_profile
|
||||
UNION SELECT tenant_id FROM billiards_ods.member_profiles
|
||||
) s
|
||||
WHERE tenant_id IS NOT NULL
|
||||
ON CONFLICT (tenant_id) DO UPDATE SET updated_at = now();
|
||||
@@ -30,7 +30,7 @@ SQL_STEPS: list[tuple[str, str]] = [
|
||||
INSERT INTO billiards_dwd.dim_site (site_id, tenant_id, site_name, status)
|
||||
SELECT DISTINCT site_id, MAX(tenant_id) AS tenant_id, 'default' AS site_name, 'active' AS status
|
||||
FROM (
|
||||
SELECT site_id, tenant_id FROM billiards_ods.ods_order_settle
|
||||
SELECT site_id, tenant_id FROM billiards_ods.settlement_records
|
||||
UNION SELECT site_id, tenant_id FROM billiards_ods.ods_order_receipt_detail
|
||||
UNION SELECT site_id, tenant_id FROM billiards_ods.ods_table_info
|
||||
) s
|
||||
@@ -84,7 +84,7 @@ SQL_STEPS: list[tuple[str, str]] = [
|
||||
"""
|
||||
INSERT INTO billiards_dwd.dim_member_card_type (card_type_id, card_type_name, discount_rate)
|
||||
SELECT DISTINCT card_type_id, card_type_name, discount_rate
|
||||
FROM billiards_ods.ods_member_card
|
||||
FROM billiards_ods.member_stored_value_cards
|
||||
WHERE card_type_id IS NOT NULL
|
||||
ON CONFLICT (card_type_id) DO UPDATE SET
|
||||
card_type_name = EXCLUDED.card_type_name,
|
||||
@@ -119,10 +119,10 @@ SQL_STEPS: list[tuple[str, str]] = [
|
||||
prof.wechat_id,
|
||||
prof.alipay_id,
|
||||
prof.remarks
|
||||
FROM billiards_ods.ods_member_profile prof
|
||||
FROM billiards_ods.member_profiles prof
|
||||
LEFT JOIN (
|
||||
SELECT DISTINCT site_id, member_id, card_type_id AS member_type_id, card_type_name AS member_type_name
|
||||
FROM billiards_ods.ods_member_card
|
||||
FROM billiards_ods.member_stored_value_cards
|
||||
) card
|
||||
ON prof.site_id = card.site_id AND prof.member_id = card.member_id
|
||||
WHERE prof.member_id IS NOT NULL
|
||||
@@ -167,7 +167,7 @@ SQL_STEPS: list[tuple[str, str]] = [
|
||||
"""
|
||||
INSERT INTO billiards_dwd.dim_assistant (assistant_id, assistant_name, mobile, status)
|
||||
SELECT DISTINCT assistant_id, assistant_name, mobile, status
|
||||
FROM billiards_ods.ods_assistant_account
|
||||
FROM billiards_ods.assistant_accounts_master
|
||||
WHERE assistant_id IS NOT NULL
|
||||
ON CONFLICT (assistant_id) DO UPDATE SET
|
||||
assistant_name = EXCLUDED.assistant_name,
|
||||
@@ -181,7 +181,7 @@ SQL_STEPS: list[tuple[str, str]] = [
|
||||
"""
|
||||
INSERT INTO billiards_dwd.dim_pay_method (pay_method_code, pay_method_name, is_stored_value, status)
|
||||
SELECT DISTINCT pay_method_code, pay_method_name, FALSE AS is_stored_value, 'active' AS status
|
||||
FROM billiards_ods.ods_payment_record
|
||||
FROM billiards_ods.payment_transactions
|
||||
WHERE pay_method_code IS NOT NULL
|
||||
ON CONFLICT (pay_method_code) DO UPDATE SET
|
||||
pay_method_name = EXCLUDED.pay_method_name,
|
||||
@@ -250,7 +250,7 @@ SQL_STEPS: list[tuple[str, str]] = [
|
||||
final_table_fee,
|
||||
FALSE AS is_canceled,
|
||||
NULL::TIMESTAMPTZ AS cancel_time
|
||||
FROM billiards_ods.ods_table_use_log
|
||||
FROM billiards_ods.table_fee_transactions_log
|
||||
ON CONFLICT (site_id, ledger_id) DO NOTHING;
|
||||
""",
|
||||
),
|
||||
@@ -325,7 +325,7 @@ SQL_STEPS: list[tuple[str, str]] = [
|
||||
pay_time,
|
||||
relate_type,
|
||||
relate_id
|
||||
FROM billiards_ods.ods_payment_record
|
||||
FROM billiards_ods.payment_transactions
|
||||
ON CONFLICT (site_id, pay_id) DO NOTHING;
|
||||
""",
|
||||
),
|
||||
@@ -346,7 +346,7 @@ SQL_STEPS: list[tuple[str, str]] = [
|
||||
refund_amount,
|
||||
refund_time,
|
||||
status
|
||||
FROM billiards_ods.ods_refund_record
|
||||
FROM billiards_ods.refund_transactions
|
||||
ON CONFLICT (site_id, refund_id) DO NOTHING;
|
||||
""",
|
||||
),
|
||||
@@ -369,7 +369,7 @@ SQL_STEPS: list[tuple[str, str]] = [
|
||||
balance_before,
|
||||
balance_after,
|
||||
change_time
|
||||
FROM billiards_ods.ods_balance_change
|
||||
FROM billiards_ods.member_balance_changes
|
||||
ON CONFLICT (site_id, change_id) DO NOTHING;
|
||||
""",
|
||||
),
|
||||
@@ -423,3 +423,4 @@ def main() -> int:
|
||||
|
||||
if __name__ == "__main__":
|
||||
raise SystemExit(main())
|
||||
|
||||
|
||||
117
etl_billiards/scripts/check_ods_json_vs_table.py
Normal file
117
etl_billiards/scripts/check_ods_json_vs_table.py
Normal file
@@ -0,0 +1,117 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
ODS JSON 字段核对脚本:对照当前数据库中的 ODS 表字段,检查示例 JSON(默认目录 C:\\dev\\LLTQ\\export\\test-json-doc)
|
||||
是否包含同名键,并输出每表未命中的字段,便于补充映射或确认确实无源字段。
|
||||
|
||||
使用方法:
|
||||
set PG_DSN=postgresql://... # 如 .env 中配置
|
||||
python -m etl_billiards.scripts.check_ods_json_vs_table
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import pathlib
|
||||
from typing import Dict, Iterable, Set, Tuple
|
||||
|
||||
import psycopg2
|
||||
|
||||
from etl_billiards.tasks.manual_ingest_task import ManualIngestTask
|
||||
|
||||
|
||||
def _flatten_keys(obj, prefix: str = "") -> Set[str]:
|
||||
"""递归展开 JSON 所有键路径,返回形如 data.assistantInfos.id 的集合。列表不保留索引,仅继续向下展开。"""
|
||||
keys: Set[str] = set()
|
||||
if isinstance(obj, dict):
|
||||
for k, v in obj.items():
|
||||
new_prefix = f"{prefix}.{k}" if prefix else k
|
||||
keys.add(new_prefix)
|
||||
keys |= _flatten_keys(v, new_prefix)
|
||||
elif isinstance(obj, list):
|
||||
for item in obj:
|
||||
keys |= _flatten_keys(item, prefix)
|
||||
return keys
|
||||
|
||||
|
||||
def _load_json_keys(path: pathlib.Path) -> Tuple[Set[str], dict[str, Set[str]]]:
|
||||
"""读取单个 JSON 文件并返回展开后的键集合以及末段->路径列表映射,若文件不存在或无法解析则返回空集合。"""
|
||||
if not path.exists():
|
||||
return set(), {}
|
||||
data = json.loads(path.read_text(encoding="utf-8"))
|
||||
paths = _flatten_keys(data)
|
||||
last_map: dict[str, Set[str]] = {}
|
||||
for p in paths:
|
||||
last = p.split(".")[-1].lower()
|
||||
last_map.setdefault(last, set()).add(p)
|
||||
return paths, last_map
|
||||
|
||||
|
||||
def _load_ods_columns(dsn: str) -> Dict[str, Set[str]]:
|
||||
"""从数据库读取 billiards_ods.* 的列名集合,按表返回。"""
|
||||
conn = psycopg2.connect(dsn)
|
||||
cur = conn.cursor()
|
||||
cur.execute(
|
||||
"""
|
||||
SELECT table_name, column_name
|
||||
FROM information_schema.columns
|
||||
WHERE table_schema='billiards_ods'
|
||||
ORDER BY table_name, ordinal_position
|
||||
"""
|
||||
)
|
||||
result: Dict[str, Set[str]] = {}
|
||||
for table, col in cur.fetchall():
|
||||
result.setdefault(table, set()).add(col.lower())
|
||||
cur.close()
|
||||
conn.close()
|
||||
return result
|
||||
|
||||
|
||||
def main() -> None:
|
||||
"""主流程:遍历 FILE_MAPPING 中的 ODS 表,检查 JSON 键覆盖情况并打印报告。"""
|
||||
dsn = os.environ.get("PG_DSN")
|
||||
json_dir = pathlib.Path(os.environ.get("JSON_DOC_DIR", r"C:\dev\LLTQ\export\test-json-doc"))
|
||||
|
||||
ods_cols_map = _load_ods_columns(dsn)
|
||||
|
||||
print(f"使用 JSON 目录: {json_dir}")
|
||||
print(f"连接 DSN: {dsn}")
|
||||
print("=" * 80)
|
||||
|
||||
for keywords, ods_table in ManualIngestTask.FILE_MAPPING:
|
||||
table = ods_table.split(".")[-1]
|
||||
cols = ods_cols_map.get(table, set())
|
||||
file_name = f"{keywords[0]}.json"
|
||||
file_path = json_dir / file_name
|
||||
keys_full, path_map = _load_json_keys(file_path)
|
||||
key_last_parts = set(path_map.keys())
|
||||
|
||||
missing: Set[str] = set()
|
||||
extra_keys: Set[str] = set()
|
||||
present: Set[str] = set()
|
||||
for col in sorted(cols):
|
||||
if col in key_last_parts:
|
||||
present.add(col)
|
||||
else:
|
||||
missing.add(col)
|
||||
for k in key_last_parts:
|
||||
if k not in cols:
|
||||
extra_keys.add(k)
|
||||
|
||||
print(f"[{table}] 文件={file_name} 列数={len(cols)} JSON键(末段)覆盖={len(present)}/{len(cols)}")
|
||||
if missing:
|
||||
print(" 未命中列:", ", ".join(sorted(missing)))
|
||||
else:
|
||||
print(" 未命中列: 无")
|
||||
if extra_keys:
|
||||
extras = []
|
||||
for k in sorted(extra_keys):
|
||||
paths = ", ".join(sorted(path_map.get(k, [])))
|
||||
extras.append(f"{k} ({paths})")
|
||||
print(" JSON 仅有(表无此列):", "; ".join(extras))
|
||||
else:
|
||||
print(" JSON 仅有(表无此列): 无")
|
||||
print("-" * 80)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Reference in New Issue
Block a user