Files
Neo-ZQYY/scripts/ops/export_dwd_field_review.py

224 lines
10 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
导出 DWD 表字段清单(现有 + 拟新增),供人工审查。
输出:$FIELD_AUDIT_ROOT/dwd_field_review.md由 .env 配置)
对每张涉及的 DWD 表main + ex列出
- 现有字段:字段名、数据类型、说明(从 pg_catalog.col_description 获取)
- 拟新增字段:字段名、建议类型、来源 ODS 列、说明
"""
import os
import sys
import psycopg2
from dotenv import load_dotenv
from pathlib import Path
load_dotenv(Path(__file__).resolve().parents[2] / ".env")
# 构建 DSN使用 .env 中的 PG_DSN指向 test_etl_feiqiuschema 为 ods/dwd/dws/meta
DSN = os.getenv("PG_DSN")
if not DSN:
print("ERROR: PG_DSN 未配置"); sys.exit(1)
# ── 涉及的 DWD 表schema.table ──────────────────────────────────
TABLES = [
# A 类
("dwd", "dim_assistant"),
("dwd", "dim_assistant_ex"),
("dwd", "dwd_assistant_service_log"),
("dwd", "dwd_assistant_service_log_ex"),
("dwd", "dwd_store_goods_sale"),
("dwd", "dwd_store_goods_sale_ex"),
("dwd", "dwd_member_balance_change"),
("dwd", "dwd_member_balance_change_ex"),
("dwd", "dim_tenant_goods"),
("dwd", "dim_tenant_goods_ex"),
("dwd", "dim_table"),
("dwd", "dim_table_ex"),
# B 类
("dwd", "dwd_recharge_order"),
("dwd", "dim_store_goods"),
("dwd", "dim_store_goods_ex"),
]
# ── 拟新增字段(按 DWD 表分组) ────────────────────────────────────
NEW_FIELDS = {
"dim_assistant_ex": [
("system_role_id", "bigint", "assistant_accounts_master.system_role_id", "系统角色 ID关联角色权限"),
("job_num", "text", "assistant_accounts_master.job_num", "备用工号(当前门店未启用,全 NULL"),
("cx_unit_price", "numeric(18,2)","assistant_accounts_master.cx_unit_price", "促销时段单价(当前值 0.00"),
("pd_unit_price", "numeric(18,2)","assistant_accounts_master.pd_unit_price", "普通时段单价(当前值 0.00"),
],
"dwd_assistant_service_log_ex": [
("operator_id", "bigint", "assistant_service_records.operator_id", "操作员 ID如收银员"),
("operator_name", "text", "assistant_service_records.operator_name", "操作员名称快照"),
],
"dwd_member_balance_change_ex": [
("relate_id", "bigint", "member_balance_changes.relate_id", "关联充值/订单 ID0=无关联)"),
],
"dim_table_ex": [
("sitename", "text", "site_tables_master.sitename", "门店名称快照"),
("applet_qr_code_url", "text", "site_tables_master.appletqrcodeurl", "小程序二维码 URL当前全 NULL"),
("audit_status", "integer", "site_tables_master.audit_status", "审核状态枚举"),
("charge_free", "integer", "site_tables_master.charge_free", "是否免费0=否)"),
("create_time", "timestamptz", "site_tables_master.create_time", "台桌创建时间"),
("delay_lights_time", "integer", "site_tables_master.delay_lights_time", "延迟关灯时间(秒)"),
("is_rest_area", "integer", "site_tables_master.is_rest_area", "是否休息区0=否)"),
("light_status", "integer", "site_tables_master.light_status", "灯控状态枚举"),
("only_allow_groupon", "integer", "site_tables_master.only_allow_groupon", "是否仅允许团购"),
("order_delay_time", "integer", "site_tables_master.order_delay_time", "订单延迟时间(秒)"),
("self_table", "integer", "site_tables_master.self_table", "是否自助台桌"),
("table_status_name", "text", "site_tables_master.tablestatusname", "台桌状态名称(空闲中/使用中/暂停中)"),
("temporary_light_second","integer", "site_tables_master.temporary_light_second","临时灯光秒数"),
("virtual_table", "integer", "site_tables_master.virtual_table", "是否虚拟台桌0=否)"),
],
"dim_store_goods_ex": [
("batch_stock_quantity", "numeric", "store_goods_master.batch_stock_quantity", "批次库存数量"),
],
}
# recharge_settlements 仅补映射,不新增列
MAPPING_ONLY = {
"dwd_recharge_order": [
("pl_coupon_sale_amount", "plcouponsaleamount", "平台券销售额"),
("mervou_sales_amount", "mervousalesamount", "美团券销售额"),
("electricity_money", "electricitymoney", "电费金额"),
("real_electricity_money", "realelectricitymoney", "实际电费金额"),
("electricity_adjust_money","electricityadjustmoney","电费调整金额"),
],
}
# 跳过的字段
SKIPPED = [
("store_goods_sales_records", "discount_price", "DWD 列名已被 discount_money 占用"),
("tenant_goods_master", "commoditycode", "冗余字段DWD 已有 commodity_code + commodity_code_list"),
("store_goods_master", "provisional_total_cost","DWD 列名已被 total_purchase_cost 占用"),
("store_goods_master", "time_slot_sale", "ODS 列不存在,需确认 API"),
]
# C 类新建表(仅列出 ODS 列名,后续设计时确定 DWD 列名)
C_CLASS_TABLES = {
"goods_stock_summary (→ 新建 dwd_goods_stock_summary)": [
"sitegoodsid", "goodsname", "goodsunit", "goodscategoryid",
"goodscategorysecondid", "categoryname", "rangestartstock",
"rangeendstock", "rangein", "rangeout", "rangesale",
"rangesalemoney", "rangeinventory", "currentstock",
],
"goods_stock_movements (→ 新建 dwd_goods_stock_movement)": [
"sitegoodsstockid", "tenantid", "siteid", "sitegoodsid",
"goodsname", "goodscategoryid", "goodssecondcategoryid",
"unit", "price", "stocktype", "changenum", "startnum",
"endnum", "changenuma", "startnuma", "endnuma",
"remark", "operatorname", "createtime",
],
}
def get_table_columns(cur, schema, table):
"""获取表的列信息:列名、类型、注释(使用 pg_catalog 避免 search_path 问题)"""
cur.execute("""
SELECT a.attname AS column_name,
pg_catalog.format_type(a.atttypid, a.atttypmod) AS col_type,
COALESCE(d.description, '') AS col_comment
FROM pg_catalog.pg_attribute a
JOIN pg_catalog.pg_class c ON c.oid = a.attrelid
JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace
LEFT JOIN pg_catalog.pg_description d
ON d.objoid = a.attrelid AND d.objsubid = a.attnum
WHERE n.nspname = %s
AND c.relname = %s
AND a.attnum > 0
AND NOT a.attisdropped
ORDER BY a.attnum
""", (schema, table))
return cur.fetchall()
def main():
conn = psycopg2.connect(DSN)
cur = conn.cursor()
lines = []
lines.append("# DWD 表字段清单(现有 + 拟新增)\n")
lines.append("> 导出时间2026-02-19")
lines.append("> 用途:供人工审查排查结论,确认新增字段是否合理\n")
lines.append("---\n")
# ── 第一部分:各 DWD 表现有字段 + 拟新增字段 ──
lines.append("## 第一部分A/B 类表(已有 DWD 表)\n")
for schema, table in TABLES:
cols = get_table_columns(cur, schema, table)
if not cols:
lines.append(f"### {schema}.{table}\n")
lines.append("⚠️ 表不存在或无列\n")
continue
lines.append(f"### {schema}.{table}\n")
# 现有字段
lines.append("#### 现有字段\n")
lines.append("| # | 字段名 | 数据类型 | 说明 |")
lines.append("|---|--------|---------|------|")
for i, (col_name, col_type, col_comment) in enumerate(cols, 1):
lines.append(f"| {i} | `{col_name}` | {col_type} | {col_comment} |")
lines.append("")
# 拟新增字段
new = NEW_FIELDS.get(table, [])
if new:
lines.append("#### 🆕 拟新增字段\n")
lines.append("| # | 字段名 | 建议类型 | 来源 ODS 列 | 说明 |")
lines.append("|---|--------|---------|------------|------|")
for i, (fname, ftype, fsrc, fdesc) in enumerate(new, 1):
lines.append(f"| {i} | `{fname}` | {ftype} | {fsrc} | {fdesc} |")
lines.append("")
# 仅补映射
mo = MAPPING_ONLY.get(table, [])
if mo:
lines.append("#### 🔗 仅补 FACT_MAPPINGSDWD 列已存在)\n")
lines.append("| # | DWD 列 | ODS 列 | 说明 |")
lines.append("|---|--------|--------|------|")
for i, (dwd_col, ods_col, desc) in enumerate(mo, 1):
lines.append(f"| {i} | `{dwd_col}` | `{ods_col}` | {desc} |")
lines.append("")
lines.append("---\n")
# ── 第二部分:跳过的字段 ──
lines.append("## 第二部分:跳过的字段\n")
lines.append("| # | ODS 表 | ODS 列 | 跳过原因 |")
lines.append("|---|--------|--------|---------|")
for i, (tbl, col, reason) in enumerate(SKIPPED, 1):
lines.append(f"| {i} | {tbl} | `{col}` | {reason} |")
lines.append("\n---\n")
# ── 第三部分C 类新建表 ──
lines.append("## 第三部分C 类表(需新建 DWD 表)\n")
for title, ods_cols in C_CLASS_TABLES.items():
lines.append(f"### {title}\n")
lines.append("| # | ODS 列名 |")
lines.append("|---|---------|")
for i, col in enumerate(ods_cols, 1):
lines.append(f"| {i} | `{col}` |")
lines.append("")
lines.append("---\n")
cur.close()
conn.close()
# 写入文件(从 .env 读取 FIELD_AUDIT_ROOT
from _env_paths import get_output_path
out_dir = get_output_path("FIELD_AUDIT_ROOT")
out_dir.mkdir(parents=True, exist_ok=True)
out_path = out_dir / "dwd_field_review.md"
out_path.write_text("\n".join(lines), encoding="utf-8")
print(f"✅ 已导出到 {out_path}")
if __name__ == "__main__":
main()