This commit is contained in:
Neo
2026-02-04 21:39:01 +08:00
parent ee773a9b52
commit a3f4d04335
148 changed files with 31455 additions and 182 deletions

94
tmp/Untitled Normal file
View File

@@ -0,0 +1,94 @@
# DWS 数据层需求
## 简介
项目路径C:\dev\LLTQ\ETL\feiqiu-ETL
本文档描述在ETL已完成的DWD层数据基础上对DWS层的数据处理
- 完成对DWS层数据库的处理即数据库设计成果为DDL的SQL语句。
- 数据读取处理到落库即DWD读取Python处理SQL写入。
文档更多聚焦业务描述;你需要运用专业技能,以面向对象编程(OOP)思想完成程序设计,直至代码完成:
- 参考.\README.md 了解现在项目现状。
- 参考.\etl_billiards\docs 了解 DWD的schema的表和字段。
- SQL和Python代码需要详尽的高密度的中文注释。
- 完成内容,需要详尽高密度的补充至.\README.md以方便后续维护。
- DWS的表与表的字段 参考.\etl_billiards\docs\dwd_main_tables_dictionary.md 完成类似的数据库文档,方便后续维护。
- 注意中文编码需求。
## 通用需求
### 数据分层
我希望使用互联网软件的业内通用方法将数据按照更新时间分为4层以符合业务层面的查询效率速度。
- 第一层:回溯两天前到当前数据。
- 第二层回溯1个月前到当前数据。
- 第三层回溯3个月前到当前数据。
- 第四层:全量数据。
- 需要有配套的机制及时添加删除整理数据。
### 统计注意
当统计一些数据时,注意口径,数据有效性标识。举例:
- 计算助教业绩/工资时,需要参考助教废除表,相关业务数据的影响。
- 计算助教业绩/工资时,注意辨别 助教课 附加课影响。
## 业务需求
### 系统设置
- 助教新的绩效考核和工资结算方式更新为以下算法,影响工资结算和财务账务方面的统计核算,相关内容需要落库,以方便后续调整。还要标记执行时间(如哪个月执行哪个标准等),执行相关结算和计算逻辑。:
档位原因考虑 总业绩小时数阈值 专业课抽成(元/小时) 打赏课抽成 次月休假(天)
0档 淘汰压力 H <100 28 50% 3
1档 及格档(重点激励) 100≤ H <130 18 40% 4
2档 良好档(重点激励) 130≤ H <160 15 38% 4
3档 优秀档 160≤ H <190 13 35% 5
4档 卓越加速档(高端人才倾斜) 190≤ H <220 10 33% 6
5档 冠军加速档(高端人才倾斜) H ≥220 8 30% 休假自由
*课程分为2种dwd_assistant_service_log表的skill_name
基础课:又名 专业课 上桌 上钟,是为客户提供台球助教陪练的课程,按时长统计。精确到分钟。
附加课:又名 超休 激励 打赏,是客户支付较为高昂的价格,买断整小时与助教外出。
总业绩小时数阈值指基础课和附加课总和。
各级别助教dim_assistant表的level基础课对客户收费初级 98元/小时;中级 108元/小时;高级 118元/小时;星级 138元/小时;
附加课对客户收费统一为190元/小时。
充值提成:
冲刺奖 达成奖金
当月 H ≥ 190300 元
当月 H ≥ 220800 元(与上条不叠加,取高)
额外奖金:
冲刺奖 达成奖金
当月 H ≥ 190300 元
当月 H ≥ 220800 元(与上条不叠加,取高)
Top3 奖金:
第1名1000 元
第2名600 元
第3名400 元
规则:
1、过档后所有时长按新档位进行计算。
举例当前某中级助教已完成185小时基础课占170小时附加课15小时。则该月工资计算方法
170×(108-13) + 15×190×(1-0.35)(即:基础课170小时×(课时价108-抽成13),加上附加课15小时×190元×(1-打赏课抽成35%))
2、本月新入职助教定档方案
按照日均*30的总业绩小时数定档。
在当月25日之后入职的新助教,最高定档至3档。
该折算仅用于定档不适用于“冲刺奖”和“Top3奖”的计算口径。
### 助教维度
以每个助教个体的视角
- 我要知道我的业绩档位,历史月份与本月档位进度,档位影响的收入单价。及相邻月份的变化。
- 我要知道我的有效业绩:历史月份与本月的 基础课课时,激励课课时,全部课课时。相邻月份的变化。
- 我要知道我的收入:历史月份与本月的收入(注意助教等级,业绩档位,课程种类等因素的总和计算)。相邻月份的变化。
- 我要知道我的客户情况过去7天、10天、15天、30天、60天、90天 的跨度进行统计,我服务过(基础课+附加课)的客户数据,并关联每次服务的 时间 时长 台桌 分类 等详细信息。
### 客户维度
统计每个客户的信息
- 我要知道每个客户过去7天、10天、15天、30天、60天、90天 的跨度进行统计,来店消费情况,并关联每次服务的 时间 食品饮品 时长 台桌 分类 助教服务 等详细信息。
### 财务维度
财务维度的需求(已经落到原型图需求级别了),见财务页面需求.md

View File

@@ -0,0 +1,226 @@
# -*- coding: utf-8 -*-
"""
Add missing DWD columns to the database.

For each ODS field recently added upstream, add the corresponding column to
the matching DWD table (see MISSING_COLUMNS below).
"""
import os

import psycopg2

# Database DSN. Prefer the PG_DSN environment variable so credentials are not
# committed to source control; the literal below is kept only as a local
# development fallback. NOTE(review): rotate/remove the embedded password.
DSN = os.environ.get(
    'PG_DSN',
    'postgresql://local-Python:Neo-local-1991125@100.64.0.4:5432/LLZQ-test',
)
# Missing DWD column definitions: table name -> [(column name, SQL type, comment)].
# Per the migration plan, core business fields live on the main table and
# extended fields live on the companion ``_ex`` table.
MISSING_COLUMNS = {
    # Settlement head - core amount fields on the main table.
    'billiards_dwd.dwd_settlement_head': [
        ('electricity_money', 'NUMERIC(18,2)', '电费金额'),
        ('real_electricity_money', 'NUMERIC(18,2)', '实际电费金额'),
        ('electricity_adjust_money', 'NUMERIC(18,2)', '电费调整金额'),
        ('pl_coupon_sale_amount', 'NUMERIC(18,2)', '平台券销售额'),
        ('mervou_sales_amount', 'NUMERIC(18,2)', '商户券销售额'),
    ],
    'billiards_dwd.dwd_settlement_head_ex': [
        ('settle_list', 'JSONB', '结算明细列表'),
    ],
    # Table-fee transaction log.
    'billiards_dwd.dwd_table_fee_log': [
        ('activity_discount_amount', 'NUMERIC(18,2)', '活动折扣金额'),
        ('real_service_money', 'NUMERIC(18,2)', '实际服务费金额'),
    ],
    'billiards_dwd.dwd_table_fee_log_ex': [
        ('order_consumption_type', 'INT', '订单消费类型'),
    ],
    # Assistant service log.
    'billiards_dwd.dwd_assistant_service_log': [
        ('real_service_money', 'NUMERIC(18,2)', '实际服务费金额'),
    ],
    'billiards_dwd.dwd_assistant_service_log_ex': [
        ('assistant_team_name', 'TEXT', '助教团队名称'),
    ],
    # Group-buy redemption records.
    'billiards_dwd.dwd_groupbuy_redemption': [
        ('member_discount_money', 'NUMERIC(18,2)', '会员折扣金额'),
        ('coupon_sale_id', 'BIGINT', '优惠券销售ID'),
    ],
    'billiards_dwd.dwd_groupbuy_redemption_ex': [
        ('table_share_money', 'NUMERIC(18,2)', '台费分摊金额'),
        ('table_service_share_money', 'NUMERIC(18,2)', '台费服务分摊金额'),
        ('goods_share_money', 'NUMERIC(18,2)', '商品分摊金额'),
        ('good_service_share_money', 'NUMERIC(18,2)', '商品服务分摊金额'),
        ('assistant_share_money', 'NUMERIC(18,2)', '助教分摊金额'),
        ('assistant_service_share_money', 'NUMERIC(18,2)', '助教服务分摊金额'),
        ('recharge_share_money', 'NUMERIC(18,2)', '充值分摊金额'),
    ],
    # Table-fee adjustment records.
    'billiards_dwd.dwd_table_fee_adjust': [
        ('table_name', 'TEXT', '台桌名称'),
        ('table_price', 'NUMERIC(18,2)', '台桌价格'),
        ('charge_free', 'BOOLEAN', '是否免费'),
    ],
    'billiards_dwd.dwd_table_fee_adjust_ex': [
        ('area_type_id', 'BIGINT', '区域类型ID'),
        ('site_table_area_id', 'BIGINT', '门店台区ID'),
        ('site_table_area_name', 'TEXT', '门店台区名称'),
        ('site_name', 'TEXT', '门店名称'),
        ('tenant_name', 'TEXT', '租户名称'),
    ],
    # Member stored-value card dimension.
    'billiards_dwd.dim_member_card_account': [
        ('principal_balance', 'NUMERIC(18,2)', '本金余额'),
        ('member_grade', 'INT', '会员等级'),
    ],
    'billiards_dwd.dim_member_card_account_ex': [
        ('able_share_member_discount', 'BOOLEAN', '是否可共享会员折扣'),
        ('electricity_deduct_radio', 'NUMERIC(18,4)', '电费扣减比例'),
        ('electricity_discount', 'NUMERIC(18,4)', '电费折扣'),
        ('electricity_card_deduct', 'BOOLEAN', '电费卡扣'),
        ('recharge_freeze_balance', 'NUMERIC(18,2)', '充值冻结余额'),
    ],
    # Member dimension.
    'billiards_dwd.dim_member': [
        ('pay_money_sum', 'NUMERIC(18,2)', '累计支付金额'),
        ('recharge_money_sum', 'NUMERIC(18,2)', '累计充值金额'),
    ],
    'billiards_dwd.dim_member_ex': [
        ('person_tenant_org_id', 'BIGINT', '人员租户组织ID'),
        ('person_tenant_org_name', 'TEXT', '人员租户组织名称'),
        ('register_source', 'TEXT', '注册来源'),
    ],
    # Member balance change log.
    'billiards_dwd.dwd_member_balance_change': [
        ('principal_before', 'NUMERIC(18,2)', '变动前本金'),
        ('principal_after', 'NUMERIC(18,2)', '变动后本金'),
    ],
    'billiards_dwd.dwd_member_balance_change_ex': [
        ('principal_data', 'TEXT', '本金变动数据'),
    ],
    # Group-buy package dimension.
    'billiards_dwd.dim_groupbuy_package': [
        ('sort', 'INT', '排序'),
        ('is_first_limit', 'BOOLEAN', '是否首单限制'),
    ],
    'billiards_dwd.dim_groupbuy_package_ex': [
        ('tenant_coupon_sale_order_item_id', 'BIGINT', '租户券销售订单项ID'),
    ],
    # Store goods dimension.
    'billiards_dwd.dim_store_goods': [
        ('commodity_code', 'TEXT', '商品编码'),
        ('not_sale', 'BOOLEAN', '是否停售'),
    ],
    # Table dimension.
    'billiards_dwd.dim_table': [
        ('order_id', 'BIGINT', '订单ID'),
    ],
    # Tenant goods dimension.
    'billiards_dwd.dim_tenant_goods': [
        ('not_sale', 'BOOLEAN', '是否停售'),
    ],
    # Assistant cancellation records.
    'billiards_dwd.dwd_assistant_cancel_log': [
        ('tenant_id', 'BIGINT', '租户ID'),
    ],
    # Goods sale log.
    'billiards_dwd.dwd_goods_sale_log': [
        ('coupon_share_money', 'NUMERIC(18,2)', '优惠券分摊金额'),
    ],
    # Payment log.
    'billiards_dwd.dwd_payment': [
        ('tenant_id', 'BIGINT', '租户ID'),
    ],
}
def get_existing_columns(conn, schema, table):
    """Return the set of lower-cased column names already present on a table."""
    query = """
    SELECT column_name
    FROM information_schema.columns
    WHERE table_schema = %s AND table_name = %s
    """
    with conn.cursor() as cur:
        cur.execute(query, (schema, table))
        rows = cur.fetchall()
    # Lower-case so membership checks against planned columns are case-insensitive.
    return {name.lower() for (name,) in rows}
def table_exists(conn, schema, table):
    """Return True when *schema.table* exists in the current database."""
    query = """
    SELECT EXISTS (
        SELECT 1 FROM information_schema.tables
        WHERE table_schema = %s AND table_name = %s
    )
    """
    with conn.cursor() as cur:
        cur.execute(query, (schema, table))
        (exists,) = cur.fetchone()
    return exists
def add_column(conn, full_table, col_name, col_type, comment):
    """Add one column to *full_table* and attach a column COMMENT.

    Args:
        conn: open psycopg2 connection; committed after both statements.
        full_table: schema-qualified table name, e.g. ``schema.table``.
        col_name: column to add (idempotent thanks to IF NOT EXISTS).
        col_type: SQL type of the new column.
        comment: human-readable column description.
    """
    sql = f'ALTER TABLE {full_table} ADD COLUMN IF NOT EXISTS "{col_name}" {col_type}'
    # COMMENT ON cannot take bind parameters; double any embedded single quote
    # so a quote in the comment text cannot break the statement.
    safe_comment = comment.replace("'", "''")
    comment_sql = f"COMMENT ON COLUMN {full_table}.\"{col_name}\" IS '{safe_comment}'"
    with conn.cursor() as cur:
        cur.execute(sql)
        cur.execute(comment_sql)
    conn.commit()
    print(f" [OK] 添加列: {col_name} ({col_type})")
def main():
    """Add every missing column defined in MISSING_COLUMNS.

    Tables that do not exist are reported and skipped; columns already
    present are skipped. The connection is closed even when an ALTER or
    COMMENT statement raises.
    """
    conn = psycopg2.connect(DSN)
    print("=" * 80)
    print("添加缺失的 DWD 列")
    print("=" * 80)
    total_added = 0
    total_skipped = 0
    tables_not_found = []
    try:
        for full_table, columns in MISSING_COLUMNS.items():
            schema, table = full_table.split('.')
            if not table_exists(conn, schema, table):
                print(f"\n[跳过] 表不存在: {full_table}")
                tables_not_found.append(full_table)
                continue
            print(f"\n处理表: {full_table}")
            existing = get_existing_columns(conn, schema, table)
            for col_name, col_type, comment in columns:
                if col_name.lower() in existing:
                    print(f" [跳过] 列已存在: {col_name}")
                    total_skipped += 1
                else:
                    add_column(conn, full_table, col_name, col_type, comment)
                    total_added += 1
    finally:
        # Release the connection even if a DDL statement failed mid-run.
        conn.close()
    print("\n" + "=" * 80)
    print(f"完成: 添加 {total_added} 列, 跳过 {total_skipped}")
    if tables_not_found:
        print(f"未找到的表: {len(tables_not_found)}")
        for t in tables_not_found:
            print(f" - {t}")
    print("=" * 80)


if __name__ == '__main__':
    main()

View File

@@ -0,0 +1,162 @@
# -*- coding: utf-8 -*-
"""
Add missing ODS columns to the database.
"""
import os

import psycopg2

# Database DSN. Prefer the PG_DSN environment variable so credentials are not
# committed to source control; the literal below is kept only as a local
# development fallback. NOTE(review): rotate/remove the embedded password.
DSN = os.environ.get(
    'PG_DSN',
    'postgresql://local-Python:Neo-local-1991125@100.64.0.4:5432/LLZQ-test',
)
# Missing ODS column definitions: table name -> [(column name, SQL type, comment)].
MISSING_COLUMNS = {
    'billiards_ods.settlement_records': [
        ('electricityadjustmoney', 'NUMERIC(18,2)', '电费调整金额'),
        ('electricitymoney', 'NUMERIC(18,2)', '电费金额'),
        ('mervousalesamount', 'NUMERIC(18,2)', '商户券销售额'),
        ('plcouponsaleamount', 'NUMERIC(18,2)', '平台券销售额'),
        ('realelectricitymoney', 'NUMERIC(18,2)', '实际电费金额'),
        ('settlelist', 'JSONB', '结算明细列表'),
    ],
    'billiards_ods.recharge_settlements': [
        ('electricityadjustmoney', 'NUMERIC(18,2)', '电费调整金额'),
        ('electricitymoney', 'NUMERIC(18,2)', '电费金额'),
        ('mervousalesamount', 'NUMERIC(18,2)', '商户券销售额'),
        ('plcouponsaleamount', 'NUMERIC(18,2)', '平台券销售额'),
        ('realelectricitymoney', 'NUMERIC(18,2)', '实际电费金额'),
        ('settlelist', 'JSONB', '结算明细列表'),
    ],
    'billiards_ods.table_fee_transactions': [
        ('activity_discount_amount', 'NUMERIC(18,2)', '活动折扣金额'),
        ('order_consumption_type', 'INT', '订单消费类型'),
        ('real_service_money', 'NUMERIC(18,2)', '实际服务费金额'),
    ],
    'billiards_ods.assistant_service_records': [
        ('assistantteamname', 'TEXT', '助教团队名称'),
        ('real_service_money', 'NUMERIC(18,2)', '实际服务费金额'),
    ],
    'billiards_ods.group_buy_redemption_records': [
        ('assistant_service_share_money', 'NUMERIC(18,2)', '助教服务分摊金额'),
        ('assistant_share_money', 'NUMERIC(18,2)', '助教分摊金额'),
        ('coupon_sale_id', 'BIGINT', '优惠券销售ID'),
        ('good_service_share_money', 'NUMERIC(18,2)', '商品服务分摊金额'),
        ('goods_share_money', 'NUMERIC(18,2)', '商品分摊金额'),
        ('member_discount_money', 'NUMERIC(18,2)', '会员折扣金额'),
        ('recharge_share_money', 'NUMERIC(18,2)', '充值分摊金额'),
        ('table_service_share_money', 'NUMERIC(18,2)', '台费服务分摊金额'),
        ('table_share_money', 'NUMERIC(18,2)', '台费分摊金额'),
    ],
    'billiards_ods.table_fee_discount_records': [
        ('area_type_id', 'BIGINT', '区域类型ID'),
        ('charge_free', 'BOOLEAN', '是否免费'),
        ('site_table_area_id', 'BIGINT', '门店台区ID'),
        ('site_table_area_name', 'TEXT', '门店台区名称'),
        ('sitename', 'TEXT', '门店名称'),
        ('table_name', 'TEXT', '台桌名称'),
        ('table_price', 'NUMERIC(18,2)', '台桌价格'),
        ('tenant_name', 'TEXT', '租户名称'),
    ],
    'billiards_ods.member_stored_value_cards': [
        ('able_share_member_discount', 'BOOLEAN', '是否可共享会员折扣'),
        ('electricity_deduct_radio', 'NUMERIC(18,4)', '电费扣减比例'),
        ('electricity_discount', 'NUMERIC(18,4)', '电费折扣'),
        ('electricitycarddeduct', 'BOOLEAN', '电费卡扣'),
        ('member_grade', 'INT', '会员等级'),
        ('principal_balance', 'NUMERIC(18,2)', '本金余额'),
        ('rechargefreezebalance', 'NUMERIC(18,2)', '充值冻结余额'),
    ],
    'billiards_ods.member_profiles': [
        ('pay_money_sum', 'NUMERIC(18,2)', '累计支付金额'),
        ('person_tenant_org_id', 'BIGINT', '人员租户组织ID'),
        ('person_tenant_org_name', 'TEXT', '人员租户组织名称'),
        ('recharge_money_sum', 'NUMERIC(18,2)', '累计充值金额'),
        ('register_source', 'TEXT', '注册来源'),
    ],
    'billiards_ods.member_balance_changes': [
        ('principal_after', 'NUMERIC(18,2)', '变动后本金'),
        ('principal_before', 'NUMERIC(18,2)', '变动前本金'),
        ('principal_data', 'TEXT', '本金变动数据'),
    ],
    'billiards_ods.group_buy_packages': [
        ('is_first_limit', 'BOOLEAN', '是否首单限制'),
        ('sort', 'INT', '排序'),
        ('tenantcouponsaleorderitemid', 'BIGINT', '租户券销售订单项ID'),
    ],
    'billiards_ods.store_goods_master': [
        ('commodity_code', 'TEXT', '商品编码'),
        ('not_sale', 'BOOLEAN', '是否停售'),
    ],
    'billiards_ods.assistant_cancellation_records': [
        ('tenant_id', 'BIGINT', '租户ID'),
    ],
    'billiards_ods.store_goods_sales_records': [
        ('coupon_share_money', 'NUMERIC(18,2)', '优惠券分摊金额'),
    ],
    'billiards_ods.payment_transactions': [
        ('tenant_id', 'BIGINT', '租户ID'),
    ],
    'billiards_ods.site_tables_master': [
        ('order_id', 'BIGINT', '订单ID'),
    ],
    'billiards_ods.tenant_goods_master': [
        ('not_sale', 'BOOLEAN', '是否停售'),
    ],
}
def get_existing_columns(conn, schema, table):
    """Return the set of lower-cased column names already present on a table."""
    query = """
    SELECT column_name
    FROM information_schema.columns
    WHERE table_schema = %s AND table_name = %s
    """
    with conn.cursor() as cur:
        cur.execute(query, (schema, table))
        rows = cur.fetchall()
    # Lower-case so membership checks against planned columns are case-insensitive.
    return {name.lower() for (name,) in rows}
def add_column(conn, full_table, col_name, col_type, comment):
    """Add one column to *full_table* and attach a column COMMENT.

    Args:
        conn: open psycopg2 connection; committed after both statements.
        full_table: schema-qualified table name ('schema.table').
        col_name: column to add (idempotent thanks to IF NOT EXISTS).
        col_type: SQL type of the new column.
        comment: human-readable column description.
    """
    # (Removed an unused `full_table.split('.')` unpack from the original.)
    sql = f'ALTER TABLE {full_table} ADD COLUMN IF NOT EXISTS "{col_name}" {col_type}'
    # COMMENT ON cannot take bind parameters; double any embedded single quote
    # so a quote in the comment text cannot break the statement.
    safe_comment = comment.replace("'", "''")
    comment_sql = f"COMMENT ON COLUMN {full_table}.\"{col_name}\" IS '{safe_comment}'"
    with conn.cursor() as cur:
        cur.execute(sql)
        cur.execute(comment_sql)
    conn.commit()
    print(f" [OK] 添加列: {col_name} ({col_type})")
def main():
    """Add every missing ODS column, skipping columns already present.

    Prints a per-table progress log and a final summary. The connection is
    closed even when an ALTER or COMMENT statement raises (the original
    leaked it on error).
    """
    conn = psycopg2.connect(DSN)
    print("=" * 80)
    print("添加缺失的 ODS 列")
    print("=" * 80)
    total_added = 0
    total_skipped = 0
    try:
        for full_table, columns in MISSING_COLUMNS.items():
            schema, table = full_table.split('.')
            print(f"\n处理表: {full_table}")
            existing = get_existing_columns(conn, schema, table)
            for col_name, col_type, comment in columns:
                if col_name.lower() in existing:
                    print(f" [跳过] 列已存在: {col_name}")
                    total_skipped += 1
                else:
                    add_column(conn, full_table, col_name, col_type, comment)
                    total_added += 1
    finally:
        # Release the connection even if a DDL statement failed mid-run.
        conn.close()
    print("\n" + "=" * 80)
    print(f"完成: 添加 {total_added} 列, 跳过 {total_skipped}")
    print("=" * 80)


if __name__ == '__main__':
    main()

View File

@@ -0,0 +1,37 @@
# -*- coding: utf-8 -*-
"""
Add the remaining DWD columns.
"""
import os

import psycopg2

# Database DSN. Prefer the PG_DSN environment variable so credentials are not
# committed to source control; the literal below is kept only as a local
# development fallback. NOTE(review): rotate/remove the embedded password.
DSN = os.environ.get(
    'PG_DSN',
    'postgresql://local-Python:Neo-local-1991125@100.64.0.4:5432/LLZQ-test',
)
# Corrected table names: same columns as the earlier attempt, targeting the
# DWD tables that actually exist.
MISSING_COLUMNS = {
    'billiards_dwd.dwd_assistant_trash_event': [
        ('tenant_id', 'BIGINT', '租户ID'),
    ],
    'billiards_dwd.dwd_store_goods_sale': [
        ('coupon_share_money', 'NUMERIC(18,2)', '优惠券分摊金额'),
    ],
}
def add_column(conn, full_table, col_name, col_type, comment):
    """Add one column to *full_table* and attach a column COMMENT.

    Args:
        conn: open psycopg2 connection; committed after both statements.
        full_table: schema-qualified table name ('schema.table').
        col_name: column to add (idempotent thanks to IF NOT EXISTS).
        col_type: SQL type of the new column.
        comment: human-readable column description.
    """
    sql = f'ALTER TABLE {full_table} ADD COLUMN IF NOT EXISTS "{col_name}" {col_type}'
    # COMMENT ON cannot take bind parameters; double any embedded single quote
    # so a quote in the comment text cannot break the statement.
    safe_comment = comment.replace("'", "''")
    comment_sql = f"COMMENT ON COLUMN {full_table}.\"{col_name}\" IS '{safe_comment}'"
    with conn.cursor() as cur:
        cur.execute(sql)
        cur.execute(comment_sql)
    conn.commit()
    print(f" [OK] {full_table}.{col_name} ({col_type})")
def main():
    """Add every column listed in MISSING_COLUMNS, then report completion.

    The connection is closed even when an ALTER statement raises (the
    original leaked it on error).
    """
    conn = psycopg2.connect(DSN)
    try:
        for full_table, columns in MISSING_COLUMNS.items():
            for col_name, col_type, comment in columns:
                add_column(conn, full_table, col_name, col_type, comment)
    finally:
        conn.close()
    print("Done!")


if __name__ == '__main__':
    main()

2305
tmp/api_ods_comparison.json Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,355 @@
{
"generated_at": "2026-02-02T19:00:26.972834",
"missing_fields": [
{
"task_code": "ODS_SETTLEMENT_RECORDS",
"table_name": "billiards_ods.settlement_records",
"endpoint": "/Site/GetAllOrderSettleList",
"missing_fields": [
"electricityadjustmoney",
"electricitymoney",
"mervousalesamount",
"plcouponsaleamount",
"realelectricitymoney",
"settlelist",
"tenant_id"
]
},
{
"task_code": "ODS_TABLE_USE",
"table_name": "billiards_ods.table_fee_transactions",
"endpoint": "/Site/GetSiteTableOrderDetails",
"missing_fields": [
"activity_discount_amount",
"order_consumption_type",
"real_service_money"
]
},
{
"task_code": "ODS_ASSISTANT_LEDGER",
"table_name": "billiards_ods.assistant_service_records",
"endpoint": "/AssistantPerformance/GetOrderAssistantDetails",
"missing_fields": [
"assistantteamname",
"real_service_money"
]
},
{
"task_code": "ODS_ASSISTANT_ABOLISH",
"table_name": "billiards_ods.assistant_cancellation_records",
"endpoint": "/AssistantPerformance/GetAbolitionAssistant",
"missing_fields": [
"tenant_id"
]
},
{
"task_code": "ODS_STORE_GOODS_SALES",
"table_name": "billiards_ods.store_goods_sales_records",
"endpoint": "/TenantGoods/GetGoodsSalesList",
"missing_fields": [
"coupon_share_money"
]
},
{
"task_code": "ODS_PAYMENT",
"table_name": "billiards_ods.payment_transactions",
"endpoint": "/PayLog/GetPayLogListPage",
"missing_fields": [
"tenant_id"
]
},
{
"task_code": "ODS_MEMBER",
"table_name": "billiards_ods.member_profiles",
"endpoint": "/MemberProfile/GetTenantMemberList",
"missing_fields": [
"pay_money_sum",
"person_tenant_org_id",
"person_tenant_org_name",
"recharge_money_sum",
"register_source"
]
},
{
"task_code": "ODS_MEMBER_CARD",
"table_name": "billiards_ods.member_stored_value_cards",
"endpoint": "/MemberProfile/GetTenantMemberCardList",
"missing_fields": [
"able_share_member_discount",
"electricity_deduct_radio",
"electricity_discount",
"electricitycarddeduct",
"member_grade",
"principal_balance",
"rechargefreezebalance"
]
},
{
"task_code": "ODS_MEMBER_BALANCE",
"table_name": "billiards_ods.member_balance_changes",
"endpoint": "/MemberProfile/GetMemberCardBalanceChange",
"missing_fields": [
"principal_after",
"principal_before",
"principal_data"
]
},
{
"task_code": "ODS_RECHARGE_SETTLE",
"table_name": "billiards_ods.recharge_settlements",
"endpoint": "/Site/GetRechargeSettleList",
"missing_fields": [
"electricityadjustmoney",
"electricitymoney",
"mervousalesamount",
"plcouponsaleamount",
"realelectricitymoney",
"settlelist",
"tenant_id"
]
},
{
"task_code": "ODS_GROUP_PACKAGE",
"table_name": "billiards_ods.group_buy_packages",
"endpoint": "/PackageCoupon/QueryPackageCouponList",
"missing_fields": [
"is_first_limit",
"sort",
"tableareanamelist",
"tenantcouponsaleorderitemid",
"tenanttableareaidlist"
]
},
{
"task_code": "ODS_GROUP_BUY_REDEMPTION",
"table_name": "billiards_ods.group_buy_redemption_records",
"endpoint": "/Site/GetSiteTableUseDetails",
"missing_fields": [
"assistant_service_share_money",
"assistant_share_money",
"coupon_sale_id",
"good_service_share_money",
"goods_share_money",
"member_discount_money",
"recharge_share_money",
"table_service_share_money",
"table_share_money"
]
},
{
"task_code": "ODS_TABLES",
"table_name": "billiards_ods.site_tables_master",
"endpoint": "/Table/GetSiteTables",
"missing_fields": [
"order_id"
]
},
{
"task_code": "ODS_STORE_GOODS",
"table_name": "billiards_ods.store_goods_master",
"endpoint": "/TenantGoods/GetGoodsInventoryList",
"missing_fields": [
"commodity_code",
"not_sale"
]
},
{
"task_code": "ODS_TABLE_FEE_DISCOUNT",
"table_name": "billiards_ods.table_fee_discount_records",
"endpoint": "/Site/GetTaiFeeAdjustList",
"missing_fields": [
"area_type_id",
"charge_free",
"site_table_area_id",
"site_table_area_name",
"sitename",
"table_name",
"table_price",
"tenant_name"
]
},
{
"task_code": "ODS_TENANT_GOODS",
"table_name": "billiards_ods.tenant_goods_master",
"endpoint": "/TenantGoods/QueryTenantGoods",
"missing_fields": [
"not_sale"
]
}
],
"zero_to_null_issues": [
{
"task_code": "ODS_TABLE_USE",
"table_name": "billiards_ods.table_fee_transactions",
"checked_rows": 100,
"issues": [
{
"column": "activity_discount_amount",
"count": 67,
"issue": "API 中的 0 值在 ODS 中变成了 NULL"
},
{
"column": "real_service_money",
"count": 67,
"issue": "API 中的 0 值在 ODS 中变成了 NULL"
}
]
},
{
"task_code": "ODS_ASSISTANT_LEDGER",
"table_name": "billiards_ods.assistant_service_records",
"checked_rows": 100,
"issues": [
{
"column": "real_service_money",
"count": 90,
"issue": "API 中的 0 值在 ODS 中变成了 NULL"
}
]
},
{
"task_code": "ODS_STORE_GOODS_SALES",
"table_name": "billiards_ods.store_goods_sales_records",
"checked_rows": 100,
"issues": [
{
"column": "coupon_share_money",
"count": 100,
"issue": "API 中的 0 值在 ODS 中变成了 NULL"
}
]
},
{
"task_code": "ODS_MEMBER",
"table_name": "billiards_ods.member_profiles",
"checked_rows": 100,
"issues": [
{
"column": "person_tenant_org_id",
"count": 96,
"issue": "API 中的 0 值在 ODS 中变成了 NULL"
},
{
"column": "pay_money_sum",
"count": 40,
"issue": "API 中的 0 值在 ODS 中变成了 NULL"
},
{
"column": "recharge_money_sum",
"count": 12,
"issue": "API 中的 0 值在 ODS 中变成了 NULL"
}
]
},
{
"task_code": "ODS_MEMBER_CARD",
"table_name": "billiards_ods.member_stored_value_cards",
"checked_rows": 100,
"issues": [
{
"column": "rechargefreezebalance",
"count": 100,
"issue": "API 中的 0 值在 ODS 中变成了 NULL"
},
{
"column": "principal_balance",
"count": 34,
"issue": "API 中的 0 值在 ODS 中变成了 NULL"
},
{
"column": "member_grade",
"count": 8,
"issue": "API 中的 0 值在 ODS 中变成了 NULL"
}
]
},
{
"task_code": "ODS_MEMBER_BALANCE",
"table_name": "billiards_ods.member_balance_changes",
"checked_rows": 100,
"issues": [
{
"column": "principal_after",
"count": 18,
"issue": "API 中的 0 值在 ODS 中变成了 NULL"
},
{
"column": "principal_before",
"count": 18,
"issue": "API 中的 0 值在 ODS 中变成了 NULL"
}
]
},
{
"task_code": "ODS_GROUP_PACKAGE",
"table_name": "billiards_ods.group_buy_packages",
"checked_rows": 52,
"issues": [
{
"column": "tenantcouponsaleorderitemid",
"count": 52,
"issue": "API 中的 0 值在 ODS 中变成了 NULL"
}
]
},
{
"task_code": "ODS_GROUP_BUY_REDEMPTION",
"table_name": "billiards_ods.group_buy_redemption_records",
"checked_rows": 100,
"issues": [
{
"column": "assistant_service_share_money",
"count": 74,
"issue": "API 中的 0 值在 ODS 中变成了 NULL"
},
{
"column": "assistant_share_money",
"count": 74,
"issue": "API 中的 0 值在 ODS 中变成了 NULL"
},
{
"column": "coupon_sale_id",
"count": 74,
"issue": "API 中的 0 值在 ODS 中变成了 NULL"
},
{
"column": "good_service_share_money",
"count": 74,
"issue": "API 中的 0 值在 ODS 中变成了 NULL"
},
{
"column": "goods_share_money",
"count": 74,
"issue": "API 中的 0 值在 ODS 中变成了 NULL"
},
{
"column": "member_discount_money",
"count": 74,
"issue": "API 中的 0 值在 ODS 中变成了 NULL"
},
{
"column": "recharge_share_money",
"count": 74,
"issue": "API 中的 0 值在 ODS 中变成了 NULL"
},
{
"column": "table_service_share_money",
"count": 74,
"issue": "API 中的 0 值在 ODS 中变成了 NULL"
}
]
},
{
"task_code": "ODS_TABLES",
"table_name": "billiards_ods.site_tables_master",
"checked_rows": 100,
"issues": [
{
"column": "order_id",
"count": 19,
"issue": "API 中的 0 值在 ODS 中变成了 NULL"
}
]
}
]
}

View File

@@ -0,0 +1,283 @@
# -*- coding: utf-8 -*-
"""
Backfill missing DWD column values from the ODS layer.
"""
import os
import sys
from pathlib import Path

# Make the etl_billiards package importable when this script runs outside it.
project_root = Path(__file__).parent.parent / "etl_billiards"
sys.path.insert(0, str(project_root))

from dotenv import load_dotenv

# Load PG_DSN and other settings from the project's .env file.
load_dotenv(project_root / ".env")

from database.connection import DatabaseConnection
# DWD backfill configuration:
# (dwd_table, ods_table, join_condition, [(dwd_col, ods_col), ...])
# Each entry copies ODS values into the DWD table via the given d/o join.
# NOTE(review): join conditions assume a 1:1 id mapping between DWD and ODS
# rows — confirm against the loaders before re-running.
BACKFILL_CONFIGS = [
    # dwd_settlement_head
    (
        "billiards_dwd.dwd_settlement_head",
        "billiards_ods.settlement_records",
        "d.order_settle_id = o.id",
        [
            ("pl_coupon_sale_amount", "plcouponsaleamount"),
            ("mervou_sales_amount", "mervousalesamount"),
            ("electricity_money", "electricitymoney"),
            ("real_electricity_money", "realelectricitymoney"),
            ("electricity_adjust_money", "electricityadjustmoney"),
        ]
    ),
    # dwd_recharge_order
    (
        "billiards_dwd.dwd_recharge_order",
        "billiards_ods.recharge_settlements",
        "d.recharge_order_id = o.id",
        [
            ("pl_coupon_sale_amount", "plcouponsaleamount"),
            ("mervou_sales_amount", "mervousalesamount"),
            ("electricity_money", "electricitymoney"),
            ("real_electricity_money", "realelectricitymoney"),
            ("electricity_adjust_money", "electricityadjustmoney"),
        ]
    ),
    # dwd_member_balance_change
    (
        "billiards_dwd.dwd_member_balance_change",
        "billiards_ods.member_balance_changes",
        "d.balance_change_id = o.id",
        [
            ("principal_before", "principal_before"),
            ("principal_after", "principal_after"),
            ("principal_change_amount", "principal_data"),
        ]
    ),
    # dim_member
    (
        "billiards_dwd.dim_member",
        "billiards_ods.member_profiles",
        "d.member_id = o.id",
        [
            ("pay_money_sum", "pay_money_sum"),
            ("recharge_money_sum", "recharge_money_sum"),
        ]
    ),
    # dim_member_ex
    (
        "billiards_dwd.dim_member_ex",
        "billiards_ods.member_profiles",
        "d.member_id = o.id",
        [
            ("person_tenant_org_id", "person_tenant_org_id"),
            ("person_tenant_org_name", "person_tenant_org_name"),
            ("register_source", "register_source"),
        ]
    ),
    # dim_member_card_account
    (
        "billiards_dwd.dim_member_card_account",
        "billiards_ods.member_stored_value_cards",
        "d.member_card_id = o.id",
        [
            ("principal_balance", "principal_balance"),
            ("member_grade", "member_grade"),
        ]
    ),
    # dim_member_card_account_ex
    (
        "billiards_dwd.dim_member_card_account_ex",
        "billiards_ods.member_stored_value_cards",
        "d.member_card_id = o.id",
        [
            ("able_share_member_discount", "able_share_member_discount"),
            ("electricity_deduct_radio", "electricity_deduct_radio"),
            ("electricity_discount", "electricity_discount"),
            ("electricity_card_deduct", "electricitycarddeduct"),
            ("recharge_freeze_balance", "rechargefreezebalance"),
        ]
    ),
    # dwd_table_fee_log
    (
        "billiards_dwd.dwd_table_fee_log",
        "billiards_ods.table_fee_transactions",
        "d.table_fee_log_id = o.id",
        [
            ("activity_discount_amount", "activity_discount_amount"),
            ("real_service_money", "real_service_money"),
        ]
    ),
    # dwd_table_fee_log_ex
    (
        "billiards_dwd.dwd_table_fee_log_ex",
        "billiards_ods.table_fee_transactions",
        "d.table_fee_log_id = o.id",
        [
            ("order_consumption_type", "order_consumption_type"),
        ]
    ),
    # dwd_assistant_service_log
    (
        "billiards_dwd.dwd_assistant_service_log",
        "billiards_ods.assistant_service_records",
        "d.assistant_service_id = o.id",
        [
            ("real_service_money", "real_service_money"),
        ]
    ),
    # dwd_assistant_service_log_ex
    (
        "billiards_dwd.dwd_assistant_service_log_ex",
        "billiards_ods.assistant_service_records",
        "d.assistant_service_id = o.id",
        [
            ("assistant_team_name", "assistantteamname"),
        ]
    ),
    # dwd_store_goods_sale
    (
        "billiards_dwd.dwd_store_goods_sale",
        "billiards_ods.store_goods_sales_records",
        "d.store_goods_sale_id = o.id",
        [
            ("coupon_share_money", "coupon_share_money"),
        ]
    ),
    # dwd_groupbuy_redemption
    (
        "billiards_dwd.dwd_groupbuy_redemption",
        "billiards_ods.group_buy_redemption_records",
        "d.redemption_id = o.id",
        [
            ("coupon_sale_id", "coupon_sale_id"),
            ("member_discount_money", "member_discount_money"),
        ]
    ),
    # dwd_groupbuy_redemption_ex
    (
        "billiards_dwd.dwd_groupbuy_redemption_ex",
        "billiards_ods.group_buy_redemption_records",
        "d.redemption_id = o.id",
        [
            ("assistant_share_money", "assistant_share_money"),
            ("table_share_money", "table_share_money"),
            ("goods_share_money", "goods_share_money"),
            ("recharge_share_money", "recharge_share_money"),
        ]
    ),
    # dim_table
    (
        "billiards_dwd.dim_table",
        "billiards_ods.site_tables_master",
        "d.table_id = o.id",
        [
            ("order_id", "order_id"),
        ]
    ),
    # dim_store_goods
    (
        "billiards_dwd.dim_store_goods",
        "billiards_ods.store_goods_master",
        "d.site_goods_id = o.id",
        [
            ("commodity_code", "commodity_code"),
            ("not_sale", "not_sale"),
        ]
    ),
    # dim_tenant_goods
    (
        "billiards_dwd.dim_tenant_goods",
        "billiards_ods.tenant_goods_master",
        "d.tenant_goods_id = o.id",
        [
            ("not_sale", "not_sale"),
        ]
    ),
    # dim_groupbuy_package
    (
        "billiards_dwd.dim_groupbuy_package",
        "billiards_ods.group_buy_packages",
        "d.groupbuy_package_id = o.id",
        [
            ("sort", "sort"),
            ("is_first_limit", "is_first_limit"),
        ]
    ),
]
def column_exists(db, table: str, column: str) -> bool:
    """Return True when *column* exists on the schema-qualified *table*."""
    schema_name, table_name = table.split(".")
    rows = db.query("""
    SELECT 1 FROM information_schema.columns
    WHERE table_schema = %s AND table_name = %s AND column_name = %s
    """, (schema_name, table_name, column.lower()))
    # Any returned row means the column is present.
    return bool(rows)
def main():
    """Backfill NULL DWD column values from the matching ODS columns.

    For every BACKFILL_CONFIGS entry, issues an UPDATE joining DWD ("d") to
    ODS ("o") and copies the ODS value wherever the DWD value is still NULL,
    so already populated rows are never overwritten. Per-column failures are
    rolled back, collected, and reported; the DB connection is always closed.
    """
    dsn = os.getenv("PG_DSN")
    if not dsn:
        print("Error: PG_DSN not set")
        return
    db = DatabaseConnection(dsn)
    print("=" * 70)
    print("DWD Backfill from ODS Script")
    print("=" * 70)
    total_updates = 0
    errors = []
    try:
        for dwd_table, ods_table, join_cond, columns in BACKFILL_CONFIGS:
            print(f"\n[{dwd_table}]")
            for dwd_col, ods_col in columns:
                # Skip missing columns on either side instead of letting the
                # UPDATE fail outright.
                if not column_exists(db, dwd_table, dwd_col):
                    print(f" {dwd_col}: SKIP (DWD column not found)")
                    continue
                if not column_exists(db, ods_table, ods_col):
                    print(f" {dwd_col}: SKIP (ODS column {ods_col} not found)")
                    continue
                sql = f"""
            UPDATE {dwd_table} d
            SET "{dwd_col}" = o."{ods_col}"
            FROM {ods_table} o
            WHERE {join_cond}
              AND d."{dwd_col}" IS NULL
              AND o."{ods_col}" IS NOT NULL
        """
                try:
                    db.execute(sql)
                    db.commit()
                    # Report how many rows are populated after the backfill.
                    count_sql = f'SELECT COUNT(*) as cnt FROM {dwd_table} WHERE "{dwd_col}" IS NOT NULL'
                    cnt = db.query(count_sql)[0]["cnt"]
                    print(f" {dwd_col}: OK (now {cnt} non-null)")
                    total_updates += 1
                except Exception as e:
                    db.rollback()
                    err_msg = str(e).split("\n")[0][:80]
                    print(f" {dwd_col}: ERROR - {err_msg}")
                    errors.append((dwd_table, dwd_col, err_msg))
    finally:
        # Release the connection even if an unexpected error escapes the loop
        # (the original only closed it on the success path).
        db.close()
    print("\n" + "=" * 70)
    print(f"Completed: {total_updates} columns processed")
    if errors:
        print(f"Errors: {len(errors)}")
        for t, c, e in errors:
            print(f" - {t}.{c}: {e}")


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,208 @@
# -*- coding: utf-8 -*-
"""
从 ODS payload 回填缺失的列值
"""
import os
import sys
from pathlib import Path
project_root = Path(__file__).parent.parent / "etl_billiards"
sys.path.insert(0, str(project_root))
from dotenv import load_dotenv
load_dotenv(project_root / ".env")
from database.connection import DatabaseConnection
# Backfill configuration: (table name, [(db_col, payload_jsonb_expr), ...]).
# Each expression extracts the value from the row's JSONB ``payload`` column.
BACKFILL_CONFIGS = [
    # settlement_records - fields nested under settleList.
    # NOTE(review): if settleList is a JSON array, ``->'settleList'->>'key'``
    # returns NULL — confirm the payload shape before relying on these.
    ("billiards_ods.settlement_records", [
        ("plcouponsaleamount", "(payload->'settleList')->>'plCouponSaleAmount'"),
        ("mervousalesamount", "(payload->'settleList')->>'merVouSalesAmount'"),
        ("electricitymoney", "(payload->'settleList')->>'electricityMoney'"),
        ("realelectricitymoney", "(payload->'settleList')->>'realElectricityMoney'"),
        ("electricityadjustmoney", "(payload->'settleList')->>'electricityAdjustMoney'"),
    ]),
    # recharge_settlements
    ("billiards_ods.recharge_settlements", [
        ("plcouponsaleamount", "(payload->'settleList')->>'plCouponSaleAmount'"),
        ("mervousalesamount", "(payload->'settleList')->>'merVouSalesAmount'"),
        ("electricitymoney", "(payload->'settleList')->>'electricityMoney'"),
        ("realelectricitymoney", "(payload->'settleList')->>'realElectricityMoney'"),
        ("electricityadjustmoney", "(payload->'settleList')->>'electricityAdjustMoney'"),
    ]),
    # member_balance_changes
    ("billiards_ods.member_balance_changes", [
        ("principal_before", "payload->>'principal_before'"),
        ("principal_after", "payload->>'principal_after'"),
        ("principal_data", "payload->>'principal_data'"),
    ]),
    # member_stored_value_cards
    ("billiards_ods.member_stored_value_cards", [
        ("principal_balance", "payload->>'principal_balance'"),
        ("member_grade", "payload->>'member_grade'"),
        ("rechargefreezebalance", "payload->>'rechargeFreezeBalance'"),
        ("able_share_member_discount", "payload->>'able_share_member_discount'"),
        ("electricity_deduct_radio", "payload->>'electricity_deduct_radio'"),
        ("electricity_discount", "payload->>'electricity_discount'"),
        ("electricitycarddeduct", "payload->>'electricityCardDeduct'"),
    ]),
    # member_profiles
    ("billiards_ods.member_profiles", [
        ("pay_money_sum", "payload->>'pay_money_sum'"),
        ("recharge_money_sum", "payload->>'recharge_money_sum'"),
        ("person_tenant_org_id", "payload->>'person_tenant_org_id'"),
        ("person_tenant_org_name", "payload->>'person_tenant_org_name'"),
        ("register_source", "payload->>'register_source'"),
    ]),
    # table_fee_transactions
    ("billiards_ods.table_fee_transactions", [
        ("activity_discount_amount", "payload->>'activity_discount_amount'"),
        ("real_service_money", "payload->>'real_service_money'"),
        ("order_consumption_type", "payload->>'order_consumption_type'"),
    ]),
    # assistant_service_records
    ("billiards_ods.assistant_service_records", [
        ("real_service_money", "payload->>'real_service_money'"),
        ("assistantteamname", "payload->>'assistantTeamName'"),
    ]),
    # store_goods_sales_records
    ("billiards_ods.store_goods_sales_records", [
        ("coupon_share_money", "payload->>'coupon_share_money'"),
    ]),
    # group_buy_redemption_records
    ("billiards_ods.group_buy_redemption_records", [
        ("coupon_sale_id", "payload->>'coupon_sale_id'"),
        ("member_discount_money", "payload->>'member_discount_money'"),
        ("assistant_share_money", "payload->>'assistant_share_money'"),
        ("table_share_money", "payload->>'table_share_money'"),
        ("goods_share_money", "payload->>'goods_share_money'"),
        ("recharge_share_money", "payload->>'recharge_share_money'"),
    ]),
    # site_tables_master
    ("billiards_ods.site_tables_master", [
        ("order_id", "payload->>'order_id'"),
    ]),
    # store_goods_master
    ("billiards_ods.store_goods_master", [
        ("commodity_code", "payload->>'commodity_code'"),
        ("not_sale", "payload->>'not_sale'"),
    ]),
    # table_fee_discount_records
    ("billiards_ods.table_fee_discount_records", [
        ("table_name", "payload->>'table_name'"),
        ("table_price", "payload->>'table_price'"),
        ("charge_free", "payload->>'charge_free'"),
        ("area_type_id", "payload->>'area_type_id'"),
        ("site_table_area_id", "payload->>'site_table_area_id'"),
        ("site_table_area_name", "payload->>'site_table_area_name'"),
    ]),
    # tenant_goods_master
    ("billiards_ods.tenant_goods_master", [
        ("not_sale", "payload->>'not_sale'"),
    ]),
    # group_buy_packages
    ("billiards_ods.group_buy_packages", [
        ("sort", "payload->>'sort'"),
        ("is_first_limit", "payload->>'is_first_limit'"),
    ]),
]
def column_exists(db, table: str, column: str) -> bool:
    """Return True when *column* exists on the schema-qualified *table*.

    Column names are compared lowercase, matching how PostgreSQL folds
    unquoted identifiers in information_schema.
    """
    schema_name, table_name = table.split(".")
    sql = (
        "SELECT 1 FROM information_schema.columns "
        "WHERE table_schema = %s AND table_name = %s AND column_name = %s"
    )
    rows = db.query(sql, (schema_name, table_name, column.lower()))
    return bool(rows)
def get_column_type(db, table: str, column: str) -> str:
    """Look up *column*'s data type in information_schema.

    Falls back to ``"text"`` when the column is not found, so callers can
    always build a cast expression.
    """
    schema_name, table_name = table.split(".")
    rows = db.query(
        "SELECT data_type FROM information_schema.columns "
        "WHERE table_schema = %s AND table_name = %s AND column_name = %s",
        (schema_name, table_name, column.lower()),
    )
    if not rows:
        return "text"
    return rows[0]["data_type"]
def main():
    """Backfill typed ODS columns from the raw JSON ``payload`` column.

    For every (table, column -> payload expression) pair in
    BACKFILL_CONFIGS, copies the payload value into the typed column —
    with a cast derived from the column's information_schema type —
    wherever the typed column is still NULL.  Each column is committed
    independently so one failure does not roll back the others.
    """
    dsn = os.getenv("PG_DSN")
    if not dsn:
        print("Error: PG_DSN not set")
        return
    db = DatabaseConnection(dsn)
    print("=" * 70)
    print("ODS Payload Backfill Script")
    print("=" * 70)
    total_updates = 0  # counts columns processed without error, not rows changed
    errors = []
    for table, columns in BACKFILL_CONFIGS:
        print(f"\n[{table}]")
        for db_col, payload_expr in columns:
            # Check column exists
            if not column_exists(db, table, db_col):
                print(f" {db_col}: SKIP (column not found)")
                continue
            # Get column type for proper casting
            col_type = get_column_type(db, table, db_col)
            # Build UPDATE SQL with proper type casting.  A bad payload value
            # (e.g. non-numeric text cast to numeric) surfaces as an error
            # below and is rolled back for this column only.
            if col_type in ("numeric", "double precision", "real", "decimal"):
                cast_expr = f"({payload_expr})::numeric"
            elif col_type in ("integer", "bigint", "smallint"):
                cast_expr = f"({payload_expr})::bigint"
            elif col_type == "boolean":
                cast_expr = f"({payload_expr})::boolean"
            elif col_type in ("timestamp", "timestamp with time zone", "timestamp without time zone"):
                cast_expr = f"({payload_expr})::timestamp"
            else:
                cast_expr = payload_expr  # text, keep as is
            sql = f"""
                UPDATE {table}
                SET "{db_col}" = {cast_expr}
                WHERE "{db_col}" IS NULL
                  AND {payload_expr} IS NOT NULL
            """
            try:
                db.execute(sql)
                db.commit()
                # Count updated
                # NOTE(review): this counts ALL non-null rows in the column,
                # not just rows touched by this UPDATE — the printed number
                # is a post-backfill fill level, not an update count.
                count_sql = f"""
                    SELECT COUNT(*) as cnt FROM {table}
                    WHERE "{db_col}" IS NOT NULL
                """
                cnt = db.query(count_sql)[0]["cnt"]
                print(f" {db_col}: OK (now {cnt} non-null)")
                total_updates += 1
            except Exception as e:
                db.rollback()
                err_msg = str(e).split("\n")[0][:80]
                print(f" {db_col}: ERROR - {err_msg}")
                errors.append((table, db_col, err_msg))
    print("\n" + "=" * 70)
    print(f"Completed: {total_updates} columns processed")
    if errors:
        print(f"Errors: {len(errors)}")
        for t, c, e in errors:
            print(f" - {t}.{c}: {e}")
    db.close()
if __name__ == "__main__":
    main()

57
tmp/bd_manual_diff.json Normal file
View File

@@ -0,0 +1,57 @@
[
{
"table": "dim_member_ex",
"missing_in_doc": [],
"extra_in_doc": [
"1"
],
"type_mismatches": [],
"doc_path": "etl_billiards\\docs\\bd_manual\\Ex\\BD_manual_dim_member_ex.md"
},
{
"table": "dim_store_goods",
"missing_in_doc": [],
"extra_in_doc": [
"1"
],
"type_mismatches": [],
"doc_path": "etl_billiards\\docs\\bd_manual\\main\\BD_manual_dim_store_goods.md"
},
{
"table": "dim_table",
"missing_in_doc": [],
"extra_in_doc": [
"补时长"
],
"type_mismatches": [],
"doc_path": "etl_billiards\\docs\\bd_manual\\main\\BD_manual_dim_table.md"
},
{
"table": "dim_table_ex",
"missing_in_doc": [],
"extra_in_doc": [
"1"
],
"type_mismatches": [],
"doc_path": "etl_billiards\\docs\\bd_manual\\Ex\\BD_manual_dim_table_ex.md"
},
{
"table": "dwd_member_balance_change",
"missing_in_doc": [],
"extra_in_doc": [
"台费专用卡",
"最主要的消费卡种"
],
"type_mismatches": [],
"doc_path": "etl_billiards\\docs\\bd_manual\\main\\BD_manual_dwd_member_balance_change.md"
},
{
"table": "dwd_refund_ex",
"missing_in_doc": [],
"extra_in_doc": [
"1"
],
"type_mismatches": [],
"doc_path": "etl_billiards\\docs\\bd_manual\\Ex\\BD_manual_dwd_refund_ex.md"
}
]

295
tmp/check_api_ods_issues.py Normal file
View File

@@ -0,0 +1,295 @@
# -*- coding: utf-8 -*-
"""
Audit the API -> ODS stage:
1. Detect API fields that are missing as columns in the ODS tables.
2. Detect API zero values that were stored as NULL in ODS.
"""
import json
import os
import sys
from datetime import datetime
from decimal import Decimal
from pathlib import Path
# Add the project root so etl_billiards modules are importable.
project_root = Path(__file__).parent.parent / "etl_billiards"
sys.path.insert(0, str(project_root))
from dotenv import load_dotenv
load_dotenv(project_root / ".env")
from database.connection import DatabaseConnection
def load_api_ods_comparison():
    """Load the cached API-vs-ODS comparison JSON; ``{}`` when absent."""
    path = Path(__file__).parent / "api_ods_comparison.json"
    if not path.exists():
        return {}
    with open(path, "r", encoding="utf-8") as fh:
        return json.load(fh)
def get_ods_tables_mapping():
    """Map each ODS task code to its fully-qualified ODS table name."""
    pairs = [
        ("ODS_ASSISTANT_ACCOUNT", "assistant_accounts_master"),
        ("ODS_SETTLEMENT_RECORDS", "settlement_records"),
        ("ODS_TABLE_USE", "table_fee_transactions"),
        ("ODS_ASSISTANT_LEDGER", "assistant_service_records"),
        ("ODS_ASSISTANT_ABOLISH", "assistant_cancellation_records"),
        ("ODS_STORE_GOODS_SALES", "store_goods_sales_records"),
        ("ODS_PAYMENT", "payment_transactions"),
        ("ODS_REFUND", "refund_transactions"),
        ("ODS_PLATFORM_COUPON", "platform_coupon_redemption_records"),
        ("ODS_MEMBER", "member_profiles"),
        ("ODS_MEMBER_CARD", "member_stored_value_cards"),
        ("ODS_MEMBER_BALANCE", "member_balance_changes"),
        ("ODS_RECHARGE_SETTLE", "recharge_settlements"),
        ("ODS_GROUP_PACKAGE", "group_buy_packages"),
        ("ODS_GROUP_BUY_REDEMPTION", "group_buy_redemption_records"),
        ("ODS_INVENTORY_STOCK", "goods_stock_summary"),
        ("ODS_INVENTORY_CHANGE", "goods_stock_movements"),
        ("ODS_TABLES", "site_tables_master"),
        ("ODS_GOODS_CATEGORY", "stock_goods_category_tree"),
        ("ODS_STORE_GOODS", "store_goods_master"),
        ("ODS_TABLE_FEE_DISCOUNT", "table_fee_discount_records"),
        ("ODS_TENANT_GOODS", "tenant_goods_master"),
    ]
    # All ODS tables live in the billiards_ods schema.
    return {code: f"billiards_ods.{table}" for code, table in pairs}
def check_zero_to_null_issues(db: "DatabaseConnection", table_name: str, limit: int = 100):
    """Detect payload values of 0 that were stored as NULL in typed columns.

    Scans the *limit* most recent rows of *table_name*, comparing every
    numeric column against the same key (matched case-insensitively) in
    the raw JSON ``payload``.

    Returns ``{"error": ..., "issues": []}`` on query failure, otherwise
    ``{"issues": [...], "checked_rows": <n>}``.
    """
    # Resolve schema-qualified name; default to "public" when unqualified.
    schema, name = table_name.split(".", 1) if "." in table_name else ("public", table_name)
    col_sql = """
        SELECT column_name, data_type, udt_name
        FROM information_schema.columns
        WHERE table_schema = %s AND table_name = %s
        ORDER BY ordinal_position
    """
    try:
        cols = db.query(col_sql, (schema, name))
    except Exception as e:
        return {"error": str(e), "issues": []}
    # Only numeric columns can silently turn an explicit 0 into NULL.
    numeric_cols = [
        c["column_name"] for c in cols
        if c["data_type"] in ("integer", "bigint", "smallint", "numeric",
                              "double precision", "real", "decimal")
    ]
    # Fix: with no numeric columns the original code built
    # "SELECT payload,  FROM ..." — invalid SQL.  Bail out early instead.
    if not numeric_cols:
        return {"issues": [], "checked_rows": 0}
    check_sql = f"""
        SELECT payload, {', '.join(f'"{c}"' for c in numeric_cols)}
        FROM {table_name}
        WHERE payload IS NOT NULL
        ORDER BY fetched_at DESC NULLS LAST
        LIMIT %s
    """
    try:
        rows = db.query(check_sql, (limit,))
    except Exception as e:
        return {"error": str(e), "issues": []}
    zero_to_null_count = {}
    for row in rows:
        payload = row.get("payload")
        if not payload:
            continue
        if isinstance(payload, str):
            try:
                payload = json.loads(payload)
            except (ValueError, TypeError):
                # Fix: was a bare "except:"; only swallow JSON parse errors.
                continue
        if not isinstance(payload, dict):
            continue
        # Build the lowercase lookup once per row instead of re-scanning the
        # payload dict for every column.
        lowered = {k.lower(): v for k, v in payload.items()}
        for col in numeric_cols:
            db_value = row.get(col)
            payload_value = lowered.get(col.lower())
            # Fix: exclude bool — False == 0 in Python, but a JSON "false"
            # is not a numeric zero and must not be flagged.
            if payload_value == 0 and not isinstance(payload_value, bool) and db_value is None:
                zero_to_null_count[col] = zero_to_null_count.get(col, 0) + 1
    issues = [
        {"column": col, "count": count, "issue": "API 中的 0 值在 ODS 中变成了 NULL"}
        for col, count in zero_to_null_count.items()
    ]
    return {"issues": issues, "checked_rows": len(rows)}
def generate_report():
    """Generate the full API -> ODS audit report.

    Prints three sections — missing ODS columns, 0-to-NULL issues, and a
    summary with suggested ALTER TABLE DDL — then writes the findings to
    ``api_ods_issue_report.json`` next to this script and returns the
    report dict (``None`` on early exit).
    """
    print("=" * 80)
    print("API -> ODS 字段排查报告")
    print("生成时间:", datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
    print("=" * 80)
    # Load the cached comparison data produced by compare_api_ods_fields.py.
    comparison = load_api_ods_comparison()
    if not comparison:
        print("\n[错误] 未找到 API-ODS 对比文件 (api_ods_comparison.json)")
        print("请先运行 compare_api_ods_fields.py 生成对比数据")
        return
    # Section 1: API fields missing from the ODS tables.
    print("\n" + "=" * 80)
    print("一、API 字段在 ODS 表中缺失的情况")
    print("=" * 80)
    missing_summary = []
    for task_code, data in comparison.items():
        missing = data.get("missing_in_ods", [])
        if missing:
            # Filter out nested objects (siteProfile etc.) and system fields
            # that are deliberately not landed as ODS columns.
            filtered_missing = [
                f for f in missing
                if f.lower() not in ("siteprofile", "settleprofile", "tableprofile", "address", "avatar",
                                     "business_tel", "customer_service_qrcode", "customer_service_wechat",
                                     "fixed_pay_qrcode", "full_address", "latitude", "longitude",
                                     "light_status", "light_token", "light_type", "org_id", "prod_env",
                                     "shop_name", "shop_status", "site_label", "site_type",
                                     "tenant_site_region_id", "wifi_name", "wifi_password",
                                     "attendance_distance", "attendance_enabled", "auto_light")
            ]
            if filtered_missing:
                missing_summary.append({
                    "task_code": task_code,
                    "table_name": data.get("table_name"),
                    "endpoint": data.get("endpoint"),
                    "missing_fields": filtered_missing,
                })
    if missing_summary:
        for item in missing_summary:
            print(f"\n{item['task_code']}")
            print(f" 表名: {item['table_name']}")
            print(f" 端点: {item['endpoint']}")
            print(f" 缺失字段 ({len(item['missing_fields'])} 个):")
            for field in item['missing_fields']:
                print(f" - {field}")
    else:
        print("\n没有发现明显缺失的业务字段。")
    # Section 2: values that were 0 in the API but NULL in ODS.
    print("\n" + "=" * 80)
    print("二、检查 API 中的 0 值在 ODS 中是否变成了 NULL")
    print("=" * 80)
    try:
        dsn = os.getenv("PG_DSN")
        if not dsn:
            print("[错误] 未找到 PG_DSN 环境变量")
            return
        db = DatabaseConnection(dsn)
        tables = get_ods_tables_mapping()
        zero_null_issues = []
        for task_code, table_name in tables.items():
            print(f"\n检查 {task_code} ({table_name})...")
            result = check_zero_to_null_issues(db, table_name)
            if result.get("error"):
                print(f" [错误] {result['error']}")
                continue
            if result.get("issues"):
                zero_null_issues.append({
                    "task_code": task_code,
                    "table_name": table_name,
                    "checked_rows": result["checked_rows"],
                    "issues": result["issues"],
                })
                for issue in result["issues"]:
                    print(f" [发现问题] 列 '{issue['column']}': {issue['count']} 条记录 - {issue['issue']}")
            else:
                print(f" [正常] 检查了 {result['checked_rows']} 条记录,未发现 0 转 NULL 问题")
        db.close()
    except Exception as e:
        # Connection-level failure: report it and continue with empty issues
        # so the summary and JSON report are still produced.
        print(f"\n[错误] 数据库连接失败: {e}")
        zero_null_issues = []
    # Section 3: summary plus generated remediation DDL.
    print("\n" + "=" * 80)
    print("三、问题汇总")
    print("=" * 80)
    print("\n1. 需要添加的 ODS 表列:")
    if missing_summary:
        all_ddl = []
        for item in missing_summary:
            table_name = item['table_name']
            for field in item['missing_fields']:
                # Guess a column type from field-name conventions; review
                # before applying the generated DDL.
                if field.endswith("_id") or field in ("tenant_id", "member_id", "site_id"):
                    col_type = "BIGINT"
                elif field.endswith("_money") or field.endswith("_amount") or field.endswith("_price"):
                    col_type = "NUMERIC(18,2)"
                elif field.endswith("_time") or field.startswith("create") or field.startswith("update"):
                    col_type = "TIMESTAMP"
                elif field.startswith("is_") or field.endswith("_status"):
                    col_type = "INTEGER"
                else:
                    col_type = "TEXT"
                ddl = f"ALTER TABLE {table_name} ADD COLUMN IF NOT EXISTS {field} {col_type};"
                all_ddl.append(ddl)
        print("\n生成的 DDL 语句:")
        for ddl in all_ddl:
            print(f" {ddl}")
    else:
        print("")
    print("\n2. 需要修复的 0 转 NULL 问题:")
    if zero_null_issues:
        for item in zero_null_issues:
            print(f"\n{item['task_code']}】({item['table_name']})")
            for issue in item['issues']:
                print(f" - 列 '{issue['column']}': {issue['count']} 条记录受影响")
    else:
        print(" 未发现明显的 0 转 NULL 问题")
    # Persist the machine-readable report next to this script.
    report = {
        "generated_at": datetime.now().isoformat(),
        "missing_fields": missing_summary,
        "zero_to_null_issues": zero_null_issues,
    }
    report_path = Path(__file__).parent / "api_ods_issue_report.json"
    with open(report_path, "w", encoding="utf-8") as f:
        json.dump(report, f, ensure_ascii=False, indent=2)
    print(f"\n报告已保存到: {report_path}")
    return report
if __name__ == "__main__":
    generate_report()

231
tmp/check_ddl_vs_db.py Normal file
View File

@@ -0,0 +1,231 @@
# -*- coding: utf-8 -*-
"""
Compare DDL files against the actual database structure.
"""
import psycopg2
import re
from pathlib import Path
# NOTE(review): hardcoded DSN with embedded credentials — move to an
# environment variable (e.g. PG_DSN) and rotate the password before
# this script is shared or committed.
DSN = 'postgresql://local-Python:Neo-local-1991125@100.64.0.4:5432/LLZQ-test'
def get_db_columns(conn, schema):
    """Read every table/column of *schema* from information_schema.

    Returns ``{table_name: {column_name(lower): TYPE_STRING}}`` where the
    type string mirrors the spellings used in the project's DDL files.
    """
    sql = """
    SELECT table_name, column_name, data_type,
           character_maximum_length, numeric_precision, numeric_scale,
           is_nullable
    FROM information_schema.columns
    WHERE table_schema = %s
    ORDER BY table_name, ordinal_position
    """
    # Fixed spellings for plain scalar types; parameterised types
    # (varchar / numeric) are rendered separately below.
    simple_types = {
        'integer': 'INTEGER',
        'bigint': 'BIGINT',
        'smallint': 'SMALLINT',
        'boolean': 'BOOLEAN',
        'text': 'TEXT',
        'jsonb': 'JSONB',
        'json': 'JSON',
        'date': 'DATE',
        'timestamp with time zone': 'TIMESTAMPTZ',
        'timestamp without time zone': 'TIMESTAMP',
    }
    with conn.cursor() as cur:
        cur.execute(sql, (schema,))
        rows = cur.fetchall()
    tables = {}
    for table_name, col_name, data_type, char_len, num_prec, num_scale, _nullable in rows:
        if data_type == 'character varying':
            type_str = f'VARCHAR({char_len})' if char_len else 'VARCHAR'
        elif data_type == 'numeric':
            type_str = f'NUMERIC({num_prec},{num_scale})' if num_prec else 'NUMERIC'
        else:
            type_str = simple_types.get(data_type, data_type.upper())
        tables.setdefault(table_name, {})[col_name.lower()] = type_str
    return tables
def parse_ddl_file(filepath, default_schema=None):
    """Parse CREATE TABLE statements out of a DDL file.

    Handles ``CREATE TABLE [IF NOT EXISTS] [schema.]table (...)``; the
    schema part (or *default_schema*) is parsed but not used downstream.
    Returns ``{table_name: {column_name(lower): TYPE_STRING}}``.
    """
    ddl_text = Path(filepath).read_text(encoding='utf-8')
    create_re = re.compile(
        r'CREATE TABLE\s+(?:IF NOT EXISTS\s+)?(?:(\w+)\.)?(\w+)\s*\((.*?)\);',
        re.DOTALL | re.IGNORECASE
    )
    # Table-level constraint lines to skip inside the column block.
    constraint_prefixes = ('PRIMARY KEY', 'CONSTRAINT', 'UNIQUE',
                           'FOREIGN KEY', 'CHECK', 'EXCLUDE')
    # column_name TYPE[(args)] ...
    col_re = re.compile(r'^(\w+)\s+(\w+(?:\s*\([^)]+\))?)')
    parsed = {}
    for m in create_re.finditer(ddl_text):
        _schema = m.group(1) or default_schema  # kept for interface parity
        cols = {}
        for raw_line in m.group(3).split('\n'):
            line = raw_line.strip()
            if not line or line.startswith('--'):
                continue
            if line.upper().startswith(constraint_prefixes):
                continue
            cm = col_re.match(line)
            if cm:
                # Normalise: uppercase, strip spaces inside "(18, 2)",
                # and map the INT alias onto INTEGER.
                ctype = cm.group(2).upper().replace(' ', '')
                cols[cm.group(1).lower()] = 'INTEGER' if ctype == 'INT' else ctype
        parsed[m.group(2)] = cols
    return parsed
def compare_schemas(db_tables, ddl_tables, schema_name):
    """Diff database columns against DDL columns.

    Returns a dict of discrepancy lists; ``type_diff`` is reserved and is
    never populated by this comparison.
    """
    diff = {
        'db_only_tables': [f"{schema_name}.{t}" for t in db_tables if t not in ddl_tables],
        'ddl_only_tables': [f"{schema_name}.{t}" for t in ddl_tables if t not in db_tables],
        'db_only_cols': [],
        'ddl_only_cols': [],
        'type_diff': [],
    }
    # Column-level diff over tables present on both sides.
    for tbl in db_tables.keys() & ddl_tables.keys():
        db_cols = db_tables[tbl]
        ddl_cols = ddl_tables[tbl]
        diff['ddl_only_cols'].extend(
            f"{schema_name}.{tbl}.{c} ({ddl_cols[c]})" for c in ddl_cols if c not in db_cols
        )
        diff['db_only_cols'].extend(
            f"{schema_name}.{tbl}.{c} ({db_cols[c]})" for c in db_cols if c not in ddl_cols
        )
    return diff
def _report_schema(conn, ddl_file, schema):
    """Compare one schema's DDL file with the live database and print a report.

    Prints the section header unconditionally; silently skips the body when
    the DDL file does not exist (matching the original behaviour).
    """
    print(f"\n### {schema} ###\n")
    if not ddl_file.exists():
        return
    db_tables = get_db_columns(conn, schema)
    ddl_tables = parse_ddl_file(ddl_file, schema)
    print(f"DB tables: {len(db_tables)}")
    print(f"DDL tables: {len(ddl_tables)}")
    diff = compare_schemas(db_tables, ddl_tables, schema)
    total_diff = sum(len(v) for v in diff.values())
    if total_diff == 0:
        print("\nNo differences found.")
        return
    print(f"\nFound {total_diff} differences:")
    # One (diff key, human label) pair per report section.
    sections = [
        ('db_only_tables', '[DB has, DDL missing] Tables:'),
        ('ddl_only_tables', '[DDL has, DB missing] Tables:'),
        ('db_only_cols', '[DB has, DDL missing] Columns:'),
        ('ddl_only_cols', '[DDL has, DB missing] Columns:'),
    ]
    for key, label in sections:
        if diff[key]:
            print(f"\n {label}")
            for item in sorted(diff[key]):
                print(f" - {item}")

def main():
    """Compare the ODS and DWD DDL files against the live database schemas.

    The per-schema reporting was duplicated verbatim for ODS and DWD; it is
    factored into _report_schema so both sections stay in sync.
    """
    conn = psycopg2.connect(DSN)
    base_dir = Path(__file__).parent.parent / 'etl_billiards' / 'database'
    print("=" * 80)
    print("DDL vs DB Structure Comparison")
    print("=" * 80)
    _report_schema(conn, base_dir / 'schema_ODS_doc.sql', 'billiards_ods')
    _report_schema(conn, base_dir / 'schema_dwd_doc.sql', 'billiards_dwd')
    conn.close()
    print("\n" + "=" * 80)
if __name__ == '__main__':
    main()

181
tmp/check_field_variants.py Normal file
View File

@@ -0,0 +1,181 @@
# -*- coding: utf-8 -*-
"""
Check whether "missing" fields are really spelling variants
(camelCase vs snake_case, case differences, etc.).
"""
import os
import sys
import json
import re
import psycopg2
from psycopg2.extras import RealDictCursor
# Configuration
# NOTE(review): hardcoded DSN with embedded credentials — move to an
# environment variable (e.g. PG_DSN) before sharing this script.
DSN = 'postgresql://local-Python:Neo-local-1991125@100.64.0.4:5432/LLZQ-test'
def camel_to_snake(name):
    """Convert camelCase to snake_case."""
    partial = re.sub(r'(.)([A-Z][a-z]+)', r'\1_\2', name)
    return re.sub(r'([a-z0-9])([A-Z])', r'\1_\2', partial).lower()
def snake_to_camel(name):
    """Convert snake_case to camelCase."""
    head, *tail = name.split('_')
    return head + ''.join(part.title() for part in tail)
def normalize_field(name):
    """Normalize a field name: underscores removed, all lowercase."""
    return name.replace('_', '').lower()
def find_variants(api_field, ods_columns):
    """Find ODS columns that look like spelling variants of *api_field*.

    Returns a list of ``(ods_column, match_type)`` pairs; match_type is one
    of 'exact', 'normalized', 'camel_to_snake', 'snake_to_camel', 'partial',
    decided in that priority order per candidate column.
    """
    api_lower = api_field.lower()
    api_norm = normalize_field(api_field)
    api_as_snake = camel_to_snake(api_field)
    api_as_camel = snake_to_camel(api_field).lower()
    hits = []
    for candidate in ods_columns:
        cand_lower = candidate.lower()
        cand_norm = normalize_field(candidate)
        if cand_lower == api_lower:
            hits.append((candidate, 'exact'))
        elif cand_norm == api_norm:
            # Same letters once underscores/case are stripped.
            hits.append((candidate, 'normalized'))
        elif cand_lower == api_as_snake:
            hits.append((candidate, 'camel_to_snake'))
        elif cand_lower == api_as_camel:
            hits.append((candidate, 'snake_to_camel'))
        elif (len(api_norm) > 3 and len(cand_norm) > 3
              and (api_norm in cand_norm or cand_norm in api_norm)):
            # Substring overlap, only for names long enough to be meaningful.
            hits.append((candidate, 'partial'))
    return hits
def get_ods_table_columns(conn, table_name):
    """Return the ordered list of column names for *table_name*.

    An unqualified name defaults to the ``public`` schema.
    """
    schema, name = table_name.split('.', 1) if '.' in table_name else ('public', table_name)
    sql = """
    SELECT column_name
    FROM information_schema.columns
    WHERE table_schema = %s AND table_name = %s
    ORDER BY ordinal_position
    """
    with conn.cursor(cursor_factory=RealDictCursor) as cur:
        cur.execute(sql, (schema, name))
        return [row['column_name'] for row in cur.fetchall()]
def main():
    """Cross-check "missing" API fields against ODS columns for variants.

    Reads api_ods_comparison.json (produced by the field-comparison
    script), then for every task with missing fields queries the live ODS
    table and classifies each missing field as a spelling variant or a
    confirmed-missing column, printing a per-task and summary report.
    """
    # Load the previously generated comparison result.
    json_path = os.path.join(os.path.dirname(__file__), 'api_ods_comparison.json')
    with open(json_path, 'r', encoding='utf-8') as f:
        results = json.load(f)
    conn = psycopg2.connect(DSN)
    print("=" * 100)
    print("缺失字段拼写变体检查")
    print("=" * 100)
    all_findings = {}
    for code, data in results.items():
        missing = data.get('missing_in_ods', [])
        if not missing:
            continue
        table_name = data['table_name']
        ods_columns = get_ods_table_columns(conn, table_name)
        print(f"\n### {code}")
        print(f"表名: {table_name}")
        findings = []
        true_missing = []
        for api_field in missing:
            variants = find_variants(api_field, ods_columns)
            if variants:
                for ods_col, match_type in variants:
                    findings.append({
                        'api_field': api_field,
                        'ods_column': ods_col,
                        'match_type': match_type
                    })
                    print(f" [发现变体] API: `{api_field}` -> ODS: `{ods_col}` ({match_type})")
            else:
                true_missing.append(api_field)
        if findings:
            all_findings[code] = {
                'table_name': table_name,
                'variants': findings,
                'true_missing': true_missing
            }
        if true_missing:
            print(f"\n **确认缺失 ({len(true_missing)}):**")
            # NOTE(review): loop variable `f` shadows the file handle above
            # (already closed by the with-block, so harmless here).
            for f in true_missing:
                print(f" - {f}")
    conn.close()
    # Summary 1: detected spelling variants, as a markdown table.
    print("\n")
    print("=" * 100)
    print("汇总 - 发现的拼写变体")
    print("=" * 100)
    for code, data in all_findings.items():
        if data['variants']:
            print(f"\n### {code} (`{data['table_name']}`)")
            print("\n| API 字段 | ODS 字段 | 匹配类型 |")
            print("|----------|----------|----------|")
            for v in data['variants']:
                print(f"| `{v['api_field']}` | `{v['ods_column']}` | {v['match_type']} |")
    print("\n")
    # Summary 2: fields confirmed missing (no variant found).
    print("=" * 100)
    print("汇总 - 确认缺失的字段(无变体)")
    print("=" * 100)
    for code, data in results.items():
        missing = data.get('missing_in_ods', [])
        if not missing:
            continue
        if code in all_findings:
            true_missing = all_findings[code]['true_missing']
        else:
            # Task had missing fields but no variants at all: everything
            # it listed is confirmed missing.
            true_missing = missing
        if true_missing:
            print(f"\n### {code} (`{data['table_name']}`)")
            print(f"缺失 {len(true_missing)} 个字段:")
            print("\n| 字段名 | 说明 |")
            print("|--------|------|")
            for f in true_missing:
                print(f"| `{f}` | |")
if __name__ == '__main__':
    main()

View File

@@ -0,0 +1,95 @@
# -*- coding: utf-8 -*-
"""Check data completeness of the newly added DWD fields."""
import psycopg2
# NOTE(review): hardcoded DSN with embedded credentials — move to an
# environment variable (e.g. PG_DSN) before sharing this script.
DSN = 'postgresql://local-Python:Neo-local-1991125@100.64.0.4:5432/LLZQ-test'
# Newly added fields, keyed by schema-qualified table name.
NEW_FIELDS = {
    # DWD main tables
    'billiards_dwd.dwd_settlement_head': [
        'electricity_money', 'real_electricity_money', 'electricity_adjust_money',
        'pl_coupon_sale_amount', 'mervou_sales_amount'
    ],
    'billiards_dwd.dwd_table_fee_log': ['activity_discount_amount', 'real_service_money'],
    'billiards_dwd.dwd_table_fee_adjust': ['table_name', 'table_price', 'charge_free'],
    'billiards_dwd.dim_member': ['pay_money_sum', 'recharge_money_sum'],
    'billiards_dwd.dim_member_card_account': ['principal_balance', 'member_grade'],
    'billiards_dwd.dim_store_goods': ['commodity_code', 'not_sale'],
    'billiards_dwd.dim_table': ['order_id'],
    'billiards_dwd.dim_tenant_goods': ['not_sale'],
    'billiards_dwd.dim_groupbuy_package': ['sort', 'is_first_limit'],
    'billiards_dwd.dwd_assistant_service_log': ['real_service_money'],
    'billiards_dwd.dwd_assistant_trash_event': ['tenant_id'],
    'billiards_dwd.dwd_groupbuy_redemption': ['member_discount_money', 'coupon_sale_id'],
    'billiards_dwd.dwd_member_balance_change': ['principal_before', 'principal_after'],
    'billiards_dwd.dwd_payment': ['tenant_id'],
    'billiards_dwd.dwd_store_goods_sale': ['coupon_share_money'],
}
def check_field_data(conn, schema_table, fields):
    """Collect fill statistics (non-null / non-zero counts) per field.

    Returns one dict per field; a per-field query failure is reported as
    ``{'field': ..., 'error': <truncated message>}`` instead of aborting.
    """
    report = []
    schema, table = schema_table.split('.')  # kept for parity; unused below
    cur = conn.cursor()
    # Total row count, shared by every field's fill-rate computation.
    cur.execute(f"SELECT COUNT(*) FROM {schema_table}")
    total_rows = cur.fetchone()[0]
    for field in fields:
        try:
            cur.execute(f"SELECT COUNT(*) FROM {schema_table} WHERE {field} IS NOT NULL")
            non_null = cur.fetchone()[0]
            # Non-null AND non-zero (zero spelled any common way as text).
            cur.execute(f"""
                SELECT COUNT(*) FROM {schema_table}
                WHERE {field} IS NOT NULL
                  AND CAST({field} AS TEXT) NOT IN ('0', '0.00', '0.0', '')
            """)
            non_zero = cur.fetchone()[0]
            rate = f"{non_null / total_rows * 100:.1f}%" if total_rows > 0 else "N/A"
            report.append({
                'field': field,
                'total': total_rows,
                'non_null': non_null,
                'non_zero': non_zero,
                'fill_rate': rate,
            })
        except Exception as exc:
            report.append({
                'field': field,
                'error': str(exc)[:50]
            })
    cur.close()
    return report
def main():
    """Print a fill-rate report for every table in NEW_FIELDS."""
    conn = psycopg2.connect(DSN)
    banner = "=" * 90
    print(banner)
    print("New Fields Data Completeness Check")
    print(banner)
    for table, fields in NEW_FIELDS.items():
        print(f"\n### {table} ###\n")
        rows = check_field_data(conn, table, fields)
        print(f"{'Field':<30} {'Total':>8} {'Non-Null':>10} {'Non-Zero':>10} {'Fill Rate':>10}")
        print("-" * 70)
        for row in rows:
            if 'error' in row:
                print(f"{row['field']:<30} ERROR: {row['error']}")
            else:
                print(f"{row['field']:<30} {row['total']:>8} {row['non_null']:>10} "
                      f"{row['non_zero']:>10} {row['fill_rate']:>10}")
    conn.close()
    print("\n" + banner)
if __name__ == '__main__':
    main()

90
tmp/check_scd2_tables.py Normal file
View File

@@ -0,0 +1,90 @@
# -*- coding: utf-8 -*-
"""检查 DWD 维度表 SCD2 配置"""
import os
import sys
from pathlib import Path
project_root = Path(__file__).parent.parent / "etl_billiards"
sys.path.insert(0, str(project_root))
from dotenv import load_dotenv
load_dotenv(project_root / ".env")
from database.connection import DatabaseConnection
dsn = os.getenv("PG_DSN")
db = DatabaseConnection(dsn)
print("=" * 70)
print("DWD Dimension Tables - SCD2 Analysis")
print("=" * 70)
# 获取所有维度表
tables = db.query("""
SELECT table_name
FROM information_schema.tables
WHERE table_schema = 'billiards_dwd'
AND table_name LIKE 'dim_%'
ORDER BY table_name
""")
scd_cols = {"scd2_start_time", "scd2_end_time", "scd2_is_current", "scd2_version"}
scd2_tables = []
type1_tables = []
for t in tables:
tbl = t["table_name"]
cols = db.query("""
SELECT column_name FROM information_schema.columns
WHERE table_schema = 'billiards_dwd' AND table_name = %s
""", (tbl,))
col_names = {c["column_name"].lower() for c in cols}
has_scd = col_names & scd_cols
if has_scd:
scd2_tables.append((tbl, has_scd))
else:
type1_tables.append(tbl)
print("\n[SCD2 Tables - History Tracking]")
print("-" * 50)
if scd2_tables:
for tbl, cols in scd2_tables:
print(f" {tbl}")
print(f" SCD2 cols: {', '.join(sorted(cols))}")
else:
print(" (none)")
print(f"\n[Type1 Tables - Direct Overwrite] ({len(type1_tables)} tables)")
print("-" * 50)
for tbl in type1_tables:
print(f" {tbl}")
print("\n" + "=" * 70)
print("Processing Logic")
print("=" * 70)
print("""
Code path in dwd_load_task.py:
if table.startswith('dim_'):
_merge_dim()
|
+-- if has SCD2 columns:
| _merge_dim_scd2()
| -> Compare data, close old version, insert new version
| -> Uses INSERT (no ON CONFLICT)
| -> SCD2 NOT affected by fact_upsert config
|
+-- else:
_merge_dim_type1_upsert()
-> Uses ON CONFLICT DO UPDATE
-> Direct overwrite (Type1)
else:
_load_fact_generic()
-> Uses ON CONFLICT DO UPDATE (if fact_upsert=true)
CONCLUSION: SCD2 logic is INDEPENDENT, NOT affected by conflict mode settings.
""")
db.close()

26
tmp/check_seq.py Normal file
View File

@@ -0,0 +1,26 @@
# -*- coding: utf-8 -*-
"""Scan the bd_manual docs for duplicated sequence numbers.

Each manual table row starts with "| <seq> |"; within one document those
sequence numbers are expected to be unique.  Any document containing
duplicates is reported together with every offending row.
"""
import re
from collections import Counter
from pathlib import Path

# A manual table row begins with "| <number> |".
SEQ_ROW = re.compile(r'\|\s*(\d+)\s*\|')

docs = list(Path('etl_billiards/docs/bd_manual/main').glob('*.md')) + \
       list(Path('etl_billiards/docs/bd_manual/Ex').glob('*.md'))
for doc in docs:
    content = doc.read_text(encoding='utf-8')
    # Collect (sequence, truncated line) for every table row in the doc.
    seqs = []
    for line in content.split('\n'):
        match = SEQ_ROW.match(line)
        if match:
            seqs.append((int(match.group(1)), line[:70]))
    # Count each sequence once instead of calling list.count() per row,
    # which made the reporting loop O(n^2).  The original extra
    # "seq in seen" test was redundant: any previously-seen seq
    # necessarily has count > 1.
    counts = Counter(seq for seq, _ in seqs)
    if any(n > 1 for n in counts.values()):
        print(f'\n{doc.name}: Duplicate sequences found')
        for seq, line in seqs:
            if counts[seq] > 1:
                print(f' {seq}: {line}...')

View File

@@ -0,0 +1,510 @@
# -*- coding: utf-8 -*-
"""
对比 API 返回字段和 ODS 表字段,找出 ODS 中缺少的 API 字段
"""
import os
import sys
import json
import requests
from datetime import datetime, timedelta
from zoneinfo import ZoneInfo
import psycopg2
from psycopg2.extras import RealDictCursor
# 配置
DSN = 'postgresql://local-Python:Neo-local-1991125@100.64.0.4:5432/LLZQ-test'
API_BASE = 'https://pc.ficoo.vip/apiprod/admin/v1/'
API_TOKEN = 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJjbGllbnQtdHlwZSI6IjQiLCJ1c2VyLXR5cGUiOiIxIiwiaHR0cDovL3NjaGVtYXMubWljcm9zb2Z0LmNvbS93cy8yMDA4LzA2L2lkZW50aXR5L2NsYWltcy9yb2xlIjoiMTIiLCJyb2xlLWlkIjoiMTIiLCJ0ZW5hbnQtaWQiOiIyNzkwNjgzMTYwNzA5OTU3Iiwibmlja25hbWUiOiLnp5_miLfnrqHnkIblkZjvvJrmganmgakxIiwic2l0ZS1pZCI6IjAiLCJtb2JpbGUiOiIxMzgxMDUwMjMwNCIsInNpZCI6IjI5NTA0ODk2NTgzOTU4NDUiLCJzdGFmZi1pZCI6IjMwMDk5MTg2OTE1NTkwNDUiLCJvcmctaWQiOiIwIiwicm9sZS10eXBlIjoiMyIsInJlZnJlc2hUb2tlbiI6IktlbTVsdHRqZ2tSUExOcVA2ajhNakdQYnFrNW5mRzBQNzRvMHE0b295VVE9IiwicmVmcmVzaEV4cGlyeVRpbWUiOiIyMDI2LzIvOCDkuIvljYg2OjU3OjA1IiwibmVlZENoZWNrVG9rZW4iOiJmYWxzZSIsImV4cCI6MTc3MDU0ODIyNSwiaXNzIjoidGVzdCIsImF1ZCI6IlVzZXIifQ.wJlm7pTqUzp769nUGdxx0e1bVMy4x9Prp9U_UMWQvlk'
STORE_ID = '2790685415443269'
TZ = ZoneInfo('Asia/Taipei')
# ODS task specifications: one entry per extraction task, describing which
# API endpoint feeds which ODS table and how the request must be shaped.
#   code            - internal ODS task code
#   table_name      - target ODS table (schema-qualified)
#   endpoint        - admin API endpoint (POSTed to)
#   data_path       - keys to walk into the JSON response
#   list_key        - key of the record list inside the data node (None = auto-detect)
#   requires_window - whether the endpoint needs an explicit time window
#   time_fields     - request parameter names for the window bounds (None = none)
#   include_site_id - whether the request must carry the site/store id
ODS_SPECS = [
    {
        'code': 'ODS_ASSISTANT_ACCOUNT',
        'table_name': 'billiards_ods.assistant_accounts_master',
        'endpoint': '/PersonnelManagement/SearchAssistantInfo',
        'data_path': ['data'],
        'list_key': 'assistantInfos',
        'requires_window': True,
        'time_fields': ('startTime', 'endTime'),
        'include_site_id': True,
    },
    {
        'code': 'ODS_SETTLEMENT_RECORDS',
        'table_name': 'billiards_ods.settlement_records',
        'endpoint': '/Site/GetAllOrderSettleList',
        'data_path': ['data'],
        'list_key': 'settleList',
        'requires_window': True,
        'time_fields': ('rangeStartTime', 'rangeEndTime'),
        'include_site_id': True,
    },
    {
        'code': 'ODS_TABLE_USE',
        'table_name': 'billiards_ods.table_fee_transactions',
        'endpoint': '/Site/GetSiteTableOrderDetails',
        'data_path': ['data'],
        'list_key': 'siteTableUseDetailsList',
        'requires_window': False,
        'time_fields': ('startTime', 'endTime'),
        'include_site_id': True,
    },
    {
        'code': 'ODS_ASSISTANT_LEDGER',
        'table_name': 'billiards_ods.assistant_service_records',
        'endpoint': '/AssistantPerformance/GetOrderAssistantDetails',
        'data_path': ['data'],
        'list_key': 'orderAssistantDetails',
        'requires_window': True,
        'time_fields': ('startTime', 'endTime'),
        'include_site_id': True,
    },
    {
        'code': 'ODS_ASSISTANT_ABOLISH',
        'table_name': 'billiards_ods.assistant_cancellation_records',
        'endpoint': '/AssistantPerformance/GetAbolitionAssistant',
        'data_path': ['data'],
        'list_key': 'abolitionAssistants',
        'requires_window': True,
        'time_fields': ('startTime', 'endTime'),
        'include_site_id': True,
    },
    {
        'code': 'ODS_STORE_GOODS_SALES',
        'table_name': 'billiards_ods.store_goods_sales_records',
        'endpoint': '/TenantGoods/GetGoodsSalesList',
        'data_path': ['data'],
        'list_key': 'orderGoodsLedgers',
        'requires_window': False,
        'time_fields': ('startTime', 'endTime'),
        'include_site_id': True,
    },
    {
        'code': 'ODS_PAYMENT',
        'table_name': 'billiards_ods.payment_transactions',
        'endpoint': '/PayLog/GetPayLogListPage',
        'data_path': ['data'],
        'list_key': None,
        'requires_window': False,
        'time_fields': ('StartPayTime', 'EndPayTime'),
        'include_site_id': True,
    },
    {
        'code': 'ODS_REFUND',
        'table_name': 'billiards_ods.refund_transactions',
        'endpoint': '/Order/GetRefundPayLogList',
        'data_path': ['data'],
        'list_key': None,
        'requires_window': False,
        'time_fields': ('startTime', 'endTime'),
        'include_site_id': True,
    },
    {
        'code': 'ODS_PLATFORM_COUPON',
        'table_name': 'billiards_ods.platform_coupon_redemption_records',
        'endpoint': '/Promotion/GetOfflineCouponConsumePageList',
        'data_path': ['data'],
        'list_key': None,
        'requires_window': False,
        'time_fields': ('startTime', 'endTime'),
        'include_site_id': True,
    },
    {
        'code': 'ODS_MEMBER',
        'table_name': 'billiards_ods.member_profiles',
        'endpoint': '/MemberProfile/GetTenantMemberList',
        'data_path': ['data'],
        'list_key': 'tenantMemberInfos',
        'requires_window': False,
        'time_fields': ('startTime', 'endTime'),
        'include_site_id': True,
    },
    {
        'code': 'ODS_MEMBER_CARD',
        'table_name': 'billiards_ods.member_stored_value_cards',
        'endpoint': '/MemberProfile/GetTenantMemberCardList',
        'data_path': ['data'],
        'list_key': 'tenantMemberCards',
        'requires_window': False,
        'time_fields': ('startTime', 'endTime'),
        'include_site_id': True,
    },
    {
        'code': 'ODS_MEMBER_BALANCE',
        'table_name': 'billiards_ods.member_balance_changes',
        'endpoint': '/MemberProfile/GetMemberCardBalanceChange',
        'data_path': ['data'],
        'list_key': 'tenantMemberCardLogs',
        'requires_window': False,
        'time_fields': ('startTime', 'endTime'),
        'include_site_id': True,
    },
    {
        'code': 'ODS_RECHARGE_SETTLE',
        'table_name': 'billiards_ods.recharge_settlements',
        'endpoint': '/Site/GetRechargeSettleList',
        'data_path': ['data'],
        'list_key': 'settleList',
        'requires_window': True,
        'time_fields': ('rangeStartTime', 'rangeEndTime'),
        'include_site_id': True,
    },
    {
        'code': 'ODS_GROUP_PACKAGE',
        'table_name': 'billiards_ods.group_buy_packages',
        'endpoint': '/PackageCoupon/QueryPackageCouponList',
        'data_path': ['data'],
        'list_key': 'packageCouponList',
        'requires_window': False,
        'time_fields': None,
        'include_site_id': True,
    },
    {
        'code': 'ODS_GROUP_BUY_REDEMPTION',
        'table_name': 'billiards_ods.group_buy_redemption_records',
        'endpoint': '/Site/GetSiteTableUseDetails',
        'data_path': ['data'],
        'list_key': 'siteTableUseDetailsList',
        'requires_window': False,
        'time_fields': ('startTime', 'endTime'),
        'include_site_id': True,
    },
    {
        'code': 'ODS_INVENTORY_STOCK',
        'table_name': 'billiards_ods.goods_stock_summary',
        'endpoint': '/TenantGoods/GetGoodsStockReport',
        'data_path': ['data'],
        'list_key': None,
        'requires_window': False,
        'time_fields': None,
        'include_site_id': True,
    },
    {
        'code': 'ODS_INVENTORY_CHANGE',
        'table_name': 'billiards_ods.goods_stock_movements',
        'endpoint': '/GoodsStockManage/QueryGoodsOutboundReceipt',
        'data_path': ['data'],
        'list_key': 'queryDeliveryRecordsList',
        'requires_window': True,
        'time_fields': ('startTime', 'endTime'),
        'include_site_id': True,
    },
    {
        'code': 'ODS_TABLES',
        'table_name': 'billiards_ods.site_tables_master',
        'endpoint': '/Table/GetSiteTables',
        'data_path': ['data'],
        'list_key': 'siteTables',
        'requires_window': False,
        'time_fields': None,
        'include_site_id': True,
    },
    {
        'code': 'ODS_GOODS_CATEGORY',
        'table_name': 'billiards_ods.stock_goods_category_tree',
        'endpoint': '/TenantGoodsCategory/QueryPrimarySecondaryCategory',
        'data_path': ['data'],
        'list_key': 'goodsCategoryList',
        'requires_window': False,
        'time_fields': None,
        'include_site_id': True,
    },
    {
        'code': 'ODS_STORE_GOODS',
        'table_name': 'billiards_ods.store_goods_master',
        'endpoint': '/TenantGoods/GetGoodsInventoryList',
        'data_path': ['data'],
        'list_key': 'orderGoodsList',
        'requires_window': False,
        'time_fields': None,
        'include_site_id': True,
        'site_id_array': True,  # this endpoint requires the site id as an array
    },
    {
        'code': 'ODS_TABLE_FEE_DISCOUNT',
        'table_name': 'billiards_ods.table_fee_discount_records',
        'endpoint': '/Site/GetTaiFeeAdjustList',
        'data_path': ['data'],
        'list_key': 'taiFeeAdjustInfos',
        'requires_window': False,
        'time_fields': ('startTime', 'endTime'),
        'include_site_id': True,
    },
    {
        'code': 'ODS_TENANT_GOODS',
        'table_name': 'billiards_ods.tenant_goods_master',
        'endpoint': '/TenantGoods/QueryTenantGoods',
        'data_path': ['data'],
        'list_key': 'tenantGoodsList',
        'requires_window': False,
        'time_fields': None,
        'include_site_id': True,
    },
]
def get_ods_table_columns(conn, table_name: str) -> dict:
    """Return the ODS table's column metadata keyed by lower-cased column name.

    Queries information_schema.columns for (column_name, data_type, udt_name);
    a table name without a schema prefix is assumed to live in 'public'.
    """
    schema, sep, name = table_name.partition('.')
    if not sep:
        # No schema qualifier -> default to the public schema.
        schema, name = 'public', table_name
    sql = """
        SELECT column_name, data_type, udt_name
        FROM information_schema.columns
        WHERE table_schema = %s AND table_name = %s
        ORDER BY ordinal_position
    """
    with conn.cursor(cursor_factory=RealDictCursor) as cur:
        cur.execute(sql, (schema, name))
        fetched = cur.fetchall()
    return {rec['column_name'].lower(): rec for rec in fetched}
def flatten_json_keys(obj, prefix='', depth=0) -> set:
    """Collect every key name appearing in a nested JSON structure.

    Dict keys are gathered without their dotted prefix; nesting is followed
    at most 3 levels deep and only the first 5 elements of any list are
    inspected, which keeps the scan cheap on large API payloads.
    """
    if depth > 3:  # hard stop on recursion depth
        return set()
    found = set()
    if isinstance(obj, dict):
        for key, value in obj.items():
            dotted = f"{prefix}.{key}" if prefix else key
            found.add(key)  # record the bare key name
            if isinstance(value, (dict, list)) and depth < 3:
                found |= flatten_json_keys(value, dotted, depth + 1)
    elif isinstance(obj, list):
        # Sample only the first 5 elements; lists do not consume depth.
        for element in obj[:5]:
            found |= flatten_json_keys(element, prefix, depth)
    return found
def call_api(endpoint: str, params: dict) -> dict:
    """POST to the API and return the decoded JSON body ({} on any failure)."""
    url = '/'.join((API_BASE.rstrip('/'), endpoint.lstrip('/')))
    request_headers = {
        'Authorization': f'Bearer {API_TOKEN}',
        'Content-Type': 'application/json',
        'Accept': 'application/json',
    }
    try:
        response = requests.post(url, json=params, headers=request_headers, timeout=30)
        response.raise_for_status()
        return response.json()
    except Exception as exc:
        # Network/HTTP/decoding failures are reported, never raised upward.
        print(f" API 调用异常: {exc}")
        return {}
def extract_list(payload: dict, data_path: list, list_key: str = None) -> list:
    """Walk data_path into the payload and pull out the record list.

    Resolution order once the path is walked: the node itself if it is a
    list, then the explicit list_key, then a handful of conventional list
    keys, then the first list-valued entry found. Returns [] when nothing
    matches or the path breaks.
    """
    node = payload
    for step in data_path:
        if not isinstance(node, dict):
            return []
        node = node.get(step)
    if isinstance(node, list):
        return node
    if isinstance(node, dict):
        if list_key and list_key in node:
            return node[list_key]
        # Fall back to conventional wrapper keys used by this API.
        for candidate in ('list', 'rows', 'records', 'items', 'dataList'):
            value = node.get(candidate)
            if isinstance(value, list):
                return value
        # Last resort: first list-typed value in the dict.
        for value in node.values():
            if isinstance(value, list):
                return value
    return []
def get_api_sample_data(spec: dict, window_start: datetime, window_end: datetime) -> list:
    """Fetch one sample page (up to 50 rows) from the endpoint described by spec.

    Builds the request params (site id, optional time window) from the spec
    flags, calls the API, and extracts the record list; an empty response
    yields [].
    """
    request_params = {'page': 1, 'limit': 50}
    if spec.get('include_site_id'):
        site_value = int(STORE_ID)
        # Some endpoints expect the site id wrapped in an array.
        request_params['siteId'] = [site_value] if spec.get('site_id_array') else site_value
    window_keys = spec.get('time_fields')
    if window_keys:
        key_start, key_end = window_keys
        fmt = '%Y-%m-%d %H:%M:%S'
        request_params[key_start] = window_start.strftime(fmt)
        request_params[key_end] = window_end.strftime(fmt)
    payload = call_api(spec['endpoint'], request_params)
    if not payload:
        return []
    return extract_list(payload, spec['data_path'], spec.get('list_key'))
def compare_fields(api_fields: set, ods_columns: dict) -> dict:
    """Diff the API field set against the ODS column set.

    Returns sorted lists of: all (lower-cased) API fields, all ODS columns,
    API fields missing from ODS (ETL bookkeeping columns and siteProfile
    attributes excluded), and ODS-only columns (likely derived fields).
    """
    ods_names = set(ods_columns.keys())
    # Bookkeeping columns added by the ETL itself, never present in the API.
    system_cols = {
        'payload', 'source_file', 'source_endpoint', 'fetched_at',
        'content_hash', 'record_index', 'site_profile',
    }
    # Attributes nested under siteProfile (store configuration) are ignored.
    site_profile_fields = {
        'address', 'full_address', 'latitude', 'longitude',
        'shop_name', 'shop_status', 'site_label', 'site_type',
        'tenant_site_region_id', 'attendance_distance', 'attendance_enabled',
        'auto_light', 'avatar', 'business_tel', 'customer_service_qrcode',
        'customer_service_wechat', 'fixed_pay_qrcode', 'light_status',
        'light_token', 'light_type', 'prod_env', 'wifi_name', 'wifi_password',
        'org_id', 'siteprofile', 'ewelink_client_id',
    }
    # Case-insensitive comparison: normalize API names to lower case.
    lowered = {field.lower() for field in api_fields}
    return {
        'api_fields': sorted(lowered),
        'ods_columns': sorted(ods_names),
        'missing_in_ods': sorted(lowered - ods_names - system_cols - site_profile_fields),
        'ods_only': sorted(ods_names - lowered - system_cols),
    }
def main():
    """Drive the API-vs-ODS field comparison for every spec in ODS_SPECS.

    For each spec: read the ODS table's columns, pull a sample of live API
    records, diff the two field sets, print a per-table and summary report,
    and persist the detailed result as api_ods_comparison.json beside this
    script.
    """
    print("=" * 80)
    print("API vs ODS 字段对比分析")
    print("=" * 80)
    # Connect to the database (DSN is a module-level constant).
    conn = psycopg2.connect(DSN)
    print(f"数据库连接成功")
    print(f"API: {API_BASE}")
    print(f"门店 ID: {STORE_ID}")
    # Sampling window: 2025-12-01 through "now" (timezone-aware).
    now = datetime.now(TZ)
    window_end = now
    window_start = datetime(2025, 12, 1, 0, 0, 0, tzinfo=TZ)
    print(f"时间窗口: {window_start.strftime('%Y-%m-%d %H:%M:%S')} ~ {window_end.strftime('%Y-%m-%d %H:%M:%S')}")
    print("=" * 80)
    results = {}
    for spec in ODS_SPECS:
        print(f"\n处理: {spec['code']}")
        print(f" 表名: {spec['table_name']}")
        print(f" 端点: {spec['endpoint']}")
        # Fetch the ODS table structure; skip specs whose table is absent.
        ods_columns = get_ods_table_columns(conn, spec['table_name'])
        if not ods_columns:
            print(f" [跳过] ODS 表不存在或无字段")
            continue
        print(f" ODS 字段数: {len(ods_columns)}")
        # Fetch a sample of records from the live API.
        records = get_api_sample_data(spec, window_start, window_end)
        print(f" API 返回记录数: {len(records)}")
        if not records:
            # Record the empty result so the summary still lists this table.
            results[spec['code']] = {
                'table_name': spec['table_name'],
                'endpoint': spec['endpoint'],
                'api_records': 0,
                'ods_columns': list(ods_columns.keys()),
                'missing_in_ods': [],
                'note': 'API 无返回数据'
            }
            continue
        # Collect API field names from a representative sample.
        api_fields = set()
        for rec in records[:20]:  # inspect only the first 20 records
            if isinstance(rec, dict):
                api_fields.update(flatten_json_keys(rec))
        print(f" API 字段数: {len(api_fields)}")
        # Diff API fields against ODS columns.
        comparison = compare_fields(api_fields, ods_columns)
        results[spec['code']] = {
            'table_name': spec['table_name'],
            'endpoint': spec['endpoint'],
            'api_records': len(records),
            'api_fields_count': len(comparison['api_fields']),
            'ods_columns_count': len(comparison['ods_columns']),
            'missing_in_ods': comparison['missing_in_ods'],
            'ods_only': comparison['ods_only'],
            'api_fields': comparison['api_fields'],
            'ods_columns': comparison['ods_columns'],
        }
        if comparison['missing_in_ods']:
            print(f" [!] ODS 缺少 {len(comparison['missing_in_ods'])} 个字段:")
            for f in comparison['missing_in_ods'][:10]:
                print(f" - {f}")
            if len(comparison['missing_in_ods']) > 10:
                print(f" ... 还有 {len(comparison['missing_in_ods']) - 10} 个")
        else:
            print(f" [OK] ODS 已包含所有 API 字段")
    conn.close()
    # Summary report: first the tables that are missing API fields.
    print("\n")
    print("=" * 80)
    print("汇总报告 - 每个 ODS 表缺少的 API 字段")
    print("=" * 80)
    for code, data in results.items():
        missing = data.get('missing_in_ods', [])
        if missing or data.get('note'):
            print(f"\n### {code}")
            print(f"表名: `{data['table_name']}`")
            print(f"端点: `{data['endpoint']}`")
            print(f"API 记录数: {data.get('api_records', 0)}")
            if missing:
                print(f"\n**ODS 缺少的字段 ({len(missing)}):**\n")
                print("| 字段名 | 说明 |")
                print("|--------|------|")
                for f in missing:
                    print(f"| `{f}` | |")
            elif data.get('note'):
                print(f"\n备注: {data['note']}")
    # Then the tables that already cover every API field.
    print("\n\n### 已完整的表(无缺失字段)")
    for code, data in results.items():
        missing = data.get('missing_in_ods', [])
        if not missing and not data.get('note'):
            print(f"- {code}: `{data['table_name']}` [OK]")
    # Persist the detailed comparison next to this script.
    output_file = os.path.join(os.path.dirname(__file__), 'api_ods_comparison.json')
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(results, f, ensure_ascii=False, indent=2)
    print(f"\n\n详细结果已保存至: {output_file}")


if __name__ == '__main__':
    main()

View File

@@ -0,0 +1,181 @@
# -*- coding: utf-8 -*-
"""
详细双向对比 - 针对可能相关的字段
"""
import os
import json
import re
import psycopg2
from psycopg2.extras import RealDictCursor
# Postgres DSN for the test warehouse.
# NOTE(review): credentials are hard-coded here; consider reading them from an
# environment variable (e.g. PG_DSN) as the other tmp scripts do.
DSN = 'postgresql://local-Python:Neo-local-1991125@100.64.0.4:5432/LLZQ-test'
# Field pairs that need a detailed manual review: for each ODS table, the
# keywords used to pull out the potentially related API/ODS columns.
REVIEW_PAIRS = [
    {
        'code': 'ODS_TABLE_USE',
        'table': 'billiards_ods.table_fee_transactions',
        'keywords': ['service', 'money', 'real'],
    },
    {
        'code': 'ODS_ASSISTANT_LEDGER',
        'table': 'billiards_ods.assistant_service_records',
        'keywords': ['service', 'money', 'real'],
    },
    {
        'code': 'ODS_MEMBER_CARD',
        'table': 'billiards_ods.member_stored_value_cards',
        'keywords': ['balance', 'principal', 'freeze', 'recharge'],
    },
    {
        'code': 'ODS_MEMBER_BALANCE',
        'table': 'billiards_ods.member_balance_changes',
        'keywords': ['before', 'after', 'principal', 'change'],
    },
    {
        'code': 'ODS_SETTLEMENT_RECORDS',
        'table': 'billiards_ods.settlement_records',
        'keywords': ['coupon', 'sale', 'amount', 'pl', 'tenant'],
    },
    {
        'code': 'ODS_RECHARGE_SETTLE',
        'table': 'billiards_ods.recharge_settlements',
        'keywords': ['coupon', 'sale', 'amount', 'pl', 'tenant'],
    },
    {
        'code': 'ODS_GROUP_PACKAGE',
        'table': 'billiards_ods.group_buy_packages',
        'keywords': ['table', 'area', 'name', 'list', 'tenant'],
    },
]
def get_ods_columns(conn, table_name):
    """Return {column_name: data_type} for an ODS table.

    Accepts a schema-qualified name; an unqualified name defaults to the
    'public' schema.
    """
    schema, sep, name = table_name.partition('.')
    if not sep:
        schema, name = 'public', table_name
    sql = """
        SELECT column_name, data_type
        FROM information_schema.columns
        WHERE table_schema = %s AND table_name = %s
        ORDER BY ordinal_position
    """
    with conn.cursor(cursor_factory=RealDictCursor) as cur:
        cur.execute(sql, (schema, name))
        return {rec['column_name']: rec['data_type'] for rec in cur.fetchall()}
def normalize(s):
    """Normalize a field name: lower-case it and strip all underscores."""
    return ''.join(ch for ch in s.lower() if ch != '_')
def filter_by_keywords(fields, keywords):
    """Keep fields whose normalized form contains any keyword.

    Normalization lower-cases the name and removes underscores; the result
    is de-duplicated and sorted.
    """
    matched = {
        field
        for field in fields
        if any(kw in field.lower().replace('_', '') for kw in keywords)
    }
    return sorted(matched)
def main():
    """Cross-check API fields against ODS columns for the REVIEW_PAIRS tables.

    Reads the field inventory produced by the earlier comparison run
    (api_ods_comparison.json), filters both sides by per-table keywords,
    then prints exact/partial name matches and the leftovers on each side.
    """
    # Load the API field inventory produced by the previous comparison script.
    json_path = os.path.join(os.path.dirname(__file__), 'api_ods_comparison.json')
    with open(json_path, 'r', encoding='utf-8') as f:
        results = json.load(f)
    conn = psycopg2.connect(DSN)
    print("=" * 100)
    print("双向详细对比 - 可能相关的字段")
    print("=" * 100)
    for review in REVIEW_PAIRS:
        code = review['code']
        table = review['table']
        keywords = review['keywords']
        if code not in results:
            continue
        data = results[code]
        api_fields = data.get('api_fields', [])
        # Fetch the current ODS columns for this table.
        ods_columns = get_ods_columns(conn, table)
        # Narrow both sides down to the keyword-related fields.
        api_related = filter_by_keywords(api_fields, keywords)
        ods_related = filter_by_keywords(ods_columns.keys(), keywords)
        print(f"\n{'='*80}")
        print(f"### {code}")
        print(f"表: {table}")
        print(f"关键词: {keywords}")
        print(f"{'='*80}")
        print(f"\n**API 相关字段 ({len(api_related)}):**")
        for f in api_related:
            print(f" - {f}")
        print(f"\n**ODS 相关字段 ({len(ods_related)}):**")
        for f in ods_related:
            dtype = ods_columns.get(f, '')
            print(f" - {f} ({dtype})")
        # Name-matching analysis between the two sides.
        print(f"\n**匹配分析:**")
        # Build the mapping between API names and ODS names.
        matched_api = set()
        matched_ods = set()
        mappings = []
        for api_f in api_related:
            api_norm = normalize(api_f)
            for ods_f in ods_related:
                ods_norm = normalize(ods_f)
                # Exact match after normalization.
                if api_norm == ods_norm:
                    mappings.append((api_f, ods_f, 'exact', '完全匹配'))
                    matched_api.add(api_f)
                    matched_ods.add(ods_f)
                # Substring containment counts as a partial match.
                # NOTE(review): partial matches do not mark either side as
                # matched, so the same API field can appear in several pairs.
                elif api_norm in ods_norm or ods_norm in api_norm:
                    if api_f not in matched_api:
                        mappings.append((api_f, ods_f, 'partial', '部分匹配'))
        if mappings:
            print("\n| API 字段 | ODS 字段 | 类型 | 说明 |")
            print("|----------|----------|------|------|")
            for api_f, ods_f, mtype, desc in mappings:
                print(f"| `{api_f}` | `{ods_f}` | {mtype} | {desc} |")
        # API fields with no ODS counterpart.
        unmatched_api = set(api_related) - matched_api
        if unmatched_api:
            print(f"\n**API 未匹配字段:**")
            for f in sorted(unmatched_api):
                print(f" - {f}")
        # ODS columns with no API counterpart.
        unmatched_ods = set(ods_related) - matched_ods
        if unmatched_ods:
            print(f"\n**ODS 未匹配字段:**")
            for f in sorted(unmatched_ods):
                print(f" - {f}")
    conn.close()
    # Closing banner for the manual review notes.
    print("\n")
    print("=" * 100)
    print("最终审核结论")
    print("=" * 100)


if __name__ == '__main__':
    main()

5404
tmp/dwd_schema.json Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,38 @@
{
"generated_at": "2026-02-02T19:14:29.766314",
"ods_coverage": [
{
"table": "billiards_ods.table_fee_transactions",
"column": "activity_discount_amount",
"total": 28162,
"non_null": 33,
"coverage": 0.1171791776152262,
"zero_count": 33
},
{
"table": "billiards_ods.table_fee_transactions",
"column": "real_service_money",
"total": 28162,
"non_null": 33,
"coverage": 0.1171791776152262,
"zero_count": 33
},
{
"table": "billiards_ods.table_fee_transactions",
"column": "order_consumption_type",
"total": 28162,
"non_null": 33,
"coverage": 0.1171791776152262,
"zero_count": 0
},
{
"table": "billiards_ods.assistant_service_records",
"column": "real_service_money",
"total": 10093,
"non_null": 10,
"coverage": 0.09907856930545923,
"zero_count": 10
}
],
"dwd_coverage": []
}

180
tmp/fix_bd_manual.py Normal file
View File

@@ -0,0 +1,180 @@
# -*- coding: utf-8 -*-
"""自动修复 bd_manual 文档中的类型不匹配问题"""
import json
import re
from pathlib import Path
def fix_type_in_doc(doc_path, type_mismatches):
    """Rewrite wrong column types inside a markdown data-dictionary table.

    Each entry in type_mismatches carries 'column', 'doc_type' (the wrong
    type currently in the doc) and 'db_type' (the authoritative database
    type). Rows are matched first in the form ``| seq | column | type |``
    and, failing that, ``| column | type |``.

    Returns True when the file was modified and rewritten, False otherwise.
    """
    if not Path(doc_path).exists():
        print(f" SKIP: {doc_path} not found")
        return False
    content = Path(doc_path).read_text(encoding='utf-8')
    original = content
    for m in type_mismatches:
        col_name = m['column']
        old_type = m['doc_type']
        new_type = m['db_type']
        # BUGFIX: always use \g<N> group references. The previous fallback
        # rewrote them to bare \1/\2, which raises "invalid group reference"
        # whenever the new type starts with a digit (e.g. "\1" + "8BYTE").
        # Column/type text is also re.escape()d so regex metacharacters in
        # names cannot corrupt the pattern.
        replacement = rf'\g<1>{new_type}\g<2>'
        # Primary row form: | seq | column | type | ...
        pattern = rf'(\|\s*\d+\s*\|\s*{re.escape(col_name)}\s*\|\s*){re.escape(old_type)}(\s*\|)'
        content, count = re.subn(pattern, replacement, content)
        if count > 0:
            print(f" Fixed: {col_name}: {old_type} -> {new_type}")
        else:
            # Fallback row form without the sequence-number cell.
            pattern2 = rf'(\|\s*{re.escape(col_name)}\s*\|\s*){re.escape(old_type)}(\s*\|)'
            content, count = re.subn(pattern2, replacement, content)
            if count > 0:
                print(f" Fixed (alt): {col_name}: {old_type} -> {new_type}")
            else:
                print(f" WARN: Could not fix {col_name}")
    if content != original:
        Path(doc_path).write_text(content, encoding='utf-8')
        return True
    return False
def add_missing_field(doc_path, table_name, field_name, db_schema):
    """Append a missing column row to a markdown data-dictionary table.

    The new row is inserted right after the row carrying the highest
    sequence number and gets sequence max+1. Returns True on success,
    False when the doc or the field cannot be found, or when the doc
    contains no table rows at all.

    NOTE(review): the description cell is hard-coded to '调整时间';
    confirm this placeholder is intended for every added field.
    """
    if not Path(doc_path).exists():
        return False
    # Locate the column's metadata in the database schema dump.
    field_info = next(
        (col for col in db_schema.get(table_name, []) if col['column'] == field_name),
        None,
    )
    if not field_info:
        print(f" WARN: Could not find {field_name} in db_schema")
        return False
    lines = Path(doc_path).read_text(encoding='utf-8').split('\n')
    # Find the row with the largest sequence number; the new row goes after it.
    # Row form: | seq | name | type | nullable | pk | description |
    insert_idx = None
    last_seq = 0
    row_pattern = re.compile(r'\|\s*(\d+)\s*\|\s*(\w+)\s*\|')
    for idx, line in enumerate(lines):
        hit = row_pattern.match(line)
        if hit and int(hit.group(1)) > last_seq:
            last_seq = int(hit.group(1))
            insert_idx = idx
    if insert_idx is None:
        return False
    nullable = 'YES' if field_info['nullable'] == 'YES' else 'NO'
    new_line = f"| {last_seq + 1} | {field_name} | {field_info['type']} | {nullable} | | 调整时间 |"
    lines.insert(insert_idx + 1, new_line)
    Path(doc_path).write_text('\n'.join(lines), encoding='utf-8')
    print(f" Added: {field_name} (type: {field_info['type']})")
    return True
def main():
    """Apply the recorded doc/database diffs to the bd_manual markdown docs.

    Loads tmp/bd_manual_diff.json, re-reads the live billiards_dwd schema to
    get authoritative column types, then fixes type mismatches and appends
    missing fields in each referenced document.
    """
    # Load the diff data produced by the doc/schema comparison step.
    with open('tmp/bd_manual_diff.json', 'r', encoding='utf-8') as f:
        diffs = json.load(f)
    # Re-read the database schema with full type detail.
    import psycopg2
    DSN = 'postgresql://local-Python:Neo-local-1991125@100.64.0.4:5432/LLZQ-test'
    conn = psycopg2.connect(DSN)
    cur = conn.cursor()
    cur.execute("""
        SELECT table_name, column_name, data_type, is_nullable,
               COALESCE(character_maximum_length, numeric_precision) as max_length,
               numeric_scale
        FROM information_schema.columns
        WHERE table_schema = 'billiards_dwd'
        ORDER BY table_name, ordinal_position
    """)
    db_schema = {}
    # Map Postgres type names to the doc's type vocabulary.
    TYPE_MAP = {
        'bigint': 'BIGINT',
        'integer': 'INTEGER',
        'smallint': 'SMALLINT',
        'numeric': 'NUMERIC',
        'text': 'TEXT',
        'character varying': 'VARCHAR',
        'boolean': 'BOOLEAN',
        'timestamp with time zone': 'TIMESTAMPTZ',
        'timestamp without time zone': 'TIMESTAMP',
        'date': 'DATE',
        'jsonb': 'JSONB',
        'json': 'JSON',
    }
    for row in cur.fetchall():
        table_name, col_name, data_type, nullable, max_len, scale = row
        if table_name not in db_schema:
            db_schema[table_name] = []
        type_str = TYPE_MAP.get(data_type, data_type.upper())
        # BUGFIX: the precision/length used to be emitted as the literal
        # "65,038" (a stray thousands-separated constant), producing bogus
        # types like NUMERIC(65,038,2); interpolate the real values instead.
        if data_type == 'numeric' and max_len and scale is not None:
            type_str = f'NUMERIC({max_len},{scale})'
        elif data_type == 'character varying' and max_len:
            type_str = f'VARCHAR({max_len})'
        db_schema[table_name].append({
            'column': col_name,
            'type': type_str,
            'nullable': nullable,
        })
    cur.close()
    conn.close()
    print("=" * 80)
    print("Fixing BD Manual Documents")
    print("=" * 80)
    fixed_count = 0
    for diff in diffs:
        table = diff['table']
        doc_path = diff.get('doc_path', '')
        if not doc_path:
            continue
        has_changes = False
        # First correct wrong types in the doc...
        if diff.get('type_mismatches'):
            print(f"\n### {table} (type fixes) ###")
            if fix_type_in_doc(doc_path, diff['type_mismatches']):
                has_changes = True
        # ...then append any columns the doc is missing.
        if diff.get('missing_in_doc'):
            print(f"\n### {table} (missing fields) ###")
            for field in diff['missing_in_doc']:
                if add_missing_field(doc_path, table, field, db_schema):
                    has_changes = True
        if has_changes:
            fixed_count += 1
    print("\n" + "=" * 80)
    print(f"Fixed {fixed_count} documents")
    print("=" * 80)


if __name__ == '__main__':
    main()

55
tmp/fix_not_sale_type.py Normal file
View File

@@ -0,0 +1,55 @@
# -*- coding: utf-8 -*-
"""修复 not_sale 字段类型"""
import psycopg2
DSN = 'postgresql://local-Python:Neo-local-1991125@100.64.0.4:5432/LLZQ-test'
def fix_column_type():
    """Recreate the not_sale column as INTEGER wherever it is still boolean.

    NOTE(review): the column is dropped and re-added, so any existing
    not_sale values are discarded; confirm the next ETL run re-fills them.
    """
    conn = psycopg2.connect(DSN)
    conn.autocommit = True
    cur = conn.cursor()
    # Every ODS/DWD table carrying the mis-typed not_sale flag.
    tables_to_fix = [
        ('billiards_ods', 'store_goods_master', 'not_sale', 'INTEGER'),
        ('billiards_ods', 'tenant_goods_master', 'not_sale', 'INTEGER'),
        ('billiards_dwd', 'dim_store_goods', 'not_sale', 'INTEGER'),
        ('billiards_dwd', 'dim_tenant_goods', 'not_sale', 'INTEGER'),
    ]
    for schema, table, column, new_type in tables_to_fix:
        try:
            # Look up the column's current type; absent columns are reported.
            cur.execute("""
                SELECT data_type FROM information_schema.columns
                WHERE table_schema = %s AND table_name = %s AND column_name = %s
            """, (schema, table, column))
            result = cur.fetchone()
            if not result:
                print(f"{schema}.{table}.{column}: column not found")
                continue
            current_type = result[0]
            print(f"{schema}.{table}.{column}: current type = {current_type}")
            if current_type != 'boolean':
                print(f" Already {current_type}, skipping")
                continue
            # Drop the boolean column, then re-create it with the target type.
            cur.execute(f'ALTER TABLE {schema}.{table} DROP COLUMN "{column}"')
            print(f" Dropped column")
            cur.execute(f'ALTER TABLE {schema}.{table} ADD COLUMN "{column}" {new_type}')
            print(f" Re-added as {new_type}")
        except Exception as e:
            print(f"Error fixing {schema}.{table}.{column}: {e}")
    cur.close()
    conn.close()
    print("\nDone!")


if __name__ == '__main__':
    fix_column_type()

View File

@@ -0,0 +1,63 @@
# -*- coding: utf-8 -*-
"""One-off patch script: backfill the last missing DWD columns and values.

Steps (executed in order, each followed by a commit):
  1. Copy principal_data from ODS into dwd_member_balance_change rows where
     principal_change_amount is still NULL (cast text -> numeric).
  2. Add the remaining money columns to dwd_recharge_order (idempotent via
     ADD COLUMN IF NOT EXISTS).
  3. Backfill those columns from billiards_ods.recharge_settlements.
"""
import os
import sys
from pathlib import Path
# Make the etl_billiards package importable and load its .env settings.
project_root = Path(__file__).parent.parent / "etl_billiards"
sys.path.insert(0, str(project_root))
from dotenv import load_dotenv
load_dotenv(project_root / ".env")
from database.connection import DatabaseConnection
dsn = os.getenv("PG_DSN")
db = DatabaseConnection(dsn)
print("=== Fixing remaining issues ===")
# 1. Fix principal_change_amount type mismatch: the ODS value is stored as
#    text, so cast it to numeric while copying into the DWD row.
db.execute("""
    UPDATE billiards_dwd.dwd_member_balance_change d
    SET principal_change_amount = o.principal_data::numeric
    FROM billiards_ods.member_balance_changes o
    WHERE d.balance_change_id = o.id
      AND d.principal_change_amount IS NULL
      AND o.principal_data IS NOT NULL
""")
db.commit()
print("principal_change_amount: fixed")
# 2. Add missing DWD columns for dwd_recharge_order; each column is added in
#    its own transaction so one failure does not abort the rest.
missing_cols = [
    ("pl_coupon_sale_amount", "NUMERIC(18,2)"),
    ("mervou_sales_amount", "NUMERIC(18,2)"),
    ("electricity_money", "NUMERIC(18,2)"),
    ("real_electricity_money", "NUMERIC(18,2)"),
    ("electricity_adjust_money", "NUMERIC(18,2)"),
]
for col, dtype in missing_cols:
    try:
        db.execute(f'ALTER TABLE billiards_dwd.dwd_recharge_order ADD COLUMN IF NOT EXISTS "{col}" {dtype}')
        db.commit()
        print(f"dwd_recharge_order.{col}: column added")
    except Exception as e:
        # Roll back so the connection stays usable for the next column.
        db.rollback()
        print(f"dwd_recharge_order.{col}: {str(e)[:50]}")
# 3. Backfill dwd_recharge_order from ODS. The ODS column names are the
#    lower-cased camelCase originals, hence the run-together identifiers.
db.execute("""
    UPDATE billiards_dwd.dwd_recharge_order d
    SET pl_coupon_sale_amount = o.plcouponsaleamount,
        mervou_sales_amount = o.mervousalesamount,
        electricity_money = o.electricitymoney,
        real_electricity_money = o.realelectricitymoney,
        electricity_adjust_money = o.electricityadjustmoney
    FROM billiards_ods.recharge_settlements o
    WHERE d.recharge_order_id = o.id
""")
db.commit()
print("dwd_recharge_order: backfilled")
db.close()
print("Done")

View File

@@ -0,0 +1,237 @@
# -*- coding: utf-8 -*-
"""
全量数据回写验证脚本
从 2025-07-01 到现在,重新获取 API 数据并入库
"""
import json
import os
import sys
from datetime import datetime, timedelta
from pathlib import Path
# 添加项目路径
project_root = Path(__file__).parent.parent / "etl_billiards"
sys.path.insert(0, str(project_root))
from dotenv import load_dotenv
load_dotenv(project_root / ".env")
from database.connection import DatabaseConnection
def check_ods_field_coverage(db: DatabaseConnection):
    """Report how well the newly added ODS columns are populated.

    For every (table, columns) pair below, prints the table's row count plus
    per-column non-null and zero-value counts, and returns the collected
    stats as a list of dicts ('coverage' is a percentage, 0-100).
    """
    # Newly added columns to audit, grouped by ODS table.
    fields_to_check = [
        ("billiards_ods.table_fee_transactions", ["activity_discount_amount", "real_service_money", "order_consumption_type"]),
        ("billiards_ods.assistant_service_records", ["real_service_money", "assistantteamname"]),
        ("billiards_ods.assistant_cancellation_records", ["tenant_id"]),
        ("billiards_ods.store_goods_sales_records", ["coupon_share_money"]),
        ("billiards_ods.payment_transactions", ["tenant_id"]),
        ("billiards_ods.member_profiles", ["pay_money_sum", "person_tenant_org_id", "recharge_money_sum", "register_source"]),
        ("billiards_ods.member_stored_value_cards", ["principal_balance", "member_grade", "rechargefreezebalance"]),
        ("billiards_ods.member_balance_changes", ["principal_after", "principal_before", "principal_data"]),
        ("billiards_ods.settlement_records", ["tenant_id"]),
        ("billiards_ods.recharge_settlements", ["tenant_id"]),
        ("billiards_ods.group_buy_packages", ["sort", "is_first_limit", "tenantcouponsaleorderitemid"]),
        ("billiards_ods.group_buy_redemption_records", ["coupon_sale_id", "member_discount_money"]),
        ("billiards_ods.site_tables_master", ["order_id"]),
        ("billiards_ods.store_goods_master", ["commodity_code", "not_sale"]),
        ("billiards_ods.table_fee_discount_records", ["table_name", "table_price", "charge_free"]),
        ("billiards_ods.tenant_goods_master", ["not_sale"]),
    ]
    print("\n" + "=" * 80)
    print("ODS 新增字段数据覆盖检查")
    print("=" * 80)
    stats = []
    for table, columns in fields_to_check:
        print(f"\n检查表: {table}")
        try:
            total_rows = db.query(f"SELECT COUNT(*) as cnt FROM {table}")[0]["cnt"]
        except Exception as e:
            print(f" [错误] 无法获取记录数: {e}")
            continue
        schema, name = table.split(".", 1)
        for col in columns:
            try:
                # Skip columns that were never actually created.
                exists = db.query("""
                    SELECT COUNT(*) as cnt FROM information_schema.columns
                    WHERE table_schema = %s AND table_name = %s AND column_name = %s
                """, (schema, name, col.lower()))[0]["cnt"]
                if exists == 0:
                    print(f" {col}: [不存在]")
                    continue
                non_null_rows = db.query(f'SELECT COUNT(*) as cnt FROM {table} WHERE "{col}" IS NOT NULL')[0]["cnt"]
                zero_rows = db.query(f'SELECT COUNT(*) as cnt FROM {table} WHERE "{col}" = 0')[0]["cnt"]
                coverage = (non_null_rows / total_rows * 100) if total_rows > 0 else 0
                print(f" {col}:")
                print(f" - 总记录: {total_rows}, 非空: {non_null_rows} ({coverage:.1f}%), 值为0: {zero_rows}")
                stats.append({
                    "table": table,
                    "column": col,
                    "total": total_rows,
                    "non_null": non_null_rows,
                    "coverage": coverage,
                    "zero_count": zero_rows,
                })
            except Exception as e:
                print(f" {col}: [错误] {e}")
    return stats
def check_dwd_field_coverage(db: DatabaseConnection):
    """Report how well the newly added DWD columns are populated.

    Mirrors check_ods_field_coverage for the DWD layer (no zero-value count):
    prints per-column non-null coverage and returns the stats as a list of
    dicts ('coverage' is a percentage, 0-100).
    """
    # Newly added columns to audit, grouped by DWD table.
    fields_to_check = [
        ("billiards_dwd.dwd_table_fee_log", ["activity_discount_amount", "real_service_money"]),
        ("billiards_dwd.dwd_assistant_service_log", ["real_service_money"]),
        ("billiards_dwd.dwd_assistant_trash_event", ["tenant_id"]),
        ("billiards_dwd.dwd_store_goods_sale", ["coupon_share_money"]),
        ("billiards_dwd.dwd_payment", ["tenant_id"]),
        ("billiards_dwd.dim_member", ["pay_money_sum", "recharge_money_sum"]),
        ("billiards_dwd.dim_member_ex", ["person_tenant_org_id", "register_source"]),
        ("billiards_dwd.dim_member_card_account", ["principal_balance", "member_grade"]),
        ("billiards_dwd.dwd_member_balance_change", ["principal_after", "principal_before", "principal_change_amount"]),
        ("billiards_dwd.dwd_settlement_head", ["tenant_id"]),
        ("billiards_dwd.dwd_recharge_order", ["tenant_id"]),
        ("billiards_dwd.dim_groupbuy_package", ["sort", "is_first_limit"]),
        ("billiards_dwd.dwd_groupbuy_redemption", ["coupon_sale_id", "member_discount_money"]),
        ("billiards_dwd.dim_table", ["order_id"]),
        ("billiards_dwd.dim_store_goods", ["commodity_code", "not_sale"]),
        ("billiards_dwd.dwd_table_fee_adjust", ["table_name", "table_price", "charge_free"]),
        ("billiards_dwd.dim_tenant_goods", ["not_sale"]),
    ]
    print("\n" + "=" * 80)
    print("DWD 新增字段数据覆盖检查")
    print("=" * 80)
    stats = []
    for table, columns in fields_to_check:
        print(f"\n检查表: {table}")
        try:
            total_rows = db.query(f"SELECT COUNT(*) as cnt FROM {table}")[0]["cnt"]
        except Exception as e:
            print(f" [错误] 无法获取记录数: {e}")
            continue
        schema, name = table.split(".", 1)
        for col in columns:
            try:
                # Skip columns that were never actually created.
                exists = db.query("""
                    SELECT COUNT(*) as cnt FROM information_schema.columns
                    WHERE table_schema = %s AND table_name = %s AND column_name = %s
                """, (schema, name, col.lower()))[0]["cnt"]
                if exists == 0:
                    print(f" {col}: [不存在]")
                    continue
                non_null_rows = db.query(f'SELECT COUNT(*) as cnt FROM {table} WHERE "{col}" IS NOT NULL')[0]["cnt"]
                coverage = (non_null_rows / total_rows * 100) if total_rows > 0 else 0
                print(f" {col}: 总记录: {total_rows}, 非空: {non_null_rows} ({coverage:.1f}%)")
                stats.append({
                    "table": table,
                    "column": col,
                    "total": total_rows,
                    "non_null": non_null_rows,
                    "coverage": coverage,
                })
            except Exception as e:
                print(f" {col}: [错误] {e}")
    return stats
def main():
    """Run the field-coverage audit over ODS and DWD and persist a report.

    Requires the PG_DSN environment variable (loaded from etl_billiards/.env
    at module import). Prints per-column coverage, a rated summary, and
    writes field_coverage_report.json next to this script. Returns True on
    success, False when the DSN is missing.
    """
    print("=" * 80)
    print("全量数据回写验证")
    print("时间:", datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
    print("=" * 80)
    # Connect to the database using the DSN from the environment.
    dsn = os.getenv("PG_DSN")
    if not dsn:
        print("[错误] 未找到 PG_DSN 环境变量")
        return False
    db = DatabaseConnection(dsn)
    # Audit the newly added ODS columns.
    ods_results = check_ods_field_coverage(db)
    # Audit the newly added DWD columns.
    dwd_results = check_dwd_field_coverage(db)
    db.close()
    # Rated summary: <50% needs attention, <80% fair, otherwise good.
    print("\n" + "=" * 80)
    print("汇总")
    print("=" * 80)
    print("\nODS 新增字段覆盖率统计:")
    for r in ods_results:
        if r["coverage"] < 50:
            status = "[需关注]"
        elif r["coverage"] < 80:
            status = "[一般]"
        else:
            status = "[良好]"
        print(f" {r['table']}.{r['column']}: {r['coverage']:.1f}% {status}")
    print("\nDWD 新增字段覆盖率统计:")
    for r in dwd_results:
        if r["coverage"] < 50:
            status = "[需关注]"
        elif r["coverage"] < 80:
            status = "[一般]"
        else:
            status = "[良好]"
        print(f" {r['table']}.{r['column']}: {r['coverage']:.1f}% {status}")
    # Persist the raw numbers for later inspection.
    report = {
        "generated_at": datetime.now().isoformat(),
        "ods_coverage": ods_results,
        "dwd_coverage": dwd_results,
    }
    report_file = Path(__file__).parent / "field_coverage_report.json"
    with open(report_file, "w", encoding="utf-8") as f:
        json.dump(report, f, ensure_ascii=False, indent=2)
    print(f"\n报告已保存到: {report_file}")
    return True


if __name__ == "__main__":
    success = main()
    sys.exit(0 if success else 1)

48
tmp/get_dwd_schema.py Normal file
View File

@@ -0,0 +1,48 @@
# -*- coding: utf-8 -*-
"""获取 DWD 所有表的结构"""
import psycopg2
import json
DSN = 'postgresql://local-Python:Neo-local-1991125@100.64.0.4:5432/LLZQ-test'
def get_all_tables(conn, schema='billiards_dwd'):
    """Return {table_name: [column descriptors]} for every table in schema.

    Each descriptor holds column, type, nullable and max_length (character
    length or numeric precision, whichever applies), in ordinal order.
    """
    cur = conn.cursor()
    cur.execute("""
        SELECT table_name, column_name, data_type, is_nullable,
               COALESCE(character_maximum_length, numeric_precision) as max_length
        FROM information_schema.columns
        WHERE table_schema = %s
        ORDER BY table_name, ordinal_position
    """, (schema,))
    tables = {}
    for table_name, col_name, data_type, nullable, max_len in cur.fetchall():
        tables.setdefault(table_name, []).append({
            'column': col_name,
            'type': data_type,
            'nullable': nullable,
            'max_length': max_len
        })
    cur.close()
    return tables
def main():
    """Dump every billiards_dwd table's columns to tmp/dwd_schema.json."""
    conn = psycopg2.connect(DSN)
    tables = get_all_tables(conn)
    conn.close()
    # Persist the schema snapshot for the doc-fixing scripts.
    with open('tmp/dwd_schema.json', 'w', encoding='utf-8') as fh:
        json.dump(tables, fh, ensure_ascii=False, indent=2)
    print(f"Found {len(tables)} tables")
    for tbl, cols in sorted(tables.items()):
        print(f" {tbl}: {len(cols)} columns")


if __name__ == '__main__':
    main()

9
tmp/list_dwd_tables.py Normal file
View File

@@ -0,0 +1,9 @@
# -*- coding: utf-8 -*-
"""List every table in the billiards_dwd schema, one name per line."""
import psycopg2

DSN = 'postgresql://local-Python:Neo-local-1991125@100.64.0.4:5432/LLZQ-test'

connection = psycopg2.connect(DSN)
cursor = connection.cursor()
cursor.execute("SELECT table_name FROM information_schema.tables WHERE table_schema = 'billiards_dwd' ORDER BY table_name")
for row in cursor.fetchall():
    print(row[0])
connection.close()

19
tmp/query_schema.py Normal file
View File

@@ -0,0 +1,19 @@
# -*- coding: utf-8 -*-
"""Print every billiards_dwd table with its columns in ordinal order."""
import psycopg2
from collections import defaultdict

conn = psycopg2.connect('postgresql://local-Python:Neo-local-1991125@100.64.0.4:5432/LLZQ-test')
cur = conn.cursor()
cur.execute("SELECT table_name, column_name, data_type, ordinal_position FROM information_schema.columns WHERE table_schema = 'billiards_dwd' ORDER BY table_name, ordinal_position")
results = cur.fetchall()
# Group (column, type, position) rows under their owning table.
tables = defaultdict(list)
for row in results:
    tables[row[0]].append((row[1], row[2], row[3]))
for table in sorted(tables.keys()):
    # BUGFIX: the header used to print mojibake '琛ㄥ悕' (the UTF-8 bytes of
    # '表名' mis-decoded as GBK); restore the intended "table name" label.
    print(f'\n表名: {table}')
    cols = tables[table]
    for col, dtype, pos in cols:
        print(f' {pos}. {col} ({dtype})')
cur.close()
conn.close()

994
tmp/schema_output.txt Normal file
View File

@@ -0,0 +1,994 @@

?? assistant_accounts_master
------------------------------------------------------------
1. id (bigint)
2. tenant_id (bigint)
3. site_id (bigint)
4. assistant_no (text)
5. nickname (text)
6. real_name (text)
7. mobile (text)
8. team_id (bigint)
9. team_name (text)
10. user_id (bigint)
11. level (text)
12. assistant_status (integer)
13. work_status (integer)
14. leave_status (integer)
15. entry_time (timestamp without time zone)
16. resign_time (timestamp without time zone)
17. start_time (timestamp without time zone)
18. end_time (timestamp without time zone)
19. create_time (timestamp without time zone)
20. update_time (timestamp without time zone)
21. order_trade_no (text)
22. staff_id (bigint)
23. staff_profile_id (bigint)
24. system_role_id (bigint)
25. avatar (text)
26. birth_date (timestamp without time zone)
27. gender (integer)
28. height (numeric)
29. weight (numeric)
30. job_num (text)
31. show_status (integer)
32. show_sort (integer)
33. sum_grade (numeric)
34. assistant_grade (numeric)
35. get_grade_times (integer)
36. introduce (text)
37. video_introduction_url (text)
38. group_id (bigint)
39. group_name (text)
40. shop_name (text)
41. charge_way (integer)
42. entry_type (integer)
43. allow_cx (integer)
44. is_guaranteed (integer)
45. salary_grant_enabled (integer)
46. light_status (integer)
47. online_status (integer)
48. is_delete (integer)
49. cx_unit_price (numeric)
50. pd_unit_price (numeric)
51. last_table_id (bigint)
52. last_table_name (text)
53. person_org_id (bigint)
54. serial_number (bigint)
55. is_team_leader (integer)
56. criticism_status (integer)
57. last_update_name (text)
58. ding_talk_synced (integer)
59. site_light_cfg_id (bigint)
60. light_equipment_id (text)
61. entry_sign_status (integer)
62. resign_sign_status (integer)
63. source_file (text)
64. source_endpoint (text)
65. fetched_at (timestamp with time zone)
66. payload (jsonb)
67. content_hash (text)
?? assistant_cancellation_records
------------------------------------------------------------
1. id (bigint)
2. siteid (bigint)
3. siteprofile (jsonb)
4. assistantname (text)
5. assistantabolishamount (numeric)
6. assistanton (integer)
7. pdchargeminutes (integer)
8. tableareaid (bigint)
9. tablearea (text)
10. tableid (bigint)
11. tablename (text)
12. trashreason (text)
13. createtime (timestamp without time zone)
14. source_file (text)
15. source_endpoint (text)
16. fetched_at (timestamp with time zone)
17. payload (jsonb)
18. content_hash (text)
?? assistant_service_records
------------------------------------------------------------
1. id (bigint)
2. tenant_id (bigint)
3. site_id (bigint)
4. siteprofile (jsonb)
5. site_table_id (bigint)
6. order_settle_id (bigint)
7. order_trade_no (text)
8. order_pay_id (bigint)
9. order_assistant_id (bigint)
10. order_assistant_type (integer)
11. assistantname (text)
12. assistantno (text)
13. assistant_level (text)
14. levelname (text)
15. site_assistant_id (bigint)
16. skill_id (bigint)
17. skillname (text)
18. system_member_id (bigint)
19. tablename (text)
20. tenant_member_id (bigint)
21. user_id (bigint)
22. assistant_team_id (bigint)
23. nickname (text)
24. ledger_name (text)
25. ledger_group_name (text)
26. ledger_amount (numeric)
27. ledger_count (numeric)
28. ledger_unit_price (numeric)
29. ledger_status (integer)
30. ledger_start_time (timestamp without time zone)
31. ledger_end_time (timestamp without time zone)
32. manual_discount_amount (numeric)
33. member_discount_amount (numeric)
34. coupon_deduct_money (numeric)
35. service_money (numeric)
36. projected_income (numeric)
37. real_use_seconds (integer)
38. income_seconds (integer)
39. start_use_time (timestamp without time zone)
40. last_use_time (timestamp without time zone)
41. create_time (timestamp without time zone)
42. is_single_order (integer)
43. is_delete (integer)
44. is_trash (integer)
45. trash_reason (text)
46. trash_applicant_id (bigint)
47. trash_applicant_name (text)
48. operator_id (bigint)
49. operator_name (text)
50. salesman_name (text)
51. salesman_org_id (bigint)
52. salesman_user_id (bigint)
53. person_org_id (bigint)
54. add_clock (integer)
55. returns_clock (integer)
56. composite_grade (numeric)
57. composite_grade_time (timestamp without time zone)
58. skill_grade (numeric)
59. service_grade (numeric)
60. sum_grade (numeric)
61. grade_status (integer)
62. get_grade_times (integer)
63. is_not_responding (integer)
64. is_confirm (integer)
65. payload (jsonb)
66. source_file (text)
67. source_endpoint (text)
68. fetched_at (timestamp with time zone)
69. content_hash (text)
?? goods_stock_movements
------------------------------------------------------------
1. sitegoodsstockid (bigint)
2. tenantid (bigint)
3. siteid (bigint)
4. sitegoodsid (bigint)
5. goodsname (text)
6. goodscategoryid (bigint)
7. goodssecondcategoryid (bigint)
8. unit (text)
9. price (numeric)
10. stocktype (integer)
11. changenum (numeric)
12. startnum (numeric)
13. endnum (numeric)
14. changenuma (numeric)
15. startnuma (numeric)
16. endnuma (numeric)
17. remark (text)
18. operatorname (text)
19. createtime (timestamp without time zone)
20. source_file (text)
21. source_endpoint (text)
22. fetched_at (timestamp with time zone)
23. payload (jsonb)
24. content_hash (text)
?? goods_stock_summary
------------------------------------------------------------
1. sitegoodsid (bigint)
2. goodsname (text)
3. goodsunit (text)
4. goodscategoryid (bigint)
5. goodscategorysecondid (bigint)
6. categoryname (text)
7. rangestartstock (numeric)
8. rangeendstock (numeric)
9. rangein (numeric)
10. rangeout (numeric)
11. rangesale (numeric)
12. rangesalemoney (numeric)
13. rangeinventory (numeric)
14. currentstock (numeric)
15. source_file (text)
16. source_endpoint (text)
17. fetched_at (timestamp with time zone)
18. payload (jsonb)
19. content_hash (text)
?? group_buy_packages
------------------------------------------------------------
1. id (bigint)
2. package_id (bigint)
3. package_name (text)
4. selling_price (numeric)
5. coupon_money (numeric)
6. date_type (integer)
7. date_info (text)
8. start_time (timestamp without time zone)
9. end_time (timestamp without time zone)
10. start_clock (text)
11. end_clock (text)
12. add_start_clock (text)
13. add_end_clock (text)
14. duration (integer)
15. usable_count (integer)
16. usable_range (integer)
17. table_area_id (bigint)
18. table_area_name (text)
19. table_area_id_list (jsonb)
20. tenant_table_area_id (bigint)
21. tenant_table_area_id_list (jsonb)
22. site_id (bigint)
23. site_name (text)
24. tenant_id (bigint)
25. card_type_ids (jsonb)
26. group_type (integer)
27. system_group_type (integer)
28. type (integer)
29. effective_status (integer)
30. is_enabled (integer)
31. is_delete (integer)
32. max_selectable_categories (integer)
33. area_tag_type (integer)
34. creator_name (text)
35. create_time (timestamp without time zone)
36. source_file (text)
37. source_endpoint (text)
38. fetched_at (timestamp with time zone)
39. payload (jsonb)
40. content_hash (text)
?? group_buy_redemption_records
------------------------------------------------------------
1. id (bigint)
2. tenant_id (bigint)
3. site_id (bigint)
4. sitename (text)
5. table_id (bigint)
6. tablename (text)
7. tableareaname (text)
8. tenant_table_area_id (bigint)
9. order_trade_no (text)
10. order_settle_id (bigint)
11. order_pay_id (bigint)
12. order_coupon_id (bigint)
13. order_coupon_channel (integer)
14. coupon_code (text)
15. coupon_money (numeric)
16. coupon_origin_id (bigint)
17. ledger_name (text)
18. ledger_group_name (text)
19. ledger_amount (numeric)
20. ledger_count (numeric)
21. ledger_unit_price (numeric)
22. ledger_status (integer)
23. table_charge_seconds (integer)
24. promotion_activity_id (bigint)
25. promotion_coupon_id (bigint)
26. promotion_seconds (integer)
27. offer_type (integer)
28. assistant_promotion_money (numeric)
29. assistant_service_promotion_money (numeric)
30. table_service_promotion_money (numeric)
31. goods_promotion_money (numeric)
32. recharge_promotion_money (numeric)
33. reward_promotion_money (numeric)
34. goodsoptionprice (numeric)
35. salesman_name (text)
36. sales_man_org_id (bigint)
37. salesman_role_id (bigint)
38. salesman_user_id (bigint)
39. operator_id (bigint)
40. operator_name (text)
41. is_single_order (integer)
42. is_delete (integer)
43. create_time (timestamp without time zone)
44. payload (jsonb)
45. source_file (text)
46. source_endpoint (text)
47. fetched_at (timestamp with time zone)
48. content_hash (text)
?? member_balance_changes
------------------------------------------------------------
1. tenant_id (bigint)
2. site_id (bigint)
3. register_site_id (bigint)
4. registersitename (text)
5. paysitename (text)
6. id (bigint)
7. tenant_member_id (bigint)
8. tenant_member_card_id (bigint)
9. system_member_id (bigint)
10. membername (text)
11. membermobile (text)
12. card_type_id (bigint)
13. membercardtypename (text)
14. account_data (numeric)
15. before (numeric)
16. after (numeric)
17. refund_amount (numeric)
18. from_type (integer)
19. payment_method (integer)
20. relate_id (bigint)
21. remark (text)
22. operator_id (bigint)
23. operator_name (text)
24. is_delete (integer)
25. create_time (timestamp without time zone)
26. source_file (text)
27. source_endpoint (text)
28. fetched_at (timestamp with time zone)
29. payload (jsonb)
30. content_hash (text)
?? member_profiles
------------------------------------------------------------
1. tenant_id (bigint)
2. register_site_id (bigint)
3. site_name (text)
4. id (bigint)
5. system_member_id (bigint)
6. member_card_grade_code (bigint)
7. member_card_grade_name (text)
8. mobile (text)
9. nickname (text)
10. point (numeric)
11. growth_value (numeric)
12. referrer_member_id (bigint)
13. status (integer)
14. user_status (integer)
15. create_time (timestamp without time zone)
16. source_file (text)
17. source_endpoint (text)
18. fetched_at (timestamp with time zone)
19. payload (jsonb)
20. content_hash (text)
?? member_stored_value_cards
------------------------------------------------------------
1. tenant_id (bigint)
2. tenant_member_id (bigint)
3. system_member_id (bigint)
4. register_site_id (bigint)
5. site_name (text)
6. id (bigint)
7. member_card_grade_code (bigint)
8. member_card_grade_code_name (text)
9. member_card_type_name (text)
10. member_name (text)
11. member_mobile (text)
12. card_type_id (bigint)
13. card_no (text)
14. card_physics_type (text)
15. balance (numeric)
16. denomination (numeric)
17. table_discount (numeric)
18. goods_discount (numeric)
19. assistant_discount (numeric)
20. assistant_reward_discount (numeric)
21. table_service_discount (numeric)
22. assistant_service_discount (numeric)
23. coupon_discount (numeric)
24. goods_service_discount (numeric)
25. assistant_discount_sub_switch (integer)
26. table_discount_sub_switch (integer)
27. goods_discount_sub_switch (integer)
28. assistant_reward_discount_sub_switch (integer)
29. table_service_deduct_radio (numeric)
30. assistant_service_deduct_radio (numeric)
31. goods_service_deduct_radio (numeric)
32. assistant_deduct_radio (numeric)
33. table_deduct_radio (numeric)
34. goods_deduct_radio (numeric)
35. coupon_deduct_radio (numeric)
36. assistant_reward_deduct_radio (numeric)
37. tablecarddeduct (numeric)
38. tableservicecarddeduct (numeric)
39. goodscardeduct (numeric)
40. goodsservicecarddeduct (numeric)
41. assistantcarddeduct (numeric)
42. assistantservicecarddeduct (numeric)
43. assistantrewardcarddeduct (numeric)
44. cardsettlededuct (numeric)
45. couponcarddeduct (numeric)
46. deliveryfeededuct (numeric)
47. use_scene (integer)
48. able_cross_site (integer)
49. able_site_transfer (integer)
50. is_allow_give (integer)
51. is_allow_order_deduct (integer)
52. is_delete (integer)
53. bind_password (text)
54. goods_discount_range_type (integer)
55. goodscategoryid (bigint)
56. tableareaid (bigint)
57. effect_site_id (bigint)
58. start_time (timestamp without time zone)
59. end_time (timestamp without time zone)
60. disable_start_time (timestamp without time zone)
61. disable_end_time (timestamp without time zone)
62. last_consume_time (timestamp without time zone)
63. create_time (timestamp without time zone)
64. status (integer)
65. sort (integer)
66. tenantavatar (text)
67. tenantname (text)
68. pdassisnatlevel (text)
69. cxassisnatlevel (text)
70. source_file (text)
71. source_endpoint (text)
72. fetched_at (timestamp with time zone)
73. payload (jsonb)
74. content_hash (text)
?? payment_transactions
------------------------------------------------------------
1. id (bigint)
2. site_id (bigint)
3. siteprofile (jsonb)
4. relate_type (integer)
5. relate_id (bigint)
6. pay_amount (numeric)
7. pay_status (integer)
8. pay_time (timestamp without time zone)
9. create_time (timestamp without time zone)
10. payment_method (integer)
11. online_pay_channel (integer)
12. source_file (text)
13. source_endpoint (text)
14. fetched_at (timestamp with time zone)
15. payload (jsonb)
16. content_hash (text)
?? platform_coupon_redemption_records
------------------------------------------------------------
1. id (bigint)
2. verify_id (bigint)
3. certificate_id (text)
4. coupon_code (text)
5. coupon_name (text)
6. coupon_channel (integer)
7. groupon_type (integer)
8. group_package_id (bigint)
9. sale_price (numeric)
10. coupon_money (numeric)
11. coupon_free_time (numeric)
12. coupon_cover (text)
13. coupon_remark (text)
14. use_status (integer)
15. consume_time (timestamp without time zone)
16. create_time (timestamp without time zone)
17. deal_id (text)
18. channel_deal_id (text)
19. site_id (bigint)
20. site_order_id (bigint)
21. table_id (bigint)
22. tenant_id (bigint)
23. operator_id (bigint)
24. operator_name (text)
25. is_delete (integer)
26. siteprofile (jsonb)
27. source_file (text)
28. source_endpoint (text)
29. fetched_at (timestamp with time zone)
30. payload (jsonb)
31. content_hash (text)
?? recharge_settlements
------------------------------------------------------------
1. id (bigint)
2. tenantid (bigint)
3. siteid (bigint)
4. sitename (text)
5. balanceamount (numeric)
6. cardamount (numeric)
7. cashamount (numeric)
8. couponamount (numeric)
9. createtime (timestamp with time zone)
10. memberid (bigint)
11. membername (text)
12. tenantmembercardid (bigint)
13. membercardtypename (text)
14. memberphone (text)
15. tableid (bigint)
16. consumemoney (numeric)
17. onlineamount (numeric)
18. operatorid (bigint)
19. operatorname (text)
20. revokeorderid (bigint)
21. revokeordername (text)
22. revoketime (timestamp with time zone)
23. payamount (numeric)
24. pointamount (numeric)
25. refundamount (numeric)
26. settlename (text)
27. settlerelateid (bigint)
28. settlestatus (integer)
29. settletype (integer)
30. paytime (timestamp with time zone)
31. roundingamount (numeric)
32. paymentmethod (integer)
33. adjustamount (numeric)
34. assistantcxmoney (numeric)
35. assistantpdmoney (numeric)
36. couponsaleamount (numeric)
37. memberdiscountamount (numeric)
38. tablechargemoney (numeric)
39. goodsmoney (numeric)
40. realgoodsmoney (numeric)
41. servicemoney (numeric)
42. prepaymoney (numeric)
43. salesmanname (text)
44. orderremark (text)
45. salesmanuserid (bigint)
46. canberevoked (boolean)
47. pointdiscountprice (numeric)
48. pointdiscountcost (numeric)
49. activitydiscount (numeric)
50. serialnumber (bigint)
51. assistantmanualdiscount (numeric)
52. allcoupondiscount (numeric)
53. goodspromotionmoney (numeric)
54. assistantpromotionmoney (numeric)
55. isusecoupon (boolean)
56. isusediscount (boolean)
57. isactivity (boolean)
58. isbindmember (boolean)
59. isfirst (integer)
60. rechargecardamount (numeric)
61. giftcardamount (numeric)
62. source_file (text)
63. source_endpoint (text)
64. fetched_at (timestamp with time zone)
65. payload (jsonb)
66. content_hash (text)
?? refund_transactions
------------------------------------------------------------
1. id (bigint)
2. tenant_id (bigint)
3. tenantname (text)
4. site_id (bigint)
5. siteprofile (jsonb)
6. relate_type (integer)
7. relate_id (bigint)
8. pay_sn (text)
9. pay_amount (numeric)
10. refund_amount (numeric)
11. round_amount (numeric)
12. pay_status (integer)
13. pay_time (timestamp without time zone)
14. create_time (timestamp without time zone)
15. payment_method (integer)
16. pay_terminal (integer)
17. pay_config_id (bigint)
18. online_pay_channel (integer)
19. online_pay_type (integer)
20. channel_fee (numeric)
21. channel_payer_id (text)
22. channel_pay_no (text)
23. member_id (bigint)
24. member_card_id (bigint)
25. cashier_point_id (bigint)
26. operator_id (bigint)
27. action_type (integer)
28. check_status (integer)
29. is_revoke (integer)
30. is_delete (integer)
31. balance_frozen_amount (numeric)
32. card_frozen_amount (numeric)
33. source_file (text)
34. source_endpoint (text)
35. fetched_at (timestamp with time zone)
36. payload (jsonb)
37. content_hash (text)
?? settlement_records
------------------------------------------------------------
1. id (bigint)
2. tenantid (bigint)
3. siteid (bigint)
4. sitename (text)
5. balanceamount (numeric)
6. cardamount (numeric)
7. cashamount (numeric)
8. couponamount (numeric)
9. createtime (timestamp with time zone)
10. memberid (bigint)
11. membername (text)
12. tenantmembercardid (bigint)
13. membercardtypename (text)
14. memberphone (text)
15. tableid (bigint)
16. consumemoney (numeric)
17. onlineamount (numeric)
18. operatorid (bigint)
19. operatorname (text)
20. revokeorderid (bigint)
21. revokeordername (text)
22. revoketime (timestamp with time zone)
23. payamount (numeric)
24. pointamount (numeric)
25. refundamount (numeric)
26. settlename (text)
27. settlerelateid (bigint)
28. settlestatus (integer)
29. settletype (integer)
30. paytime (timestamp with time zone)
31. roundingamount (numeric)
32. paymentmethod (integer)
33. adjustamount (numeric)
34. assistantcxmoney (numeric)
35. assistantpdmoney (numeric)
36. couponsaleamount (numeric)
37. memberdiscountamount (numeric)
38. tablechargemoney (numeric)
39. goodsmoney (numeric)
40. realgoodsmoney (numeric)
41. servicemoney (numeric)
42. prepaymoney (numeric)
43. salesmanname (text)
44. orderremark (text)
45. salesmanuserid (bigint)
46. canberevoked (boolean)
47. pointdiscountprice (numeric)
48. pointdiscountcost (numeric)
49. activitydiscount (numeric)
50. serialnumber (bigint)
51. assistantmanualdiscount (numeric)
52. allcoupondiscount (numeric)
53. goodspromotionmoney (numeric)
54. assistantpromotionmoney (numeric)
55. isusecoupon (boolean)
56. isusediscount (boolean)
57. isactivity (boolean)
58. isbindmember (boolean)
59. isfirst (integer)
60. rechargecardamount (numeric)
61. giftcardamount (numeric)
62. source_file (text)
63. source_endpoint (text)
64. fetched_at (timestamp with time zone)
65. payload (jsonb)
66. content_hash (text)
?? settlement_ticket_details
------------------------------------------------------------
1. ordersettleid (bigint)
2. actualpayment (numeric)
3. adjustamount (numeric)
4. assistantmanualdiscount (numeric)
5. balanceamount (numeric)
6. cashiername (text)
7. consumemoney (numeric)
8. couponamount (numeric)
9. deliveryaddress (text)
10. deliveryfee (numeric)
11. ledgeramount (numeric)
12. memberdeductamount (numeric)
13. memberofferamount (numeric)
14. onlinereturnamount (numeric)
15. orderremark (text)
16. ordersettlenumber (bigint)
17. paymemberbalance (numeric)
18. paytime (timestamp without time zone)
19. paymentmethod (integer)
20. pointdiscountcost (numeric)
21. pointdiscountprice (numeric)
22. prepaymoney (numeric)
23. refundamount (numeric)
24. returngoodsamount (numeric)
25. rewardname (text)
26. settletype (text)
27. siteaddress (text)
28. sitebusinesstel (text)
29. siteid (bigint)
30. sitename (text)
31. tenantid (bigint)
32. tenantname (text)
33. ticketcustomcontent (text)
34. ticketremark (text)
35. vouchermoney (numeric)
36. memberprofile (jsonb)
37. orderitem (jsonb)
38. tenantmembercardlogs (jsonb)
39. payload (jsonb)
40. source_file (text)
41. source_endpoint (text)
42. fetched_at (timestamp with time zone)
43. content_hash (text)
?? site_tables_master
------------------------------------------------------------
1. id (bigint)
2. site_id (bigint)
3. sitename (text)
4. appletQrCodeUrl (text)
5. areaname (text)
6. audit_status (integer)
7. charge_free (integer)
8. create_time (timestamp without time zone)
9. delay_lights_time (integer)
10. is_online_reservation (integer)
11. is_rest_area (integer)
12. light_status (integer)
13. only_allow_groupon (integer)
14. order_delay_time (integer)
15. self_table (integer)
16. show_status (integer)
17. site_table_area_id (bigint)
18. tablestatusname (text)
19. table_cloth_use_cycle (integer)
20. table_cloth_use_time (timestamp without time zone)
21. table_name (text)
22. table_price (numeric)
23. table_status (integer)
24. temporary_light_second (integer)
25. virtual_table (integer)
26. source_file (text)
27. source_endpoint (text)
28. fetched_at (timestamp with time zone)
29. payload (jsonb)
30. content_hash (text)
?? stock_goods_category_tree
------------------------------------------------------------
1. id (bigint)
2. tenant_id (bigint)
3. category_name (text)
4. alias_name (text)
5. pid (bigint)
6. business_name (text)
7. tenant_goods_business_id (bigint)
8. open_salesman (integer)
9. categoryboxes (jsonb)
10. sort (integer)
11. is_warehousing (integer)
12. source_file (text)
13. source_endpoint (text)
14. fetched_at (timestamp with time zone)
15. payload (jsonb)
16. content_hash (text)
?? store_goods_master
------------------------------------------------------------
1. id (bigint)
2. tenant_id (bigint)
3. site_id (bigint)
4. sitename (text)
5. tenant_goods_id (bigint)
6. goods_name (text)
7. goods_bar_code (text)
8. goods_category_id (bigint)
9. goods_second_category_id (bigint)
10. onecategoryname (text)
11. twocategoryname (text)
12. unit (text)
13. sale_price (numeric)
14. cost_price (numeric)
15. cost_price_type (integer)
16. min_discount_price (numeric)
17. safe_stock (numeric)
18. stock (numeric)
19. stock_a (numeric)
20. sale_num (numeric)
21. total_purchase_cost (numeric)
22. total_sales (numeric)
23. average_monthly_sales (numeric)
24. batch_stock_quantity (numeric)
25. days_available (integer)
26. provisional_total_cost (numeric)
27. enable_status (integer)
28. audit_status (integer)
29. goods_state (integer)
30. is_delete (integer)
31. is_warehousing (integer)
32. able_discount (integer)
33. able_site_transfer (integer)
34. forbid_sell_status (integer)
35. freeze (integer)
36. send_state (integer)
37. custom_label_type (integer)
38. option_required (integer)
39. sale_channel (integer)
40. sort (integer)
41. remark (text)
42. pinyin_initial (text)
43. goods_cover (text)
44. create_time (timestamp without time zone)
45. update_time (timestamp without time zone)
46. payload (jsonb)
47. source_file (text)
48. source_endpoint (text)
49. fetched_at (timestamp with time zone)
50. content_hash (text)
?? store_goods_sales_records
------------------------------------------------------------
1. id (bigint)
2. tenant_id (bigint)
3. site_id (bigint)
4. siteid (bigint)
5. sitename (text)
6. site_goods_id (bigint)
7. tenant_goods_id (bigint)
8. order_settle_id (bigint)
9. order_trade_no (text)
10. order_goods_id (bigint)
11. ordergoodsid (bigint)
12. order_pay_id (bigint)
13. order_coupon_id (bigint)
14. ledger_name (text)
15. ledger_group_name (text)
16. ledger_amount (numeric)
17. ledger_count (numeric)
18. ledger_unit_price (numeric)
19. ledger_status (integer)
20. discount_money (numeric)
21. discount_price (numeric)
22. coupon_deduct_money (numeric)
23. member_discount_amount (numeric)
24. option_coupon_deduct_money (numeric)
25. option_member_discount_money (numeric)
26. point_discount_money (numeric)
27. point_discount_money_cost (numeric)
28. real_goods_money (numeric)
29. cost_money (numeric)
30. push_money (numeric)
31. sales_type (integer)
32. is_single_order (integer)
33. is_delete (integer)
34. goods_remark (text)
35. option_price (numeric)
36. option_value_name (text)
37. option_name (text)
38. member_coupon_id (bigint)
39. package_coupon_id (bigint)
40. sales_man_org_id (bigint)
41. salesman_name (text)
42. salesman_role_id (bigint)
43. salesman_user_id (bigint)
44. operator_id (bigint)
45. operator_name (text)
46. opensalesman (text)
47. returns_number (integer)
48. site_table_id (bigint)
49. tenant_goods_business_id (bigint)
50. tenant_goods_category_id (bigint)
51. create_time (timestamp without time zone)
52. payload (jsonb)
53. source_file (text)
54. source_endpoint (text)
55. fetched_at (timestamp with time zone)
56. content_hash (text)
?? table_fee_discount_records
------------------------------------------------------------
1. id (bigint)
2. tenant_id (bigint)
3. site_id (bigint)
4. siteprofile (jsonb)
5. site_table_id (bigint)
6. tableprofile (jsonb)
7. tenant_table_area_id (bigint)
8. adjust_type (integer)
9. ledger_amount (numeric)
10. ledger_count (numeric)
11. ledger_name (text)
12. ledger_status (integer)
13. applicant_id (bigint)
14. applicant_name (text)
15. operator_id (bigint)
16. operator_name (text)
17. order_settle_id (bigint)
18. order_trade_no (text)
19. is_delete (integer)
20. create_time (timestamp without time zone)
21. source_file (text)
22. source_endpoint (text)
23. fetched_at (timestamp with time zone)
24. payload (jsonb)
25. content_hash (text)
?? table_fee_transactions
------------------------------------------------------------
1. id (bigint)
2. tenant_id (bigint)
3. site_id (bigint)
4. siteprofile (jsonb)
5. site_table_id (bigint)
6. site_table_area_id (bigint)
7. site_table_area_name (text)
8. tenant_table_area_id (bigint)
9. order_trade_no (text)
10. order_pay_id (bigint)
11. order_settle_id (bigint)
12. ledger_name (text)
13. ledger_amount (numeric)
14. ledger_count (numeric)
15. ledger_unit_price (numeric)
16. ledger_status (integer)
17. ledger_start_time (timestamp without time zone)
18. ledger_end_time (timestamp without time zone)
19. start_use_time (timestamp without time zone)
20. last_use_time (timestamp without time zone)
21. real_table_use_seconds (integer)
22. real_table_charge_money (numeric)
23. add_clock_seconds (integer)
24. adjust_amount (numeric)
25. coupon_promotion_amount (numeric)
26. member_discount_amount (numeric)
27. used_card_amount (numeric)
28. mgmt_fee (numeric)
29. service_money (numeric)
30. fee_total (numeric)
31. is_single_order (integer)
32. is_delete (integer)
33. member_id (bigint)
34. operator_id (bigint)
35. operator_name (text)
36. salesman_name (text)
37. salesman_org_id (bigint)
38. salesman_user_id (bigint)
39. create_time (timestamp without time zone)
40. payload (jsonb)
41. source_file (text)
42. source_endpoint (text)
43. fetched_at (timestamp with time zone)
44. content_hash (text)
?? tenant_goods_master
------------------------------------------------------------
1. id (bigint)
2. tenant_id (bigint)
3. goods_name (text)
4. goods_bar_code (text)
5. goods_category_id (bigint)
6. goods_second_category_id (bigint)
7. categoryname (text)
8. unit (text)
9. goods_number (text)
10. out_goods_id (text)
11. goods_state (integer)
12. sale_channel (integer)
13. able_discount (integer)
14. able_site_transfer (integer)
15. is_delete (integer)
16. is_warehousing (integer)
17. isinsite (integer)
18. cost_price (numeric)
19. cost_price_type (integer)
20. market_price (numeric)
21. min_discount_price (numeric)
22. common_sale_royalty (numeric)
23. point_sale_royalty (numeric)
24. pinyin_initial (text)
25. commoditycode (text)
26. commodity_code (text)
27. goods_cover (text)
28. supplier_id (bigint)
29. remark_name (text)
30. create_time (timestamp without time zone)
31. update_time (timestamp without time zone)
32. payload (jsonb)
33. source_file (text)
34. source_endpoint (text)
35. fetched_at (timestamp with time zone)
36. content_hash (text)

View File

@@ -0,0 +1,238 @@
# -*- coding: utf-8 -*-
"""
同步 API 字段到 ODS 数据库表
1. 检测 API JSON 字段与 ODS 表列的差异
2. 生成并执行 DDL 添加缺失列
3. 忽略 siteProfile 等嵌套对象字段
"""
import json
import os
import sys
from datetime import datetime
from pathlib import Path
# 添加项目路径
project_root = Path(__file__).parent.parent / "etl_billiards"
sys.path.insert(0, str(project_root))
from dotenv import load_dotenv
load_dotenv(project_root / ".env")
from database.connection import DatabaseConnection
# Fields to skip when syncing API fields to ODS columns: siteProfile-related
# nested fields and other non-business bookkeeping columns.
IGNORED_FIELDS = {
    # Fields nested inside the siteProfile object
    "siteprofile", "address", "avatar", "business_tel", "customer_service_qrcode",
    "customer_service_wechat", "fixed_pay_qrcode", "full_address", "latitude", "longitude",
    "light_status", "light_token", "light_type", "org_id", "prod_env", "shop_name",
    "shop_status", "site_label", "site_type", "tenant_site_region_id", "wifi_name",
    "wifi_password", "attendance_distance", "attendance_enabled", "auto_light",
    "ewelink_client_id",
    # Fields nested inside the tableprofile object
    "tableprofile",
    # System/bookkeeping columns that already exist on every ODS table
    "content_hash", "payload", "source_file", "source_endpoint", "fetched_at", "record_index",
}
# Heuristic rules for mapping API field names/values to PostgreSQL column types.
def infer_column_type(field_name: str, sample_value=None) -> str:
    """Infer a PostgreSQL column type from an API field name and optional sample value.

    Name-based heuristics are checked top to bottom and take priority; the
    sample value is only consulted when no naming rule matched.  Falls back
    to TEXT when nothing can be inferred.
    """
    name = field_name.lower()

    # 1) Identifier columns -> BIGINT.
    known_ids = ("id", "tenant_id", "member_id", "site_id", "table_id",
                 "operator_id", "relate_id", "order_id")
    if name.endswith("_id") or name in known_ids:
        return "BIGINT"

    # 2) Monetary columns -> NUMERIC(18,2).  Checked before ratio/count rules,
    #    so e.g. "assistant_deduct_radio" lands here via "_deduct".
    money_hints = ("_money", "_amount", "_price", "_cost", "_discount",
                   "_balance", "_deduct", "_fee", "_charge",
                   "money", "amount", "price")
    if any(hint in name for hint in money_hints):
        return "NUMERIC(18,2)"

    # 3) Timestamp columns -> TIMESTAMP (substring match, so "times" also hits).
    time_hints = ("_time", "time", "_date", "date")
    if any(hint in name for hint in time_hints) or name.startswith(("create", "update")):
        return "TIMESTAMP"

    # 4) Boolean-like flags are stored as INTEGER (0/1) upstream.
    if name.startswith(("is_", "can_", "able_")):
        return "INTEGER"

    # 5) Quantities / durations -> INTEGER.
    count_hints = ("_count", "_num", "_seconds", "_minutes",
                   "count", "num", "seconds")
    if any(hint in name for hint in count_hints):
        return "INTEGER"

    # 6) Ratios ("radio" is the upstream API's misspelling) -> NUMERIC(10,4).
    if any(hint in name for hint in ("_radio", "_ratio", "_rate")):
        return "NUMERIC(10,4)"

    # 7) No naming rule matched: infer from the sample value, if provided.
    #    bool must be tested before int (bool is an int subclass).
    if sample_value is not None:
        if isinstance(sample_value, bool):
            return "BOOLEAN"
        if isinstance(sample_value, int):
            # Values outside the 32-bit signed range need BIGINT.
            return "INTEGER" if -2147483648 <= sample_value <= 2147483647 else "BIGINT"
        if isinstance(sample_value, float):
            return "NUMERIC(18,2)"
        if isinstance(sample_value, (list, dict)):
            return "JSONB"

    # 8) Default fallback.
    return "TEXT"
def get_db_table_columns(db: DatabaseConnection, table_name: str) -> set:
    """Return the lower-cased column names of *table_name*.

    Accepts either a qualified "schema.table" name or a bare table name,
    which defaults to the "public" schema.
    """
    if "." in table_name:
        schema, name = table_name.split(".", 1)
    else:
        schema, name = "public", table_name
    sql = """
    SELECT column_name
    FROM information_schema.columns
    WHERE table_schema = %s AND table_name = %s
    """
    # db.query returns dict-like rows keyed by column name.
    return {row["column_name"].lower() for row in db.query(sql, (schema, name))}
def get_api_fields_from_comparison(comparison_file: Path) -> dict:
    """Load the API-vs-ODS comparison JSON; return an empty dict when absent."""
    if not comparison_file.exists():
        return {}
    # Decode explicitly as UTF-8 so Chinese field descriptions load correctly.
    return json.loads(comparison_file.read_text(encoding="utf-8"))
def generate_ddl_for_missing_fields(table_name: str, missing_fields: list, api_data: dict = None) -> list:
    """Build ALTER TABLE ... ADD COLUMN statements for every missing field.

    When *api_data* is supplied, the first of (at most) its first ten "data"
    records containing the field provides a sample value for type inference.
    """
    statements = []
    # Hoist the candidate-record slice out of the per-field loop.
    sample_records = (api_data or {}).get("data", [])[:10]
    for field in missing_fields:
        sample = None
        for record in sample_records:
            if isinstance(record, dict) and field in record:
                sample = record[field]
                break
        pg_type = infer_column_type(field, sample)
        # IF NOT EXISTS makes the DDL safe to re-run.
        statements.append(
            f'ALTER TABLE {table_name} ADD COLUMN IF NOT EXISTS "{field}" {pg_type};'
        )
    return statements
def main():
    """Synchronize API-discovered fields into the ODS tables.

    Reads the pre-computed comparison file ``api_ods_comparison.json``,
    determines which columns are genuinely missing from each ODS table,
    generates and executes ``ALTER TABLE ... ADD COLUMN`` DDL, and writes
    an execution log next to this script.

    Returns:
        bool: True when every generated DDL executed successfully,
        False on configuration errors or any failed DDL.
    """
    print("=" * 80)
    print("API → ODS 字段同步脚本")
    print("时间:", datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
    print("=" * 80)
    # Connect to the database (DSN comes from the environment).
    dsn = os.getenv("PG_DSN")
    if not dsn:
        print("[错误] 未找到 PG_DSN 环境变量")
        # FIX: was a bare `return` (None); return False explicitly so the
        # caller's `sys.exit(0 if success else 1)` contract is well-defined
        # and consistent with the sibling sync scripts.
        return False
    db = DatabaseConnection(dsn)
    # Load the pre-computed API vs ODS comparison data.
    comparison_file = Path(__file__).parent / "api_ods_comparison.json"
    comparison = get_api_fields_from_comparison(comparison_file)
    if not comparison:
        print("[错误] 未找到对比文件 api_ods_comparison.json")
        db.close()
        # FIX: explicit False instead of implicit None (see above).
        return False
    all_ddl = []
    executed_ddl = []
    failed_ddl = []
    for task_code, data in comparison.items():
        table_name = data.get("table_name")
        missing = data.get("missing_in_ods", [])
        if not table_name or not missing:
            continue
        # Drop fields that are deliberately ignored.
        filtered_missing = [
            f for f in missing
            if f.lower() not in IGNORED_FIELDS
        ]
        if not filtered_missing:
            continue
        # Current columns straight from the database catalog.
        current_cols = get_db_table_columns(db, table_name)
        # Second pass: drop columns that already exist in the table.
        truly_missing = [
            f for f in filtered_missing
            if f.lower() not in current_cols
        ]
        if not truly_missing:
            print(f"\n【{task_code}】({table_name})")
            print(f"  所有缺失字段已在数据库中存在,跳过")
            continue
        print(f"\n【{task_code}】({table_name})")
        print(f"  需要添加 {len(truly_missing)} 列: {', '.join(truly_missing)}")
        # Generate the ALTER TABLE statements.
        ddl_list = generate_ddl_for_missing_fields(table_name, truly_missing)
        all_ddl.extend(ddl_list)
        # Execute each DDL in its own transaction so one failure
        # does not roll back the others.
        for ddl in ddl_list:
            try:
                db.execute(ddl)
                db.commit()
                executed_ddl.append(ddl)
                print(f"  [成功] {ddl[:80]}...")
            except Exception as e:
                db.rollback()
                failed_ddl.append((ddl, str(e)))
                print(f"  [失败] {ddl[:60]}... - {e}")
    db.close()
    # Summary section.
    print("\n" + "=" * 80)
    print("执行汇总")
    print("=" * 80)
    print(f"总计生成 DDL: {len(all_ddl)}")
    print(f"执行成功: {len(executed_ddl)}")
    print(f"执行失败: {len(failed_ddl)}")
    if failed_ddl:
        print("\n失败的 DDL:")
        for ddl, err in failed_ddl:
            print(f"  - {ddl}")
            print(f"    错误: {err}")
    # Persist the execution log next to this script.
    log_file = Path(__file__).parent / "sync_ods_columns_log.json"
    log = {
        "executed_at": datetime.now().isoformat(),
        "total_ddl": len(all_ddl),
        "success_count": len(executed_ddl),
        "failed_count": len(failed_ddl),
        "executed_ddl": executed_ddl,
        "failed_ddl": [{"ddl": d, "error": e} for d, e in failed_ddl],
    }
    with open(log_file, "w", encoding="utf-8") as f:
        json.dump(log, f, ensure_ascii=False, indent=2)
    print(f"\n执行日志已保存到: {log_file}")
    return len(failed_ddl) == 0
# Script entry point: exit code 0 only when main() reports full success.
if __name__ == "__main__":
    success = main()
    sys.exit(0 if success else 1)

181
tmp/sync_bd_manual.py Normal file
View File

@@ -0,0 +1,181 @@
# -*- coding: utf-8 -*-
"""校验并同步 bd_manual 文档与数据库结构"""
import json
import re
from pathlib import Path
import psycopg2
# SECURITY NOTE(review): database credentials are hardcoded in source —
# presumably a throwaway dev script; prefer reading the DSN from an
# environment variable (e.g. PG_DSN) as the other sync scripts do.
DSN = 'postgresql://local-Python:Neo-local-1991125@100.64.0.4:5432/LLZQ-test'
# Type map (PostgreSQL data_type -> display format used in the markdown docs).
TYPE_MAP = {
    'bigint': 'BIGINT',
    'integer': 'INTEGER',
    'smallint': 'SMALLINT',
    'numeric': 'NUMERIC',
    'text': 'TEXT',
    'character varying': 'VARCHAR',
    'boolean': 'BOOLEAN',
    'timestamp with time zone': 'TIMESTAMPTZ',
    'timestamp without time zone': 'TIMESTAMP',
    'date': 'DATE',
    'jsonb': 'JSONB',
    'json': 'JSON',
}
def get_db_schema():
    """Read the ``billiards_dwd`` schema from ``information_schema``.

    Returns:
        dict: ``{table_name: [{'column', 'type', 'nullable'}, ...]}`` with
        columns in ordinal order and types rendered in the display format
        used by the BD-manual markdown docs (e.g. ``NUMERIC(18,2)``).
    """
    conn = psycopg2.connect(DSN)
    cur = conn.cursor()
    cur.execute("""
        SELECT table_name, column_name, data_type, is_nullable,
               COALESCE(character_maximum_length, numeric_precision) as max_length,
               numeric_scale
        FROM information_schema.columns
        WHERE table_schema = 'billiards_dwd'
        ORDER BY table_name, ordinal_position
    """)
    tables = {}
    for row in cur.fetchall():
        table_name, col_name, data_type, nullable, max_len, scale = row
        if table_name not in tables:
            tables[table_name] = []
        # Render the type in the docs' display format.
        type_str = TYPE_MAP.get(data_type, data_type.upper())
        if data_type == 'numeric' and max_len and scale is not None:
            # FIX: was the garbled literal f'NUMERIC(47,440,{scale})';
            # use the real precision reported by the catalog.
            type_str = f'NUMERIC({max_len},{scale})'
        elif data_type == 'character varying' and max_len:
            # FIX: was the garbled literal f'VARCHAR(47,440)'.
            type_str = f'VARCHAR({max_len})'
        tables[table_name].append({
            'column': col_name,
            'type': type_str,
            'nullable': 'YES' if nullable == 'YES' else 'NO',
        })
    cur.close()
    conn.close()
    return tables
def parse_md_fields(content):
    """Parse the field table of a BD-manual markdown document.

    Matches rows shaped like ``| <seq> | <column> | <type> | <nullable> |``
    and returns ``{column: {'type': ..., 'nullable': ...}}``.
    """
    row_re = re.compile(r'\|\s*\d+\s*\|\s*(\w+)\s*\|\s*([^|]+)\s*\|\s*(\w+)\s*\|')
    return {
        m.group(1).strip(): {
            'type': m.group(2).strip(),
            'nullable': m.group(3).strip(),
        }
        for m in row_re.finditer(content)
    }
def compare_and_report(table_name, db_cols, doc_path):
    """Diff a table's DB columns against its markdown documentation.

    Returns a dict listing fields missing from the doc, extra fields in the
    doc, and type mismatches; or ``{'missing_doc': True, 'table': ...}``
    when the document file does not exist.
    """
    if not doc_path.exists():
        return {'missing_doc': True, 'table': table_name}

    doc_fields = parse_md_fields(doc_path.read_text(encoding='utf-8'))

    db_names = {col['column'] for col in db_cols}
    doc_names = set(doc_fields)

    def _norm(type_text):
        # Types compare case-insensitively with all spaces stripped.
        return type_text.upper().replace(' ', '')

    mismatches = [
        {
            'column': col['column'],
            'db_type': col['type'],
            'doc_type': doc_fields[col['column']]['type'],
        }
        for col in db_cols
        if col['column'] in doc_fields
        and _norm(col['type']) != _norm(doc_fields[col['column']]['type'])
    ]

    return {
        'table': table_name,
        'missing_in_doc': list(db_names - doc_names),
        'extra_in_doc': list(doc_names - db_names),
        'type_mismatches': mismatches,
        'doc_path': str(doc_path),
    }
def main():
    """Compare DWD table schemas against their BD-manual markdown docs.

    Prints a per-table report (fields missing from / extra in the docs and
    type mismatches) and dumps the details to ``tmp/bd_manual_diff.json``.
    """
    db_schema = get_db_schema()
    main_dir = Path('etl_billiards/docs/bd_manual/main')
    ex_dir = Path('etl_billiards/docs/bd_manual/Ex')
    all_diffs = []
    for table_name, columns in sorted(db_schema.items()):
        # Pick the doc directory: "_ex" extension tables live under Ex/.
        # FIX: removed the unused local `base_name = table_name[:-3]` —
        # the doc filename always uses the full table name.
        if table_name.endswith('_ex'):
            doc_path = ex_dir / f'BD_manual_{table_name}.md'
        else:
            doc_path = main_dir / f'BD_manual_{table_name}.md'
        diff = compare_and_report(table_name, columns, doc_path)
        if diff.get('missing_in_doc') or diff.get('extra_in_doc') or diff.get('type_mismatches') or diff.get('missing_doc'):
            all_diffs.append(diff)
    # Human-readable report.
    print("=" * 80)
    print("BD Manual vs Database Schema Comparison Report")
    print("=" * 80)
    total_missing = 0
    total_extra = 0
    total_type_mismatch = 0
    for diff in all_diffs:
        table = diff['table']
        if diff.get('missing_doc'):
            print(f"\n### {table}: MISSING DOCUMENT ###")
            continue
        # Lazily print the table header only once per table with issues.
        has_issues = False
        if diff['missing_in_doc']:
            if not has_issues:
                print(f"\n### {table} ###")
                has_issues = True
            print(f"  Missing in doc ({len(diff['missing_in_doc'])}): {', '.join(sorted(diff['missing_in_doc']))}")
            total_missing += len(diff['missing_in_doc'])
        if diff['extra_in_doc']:
            if not has_issues:
                print(f"\n### {table} ###")
                has_issues = True
            print(f"  Extra in doc ({len(diff['extra_in_doc'])}): {', '.join(sorted(diff['extra_in_doc']))}")
            total_extra += len(diff['extra_in_doc'])
        if diff['type_mismatches']:
            if not has_issues:
                print(f"\n### {table} ###")
                has_issues = True
            print(f"  Type mismatches ({len(diff['type_mismatches'])}):")
            for m in diff['type_mismatches']:
                print(f"    - {m['column']}: doc={m['doc_type']}, db={m['db_type']}")
            total_type_mismatch += len(diff['type_mismatches'])
    print("\n" + "=" * 80)
    print(f"Summary: {total_missing} missing, {total_extra} extra, {total_type_mismatch} type mismatches")
    print("=" * 80)
    # Machine-readable diff for follow-up tooling.
    with open('tmp/bd_manual_diff.json', 'w', encoding='utf-8') as f:
        json.dump(all_diffs, f, ensure_ascii=False, indent=2)
    print(f"\nDetailed results saved to tmp/bd_manual_diff.json")
# Script entry point: run the comparison when invoked directly.
if __name__ == '__main__':
    main()

View File

@@ -0,0 +1,10 @@
{
"executed_at": "2026-02-02T19:12:00.539963",
"total_ddl": 1,
"success_count": 1,
"failed_count": 0,
"executed_ddl": [
"ALTER TABLE billiards_dwd.dwd_member_balance_change ADD COLUMN IF NOT EXISTS \"principal_change_amount\" NUMERIC(18,2);"
],
"failed_ddl": []
}

View File

@@ -0,0 +1,13 @@
{
"executed_at": "2026-02-02T19:10:13.492902",
"total_ddl": 4,
"success_count": 4,
"failed_count": 0,
"executed_ddl": [
"ALTER TABLE billiards_ods.settlement_records ADD COLUMN IF NOT EXISTS \"tenant_id\" BIGINT;",
"ALTER TABLE billiards_ods.recharge_settlements ADD COLUMN IF NOT EXISTS \"tenant_id\" BIGINT;",
"ALTER TABLE billiards_ods.group_buy_packages ADD COLUMN IF NOT EXISTS \"tableareanamelist\" TEXT;",
"ALTER TABLE billiards_ods.group_buy_packages ADD COLUMN IF NOT EXISTS \"tenanttableareaidlist\" TEXT;"
],
"failed_ddl": []
}

View File

@@ -0,0 +1,259 @@
# -*- coding: utf-8 -*-
"""
同步 ODS 字段到 DWD 数据库表
1. 检测 ODS 新增字段对应的 DWD 表是否缺失列
2. 根据 dwd_load_task.py 的 FACT_MAPPINGS 生成 DDL
"""
import json
import os
import sys
from datetime import datetime
from pathlib import Path
# 添加项目路径
project_root = Path(__file__).parent.parent / "etl_billiards"
sys.path.insert(0, str(project_root))
from dotenv import load_dotenv
load_dotenv(project_root / ".env")
from database.connection import DatabaseConnection
# ODS -> DWD table mapping (extracted from dwd_load_task.py).
# NOTE(review): this map is not referenced by main() in this script —
# presumably kept for documentation/reference; confirm before removing.
ODS_TO_DWD_MAP = {
    "billiards_ods.table_fee_transactions": [
        "billiards_dwd.dim_site", "billiards_dwd.dim_site_ex",
        "billiards_dwd.dwd_table_fee_log", "billiards_dwd.dwd_table_fee_log_ex",
    ],
    "billiards_ods.site_tables_master": [
        "billiards_dwd.dim_table", "billiards_dwd.dim_table_ex",
    ],
    "billiards_ods.assistant_accounts_master": [
        "billiards_dwd.dim_assistant", "billiards_dwd.dim_assistant_ex",
    ],
    "billiards_ods.assistant_service_records": [
        "billiards_dwd.dwd_assistant_service_log", "billiards_dwd.dwd_assistant_service_log_ex",
    ],
    "billiards_ods.assistant_cancellation_records": [
        "billiards_dwd.dwd_assistant_trash_event", "billiards_dwd.dwd_assistant_trash_event_ex",
    ],
    "billiards_ods.store_goods_sales_records": [
        "billiards_dwd.dwd_store_goods_sale", "billiards_dwd.dwd_store_goods_sale_ex",
    ],
    "billiards_ods.payment_transactions": [
        "billiards_dwd.dwd_payment",
    ],
    "billiards_ods.member_profiles": [
        "billiards_dwd.dim_member", "billiards_dwd.dim_member_ex",
    ],
    "billiards_ods.member_stored_value_cards": [
        "billiards_dwd.dim_member_card_account", "billiards_dwd.dim_member_card_account_ex",
    ],
    "billiards_ods.member_balance_changes": [
        "billiards_dwd.dwd_member_balance_change", "billiards_dwd.dwd_member_balance_change_ex",
    ],
    "billiards_ods.settlement_records": [
        "billiards_dwd.dwd_settlement_head", "billiards_dwd.dwd_settlement_head_ex",
    ],
    "billiards_ods.recharge_settlements": [
        "billiards_dwd.dwd_recharge_order", "billiards_dwd.dwd_recharge_order_ex",
    ],
    "billiards_ods.group_buy_packages": [
        "billiards_dwd.dim_groupbuy_package", "billiards_dwd.dim_groupbuy_package_ex",
    ],
    "billiards_ods.group_buy_redemption_records": [
        "billiards_dwd.dwd_groupbuy_redemption", "billiards_dwd.dwd_groupbuy_redemption_ex",
    ],
    "billiards_ods.table_fee_discount_records": [
        "billiards_dwd.dwd_table_fee_adjust", "billiards_dwd.dwd_table_fee_adjust_ex",
    ],
    "billiards_ods.tenant_goods_master": [
        "billiards_dwd.dim_tenant_goods", "billiards_dwd.dim_tenant_goods_ex",
    ],
    "billiards_ods.store_goods_master": [
        "billiards_dwd.dim_store_goods", "billiards_dwd.dim_store_goods_ex",
    ],
}
# New ODS columns that must be propagated into DWD (from the audit report).
# Format: {ods_table: [(ods_col, dwd_col, dwd_table, col_type), ...]}
NEW_FIELDS_TO_DWD = {
    "billiards_ods.table_fee_transactions": [
        ("activity_discount_amount", "activity_discount_amount", "billiards_dwd.dwd_table_fee_log", "NUMERIC(18,2)"),
        ("real_service_money", "real_service_money", "billiards_dwd.dwd_table_fee_log", "NUMERIC(18,2)"),
        ("order_consumption_type", "order_consumption_type", "billiards_dwd.dwd_table_fee_log_ex", "INTEGER"),
    ],
    "billiards_ods.assistant_service_records": [
        ("real_service_money", "real_service_money", "billiards_dwd.dwd_assistant_service_log", "NUMERIC(18,2)"),
        ("assistantteamname", "assistant_team_name", "billiards_dwd.dwd_assistant_service_log_ex", "TEXT"),
    ],
    "billiards_ods.assistant_cancellation_records": [
        ("tenant_id", "tenant_id", "billiards_dwd.dwd_assistant_trash_event", "BIGINT"),
    ],
    "billiards_ods.store_goods_sales_records": [
        ("coupon_share_money", "coupon_share_money", "billiards_dwd.dwd_store_goods_sale", "NUMERIC(18,2)"),
    ],
    "billiards_ods.payment_transactions": [
        ("tenant_id", "tenant_id", "billiards_dwd.dwd_payment", "BIGINT"),
    ],
    "billiards_ods.member_profiles": [
        ("pay_money_sum", "pay_money_sum", "billiards_dwd.dim_member", "NUMERIC(18,2)"),
        ("recharge_money_sum", "recharge_money_sum", "billiards_dwd.dim_member", "NUMERIC(18,2)"),
        ("person_tenant_org_id", "person_tenant_org_id", "billiards_dwd.dim_member_ex", "BIGINT"),
        ("person_tenant_org_name", "person_tenant_org_name", "billiards_dwd.dim_member_ex", "TEXT"),
        ("register_source", "register_source", "billiards_dwd.dim_member_ex", "TEXT"),
    ],
    "billiards_ods.member_stored_value_cards": [
        ("principal_balance", "principal_balance", "billiards_dwd.dim_member_card_account", "NUMERIC(18,2)"),
        ("member_grade", "member_grade", "billiards_dwd.dim_member_card_account", "INTEGER"),
        ("able_share_member_discount", "able_share_member_discount", "billiards_dwd.dim_member_card_account_ex", "BOOLEAN"),
        ("electricity_deduct_radio", "electricity_deduct_radio", "billiards_dwd.dim_member_card_account_ex", "NUMERIC(10,4)"),
        ("electricity_discount", "electricity_discount", "billiards_dwd.dim_member_card_account_ex", "NUMERIC(10,4)"),
        ("electricitycarddeduct", "electricity_card_deduct", "billiards_dwd.dim_member_card_account_ex", "BOOLEAN"),
        ("rechargefreezebalance", "recharge_freeze_balance", "billiards_dwd.dim_member_card_account_ex", "NUMERIC(18,2)"),
    ],
    "billiards_ods.member_balance_changes": [
        ("principal_after", "principal_after", "billiards_dwd.dwd_member_balance_change", "NUMERIC(18,2)"),
        ("principal_before", "principal_before", "billiards_dwd.dwd_member_balance_change", "NUMERIC(18,2)"),
        ("principal_data", "principal_change_amount", "billiards_dwd.dwd_member_balance_change", "NUMERIC(18,2)"),
    ],
    "billiards_ods.settlement_records": [
        ("tenant_id", "tenant_id", "billiards_dwd.dwd_settlement_head", "BIGINT"),
    ],
    "billiards_ods.recharge_settlements": [
        ("tenant_id", "tenant_id", "billiards_dwd.dwd_recharge_order", "BIGINT"),
    ],
    "billiards_ods.group_buy_packages": [
        ("sort", "sort", "billiards_dwd.dim_groupbuy_package", "INTEGER"),
        ("is_first_limit", "is_first_limit", "billiards_dwd.dim_groupbuy_package", "BOOLEAN"),
        ("tenantcouponsaleorderitemid", "tenant_coupon_sale_order_item_id", "billiards_dwd.dim_groupbuy_package_ex", "BIGINT"),
    ],
    "billiards_ods.group_buy_redemption_records": [
        ("coupon_sale_id", "coupon_sale_id", "billiards_dwd.dwd_groupbuy_redemption", "BIGINT"),
        ("member_discount_money", "member_discount_money", "billiards_dwd.dwd_groupbuy_redemption", "NUMERIC(18,2)"),
        ("assistant_share_money", "assistant_share_money", "billiards_dwd.dwd_groupbuy_redemption_ex", "NUMERIC(18,2)"),
        ("table_share_money", "table_share_money", "billiards_dwd.dwd_groupbuy_redemption_ex", "NUMERIC(18,2)"),
        ("goods_share_money", "goods_share_money", "billiards_dwd.dwd_groupbuy_redemption_ex", "NUMERIC(18,2)"),
        ("recharge_share_money", "recharge_share_money", "billiards_dwd.dwd_groupbuy_redemption_ex", "NUMERIC(18,2)"),
    ],
    "billiards_ods.site_tables_master": [
        ("order_id", "order_id", "billiards_dwd.dim_table", "BIGINT"),
    ],
    "billiards_ods.store_goods_master": [
        ("commodity_code", "commodity_code", "billiards_dwd.dim_store_goods", "TEXT"),
        ("not_sale", "not_sale", "billiards_dwd.dim_store_goods", "INTEGER"),
    ],
    "billiards_ods.table_fee_discount_records": [
        ("table_name", "table_name", "billiards_dwd.dwd_table_fee_adjust", "TEXT"),
        ("table_price", "table_price", "billiards_dwd.dwd_table_fee_adjust", "NUMERIC(18,2)"),
        ("charge_free", "charge_free", "billiards_dwd.dwd_table_fee_adjust", "BOOLEAN"),
        ("area_type_id", "area_type_id", "billiards_dwd.dwd_table_fee_adjust_ex", "BIGINT"),
        ("site_table_area_id", "site_table_area_id", "billiards_dwd.dwd_table_fee_adjust_ex", "BIGINT"),
        ("site_table_area_name", "site_table_area_name", "billiards_dwd.dwd_table_fee_adjust_ex", "TEXT"),
        ("sitename", "site_name", "billiards_dwd.dwd_table_fee_adjust_ex", "TEXT"),
        ("tenant_name", "tenant_name", "billiards_dwd.dwd_table_fee_adjust_ex", "TEXT"),
    ],
    "billiards_ods.tenant_goods_master": [
        ("not_sale", "not_sale", "billiards_dwd.dim_tenant_goods", "INTEGER"),
    ],
}
def get_db_table_columns(db: DatabaseConnection, table_name: str) -> set:
    """Return the lower-cased column names of *table_name*.

    A name without a schema qualifier is resolved against ``public``.
    """
    schema, _, name = table_name.partition(".")
    if not name:
        schema, name = "public", table_name
    rows = db.query(
        "SELECT column_name"
        " FROM information_schema.columns"
        " WHERE table_schema = %s AND table_name = %s",
        (schema, name),
    )
    return {row["column_name"].lower() for row in rows}
def main():
    """Sync new ODS columns into the corresponding DWD tables.

    Walks NEW_FIELDS_TO_DWD, checks each target DWD table for the mapped
    column, and runs ``ALTER TABLE ... ADD COLUMN IF NOT EXISTS`` for the
    ones that are absent. Writes a JSON execution log next to this script.

    Returns:
        bool: True when no DDL failed, False otherwise.
    """
    print("=" * 80)
    print("ODS → DWD 字段同步脚本")
    print("时间:", datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
    print("=" * 80)
    # Connect to the database using the PG_DSN environment variable.
    dsn = os.getenv("PG_DSN")
    if not dsn:
        print("[错误] 未找到 PG_DSN 环境变量")
        return False
    db = DatabaseConnection(dsn)
    all_ddl = []
    executed_ddl = []
    failed_ddl = []
    for ods_table, fields in NEW_FIELDS_TO_DWD.items():
        print(f"\n处理 ODS 表: {ods_table}")
        for ods_col, dwd_col, dwd_table, col_type in fields:
            # Check whether the DWD table already has this column.
            # NOTE(review): the catalog is re-read once per field; caching
            # the column set per dwd_table would avoid repeated queries.
            try:
                dwd_cols = get_db_table_columns(db, dwd_table)
            except Exception as e:
                print(f"  [跳过] DWD 表 {dwd_table} 不存在或无法访问: {e}")
                continue
            if dwd_col.lower() in dwd_cols:
                print(f"  [存在] {dwd_table}.{dwd_col}")
                continue
            # Build the DDL statement.
            ddl = f'ALTER TABLE {dwd_table} ADD COLUMN IF NOT EXISTS "{dwd_col}" {col_type};'
            all_ddl.append(ddl)
            # Execute each DDL in its own transaction; roll back on failure
            # so one bad statement does not poison the rest.
            try:
                db.execute(ddl)
                db.commit()
                executed_ddl.append(ddl)
                print(f"  [新增] {dwd_table}.{dwd_col} ({col_type})")
            except Exception as e:
                db.rollback()
                failed_ddl.append((ddl, str(e)))
                print(f"  [失败] {dwd_table}.{dwd_col} - {e}")
    db.close()
    # Summary section.
    print("\n" + "=" * 80)
    print("执行汇总")
    print("=" * 80)
    print(f"总计生成 DDL: {len(all_ddl)}")
    print(f"执行成功: {len(executed_ddl)}")
    print(f"执行失败: {len(failed_ddl)}")
    if failed_ddl:
        print("\n失败的 DDL:")
        for ddl, err in failed_ddl:
            print(f"  - {ddl}")
            print(f"    错误: {err}")
    # Persist the execution log next to this script.
    log_file = Path(__file__).parent / "sync_dwd_columns_log.json"
    log = {
        "executed_at": datetime.now().isoformat(),
        "total_ddl": len(all_ddl),
        "success_count": len(executed_ddl),
        "failed_count": len(failed_ddl),
        "executed_ddl": executed_ddl,
        "failed_ddl": [{"ddl": d, "error": e} for d, e in failed_ddl],
    }
    with open(log_file, "w", encoding="utf-8") as f:
        json.dump(log, f, ensure_ascii=False, indent=2)
    print(f"\n执行日志已保存到: {log_file}")
    return len(failed_ddl) == 0
# Script entry point: exit code 0 only when main() reports full success.
if __name__ == "__main__":
    success = main()
    sys.exit(0 if success else 1)

View File

@@ -0,0 +1,86 @@
# -*- coding: utf-8 -*-
"""
测试 ODS 回填特性
"""
import os
import sys
from pathlib import Path
project_root = Path(__file__).parent.parent / "etl_billiards"
sys.path.insert(0, str(project_root))
from dotenv import load_dotenv
load_dotenv(project_root / ".env")
from database.connection import DatabaseConnection
# Script-style smoke test: requires PG_DSN in the environment and a reachable
# database. It only reads data and prints example SQL; nothing is mutated.
dsn = os.getenv("PG_DSN")
db = DatabaseConnection(dsn)
print("=== 测试 ODS 回填特性 ===")
# 1. Pick a probe record to demonstrate the backfill inputs.
# NOTE(review): the original comment said "find a record WITH a NULL value",
# but the filter below selects plcouponsaleamount IS NOT NULL — confirm intent.
result = db.query("""
    SELECT id, plcouponsaleamount, mervousalesamount,
           payload->'settleList'->>'plCouponSaleAmount' as payload_val
    FROM billiards_ods.settlement_records
    WHERE plcouponsaleamount IS NOT NULL
    LIMIT 1
""")
if result:
    row = result[0]
    print(f"找到测试记录: id={row['id']}")
    print(f"  当前 plcouponsaleamount: {row['plcouponsaleamount']}")
    print(f"  payload 中的值: {row['payload_val']}")
else:
    print("未找到测试记录")
# 2. Show what the generated backfill SQL would look like.
print("\n=== 生成的 SQL 示例 ===")
# Fetch the table's column list from the catalog, in ordinal order.
cols = db.query("""
    SELECT column_name
    FROM information_schema.columns
    WHERE table_schema = 'billiards_ods'
      AND table_name = 'settlement_records'
    ORDER BY ordinal_position
""")
col_names = [c["column_name"] for c in cols]
pk_cols = ["id"]  # assumed primary key — TODO confirm against the table DDL
meta_cols = {"payload", "source_file", "source_endpoint", "fetched_at", "content_hash"}
pk_cols_lower = {c.lower() for c in pk_cols}
# Updatable columns = everything except the PK and ETL metadata columns.
update_cols = [
    c for c in col_names
    if c.lower() not in pk_cols_lower and c.lower() not in meta_cols
]
print(f"表有 {len(col_names)} 列")
print(f"可更新列: {len(update_cols)} 列")
# Build the demo upsert SQL.
table = "billiards_ods.settlement_records"
pk_clause = ", ".join(f'"{c}"' for c in pk_cols)
set_clause = ", ".join(
    f'"{c}" = COALESCE({table}."{c}", EXCLUDED."{c}")'
    for c in update_cols[:3]  # only the first 3 columns are shown
)
where_clause = " OR ".join(f'{table}."{c}" IS NULL' for c in update_cols[:3])
print(f"\nSQL 示例 (前3列):")
print(f"INSERT INTO {table} (...) VALUES ...")
print(f"ON CONFLICT ({pk_clause}) DO UPDATE SET")
print(f"  {set_clause}")
print(f"WHERE {where_clause}")
print("\n=== 特性说明 ===")
print("1. 新记录 -> 正常插入")
print("2. 已存在记录 -> 只更新 NULL 列 (COALESCE)")
print("3. 已有值的列 -> 保持不变")
print("4. 可通过配置 run.ods_backfill_null_columns=false 禁用")
db.close()
print("\n测试完成!")

View File

@@ -0,0 +1,70 @@
# -*- coding: utf-8 -*-
"""
测试 ODS 冲突处理三种模式
"""
# Pure documentation script: prints a reference card for the three ODS
# insert-conflict handling modes. No database access, no side effects.
print("=" * 70)
print("ODS 冲突处理模式说明")
print("=" * 70)
# Each entry: (mode key, short title, example SQL + behavior description).
modes = [
    ("nothing", "跳过已存在记录", """
    INSERT INTO table (...) VALUES (...)
    ON CONFLICT (pk) DO NOTHING
    行为: 已存在的记录完全跳过,不做任何更新
    适用: 严格保留原始快照,不允许修改历史数据
    """),
    ("backfill", "回填 NULL 列", """
    INSERT INTO table (...) VALUES (...)
    ON CONFLICT (pk) DO UPDATE SET
      col1 = COALESCE(table.col1, EXCLUDED.col1),
      col2 = COALESCE(table.col2, EXCLUDED.col2)
    WHERE table.col1 IS NULL OR table.col2 IS NULL
    行为: 只填充数据库中为 NULL 的字段,已有值保持不变
    适用: 新增字段后回填历史数据,但不覆盖已有值
    """),
    ("update", "全字段对比更新 (默认)", """
    INSERT INTO table (...) VALUES (...)
    ON CONFLICT (pk) DO UPDATE SET
      col1 = EXCLUDED.col1,
      col2 = EXCLUDED.col2
    WHERE table.col1 IS DISTINCT FROM EXCLUDED.col1
       OR table.col2 IS DISTINCT FROM EXCLUDED.col2
    行为: 对比所有字段,有变化则更新
    适用: 数据同步,保持与 API 一致
    """),
]
# Print one card per mode.
for mode, title, sql in modes:
    print(f"\n【模式: {mode}】{title}")
    print("-" * 50)
    print(sql)
print("=" * 70)
print("配置方式 (在 .env 中设置)")
print("=" * 70)
print("""
# 方式1: 直接设置模式
run.ods_conflict_mode=update      # 全字段对比更新 (默认)
run.ods_conflict_mode=backfill    # 只回填 NULL
run.ods_conflict_mode=nothing     # 跳过已存在
# 方式2: 兼容旧配置
run.ods_backfill_null_columns=false   # 等同于 nothing 模式
""")
print("=" * 70)
print("对比表")
print("=" * 70)
print("""
| 场景                     | nothing | backfill | update |
|--------------------------|---------|----------|--------|
| 新记录                   | 插入    | 插入     | 插入   |
| 已存在 + 字段已有值      | 跳过    | 保留原值 | 更新   |
| 已存在 + 字段为 NULL     | 跳过    | 填充新值 | 填充   |
| 已存在 + API值与DB相同   | 跳过    | 跳过     | 跳过   |
""")