Update 2
This commit is contained in:
94
tmp/Untitled
Normal file
94
tmp/Untitled
Normal file
@@ -0,0 +1,94 @@
|
||||
# DWS 数据层需求
|
||||
## 简介
|
||||
项目路径:C:\dev\LLTQ\ETL\feiqiu-ETL
|
||||
|
||||
本文档描述在ETL已完成的DWD层数据基础上对DWS层的数据处理:
|
||||
- 完成对DWS层数据库的处理,即数据库设计,成果为DDL的SQL语句。
|
||||
- 数据读取处理到落库,即DWD读取,Python处理,SQL写入。
|
||||
|
||||
文档更多聚焦业务描述,你需要使用专业技能,使用面向对象编程OOP思想,完成程序设计直至代码完成:
|
||||
- 参考.\README.md 了解现在项目现状。
|
||||
- 参考.\etl_billiards\docs 了解 DWD的schema的表和字段。
|
||||
- SQL和Python代码需要详尽的,高密度的中文注释。
|
||||
- 完成内容,需要详尽高密度的补充至.\README.md,以方便后续维护。
|
||||
- DWS的表与表的字段 参考.\etl_billiards\docs\dwd_main_tables_dictionary.md 完成类似的数据库文档,方便后续维护。
|
||||
- 注意中文编码需求。
|
||||
|
||||
## 通用需求
|
||||
### 数据分层
|
||||
我希望使用互联网软件的业内通用方法,将数据按照更新时间分为4层,以符合业务层面的查询效率速度。
|
||||
- 第一层:回溯两天前到当前数据。
|
||||
- 第二层:回溯1个月前到当前数据。
|
||||
- 第三层:回溯3个月前到当前数据。
|
||||
- 第四层:全量数据。
|
||||
- 需要有配套的机制及时添加删除整理数据。
|
||||
|
||||
### 统计注意
|
||||
当统计一些数据时,注意口径,数据有效性标识。举例:
|
||||
- 计算助教业绩/工资时,需要参考助教废除表,相关业务数据的影响。
|
||||
- 计算助教业绩/工资时,注意辨别 助教课 附加课影响。
|
||||
|
||||
## 业务需求
|
||||
### 系统设置
|
||||
- 助教新的绩效考核和工资结算方式更新为以下算法,影响工资结算和财务账务方面的统计核算,相关内容需要落库,以方便后续调整。还要标记执行时间(如哪个月执行哪个标准等),执行相关结算和计算逻辑。:
|
||||
档位原因考虑 总业绩小时数阈值 专业课抽成(元/小时) 打赏课抽成 次月休假(天)
|
||||
0档 淘汰压力 H <100 28 50% 3
|
||||
1档 及格档(重点激励) 100≤ H <130 18 40% 4
|
||||
2档 良好档(重点激励) 130≤ H <160 15 38% 4
|
||||
3档 优秀档 160≤ H <190 13 35% 5
|
||||
4档 卓越加速档(高端人才倾斜) 190≤ H <220 10 33% 6
|
||||
5档 冠军加速档(高端人才倾斜) H ≥220 8 30% 休假自由
|
||||
|
||||
*课程分为2种(dwd_assistant_service_log表的skill_name):
|
||||
基础课:又名 专业课 上桌 上钟,是为客户提供台球助教陪练的课程,按时长统计。精确到分钟。
|
||||
附加课:又名 超休 激励 打赏,是客户支付较为高昂的价格,买断整小时与助教外出。
|
||||
总业绩小时数阈值指基础课和附加课总和。
|
||||
|
||||
各级别助教(dim_assistant表的level)基础课,对客户收费:初级 98元/小时;中级 108元/小时;高级 118元/小时;星级 138元/小时;
|
||||
附加课对客户收费统一为190元/小时。
|
||||
|
||||
充值提成:
|
||||
|
||||
|
||||
|
||||
|
||||
冲刺奖 达成奖金
|
||||
当月 H ≥ 190:300 元
|
||||
当月 H ≥ 220:800 元(与上条不叠加,取高)
|
||||
|
||||
额外奖金:
|
||||
冲刺奖 达成奖金
|
||||
当月 H ≥ 190:300 元
|
||||
当月 H ≥ 220:800 元(与上条不叠加,取高)
|
||||
|
||||
Top3 奖金:
|
||||
第1名:1000 元
|
||||
第2名:600 元
|
||||
第3名:400 元
|
||||
|
||||
规则:
|
||||
1、过档后,所有时长按新档位进行计算。
|
||||
举例,当前某中级助教已完成185小时,基础课占170小时,附加课15小时。则该月工资计算方法:
|
||||
170*(108-13)+15*(1-0.35)
|
||||
|
||||
2、本月新入职助教,定档方案:
|
||||
按照日均*30的总业绩小时数定档。
|
||||
在当月25日之后入职的新助教,最高定档至3档。
|
||||
该折算仅用于定档,不适用于“冲刺奖”和“Top3奖”的计算口径。
|
||||
|
||||
### 助教维度
|
||||
以每个助教个体的视角
|
||||
- 我要知道我的业绩档位,历史月份与本月档位进度,档位影响的收入单价。及相邻月份的变化。
|
||||
- 我要知道我的有效业绩:历史月份与本月的 基础课课时,激励课课时,全部课课时。相邻月份的变化。
|
||||
- 我要知道我的收入:历史月份与本月的收入(注意助教等级,业绩档位,课程种类等因素的总和计算)。相邻月份的变化。
|
||||
- 我要知道我的客户情况:过去7天、10天、15天、30天、60天、90天 的跨度进行统计,我服务过(基础课+附加课)的客户数据,并关联每次服务的 时间 时长 台桌 分类 等详细信息。
|
||||
|
||||
### 客户维度
|
||||
统计每个客户的信息
|
||||
- 我要知道每个客户:过去7天、10天、15天、30天、60天、90天 的跨度进行统计,来店消费情况,并关联每次服务的 时间 食品饮品 时长 台桌 分类 助教服务 等详细信息。
|
||||
|
||||
|
||||
### 财务维度
|
||||
财务维度的需求(已经落到原型图需求级别了),见财务页面需求.md
|
||||
|
||||
|
||||
226
tmp/add_missing_dwd_columns.py
Normal file
226
tmp/add_missing_dwd_columns.py
Normal file
@@ -0,0 +1,226 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
添加缺失的 DWD 列到数据库
|
||||
根据 ODS 新增字段,在对应的 DWD 表中添加相关列
|
||||
"""
|
||||
import psycopg2
|
||||
|
||||
DSN = 'postgresql://local-Python:Neo-local-1991125@100.64.0.4:5432/LLZQ-test'
|
||||
|
||||
# DWD 表缺失字段定义:表名 -> [(列名, 类型, 注释)]
|
||||
# 根据计划,核心业务字段放主表,扩展字段放 _ex 表
|
||||
MISSING_COLUMNS = {
|
||||
# 结算表 - 核心金额字段放主表
|
||||
'billiards_dwd.dwd_settlement_head': [
|
||||
('electricity_money', 'NUMERIC(18,2)', '电费金额'),
|
||||
('real_electricity_money', 'NUMERIC(18,2)', '实际电费金额'),
|
||||
('electricity_adjust_money', 'NUMERIC(18,2)', '电费调整金额'),
|
||||
('pl_coupon_sale_amount', 'NUMERIC(18,2)', '平台券销售额'),
|
||||
('mervou_sales_amount', 'NUMERIC(18,2)', '商户券销售额'),
|
||||
],
|
||||
'billiards_dwd.dwd_settlement_head_ex': [
|
||||
('settle_list', 'JSONB', '结算明细列表'),
|
||||
],
|
||||
|
||||
# 台费流水表
|
||||
'billiards_dwd.dwd_table_fee_log': [
|
||||
('activity_discount_amount', 'NUMERIC(18,2)', '活动折扣金额'),
|
||||
('real_service_money', 'NUMERIC(18,2)', '实际服务费金额'),
|
||||
],
|
||||
'billiards_dwd.dwd_table_fee_log_ex': [
|
||||
('order_consumption_type', 'INT', '订单消费类型'),
|
||||
],
|
||||
|
||||
# 助教服务流水表
|
||||
'billiards_dwd.dwd_assistant_service_log': [
|
||||
('real_service_money', 'NUMERIC(18,2)', '实际服务费金额'),
|
||||
],
|
||||
'billiards_dwd.dwd_assistant_service_log_ex': [
|
||||
('assistant_team_name', 'TEXT', '助教团队名称'),
|
||||
],
|
||||
|
||||
# 团购核销记录表
|
||||
'billiards_dwd.dwd_groupbuy_redemption': [
|
||||
('member_discount_money', 'NUMERIC(18,2)', '会员折扣金额'),
|
||||
('coupon_sale_id', 'BIGINT', '优惠券销售ID'),
|
||||
],
|
||||
'billiards_dwd.dwd_groupbuy_redemption_ex': [
|
||||
('table_share_money', 'NUMERIC(18,2)', '台费分摊金额'),
|
||||
('table_service_share_money', 'NUMERIC(18,2)', '台费服务分摊金额'),
|
||||
('goods_share_money', 'NUMERIC(18,2)', '商品分摊金额'),
|
||||
('good_service_share_money', 'NUMERIC(18,2)', '商品服务分摊金额'),
|
||||
('assistant_share_money', 'NUMERIC(18,2)', '助教分摊金额'),
|
||||
('assistant_service_share_money', 'NUMERIC(18,2)', '助教服务分摊金额'),
|
||||
('recharge_share_money', 'NUMERIC(18,2)', '充值分摊金额'),
|
||||
],
|
||||
|
||||
# 台费调整记录表
|
||||
'billiards_dwd.dwd_table_fee_adjust': [
|
||||
('table_name', 'TEXT', '台桌名称'),
|
||||
('table_price', 'NUMERIC(18,2)', '台桌价格'),
|
||||
('charge_free', 'BOOLEAN', '是否免费'),
|
||||
],
|
||||
'billiards_dwd.dwd_table_fee_adjust_ex': [
|
||||
('area_type_id', 'BIGINT', '区域类型ID'),
|
||||
('site_table_area_id', 'BIGINT', '门店台区ID'),
|
||||
('site_table_area_name', 'TEXT', '门店台区名称'),
|
||||
('site_name', 'TEXT', '门店名称'),
|
||||
('tenant_name', 'TEXT', '租户名称'),
|
||||
],
|
||||
|
||||
# 会员储值卡维度表
|
||||
'billiards_dwd.dim_member_card_account': [
|
||||
('principal_balance', 'NUMERIC(18,2)', '本金余额'),
|
||||
('member_grade', 'INT', '会员等级'),
|
||||
],
|
||||
'billiards_dwd.dim_member_card_account_ex': [
|
||||
('able_share_member_discount', 'BOOLEAN', '是否可共享会员折扣'),
|
||||
('electricity_deduct_radio', 'NUMERIC(18,4)', '电费扣减比例'),
|
||||
('electricity_discount', 'NUMERIC(18,4)', '电费折扣'),
|
||||
('electricity_card_deduct', 'BOOLEAN', '电费卡扣'),
|
||||
('recharge_freeze_balance', 'NUMERIC(18,2)', '充值冻结余额'),
|
||||
],
|
||||
|
||||
# 会员维度表
|
||||
'billiards_dwd.dim_member': [
|
||||
('pay_money_sum', 'NUMERIC(18,2)', '累计支付金额'),
|
||||
('recharge_money_sum', 'NUMERIC(18,2)', '累计充值金额'),
|
||||
],
|
||||
'billiards_dwd.dim_member_ex': [
|
||||
('person_tenant_org_id', 'BIGINT', '人员租户组织ID'),
|
||||
('person_tenant_org_name', 'TEXT', '人员租户组织名称'),
|
||||
('register_source', 'TEXT', '注册来源'),
|
||||
],
|
||||
|
||||
# 会员余额变更表
|
||||
'billiards_dwd.dwd_member_balance_change': [
|
||||
('principal_before', 'NUMERIC(18,2)', '变动前本金'),
|
||||
('principal_after', 'NUMERIC(18,2)', '变动后本金'),
|
||||
],
|
||||
'billiards_dwd.dwd_member_balance_change_ex': [
|
||||
('principal_data', 'TEXT', '本金变动数据'),
|
||||
],
|
||||
|
||||
# 团购套餐维度表
|
||||
'billiards_dwd.dim_groupbuy_package': [
|
||||
('sort', 'INT', '排序'),
|
||||
('is_first_limit', 'BOOLEAN', '是否首单限制'),
|
||||
],
|
||||
'billiards_dwd.dim_groupbuy_package_ex': [
|
||||
('tenant_coupon_sale_order_item_id', 'BIGINT', '租户券销售订单项ID'),
|
||||
],
|
||||
|
||||
# 门店商品维度表
|
||||
'billiards_dwd.dim_store_goods': [
|
||||
('commodity_code', 'TEXT', '商品编码'),
|
||||
('not_sale', 'BOOLEAN', '是否停售'),
|
||||
],
|
||||
|
||||
# 台桌维度表
|
||||
'billiards_dwd.dim_table': [
|
||||
('order_id', 'BIGINT', '订单ID'),
|
||||
],
|
||||
|
||||
# 租户商品维度表
|
||||
'billiards_dwd.dim_tenant_goods': [
|
||||
('not_sale', 'BOOLEAN', '是否停售'),
|
||||
],
|
||||
|
||||
# 助教作废记录表
|
||||
'billiards_dwd.dwd_assistant_cancel_log': [
|
||||
('tenant_id', 'BIGINT', '租户ID'),
|
||||
],
|
||||
|
||||
# 商品销售流水表
|
||||
'billiards_dwd.dwd_goods_sale_log': [
|
||||
('coupon_share_money', 'NUMERIC(18,2)', '优惠券分摊金额'),
|
||||
],
|
||||
|
||||
# 支付流水表
|
||||
'billiards_dwd.dwd_payment': [
|
||||
('tenant_id', 'BIGINT', '租户ID'),
|
||||
],
|
||||
}
|
||||
|
||||
|
||||
def get_existing_columns(conn, schema, table):
|
||||
"""获取表已有的列"""
|
||||
sql = """
|
||||
SELECT column_name
|
||||
FROM information_schema.columns
|
||||
WHERE table_schema = %s AND table_name = %s
|
||||
"""
|
||||
with conn.cursor() as cur:
|
||||
cur.execute(sql, (schema, table))
|
||||
return {row[0].lower() for row in cur.fetchall()}
|
||||
|
||||
|
||||
def table_exists(conn, schema, table):
|
||||
"""检查表是否存在"""
|
||||
sql = """
|
||||
SELECT EXISTS (
|
||||
SELECT 1 FROM information_schema.tables
|
||||
WHERE table_schema = %s AND table_name = %s
|
||||
)
|
||||
"""
|
||||
with conn.cursor() as cur:
|
||||
cur.execute(sql, (schema, table))
|
||||
return cur.fetchone()[0]
|
||||
|
||||
|
||||
def add_column(conn, full_table, col_name, col_type, comment):
|
||||
"""添加列"""
|
||||
sql = f'ALTER TABLE {full_table} ADD COLUMN IF NOT EXISTS "{col_name}" {col_type}'
|
||||
comment_sql = f"COMMENT ON COLUMN {full_table}.\"{col_name}\" IS '{comment}'"
|
||||
|
||||
with conn.cursor() as cur:
|
||||
cur.execute(sql)
|
||||
cur.execute(comment_sql)
|
||||
conn.commit()
|
||||
print(f" [OK] 添加列: {col_name} ({col_type})")
|
||||
|
||||
|
||||
def main():
|
||||
conn = psycopg2.connect(DSN)
|
||||
|
||||
print("=" * 80)
|
||||
print("添加缺失的 DWD 列")
|
||||
print("=" * 80)
|
||||
|
||||
total_added = 0
|
||||
total_skipped = 0
|
||||
tables_not_found = []
|
||||
|
||||
for full_table, columns in MISSING_COLUMNS.items():
|
||||
schema, table = full_table.split('.')
|
||||
|
||||
if not table_exists(conn, schema, table):
|
||||
print(f"\n[跳过] 表不存在: {full_table}")
|
||||
tables_not_found.append(full_table)
|
||||
continue
|
||||
|
||||
print(f"\n处理表: {full_table}")
|
||||
|
||||
existing = get_existing_columns(conn, schema, table)
|
||||
|
||||
for col_name, col_type, comment in columns:
|
||||
if col_name.lower() in existing:
|
||||
print(f" [跳过] 列已存在: {col_name}")
|
||||
total_skipped += 1
|
||||
else:
|
||||
add_column(conn, full_table, col_name, col_type, comment)
|
||||
total_added += 1
|
||||
|
||||
conn.close()
|
||||
|
||||
print("\n" + "=" * 80)
|
||||
print(f"完成: 添加 {total_added} 列, 跳过 {total_skipped} 列")
|
||||
if tables_not_found:
|
||||
print(f"未找到的表: {len(tables_not_found)}")
|
||||
for t in tables_not_found:
|
||||
print(f" - {t}")
|
||||
print("=" * 80)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
162
tmp/add_missing_ods_columns.py
Normal file
162
tmp/add_missing_ods_columns.py
Normal file
@@ -0,0 +1,162 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
添加缺失的 ODS 列到数据库
|
||||
"""
|
||||
import psycopg2
|
||||
|
||||
DSN = 'postgresql://local-Python:Neo-local-1991125@100.64.0.4:5432/LLZQ-test'
|
||||
|
||||
# 缺失字段定义:表名 -> [(列名, 类型, 注释)]
|
||||
MISSING_COLUMNS = {
|
||||
'billiards_ods.settlement_records': [
|
||||
('electricityadjustmoney', 'NUMERIC(18,2)', '电费调整金额'),
|
||||
('electricitymoney', 'NUMERIC(18,2)', '电费金额'),
|
||||
('mervousalesamount', 'NUMERIC(18,2)', '商户券销售额'),
|
||||
('plcouponsaleamount', 'NUMERIC(18,2)', '平台券销售额'),
|
||||
('realelectricitymoney', 'NUMERIC(18,2)', '实际电费金额'),
|
||||
('settlelist', 'JSONB', '结算明细列表'),
|
||||
],
|
||||
'billiards_ods.recharge_settlements': [
|
||||
('electricityadjustmoney', 'NUMERIC(18,2)', '电费调整金额'),
|
||||
('electricitymoney', 'NUMERIC(18,2)', '电费金额'),
|
||||
('mervousalesamount', 'NUMERIC(18,2)', '商户券销售额'),
|
||||
('plcouponsaleamount', 'NUMERIC(18,2)', '平台券销售额'),
|
||||
('realelectricitymoney', 'NUMERIC(18,2)', '实际电费金额'),
|
||||
('settlelist', 'JSONB', '结算明细列表'),
|
||||
],
|
||||
'billiards_ods.table_fee_transactions': [
|
||||
('activity_discount_amount', 'NUMERIC(18,2)', '活动折扣金额'),
|
||||
('order_consumption_type', 'INT', '订单消费类型'),
|
||||
('real_service_money', 'NUMERIC(18,2)', '实际服务费金额'),
|
||||
],
|
||||
'billiards_ods.assistant_service_records': [
|
||||
('assistantteamname', 'TEXT', '助教团队名称'),
|
||||
('real_service_money', 'NUMERIC(18,2)', '实际服务费金额'),
|
||||
],
|
||||
'billiards_ods.group_buy_redemption_records': [
|
||||
('assistant_service_share_money', 'NUMERIC(18,2)', '助教服务分摊金额'),
|
||||
('assistant_share_money', 'NUMERIC(18,2)', '助教分摊金额'),
|
||||
('coupon_sale_id', 'BIGINT', '优惠券销售ID'),
|
||||
('good_service_share_money', 'NUMERIC(18,2)', '商品服务分摊金额'),
|
||||
('goods_share_money', 'NUMERIC(18,2)', '商品分摊金额'),
|
||||
('member_discount_money', 'NUMERIC(18,2)', '会员折扣金额'),
|
||||
('recharge_share_money', 'NUMERIC(18,2)', '充值分摊金额'),
|
||||
('table_service_share_money', 'NUMERIC(18,2)', '台费服务分摊金额'),
|
||||
('table_share_money', 'NUMERIC(18,2)', '台费分摊金额'),
|
||||
],
|
||||
'billiards_ods.table_fee_discount_records': [
|
||||
('area_type_id', 'BIGINT', '区域类型ID'),
|
||||
('charge_free', 'BOOLEAN', '是否免费'),
|
||||
('site_table_area_id', 'BIGINT', '门店台区ID'),
|
||||
('site_table_area_name', 'TEXT', '门店台区名称'),
|
||||
('sitename', 'TEXT', '门店名称'),
|
||||
('table_name', 'TEXT', '台桌名称'),
|
||||
('table_price', 'NUMERIC(18,2)', '台桌价格'),
|
||||
('tenant_name', 'TEXT', '租户名称'),
|
||||
],
|
||||
'billiards_ods.member_stored_value_cards': [
|
||||
('able_share_member_discount', 'BOOLEAN', '是否可共享会员折扣'),
|
||||
('electricity_deduct_radio', 'NUMERIC(18,4)', '电费扣减比例'),
|
||||
('electricity_discount', 'NUMERIC(18,4)', '电费折扣'),
|
||||
('electricitycarddeduct', 'BOOLEAN', '电费卡扣'),
|
||||
('member_grade', 'INT', '会员等级'),
|
||||
('principal_balance', 'NUMERIC(18,2)', '本金余额'),
|
||||
('rechargefreezebalance', 'NUMERIC(18,2)', '充值冻结余额'),
|
||||
],
|
||||
'billiards_ods.member_profiles': [
|
||||
('pay_money_sum', 'NUMERIC(18,2)', '累计支付金额'),
|
||||
('person_tenant_org_id', 'BIGINT', '人员租户组织ID'),
|
||||
('person_tenant_org_name', 'TEXT', '人员租户组织名称'),
|
||||
('recharge_money_sum', 'NUMERIC(18,2)', '累计充值金额'),
|
||||
('register_source', 'TEXT', '注册来源'),
|
||||
],
|
||||
'billiards_ods.member_balance_changes': [
|
||||
('principal_after', 'NUMERIC(18,2)', '变动后本金'),
|
||||
('principal_before', 'NUMERIC(18,2)', '变动前本金'),
|
||||
('principal_data', 'TEXT', '本金变动数据'),
|
||||
],
|
||||
'billiards_ods.group_buy_packages': [
|
||||
('is_first_limit', 'BOOLEAN', '是否首单限制'),
|
||||
('sort', 'INT', '排序'),
|
||||
('tenantcouponsaleorderitemid', 'BIGINT', '租户券销售订单项ID'),
|
||||
],
|
||||
'billiards_ods.store_goods_master': [
|
||||
('commodity_code', 'TEXT', '商品编码'),
|
||||
('not_sale', 'BOOLEAN', '是否停售'),
|
||||
],
|
||||
'billiards_ods.assistant_cancellation_records': [
|
||||
('tenant_id', 'BIGINT', '租户ID'),
|
||||
],
|
||||
'billiards_ods.store_goods_sales_records': [
|
||||
('coupon_share_money', 'NUMERIC(18,2)', '优惠券分摊金额'),
|
||||
],
|
||||
'billiards_ods.payment_transactions': [
|
||||
('tenant_id', 'BIGINT', '租户ID'),
|
||||
],
|
||||
'billiards_ods.site_tables_master': [
|
||||
('order_id', 'BIGINT', '订单ID'),
|
||||
],
|
||||
'billiards_ods.tenant_goods_master': [
|
||||
('not_sale', 'BOOLEAN', '是否停售'),
|
||||
],
|
||||
}
|
||||
|
||||
|
||||
def get_existing_columns(conn, schema, table):
|
||||
"""获取表已有的列"""
|
||||
sql = """
|
||||
SELECT column_name
|
||||
FROM information_schema.columns
|
||||
WHERE table_schema = %s AND table_name = %s
|
||||
"""
|
||||
with conn.cursor() as cur:
|
||||
cur.execute(sql, (schema, table))
|
||||
return {row[0].lower() for row in cur.fetchall()}
|
||||
|
||||
|
||||
def add_column(conn, full_table, col_name, col_type, comment):
|
||||
"""添加列"""
|
||||
schema, table = full_table.split('.')
|
||||
sql = f'ALTER TABLE {full_table} ADD COLUMN IF NOT EXISTS "{col_name}" {col_type}'
|
||||
comment_sql = f"COMMENT ON COLUMN {full_table}.\"{col_name}\" IS '{comment}'"
|
||||
|
||||
with conn.cursor() as cur:
|
||||
cur.execute(sql)
|
||||
cur.execute(comment_sql)
|
||||
conn.commit()
|
||||
print(f" [OK] 添加列: {col_name} ({col_type})")
|
||||
|
||||
|
||||
def main():
|
||||
conn = psycopg2.connect(DSN)
|
||||
|
||||
print("=" * 80)
|
||||
print("添加缺失的 ODS 列")
|
||||
print("=" * 80)
|
||||
|
||||
total_added = 0
|
||||
total_skipped = 0
|
||||
|
||||
for full_table, columns in MISSING_COLUMNS.items():
|
||||
schema, table = full_table.split('.')
|
||||
print(f"\n处理表: {full_table}")
|
||||
|
||||
existing = get_existing_columns(conn, schema, table)
|
||||
|
||||
for col_name, col_type, comment in columns:
|
||||
if col_name.lower() in existing:
|
||||
print(f" [跳过] 列已存在: {col_name}")
|
||||
total_skipped += 1
|
||||
else:
|
||||
add_column(conn, full_table, col_name, col_type, comment)
|
||||
total_added += 1
|
||||
|
||||
conn.close()
|
||||
|
||||
print("\n" + "=" * 80)
|
||||
print(f"完成: 添加 {total_added} 列, 跳过 {total_skipped} 列")
|
||||
print("=" * 80)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
37
tmp/add_remaining_dwd_columns.py
Normal file
37
tmp/add_remaining_dwd_columns.py
Normal file
@@ -0,0 +1,37 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
添加剩余的 DWD 列
|
||||
"""
|
||||
import psycopg2
|
||||
|
||||
DSN = 'postgresql://local-Python:Neo-local-1991125@100.64.0.4:5432/LLZQ-test'
|
||||
|
||||
# 修正后的表名
|
||||
MISSING_COLUMNS = {
|
||||
'billiards_dwd.dwd_assistant_trash_event': [
|
||||
('tenant_id', 'BIGINT', '租户ID'),
|
||||
],
|
||||
'billiards_dwd.dwd_store_goods_sale': [
|
||||
('coupon_share_money', 'NUMERIC(18,2)', '优惠券分摊金额'),
|
||||
],
|
||||
}
|
||||
|
||||
def add_column(conn, full_table, col_name, col_type, comment):
|
||||
sql = f'ALTER TABLE {full_table} ADD COLUMN IF NOT EXISTS "{col_name}" {col_type}'
|
||||
comment_sql = f"COMMENT ON COLUMN {full_table}.\"{col_name}\" IS '{comment}'"
|
||||
with conn.cursor() as cur:
|
||||
cur.execute(sql)
|
||||
cur.execute(comment_sql)
|
||||
conn.commit()
|
||||
print(f" [OK] {full_table}.{col_name} ({col_type})")
|
||||
|
||||
def main():
|
||||
conn = psycopg2.connect(DSN)
|
||||
for full_table, columns in MISSING_COLUMNS.items():
|
||||
for col_name, col_type, comment in columns:
|
||||
add_column(conn, full_table, col_name, col_type, comment)
|
||||
conn.close()
|
||||
print("Done!")
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
2305
tmp/api_ods_comparison.json
Normal file
2305
tmp/api_ods_comparison.json
Normal file
File diff suppressed because it is too large
Load Diff
355
tmp/api_ods_issue_report.json
Normal file
355
tmp/api_ods_issue_report.json
Normal file
@@ -0,0 +1,355 @@
|
||||
{
|
||||
"generated_at": "2026-02-02T19:00:26.972834",
|
||||
"missing_fields": [
|
||||
{
|
||||
"task_code": "ODS_SETTLEMENT_RECORDS",
|
||||
"table_name": "billiards_ods.settlement_records",
|
||||
"endpoint": "/Site/GetAllOrderSettleList",
|
||||
"missing_fields": [
|
||||
"electricityadjustmoney",
|
||||
"electricitymoney",
|
||||
"mervousalesamount",
|
||||
"plcouponsaleamount",
|
||||
"realelectricitymoney",
|
||||
"settlelist",
|
||||
"tenant_id"
|
||||
]
|
||||
},
|
||||
{
|
||||
"task_code": "ODS_TABLE_USE",
|
||||
"table_name": "billiards_ods.table_fee_transactions",
|
||||
"endpoint": "/Site/GetSiteTableOrderDetails",
|
||||
"missing_fields": [
|
||||
"activity_discount_amount",
|
||||
"order_consumption_type",
|
||||
"real_service_money"
|
||||
]
|
||||
},
|
||||
{
|
||||
"task_code": "ODS_ASSISTANT_LEDGER",
|
||||
"table_name": "billiards_ods.assistant_service_records",
|
||||
"endpoint": "/AssistantPerformance/GetOrderAssistantDetails",
|
||||
"missing_fields": [
|
||||
"assistantteamname",
|
||||
"real_service_money"
|
||||
]
|
||||
},
|
||||
{
|
||||
"task_code": "ODS_ASSISTANT_ABOLISH",
|
||||
"table_name": "billiards_ods.assistant_cancellation_records",
|
||||
"endpoint": "/AssistantPerformance/GetAbolitionAssistant",
|
||||
"missing_fields": [
|
||||
"tenant_id"
|
||||
]
|
||||
},
|
||||
{
|
||||
"task_code": "ODS_STORE_GOODS_SALES",
|
||||
"table_name": "billiards_ods.store_goods_sales_records",
|
||||
"endpoint": "/TenantGoods/GetGoodsSalesList",
|
||||
"missing_fields": [
|
||||
"coupon_share_money"
|
||||
]
|
||||
},
|
||||
{
|
||||
"task_code": "ODS_PAYMENT",
|
||||
"table_name": "billiards_ods.payment_transactions",
|
||||
"endpoint": "/PayLog/GetPayLogListPage",
|
||||
"missing_fields": [
|
||||
"tenant_id"
|
||||
]
|
||||
},
|
||||
{
|
||||
"task_code": "ODS_MEMBER",
|
||||
"table_name": "billiards_ods.member_profiles",
|
||||
"endpoint": "/MemberProfile/GetTenantMemberList",
|
||||
"missing_fields": [
|
||||
"pay_money_sum",
|
||||
"person_tenant_org_id",
|
||||
"person_tenant_org_name",
|
||||
"recharge_money_sum",
|
||||
"register_source"
|
||||
]
|
||||
},
|
||||
{
|
||||
"task_code": "ODS_MEMBER_CARD",
|
||||
"table_name": "billiards_ods.member_stored_value_cards",
|
||||
"endpoint": "/MemberProfile/GetTenantMemberCardList",
|
||||
"missing_fields": [
|
||||
"able_share_member_discount",
|
||||
"electricity_deduct_radio",
|
||||
"electricity_discount",
|
||||
"electricitycarddeduct",
|
||||
"member_grade",
|
||||
"principal_balance",
|
||||
"rechargefreezebalance"
|
||||
]
|
||||
},
|
||||
{
|
||||
"task_code": "ODS_MEMBER_BALANCE",
|
||||
"table_name": "billiards_ods.member_balance_changes",
|
||||
"endpoint": "/MemberProfile/GetMemberCardBalanceChange",
|
||||
"missing_fields": [
|
||||
"principal_after",
|
||||
"principal_before",
|
||||
"principal_data"
|
||||
]
|
||||
},
|
||||
{
|
||||
"task_code": "ODS_RECHARGE_SETTLE",
|
||||
"table_name": "billiards_ods.recharge_settlements",
|
||||
"endpoint": "/Site/GetRechargeSettleList",
|
||||
"missing_fields": [
|
||||
"electricityadjustmoney",
|
||||
"electricitymoney",
|
||||
"mervousalesamount",
|
||||
"plcouponsaleamount",
|
||||
"realelectricitymoney",
|
||||
"settlelist",
|
||||
"tenant_id"
|
||||
]
|
||||
},
|
||||
{
|
||||
"task_code": "ODS_GROUP_PACKAGE",
|
||||
"table_name": "billiards_ods.group_buy_packages",
|
||||
"endpoint": "/PackageCoupon/QueryPackageCouponList",
|
||||
"missing_fields": [
|
||||
"is_first_limit",
|
||||
"sort",
|
||||
"tableareanamelist",
|
||||
"tenantcouponsaleorderitemid",
|
||||
"tenanttableareaidlist"
|
||||
]
|
||||
},
|
||||
{
|
||||
"task_code": "ODS_GROUP_BUY_REDEMPTION",
|
||||
"table_name": "billiards_ods.group_buy_redemption_records",
|
||||
"endpoint": "/Site/GetSiteTableUseDetails",
|
||||
"missing_fields": [
|
||||
"assistant_service_share_money",
|
||||
"assistant_share_money",
|
||||
"coupon_sale_id",
|
||||
"good_service_share_money",
|
||||
"goods_share_money",
|
||||
"member_discount_money",
|
||||
"recharge_share_money",
|
||||
"table_service_share_money",
|
||||
"table_share_money"
|
||||
]
|
||||
},
|
||||
{
|
||||
"task_code": "ODS_TABLES",
|
||||
"table_name": "billiards_ods.site_tables_master",
|
||||
"endpoint": "/Table/GetSiteTables",
|
||||
"missing_fields": [
|
||||
"order_id"
|
||||
]
|
||||
},
|
||||
{
|
||||
"task_code": "ODS_STORE_GOODS",
|
||||
"table_name": "billiards_ods.store_goods_master",
|
||||
"endpoint": "/TenantGoods/GetGoodsInventoryList",
|
||||
"missing_fields": [
|
||||
"commodity_code",
|
||||
"not_sale"
|
||||
]
|
||||
},
|
||||
{
|
||||
"task_code": "ODS_TABLE_FEE_DISCOUNT",
|
||||
"table_name": "billiards_ods.table_fee_discount_records",
|
||||
"endpoint": "/Site/GetTaiFeeAdjustList",
|
||||
"missing_fields": [
|
||||
"area_type_id",
|
||||
"charge_free",
|
||||
"site_table_area_id",
|
||||
"site_table_area_name",
|
||||
"sitename",
|
||||
"table_name",
|
||||
"table_price",
|
||||
"tenant_name"
|
||||
]
|
||||
},
|
||||
{
|
||||
"task_code": "ODS_TENANT_GOODS",
|
||||
"table_name": "billiards_ods.tenant_goods_master",
|
||||
"endpoint": "/TenantGoods/QueryTenantGoods",
|
||||
"missing_fields": [
|
||||
"not_sale"
|
||||
]
|
||||
}
|
||||
],
|
||||
"zero_to_null_issues": [
|
||||
{
|
||||
"task_code": "ODS_TABLE_USE",
|
||||
"table_name": "billiards_ods.table_fee_transactions",
|
||||
"checked_rows": 100,
|
||||
"issues": [
|
||||
{
|
||||
"column": "activity_discount_amount",
|
||||
"count": 67,
|
||||
"issue": "API 中的 0 值在 ODS 中变成了 NULL"
|
||||
},
|
||||
{
|
||||
"column": "real_service_money",
|
||||
"count": 67,
|
||||
"issue": "API 中的 0 值在 ODS 中变成了 NULL"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"task_code": "ODS_ASSISTANT_LEDGER",
|
||||
"table_name": "billiards_ods.assistant_service_records",
|
||||
"checked_rows": 100,
|
||||
"issues": [
|
||||
{
|
||||
"column": "real_service_money",
|
||||
"count": 90,
|
||||
"issue": "API 中的 0 值在 ODS 中变成了 NULL"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"task_code": "ODS_STORE_GOODS_SALES",
|
||||
"table_name": "billiards_ods.store_goods_sales_records",
|
||||
"checked_rows": 100,
|
||||
"issues": [
|
||||
{
|
||||
"column": "coupon_share_money",
|
||||
"count": 100,
|
||||
"issue": "API 中的 0 值在 ODS 中变成了 NULL"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"task_code": "ODS_MEMBER",
|
||||
"table_name": "billiards_ods.member_profiles",
|
||||
"checked_rows": 100,
|
||||
"issues": [
|
||||
{
|
||||
"column": "person_tenant_org_id",
|
||||
"count": 96,
|
||||
"issue": "API 中的 0 值在 ODS 中变成了 NULL"
|
||||
},
|
||||
{
|
||||
"column": "pay_money_sum",
|
||||
"count": 40,
|
||||
"issue": "API 中的 0 值在 ODS 中变成了 NULL"
|
||||
},
|
||||
{
|
||||
"column": "recharge_money_sum",
|
||||
"count": 12,
|
||||
"issue": "API 中的 0 值在 ODS 中变成了 NULL"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"task_code": "ODS_MEMBER_CARD",
|
||||
"table_name": "billiards_ods.member_stored_value_cards",
|
||||
"checked_rows": 100,
|
||||
"issues": [
|
||||
{
|
||||
"column": "rechargefreezebalance",
|
||||
"count": 100,
|
||||
"issue": "API 中的 0 值在 ODS 中变成了 NULL"
|
||||
},
|
||||
{
|
||||
"column": "principal_balance",
|
||||
"count": 34,
|
||||
"issue": "API 中的 0 值在 ODS 中变成了 NULL"
|
||||
},
|
||||
{
|
||||
"column": "member_grade",
|
||||
"count": 8,
|
||||
"issue": "API 中的 0 值在 ODS 中变成了 NULL"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"task_code": "ODS_MEMBER_BALANCE",
|
||||
"table_name": "billiards_ods.member_balance_changes",
|
||||
"checked_rows": 100,
|
||||
"issues": [
|
||||
{
|
||||
"column": "principal_after",
|
||||
"count": 18,
|
||||
"issue": "API 中的 0 值在 ODS 中变成了 NULL"
|
||||
},
|
||||
{
|
||||
"column": "principal_before",
|
||||
"count": 18,
|
||||
"issue": "API 中的 0 值在 ODS 中变成了 NULL"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"task_code": "ODS_GROUP_PACKAGE",
|
||||
"table_name": "billiards_ods.group_buy_packages",
|
||||
"checked_rows": 52,
|
||||
"issues": [
|
||||
{
|
||||
"column": "tenantcouponsaleorderitemid",
|
||||
"count": 52,
|
||||
"issue": "API 中的 0 值在 ODS 中变成了 NULL"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"task_code": "ODS_GROUP_BUY_REDEMPTION",
|
||||
"table_name": "billiards_ods.group_buy_redemption_records",
|
||||
"checked_rows": 100,
|
||||
"issues": [
|
||||
{
|
||||
"column": "assistant_service_share_money",
|
||||
"count": 74,
|
||||
"issue": "API 中的 0 值在 ODS 中变成了 NULL"
|
||||
},
|
||||
{
|
||||
"column": "assistant_share_money",
|
||||
"count": 74,
|
||||
"issue": "API 中的 0 值在 ODS 中变成了 NULL"
|
||||
},
|
||||
{
|
||||
"column": "coupon_sale_id",
|
||||
"count": 74,
|
||||
"issue": "API 中的 0 值在 ODS 中变成了 NULL"
|
||||
},
|
||||
{
|
||||
"column": "good_service_share_money",
|
||||
"count": 74,
|
||||
"issue": "API 中的 0 值在 ODS 中变成了 NULL"
|
||||
},
|
||||
{
|
||||
"column": "goods_share_money",
|
||||
"count": 74,
|
||||
"issue": "API 中的 0 值在 ODS 中变成了 NULL"
|
||||
},
|
||||
{
|
||||
"column": "member_discount_money",
|
||||
"count": 74,
|
||||
"issue": "API 中的 0 值在 ODS 中变成了 NULL"
|
||||
},
|
||||
{
|
||||
"column": "recharge_share_money",
|
||||
"count": 74,
|
||||
"issue": "API 中的 0 值在 ODS 中变成了 NULL"
|
||||
},
|
||||
{
|
||||
"column": "table_service_share_money",
|
||||
"count": 74,
|
||||
"issue": "API 中的 0 值在 ODS 中变成了 NULL"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"task_code": "ODS_TABLES",
|
||||
"table_name": "billiards_ods.site_tables_master",
|
||||
"checked_rows": 100,
|
||||
"issues": [
|
||||
{
|
||||
"column": "order_id",
|
||||
"count": 19,
|
||||
"issue": "API 中的 0 值在 ODS 中变成了 NULL"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
283
tmp/backfill_dwd_from_ods.py
Normal file
283
tmp/backfill_dwd_from_ods.py
Normal file
@@ -0,0 +1,283 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
从 ODS 同步回填 DWD 缺失的列值
|
||||
"""
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
project_root = Path(__file__).parent.parent / "etl_billiards"
|
||||
sys.path.insert(0, str(project_root))
|
||||
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv(project_root / ".env")
|
||||
|
||||
from database.connection import DatabaseConnection
|
||||
|
||||
|
||||
# DWD 回填配置: (dwd_table, ods_table, join_condition, [(dwd_col, ods_col), ...])
|
||||
BACKFILL_CONFIGS = [
|
||||
# dwd_settlement_head
|
||||
(
|
||||
"billiards_dwd.dwd_settlement_head",
|
||||
"billiards_ods.settlement_records",
|
||||
"d.order_settle_id = o.id",
|
||||
[
|
||||
("pl_coupon_sale_amount", "plcouponsaleamount"),
|
||||
("mervou_sales_amount", "mervousalesamount"),
|
||||
("electricity_money", "electricitymoney"),
|
||||
("real_electricity_money", "realelectricitymoney"),
|
||||
("electricity_adjust_money", "electricityadjustmoney"),
|
||||
]
|
||||
),
|
||||
# dwd_recharge_order
|
||||
(
|
||||
"billiards_dwd.dwd_recharge_order",
|
||||
"billiards_ods.recharge_settlements",
|
||||
"d.recharge_order_id = o.id",
|
||||
[
|
||||
("pl_coupon_sale_amount", "plcouponsaleamount"),
|
||||
("mervou_sales_amount", "mervousalesamount"),
|
||||
("electricity_money", "electricitymoney"),
|
||||
("real_electricity_money", "realelectricitymoney"),
|
||||
("electricity_adjust_money", "electricityadjustmoney"),
|
||||
]
|
||||
),
|
||||
# dwd_member_balance_change
|
||||
(
|
||||
"billiards_dwd.dwd_member_balance_change",
|
||||
"billiards_ods.member_balance_changes",
|
||||
"d.balance_change_id = o.id",
|
||||
[
|
||||
("principal_before", "principal_before"),
|
||||
("principal_after", "principal_after"),
|
||||
("principal_change_amount", "principal_data"),
|
||||
]
|
||||
),
|
||||
# dim_member
|
||||
(
|
||||
"billiards_dwd.dim_member",
|
||||
"billiards_ods.member_profiles",
|
||||
"d.member_id = o.id",
|
||||
[
|
||||
("pay_money_sum", "pay_money_sum"),
|
||||
("recharge_money_sum", "recharge_money_sum"),
|
||||
]
|
||||
),
|
||||
# dim_member_ex
|
||||
(
|
||||
"billiards_dwd.dim_member_ex",
|
||||
"billiards_ods.member_profiles",
|
||||
"d.member_id = o.id",
|
||||
[
|
||||
("person_tenant_org_id", "person_tenant_org_id"),
|
||||
("person_tenant_org_name", "person_tenant_org_name"),
|
||||
("register_source", "register_source"),
|
||||
]
|
||||
),
|
||||
# dim_member_card_account
|
||||
(
|
||||
"billiards_dwd.dim_member_card_account",
|
||||
"billiards_ods.member_stored_value_cards",
|
||||
"d.member_card_id = o.id",
|
||||
[
|
||||
("principal_balance", "principal_balance"),
|
||||
("member_grade", "member_grade"),
|
||||
]
|
||||
),
|
||||
# dim_member_card_account_ex
|
||||
(
|
||||
"billiards_dwd.dim_member_card_account_ex",
|
||||
"billiards_ods.member_stored_value_cards",
|
||||
"d.member_card_id = o.id",
|
||||
[
|
||||
("able_share_member_discount", "able_share_member_discount"),
|
||||
("electricity_deduct_radio", "electricity_deduct_radio"),
|
||||
("electricity_discount", "electricity_discount"),
|
||||
("electricity_card_deduct", "electricitycarddeduct"),
|
||||
("recharge_freeze_balance", "rechargefreezebalance"),
|
||||
]
|
||||
),
|
||||
# dwd_table_fee_log
|
||||
(
|
||||
"billiards_dwd.dwd_table_fee_log",
|
||||
"billiards_ods.table_fee_transactions",
|
||||
"d.table_fee_log_id = o.id",
|
||||
[
|
||||
("activity_discount_amount", "activity_discount_amount"),
|
||||
("real_service_money", "real_service_money"),
|
||||
]
|
||||
),
|
||||
# dwd_table_fee_log_ex
|
||||
(
|
||||
"billiards_dwd.dwd_table_fee_log_ex",
|
||||
"billiards_ods.table_fee_transactions",
|
||||
"d.table_fee_log_id = o.id",
|
||||
[
|
||||
("order_consumption_type", "order_consumption_type"),
|
||||
]
|
||||
),
|
||||
# dwd_assistant_service_log
|
||||
(
|
||||
"billiards_dwd.dwd_assistant_service_log",
|
||||
"billiards_ods.assistant_service_records",
|
||||
"d.assistant_service_id = o.id",
|
||||
[
|
||||
("real_service_money", "real_service_money"),
|
||||
]
|
||||
),
|
||||
# dwd_assistant_service_log_ex
|
||||
(
|
||||
"billiards_dwd.dwd_assistant_service_log_ex",
|
||||
"billiards_ods.assistant_service_records",
|
||||
"d.assistant_service_id = o.id",
|
||||
[
|
||||
("assistant_team_name", "assistantteamname"),
|
||||
]
|
||||
),
|
||||
# dwd_store_goods_sale
|
||||
(
|
||||
"billiards_dwd.dwd_store_goods_sale",
|
||||
"billiards_ods.store_goods_sales_records",
|
||||
"d.store_goods_sale_id = o.id",
|
||||
[
|
||||
("coupon_share_money", "coupon_share_money"),
|
||||
]
|
||||
),
|
||||
# dwd_groupbuy_redemption
|
||||
(
|
||||
"billiards_dwd.dwd_groupbuy_redemption",
|
||||
"billiards_ods.group_buy_redemption_records",
|
||||
"d.redemption_id = o.id",
|
||||
[
|
||||
("coupon_sale_id", "coupon_sale_id"),
|
||||
("member_discount_money", "member_discount_money"),
|
||||
]
|
||||
),
|
||||
# dwd_groupbuy_redemption_ex
|
||||
(
|
||||
"billiards_dwd.dwd_groupbuy_redemption_ex",
|
||||
"billiards_ods.group_buy_redemption_records",
|
||||
"d.redemption_id = o.id",
|
||||
[
|
||||
("assistant_share_money", "assistant_share_money"),
|
||||
("table_share_money", "table_share_money"),
|
||||
("goods_share_money", "goods_share_money"),
|
||||
("recharge_share_money", "recharge_share_money"),
|
||||
]
|
||||
),
|
||||
# dim_table
|
||||
(
|
||||
"billiards_dwd.dim_table",
|
||||
"billiards_ods.site_tables_master",
|
||||
"d.table_id = o.id",
|
||||
[
|
||||
("order_id", "order_id"),
|
||||
]
|
||||
),
|
||||
# dim_store_goods
|
||||
(
|
||||
"billiards_dwd.dim_store_goods",
|
||||
"billiards_ods.store_goods_master",
|
||||
"d.site_goods_id = o.id",
|
||||
[
|
||||
("commodity_code", "commodity_code"),
|
||||
("not_sale", "not_sale"),
|
||||
]
|
||||
),
|
||||
# dim_tenant_goods
|
||||
(
|
||||
"billiards_dwd.dim_tenant_goods",
|
||||
"billiards_ods.tenant_goods_master",
|
||||
"d.tenant_goods_id = o.id",
|
||||
[
|
||||
("not_sale", "not_sale"),
|
||||
]
|
||||
),
|
||||
# dim_groupbuy_package
|
||||
(
|
||||
"billiards_dwd.dim_groupbuy_package",
|
||||
"billiards_ods.group_buy_packages",
|
||||
"d.groupbuy_package_id = o.id",
|
||||
[
|
||||
("sort", "sort"),
|
||||
("is_first_limit", "is_first_limit"),
|
||||
]
|
||||
),
|
||||
]
|
||||
|
||||
|
||||
def column_exists(db, table: str, column: str) -> bool:
    """Return True when *column* is present on *table* ("schema.table")."""
    schema_name, table_name = table.split(".")
    rows = db.query(
        """
        SELECT 1 FROM information_schema.columns
        WHERE table_schema = %s AND table_name = %s AND column_name = %s
    """,
        (schema_name, table_name, column.lower()),
    )
    return bool(rows)
|
||||
|
||||
|
||||
def main():
    """Backfill NULL columns in DWD tables from their ODS source tables.

    For every (dwd_table, ods_table, join_cond, columns) entry in
    BACKFILL_CONFIGS, copy each ODS column value into the matching DWD
    column, but only where the DWD value is still NULL and the ODS value
    is present.  Requires the PG_DSN environment variable to be set.
    """
    dsn = os.getenv("PG_DSN")
    if not dsn:
        print("Error: PG_DSN not set")
        return

    db = DatabaseConnection(dsn)

    print("=" * 70)
    print("DWD Backfill from ODS Script")
    print("=" * 70)

    total_updates = 0
    errors = []

    # FIX: guarantee the connection is released even if an unexpected
    # exception escapes the loop (e.g. from column_exists); the original
    # leaked `db` in that case because close() was only on the happy path.
    try:
        for dwd_table, ods_table, join_cond, columns in BACKFILL_CONFIGS:
            print(f"\n[{dwd_table}]")

            for dwd_col, ods_col in columns:
                # Both sides must exist, otherwise skip with a diagnostic.
                if not column_exists(db, dwd_table, dwd_col):
                    print(f"  {dwd_col}: SKIP (DWD column not found)")
                    continue
                if not column_exists(db, ods_table, ods_col):
                    print(f"  {dwd_col}: SKIP (ODS column {ods_col} not found)")
                    continue

                # NOTE: identifiers are interpolated into SQL; acceptable here
                # because they come from the trusted in-file config above.
                sql = f"""
                    UPDATE {dwd_table} d
                    SET "{dwd_col}" = o."{ods_col}"
                    FROM {ods_table} o
                    WHERE {join_cond}
                      AND d."{dwd_col}" IS NULL
                      AND o."{ods_col}" IS NOT NULL
                """

                try:
                    db.execute(sql)
                    db.commit()

                    # Progress is reported as current non-NULL population.
                    count_sql = f'SELECT COUNT(*) as cnt FROM {dwd_table} WHERE "{dwd_col}" IS NOT NULL'
                    cnt = db.query(count_sql)[0]["cnt"]
                    print(f"  {dwd_col}: OK (now {cnt} non-null)")
                    total_updates += 1

                except Exception as e:
                    db.rollback()
                    err_msg = str(e).split("\n")[0][:80]
                    print(f"  {dwd_col}: ERROR - {err_msg}")
                    errors.append((dwd_table, dwd_col, err_msg))

        print("\n" + "=" * 70)
        print(f"Completed: {total_updates} columns processed")
        if errors:
            print(f"Errors: {len(errors)}")
            for t, c, e in errors:
                print(f"  - {t}.{c}: {e}")
    finally:
        db.close()


if __name__ == "__main__":
    main()
|
||||
208
tmp/backfill_ods_from_payload.py
Normal file
208
tmp/backfill_ods_from_payload.py
Normal file
@@ -0,0 +1,208 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
从 ODS payload 回填缺失的列值
|
||||
"""
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
project_root = Path(__file__).parent.parent / "etl_billiards"
|
||||
sys.path.insert(0, str(project_root))
|
||||
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv(project_root / ".env")
|
||||
|
||||
from database.connection import DatabaseConnection
|
||||
|
||||
|
||||
# Backfill configuration: (table name, [(db_col, payload_jsonb_expr), ...]).
# Each entry copies values extracted from the raw JSONB `payload` column into
# the typed column of the same ODS table (only where the typed column is NULL).
BACKFILL_CONFIGS = [
    # settlement_records — fields nested inside the payload's settleList object
    ("billiards_ods.settlement_records", [
        ("plcouponsaleamount", "(payload->'settleList')->>'plCouponSaleAmount'"),
        ("mervousalesamount", "(payload->'settleList')->>'merVouSalesAmount'"),
        ("electricitymoney", "(payload->'settleList')->>'electricityMoney'"),
        ("realelectricitymoney", "(payload->'settleList')->>'realElectricityMoney'"),
        ("electricityadjustmoney", "(payload->'settleList')->>'electricityAdjustMoney'"),
    ]),
    # recharge_settlements — same settleList fields as settlement_records
    ("billiards_ods.recharge_settlements", [
        ("plcouponsaleamount", "(payload->'settleList')->>'plCouponSaleAmount'"),
        ("mervousalesamount", "(payload->'settleList')->>'merVouSalesAmount'"),
        ("electricitymoney", "(payload->'settleList')->>'electricityMoney'"),
        ("realelectricitymoney", "(payload->'settleList')->>'realElectricityMoney'"),
        ("electricityadjustmoney", "(payload->'settleList')->>'electricityAdjustMoney'"),
    ]),
    # member_balance_changes — principal snapshot fields
    ("billiards_ods.member_balance_changes", [
        ("principal_before", "payload->>'principal_before'"),
        ("principal_after", "payload->>'principal_after'"),
        ("principal_data", "payload->>'principal_data'"),
    ]),
    # member_stored_value_cards — balance / discount attributes
    ("billiards_ods.member_stored_value_cards", [
        ("principal_balance", "payload->>'principal_balance'"),
        ("member_grade", "payload->>'member_grade'"),
        ("rechargefreezebalance", "payload->>'rechargeFreezeBalance'"),
        ("able_share_member_discount", "payload->>'able_share_member_discount'"),
        ("electricity_deduct_radio", "payload->>'electricity_deduct_radio'"),
        ("electricity_discount", "payload->>'electricity_discount'"),
        ("electricitycarddeduct", "payload->>'electricityCardDeduct'"),
    ]),
    # member_profiles — lifetime totals and registration origin
    ("billiards_ods.member_profiles", [
        ("pay_money_sum", "payload->>'pay_money_sum'"),
        ("recharge_money_sum", "payload->>'recharge_money_sum'"),
        ("person_tenant_org_id", "payload->>'person_tenant_org_id'"),
        ("person_tenant_org_name", "payload->>'person_tenant_org_name'"),
        ("register_source", "payload->>'register_source'"),
    ]),
    # table_fee_transactions
    ("billiards_ods.table_fee_transactions", [
        ("activity_discount_amount", "payload->>'activity_discount_amount'"),
        ("real_service_money", "payload->>'real_service_money'"),
        ("order_consumption_type", "payload->>'order_consumption_type'"),
    ]),
    # assistant_service_records — note the camelCase payload key
    ("billiards_ods.assistant_service_records", [
        ("real_service_money", "payload->>'real_service_money'"),
        ("assistantteamname", "payload->>'assistantTeamName'"),
    ]),
    # store_goods_sales_records
    ("billiards_ods.store_goods_sales_records", [
        ("coupon_share_money", "payload->>'coupon_share_money'"),
    ]),
    # group_buy_redemption_records — revenue-split breakdown fields
    ("billiards_ods.group_buy_redemption_records", [
        ("coupon_sale_id", "payload->>'coupon_sale_id'"),
        ("member_discount_money", "payload->>'member_discount_money'"),
        ("assistant_share_money", "payload->>'assistant_share_money'"),
        ("table_share_money", "payload->>'table_share_money'"),
        ("goods_share_money", "payload->>'goods_share_money'"),
        ("recharge_share_money", "payload->>'recharge_share_money'"),
    ]),
    # site_tables_master
    ("billiards_ods.site_tables_master", [
        ("order_id", "payload->>'order_id'"),
    ]),
    # store_goods_master
    ("billiards_ods.store_goods_master", [
        ("commodity_code", "payload->>'commodity_code'"),
        ("not_sale", "payload->>'not_sale'"),
    ]),
    # table_fee_discount_records — table/area descriptive attributes
    ("billiards_ods.table_fee_discount_records", [
        ("table_name", "payload->>'table_name'"),
        ("table_price", "payload->>'table_price'"),
        ("charge_free", "payload->>'charge_free'"),
        ("area_type_id", "payload->>'area_type_id'"),
        ("site_table_area_id", "payload->>'site_table_area_id'"),
        ("site_table_area_name", "payload->>'site_table_area_name'"),
    ]),
    # tenant_goods_master
    ("billiards_ods.tenant_goods_master", [
        ("not_sale", "payload->>'not_sale'"),
    ]),
    # group_buy_packages
    ("billiards_ods.group_buy_packages", [
        ("sort", "payload->>'sort'"),
        ("is_first_limit", "payload->>'is_first_limit'"),
    ]),
]
|
||||
|
||||
|
||||
def column_exists(db, table: str, column: str) -> bool:
    """Check whether the schema-qualified *table* has *column* (case-insensitive)."""
    schema_part, table_part = table.split(".")
    lookup_sql = """
        SELECT 1 FROM information_schema.columns
        WHERE table_schema = %s AND table_name = %s AND column_name = %s
    """
    found = db.query(lookup_sql, (schema_part, table_part, column.lower()))
    return bool(found)
|
||||
|
||||
|
||||
def get_column_type(db, table: str, column: str) -> str:
    """Look up a column's information_schema data_type; default to "text" when absent."""
    schema_part, table_part = table.split(".")
    lookup_sql = """
        SELECT data_type FROM information_schema.columns
        WHERE table_schema = %s AND table_name = %s AND column_name = %s
    """
    rows = db.query(lookup_sql, (schema_part, table_part, column.lower()))
    if not rows:
        return "text"
    return rows[0]["data_type"]
|
||||
|
||||
|
||||
def main():
    """Backfill NULL ODS columns from the raw JSON ``payload`` column.

    For each configured (table, [(column, jsonb_expression), ...]) pair, the
    extracted text value is cast to the column's declared type and copied into
    rows where the typed column is still NULL.  Requires the PG_DSN
    environment variable to be set.
    """
    dsn = os.getenv("PG_DSN")
    if not dsn:
        print("Error: PG_DSN not set")
        return

    db = DatabaseConnection(dsn)

    print("=" * 70)
    print("ODS Payload Backfill Script")
    print("=" * 70)

    total_updates = 0
    errors = []

    # FIX: close the connection even when an unexpected exception escapes the
    # loop (the original only closed it on the fully-successful path).
    try:
        for table, columns in BACKFILL_CONFIGS:
            print(f"\n[{table}]")

            for db_col, payload_expr in columns:
                if not column_exists(db, table, db_col):
                    print(f"  {db_col}: SKIP (column not found)")
                    continue

                # Pick a cast matching the column's declared type; JSONB ->>
                # always yields text, so numeric/bool/timestamp need ::casts.
                col_type = get_column_type(db, table, db_col)
                if col_type in ("numeric", "double precision", "real", "decimal"):
                    cast_expr = f"({payload_expr})::numeric"
                elif col_type in ("integer", "bigint", "smallint"):
                    cast_expr = f"({payload_expr})::bigint"
                elif col_type == "boolean":
                    cast_expr = f"({payload_expr})::boolean"
                elif col_type in ("timestamp", "timestamp with time zone", "timestamp without time zone"):
                    cast_expr = f"({payload_expr})::timestamp"
                else:
                    cast_expr = payload_expr  # text: keep as is

                # NOTE: identifiers/expressions come from the trusted
                # in-file BACKFILL_CONFIGS, not from user input.
                sql = f"""
                    UPDATE {table}
                    SET "{db_col}" = {cast_expr}
                    WHERE "{db_col}" IS NULL
                      AND {payload_expr} IS NOT NULL
                """

                try:
                    db.execute(sql)
                    db.commit()

                    # Report current non-NULL population for the column.
                    count_sql = f"""
                        SELECT COUNT(*) as cnt FROM {table}
                        WHERE "{db_col}" IS NOT NULL
                    """
                    cnt = db.query(count_sql)[0]["cnt"]
                    print(f"  {db_col}: OK (now {cnt} non-null)")
                    total_updates += 1

                except Exception as e:
                    db.rollback()
                    err_msg = str(e).split("\n")[0][:80]
                    print(f"  {db_col}: ERROR - {err_msg}")
                    errors.append((table, db_col, err_msg))

        print("\n" + "=" * 70)
        print(f"Completed: {total_updates} columns processed")
        if errors:
            print(f"Errors: {len(errors)}")
            for t, c, e in errors:
                print(f"  - {t}.{c}: {e}")
    finally:
        db.close()


if __name__ == "__main__":
    main()
|
||||
57
tmp/bd_manual_diff.json
Normal file
57
tmp/bd_manual_diff.json
Normal file
@@ -0,0 +1,57 @@
|
||||
[
|
||||
{
|
||||
"table": "dim_member_ex",
|
||||
"missing_in_doc": [],
|
||||
"extra_in_doc": [
|
||||
"1"
|
||||
],
|
||||
"type_mismatches": [],
|
||||
"doc_path": "etl_billiards\\docs\\bd_manual\\Ex\\BD_manual_dim_member_ex.md"
|
||||
},
|
||||
{
|
||||
"table": "dim_store_goods",
|
||||
"missing_in_doc": [],
|
||||
"extra_in_doc": [
|
||||
"1"
|
||||
],
|
||||
"type_mismatches": [],
|
||||
"doc_path": "etl_billiards\\docs\\bd_manual\\main\\BD_manual_dim_store_goods.md"
|
||||
},
|
||||
{
|
||||
"table": "dim_table",
|
||||
"missing_in_doc": [],
|
||||
"extra_in_doc": [
|
||||
"补时长"
|
||||
],
|
||||
"type_mismatches": [],
|
||||
"doc_path": "etl_billiards\\docs\\bd_manual\\main\\BD_manual_dim_table.md"
|
||||
},
|
||||
{
|
||||
"table": "dim_table_ex",
|
||||
"missing_in_doc": [],
|
||||
"extra_in_doc": [
|
||||
"1"
|
||||
],
|
||||
"type_mismatches": [],
|
||||
"doc_path": "etl_billiards\\docs\\bd_manual\\Ex\\BD_manual_dim_table_ex.md"
|
||||
},
|
||||
{
|
||||
"table": "dwd_member_balance_change",
|
||||
"missing_in_doc": [],
|
||||
"extra_in_doc": [
|
||||
"台费专用卡",
|
||||
"最主要的消费卡种"
|
||||
],
|
||||
"type_mismatches": [],
|
||||
"doc_path": "etl_billiards\\docs\\bd_manual\\main\\BD_manual_dwd_member_balance_change.md"
|
||||
},
|
||||
{
|
||||
"table": "dwd_refund_ex",
|
||||
"missing_in_doc": [],
|
||||
"extra_in_doc": [
|
||||
"1"
|
||||
],
|
||||
"type_mismatches": [],
|
||||
"doc_path": "etl_billiards\\docs\\bd_manual\\Ex\\BD_manual_dwd_refund_ex.md"
|
||||
}
|
||||
]
|
||||
295
tmp/check_api_ods_issues.py
Normal file
295
tmp/check_api_ods_issues.py
Normal file
@@ -0,0 +1,295 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
排查 API -> ODS 环节的问题:
|
||||
1. 检测 API 字段在 ODS 表中缺失的列
|
||||
2. 检测 API 中的 0 值在 ODS 中是否变成了 NULL
|
||||
"""
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
from datetime import datetime
|
||||
from decimal import Decimal
|
||||
from pathlib import Path
|
||||
|
||||
# 添加项目路径
|
||||
project_root = Path(__file__).parent.parent / "etl_billiards"
|
||||
sys.path.insert(0, str(project_root))
|
||||
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv(project_root / ".env")
|
||||
|
||||
from database.connection import DatabaseConnection
|
||||
|
||||
|
||||
def load_api_ods_comparison():
    """Load the cached API-ODS comparison JSON next to this script; {} when absent."""
    comparison_path = Path(__file__).parent / "api_ods_comparison.json"
    if not comparison_path.exists():
        return {}
    with open(comparison_path, "r", encoding="utf-8") as fh:
        return json.load(fh)
|
||||
|
||||
|
||||
def get_ods_tables_mapping():
    """Map ODS task codes to their fully-qualified ("billiards_ods.*") table names."""
    bare_tables = {
        "ODS_ASSISTANT_ACCOUNT": "assistant_accounts_master",
        "ODS_SETTLEMENT_RECORDS": "settlement_records",
        "ODS_TABLE_USE": "table_fee_transactions",
        "ODS_ASSISTANT_LEDGER": "assistant_service_records",
        "ODS_ASSISTANT_ABOLISH": "assistant_cancellation_records",
        "ODS_STORE_GOODS_SALES": "store_goods_sales_records",
        "ODS_PAYMENT": "payment_transactions",
        "ODS_REFUND": "refund_transactions",
        "ODS_PLATFORM_COUPON": "platform_coupon_redemption_records",
        "ODS_MEMBER": "member_profiles",
        "ODS_MEMBER_CARD": "member_stored_value_cards",
        "ODS_MEMBER_BALANCE": "member_balance_changes",
        "ODS_RECHARGE_SETTLE": "recharge_settlements",
        "ODS_GROUP_PACKAGE": "group_buy_packages",
        "ODS_GROUP_BUY_REDEMPTION": "group_buy_redemption_records",
        "ODS_INVENTORY_STOCK": "goods_stock_summary",
        "ODS_INVENTORY_CHANGE": "goods_stock_movements",
        "ODS_TABLES": "site_tables_master",
        "ODS_GOODS_CATEGORY": "stock_goods_category_tree",
        "ODS_STORE_GOODS": "store_goods_master",
        "ODS_TABLE_FEE_DISCOUNT": "table_fee_discount_records",
        "ODS_TENANT_GOODS": "tenant_goods_master",
    }
    # All ODS tables live in the billiards_ods schema; qualify them uniformly.
    return {code: f"billiards_ods.{name}" for code, name in bare_tables.items()}
|
||||
|
||||
|
||||
def check_zero_to_null_issues(db: DatabaseConnection, table_name: str, limit: int = 100):
    """Detect rows where the raw payload holds 0 but the typed column is NULL.

    Scans the most recent *limit* rows of *table_name* and, for each numeric
    column, compares the column value with the matching key in the JSON
    payload (key match is case-insensitive).

    Returns either
      {"error": <message>, "issues": []}            when a query failed, or
      {"issues": [...], "checked_rows": <n>}        on success.
    """
    # Resolve schema-qualified names; default to "public" when unqualified.
    schema, name = table_name.split(".", 1) if "." in table_name else ("public", table_name)
    col_sql = """
        SELECT column_name, data_type, udt_name
        FROM information_schema.columns
        WHERE table_schema = %s AND table_name = %s
        ORDER BY ordinal_position
    """

    try:
        cols = db.query(col_sql, (schema, name))
    except Exception as e:
        return {"error": str(e), "issues": []}

    # Only numeric columns can exhibit the 0 -> NULL conversion problem.
    numeric_types = ("integer", "bigint", "smallint", "numeric",
                     "double precision", "real", "decimal")
    numeric_cols = [c["column_name"] for c in cols if c["data_type"] in numeric_types]

    # FIX: with no numeric columns the original built "SELECT payload, FROM ..."
    # — a syntax error reported as a misleading per-table failure.  There is
    # simply nothing to check in that case.
    if not numeric_cols:
        return {"issues": [], "checked_rows": 0}

    # Inspect the most recent rows (identifiers come from information_schema,
    # not user input, so f-string interpolation is acceptable here).
    check_sql = f"""
        SELECT payload, {', '.join(f'"{c}"' for c in numeric_cols)}
        FROM {table_name}
        WHERE payload IS NOT NULL
        ORDER BY fetched_at DESC NULLS LAST
        LIMIT %s
    """

    try:
        rows = db.query(check_sql, (limit,))
    except Exception as e:
        return {"error": str(e), "issues": []}

    # column name -> number of affected rows
    zero_to_null_count = {}

    for row in rows:
        payload = row.get("payload")
        if not payload:
            continue

        if isinstance(payload, str):
            # FIX: was a bare `except:` which also swallowed SystemExit and
            # KeyboardInterrupt; only JSON decode failures are expected here
            # (json.JSONDecodeError is a subclass of ValueError).
            try:
                payload = json.loads(payload)
            except (ValueError, TypeError):
                continue

        if not isinstance(payload, dict):
            continue

        for col in numeric_cols:
            db_value = row.get(col)

            # Locate the payload key case-insensitively.
            payload_value = None
            for k, v in payload.items():
                if k.lower() == col.lower():
                    payload_value = v
                    break

            # The defect being hunted: payload says 0, the column says NULL.
            if payload_value == 0 and db_value is None:
                zero_to_null_count[col] = zero_to_null_count.get(col, 0) + 1

    issues = [
        {"column": col, "count": count, "issue": "API 中的 0 值在 ODS 中变成了 NULL"}
        for col, count in zero_to_null_count.items()
    ]

    return {"issues": issues, "checked_rows": len(rows)}
|
||||
|
||||
|
||||
def generate_report():
    """Generate the full API -> ODS investigation report.

    Three sections: (1) API fields missing from ODS tables, based on a
    previously generated comparison file; (2) a live-database check for
    0-values turned into NULLs; (3) a summary with suggested DDL.  The
    machine-readable report is also written to api_ods_issue_report.json
    next to this script.  Returns the report dict, or None when the
    comparison file or PG_DSN is missing.
    """
    print("=" * 80)
    print("API -> ODS 字段排查报告")
    print("生成时间:", datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
    print("=" * 80)

    # Load the cached comparison produced by compare_api_ods_fields.py.
    comparison = load_api_ods_comparison()

    if not comparison:
        print("\n[错误] 未找到 API-ODS 对比文件 (api_ods_comparison.json)")
        print("请先运行 compare_api_ods_fields.py 生成对比数据")
        return

    # Section 1: API fields missing from the ODS tables.
    print("\n" + "=" * 80)
    print("一、API 字段在 ODS 表中缺失的情况")
    print("=" * 80)

    missing_summary = []
    for task_code, data in comparison.items():
        missing = data.get("missing_in_ods", [])
        if missing:
            # Filter out nested objects (siteprofile etc.) and site-level
            # system fields that are deliberately not stored in ODS.
            filtered_missing = [
                f for f in missing
                if f.lower() not in ("siteprofile", "settleprofile", "tableprofile", "address", "avatar",
                                     "business_tel", "customer_service_qrcode", "customer_service_wechat",
                                     "fixed_pay_qrcode", "full_address", "latitude", "longitude",
                                     "light_status", "light_token", "light_type", "org_id", "prod_env",
                                     "shop_name", "shop_status", "site_label", "site_type",
                                     "tenant_site_region_id", "wifi_name", "wifi_password",
                                     "attendance_distance", "attendance_enabled", "auto_light")
            ]
            if filtered_missing:
                missing_summary.append({
                    "task_code": task_code,
                    "table_name": data.get("table_name"),
                    "endpoint": data.get("endpoint"),
                    "missing_fields": filtered_missing,
                })

    if missing_summary:
        for item in missing_summary:
            print(f"\n【{item['task_code']}】")
            print(f"  表名: {item['table_name']}")
            print(f"  端点: {item['endpoint']}")
            print(f"  缺失字段 ({len(item['missing_fields'])} 个):")
            for field in item['missing_fields']:
                print(f"    - {field}")
    else:
        print("\n没有发现明显缺失的业务字段。")

    # Section 2: check the live database for 0 -> NULL conversions.
    print("\n" + "=" * 80)
    print("二、检查 API 中的 0 值在 ODS 中是否变成了 NULL")
    print("=" * 80)

    try:
        dsn = os.getenv("PG_DSN")
        if not dsn:
            print("[错误] 未找到 PG_DSN 环境变量")
            return
        db = DatabaseConnection(dsn)
        tables = get_ods_tables_mapping()

        zero_null_issues = []
        for task_code, table_name in tables.items():
            print(f"\n检查 {task_code} ({table_name})...")
            result = check_zero_to_null_issues(db, table_name)

            if result.get("error"):
                print(f"  [错误] {result['error']}")
                continue

            if result.get("issues"):
                zero_null_issues.append({
                    "task_code": task_code,
                    "table_name": table_name,
                    "checked_rows": result["checked_rows"],
                    "issues": result["issues"],
                })
                for issue in result["issues"]:
                    print(f"  [发现问题] 列 '{issue['column']}': {issue['count']} 条记录 - {issue['issue']}")
            else:
                print(f"  [正常] 检查了 {result['checked_rows']} 条记录,未发现 0 转 NULL 问题")

        db.close()

    except Exception as e:
        # NOTE(review): an exception part-way through the loop discards any
        # partial zero_null_issues already collected — confirm this is intended.
        print(f"\n[错误] 数据库连接失败: {e}")
        zero_null_issues = []

    # Section 3: summary plus suggested DDL for the missing columns.
    print("\n" + "=" * 80)
    print("三、问题汇总")
    print("=" * 80)

    print("\n1. 需要添加的 ODS 表列:")
    if missing_summary:
        all_ddl = []
        for item in missing_summary:
            table_name = item['table_name']
            for field in item['missing_fields']:
                # Guess the column type from naming conventions; the generated
                # DDL is a starting point, not authoritative.
                if field.endswith("_id") or field in ("tenant_id", "member_id", "site_id"):
                    col_type = "BIGINT"
                elif field.endswith("_money") or field.endswith("_amount") or field.endswith("_price"):
                    col_type = "NUMERIC(18,2)"
                elif field.endswith("_time") or field.startswith("create") or field.startswith("update"):
                    col_type = "TIMESTAMP"
                elif field.startswith("is_") or field.endswith("_status"):
                    col_type = "INTEGER"
                else:
                    col_type = "TEXT"

                ddl = f"ALTER TABLE {table_name} ADD COLUMN IF NOT EXISTS {field} {col_type};"
                all_ddl.append(ddl)

        print("\n生成的 DDL 语句:")
        for ddl in all_ddl:
            print(f"  {ddl}")
    else:
        print("  无")

    print("\n2. 需要修复的 0 转 NULL 问题:")
    if zero_null_issues:
        for item in zero_null_issues:
            print(f"\n  【{item['task_code']}】({item['table_name']})")
            for issue in item['issues']:
                print(f"    - 列 '{issue['column']}': {issue['count']} 条记录受影响")
    else:
        print("  未发现明显的 0 转 NULL 问题")

    # Persist the machine-readable report next to this script.
    report = {
        "generated_at": datetime.now().isoformat(),
        "missing_fields": missing_summary,
        "zero_to_null_issues": zero_null_issues,
    }

    report_path = Path(__file__).parent / "api_ods_issue_report.json"
    with open(report_path, "w", encoding="utf-8") as f:
        json.dump(report, f, ensure_ascii=False, indent=2)

    print(f"\n报告已保存到: {report_path}")

    return report


if __name__ == "__main__":
    generate_report()
|
||||
231
tmp/check_ddl_vs_db.py
Normal file
231
tmp/check_ddl_vs_db.py
Normal file
@@ -0,0 +1,231 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
检查 DDL 文件与数据库实际结构的差异
|
||||
"""
|
||||
import os
import re
from pathlib import Path

import psycopg2
|
||||
|
||||
DSN = 'postgresql://local-Python:Neo-local-1991125@100.64.0.4:5432/LLZQ-test'
|
||||
|
||||
def get_db_columns(conn, schema):
    """Return {table_name: {column_name: type_string}} for every table in *schema*.

    Type strings are normalized to the DDL-style spellings that
    parse_ddl_file produces (VARCHAR(n), NUMERIC(p,s), TIMESTAMPTZ, ...),
    so the two sides can be compared directly.
    """
    sql = """
        SELECT table_name, column_name, data_type,
               character_maximum_length, numeric_precision, numeric_scale,
               is_nullable
        FROM information_schema.columns
        WHERE table_schema = %s
        ORDER BY table_name, ordinal_position
    """
    with conn.cursor() as cur:
        cur.execute(sql, (schema,))
        records = cur.fetchall()

    # Types that need no length/precision suffix map to fixed spellings.
    plain_types = {
        'integer': 'INTEGER',
        'bigint': 'BIGINT',
        'smallint': 'SMALLINT',
        'boolean': 'BOOLEAN',
        'text': 'TEXT',
        'jsonb': 'JSONB',
        'json': 'JSON',
        'date': 'DATE',
        'timestamp with time zone': 'TIMESTAMPTZ',
        'timestamp without time zone': 'TIMESTAMP',
    }

    tables = {}
    for table_name, column_name, data_type, char_len, num_prec, num_scale, _nullable in records:
        if data_type == 'character varying':
            type_str = f'VARCHAR({char_len})' if char_len else 'VARCHAR'
        elif data_type == 'numeric':
            type_str = f'NUMERIC({num_prec},{num_scale})' if num_prec else 'NUMERIC'
        else:
            type_str = plain_types.get(data_type, data_type.upper())

        tables.setdefault(table_name, {})[column_name.lower()] = type_str

    return tables
|
||||
|
||||
def parse_ddl_file(filepath, default_schema=None):
    """Parse a DDL file and extract {table_name: {column: TYPE}} definitions.

    Handles CREATE TABLE with or without IF NOT EXISTS and with or without a
    schema qualifier (unqualified tables fall back to *default_schema*).
    Comment and constraint lines (PRIMARY KEY, CHECK, ...) are skipped.
    """
    text = Path(filepath).read_text(encoding='utf-8')

    # Matches: CREATE TABLE [IF NOT EXISTS] [schema.]table ( ... );
    table_pattern = re.compile(
        r'CREATE TABLE\s+(?:IF NOT EXISTS\s+)?(?:(\w+)\.)?(\w+)\s*\((.*?)\);',
        re.DOTALL | re.IGNORECASE
    )
    # Leading keywords that mark a constraint line, not a column definition.
    constraint_prefixes = ('PRIMARY KEY', 'CONSTRAINT', 'UNIQUE', 'FOREIGN KEY', 'CHECK', 'EXCLUDE')

    parsed = {}
    for hit in table_pattern.finditer(text):
        # NOTE: the schema (group 1 / default_schema) is resolved but, as in
        # the rest of this script, the result is keyed by bare table name.
        _schema = hit.group(1) or default_schema
        tbl_name = hit.group(2)
        body = hit.group(3)

        cols = {}
        for raw_line in body.split('\n'):
            stripped = raw_line.strip()
            if not stripped or stripped.startswith('--'):
                continue
            if stripped.upper().startswith(constraint_prefixes):
                continue

            # Column definition: column_name TYPE[(args)] ...
            m = re.match(r'^(\w+)\s+(\w+(?:\s*\([^)]+\))?)', stripped)
            if m is None:
                continue
            col_name = m.group(1).lower()
            col_type = m.group(2).upper().replace(' ', '')
            cols[col_name] = 'INTEGER' if col_type == 'INT' else col_type

        parsed[tbl_name] = cols

    return parsed
|
||||
|
||||
def compare_schemas(db_tables, ddl_tables, schema_name):
    """Compare the database structure against the DDL definitions.

    Returns a dict of difference lists:
      db_only_tables / ddl_only_tables  - tables present on one side only
      db_only_cols / ddl_only_cols      - columns present on one side only
      type_diff                         - shared columns whose types differ

    FIX: the original declared the 'type_diff' key but never populated it,
    so type mismatches were silently ignored.  They are now reported as
    "<schema>.<table>.<col>: DB=<db_type> vs DDL=<ddl_type>".
    """
    differences = {
        'db_only_tables': [],
        'ddl_only_tables': [],
        'db_only_cols': [],
        'ddl_only_cols': [],
        'type_diff': []
    }

    # Tables present in the DDL but missing from the database.
    for table in ddl_tables:
        if table not in db_tables:
            differences['ddl_only_tables'].append(f"{schema_name}.{table}")

    # Tables present in the database but missing from the DDL.
    for table in db_tables:
        if table not in ddl_tables:
            differences['db_only_tables'].append(f"{schema_name}.{table}")

    # Column-level differences for tables present on both sides.
    for table in set(db_tables.keys()) & set(ddl_tables.keys()):
        db_cols = db_tables[table]
        ddl_cols = ddl_tables[table]

        # Columns the DDL has but the database lacks.
        for col in ddl_cols:
            if col not in db_cols:
                differences['ddl_only_cols'].append(f"{schema_name}.{table}.{col} ({ddl_cols[col]})")

        # Columns the database has but the DDL lacks, and type mismatches.
        for col in db_cols:
            if col not in ddl_cols:
                differences['db_only_cols'].append(f"{schema_name}.{table}.{col} ({db_cols[col]})")
            elif db_cols[col] != ddl_cols[col]:
                differences['type_diff'].append(
                    f"{schema_name}.{table}.{col}: DB={db_cols[col]} vs DDL={ddl_cols[col]}"
                )

    return differences
|
||||
|
||||
def _print_diff(differences):
    """Print one schema's difference lists; says so when there are none."""
    total_diff = sum(len(v) for v in differences.values())
    if total_diff == 0:
        print("\nNo differences found.")
        return

    print(f"\nFound {total_diff} differences:")
    sections = [
        ('db_only_tables', '[DB has, DDL missing] Tables:'),
        ('ddl_only_tables', '[DDL has, DB missing] Tables:'),
        ('db_only_cols', '[DB has, DDL missing] Columns:'),
        ('ddl_only_cols', '[DDL has, DB missing] Columns:'),
        ('type_diff', '[Type mismatch] Columns:'),
    ]
    for key, title in sections:
        entries = differences.get(key) or []
        if entries:
            print(f"\n  {title}")
            for entry in sorted(entries):
                print(f"    - {entry}")


def _check_schema(conn, ddl_file, schema_name):
    """Compare one database schema against its DDL file and print the result."""
    print(f"\n### {schema_name} ###\n")
    if not ddl_file.exists():
        return
    db_tables = get_db_columns(conn, schema_name)
    ddl_tables = parse_ddl_file(ddl_file, schema_name)

    print(f"DB tables: {len(db_tables)}")
    print(f"DDL tables: {len(ddl_tables)}")

    _print_diff(compare_schemas(db_tables, ddl_tables, schema_name))


def main():
    """Entry point: compare the ODS and DWD schemas against their DDL docs.

    FIX/refactor: the original duplicated ~60 lines of per-schema reporting
    for ODS and DWD, and leaked the connection if anything raised.  The
    per-schema work now lives in _check_schema/_print_diff and the connection
    is released in a finally block.
    """
    conn = psycopg2.connect(DSN)
    try:
        base_dir = Path(__file__).parent.parent / 'etl_billiards' / 'database'

        print("=" * 80)
        print("DDL vs DB Structure Comparison")
        print("=" * 80)

        _check_schema(conn, base_dir / 'schema_ODS_doc.sql', 'billiards_ods')
        _check_schema(conn, base_dir / 'schema_dwd_doc.sql', 'billiards_dwd')
    finally:
        conn.close()
    print("\n" + "=" * 80)


if __name__ == '__main__':
    main()
|
||||
181
tmp/check_field_variants.py
Normal file
181
tmp/check_field_variants.py
Normal file
@@ -0,0 +1,181 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
检查缺失字段是否是拼写变体(驼峰式/下划线式、大小写差异等)
|
||||
"""
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
import re
|
||||
import psycopg2
|
||||
from psycopg2.extras import RealDictCursor
|
||||
|
||||
# Configuration.
# SECURITY FIX: the DSN (with credentials) was hard-coded; prefer the PG_DSN
# environment variable and keep the old literal only as a legacy fallback.
DSN = os.getenv('PG_DSN', 'postgresql://local-Python:Neo-local-1991125@100.64.0.4:5432/LLZQ-test')
|
||||
|
||||
def camel_to_snake(name):
    """Convert camelCase to snake_case."""
    interim = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', name)
    return re.sub('([a-z0-9])([A-Z])', r'\1_\2', interim).lower()


def snake_to_camel(name):
    """Convert snake_case to camelCase."""
    head, *rest = name.split('_')
    return head + ''.join(piece.title() for piece in rest)


def normalize_field(name):
    """Normalize a field name: strip underscores, lowercase."""
    return name.replace('_', '').lower()


def find_variants(api_field, ods_columns):
    """Find possible spelling variants of *api_field* among *ods_columns*.

    Each hit is (ods_column, match_type) where match_type is one of
    'exact', 'normalized', 'camel_to_snake', 'snake_to_camel', 'partial'
    (checked in that order; at most one type per column).
    """
    api_lower = api_field.lower()
    api_norm = normalize_field(api_field)
    api_snake = camel_to_snake(api_field)
    api_camel_lower = snake_to_camel(api_field).lower()

    hits = []
    for col in ods_columns:
        col_lower = col.lower()
        col_norm = normalize_field(col)

        if api_lower == col_lower:
            hits.append((col, 'exact'))
        elif api_norm == col_norm:
            hits.append((col, 'normalized'))
        elif api_snake == col_lower:
            hits.append((col, 'camel_to_snake'))
        elif api_camel_lower == col_lower:
            hits.append((col, 'snake_to_camel'))
        elif (len(api_norm) > 3 and len(col_norm) > 3
              and (api_norm in col_norm or col_norm in api_norm)):
            # Substring overlap: one name embedded in the other.
            hits.append((col, 'partial'))

    return hits
|
||||
|
||||
def get_ods_table_columns(conn, table_name):
    """Return the ordered column names of an ODS table.

    *table_name* may be schema-qualified ('schema.table'); otherwise the
    'public' schema is assumed.
    """
    schema, sep, name = table_name.partition('.')
    if not sep:
        schema, name = 'public', table_name

    query = """
        SELECT column_name
        FROM information_schema.columns
        WHERE table_schema = %s AND table_name = %s
        ORDER BY ordinal_position
    """
    with conn.cursor(cursor_factory=RealDictCursor) as cur:
        cur.execute(query, (schema, name))
        return [record['column_name'] for record in cur.fetchall()]
|
||||
|
||||
def main():
    """Re-check reported-missing API fields for spelling variants in ODS.

    Reads api_ods_comparison.json (produced by compare_api_ods_fields.py),
    looks up each missing field against the live ODS table columns, prints
    the variants found and the fields that are genuinely missing.
    """
    # Load the earlier API-vs-ODS comparison result.
    json_path = os.path.join(os.path.dirname(__file__), 'api_ods_comparison.json')
    with open(json_path, 'r', encoding='utf-8') as f:
        results = json.load(f)

    all_findings = {}

    conn = psycopg2.connect(DSN)
    try:
        print("=" * 100)
        print("缺失字段拼写变体检查")
        print("=" * 100)

        for code, data in results.items():
            missing = data.get('missing_in_ods', [])
            if not missing:
                continue

            table_name = data['table_name']
            ods_columns = get_ods_table_columns(conn, table_name)

            print(f"\n### {code}")
            print(f"表名: {table_name}")

            findings = []
            true_missing = []

            for api_field in missing:
                variants = find_variants(api_field, ods_columns)
                if variants:
                    for ods_col, match_type in variants:
                        findings.append({
                            'api_field': api_field,
                            'ods_column': ods_col,
                            'match_type': match_type
                        })
                        print(f"  [发现变体] API: `{api_field}` -> ODS: `{ods_col}` ({match_type})")
                else:
                    true_missing.append(api_field)

            if findings:
                all_findings[code] = {
                    'table_name': table_name,
                    'variants': findings,
                    'true_missing': true_missing
                }

            if true_missing:
                print(f"\n  **确认缺失 ({len(true_missing)}):**")
                for f in true_missing:
                    print(f"    - {f}")
    finally:
        # BUGFIX: the connection previously leaked if any lookup raised; the
        # summaries below need no database access, so always close it here.
        conn.close()

    # Summary 1: spelling variants discovered.
    print("\n")
    print("=" * 100)
    print("汇总 - 发现的拼写变体")
    print("=" * 100)

    for code, data in all_findings.items():
        if data['variants']:
            print(f"\n### {code} (`{data['table_name']}`)")
            print("\n| API 字段 | ODS 字段 | 匹配类型 |")
            print("|----------|----------|----------|")
            for v in data['variants']:
                print(f"| `{v['api_field']}` | `{v['ods_column']}` | {v['match_type']} |")

    # Summary 2: fields that are genuinely missing (no variant found).
    print("\n")
    print("=" * 100)
    print("汇总 - 确认缺失的字段(无变体)")
    print("=" * 100)

    for code, data in results.items():
        missing = data.get('missing_in_ods', [])
        if not missing:
            continue

        # Codes without any variant finding keep their full missing list.
        if code in all_findings:
            true_missing = all_findings[code]['true_missing']
        else:
            true_missing = missing

        if true_missing:
            print(f"\n### {code} (`{data['table_name']}`)")
            print(f"缺失 {len(true_missing)} 个字段:")
            print("\n| 字段名 | 说明 |")
            print("|--------|------|")
            for f in true_missing:
                print(f"| `{f}` | |")
|
||||
|
||||
# Script entry point: run the variant check when executed directly.
if __name__ == '__main__':
    main()
|
||||
95
tmp/check_new_fields_data.py
Normal file
95
tmp/check_new_fields_data.py
Normal file
@@ -0,0 +1,95 @@
|
||||
# -*- coding: utf-8 -*-
"""Check data completeness of the newly added DWD columns."""
import psycopg2

# NOTE(review): the DSN embeds a plaintext password; consider moving it to an
# environment variable / .env so credentials stay out of version control.
DSN = 'postgresql://local-Python:Neo-local-1991125@100.64.0.4:5432/LLZQ-test'

# Newly added columns to audit, keyed by schema-qualified table name.
NEW_FIELDS = {
    # DWD main (fact/head) tables
    'billiards_dwd.dwd_settlement_head': [
        'electricity_money', 'real_electricity_money', 'electricity_adjust_money',
        'pl_coupon_sale_amount', 'mervou_sales_amount'
    ],
    'billiards_dwd.dwd_table_fee_log': ['activity_discount_amount', 'real_service_money'],
    'billiards_dwd.dwd_table_fee_adjust': ['table_name', 'table_price', 'charge_free'],
    'billiards_dwd.dim_member': ['pay_money_sum', 'recharge_money_sum'],
    'billiards_dwd.dim_member_card_account': ['principal_balance', 'member_grade'],
    'billiards_dwd.dim_store_goods': ['commodity_code', 'not_sale'],
    'billiards_dwd.dim_table': ['order_id'],
    'billiards_dwd.dim_tenant_goods': ['not_sale'],
    'billiards_dwd.dim_groupbuy_package': ['sort', 'is_first_limit'],
    'billiards_dwd.dwd_assistant_service_log': ['real_service_money'],
    'billiards_dwd.dwd_assistant_trash_event': ['tenant_id'],
    'billiards_dwd.dwd_groupbuy_redemption': ['member_discount_money', 'coupon_sale_id'],
    'billiards_dwd.dwd_member_balance_change': ['principal_before', 'principal_after'],
    'billiards_dwd.dwd_payment': ['tenant_id'],
    'billiards_dwd.dwd_store_goods_sale': ['coupon_share_money'],
}
|
||||
|
||||
def check_field_data(conn, schema_table, fields):
    """Profile data completeness for *fields* of table *schema_table*.

    For each field: total row count, non-NULL count, count of rows whose
    text form is not a zero/empty marker, and a fill-rate percentage.
    A failure on one field is recorded per-field instead of aborting.

    NOTE: table/column identifiers are interpolated into SQL directly; they
    must come from trusted configuration (NEW_FIELDS), never user input.
    """
    results = []
    # BUGFIX: use a context manager so the cursor is closed even when the
    # initial COUNT(*) raises (the old code leaked it in that case).
    with conn.cursor() as cur:
        cur.execute(f"SELECT COUNT(*) FROM {schema_table}")
        total_rows = cur.fetchone()[0]

        for field in fields:
            try:
                # Non-NULL count.
                cur.execute(f"SELECT COUNT(*) FROM {schema_table} WHERE {field} IS NOT NULL")
                non_null_count = cur.fetchone()[0]

                # Non-NULL and non-zero count (textual compare covers
                # numeric and string columns alike).
                cur.execute(f"""
                    SELECT COUNT(*) FROM {schema_table}
                    WHERE {field} IS NOT NULL
                    AND CAST({field} AS TEXT) NOT IN ('0', '0.00', '0.0', '')
                """)
                non_zero_count = cur.fetchone()[0]

                results.append({
                    'field': field,
                    'total': total_rows,
                    'non_null': non_null_count,
                    'non_zero': non_zero_count,
                    'fill_rate': f"{non_null_count/total_rows*100:.1f}%" if total_rows > 0 else "N/A"
                })
            except Exception as e:
                # BUGFIX: a failed statement aborts the PostgreSQL
                # transaction; without a rollback every later query on this
                # connection also fails ("current transaction is aborted").
                conn.rollback()
                results.append({
                    'field': field,
                    'error': str(e)[:50]
                })

    return results
|
||||
|
||||
def main():
    """Print a completeness report for every configured table/field set."""
    conn = psycopg2.connect(DSN)
    try:
        print("=" * 90)
        print("New Fields Data Completeness Check")
        print("=" * 90)

        for table, fields in NEW_FIELDS.items():
            print(f"\n### {table} ###\n")
            results = check_field_data(conn, table, fields)

            print(f"{'Field':<30} {'Total':>8} {'Non-Null':>10} {'Non-Zero':>10} {'Fill Rate':>10}")
            print("-" * 70)

            for r in results:
                if 'error' in r:
                    print(f"{r['field']:<30} ERROR: {r['error']}")
                else:
                    print(f"{r['field']:<30} {r['total']:>8} {r['non_null']:>10} {r['non_zero']:>10} {r['fill_rate']:>10}")
    finally:
        # BUGFIX: always release the connection, even if a report fails midway
        # (it previously leaked on any exception).
        conn.close()

    print("\n" + "=" * 90)
|
||||
|
||||
# Script entry point: run the completeness check when executed directly.
if __name__ == '__main__':
    main()
|
||||
90
tmp/check_scd2_tables.py
Normal file
90
tmp/check_scd2_tables.py
Normal file
@@ -0,0 +1,90 @@
|
||||
# -*- coding: utf-8 -*-
"""Inspect DWD dimension tables and classify them as SCD2 vs Type1."""
import os
import sys
from pathlib import Path

# Make the etl_billiards package importable when run from the repo root.
project_root = Path(__file__).parent.parent / "etl_billiards"
sys.path.insert(0, str(project_root))

from dotenv import load_dotenv
load_dotenv(project_root / ".env")

from database.connection import DatabaseConnection

# PG_DSN comes from the project's .env loaded above.
dsn = os.getenv("PG_DSN")
db = DatabaseConnection(dsn)

print("=" * 70)
print("DWD Dimension Tables - SCD2 Analysis")
print("=" * 70)

# Fetch every dimension table in the DWD schema.
tables = db.query("""
    SELECT table_name
    FROM information_schema.tables
    WHERE table_schema = 'billiards_dwd'
    AND table_name LIKE 'dim_%'
    ORDER BY table_name
""")

# Presence of any of these columns marks a table as SCD2 (history-tracking).
scd_cols = {"scd2_start_time", "scd2_end_time", "scd2_is_current", "scd2_version"}

scd2_tables = []
type1_tables = []

for t in tables:
    tbl = t["table_name"]
    cols = db.query("""
        SELECT column_name FROM information_schema.columns
        WHERE table_schema = 'billiards_dwd' AND table_name = %s
    """, (tbl,))
    col_names = {c["column_name"].lower() for c in cols}

    # Intersection is non-empty when the table carries SCD2 bookkeeping columns.
    has_scd = col_names & scd_cols
    if has_scd:
        scd2_tables.append((tbl, has_scd))
    else:
        type1_tables.append(tbl)

print("\n[SCD2 Tables - History Tracking]")
print("-" * 50)
if scd2_tables:
    for tbl, cols in scd2_tables:
        print(f"  {tbl}")
        print(f"    SCD2 cols: {', '.join(sorted(cols))}")
else:
    print("  (none)")

print(f"\n[Type1 Tables - Direct Overwrite] ({len(type1_tables)} tables)")
print("-" * 50)
for tbl in type1_tables:
    print(f"  {tbl}")

print("\n" + "=" * 70)
print("Processing Logic")
print("=" * 70)
print("""
Code path in dwd_load_task.py:

if table.startswith('dim_'):
    _merge_dim()
    |
    +-- if has SCD2 columns:
    |     _merge_dim_scd2()
    |     -> Compare data, close old version, insert new version
    |     -> Uses INSERT (no ON CONFLICT)
    |     -> SCD2 NOT affected by fact_upsert config
    |
    +-- else:
          _merge_dim_type1_upsert()
          -> Uses ON CONFLICT DO UPDATE
          -> Direct overwrite (Type1)
else:
    _load_fact_generic()
    -> Uses ON CONFLICT DO UPDATE (if fact_upsert=true)

CONCLUSION: SCD2 logic is INDEPENDENT, NOT affected by conflict mode settings.
""")

db.close()
|
||||
26
tmp/check_seq.py
Normal file
26
tmp/check_seq.py
Normal file
@@ -0,0 +1,26 @@
|
||||
# -*- coding: utf-8 -*-
"""Scan markdown data-dictionary docs for duplicated sequence numbers.

Each table row is expected to start with '| <seq> |'; any sequence number
appearing more than once within a document is reported.
"""
import re
from collections import Counter
from pathlib import Path

# Documents to scan: main tables plus the extended ("Ex") set.
docs = list(Path('etl_billiards/docs/bd_manual/main').glob('*.md')) + \
       list(Path('etl_billiards/docs/bd_manual/Ex').glob('*.md'))

# Compile the row pattern once instead of recompiling it for every line.
ROW_PATTERN = re.compile(r'\|\s*(\d+)\s*\|')

for doc in docs:
    content = doc.read_text(encoding='utf-8')
    seqs = []
    for line in content.split('\n'):
        match = ROW_PATTERN.match(line)
        if match:
            # Keep a 70-char preview of the row for the report.
            seqs.append((int(match.group(1)), line[:70]))

    # BUGFIX/PERF: the old code called list.count() per row (O(n^2)) and
    # kept a redundant 'seen' set; Counter finds duplicates in one pass.
    counts = Counter(seq for seq, _ in seqs)
    if any(occurrences > 1 for occurrences in counts.values()):
        print(f'\n{doc.name}: Duplicate sequences found')
        for seq, line in seqs:
            if counts[seq] > 1:
                print(f'  {seq}: {line}...')
|
||||
510
tmp/compare_api_ods_fields.py
Normal file
510
tmp/compare_api_ods_fields.py
Normal file
@@ -0,0 +1,510 @@
|
||||
# -*- coding: utf-8 -*-
"""
Compare API response fields with ODS table columns; report API fields
that are missing from the ODS schema.
"""
import os
import sys
import json
import requests
from datetime import datetime, timedelta
from zoneinfo import ZoneInfo

import psycopg2
from psycopg2.extras import RealDictCursor

# Configuration.
# SECURITY NOTE(review): the DSN password and the bearer token below are
# hard-coded; move them to environment variables / .env before sharing this
# script. The JWT also has an expiry baked in and will stop working then.
DSN = 'postgresql://local-Python:Neo-local-1991125@100.64.0.4:5432/LLZQ-test'
API_BASE = 'https://pc.ficoo.vip/apiprod/admin/v1/'
API_TOKEN = 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJjbGllbnQtdHlwZSI6IjQiLCJ1c2VyLXR5cGUiOiIxIiwiaHR0cDovL3NjaGVtYXMubWljcm9zb2Z0LmNvbS93cy8yMDA4LzA2L2lkZW50aXR5L2NsYWltcy9yb2xlIjoiMTIiLCJyb2xlLWlkIjoiMTIiLCJ0ZW5hbnQtaWQiOiIyNzkwNjgzMTYwNzA5OTU3Iiwibmlja25hbWUiOiLnp5_miLfnrqHnkIblkZjvvJrmganmgakxIiwic2l0ZS1pZCI6IjAiLCJtb2JpbGUiOiIxMzgxMDUwMjMwNCIsInNpZCI6IjI5NTA0ODk2NTgzOTU4NDUiLCJzdGFmZi1pZCI6IjMwMDk5MTg2OTE1NTkwNDUiLCJvcmctaWQiOiIwIiwicm9sZS10eXBlIjoiMyIsInJlZnJlc2hUb2tlbiI6IktlbTVsdHRqZ2tSUExOcVA2ajhNakdQYnFrNW5mRzBQNzRvMHE0b295VVE9IiwicmVmcmVzaEV4cGlyeVRpbWUiOiIyMDI2LzIvOCDkuIvljYg2OjU3OjA1IiwibmVlZENoZWNrVG9rZW4iOiJmYWxzZSIsImV4cCI6MTc3MDU0ODIyNSwiaXNzIjoidGVzdCIsImF1ZCI6IlVzZXIifQ.wJlm7pTqUzp769nUGdxx0e1bVMy4x9Prp9U_UMWQvlk'
STORE_ID = '2790685415443269'
TZ = ZoneInfo('Asia/Taipei')

# ODS task configuration. Keys per spec:
#   code            - logical task code, used as the report key
#   table_name      - schema-qualified ODS landing table
#   endpoint        - API endpoint to sample
#   data_path       - keys to walk into the JSON response before extracting
#   list_key        - explicit list key inside the data node (None = auto)
#   requires_window - whether the endpoint mandates a time window
#   time_fields     - (start, end) request-parameter names, or None
#   include_site_id - whether to send the store id
#   site_id_array   - send the site id wrapped in a JSON array (optional)
ODS_SPECS = [
    {
        'code': 'ODS_ASSISTANT_ACCOUNT',
        'table_name': 'billiards_ods.assistant_accounts_master',
        'endpoint': '/PersonnelManagement/SearchAssistantInfo',
        'data_path': ['data'],
        'list_key': 'assistantInfos',
        'requires_window': True,
        'time_fields': ('startTime', 'endTime'),
        'include_site_id': True,
    },
    {
        'code': 'ODS_SETTLEMENT_RECORDS',
        'table_name': 'billiards_ods.settlement_records',
        'endpoint': '/Site/GetAllOrderSettleList',
        'data_path': ['data'],
        'list_key': 'settleList',
        'requires_window': True,
        'time_fields': ('rangeStartTime', 'rangeEndTime'),
        'include_site_id': True,
    },
    {
        'code': 'ODS_TABLE_USE',
        'table_name': 'billiards_ods.table_fee_transactions',
        'endpoint': '/Site/GetSiteTableOrderDetails',
        'data_path': ['data'],
        'list_key': 'siteTableUseDetailsList',
        'requires_window': False,
        'time_fields': ('startTime', 'endTime'),
        'include_site_id': True,
    },
    {
        'code': 'ODS_ASSISTANT_LEDGER',
        'table_name': 'billiards_ods.assistant_service_records',
        'endpoint': '/AssistantPerformance/GetOrderAssistantDetails',
        'data_path': ['data'],
        'list_key': 'orderAssistantDetails',
        'requires_window': True,
        'time_fields': ('startTime', 'endTime'),
        'include_site_id': True,
    },
    {
        'code': 'ODS_ASSISTANT_ABOLISH',
        'table_name': 'billiards_ods.assistant_cancellation_records',
        'endpoint': '/AssistantPerformance/GetAbolitionAssistant',
        'data_path': ['data'],
        'list_key': 'abolitionAssistants',
        'requires_window': True,
        'time_fields': ('startTime', 'endTime'),
        'include_site_id': True,
    },
    {
        'code': 'ODS_STORE_GOODS_SALES',
        'table_name': 'billiards_ods.store_goods_sales_records',
        'endpoint': '/TenantGoods/GetGoodsSalesList',
        'data_path': ['data'],
        'list_key': 'orderGoodsLedgers',
        'requires_window': False,
        'time_fields': ('startTime', 'endTime'),
        'include_site_id': True,
    },
    {
        'code': 'ODS_PAYMENT',
        'table_name': 'billiards_ods.payment_transactions',
        'endpoint': '/PayLog/GetPayLogListPage',
        'data_path': ['data'],
        'list_key': None,
        'requires_window': False,
        'time_fields': ('StartPayTime', 'EndPayTime'),
        'include_site_id': True,
    },
    {
        'code': 'ODS_REFUND',
        'table_name': 'billiards_ods.refund_transactions',
        'endpoint': '/Order/GetRefundPayLogList',
        'data_path': ['data'],
        'list_key': None,
        'requires_window': False,
        'time_fields': ('startTime', 'endTime'),
        'include_site_id': True,
    },
    {
        'code': 'ODS_PLATFORM_COUPON',
        'table_name': 'billiards_ods.platform_coupon_redemption_records',
        'endpoint': '/Promotion/GetOfflineCouponConsumePageList',
        'data_path': ['data'],
        'list_key': None,
        'requires_window': False,
        'time_fields': ('startTime', 'endTime'),
        'include_site_id': True,
    },
    {
        'code': 'ODS_MEMBER',
        'table_name': 'billiards_ods.member_profiles',
        'endpoint': '/MemberProfile/GetTenantMemberList',
        'data_path': ['data'],
        'list_key': 'tenantMemberInfos',
        'requires_window': False,
        'time_fields': ('startTime', 'endTime'),
        'include_site_id': True,
    },
    {
        'code': 'ODS_MEMBER_CARD',
        'table_name': 'billiards_ods.member_stored_value_cards',
        'endpoint': '/MemberProfile/GetTenantMemberCardList',
        'data_path': ['data'],
        'list_key': 'tenantMemberCards',
        'requires_window': False,
        'time_fields': ('startTime', 'endTime'),
        'include_site_id': True,
    },
    {
        'code': 'ODS_MEMBER_BALANCE',
        'table_name': 'billiards_ods.member_balance_changes',
        'endpoint': '/MemberProfile/GetMemberCardBalanceChange',
        'data_path': ['data'],
        'list_key': 'tenantMemberCardLogs',
        'requires_window': False,
        'time_fields': ('startTime', 'endTime'),
        'include_site_id': True,
    },
    {
        'code': 'ODS_RECHARGE_SETTLE',
        'table_name': 'billiards_ods.recharge_settlements',
        'endpoint': '/Site/GetRechargeSettleList',
        'data_path': ['data'],
        'list_key': 'settleList',
        'requires_window': True,
        'time_fields': ('rangeStartTime', 'rangeEndTime'),
        'include_site_id': True,
    },
    {
        'code': 'ODS_GROUP_PACKAGE',
        'table_name': 'billiards_ods.group_buy_packages',
        'endpoint': '/PackageCoupon/QueryPackageCouponList',
        'data_path': ['data'],
        'list_key': 'packageCouponList',
        'requires_window': False,
        'time_fields': None,
        'include_site_id': True,
    },
    {
        'code': 'ODS_GROUP_BUY_REDEMPTION',
        'table_name': 'billiards_ods.group_buy_redemption_records',
        'endpoint': '/Site/GetSiteTableUseDetails',
        'data_path': ['data'],
        'list_key': 'siteTableUseDetailsList',
        'requires_window': False,
        'time_fields': ('startTime', 'endTime'),
        'include_site_id': True,
    },
    {
        'code': 'ODS_INVENTORY_STOCK',
        'table_name': 'billiards_ods.goods_stock_summary',
        'endpoint': '/TenantGoods/GetGoodsStockReport',
        'data_path': ['data'],
        'list_key': None,
        'requires_window': False,
        'time_fields': None,
        'include_site_id': True,
    },
    {
        'code': 'ODS_INVENTORY_CHANGE',
        'table_name': 'billiards_ods.goods_stock_movements',
        'endpoint': '/GoodsStockManage/QueryGoodsOutboundReceipt',
        'data_path': ['data'],
        'list_key': 'queryDeliveryRecordsList',
        'requires_window': True,
        'time_fields': ('startTime', 'endTime'),
        'include_site_id': True,
    },
    {
        'code': 'ODS_TABLES',
        'table_name': 'billiards_ods.site_tables_master',
        'endpoint': '/Table/GetSiteTables',
        'data_path': ['data'],
        'list_key': 'siteTables',
        'requires_window': False,
        'time_fields': None,
        'include_site_id': True,
    },
    {
        'code': 'ODS_GOODS_CATEGORY',
        'table_name': 'billiards_ods.stock_goods_category_tree',
        'endpoint': '/TenantGoodsCategory/QueryPrimarySecondaryCategory',
        'data_path': ['data'],
        'list_key': 'goodsCategoryList',
        'requires_window': False,
        'time_fields': None,
        'include_site_id': True,
    },
    {
        'code': 'ODS_STORE_GOODS',
        'table_name': 'billiards_ods.store_goods_master',
        'endpoint': '/TenantGoods/GetGoodsInventoryList',
        'data_path': ['data'],
        'list_key': 'orderGoodsList',
        'requires_window': False,
        'time_fields': None,
        'include_site_id': True,
        'site_id_array': True,  # endpoint expects the site id as an array
    },
    {
        'code': 'ODS_TABLE_FEE_DISCOUNT',
        'table_name': 'billiards_ods.table_fee_discount_records',
        'endpoint': '/Site/GetTaiFeeAdjustList',
        'data_path': ['data'],
        'list_key': 'taiFeeAdjustInfos',
        'requires_window': False,
        'time_fields': ('startTime', 'endTime'),
        'include_site_id': True,
    },
    {
        'code': 'ODS_TENANT_GOODS',
        'table_name': 'billiards_ods.tenant_goods_master',
        'endpoint': '/TenantGoods/QueryTenantGoods',
        'data_path': ['data'],
        'list_key': 'tenantGoodsList',
        'requires_window': False,
        'time_fields': None,
        'include_site_id': True,
    },
]
|
||||
|
||||
|
||||
def get_ods_table_columns(conn, table_name: str) -> dict:
    """Return {lowercased column name: metadata row} for an ODS table.

    *table_name* may be schema-qualified ('schema.table'); otherwise the
    'public' schema is assumed.
    """
    schema, sep, name = table_name.partition('.')
    if not sep:
        schema, name = 'public', table_name

    sql = """
        SELECT column_name, data_type, udt_name
        FROM information_schema.columns
        WHERE table_schema = %s AND table_name = %s
        ORDER BY ordinal_position
    """
    with conn.cursor(cursor_factory=RealDictCursor) as cur:
        cur.execute(sql, (schema, name))
        return {record['column_name'].lower(): record for record in cur.fetchall()}
|
||||
|
||||
|
||||
def flatten_json_keys(obj, prefix='', depth=0) -> set:
    """Recursively collect all dict key names in *obj*, limited to depth 3.

    Keys are reported without their path prefix (the prefix only tracks the
    recursion context). For lists only the first five items are sampled.
    """
    if depth > 3:  # depth guard against pathological nesting
        return set()
    found = set()
    if isinstance(obj, dict):
        for key, value in obj.items():
            path = f"{prefix}.{key}" if prefix else key
            found.add(key)  # record the bare key name
            if depth < 3 and isinstance(value, (dict, list)):
                found |= flatten_json_keys(value, path, depth + 1)
    elif isinstance(obj, list):
        for element in obj[:5]:  # sample only the first five entries
            found |= flatten_json_keys(element, prefix, depth)
    return found
|
||||
|
||||
|
||||
def call_api(endpoint: str, params: dict) -> dict:
    """POST *params* to an admin API endpoint; return {} on any failure."""
    url = API_BASE.rstrip('/') + '/' + endpoint.lstrip('/')
    headers = {
        'Authorization': f'Bearer {API_TOKEN}',
        'Content-Type': 'application/json',
        'Accept': 'application/json',
    }
    try:
        # Failures (HTTP errors, timeouts, bad JSON) all fall through to
        # the except branch and yield an empty payload.
        response = requests.post(url, json=params, headers=headers, timeout=30)
        response.raise_for_status()
        return response.json()
    except Exception as exc:
        print(f"  API 调用异常: {exc}")
        return {}
|
||||
|
||||
|
||||
def extract_list(payload: dict, data_path: list, list_key: str = None) -> list:
    """Walk *data_path* into *payload* and pull out the record list.

    Falls back from an explicit *list_key* to well-known list keys, then to
    the first list-typed value found; returns [] when nothing matches.
    """
    node = payload
    for step in data_path:
        if not isinstance(node, dict):
            return []
        node = node.get(step)

    if isinstance(node, list):
        return node

    if isinstance(node, dict):
        # Explicit key wins when present.
        if list_key and list_key in node:
            return node[list_key]
        # Common conventional list keys.
        for candidate in ('list', 'rows', 'records', 'items', 'dataList'):
            value = node.get(candidate)
            if isinstance(value, list):
                return value
        # Last resort: the first list-valued entry of the dict.
        for value in node.values():
            if isinstance(value, list):
                return value

    return []
|
||||
|
||||
|
||||
def get_api_sample_data(spec: dict, window_start: datetime, window_end: datetime) -> list:
    """Fetch up to 50 sample records for one ODS spec from the API."""
    params = {'page': 1, 'limit': 50}

    if spec.get('include_site_id'):
        site_id = int(STORE_ID)
        # Some endpoints expect the site id wrapped in a JSON array.
        params['siteId'] = [site_id] if spec.get('site_id_array') else site_id

    window_keys = spec.get('time_fields')
    if window_keys:
        start_key, end_key = window_keys
        params[start_key] = window_start.strftime('%Y-%m-%d %H:%M:%S')
        params[end_key] = window_end.strftime('%Y-%m-%d %H:%M:%S')

    payload = call_api(spec['endpoint'], params)
    if not payload:
        return []
    return extract_list(payload, spec['data_path'], spec.get('list_key'))
|
||||
|
||||
|
||||
def compare_fields(api_fields: set, ods_columns: dict) -> dict:
    """Diff API field names against ODS column names (case-insensitive).

    Returns sorted lists of both sides plus the fields missing from ODS and
    the ODS-only (likely derived) columns. Known ODS bookkeeping columns
    and the flattened siteProfile attributes are excluded from the diff.
    """
    ods_col_names = set(ods_columns.keys())

    # ODS system columns that never come from the API payload itself.
    system_cols = {
        'payload', 'source_file', 'source_endpoint', 'fetched_at',
        'content_hash', 'record_index', 'site_profile'
    }

    # Flattened siteProfile store-configuration fields - ignored on purpose.
    site_profile_fields = {
        'address', 'full_address', 'latitude', 'longitude',
        'shop_name', 'shop_status', 'site_label', 'site_type',
        'tenant_site_region_id', 'attendance_distance', 'attendance_enabled',
        'auto_light', 'avatar', 'business_tel', 'customer_service_qrcode',
        'customer_service_wechat', 'fixed_pay_qrcode', 'light_status',
        'light_token', 'light_type', 'prod_env', 'wifi_name', 'wifi_password',
        'org_id', 'siteprofile', 'ewelink_client_id'
    }

    # Case-fold the API side before comparing.
    api_fields_lower = {field.lower() for field in api_fields}

    # API fields absent from ODS, minus the deliberate exclusions.
    missing_in_ods = (
        api_fields_lower - ods_col_names - system_cols - site_profile_fields
    )
    # Columns present only in ODS (possibly derived fields).
    ods_only = ods_col_names - api_fields_lower - system_cols

    return {
        'api_fields': sorted(api_fields_lower),
        'ods_columns': sorted(ods_col_names),
        'missing_in_ods': sorted(missing_in_ods),
        'ods_only': sorted(ods_only),
    }
|
||||
|
||||
|
||||
def main():
    """Compare API sample fields with ODS table columns and report gaps.

    For each configured spec: read the ODS table's columns, pull sample
    records from the API, diff the field names, print a per-table report
    and persist the full result set to api_ods_comparison.json.
    """
    print("=" * 80)
    print("API vs ODS 字段对比分析")
    print("=" * 80)

    conn = psycopg2.connect(DSN)
    try:
        print("数据库连接成功")

        print(f"API: {API_BASE}")
        print(f"门店 ID: {STORE_ID}")

        # Sampling window: 2025-12-01 through "now" in the store timezone.
        now = datetime.now(TZ)
        window_end = now
        window_start = datetime(2025, 12, 1, 0, 0, 0, tzinfo=TZ)
        print(f"时间窗口: {window_start.strftime('%Y-%m-%d %H:%M:%S')} ~ {window_end.strftime('%Y-%m-%d %H:%M:%S')}")
        print("=" * 80)

        results = {}

        for spec in ODS_SPECS:
            print(f"\n处理: {spec['code']}")
            print(f"  表名: {spec['table_name']}")
            print(f"  端点: {spec['endpoint']}")

            # ODS side: table structure (skip specs whose table is absent).
            ods_columns = get_ods_table_columns(conn, spec['table_name'])
            if not ods_columns:
                print("  [跳过] ODS 表不存在或无字段")
                continue
            print(f"  ODS 字段数: {len(ods_columns)}")

            # API side: sample records within the window.
            records = get_api_sample_data(spec, window_start, window_end)
            print(f"  API 返回记录数: {len(records)}")

            if not records:
                results[spec['code']] = {
                    'table_name': spec['table_name'],
                    'endpoint': spec['endpoint'],
                    'api_records': 0,
                    'ods_columns': list(ods_columns.keys()),
                    'missing_in_ods': [],
                    'note': 'API 无返回数据'
                }
                continue

            # Collect field names from up to 20 sample records.
            api_fields = set()
            for rec in records[:20]:
                if isinstance(rec, dict):
                    api_fields.update(flatten_json_keys(rec))
            print(f"  API 字段数: {len(api_fields)}")

            comparison = compare_fields(api_fields, ods_columns)

            results[spec['code']] = {
                'table_name': spec['table_name'],
                'endpoint': spec['endpoint'],
                'api_records': len(records),
                'api_fields_count': len(comparison['api_fields']),
                'ods_columns_count': len(comparison['ods_columns']),
                'missing_in_ods': comparison['missing_in_ods'],
                'ods_only': comparison['ods_only'],
                'api_fields': comparison['api_fields'],
                'ods_columns': comparison['ods_columns'],
            }

            if comparison['missing_in_ods']:
                print(f"  [!] ODS 缺少 {len(comparison['missing_in_ods'])} 个字段:")
                for f in comparison['missing_in_ods'][:10]:
                    print(f"      - {f}")
                if len(comparison['missing_in_ods']) > 10:
                    print(f"      ... 还有 {len(comparison['missing_in_ods']) - 10} 个")
            else:
                print("  [OK] ODS 已包含所有 API 字段")
    finally:
        # BUGFIX: the connection previously leaked if any spec raised; the
        # summaries below do not touch the database, so always close it here.
        conn.close()

    # Summary: missing API fields per ODS table.
    print("\n")
    print("=" * 80)
    print("汇总报告 - 每个 ODS 表缺少的 API 字段")
    print("=" * 80)

    for code, data in results.items():
        missing = data.get('missing_in_ods', [])
        if missing or data.get('note'):
            print(f"\n### {code}")
            print(f"表名: `{data['table_name']}`")
            print(f"端点: `{data['endpoint']}`")
            print(f"API 记录数: {data.get('api_records', 0)}")

            if missing:
                print(f"\n**ODS 缺少的字段 ({len(missing)}):**\n")
                print("| 字段名 | 说明 |")
                print("|--------|------|")
                for f in missing:
                    print(f"| `{f}` | |")
            elif data.get('note'):
                print(f"\n备注: {data['note']}")

    # Tables with no missing fields at all.
    print("\n\n### 已完整的表(无缺失字段)")
    for code, data in results.items():
        missing = data.get('missing_in_ods', [])
        if not missing and not data.get('note'):
            print(f"- {code}: `{data['table_name']}` [OK]")

    # Persist the detailed result for downstream scripts.
    output_file = os.path.join(os.path.dirname(__file__), 'api_ods_comparison.json')
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(results, f, ensure_ascii=False, indent=2)
    print(f"\n\n详细结果已保存至: {output_file}")
|
||||
|
||||
|
||||
# Script entry point: run the full API-vs-ODS comparison when executed directly.
if __name__ == '__main__':
    main()
|
||||
181
tmp/detailed_field_compare.py
Normal file
181
tmp/detailed_field_compare.py
Normal file
@@ -0,0 +1,181 @@
|
||||
# -*- coding: utf-8 -*-
"""
Detailed two-way comparison for fields that may be related.
"""
import os
import json
import re
import psycopg2
from psycopg2.extras import RealDictCursor

# NOTE(review): the DSN embeds a plaintext password; consider moving it to an
# environment variable so credentials stay out of version control.
DSN = 'postgresql://local-Python:Neo-local-1991125@100.64.0.4:5432/LLZQ-test'

# Field pairs to review in detail: for each ODS table, the keyword list
# narrows both the API fields and the ODS columns to the candidates worth
# comparing by hand.
REVIEW_PAIRS = [
    {
        'code': 'ODS_TABLE_USE',
        'table': 'billiards_ods.table_fee_transactions',
        'keywords': ['service', 'money', 'real'],
    },
    {
        'code': 'ODS_ASSISTANT_LEDGER',
        'table': 'billiards_ods.assistant_service_records',
        'keywords': ['service', 'money', 'real'],
    },
    {
        'code': 'ODS_MEMBER_CARD',
        'table': 'billiards_ods.member_stored_value_cards',
        'keywords': ['balance', 'principal', 'freeze', 'recharge'],
    },
    {
        'code': 'ODS_MEMBER_BALANCE',
        'table': 'billiards_ods.member_balance_changes',
        'keywords': ['before', 'after', 'principal', 'change'],
    },
    {
        'code': 'ODS_SETTLEMENT_RECORDS',
        'table': 'billiards_ods.settlement_records',
        'keywords': ['coupon', 'sale', 'amount', 'pl', 'tenant'],
    },
    {
        'code': 'ODS_RECHARGE_SETTLE',
        'table': 'billiards_ods.recharge_settlements',
        'keywords': ['coupon', 'sale', 'amount', 'pl', 'tenant'],
    },
    {
        'code': 'ODS_GROUP_PACKAGE',
        'table': 'billiards_ods.group_buy_packages',
        'keywords': ['table', 'area', 'name', 'list', 'tenant'],
    },
]
|
||||
|
||||
def get_ods_columns(conn, table_name):
    """Return {column name: data type} for an ODS table.

    *table_name* may be schema-qualified; otherwise 'public' is assumed.
    """
    schema, sep, name = table_name.partition('.')
    if not sep:
        schema, name = 'public', table_name

    sql = """
        SELECT column_name, data_type
        FROM information_schema.columns
        WHERE table_schema = %s AND table_name = %s
        ORDER BY ordinal_position
    """
    with conn.cursor(cursor_factory=RealDictCursor) as cur:
        cur.execute(sql, (schema, name))
        return {record['column_name']: record['data_type'] for record in cur.fetchall()}
|
||||
|
||||
def normalize(s):
    """Normalize a name for comparison: drop underscores, lowercase."""
    return s.replace('_', '').lower()
|
||||
|
||||
def filter_by_keywords(fields, keywords):
    """Return sorted unique fields whose normalized form contains a keyword.

    Normalization lowercases the field and strips underscores before the
    substring test, so 'Real_Money' matches the keyword 'money'.
    """
    hits = set()
    for field in fields:
        folded = field.lower().replace('_', '')
        if any(keyword in folded for keyword in keywords):
            hits.add(field)
    return sorted(hits)
|
||||
|
||||
def main():
    """Entry point: cross-check API field lists against live ODS columns.

    Reads api_ods_comparison.json (produced by an earlier extraction step),
    then for each REVIEW_PAIRS entry prints a Markdown-style report of the
    keyword-related fields on both sides and how they match up. Output goes
    to stdout only; the database is read, never written.
    """
    # Load the previously captured API field lists, keyed by review code.
    json_path = os.path.join(os.path.dirname(__file__), 'api_ods_comparison.json')
    with open(json_path, 'r', encoding='utf-8') as f:
        results = json.load(f)

    conn = psycopg2.connect(DSN)

    print("=" * 100)
    print("双向详细对比 - 可能相关的字段")
    print("=" * 100)

    for review in REVIEW_PAIRS:
        code = review['code']
        table = review['table']
        keywords = review['keywords']

        # Skip review pairs with no captured API data.
        if code not in results:
            continue

        data = results[code]
        api_fields = data.get('api_fields', [])

        # Live column list for the ODS table (name -> data type).
        ods_columns = get_ods_columns(conn, table)

        # Narrow both sides down to keyword-related fields.
        api_related = filter_by_keywords(api_fields, keywords)
        ods_related = filter_by_keywords(ods_columns.keys(), keywords)

        print(f"\n{'='*80}")
        print(f"### {code}")
        print(f"表: {table}")
        print(f"关键词: {keywords}")
        print(f"{'='*80}")

        print(f"\n**API 相关字段 ({len(api_related)}):**")
        for f in api_related:  # NOTE: reuses name `f`; the file handle above is already closed
            print(f"  - {f}")

        print(f"\n**ODS 相关字段 ({len(ods_related)}):**")
        for f in ods_related:
            dtype = ods_columns.get(f, '')
            print(f"  - {f} ({dtype})")

        # Pairwise match analysis between the two filtered lists.
        print(f"\n**匹配分析:**")

        # Build the field mapping.
        matched_api = set()
        matched_ods = set()
        mappings = []

        for api_f in api_related:
            api_norm = normalize(api_f)
            for ods_f in ods_related:
                ods_norm = normalize(ods_f)

                # Exact match after normalization (case/underscore-insensitive).
                if api_norm == ods_norm:
                    mappings.append((api_f, ods_f, 'exact', '完全匹配'))
                    matched_api.add(api_f)
                    matched_ods.add(ods_f)
                # Substring containment either way counts as a partial match.
                elif api_norm in ods_norm or ods_norm in api_norm:
                    if api_f not in matched_api:
                        # NOTE(review): partial matches are listed but do NOT
                        # mark either side as matched, so the same fields also
                        # appear in the "unmatched" lists below — presumably
                        # intentional (partials need human review); confirm.
                        mappings.append((api_f, ods_f, 'partial', '部分匹配'))

        if mappings:
            print("\n| API 字段 | ODS 字段 | 类型 | 说明 |")
            print("|----------|----------|------|------|")
            for api_f, ods_f, mtype, desc in mappings:
                print(f"| `{api_f}` | `{ods_f}` | {mtype} | {desc} |")

        # API fields with no exact ODS counterpart.
        unmatched_api = set(api_related) - matched_api
        if unmatched_api:
            print(f"\n**API 未匹配字段:**")
            for f in sorted(unmatched_api):
                print(f"  - {f}")

        # ODS columns with no exact API counterpart.
        unmatched_ods = set(ods_related) - matched_ods
        if unmatched_ods:
            print(f"\n**ODS 未匹配字段:**")
            for f in sorted(unmatched_ods):
                print(f"  - {f}")

    conn.close()

    # Closing banner; the conclusions themselves are written by the reviewer.
    print("\n")
    print("=" * 100)
    print("最终审核结论")
    print("=" * 100)
|
||||
|
||||
# Run the comparison report when invoked directly.
if __name__ == '__main__':
    main()
|
||||
5404
tmp/dwd_schema.json
Normal file
5404
tmp/dwd_schema.json
Normal file
File diff suppressed because it is too large
Load Diff
38
tmp/field_coverage_report.json
Normal file
38
tmp/field_coverage_report.json
Normal file
@@ -0,0 +1,38 @@
|
||||
{
|
||||
"generated_at": "2026-02-02T19:14:29.766314",
|
||||
"ods_coverage": [
|
||||
{
|
||||
"table": "billiards_ods.table_fee_transactions",
|
||||
"column": "activity_discount_amount",
|
||||
"total": 28162,
|
||||
"non_null": 33,
|
||||
"coverage": 0.1171791776152262,
|
||||
"zero_count": 33
|
||||
},
|
||||
{
|
||||
"table": "billiards_ods.table_fee_transactions",
|
||||
"column": "real_service_money",
|
||||
"total": 28162,
|
||||
"non_null": 33,
|
||||
"coverage": 0.1171791776152262,
|
||||
"zero_count": 33
|
||||
},
|
||||
{
|
||||
"table": "billiards_ods.table_fee_transactions",
|
||||
"column": "order_consumption_type",
|
||||
"total": 28162,
|
||||
"non_null": 33,
|
||||
"coverage": 0.1171791776152262,
|
||||
"zero_count": 0
|
||||
},
|
||||
{
|
||||
"table": "billiards_ods.assistant_service_records",
|
||||
"column": "real_service_money",
|
||||
"total": 10093,
|
||||
"non_null": 10,
|
||||
"coverage": 0.09907856930545923,
|
||||
"zero_count": 10
|
||||
}
|
||||
],
|
||||
"dwd_coverage": []
|
||||
}
|
||||
180
tmp/fix_bd_manual.py
Normal file
180
tmp/fix_bd_manual.py
Normal file
@@ -0,0 +1,180 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""自动修复 bd_manual 文档中的类型不匹配问题"""
|
||||
import json
|
||||
import re
|
||||
from pathlib import Path
|
||||
|
||||
def fix_type_in_doc(doc_path, type_mismatches):
    """Rewrite wrong column types inside a Markdown table document.

    Each mismatch dict carries 'column', 'doc_type' (the wrong value
    currently in the doc) and 'db_type' (the authoritative value from the
    database). Returns True when the file was modified, False otherwise
    (including when the file does not exist).
    """
    if not Path(doc_path).exists():
        print(f"  SKIP: {doc_path} not found")
        return False

    content = Path(doc_path).read_text(encoding='utf-8')
    original = content

    for m in type_mismatches:
        col_name = m['column']
        old_type = m['doc_type']
        new_type = m['db_type']

        # Use a replacement *function* rather than a template string so
        # new_type can safely contain backslashes or group-reference
        # lookalikes; this also removes the fragile \g<1> -> \1 surgery
        # the previous version did for the fallback pattern.
        def _swap(match, _new=new_type):  # _new bound per-iteration (late-binding fix)
            return match.group(1) + _new + match.group(2)

        # Primary row format: | seq | name | type | nullable | ...
        # BUGFIX: col_name is data, not a pattern — escape it (old_type
        # was already escaped) so names with regex metacharacters match.
        pattern = rf'(\|\s*\d+\s*\|\s*{re.escape(col_name)}\s*\|\s*){re.escape(old_type)}(\s*\|)'
        content, count = re.subn(pattern, _swap, content)

        if count > 0:
            print(f"  Fixed: {col_name}: {old_type} -> {new_type}")
        else:
            # Looser fallback: rows without a leading sequence number.
            pattern2 = rf'(\|\s*{re.escape(col_name)}\s*\|\s*){re.escape(old_type)}(\s*\|)'
            content, count = re.subn(pattern2, _swap, content)
            if count > 0:
                print(f"  Fixed (alt): {col_name}: {old_type} -> {new_type}")
            else:
                print(f"  WARN: Could not fix {col_name}")

    if content != original:
        Path(doc_path).write_text(content, encoding='utf-8')
        return True
    return False
|
||||
|
||||
def add_missing_field(doc_path, table_name, field_name, db_schema):
    """Append a column that exists in the DB but is missing from the doc.

    Looks up *field_name* in db_schema[table_name] and inserts a new table
    row immediately after the highest-numbered field row in the Markdown
    file. Returns True only when a row was actually written.
    """
    doc = Path(doc_path)
    if not doc.exists():
        return False

    # Locate the column's metadata in the freshly fetched DB schema.
    meta = next(
        (c for c in db_schema.get(table_name, []) if c['column'] == field_name),
        None,
    )
    if not meta:
        print(f"  WARN: Could not find {field_name} in db_schema")
        return False

    rows = doc.read_text(encoding='utf-8').split('\n')

    # Find the line carrying the largest sequence number; the new field row
    # goes right after it. Row shape: | seq | name | type | nullable | ...
    row_re = re.compile(r'\|\s*(\d+)\s*\|\s*(\w+)\s*\|')
    anchor = None
    max_seq = 0
    for idx, line in enumerate(rows):
        hit = row_re.match(line)
        if hit and int(hit.group(1)) > max_seq:
            max_seq = int(hit.group(1))
            anchor = idx

    if anchor is None:
        return False

    nullable = 'YES' if meta['nullable'] == 'YES' else 'NO'
    rows.insert(
        anchor + 1,
        f"| {max_seq + 1} | {field_name} | {meta['type']} | {nullable} | | 调整时间 |",
    )
    doc.write_text('\n'.join(rows), encoding='utf-8')
    print(f"  Added: {field_name} (type: {meta['type']})")
    return True
|
||||
|
||||
def main():
    """Apply bd_manual documentation fixes driven by tmp/bd_manual_diff.json.

    For every table diff: rewrite wrong column types in the Markdown doc
    (fix_type_in_doc) and append columns present in the database but missing
    from the doc (add_missing_field). Authoritative types come from the
    information_schema of billiards_dwd.
    """
    # Load the diff report produced by the doc/DB comparison step.
    with open('tmp/bd_manual_diff.json', 'r', encoding='utf-8') as f:
        diffs = json.load(f)

    # Re-fetch a detailed schema (with precision/scale) for billiards_dwd.
    import psycopg2
    # NOTE(review): plaintext credentials in source — move to env/config.
    DSN = 'postgresql://local-Python:Neo-local-1991125@100.64.0.4:5432/LLZQ-test'

    conn = psycopg2.connect(DSN)
    cur = conn.cursor()
    cur.execute("""
        SELECT table_name, column_name, data_type, is_nullable,
               COALESCE(character_maximum_length, numeric_precision) as max_length,
               numeric_scale
        FROM information_schema.columns
        WHERE table_schema = 'billiards_dwd'
        ORDER BY table_name, ordinal_position
    """)

    db_schema = {}
    # information_schema type names -> the SQL spellings used in the docs.
    TYPE_MAP = {
        'bigint': 'BIGINT',
        'integer': 'INTEGER',
        'smallint': 'SMALLINT',
        'numeric': 'NUMERIC',
        'text': 'TEXT',
        'character varying': 'VARCHAR',
        'boolean': 'BOOLEAN',
        'timestamp with time zone': 'TIMESTAMPTZ',
        'timestamp without time zone': 'TIMESTAMP',
        'date': 'DATE',
        'jsonb': 'JSONB',
        'json': 'JSON',
    }

    for row in cur.fetchall():
        table_name, col_name, data_type, nullable, max_len, scale = row
        if table_name not in db_schema:
            db_schema[table_name] = []

        type_str = TYPE_MAP.get(data_type, data_type.upper())
        # BUGFIX: precision/length were emitted as the garbage literal
        # "58,924" for every column; use the real values reported by
        # information_schema (max_length holds numeric_precision for
        # numerics and character_maximum_length for varchars).
        if data_type == 'numeric' and max_len and scale is not None:
            type_str = f'NUMERIC({max_len},{scale})'
        elif data_type == 'character varying' and max_len:
            type_str = f'VARCHAR({max_len})'

        db_schema[table_name].append({
            'column': col_name,
            'type': type_str,
            'nullable': nullable,
        })

    cur.close()
    conn.close()

    print("=" * 80)
    print("Fixing BD Manual Documents")
    print("=" * 80)

    fixed_count = 0

    for diff in diffs:
        table = diff['table']
        doc_path = diff.get('doc_path', '')

        # Diffs without a known doc location cannot be fixed automatically.
        if not doc_path:
            continue

        has_changes = False

        # Rewrite wrong types in place.
        if diff.get('type_mismatches'):
            print(f"\n### {table} (type fixes) ###")
            if fix_type_in_doc(doc_path, diff['type_mismatches']):
                has_changes = True

        # Append DB columns the doc does not mention yet.
        if diff.get('missing_in_doc'):
            print(f"\n### {table} (missing fields) ###")
            for field in diff['missing_in_doc']:
                if add_missing_field(doc_path, table, field, db_schema):
                    has_changes = True

        if has_changes:
            fixed_count += 1

    print("\n" + "=" * 80)
    print(f"Fixed {fixed_count} documents")
    print("=" * 80)
|
||||
|
||||
# Run the documentation fixer when invoked directly.
if __name__ == '__main__':
    main()
|
||||
55
tmp/fix_not_sale_type.py
Normal file
55
tmp/fix_not_sale_type.py
Normal file
@@ -0,0 +1,55 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""修复 not_sale 字段类型"""
|
||||
import psycopg2
|
||||
|
||||
# NOTE(review): plaintext DB credentials committed in source — load from env/.env instead.
DSN = 'postgresql://local-Python:Neo-local-1991125@100.64.0.4:5432/LLZQ-test'
|
||||
|
||||
def fix_column_type():
    """Convert the boolean `not_sale` columns to INTEGER across ODS/DWD.

    BUGFIX: the previous version DROPped each column and re-ADDed it, which
    silently wiped every stored value. This version converts in place with
    ALTER COLUMN ... TYPE ... USING, mapping true -> 1 and false -> 0, so
    existing data is preserved. Each table is handled independently; errors
    are reported but do not abort the run (autocommit, so no partial
    transaction is left open).
    """
    conn = psycopg2.connect(DSN)
    conn.autocommit = True
    cur = conn.cursor()

    # (schema, table, column, target type): ODS sources plus DWD dimensions.
    tables_to_fix = [
        ('billiards_ods', 'store_goods_master', 'not_sale', 'INTEGER'),
        ('billiards_ods', 'tenant_goods_master', 'not_sale', 'INTEGER'),
        ('billiards_dwd', 'dim_store_goods', 'not_sale', 'INTEGER'),
        ('billiards_dwd', 'dim_tenant_goods', 'not_sale', 'INTEGER'),
    ]

    for schema, table, column, new_type in tables_to_fix:
        try:
            # Only act when the column exists and is still boolean.
            cur.execute("""
                SELECT data_type FROM information_schema.columns
                WHERE table_schema = %s AND table_name = %s AND column_name = %s
            """, (schema, table, column))
            result = cur.fetchone()

            if result:
                current_type = result[0]
                print(f"{schema}.{table}.{column}: current type = {current_type}")

                if current_type == 'boolean':
                    # In-place, data-preserving conversion (true->1, false->0).
                    cur.execute(
                        f'ALTER TABLE {schema}.{table} '
                        f'ALTER COLUMN "{column}" TYPE {new_type} '
                        f'USING (CASE WHEN "{column}" THEN 1 ELSE 0 END)'
                    )
                    print(f"  Converted to {new_type} (values preserved)")
                else:
                    print(f"  Already {current_type}, skipping")
            else:
                print(f"{schema}.{table}.{column}: column not found")

        except Exception as e:
            print(f"Error fixing {schema}.{table}.{column}: {e}")

    cur.close()
    conn.close()
    print("\nDone!")
|
||||
|
||||
# Run the column-type migration when invoked directly.
if __name__ == '__main__':
    fix_column_type()
|
||||
63
tmp/fix_remaining_issues.py
Normal file
63
tmp/fix_remaining_issues.py
Normal file
@@ -0,0 +1,63 @@
|
||||
# -*- coding: utf-8 -*-
"""One-off data-fix script: backfill DWD columns the regular ETL missed.

Connects via the PG_DSN entry of etl_billiards/.env and performs three
idempotent fixes:
  1. backfill dwd_member_balance_change.principal_change_amount from ODS,
  2. add five settlement-related columns to dwd_recharge_order if absent,
  3. backfill those columns from billiards_ods.recharge_settlements.
"""
import os
import sys
from pathlib import Path

# Make the etl_billiards package importable when run from tmp/.
project_root = Path(__file__).parent.parent / "etl_billiards"
sys.path.insert(0, str(project_root))

from dotenv import load_dotenv
load_dotenv(project_root / ".env")

from database.connection import DatabaseConnection

dsn = os.getenv("PG_DSN")
db = DatabaseConnection(dsn)

print("=== Fixing remaining issues ===")

# 1. Fix principal_change_amount type mismatch.
#    ODS stores principal_data as text; cast to numeric on the way in.
#    Only rows still NULL in DWD are touched, so re-running is safe.
db.execute("""
UPDATE billiards_dwd.dwd_member_balance_change d
SET principal_change_amount = o.principal_data::numeric
FROM billiards_ods.member_balance_changes o
WHERE d.balance_change_id = o.id
AND d.principal_change_amount IS NULL
AND o.principal_data IS NOT NULL
""")
db.commit()
print("principal_change_amount: fixed")

# 2. Add missing DWD columns for dwd_recharge_order.
#    NOTE(review): "mervou" mirrors the upstream ODS column
#    mervousalesamount — presumably the vendor API's own spelling;
#    kept consistent on purpose. Confirm before renaming.
missing_cols = [
    ("pl_coupon_sale_amount", "NUMERIC(18,2)"),
    ("mervou_sales_amount", "NUMERIC(18,2)"),
    ("electricity_money", "NUMERIC(18,2)"),
    ("real_electricity_money", "NUMERIC(18,2)"),
    ("electricity_adjust_money", "NUMERIC(18,2)"),
]
for col, dtype in missing_cols:
    try:
        # IF NOT EXISTS keeps the script re-runnable.
        db.execute(f'ALTER TABLE billiards_dwd.dwd_recharge_order ADD COLUMN IF NOT EXISTS "{col}" {dtype}')
        db.commit()
        print(f"dwd_recharge_order.{col}: column added")
    except Exception as e:
        db.rollback()
        print(f"dwd_recharge_order.{col}: {str(e)[:50]}")

# 3. Backfill dwd_recharge_order from ODS (unconditional overwrite per row).
db.execute("""
UPDATE billiards_dwd.dwd_recharge_order d
SET pl_coupon_sale_amount = o.plcouponsaleamount,
mervou_sales_amount = o.mervousalesamount,
electricity_money = o.electricitymoney,
real_electricity_money = o.realelectricitymoney,
electricity_adjust_money = o.electricityadjustmoney
FROM billiards_ods.recharge_settlements o
WHERE d.recharge_order_id = o.id
""")
db.commit()
print("dwd_recharge_order: backfilled")

db.close()
print("Done")
|
||||
237
tmp/full_reload_validation.py
Normal file
237
tmp/full_reload_validation.py
Normal file
@@ -0,0 +1,237 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
全量数据回写验证脚本
|
||||
从 2025-07-01 到现在,重新获取 API 数据并入库
|
||||
"""
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
from datetime import datetime, timedelta
|
||||
from pathlib import Path
|
||||
|
||||
# 添加项目路径
|
||||
project_root = Path(__file__).parent.parent / "etl_billiards"
|
||||
sys.path.insert(0, str(project_root))
|
||||
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv(project_root / ".env")
|
||||
|
||||
from database.connection import DatabaseConnection
|
||||
|
||||
|
||||
def check_ods_field_coverage(db: DatabaseConnection):
    """Audit data coverage of the newly added ODS columns.

    For each (table, columns) pair below, prints total rows, non-NULL rows
    and zero-valued rows per column, and returns the same numbers as a list
    of dicts. ``coverage`` is a percentage in [0, 100]. Columns not yet
    deployed are reported as missing and skipped.
    """

    # Newly added ODS columns whose backfill we want to audit.
    fields_to_check = [
        ("billiards_ods.table_fee_transactions", ["activity_discount_amount", "real_service_money", "order_consumption_type"]),
        ("billiards_ods.assistant_service_records", ["real_service_money", "assistantteamname"]),
        ("billiards_ods.assistant_cancellation_records", ["tenant_id"]),
        ("billiards_ods.store_goods_sales_records", ["coupon_share_money"]),
        ("billiards_ods.payment_transactions", ["tenant_id"]),
        ("billiards_ods.member_profiles", ["pay_money_sum", "person_tenant_org_id", "recharge_money_sum", "register_source"]),
        ("billiards_ods.member_stored_value_cards", ["principal_balance", "member_grade", "rechargefreezebalance"]),
        ("billiards_ods.member_balance_changes", ["principal_after", "principal_before", "principal_data"]),
        ("billiards_ods.settlement_records", ["tenant_id"]),
        ("billiards_ods.recharge_settlements", ["tenant_id"]),
        ("billiards_ods.group_buy_packages", ["sort", "is_first_limit", "tenantcouponsaleorderitemid"]),
        ("billiards_ods.group_buy_redemption_records", ["coupon_sale_id", "member_discount_money"]),
        ("billiards_ods.site_tables_master", ["order_id"]),
        ("billiards_ods.store_goods_master", ["commodity_code", "not_sale"]),
        ("billiards_ods.table_fee_discount_records", ["table_name", "table_price", "charge_free"]),
        ("billiards_ods.tenant_goods_master", ["not_sale"]),
    ]

    print("\n" + "=" * 80)
    print("ODS 新增字段数据覆盖检查")
    print("=" * 80)

    results = []

    for table, columns in fields_to_check:
        print(f"\n检查表: {table}")

        # Total row count; if the table itself is unreadable, skip it whole.
        try:
            total_rows = db.query(f"SELECT COUNT(*) as cnt FROM {table}")[0]["cnt"]
        except Exception as e:
            print(f"  [错误] 无法获取记录数: {e}")
            continue

        for col in columns:
            try:
                # Existence check first — the column may not be deployed yet.
                schema, name = table.split(".", 1)
                col_check = db.query("""
                    SELECT COUNT(*) as cnt FROM information_schema.columns
                    WHERE table_schema = %s AND table_name = %s AND column_name = %s
                """, (schema, name, col.lower()))

                if col_check[0]["cnt"] == 0:
                    print(f"  列 {col}: [不存在]")
                    continue

                # Non-NULL and zero-valued counts.
                # NOTE(review): the "= 0" comparison presumably errors on
                # text columns (e.g. assistantteamname); the except below
                # would then drop the column from the report — confirm
                # whether that best-effort behavior is intended.
                non_null_rows = db.query(f'SELECT COUNT(*) as cnt FROM {table} WHERE "{col}" IS NOT NULL')[0]["cnt"]
                zero_rows = db.query(f'SELECT COUNT(*) as cnt FROM {table} WHERE "{col}" = 0')[0]["cnt"]

                coverage = (non_null_rows / total_rows * 100) if total_rows > 0 else 0

                print(f"  列 {col}:")
                print(f"    - 总记录: {total_rows}, 非空: {non_null_rows} ({coverage:.1f}%), 值为0: {zero_rows}")

                results.append({
                    "table": table,
                    "column": col,
                    "total": total_rows,
                    "non_null": non_null_rows,
                    "coverage": coverage,
                    "zero_count": zero_rows,
                })

            except Exception as e:
                print(f"  列 {col}: [错误] {e}")

    return results
|
||||
|
||||
|
||||
def check_dwd_field_coverage(db: DatabaseConnection):
    """Audit data coverage of the newly added DWD columns.

    Mirror of check_ods_field_coverage for the DWD layer, minus the
    zero-value count. Returns a list of dicts; ``coverage`` is a
    percentage in [0, 100].
    """

    # Newly added DWD columns whose backfill we want to audit.
    fields_to_check = [
        ("billiards_dwd.dwd_table_fee_log", ["activity_discount_amount", "real_service_money"]),
        ("billiards_dwd.dwd_assistant_service_log", ["real_service_money"]),
        ("billiards_dwd.dwd_assistant_trash_event", ["tenant_id"]),
        ("billiards_dwd.dwd_store_goods_sale", ["coupon_share_money"]),
        ("billiards_dwd.dwd_payment", ["tenant_id"]),
        ("billiards_dwd.dim_member", ["pay_money_sum", "recharge_money_sum"]),
        ("billiards_dwd.dim_member_ex", ["person_tenant_org_id", "register_source"]),
        ("billiards_dwd.dim_member_card_account", ["principal_balance", "member_grade"]),
        ("billiards_dwd.dwd_member_balance_change", ["principal_after", "principal_before", "principal_change_amount"]),
        ("billiards_dwd.dwd_settlement_head", ["tenant_id"]),
        ("billiards_dwd.dwd_recharge_order", ["tenant_id"]),
        ("billiards_dwd.dim_groupbuy_package", ["sort", "is_first_limit"]),
        ("billiards_dwd.dwd_groupbuy_redemption", ["coupon_sale_id", "member_discount_money"]),
        ("billiards_dwd.dim_table", ["order_id"]),
        ("billiards_dwd.dim_store_goods", ["commodity_code", "not_sale"]),
        ("billiards_dwd.dwd_table_fee_adjust", ["table_name", "table_price", "charge_free"]),
        ("billiards_dwd.dim_tenant_goods", ["not_sale"]),
    ]

    print("\n" + "=" * 80)
    print("DWD 新增字段数据覆盖检查")
    print("=" * 80)

    results = []

    for table, columns in fields_to_check:
        print(f"\n检查表: {table}")

        # Total row count; if the table itself is unreadable, skip it whole.
        try:
            total_rows = db.query(f"SELECT COUNT(*) as cnt FROM {table}")[0]["cnt"]
        except Exception as e:
            print(f"  [错误] 无法获取记录数: {e}")
            continue

        for col in columns:
            try:
                # Existence check first — the column may not be deployed yet.
                schema, name = table.split(".", 1)
                col_check = db.query("""
                    SELECT COUNT(*) as cnt FROM information_schema.columns
                    WHERE table_schema = %s AND table_name = %s AND column_name = %s
                """, (schema, name, col.lower()))

                if col_check[0]["cnt"] == 0:
                    print(f"  列 {col}: [不存在]")
                    continue

                # Non-NULL count only (no zero-value check at this layer).
                non_null_rows = db.query(f'SELECT COUNT(*) as cnt FROM {table} WHERE "{col}" IS NOT NULL')[0]["cnt"]

                coverage = (non_null_rows / total_rows * 100) if total_rows > 0 else 0

                print(f"  列 {col}: 总记录: {total_rows}, 非空: {non_null_rows} ({coverage:.1f}%)")

                results.append({
                    "table": table,
                    "column": col,
                    "total": total_rows,
                    "non_null": non_null_rows,
                    "coverage": coverage,
                })

            except Exception as e:
                print(f"  列 {col}: [错误] {e}")

    return results
|
||||
|
||||
|
||||
def main():
    """Entry point: run ODS + DWD coverage checks and write a JSON report.

    Returns True on success, False when PG_DSN is not configured; the
    __main__ guard maps this to the process exit code.
    """
    print("=" * 80)
    print("全量数据回写验证")
    print("时间:", datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
    print("=" * 80)

    # Connect to the database via the PG_DSN loaded from etl_billiards/.env.
    dsn = os.getenv("PG_DSN")
    if not dsn:
        print("[错误] 未找到 PG_DSN 环境变量")
        return False

    db = DatabaseConnection(dsn)

    # Coverage checks for both layers.
    ods_results = check_ods_field_coverage(db)
    dwd_results = check_dwd_field_coverage(db)

    db.close()

    # Console summary with a rough traffic-light status per column.
    print("\n" + "=" * 80)
    print("汇总")
    print("=" * 80)

    print("\nODS 新增字段覆盖率统计:")
    for r in ods_results:
        # coverage is a percentage (0-100); thresholds: <50 attention,
        # <80 fair, otherwise good.
        if r["coverage"] < 50:
            status = "[需关注]"
        elif r["coverage"] < 80:
            status = "[一般]"
        else:
            status = "[良好]"
        print(f"  {r['table']}.{r['column']}: {r['coverage']:.1f}% {status}")

    print("\nDWD 新增字段覆盖率统计:")
    for r in dwd_results:
        if r["coverage"] < 50:
            status = "[需关注]"
        elif r["coverage"] < 80:
            status = "[一般]"
        else:
            status = "[良好]"
        print(f"  {r['table']}.{r['column']}: {r['coverage']:.1f}% {status}")

    # Persist the raw numbers next to this script for later diffing.
    report = {
        "generated_at": datetime.now().isoformat(),
        "ods_coverage": ods_results,
        "dwd_coverage": dwd_results,
    }

    report_file = Path(__file__).parent / "field_coverage_report.json"
    with open(report_file, "w", encoding="utf-8") as f:
        json.dump(report, f, ensure_ascii=False, indent=2)

    print(f"\n报告已保存到: {report_file}")

    return True
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
success = main()
|
||||
sys.exit(0 if success else 1)
|
||||
48
tmp/get_dwd_schema.py
Normal file
48
tmp/get_dwd_schema.py
Normal file
@@ -0,0 +1,48 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""获取 DWD 所有表的结构"""
|
||||
import psycopg2
|
||||
import json
|
||||
|
||||
# NOTE(review): plaintext DB credentials committed in source — load from env/.env instead.
DSN = 'postgresql://local-Python:Neo-local-1991125@100.64.0.4:5432/LLZQ-test'
|
||||
|
||||
def get_all_tables(conn, schema='billiards_dwd'):
    """Return {table_name: [column metadata dicts]} for every table in *schema*.

    Each column dict carries 'column', 'type', 'nullable' and 'max_length'
    (character length for varchars, numeric precision otherwise), in
    ordinal_position order.
    """
    cur = conn.cursor()
    cur.execute("""
        SELECT table_name, column_name, data_type, is_nullable,
               COALESCE(character_maximum_length, numeric_precision) as max_length
        FROM information_schema.columns
        WHERE table_schema = %s
        ORDER BY table_name, ordinal_position
    """, (schema,))

    catalog = {}
    for table_name, col_name, data_type, nullable, max_len in cur.fetchall():
        catalog.setdefault(table_name, []).append({
            'column': col_name,
            'type': data_type,
            'nullable': nullable,
            'max_length': max_len,
        })

    cur.close()
    return catalog
|
||||
|
||||
def main():
    """Dump the billiards_dwd schema to tmp/dwd_schema.json and print a summary."""
    conn = psycopg2.connect(DSN)
    tables = get_all_tables(conn)
    conn.close()

    # Persist the full schema snapshot for downstream comparison scripts.
    with open('tmp/dwd_schema.json', 'w', encoding='utf-8') as fh:
        json.dump(tables, fh, ensure_ascii=False, indent=2)

    print(f"Found {len(tables)} tables")
    for name in sorted(tables):
        print(f"  {name}: {len(tables[name])} columns")
|
||||
|
||||
# Run the schema dump when invoked directly.
if __name__ == '__main__':
    main()
|
||||
9
tmp/list_dwd_tables.py
Normal file
9
tmp/list_dwd_tables.py
Normal file
@@ -0,0 +1,9 @@
|
||||
# -*- coding: utf-8 -*-
"""List every table in the billiards_dwd schema, one name per line."""
import psycopg2
# NOTE(review): plaintext DB credentials committed in source — load from env/.env instead.
DSN = 'postgresql://local-Python:Neo-local-1991125@100.64.0.4:5432/LLZQ-test'
conn = psycopg2.connect(DSN)
cur = conn.cursor()
cur.execute("SELECT table_name FROM information_schema.tables WHERE table_schema = 'billiards_dwd' ORDER BY table_name")
for r in cur.fetchall():
    print(r[0])
# NOTE(review): the cursor is never closed explicitly; conn.close() releases it.
conn.close()
||||
19
tmp/query_schema.py
Normal file
19
tmp/query_schema.py
Normal file
@@ -0,0 +1,19 @@
|
||||
"""Print every billiards_dwd table with its columns, grouped and ordered."""
import psycopg2
from collections import defaultdict

# NOTE(review): plaintext DB credentials committed in source — load from env/.env instead.
conn = psycopg2.connect('postgresql://local-Python:Neo-local-1991125@100.64.0.4:5432/LLZQ-test')
cur = conn.cursor()
cur.execute("SELECT table_name, column_name, data_type, ordinal_position FROM information_schema.columns WHERE table_schema = 'billiards_dwd' ORDER BY table_name, ordinal_position")
results = cur.fetchall()

# Group the flat column list by table, preserving ordinal order.
tables = defaultdict(list)
for row in results:
    tables[row[0]].append((row[1], row[2], row[3]))

for table in sorted(tables.keys()):
    # BUGFIX: the label was GBK mojibake ("琛ㄥ悕", i.e. UTF-8 bytes of
    # "表名" mis-decoded as GBK); restored to the intended "表名".
    print(f'\n表名: {table}')
    cols = tables[table]
    for col, dtype, pos in cols:
        print(f'  {pos}. {col} ({dtype})')

cur.close()
conn.close()
||||
994
tmp/schema_output.txt
Normal file
994
tmp/schema_output.txt
Normal file
@@ -0,0 +1,994 @@
|
||||
|
||||
?? assistant_accounts_master
|
||||
------------------------------------------------------------
|
||||
1. id (bigint)
|
||||
2. tenant_id (bigint)
|
||||
3. site_id (bigint)
|
||||
4. assistant_no (text)
|
||||
5. nickname (text)
|
||||
6. real_name (text)
|
||||
7. mobile (text)
|
||||
8. team_id (bigint)
|
||||
9. team_name (text)
|
||||
10. user_id (bigint)
|
||||
11. level (text)
|
||||
12. assistant_status (integer)
|
||||
13. work_status (integer)
|
||||
14. leave_status (integer)
|
||||
15. entry_time (timestamp without time zone)
|
||||
16. resign_time (timestamp without time zone)
|
||||
17. start_time (timestamp without time zone)
|
||||
18. end_time (timestamp without time zone)
|
||||
19. create_time (timestamp without time zone)
|
||||
20. update_time (timestamp without time zone)
|
||||
21. order_trade_no (text)
|
||||
22. staff_id (bigint)
|
||||
23. staff_profile_id (bigint)
|
||||
24. system_role_id (bigint)
|
||||
25. avatar (text)
|
||||
26. birth_date (timestamp without time zone)
|
||||
27. gender (integer)
|
||||
28. height (numeric)
|
||||
29. weight (numeric)
|
||||
30. job_num (text)
|
||||
31. show_status (integer)
|
||||
32. show_sort (integer)
|
||||
33. sum_grade (numeric)
|
||||
34. assistant_grade (numeric)
|
||||
35. get_grade_times (integer)
|
||||
36. introduce (text)
|
||||
37. video_introduction_url (text)
|
||||
38. group_id (bigint)
|
||||
39. group_name (text)
|
||||
40. shop_name (text)
|
||||
41. charge_way (integer)
|
||||
42. entry_type (integer)
|
||||
43. allow_cx (integer)
|
||||
44. is_guaranteed (integer)
|
||||
45. salary_grant_enabled (integer)
|
||||
46. light_status (integer)
|
||||
47. online_status (integer)
|
||||
48. is_delete (integer)
|
||||
49. cx_unit_price (numeric)
|
||||
50. pd_unit_price (numeric)
|
||||
51. last_table_id (bigint)
|
||||
52. last_table_name (text)
|
||||
53. person_org_id (bigint)
|
||||
54. serial_number (bigint)
|
||||
55. is_team_leader (integer)
|
||||
56. criticism_status (integer)
|
||||
57. last_update_name (text)
|
||||
58. ding_talk_synced (integer)
|
||||
59. site_light_cfg_id (bigint)
|
||||
60. light_equipment_id (text)
|
||||
61. entry_sign_status (integer)
|
||||
62. resign_sign_status (integer)
|
||||
63. source_file (text)
|
||||
64. source_endpoint (text)
|
||||
65. fetched_at (timestamp with time zone)
|
||||
66. payload (jsonb)
|
||||
67. content_hash (text)
|
||||
|
||||
?? assistant_cancellation_records
|
||||
------------------------------------------------------------
|
||||
1. id (bigint)
|
||||
2. siteid (bigint)
|
||||
3. siteprofile (jsonb)
|
||||
4. assistantname (text)
|
||||
5. assistantabolishamount (numeric)
|
||||
6. assistanton (integer)
|
||||
7. pdchargeminutes (integer)
|
||||
8. tableareaid (bigint)
|
||||
9. tablearea (text)
|
||||
10. tableid (bigint)
|
||||
11. tablename (text)
|
||||
12. trashreason (text)
|
||||
13. createtime (timestamp without time zone)
|
||||
14. source_file (text)
|
||||
15. source_endpoint (text)
|
||||
16. fetched_at (timestamp with time zone)
|
||||
17. payload (jsonb)
|
||||
18. content_hash (text)
|
||||
|
||||
?? assistant_service_records
|
||||
------------------------------------------------------------
|
||||
1. id (bigint)
|
||||
2. tenant_id (bigint)
|
||||
3. site_id (bigint)
|
||||
4. siteprofile (jsonb)
|
||||
5. site_table_id (bigint)
|
||||
6. order_settle_id (bigint)
|
||||
7. order_trade_no (text)
|
||||
8. order_pay_id (bigint)
|
||||
9. order_assistant_id (bigint)
|
||||
10. order_assistant_type (integer)
|
||||
11. assistantname (text)
|
||||
12. assistantno (text)
|
||||
13. assistant_level (text)
|
||||
14. levelname (text)
|
||||
15. site_assistant_id (bigint)
|
||||
16. skill_id (bigint)
|
||||
17. skillname (text)
|
||||
18. system_member_id (bigint)
|
||||
19. tablename (text)
|
||||
20. tenant_member_id (bigint)
|
||||
21. user_id (bigint)
|
||||
22. assistant_team_id (bigint)
|
||||
23. nickname (text)
|
||||
24. ledger_name (text)
|
||||
25. ledger_group_name (text)
|
||||
26. ledger_amount (numeric)
|
||||
27. ledger_count (numeric)
|
||||
28. ledger_unit_price (numeric)
|
||||
29. ledger_status (integer)
|
||||
30. ledger_start_time (timestamp without time zone)
|
||||
31. ledger_end_time (timestamp without time zone)
|
||||
32. manual_discount_amount (numeric)
|
||||
33. member_discount_amount (numeric)
|
||||
34. coupon_deduct_money (numeric)
|
||||
35. service_money (numeric)
|
||||
36. projected_income (numeric)
|
||||
37. real_use_seconds (integer)
|
||||
38. income_seconds (integer)
|
||||
39. start_use_time (timestamp without time zone)
|
||||
40. last_use_time (timestamp without time zone)
|
||||
41. create_time (timestamp without time zone)
|
||||
42. is_single_order (integer)
|
||||
43. is_delete (integer)
|
||||
44. is_trash (integer)
|
||||
45. trash_reason (text)
|
||||
46. trash_applicant_id (bigint)
|
||||
47. trash_applicant_name (text)
|
||||
48. operator_id (bigint)
|
||||
49. operator_name (text)
|
||||
50. salesman_name (text)
|
||||
51. salesman_org_id (bigint)
|
||||
52. salesman_user_id (bigint)
|
||||
53. person_org_id (bigint)
|
||||
54. add_clock (integer)
|
||||
55. returns_clock (integer)
|
||||
56. composite_grade (numeric)
|
||||
57. composite_grade_time (timestamp without time zone)
|
||||
58. skill_grade (numeric)
|
||||
59. service_grade (numeric)
|
||||
60. sum_grade (numeric)
|
||||
61. grade_status (integer)
|
||||
62. get_grade_times (integer)
|
||||
63. is_not_responding (integer)
|
||||
64. is_confirm (integer)
|
||||
65. payload (jsonb)
|
||||
66. source_file (text)
|
||||
67. source_endpoint (text)
|
||||
68. fetched_at (timestamp with time zone)
|
||||
69. content_hash (text)
|
||||
|
||||
?? goods_stock_movements
|
||||
------------------------------------------------------------
|
||||
1. sitegoodsstockid (bigint)
|
||||
2. tenantid (bigint)
|
||||
3. siteid (bigint)
|
||||
4. sitegoodsid (bigint)
|
||||
5. goodsname (text)
|
||||
6. goodscategoryid (bigint)
|
||||
7. goodssecondcategoryid (bigint)
|
||||
8. unit (text)
|
||||
9. price (numeric)
|
||||
10. stocktype (integer)
|
||||
11. changenum (numeric)
|
||||
12. startnum (numeric)
|
||||
13. endnum (numeric)
|
||||
14. changenuma (numeric)
|
||||
15. startnuma (numeric)
|
||||
16. endnuma (numeric)
|
||||
17. remark (text)
|
||||
18. operatorname (text)
|
||||
19. createtime (timestamp without time zone)
|
||||
20. source_file (text)
|
||||
21. source_endpoint (text)
|
||||
22. fetched_at (timestamp with time zone)
|
||||
23. payload (jsonb)
|
||||
24. content_hash (text)
|
||||
|
||||
?? goods_stock_summary
|
||||
------------------------------------------------------------
|
||||
1. sitegoodsid (bigint)
|
||||
2. goodsname (text)
|
||||
3. goodsunit (text)
|
||||
4. goodscategoryid (bigint)
|
||||
5. goodscategorysecondid (bigint)
|
||||
6. categoryname (text)
|
||||
7. rangestartstock (numeric)
|
||||
8. rangeendstock (numeric)
|
||||
9. rangein (numeric)
|
||||
10. rangeout (numeric)
|
||||
11. rangesale (numeric)
|
||||
12. rangesalemoney (numeric)
|
||||
13. rangeinventory (numeric)
|
||||
14. currentstock (numeric)
|
||||
15. source_file (text)
|
||||
16. source_endpoint (text)
|
||||
17. fetched_at (timestamp with time zone)
|
||||
18. payload (jsonb)
|
||||
19. content_hash (text)
|
||||
|
||||
?? group_buy_packages
|
||||
------------------------------------------------------------
|
||||
1. id (bigint)
|
||||
2. package_id (bigint)
|
||||
3. package_name (text)
|
||||
4. selling_price (numeric)
|
||||
5. coupon_money (numeric)
|
||||
6. date_type (integer)
|
||||
7. date_info (text)
|
||||
8. start_time (timestamp without time zone)
|
||||
9. end_time (timestamp without time zone)
|
||||
10. start_clock (text)
|
||||
11. end_clock (text)
|
||||
12. add_start_clock (text)
|
||||
13. add_end_clock (text)
|
||||
14. duration (integer)
|
||||
15. usable_count (integer)
|
||||
16. usable_range (integer)
|
||||
17. table_area_id (bigint)
|
||||
18. table_area_name (text)
|
||||
19. table_area_id_list (jsonb)
|
||||
20. tenant_table_area_id (bigint)
|
||||
21. tenant_table_area_id_list (jsonb)
|
||||
22. site_id (bigint)
|
||||
23. site_name (text)
|
||||
24. tenant_id (bigint)
|
||||
25. card_type_ids (jsonb)
|
||||
26. group_type (integer)
|
||||
27. system_group_type (integer)
|
||||
28. type (integer)
|
||||
29. effective_status (integer)
|
||||
30. is_enabled (integer)
|
||||
31. is_delete (integer)
|
||||
32. max_selectable_categories (integer)
|
||||
33. area_tag_type (integer)
|
||||
34. creator_name (text)
|
||||
35. create_time (timestamp without time zone)
|
||||
36. source_file (text)
|
||||
37. source_endpoint (text)
|
||||
38. fetched_at (timestamp with time zone)
|
||||
39. payload (jsonb)
|
||||
40. content_hash (text)
|
||||
|
||||
?? group_buy_redemption_records
|
||||
------------------------------------------------------------
|
||||
1. id (bigint)
|
||||
2. tenant_id (bigint)
|
||||
3. site_id (bigint)
|
||||
4. sitename (text)
|
||||
5. table_id (bigint)
|
||||
6. tablename (text)
|
||||
7. tableareaname (text)
|
||||
8. tenant_table_area_id (bigint)
|
||||
9. order_trade_no (text)
|
||||
10. order_settle_id (bigint)
|
||||
11. order_pay_id (bigint)
|
||||
12. order_coupon_id (bigint)
|
||||
13. order_coupon_channel (integer)
|
||||
14. coupon_code (text)
|
||||
15. coupon_money (numeric)
|
||||
16. coupon_origin_id (bigint)
|
||||
17. ledger_name (text)
|
||||
18. ledger_group_name (text)
|
||||
19. ledger_amount (numeric)
|
||||
20. ledger_count (numeric)
|
||||
21. ledger_unit_price (numeric)
|
||||
22. ledger_status (integer)
|
||||
23. table_charge_seconds (integer)
|
||||
24. promotion_activity_id (bigint)
|
||||
25. promotion_coupon_id (bigint)
|
||||
26. promotion_seconds (integer)
|
||||
27. offer_type (integer)
|
||||
28. assistant_promotion_money (numeric)
|
||||
29. assistant_service_promotion_money (numeric)
|
||||
30. table_service_promotion_money (numeric)
|
||||
31. goods_promotion_money (numeric)
|
||||
32. recharge_promotion_money (numeric)
|
||||
33. reward_promotion_money (numeric)
|
||||
34. goodsoptionprice (numeric)
|
||||
35. salesman_name (text)
|
||||
36. sales_man_org_id (bigint)
|
||||
37. salesman_role_id (bigint)
|
||||
38. salesman_user_id (bigint)
|
||||
39. operator_id (bigint)
|
||||
40. operator_name (text)
|
||||
41. is_single_order (integer)
|
||||
42. is_delete (integer)
|
||||
43. create_time (timestamp without time zone)
|
||||
44. payload (jsonb)
|
||||
45. source_file (text)
|
||||
46. source_endpoint (text)
|
||||
47. fetched_at (timestamp with time zone)
|
||||
48. content_hash (text)
|
||||
|
||||
?? member_balance_changes
|
||||
------------------------------------------------------------
|
||||
1. tenant_id (bigint)
|
||||
2. site_id (bigint)
|
||||
3. register_site_id (bigint)
|
||||
4. registersitename (text)
|
||||
5. paysitename (text)
|
||||
6. id (bigint)
|
||||
7. tenant_member_id (bigint)
|
||||
8. tenant_member_card_id (bigint)
|
||||
9. system_member_id (bigint)
|
||||
10. membername (text)
|
||||
11. membermobile (text)
|
||||
12. card_type_id (bigint)
|
||||
13. membercardtypename (text)
|
||||
14. account_data (numeric)
|
||||
15. before (numeric)
|
||||
16. after (numeric)
|
||||
17. refund_amount (numeric)
|
||||
18. from_type (integer)
|
||||
19. payment_method (integer)
|
||||
20. relate_id (bigint)
|
||||
21. remark (text)
|
||||
22. operator_id (bigint)
|
||||
23. operator_name (text)
|
||||
24. is_delete (integer)
|
||||
25. create_time (timestamp without time zone)
|
||||
26. source_file (text)
|
||||
27. source_endpoint (text)
|
||||
28. fetched_at (timestamp with time zone)
|
||||
29. payload (jsonb)
|
||||
30. content_hash (text)
|
||||
|
||||
?? member_profiles
|
||||
------------------------------------------------------------
|
||||
1. tenant_id (bigint)
|
||||
2. register_site_id (bigint)
|
||||
3. site_name (text)
|
||||
4. id (bigint)
|
||||
5. system_member_id (bigint)
|
||||
6. member_card_grade_code (bigint)
|
||||
7. member_card_grade_name (text)
|
||||
8. mobile (text)
|
||||
9. nickname (text)
|
||||
10. point (numeric)
|
||||
11. growth_value (numeric)
|
||||
12. referrer_member_id (bigint)
|
||||
13. status (integer)
|
||||
14. user_status (integer)
|
||||
15. create_time (timestamp without time zone)
|
||||
16. source_file (text)
|
||||
17. source_endpoint (text)
|
||||
18. fetched_at (timestamp with time zone)
|
||||
19. payload (jsonb)
|
||||
20. content_hash (text)
|
||||
|
||||
?? member_stored_value_cards
|
||||
------------------------------------------------------------
|
||||
1. tenant_id (bigint)
|
||||
2. tenant_member_id (bigint)
|
||||
3. system_member_id (bigint)
|
||||
4. register_site_id (bigint)
|
||||
5. site_name (text)
|
||||
6. id (bigint)
|
||||
7. member_card_grade_code (bigint)
|
||||
8. member_card_grade_code_name (text)
|
||||
9. member_card_type_name (text)
|
||||
10. member_name (text)
|
||||
11. member_mobile (text)
|
||||
12. card_type_id (bigint)
|
||||
13. card_no (text)
|
||||
14. card_physics_type (text)
|
||||
15. balance (numeric)
|
||||
16. denomination (numeric)
|
||||
17. table_discount (numeric)
|
||||
18. goods_discount (numeric)
|
||||
19. assistant_discount (numeric)
|
||||
20. assistant_reward_discount (numeric)
|
||||
21. table_service_discount (numeric)
|
||||
22. assistant_service_discount (numeric)
|
||||
23. coupon_discount (numeric)
|
||||
24. goods_service_discount (numeric)
|
||||
25. assistant_discount_sub_switch (integer)
|
||||
26. table_discount_sub_switch (integer)
|
||||
27. goods_discount_sub_switch (integer)
|
||||
28. assistant_reward_discount_sub_switch (integer)
|
||||
29. table_service_deduct_radio (numeric)
|
||||
30. assistant_service_deduct_radio (numeric)
|
||||
31. goods_service_deduct_radio (numeric)
|
||||
32. assistant_deduct_radio (numeric)
|
||||
33. table_deduct_radio (numeric)
|
||||
34. goods_deduct_radio (numeric)
|
||||
35. coupon_deduct_radio (numeric)
|
||||
36. assistant_reward_deduct_radio (numeric)
|
||||
37. tablecarddeduct (numeric)
|
||||
38. tableservicecarddeduct (numeric)
|
||||
39. goodscardeduct (numeric)
|
||||
40. goodsservicecarddeduct (numeric)
|
||||
41. assistantcarddeduct (numeric)
|
||||
42. assistantservicecarddeduct (numeric)
|
||||
43. assistantrewardcarddeduct (numeric)
|
||||
44. cardsettlededuct (numeric)
|
||||
45. couponcarddeduct (numeric)
|
||||
46. deliveryfeededuct (numeric)
|
||||
47. use_scene (integer)
|
||||
48. able_cross_site (integer)
|
||||
49. able_site_transfer (integer)
|
||||
50. is_allow_give (integer)
|
||||
51. is_allow_order_deduct (integer)
|
||||
52. is_delete (integer)
|
||||
53. bind_password (text)
|
||||
54. goods_discount_range_type (integer)
|
||||
55. goodscategoryid (bigint)
|
||||
56. tableareaid (bigint)
|
||||
57. effect_site_id (bigint)
|
||||
58. start_time (timestamp without time zone)
|
||||
59. end_time (timestamp without time zone)
|
||||
60. disable_start_time (timestamp without time zone)
|
||||
61. disable_end_time (timestamp without time zone)
|
||||
62. last_consume_time (timestamp without time zone)
|
||||
63. create_time (timestamp without time zone)
|
||||
64. status (integer)
|
||||
65. sort (integer)
|
||||
66. tenantavatar (text)
|
||||
67. tenantname (text)
|
||||
68. pdassisnatlevel (text)
|
||||
69. cxassisnatlevel (text)
|
||||
70. source_file (text)
|
||||
71. source_endpoint (text)
|
||||
72. fetched_at (timestamp with time zone)
|
||||
73. payload (jsonb)
|
||||
74. content_hash (text)
|
||||
|
||||
?? payment_transactions
|
||||
------------------------------------------------------------
|
||||
1. id (bigint)
|
||||
2. site_id (bigint)
|
||||
3. siteprofile (jsonb)
|
||||
4. relate_type (integer)
|
||||
5. relate_id (bigint)
|
||||
6. pay_amount (numeric)
|
||||
7. pay_status (integer)
|
||||
8. pay_time (timestamp without time zone)
|
||||
9. create_time (timestamp without time zone)
|
||||
10. payment_method (integer)
|
||||
11. online_pay_channel (integer)
|
||||
12. source_file (text)
|
||||
13. source_endpoint (text)
|
||||
14. fetched_at (timestamp with time zone)
|
||||
15. payload (jsonb)
|
||||
16. content_hash (text)
|
||||
|
||||
?? platform_coupon_redemption_records
|
||||
------------------------------------------------------------
|
||||
1. id (bigint)
|
||||
2. verify_id (bigint)
|
||||
3. certificate_id (text)
|
||||
4. coupon_code (text)
|
||||
5. coupon_name (text)
|
||||
6. coupon_channel (integer)
|
||||
7. groupon_type (integer)
|
||||
8. group_package_id (bigint)
|
||||
9. sale_price (numeric)
|
||||
10. coupon_money (numeric)
|
||||
11. coupon_free_time (numeric)
|
||||
12. coupon_cover (text)
|
||||
13. coupon_remark (text)
|
||||
14. use_status (integer)
|
||||
15. consume_time (timestamp without time zone)
|
||||
16. create_time (timestamp without time zone)
|
||||
17. deal_id (text)
|
||||
18. channel_deal_id (text)
|
||||
19. site_id (bigint)
|
||||
20. site_order_id (bigint)
|
||||
21. table_id (bigint)
|
||||
22. tenant_id (bigint)
|
||||
23. operator_id (bigint)
|
||||
24. operator_name (text)
|
||||
25. is_delete (integer)
|
||||
26. siteprofile (jsonb)
|
||||
27. source_file (text)
|
||||
28. source_endpoint (text)
|
||||
29. fetched_at (timestamp with time zone)
|
||||
30. payload (jsonb)
|
||||
31. content_hash (text)
|
||||
|
||||
?? recharge_settlements
|
||||
------------------------------------------------------------
|
||||
1. id (bigint)
|
||||
2. tenantid (bigint)
|
||||
3. siteid (bigint)
|
||||
4. sitename (text)
|
||||
5. balanceamount (numeric)
|
||||
6. cardamount (numeric)
|
||||
7. cashamount (numeric)
|
||||
8. couponamount (numeric)
|
||||
9. createtime (timestamp with time zone)
|
||||
10. memberid (bigint)
|
||||
11. membername (text)
|
||||
12. tenantmembercardid (bigint)
|
||||
13. membercardtypename (text)
|
||||
14. memberphone (text)
|
||||
15. tableid (bigint)
|
||||
16. consumemoney (numeric)
|
||||
17. onlineamount (numeric)
|
||||
18. operatorid (bigint)
|
||||
19. operatorname (text)
|
||||
20. revokeorderid (bigint)
|
||||
21. revokeordername (text)
|
||||
22. revoketime (timestamp with time zone)
|
||||
23. payamount (numeric)
|
||||
24. pointamount (numeric)
|
||||
25. refundamount (numeric)
|
||||
26. settlename (text)
|
||||
27. settlerelateid (bigint)
|
||||
28. settlestatus (integer)
|
||||
29. settletype (integer)
|
||||
30. paytime (timestamp with time zone)
|
||||
31. roundingamount (numeric)
|
||||
32. paymentmethod (integer)
|
||||
33. adjustamount (numeric)
|
||||
34. assistantcxmoney (numeric)
|
||||
35. assistantpdmoney (numeric)
|
||||
36. couponsaleamount (numeric)
|
||||
37. memberdiscountamount (numeric)
|
||||
38. tablechargemoney (numeric)
|
||||
39. goodsmoney (numeric)
|
||||
40. realgoodsmoney (numeric)
|
||||
41. servicemoney (numeric)
|
||||
42. prepaymoney (numeric)
|
||||
43. salesmanname (text)
|
||||
44. orderremark (text)
|
||||
45. salesmanuserid (bigint)
|
||||
46. canberevoked (boolean)
|
||||
47. pointdiscountprice (numeric)
|
||||
48. pointdiscountcost (numeric)
|
||||
49. activitydiscount (numeric)
|
||||
50. serialnumber (bigint)
|
||||
51. assistantmanualdiscount (numeric)
|
||||
52. allcoupondiscount (numeric)
|
||||
53. goodspromotionmoney (numeric)
|
||||
54. assistantpromotionmoney (numeric)
|
||||
55. isusecoupon (boolean)
|
||||
56. isusediscount (boolean)
|
||||
57. isactivity (boolean)
|
||||
58. isbindmember (boolean)
|
||||
59. isfirst (integer)
|
||||
60. rechargecardamount (numeric)
|
||||
61. giftcardamount (numeric)
|
||||
62. source_file (text)
|
||||
63. source_endpoint (text)
|
||||
64. fetched_at (timestamp with time zone)
|
||||
65. payload (jsonb)
|
||||
66. content_hash (text)
|
||||
|
||||
?? refund_transactions
|
||||
------------------------------------------------------------
|
||||
1. id (bigint)
|
||||
2. tenant_id (bigint)
|
||||
3. tenantname (text)
|
||||
4. site_id (bigint)
|
||||
5. siteprofile (jsonb)
|
||||
6. relate_type (integer)
|
||||
7. relate_id (bigint)
|
||||
8. pay_sn (text)
|
||||
9. pay_amount (numeric)
|
||||
10. refund_amount (numeric)
|
||||
11. round_amount (numeric)
|
||||
12. pay_status (integer)
|
||||
13. pay_time (timestamp without time zone)
|
||||
14. create_time (timestamp without time zone)
|
||||
15. payment_method (integer)
|
||||
16. pay_terminal (integer)
|
||||
17. pay_config_id (bigint)
|
||||
18. online_pay_channel (integer)
|
||||
19. online_pay_type (integer)
|
||||
20. channel_fee (numeric)
|
||||
21. channel_payer_id (text)
|
||||
22. channel_pay_no (text)
|
||||
23. member_id (bigint)
|
||||
24. member_card_id (bigint)
|
||||
25. cashier_point_id (bigint)
|
||||
26. operator_id (bigint)
|
||||
27. action_type (integer)
|
||||
28. check_status (integer)
|
||||
29. is_revoke (integer)
|
||||
30. is_delete (integer)
|
||||
31. balance_frozen_amount (numeric)
|
||||
32. card_frozen_amount (numeric)
|
||||
33. source_file (text)
|
||||
34. source_endpoint (text)
|
||||
35. fetched_at (timestamp with time zone)
|
||||
36. payload (jsonb)
|
||||
37. content_hash (text)
|
||||
|
||||
?? settlement_records
|
||||
------------------------------------------------------------
|
||||
1. id (bigint)
|
||||
2. tenantid (bigint)
|
||||
3. siteid (bigint)
|
||||
4. sitename (text)
|
||||
5. balanceamount (numeric)
|
||||
6. cardamount (numeric)
|
||||
7. cashamount (numeric)
|
||||
8. couponamount (numeric)
|
||||
9. createtime (timestamp with time zone)
|
||||
10. memberid (bigint)
|
||||
11. membername (text)
|
||||
12. tenantmembercardid (bigint)
|
||||
13. membercardtypename (text)
|
||||
14. memberphone (text)
|
||||
15. tableid (bigint)
|
||||
16. consumemoney (numeric)
|
||||
17. onlineamount (numeric)
|
||||
18. operatorid (bigint)
|
||||
19. operatorname (text)
|
||||
20. revokeorderid (bigint)
|
||||
21. revokeordername (text)
|
||||
22. revoketime (timestamp with time zone)
|
||||
23. payamount (numeric)
|
||||
24. pointamount (numeric)
|
||||
25. refundamount (numeric)
|
||||
26. settlename (text)
|
||||
27. settlerelateid (bigint)
|
||||
28. settlestatus (integer)
|
||||
29. settletype (integer)
|
||||
30. paytime (timestamp with time zone)
|
||||
31. roundingamount (numeric)
|
||||
32. paymentmethod (integer)
|
||||
33. adjustamount (numeric)
|
||||
34. assistantcxmoney (numeric)
|
||||
35. assistantpdmoney (numeric)
|
||||
36. couponsaleamount (numeric)
|
||||
37. memberdiscountamount (numeric)
|
||||
38. tablechargemoney (numeric)
|
||||
39. goodsmoney (numeric)
|
||||
40. realgoodsmoney (numeric)
|
||||
41. servicemoney (numeric)
|
||||
42. prepaymoney (numeric)
|
||||
43. salesmanname (text)
|
||||
44. orderremark (text)
|
||||
45. salesmanuserid (bigint)
|
||||
46. canberevoked (boolean)
|
||||
47. pointdiscountprice (numeric)
|
||||
48. pointdiscountcost (numeric)
|
||||
49. activitydiscount (numeric)
|
||||
50. serialnumber (bigint)
|
||||
51. assistantmanualdiscount (numeric)
|
||||
52. allcoupondiscount (numeric)
|
||||
53. goodspromotionmoney (numeric)
|
||||
54. assistantpromotionmoney (numeric)
|
||||
55. isusecoupon (boolean)
|
||||
56. isusediscount (boolean)
|
||||
57. isactivity (boolean)
|
||||
58. isbindmember (boolean)
|
||||
59. isfirst (integer)
|
||||
60. rechargecardamount (numeric)
|
||||
61. giftcardamount (numeric)
|
||||
62. source_file (text)
|
||||
63. source_endpoint (text)
|
||||
64. fetched_at (timestamp with time zone)
|
||||
65. payload (jsonb)
|
||||
66. content_hash (text)
|
||||
|
||||
?? settlement_ticket_details
|
||||
------------------------------------------------------------
|
||||
1. ordersettleid (bigint)
|
||||
2. actualpayment (numeric)
|
||||
3. adjustamount (numeric)
|
||||
4. assistantmanualdiscount (numeric)
|
||||
5. balanceamount (numeric)
|
||||
6. cashiername (text)
|
||||
7. consumemoney (numeric)
|
||||
8. couponamount (numeric)
|
||||
9. deliveryaddress (text)
|
||||
10. deliveryfee (numeric)
|
||||
11. ledgeramount (numeric)
|
||||
12. memberdeductamount (numeric)
|
||||
13. memberofferamount (numeric)
|
||||
14. onlinereturnamount (numeric)
|
||||
15. orderremark (text)
|
||||
16. ordersettlenumber (bigint)
|
||||
17. paymemberbalance (numeric)
|
||||
18. paytime (timestamp without time zone)
|
||||
19. paymentmethod (integer)
|
||||
20. pointdiscountcost (numeric)
|
||||
21. pointdiscountprice (numeric)
|
||||
22. prepaymoney (numeric)
|
||||
23. refundamount (numeric)
|
||||
24. returngoodsamount (numeric)
|
||||
25. rewardname (text)
|
||||
26. settletype (text)
|
||||
27. siteaddress (text)
|
||||
28. sitebusinesstel (text)
|
||||
29. siteid (bigint)
|
||||
30. sitename (text)
|
||||
31. tenantid (bigint)
|
||||
32. tenantname (text)
|
||||
33. ticketcustomcontent (text)
|
||||
34. ticketremark (text)
|
||||
35. vouchermoney (numeric)
|
||||
36. memberprofile (jsonb)
|
||||
37. orderitem (jsonb)
|
||||
38. tenantmembercardlogs (jsonb)
|
||||
39. payload (jsonb)
|
||||
40. source_file (text)
|
||||
41. source_endpoint (text)
|
||||
42. fetched_at (timestamp with time zone)
|
||||
43. content_hash (text)
|
||||
|
||||
?? site_tables_master
|
||||
------------------------------------------------------------
|
||||
1. id (bigint)
|
||||
2. site_id (bigint)
|
||||
3. sitename (text)
|
||||
4. appletQrCodeUrl (text)
|
||||
5. areaname (text)
|
||||
6. audit_status (integer)
|
||||
7. charge_free (integer)
|
||||
8. create_time (timestamp without time zone)
|
||||
9. delay_lights_time (integer)
|
||||
10. is_online_reservation (integer)
|
||||
11. is_rest_area (integer)
|
||||
12. light_status (integer)
|
||||
13. only_allow_groupon (integer)
|
||||
14. order_delay_time (integer)
|
||||
15. self_table (integer)
|
||||
16. show_status (integer)
|
||||
17. site_table_area_id (bigint)
|
||||
18. tablestatusname (text)
|
||||
19. table_cloth_use_cycle (integer)
|
||||
20. table_cloth_use_time (timestamp without time zone)
|
||||
21. table_name (text)
|
||||
22. table_price (numeric)
|
||||
23. table_status (integer)
|
||||
24. temporary_light_second (integer)
|
||||
25. virtual_table (integer)
|
||||
26. source_file (text)
|
||||
27. source_endpoint (text)
|
||||
28. fetched_at (timestamp with time zone)
|
||||
29. payload (jsonb)
|
||||
30. content_hash (text)
|
||||
|
||||
?? stock_goods_category_tree
|
||||
------------------------------------------------------------
|
||||
1. id (bigint)
|
||||
2. tenant_id (bigint)
|
||||
3. category_name (text)
|
||||
4. alias_name (text)
|
||||
5. pid (bigint)
|
||||
6. business_name (text)
|
||||
7. tenant_goods_business_id (bigint)
|
||||
8. open_salesman (integer)
|
||||
9. categoryboxes (jsonb)
|
||||
10. sort (integer)
|
||||
11. is_warehousing (integer)
|
||||
12. source_file (text)
|
||||
13. source_endpoint (text)
|
||||
14. fetched_at (timestamp with time zone)
|
||||
15. payload (jsonb)
|
||||
16. content_hash (text)
|
||||
|
||||
?? store_goods_master
|
||||
------------------------------------------------------------
|
||||
1. id (bigint)
|
||||
2. tenant_id (bigint)
|
||||
3. site_id (bigint)
|
||||
4. sitename (text)
|
||||
5. tenant_goods_id (bigint)
|
||||
6. goods_name (text)
|
||||
7. goods_bar_code (text)
|
||||
8. goods_category_id (bigint)
|
||||
9. goods_second_category_id (bigint)
|
||||
10. onecategoryname (text)
|
||||
11. twocategoryname (text)
|
||||
12. unit (text)
|
||||
13. sale_price (numeric)
|
||||
14. cost_price (numeric)
|
||||
15. cost_price_type (integer)
|
||||
16. min_discount_price (numeric)
|
||||
17. safe_stock (numeric)
|
||||
18. stock (numeric)
|
||||
19. stock_a (numeric)
|
||||
20. sale_num (numeric)
|
||||
21. total_purchase_cost (numeric)
|
||||
22. total_sales (numeric)
|
||||
23. average_monthly_sales (numeric)
|
||||
24. batch_stock_quantity (numeric)
|
||||
25. days_available (integer)
|
||||
26. provisional_total_cost (numeric)
|
||||
27. enable_status (integer)
|
||||
28. audit_status (integer)
|
||||
29. goods_state (integer)
|
||||
30. is_delete (integer)
|
||||
31. is_warehousing (integer)
|
||||
32. able_discount (integer)
|
||||
33. able_site_transfer (integer)
|
||||
34. forbid_sell_status (integer)
|
||||
35. freeze (integer)
|
||||
36. send_state (integer)
|
||||
37. custom_label_type (integer)
|
||||
38. option_required (integer)
|
||||
39. sale_channel (integer)
|
||||
40. sort (integer)
|
||||
41. remark (text)
|
||||
42. pinyin_initial (text)
|
||||
43. goods_cover (text)
|
||||
44. create_time (timestamp without time zone)
|
||||
45. update_time (timestamp without time zone)
|
||||
46. payload (jsonb)
|
||||
47. source_file (text)
|
||||
48. source_endpoint (text)
|
||||
49. fetched_at (timestamp with time zone)
|
||||
50. content_hash (text)
|
||||
|
||||
?? store_goods_sales_records
|
||||
------------------------------------------------------------
|
||||
1. id (bigint)
|
||||
2. tenant_id (bigint)
|
||||
3. site_id (bigint)
|
||||
4. siteid (bigint)
|
||||
5. sitename (text)
|
||||
6. site_goods_id (bigint)
|
||||
7. tenant_goods_id (bigint)
|
||||
8. order_settle_id (bigint)
|
||||
9. order_trade_no (text)
|
||||
10. order_goods_id (bigint)
|
||||
11. ordergoodsid (bigint)
|
||||
12. order_pay_id (bigint)
|
||||
13. order_coupon_id (bigint)
|
||||
14. ledger_name (text)
|
||||
15. ledger_group_name (text)
|
||||
16. ledger_amount (numeric)
|
||||
17. ledger_count (numeric)
|
||||
18. ledger_unit_price (numeric)
|
||||
19. ledger_status (integer)
|
||||
20. discount_money (numeric)
|
||||
21. discount_price (numeric)
|
||||
22. coupon_deduct_money (numeric)
|
||||
23. member_discount_amount (numeric)
|
||||
24. option_coupon_deduct_money (numeric)
|
||||
25. option_member_discount_money (numeric)
|
||||
26. point_discount_money (numeric)
|
||||
27. point_discount_money_cost (numeric)
|
||||
28. real_goods_money (numeric)
|
||||
29. cost_money (numeric)
|
||||
30. push_money (numeric)
|
||||
31. sales_type (integer)
|
||||
32. is_single_order (integer)
|
||||
33. is_delete (integer)
|
||||
34. goods_remark (text)
|
||||
35. option_price (numeric)
|
||||
36. option_value_name (text)
|
||||
37. option_name (text)
|
||||
38. member_coupon_id (bigint)
|
||||
39. package_coupon_id (bigint)
|
||||
40. sales_man_org_id (bigint)
|
||||
41. salesman_name (text)
|
||||
42. salesman_role_id (bigint)
|
||||
43. salesman_user_id (bigint)
|
||||
44. operator_id (bigint)
|
||||
45. operator_name (text)
|
||||
46. opensalesman (text)
|
||||
47. returns_number (integer)
|
||||
48. site_table_id (bigint)
|
||||
49. tenant_goods_business_id (bigint)
|
||||
50. tenant_goods_category_id (bigint)
|
||||
51. create_time (timestamp without time zone)
|
||||
52. payload (jsonb)
|
||||
53. source_file (text)
|
||||
54. source_endpoint (text)
|
||||
55. fetched_at (timestamp with time zone)
|
||||
56. content_hash (text)
|
||||
|
||||
?? table_fee_discount_records
|
||||
------------------------------------------------------------
|
||||
1. id (bigint)
|
||||
2. tenant_id (bigint)
|
||||
3. site_id (bigint)
|
||||
4. siteprofile (jsonb)
|
||||
5. site_table_id (bigint)
|
||||
6. tableprofile (jsonb)
|
||||
7. tenant_table_area_id (bigint)
|
||||
8. adjust_type (integer)
|
||||
9. ledger_amount (numeric)
|
||||
10. ledger_count (numeric)
|
||||
11. ledger_name (text)
|
||||
12. ledger_status (integer)
|
||||
13. applicant_id (bigint)
|
||||
14. applicant_name (text)
|
||||
15. operator_id (bigint)
|
||||
16. operator_name (text)
|
||||
17. order_settle_id (bigint)
|
||||
18. order_trade_no (text)
|
||||
19. is_delete (integer)
|
||||
20. create_time (timestamp without time zone)
|
||||
21. source_file (text)
|
||||
22. source_endpoint (text)
|
||||
23. fetched_at (timestamp with time zone)
|
||||
24. payload (jsonb)
|
||||
25. content_hash (text)
|
||||
|
||||
?? table_fee_transactions
|
||||
------------------------------------------------------------
|
||||
1. id (bigint)
|
||||
2. tenant_id (bigint)
|
||||
3. site_id (bigint)
|
||||
4. siteprofile (jsonb)
|
||||
5. site_table_id (bigint)
|
||||
6. site_table_area_id (bigint)
|
||||
7. site_table_area_name (text)
|
||||
8. tenant_table_area_id (bigint)
|
||||
9. order_trade_no (text)
|
||||
10. order_pay_id (bigint)
|
||||
11. order_settle_id (bigint)
|
||||
12. ledger_name (text)
|
||||
13. ledger_amount (numeric)
|
||||
14. ledger_count (numeric)
|
||||
15. ledger_unit_price (numeric)
|
||||
16. ledger_status (integer)
|
||||
17. ledger_start_time (timestamp without time zone)
|
||||
18. ledger_end_time (timestamp without time zone)
|
||||
19. start_use_time (timestamp without time zone)
|
||||
20. last_use_time (timestamp without time zone)
|
||||
21. real_table_use_seconds (integer)
|
||||
22. real_table_charge_money (numeric)
|
||||
23. add_clock_seconds (integer)
|
||||
24. adjust_amount (numeric)
|
||||
25. coupon_promotion_amount (numeric)
|
||||
26. member_discount_amount (numeric)
|
||||
27. used_card_amount (numeric)
|
||||
28. mgmt_fee (numeric)
|
||||
29. service_money (numeric)
|
||||
30. fee_total (numeric)
|
||||
31. is_single_order (integer)
|
||||
32. is_delete (integer)
|
||||
33. member_id (bigint)
|
||||
34. operator_id (bigint)
|
||||
35. operator_name (text)
|
||||
36. salesman_name (text)
|
||||
37. salesman_org_id (bigint)
|
||||
38. salesman_user_id (bigint)
|
||||
39. create_time (timestamp without time zone)
|
||||
40. payload (jsonb)
|
||||
41. source_file (text)
|
||||
42. source_endpoint (text)
|
||||
43. fetched_at (timestamp with time zone)
|
||||
44. content_hash (text)
|
||||
|
||||
?? tenant_goods_master
|
||||
------------------------------------------------------------
|
||||
1. id (bigint)
|
||||
2. tenant_id (bigint)
|
||||
3. goods_name (text)
|
||||
4. goods_bar_code (text)
|
||||
5. goods_category_id (bigint)
|
||||
6. goods_second_category_id (bigint)
|
||||
7. categoryname (text)
|
||||
8. unit (text)
|
||||
9. goods_number (text)
|
||||
10. out_goods_id (text)
|
||||
11. goods_state (integer)
|
||||
12. sale_channel (integer)
|
||||
13. able_discount (integer)
|
||||
14. able_site_transfer (integer)
|
||||
15. is_delete (integer)
|
||||
16. is_warehousing (integer)
|
||||
17. isinsite (integer)
|
||||
18. cost_price (numeric)
|
||||
19. cost_price_type (integer)
|
||||
20. market_price (numeric)
|
||||
21. min_discount_price (numeric)
|
||||
22. common_sale_royalty (numeric)
|
||||
23. point_sale_royalty (numeric)
|
||||
24. pinyin_initial (text)
|
||||
25. commoditycode (text)
|
||||
26. commodity_code (text)
|
||||
27. goods_cover (text)
|
||||
28. supplier_id (bigint)
|
||||
29. remark_name (text)
|
||||
30. create_time (timestamp without time zone)
|
||||
31. update_time (timestamp without time zone)
|
||||
32. payload (jsonb)
|
||||
33. source_file (text)
|
||||
34. source_endpoint (text)
|
||||
35. fetched_at (timestamp with time zone)
|
||||
36. content_hash (text)
|
||||
238
tmp/sync_api_to_ods_columns.py
Normal file
238
tmp/sync_api_to_ods_columns.py
Normal file
@@ -0,0 +1,238 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
同步 API 字段到 ODS 数据库表
|
||||
1. 检测 API JSON 字段与 ODS 表列的差异
|
||||
2. 生成并执行 DDL 添加缺失列
|
||||
3. 忽略 siteProfile 等嵌套对象字段
|
||||
"""
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
# 添加项目路径
|
||||
project_root = Path(__file__).parent.parent / "etl_billiards"
|
||||
sys.path.insert(0, str(project_root))
|
||||
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv(project_root / ".env")
|
||||
|
||||
from database.connection import DatabaseConnection
|
||||
|
||||
|
||||
# 忽略的 siteProfile 相关字段和其他非业务字段
# NOTE: 集合内字段名均为小写;调用方比较前需先对字段名做 lower()
IGNORED_FIELDS = {
    # siteProfile 内嵌字段(嵌套对象展开产生,不落 ODS 宽表)
    "siteprofile", "address", "avatar", "business_tel", "customer_service_qrcode",
    "customer_service_wechat", "fixed_pay_qrcode", "full_address", "latitude", "longitude",
    "light_status", "light_token", "light_type", "org_id", "prod_env", "shop_name",
    "shop_status", "site_label", "site_type", "tenant_site_region_id", "wifi_name",
    "wifi_password", "attendance_distance", "attendance_enabled", "auto_light",
    "ewelink_client_id",
    # tableprofile 内嵌字段
    "tableprofile",
    # 已有的系统字段(ODS 表的元数据列,不需要从 API 同步)
    "content_hash", "payload", "source_file", "source_endpoint", "fetched_at", "record_index",
}
|
||||
|
||||
# API 字段类型推断规则
|
||||
def infer_column_type(field_name: str, sample_value=None) -> str:
    """根据字段名和样本值推断 PostgreSQL 列类型。

    推断顺序:先按字段名命名约定依次匹配(ID → 金额 → 时间 → 布尔 →
    计数 → 比率),全部不命中时再按样本值的 Python 类型推断,最后兜底 TEXT。
    """
    fn = field_name.lower()

    # 1) ID 字段 -> BIGINT
    id_names = ("id", "tenant_id", "member_id", "site_id", "table_id",
                "operator_id", "relate_id", "order_id")
    if fn.endswith("_id") or fn in id_names:
        return "BIGINT"

    # 2) 金额字段 -> NUMERIC(18,2)
    money_hints = ("_money", "_amount", "_price", "_cost", "_discount", "_balance",
                   "_deduct", "_fee", "_charge", "money", "amount", "price")
    if any(h in fn for h in money_hints):
        return "NUMERIC(18,2)"

    # 3) 时间字段 -> TIMESTAMP
    if any(h in fn for h in ("_time", "time", "_date", "date")) or fn.startswith(("create", "update")):
        return "TIMESTAMP"

    # 4) 布尔/状态字段 -> INTEGER
    if fn.startswith(("is_", "can_", "able_")):
        return "INTEGER"

    # 5) 数量/计数字段 -> INTEGER
    count_hints = ("_count", "_num", "_seconds", "_minutes", "count", "num", "seconds")
    if any(h in fn for h in count_hints):
        return "INTEGER"

    # 6) 比率/折扣率 -> NUMERIC(10,4)
    if any(h in fn for h in ("_radio", "_ratio", "_rate")):
        return "NUMERIC(10,4)"

    # 7) 按样本值类型推断;bool 必须先于 int 判断(bool 是 int 的子类)
    if sample_value is not None:
        if isinstance(sample_value, bool):
            return "BOOLEAN"
        if isinstance(sample_value, int):
            # 超出 int32 范围的整数使用 BIGINT
            in_int32 = -2147483648 <= sample_value <= 2147483647
            return "INTEGER" if in_int32 else "BIGINT"
        if isinstance(sample_value, float):
            return "NUMERIC(18,2)"
        if isinstance(sample_value, (list, dict)):
            return "JSONB"

    # 8) 默认文本
    return "TEXT"
|
||||
|
||||
|
||||
def get_db_table_columns(db: DatabaseConnection, table_name: str) -> set:
    """获取数据库表的所有列名(统一转小写,便于不区分大小写比较)"""
    # 支持 "schema.table" 与裸表名两种写法;裸表名默认 public schema
    if "." in table_name:
        schema, name = table_name.split(".", 1)
    else:
        schema, name = "public", table_name
    sql = """
    SELECT column_name
    FROM information_schema.columns
    WHERE table_schema = %s AND table_name = %s
    """
    return {row["column_name"].lower() for row in db.query(sql, (schema, name))}
|
||||
|
||||
|
||||
def get_api_fields_from_comparison(comparison_file: Path) -> dict:
    """读取 API/ODS 字段对比文件;文件不存在时返回空 dict 而不是抛异常"""
    if comparison_file.exists():
        with open(comparison_file, "r", encoding="utf-8") as f:
            return json.load(f)
    return {}
|
||||
|
||||
|
||||
def generate_ddl_for_missing_fields(table_name: str, missing_fields: list, api_data: dict = None) -> list:
    """生成添加缺失列的 DDL 语句列表。

    对每个缺失字段,先在 api_data 的前 10 条记录中查找样本值辅助类型推断,
    再拼出幂等的 ALTER TABLE ... ADD COLUMN IF NOT EXISTS 语句。
    """

    def _sample_of(field):
        # 在 API 返回的前 10 条记录中查找该字段的样本值;找不到返回 None
        if not api_data:
            return None
        for record in api_data.get("data", [])[:10]:
            if isinstance(record, dict) and field in record:
                return record[field]
        return None

    return [
        f'ALTER TABLE {table_name} ADD COLUMN IF NOT EXISTS "{field}" '
        f'{infer_column_type(field, _sample_of(field))};'
        for field in missing_fields
    ]
|
||||
|
||||
|
||||
def main():
    """主流程:读取 API/ODS 字段对比结果,为缺失字段生成并执行 ALTER TABLE DDL。

    成功路径返回 True/False(是否全部 DDL 成功);错误路径返回 None,
    __main__ 入口会把假值转换为退出码 1。
    """
    print("=" * 80)
    print("API → ODS 字段同步脚本")
    print("时间:", datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
    print("=" * 80)

    # 连接数据库(DSN 从 .env 的 PG_DSN 读取)
    dsn = os.getenv("PG_DSN")
    if not dsn:
        print("[错误] 未找到 PG_DSN 环境变量")
        return  # 返回 None -> __main__ 按失败处理

    db = DatabaseConnection(dsn)

    # 加载对比数据(由上游脚本生成的 API 与 ODS 字段差异报告)
    comparison_file = Path(__file__).parent / "api_ods_comparison.json"
    comparison = get_api_fields_from_comparison(comparison_file)

    if not comparison:
        print("[错误] 未找到对比文件 api_ods_comparison.json")
        db.close()
        return

    all_ddl = []       # 本次生成的全部 DDL
    executed_ddl = []  # 执行成功的 DDL
    failed_ddl = []    # (ddl, 错误信息) 执行失败的 DDL

    for task_code, data in comparison.items():
        table_name = data.get("table_name")
        missing = data.get("missing_in_ods", [])

        if not table_name or not missing:
            continue

        # 过滤忽略的字段(siteProfile 嵌套字段、系统元数据字段等)
        filtered_missing = [
            f for f in missing
            if f.lower() not in IGNORED_FIELDS
        ]

        if not filtered_missing:
            continue

        # 获取数据库当前列
        current_cols = get_db_table_columns(db, table_name)

        # 二次过滤:排除已存在的列(对比文件可能滞后于数据库实际结构)
        truly_missing = [
            f for f in filtered_missing
            if f.lower() not in current_cols
        ]

        if not truly_missing:
            print(f"\n【{task_code}】({table_name})")
            print(f"  所有缺失字段已在数据库中存在,跳过")
            continue

        print(f"\n【{task_code}】({table_name})")
        print(f"  需要添加 {len(truly_missing)} 列: {', '.join(truly_missing)}")

        # 生成 DDL
        # NOTE(review): 此处未传 api_data,类型推断只依赖字段名 —— 待确认是否有意为之
        ddl_list = generate_ddl_for_missing_fields(table_name, truly_missing)
        all_ddl.extend(ddl_list)

        # 执行 DDL:逐条提交,失败只回滚当前语句,不影响其余语句
        for ddl in ddl_list:
            try:
                db.execute(ddl)
                db.commit()
                executed_ddl.append(ddl)
                print(f"  [成功] {ddl[:80]}...")
            except Exception as e:
                db.rollback()
                failed_ddl.append((ddl, str(e)))
                print(f"  [失败] {ddl[:60]}... - {e}")

    db.close()

    # 汇总
    print("\n" + "=" * 80)
    print("执行汇总")
    print("=" * 80)
    print(f"总计生成 DDL: {len(all_ddl)} 条")
    print(f"执行成功: {len(executed_ddl)} 条")
    print(f"执行失败: {len(failed_ddl)} 条")

    if failed_ddl:
        print("\n失败的 DDL:")
        for ddl, err in failed_ddl:
            print(f"  - {ddl}")
            print(f"    错误: {err}")

    # 保存执行日志(JSON,供后续审计/重跑参考)
    log_file = Path(__file__).parent / "sync_ods_columns_log.json"
    log = {
        "executed_at": datetime.now().isoformat(),
        "total_ddl": len(all_ddl),
        "success_count": len(executed_ddl),
        "failed_count": len(failed_ddl),
        "executed_ddl": executed_ddl,
        "failed_ddl": [{"ddl": d, "error": e} for d, e in failed_ddl],
    }
    with open(log_file, "w", encoding="utf-8") as f:
        json.dump(log, f, ensure_ascii=False, indent=2)

    print(f"\n执行日志已保存到: {log_file}")

    return len(failed_ddl) == 0


if __name__ == "__main__":
    success = main()
    sys.exit(0 if success else 1)
|
||||
181
tmp/sync_bd_manual.py
Normal file
181
tmp/sync_bd_manual.py
Normal file
@@ -0,0 +1,181 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""校验并同步 bd_manual 文档与数据库结构"""
|
||||
import json
|
||||
import re
|
||||
from pathlib import Path
|
||||
import psycopg2
|
||||
|
||||
# 数据库连接串
# NOTE(review): 凭据硬编码在源码中,建议改为从环境变量/.env 读取 —— 待确认
DSN = 'postgresql://local-Python:Neo-local-1991125@100.64.0.4:5432/LLZQ-test'

# 类型映射 (PostgreSQL information_schema 的 data_type -> 文档显示格式)
TYPE_MAP = {
    'bigint': 'BIGINT',
    'integer': 'INTEGER',
    'smallint': 'SMALLINT',
    'numeric': 'NUMERIC',
    'text': 'TEXT',
    'character varying': 'VARCHAR',
    'boolean': 'BOOLEAN',
    'timestamp with time zone': 'TIMESTAMPTZ',
    'timestamp without time zone': 'TIMESTAMP',
    'date': 'DATE',
    'jsonb': 'JSONB',
    'json': 'JSON',
}
|
||||
|
||||
def get_db_schema():
    """获取 billiards_dwd schema 下所有表的列定义。

    返回 {表名: [ {'column': 列名, 'type': 文档格式类型, 'nullable': 'YES'/'NO'}, ... ]},
    列顺序与数据库中的 ordinal_position 一致。
    """
    conn = psycopg2.connect(DSN)
    try:
        cur = conn.cursor()
        cur.execute("""
            SELECT table_name, column_name, data_type, is_nullable,
                   COALESCE(character_maximum_length, numeric_precision) as max_length,
                   numeric_scale
            FROM information_schema.columns
            WHERE table_schema = 'billiards_dwd'
            ORDER BY table_name, ordinal_position
        """)

        tables = {}
        for row in cur.fetchall():
            table_name, col_name, data_type, nullable, max_len, scale = row
            if table_name not in tables:
                tables[table_name] = []

            # 格式化类型:带精度的类型需要拼出 (长度[,小数位])
            type_str = TYPE_MAP.get(data_type, data_type.upper())
            if data_type == 'numeric' and max_len and scale is not None:
                # 修复:原实现把精度写成了字面量 "67,286",应使用实际的 precision/scale
                type_str = f'NUMERIC({max_len},{scale})'
            elif data_type == 'character varying' and max_len:
                # 修复:同上,VARCHAR 长度应使用实际的 character_maximum_length
                type_str = f'VARCHAR({max_len})'

            tables[table_name].append({
                'column': col_name,
                'type': type_str,
                'nullable': 'YES' if nullable == 'YES' else 'NO',
            })

        cur.close()
        return tables
    finally:
        # 保证查询异常时连接也能释放(原实现异常路径会泄漏连接)
        conn.close()
|
||||
|
||||
def parse_md_fields(content):
    """解析 MD 文档中的字段表格,返回 {列名: {'type':..., 'nullable':...}}"""
    # 表格行形如: | 序号 | 列名 | 类型 | 可空 | ;同名列后出现的覆盖先出现的
    row_re = re.compile(r'\|\s*\d+\s*\|\s*(\w+)\s*\|\s*([^|]+)\s*\|\s*(\w+)\s*\|')
    return {
        col_name.strip(): {'type': col_type.strip(), 'nullable': nullable.strip()}
        for col_name, col_type, nullable in row_re.findall(content)
    }
|
||||
|
||||
def compare_and_report(table_name, db_cols, doc_path):
    """对比数据库列与文档字段,返回差异字典;文档文件缺失时返回 missing_doc 标记"""
    if not doc_path.exists():
        return {'missing_doc': True, 'table': table_name}

    doc_fields = parse_md_fields(doc_path.read_text(encoding='utf-8'))

    db_names = {col['column'] for col in db_cols}
    doc_names = set(doc_fields)

    # 类型归一化:忽略大小写与空格后比较
    def _norm(type_text):
        return type_text.upper().replace(' ', '')

    type_mismatches = []
    for col in db_cols:
        name = col['column']
        if name not in doc_fields:
            continue
        if _norm(col['type']) != _norm(doc_fields[name]['type']):
            type_mismatches.append({
                'column': name,
                'db_type': col['type'],
                'doc_type': doc_fields[name]['type'],
            })

    return {
        'table': table_name,
        'missing_in_doc': list(db_names - doc_names),
        'extra_in_doc': list(doc_names - db_names),
        'type_mismatches': type_mismatches,
        'doc_path': str(doc_path),
    }
|
||||
|
||||
def main():
    """主流程:对比 billiards_dwd 的实际表结构与 bd_manual 文档,输出差异报告。

    差异分三类:文档缺列(missing_in_doc)、文档多列(extra_in_doc)、
    类型不一致(type_mismatches);详细结果另存为 tmp/bd_manual_diff.json。
    """
    db_schema = get_db_schema()

    # 文档目录约定:主表文档在 main/,扩展表(*_ex)文档在 Ex/
    main_dir = Path('etl_billiards/docs/bd_manual/main')
    ex_dir = Path('etl_billiards/docs/bd_manual/Ex')

    all_diffs = []

    for table_name, columns in sorted(db_schema.items()):
        # 确定文档路径
        if table_name.endswith('_ex'):
            base_name = table_name[:-3]  # 去掉 _ex;NOTE(review): 该变量后续未被使用
            doc_path = ex_dir / f'BD_manual_{table_name}.md'
        else:
            doc_path = main_dir / f'BD_manual_{table_name}.md'

        diff = compare_and_report(table_name, columns, doc_path)
        # 只收集存在任一类差异(或文档整体缺失)的表
        if diff.get('missing_in_doc') or diff.get('extra_in_doc') or diff.get('type_mismatches') or diff.get('missing_doc'):
            all_diffs.append(diff)

    # 输出报告
    print("=" * 80)
    print("BD Manual vs Database Schema Comparison Report")
    print("=" * 80)

    total_missing = 0
    total_extra = 0
    total_type_mismatch = 0

    for diff in all_diffs:
        table = diff['table']
        if diff.get('missing_doc'):
            print(f"\n### {table}: MISSING DOCUMENT ###")
            continue

        # has_issues 保证每个表的 "### 表名 ###" 表头只打印一次
        has_issues = False

        if diff['missing_in_doc']:
            if not has_issues:
                print(f"\n### {table} ###")
                has_issues = True
            print(f"  Missing in doc ({len(diff['missing_in_doc'])}): {', '.join(sorted(diff['missing_in_doc']))}")
            total_missing += len(diff['missing_in_doc'])

        if diff['extra_in_doc']:
            if not has_issues:
                print(f"\n### {table} ###")
                has_issues = True
            print(f"  Extra in doc ({len(diff['extra_in_doc'])}): {', '.join(sorted(diff['extra_in_doc']))}")
            total_extra += len(diff['extra_in_doc'])

        if diff['type_mismatches']:
            if not has_issues:
                print(f"\n### {table} ###")
                has_issues = True
            print(f"  Type mismatches ({len(diff['type_mismatches'])}):")
            for m in diff['type_mismatches']:
                print(f"    - {m['column']}: doc={m['doc_type']}, db={m['db_type']}")
            total_type_mismatch += len(diff['type_mismatches'])

    print("\n" + "=" * 80)
    print(f"Summary: {total_missing} missing, {total_extra} extra, {total_type_mismatch} type mismatches")
    print("=" * 80)

    # 保存详细结果到 JSON(assumes tmp/ 目录已存在 —— TODO confirm)
    with open('tmp/bd_manual_diff.json', 'w', encoding='utf-8') as f:
        json.dump(all_diffs, f, ensure_ascii=False, indent=2)
    print(f"\nDetailed results saved to tmp/bd_manual_diff.json")


if __name__ == '__main__':
    main()
|
||||
10
tmp/sync_dwd_columns_log.json
Normal file
10
tmp/sync_dwd_columns_log.json
Normal file
@@ -0,0 +1,10 @@
|
||||
{
|
||||
"executed_at": "2026-02-02T19:12:00.539963",
|
||||
"total_ddl": 1,
|
||||
"success_count": 1,
|
||||
"failed_count": 0,
|
||||
"executed_ddl": [
|
||||
"ALTER TABLE billiards_dwd.dwd_member_balance_change ADD COLUMN IF NOT EXISTS \"principal_change_amount\" NUMERIC(18,2);"
|
||||
],
|
||||
"failed_ddl": []
|
||||
}
|
||||
13
tmp/sync_ods_columns_log.json
Normal file
13
tmp/sync_ods_columns_log.json
Normal file
@@ -0,0 +1,13 @@
|
||||
{
|
||||
"executed_at": "2026-02-02T19:10:13.492902",
|
||||
"total_ddl": 4,
|
||||
"success_count": 4,
|
||||
"failed_count": 0,
|
||||
"executed_ddl": [
|
||||
"ALTER TABLE billiards_ods.settlement_records ADD COLUMN IF NOT EXISTS \"tenant_id\" BIGINT;",
|
||||
"ALTER TABLE billiards_ods.recharge_settlements ADD COLUMN IF NOT EXISTS \"tenant_id\" BIGINT;",
|
||||
"ALTER TABLE billiards_ods.group_buy_packages ADD COLUMN IF NOT EXISTS \"tableareanamelist\" TEXT;",
|
||||
"ALTER TABLE billiards_ods.group_buy_packages ADD COLUMN IF NOT EXISTS \"tenanttableareaidlist\" TEXT;"
|
||||
],
|
||||
"failed_ddl": []
|
||||
}
|
||||
259
tmp/sync_ods_to_dwd_columns.py
Normal file
259
tmp/sync_ods_to_dwd_columns.py
Normal file
@@ -0,0 +1,259 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
同步 ODS 字段到 DWD 数据库表
|
||||
1. 检测 ODS 新增字段对应的 DWD 表是否缺失列
|
||||
2. 根据 dwd_load_task.py 的 FACT_MAPPINGS 生成 DDL
|
||||
"""
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
# 添加项目路径
|
||||
project_root = Path(__file__).parent.parent / "etl_billiards"
|
||||
sys.path.insert(0, str(project_root))
|
||||
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv(project_root / ".env")
|
||||
|
||||
from database.connection import DatabaseConnection
|
||||
|
||||
|
||||
# ODS -> DWD 表映射(从 dwd_load_task.py 提取)
# NOTE(review): 本脚本的 main() 并未使用该映射(只遍历 NEW_FIELDS_TO_DWD),
# 此处保留作对照参考 —— 待确认是否可删除
ODS_TO_DWD_MAP = {
    "billiards_ods.table_fee_transactions": [
        "billiards_dwd.dim_site", "billiards_dwd.dim_site_ex",
        "billiards_dwd.dwd_table_fee_log", "billiards_dwd.dwd_table_fee_log_ex",
    ],
    "billiards_ods.site_tables_master": [
        "billiards_dwd.dim_table", "billiards_dwd.dim_table_ex",
    ],
    "billiards_ods.assistant_accounts_master": [
        "billiards_dwd.dim_assistant", "billiards_dwd.dim_assistant_ex",
    ],
    "billiards_ods.assistant_service_records": [
        "billiards_dwd.dwd_assistant_service_log", "billiards_dwd.dwd_assistant_service_log_ex",
    ],
    "billiards_ods.assistant_cancellation_records": [
        "billiards_dwd.dwd_assistant_trash_event", "billiards_dwd.dwd_assistant_trash_event_ex",
    ],
    "billiards_ods.store_goods_sales_records": [
        "billiards_dwd.dwd_store_goods_sale", "billiards_dwd.dwd_store_goods_sale_ex",
    ],
    "billiards_ods.payment_transactions": [
        "billiards_dwd.dwd_payment",
    ],
    "billiards_ods.member_profiles": [
        "billiards_dwd.dim_member", "billiards_dwd.dim_member_ex",
    ],
    "billiards_ods.member_stored_value_cards": [
        "billiards_dwd.dim_member_card_account", "billiards_dwd.dim_member_card_account_ex",
    ],
    "billiards_ods.member_balance_changes": [
        "billiards_dwd.dwd_member_balance_change", "billiards_dwd.dwd_member_balance_change_ex",
    ],
    "billiards_ods.settlement_records": [
        "billiards_dwd.dwd_settlement_head", "billiards_dwd.dwd_settlement_head_ex",
    ],
    "billiards_ods.recharge_settlements": [
        "billiards_dwd.dwd_recharge_order", "billiards_dwd.dwd_recharge_order_ex",
    ],
    "billiards_ods.group_buy_packages": [
        "billiards_dwd.dim_groupbuy_package", "billiards_dwd.dim_groupbuy_package_ex",
    ],
    "billiards_ods.group_buy_redemption_records": [
        "billiards_dwd.dwd_groupbuy_redemption", "billiards_dwd.dwd_groupbuy_redemption_ex",
    ],
    "billiards_ods.table_fee_discount_records": [
        "billiards_dwd.dwd_table_fee_adjust", "billiards_dwd.dwd_table_fee_adjust_ex",
    ],
    "billiards_ods.tenant_goods_master": [
        "billiards_dwd.dim_tenant_goods", "billiards_dwd.dim_tenant_goods_ex",
    ],
    "billiards_ods.store_goods_master": [
        "billiards_dwd.dim_store_goods", "billiards_dwd.dim_store_goods_ex",
    ],
}

# 需要同步到 DWD 的新增 ODS 字段(从排查报告中获取)
# 格式: {ods_table: [(ods_col, dwd_col, dwd_table, col_type), ...]}
# 说明: ods_col 与 dwd_col 多数同名;不同名的是 DWD 侧做了 snake_case 规范化
#       (如 assistantteamname -> assistant_team_name)
NEW_FIELDS_TO_DWD = {
    "billiards_ods.table_fee_transactions": [
        ("activity_discount_amount", "activity_discount_amount", "billiards_dwd.dwd_table_fee_log", "NUMERIC(18,2)"),
        ("real_service_money", "real_service_money", "billiards_dwd.dwd_table_fee_log", "NUMERIC(18,2)"),
        ("order_consumption_type", "order_consumption_type", "billiards_dwd.dwd_table_fee_log_ex", "INTEGER"),
    ],
    "billiards_ods.assistant_service_records": [
        ("real_service_money", "real_service_money", "billiards_dwd.dwd_assistant_service_log", "NUMERIC(18,2)"),
        ("assistantteamname", "assistant_team_name", "billiards_dwd.dwd_assistant_service_log_ex", "TEXT"),
    ],
    "billiards_ods.assistant_cancellation_records": [
        ("tenant_id", "tenant_id", "billiards_dwd.dwd_assistant_trash_event", "BIGINT"),
    ],
    "billiards_ods.store_goods_sales_records": [
        ("coupon_share_money", "coupon_share_money", "billiards_dwd.dwd_store_goods_sale", "NUMERIC(18,2)"),
    ],
    "billiards_ods.payment_transactions": [
        ("tenant_id", "tenant_id", "billiards_dwd.dwd_payment", "BIGINT"),
    ],
    "billiards_ods.member_profiles": [
        ("pay_money_sum", "pay_money_sum", "billiards_dwd.dim_member", "NUMERIC(18,2)"),
        ("recharge_money_sum", "recharge_money_sum", "billiards_dwd.dim_member", "NUMERIC(18,2)"),
        ("person_tenant_org_id", "person_tenant_org_id", "billiards_dwd.dim_member_ex", "BIGINT"),
        ("person_tenant_org_name", "person_tenant_org_name", "billiards_dwd.dim_member_ex", "TEXT"),
        ("register_source", "register_source", "billiards_dwd.dim_member_ex", "TEXT"),
    ],
    "billiards_ods.member_stored_value_cards": [
        ("principal_balance", "principal_balance", "billiards_dwd.dim_member_card_account", "NUMERIC(18,2)"),
        ("member_grade", "member_grade", "billiards_dwd.dim_member_card_account", "INTEGER"),
        ("able_share_member_discount", "able_share_member_discount", "billiards_dwd.dim_member_card_account_ex", "BOOLEAN"),
        ("electricity_deduct_radio", "electricity_deduct_radio", "billiards_dwd.dim_member_card_account_ex", "NUMERIC(10,4)"),
        ("electricity_discount", "electricity_discount", "billiards_dwd.dim_member_card_account_ex", "NUMERIC(10,4)"),
        ("electricitycarddeduct", "electricity_card_deduct", "billiards_dwd.dim_member_card_account_ex", "BOOLEAN"),
        ("rechargefreezebalance", "recharge_freeze_balance", "billiards_dwd.dim_member_card_account_ex", "NUMERIC(18,2)"),
    ],
    "billiards_ods.member_balance_changes": [
        ("principal_after", "principal_after", "billiards_dwd.dwd_member_balance_change", "NUMERIC(18,2)"),
        ("principal_before", "principal_before", "billiards_dwd.dwd_member_balance_change", "NUMERIC(18,2)"),
        ("principal_data", "principal_change_amount", "billiards_dwd.dwd_member_balance_change", "NUMERIC(18,2)"),
    ],
    "billiards_ods.settlement_records": [
        ("tenant_id", "tenant_id", "billiards_dwd.dwd_settlement_head", "BIGINT"),
    ],
    "billiards_ods.recharge_settlements": [
        ("tenant_id", "tenant_id", "billiards_dwd.dwd_recharge_order", "BIGINT"),
    ],
    "billiards_ods.group_buy_packages": [
        ("sort", "sort", "billiards_dwd.dim_groupbuy_package", "INTEGER"),
        ("is_first_limit", "is_first_limit", "billiards_dwd.dim_groupbuy_package", "BOOLEAN"),
        ("tenantcouponsaleorderitemid", "tenant_coupon_sale_order_item_id", "billiards_dwd.dim_groupbuy_package_ex", "BIGINT"),
    ],
    "billiards_ods.group_buy_redemption_records": [
        ("coupon_sale_id", "coupon_sale_id", "billiards_dwd.dwd_groupbuy_redemption", "BIGINT"),
        ("member_discount_money", "member_discount_money", "billiards_dwd.dwd_groupbuy_redemption", "NUMERIC(18,2)"),
        ("assistant_share_money", "assistant_share_money", "billiards_dwd.dwd_groupbuy_redemption_ex", "NUMERIC(18,2)"),
        ("table_share_money", "table_share_money", "billiards_dwd.dwd_groupbuy_redemption_ex", "NUMERIC(18,2)"),
        ("goods_share_money", "goods_share_money", "billiards_dwd.dwd_groupbuy_redemption_ex", "NUMERIC(18,2)"),
        ("recharge_share_money", "recharge_share_money", "billiards_dwd.dwd_groupbuy_redemption_ex", "NUMERIC(18,2)"),
    ],
    "billiards_ods.site_tables_master": [
        ("order_id", "order_id", "billiards_dwd.dim_table", "BIGINT"),
    ],
    "billiards_ods.store_goods_master": [
        ("commodity_code", "commodity_code", "billiards_dwd.dim_store_goods", "TEXT"),
        ("not_sale", "not_sale", "billiards_dwd.dim_store_goods", "INTEGER"),
    ],
    "billiards_ods.table_fee_discount_records": [
        ("table_name", "table_name", "billiards_dwd.dwd_table_fee_adjust", "TEXT"),
        ("table_price", "table_price", "billiards_dwd.dwd_table_fee_adjust", "NUMERIC(18,2)"),
        ("charge_free", "charge_free", "billiards_dwd.dwd_table_fee_adjust", "BOOLEAN"),
        ("area_type_id", "area_type_id", "billiards_dwd.dwd_table_fee_adjust_ex", "BIGINT"),
        ("site_table_area_id", "site_table_area_id", "billiards_dwd.dwd_table_fee_adjust_ex", "BIGINT"),
        ("site_table_area_name", "site_table_area_name", "billiards_dwd.dwd_table_fee_adjust_ex", "TEXT"),
        ("sitename", "site_name", "billiards_dwd.dwd_table_fee_adjust_ex", "TEXT"),
        ("tenant_name", "tenant_name", "billiards_dwd.dwd_table_fee_adjust_ex", "TEXT"),
    ],
    "billiards_ods.tenant_goods_master": [
        ("not_sale", "not_sale", "billiards_dwd.dim_tenant_goods", "INTEGER"),
    ],
}
|
||||
|
||||
|
||||
def get_db_table_columns(db: DatabaseConnection, table_name: str) -> set:
    """获取数据库表的所有列名(小写集合,便于不区分大小写的存在性判断)"""
    # "schema.table" 写法拆分 schema;裸表名落到 public
    parts = table_name.split(".", 1)
    schema, name = (parts[0], parts[1]) if len(parts) == 2 else ("public", table_name)
    sql = """
    SELECT column_name
    FROM information_schema.columns
    WHERE table_schema = %s AND table_name = %s
    """
    rows = db.query(sql, (schema, name))
    return {r["column_name"].lower() for r in rows}
|
||||
|
||||
|
||||
def main():
    """主流程:按 NEW_FIELDS_TO_DWD 清单,把 ODS 新增字段同步到对应 DWD 表。

    对每个目标列:先查 information_schema 确认是否已存在,不存在则执行
    幂等的 ALTER TABLE ... ADD COLUMN IF NOT EXISTS,并记录执行日志。
    返回 True 表示全部成功,供 __main__ 转换为进程退出码。
    """
    print("=" * 80)
    print("ODS → DWD 字段同步脚本")
    print("时间:", datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
    print("=" * 80)

    # 连接数据库(DSN 从 .env 的 PG_DSN 读取)
    dsn = os.getenv("PG_DSN")
    if not dsn:
        print("[错误] 未找到 PG_DSN 环境变量")
        return False

    db = DatabaseConnection(dsn)

    all_ddl = []       # 本次生成的全部 DDL
    executed_ddl = []  # 执行成功的 DDL
    failed_ddl = []    # (ddl, 错误信息) 执行失败的 DDL

    for ods_table, fields in NEW_FIELDS_TO_DWD.items():
        print(f"\n处理 ODS 表: {ods_table}")

        for ods_col, dwd_col, dwd_table, col_type in fields:
            # 检查 DWD 表是否存在该列;目标表不存在/不可访问时跳过该列
            try:
                dwd_cols = get_db_table_columns(db, dwd_table)
            except Exception as e:
                print(f"  [跳过] DWD 表 {dwd_table} 不存在或无法访问: {e}")
                continue

            if dwd_col.lower() in dwd_cols:
                print(f"  [存在] {dwd_table}.{dwd_col}")
                continue

            # 生成 DDL(IF NOT EXISTS 保证幂等,可重复执行)
            ddl = f'ALTER TABLE {dwd_table} ADD COLUMN IF NOT EXISTS "{dwd_col}" {col_type};'
            all_ddl.append(ddl)

            # 执行 DDL:逐条提交,失败只回滚当前语句,不影响其余语句
            try:
                db.execute(ddl)
                db.commit()
                executed_ddl.append(ddl)
                print(f"  [新增] {dwd_table}.{dwd_col} ({col_type})")
            except Exception as e:
                db.rollback()
                failed_ddl.append((ddl, str(e)))
                print(f"  [失败] {dwd_table}.{dwd_col} - {e}")

    db.close()

    # 汇总
    print("\n" + "=" * 80)
    print("执行汇总")
    print("=" * 80)
    print(f"总计生成 DDL: {len(all_ddl)} 条")
    print(f"执行成功: {len(executed_ddl)} 条")
    print(f"执行失败: {len(failed_ddl)} 条")

    if failed_ddl:
        print("\n失败的 DDL:")
        for ddl, err in failed_ddl:
            print(f"  - {ddl}")
            print(f"    错误: {err}")

    # 保存执行日志(JSON,供后续审计/重跑参考)
    log_file = Path(__file__).parent / "sync_dwd_columns_log.json"
    log = {
        "executed_at": datetime.now().isoformat(),
        "total_ddl": len(all_ddl),
        "success_count": len(executed_ddl),
        "failed_count": len(failed_ddl),
        "executed_ddl": executed_ddl,
        "failed_ddl": [{"ddl": d, "error": e} for d, e in failed_ddl],
    }
    with open(log_file, "w", encoding="utf-8") as f:
        json.dump(log, f, ensure_ascii=False, indent=2)

    print(f"\n执行日志已保存到: {log_file}")

    return len(failed_ddl) == 0


if __name__ == "__main__":
    success = main()
    sys.exit(0 if success else 1)
|
||||
86
tmp/test_backfill_feature.py
Normal file
86
tmp/test_backfill_feature.py
Normal file
@@ -0,0 +1,86 @@
|
||||
# -*- coding: utf-8 -*-
"""
测试 ODS 回填特性

演示脚本:连接 ODS 库抽一条样例记录,并展示 "回填 NULL 列" 模式生成的
INSERT ... ON CONFLICT ... COALESCE SQL 形态。只读查询 + 打印,不修改数据。
"""
import os
import sys
from pathlib import Path

# 将项目目录加入 sys.path,以便导入 etl_billiards 内的模块
project_root = Path(__file__).parent.parent / "etl_billiards"
sys.path.insert(0, str(project_root))

from dotenv import load_dotenv
load_dotenv(project_root / ".env")

from database.connection import DatabaseConnection

# 连接数据库(PG_DSN 来自 .env)
dsn = os.getenv("PG_DSN")
db = DatabaseConnection(dsn)

print("=== 测试 ODS 回填特性 ===")

# 1. 创建一个测试场景:找一条有 NULL 值的记录
#    NOTE(review): 注释说找 "有 NULL 值" 的记录,但 WHERE 条件是 IS NOT NULL,
#    两者相反 —— 待确认本意
result = db.query("""
    SELECT id, plcouponsaleamount, mervousalesamount,
           payload->'settleList'->>'plCouponSaleAmount' as payload_val
    FROM billiards_ods.settlement_records
    WHERE plcouponsaleamount IS NOT NULL
    LIMIT 1
""")

if result:
    row = result[0]
    print(f"找到测试记录: id={row['id']}")
    print(f"  当前 plcouponsaleamount: {row['plcouponsaleamount']}")
    print(f"  payload 中的值: {row['payload_val']}")
else:
    print("未找到测试记录")

# 2. 模拟生成的 SQL 语句
print("\n=== 生成的 SQL 示例 ===")

# 获取表结构(按 ordinal_position 保持与建表一致的列顺序)
cols = db.query("""
    SELECT column_name
    FROM information_schema.columns
    WHERE table_schema = 'billiards_ods'
    AND table_name = 'settlement_records'
    ORDER BY ordinal_position
""")

col_names = [c["column_name"] for c in cols]
pk_cols = ["id"]  # 假设主键是 id

# 元数据列不参与回填更新
meta_cols = {"payload", "source_file", "source_endpoint", "fetched_at", "content_hash"}
pk_cols_lower = {c.lower() for c in pk_cols}
update_cols = [
    c for c in col_names
    if c.lower() not in pk_cols_lower and c.lower() not in meta_cols
]

print(f"表有 {len(col_names)} 列")
print(f"可更新列: {len(update_cols)} 列")

# 生成 SQL:COALESCE(旧值, 新值) 实现 "只填 NULL、不覆盖已有值" 语义
table = "billiards_ods.settlement_records"
pk_clause = ", ".join(f'"{c}"' for c in pk_cols)
set_clause = ", ".join(
    f'"{c}" = COALESCE({table}."{c}", EXCLUDED."{c}")'
    for c in update_cols[:3]  # 只显示前3个
)
where_clause = " OR ".join(f'{table}."{c}" IS NULL' for c in update_cols[:3])

print(f"\nSQL 示例 (前3列):")
print(f"INSERT INTO {table} (...) VALUES ...")
print(f"ON CONFLICT ({pk_clause}) DO UPDATE SET")
print(f"  {set_clause}")
print(f"WHERE {where_clause}")

print("\n=== 特性说明 ===")
print("1. 新记录 -> 正常插入")
print("2. 已存在记录 -> 只更新 NULL 列 (COALESCE)")
print("3. 已有值的列 -> 保持不变")
print("4. 可通过配置 run.ods_backfill_null_columns=false 禁用")

db.close()
print("\n测试完成!")
|
||||
70
tmp/test_conflict_modes.py
Normal file
70
tmp/test_conflict_modes.py
Normal file
@@ -0,0 +1,70 @@
|
||||
# -*- coding: utf-8 -*-
"""
测试 ODS 冲突处理三种模式

纯打印的说明脚本:枚举 nothing / backfill / update 三种 ON CONFLICT
处理模式的 SQL 形态、适用场景与配置方式,不连接数据库。
"""
print("=" * 70)
print("ODS 冲突处理模式说明")
print("=" * 70)

# (模式名, 标题, SQL 示例与行为说明) 三元组列表
modes = [
    ("nothing", "跳过已存在记录", """
    INSERT INTO table (...) VALUES (...)
    ON CONFLICT (pk) DO NOTHING

    行为: 已存在的记录完全跳过,不做任何更新
    适用: 严格保留原始快照,不允许修改历史数据
"""),

    ("backfill", "回填 NULL 列", """
    INSERT INTO table (...) VALUES (...)
    ON CONFLICT (pk) DO UPDATE SET
        col1 = COALESCE(table.col1, EXCLUDED.col1),
        col2 = COALESCE(table.col2, EXCLUDED.col2)
    WHERE table.col1 IS NULL OR table.col2 IS NULL

    行为: 只填充数据库中为 NULL 的字段,已有值保持不变
    适用: 新增字段后回填历史数据,但不覆盖已有值
"""),

    ("update", "全字段对比更新 (默认)", """
    INSERT INTO table (...) VALUES (...)
    ON CONFLICT (pk) DO UPDATE SET
        col1 = EXCLUDED.col1,
        col2 = EXCLUDED.col2
    WHERE table.col1 IS DISTINCT FROM EXCLUDED.col1
       OR table.col2 IS DISTINCT FROM EXCLUDED.col2

    行为: 对比所有字段,有变化则更新
    适用: 数据同步,保持与 API 一致
"""),
]

# 逐个模式打印标题与 SQL 示例
for mode, title, sql in modes:
    print(f"\n【模式: {mode}】{title}")
    print("-" * 50)
    print(sql)

print("=" * 70)
print("配置方式 (在 .env 中设置)")
print("=" * 70)
print("""
# 方式1: 直接设置模式
run.ods_conflict_mode=update     # 全字段对比更新 (默认)
run.ods_conflict_mode=backfill   # 只回填 NULL
run.ods_conflict_mode=nothing    # 跳过已存在

# 方式2: 兼容旧配置
run.ods_backfill_null_columns=false  # 等同于 nothing 模式
""")

print("=" * 70)
print("对比表")
print("=" * 70)
print("""
| 场景                     | nothing | backfill | update |
|--------------------------|---------|----------|--------|
| 新记录                   | 插入    | 插入     | 插入   |
| 已存在 + 字段已有值      | 跳过    | 保留原值 | 更新   |
| 已存在 + 字段为 NULL     | 跳过    | 填充新值 | 填充   |
| 已存在 + API值与DB相同   | 跳过    | 跳过     | 跳过   |
""")
|
||||
Reference in New Issue
Block a user