Database: update data validation and write logic.

@@ -32,7 +32,7 @@ SCHEMA_ETL=etl_admin
# API configuration
# ------------------------------------------------------------------------------
API_BASE=https://pc.ficoo.vip/apiprod/admin/v1/
-API_TOKEN=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJjbGllbnQtdHlwZSI6IjQiLCJ1c2VyLXR5cGUiOiIxIiwiaHR0cDovL3NjaGVtYXMubWljcm9zb2Z0LmNvbS93cy8yMDA4LzA2L2lkZW50aXR5L2NsYWltcy9yb2xlIjoiMTIiLCJyb2xlLWlkIjoiMTIiLCJ0ZW5hbnQtaWQiOiIyNzkwNjgzMTYwNzA5OTU3Iiwibmlja25hbWUiOiLnp5_miLfnrqHnkIblkZjvvJrmganmgakxIiwic2l0ZS1pZCI6IjAiLCJtb2JpbGUiOiIxMzgxMDUwMjMwNCIsInNpZCI6IjI5NTA0ODk2NTgzOTU4NDUiLCJzdGFmZi1pZCI6IjMwMDk5MTg2OTE1NTkwNDUiLCJvcmctaWQiOiIwIiwicm9sZS10eXBlIjoiMyIsInJlZnJlc2hUb2tlbiI6Iks1ZnBhYlRTNkFsR0FpMmN4WGYrMHdJVkk0L2UvTVQrSVBHM3V5VWRrSjg9IiwicmVmcmVzaEV4cGlyeVRpbWUiOiIyMDI2LzEvMzEg5LiL5Y2IMTA6MTQ6NTEiLCJuZWVkQ2hlY2tUb2tlbiI6ImZhbHNlIiwiZXhwIjoxNzY5ODY4ODkxLCJpc3MiOiJ0ZXN0IiwiYXVkIjoiVXNlciJ9.BH3-iwwrBczb8aFfI__6kwe3AIsEPacN9TruaTrQ3nY
+API_TOKEN=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJjbGllbnQtdHlwZSI6IjQiLCJ1c2VyLXR5cGUiOiIxIiwiaHR0cDovL3NjaGVtYXMubWljcm9zb2Z0LmNvbS93cy8yMDA4LzA2L2lkZW50aXR5L2NsYWltcy9yb2xlIjoiMTIiLCJyb2xlLWlkIjoiMTIiLCJ0ZW5hbnQtaWQiOiIyNzkwNjgzMTYwNzA5OTU3Iiwibmlja25hbWUiOiLnp5_miLfnrqHnkIblkZjvvJrmganmgakxIiwic2l0ZS1pZCI6IjAiLCJtb2JpbGUiOiIxMzgxMDUwMjMwNCIsInNpZCI6IjI5NTA0ODk2NTgzOTU4NDUiLCJzdGFmZi1pZCI6IjMwMDk5MTg2OTE1NTkwNDUiLCJvcmctaWQiOiIwIiwicm9sZS10eXBlIjoiMyIsInJlZnJlc2hUb2tlbiI6Ik1oKzFpTitjclRHMTY3cUp5SzFXYllteVBaaUhjdDI2ZTZDZkJvd1pxSVk9IiwicmVmcmVzaEV4cGlyeVRpbWUiOiIyMDI2LzIvNyDkuIvljYg5OjU2OjE4IiwibmVlZENoZWNrVG9rZW4iOiJmYWxzZSIsImV4cCI6MTc3MDQ3MjU3OCwiaXNzIjoidGVzdCIsImF1ZCI6IlVzZXIifQ.rY03o82SKznD7NOktXKzTOI1btl2FHsklMCChOlZUeY

# API request timeout (seconds)
API_TIMEOUT=20
@@ -109,9 +109,18 @@ DEFAULTS = {
        "mode": "history",
        "history_start": "2025-07-01",
        "history_end": "",
-       "include_dimensions": False,
+       "include_dimensions": True,
        "auto_check": False,
        "auto_backfill": False,
        "compare_content": True,
        "content_sample_limit": 50,
        "backfill_mismatch": True,
        "recheck_after_backfill": True,
        "ods_task_codes": "",
        "force_monthly_split": True,
    },
    "dwd": {
        "fact_upsert": True,
    },
}

@@ -55,7 +55,12 @@ ENV_MAP = {
    "INTEGRITY_INCLUDE_DIMENSIONS": ("integrity.include_dimensions",),
    "INTEGRITY_AUTO_CHECK": ("integrity.auto_check",),
    "INTEGRITY_AUTO_BACKFILL": ("integrity.auto_backfill",),
    "INTEGRITY_COMPARE_CONTENT": ("integrity.compare_content",),
    "INTEGRITY_CONTENT_SAMPLE_LIMIT": ("integrity.content_sample_limit",),
    "INTEGRITY_BACKFILL_MISMATCH": ("integrity.backfill_mismatch",),
    "INTEGRITY_RECHECK_AFTER_BACKFILL": ("integrity.recheck_after_backfill",),
    "INTEGRITY_ODS_TASK_CODES": ("integrity.ods_task_codes",),
    "DWD_FACT_UPSERT": ("dwd.fact_upsert",),
}
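
For context, the `INTEGRITY_*` keys above drive an ODS completeness check: row counts per period, content sampling when `compare_content` is on, and backfill when `auto_backfill` / `backfill_mismatch` allow it. A hedged sketch of the kind of comparison this implies; `api_reported_counts` and the `biz_date` column are illustrative assumptions, not objects from this repo:

```sql
-- Hypothetical per-day completeness check (all names here are assumptions).
-- Days where the ODS row count differs from the API-reported count are
-- candidates for backfill.
SELECT a.biz_date,
       a.api_rows,
       COALESCE(o.ods_rows, 0)              AS ods_rows,
       a.api_rows - COALESCE(o.ods_rows, 0) AS missing_rows
FROM api_reported_counts a
LEFT JOIN (
    SELECT biz_date, COUNT(*) AS ods_rows
    FROM billiards_ods.table_fee_transactions
    GROUP BY biz_date
) o ON o.biz_date = a.biz_date
WHERE COALESCE(o.ods_rows, 0) <> a.api_rows;
```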

File diff suppressed because it is too large

@@ -84,7 +84,7 @@ CREATE TABLE IF NOT EXISTS dim_site (
    SCD2_end_time TIMESTAMPTZ DEFAULT '9999-12-31',
    SCD2_is_current INT DEFAULT 1,
    SCD2_version INT DEFAULT 1,
-    PRIMARY KEY (site_id)
+    PRIMARY KEY (site_id, scd2_start_time)
);

COMMENT ON TABLE billiards_dwd.dim_site IS 'DWD dimension table: dim_site. ODS source table: billiards_ods.table_fee_transactions (JSON: table_fee_transactions.json; analysis: table_fee_transactions-Analysis.md). Load/cleaning logic: etl_billiards/tasks/dwd_load_task.py (DwdLoadTask).';
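
Every dimension table in this commit gets the same key change, from `(id)` to `(id, scd2_start_time)`, which is what lets one entity keep several SCD2 version rows. A minimal version-rollover sketch under the new composite key; the UPDATE-then-INSERT flow and the example id are assumptions for illustration, not the actual DwdLoadTask logic, and non-SCD2 columns are omitted:

```sql
-- 1) Close the currently active version of a changed dimension row.
UPDATE billiards_dwd.dim_site
SET scd2_end_time   = now(),
    scd2_is_current = 0
WHERE site_id = 1001            -- example id (assumption)
  AND scd2_is_current = 1;

-- 2) Insert the new version. The composite key (site_id, scd2_start_time)
--    accepts this second row for the same site_id, which the old
--    PRIMARY KEY (site_id) would have rejected.
INSERT INTO billiards_dwd.dim_site
    (site_id, scd2_start_time, scd2_end_time, scd2_is_current, scd2_version)
VALUES
    (1001, now(), '9999-12-31', 1, 2);  -- version 2, assuming version 1 existed
```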

@@ -133,7 +133,7 @@ CREATE TABLE IF NOT EXISTS dim_site_Ex (
    SCD2_end_time TIMESTAMPTZ DEFAULT '9999-12-31',
    SCD2_is_current INT DEFAULT 1,
    SCD2_version INT DEFAULT 1,
-    PRIMARY KEY (site_id)
+    PRIMARY KEY (site_id, scd2_start_time)
);

COMMENT ON TABLE billiards_dwd.dim_site_ex IS 'DWD dimension table (extended fields): dim_site_ex. ODS source table: billiards_ods.table_fee_transactions (JSON: table_fee_transactions.json; analysis: table_fee_transactions-Analysis.md). Load/cleaning logic: etl_billiards/tasks/dwd_load_task.py (DwdLoadTask).';

@@ -176,7 +176,7 @@ CREATE TABLE IF NOT EXISTS dim_table (
    SCD2_end_time TIMESTAMPTZ DEFAULT '9999-12-31',
    SCD2_is_current INT DEFAULT 1,
    SCD2_version INT DEFAULT 1,
-    PRIMARY KEY (table_id)
+    PRIMARY KEY (table_id, scd2_start_time)
);

COMMENT ON TABLE billiards_dwd.dim_table IS 'DWD dimension table: dim_table. ODS source table: billiards_ods.site_tables_master (JSON: site_tables_master.json; analysis: site_tables_master-Analysis.md). Load/cleaning logic: etl_billiards/tasks/dwd_load_task.py (DwdLoadTask).';

@@ -204,7 +204,7 @@ CREATE TABLE IF NOT EXISTS dim_table_Ex (
    SCD2_end_time TIMESTAMPTZ DEFAULT '9999-12-31',
    SCD2_is_current INT DEFAULT 1,
    SCD2_version INT DEFAULT 1,
-    PRIMARY KEY (table_id)
+    PRIMARY KEY (table_id, scd2_start_time)
);

COMMENT ON TABLE billiards_dwd.dim_table_ex IS 'DWD dimension table (extended fields): dim_table_ex. ODS source table: billiards_ods.site_tables_master (JSON: site_tables_master.json; analysis: site_tables_master-Analysis.md). Load/cleaning logic: etl_billiards/tasks/dwd_load_task.py (DwdLoadTask).';

@@ -240,7 +240,7 @@ CREATE TABLE IF NOT EXISTS dim_assistant (
    SCD2_end_time TIMESTAMPTZ,
    SCD2_is_current INT,
    SCD2_version INT,
-    PRIMARY KEY (assistant_id)
+    PRIMARY KEY (assistant_id, scd2_start_time)
);

COMMENT ON TABLE billiards_dwd.dim_assistant IS 'DWD dimension table: dim_assistant. ODS source table: billiards_ods.assistant_accounts_master (JSON: assistant_accounts_master.json; analysis: assistant_accounts_master-Analysis.md). Load/cleaning logic: etl_billiards/tasks/dwd_load_task.py (DwdLoadTask).';

@@ -314,7 +314,7 @@ CREATE TABLE IF NOT EXISTS dim_assistant_Ex (
    SCD2_end_time TIMESTAMPTZ,
    SCD2_is_current INT,
    SCD2_version INT,
-    PRIMARY KEY (assistant_id)
+    PRIMARY KEY (assistant_id, scd2_start_time)
);

COMMENT ON TABLE billiards_dwd.dim_assistant_ex IS 'DWD dimension table (extended fields): dim_assistant_ex. ODS source table: billiards_ods.assistant_accounts_master (JSON: assistant_accounts_master.json; analysis: assistant_accounts_master-Analysis.md). Load/cleaning logic: etl_billiards/tasks/dwd_load_task.py (DwdLoadTask).';

@@ -383,7 +383,7 @@ CREATE TABLE IF NOT EXISTS dim_member (
    SCD2_end_time TIMESTAMPTZ,
    SCD2_is_current INT,
    SCD2_version INT,
-    PRIMARY KEY (member_id)
+    PRIMARY KEY (member_id, scd2_start_time)
);

COMMENT ON TABLE billiards_dwd.dim_member IS 'DWD dimension table: dim_member. ODS source table: billiards_ods.member_profiles (JSON: member_profiles.json; analysis: member_profiles-Analysis.md). Load/cleaning logic: etl_billiards/tasks/dwd_load_task.py (DwdLoadTask).';

@@ -415,7 +415,7 @@ CREATE TABLE IF NOT EXISTS dim_member_Ex (
    SCD2_end_time TIMESTAMPTZ,
    SCD2_is_current INT,
    SCD2_version INT,
-    PRIMARY KEY (member_id)
+    PRIMARY KEY (member_id, scd2_start_time)
);

COMMENT ON TABLE billiards_dwd.dim_member_ex IS 'DWD dimension table (extended fields): dim_member_ex. ODS source table: billiards_ods.member_profiles (JSON: member_profiles.json; analysis: member_profiles-Analysis.md). Load/cleaning logic: etl_billiards/tasks/dwd_load_task.py (DwdLoadTask).';

@@ -454,7 +454,7 @@ CREATE TABLE IF NOT EXISTS dim_member_card_account (
    SCD2_end_time TIMESTAMPTZ,
    SCD2_is_current INT,
    SCD2_version INT,
-    PRIMARY KEY (member_card_id)
+    PRIMARY KEY (member_card_id, scd2_start_time)
);

COMMENT ON TABLE billiards_dwd.dim_member_card_account IS 'DWD dimension table: dim_member_card_account. ODS source table: billiards_ods.member_stored_value_cards (JSON: member_stored_value_cards.json; analysis: member_stored_value_cards-Analysis.md). Load/cleaning logic: etl_billiards/tasks/dwd_load_task.py (DwdLoadTask).';

@@ -538,7 +538,7 @@ CREATE TABLE IF NOT EXISTS dim_member_card_account_Ex (
    SCD2_end_time TIMESTAMPTZ,
    SCD2_is_current INT,
    SCD2_version INT,
-    PRIMARY KEY (member_card_id)
+    PRIMARY KEY (member_card_id, scd2_start_time)
);

COMMENT ON TABLE billiards_dwd.dim_member_card_account_ex IS 'DWD dimension table (extended fields): dim_member_card_account_ex. ODS source table: billiards_ods.member_stored_value_cards (JSON: member_stored_value_cards.json; analysis: member_stored_value_cards-Analysis.md). Load/cleaning logic: etl_billiards/tasks/dwd_load_task.py (DwdLoadTask).';

@@ -619,7 +619,7 @@ CREATE TABLE IF NOT EXISTS dim_tenant_goods (
    SCD2_end_time TIMESTAMPTZ,
    SCD2_is_current INT,
    SCD2_version INT,
-    PRIMARY KEY (tenant_goods_id)
+    PRIMARY KEY (tenant_goods_id, scd2_start_time)
);

COMMENT ON TABLE billiards_dwd.dim_tenant_goods IS 'DWD dimension table: dim_tenant_goods. ODS source table: billiards_ods.tenant_goods_master (JSON: tenant_goods_master.json; analysis: tenant_goods_master-Analysis.md). Load/cleaning logic: etl_billiards/tasks/dwd_load_task.py (DwdLoadTask).';

@@ -666,7 +666,7 @@ CREATE TABLE IF NOT EXISTS dim_tenant_goods_Ex (
    SCD2_end_time TIMESTAMPTZ,
    SCD2_is_current INT,
    SCD2_version INT,
-    PRIMARY KEY (tenant_goods_id)
+    PRIMARY KEY (tenant_goods_id, scd2_start_time)
);

COMMENT ON TABLE billiards_dwd.dim_tenant_goods_ex IS 'DWD dimension table (extended fields): dim_tenant_goods_ex. ODS source table: billiards_ods.tenant_goods_master (JSON: tenant_goods_master.json; analysis: tenant_goods_master-Analysis.md). Load/cleaning logic: etl_billiards/tasks/dwd_load_task.py (DwdLoadTask).';

@@ -719,7 +719,7 @@ CREATE TABLE IF NOT EXISTS dim_store_goods (
    SCD2_end_time TIMESTAMPTZ,
    SCD2_is_current INT,
    SCD2_version INT,
-    PRIMARY KEY (site_goods_id)
+    PRIMARY KEY (site_goods_id, scd2_start_time)
);

COMMENT ON TABLE billiards_dwd.dim_store_goods IS 'DWD dimension table: dim_store_goods. ODS source table: billiards_ods.store_goods_master (JSON: store_goods_master.json; analysis: store_goods_master-Analysis.md). Load/cleaning logic: etl_billiards/tasks/dwd_load_task.py (DwdLoadTask).';

@@ -780,7 +780,7 @@ CREATE TABLE IF NOT EXISTS dim_store_goods_Ex (
    SCD2_end_time TIMESTAMPTZ,
    SCD2_is_current INT,
    SCD2_version INT,
-    PRIMARY KEY (site_goods_id)
+    PRIMARY KEY (site_goods_id, scd2_start_time)
);

COMMENT ON TABLE billiards_dwd.dim_store_goods_ex IS 'DWD dimension table (extended fields): dim_store_goods_ex. ODS source table: billiards_ods.store_goods_master (JSON: store_goods_master.json; analysis: store_goods_master-Analysis.md). Load/cleaning logic: etl_billiards/tasks/dwd_load_task.py (DwdLoadTask).';

@@ -833,7 +833,7 @@ CREATE TABLE IF NOT EXISTS dim_goods_category (
    SCD2_end_time TIMESTAMPTZ,
    SCD2_is_current INT,
    SCD2_version INT,
-    PRIMARY KEY (category_id)
+    PRIMARY KEY (category_id, scd2_start_time)
);

COMMENT ON TABLE billiards_dwd.dim_goods_category IS 'DWD dimension table: dim_goods_category. ODS source table: billiards_ods.stock_goods_category_tree (JSON: stock_goods_category_tree.json; analysis: stock_goods_category_tree-Analysis.md). Load/cleaning logic: etl_billiards/tasks/dwd_load_task.py (DwdLoadTask).';

@@ -876,7 +876,7 @@ CREATE TABLE IF NOT EXISTS dim_groupbuy_package (
    SCD2_end_time TIMESTAMPTZ,
    SCD2_is_current INT,
    SCD2_version INT,
-    PRIMARY KEY (groupbuy_package_id)
+    PRIMARY KEY (groupbuy_package_id, scd2_start_time)
);

COMMENT ON TABLE billiards_dwd.dim_groupbuy_package IS 'DWD dimension table: dim_groupbuy_package. ODS source table: billiards_ods.group_buy_packages (JSON: group_buy_packages.json; analysis: group_buy_packages-Analysis.md). Load/cleaning logic: etl_billiards/tasks/dwd_load_task.py (DwdLoadTask).';

@@ -927,7 +927,7 @@ CREATE TABLE IF NOT EXISTS dim_groupbuy_package_Ex (
    SCD2_end_time TIMESTAMPTZ,
    SCD2_is_current INT,
    SCD2_version INT,
-    PRIMARY KEY (groupbuy_package_id)
+    PRIMARY KEY (groupbuy_package_id, scd2_start_time)
);

COMMENT ON TABLE billiards_dwd.dim_groupbuy_package_ex IS 'DWD dimension table (extended fields): dim_groupbuy_package_ex. ODS source table: billiards_ods.group_buy_packages (JSON: group_buy_packages.json; analysis: group_buy_packages-Analysis.md). Load/cleaning logic: etl_billiards/tasks/dwd_load_task.py (DwdLoadTask).';

etl_billiards/docs/DWS 数据库处理需求.md (new file, 21 lines)
@@ -0,0 +1,21 @@
# DWS Data Layer Requirements

## Overview

Project path: C:\dev\LLTQ\ETL\feiqiu-ETL

This document describes DWS-layer processing built on top of the DWD-layer data the ETL already produces:

- Design the DWS-layer database; the deliverable is DDL SQL statements.
- Implement read-process-load: read from DWD, process in Python, write back via SQL.

The document focuses on the business description; apply professional skill and object-oriented (OOP) design, and carry the program design through to finished code:

- See .\README.md for the current state of the project.
- See .\etl_billiards\docs\dwd_main_tables_dictionary.md for the DWD schema's tables and fields (where it differs from the database, the current database is authoritative).
- SQL and Python code need thorough, high-density Chinese comments.
- Document finished work thoroughly in .\README.md for future maintenance.
- For DWS tables and fields, produce database documentation analogous to .\etl_billiards\docs\dwd_main_tables_dictionary.md for future maintenance.
- Mind the Chinese character-encoding requirements. (A sketch of the DDL deliverable follows this list.)
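
As a hedged illustration of the DDL deliverable requested above: the `billiards_dws` schema, the table, and its columns are invented for this example (with the Chinese comments the requirements call for), not an agreed design.

```sql
-- 假设示例:DWS 日汇总表(schema、表名与字段均为示意,非既定设计)
CREATE TABLE IF NOT EXISTS billiards_dws.dws_site_revenue_daily (
    site_id      BIGINT        NOT NULL,                -- 门店 ID → billiards_dwd.dim_site
    stat_date    DATE          NOT NULL,                -- 统计日期
    settle_count INTEGER       NOT NULL DEFAULT 0,      -- 当日结账单数
    pay_amount   NUMERIC(18,2) NOT NULL DEFAULT 0,      -- 当日实收金额合计
    etl_time     TIMESTAMPTZ   NOT NULL DEFAULT now(),  -- 装载时间
    PRIMARY KEY (site_id, stat_date)                    -- 门店 × 日期 唯一
);
```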

## Detailed Requirements

### Assistant (coach) perspective

- Need

@@ -0,0 +1,80 @@
# dim_assistant_ex: Assistant Profile Extension Table

> Generated: 2026-01-28

## Table Info

| Property | Value |
|----------|-------|
| Schema | billiards_dwd |
| Table | dim_assistant_ex |
| Primary key | assistant_id, scd2_start_time |
| Main table | dim_assistant |
| Row count | 69 |
| Description | Extended assistant-profile fields: personal details, ratings, status configuration, light control, etc. |

## Fields

| # | Field | Type | Nullable | Key | Description |
|---|-------|------|----------|-----|-------------|
| 1 | assistant_id | BIGINT | NO | PK | Assistant ID → dim_assistant |
| 2 | gender | INTEGER | YES | | Gender. **Enum**: 0(59)=not set, 2(10)=female (**[1=male, unconfirmed]**) |
| 3 | birth_date | TIMESTAMPTZ | YES | | Date of birth |
| 4 | avatar | TEXT | YES | | Avatar URL (default: https://oss.ficoo.vip/maUiImages/images/defaultAvatar.png) |
| 5 | introduce | TEXT | YES | | Personal bio (currently all empty) |
| 6 | video_introduction_url | TEXT | YES | | Intro video URL |
| 7 | height | NUMERIC(5,2) | YES | | Height (cm) |
| 8 | weight | NUMERIC(5,2) | YES | | Weight (kg) |
| 9 | shop_name | TEXT | YES | | Store name snapshot. **Current value**: "朗朗桌球" |
| 10 | group_id | BIGINT | YES | | Group ID (currently all 0) |
| 11 | group_name | TEXT | YES | | Group name (currently all empty) |
| 12 | person_org_id | BIGINT | YES | | HR organization ID |
| 13 | staff_id | BIGINT | YES | | Staff ID (currently all 0) |
| 14 | staff_profile_id | BIGINT | YES | | Staff profile ID (currently all 0) |
| 15 | assistant_grade | DOUBLE PRECISION | YES | | Average rating |
| 16 | sum_grade | DOUBLE PRECISION | YES | | Cumulative rating |
| 17 | get_grade_times | INTEGER | YES | | Rating count (currently all 0) |
| 18 | charge_way | INTEGER | YES | | Billing method. **Enum**: 2(69)=time-based **[other values unconfirmed]** |
| 19 | allow_cx | INTEGER | YES | | Promotional billing allowed. **Enum**: 1(69)=allowed |
| 20 | is_guaranteed | INTEGER | YES | | Guaranteed minimum. **Enum**: 1(69)=guaranteed |
| 21 | salary_grant_enabled | INTEGER | YES | | Salary payout switch. **Enum**: 2(69)=**[meaning unconfirmed]** |
| 22 | entry_type | INTEGER | YES | | Onboarding type. **Enum**: 1(68)=regular, 3(1)=**[unconfirmed]** |
| 23 | entry_sign_status | INTEGER | YES | | Onboarding contract status. **Enum**: 0(69)=unsigned |
| 24 | resign_sign_status | INTEGER | YES | | Offboarding contract status. **Enum**: 0(69)=unsigned |
| 25 | work_status | INTEGER | YES | | Work status. **Enum**: 1(29)=on duty, 2(40)=off duty |
| 26 | show_status | INTEGER | YES | | Display status. **Enum**: 1(69)=visible |
| 27 | show_sort | INTEGER | YES | | Display sort order |
| 28 | online_status | INTEGER | YES | | Online status. **Enum**: 1(69)=online |
| 29 | is_delete | INTEGER | YES | | Delete flag. **Enum**: 0(69)=not deleted |
| 30 | criticism_status | INTEGER | YES | | Complaint status. **Enum**: 1(68)=**[unconfirmed]**, 2(1)=**[unconfirmed]** |
| 31 | create_time | TIMESTAMPTZ | YES | | Created at |
| 32 | update_time | TIMESTAMPTZ | YES | | Updated at |
| 33 | start_time | TIMESTAMPTZ | YES | | Config effective from |
| 34 | end_time | TIMESTAMPTZ | YES | | Config effective to |
| 35 | last_table_id | BIGINT | YES | | Last served table ID → dim_table |
| 36 | last_table_name | TEXT | YES | | Last served table name. **Sample values**: "发财", "C2", "VIP包厢 VIP5" |
| 37 | last_update_name | TEXT | YES | | Last updated by. **Sample values**: "教练:周蒙", "管理员:郑丽珊" |
| 38 | order_trade_no | BIGINT | YES | | Most recent related order number |
| 39 | ding_talk_synced | INTEGER | YES | | DingTalk sync status. **Enum**: 1(69)=synced |
| 40 | site_light_cfg_id | BIGINT | YES | | Light-control config ID (currently all 0) |
| 41 | light_equipment_id | TEXT | YES | | Light-control device ID (currently all empty) |
| 42 | light_status | INTEGER | YES | | Light-control status. **Enum**: 2(69)=**[meaning unconfirmed]** |
| 43 | is_team_leader | INTEGER | YES | | Team-leader flag. **Enum**: 0(69)=no |
| 44 | serial_number | BIGINT | YES | | Serial number |
| 45 | scd2_start_time | TIMESTAMPTZ | NO | PK | SCD2 version effective from |
| 46 | scd2_end_time | TIMESTAMPTZ | YES | | SCD2 version effective to |
| 47 | scd2_is_current | INTEGER | YES | | Current-version flag |
| 48 | scd2_version | INTEGER | YES | | Version number |

## Usage

```sql
-- Join the main table with the extension table
SELECT m.*, e.*
FROM billiards_dwd.dim_assistant m
JOIN billiards_dwd.dim_assistant_ex e
  ON m.assistant_id = e.assistant_id
 AND m.scd2_start_time = e.scd2_start_time
WHERE m.scd2_is_current = 1;
```

@@ -0,0 +1,64 @@
# dim_groupbuy_package_ex: Group-Buy Package Extension Table

> Generated: 2026-01-28

## Table Info

| Property | Value |
|----------|-------|
| Schema | billiards_dwd |
| Table | dim_groupbuy_package_ex |
| Primary key | groupbuy_package_id, scd2_start_time |
| Main table | dim_groupbuy_package |
| Row count | 34 |
| Description | Extended group-buy package configuration: usable time windows, table-area restrictions, package type, etc. |

## Fields

| # | Field | Type | Nullable | Key | Description |
|---|-------|------|----------|-----|-------------|
| 1 | groupbuy_package_id | BIGINT | NO | PK | Package ID → dim_groupbuy_package |
| 2 | site_name | VARCHAR(100) | YES | | Store name snapshot. **Current value**: "朗朗桌球" |
| 3 | usable_count | INTEGER | YES | | Usable count (currently all 0, i.e. unlimited) |
| 4 | date_type | INTEGER | YES | | Date type. **Enum**: 1(34)=**[meaning unconfirmed]** |
| 5 | usable_range | VARCHAR(255) | YES | | Usable date-range description (currently all empty) |
| 6 | date_info | VARCHAR(255) | YES | | Date info |
| 7 | start_clock | VARCHAR(16) | YES | | Usable-from time. **Enum**: "00:00:00"(29), "10:00:00"(4), "23:00:00"(1) |
| 8 | end_clock | VARCHAR(16) | YES | | Usable-until time. **Enum**: "1.00:00:00"(29)=midnight next day, "23:59:59"(3), "1.02:00:00"(2)=2 a.m. next day |
| 9 | add_start_clock | VARCHAR(16) | YES | | Extra window start time |
| 10 | add_end_clock | VARCHAR(16) | YES | | Extra window end time |
| 11 | area_tag_type | INTEGER | YES | | Area tag type. **Enum**: 1(34)=**[meaning unconfirmed]** |
| 12 | table_area_id | BIGINT | YES | | Table-area ID (currently all 0) |
| 13 | tenant_table_area_id | BIGINT | YES | | Tenant-level table-area ID (currently all 0) |
| 14 | table_area_id_list | VARCHAR(512) | YES | | Table-area ID list (currently all empty) |
| 15 | group_type | INTEGER | YES | | Group-buy type. **Enum**: 1(34)=**[meaning unconfirmed]** |
| 16 | system_group_type | INTEGER | YES | | System group-buy type. **Enum**: 1(34)=**[meaning unconfirmed]** |
| 17 | package_type | INTEGER | YES | | Package type. **Enum**: 1(26)=regular package **[unconfirmed]**, 2(8)=VIP package **[unconfirmed]** |
| 18 | effective_status | INTEGER | YES | | Effective status. **Enum**: 1(24)=active, 3(10)=inactive **[unconfirmed]** |
| 19 | max_selectable_categories | INTEGER | YES | | Max selectable categories (currently all 0) |
| 20 | creator_name | VARCHAR(100) | YES | | Creator. **Sample values**: "店长:郑丽珊", "管理员:郑丽珊" |
| 21 | scd2_start_time | TIMESTAMPTZ | NO | PK | SCD2 version effective from |
| 22 | scd2_end_time | TIMESTAMPTZ | YES | | SCD2 version effective to |
| 23 | scd2_is_current | INTEGER | YES | | Current-version flag |
| 24 | scd2_version | INTEGER | YES | | Version number |

## Sample Data

| groupbuy_package_id | start_clock | end_clock | package_type | effective_status | creator_name |
|--------------------|-------------|------------|--------------|------------------|--------------|
| 2798905767676933 | 00:00:00 | 1.00:00:00 | 2 | 1 | 店长:郑丽珊 |
| 2798901295615045 | 00:00:00 | 1.00:00:00 | 2 | 3 | 店长:郑丽珊 |
| 2798731703045189 | 00:00:00 | 1.00:00:00 | 1 | 1 | 店长:郑丽珊 |

## Usage

```sql
-- Join the main table with the extension table
SELECT m.package_name, m.duration_seconds, e.start_clock, e.end_clock, e.effective_status
FROM billiards_dwd.dim_groupbuy_package m
JOIN billiards_dwd.dim_groupbuy_package_ex e
  ON m.groupbuy_package_id = e.groupbuy_package_id
 AND m.scd2_start_time = e.scd2_start_time
WHERE m.scd2_is_current = 1;
```

@@ -0,0 +1,90 @@
# dim_member_card_account_ex: Member Card Account Extension Table

> Generated: 2026-01-28

## Table Info

| Property | Value |
|----------|-------|
| Schema | billiards_dwd |
| Table | dim_member_card_account_ex |
| Primary key | member_card_id, scd2_start_time |
| Main table | dim_member_card_account |
| Row count | 945 |
| Description | Extended member-card account configuration: discount settings, deduction rules, usage restrictions, etc. |

## Fields

| # | Field | Type | Nullable | Key | Description |
|---|-------|------|----------|-----|-------------|
| 1 | member_card_id | BIGINT | NO | PK | Member card ID → dim_member_card_account |
| 2 | site_name | TEXT | YES | | Store name. **Current value**: "朗朗桌球" |
| 3 | tenant_name | VARCHAR(64) | YES | | Tenant name (currently all empty) |
| 4 | tenantavatar | TEXT | YES | | Tenant avatar (currently all empty) |
| 5 | effect_site_id | BIGINT | YES | | Effective site ID (0 = any site) |
| 6 | able_cross_site | INTEGER | YES | | Cross-site use allowed. **Enum**: 1(945)=allowed |
| 7 | card_physics_type | INTEGER | YES | | Physical card type. **Enum**: 1(945)=**[unconfirmed]** |
| 8 | card_no | TEXT | YES | | Physical card number (currently all empty) |
| 9 | bind_password | TEXT | YES | | Binding password (currently all empty) |
| 10 | use_scene | TEXT | YES | | Usage scene (currently all empty) |
| 11 | denomination | NUMERIC(18,2) | YES | | Face value / initial amount |
| 12 | create_time | TIMESTAMPTZ | YES | | Created at |
| 13 | disable_start_time | TIMESTAMPTZ | YES | | Disable window start |
| 14 | disable_end_time | TIMESTAMPTZ | YES | | Disable window end |
| 15 | is_allow_give | INTEGER | YES | | Transfer allowed. **Enum**: 0(945)=not allowed |
| 16 | is_allow_order_deduct | INTEGER | YES | | Order deduction allowed. **Enum**: 0(945)=not allowed |
| 17 | sort | INTEGER | YES | | Sort order |
| 18 | table_discount | NUMERIC(10,2) | YES | | Table-fee discount rate (10.0 = no discount) |
| 19 | goods_discount | NUMERIC(10,2) | YES | | Goods discount rate |
| 20 | assistant_discount | NUMERIC(10,2) | YES | | Assistant discount rate |
| 21 | assistant_reward_discount | NUMERIC(10,2) | YES | | Assistant reward discount rate |
| 22 | table_service_discount | NUMERIC(10,2) | YES | | Table-service discount rate |
| 23 | goods_service_discount | NUMERIC(10,2) | YES | | Goods-service discount rate |
| 24 | assistant_service_discount | NUMERIC(10,2) | YES | | Assistant-service discount rate |
| 25 | coupon_discount | NUMERIC(10,2) | YES | | Coupon discount rate |
| 26 | table_discount_sub_switch | INTEGER | YES | | Table-discount stacking switch. **Enum**: 2(945)=off **[1=on, unconfirmed]** |
| 27 | goods_discount_sub_switch | INTEGER | YES | | Goods-discount stacking switch |
| 28 | assistant_discount_sub_switch | INTEGER | YES | | Assistant-discount stacking switch |
| 29 | assistant_reward_discount_sub_switch | INTEGER | YES | | Assistant-reward-discount stacking switch |
| 30 | goods_discount_range_type | INTEGER | YES | | Goods discount scope type. **Enum**: 1(945)=**[unconfirmed]** |
| 31 | table_deduct_radio | NUMERIC(10,2) | YES | | Table-fee deduction ratio (100.0 = full deduction) |
| 32 | goods_deduct_radio | NUMERIC(10,2) | YES | | Goods deduction ratio |
| 33 | assistant_deduct_radio | NUMERIC(10,2) | YES | | Assistant deduction ratio |
| 34 | table_service_deduct_radio | NUMERIC(10,2) | YES | | Table-service deduction ratio |
| 35 | goods_service_deduct_radio | NUMERIC(10,2) | YES | | Goods-service deduction ratio |
| 36 | assistant_service_deduct_radio | NUMERIC(10,2) | YES | | Assistant-service deduction ratio |
| 37 | assistant_reward_deduct_radio | NUMERIC(10,2) | YES | | Assistant-reward deduction ratio |
| 38 | coupon_deduct_radio | NUMERIC(10,2) | YES | | Coupon deduction ratio |
| 39 | cardsettlededuct | NUMERIC(18,2) | YES | | Settlement card-deduction amount config |
| 40 | tablecarddeduct | NUMERIC(18,2) | YES | | Table-fee card-deduction amount |
| 41 | tableservicecarddeduct | NUMERIC(18,2) | YES | | Table-service card-deduction amount |
| 42 | goodscardeduct | NUMERIC(18,2) | YES | | Goods card-deduction amount |
| 43 | goodsservicecarddeduct | NUMERIC(18,2) | YES | | Goods-service card-deduction amount |
| 44 | assistantcarddeduct | NUMERIC(18,2) | YES | | Assistant card-deduction amount |
| 45 | assistantservicecarddeduct | NUMERIC(18,2) | YES | | Assistant-service card-deduction amount |
| 46 | assistantrewardcarddeduct | NUMERIC(18,2) | YES | | Assistant-reward card-deduction amount |
| 47 | couponcarddeduct | NUMERIC(18,2) | YES | | Coupon card-deduction amount |
| 48 | deliveryfeededuct | NUMERIC(18,2) | YES | | Delivery-fee card-deduction amount |
| 49 | tableareaid | TEXT | YES | | Usable table-area ID list (currently all empty) |
| 50 | goodscategoryid | TEXT | YES | | Usable goods-category ID list (currently all empty) |
| 51 | pdassisnatlevel | TEXT | YES | | Play-along assistant level restriction. **Current value**: "{}" |
| 52 | cxassisnatlevel | TEXT | YES | | Promotion assistant level restriction. **Current value**: "{}" |
| 53 | scd2_start_time | TIMESTAMPTZ | NO | PK | SCD2 version effective from |
| 54 | scd2_end_time | TIMESTAMPTZ | YES | | SCD2 version effective to |
| 55 | scd2_is_current | INTEGER | YES | | Current-version flag |
| 56 | scd2_version | INTEGER | YES | | Version number |

## Usage

```sql
-- Query cards together with their discount configuration
SELECT
    m.member_card_type_name, m.balance,
    e.table_discount, e.goods_discount, e.assistant_discount
FROM billiards_dwd.dim_member_card_account m
JOIN billiards_dwd.dim_member_card_account_ex e
  ON m.member_card_id = e.member_card_id
 AND m.scd2_start_time = e.scd2_start_time
WHERE m.scd2_is_current = 1;
```
etl_billiards/docs/bd_manual/Ex/BD_manual_dim_member_ex.md (new file, 51 lines)
@@ -0,0 +1,51 @@
# dim_member_ex: Member Profile Extension Table

> Generated: 2026-01-28

## Table Info

| Property | Value |
|----------|-------|
| Schema | billiards_dwd |
| Table | dim_member_ex |
| Primary key | member_id, scd2_start_time |
| Main table | dim_member |
| Row count | 556 |
| Description | Extended member-profile fields: points, growth value, status, etc. |

## Fields

| # | Field | Type | Nullable | Key | Description |
|---|-------|------|----------|-----|-------------|
| 1 | member_id | BIGINT | NO | PK | Member ID → dim_member |
| 2 | referrer_member_id | BIGINT | YES | | Referrer member ID (currently all 0, i.e. no referrer) |
| 3 | point | NUMERIC(18,2) | YES | | Point balance |
| 4 | register_site_name | TEXT | YES | | Registration store name. **Current value**: "朗朗桌球" |
| 5 | growth_value | NUMERIC(18,2) | YES | | Growth value |
| 6 | user_status | INTEGER | YES | | User status. **Enum**: 1(556)=normal |
| 7 | status | INTEGER | YES | | Account status. **Enum**: 1(490)=normal, 3(66)=**[meaning unconfirmed]** |
| 8 | scd2_start_time | TIMESTAMPTZ | NO | PK | SCD2 version effective from |
| 9 | scd2_end_time | TIMESTAMPTZ | YES | | SCD2 version effective to |
| 10 | scd2_is_current | INTEGER | YES | | Current-version flag |
| 11 | scd2_version | INTEGER | YES | | Version number |

## Sample Data

| member_id | point | growth_value | user_status | status |
|-----------|-------|--------------|-------------|--------|
| 3043883848157381 | 0.00 | 0.00 | 1 | 1 |
| 3037269565082949 | 0.00 | 0.00 | 1 | 1 |
| 3025342944414469 | 0.00 | 0.00 | 1 | 1 |

## Usage

```sql
-- Join the main table with the extension table
SELECT m.*, e.point, e.growth_value, e.status
FROM billiards_dwd.dim_member m
JOIN billiards_dwd.dim_member_ex e
  ON m.member_id = e.member_id
 AND m.scd2_start_time = e.scd2_start_time
WHERE m.scd2_is_current = 1;
```
etl_billiards/docs/bd_manual/Ex/BD_manual_dim_site_ex.md (new file, 57 lines)
@@ -0,0 +1,57 @@
# dim_site_ex: Site Extension Table

> Generated: 2026-01-28

## Table Info

| Property | Value |
|----------|-------|
| Schema | billiards_dwd |
| Table | dim_site_ex |
| Primary key | site_id, scd2_start_time |
| Main table | dim_site |
| Row count | 1 |
| Description | Extended site fields: light control, attendance, customer-service configuration, etc. |

## Fields

| # | Field | Type | Nullable | Key | Description |
|---|-------|------|----------|-----|-------------|
| 1 | site_id | BIGINT | NO | PK | Site ID → dim_site |
| 2 | avatar | TEXT | YES | | Store avatar URL |
| 3 | address | TEXT | YES | | Address (redundant copy) |
| 4 | longitude | NUMERIC(9,6) | YES | | Longitude (redundant copy) |
| 5 | latitude | NUMERIC(9,6) | YES | | Latitude (redundant copy) |
| 6 | tenant_site_region_id | BIGINT | YES | | Region ID (redundant copy) |
| 7 | auto_light | INTEGER | YES | | Automatic light control. **Enum**: 1(1)=enabled |
| 8 | light_status | INTEGER | YES | | Light-control status. **Enum**: 1(1)=**[unconfirmed]** |
| 9 | light_type | INTEGER | YES | | Light-control type. **Enum**: 0(1)=**[unconfirmed]** |
| 10 | light_token | TEXT | YES | | Light-control token |
| 11 | site_type | INTEGER | YES | | Site type (redundant copy) |
| 12 | site_label | TEXT | YES | | Site label (redundant copy) |
| 13 | attendance_enabled | INTEGER | YES | | Attendance enabled. **Enum**: 1(1)=enabled |
| 14 | attendance_distance | INTEGER | YES | | Attendance distance (meters). **Current value**: 0 |
| 15 | customer_service_qrcode | TEXT | YES | | Customer-service QR code URL |
| 16 | customer_service_wechat | TEXT | YES | | Customer-service WeChat ID |
| 17 | fixed_pay_qrcode | TEXT | YES | | Fixed payment QR code URL |
| 18 | prod_env | TEXT | YES | | Environment flag. **Current value**: "1" |
| 19 | shop_status | INTEGER | YES | | Business status (redundant copy) |
| 20 | create_time | TIMESTAMPTZ | YES | | Created at |
| 21 | update_time | TIMESTAMPTZ | YES | | Updated at |
| 22 | scd2_start_time | TIMESTAMPTZ | NO | PK | SCD2 version effective from |
| 23 | scd2_end_time | TIMESTAMPTZ | YES | | SCD2 version effective to |
| 24 | scd2_is_current | INTEGER | YES | | Current-version flag |
| 25 | scd2_version | INTEGER | YES | | Version number |

## Usage

```sql
-- Join the main table with the extension table
SELECT m.*, e.*
FROM billiards_dwd.dim_site m
JOIN billiards_dwd.dim_site_ex e
  ON m.site_id = e.site_id
 AND m.scd2_start_time = e.scd2_start_time
WHERE m.scd2_is_current = 1;
```
@@ -0,0 +1,62 @@
# dim_store_goods_ex: Store Goods Extension Table

> Generated: 2026-01-28

## Table Info

| Property | Value |
|----------|-------|
| Schema | billiards_dwd |
| Table | dim_store_goods_ex |
| Primary key | site_goods_id, scd2_start_time |
| Main table | dim_store_goods |
| Row count | 170 |
| Description | Extended store-goods fields: unit, cost, inventory management, discount configuration, etc. |

## Fields

| # | Field | Type | Nullable | Key | Description |
|---|-------|------|----------|-----|-------------|
| 1 | site_goods_id | BIGINT | NO | PK | Store goods ID → dim_store_goods |
| 2 | site_name | TEXT | YES | | Store name. **Current value**: "朗朗桌球" |
| 3 | unit | TEXT | YES | | Goods unit. **Enum**: "包"(62), "瓶"(49), "个"(17), "份"(14), "根"(10), "杯"(5), "盒"(4), "桶"(3), "盘"(2), "罐"(1), "支"(1), "双"(1), "张"(1) |
| 4 | goods_barcode | TEXT | YES | | Goods barcode (currently all empty) |
| 5 | goods_cover_url | TEXT | YES | | Goods cover image URL |
| 6 | pinyin_initial | TEXT | YES | | Pinyin initials (for search) |
| 7 | stock_qty | INTEGER | YES | | Stock quantity |
| 8 | stock_secondary_qty | INTEGER | YES | | Secondary-unit stock (currently all 0) |
| 9 | safety_stock_qty | INTEGER | YES | | Safety stock (currently all 0) |
| 10 | cost_price | NUMERIC(18,4) | YES | | Cost price |
| 11 | cost_price_type | INTEGER | YES | | Cost-price type. **Enum**: 1(160)=**[unconfirmed]**, 2(10)=**[unconfirmed]** |
| 12 | provisional_total_cost | NUMERIC(18,2) | YES | | Provisional total cost |
| 13 | total_purchase_cost | NUMERIC(18,2) | YES | | Total purchase cost |
| 14 | min_discount_price | NUMERIC(18,2) | YES | | Minimum discounted price |
| 15 | is_discountable | INTEGER | YES | | Discount allowed. **Enum**: 1(170)=allowed |
| 16 | days_on_shelf | INTEGER | YES | | Days on shelf |
| 17 | audit_status | INTEGER | YES | | Audit status. **Enum**: 2(170)=**[unconfirmed]** |
| 18 | sale_channel | INTEGER | YES | | Sales channel (currently all empty) |
| 19 | is_warehousing | INTEGER | YES | | Inventory management. **Enum**: 1(170)=inventory-managed |
| 20 | freeze_status | INTEGER | YES | | Freeze status. **Enum**: 0(170)=not frozen |
| 21 | forbid_sell_status | INTEGER | YES | | Sale-forbidden status. **Enum**: 1(170)=**[unconfirmed]** |
| 22 | able_site_transfer | INTEGER | YES | | Inter-store transfer allowed. **Enum**: 0(1), 2(169) **[unconfirmed]** |
| 23 | custom_label_type | INTEGER | YES | | Custom label type. **Enum**: 2(170)=**[unconfirmed]** |
| 24 | option_required | INTEGER | YES | | Option required. **Enum**: 1(170)=**[unconfirmed]** |
| 25 | remark | TEXT | YES | | Remark (currently all empty) |
| 26 | sort_order | INTEGER | YES | | Sort order |
| 27 | scd2_start_time | TIMESTAMPTZ | NO | PK | SCD2 version effective from |
| 28 | scd2_end_time | TIMESTAMPTZ | YES | | SCD2 version effective to |
| 29 | scd2_is_current | INTEGER | YES | | Current-version flag |
| 30 | scd2_version | INTEGER | YES | | Version number |

## Usage

```sql
-- Join the main table with the extension table
SELECT m.goods_name, m.sale_price, m.sale_qty, e.unit, e.stock_qty, e.cost_price
FROM billiards_dwd.dim_store_goods m
JOIN billiards_dwd.dim_store_goods_ex e
  ON m.site_goods_id = e.site_goods_id
 AND m.scd2_start_time = e.scd2_start_time
WHERE m.scd2_is_current = 1;
```
etl_billiards/docs/bd_manual/Ex/BD_manual_dim_table_ex.md (new file, 50 lines)
@@ -0,0 +1,50 @@
# dim_table_ex: Billiard-Table Extension Table

> Generated: 2026-01-28

## Table Info

| Property | Value |
|----------|-------|
| Schema | billiards_dwd |
| Table | dim_table_ex |
| Primary key | table_id, scd2_start_time |
| Main table | dim_table |
| Row count | 74 |
| Description | Extended billiard-table fields: display status, reservation settings, cloth usage, etc. |

## Fields

| # | Field | Type | Nullable | Key | Description |
|---|-------|------|----------|-----|-------------|
| 1 | table_id | BIGINT | NO | PK | Table ID → dim_table |
| 2 | show_status | INTEGER | YES | | Display status. **Enum**: 1(70)=visible, 2(4)=hidden |
| 3 | is_online_reservation | INTEGER | YES | | Online reservation. **Enum**: 1(2)=supported, 2(72)=not supported |
| 4 | table_cloth_use_time | INTEGER | YES | | Cloth usage time (currently all empty) |
| 5 | table_cloth_use_cycle | INTEGER | YES | | Cloth usage cycle (currently all 0) |
| 6 | table_status | INTEGER | YES | | Table status. **Enum**: 1(66)=idle, 2(1)=**[unconfirmed]**, 3(7)=in use **[unconfirmed]** |
| 7 | scd2_start_time | TIMESTAMPTZ | NO | PK | SCD2 version effective from |
| 8 | scd2_end_time | TIMESTAMPTZ | YES | | SCD2 version effective to |
| 9 | scd2_is_current | INTEGER | YES | | Current-version flag |
| 10 | scd2_version | INTEGER | YES | | Version number |

## Sample Data

| table_id | show_status | is_online_reservation | table_status |
|----------|-------------|-----------------------|--------------|
| 2791964216463493 | 1 | 2 | 1 |
| 2792521437958213 | 1 | 2 | 1 |
| 2793001695301765 | 1 | 2 | 1 |

## Usage

```sql
-- Join the main table with the extension table
SELECT m.table_name, m.site_table_area_name, e.show_status, e.table_status
FROM billiards_dwd.dim_table m
JOIN billiards_dwd.dim_table_ex e
  ON m.table_id = e.table_id
 AND m.scd2_start_time = e.scd2_start_time
WHERE m.scd2_is_current = 1;
```
@@ -0,0 +1,54 @@
# dim_tenant_goods_ex: Tenant Goods Extension Table

> Generated: 2026-01-28

## Table Info

| Property | Value |
|----------|-------|
| Schema | billiards_dwd |
| Table | dim_tenant_goods_ex |
| Primary key | tenant_goods_id, scd2_start_time |
| Main table | dim_tenant_goods |
| Row count | 171 |
| Description | Extended tenant-goods fields: images, barcodes, cost, discount configuration, etc. |

## Fields

| # | Field | Type | Nullable | Key | Description |
|---|-------|------|----------|-----|-------------|
| 1 | tenant_goods_id | BIGINT | NO | PK | Tenant goods ID → dim_tenant_goods |
| 2 | remark_name | VARCHAR(128) | YES | | Remark name (currently all empty) |
| 3 | pinyin_initial | VARCHAR(128) | YES | | Pinyin initials |
| 4 | goods_cover | VARCHAR(512) | YES | | Goods cover image URL |
| 5 | goods_bar_code | VARCHAR(64) | YES | | Goods barcode (currently all empty) |
| 6 | commodity_code | VARCHAR(64) | YES | | Commodity code |
| 7 | commodity_code_list | VARCHAR(256) | YES | | Commodity code list |
| 8 | min_discount_price | NUMERIC(18,2) | YES | | Minimum discounted price |
| 9 | cost_price | NUMERIC(18,2) | YES | | Cost price |
| 10 | cost_price_type | INTEGER | YES | | Cost-price type. **Enum**: 1(160), 2(11) **[unconfirmed]** |
| 11 | able_discount | INTEGER | YES | | Discount allowed. **Enum**: 1(171)=allowed |
| 12 | sale_channel | INTEGER | YES | | Sales channel (currently all empty) |
| 13 | is_warehousing | INTEGER | YES | | Inventory management. **Enum**: 1(171)=inventory-managed |
| 14 | is_in_site | BOOLEAN | YES | | In store. **Enum**: False(171)=no |
| 15 | able_site_transfer | INTEGER | YES | | Inter-store transfer allowed. **Enum**: 0(1), 2(170) **[unconfirmed]** |
| 16 | common_sale_royalty | INTEGER | YES | | Regular-sale commission (currently all 0) |
| 17 | point_sale_royalty | INTEGER | YES | | Point-sale commission (currently all 0) |
| 18 | out_goods_id | BIGINT | YES | | External goods ID (currently all 0) |
| 19 | scd2_start_time | TIMESTAMPTZ | NO | PK | SCD2 version effective from |
| 20 | scd2_end_time | TIMESTAMPTZ | YES | | SCD2 version effective to |
| 21 | scd2_is_current | INTEGER | YES | | Current-version flag |
| 22 | scd2_version | INTEGER | YES | | Version number |

## Usage

```sql
-- Join the main table with the extension table
SELECT m.goods_name, m.market_price, e.cost_price, e.min_discount_price
FROM billiards_dwd.dim_tenant_goods m
JOIN billiards_dwd.dim_tenant_goods_ex e
  ON m.tenant_goods_id = e.tenant_goods_id
 AND m.scd2_start_time = e.scd2_start_time
WHERE m.scd2_is_current = 1;
```
@@ -0,0 +1,61 @@
# dwd_assistant_service_log_ex: Assistant Service Log Extension Table

> Generated: 2026-01-28

## Table Info

| Property | Value |
|----------|-------|
| Schema | billiards_dwd |
| Table | dwd_assistant_service_log_ex |
| Primary key | assistant_service_id |
| Main table | dwd_assistant_service_log |
| Row count | 5003 |
| Description | Extended assistant service-log fields: table, discounts, ratings, void details, etc. |

## Fields

| # | Field | Type | Nullable | Key | Description |
|---|-------|------|----------|-----|-------------|
| 1 | assistant_service_id | BIGINT | NO | PK | Service log ID → dwd_assistant_service_log |
| 2 | table_name | VARCHAR(64) | YES | | Table name. **Sample values**: "888", "TV", "VIP5", "666", "C1", "VIP1", "S1", "M1", "A1" |
| 3 | assistant_name | VARCHAR(64) | YES | | Assistant real name. **Sample values**: "陈嘉怡", "张永英", "邹绮", "胡敏" |
| 4 | ledger_name | VARCHAR(128) | YES | | Ledger name (staff no. + nickname). **Sample values**: "2-佳怡", "23-婉婉", "15-七七" |
| 5 | ledger_group_name | VARCHAR(128) | YES | | Ledger group name (currently all empty) |
| 6 | ledger_count | INTEGER | YES | | Billed duration (seconds; similar to income_seconds on the main table) |
| 7 | member_discount_amount | NUMERIC(10,2) | YES | | Member discount amount |
| 8 | manual_discount_amount | NUMERIC(10,2) | YES | | Manual discount amount |
| 9 | service_money | NUMERIC(10,2) | YES | | Service-fee amount |
| 10 | returns_clock | INTEGER | YES | | Refunded time (currently all 0) |
| 11 | ledger_start_time | TIMESTAMPTZ | YES | | Ledger start time |
| 12 | ledger_end_time | TIMESTAMPTZ | YES | | Ledger end time |
| 13 | ledger_status | INTEGER | YES | | Ledger status. **Enum**: 1(5003)=settled |
| 14 | is_confirm | INTEGER | YES | | Confirmed flag. **Enum**: 2(5003)=**[unconfirmed]** |
| 15 | is_single_order | INTEGER | YES | | Standalone order. **Enum**: 1(5003)=yes |
| 16 | is_not_responding | INTEGER | YES | | Unresponsive flag. **Enum**: 0(5003)=normal |
| 17 | is_trash | INTEGER | YES | | Voided flag. **Enum**: 0(5003)=normal |
| 18 | trash_applicant_id | BIGINT | YES | | Void applicant ID (currently all 0) |
| 19 | trash_applicant_name | VARCHAR(64) | YES | | Void applicant name (currently all empty) |
| 20 | trash_reason | VARCHAR(255) | YES | | Void reason (currently all empty) |
| 21 | salesman_user_id | BIGINT | YES | | Salesperson user ID (currently all 0) |
| 22 | salesman_name | VARCHAR(64) | YES | | Salesperson name (currently all empty) |
| 23 | salesman_org_id | BIGINT | YES | | Salesperson org ID (currently all 0) |
| 24 | skill_grade | INTEGER | YES | | Skill rating (currently all 0) |
| 25 | service_grade | INTEGER | YES | | Service rating (currently all 0) |
| 26 | composite_grade | NUMERIC(5,2) | YES | | Composite rating |
| 27 | sum_grade | NUMERIC(10,2) | YES | | Cumulative rating |
| 28 | get_grade_times | INTEGER | YES | | Rating count (currently all 0) |
| 29 | grade_status | INTEGER | YES | | Rating status. **Enum**: 0(216)=unrated, 1(4787)=rated **[unconfirmed]** |
| 30 | composite_grade_time | TIMESTAMPTZ | YES | | Rating time |

## Usage

```sql
-- Join the main table with the extension table
SELECT m.nickname, m.ledger_amount, e.table_name, e.assistant_name, e.grade_status
FROM billiards_dwd.dwd_assistant_service_log m
JOIN billiards_dwd.dwd_assistant_service_log_ex e
  ON m.assistant_service_id = e.assistant_service_id
WHERE m.is_delete = 0;
```
@@ -0,0 +1,49 @@
# dwd_assistant_trash_event_ex: Assistant Service Void Event Extension Table

> Generated: 2026-01-28

## Table Info

| Property | Value |
|----------|-------|
| Schema | billiards_dwd |
| Table | dwd_assistant_trash_event_ex |
| Primary key | assistant_trash_event_id |
| Main table | dwd_assistant_trash_event |
| Row count | 98 |
| Description | Extension table for voided assistant services, recording table and table-area names |

## Fields

| # | Field | Type | Nullable | Key | Description |
|---|-------|------|----------|-----|-------------|
| 1 | assistant_trash_event_id | BIGINT | NO | PK | Void event ID → dwd_assistant_trash_event |
| 2 | table_name | VARCHAR(64) | YES | | Table name. **Top values**: "888"(14), "发财"(8), "C1"(7), "M7"(6) |
| 3 | table_area_name | VARCHAR(64) | YES | | Table-area name. **Enum**: "C区"(16), "K包"(14), "A区"(11), "发财"(8), "B区"(7), "麻将房"(7), "补时长"(7), "VIP包厢"(6) |

## Voids by Table Area

| Table area | Voids | Share |
|------------|-------|-------|
| C区 | 16 | 16.3% |
| K包 | 14 | 14.3% |
| A区 | 11 | 11.2% |
| 发财 | 8 | 8.2% |
| B区 | 7 | 7.1% |
| 麻将房 | 7 | 7.1% |
| 补时长 | 7 | 7.1% |
| VIP包厢 | 6 | 6.1% |

## Sample Data

| table_name | table_area_name |
|------------|-----------------|
| C1 | C区 |
| 补时长5 | 补时长 |
| VIP1 | VIP包厢 |
| 888 | K包 |

## Usage

Joins to the main table `dwd_assistant_trash_event` on `assistant_trash_event_id`, supplying table and table-area names; a minimal sketch follows.
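
A join sketch in the style of the other extension-table manuals; this doc does not list the main table's own columns, so `m.*` stands in for them:

```sql
-- Attach table/area names to the void events (a sketch, not code from the repo)
SELECT m.*, e.table_name, e.table_area_name
FROM billiards_dwd.dwd_assistant_trash_event m
JOIN billiards_dwd.dwd_assistant_trash_event_ex e
  ON m.assistant_trash_event_id = e.assistant_trash_event_id;
```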

@@ -0,0 +1,62 @@
# dwd_groupbuy_redemption_ex: Group-Buy Redemption Extension Table

> Generated: 2026-01-28

## Table Info

| Property | Value |
|----------|-------|
| Schema | billiards_dwd |
| Table | dwd_groupbuy_redemption_ex |
| Primary key | redemption_id |
| Main table | dwd_groupbuy_redemption |
| Row count | 11427 |
| Description | Extension table for group-buy redemptions: store, table name, operator, and other extra info |

## Fields

| # | Field | Type | Nullable | Key | Description |
|---|-------|------|----------|-----|-------------|
| 1 | redemption_id | BIGINT | NO | PK | Redemption ID → dwd_groupbuy_redemption |
| 2 | site_name | VARCHAR(64) | YES | | Store name. **Enum**: "朗朗桌球"(11427) |
| 3 | table_name | VARCHAR(64) | YES | | Table name. **Top values**: "A3"(892), "A4"(858), "A5"(835), "A7"(774) |
| 4 | table_area_name | VARCHAR(64) | YES | | Table-area name. **Enum**: "A区"(9294), "B区"(998), "斯诺克区"(962), "麻将房"(137) |
| 5 | order_pay_id | BIGINT | YES | | Payment ID (currently all 0) |
| 6 | goods_option_price | NUMERIC(18,2) | YES | | Goods option price |
| 7 | goods_promotion_money | NUMERIC(18,2) | YES | | Goods promotion amount |
| 8 | table_service_promotion_money | NUMERIC(18,2) | YES | | Table-service promotion amount |
| 9 | assistant_promotion_money | NUMERIC(18,2) | YES | | Assistant promotion amount |
| 10 | assistant_service_promotion_money | NUMERIC(18,2) | YES | | Assistant-service promotion amount |
| 11 | reward_promotion_money | NUMERIC(18,2) | YES | | Reward promotion amount |
| 12 | recharge_promotion_money | NUMERIC(18,2) | YES | | Recharge promotion amount |
| 13 | offer_type | INTEGER | YES | | Offer type. **Enum**: 1(11427) |
| 14 | ledger_status | INTEGER | YES | | Ledger status. **Enum**: 1(11427)=settled |
| 15 | operator_id | BIGINT | YES | | Operator ID |
| 16 | operator_name | VARCHAR(64) | YES | | Operator name. **Enum**: "收银员:郑丽珊"(11426), "收银员:郑丽珍"(1) |
| 17 | salesman_user_id | BIGINT | YES | | Salesperson user ID (currently all 0) |
| 18 | salesman_name | VARCHAR(64) | YES | | Salesperson name (currently all NULL) |
| 19 | salesman_role_id | BIGINT | YES | | Salesperson role ID (currently all 0) |
| 20 | salesman_org_id | BIGINT | YES | | Salesperson org ID (currently all 0) |
| 21 | ledger_group_name | VARCHAR(128) | YES | | Ledger group name (currently all NULL) |

## Redemptions by Table Area

| Table area | Redemptions | Share |
|------------|-------------|-------|
| A区 | 9294 | 81.3% |
| B区 | 998 | 8.7% |
| 斯诺克区 | 962 | 8.4% |
| 麻将房 | 137 | 1.2% |

## Sample Data

| table_name | table_area_name | operator_name | ledger_status |
|------------|-----------------|---------------|---------------|
| A17 | A区 | 收银员:郑丽珊 | 1 |
| A4 | A区 | 收银员:郑丽珊 | 1 |
| B5 | B区 | 收银员:郑丽珊 | 1 |

## Usage

Joins to the main table `dwd_groupbuy_redemption` on `redemption_id`, supplying store, table name, operator, and other extra info; a minimal sketch follows.
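
A minimal sketch of that join, plus the aggregation that reproduces the per-area distribution above (`m.*` stands in for main-table columns not listed here):

```sql
-- Attach table/area/operator context to redemptions (a sketch, not repo code)
SELECT m.*, e.table_name, e.table_area_name, e.operator_name
FROM billiards_dwd.dwd_groupbuy_redemption m
JOIN billiards_dwd.dwd_groupbuy_redemption_ex e
  ON m.redemption_id = e.redemption_id;

-- Reproduce the per-area redemption distribution
SELECT table_area_name, COUNT(*) AS redemptions
FROM billiards_dwd.dwd_groupbuy_redemption_ex
GROUP BY table_area_name
ORDER BY redemptions DESC;
```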

@@ -0,0 +1,49 @@
# dwd_member_balance_change_ex: Member Balance Change Extension Table

> Generated: 2026-01-28

## Table Info

| Property | Value |
|----------|-------|
| Schema | billiards_dwd |
| Table | dwd_member_balance_change_ex |
| Primary key | balance_change_id |
| Main table | dwd_member_balance_change |
| Row count | 4745 |
| Description | Extension table for member balance changes, recording operator and store names |

## Fields

| # | Field | Type | Nullable | Key | Description |
|---|-------|------|----------|-----|-------------|
| 1 | balance_change_id | BIGINT | NO | PK | Change log ID → dwd_member_balance_change |
| 2 | pay_site_name | VARCHAR(64) | YES | | Payment store name. **Enum**: "朗朗桌球"(4720) |
| 3 | register_site_name | VARCHAR(64) | YES | | Registration store name. **Enum**: "朗朗桌球"(4745) |
| 4 | refund_amount | NUMERIC(18,2) | YES | | Refund amount |
| 5 | operator_id | BIGINT | YES | | Operator ID |
| 6 | operator_name | VARCHAR(64) | YES | | Operator name. **Enum**: "收银员:郑丽珊"(4101), "店长:郑丽珊"(223), "管理员:郑丽珊"(153), "店长:蒋雨轩"(124), "店长:谢晓洪"(115), "店长:黄月柳"(29) |

## Operator Distribution

| Operator | Operations | Share |
|----------|------------|-------|
| 收银员:郑丽珊 | 4101 | 86.4% |
| 店长:郑丽珊 | 223 | 4.7% |
| 管理员:郑丽珊 | 153 | 3.2% |
| 店长:蒋雨轩 | 124 | 2.6% |
| 店长:谢晓洪 | 115 | 2.4% |
| 店长:黄月柳 | 29 | 0.6% |

## Sample Data

| pay_site_name | register_site_name | operator_name | refund_amount |
|---------------|--------------------|---------------|---------------|
| 朗朗桌球 | 朗朗桌球 | 收银员:郑丽珊 | 0.00 |
| 朗朗桌球 | 朗朗桌球 | 收银员:郑丽珊 | 0.00 |
| 朗朗桌球 | 朗朗桌球 | 收银员:郑丽珊 | 0.00 |

## Usage

Joins to the main table `dwd_member_balance_change` on `balance_change_id`, supplying operator and store names; a minimal sketch follows.
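
A minimal sketch of the join, plus the aggregation that reproduces the operator distribution above (`m.*` stands in for main-table columns not listed here):

```sql
-- Attach operator/store context to balance changes (a sketch, not repo code)
SELECT m.*, e.operator_name, e.pay_site_name, e.register_site_name
FROM billiards_dwd.dwd_member_balance_change m
JOIN billiards_dwd.dwd_member_balance_change_ex e
  ON m.balance_change_id = e.balance_change_id;

-- Reproduce the operator distribution
SELECT operator_name, COUNT(*) AS changes
FROM billiards_dwd.dwd_member_balance_change_ex
GROUP BY operator_name
ORDER BY changes DESC;
```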

@@ -0,0 +1,47 @@
# dwd_platform_coupon_redemption_ex: Platform Coupon Redemption Extension Table

> Generated: 2026-01-28

## Table Info

| Property | Value |
|----------|-------|
| Schema | billiards_dwd |
| Table | dwd_platform_coupon_redemption_ex |
| Primary key | platform_coupon_redemption_id |
| Main table | dwd_platform_coupon_redemption |
| Row count | 16977 |
| Description | Extension table for platform coupon redemptions: coupon cover, remark, operator, etc. |

## Fields

| # | Field | Type | Nullable | Key | Description |
|---|-------|------|----------|-----|-------------|
| 1 | platform_coupon_redemption_id | BIGINT | NO | PK | Redemption ID → dwd_platform_coupon_redemption |
| 2 | coupon_cover | VARCHAR(255) | YES | | Coupon cover image URL (currently all NULL) |
| 3 | coupon_remark | VARCHAR(255) | YES | | Coupon remark (Douyin coupons carry verification info) |
| 4 | groupon_type | INTEGER | YES | | Group-buy type. **Enum**: 1(16977)=**[unconfirmed]** |
| 5 | operator_id | BIGINT | YES | | Operator ID |
| 6 | operator_name | VARCHAR(50) | YES | | Operator name. **Enum**: "收银员:郑丽珊"(16968), "店长:郑丽珊"(8), "收银员:郑丽珍"(1) |

## Operator Distribution

| Operator | Redemptions | Share |
|----------|-------------|-------|
| 收银员:郑丽珊 | 16968 | 99.9% |
| 店长:郑丽珊 | 8 | <0.1% |
| 收银员:郑丽珍 | 1 | <0.1% |

## Sample Data

| groupon_type | operator_name | coupon_cover | coupon_remark |
|--------------|---------------|--------------|---------------|
| 1 | 收银员:郑丽珊 | NULL | NULL |
| 1 | 收银员:郑丽珊 | NULL | NULL |

## Usage

Joins to the main table `dwd_platform_coupon_redemption` on `platform_coupon_redemption_id`, supplying operator and other extra info; a minimal sketch follows the note below.

**Note**: For Douyin-channel redemptions, the `coupon_remark` field contains verification info.
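
A minimal join sketch; the filter is only one example use, pulling the Douyin verification remarks mentioned in the note (`m.*` stands in for main-table columns not listed here):

```sql
-- Attach operator/remark context to coupon redemptions (a sketch, not repo code)
SELECT m.*, e.operator_name, e.coupon_remark
FROM billiards_dwd.dwd_platform_coupon_redemption m
JOIN billiards_dwd.dwd_platform_coupon_redemption_ex e
  ON m.platform_coupon_redemption_id = e.platform_coupon_redemption_id
WHERE e.coupon_remark IS NOT NULL;  -- e.g. Douyin redemptions with verification info
```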

@@ -0,0 +1,69 @@
# dwd_recharge_order_ex: Recharge Order Extension Table

> Generated: 2026-01-28

## Table Info

| Property | Value |
|----------|-------|
| Schema | billiards_dwd |
| Table | dwd_recharge_order_ex |
| Primary key | recharge_order_id |
| Main table | dwd_recharge_order |
| Row count | 455 |
| Description | Extension table for recharge orders: operator, itemized amounts, etc. |

## Fields

| # | Field | Type | Nullable | Key | Description |
|---|-------|------|----------|-----|-------------|
| 1 | recharge_order_id | BIGINT | NO | PK | Recharge order ID → dwd_recharge_order |
| 2 | site_name_snapshot | TEXT | YES | | Store name snapshot. **Enum**: "朗朗桌球"(374) |
| 3 | settle_status | INTEGER | YES | | Settlement status. **Enum**: 2(455)=settled |
| 4 | is_bind_member | BOOLEAN | YES | | Member bound. **Enum**: False(455) |
| 5 | is_activity | BOOLEAN | YES | | Activity order. **Enum**: False(455) |
| 6 | is_use_coupon | BOOLEAN | YES | | Coupon used. **Enum**: False(455) |
| 7 | is_use_discount | BOOLEAN | YES | | Discount used. **Enum**: False(455) |
| 8 | can_be_revoked | BOOLEAN | YES | | Revocable. **Enum**: False(455) |
| 9 | online_amount | NUMERIC(18,2) | YES | | Online payment amount |
| 10 | balance_amount | NUMERIC(18,2) | YES | | Balance payment amount |
| 11 | card_amount | NUMERIC(18,2) | YES | | Card payment amount |
| 12 | coupon_amount | NUMERIC(18,2) | YES | | Coupon amount |
| 13 | recharge_card_amount | NUMERIC(18,2) | YES | | Recharge-card amount |
| 14 | gift_card_amount | NUMERIC(18,2) | YES | | Gift-card amount |
| 15 | prepay_money | NUMERIC(18,2) | YES | | Prepaid amount |
| 16 | consume_money | NUMERIC(18,2) | YES | | Consumption amount |
| 17 | goods_money | NUMERIC(18,2) | YES | | Goods amount |
| 18 | real_goods_money | NUMERIC(18,2) | YES | | Actual goods amount received |
| 19 | table_charge_money | NUMERIC(18,2) | YES | | Table-fee amount |
| 20 | service_money | NUMERIC(18,2) | YES | | Service-fee amount |
| 21 | activity_discount | NUMERIC(18,2) | YES | | Activity discount amount |
| 22 | all_coupon_discount | NUMERIC(18,2) | YES | | Total coupon discount |
| 23 | goods_promotion_money | NUMERIC(18,2) | YES | | Goods promotion amount |
| 24 | assistant_promotion_money | NUMERIC(18,2) | YES | | Assistant promotion amount |
| 25 | assistant_pd_money | NUMERIC(18,2) | YES | | Assistant play-along amount |
| 26 | assistant_cx_money | NUMERIC(18,2) | YES | | Assistant training amount |
| 27 | assistant_manual_discount | NUMERIC(18,2) | YES | | Assistant manual discount |
| 28 | coupon_sale_amount | NUMERIC(18,2) | YES | | Coupon sale amount |
| 29 | member_discount_amount | NUMERIC(18,2) | YES | | Member discount amount |
| 30 | point_discount_price | NUMERIC(18,2) | YES | | Point deduction amount |
| 31 | point_discount_cost | NUMERIC(18,2) | YES | | Point deduction cost |
| 32 | adjust_amount | NUMERIC(18,2) | YES | | Adjustment amount |
| 33 | rounding_amount | NUMERIC(18,2) | YES | | Rounding amount |
| 34 | operator_id | BIGINT | YES | | Operator ID |
| 35 | operator_name_snapshot | TEXT | YES | | Operator name snapshot. **Enum**: "收银员:郑丽珊"(455) |
| 36 | salesman_user_id | BIGINT | YES | | Salesperson user ID (currently all 0) |
| 37 | salesman_name | TEXT | YES | | Salesperson name (currently all NULL) |
| 38 | order_remark | TEXT | YES | | Order remark (currently all NULL) |
| 39 | table_id | INTEGER | YES | | Table ID (currently all 0) |
| 40 | serial_number | INTEGER | YES | | Serial number (currently all 0) |
| 41 | revoke_order_id | BIGINT | YES | | Revoked order ID (currently all 0) |
| 42 | revoke_order_name | TEXT | YES | | Revoked order name (currently all NULL) |
| 43 | revoke_time | TIMESTAMPTZ | YES | | Revocation time |

## Usage

Joins to the main table `dwd_recharge_order` on `recharge_order_id`, supplying operator and itemized amounts; a minimal sketch follows the note below.

**Note**: Sample data could not be fetched due to a date-parsing error.
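
A minimal join sketch; the selected extension columns come from the field list above, while `m.*` stands in for main-table columns not listed here:

```sql
-- Attach the payment breakdown to recharge orders (a sketch, not repo code)
SELECT m.*, e.online_amount, e.balance_amount, e.card_amount, e.operator_name_snapshot
FROM billiards_dwd.dwd_recharge_order m
JOIN billiards_dwd.dwd_recharge_order_ex e
  ON m.recharge_order_id = e.recharge_order_id;
```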

etl_billiards/docs/bd_manual/Ex/BD_manual_dwd_refund_ex.md (new file, 51 lines)
@@ -0,0 +1,51 @@
# dwd_refund_ex: Refund Log Extension Table

> Generated: 2026-01-28

## Table Info

| Property | Value |
|----------|-------|
| Schema | billiards_dwd |
| Table | dwd_refund_ex |
| Primary key | refund_id |
| Main table | dwd_refund |
| Row count | 45 |
| Description | Extension table for refunds, recording detailed status and channel info |

## Fields

| # | Field | Type | Nullable | Key | Description |
|---|-------|------|----------|-----|-------------|
| 1 | refund_id | BIGINT | NO | PK | Refund log ID → dwd_refund |
| 2 | tenant_name | VARCHAR(64) | YES | | Tenant name. **Enum**: "朗朗桌球"(45) |
| 3 | pay_sn | BIGINT | YES | | Payment serial number (currently all 0) |
| 4 | refund_amount | NUMERIC(18,2) | YES | | Refund amount (redundant copy) |
| 5 | round_amount | NUMERIC(18,2) | YES | | Rounding amount |
| 6 | balance_frozen_amount | NUMERIC(18,2) | YES | | Frozen balance amount |
| 7 | card_frozen_amount | NUMERIC(18,2) | YES | | Frozen card amount |
| 8 | pay_status | INTEGER | YES | | Payment status. **Enum**: 2(45)=refunded |
| 9 | action_type | INTEGER | YES | | Action type. **Enum**: 2(45)=refund |
| 10 | is_revoke | INTEGER | YES | | Revoked. **Enum**: 0(45)=no |
| 11 | is_delete | INTEGER | YES | | Delete flag. **Enum**: 0(45)=not deleted |
| 12 | check_status | INTEGER | YES | | Review status. **Enum**: 1(45)=reviewed |
| 13 | online_pay_channel | INTEGER | YES | | Online payment channel (currently all 0) |
| 14 | online_pay_type | INTEGER | YES | | Online payment type (currently all 0) |
| 15 | pay_terminal | INTEGER | YES | | Payment terminal. **Enum**: 1(45)=POS |
| 16 | pay_config_id | INTEGER | YES | | Payment config ID (currently all 0) |
| 17 | cashier_point_id | INTEGER | YES | | Cashier point ID (currently all 0) |
| 18 | operator_id | BIGINT | YES | | Operator ID (currently all 0) |
| 19 | channel_payer_id | VARCHAR(128) | YES | | Channel payer ID (currently all NULL) |
| 20 | channel_pay_no | VARCHAR(128) | YES | | Channel payment number (currently all NULL) |

## Sample Data

| tenant_name | pay_status | action_type | check_status |
|-------------|------------|-------------|--------------|
| 朗朗桌球 | 2 | 2 | 1 |
| 朗朗桌球 | 2 | 2 | 1 |

## Usage

Joins to the main table `dwd_refund` on `refund_id`, supplying refund status and channel details; a minimal sketch follows.
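
A minimal join sketch (`m.*` stands in for main-table columns not listed here):

```sql
-- Attach refund status/channel details (a sketch, not repo code)
SELECT m.*, e.pay_status, e.check_status, e.pay_terminal
FROM billiards_dwd.dwd_refund m
JOIN billiards_dwd.dwd_refund_ex e
  ON m.refund_id = e.refund_id;
```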

@@ -0,0 +1,69 @@
# dwd_settlement_head_ex: Settlement Header Extension Table

> Generated: 2026-01-28

## Table Info

| Property | Value |
|----------|-------|
| Schema | billiards_dwd |
| Table | dwd_settlement_head_ex |
| Primary key | order_settle_id |
| Main table | dwd_settlement_head |
| Row count | 23366 |
| Description | Extension table for settlements: payment breakdown, revocation info, operator, activity flags, etc. |

## Fields

| # | Field | Type | Nullable | Key | Description |
|---|-------|------|----------|-----|-------------|
| 1 | order_settle_id | BIGINT | NO | PK | Settlement ID → dwd_settlement_head |
| 2 | serial_number | INTEGER | YES | | Serial number (currently all 0) |
| 3 | settle_status | INTEGER | YES | | Settlement status. **Enum**: 2(23366)=completed **[unconfirmed]** |
| 4 | can_be_revoked | BOOLEAN | YES | | Revocable. **Enum**: False(23366)=no |
| 5 | revoke_order_name | VARCHAR(100) | YES | | Revoked order name (currently all empty) |
| 6 | revoke_time | TIMESTAMPTZ | YES | | Revocation time |
| 7 | is_first_order | BOOLEAN | YES | | First order. **Enum**: False(23366)=no |
| 8 | service_money | NUMERIC(18,2) | YES | | Service-fee amount |
| 9 | cash_amount | NUMERIC(18,2) | YES | | Cash payment amount |
| 10 | card_amount | NUMERIC(18,2) | YES | | Card payment amount |
| 11 | online_amount | NUMERIC(18,2) | YES | | Online payment amount |
| 12 | refund_amount | NUMERIC(18,2) | YES | | Refund amount |
| 13 | prepay_money | NUMERIC(18,2) | YES | | Prepaid amount |
| 14 | payment_method | INTEGER | YES | | Payment method (currently all 0) |
| 15 | coupon_sale_amount | NUMERIC(18,2) | YES | | Coupon sale amount |
| 16 | all_coupon_discount | NUMERIC(18,2) | YES | | Total coupon discount |
| 17 | goods_promotion_money | NUMERIC(18,2) | YES | | Goods promotion amount |
| 18 | assistant_promotion_money | NUMERIC(18,2) | YES | | Assistant promotion amount |
| 19 | activity_discount | NUMERIC(18,2) | YES | | Activity discount |
| 20 | assistant_manual_discount | NUMERIC(18,2) | YES | | Assistant manual discount |
| 21 | point_discount_price | NUMERIC(18,2) | YES | | Point deduction amount |
| 22 | point_discount_cost | NUMERIC(18,2) | YES | | Point deduction cost |
| 23 | is_use_coupon | BOOLEAN | YES | | Coupon used. **Enum**: False(23366)=no |
| 24 | is_use_discount | BOOLEAN | YES | | Discount used. **Enum**: False(23366)=no |
| 25 | is_activity | BOOLEAN | YES | | Activity order. **Enum**: False(23366)=no |
| 26 | operator_name | VARCHAR(100) | YES | | Operator name. **Enum**: "收银员:郑丽珊"(23361), "收银员:郑丽珍"(2), "教练:周蒙"(2), "店长:郑丽珊"(1) |
| 27 | salesman_name | VARCHAR(100) | YES | | Salesperson name (currently all empty) |
| 28 | order_remark | VARCHAR(255) | YES | | Order remark. **Sample values**: "五折"(42), "轩哥"(24), "陈德韩"(7), "免台费"(3) |
| 29 | operator_id | BIGINT | YES | | Operator ID |
| 30 | salesman_user_id | BIGINT | YES | | Salesperson user ID (currently all 0) |

## Usage

```sql
-- Join the main table with the extension table
SELECT
    m.settle_name, m.consume_money, m.pay_amount,
    e.operator_name, e.order_remark, e.settle_status
FROM billiards_dwd.dwd_settlement_head m
JOIN billiards_dwd.dwd_settlement_head_ex e
  ON m.order_settle_id = e.order_settle_id;

-- Tally orders by remark
SELECT order_remark, COUNT(*)
FROM billiards_dwd.dwd_settlement_head_ex
WHERE order_remark IS NOT NULL
GROUP BY order_remark
ORDER BY COUNT(*) DESC;
```
@@ -0,0 +1,59 @@
# dwd_store_goods_sale_ex 商品销售扩展表

> 生成时间:2026-01-28

## 表信息

| 属性 | 值 |
|------|-----|
| Schema | billiards_dwd |
| 表名 | dwd_store_goods_sale_ex |
| 主键 | store_goods_sale_id |
| 主表 | dwd_store_goods_sale |
| 记录数 | 17563 |
| 说明 | 商品销售扩展表,记录销售详情、折扣优惠等扩展信息 |

## 字段说明

| 序号 | 字段名 | 类型 | 可空 | 主键 | 说明 |
|------|--------|------|------|------|------|
| 1 | store_goods_sale_id | BIGINT | NO | PK | 销售流水 ID → dwd_store_goods_sale |
| 2 | legacy_order_goods_id | BIGINT | YES | | 旧系统订单商品 ID(当前全为 0) |
| 3 | site_name | TEXT | YES | | 门店名称。**枚举值**: "朗朗桌球"(17563) |
| 4 | legacy_site_id | BIGINT | YES | | 旧系统门店 ID |
| 5 | goods_remark | TEXT | YES | | 商品备注。**热门备注**: "哇哈哈矿泉水", "东方树叶", "可乐", "一次性手套", "地道肠" |
| 6 | option_value_name | TEXT | YES | | 选项值名称(当前全为 NULL) |
| 7 | operator_name | TEXT | YES | | 操作员名称。**枚举值**: "收银员:郑丽珊"(17562), "收银员:郑丽珍"(1) |
| 8 | open_salesman_flag | INTEGER | YES | | 开启销售员标记。**枚举值**: 2(17563)=否 |
| 9 | salesman_user_id | BIGINT | YES | | 销售员用户 ID(当前全为 0) |
| 10 | salesman_name | TEXT | YES | | 销售员名称(当前全为 NULL) |
| 11 | salesman_role_id | BIGINT | YES | | 销售员角色 ID(当前全为 0) |
| 12 | salesman_org_id | BIGINT | YES | | 销售员组织 ID(当前全为 0) |
| 13 | discount_money | NUMERIC(18,2) | YES | | 折扣金额 |
| 14 | returns_number | INTEGER | YES | | 退货数量(当前全为 0) |
| 15 | coupon_deduct_money | NUMERIC(18,2) | YES | | 优惠券抵扣金额 |
| 16 | member_discount_amount | NUMERIC(18,2) | YES | | 会员折扣金额 |
| 17 | point_discount_money | NUMERIC(18,2) | YES | | 积分抵扣金额 |
| 18 | point_discount_money_cost | NUMERIC(18,2) | YES | | 积分抵扣成本 |
| 19 | package_coupon_id | BIGINT | YES | | 套餐券 ID(当前全为 0) |
| 20 | order_coupon_id | BIGINT | YES | | 订单券 ID(当前全为 0) |
| 21 | member_coupon_id | BIGINT | YES | | 会员券 ID(当前全为 0) |
| 22 | option_price | NUMERIC(18,2) | YES | | 选项价格 |
| 23 | option_member_discount_money | NUMERIC(18,2) | YES | | 选项会员折扣金额 |
| 24 | option_coupon_deduct_money | NUMERIC(18,2) | YES | | 选项券抵扣金额 |
| 25 | push_money | NUMERIC(18,2) | YES | | 推手金额 |
| 26 | is_single_order | INTEGER | YES | | 是否独立订单。**枚举值**: 1(17563)=是 |
| 27 | sales_type | INTEGER | YES | | 销售类型。**枚举值**: 1(17563)=普通销售 |
| 28 | operator_id | BIGINT | YES | | 操作员 ID |

## 样本数据

| site_name | goods_remark | operator_name | discount_money |
|-----------|--------------|---------------|----------------|
| 朗朗桌球 | 鸡翅三个一份 | 收银员:郑丽珊 | 0.00 |
| 朗朗桌球 | NULL | 收银员:郑丽珊 | 0.00 |

## 使用说明

与主表 `dwd_store_goods_sale` 通过 `store_goods_sale_id` 关联,提供销售详情、折扣优惠等扩展信息。
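
以下为一个最小的关联查询示意(字段取自两表的字段说明):

```sql
-- 主表关联扩展表,查看销售明细对应的操作员与折扣信息
SELECT
    m.ledger_name, m.ledger_amount, m.real_goods_money,
    e.operator_name, e.goods_remark, e.discount_money
FROM billiards_dwd.dwd_store_goods_sale m
JOIN billiards_dwd.dwd_store_goods_sale_ex e
  ON m.store_goods_sale_id = e.store_goods_sale_id
WHERE m.is_delete = 0;
```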
@@ -0,0 +1,39 @@
# dwd_table_fee_adjust_ex 台费调整扩展表

> 生成时间:2026-01-28

## 表信息

| 属性 | 值 |
|------|-----|
| Schema | billiards_dwd |
| 表名 | dwd_table_fee_adjust_ex |
| 主键 | table_fee_adjust_id |
| 主表 | dwd_table_fee_adjust |
| 记录数 | 2849 |
| 说明 | 台费调整扩展表,记录调整类型、申请人、操作员等信息 |

## 字段说明

| 序号 | 字段名 | 类型 | 可空 | 主键 | 说明 |
|------|--------|------|------|------|------|
| 1 | table_fee_adjust_id | BIGINT | NO | PK | 台费调整 ID → dwd_table_fee_adjust |
| 2 | adjust_type | INTEGER | YES | | 调整类型。**枚举值**: 1(2849)=**[待确认]** |
| 3 | ledger_count | INTEGER | YES | | 账本数量。**枚举值**: 1(2849) |
| 4 | ledger_name | VARCHAR(128) | YES | | 账本名称(当前数据全为 NULL) |
| 5 | applicant_name | VARCHAR(64) | YES | | 申请人名称。**枚举值**: "收银员:郑丽珊"(2849) |
| 6 | operator_name | VARCHAR(64) | YES | | 操作员名称。**枚举值**: "收银员:郑丽珊"(2849) |
| 7 | applicant_id | BIGINT | YES | | 申请人 ID |
| 8 | operator_id | BIGINT | YES | | 操作员 ID |

## 样本数据

| adjust_type | applicant_name | operator_name |
|-------------|----------------|---------------|
| 1 | 收银员:郑丽珊 | 收银员:郑丽珊 |
| 1 | 收银员:郑丽珊 | 收银员:郑丽珊 |

## 使用说明

与主表 `dwd_table_fee_adjust` 通过 `table_fee_adjust_id` 关联,提供调整类型、申请人、操作员等扩展信息。
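
以下为一个最小的关联查询示意(字段取自两表的字段说明):

```sql
-- 主表关联扩展表,查看台费调整的申请人与操作员
SELECT
    m.table_fee_adjust_id, m.ledger_amount,
    e.adjust_type, e.applicant_name, e.operator_name
FROM billiards_dwd.dwd_table_fee_adjust m
JOIN billiards_dwd.dwd_table_fee_adjust_ex e
  ON m.table_fee_adjust_id = e.table_fee_adjust_id
WHERE m.is_delete = 0;
```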
@@ -0,0 +1,44 @@
# dwd_table_fee_log_ex 台费流水扩展表

> 生成时间:2026-01-28

## 表信息

| 属性 | 值 |
|------|-----|
| Schema | billiards_dwd |
| 表名 | dwd_table_fee_log_ex |
| 主键 | table_fee_log_id |
| 主表 | dwd_table_fee_log |
| 记录数 | 18386 |
| 说明 | 台费流水扩展表,记录操作员、销售员、时间等扩展信息 |

## 字段说明

| 序号 | 字段名 | 类型 | 可空 | 主键 | 说明 |
|------|--------|------|------|------|------|
| 1 | table_fee_log_id | BIGINT | NO | PK | 台费流水 ID → dwd_table_fee_log |
| 2 | operator_name | VARCHAR(64) | YES | | 操作员名称。**枚举值**: "收银员:郑丽珊"(18382), "收银员:郑丽珍"(2), "店长:郑丽珊"(1), "教练:周蒙"(1) |
| 3 | salesman_name | VARCHAR(64) | YES | | 销售员名称(当前数据全为 NULL) |
| 4 | used_card_amount | NUMERIC(18,2) | YES | | 使用卡金额(当前数据全为 0) |
| 5 | service_money | NUMERIC(18,2) | YES | | 服务费金额(当前数据全为 0) |
| 6 | mgmt_fee | NUMERIC(18,2) | YES | | 管理费金额(当前数据全为 0) |
| 7 | fee_total | NUMERIC(18,2) | YES | | 费用合计(当前数据全为 0) |
| 8 | ledger_start_time | TIMESTAMPTZ | YES | | 账本开始时间 |
| 9 | last_use_time | TIMESTAMPTZ | YES | | 最后使用时间 |
| 10 | operator_id | BIGINT | YES | | 操作员 ID。**枚举值**: 3个不同ID |
| 11 | salesman_user_id | BIGINT | YES | | 销售员用户 ID(当前数据全为 0) |
| 12 | salesman_org_id | BIGINT | YES | | 销售员组织 ID(当前数据全为 0) |

## 样本数据

| operator_name | ledger_start_time | last_use_time |
|---------------|-------------------|---------------|
| 收银员:郑丽珊 | 2025-11-09 22:28:57 | 2025-11-09 23:28:57 |
| 收银员:郑丽珊 | 2025-11-09 21:34:27 | 2025-11-09 23:34:27 |
| 收银员:郑丽珊 | 2025-11-09 22:32:55 | 2025-11-09 23:32:55 |

## 使用说明

与主表 `dwd_table_fee_log` 通过 `table_fee_log_id` 关联,提供操作员和时间相关的扩展信息。
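
以下为一个最小的关联查询示意(字段取自两表的字段说明):

```sql
-- 主表关联扩展表,查看台费流水对应的操作员与账本时间
SELECT
    m.ledger_name, m.ledger_amount, m.start_use_time,
    e.operator_name, e.ledger_start_time, e.last_use_time
FROM billiards_dwd.dwd_table_fee_log m
JOIN billiards_dwd.dwd_table_fee_log_ex e
  ON m.table_fee_log_id = e.table_fee_log_id
WHERE m.is_delete = 0;
```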
118 etl_billiards/docs/bd_manual/main/BD_manual_billiards_dwd.md Normal file
@@ -0,0 +1,118 @@
# billiards_dwd Schema 数据字典

> 生成时间:2026-01-28
> 数据来源:数据库实时查询 + 500行样本数据分析
> 不确定内容已使用 **[待确认]** 标记

## 概述

`billiards_dwd` 是台球门店数据仓库的明细层(DWD),包含维度表(DIM)和事实表(DWD)。本 Schema 基于 SCD2 缓慢变化维度设计,支持历史数据追溯。

---

## 维度表 (Dimension Tables)

| 序号 | 表名 | 说明 | 业务主键 | 扩展表 | 文档链接 |
|------|------|------|------|--------|----------|
| 1 | dim_assistant | 助教信息 | assistant_id | dim_assistant_ex | [主表](BD_manual_dim_assistant.md) / [扩展表](BD_manual_dim_assistant_ex.md) |
| 2 | dim_goods_category | 商品分类 | category_id | 无 | [主表](BD_manual_dim_goods_category.md) |
| 3 | dim_groupbuy_package | 团购套餐 | groupbuy_package_id | dim_groupbuy_package_ex | [主表](BD_manual_dim_groupbuy_package.md) / [扩展表](BD_manual_dim_groupbuy_package_ex.md) |
| 4 | dim_member | 会员信息 | member_id | dim_member_ex | [主表](BD_manual_dim_member.md) / [扩展表](BD_manual_dim_member_ex.md) |
| 5 | dim_member_card_account | 会员卡账户 | member_card_id | dim_member_card_account_ex | [主表](BD_manual_dim_member_card_account.md) / [扩展表](BD_manual_dim_member_card_account_ex.md) |
| 6 | dim_site | 门店信息 | site_id | dim_site_ex | [主表](BD_manual_dim_site.md) / [扩展表](BD_manual_dim_site_ex.md) |
| 7 | dim_store_goods | 门店商品 | site_goods_id | dim_store_goods_ex | [主表](BD_manual_dim_store_goods.md) / [扩展表](BD_manual_dim_store_goods_ex.md) |
| 8 | dim_table | 台桌信息 | table_id | dim_table_ex | [主表](BD_manual_dim_table.md) / [扩展表](BD_manual_dim_table_ex.md) |
| 9 | dim_tenant_goods | 租户商品 | tenant_goods_id | dim_tenant_goods_ex | [主表](BD_manual_dim_tenant_goods.md) / [扩展表](BD_manual_dim_tenant_goods_ex.md) |

> 注:各维度表的物理主键为(业务主键, scd2_start_time)组合,详见各表文档。

---

## 事实表 (Fact Tables)

| 序号 | 表名 | 说明 | 主键 | 扩展表 | 文档链接 |
|------|------|------|------|--------|----------|
| 1 | dwd_assistant_service_log | 助教服务流水 | assistant_service_id | dwd_assistant_service_log_ex | [主表](BD_manual_dwd_assistant_service_log.md) / [扩展表](BD_manual_dwd_assistant_service_log_ex.md) |
| 2 | dwd_assistant_trash_event | 助教服务作废 | assistant_trash_event_id | dwd_assistant_trash_event_ex | [主表](BD_manual_dwd_assistant_trash_event.md) / [扩展表](BD_manual_dwd_assistant_trash_event_ex.md) |
| 3 | dwd_groupbuy_redemption | 团购券核销 | redemption_id | dwd_groupbuy_redemption_ex | [主表](BD_manual_dwd_groupbuy_redemption.md) / [扩展表](BD_manual_dwd_groupbuy_redemption_ex.md) |
| 4 | dwd_member_balance_change | 会员余额变动 | balance_change_id | dwd_member_balance_change_ex | [主表](BD_manual_dwd_member_balance_change.md) / [扩展表](BD_manual_dwd_member_balance_change_ex.md) |
| 5 | dwd_payment | 支付流水 | payment_id | 无 | [主表](BD_manual_dwd_payment.md) |
| 6 | dwd_platform_coupon_redemption | 平台券核销 | platform_coupon_redemption_id | dwd_platform_coupon_redemption_ex | [主表](BD_manual_dwd_platform_coupon_redemption.md) / [扩展表](BD_manual_dwd_platform_coupon_redemption_ex.md) |
| 7 | dwd_recharge_order | 充值订单 | recharge_order_id | dwd_recharge_order_ex | [主表](BD_manual_dwd_recharge_order.md) / [扩展表](BD_manual_dwd_recharge_order_ex.md) |
| 8 | dwd_refund | 退款流水 | refund_id | dwd_refund_ex | [主表](BD_manual_dwd_refund.md) / [扩展表](BD_manual_dwd_refund_ex.md) |
| 9 | dwd_settlement_head | 结账单 | order_settle_id | dwd_settlement_head_ex | [主表](BD_manual_dwd_settlement_head.md) / [扩展表](BD_manual_dwd_settlement_head_ex.md) |
| 10 | dwd_store_goods_sale | 商品销售流水 | store_goods_sale_id | dwd_store_goods_sale_ex | [主表](BD_manual_dwd_store_goods_sale.md) / [扩展表](BD_manual_dwd_store_goods_sale_ex.md) |
| 11 | dwd_table_fee_adjust | 台费调整 | table_fee_adjust_id | dwd_table_fee_adjust_ex | [主表](BD_manual_dwd_table_fee_adjust.md) / [扩展表](BD_manual_dwd_table_fee_adjust_ex.md) |
| 12 | dwd_table_fee_log | 台费计费流水 | table_fee_log_id | dwd_table_fee_log_ex | [主表](BD_manual_dwd_table_fee_log.md) / [扩展表](BD_manual_dwd_table_fee_log_ex.md) |

---
## SCD2 公共字段

所有维度表都实现了 SCD2(缓慢变化维度类型2),包含以下公共字段:

| 字段名 | 类型 | 说明 |
|--------|------|------|
| scd2_start_time | TIMESTAMPTZ | 版本生效开始时间 |
| scd2_end_time | TIMESTAMPTZ | 版本生效结束时间(NULL 或 9999-12-31 表示当前有效) |
| scd2_is_current | INTEGER | 是否当前版本(1=是, 0=否) |
| scd2_version | INTEGER | 版本号 |
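
基于上述公共字段,常见的两类查询是取当前版本和按时间点回溯历史版本。以下为示意(以 dim_member 为例,scd2_end_time 的空值按上文说明处理):

```sql
-- 取当前版本
SELECT * FROM billiards_dwd.dim_member
WHERE scd2_is_current = 1;

-- 按时间点回溯:取 2025-10-01 时刻生效的版本
SELECT * FROM billiards_dwd.dim_member
WHERE scd2_start_time <= TIMESTAMPTZ '2025-10-01'
  AND (scd2_end_time IS NULL OR scd2_end_time > TIMESTAMPTZ '2025-10-01');
```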
---

## 常见 ID 关联说明

| ID 字段 | 关联表 | 说明 |
|---------|--------|------|
| tenant_id | - | 租户 ID,标识所属租户 |
| site_id | dim_site | 门店 ID |
| member_id | dim_member | 会员 ID(0=散客) |
| tenant_member_card_id | dim_member_card_account | 会员卡账户 ID |
| assistant_id | dim_assistant | 助教 ID |
| table_id / site_table_id | dim_table | 台桌 ID |
| tenant_goods_id | dim_tenant_goods | 租户商品 ID |
| site_goods_id | dim_store_goods | 门店商品 ID |
| order_settle_id | dwd_settlement_head | 结账单 ID |
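
结合上表与 SCD2 公共字段,事实表关联维度表时通常需要限定维度的当前版本。以下为一个示意查询(事实表以 dwd_settlement_head 为例,仅作演示):

```sql
-- 结账单关联会员与门店维度(取维度当前版本;member_id=0 的散客用 LEFT JOIN 保留)
SELECT
    s.order_settle_id, s.pay_amount,
    m.nickname AS member_nickname,
    d.shop_name
FROM billiards_dwd.dwd_settlement_head s
LEFT JOIN billiards_dwd.dim_member m
  ON s.member_id = m.member_id AND m.scd2_is_current = 1
LEFT JOIN billiards_dwd.dim_site d
  ON s.site_id = d.site_id AND d.scd2_is_current = 1;
```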
---

## 表设计模式

### 主表 + 扩展表模式

大部分表采用"主表 + 扩展表"的设计模式:

- **主表**:包含核心业务字段(如金额、状态、关键 ID)
- **扩展表**:包含附属信息(如操作员、门店名称快照、各类详细字段)
- 两表通过主键一对一关联

### 枚举值说明

文档中的枚举值格式为 `值(数量)=含义`,例如:

- `1(100)=有效` 表示值为 1 的记录有 100 条,含义为"有效"
- **[待确认]** 表示该值的含义无法从数据中确定
---

## 数据量统计

| 表名 | 记录数 |
|------|--------|
| dwd_payment | 22,949 |
| dwd_settlement_head | 22,475 |
| dwd_table_fee_log | 18,386 |
| dwd_store_goods_sale | 17,563 |
| dwd_platform_coupon_redemption | 16,977 |
| dwd_groupbuy_redemption | 11,420 |
| dwd_member_balance_change | 4,745 |
| dwd_table_fee_adjust | 2,849 |
| dwd_assistant_service_log | 1,090 |
| dwd_recharge_order | 455 |
| dwd_assistant_trash_event | 98 |
| dwd_refund | 45 |

---

## 注意事项

1. **枚举值推断**:文档中的枚举值含义基于 500 行样本数据推断,可能不完整
2. **[待确认] 标记**:不确定的字段含义或枚举值已明确标记
3. **数据时效性**:文档基于 2026-01-28 的数据库快照生成
4. **扩展表样本数据**:部分扩展表因日期解析问题无法获取样本数据
47 etl_billiards/docs/bd_manual/main/BD_manual_dim_assistant.md Normal file
@@ -0,0 +1,47 @@
# dim_assistant 助教档案主表

> 生成时间:2026-01-28

## 表信息

| 属性 | 值 |
|------|-----|
| Schema | billiards_dwd |
| 表名 | dim_assistant |
| 主键 | assistant_id, scd2_start_time |
| 扩展表 | dim_assistant_ex |
| 记录数 | 69 |
| 说明 | 助教人员档案的核心信息,包括工号、姓名、联系方式、团队归属、等级等 |

## 字段说明

| 序号 | 字段名 | 类型 | 可空 | 主键 | 说明 |
|------|--------|------|------|------|------|
| 1 | assistant_id | BIGINT | NO | PK | 助教唯一标识 ID |
| 2 | user_id | BIGINT | YES | | 关联用户 ID(当前数据全为 0,**[作用待确认]**) |
| 3 | assistant_no | TEXT | YES | | 助教工号,如 "11"、"27" |
| 4 | real_name | TEXT | YES | | 真实姓名,如 "梁婷婷"、"周佳怡" |
| 5 | nickname | TEXT | YES | | 昵称/花名,如 "柚子"、"周周"、"Amy" |
| 6 | mobile | TEXT | YES | | 手机号码 |
| 7 | tenant_id | BIGINT | YES | | 租户 ID(当前值: 2790683160709957) |
| 8 | site_id | BIGINT | YES | | 门店 ID → dim_site(当前值: 2790685415443269) |
| 9 | team_id | BIGINT | YES | | 团队 ID |
| 10 | team_name | TEXT | YES | | 团队名称。**枚举值**: "1组"(对应 team_id = 2792011585884037), "2组"(对应 team_id = 2959085810992645) |
| 11 | level | INTEGER | YES | | 助教等级。**枚举值**: 8=助教管理, 10=初级, 20=中级, 30=高级, 40=专家 |
| 12 | entry_time | TIMESTAMPTZ | YES | | 入职时间 |
| 13 | resign_time | TIMESTAMPTZ | YES | | 离职时间(远未来日期如 2225-xx-xx 表示在职) |
| 14 | leave_status | INTEGER | YES | | 在职状态。**枚举值**: 0=在职, 1=已离职 |
| 15 | assistant_status | INTEGER | YES | | 观察者状态。**枚举值**: 1=非观察者, 2=观察者 |
| 16 | scd2_start_time | TIMESTAMPTZ | NO | PK | SCD2 版本生效时间 |
| 17 | scd2_end_time | TIMESTAMPTZ | YES | | SCD2 版本失效时间 |
| 18 | scd2_is_current | INTEGER | YES | | 当前版本标记 |
| 19 | scd2_version | INTEGER | YES | | 版本号 |

## 使用说明

```sql
-- 查询当前在职助教
SELECT * FROM billiards_dwd.dim_assistant
WHERE scd2_is_current = 1 AND leave_status = 0;
```
@@ -0,0 +1,68 @@
# dim_goods_category 商品分类维度表

> 生成时间:2026-01-28

## 表信息

| 属性 | 值 |
|------|-----|
| Schema | billiards_dwd |
| 表名 | dim_goods_category |
| 主键 | category_id, scd2_start_time |
| 扩展表 | 无 |
| 记录数 | 26 |
| 说明 | 商品分类树结构表,支持一级/二级分类层次 |

## 字段说明

| 序号 | 字段名 | 类型 | 可空 | 主键 | 说明 |
|------|--------|------|------|------|------|
| 1 | category_id | BIGINT | NO | PK | 分类唯一标识 |
| 2 | tenant_id | BIGINT | YES | | 租户 ID(当前值: 2790683160709957) |
| 3 | category_name | VARCHAR | YES | | 分类名称。**样本值**: "槟榔", "皮头" 等 |
| 4 | alias_name | VARCHAR | YES | | 分类别名(当前数据大部分为空) |
| 5 | parent_category_id | BIGINT | YES | | 父级分类 ID(0=一级分类)→ 自关联 |
| 6 | business_name | VARCHAR | YES | | 业务大类名称。**样本值**: "酒水", "器材" 等 |
| 7 | tenant_goods_business_id | BIGINT | YES | | 业务大类 ID |
| 8 | category_level | INTEGER | YES | | 分类层级。**枚举值**: 1=一级大类, 2=二级子类 |
| 9 | is_leaf | INTEGER | YES | | 是否叶子节点。**枚举值**: 0=非叶子, 1=叶子 |
| 10 | open_salesman | INTEGER | YES | | 营业员开关 |
| 11 | sort_order | INTEGER | YES | | 排序序号 |
| 12 | is_warehousing | INTEGER | YES | | 是否库存管理。**枚举值**: 1=参与库存管理 |
| 13 | scd2_start_time | TIMESTAMPTZ | NO | PK | SCD2 版本生效时间 |
| 14 | scd2_end_time | TIMESTAMPTZ | YES | | SCD2 版本失效时间 |
| 15 | scd2_is_current | INTEGER | YES | | 当前版本标记 |
| 16 | scd2_version | INTEGER | YES | | 版本号 |

## 分类树结构示例

```
槟榔(一级)
├── 槟榔(二级)

器材(一级)
├── 皮头
├── 球杆
├── 其他

酒水(一级)
├── 饮料
├── 酒水
├── 茶水
├── 咖啡
├── 加料
├── 洋酒
```

## 使用说明

```sql
-- 查询一级分类
SELECT * FROM billiards_dwd.dim_goods_category
WHERE scd2_is_current = 1 AND parent_category_id = 0;

-- 查询某一级分类下的二级分类
SELECT * FROM billiards_dwd.dim_goods_category
WHERE scd2_is_current = 1 AND parent_category_id = <一级分类ID>;
```
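
上面的分类树为两级结构,可用一次自关联展开成"一级 → 二级"清单。以下为示意(字段取自上文字段说明):

```sql
-- 按"一级分类 → 二级分类"展开分类树
SELECT p.category_name AS level1_name, c.category_name AS level2_name
FROM billiards_dwd.dim_goods_category p
JOIN billiards_dwd.dim_goods_category c
  ON c.parent_category_id = p.category_id
WHERE p.scd2_is_current = 1 AND c.scd2_is_current = 1
ORDER BY p.sort_order, c.sort_order;
```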
@@ -0,0 +1,49 @@
# dim_groupbuy_package 团购套餐主表

> 生成时间:2026-01-28

## 表信息

| 属性 | 值 |
|------|-----|
| Schema | billiards_dwd |
| 表名 | dim_groupbuy_package |
| 主键 | groupbuy_package_id, scd2_start_time |
| 扩展表 | dim_groupbuy_package_ex |
| 记录数 | 34 |
| 说明 | 内部团购/套餐定义,记录套餐名称、价格、时长、适用台区等核心信息 |

## 字段说明

| 序号 | 字段名 | 类型 | 可空 | 主键 | 说明 |
|------|--------|------|------|------|------|
| 1 | groupbuy_package_id | BIGINT | NO | PK | 团购套餐 ID |
| 2 | tenant_id | BIGINT | YES | | 租户 ID(当前值: 2790683160709957) |
| 3 | site_id | BIGINT | YES | | 门店 ID → dim_site(当前值: 2790685415443269) |
| 4 | package_name | VARCHAR | YES | | 套餐名称。**样本值**: "中八、斯诺克包厢两小时", "斯诺克两小时" 等 |
| 5 | package_template_id | BIGINT | YES | | 套餐模板 ID |
| 6 | selling_price | NUMERIC(10,2) | YES | | 售卖价格(每笔订单可能不同,实际值从核销记录表 dwd_groupbuy_redemption 获取) |
| 7 | coupon_face_value | NUMERIC(10,2) | YES | | 券面值(每笔订单可能不同,实际值从核销记录表 dwd_groupbuy_redemption 获取) |
| 8 | duration_seconds | INTEGER | YES | | 套餐时长(秒)。**样本值**: 3600=1小时, 7200=2小时, 14400=4小时 等 |
| 9 | start_time | TIMESTAMPTZ | YES | | 套餐生效开始时间 |
| 10 | end_time | TIMESTAMPTZ | YES | | 套餐生效结束时间 |
| 11 | table_area_name | VARCHAR | YES | | 适用台区名称。**枚举值**: "A区", "VIP包厢", "斯诺克区", "B区", "麻将房", "888" |
| 12 | is_enabled | INTEGER | YES | | 启用状态。**枚举值**: 1=启用, 2=停用 |
| 13 | is_delete | INTEGER | YES | | 删除标记。**枚举值**: 0=未删除 |
| 14 | create_time | TIMESTAMPTZ | YES | | 创建时间 |
| 15 | tenant_table_area_id_list | VARCHAR | YES | | 租户级台区 ID 列表 |
| 16 | card_type_ids | VARCHAR | YES | | 允许使用的卡类型 ID 列表(当前数据为 "0") |
| 17 | scd2_start_time | TIMESTAMPTZ | NO | PK | SCD2 版本生效时间 |
| 18 | scd2_end_time | TIMESTAMPTZ | YES | | SCD2 版本失效时间 |
| 19 | scd2_is_current | INTEGER | YES | | 当前版本标记 |
| 20 | scd2_version | INTEGER | YES | | 版本号 |

## 使用说明

```sql
-- 查询当前启用的套餐
SELECT * FROM billiards_dwd.dim_groupbuy_package
WHERE scd2_is_current = 1 AND is_delete = 0 AND is_enabled = 1;
```
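
selling_price / coupon_face_value 的逐单实际值需要回到核销记录查询。以下为示意,关联键依据 dwd_groupbuy_redemption 文档中 promotion_coupon_id → dim_groupbuy_package 的说明:

```sql
-- 套餐定义关联核销记录,查看逐单实际售价与券面额
SELECT
    p.package_name,
    r.ledger_unit_price,
    r.coupon_money,
    r.create_time
FROM billiards_dwd.dim_groupbuy_package p
JOIN billiards_dwd.dwd_groupbuy_redemption r
  ON r.promotion_coupon_id = p.groupbuy_package_id
WHERE p.scd2_is_current = 1 AND r.is_delete = 0;
```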
49 etl_billiards/docs/bd_manual/main/BD_manual_dim_member.md Normal file
@@ -0,0 +1,49 @@
# dim_member 会员档案主表

> 生成时间:2026-01-28

## 表信息

| 属性 | 值 |
|------|-----|
| Schema | billiards_dwd |
| 表名 | dim_member |
| 主键 | member_id, scd2_start_time |
| 扩展表 | dim_member_ex |
| 记录数 | 556 |
| 说明 | 租户会员档案主表,记录会员基本信息和卡种等级 |

## 字段说明

| 序号 | 字段名 | 类型 | 可空 | 主键 | 说明 |
|------|--------|------|------|------|------|
| 1 | member_id | BIGINT | NO | PK | 租户内会员 ID(tenant_member_id) |
| 2 | system_member_id | BIGINT | YES | | 系统级会员 ID |
| 3 | tenant_id | BIGINT | YES | | 租户 ID(当前值: 2790683160709957) |
| 4 | register_site_id | BIGINT | YES | | 注册门店 ID → dim_site(当前值: 2790685415443269) |
| 5 | mobile | TEXT | YES | | 手机号码 |
| 6 | nickname | TEXT | YES | | 昵称。**样本值**: "陈先生", "张先生", "李先生" 等 |
| 7 | member_card_grade_code | BIGINT | YES | | 卡等级代码 |
| 8 | member_card_grade_name | TEXT | YES | | 卡等级名称。**枚举值**: "储值卡", "台费卡", "年卡", "活动抵用券", "月卡" |
| 9 | create_time | TIMESTAMPTZ | YES | | 创建时间 |
| 10 | update_time | TIMESTAMPTZ | YES | | 更新时间 |
| 11 | scd2_start_time | TIMESTAMPTZ | NO | PK | SCD2 版本生效时间 |
| 12 | scd2_end_time | TIMESTAMPTZ | YES | | SCD2 版本失效时间 |
| 13 | scd2_is_current | INTEGER | YES | | 当前版本标记 |
| 14 | scd2_version | INTEGER | YES | | 版本号 |

## 使用说明

```sql
-- 查询当前有效会员
SELECT * FROM billiards_dwd.dim_member
WHERE scd2_is_current = 1;

-- 按卡类型统计会员数
SELECT member_card_grade_name, COUNT(*)
FROM billiards_dwd.dim_member
WHERE scd2_is_current = 1
GROUP BY member_card_grade_name;
```
@@ -0,0 +1,63 @@
# dim_member_card_account 会员卡账户主表

> 生成时间:2026-01-28

## 表信息

| 属性 | 值 |
|------|-----|
| Schema | billiards_dwd |
| 表名 | dim_member_card_account |
| 主键 | member_card_id, scd2_start_time |
| 扩展表 | dim_member_card_account_ex |
| 记录数 | 945 |
| 说明 | 会员卡账户主表,记录卡种、余额、有效期等核心信息。一个会员可持有多张卡。 |

## 字段说明

| 序号 | 字段名 | 类型 | 可空 | 主键 | 说明 |
|------|--------|------|------|------|------|
| 1 | member_card_id | BIGINT | NO | PK | 会员卡账户 ID |
| 2 | tenant_id | BIGINT | YES | | 租户 ID |
| 3 | register_site_id | BIGINT | YES | | 开卡门店 ID → dim_site |
| 4 | tenant_member_id | BIGINT | YES | | 持卡会员 ID → dim_member(0=未绑定会员) |
| 5 | system_member_id | BIGINT | YES | | 系统级会员 ID |
| 6 | card_type_id | BIGINT | YES | | 卡种 ID |
| 7 | member_card_grade_code | BIGINT | YES | | 卡等级代码 |
| 8 | member_card_grade_code_name | TEXT | YES | | 卡等级名称。**枚举值**: "储值卡", "台费卡", "活动抵用券", "酒水卡", "月卡", "年卡" |
| 9 | member_card_type_name | TEXT | YES | | 卡类型名称(与 grade_code_name 相同) |
| 10 | member_name | TEXT | YES | | 持卡人姓名快照 |
| 11 | member_mobile | TEXT | YES | | 持卡人手机号快照 |
| 12 | balance | NUMERIC(18,2) | YES | | 当前余额(元) |
| 13 | start_time | TIMESTAMPTZ | YES | | 卡生效时间 |
| 14 | end_time | TIMESTAMPTZ | YES | | 卡失效时间(2225-01-01=长期有效) |
| 15 | last_consume_time | TIMESTAMPTZ | YES | | 最近消费时间 |
| 16 | status | INTEGER | YES | | 卡状态。**枚举值**: 1=正常, 4=过期 |
| 17 | is_delete | INTEGER | YES | | 删除标记。**枚举值**: 0=未删除 |
| 18 | scd2_start_time | TIMESTAMPTZ | NO | PK | SCD2 版本生效时间 |
| 19 | scd2_end_time | TIMESTAMPTZ | YES | | SCD2 版本失效时间 |
| 20 | scd2_is_current | INTEGER | YES | | 当前版本标记 |
| 21 | scd2_version | INTEGER | YES | | 版本号 |

## 卡种分布

| card_type_id | 卡类型 | 说明 |
|--------------|--------|------|
| 2793249295533893 | 储值卡 | 充值获得,可抵扣任意费用 |
| 2791990152417157 | 台费卡 | 充值赠送,仅可抵扣台费 |
| 2793266846533445 | 活动抵用券 | 充值赠送,不可抵扣助教费 |
| 2794699703437125 | 酒水卡 | 充值赠送,仅可抵扣酒水饮料食品商品 |
| 2793306611533637 | 月卡 | 充值获得,时长卡,仅可抵扣台费 |
| 2791987095408517 | 年卡 | 充值获得,时长卡,仅可抵扣台费 |

## 使用说明

```sql
-- 查询有效的储值卡
SELECT * FROM billiards_dwd.dim_member_card_account
WHERE scd2_is_current = 1
  AND is_delete = 0
  AND status = 1
  AND member_card_type_name = '储值卡';
```
51 etl_billiards/docs/bd_manual/main/BD_manual_dim_site.md Normal file
@@ -0,0 +1,51 @@
# dim_site 门店主表

> 生成时间:2026-01-28

## 表信息

| 属性 | 值 |
|------|-----|
| Schema | billiards_dwd |
| 表名 | dim_site |
| 主键 | site_id, scd2_start_time |
| 扩展表 | dim_site_ex |
| 记录数 | 1 |
| 说明 | 门店维度主表,记录门店基本信息(地址、联系方式等) |

## 字段说明

| 序号 | 字段名 | 类型 | 可空 | 主键 | 说明 |
|------|--------|------|------|------|------|
| 1 | site_id | BIGINT | NO | PK | 门店 ID |
| 2 | org_id | BIGINT | YES | | 组织机构 ID |
| 3 | tenant_id | BIGINT | YES | | 租户 ID(当前值: 2790683160709957) |
| 4 | shop_name | TEXT | YES | | 门店名称。**当前值**: "朗朗桌球" |
| 5 | site_label | TEXT | YES | | 门店标签。**当前值**: "A" |
| 6 | full_address | TEXT | YES | | 详细地址。**当前值**: "广东省广州市天河区丽阳街12号" |
| 7 | address | TEXT | YES | | 地址描述。**当前值**: "广东省广州市天河区天园街道朗朗桌球" |
| 8 | longitude | NUMERIC(10,6) | YES | | 经度。**当前值**: 113.360321 |
| 9 | latitude | NUMERIC(10,6) | YES | | 纬度。**当前值**: 23.133629 |
| 10 | tenant_site_region_id | BIGINT | YES | | 区域 ID。**当前值**: 156440100 |
| 11 | business_tel | TEXT | YES | | 联系电话。**当前值**: "13316068642" |
| 12 | site_type | INTEGER | YES | | 门店类型。**枚举值**: 1(1)=**[待确认]** |
| 13 | shop_status | INTEGER | YES | | 营业状态。**枚举值**: 1(1)=营业中 **[待确认]** |
| 14 | scd2_start_time | TIMESTAMPTZ | NO | PK | SCD2 版本生效时间 |
| 15 | scd2_end_time | TIMESTAMPTZ | YES | | SCD2 版本失效时间 |
| 16 | scd2_is_current | INTEGER | YES | | 当前版本标记 |
| 17 | scd2_version | INTEGER | YES | | 版本号 |

## 当前门店数据

| site_id | shop_name | full_address | longitude | latitude |
|---------|-----------|--------------|-----------|----------|
| 2790685415443269 | 朗朗桌球 | 广东省广州市天河区丽阳街12号 | 113.360321 | 23.133629 |

## 使用说明

```sql
-- 查询当前有效门店
SELECT * FROM billiards_dwd.dim_site
WHERE scd2_is_current = 1;
```
@@ -0,0 +1,61 @@
# dim_store_goods 门店商品主表

> 生成时间:2026-01-28

## 表信息

| 属性 | 值 |
|------|-----|
| Schema | billiards_dwd |
| 表名 | dim_store_goods |
| 主键 | site_goods_id, scd2_start_time |
| 扩展表 | dim_store_goods_ex |
| 记录数 | 170 |
| 说明 | 门店级商品库存维度表,记录门店的商品库存、价格、销量等信息 |

## 字段说明

| 序号 | 字段名 | 类型 | 可空 | 主键 | 说明 |
|------|--------|------|------|------|------|
| 1 | site_goods_id | BIGINT | NO | PK | 门店商品 ID |
| 2 | tenant_id | BIGINT | YES | | 租户 ID |
| 3 | site_id | BIGINT | YES | | 门店 ID → dim_site |
| 4 | tenant_goods_id | BIGINT | YES | | 租户商品 ID → dim_tenant_goods |
| 5 | goods_name | TEXT | YES | | 商品名称。**样本值**: "双中支中华", "炫赫门小南京" 等 |
| 6 | goods_category_id | BIGINT | YES | | 一级分类 ID → dim_goods_category |
| 7 | goods_second_category_id | BIGINT | YES | | 二级分类 ID → dim_goods_category |
| 8 | category_level1_name | TEXT | YES | | 一级分类名称。**样本值**: "零食", "酒水", "其他", "香烟" 等 |
| 9 | category_level2_name | TEXT | YES | | 二级分类名称。**样本值**: "零食", "饮料", "其他2", "香烟", "雪糕", "酒水", "球杆", "槟榔" 等 |
| 10 | batch_stock_qty | INTEGER | YES | | 批次库存数量 |
| 11 | sale_qty | INTEGER | YES | | 销售数量 |
| 12 | total_sales_qty | INTEGER | YES | | 累计销售数量 |
| 13 | sale_price | NUMERIC(18,2) | YES | | 销售价格(元) |
| 14 | created_at | TIMESTAMPTZ | YES | | 创建时间 |
| 15 | updated_at | TIMESTAMPTZ | YES | | 更新时间 |
| 16 | avg_monthly_sales | NUMERIC(18,4) | YES | | 月均销量 |
| 17 | goods_state | INTEGER | YES | | 商品状态。**枚举值**: 1=上架, 2=下架 |
| 18 | enable_status | INTEGER | YES | | 启用状态。**枚举值**: 1=启用 |
| 19 | send_state | INTEGER | YES | | 配送状态(暂无作用) |
| 20 | is_delete | INTEGER | YES | | 删除标记。**枚举值**: 0=未删除 |
| 21 | scd2_start_time | TIMESTAMPTZ | NO | PK | SCD2 版本生效时间 |
| 22 | scd2_end_time | TIMESTAMPTZ | YES | | SCD2 版本失效时间 |
| 23 | scd2_is_current | INTEGER | YES | | 当前版本标记 |
| 24 | scd2_version | INTEGER | YES | | 版本号 |

## 样本数据

| goods_name | category_level1_name | sale_price | sale_qty | goods_state |
|------------|----------------------|------------|----------|-------------|
| 双中支中华 | 香烟 | 72.00 | 94 | 1 |
| 炫赫门小南京 | 香烟 | 28.00 | 110 | 1 |
| 细荷花 | 香烟 | 55.00 | 184 | 1 |
| 可乐 | 酒水 | 5.00 | 78 | 1 |

## 使用说明

```sql
-- 查询当前上架商品
SELECT * FROM billiards_dwd.dim_store_goods
WHERE scd2_is_current = 1 AND goods_state = 1 AND is_delete = 0;
```
66 etl_billiards/docs/bd_manual/main/BD_manual_dim_table.md Normal file
@@ -0,0 +1,66 @@
# dim_table 台桌主表

> 生成时间:2026-01-28

## 表信息

| 属性 | 值 |
|------|-----|
| Schema | billiards_dwd |
| 表名 | dim_table |
| 主键 | table_id, scd2_start_time |
| 扩展表 | dim_table_ex |
| 记录数 | 74 |
| 说明 | 台桌维度主表,记录台桌名称、所属台区、单价等核心信息 |

## 字段说明

| 序号 | 字段名 | 类型 | 可空 | 主键 | 说明 |
|------|--------|------|------|------|------|
| 1 | table_id | BIGINT | NO | PK | 台桌 ID |
| 2 | site_id | BIGINT | YES | | 门店 ID → dim_site |
| 3 | table_name | TEXT | YES | | 台桌名称。**样本值**: "A1", "A2", "B1", "B2", "S1", "C1", "VIP1", "M3", "666" 等 |
| 4 | site_table_area_id | BIGINT | YES | | 台区 ID |
| 5 | site_table_area_name | TEXT | YES | | 台区名称。**样本值**: "A区", "B区", "补时长", "C区", "麻将房", "K包", "VIP包厢", "斯诺克区", "666", "k包活动区", "M7" 等 |
| 6 | tenant_table_area_id | BIGINT | YES | | 租户级台区 ID |
| 7 | table_price | NUMERIC(18,2) | YES | | 台桌单价(当前数据全为 0.00) |
| 8 | scd2_start_time | TIMESTAMPTZ | NO | PK | SCD2 版本生效时间 |
| 9 | scd2_end_time | TIMESTAMPTZ | YES | | SCD2 版本失效时间 |
| 10 | scd2_is_current | INTEGER | YES | | 当前版本标记 |
| 11 | scd2_version | INTEGER | YES | | 版本号 |

## 台区分布

| 台区名称 | 台桌数量 | 大类/索引 |
|----------|----------|----------|
| A区 | 18 | 台球/打球/中八/追分 |
| B区 | 15 | 台球/打球/中八/追分 |
| 补时长 | 7 | 补时长 |
| C区 | 6 | 台球/打球/中八/追分 |
| 麻将房 | 5 | 麻将/麻将棋牌 |
| M7 | 2 | 麻将/麻将棋牌 |
| M8 | 1 | 麻将/麻将棋牌 |
| K包 | 4 | K包/K歌/KTV |
| VIP包厢 | 4 | 台球/打球/中八/追分 (V5为 台球/打球/斯诺克) |
| 斯诺克区 | 4 | 台球/打球/斯诺克 |
| 666 | 2 | 麻将/麻将棋牌 |
| TV台 | 1 | 台球/打球/中八/追分 |
| k包活动区 | 2 | K包/K歌/KTV |
| 幸会158 | 2 | K包/K歌/KTV |
| 发财 | 1 | 麻将/麻将棋牌 |

## 使用说明

```sql
-- 查询当前有效台桌
SELECT * FROM billiards_dwd.dim_table
WHERE scd2_is_current = 1;

-- 按台区统计台桌数
SELECT site_table_area_name, COUNT(*)
FROM billiards_dwd.dim_table
WHERE scd2_is_current = 1
GROUP BY site_table_area_name
ORDER BY COUNT(*) DESC;
```
@@ -0,0 +1,47 @@
# dim_tenant_goods 租户商品主表

> 生成时间:2026-01-28

## 表信息

| 属性 | 值 |
|------|-----|
| Schema | billiards_dwd |
| 表名 | dim_tenant_goods |
| 主键 | tenant_goods_id, scd2_start_time |
| 扩展表 | dim_tenant_goods_ex |
| 记录数 | 171 |
| 说明 | 租户级商品档案主表(SKU 定义),被门店商品表引用 |

## 字段说明

| 序号 | 字段名 | 类型 | 可空 | 主键 | 说明 |
|------|--------|------|------|------|------|
| 1 | tenant_goods_id | BIGINT | NO | PK | 租户商品 ID(SKU) |
| 2 | tenant_id | BIGINT | YES | | 租户 ID |
| 3 | supplier_id | BIGINT | YES | | 供应商 ID(当前数据全为 0) |
| 4 | category_name | VARCHAR | YES | | 分类名称(二级分类)。**样本值**: "零食", "饮料", "香烟" 等 |
| 5 | goods_category_id | BIGINT | YES | | 一级分类 ID |
| 6 | goods_second_category_id | BIGINT | YES | | 二级分类 ID |
| 7 | goods_name | VARCHAR | YES | | 商品名称。**样本值**: "海之言", "西梅多多饮品", "美汁源果粒橙", "三诺橙汁" 等 |
| 8 | goods_number | VARCHAR | YES | | 商品编号(序号) |
| 9 | unit | VARCHAR | YES | | 商品单位。**枚举值**: "包", "瓶", "个", "份" 等 |
| 10 | market_price | NUMERIC(18,2) | YES | | 市场价/吊牌价(元) |
| 11 | goods_state | INTEGER | YES | | 商品状态。**枚举值**: 1=上架, 2=下架 |
| 12 | create_time | TIMESTAMPTZ | YES | | 创建时间 |
| 13 | update_time | TIMESTAMPTZ | YES | | 更新时间 |
| 14 | is_delete | INTEGER | YES | | 删除标记。**枚举值**: 0=未删除 |
| 15 | scd2_start_time | TIMESTAMPTZ | NO | PK | SCD2 版本生效时间 |
| 16 | scd2_end_time | TIMESTAMPTZ | YES | | SCD2 版本失效时间 |
| 17 | scd2_is_current | INTEGER | YES | | 当前版本标记 |
| 18 | scd2_version | INTEGER | YES | | 版本号 |

## 使用说明

```sql
-- 查询当前有效的租户商品
SELECT * FROM billiards_dwd.dim_tenant_goods
WHERE scd2_is_current = 1 AND is_delete = 0;
```
@@ -0,0 +1,67 @@
# dwd_assistant_service_log 助教服务流水主表

> 生成时间:2026-01-28

## 表信息

| 属性 | 值 |
|------|-----|
| Schema | billiards_dwd |
| 表名 | dwd_assistant_service_log |
| 主键 | assistant_service_id |
| 扩展表 | dwd_assistant_service_log_ex |
| 记录数 | 5003 |
| 说明 | 助教服务计费流水事实表,记录每次陪打/教学服务的详细信息 |

## 字段说明

| 序号 | 字段名 | 类型 | 可空 | 主键 | 说明 |
|------|--------|------|------|------|------|
| 1 | assistant_service_id | BIGINT | NO | PK | 服务流水 ID |
| 2 | order_trade_no | BIGINT | YES | | 订单号 → dwd_settlement_head |
| 3 | order_settle_id | BIGINT | YES | | 结账单 ID → dwd_settlement_head |
| 4 | order_pay_id | BIGINT | YES | | 支付单 ID(当前数据全为 0) |
| 5 | order_assistant_id | BIGINT | YES | | 订单助教 ID |
| 6 | order_assistant_type | INTEGER | YES | | 服务类型。**枚举值**: 1=基础课或包厢课, 2=附加课/激励课 |
| 7 | tenant_id | BIGINT | YES | | 租户 ID |
| 8 | site_id | BIGINT | YES | | 门店 ID |
| 9 | site_table_id | BIGINT | YES | | 台桌 ID → dim_table(0=非台桌服务) |
| 10 | tenant_member_id | BIGINT | YES | | 会员 ID → dim_member(0=散客) |
| 11 | system_member_id | BIGINT | YES | | 系统会员 ID(0=散客) |
| 12 | assistant_no | VARCHAR | YES | | 助教工号。**样本值**: "2", "9" 等 |
| 13 | nickname | VARCHAR | YES | | 助教昵称。**样本值**: "佳怡", "婉婉", "七七" 等 |
| 14 | site_assistant_id | BIGINT | YES | | 助教 ID → dim_assistant |
| 15 | user_id | BIGINT | YES | | 助教用户 ID |
| 16 | assistant_team_id | BIGINT | YES | | 助教团队 ID。**枚举值**: 2792011585884037=1组, 2959085810992645=2组 |
| 17 | person_org_id | BIGINT | YES | | 人事组织 ID |
| 18 | assistant_level | INTEGER | YES | | 助教等级。**枚举值**: 8=助教管理, 10=初级, 20=中级, 30=高级, 40=星级 |
| 19 | level_name | VARCHAR | YES | | 等级名称。**枚举值**: "助教管理", "初级", "中级", "高级", "星级" |
| 20 | skill_id | BIGINT | YES | | 技能 ID |
| 21 | skill_name | VARCHAR | YES | | 技能名称。**枚举值**: "基础课", "附加课/激励课", "包厢课" |
| 22 | ledger_unit_price | NUMERIC(10,2) | YES | | 单价(元/小时)。**样本值**: 98.00/108.00/190.00 等 |
| 23 | ledger_amount | NUMERIC(10,2) | YES | | 计费金额 |
| 24 | projected_income | NUMERIC(10,2) | YES | | 预估收入 |
| 25 | coupon_deduct_money | NUMERIC(10,2) | YES | | 券抵扣金额 |
| 26 | income_seconds | INTEGER | YES | | 计费时长(秒)。常见值: 3600=1h, 7200=2h, 10800=3h |
| 27 | real_use_seconds | INTEGER | YES | | 实际使用时长(秒) |
| 28 | add_clock | INTEGER | YES | | 加时时长(秒),大多为 0 |
| 29 | create_time | TIMESTAMPTZ | YES | | 创建时间 |
| 30 | start_use_time | TIMESTAMPTZ | YES | | 服务开始时间 |
| 31 | last_use_time | TIMESTAMPTZ | YES | | 服务结束时间 |
| 32 | is_delete | INTEGER | YES | | 删除标记。**枚举值**: 0=未删除 |

## 使用说明

```sql
-- 统计助教服务收入
SELECT
    nickname, level_name,
    COUNT(*) AS service_count,
    SUM(ledger_amount) AS total_amount,
    SUM(income_seconds)/3600.0 AS total_hours
FROM billiards_dwd.dwd_assistant_service_log
WHERE is_delete = 0
GROUP BY nickname, level_name
ORDER BY total_amount DESC;
```
@@ -0,0 +1,43 @@
# dwd_assistant_trash_event 助教服务作废主表

> 生成时间:2026-01-28

## 表信息

| 属性 | 值 |
|------|-----|
| Schema | billiards_dwd |
| 表名 | dwd_assistant_trash_event |
| 主键 | assistant_trash_event_id |
| 扩展表 | dwd_assistant_trash_event_ex |
| 记录数 | 98 |
| 说明 | 助教服务作废事实表,记录被取消/作废的助教服务记录 |

## 字段说明

| 序号 | 字段名 | 类型 | 可空 | 主键 | 说明 |
|------|--------|------|------|------|------|
| 1 | assistant_trash_event_id | BIGINT | NO | PK | 作废事件 ID |
| 2 | site_id | BIGINT | YES | | 门店 ID |
| 3 | table_id | BIGINT | YES | | 台桌 ID → dim_table |
| 4 | table_area_id | BIGINT | YES | | 台区 ID |
| 5 | assistant_no | VARCHAR | YES | | 助教工号/昵称。**样本值**: "七七", "乔西", "球球" 等 |
| 6 | assistant_name | VARCHAR | YES | | 助教名称,与 assistant_no 相同 |
| 7 | charge_minutes_raw | INTEGER | YES | | 原计费时长(秒)。**样本值**: 0, 3600=1h, 10800=3h 等 |
| 8 | abolish_amount | NUMERIC(18,2) | YES | | 作废金额(元)。**样本值**: 0.00, 190.00, 570.00 等 |
| 9 | trash_reason | VARCHAR | YES | | 作废原因(当前数据全为 NULL) |
| 10 | create_time | TIMESTAMPTZ | YES | | 创建时间 |

## 使用说明

```sql
-- 助教作废金额统计
SELECT
    assistant_name,
    COUNT(*) AS trash_count,
    SUM(abolish_amount) AS total_abolished
FROM billiards_dwd.dwd_assistant_trash_event
GROUP BY assistant_name
ORDER BY total_abolished DESC;
```
@@ -0,0 +1,57 @@
# dwd_groupbuy_redemption 团购核销主表

> 生成时间:2026-01-28

## 表信息

| 属性 | 值 |
|------|-----|
| Schema | billiards_dwd |
| 表名 | dwd_groupbuy_redemption |
| 主键 | redemption_id |
| 扩展表 | dwd_groupbuy_redemption_ex |
| 记录数 | 11420 |
| 说明 | 团购券核销事实表,记录团购券的核销使用明细 |

## 字段说明

| 序号 | 字段名 | 类型 | 可空 | 主键 | 说明 |
|------|--------|------|------|------|------|
| 1 | redemption_id | BIGINT | NO | PK | 核销 ID |
| 2 | tenant_id | BIGINT | YES | | 租户 ID |
| 3 | site_id | BIGINT | YES | | 门店 ID |
| 4 | table_id | BIGINT | YES | | 台桌 ID → dim_table |
| 5 | tenant_table_area_id | BIGINT | YES | | 台区 ID |
| 6 | table_charge_seconds | INTEGER | YES | | 台费计费时长(秒)。**样本值**: 3600=1h, 7200=2h, 10800=3h 等 |
| 7 | order_trade_no | BIGINT | YES | | 订单号 |
| 8 | order_settle_id | BIGINT | YES | | 结账单 ID → dwd_settlement_head |
| 9 | order_coupon_id | BIGINT | YES | | 订单券 ID |
| 10 | coupon_origin_id | BIGINT | YES | | 券来源 ID |
| 11 | promotion_activity_id | BIGINT | YES | | 促销活动 ID |
| 12 | promotion_coupon_id | BIGINT | YES | | 促销券 ID → dim_groupbuy_package |
| 13 | order_coupon_channel | INTEGER | YES | | 券渠道。**枚举值**: 1=美团, 2=抖音 |
| 14 | ledger_unit_price | NUMERIC(18,2) | YES | | 单价(元)。**样本值**: 29.90, 12.12, 11.11, 39.90 等 |
| 15 | ledger_count | INTEGER | YES | | 计费数量(秒)。**样本值**: 3600=1h, 7200=2h 等 |
| 16 | ledger_amount | NUMERIC(18,2) | YES | | 账本金额(元)。**样本值**: 48.00, 96.00, 68.00 等 |
| 17 | coupon_money | NUMERIC(18,2) | YES | | 券面额(元)。**样本值**: 48.00, 116.00, 96.00, 68.00 等 |
| 18 | promotion_seconds | INTEGER | YES | | 促销时长(秒)。**样本值**: 3600=1h, 7200=2h, 14400=4h 等 |
| 19 | coupon_code | VARCHAR | YES | | 券码 |
| 20 | is_single_order | INTEGER | YES | | 是否独立订单。**枚举值**: 0=否, 1=是 |
| 21 | is_delete | INTEGER | YES | | 删除标记。**枚举值**: 0=未删除 |
| 22 | ledger_name | VARCHAR | YES | | 套餐名称。**样本值**: "全天A区中八一小时", "中八A区新人特惠一小时" 等 |
| 23 | create_time | TIMESTAMPTZ | YES | | 创建时间 |

## 使用说明

```sql
-- 各套餐核销统计
SELECT
    ledger_name,
    COUNT(*) AS redemption_count,
    SUM(ledger_amount) AS total_amount
FROM billiards_dwd.dwd_groupbuy_redemption
WHERE is_delete = 0
GROUP BY ledger_name
ORDER BY redemption_count DESC;
```
@@ -0,0 +1,72 @@
# dwd_member_balance_change 会员余额变动主表

> 生成时间:2026-01-28

## 表信息

| 属性 | 值 |
|------|-----|
| Schema | billiards_dwd |
| 表名 | dwd_member_balance_change |
| 主键 | balance_change_id |
| 扩展表 | dwd_member_balance_change_ex |
| 记录数 | 4745 |
| 说明 | 会员卡余额变动流水事实表,记录每次余额变动的金额和原因 |

## 字段说明

| 序号 | 字段名 | 类型 | 可空 | 主键 | 说明 |
|------|--------|------|------|------|------|
| 1 | balance_change_id | BIGINT | NO | PK | 变动流水 ID |
| 2 | tenant_id | BIGINT | YES | | 租户 ID |
| 3 | site_id | BIGINT | YES | | 门店 ID |
| 4 | register_site_id | BIGINT | YES | | 注册门店 ID |
| 5 | tenant_member_id | BIGINT | YES | | 会员 ID → dim_member |
| 6 | system_member_id | BIGINT | YES | | 系统会员 ID |
| 7 | tenant_member_card_id | BIGINT | YES | | 会员卡 ID → dim_member_card_account |
| 8 | card_type_id | BIGINT | YES | | 卡类型 ID |
| 9 | card_type_name | VARCHAR | YES | | 卡类型名称。**枚举值**: "储值卡", "活动抵用券", "台费卡", "酒水卡", "年卡", "月卡" |
| 10 | member_name | VARCHAR | YES | | 会员名称快照 |
| 11 | member_mobile | VARCHAR | YES | | 会员手机号快照 |
| 12 | balance_before | NUMERIC(18,2) | YES | | 变动前余额 |
| 13 | change_amount | NUMERIC(18,2) | YES | | 变动金额(正=充值/赠送,负=消费) |
| 14 | balance_after | NUMERIC(18,2) | YES | | 变动后余额 |
| 15 | from_type | INTEGER | YES | | 变动来源。**枚举值**: 1=结账/消费, 2=结账撤销, 3=现付充值, 4=活动赠送, 7=充值撤销/退款, 9=手动调整 |
| 16 | payment_method | INTEGER | YES | | 支付方式,暂未启用 |
| 17 | change_time | TIMESTAMPTZ | YES | | 变动时间 |
| 18 | is_delete | INTEGER | YES | | 删除标记 |
| 19 | remark | VARCHAR | YES | | 备注。**样本值**: "注销会员", "充值退款" 等 |

## 卡类型余额变动分布

| 卡类型 | 变动次数 | 说明 |
|--------|----------|------|
| 储值卡 | 2825 | 最主要的消费卡种 |
| 活动抵用券 | 1275 | 营销活动赠送 |
| 台费卡 | 482 | 台费专用卡 |
| 酒水卡 | 149 | 酒水专用卡 |

## 样本数据

| member_name | card_type_name | balance_before | change_amount | balance_after | from_type |
|-------------|----------------|----------------|---------------|---------------|-----------|
| 曾丹烨 | 储值卡 | 816.30 | -120.00 | 696.30 | 1 |
| 葛先生 | 储值卡 | 6745.27 | -144.00 | 6601.27 | 1 |
| 陈腾鑫 | 储值卡 | 293.20 | -114.61 | 178.59 | 1 |
| 轩哥 | 酒水卡 | 532.00 | -41.00 | 491.00 | 1 |

## 使用说明

```sql
-- 会员消费总额排行
SELECT
    member_name,
    member_mobile,
    card_type_name,
    SUM(CASE WHEN change_amount < 0 THEN ABS(change_amount) ELSE 0 END) AS total_consume
FROM billiards_dwd.dwd_member_balance_change
WHERE is_delete = 0
GROUP BY member_name, member_mobile, card_type_name
ORDER BY total_consume DESC;
```
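
from_type 区分了消费、充值、赠送与撤销等来源,按来源汇总可快速核对资金流向。以下为示意(字段取自上文字段说明):

```sql
-- 按变动来源汇总变动笔数与净额(from_type 含义见上文枚举值)
SELECT
    from_type,
    COUNT(*) AS change_count,
    SUM(change_amount) AS net_amount
FROM billiards_dwd.dwd_member_balance_change
WHERE is_delete = 0
GROUP BY from_type
ORDER BY from_type;
```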
47 etl_billiards/docs/bd_manual/main/BD_manual_dwd_payment.md Normal file
@@ -0,0 +1,47 @@
# dwd_payment 支付流水表

> 生成时间:2026-01-28

## 表信息

| 属性 | 值 |
|------|-----|
| Schema | billiards_dwd |
| 表名 | dwd_payment |
| 主键 | payment_id |
| 扩展表 | 无 |
| 记录数 | 22949 |
| 说明 | 支付流水事实表,记录每笔支付的方式、金额、时间等信息 |

## 字段说明

| 序号 | 字段名 | 类型 | 可空 | 主键 | 说明 |
|------|--------|------|------|------|------|
| 1 | payment_id | BIGINT | NO | PK | 支付流水 ID |
| 2 | site_id | BIGINT | YES | | 门店 ID |
| 3 | relate_type | INTEGER | YES | | 关联业务类型。**枚举值**: 1=预付, 2=结账, 5=充值, 6=线上商城 |
| 4 | relate_id | BIGINT | YES | | 关联业务 ID |
| 5 | pay_amount | NUMERIC(18,2) | YES | | 支付金额(元) |
| 6 | pay_status | INTEGER | YES | | 支付状态。**枚举值**: 2=已支付 |
| 7 | payment_method | INTEGER | YES | | 支付方式。**枚举值**: 2=现金支付, 4=离线支付 |
| 8 | online_pay_channel | INTEGER | YES | | 在线支付渠道(当前数据全为 0) |
| 9 | create_time | TIMESTAMPTZ | YES | | 创建时间 |
| 10 | pay_time | TIMESTAMPTZ | YES | | 支付时间 |
| 11 | pay_date | DATE | YES | | 支付日期 |

## 使用说明

```sql
-- 每日支付金额统计
SELECT
    pay_date,
    COUNT(*) AS pay_count,
    SUM(pay_amount) AS total_amount
FROM billiards_dwd.dwd_payment
WHERE pay_status = 2
GROUP BY pay_date
ORDER BY pay_date DESC;
```
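
relate_id 的指向随 relate_type 变化。以下为一个示意查询,**假设** relate_type=2(结账)时 relate_id 对应 dwd_settlement_head.order_settle_id(该对应关系文档未明示,仅为假设,使用前请核实):

```sql
-- 假设:relate_type=2 时 relate_id = order_settle_id(待核实)
SELECT
    p.payment_id, p.pay_amount, p.pay_time,
    s.settle_name, s.consume_money
FROM billiards_dwd.dwd_payment p
JOIN billiards_dwd.dwd_settlement_head s
  ON p.relate_id = s.order_settle_id
WHERE p.relate_type = 2
  AND p.pay_status = 2;
```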
@@ -0,0 +1,57 @@
# dwd_platform_coupon_redemption 平台券核销主表

> 生成时间:2026-01-28

## 表信息

| 属性 | 值 |
|------|-----|
| Schema | billiards_dwd |
| 表名 | dwd_platform_coupon_redemption |
| 主键 | platform_coupon_redemption_id |
| 扩展表 | dwd_platform_coupon_redemption_ex |
| 记录数 | 16977 |
| 说明 | 平台优惠券核销事实表,记录美团/抖音等平台券的核销明细 |

## 字段说明

| 序号 | 字段名 | 类型 | 可空 | 主键 | 说明 |
|------|--------|------|------|------|------|
| 1 | platform_coupon_redemption_id | BIGINT | NO | PK | 核销 ID |
| 2 | tenant_id | BIGINT | YES | | 租户 ID |
| 3 | site_id | BIGINT | YES | | 门店 ID |
| 4 | coupon_code | VARCHAR | YES | | 券码 |
| 5 | coupon_channel | INTEGER | YES | | 券渠道。**枚举值**: 1=美团, 2=抖音 |
| 6 | coupon_name | VARCHAR | YES | | 券名称。**样本值**: "【全天可用】中八桌球一小时(A区)", "【全天可用】中八桌球两小时(A区)" 等 |
| 7 | sale_price | NUMERIC(10,2) | YES | | 售卖价(元)。**样本值**: 29.90, 69.90, 59.90, 39.90, 19.90 等 |
| 8 | coupon_money | NUMERIC(10,2) | YES | | 券面额(元)。**样本值**: 48.00, 96.00, 116.00, 68.00 等 |
| 9 | coupon_free_time | INTEGER | YES | | 券赠送时长(当前数据全为 0) |
| 10 | channel_deal_id | BIGINT | YES | | 渠道交易 ID |
| 11 | deal_id | BIGINT | YES | | 交易 ID |
| 12 | group_package_id | BIGINT | YES | | 团购套餐 ID(当前数据全为 0) |
| 13 | site_order_id | BIGINT | YES | | 门店订单 ID |
| 14 | table_id | BIGINT | YES | | 台桌 ID → dim_table |
| 15 | certificate_id | VARCHAR | YES | | 凭证 ID |
| 16 | verify_id | VARCHAR | YES | | 核验 ID(仅抖音券有值) |
| 17 | use_status | INTEGER | YES | | 使用状态。**枚举值**: 1=已使用, 2=已撤销 |
| 18 | is_delete | INTEGER | YES | | 删除标记。**枚举值**: 0=未删除 |
| 19 | create_time | TIMESTAMPTZ | YES | | 创建时间 |
| 20 | consume_time | TIMESTAMPTZ | YES | | 核销时间 |

## 使用说明

```sql
-- 各渠道核销统计
SELECT
    CASE coupon_channel
        WHEN 1 THEN '美团'
        WHEN 2 THEN '抖音'
        ELSE '其他'
    END AS channel,
    COUNT(*) AS redemption_count,
    SUM(coupon_money) AS total_coupon_value,
    SUM(sale_price) AS total_sale_price
FROM billiards_dwd.dwd_platform_coupon_redemption
WHERE is_delete = 0 AND use_status = 1
GROUP BY coupon_channel;
```
@@ -0,0 +1,53 @@
# dwd_recharge_order 充值订单主表

> 生成时间:2026-01-28

## 表信息

| 属性 | 值 |
|------|-----|
| Schema | billiards_dwd |
| 表名 | dwd_recharge_order |
| 主键 | recharge_order_id |
| 扩展表 | dwd_recharge_order_ex |
| 记录数 | 455 |
| 说明 | 会员充值订单事实表,记录会员卡充值的金额、方式等信息 |

## 字段说明

| 序号 | 字段名 | 类型 | 可空 | 主键 | 说明 |
|------|--------|------|------|------|------|
| 1 | recharge_order_id | BIGINT | NO | PK | 充值订单 ID |
| 2 | tenant_id | BIGINT | YES | | 租户 ID |
| 3 | site_id | BIGINT | YES | | 门店 ID |
| 4 | member_id | BIGINT | YES | | 会员 ID → dim_member |
| 5 | member_name_snapshot | TEXT | YES | | 会员名称快照 |
| 6 | member_phone_snapshot | TEXT | YES | | 会员电话快照 |
| 7 | tenant_member_card_id | BIGINT | YES | | 会员卡账户 ID → dim_member_card_account |
| 8 | member_card_type_name | TEXT | YES | | 卡类型名称。**枚举值**: "储值卡", "月卡" |
| 9 | settle_relate_id | BIGINT | YES | | 结算关联 ID |
| 10 | settle_type | INTEGER | YES | | 结算类型。**枚举值**: 5=充值订单, 7=充值退款 |
| 11 | settle_name | TEXT | YES | | 结算名称。**枚举值**: "充值订单", "充值退款" |
| 12 | is_first | INTEGER | YES | | 是否首充。**枚举值**: 1=是, 2=否 |
| 13 | pay_amount | NUMERIC(18,2) | YES | | 充值金额(元,撤销为负数) |
| 14 | refund_amount | NUMERIC(18,2) | YES | | 退款金额 |
| 15 | point_amount | NUMERIC(18,2) | YES | | 积分金额 |
| 16 | cash_amount | NUMERIC(18,2) | YES | | 现金金额 |
| 17 | payment_method | INTEGER | YES | | 支付方式,暂未启用 |
| 18 | create_time | TIMESTAMPTZ | YES | | 创建时间 |
| 19 | pay_time | TIMESTAMPTZ | YES | | 支付时间 |

## 使用说明

```sql
-- 充值总额统计(不含撤销)
SELECT
    member_card_type_name,
    COUNT(*) AS order_count,
    SUM(pay_amount) AS total_recharge
FROM billiards_dwd.dwd_recharge_order
WHERE settle_type = 5
GROUP BY member_card_type_name;
```
45 etl_billiards/docs/bd_manual/main/BD_manual_dwd_refund.md Normal file
@@ -0,0 +1,45 @@
# dwd_refund 退款流水主表

> 生成时间:2026-01-28

## 表信息

| 属性 | 值 |
|------|-----|
| Schema | billiards_dwd |
| 表名 | dwd_refund |
| 主键 | refund_id |
| 扩展表 | dwd_refund_ex |
| 记录数 | 45 |
| 说明 | 退款流水事实表,记录退款的金额、关联业务等信息 |

## 字段说明

| 序号 | 字段名 | 类型 | 可空 | 主键 | 说明 |
|------|--------|------|------|------|------|
| 1 | refund_id | BIGINT | NO | PK | 退款流水 ID |
| 2 | tenant_id | BIGINT | YES | | 租户 ID |
| 3 | site_id | BIGINT | YES | | 门店 ID |
| 4 | relate_type | INTEGER | YES | | 关联业务类型。**枚举值**: 1(7)=预付退款, 2(31)=结账退款, 5(7)=充值退款 |
| 5 | relate_id | BIGINT | YES | | 关联业务 ID |
| 6 | pay_amount | NUMERIC(18,2) | YES | | 退款金额(元,负数) |
| 7 | channel_fee | NUMERIC(18,2) | YES | | 渠道手续费 |
| 8 | pay_time | TIMESTAMPTZ | YES | | 退款时间 |
| 9 | create_time | TIMESTAMPTZ | YES | | 创建时间 |
| 10 | payment_method | INTEGER | YES | | 支付方式,暂无用途 |
| 11 | member_id | BIGINT | YES | | 会员 ID(当前数据全为 0) |
| 12 | member_card_id | BIGINT | YES | | 会员卡 ID(当前数据全为 0) |

## 使用说明

```sql
-- 退款统计
SELECT
    relate_type,
    COUNT(*) AS refund_count,
    SUM(ABS(pay_amount)) AS total_refund
FROM billiards_dwd.dwd_refund
GROUP BY relate_type;
```
@@ -0,0 +1,74 @@
# dwd_settlement_head 结账头表主表

> 生成时间:2026-01-28

## 表信息

| 属性 | 值 |
|------|-----|
| Schema | billiards_dwd |
| 表名 | dwd_settlement_head |
| 主键 | order_settle_id |
| 扩展表 | dwd_settlement_head_ex |
| 记录数 | 23366 |
| 说明 | 结账单头表事实表,是核心交易表,记录每笔结账的消费金额、支付方式、折扣等汇总信息 |

## 字段说明

| 序号 | 字段名 | 类型 | 可空 | 主键 | 说明 |
|------|--------|------|------|------|------|
| 1 | order_settle_id | BIGINT | NO | PK | 结账单 ID |
| 2 | tenant_id | BIGINT | YES | | 租户 ID |
| 3 | site_id | BIGINT | YES | | 门店 ID → dim_site |
| 4 | site_name | VARCHAR | YES | | 门店名称。**当前值**: "朗朗桌球" |
| 5 | table_id | BIGINT | YES | | 台桌 ID → dim_table(0=非台桌订单,如商城订单) |
| 6 | settle_name | VARCHAR | YES | | 结账名称。**样本值**: "商城订单", "A区 A3", "A区 A4", "斯诺克区 S1" |
| 7 | order_trade_no | BIGINT | YES | | 订单号 |
| 8 | create_time | TIMESTAMPTZ | YES | | 创建时间 |
| 9 | pay_time | TIMESTAMPTZ | YES | | 支付时间 |
| 10 | settle_type | INTEGER | YES | | 结账类型。**枚举值**: 1=台桌结账, 3=商城订单, 6=退货订单, 7=退款订单 |
| 11 | revoke_order_id | BIGINT | YES | | 撤销订单 ID(当前数据全为 0) |
| 12 | member_id | BIGINT | YES | | 会员 ID → dim_member(0=散客,占比约 82.8%) |
| 13 | member_name | VARCHAR | YES | | 会员名称 |
| 14 | member_phone | VARCHAR | YES | | 会员电话 |
| 15 | member_card_account_id | BIGINT | YES | | 会员卡账户 ID(当前数据全为 0) |
| 16 | member_card_type_name | VARCHAR | YES | | 卡类型名称(当前数据全为空) |
| 17 | is_bind_member | BOOLEAN | YES | | 是否绑定会员。**枚举值**: False=否 |
| 18 | member_discount_amount | NUMERIC(18,2) | YES | | 会员折扣金额 |
| 19 | consume_money | NUMERIC(18,2) | YES | | 消费总金额(元) |
| 20 | table_charge_money | NUMERIC(18,2) | YES | | 台费金额 |
| 21 | goods_money | NUMERIC(18,2) | YES | | 商品金额 |
| 22 | real_goods_money | NUMERIC(18,2) | YES | | 实收商品金额 |
| 23 | assistant_pd_money | NUMERIC(18,2) | YES | | 助教陪打费用 |
| 24 | assistant_cx_money | NUMERIC(18,2) | YES | | 助教促销费用 |
| 25 | adjust_amount | NUMERIC(18,2) | YES | | 调整金额 |
| 26 | pay_amount | NUMERIC(18,2) | YES | | 实付金额 |
| 27 | balance_amount | NUMERIC(18,2) | YES | | 余额支付金额 |
| 28 | recharge_card_amount | NUMERIC(18,2) | YES | | 储值卡支付金额 |
| 29 | gift_card_amount | NUMERIC(18,2) | YES | | 礼品卡支付金额 |
| 30 | coupon_amount | NUMERIC(18,2) | YES | | 券抵扣金额 |
| 31 | rounding_amount | NUMERIC(18,2) | YES | | 抹零金额 |
| 32 | point_amount | NUMERIC(18,2) | YES | | 积分抵扣等值金额 |

## 使用说明

```sql
-- 每日营收统计
SELECT
    DATE(pay_time) AS pay_date,
    COUNT(*) AS order_count,
    SUM(consume_money) AS total_consume,
    SUM(pay_amount) AS total_pay
FROM billiards_dwd.dwd_settlement_head
GROUP BY DATE(pay_time)
ORDER BY pay_date DESC;

-- 台费 vs 商品 vs 助教收入
SELECT
    SUM(table_charge_money) AS table_revenue,
    SUM(goods_money) AS goods_revenue,
    SUM(assistant_pd_money + assistant_cx_money) AS assistant_revenue
FROM billiards_dwd.dwd_settlement_head;
```
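
member_id=0 表示散客(占比约 82.8%,见上文字段说明),按客户类型拆分营收的示意如下:

```sql
-- 散客 vs 会员的订单量与实付金额
SELECT
    CASE WHEN member_id = 0 THEN '散客' ELSE '会员' END AS customer_type,
    COUNT(*) AS order_count,
    SUM(pay_amount) AS total_pay
FROM billiards_dwd.dwd_settlement_head
GROUP BY CASE WHEN member_id = 0 THEN '散客' ELSE '会员' END;
```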
@@ -0,0 +1,61 @@
# dwd_store_goods_sale 商品销售主表

> 生成时间:2026-01-28

## 表信息

| 属性 | 值 |
|------|-----|
| Schema | billiards_dwd |
| 表名 | dwd_store_goods_sale |
| 主键 | store_goods_sale_id |
| 扩展表 | dwd_store_goods_sale_ex |
| 记录数 | 17563 |
| 说明 | 商品销售流水事实表,记录每笔商品销售明细 |

## 字段说明

| 序号 | 字段名 | 类型 | 可空 | 主键 | 说明 |
|------|--------|------|------|------|------|
| 1 | store_goods_sale_id | BIGINT | NO | PK | 销售流水 ID |
| 2 | order_trade_no | BIGINT | YES | | 订单号 |
| 3 | order_settle_id | BIGINT | YES | | 结账单 ID → dwd_settlement_head |
| 4 | order_pay_id | BIGINT | YES | | 支付单 ID(当前数据全为 0) |
| 5 | order_goods_id | BIGINT | YES | | 订单商品 ID(0=商城订单) |
| 6 | site_id | BIGINT | YES | | 门店 ID |
| 7 | tenant_id | BIGINT | YES | | 租户 ID |
| 8 | site_goods_id | BIGINT | YES | | 门店商品 ID → dim_store_goods |
| 9 | tenant_goods_id | BIGINT | YES | | 租户商品 ID → dim_tenant_goods |
| 10 | tenant_goods_category_id | BIGINT | YES | | 商品分类 ID |
| 11 | tenant_goods_business_id | BIGINT | YES | | 业务大类 ID |
| 12 | site_table_id | BIGINT | YES | | 台桌 ID(0=商城订单,非台桌消费) |
| 13 | ledger_name | VARCHAR | YES | | 商品名称。**样本值**: "哇哈哈矿泉水", "东方树叶", "可乐" 等 |
| 14 | ledger_group_name | VARCHAR | YES | | 商品分类。**样本值**: "酒水", "零食", "香烟" 等 |
| 15 | ledger_unit_price | NUMERIC(18,2) | YES | | 单价(元) |
| 16 | ledger_count | INTEGER | YES | | 购买数量。**样本值**: 1, 2, 3, 4 等 |
| 17 | ledger_amount | NUMERIC(18,2) | YES | | 销售金额(元) |
| 18 | discount_price | NUMERIC(18,2) | YES | | 折扣金额 |
| 19 | real_goods_money | NUMERIC(18,2) | YES | | 实收金额 |
| 20 | cost_money | NUMERIC(18,2) | YES | | 成本金额 |
| 21 | ledger_status | INTEGER | YES | | 账本状态。**枚举值**: 1=已结算 |
| 22 | is_delete | INTEGER | YES | | 删除标记。**枚举值**: 0=未删除 |
| 23 | create_time | TIMESTAMPTZ | YES | | 创建时间 |

## 使用说明

```sql
-- 热销商品排行
SELECT
    ledger_name,
    ledger_group_name,
    COUNT(*) AS sale_count,
    SUM(ledger_count) AS total_qty,
    SUM(real_goods_money) AS total_revenue
FROM billiards_dwd.dwd_store_goods_sale
WHERE is_delete = 0
GROUP BY ledger_name, ledger_group_name
ORDER BY total_revenue DESC
LIMIT 20;
```
@@ -0,0 +1,43 @@
# dwd_table_fee_adjust (Table-Fee Adjustment Main Table)

> Generated: 2026-01-28

## Table Info

| Attribute | Value |
|------|-----|
| Schema | billiards_dwd |
| Table name | dwd_table_fee_adjust |
| Primary key | table_fee_adjust_id |
| Extension table | dwd_table_fee_adjust_ex |
| Row count | 2849 |
| Description | Table-fee adjustment fact table; records the amount and time of each table-fee adjustment |

## Columns

| # | Column | Type | Nullable | PK | Description |
|------|--------|------|------|------|------|
| 1 | table_fee_adjust_id | BIGINT | NO | PK | Adjustment ID |
| 2 | order_trade_no | BIGINT | YES | | Order number |
| 3 | order_settle_id | BIGINT | YES | | Settlement ID → dwd_settlement_head |
| 4 | tenant_id | BIGINT | YES | | Tenant ID |
| 5 | site_id | BIGINT | YES | | Site ID |
| 6 | table_id | BIGINT | YES | | Table ID → dim_table |
| 7 | table_area_id | BIGINT | YES | | Table-area ID |
| 8 | table_area_name | VARCHAR | YES | | Table-area name (currently all NULL) |
| 9 | tenant_table_area_id | BIGINT | YES | | Tenant table-area ID |
| 10 | ledger_amount | NUMERIC(18,2) | YES | | Adjustment amount (CNY) |
| 11 | ledger_status | INTEGER | YES | | Ledger status. **Enum**: 0 = pending, 1 = confirmed |
| 12 | is_delete | INTEGER | YES | | Delete flag. **Enum**: 0 = not deleted |

## Usage

```sql
-- Table-fee adjustment summary
SELECT
    COUNT(*) AS adjust_count,
    SUM(ledger_amount) AS total_adjust
FROM billiards_dwd.dwd_table_fee_adjust
WHERE is_delete = 0 AND ledger_status = 1;
```
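
Adjustments can also be rolled up per settlement; a minimal sketch, assuming `order_settle_id` joins cleanly to `dwd_settlement_head`:

```sql
-- Sketch: adjustment totals per settlement (assumes order_settle_id is populated)
SELECT
    order_settle_id,
    COUNT(*) AS adjust_count,
    SUM(ledger_amount) AS adjust_total
FROM billiards_dwd.dwd_table_fee_adjust
WHERE is_delete = 0
  AND ledger_status = 1
GROUP BY order_settle_id
ORDER BY adjust_total DESC
LIMIT 20;
```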
@@ -0,0 +1,64 @@
# dwd_table_fee_log (Table-Fee Ledger Main Table)

> Generated: 2026-01-28

## Table Info

| Attribute | Value |
|------|-----|
| Schema | billiards_dwd |
| Table name | dwd_table_fee_log |
| Primary key | table_fee_log_id |
| Extension table | dwd_table_fee_log_ex |
| Row count | 18386 |
| Description | Table-fee billing fact table; one row per billed table session |

## Columns

| # | Column | Type | Nullable | PK | Description |
|------|--------|------|------|------|------|
| 1 | table_fee_log_id | BIGINT | NO | PK | Table-fee ledger ID |
| 2 | order_trade_no | BIGINT | YES | | Order number |
| 3 | order_settle_id | BIGINT | YES | | Settlement ID → dwd_settlement_head |
| 4 | order_pay_id | BIGINT | YES | | Payment ID (currently all 0) |
| 5 | tenant_id | BIGINT | YES | | Tenant ID |
| 6 | site_id | BIGINT | YES | | Site ID |
| 7 | site_table_id | BIGINT | YES | | Table ID → dim_table |
| 8 | site_table_area_id | BIGINT | YES | | Table-area ID |
| 9 | site_table_area_name | VARCHAR | YES | | Table-area name. **Enum values**: "A区", "B区", "斯诺克区", "麻将房", "C区", "补时长", "VIP包厢", etc. |
| 10 | tenant_table_area_id | BIGINT | YES | | Tenant-level table-area ID |
| 11 | member_id | BIGINT | YES | | Member ID (0 = walk-in; about 82.4% of rows) |
| 12 | ledger_name | VARCHAR | YES | | Table name. **Sample values**: "A3", "A5", "A4", "S1", "B5", "M3", etc. |
| 13 | ledger_unit_price | NUMERIC(18,2) | YES | | Unit price (CNY/hour), e.g. 48.00/58.00/68.00 |
| 14 | ledger_count | INTEGER | YES | | Billed duration (seconds). **Sample values**: 3600 = 1h, 7200 = 2h, 10800 = 3h, etc. |
| 15 | ledger_amount | NUMERIC(18,2) | YES | | Billed amount (CNY) |
| 16 | real_table_charge_money | NUMERIC(18,2) | YES | | Actual table fee received |
| 17 | coupon_promotion_amount | NUMERIC(18,2) | YES | | Coupon promotion amount |
| 18 | member_discount_amount | NUMERIC(18,2) | YES | | Member discount amount |
| 19 | adjust_amount | NUMERIC(18,2) | YES | | Adjustment amount |
| 20 | real_table_use_seconds | INTEGER | YES | | Actual usage duration (seconds) |
| 21 | add_clock_seconds | INTEGER | YES | | Added time (seconds), mostly 0 |
| 22 | start_use_time | TIMESTAMPTZ | YES | | Table-open time |
| 23 | ledger_end_time | TIMESTAMPTZ | YES | | Settlement time |
| 24 | create_time | TIMESTAMPTZ | YES | | Record creation time |
| 25 | ledger_status | INTEGER | YES | | Ledger status. **Enum**: 1 = settled |
| 26 | is_single_order | INTEGER | YES | | Standalone-order flag. **Enum**: 0 = merged order, 1 = standalone order |
| 27 | is_delete | INTEGER | YES | | Delete flag. **Enum**: 0 = not deleted |

## Usage

```sql
-- Table-fee revenue by table area
SELECT
    site_table_area_name,
    COUNT(*) AS usage_count,
    SUM(ledger_amount) AS total_fee,
    SUM(real_table_charge_money) AS real_fee,
    SUM(coupon_promotion_amount) AS coupon_fee
FROM billiards_dwd.dwd_table_fee_log
WHERE is_delete = 0
GROUP BY site_table_area_name
ORDER BY total_fee DESC;
```
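
Because durations are stored in seconds, an effective realized hourly rate per area can be derived; a minimal sketch, assuming `real_table_use_seconds` is the authoritative duration for billable sessions:

```sql
-- Sketch: effective realized rate per table area
-- (assumes real_table_use_seconds > 0 marks a billable session)
SELECT
    site_table_area_name,
    ROUND(SUM(real_table_charge_money)
          / NULLIF(SUM(real_table_use_seconds) / 3600.0, 0), 2) AS effective_rate_per_hour
FROM billiards_dwd.dwd_table_fee_log
WHERE is_delete = 0
  AND real_table_use_seconds > 0
GROUP BY site_table_area_name
ORDER BY effective_rate_per_hour DESC;
```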
@@ -1,585 +0,0 @@
# -*- coding: utf-8 -*-
"""Generate the Oct-Dec 2025 assistant leaderboards plus per-assistant detail tables (CSV + MD).

Output directory: etl_billiards/docs/table_2025-12-19

Note: when customer-revenue / recharge attribution hits multiple assistants or multiple
orders, the full amount is copied into each hit, so assistant totals can exceed the site
total; the table descriptions state this explicitly.
"""

from __future__ import annotations

import csv
import re
from dataclasses import dataclass
from decimal import Decimal
from pathlib import Path
from statistics import median
from typing import Any

import psycopg2
import psycopg2.extras


SITE_ID = 2790685415443269
TZ = "Asia/Shanghai"

WIN_OCT = ("2025-10-01 00:00:00+08", "2025-11-01 00:00:00+08")
WIN_NOV = ("2025-11-01 00:00:00+08", "2025-12-01 00:00:00+08")
WIN_DEC = ("2025-12-01 00:00:00+08", "2026-01-01 00:00:00+08")
WIN_ALL = (WIN_OCT[0], WIN_DEC[1])

MONTHS = [
    ("2025-10", "10月", WIN_OCT),
    ("2025-11", "11月", WIN_NOV),
    ("2025-12", "12月", WIN_DEC),
]

REPO_ROOT = Path(__file__).resolve().parents[3]
ENV_PATH = REPO_ROOT / "etl_billiards" / ".env"
OUT_DIR = Path(__file__).resolve().parent


@dataclass(frozen=True)
class SqlBlock:
    title: str
    sql: str


def read_pg_dsn() -> str:
    text = ENV_PATH.read_text(encoding="utf-8")
    m = re.search(r"^PG_DSN=(.+)$", text, re.M)
    if not m:
        raise RuntimeError(f"PG_DSN not found in {ENV_PATH}")
    return m.group(1).strip()


def conn():
    return psycopg2.connect(read_pg_dsn(), connect_timeout=10)


def sanitize_filename(name: str) -> str:
    name = name.strip()
    name = re.sub(r"[<>:\"/\\|?*]+", "_", name)
    name = re.sub(r"\s+", " ", name)
    return name


def d(v: Any) -> Decimal:
    if v is None:
        return Decimal("0")
    if isinstance(v, Decimal):
        return v
    return Decimal(str(v))


def fmt_money(v: Any) -> str:
    return f"{d(v):.2f}"


def fmt_hours(v: Any, digits: int = 2) -> str:
    q = Decimal("1").scaleb(-digits)
    return f"{d(v).quantize(q):f}h"


def write_csv(path: Path, title: str, description: str, header_rows: list[list[str]], rows: list[list[Any]]) -> None:
    path.parent.mkdir(parents=True, exist_ok=True)
    with path.open("w", newline="", encoding="utf-8") as f:
        w = csv.writer(f)
        w.writerow([title])
        w.writerow([description])
        w.writerow([])
        for hr in header_rows:
            w.writerow(hr)
        for r in rows:
            w.writerow(["" if v is None else v for v in r])


def write_csv_sections(path: Path, title: str, description: str, section_rows: list[list[Any]]) -> None:
    path.parent.mkdir(parents=True, exist_ok=True)
    with path.open("w", newline="", encoding="utf-8") as f:
        w = csv.writer(f)
        w.writerow([title])
        w.writerow([description])
        w.writerow([])
        for r in section_rows:
            w.writerow(["" if v is None else v for v in r])


def write_md(path: Path, title: str, thinking: str, description: str, sql_blocks: list[SqlBlock]) -> None:
    parts: list[str] = []
    parts.append(f"# {title}\n")
    parts.append("## 思考过程\n")
    parts.append(thinking.strip() + "\n")
    parts.append("\n## 查询说明\n")
    parts.append(description.strip() + "\n")
    parts.append("\n## SQL\n")
    for b in sql_blocks:
        parts.append(f"\n### {b.title}\n")
        parts.append("```sql\n")
        parts.append(b.sql.strip() + "\n")
        parts.append("```\n")
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text("".join(parts), encoding="utf-8")


def fetch_all(cur, sql: str, params: dict[str, Any]) -> list[dict[str, Any]]:
    cur.execute(sql, params)
    return list(cur.fetchall())


def month_case(ts_expr: str) -> str:
    parts = []
    for month_key, _, (ws, we) in MONTHS:
        parts.append(
            f"when {ts_expr} >= '{ws}'::timestamptz and {ts_expr} < '{we}'::timestamptz then '{month_key}'"
        )
    return "case " + " ".join(parts) + " else null end"


def sql_order_base(window_start: str, window_end: str) -> str:
    return f"""
    with base_orders as (
        select
            tfl.order_settle_id,
            max(tfl.member_id) as member_id,
            min(tfl.start_use_time) as order_start_time,
            max(tfl.ledger_end_time) as order_end_time,
            sum(tfl.ledger_amount) as table_amount
        from billiards_dwd.dwd_table_fee_log tfl
        where tfl.site_id = %(site_id)s
          and coalesce(tfl.is_delete,0) = 0
          and tfl.start_use_time >= '{window_start}'::timestamptz
          and tfl.start_use_time < '{window_end}'::timestamptz
        group by tfl.order_settle_id
    ),
    assistant_amount as (
        select
            asl.order_settle_id,
            sum(asl.ledger_amount) as assistant_amount
        from billiards_dwd.dwd_assistant_service_log asl
        join base_orders bo on bo.order_settle_id = asl.order_settle_id
        where asl.site_id = %(site_id)s
          and coalesce(asl.is_delete,0) = 0
        group by asl.order_settle_id
    ),
    goods_amount as (
        select
            g.order_settle_id,
            sum(g.ledger_amount) as goods_amount
        from billiards_dwd.dwd_store_goods_sale g
        join base_orders bo on bo.order_settle_id = g.order_settle_id
        where g.site_id = %(site_id)s
          and coalesce(g.is_delete,0) = 0
        group by g.order_settle_id
    ),
    orders as (
        select
            bo.order_settle_id,
            bo.member_id,
            bo.order_start_time,
            bo.order_end_time,
            coalesce(bo.table_amount,0) + coalesce(a.assistant_amount,0) + coalesce(g.goods_amount,0) as order_amount
        from base_orders bo
        left join assistant_amount a on a.order_settle_id = bo.order_settle_id
        left join goods_amount g on g.order_settle_id = bo.order_settle_id
    )
    """


def dense_rank_desc(values: dict[str, Decimal]) -> dict[str, int]:
    uniq = sorted({v for v in values.values() if v > 0}, reverse=True)
    rank_map = {v: i + 1 for i, v in enumerate(uniq)}
    return {k: rank_map.get(v, 0) for k, v in values.items()}
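# Example (illustrative, not part of the original output): dense ranking keeps
# ties and leaves no gaps, and non-positive values fall out of the ranking:
#   dense_rank_desc({"a": Decimal("10"), "b": Decimal("10"), "c": Decimal("7")})
#   -> {"a": 1, "b": 1, "c": 2}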


def calc_diff(all_values: dict[str, Decimal], current: Decimal) -> tuple[Decimal, Decimal]:
    xs = [v for v in all_values.values() if v > 0]
    if not xs or current <= 0:
        return Decimal("0"), Decimal("0")
    avg = sum(xs) / Decimal(len(xs))
    med = Decimal(str(median([float(v) for v in xs])))
    return current - avg, current - med


def main() -> None:
    OUT_DIR.mkdir(parents=True, exist_ok=True)

    with conn() as c, c.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur:
        assistants_rows = fetch_all(
            cur,
            """
            select distinct nickname as assistant
            from billiards_dwd.dwd_assistant_service_log
            where site_id=%(site_id)s and coalesce(is_delete,0)=0
              and start_use_time >= %(window_start)s::timestamptz
              and start_use_time < %(window_end)s::timestamptz
            order by assistant;
            """,
            {"site_id": SITE_ID, "window_start": WIN_ALL[0], "window_end": WIN_ALL[1]},
        )
        assistants = [r["assistant"] for r in assistants_rows if r.get("assistant")]

        # Assistant x customer x month: service hours
        sql_svc = f"""
        with raw as (
            select
                asl.nickname as assistant,
                asl.tenant_member_id as member_id,
                {month_case('asl.start_use_time')} as month_key,
                asl.order_assistant_type,
                asl.income_seconds
            from billiards_dwd.dwd_assistant_service_log asl
            where asl.site_id=%(site_id)s and coalesce(asl.is_delete,0)=0
              and asl.start_use_time >= %(window_start)s::timestamptz
              and asl.start_use_time < %(window_end)s::timestamptz
              and asl.tenant_member_id is not null and asl.tenant_member_id<>0
        )
        select
            assistant,
            member_id,
            month_key,
            sum(case when order_assistant_type=1 then income_seconds else 0 end)/3600.0 as base_hours,
            sum(case when order_assistant_type=2 then income_seconds else 0 end)/3600.0 as extra_hours
        from raw
        where month_key is not null
        group by assistant, member_id, month_key;
        """
        svc_rows = fetch_all(cur, sql_svc, {"site_id": SITE_ID, "window_start": WIN_ALL[0], "window_end": WIN_ALL[1]})

        # Assistant x customer x month: customer revenue
        sql_rev = sql_order_base(WIN_ALL[0], WIN_ALL[1]) + f"""
        , assistant_orders as (
            select distinct order_settle_id, nickname as assistant
            from billiards_dwd.dwd_assistant_service_log
            where site_id=%(site_id)s and coalesce(is_delete,0)=0
              and start_use_time >= %(window_start)s::timestamptz
              and start_use_time < %(window_end)s::timestamptz
        ),
        raw as (
            select
                ao.assistant,
                o.member_id,
                {month_case('o.order_start_time')} as month_key,
                o.order_amount
            from orders o
            join assistant_orders ao on ao.order_settle_id=o.order_settle_id
            where o.member_id is not null and o.member_id<>0
        )
        select
            assistant,
            member_id,
            month_key,
            sum(order_amount) as revenue_amount
        from raw
        where month_key is not null
        group by assistant, member_id, month_key;
        """
        rev_rows = fetch_all(cur, sql_rev, {"site_id": SITE_ID, "window_start": WIN_ALL[0], "window_end": WIN_ALL[1]})

        # Assistant x customer x month: recharge attribution
        sql_rech = f"""
        with base_orders as (
            select
                tfl.order_settle_id,
                max(tfl.member_id) as member_id,
                min(tfl.start_use_time) as table_start_time,
                max(tfl.ledger_end_time) as table_end_time
            from billiards_dwd.dwd_table_fee_log tfl
            where tfl.site_id=%(site_id)s and coalesce(tfl.is_delete,0)=0
              and tfl.start_use_time >= %(window_start)s::timestamptz
              and tfl.start_use_time < %(window_end)s::timestamptz
            group by tfl.order_settle_id
        ),
        assistant_time as (
            select
                asl.order_settle_id,
                min(asl.start_use_time) as assistant_start_time,
                max(asl.last_use_time) as assistant_end_time
            from billiards_dwd.dwd_assistant_service_log asl
            join base_orders bo on bo.order_settle_id=asl.order_settle_id
            where asl.site_id=%(site_id)s and coalesce(asl.is_delete,0)=0
            group by asl.order_settle_id
        ),
        order_windows as (
            select
                bo.order_settle_id,
                bo.member_id,
                least(bo.table_start_time, coalesce(at.assistant_start_time, bo.table_start_time)) as win_start,
                greatest(bo.table_end_time, coalesce(at.assistant_end_time, bo.table_end_time)) as win_end
            from base_orders bo
            left join assistant_time at on at.order_settle_id=bo.order_settle_id
            where bo.member_id is not null and bo.member_id<>0
        ),
        assistant_orders as (
            select distinct order_settle_id, nickname as assistant
            from billiards_dwd.dwd_assistant_service_log
            where site_id=%(site_id)s and coalesce(is_delete,0)=0
              and start_use_time >= %(window_start)s::timestamptz
              and start_use_time < %(window_end)s::timestamptz
        ),
        recharge_pay as (
            select
                p.pay_time,
                r.member_id,
                p.pay_amount
            from billiards_dwd.dwd_payment p
            join billiards_dwd.dwd_recharge_order r on r.recharge_order_id=p.relate_id
            where p.site_id=%(site_id)s
              and p.relate_type=5
              and p.pay_status=2
              and p.pay_amount>0
              and p.pay_time >= %(window_start)s::timestamptz
              and p.pay_time < %(window_end)s::timestamptz
        ),
        matched as (
            select
                rp.pay_time,
                ow.order_settle_id,
                ow.member_id,
                rp.pay_amount
            from recharge_pay rp
            join order_windows ow
              on ow.member_id=rp.member_id
             and rp.pay_time >= ow.win_start - interval '30 minutes'
             and rp.pay_time <= ow.win_end + interval '30 minutes'
        ),
        raw as (
            select
                ao.assistant,
                m.member_id,
                {month_case('m.pay_time')} as month_key,
                m.pay_amount
            from matched m
            join assistant_orders ao on ao.order_settle_id=m.order_settle_id
        )
        select
            assistant,
            member_id,
            month_key,
            sum(pay_amount) as recharge_amount
        from raw
        where month_key is not null
        group by assistant, member_id, month_key;
        """
        rech_rows = fetch_all(cur, sql_rech, {"site_id": SITE_ID, "window_start": WIN_ALL[0], "window_end": WIN_ALL[1]})

        # Aggregate: monthly per-assistant metrics
        svc_map = {mk: {a: {"base": Decimal('0'), "extra": Decimal('0')} for a in assistants} for mk, _, _ in MONTHS}
        for r in svc_rows:
            mk = r["month_key"]; a = r["assistant"]
            if mk in svc_map and a in svc_map[mk]:
                svc_map[mk][a]["base"] += d(r["base_hours"])
                svc_map[mk][a]["extra"] += d(r["extra_hours"])

        revenue_map = {mk: {a: Decimal('0') for a in assistants} for mk, _, _ in MONTHS}
        for r in rev_rows:
            mk = r["month_key"]; a = r["assistant"]
            if mk in revenue_map and a in revenue_map[mk]:
                revenue_map[mk][a] += d(r["revenue_amount"])

        recharge_map = {mk: {a: Decimal('0') for a in assistants} for mk, _, _ in MONTHS}
        for r in rech_rows:
            mk = r["month_key"]; a = r["assistant"]
            if mk in recharge_map and a in recharge_map[mk]:
                recharge_map[mk][a] += d(r["recharge_amount"])

        # ====== Emit the four leaderboards ======
        def write_rank(file_stem: str, title: str, desc: str, rows: list[list[Any]]):
            write_csv(OUT_DIR / f"{file_stem}.csv", title, desc, [["月份", "排名", "助教昵称", "指标"]], rows)
            write_md(OUT_DIR / f"{file_stem}.md", title, "按月聚合并做dense_rank排名。", desc, [])

        rows = []
        for mk, _, _ in MONTHS:
            values = {a: svc_map[mk][a]["base"] for a in assistants}
            ranks = dense_rank_desc(values)
            for a in sorted(assistants, key=lambda x: (ranks[x] if ranks[x] else 999999, x)):
                v = values[a]
                if v > 0:
                    rows.append([mk, ranks[a], a, fmt_hours(v, 2)])
        write_rank(
            "助教_基础课时长排行_2025年10-12月",
            "2025年10-12月 助教基础课时长排行榜",
            "口径:order_assistant_type=1,时长=income_seconds/3600(小时),按月排名。",
            rows,
        )

        rows = []
        for mk, _, _ in MONTHS:
            values = {a: svc_map[mk][a]["extra"] for a in assistants}
            ranks = dense_rank_desc(values)
            for a in sorted(assistants, key=lambda x: (ranks[x] if ranks[x] else 999999, x)):
                v = values[a]
                if v > 0:
                    rows.append([mk, ranks[a], a, fmt_hours(v, 2)])
        write_rank(
            "助教_附加课时长排行_2025年10-12月",
            "2025年10-12月 助教附加课(超休)时长排行榜",
            "口径:order_assistant_type=2,超休时长=income_seconds/3600(小时),按月排名。",
            rows,
        )

        rows = []
        for mk, _, _ in MONTHS:
            values = revenue_map[mk]
            ranks = dense_rank_desc(values)
            for a in sorted(assistants, key=lambda x: (ranks[x] if ranks[x] else 999999, x)):
                v = values[a]
                if v > 0:
                    rows.append([mk, ranks[a], a, fmt_money(v)])
        write_rank(
            "助教_客户流水排行_2025年10-12月",
            "2025年10-12月 助教客户流水排行榜(全额复制口径)",
            "口径:客户流水=台费+助教+商品应付金额按订单归集后,全额计入订单内每位助教;多助教会导致汇总>门店总额。",
            rows,
        )

        rows = []
        for mk, _, _ in MONTHS:
            values = recharge_map[mk]
            ranks = dense_rank_desc(values)
            for a in sorted(assistants, key=lambda x: (ranks[x] if ranks[x] else 999999, x)):
                v = values[a]
                if v > 0:
                    rows.append([mk, ranks[a], a, fmt_money(v)])
        write_rank(
            "助教_客户充值归因排行_2025年10-12月",
            "2025年10-12月 助教客户充值归因排行榜(全额复制口径)",
            "口径:充值支付(dwd_payment.relate_type=5)在消费窗口±30分钟内命中且订单有助教,则全额计入助教;多助教/多订单命中会重复计入。",
            rows,
        )

        # ====== Emit per-assistant detail tables (one per assistant) ======
        # Member nicknames
        cur.execute("select member_id, nickname from billiards_dwd.dim_member where scd2_is_current=1")
        member_name = {r["member_id"]: (r.get("nickname") or "") for r in cur.fetchall()}

        # Index: assistant -> member -> month
        svc_idx = {a: {} for a in assistants}
        for r in svc_rows:
            a = r["assistant"]; mid = int(r["member_id"]); mk = r["month_key"]
            svc_idx.setdefault(a, {}).setdefault(mid, {})[mk] = {"base": d(r["base_hours"]), "extra": d(r["extra_hours"])}

        rev_idx = {a: {} for a in assistants}
        for r in rev_rows:
            a = r["assistant"]; mid = int(r["member_id"]); mk = r["month_key"]
            rev_idx.setdefault(a, {}).setdefault(mid, {})[mk] = d(r["revenue_amount"])

        rech_idx = {a: {} for a in assistants}
        for r in rech_rows:
            a = r["assistant"]; mid = int(r["member_id"]); mk = r["month_key"]
            rech_idx.setdefault(a, {}).setdefault(mid, {})[mk] = d(r["recharge_amount"])

        for a in assistants:
            safe = sanitize_filename(a)
            csv_path = OUT_DIR / f"助教详情_{safe}.csv"
            md_path = OUT_DIR / f"助教详情_{safe}.md"

            # Short review line
            base_total = sum((svc_map[mk][a]["base"] for mk, _, _ in MONTHS), Decimal('0'))
            extra_total = sum((svc_map[mk][a]["extra"] for mk, _, _ in MONTHS), Decimal('0'))
            rev_total = sum((revenue_map[mk][a] for mk, _, _ in MONTHS), Decimal('0'))
            rech_total = sum((recharge_map[mk][a] for mk, _, _ in MONTHS), Decimal('0'))

            # Top-100 customers (ranked by December revenue)
            members = set(rev_idx.get(a, {}).keys()) | set(svc_idx.get(a, {}).keys()) | set(rech_idx.get(a, {}).keys())

            def rev_dec(mid: int) -> Decimal:
                return rev_idx.get(a, {}).get(mid, {}).get('2025-12', Decimal('0'))

            top_members = sorted(members, key=lambda mid: rev_dec(mid), reverse=True)[:100]

            top3 = '、'.join([(member_name.get(mid) or str(mid)) for mid in top_members[:3]])
            assistant_review = (
                f"评价:基础{fmt_hours(base_total,1)},附加{fmt_hours(extra_total,1)};"
                f"客户流水¥{rev_total:.2f},充值归因¥{rech_total:.2f};"
                f"头部客户(12月)Top3:{top3 or '无'}。"
            )

            # Parts 1-4
            part1 = []; part2 = []; part3 = []; part4 = []
            for mk, mcn, _ in MONTHS:
                base_v = svc_map[mk][a]["base"]
                extra_v = svc_map[mk][a]["extra"]
                rev_v = revenue_map[mk][a]
                rech_v = recharge_map[mk][a]

                base_all = {x: svc_map[mk][x]["base"] for x in assistants}
                extra_all = {x: svc_map[mk][x]["extra"] for x in assistants}
                rev_all = {x: revenue_map[mk][x] for x in assistants}
                rech_all = {x: recharge_map[mk][x] for x in assistants}

                base_rank = dense_rank_desc(base_all).get(a, 0)
                extra_rank = dense_rank_desc(extra_all).get(a, 0)
                rev_rank = dense_rank_desc(rev_all).get(a, 0)
                rech_rank = dense_rank_desc(rech_all).get(a, 0)

                base_da, base_dm = calc_diff(base_all, base_v)
                extra_da, extra_dm = calc_diff(extra_all, extra_v)
                rev_da, rev_dm = calc_diff(rev_all, rev_v)
                rech_da, rech_dm = calc_diff(rech_all, rech_v)

                part1.append([mcn, fmt_hours(base_v, 2), base_rank or "", fmt_hours(base_da, 2), fmt_hours(base_dm, 2)])
                part2.append([mcn, fmt_hours(extra_v, 2), extra_rank or "", fmt_hours(extra_da, 2), fmt_hours(extra_dm, 2)])
                part3.append([mcn, fmt_money(rev_v), rev_rank or "", fmt_money(rev_da), fmt_money(rev_dm)])
                part4.append([mcn, fmt_money(rech_v), rech_rank or "", fmt_money(rech_da), fmt_money(rech_dm)])

            # Part 5 rows
            part5 = []
            for i, mid in enumerate(top_members, start=1):
                def h_pair(month_key: str) -> str:
                    v = svc_idx.get(a, {}).get(mid, {}).get(month_key, {})
                    return f"{fmt_hours(v.get('base', Decimal('0')), 1)} / {fmt_hours(v.get('extra', Decimal('0')), 1)}"

                def rev_m(month_key: str) -> Decimal:
                    return rev_idx.get(a, {}).get(mid, {}).get(month_key, Decimal('0'))

                def rech_m(month_key: str) -> Decimal:
                    return rech_idx.get(a, {}).get(mid, {}).get(month_key, Decimal('0'))

                name = member_name.get(mid) or str(mid)
                part5.append([
                    i,
                    name,
                    h_pair('2025-12'), fmt_money(rev_m('2025-12')), fmt_money(rech_m('2025-12')),
                    h_pair('2025-11'), fmt_money(rev_m('2025-11')), fmt_money(rech_m('2025-11')),
                    h_pair('2025-10'), fmt_money(rev_m('2025-10')), fmt_money(rech_m('2025-10')),
                ])

            title = f"助教详情:{a}(2025年10-12月)"
            desc = (
                "本表包含5个部分:基础课业绩、附加课业绩、客户消费业绩、客户充值业绩、头部客户情况。"
                "均值/中位数差值对比集合为当月该指标>0的助教。"
                "充值/客户流水多助教与多订单命中均按全额复制计入,故汇总可能大于门店总额。"
            )

            rows = []
            rows += [["一、基础课业绩"], ["说明:" + assistant_review], []]
            rows += [["月份", "基础课业绩", "基础课业绩", "基础课业绩", "基础课业绩"], ["月份", "小时数", "排名", "平均值差值小时数", "中位数值差值小时数"]]
            rows += part1
            rows += [[], ["二、附加课业绩"], ["说明:附加课=order_assistant_type=2。"], []]
            rows += [["月份", "附加课业绩", "附加课业绩", "附加课业绩", "附加课业绩"], ["月份", "小时数", "排名", "平均值差值小时数", "中位数值差值小时数"]]
            rows += part2
            rows += [[], ["三、客户消费业绩"], ["说明:订单台费+助教+商品应付金额全额计入订单内助教。"], []]
            rows += [["月份", "客户消费业绩", "客户消费业绩", "客户消费业绩", "客户消费业绩"], ["月份", "合计元", "排名", "平均值差值元", "中位数值差值元"]]
            rows += part3
            rows += [[], ["四、客户充值业绩"], ["说明:充值命中消费窗口±30分钟且有助教则归因;全额复制。"], []]
            rows += [["月份", "客户充值业绩", "客户充值业绩", "客户充值业绩", "客户充值业绩"], ["月份", "合计元", "排名", "平均值差值元", "中位数值差值元"]]
            rows += part4
            rows += [[], ["五、头部客户(按12月消费业绩排序,Top100)"], ["说明:基础/附加课时=基础h/附加h。"], []]
            rows += [["排名", "客户名称", "12月", "12月", "12月", "11月", "11月", "11月", "10月", "10月", "10月"],
                     ["排名", "客户名称", "基础/附加课时", "消费业绩(元)", "客户充值(元)", "基础/附加课时", "消费业绩(元)", "客户充值(元)", "基础/附加课时", "消费业绩(元)", "客户充值(元)"]]
            rows += part5

            write_csv_sections(csv_path, title, desc, rows)
            write_md(
                md_path,
                title,
                "按模板拆分5部分输出;月度排名采用dense_rank;均值/中位数在当月该指标>0助教集合上计算。",
                desc + "\n" + assistant_review,
                [
                    SqlBlock("服务时长(助教-客户-月份)", sql_svc),
                    SqlBlock("客户流水(助教-客户-月份)", sql_rev),
                    SqlBlock("充值归因(助教-客户-月份)", sql_rech),
                ],
            )

    print(f"Done: {OUT_DIR}")


if __name__ == "__main__":
    main()
File diff suppressed because it is too large
@@ -45,7 +45,7 @@ class AppSettings:
            "history_start": "",
            "history_end": "",
            "lookback_hours": 24,
            "include_dimensions": False,
            "include_dimensions": True,
            "auto_backfill": False,
            "ods_tasks": "",
        },
@@ -277,7 +277,7 @@ class AppSettings:

    @property
    def integrity_include_dimensions(self) -> bool:
        return self._settings.get("integrity_check", {}).get("include_dimensions", False)
        return self._settings.get("integrity_check", {}).get("include_dimensions", True)

    @integrity_include_dimensions.setter
    def integrity_include_dimensions(self, value: bool):
@@ -41,8 +41,19 @@ ENV_GROUPS = {
    },
    "integrity": {
        "title": "数据完整性配置",
        "keys": ["INTEGRITY_MODE", "INTEGRITY_HISTORY_START", "INTEGRITY_HISTORY_END",
                 "INTEGRITY_INCLUDE_DIMENSIONS", "INTEGRITY_AUTO_CHECK", "INTEGRITY_ODS_TASK_CODES"],
        "keys": [
            "INTEGRITY_MODE",
            "INTEGRITY_HISTORY_START",
            "INTEGRITY_HISTORY_END",
            "INTEGRITY_INCLUDE_DIMENSIONS",
            "INTEGRITY_AUTO_CHECK",
            "INTEGRITY_AUTO_BACKFILL",
            "INTEGRITY_COMPARE_CONTENT",
            "INTEGRITY_CONTENT_SAMPLE_LIMIT",
            "INTEGRITY_BACKFILL_MISMATCH",
            "INTEGRITY_RECHECK_AFTER_BACKFILL",
            "INTEGRITY_ODS_TASK_CODES",
        ],
        "sensitive": [],
    },
}
@@ -348,8 +348,8 @@ class TaskPanel(QWidget):
        integrity_layout.addWidget(self.include_dimensions_check)

        # Auto-backfill option
        self.auto_backfill_check = QCheckBox("校验后自动补全丢失数据")
        self.auto_backfill_check.setToolTip("如果发现丢失数据,自动从 API 重新获取并补全到 ODS")
        self.auto_backfill_check = QCheckBox("更新并验证(自动回补缺失/不一致)")
        self.auto_backfill_check.setToolTip("先校验,再自动从 API 回补缺失/不一致数据;可通过 INTEGRITY_RECHECK_AFTER_BACKFILL 控制是否再校验")
        integrity_layout.addWidget(self.auto_backfill_check)

        # Specify ODS tasks
@@ -147,25 +147,164 @@ def _amount_columns(cols: List[str], types: Dict[str, str]) -> List[str]:
    return out


def _count_table(cur, schema: str, table: str, time_col: str | None, window: IntegrityWindow | None) -> int:
    where = ""
def _build_hash_expr(alias: str, cols: list[str]) -> str:
    if not cols:
        return "NULL"
    parts = ", ".join([f"COALESCE({alias}.\"{c}\"::text,'')" for c in cols])
    return f"md5(concat_ws('||', {parts}))"


def _build_snapshot_subquery(
    schema: str,
    table: str,
    cols: list[str],
    key_cols: list[str],
    order_col: str | None,
    where_sql: str,
) -> str:
    cols_sql = ", ".join([f'"{c}"' for c in cols])
    if key_cols and order_col:
        keys = ", ".join([f'"{c}"' for c in key_cols])
        order_by = ", ".join([*(f'"{c}"' for c in key_cols), f'"{order_col}" DESC NULLS LAST'])
        return (
            f'SELECT DISTINCT ON ({keys}) {cols_sql} '
            f'FROM "{schema}"."{table}" {where_sql} '
            f"ORDER BY {order_by}"
        )
    return f'SELECT {cols_sql} FROM "{schema}"."{table}" {where_sql}'


def _build_snapshot_expr_subquery(
    schema: str,
    table: str,
    select_exprs: list[str],
    key_exprs: list[str],
    order_col: str | None,
    where_sql: str,
) -> str:
    select_cols_sql = ", ".join(select_exprs)
    table_sql = f'"{schema}"."{table}"'
    if key_exprs and order_col:
        distinct_on = ", ".join(key_exprs)
        order_by = ", ".join([*key_exprs, f'"{order_col}" DESC NULLS LAST'])
        return (
            f"SELECT DISTINCT ON ({distinct_on}) {select_cols_sql} "
            f"FROM {table_sql} {where_sql} "
            f"ORDER BY {order_by}"
        )
    return f"SELECT {select_cols_sql} FROM {table_sql} {where_sql}"


def _cast_expr(col: str, cast_type: str | None) -> str:
    if col.upper() == "NULL":
        base = "NULL"
    else:
        is_expr = not col.isidentifier() or "->" in col or "#>>" in col or "::" in col or "'" in col
        base = col if is_expr else f'"{col}"'
    if cast_type:
        cast_lower = cast_type.lower()
        if cast_lower in {"bigint", "integer", "numeric", "decimal"}:
            return f"CAST(NULLIF(CAST({base} AS text), '') AS numeric)::{cast_type}"
        if cast_lower == "timestamptz":
            return f"({base})::timestamptz"
        return f"{base}::{cast_type}"
    return base


def _fetch_pk_columns(cur, schema: str, table: str) -> List[str]:
    cur.execute(
        """
        SELECT kcu.column_name
        FROM information_schema.table_constraints tc
        JOIN information_schema.key_column_usage kcu
          ON tc.constraint_name = kcu.constraint_name
         AND tc.table_schema = kcu.table_schema
        WHERE tc.constraint_type = 'PRIMARY KEY'
          AND tc.table_schema = %s
          AND tc.table_name = %s
        ORDER BY kcu.ordinal_position
        """,
        (schema, table),
    )
    return [r[0] for r in cur.fetchall()]


def _pick_snapshot_order_column(cols: Iterable[str]) -> str | None:
    lower = {c.lower() for c in cols}
    for candidate in ("fetched_at", "update_time", "create_time"):
        if candidate in lower:
            return candidate
    return None


def _count_table(
    cur,
    schema: str,
    table: str,
    time_col: str | None,
    window: IntegrityWindow | None,
    *,
    pk_cols: List[str] | None = None,
    snapshot_order_col: str | None = None,
    current_only: bool = False,
) -> int:
    where_parts: List[str] = []
    params: List[Any] = []
    if current_only:
        where_parts.append("COALESCE(scd2_is_current,1)=1")
    if time_col and window:
        where = f'WHERE "{time_col}" >= %s AND "{time_col}" < %s'
        params = [window.start, window.end]
        sql = f'SELECT COUNT(1) FROM "{schema}"."{table}" {where}'
        where_parts.append(f'"{time_col}" >= %s AND "{time_col}" < %s')
        params.extend([window.start, window.end])
    where = f"WHERE {' AND '.join(where_parts)}" if where_parts else ""

    if pk_cols and snapshot_order_col:
        keys = ", ".join(f'"{c}"' for c in pk_cols)
        order_by = ", ".join([*(f'"{c}"' for c in pk_cols), f'"{snapshot_order_col}" DESC NULLS LAST'])
        sql = (
            f'SELECT COUNT(1) FROM ('
            f'SELECT DISTINCT ON ({keys}) 1 FROM "{schema}"."{table}" {where} '
            f'ORDER BY {order_by}'
            f') t'
        )
    else:
        sql = f'SELECT COUNT(1) FROM "{schema}"."{table}" {where}'
    cur.execute(sql, params)
    row = cur.fetchone()
    return int(row[0] if row else 0)
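# Example (illustrative): with pk_cols=["id"] and snapshot_order_col="fetched_at"
# the emitted SQL counts one row per id, keeping only the newest snapshot:
#   SELECT COUNT(1) FROM (
#       SELECT DISTINCT ON ("id") 1 FROM "ods"."t" WHERE ...
#       ORDER BY "id", "fetched_at" DESC NULLS LAST) t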


def _sum_column(cur, schema: str, table: str, col: str, time_col: str | None, window: IntegrityWindow | None) -> float:
    where = ""
def _sum_column(
    cur,
    schema: str,
    table: str,
    col: str,
    time_col: str | None,
    window: IntegrityWindow | None,
    *,
    pk_cols: List[str] | None = None,
    snapshot_order_col: str | None = None,
    current_only: bool = False,
) -> float:
    where_parts: List[str] = []
    params: List[Any] = []
    if current_only:
        where_parts.append("COALESCE(scd2_is_current,1)=1")
    if time_col and window:
        where = f'WHERE "{time_col}" >= %s AND "{time_col}" < %s'
        params = [window.start, window.end]
        sql = f'SELECT COALESCE(SUM("{col}"), 0) FROM "{schema}"."{table}" {where}'
        where_parts.append(f'"{time_col}" >= %s AND "{time_col}" < %s')
        params.extend([window.start, window.end])
    where = f"WHERE {' AND '.join(where_parts)}" if where_parts else ""

    if pk_cols and snapshot_order_col:
        keys = ", ".join(f'"{c}"' for c in pk_cols)
        order_by = ", ".join([*(f'"{c}"' for c in pk_cols), f'"{snapshot_order_col}" DESC NULLS LAST'])
        sql = (
            f'SELECT COALESCE(SUM("{col}"), 0) FROM ('
            f'SELECT DISTINCT ON ({keys}) "{col}" FROM "{schema}"."{table}" {where} '
            f'ORDER BY {order_by}'
            f') t'
        )
    else:
        sql = f'SELECT COALESCE(SUM("{col}"), 0) FROM "{schema}"."{table}" {where}'
    cur.execute(sql, params)
    row = cur.fetchone()
    return float(row[0] if row else 0)
@@ -176,14 +315,21 @@ def run_dwd_vs_ods_check(
    cfg: AppConfig,
    window: IntegrityWindow | None,
    include_dimensions: bool,
    compare_content: bool | None = None,
    content_sample_limit: int | None = None,
) -> Dict[str, Any]:
    dsn = cfg["db"]["dsn"]
    session = cfg["db"].get("session")
    db_conn = DatabaseConnection(dsn=dsn, session=session)
    if compare_content is None:
        compare_content = bool(cfg.get("integrity.compare_content", True))
    if content_sample_limit is None:
        content_sample_limit = cfg.get("integrity.content_sample_limit") or 50
    try:
        with db_conn.conn.cursor() as cur:
            results: List[Dict[str, Any]] = []
            table_map = DwdLoadTask.TABLE_MAP
            total_mismatch = 0
            for dwd_table, ods_table in table_map.items():
                if not include_dimensions and ".dim_" in dwd_table:
                    continue
@@ -193,16 +339,55 @@ def run_dwd_vs_ods_check(
                dwd_cols, dwd_types = _fetch_columns(cur, schema_dwd, name_dwd)
                ods_cols, ods_types = _fetch_columns(cur, schema_ods, name_ods)
                time_col = _pick_time_column(dwd_cols, ods_cols)
                count_dwd = _count_table(cur, schema_dwd, name_dwd, time_col, window)
                count_ods = _count_table(cur, schema_ods, name_ods, time_col, window)
                pk_dwd = _fetch_pk_columns(cur, schema_dwd, name_dwd)
                pk_ods_raw = _fetch_pk_columns(cur, schema_ods, name_ods)
                pk_ods = [c for c in pk_ods_raw if c.lower() != "content_hash"]
                ods_has_snapshot = any(c.lower() == "content_hash" for c in ods_cols)
                ods_snapshot_order = _pick_snapshot_order_column(ods_cols) if ods_has_snapshot else None
                dwd_current_only = any(c.lower() == "scd2_is_current" for c in dwd_cols)

                count_dwd = _count_table(
                    cur,
                    schema_dwd,
                    name_dwd,
                    time_col,
                    window,
                    current_only=dwd_current_only,
                )
                count_ods = _count_table(
                    cur,
                    schema_ods,
                    name_ods,
                    time_col,
                    window,
                    pk_cols=pk_ods if ods_has_snapshot else None,
                    snapshot_order_col=ods_snapshot_order if ods_has_snapshot else None,
                )

                dwd_amount_cols = _amount_columns(dwd_cols, dwd_types)
                ods_amount_cols = _amount_columns(ods_cols, ods_types)
                common_amount_cols = sorted(set(dwd_amount_cols) & set(ods_amount_cols))
                amounts: List[Dict[str, Any]] = []
                for col in common_amount_cols:
                    dwd_sum = _sum_column(cur, schema_dwd, name_dwd, col, time_col, window)
                    ods_sum = _sum_column(cur, schema_ods, name_ods, col, time_col, window)
                    dwd_sum = _sum_column(
                        cur,
                        schema_dwd,
                        name_dwd,
                        col,
                        time_col,
                        window,
                        current_only=dwd_current_only,
                    )
                    ods_sum = _sum_column(
                        cur,
                        schema_ods,
                        name_ods,
                        col,
                        time_col,
                        window,
                        pk_cols=pk_ods if ods_has_snapshot else None,
                        snapshot_order_col=ods_snapshot_order if ods_has_snapshot else None,
                    )
                    amounts.append(
                        {
                            "column": col,
@@ -212,6 +397,151 @@ def run_dwd_vs_ods_check(
                        }
                    )

                mismatch = None
                mismatch_samples: list[dict] = []
                mismatch_error = None
                if compare_content:
                    dwd_cols_lower = [c.lower() for c in dwd_cols]
                    ods_cols_lower = [c.lower() for c in ods_cols]
                    dwd_col_set = set(dwd_cols_lower)
                    ods_col_set = set(ods_cols_lower)
                    scd_cols = {c.lower() for c in DwdLoadTask.SCD_COLS}
                    ods_exclude = {
                        "payload", "source_file", "source_endpoint", "fetched_at", "content_hash", "record_index"
                    }
                    numeric_types = {
                        "integer",
                        "bigint",
                        "smallint",
                        "numeric",
                        "double precision",
                        "real",
                        "decimal",
                    }
                    text_types = {"text", "character varying", "varchar"}
                    mapping = {
                        dst.lower(): (src, cast_type)
                        for dst, src, cast_type in (DwdLoadTask.FACT_MAPPINGS.get(dwd_table) or [])
                    }
                    business_keys = [c for c in pk_dwd if c.lower() not in scd_cols]

                    def resolve_ods_expr(col: str) -> str | None:
                        mapped = mapping.get(col)
                        if mapped:
                            src, cast_type = mapped
                            return _cast_expr(src, cast_type)
                        if col in ods_col_set:
                            d_type = dwd_types.get(col)
                            o_type = ods_types.get(col)
                            if d_type in numeric_types and o_type in text_types:
                                return _cast_expr(col, d_type)
                            return f'"{col}"'
                        if "id" in ods_col_set and col.endswith("_id"):
                            d_type = dwd_types.get(col)
                            o_type = ods_types.get("id")
                            if d_type in numeric_types and o_type in text_types:
                                return _cast_expr("id", d_type)
                            return '"id"'
                        return None

                    key_exprs: list[str] = []
                    join_keys: list[str] = []
                    for key in business_keys:
                        key_lower = key.lower()
                        expr = resolve_ods_expr(key_lower)
                        if expr is None:
                            key_exprs = []
                            join_keys = []
                            break
                        key_exprs.append(expr)
                        join_keys.append(key_lower)

                    compare_cols: list[str] = []
                    for col in dwd_col_set:
                        if col in ods_exclude or col in scd_cols:
                            continue
                        if col in {k.lower() for k in business_keys}:
                            continue
                        if dwd_types.get(col) in ("json", "jsonb"):
                            continue
                        if ods_types.get(col) in ("json", "jsonb"):
                            continue
                        if resolve_ods_expr(col) is None:
                            continue
                        compare_cols.append(col)
                    compare_cols = sorted(set(compare_cols))

                    if join_keys and compare_cols:
                        where_parts_dwd: list[str] = []
                        params_dwd: list[Any] = []
                        if dwd_current_only:
                            where_parts_dwd.append("COALESCE(scd2_is_current,1)=1")
                        if time_col and window:
                            where_parts_dwd.append(f"\"{time_col}\" >= %s AND \"{time_col}\" < %s")
                            params_dwd.extend([window.start, window.end])
                        where_dwd = f"WHERE {' AND '.join(where_parts_dwd)}" if where_parts_dwd else ""

                        where_parts_ods: list[str] = []
                        params_ods: list[Any] = []
                        if time_col and window:
                            where_parts_ods.append(f"\"{time_col}\" >= %s AND \"{time_col}\" < %s")
                            params_ods.extend([window.start, window.end])
                        where_ods = f"WHERE {' AND '.join(where_parts_ods)}" if where_parts_ods else ""

                        ods_select_exprs: list[str] = []
                        needed_cols = sorted(set(join_keys + compare_cols))
                        for col in needed_cols:
                            expr = resolve_ods_expr(col)
                            if expr is None:
                                continue
                            ods_select_exprs.append(f"{expr} AS \"{col}\"")

                        if not ods_select_exprs:
                            mismatch_error = "join_keys_or_compare_cols_unavailable"
                        else:
                            ods_sql = _build_snapshot_expr_subquery(
                                schema_ods,
                                name_ods,
                                ods_select_exprs,
                                key_exprs,
                                ods_snapshot_order,
                                where_ods,
                            )
                            dwd_cols_sql = ", ".join([f"\"{c}\"" for c in needed_cols])
                            dwd_sql = f"SELECT {dwd_cols_sql} FROM \"{schema_dwd}\".\"{name_dwd}\" {where_dwd}"

                            join_cond = " AND ".join([f"d.\"{k}\" = o.\"{k}\"" for k in join_keys])
                            hash_o = _build_hash_expr("o", compare_cols)
                            hash_d = _build_hash_expr("d", compare_cols)

                            mismatch_sql = (
                                f"WITH ods_latest AS ({ods_sql}), dwd_filtered AS ({dwd_sql}) "
                                f"SELECT COUNT(1) FROM ("
                                f"SELECT 1 FROM ods_latest o JOIN dwd_filtered d ON {join_cond} "
                                f"WHERE {hash_o} <> {hash_d}"
                                f") t"
                            )
                            params = params_ods + params_dwd
                            cur.execute(mismatch_sql, params)
                            row = cur.fetchone()
                            mismatch = int(row[0] if row and row[0] is not None else 0)
                            total_mismatch += mismatch

                            if content_sample_limit and mismatch > 0:
                                select_keys_sql = ", ".join([f"d.\"{k}\" AS \"{k}\"" for k in join_keys])
                                sample_sql = (
                                    f"WITH ods_latest AS ({ods_sql}), dwd_filtered AS ({dwd_sql}) "
                                    f"SELECT {select_keys_sql}, {hash_o} AS ods_hash, {hash_d} AS dwd_hash "
                                    f"FROM ods_latest o JOIN dwd_filtered d ON {join_cond} "
                                    f"WHERE {hash_o} <> {hash_d} LIMIT %s"
                                )
                                cur.execute(sample_sql, params + [int(content_sample_limit)])
                                rows = cur.fetchall() or []
                                if rows:
                                    columns = [desc[0] for desc in (cur.description or [])]
                                    mismatch_samples = [dict(zip(columns, r)) for r in rows]
                    else:
                        mismatch_error = "join_keys_or_compare_cols_unavailable"

                results.append(
                    {
                        "dwd_table": dwd_table,
@@ -220,6 +550,9 @@ def run_dwd_vs_ods_check(
                        "window_col": time_col,
                        "count": {"dwd": count_dwd, "ods": count_ods, "diff": count_dwd - count_ods},
                        "amounts": amounts,
                        "mismatch": mismatch,
                        "mismatch_samples": mismatch_samples,
                        "mismatch_error": mismatch_error,
                    }
                )
    except Exception as exc:  # noqa: BLE001
@@ -231,6 +564,8 @@ def run_dwd_vs_ods_check(
                "window_col": None,
                "count": {"dwd": None, "ods": None, "diff": None},
                "amounts": [],
                "mismatch": None,
                "mismatch_samples": [],
                "error": f"{type(exc).__name__}: {exc}",
            }
        )
@@ -243,6 +578,7 @@ def run_dwd_vs_ods_check(
        return {
            "tables": results,
            "total_count_diff": total_count_diff,
            "total_mismatch": total_mismatch,
        }
    finally:
        db_conn.close()
@@ -262,6 +598,8 @@ def run_integrity_window(
    task_codes: str,
    logger,
    write_report: bool,
    compare_content: bool | None = None,
    content_sample_limit: int | None = None,
    report_path: Path | None = None,
    window_split_unit: str | None = None,
    window_compensation_hours: int | None = None,
@@ -274,6 +612,11 @@ def run_integrity_window(
        window_days = 0
        window_hours = max(1, total_seconds // 3600 or 1)

    if compare_content is None:
        compare_content = bool(cfg.get("integrity.compare_content", True))
    if content_sample_limit is None:
        content_sample_limit = cfg.get("integrity.content_sample_limit")

    ods_payload = run_gap_check(
        cfg=cfg,
        start=window.start,
@@ -290,6 +633,8 @@ def run_integrity_window(
        cutoff_overlap_hours=24,
        allow_small_window=True,
        logger=logger,
        compare_content=bool(compare_content),
        content_sample_limit=content_sample_limit,
        window_split_unit=window_split_unit,
        window_compensation_hours=window_compensation_hours,
    )
@@ -298,6 +643,8 @@ def run_integrity_window(
        cfg=cfg,
        window=window,
        include_dimensions=include_dimensions,
        compare_content=compare_content,
        content_sample_limit=content_sample_limit,
    )

    report = {
@@ -331,12 +678,15 @@ def run_integrity_history(
    task_codes: str,
    logger,
    write_report: bool,
    compare_content: bool | None = None,
    content_sample_limit: int | None = None,
    report_path: Path | None = None,
) -> Dict[str, Any]:
    tz = ZoneInfo(cfg.get("app.timezone", "Asia/Taipei"))
    windows = build_history_windows(start_dt, end_dt, tz)
    results: List[Dict[str, Any]] = []
    total_missing = 0
    total_mismatch = 0
    total_errors = 0

    for window in windows:
@@ -348,9 +698,12 @@ def run_integrity_history(
            task_codes=task_codes,
            logger=logger,
            write_report=False,
            compare_content=compare_content,
            content_sample_limit=content_sample_limit,
        )
        results.append(payload)
        total_missing += int(payload.get("api_to_ods", {}).get("total_missing") or 0)
        total_mismatch += int(payload.get("api_to_ods", {}).get("total_mismatch") or 0)
        total_errors += int(payload.get("api_to_ods", {}).get("total_errors") or 0)

    report = {
@@ -359,6 +712,7 @@ def run_integrity_history(
        "end": _ensure_tz(end_dt, tz).isoformat(),
        "windows": results,
        "total_missing": total_missing,
        "total_mismatch": total_mismatch,
        "total_errors": total_errors,
        "generated_at": datetime.now(tz).isoformat(),
    }
256
etl_billiards/quality/integrity_service.py
Normal file
@@ -0,0 +1,256 @@
# -*- coding: utf-8 -*-
"""Shared integrity flow helpers (window/history + optional backfill)."""
from __future__ import annotations

from datetime import datetime
from pathlib import Path
from typing import Any, Iterable, Tuple
from zoneinfo import ZoneInfo

import json

from quality.integrity_checker import IntegrityWindow, compute_last_etl_end, run_integrity_history, run_integrity_window
from scripts.backfill_missing_data import run_backfill
from utils.windowing import split_window


def _normalize_windows(cfg, windows: Iterable[Tuple[datetime, datetime]]) -> list[Tuple[datetime, datetime]]:
    segments = list(windows)
    if not segments:
        return segments

    force_monthly = bool(cfg.get("integrity.force_monthly_split", True))
    if not force_monthly:
        return segments

    overall_start = segments[0][0]
    overall_end = segments[-1][1]
    total_days = (overall_end - overall_start).total_seconds() / 86400.0
    if total_days <= 31 and len(segments) == 1:
        return segments

    tz = ZoneInfo(cfg.get("app.timezone", "Asia/Taipei"))
    comp_hours = cfg.get("run.window_split.compensation_hours", 0)
    monthly = split_window(
        overall_start,
        overall_end,
        tz=tz,
        split_unit="month",
        compensation_hours=comp_hours,
    )
    return monthly or segments
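# Example (illustrative): a single 2025-07-01..2025-10-01 segment exceeds 31 days,
# so it is re-split into July/August/September windows via split_window(); a single
# window of 31 days or less passes through untouched.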


def build_window_report(
    *,
    cfg,
    windows: Iterable[Tuple[datetime, datetime]],
    include_dimensions: bool,
    task_codes: str,
    logger,
    compare_content: bool | None,
    content_sample_limit: int | None,
) -> tuple[dict, dict]:
    window_reports = []
    total_missing = 0
    total_mismatch = 0
    total_errors = 0
    segments = list(windows)
    for idx, (seg_start, seg_end) in enumerate(segments, start=1):
        window = IntegrityWindow(
            start=seg_start,
            end=seg_end,
            label=f"segment_{idx}",
            granularity="window",
        )
        payload = run_integrity_window(
            cfg=cfg,
            window=window,
            include_dimensions=include_dimensions,
            task_codes=task_codes,
            logger=logger,
            write_report=False,
            compare_content=compare_content,
            content_sample_limit=content_sample_limit,
            report_path=None,
            window_split_unit="none",
            window_compensation_hours=0,
        )
        window_reports.append(payload)
        total_missing += int(payload.get("api_to_ods", {}).get("total_missing") or 0)
        total_mismatch += int(payload.get("api_to_ods", {}).get("total_mismatch") or 0)
        total_errors += int(payload.get("api_to_ods", {}).get("total_errors") or 0)

    overall_start = segments[0][0]
    overall_end = segments[-1][1]
    tz = ZoneInfo(cfg.get("app.timezone", "Asia/Taipei"))
    report = {
        "mode": "window",
        "window": {
            "start": overall_start.isoformat(),
            "end": overall_end.isoformat(),
            "segments": len(segments),
        },
        "windows": window_reports,
        "api_to_ods": {
            "total_missing": total_missing,
            "total_mismatch": total_mismatch,
            "total_errors": total_errors,
        },
        "total_missing": total_missing,
        "total_mismatch": total_mismatch,
        "total_errors": total_errors,
        "generated_at": datetime.now(tz).isoformat(),
    }
    counts = {
        "missing": int(total_missing or 0),
        "mismatch": int(total_mismatch or 0),
        "errors": int(total_errors or 0),
    }
    return report, counts


def run_window_flow(
    *,
    cfg,
    windows: Iterable[Tuple[datetime, datetime]],
    include_dimensions: bool,
    task_codes: str,
    logger,
    compare_content: bool | None,
    content_sample_limit: int | None,
    do_backfill: bool,
    include_mismatch: bool,
    recheck_after_backfill: bool,
    page_size: int | None = None,
    chunk_size: int = 500,
) -> tuple[dict, dict]:
    segments = _normalize_windows(cfg, windows)
    report, counts = build_window_report(
        cfg=cfg,
        windows=segments,
        include_dimensions=include_dimensions,
        task_codes=task_codes,
        logger=logger,
        compare_content=compare_content,
        content_sample_limit=content_sample_limit,
    )
    overall_start = segments[0][0]
    overall_end = segments[-1][1]

    backfill_result = None
    post_report = None
    if do_backfill:
        missing_count = int(counts.get("missing", 0))
        mismatch_count = int(counts.get("mismatch", 0))
        need_backfill = missing_count > 0 or (include_mismatch and mismatch_count > 0)
        if need_backfill:
            backfill_result = run_backfill(
                cfg=cfg,
                start=overall_start,
                end=overall_end,
                task_codes=task_codes or None,
                include_mismatch=bool(include_mismatch),
                dry_run=False,
                page_size=int(page_size or cfg.get("api.page_size") or 200),
                chunk_size=chunk_size,
                logger=logger,
            )
            report["backfill_result"] = backfill_result
            if recheck_after_backfill:
                post_report, post_counts = build_window_report(
                    cfg=cfg,
                    windows=segments,
                    include_dimensions=include_dimensions,
                    task_codes=task_codes,
                    logger=logger,
                    compare_content=compare_content,
                    content_sample_limit=content_sample_limit,
                )
                report["post_check"] = post_report
                counts.update(post_counts)
    return report, counts


def run_history_flow(
    *,
    cfg,
    start_dt: datetime,
    end_dt: datetime | None,
    include_dimensions: bool,
    task_codes: str,
    logger,
    compare_content: bool | None,
    content_sample_limit: int | None,
    do_backfill: bool,
    include_mismatch: bool,
    recheck_after_backfill: bool,
    page_size: int | None = None,
    chunk_size: int = 500,
) -> tuple[dict, dict]:
    tz = ZoneInfo(cfg.get("app.timezone", "Asia/Taipei"))
    if end_dt is None:
        end_dt = compute_last_etl_end(cfg) or datetime.now(tz)

    report = run_integrity_history(
        cfg=cfg,
        start_dt=start_dt,
        end_dt=end_dt,
        include_dimensions=include_dimensions,
        task_codes=task_codes,
        logger=logger,
        write_report=False,
        compare_content=compare_content,
        content_sample_limit=content_sample_limit,
    )
    counts = {
        "missing": int(report.get("total_missing") or 0),
        "mismatch": int(report.get("total_mismatch") or 0),
        "errors": int(report.get("total_errors") or 0),
    }
    if do_backfill:
        need_backfill = counts.get("missing", 0) > 0 or (include_mismatch and counts.get("mismatch", 0) > 0)
        if need_backfill:
            backfill_result = run_backfill(
                cfg=cfg,
                start=start_dt,
                end=end_dt,
                task_codes=task_codes or None,
                include_mismatch=bool(include_mismatch),
                dry_run=False,
                page_size=int(page_size or cfg.get("api.page_size") or 200),
                chunk_size=chunk_size,
                logger=logger,
            )
            report["backfill_result"] = backfill_result
            if recheck_after_backfill:
                post_report = run_integrity_history(
                    cfg=cfg,
                    start_dt=start_dt,
                    end_dt=end_dt,
                    include_dimensions=include_dimensions,
                    task_codes=task_codes,
                    logger=logger,
                    write_report=False,
                    compare_content=compare_content,
                    content_sample_limit=content_sample_limit,
                )
                report["post_check"] = post_report
                counts.update(
                    {
                        "missing": int(post_report.get("total_missing") or 0),
                        "mismatch": int(post_report.get("total_mismatch") or 0),
                        "errors": int(post_report.get("total_errors") or 0),
                    }
                )
    return report, counts


def write_report(report: dict, *, prefix: str, tz: ZoneInfo, report_path: Path | None = None) -> str:
    if report_path is None:
        root = Path(__file__).resolve().parents[1]
        stamp = datetime.now(tz).strftime("%Y%m%d_%H%M%S")
        report_path = root / "reports" / f"{prefix}_{stamp}.json"
    report_path.parent.mkdir(parents=True, exist_ok=True)
    report_path.write_text(json.dumps(report, ensure_ascii=False, indent=2) + "\n", encoding="utf-8")
    return str(report_path)
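# Minimal usage sketch (hypothetical caller; values are illustrative, and None lets
# compare_content / content_sample_limit fall back to the integrity.* config keys):
#
#   report, counts = run_window_flow(
#       cfg=cfg,
#       windows=[(start_dt, end_dt)],
#       include_dimensions=True,
#       task_codes="",
#       logger=logger,
#       compare_content=None,
#       content_sample_limit=None,
#       do_backfill=True,
#       include_mismatch=True,
#       recheck_after_backfill=True,
#   )
#   path = write_report(report, prefix="data_integrity_window", tz=ZoneInfo("Asia/Taipei"))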
10359
etl_billiards/reports/data_integrity_window_20260128_233259.json
Normal file
File diff suppressed because it is too large
10374
etl_billiards/reports/data_integrity_window_20260128_233424.json
Normal file
File diff suppressed because it is too large
10407
etl_billiards/reports/data_integrity_window_20260128_233947.json
Normal file
File diff suppressed because it is too large
58516
etl_billiards/reports/data_integrity_window_20260129_110434.json
Normal file
File diff suppressed because it is too large
9432
etl_billiards/reports/data_integrity_window_20260130_211444.json
Normal file
File diff suppressed because it is too large
9450
etl_billiards/reports/data_integrity_window_20260130_215332.json
Normal file
File diff suppressed because it is too large
2226
etl_billiards/reports/data_integrity_window_20260131_060115.json
Normal file
File diff suppressed because it is too large
5096
etl_billiards/reports/data_integrity_window_20260131_152947.json
Normal file
File diff suppressed because it is too large
9874
etl_billiards/reports/data_integrity_window_20260131_160043.json
Normal file
File diff suppressed because it is too large
4684
etl_billiards/reports/data_integrity_window_20260131_161314.json
Normal file
File diff suppressed because it is too large
9708
etl_billiards/reports/data_integrity_window_20260131_173258.json
Normal file
File diff suppressed because it is too large
4713
etl_billiards/reports/data_integrity_window_20260131_174631.json
Normal file
File diff suppressed because it is too large
14442
etl_billiards/reports/data_integrity_window_20260131_204737.json
Normal file
File diff suppressed because it is too large
7787
etl_billiards/reports/data_integrity_window_20260131_205713.json
Normal file
File diff suppressed because it is too large
102219
etl_billiards/reports/data_integrity_window_20260131_232429.json
Normal file
File diff suppressed because it is too large
49981
etl_billiards/reports/data_integrity_window_20260131_235928.json
Normal file
File diff suppressed because it is too large
1
etl_billiards/scripts/Untitled
Normal file
@@ -0,0 +1 @@
check_data_integrity.py
@@ -32,9 +32,15 @@ from api.client import APIClient
from config.settings import AppConfig
from database.connection import DatabaseConnection
from models.parsers import TypeParser
from tasks.ods_tasks import ENABLED_ODS_CODES, ODS_TASK_SPECS, OdsTaskSpec
from tasks.ods_tasks import BaseOdsTask, ENABLED_ODS_CODES, ODS_TASK_SPECS, OdsTaskSpec
from scripts.check_ods_gaps import run_gap_check
from utils.logging_utils import build_log_path, configure_logging
from utils.ods_record_utils import (
    get_value_case_insensitive,
    merge_record_layers,
    normalize_pk_value,
    pk_tuple_from_record,
)


def _reconfigure_stdout_utf8() -> None:
@@ -74,56 +80,26 @@ def _get_spec(code: str) -> Optional[OdsTaskSpec]:


def _merge_record_layers(record: dict) -> dict:
    """展开嵌套的 data 层"""
    merged = record
    data_part = merged.get("data")
    while isinstance(data_part, dict):
        merged = {**data_part, **merged}
        data_part = data_part.get("data")
    settle_inner = merged.get("settleList")
    if isinstance(settle_inner, dict):
        merged = {**settle_inner, **merged}
    return merged
    """Flatten nested data layers into a single dict."""
    return merge_record_layers(record)


def _get_value_case_insensitive(record: dict | None, col: str | None):
    """不区分大小写地获取值"""
    if record is None or col is None:
        return None
    if col in record:
        return record.get(col)
    col_lower = col.lower()
    for k, v in record.items():
        if isinstance(k, str) and k.lower() == col_lower:
            return v
    return None
    """Fetch value without case sensitivity."""
    return get_value_case_insensitive(record, col)


def _normalize_pk_value(value):
    """规范化 PK 值"""
    if value is None:
        return None
    if isinstance(value, str) and value.isdigit():
        try:
            return int(value)
        except Exception:
            return value
    return value
    """Normalize PK value."""
    return normalize_pk_value(value)


def _pk_tuple_from_record(record: dict, pk_cols: List[str]) -> Optional[Tuple]:
    """从记录中提取 PK 元组"""
    merged = _merge_record_layers(record)
    values = []
    for col in pk_cols:
        val = _normalize_pk_value(_get_value_case_insensitive(merged, col))
        if val is None or val == "":
            return None
        values.append(val)
    return tuple(values)
    """Extract PK tuple from record."""
    return pk_tuple_from_record(record, pk_cols)


def _get_table_pk_columns(conn, table: str) -> List[str]:
def _get_table_pk_columns(conn, table: str, *, include_content_hash: bool = False) -> List[str]:
    """获取表的主键列"""
    if "." in table:
        schema, name = table.split(".", 1)
@@ -142,7 +118,10 @@ def _get_table_pk_columns(conn, table: str) -> List[str]:
    """
    with conn.cursor() as cur:
        cur.execute(sql, (schema, name))
        return [r[0] for r in cur.fetchall()]
        cols = [r[0] for r in cur.fetchall()]
    if include_content_hash:
        return cols
    return [c for c in cols if c.lower() != "content_hash"]


def _get_table_columns(conn, table: str) -> List[Tuple[str, str, str]]:
@@ -247,6 +226,13 @@ class MissingDataBackfiller:
        """关闭连接"""
        if self.db:
            self.db.close()

    def _ensure_db(self):
        """确保数据库连接可用"""
        if self.db and getattr(self.db, "conn", None) is not None:
            if getattr(self.db.conn, "closed", 0) == 0:
                return
        self.db = DatabaseConnection(dsn=self.cfg["db"]["dsn"], session=self.cfg["db"].get("session"))

    def backfill_from_gap_check(
        self,
@@ -254,8 +240,10 @@ class MissingDataBackfiller:
        start: datetime,
        end: datetime,
        task_codes: Optional[str] = None,
        include_mismatch: bool = False,
        page_size: int = 200,
        chunk_size: int = 500,
        content_sample_limit: int | None = None,
    ) -> Dict[str, Any]:
        """
        运行 gap check 并补全丢失数据
@@ -292,16 +280,21 @@ class MissingDataBackfiller:
            cutoff_overlap_hours=24,
            allow_small_window=True,
            logger=self.logger,
            compare_content=include_mismatch,
            content_sample_limit=content_sample_limit or 10000,
        )

        total_missing = gap_result.get("total_missing", 0)
        if total_missing == 0:
            self.logger.info("数据完整,无缺失记录")
        total_mismatch = gap_result.get("total_mismatch", 0)
        if total_missing == 0 and (not include_mismatch or total_mismatch == 0):
            self.logger.info("Data complete: no missing/mismatch records")
            return {"backfilled": 0, "errors": 0, "details": []}

        self.logger.info("缺失检查完成 总缺失=%s", total_missing)
        if include_mismatch:
            self.logger.info("Missing/mismatch check done missing=%s mismatch=%s", total_missing, total_mismatch)
        else:
            self.logger.info("Missing check done missing=%s", total_missing)

        # 补全每个任务的丢失数据
        results = []
        total_backfilled = 0
        total_errors = 0
@@ -310,13 +303,16 @@ class MissingDataBackfiller:
            task_code = task_result.get("task_code")
            missing = task_result.get("missing", 0)
            missing_samples = task_result.get("missing_samples", [])
            mismatch = task_result.get("mismatch", 0) if include_mismatch else 0
            mismatch_samples = task_result.get("mismatch_samples", []) if include_mismatch else []
            target_samples = list(missing_samples) + list(mismatch_samples)

            if missing == 0:
            if missing == 0 and mismatch == 0:
                continue

            self.logger.info(
                "开始补全任务 任务=%s 缺失=%s 样本数=%s",
                task_code, missing, len(missing_samples)
                "Start backfill task task=%s missing=%s mismatch=%s samples=%s",
                task_code, missing, mismatch, len(target_samples)
            )

            try:
@@ -324,7 +320,7 @@ class MissingDataBackfiller:
                    task_code=task_code,
                    table=task_result.get("table"),
                    pk_columns=task_result.get("pk_columns", []),
                    missing_samples=missing_samples,
                    pk_samples=target_samples,
                    start=start,
                    end=end,
                    page_size=page_size,
@@ -333,6 +329,7 @@ class MissingDataBackfiller:
                results.append({
                    "task_code": task_code,
                    "missing": missing,
                    "mismatch": mismatch,
                    "backfilled": backfilled,
                    "error": None,
                })
@@ -342,6 +339,7 @@ class MissingDataBackfiller:
                results.append({
                    "task_code": task_code,
                    "missing": missing,
                    "mismatch": mismatch,
                    "backfilled": 0,
                    "error": str(exc),
                })
@@ -354,6 +352,7 @@ class MissingDataBackfiller:

        return {
            "total_missing": total_missing,
            "total_mismatch": total_mismatch,
            "backfilled": total_backfilled,
            "errors": total_errors,
            "details": results,
@@ -365,20 +364,25 @@ class MissingDataBackfiller:
        task_code: str,
        table: str,
        pk_columns: List[str],
        missing_samples: List[Dict],
        pk_samples: List[Dict],
        start: datetime,
        end: datetime,
        page_size: int,
        chunk_size: int,
    ) -> int:
        """补全单个任务的丢失数据"""
        self._ensure_db()
        spec = _get_spec(task_code)
        if not spec:
            self.logger.warning("未找到任务规格 任务=%s", task_code)
            return 0

        if not pk_columns:
            pk_columns = _get_table_pk_columns(self.db.conn, table)
            pk_columns = _get_table_pk_columns(self.db.conn, table, include_content_hash=False)

        conflict_columns = _get_table_pk_columns(self.db.conn, table, include_content_hash=True)
        if not conflict_columns:
            conflict_columns = pk_columns

        if not pk_columns:
            self.logger.warning("未找到主键列 任务=%s 表=%s", task_code, table)
@@ -386,7 +390,7 @@ class MissingDataBackfiller:

        # 提取丢失的 PK 值
        missing_pks: Set[Tuple] = set()
        for sample in missing_samples:
        for sample in pk_samples:
            pk_tuple = tuple(sample.get(col) for col in pk_columns)
            if all(v is not None for v in pk_tuple):
                missing_pks.add(pk_tuple)
@@ -410,6 +414,12 @@ class MissingDataBackfiller:
            if c[1] in ("json", "jsonb") or c[2] in ("json", "jsonb")
        }
        col_names = [c[0] for c in cols_info]

        # 结束只读事务,避免长时间 API 拉取导致 idle_in_tx 超时
        try:
            self.db.conn.commit()
        except Exception:
            self.db.conn.rollback()

        try:
            for page_no, records, _, response_payload in self.api.iter_paginated(
@@ -444,9 +454,12 @@ class MissingDataBackfiller:
                        records=records_to_insert,
                        cols_info=cols_info,
                        pk_columns=pk_columns,
                        conflict_columns=conflict_columns,
                        db_json_cols_lower=db_json_cols_lower,
                    )
                    backfilled += inserted
                    # 避免长事务阻塞与 idle_in_tx 超时
                    self.db.conn.commit()
                    self.logger.info(
                        "已插入 任务=%s 页=%s 数量=%s",
                        task_code, page_no, inserted
@@ -498,6 +511,7 @@ class MissingDataBackfiller:
        records: List[Dict],
        cols_info: List[Tuple[str, str, str]],
        pk_columns: List[str],
        conflict_columns: List[str],
        db_json_cols_lower: Set[str],
    ) -> int:
        """插入记录到数据库"""
@@ -505,10 +519,12 @@ class MissingDataBackfiller:
            return 0

        col_names = [c[0] for c in cols_info]
        needs_content_hash = any(c[0].lower() == "content_hash" for c in cols_info)
        quoted_cols = ", ".join(f'"{c}"' for c in col_names)
        sql = f"INSERT INTO {table} ({quoted_cols}) VALUES %s"
        if pk_columns:
            pk_clause = ", ".join(f'"{c}"' for c in pk_columns)
        conflict_cols = conflict_columns or pk_columns
        if conflict_cols:
            pk_clause = ", ".join(f'"{c}"' for c in conflict_cols)
            sql += f" ON CONFLICT ({pk_clause}) DO NOTHING"

        now = datetime.now(self.tz)
@@ -522,12 +538,20 @@ class MissingDataBackfiller:
            if pk_columns:
                missing_pk = False
                for pk in pk_columns:
                    if str(pk).lower() == "content_hash":
                        continue
                    pk_val = _get_value_case_insensitive(merged_rec, pk)
                    if pk_val is None or pk_val == "":
                        missing_pk = True
                        break
                if missing_pk:
                    continue

            content_hash = None
            if needs_content_hash:
                hash_record = dict(merged_rec)
                hash_record["fetched_at"] = now
                content_hash = BaseOdsTask._compute_content_hash(hash_record, include_fetched_at=True)

            row_vals: List[Any] = []
            for (col_name, data_type, _udt) in cols_info:
@@ -544,6 +568,9 @@ class MissingDataBackfiller:
                if col_lower == "fetched_at":
                    row_vals.append(now)
                    continue
                if col_lower == "content_hash":
                    row_vals.append(content_hash)
                    continue

                value = _normalize_scalar(_get_value_case_insensitive(merged_rec, col_name))
                if col_lower in db_json_cols_lower:
@@ -574,9 +601,11 @@ def run_backfill(
    start: datetime,
    end: datetime,
    task_codes: Optional[str] = None,
    include_mismatch: bool = False,
    dry_run: bool = False,
    page_size: int = 200,
    chunk_size: int = 500,
    content_sample_limit: int | None = None,
    logger: logging.Logger,
) -> Dict[str, Any]:
    """
@@ -601,8 +630,10 @@ def run_backfill(
            start=start,
            end=end,
            task_codes=task_codes,
            include_mismatch=include_mismatch,
            page_size=page_size,
            chunk_size=chunk_size,
            content_sample_limit=content_sample_limit,
        )
    finally:
        backfiller.close()
@@ -615,6 +646,8 @@ def main() -> int:
    ap.add_argument("--start", default="2025-07-01", help="开始日期 (默认: 2025-07-01)")
    ap.add_argument("--end", default="", help="结束日期 (默认: 当前时间)")
    ap.add_argument("--task-codes", default="", help="指定任务代码(逗号分隔,留空=全部)")
    ap.add_argument("--include-mismatch", action="store_true", help="同时补全内容不一致的记录")
    ap.add_argument("--content-sample-limit", type=int, default=None, help="不一致样本上限 (默认: 10000)")
    ap.add_argument("--dry-run", action="store_true", help="仅预览,不实际写入")
    ap.add_argument("--page-size", type=int, default=200, help="API 分页大小 (默认: 200)")
    ap.add_argument("--chunk-size", type=int, default=500, help="数据库批量大小 (默认: 500)")
@@ -646,15 +679,19 @@ def main() -> int:
        start=start,
        end=end,
        task_codes=args.task_codes or None,
        include_mismatch=args.include_mismatch,
        dry_run=args.dry_run,
        page_size=args.page_size,
        chunk_size=args.chunk_size,
        content_sample_limit=args.content_sample_limit,
        logger=logger,
    )

    logger.info("=" * 60)
    logger.info("补全完成!")
    logger.info("  总丢失: %s", result.get("total_missing", 0))
    if args.include_mismatch:
        logger.info("  总不一致: %s", result.get("total_mismatch", 0))
    logger.info("  已补全: %s", result.get("backfilled", 0))
    logger.info("  错误数: %s", result.get("errors", 0))
    logger.info("=" * 60)
@@ -663,17 +700,19 @@ def main() -> int:
    for detail in result.get("details", []):
        if detail.get("error"):
            logger.error(
                "  %s: 丢失=%s 补全=%s 错误=%s",
                "  %s: 丢失=%s 不一致=%s 补全=%s 错误=%s",
                detail.get("task_code"),
                detail.get("missing"),
                detail.get("mismatch", 0),
                detail.get("backfilled"),
                detail.get("error"),
            )
        elif detail.get("backfilled", 0) > 0:
            logger.info(
                "  %s: 丢失=%s 补全=%s",
                "  %s: 丢失=%s 不一致=%s 补全=%s",
                detail.get("task_code"),
                detail.get("missing"),
                detail.get("mismatch", 0),
                detail.get("backfilled"),
            )
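The insert path above builds one INSERT ... ON CONFLICT DO NOTHING statement and feeds all rows to execute_values. A self-contained sketch of that pattern (table and column names here are hypothetical, not from this repo):

    from psycopg2.extras import execute_values

    def insert_ignore(conn, table, cols, conflict_cols, rows):
        # INSERT INTO <table> (...) VALUES %s ON CONFLICT (...) DO NOTHING
        col_sql = ", ".join(f'"{c}"' for c in cols)
        conflict_sql = ", ".join(f'"{c}"' for c in conflict_cols)
        sql = f'INSERT INTO {table} ({col_sql}) VALUES %s ON CONFLICT ({conflict_sql}) DO NOTHING'
        with conn.cursor() as cur:
            execute_values(cur, sql, rows)
            return cur.rowcount  # conflicting rows are skipped, not counted

    # insert_ignore(conn, "billiards_ods.demo", ["id", "content_hash"], ["id", "content_hash"], [(1, "abc")])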
@@ -3,7 +3,6 @@
from __future__ import annotations

import argparse
import json
import sys
from datetime import datetime
from pathlib import Path
@@ -12,12 +11,7 @@ from zoneinfo import ZoneInfo
from dateutil import parser as dtparser

from config.settings import AppConfig
from quality.integrity_checker import (
    IntegrityWindow,
    compute_last_etl_end,
    run_integrity_history,
    run_integrity_window,
)
from quality.integrity_service import run_history_flow, run_window_flow, write_report
from utils.logging_utils import build_log_path, configure_logging
from utils.windowing import split_window

@@ -38,14 +32,37 @@ def main() -> int:

    ap = argparse.ArgumentParser(description="Data integrity checks (API -> ODS -> DWD)")
    ap.add_argument("--mode", choices=["history", "window"], default="history")
    ap.add_argument(
        "--flow",
        choices=["verify", "update_and_verify"],
        default="verify",
        help="verify only or update+verify (auto backfill then optional recheck)",
    )
    ap.add_argument("--start", default="2025-07-01", help="history start date (default: 2025-07-01)")
    ap.add_argument("--end", default="", help="history end datetime (default: last ETL end)")
    ap.add_argument("--window-start", default="", help="window start datetime (mode=window)")
    ap.add_argument("--window-end", default="", help="window end datetime (mode=window)")
    ap.add_argument("--window-split-unit", default="", help="split unit (month/none), default from config")
    ap.add_argument("--window-compensation-hours", type=int, default=None, help="window compensation hours, default from config")
    ap.add_argument("--include-dimensions", action="store_true", help="include dimension tables in ODS->DWD checks")
    ap.add_argument(
        "--include-dimensions",
        action="store_true",
        default=None,
        help="include dimension tables in ODS->DWD checks",
    )
    ap.add_argument(
        "--no-include-dimensions",
        action="store_true",
        help="exclude dimension tables in ODS->DWD checks",
    )
    ap.add_argument("--ods-task-codes", default="", help="comma-separated ODS task codes for API checks")
    ap.add_argument("--compare-content", action="store_true", help="compare API vs ODS content hash")
    ap.add_argument("--no-compare-content", action="store_true", help="disable content comparison even if enabled in config")
    ap.add_argument("--include-mismatch", action="store_true", help="backfill mismatch records as well")
    ap.add_argument("--no-include-mismatch", action="store_true", help="disable mismatch backfill")
    ap.add_argument("--recheck", action="store_true", help="re-run checks after backfill")
    ap.add_argument("--no-recheck", action="store_true", help="skip recheck after backfill")
    ap.add_argument("--content-sample-limit", type=int, default=None, help="max mismatch samples per table")
    ap.add_argument("--out", default="", help="output JSON path")
    ap.add_argument("--log-file", default="", help="log file path")
    ap.add_argument("--log-dir", default="", help="log directory")
@@ -68,6 +85,39 @@ def main() -> int:
    tz = ZoneInfo(cfg.get("app.timezone", "Asia/Taipei"))
    report_path = Path(args.out) if args.out else None

    if args.recheck and args.no_recheck:
        raise SystemExit("cannot set both --recheck and --no-recheck")
    if args.include_mismatch and args.no_include_mismatch:
        raise SystemExit("cannot set both --include-mismatch and --no-include-mismatch")
    if args.include_dimensions and args.no_include_dimensions:
        raise SystemExit("cannot set both --include-dimensions and --no-include-dimensions")

    compare_content = None
    if args.compare_content and args.no_compare_content:
        raise SystemExit("cannot set both --compare-content and --no-compare-content")
    if args.compare_content:
        compare_content = True
    elif args.no_compare_content:
        compare_content = False

    include_mismatch = cfg.get("integrity.backfill_mismatch", True)
    if args.include_mismatch:
        include_mismatch = True
    elif args.no_include_mismatch:
        include_mismatch = False

    recheck_after_backfill = cfg.get("integrity.recheck_after_backfill", True)
    if args.recheck:
        recheck_after_backfill = True
    elif args.no_recheck:
        recheck_after_backfill = False

    include_dimensions = cfg.get("integrity.include_dimensions", True)
    if args.include_dimensions:
        include_dimensions = True
    elif args.no_include_dimensions:
        include_dimensions = False

    if args.mode == "window":
        if not args.window_start or not args.window_end:
            raise SystemExit("window-start and window-end are required for mode=window")
@@ -88,78 +138,52 @@ def main() -> int:
        if not windows:
            windows = [(start_dt, end_dt)]

        window_reports = []
        total_missing = 0
        total_errors = 0
        for idx, (seg_start, seg_end) in enumerate(windows, start=1):
            window = IntegrityWindow(
                start=seg_start,
                end=seg_end,
                label=f"segment_{idx}",
                granularity="window",
            )
            payload = run_integrity_window(
                cfg=cfg,
                window=window,
                include_dimensions=args.include_dimensions,
                task_codes=args.ods_task_codes,
                logger=logger,
                write_report=False,
                report_path=None,
                window_split_unit="none",
                window_compensation_hours=0,
            )
            window_reports.append(payload)
            total_missing += int(payload.get("api_to_ods", {}).get("total_missing") or 0)
            total_errors += int(payload.get("api_to_ods", {}).get("total_errors") or 0)

        overall_start = windows[0][0]
        overall_end = windows[-1][1]
        report = {
            "mode": "window",
            "window": {
                "start": overall_start.isoformat(),
                "end": overall_end.isoformat(),
                "segments": len(windows),
            },
            "windows": window_reports,
            "api_to_ods": {
                "total_missing": total_missing,
                "total_errors": total_errors,
            },
            "total_missing": total_missing,
            "total_errors": total_errors,
            "generated_at": datetime.now(tz).isoformat(),
        }
        if report_path is None:
            root = Path(__file__).resolve().parents[1]
            stamp = datetime.now(tz).strftime("%Y%m%d_%H%M%S")
            report_path = root / "reports" / f"data_integrity_window_{stamp}.json"
        report_path.parent.mkdir(parents=True, exist_ok=True)
        report_path.write_text(json.dumps(report, ensure_ascii=False, indent=2) + "\n", encoding="utf-8")
        report["report_path"] = str(report_path)
        report, counts = run_window_flow(
            cfg=cfg,
            windows=windows,
            include_dimensions=bool(include_dimensions),
            task_codes=args.ods_task_codes,
            logger=logger,
            compare_content=compare_content,
            content_sample_limit=args.content_sample_limit,
            do_backfill=args.flow == "update_and_verify",
            include_mismatch=bool(include_mismatch),
            recheck_after_backfill=bool(recheck_after_backfill),
            page_size=int(cfg.get("api.page_size") or 200),
            chunk_size=500,
        )
        report_path = write_report(report, prefix="data_integrity_window", tz=tz, report_path=report_path)
        report["report_path"] = report_path
        logger.info("REPORT_WRITTEN path=%s", report.get("report_path"))
    else:
        start_dt = _parse_dt(args.start, tz)
        if args.end:
            end_dt = _parse_dt(args.end, tz)
        else:
            end_dt = compute_last_etl_end(cfg) or datetime.now(tz)
        report = run_integrity_history(
            end_dt = None
        report, counts = run_history_flow(
            cfg=cfg,
            start_dt=start_dt,
            end_dt=end_dt,
            include_dimensions=args.include_dimensions,
            include_dimensions=bool(include_dimensions),
            task_codes=args.ods_task_codes,
            logger=logger,
            write_report=True,
            report_path=report_path,
            compare_content=compare_content,
            content_sample_limit=args.content_sample_limit,
            do_backfill=args.flow == "update_and_verify",
            include_mismatch=bool(include_mismatch),
            recheck_after_backfill=bool(recheck_after_backfill),
            page_size=int(cfg.get("api.page_size") or 200),
            chunk_size=500,
        )
        report_path = write_report(report, prefix="data_integrity_history", tz=tz, report_path=report_path)
        report["report_path"] = report_path
        logger.info("REPORT_WRITTEN path=%s", report.get("report_path"))
    logger.info(
        "SUMMARY missing=%s errors=%s",
        report.get("total_missing"),
        report.get("total_errors"),
        "SUMMARY missing=%s mismatch=%s errors=%s",
        counts.get("missing"),
        counts.get("mismatch"),
        counts.get("errors"),
    )

    return 0
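The paired --x/--no-x flags above implement a tri-state override: config supplies the default, and either flag can force the value on or off. A minimal standalone sketch of the same resolution logic (flag names are illustrative):

    import argparse

    ap = argparse.ArgumentParser()
    ap.add_argument("--recheck", action="store_true")
    ap.add_argument("--no-recheck", action="store_true")
    args = ap.parse_args(["--no-recheck"])

    recheck = True  # stands in for cfg.get("integrity.recheck_after_backfill", True)
    if args.recheck and args.no_recheck:
        raise SystemExit("cannot set both --recheck and --no-recheck")
    if args.recheck:
        recheck = True
    elif args.no_recheck:
        recheck = False
    print(recheck)  # False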
@@ -22,6 +22,7 @@ from typing import Iterable, Sequence
from zoneinfo import ZoneInfo

from dateutil import parser as dtparser
from psycopg2 import InterfaceError, OperationalError
from psycopg2.extras import execute_values

PROJECT_ROOT = Path(__file__).resolve().parents[1]
@@ -32,8 +33,14 @@ from api.client import APIClient
from config.settings import AppConfig
from database.connection import DatabaseConnection
from models.parsers import TypeParser
from tasks.ods_tasks import ENABLED_ODS_CODES, ODS_TASK_SPECS
from tasks.ods_tasks import BaseOdsTask, ENABLED_ODS_CODES, ODS_TASK_SPECS
from utils.logging_utils import build_log_path, configure_logging
from utils.ods_record_utils import (
    get_value_case_insensitive,
    merge_record_layers,
    normalize_pk_value,
    pk_tuple_from_record,
)
from utils.windowing import split_window

DEFAULT_START = "2025-07-01"
@@ -74,38 +81,7 @@ def _iter_windows(start: datetime, end: datetime, window_size: timedelta) -> Ite


def _merge_record_layers(record: dict) -> dict:
    merged = record
    data_part = merged.get("data")
    while isinstance(data_part, dict):
        merged = {**data_part, **merged}
        data_part = data_part.get("data")
    settle_inner = merged.get("settleList")
    if isinstance(settle_inner, dict):
        merged = {**settle_inner, **merged}
    return merged


def _get_value_case_insensitive(record: dict | None, col: str | None):
    if record is None or col is None:
        return None
    if col in record:
        return record.get(col)
    col_lower = col.lower()
    for k, v in record.items():
        if isinstance(k, str) and k.lower() == col_lower:
            return v
    return None


def _normalize_pk_value(value):
    if value is None:
        return None
    if isinstance(value, str) and value.isdigit():
        try:
            return int(value)
        except Exception:
            return value
    return value
    return merge_record_layers(record)


def _chunked(seq: Sequence, size: int) -> Iterable[Sequence]:
@@ -133,7 +109,24 @@ def _get_table_pk_columns(conn, table: str) -> list[str]:
    """
    with conn.cursor() as cur:
        cur.execute(sql, (schema, name))
        return [r[0] for r in cur.fetchall()]
        cols = [r[0] for r in cur.fetchall()]
    return [c for c in cols if c.lower() != "content_hash"]


def _table_has_column(conn, table: str, column: str) -> bool:
    if "." in table:
        schema, name = table.split(".", 1)
    else:
        schema, name = "public", table
    sql = """
        SELECT 1
        FROM information_schema.columns
        WHERE table_schema = %s AND table_name = %s AND column_name = %s
        LIMIT 1
    """
    with conn.cursor() as cur:
        cur.execute(sql, (schema, name, column))
        return cur.fetchone() is not None


def _fetch_existing_pk_set(conn, table: str, pk_cols: Sequence[str], pk_values: list[tuple], chunk_size: int) -> set[tuple]:
@@ -155,6 +148,54 @@ def _fetch_existing_pk_set(conn, table: str, pk_cols: Sequence[str], pk_values:
    return existing


def _fetch_existing_pk_hash_set(
    conn, table: str, pk_cols: Sequence[str], pk_hash_values: list[tuple], chunk_size: int
) -> set[tuple]:
    if not pk_hash_values:
        return set()
    select_cols = ", ".join([*(f't.\"{c}\"' for c in pk_cols), 't.\"content_hash\"'])
    value_cols = ", ".join([*(f'\"{c}\"' for c in pk_cols), '\"content_hash\"'])
    join_cond = " AND ".join([*(f't.\"{c}\" = v.\"{c}\"' for c in pk_cols), 't.\"content_hash\" = v.\"content_hash\"'])
    sql = (
        f"SELECT {select_cols} FROM {table} t "
        f"JOIN (VALUES %s) AS v({value_cols}) ON {join_cond}"
    )
    existing: set[tuple] = set()
    with conn.cursor() as cur:
        for chunk in _chunked(pk_hash_values, chunk_size):
            execute_values(cur, sql, chunk, page_size=len(chunk))
            for row in cur.fetchall():
                existing.add(tuple(row))
    return existing


def _init_db_state(cfg: AppConfig) -> dict:
    db_conn = DatabaseConnection(dsn=cfg["db"]["dsn"], session=cfg["db"].get("session"))
    try:
        db_conn.conn.rollback()
    except Exception:
        pass
    db_conn.conn.autocommit = True
    return {"db": db_conn, "conn": db_conn.conn}


def _reconnect_db(db_state: dict, cfg: AppConfig, logger: logging.Logger):
    try:
        db_state.get("db").close()
    except Exception:
        pass
    db_state.update(_init_db_state(cfg))
    logger.warning("DB connection reset/reconnected")
    return db_state["conn"]


def _ensure_db_conn(db_state: dict, cfg: AppConfig, logger: logging.Logger):
    conn = db_state.get("conn")
    if conn is None or getattr(conn, "closed", 0):
        return _reconnect_db(db_state, cfg, logger)
    return conn


def _merge_common_params(cfg: AppConfig, task_code: str, base: dict) -> dict:
    merged: dict = {}
    common = cfg.get("api.params", {}) or {}
@@ -182,19 +223,22 @@ def _build_params(cfg: AppConfig, spec, store_id: int, window_start: datetime |
    return _merge_common_params(cfg, spec.code, base)


def _pk_tuple_from_record(record: dict, pk_cols: Sequence[str]) -> tuple | None:
    merged = _merge_record_layers(record)
def _pk_tuple_from_merged(merged: dict, pk_cols: Sequence[str]) -> tuple | None:
    values = []
    for col in pk_cols:
        val = _normalize_pk_value(_get_value_case_insensitive(merged, col))
        val = normalize_pk_value(get_value_case_insensitive(merged, col))
        if val is None or val == "":
            return None
        values.append(val)
    return tuple(values)


def _pk_tuple_from_record(record: dict, pk_cols: Sequence[str]) -> tuple | None:
    return pk_tuple_from_record(record, pk_cols)


def _pk_tuple_from_ticket_candidate(value) -> tuple | None:
    val = _normalize_pk_value(value)
    val = normalize_pk_value(value)
    if val is None or val == "":
        return None
    return (val,)
@@ -204,10 +248,17 @@ def _format_missing_sample(pk_cols: Sequence[str], pk_tuple: tuple) -> dict:
    return {col: pk_tuple[idx] for idx, col in enumerate(pk_cols)}


def _format_mismatch_sample(pk_cols: Sequence[str], pk_tuple: tuple, content_hash: str | None) -> dict:
    sample = _format_missing_sample(pk_cols, pk_tuple)
    if content_hash:
        sample["content_hash"] = content_hash
    return sample


def _check_spec(
    *,
    client: APIClient,
    db_conn,
    db_state: dict,
    cfg: AppConfig,
    tz: ZoneInfo,
    logger: logging.Logger,
@@ -219,6 +270,8 @@ def _check_spec(
    page_size: int,
    chunk_size: int,
    sample_limit: int,
    compare_content: bool,
    content_sample_limit: int,
    sleep_per_window: float,
    sleep_per_page: float,
) -> dict:
@@ -231,19 +284,34 @@ def _check_spec(
        "records_with_pk": 0,
        "missing": 0,
        "missing_samples": [],
        "mismatch": 0,
        "mismatch_samples": [],
        "pages": 0,
        "skipped_missing_pk": 0,
        "errors": 0,
        "error_detail": None,
    }

    pk_cols = _get_table_pk_columns(db_conn, spec.table_name)
    db_conn = _ensure_db_conn(db_state, cfg, logger)
    try:
        pk_cols = _get_table_pk_columns(db_conn, spec.table_name)
    except (OperationalError, InterfaceError):
        db_conn = _reconnect_db(db_state, cfg, logger)
        pk_cols = _get_table_pk_columns(db_conn, spec.table_name)
    result["pk_columns"] = pk_cols
    if not pk_cols:
        result["errors"] = 1
        result["error_detail"] = "no primary key columns found"
        return result

    try:
        has_content_hash = bool(compare_content and _table_has_column(db_conn, spec.table_name, "content_hash"))
    except (OperationalError, InterfaceError):
        db_conn = _reconnect_db(db_state, cfg, logger)
        has_content_hash = bool(compare_content and _table_has_column(db_conn, spec.table_name, "content_hash"))
    result["compare_content"] = bool(compare_content)
    result["content_hash_supported"] = has_content_hash

    if spec.requires_window and spec.time_fields:
        if not start or not end:
            result["errors"] = 1
@@ -293,24 +361,33 @@ def _check_spec(
            result["pages"] += 1
            result["records"] += len(records)
            pk_tuples: list[tuple] = []
            pk_hash_tuples: list[tuple] = []
            for rec in records:
                if not isinstance(rec, dict):
                    result["skipped_missing_pk"] += 1
                    window_skipped += 1
                    continue
                pk_tuple = _pk_tuple_from_record(rec, pk_cols)
                merged = _merge_record_layers(rec)
                pk_tuple = _pk_tuple_from_merged(merged, pk_cols)
                if not pk_tuple:
                    result["skipped_missing_pk"] += 1
                    window_skipped += 1
                    continue
                pk_tuples.append(pk_tuple)
                if has_content_hash:
                    content_hash = BaseOdsTask._compute_content_hash(merged, include_fetched_at=False)
                    pk_hash_tuples.append((*pk_tuple, content_hash))

            if not pk_tuples:
                continue

            result["records_with_pk"] += len(pk_tuples)
            pk_unique = list(dict.fromkeys(pk_tuples))
            existing = _fetch_existing_pk_set(db_conn, spec.table_name, pk_cols, pk_unique, chunk_size)
            try:
                existing = _fetch_existing_pk_set(db_conn, spec.table_name, pk_cols, pk_unique, chunk_size)
            except (OperationalError, InterfaceError):
                db_conn = _reconnect_db(db_state, cfg, logger)
                existing = _fetch_existing_pk_set(db_conn, spec.table_name, pk_cols, pk_unique, chunk_size)
            for pk_tuple in pk_unique:
                if pk_tuple in existing:
                    continue
@@ -321,6 +398,29 @@ def _check_spec(
                window_missing += 1
                if len(result["missing_samples"]) < sample_limit:
                    result["missing_samples"].append(_format_missing_sample(pk_cols, pk_tuple))

            if has_content_hash and pk_hash_tuples:
                pk_hash_unique = list(dict.fromkeys(pk_hash_tuples))
                try:
                    existing_hash = _fetch_existing_pk_hash_set(
                        db_conn, spec.table_name, pk_cols, pk_hash_unique, chunk_size
                    )
                except (OperationalError, InterfaceError):
                    db_conn = _reconnect_db(db_state, cfg, logger)
                    existing_hash = _fetch_existing_pk_hash_set(
                        db_conn, spec.table_name, pk_cols, pk_hash_unique, chunk_size
                    )
                for pk_hash_tuple in pk_hash_unique:
                    pk_tuple = pk_hash_tuple[:-1]
                    if pk_tuple not in existing:
                        continue
                    if pk_hash_tuple in existing_hash:
                        continue
                    result["mismatch"] += 1
                    if len(result["mismatch_samples"]) < content_sample_limit:
                        result["mismatch_samples"].append(
                            _format_mismatch_sample(pk_cols, pk_tuple, pk_hash_tuple[-1])
                        )
            if logger.isEnabledFor(logging.DEBUG):
                logger.debug(
                    "PAGE task=%s idx=%s page=%s records=%s missing=%s skipped=%s",
@@ -369,7 +469,7 @@ def _check_spec(
def _check_settlement_tickets(
    *,
    client: APIClient,
    db_conn,
    db_state: dict,
    cfg: AppConfig,
    tz: ZoneInfo,
    logger: logging.Logger,
@@ -380,11 +480,18 @@ def _check_settlement_tickets(
    page_size: int,
    chunk_size: int,
    sample_limit: int,
    compare_content: bool,
    content_sample_limit: int,
    sleep_per_window: float,
    sleep_per_page: float,
) -> dict:
    table_name = "billiards_ods.settlement_ticket_details"
    pk_cols = _get_table_pk_columns(db_conn, table_name)
    db_conn = _ensure_db_conn(db_state, cfg, logger)
    try:
        pk_cols = _get_table_pk_columns(db_conn, table_name)
    except (OperationalError, InterfaceError):
        db_conn = _reconnect_db(db_state, cfg, logger)
        pk_cols = _get_table_pk_columns(db_conn, table_name)
    result = {
        "task_code": "ODS_SETTLEMENT_TICKET",
        "table": table_name,
@@ -394,6 +501,8 @@ def _check_settlement_tickets(
        "records_with_pk": 0,
        "missing": 0,
        "missing_samples": [],
        "mismatch": 0,
        "mismatch_samples": [],
        "pages": 0,
        "skipped_missing_pk": 0,
        "errors": 0,
@@ -476,7 +585,11 @@ def _check_settlement_tickets(

            result["records_with_pk"] += len(pk_tuples)
            pk_unique = list(dict.fromkeys(pk_tuples))
            existing = _fetch_existing_pk_set(db_conn, table_name, pk_cols, pk_unique, chunk_size)
            try:
                existing = _fetch_existing_pk_set(db_conn, table_name, pk_cols, pk_unique, chunk_size)
            except (OperationalError, InterfaceError):
                db_conn = _reconnect_db(db_state, cfg, logger)
                existing = _fetch_existing_pk_set(db_conn, table_name, pk_cols, pk_unique, chunk_size)
            for pk_tuple in pk_unique:
                if pk_tuple in existing:
                    continue
@@ -585,6 +698,8 @@ def run_gap_check(
    cutoff_overlap_hours: int,
    allow_small_window: bool,
    logger: logging.Logger,
    compare_content: bool = False,
    content_sample_limit: int | None = None,
    window_split_unit: str | None = None,
    window_compensation_hours: int | None = None,
) -> dict:
@@ -668,6 +783,9 @@ def run_gap_check(
    if windows:
        start, end = windows[0][0], windows[-1][1]

    if content_sample_limit is None:
        content_sample_limit = sample_limit

    logger.info(
        "START range=%s~%s window_days=%s window_hours=%s split_unit=%s comp_hours=%s page_size=%s chunk_size=%s",
        start.isoformat() if isinstance(start, datetime) else None,
@@ -690,12 +808,7 @@ def run_gap_check(
        headers_extra=cfg["api"].get("headers_extra") or {},
    )

    db_conn = DatabaseConnection(dsn=cfg["db"]["dsn"], session=cfg["db"].get("session"))
    try:
        db_conn.conn.rollback()
    except Exception:
        pass
    db_conn.conn.autocommit = True
    db_state = _init_db_state(cfg)
    try:
        task_filter = {t.strip().upper() for t in (task_codes or "").split(",") if t.strip()}
        specs = [s for s in ODS_TASK_SPECS if s.code in ENABLED_ODS_CODES]
@@ -708,7 +821,7 @@ def run_gap_check(
                continue
            result = _check_spec(
                client=client,
                db_conn=db_conn.conn,
                db_state=db_state,
                cfg=cfg,
                tz=tz,
                logger=logger,
@@ -720,6 +833,8 @@ def run_gap_check(
                page_size=page_size,
                chunk_size=chunk_size,
                sample_limit=sample_limit,
                compare_content=compare_content,
                content_sample_limit=content_sample_limit,
                sleep_per_window=sleep_per_window,
                sleep_per_page=sleep_per_page,
            )
@@ -735,7 +850,7 @@ def run_gap_check(
        if (not task_filter) or ("ODS_SETTLEMENT_TICKET" in task_filter):
            ticket_result = _check_settlement_tickets(
                client=client,
                db_conn=db_conn.conn,
                db_state=db_state,
                cfg=cfg,
                tz=tz,
                logger=logger,
@@ -746,6 +861,8 @@ def run_gap_check(
                page_size=page_size,
                chunk_size=chunk_size,
                sample_limit=sample_limit,
                compare_content=compare_content,
                content_sample_limit=content_sample_limit,
                sleep_per_window=sleep_per_window,
                sleep_per_page=sleep_per_page,
            )
@@ -759,6 +876,7 @@ def run_gap_check(
        )

        total_missing = sum(int(r.get("missing") or 0) for r in results)
        total_mismatch = sum(int(r.get("mismatch") or 0) for r in results)
        total_errors = sum(int(r.get("errors") or 0) for r in results)

        payload = {
@@ -772,16 +890,22 @@ def run_gap_check(
            "page_size": page_size,
            "chunk_size": chunk_size,
            "sample_limit": sample_limit,
            "compare_content": compare_content,
            "content_sample_limit": content_sample_limit,
            "store_id": store_id,
            "base_url": cfg.get("api.base_url"),
            "results": results,
            "total_missing": total_missing,
            "total_mismatch": total_mismatch,
            "total_errors": total_errors,
            "generated_at": datetime.now(tz).isoformat(),
        }
        return payload
    finally:
        db_conn.close()
        try:
            db_state.get("db").close()
        except Exception:
            pass


def main() -> int:
@@ -796,6 +920,13 @@ def main() -> int:
    ap.add_argument("--page-size", type=int, default=200, help="API page size (default: 200)")
    ap.add_argument("--chunk-size", type=int, default=500, help="DB query chunk size (default: 500)")
    ap.add_argument("--sample-limit", type=int, default=50, help="max missing PK samples per table")
    ap.add_argument("--compare-content", action="store_true", help="compare record content hash (mismatch detection)")
    ap.add_argument(
        "--content-sample-limit",
        type=int,
        default=None,
        help="max mismatch samples per table (default: same as --sample-limit)",
    )
    ap.add_argument("--sleep-per-window-seconds", type=float, default=0, help="sleep seconds after each window")
    ap.add_argument("--sleep-per-page-seconds", type=float, default=0, help="sleep seconds after each page")
    ap.add_argument("--task-codes", default="", help="comma-separated task codes to check (optional)")
@@ -847,6 +978,8 @@ def main() -> int:
        cutoff_overlap_hours=args.cutoff_overlap_hours,
        allow_small_window=args.allow_small_window,
        logger=logger,
        compare_content=args.compare_content,
        content_sample_limit=args.content_sample_limit,
        window_split_unit=args.window_split_unit or None,
        window_compensation_hours=args.window_compensation_hours,
    )
@@ -862,8 +995,9 @@ def main() -> int:
    out_path.write_text(json.dumps(payload, ensure_ascii=False, indent=2) + "\n", encoding="utf-8")
    logger.info("REPORT_WRITTEN path=%s", out_path)
    logger.info(
        "SUMMARY missing=%s errors=%s",
        "SUMMARY missing=%s mismatch=%s errors=%s",
        payload.get("total_missing"),
        payload.get("total_mismatch"),
        payload.get("total_errors"),
    )
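Conceptually, the gap check classifies each API record with two set lookups: a PK-only set (missing) and a (PK, content_hash) set (mismatch). A toy illustration with made-up values:

    api_pairs = {("o1", "h-a"), ("o2", "h-b")}    # (pk, content_hash) seen in the API page
    db_pairs = {("o1", "h-a"), ("o2", "h-old")}   # same pairs found in ODS via the VALUES join
    db_pks = {pk for pk, _ in db_pairs}

    missing = [pk for pk, _ in api_pairs if pk not in db_pks]                           # []
    mismatch = [pk for pk, h in api_pairs if pk in db_pks and (pk, h) not in db_pairs]  # ["o2"]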
324
etl_billiards/scripts/migrate_snapshot_ods.py
Normal file
@@ -0,0 +1,324 @@
# -*- coding: utf-8 -*-
"""
迁移到“快照型 ODS + DWD SCD2”:
1) 为所有 ODS 表补充 content_hash,并以 (业务主键, content_hash) 作为新主键;
2) 基于 payload 计算 content_hash,避免重复快照;
3) 为所有 DWD 维度表补齐 SCD2 字段,并调整主键为 (业务主键, scd2_start_time)。

用法:
    PYTHONPATH=. python -m etl_billiards.scripts.migrate_snapshot_ods --dsn "postgresql://..."

可选参数:
    --only-ods / --only-dwd
    --dry-run
    --batch-size 500
"""
from __future__ import annotations

import argparse
import hashlib
import json
import os
from typing import Any, Iterable, List, Sequence

import psycopg2
from psycopg2.extras import execute_batch, RealDictCursor


def _hash_default(value):
    return value.isoformat() if hasattr(value, "isoformat") else str(value)


def _sanitize_record_for_hash(record: Any) -> Any:
    exclude = {
        "data",
        "payload",
        "source_file",
        "source_endpoint",
        "fetched_at",
        "content_hash",
        "record_index",
    }

    def _strip(value):
        if isinstance(value, dict):
            cleaned = {}
            for k, v in value.items():
                if isinstance(k, str) and k.lower() in exclude:
                    continue
                cleaned[k] = _strip(v)
            return cleaned
        if isinstance(value, list):
            return [_strip(v) for v in value]
        return value

    return _strip(record or {})


def _compute_content_hash(record: Any) -> str:
    cleaned = _sanitize_record_for_hash(record)
    payload = json.dumps(
        cleaned,
        ensure_ascii=False,
        sort_keys=True,
        separators=(",", ":"),
        default=_hash_default,
    )
    return hashlib.sha256(payload.encode("utf-8")).hexdigest()


def _fetch_tables(cur, schema: str) -> List[str]:
    cur.execute(
        """
        SELECT table_name
        FROM information_schema.tables
        WHERE table_schema = %s AND table_type = 'BASE TABLE'
        ORDER BY table_name
        """,
        (schema,),
    )
    return [r[0] for r in cur.fetchall()]


def _fetch_columns(cur, schema: str, table: str) -> List[str]:
    cur.execute(
        """
        SELECT column_name
        FROM information_schema.columns
        WHERE table_schema = %s AND table_name = %s
        ORDER BY ordinal_position
        """,
        (schema, table),
    )
    cols = []
    for row in cur.fetchall():
        if isinstance(row, dict):
            cols.append(row.get("column_name"))
        else:
            cols.append(row[0])
    return [c for c in cols if c]


def _fetch_pk_constraint(cur, schema: str, table: str) -> tuple[str | None, list[str]]:
    cur.execute(
        """
        SELECT tc.constraint_name, kcu.column_name, kcu.ordinal_position
        FROM information_schema.table_constraints tc
        JOIN information_schema.key_column_usage kcu
          ON tc.constraint_name = kcu.constraint_name
         AND tc.table_schema = kcu.table_schema
        WHERE tc.constraint_type = 'PRIMARY KEY'
          AND tc.table_schema = %s
          AND tc.table_name = %s
        ORDER BY kcu.ordinal_position
        """,
        (schema, table),
    )
    rows = cur.fetchall()
    if not rows:
        return None, []
    if isinstance(rows[0], dict):
        name = rows[0].get("constraint_name")
        cols = [r.get("column_name") for r in rows]
    else:
        name = rows[0][0]
        cols = [r[1] for r in rows]
    return name, [c for c in cols if c]


def _ensure_content_hash_column(cur, schema: str, table: str, dry_run: bool) -> None:
    cols = _fetch_columns(cur, schema, table)
    if any(c.lower() == "content_hash" for c in cols):
        return
    sql = f'ALTER TABLE "{schema}"."{table}" ADD COLUMN content_hash TEXT'
    if dry_run:
        print(f"[DRY] {sql}")
        return
    print(f"[ODS] 添加 content_hash: {schema}.{table}")
    cur.execute(sql)


def _backfill_content_hash(conn, schema: str, table: str, batch_size: int, dry_run: bool) -> int:
    updated = 0
    with conn.cursor(cursor_factory=RealDictCursor) as cur:
        cols = _fetch_columns(cur, schema, table)
        if "content_hash" not in [c.lower() for c in cols]:
            return 0
        pk_name, pk_cols = _fetch_pk_constraint(cur, schema, table)
        if not pk_cols:
            return 0
        # 过滤 content_hash
        pk_cols = [c for c in pk_cols if c.lower() != "content_hash"]
        select_cols = [*pk_cols]
        if any(c.lower() == "payload" for c in cols):
            select_cols.append("payload")
        else:
            select_cols.extend([c for c in cols if c.lower() not in {"content_hash"}])
        select_cols_sql = ", ".join(f'"{c}"' for c in select_cols)
        sql = f'SELECT {select_cols_sql} FROM "{schema}"."{table}" WHERE content_hash IS NULL'
        cur.execute(sql)
        rows = cur.fetchall()

    if not rows:
        return 0

    def build_row(row: dict) -> tuple:
        payload = row.get("payload")
        if payload is None:
            payload = {k: v for k, v in row.items() if k.lower() not in {"content_hash", "payload"}}
        content_hash = _compute_content_hash(payload)
        key_vals = [row.get(k) for k in pk_cols]
        return (content_hash, *key_vals)

    updates = [build_row(r) for r in rows]
    if dry_run:
        print(f"[DRY] {schema}.{table}: 预计更新 {len(updates)} 行 content_hash")
        return len(updates)

    where_clause = " AND ".join([f'"{c}" = %s' for c in pk_cols])
    update_sql = (
        f'UPDATE "{schema}"."{table}" SET content_hash = %s '
        f'WHERE {where_clause} AND content_hash IS NULL'
    )
    with conn.cursor() as cur2:
        execute_batch(cur2, update_sql, updates, page_size=batch_size)
        updated = cur2.rowcount or len(updates)
    print(f"[ODS] {schema}.{table}: 更新 content_hash {updated} 行")
    return updated


def _ensure_ods_primary_key(cur, schema: str, table: str, dry_run: bool) -> None:
    name, pk_cols = _fetch_pk_constraint(cur, schema, table)
    if not pk_cols:
        return
    if any(c.lower() == "content_hash" for c in pk_cols):
        return
    new_pk = pk_cols + ["content_hash"]
    drop_sql = f'ALTER TABLE "{schema}"."{table}" DROP CONSTRAINT "{name}"'
    cols_sql = ", ".join([f'"{c}"' for c in new_pk])
    add_sql = f'ALTER TABLE "{schema}"."{table}" ADD PRIMARY KEY ({cols_sql})'
    if dry_run:
        print(f"[DRY] {drop_sql}")
        print(f"[DRY] {add_sql}")
        return
    print(f"[ODS] 变更主键: {schema}.{table} -> ({', '.join(new_pk)})")
    cur.execute(drop_sql)
    cur.execute(add_sql)


def _migrate_ods(conn, schema: str, batch_size: int, dry_run: bool) -> None:
    with conn.cursor() as cur:
        tables = _fetch_tables(cur, schema)
    for table in tables:
        with conn.cursor() as cur:
            _ensure_content_hash_column(cur, schema, table, dry_run)
        conn.commit()
        _backfill_content_hash(conn, schema, table, batch_size, dry_run)
        with conn.cursor() as cur:
            _ensure_ods_primary_key(cur, schema, table, dry_run)
        conn.commit()


def _backfill_scd2_fields(cur, schema: str, table: str, columns: Sequence[str], dry_run: bool) -> None:
    lower = {c.lower() for c in columns}
    fallback_cols = [
        "updated_at",
        "update_time",
        "created_at",
        "create_time",
        "fetched_at",
    ]
    fallback = None
    for col in fallback_cols:
        if col in lower:
            fallback = f'"{col}"'
            break
    if fallback is None:
        fallback = "now()"

    sql = (
        f'UPDATE "{schema}"."{table}" '
        f'SET scd2_start_time = COALESCE(scd2_start_time, {fallback}), '
        f"scd2_end_time = COALESCE(scd2_end_time, TIMESTAMPTZ '9999-12-31'), "
        f"scd2_is_current = COALESCE(scd2_is_current, 1), "
        f"scd2_version = COALESCE(scd2_version, 1) "
        f"WHERE scd2_start_time IS NULL OR scd2_end_time IS NULL OR scd2_is_current IS NULL OR scd2_version IS NULL"
    )
    if dry_run:
        print(f"[DRY] {sql}")
        return
    cur.execute(sql)


def _ensure_dwd_primary_key(cur, schema: str, table: str, dry_run: bool) -> None:
    name, pk_cols = _fetch_pk_constraint(cur, schema, table)
    if not pk_cols:
        return
    if any(c.lower() == "scd2_start_time" for c in pk_cols):
        return
    new_pk = pk_cols + ["scd2_start_time"]
    drop_sql = f'ALTER TABLE "{schema}"."{table}" DROP CONSTRAINT "{name}"'
    cols_sql = ", ".join([f'"{c}"' for c in new_pk])
    add_sql = f'ALTER TABLE "{schema}"."{table}" ADD PRIMARY KEY ({cols_sql})'
    if dry_run:
        print(f"[DRY] {drop_sql}")
        print(f"[DRY] {add_sql}")
        return
    print(f"[DWD] 变更主键: {schema}.{table} -> ({', '.join(new_pk)})")
    cur.execute(drop_sql)
    cur.execute(add_sql)


def _migrate_dwd(conn, schema: str, dry_run: bool) -> None:
    with conn.cursor() as cur:
        cur.execute(
            """
            SELECT DISTINCT table_name
            FROM information_schema.columns
            WHERE table_schema = %s AND column_name ILIKE 'scd2_start_time'
            ORDER BY table_name
            """,
            (schema,),
        )
        tables = [r[0] for r in cur.fetchall()]

    for table in tables:
        with conn.cursor() as cur:
            cols = _fetch_columns(cur, schema, table)
            _backfill_scd2_fields(cur, schema, table, cols, dry_run)
        conn.commit()
        with conn.cursor() as cur:
            _ensure_dwd_primary_key(cur, schema, table, dry_run)
        conn.commit()


def main() -> int:
    parser = argparse.ArgumentParser(description="迁移 ODS 快照 + DWD SCD2")
    parser.add_argument("--dsn", dest="dsn", help="PostgreSQL DSN(也可用环境变量 PG_DSN)")
    parser.add_argument("--schema-ods", dest="schema_ods", default="billiards_ods")
    parser.add_argument("--schema-dwd", dest="schema_dwd", default="billiards_dwd")
    parser.add_argument("--batch-size", dest="batch_size", type=int, default=500)
    parser.add_argument("--only-ods", dest="only_ods", action="store_true")
    parser.add_argument("--only-dwd", dest="only_dwd", action="store_true")
    parser.add_argument("--dry-run", dest="dry_run", action="store_true")
    args = parser.parse_args()

    dsn = args.dsn or os.environ.get("PG_DSN")
    if not dsn:
        print("缺少 DSN(--dsn 或环境变量 PG_DSN)")
        return 2

    conn = psycopg2.connect(dsn)
    conn.autocommit = False
    try:
        if not args.only_dwd:
            _migrate_ods(conn, args.schema_ods, args.batch_size, args.dry_run)
        if not args.only_ods:
            _migrate_dwd(conn, args.schema_dwd, args.dry_run)
        return 0
    finally:
        conn.close()


if __name__ == "__main__":
    raise SystemExit(main())
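The migration's content_hash relies on a canonical JSON encoding (sorted keys, compact separators) so that key order and whitespace never change the hash. A reduced sketch of that property, independent of the repo's helpers:

    import hashlib
    import json

    def stable_hash(record: dict) -> str:
        payload = json.dumps(record, ensure_ascii=False, sort_keys=True, separators=(",", ":"))
        return hashlib.sha256(payload.encode("utf-8")).hexdigest()

    a = {"order_id": 1, "amount": "9.90"}
    b = {"amount": "9.90", "order_id": 1}
    assert stable_hash(a) == stable_hash(b)  # same content, same snapshot hash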
@@ -1,4 +1,4 @@
# -*- coding: utf-8 -*-
"""Data integrity task that checks API -> ODS -> DWD completeness."""
from __future__ import annotations

@@ -7,16 +7,9 @@ from zoneinfo import ZoneInfo

from dateutil import parser as dtparser

import json
from pathlib import Path
from utils.windowing import build_window_segments, calc_window_minutes
from .base_task import BaseTask
from quality.integrity_checker import (
    IntegrityWindow,
    compute_last_etl_end,
    run_integrity_history,
    run_integrity_window,
)
from quality.integrity_service import run_history_flow, run_window_flow, write_report


class DataIntegrityTask(BaseTask):
@@ -31,15 +24,25 @@ class DataIntegrityTask(BaseTask):
        include_dimensions = bool(self.config.get("integrity.include_dimensions", False))
        task_codes = str(self.config.get("integrity.ods_task_codes", "") or "").strip()
        auto_backfill = bool(self.config.get("integrity.auto_backfill", False))
        compare_content = self.config.get("integrity.compare_content")
        if compare_content is None:
            compare_content = True
        content_sample_limit = self.config.get("integrity.content_sample_limit")
        backfill_mismatch = self.config.get("integrity.backfill_mismatch")
        if backfill_mismatch is None:
            backfill_mismatch = True
        recheck_after_backfill = self.config.get("integrity.recheck_after_backfill")
        if recheck_after_backfill is None:
            recheck_after_backfill = True

        # Switch to window mode when a CLI window override is provided.
        window_override_start = self.config.get("run.window_override.start")
        window_override_end = self.config.get("run.window_override.end")
        if window_override_start or window_override_end:
            self.logger.info(
                "Detected CLI window override. Switching to window mode: %s ~ %s",
                window_override_start,
                window_override_end,
            )
            mode = "window"

@@ -57,65 +60,28 @@ class DataIntegrityTask(BaseTask):

            total_segments = len(segments)
            if total_segments > 1:
                self.logger.info("Data integrity check split into %s segments.", total_segments)

            window_reports = []
            total_missing = 0
            total_errors = 0
            for idx, (seg_start, seg_end) in enumerate(segments, start=1):
                window = IntegrityWindow(
                    start=seg_start,
                    end=seg_end,
                    label=f"segment_{idx}",
                    granularity="window",
                )
                payload = run_integrity_window(
                    cfg=self.config,
                    window=window,
                    include_dimensions=include_dimensions,
                    task_codes=task_codes,
                    logger=self.logger,
                    write_report=False,
                    window_split_unit="none",
                    window_compensation_hours=0,
                )
                window_reports.append(payload)
                total_missing += int(payload.get("api_to_ods", {}).get("total_missing") or 0)
                total_errors += int(payload.get("api_to_ods", {}).get("total_errors") or 0)
            report, counts = run_window_flow(
                cfg=self.config,
                windows=segments,
                include_dimensions=include_dimensions,
                task_codes=task_codes,
                logger=self.logger,
                compare_content=bool(compare_content),
                content_sample_limit=content_sample_limit,
                do_backfill=bool(auto_backfill),
                include_mismatch=bool(backfill_mismatch),
                recheck_after_backfill=bool(recheck_after_backfill),
                page_size=int(self.config.get("api.page_size") or 200),
                chunk_size=500,
            )

            overall_start = segments[0][0]
            overall_end = segments[-1][1]
            report = {
                "mode": "window",
                "window": {
                    "start": overall_start.isoformat(),
                    "end": overall_end.isoformat(),
                    "segments": total_segments,
                },
                "windows": window_reports,
                "api_to_ods": {
                    "total_missing": total_missing,
                    "total_errors": total_errors,
                },
                "total_missing": total_missing,
                "total_errors": total_errors,
                "generated_at": datetime.now(tz).isoformat(),
            }
            report_path = self._write_report(report, "data_integrity_window")
            report_path = write_report(report, prefix="data_integrity_window", tz=tz)
            report["report_path"] = report_path

            missing_count = int(total_missing or 0)
            counts = {
                "missing": missing_count,
                "errors": int(total_errors or 0),
            }

            # Auto backfill
            backfill_result = None
            if auto_backfill and missing_count > 0:
                backfill_result = self._run_backfill(base_start, base_end, task_codes)
                counts["backfilled"] = backfill_result.get("backfilled", 0)

            return {
                "status": "SUCCESS",
                "counts": counts,
@@ -125,7 +91,7 @@ class DataIntegrityTask(BaseTask):
                    "minutes": calc_window_minutes(overall_start, overall_end),
                },
                "report_path": report_path,
                "backfill_result": backfill_result,
                "backfill_result": report.get("backfill_result"),
            }

        history_start = str(self.config.get("integrity.history_start", "2025-07-01") or "2025-07-01")
@@ -136,77 +102,52 @@ class DataIntegrityTask(BaseTask):
        else:
            start_dt = start_dt.astimezone(tz)

        end_dt = None
        if history_end:
            end_dt = dtparser.parse(history_end)
            if end_dt.tzinfo is None:
                end_dt = end_dt.replace(tzinfo=tz)
            else:
                end_dt = end_dt.astimezone(tz)
        else:
            end_dt = compute_last_etl_end(self.config) or datetime.now(tz)

        report = run_integrity_history(
        report, counts = run_history_flow(
            cfg=self.config,
            start_dt=start_dt,
            end_dt=end_dt,
            include_dimensions=include_dimensions,
            task_codes=task_codes,
            logger=self.logger,
            write_report=True,
            compare_content=bool(compare_content),
            content_sample_limit=content_sample_limit,
            do_backfill=bool(auto_backfill),
            include_mismatch=bool(backfill_mismatch),
            recheck_after_backfill=bool(recheck_after_backfill),
            page_size=int(self.config.get("api.page_size") or 200),
            chunk_size=500,
        )
        missing_count = int(report.get("total_missing") or 0)
        counts = {
            "missing": missing_count,
            "errors": int(report.get("total_errors") or 0),
        }

        # Auto backfill
        backfill_result = None
        if auto_backfill and missing_count > 0:
            backfill_result = self._run_backfill(start_dt, end_dt, task_codes)
            counts["backfilled"] = backfill_result.get("backfilled", 0)

        report_path = write_report(report, prefix="data_integrity_history", tz=tz)
        report["report_path"] = report_path

        end_dt_used = end_dt
        if end_dt_used is None:
            end_str = report.get("end")
            if end_str:
                parsed = dtparser.parse(end_str)
                if parsed.tzinfo is None:
                    end_dt_used = parsed.replace(tzinfo=tz)
                else:
                    end_dt_used = parsed.astimezone(tz)
        if end_dt_used is None:
            end_dt_used = start_dt

        return {
            "status": "SUCCESS",
            "counts": counts,
            "window": {
                "start": start_dt,
                "end": end_dt,
                "minutes": int((end_dt - start_dt).total_seconds() // 60) if end_dt > start_dt else 0,
                "end": end_dt_used,
                "minutes": int((end_dt_used - start_dt).total_seconds() // 60) if end_dt_used > start_dt else 0,
            },
            "report_path": report.get("report_path"),
            "backfill_result": backfill_result,
            "report_path": report_path,
            "backfill_result": report.get("backfill_result"),
        }

    def _write_report(self, report: dict, prefix: str) -> str:
        root = Path(__file__).resolve().parents[1]
        stamp = datetime.now(self.tz).strftime("%Y%m%d_%H%M%S")
        path = root / "reports" / f"{prefix}_{stamp}.json"
        path.parent.mkdir(parents=True, exist_ok=True)
        path.write_text(json.dumps(report, ensure_ascii=False, indent=2) + "\n", encoding="utf-8")
        return str(path)

    def _run_backfill(self, start_dt: datetime, end_dt: datetime, task_codes: str) -> dict:
        """Run the data backfill."""
        self.logger.info("Auto backfill started: start=%s end=%s", start_dt, end_dt)
        try:
            from scripts.backfill_missing_data import run_backfill
            result = run_backfill(
                cfg=self.config,
                start=start_dt,
                end=end_dt,
                task_codes=task_codes or None,
                dry_run=False,
                page_size=200,
                chunk_size=500,
                logger=self.logger,
            )
            self.logger.info(
                "Auto backfill finished: backfilled=%s errors=%s",
                result.get("backfilled", 0),
                result.get("errors", 0),
            )
            return result
        except Exception as exc:
            self.logger.exception("Auto backfill failed")
            return {"backfilled": 0, "errors": 1, "error": str(exc)}

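Note: run_history_flow / run_window_flow from quality.integrity_service are not shown in this diff. From the call sites above one can only infer their contract: both return a (report, counts) pair, where report carries at least "total_missing", "total_errors", and optionally "end" and "backfill_result", and counts mirrors the old hand-built {"missing", "errors"} dict. A minimal sketch under that assumption, mirroring the window-mode call exactly:

    report, counts = run_window_flow(
        cfg=cfg,
        windows=[(start_dt, end_dt)],
        include_dimensions=False,
        task_codes="",
        logger=logger,
        compare_content=True,
        content_sample_limit=50,
        do_backfill=False,
        include_mismatch=True,
        recheck_after_backfill=True,
        page_size=200,
        chunk_size=500,
    )
    assert {"missing", "errors"} <= set(counts)  # inferred, not confirmed by this diff
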
@@ -2,8 +2,10 @@
"""DWD load task: incremental load from ODS into DWD (SCD2 for dimensions, time-based increments for facts)."""
from __future__ import annotations

import re
import time
from datetime import datetime
from datetime import date, datetime
from decimal import Decimal, InvalidOperation
from typing import Any, Dict, Iterable, List, Sequence

from psycopg2.extras import RealDictCursor, execute_batch, execute_values
@@ -77,6 +79,37 @@ class DwdLoadTask(BaseTask):
        "billiards_dwd.dwd_assistant_service_log",
    }

    _NUMERIC_RE = re.compile(r"^[+-]?\d+(?:\.\d+)?$")
    _BOOL_STRINGS = {"true", "false", "1", "0", "yes", "no", "y", "n", "t", "f"}

    def _strip_scd2_keys(self, pk_cols: Sequence[str]) -> list[str]:
        return [c for c in pk_cols if c.lower() not in self.SCD_COLS]

    @staticmethod
    def _pick_snapshot_order_column(ods_cols: Sequence[str]) -> str | None:
        lower_cols = {c.lower() for c in ods_cols}
        for candidate in ("fetched_at", "update_time", "create_time"):
            if candidate in lower_cols:
                return candidate
        return None

    @staticmethod
    def _latest_snapshot_select_sql(
        select_cols_sql: str,
        ods_table_sql: str,
        key_exprs: Sequence[str],
        order_col: str | None,
        where_sql: str = "",
    ) -> str:
        if key_exprs and order_col:
            distinct_on = ", ".join(key_exprs)
            order_by = ", ".join([*key_exprs, f'"{order_col}" DESC NULLS LAST'])
            return (
                f"SELECT DISTINCT ON ({distinct_on}) {select_cols_sql} "
                f"FROM {ods_table_sql} {where_sql} ORDER BY {order_by}"
            )
        return f"SELECT {select_cols_sql} FROM {ods_table_sql} {where_sql}"

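    # Illustration only (hypothetical table/columns): with
    # key_exprs=['"order_id"'], order_col="fetched_at" and no WHERE clause,
    # _latest_snapshot_select_sql() renders
    #   SELECT DISTINCT ON ("order_id") "order_id", "amount"
    #   FROM billiards_ods.orders  ORDER BY "order_id", "fetched_at" DESC NULLS LAST
    # i.e. one latest snapshot row per business key. Without key expressions or
    # an order column it degrades to a plain SELECT over the ODS table.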
    # Special column mappings: dwd column name -> source column expression (optional CAST)
    FACT_MAPPINGS: dict[str, list[tuple[str, str, str | None]]] = {
        # Dimension tables (fill in primary-key / field differences)
@@ -652,9 +685,8 @@ class DwdLoadTask(BaseTask):
        if not pk_cols:
            raise ValueError(f"{dwd_table} has no configured primary key; cannot merge dimension table")

        pk_has_scd = any(pk.lower() in self.SCD_COLS for pk in pk_cols)
        scd_cols_present = any(c.lower() in self.SCD_COLS for c in dwd_cols)
        if scd_cols_present and pk_has_scd:
        if scd_cols_present:
            return self._merge_dim_scd2(cur, dwd_table, ods_table, dwd_cols, ods_cols, now)
        return self._merge_dim_type1_upsert(cur, dwd_table, ods_table, dwd_cols, ods_cols, pk_cols, now)

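        # Net effect of this dispatch change (sketch): any dimension table whose
        # columns include the scd2_* bookkeeping fields now takes the SCD2 merge
        # path, whether or not those fields are part of the declared primary
        # key; all other dimension tables fall through to a type-1 upsert.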
@@ -701,12 +733,19 @@ class DwdLoadTask(BaseTask):
        if not select_exprs:
            return 0

        # For dim_site and dim_site_ex, use DISTINCT ON to optimize the query:
        # avoid a full scan of the large table_fee_transactions table and fetch
        # only the latest record per site_id.
        if dwd_table in ("billiards_dwd.dim_site", "billiards_dwd.dim_site_ex"):
            sql = f"SELECT DISTINCT ON (site_id) {', '.join(select_exprs)} FROM {ods_table_sql} ORDER BY site_id, fetched_at DESC NULLS LAST"
        else:
            sql = f"SELECT {', '.join(select_exprs)} FROM {ods_table_sql}"
        order_col = self._pick_snapshot_order_column(ods_cols)
        business_keys = self._strip_scd2_keys(pk_cols)
        key_exprs: list[str] = []
        for key in business_keys:
            lc = key.lower()
            if lc in mapping:
                src, cast_type = mapping[lc]
                key_exprs.append(self._cast_expr(src, cast_type))
            elif lc in ods_set:
                key_exprs.append(f'"{lc}"')

        select_cols_sql = ", ".join(select_exprs)
        sql = self._latest_snapshot_select_sql(select_cols_sql, ods_table_sql, key_exprs, order_col)

        cur.execute(sql)
        rows = [{k.lower(): v for k, v in r.items()} for r in cur.fetchall()]

@@ -784,7 +823,11 @@ class DwdLoadTask(BaseTask):
        if not pk_cols:
            raise ValueError(f"{dwd_table} has no configured primary key; cannot run SCD2 merge")

        mapping = self._build_column_mapping(dwd_table, pk_cols, ods_cols)
        business_keys = self._strip_scd2_keys(pk_cols)
        if not business_keys:
            raise ValueError(f"{dwd_table} primary key only contains SCD2 columns; cannot merge")

        mapping = self._build_column_mapping(dwd_table, business_keys, ods_cols)
        ods_set = {c.lower() for c in ods_cols}
        table_sql = self._format_table(ods_table, "billiards_ods")
        # Build SELECT expressions, supporting JSON/expression mappings
@@ -806,7 +849,7 @@ class DwdLoadTask(BaseTask):
            select_exprs.append('"categoryboxes" AS "categoryboxes"')
            added.add("categoryboxes")
        # Fallback: make sure the primary-key columns are selected
        for pk in pk_cols:
        for pk in business_keys:
            lc = pk.lower()
            if lc not in added:
                if lc in mapping:
@@ -819,7 +862,18 @@ class DwdLoadTask(BaseTask):
        if not select_exprs:
            return 0

        sql = f"SELECT {', '.join(select_exprs)} FROM {table_sql}"
        order_col = self._pick_snapshot_order_column(ods_cols)
        key_exprs: list[str] = []
        for key in business_keys:
            lc = key.lower()
            if lc in mapping:
                src, cast_type = mapping[lc]
                key_exprs.append(self._cast_expr(src, cast_type))
            elif lc in ods_set:
                key_exprs.append(f'"{lc}"')

        select_cols_sql = ", ".join(select_exprs)
        sql = self._latest_snapshot_select_sql(select_cols_sql, table_sql, key_exprs, order_col)
        cur.execute(sql)
        rows = [{k.lower(): v for k, v in r.items()} for r in cur.fetchall()]

@@ -842,11 +896,11 @@ class DwdLoadTask(BaseTask):
                value = row.get(src.lower())
                mapped_row[lc] = value

            pk_key = tuple(mapped_row.get(pk) for pk in pk_cols)
            pk_key = tuple(mapped_row.get(pk) for pk in business_keys)
            if pk_key in seen_pk:
                continue
            if any(v is None for v in pk_key):
                self.logger.warning("Skipping %s: missing primary key %s", dwd_table, dict(zip(pk_cols, pk_key)))
                self.logger.warning("Skipping %s: missing primary key %s", dwd_table, dict(zip(business_keys, pk_key)))
                continue
            seen_pk.add(pk_key)
            src_rows_by_pk[pk_key] = mapped_row
@@ -862,7 +916,7 @@ class DwdLoadTask(BaseTask):
        current_by_pk: dict[tuple[Any, ...], Dict[str, Any]] = {}
        for r in current_rows:
            rr = {k.lower(): v for k, v in r.items()}
            pk_key = tuple(rr.get(pk) for pk in pk_cols)
            pk_key = tuple(rr.get(pk) for pk in business_keys)
            current_by_pk[pk_key] = rr

        # Compute the key sets that need closing / inserting
@@ -881,7 +935,7 @@ class DwdLoadTask(BaseTask):

        # Close old versions first (one shared end_time per batch)
        if to_close:
            self._close_current_dim_bulk(cur, dwd_table, pk_cols, to_close, now)
            self._close_current_dim_bulk(cur, dwd_table, business_keys, to_close, now)

        # Bulk-insert new versions
        if to_insert:
@@ -1031,10 +1085,105 @@ class DwdLoadTask(BaseTask):
            lc = col.lower()
            if lc in self.SCD_COLS:
                continue
            if current.get(lc) != incoming.get(lc):
            if not self._values_equal(current.get(lc), incoming.get(lc)):
                return True
        return False

    def _values_equal(self, current_val: Any, incoming_val: Any) -> bool:
        """Normalize common type mismatches (numeric/text, naive/aware datetime) before compare."""
        current_val = self._normalize_empty(current_val)
        incoming_val = self._normalize_empty(incoming_val)
        if current_val is None and incoming_val is None:
            return True

        # Datetime normalization (naive vs aware)
        if isinstance(current_val, (datetime, date)) or isinstance(incoming_val, (datetime, date)):
            return self._normalize_datetime(current_val) == self._normalize_datetime(incoming_val)

        # Boolean normalization
        if self._looks_bool(current_val) or self._looks_bool(incoming_val):
            cur_bool = self._coerce_bool(current_val)
            inc_bool = self._coerce_bool(incoming_val)
            if cur_bool is not None and inc_bool is not None:
                return cur_bool == inc_bool

        # Numeric normalization (string vs numeric)
        if self._looks_numeric(current_val) or self._looks_numeric(incoming_val):
            cur_num = self._coerce_numeric(current_val)
            inc_num = self._coerce_numeric(incoming_val)
            if cur_num is not None and inc_num is not None:
                return cur_num == inc_num

        return current_val == incoming_val

    def _normalize_empty(self, value: Any) -> Any:
        if isinstance(value, str):
            stripped = value.strip()
            return None if stripped == "" else stripped
        return value

    def _normalize_datetime(self, value: Any) -> Any:
        if value is None:
            return None
        if isinstance(value, date) and not isinstance(value, datetime):
            value = datetime.combine(value, datetime.min.time())
        if not isinstance(value, datetime):
            return value
        if value.tzinfo is None:
            return value.replace(tzinfo=self.tz)
        return value.astimezone(self.tz)

    def _looks_numeric(self, value: Any) -> bool:
        if isinstance(value, (int, float, Decimal)) and not isinstance(value, bool):
            return True
        if isinstance(value, str):
            return bool(self._NUMERIC_RE.match(value.strip()))
        return False

    def _coerce_numeric(self, value: Any) -> Decimal | None:
        value = self._normalize_empty(value)
        if value is None:
            return None
        if isinstance(value, bool):
            return Decimal(int(value))
        if isinstance(value, (int, float, Decimal)):
            try:
                return Decimal(str(value))
            except InvalidOperation:
                return None
        if isinstance(value, str):
            s = value.strip()
            if not self._NUMERIC_RE.match(s):
                return None
            try:
                return Decimal(s)
            except InvalidOperation:
                return None
        return None

    def _looks_bool(self, value: Any) -> bool:
        if isinstance(value, bool):
            return True
        if isinstance(value, str):
            return value.strip().lower() in self._BOOL_STRINGS
        return False

    def _coerce_bool(self, value: Any) -> bool | None:
        value = self._normalize_empty(value)
        if value is None:
            return None
        if isinstance(value, bool):
            return value
        if isinstance(value, (int, Decimal)) and not isinstance(value, bool):
            return bool(int(value))
        if isinstance(value, str):
            s = value.strip().lower()
            if s in {"true", "1", "yes", "y", "t"}:
                return True
            if s in {"false", "0", "no", "n", "f"}:
                return False
        return None

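    # A few pairs _values_equal() is designed to treat as equal (sketch;
    # assumes a constructed task instance with self.tz set):
    #   task._values_equal(Decimal("1.50"), "1.5")  -> True  (numeric coercion)
    #   task._values_equal("true", 1)               -> True  (boolean coercion)
    #   task._values_equal("", None)                -> True  (empty string ~ NULL)
    #   task._values_equal(datetime(2025, 7, 1),
    #                      datetime(2025, 7, 1, tzinfo=task.tz)) -> True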
    def _merge_fact_increment(
        self,
        cur,
@@ -1052,6 +1201,9 @@ class DwdLoadTask(BaseTask):
        mapping: Dict[str, tuple[str, str | None]] = {
            dst.lower(): (src, cast_type) for dst, src, cast_type in mapping_entries
        }
        ods_set = {c.lower() for c in ods_cols}
        snapshot_mode = "content_hash" in ods_set
        fact_upsert = bool(self.config.get("dwd.fact_upsert", True))

        mapping_dest = [dst for dst, _, _ in mapping_entries]
        insert_cols: List[str] = list(mapping_dest)
@@ -1064,7 +1216,6 @@ class DwdLoadTask(BaseTask):
                insert_cols.append(col)

        pk_cols = self._get_primary_keys(cur, dwd_table)
        ods_set = {c.lower() for c in ods_cols}
        existing_lower = [c.lower() for c in insert_cols]
        for pk in pk_cols:
            pk_lower = pk.lower()
@@ -1092,7 +1243,11 @@ class DwdLoadTask(BaseTask):
            self.logger.warning("Skipping %s: no insertable columns found", dwd_table)
            return 0

        order_col = self._pick_order_column(dwd_table, dwd_cols, ods_cols)
        order_col = (
            self._pick_snapshot_order_column(ods_cols)
            if snapshot_mode
            else self._pick_order_column(dwd_table, dwd_cols, ods_cols)
        )
        where_sql = ""
        params: List[Any] = []
        dwd_table_sql = self._format_table(dwd_table, "billiards_dwd")
@@ -1122,12 +1277,40 @@ class DwdLoadTask(BaseTask):

        select_cols_sql = ", ".join(select_exprs)
        insert_cols_sql = ", ".join(f'"{c}"' for c in insert_cols)
        sql = f'INSERT INTO {dwd_table_sql} ({insert_cols_sql}) SELECT {select_cols_sql} FROM {ods_table_sql} {where_sql}'
        if snapshot_mode and pk_cols:
            key_exprs: list[str] = []
            for pk in pk_cols:
                pk_lower = pk.lower()
                if pk_lower in mapping:
                    src, cast_type = mapping[pk_lower]
                    key_exprs.append(self._cast_expr(src, cast_type))
                elif pk_lower in ods_set:
                    key_exprs.append(f'"{pk_lower}"')
                elif "id" in ods_set:
                    key_exprs.append('"id"')
            select_sql = self._latest_snapshot_select_sql(
                select_cols_sql,
                ods_table_sql,
                key_exprs,
                order_col,
                where_sql,
            )
            sql = f'INSERT INTO {dwd_table_sql} ({insert_cols_sql}) {select_sql}'
        else:
            sql = f'INSERT INTO {dwd_table_sql} ({insert_cols_sql}) SELECT {select_cols_sql} FROM {ods_table_sql} {where_sql}'

        pk_cols = self._get_primary_keys(cur, dwd_table)
        if pk_cols:
            pk_sql = ", ".join(f'"{c}"' for c in pk_cols)
            sql += f" ON CONFLICT ({pk_sql}) DO NOTHING"
            pk_lower = {c.lower() for c in pk_cols}
            set_exprs = [f'"{c}" = EXCLUDED."{c}"' for c in insert_cols if c.lower() not in pk_lower]
            if snapshot_mode or fact_upsert:
                if set_exprs:
                    sql += f" ON CONFLICT ({pk_sql}) DO UPDATE SET {', '.join(set_exprs)}"
                else:
                    sql += f" ON CONFLICT ({pk_sql}) DO NOTHING"
            else:
                sql += f" ON CONFLICT ({pk_sql}) DO NOTHING"

        cur.execute(sql, params)
        inserted = cur.rowcount

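        # With snapshot_mode and fact_upsert both true, the assembled statement
        # ends up shaped like (identifiers illustrative):
        #   INSERT INTO billiards_dwd.dwd_orders ("order_id", "amount", ...)
        #   SELECT DISTINCT ON ("order_id") ...
        #   FROM billiards_ods.orders  ORDER BY "order_id", "fetched_at" DESC NULLS LAST
        #   ON CONFLICT ("order_id") DO UPDATE SET "amount" = EXCLUDED."amount", ...
        # so re-fetched rows refresh the fact in place instead of being dropped.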
@@ -2,6 +2,7 @@
"""Manual sample-data ingestion: writes to ODS following the table structures in schema_ODS_doc.sql."""
from __future__ import annotations

import hashlib
import json
import os
from datetime import datetime
@@ -252,12 +253,17 @@ class ManualIngestTask(BaseTask):
        except Exception:
            pk_index = None

        has_content_hash = any(c[0].lower() == "content_hash" for c in columns_info)

        col_list = ", ".join(f'"{c}"' for c in columns)
        sql_prefix = f"INSERT INTO {table} ({col_list}) VALUES %s"
        if pk_col_db:
            update_cols = [c for c in columns if c != pk_col_db]
            set_clause = ", ".join(f'"{c}"=EXCLUDED."{c}"' for c in update_cols)
            sql_prefix += f' ON CONFLICT ("{pk_col_db}") DO UPDATE SET {set_clause}'
            if has_content_hash:
                sql_prefix += f' ON CONFLICT ("{pk_col_db}", "content_hash") DO NOTHING'
            else:
                update_cols = [c for c in columns if c != pk_col_db]
                set_clause = ", ".join(f'"{c}"=EXCLUDED."{c}"' for c in update_cols)
                sql_prefix += f' ON CONFLICT ("{pk_col_db}") DO UPDATE SET {set_clause}'

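        # Sketch of the two resulting conflict strategies (identifiers illustrative):
        #   with content_hash:    ... ON CONFLICT ("id", "content_hash") DO NOTHING
        #   without content_hash: ... ON CONFLICT ("id") DO UPDATE SET "col" = EXCLUDED."col", ...
        # Snapshot-style tables keep every distinct content version; plain
        # tables upsert the latest values in place.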
        params = []
        now = datetime.now()
@@ -284,6 +290,12 @@ class ManualIngestTask(BaseTask):
            if pk_col and (pk_val is None or pk_val == ""):
                continue

            content_hash = None
            if has_content_hash:
                hash_record = dict(merged_rec)
                hash_record["fetched_at"] = merged_rec.get("fetched_at", now)
                content_hash = self._compute_content_hash(hash_record, include_fetched_at=True)

            row_vals = []
            for col_name, data_type, udt in columns_info:
                col_lower = col_name.lower()
@@ -296,6 +308,9 @@ class ManualIngestTask(BaseTask):
                if col_lower == "fetched_at":
                    row_vals.append(merged_rec.get(col_name, now))
                    continue
                if col_lower == "content_hash":
                    row_vals.append(content_hash)
                    continue

                value = self._normalize_scalar(self._get_value_case_insensitive(merged_rec, col_name))

@@ -401,3 +416,48 @@ class ManualIngestTask(BaseTask):
        if dt.startswith("timestamp") or dt in ("date", "time", "interval"):
            return value if isinstance(value, str) else None
        return value

    @staticmethod
    def _hash_default(value):
        if isinstance(value, datetime):
            return value.isoformat()
        return str(value)

    @classmethod
    def _sanitize_record_for_hash(cls, record: dict, *, include_fetched_at: bool) -> dict:
        exclude = {
            "data",
            "payload",
            "source_file",
            "source_endpoint",
            "content_hash",
            "record_index",
        }
        if not include_fetched_at:
            exclude.add("fetched_at")

        def _strip(value):
            if isinstance(value, dict):
                cleaned = {}
                for k, v in value.items():
                    if isinstance(k, str) and k.lower() in exclude:
                        continue
                    cleaned[k] = _strip(v)
                return cleaned
            if isinstance(value, list):
                return [_strip(v) for v in value]
            return value

        return _strip(record or {})

    @classmethod
    def _compute_content_hash(cls, record: dict, *, include_fetched_at: bool) -> str:
        cleaned = cls._sanitize_record_for_hash(record, include_fetched_at=include_fetched_at)
        payload = json.dumps(
            cleaned,
            ensure_ascii=False,
            sort_keys=True,
            separators=(",", ":"),
            default=cls._hash_default,
        )
        return hashlib.sha256(payload.encode("utf-8")).hexdigest()

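As a quick sanity check, the canonical-JSON hashing above is stable under key order and ignores transport-only fields; a minimal sketch (record values invented):

    rec_a = {"orderId": 1, "amount": "9.9", "source_file": "a.json"}
    rec_b = {"amount": "9.9", "orderId": 1, "source_file": "b.json"}
    h = ManualIngestTask._compute_content_hash
    assert h(rec_a, include_fetched_at=False) == h(rec_b, include_fetched_at=False)
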
@@ -2,6 +2,7 @@
"""ODS ingestion tasks."""
from __future__ import annotations

import hashlib
import json
from dataclasses import dataclass, field
from datetime import datetime, timedelta
@@ -317,6 +318,7 @@ class BaseOdsTask(BaseTask):
        db_json_cols_lower = {
            c[0].lower() for c in cols_info if c[1] in ("json", "jsonb") or c[2] in ("json", "jsonb")
        }
        needs_content_hash = any(c[0].lower() == "content_hash" for c in cols_info)

        col_names = [c[0] for c in cols_info]
        quoted_cols = ", ".join(f'\"{c}\"' for c in col_names)
@@ -330,6 +332,7 @@ class BaseOdsTask(BaseTask):

        params: list[tuple] = []
        skipped = 0
        merged_records: list[dict] = []

        root_site_profile = None
        if isinstance(response_payload, dict):
@@ -345,6 +348,7 @@ class BaseOdsTask(BaseTask):
                continue

            merged_rec = self._merge_record_layers(rec)
            merged_records.append({"raw": rec, "merged": merged_rec})
            if table in {"billiards_ods.recharge_settlements", "billiards_ods.settlement_records"}:
                site_profile = merged_rec.get("siteProfile") or merged_rec.get("site_profile") or root_site_profile
                if isinstance(site_profile, dict):
@@ -363,9 +367,42 @@ class BaseOdsTask(BaseTask):
            _fill_missing("siteid", [site_profile.get("siteId"), site_profile.get("id")])
            _fill_missing("sitename", [site_profile.get("shop_name"), site_profile.get("siteName")])

        has_fetched_at = any(c[0].lower() == "fetched_at" for c in cols_info)
        business_keys = [c for c in pk_cols if str(c).lower() != "content_hash"]
        compare_latest = bool(needs_content_hash and has_fetched_at and business_keys)
        latest_compare_hash: dict[tuple[Any, ...], str | None] = {}
        if compare_latest:
            key_values: list[tuple[Any, ...]] = []
            for item in merged_records:
                merged_rec = item["merged"]
                key = tuple(self._get_value_case_insensitive(merged_rec, k) for k in business_keys)
                if any(v is None or v == "" for v in key):
                    continue
                key_values.append(key)

            if key_values:
                with self.db.conn.cursor() as cur:
                    latest_payloads = self._fetch_latest_payloads(cur, table, business_keys, key_values)
                for key, payload in latest_payloads.items():
                    latest_compare_hash[key] = self._compute_compare_hash_from_payload(payload)

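        # Net effect (sketch): one DISTINCT ON prefetch per batch yields the
        # latest stored payload per business key; records whose
        # fetched_at-independent content hash matches that snapshot are skipped
        # in the loop below instead of inserting a duplicate version.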
        for item in merged_records:
            rec = item["raw"]
            merged_rec = item["merged"]

            content_hash = None
            compare_hash = None
            if needs_content_hash:
                compare_hash = self._compute_content_hash(merged_rec, include_fetched_at=False)
                hash_record = dict(merged_rec)
                hash_record["fetched_at"] = now
                content_hash = self._compute_content_hash(hash_record, include_fetched_at=True)

            if pk_cols:
                missing_pk = False
                for pk in pk_cols:
                    if str(pk).lower() == "content_hash":
                        continue
                    pk_val = self._get_value_case_insensitive(merged_rec, pk)
                    if pk_val is None or pk_val == "":
                        missing_pk = True
@@ -374,6 +411,16 @@ class BaseOdsTask(BaseTask):
                    skipped += 1
                    continue

            if compare_latest and compare_hash is not None:
                key = tuple(self._get_value_case_insensitive(merged_rec, k) for k in business_keys)
                if any(v is None or v == "" for v in key):
                    skipped += 1
                    continue
                last_hash = latest_compare_hash.get(key)
                if last_hash is not None and last_hash == compare_hash:
                    skipped += 1
                    continue

            row_vals: list[Any] = []
            for (col_name, data_type, _udt) in cols_info:
                col_lower = col_name.lower()
@@ -389,6 +436,9 @@ class BaseOdsTask(BaseTask):
                if col_lower == "fetched_at":
                    row_vals.append(now)
                    continue
                if col_lower == "content_hash":
                    row_vals.append(content_hash)
                    continue

                value = self._normalize_scalar(self._get_value_case_insensitive(merged_rec, col_name))
                if col_lower in db_json_cols_lower:
@@ -472,6 +522,93 @@ class BaseOdsTask(BaseTask):
            return resolver(spec.endpoint)
        return None

    @staticmethod
    def _hash_default(value):
        if isinstance(value, datetime):
            return value.isoformat()
        return str(value)

    @classmethod
    def _sanitize_record_for_hash(cls, record: dict, *, include_fetched_at: bool) -> dict:
        exclude = {
            "data",
            "payload",
            "source_file",
            "source_endpoint",
            "content_hash",
            "record_index",
        }
        if not include_fetched_at:
            exclude.add("fetched_at")

        def _strip(value):
            if isinstance(value, dict):
                cleaned = {}
                for k, v in value.items():
                    if isinstance(k, str) and k.lower() in exclude:
                        continue
                    cleaned[k] = _strip(v)
                return cleaned
            if isinstance(value, list):
                return [_strip(v) for v in value]
            return value

        return _strip(record or {})

    @classmethod
    def _compute_content_hash(cls, record: dict, *, include_fetched_at: bool) -> str:
        cleaned = cls._sanitize_record_for_hash(record, include_fetched_at=include_fetched_at)
        payload = json.dumps(
            cleaned,
            ensure_ascii=False,
            sort_keys=True,
            separators=(",", ":"),
            default=cls._hash_default,
        )
        return hashlib.sha256(payload.encode("utf-8")).hexdigest()

    @staticmethod
    def _compute_compare_hash_from_payload(payload: Any) -> str | None:
        if payload is None:
            return None
        if isinstance(payload, str):
            try:
                payload = json.loads(payload)
            except Exception:
                return None
        if not isinstance(payload, dict):
            return None
        merged = BaseOdsTask._merge_record_layers(payload)
        return BaseOdsTask._compute_content_hash(merged, include_fetched_at=False)

    @staticmethod
    def _fetch_latest_payloads(cur, table: str, business_keys: Sequence[str], key_values: Sequence[tuple]) -> dict:
        if not business_keys or not key_values:
            return {}
        keys_sql = ", ".join(f'"{k}"' for k in business_keys)
        sql = (
            f"WITH keys({keys_sql}) AS (VALUES %s) "
            f"SELECT DISTINCT ON ({keys_sql}) {keys_sql}, payload "
            f"FROM {table} t JOIN keys k USING ({keys_sql}) "
            f"ORDER BY {keys_sql}, fetched_at DESC NULLS LAST"
        )
        unique_keys = list({tuple(k) for k in key_values})
        execute_values(cur, sql, unique_keys, page_size=500)
        rows = cur.fetchall() or []
        result = {}
        if rows and isinstance(rows[0], dict):
            for r in rows:
                key = tuple(r[k] for k in business_keys)
                result[key] = r.get("payload")
            return result

        key_len = len(business_keys)
        for r in rows:
            key = tuple(r[:key_len])
            payload = r[key_len] if len(r) > key_len else None
            result[key] = payload
        return result


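# For business_keys = ["orderid"] against a hypothetical billiards_ods.orders,
# the prefetch statement above expands to roughly:
#   WITH keys("orderid") AS (VALUES %s)
#   SELECT DISTINCT ON ("orderid") "orderid", payload
#   FROM billiards_ods.orders t JOIN keys k USING ("orderid")
#   ORDER BY "orderid", fetched_at DESC NULLS LAST
# with execute_values() substituting the de-duplicated key tuples for %s.
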
def _int_col(name: str, *sources: str, required: bool = False) -> ColumnSpec:
    return ColumnSpec(

@@ -176,6 +176,7 @@ class FakeCursor:
            ("id", "bigint", "int8"),
            ("sitegoodsstockid", "bigint", "int8"),
            ("record_index", "integer", "int4"),
            ("content_hash", "text", "text"),
            ("source_file", "text", "text"),
            ("source_endpoint", "text", "text"),
            ("fetched_at", "timestamp with time zone", "timestamptz"),

etl_billiards/utils/ods_record_utils.py (new file, 55 lines)
@@ -0,0 +1,55 @@
# -*- coding: utf-8 -*-
"""Shared helpers for ODS/API record normalization."""
from __future__ import annotations

from typing import Iterable


def merge_record_layers(record: dict) -> dict:
    """Flatten nested data/settleList layers into a single dict."""
    merged = record
    data_part = merged.get("data")
    while isinstance(data_part, dict):
        merged = {**data_part, **merged}
        data_part = data_part.get("data")
    settle_inner = merged.get("settleList")
    if isinstance(settle_inner, dict):
        merged = {**settle_inner, **merged}
    return merged


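# Example (invented record): nested "data" layers flatten outward-first, and
# keys already present on the outer record win.
#   merge_record_layers({"id": 1, "data": {"amount": "12.5", "data": {"siteId": 7}}})
#   -> {"siteId": 7, "amount": "12.5", "data": {...}, "id": 1}
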
def get_value_case_insensitive(record: dict | None, col: str | None):
    """Fetch a column value without case sensitivity."""
    if record is None or col is None:
        return None
    if col in record:
        return record.get(col)
    col_lower = col.lower()
    for k, v in record.items():
        if isinstance(k, str) and k.lower() == col_lower:
            return v
    return None


def normalize_pk_value(value):
    """Normalize a PK value (e.g., digit string -> int)."""
    if value is None:
        return None
    if isinstance(value, str) and value.isdigit():
        try:
            return int(value)
        except Exception:
            return value
    return value


def pk_tuple_from_record(record: dict, pk_cols: Iterable[str]) -> tuple | None:
    """Extract a PK tuple from a record."""
    merged = merge_record_layers(record)
    values = []
    for col in pk_cols:
        val = normalize_pk_value(get_value_case_insensitive(merged, col))
        if val is None or val == "":
            return None
        values.append(val)
    return tuple(values)
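# Usage sketch (values invented):
#   pk_tuple_from_record({"data": {"orderId": "42"}}, ["orderid"])  -> (42,)
#   pk_tuple_from_record({"orderId": None}, ["orderid"])            -> None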