微信小程序页面迁移校验之前 P5任务处理之前
This commit is contained in:
149
scripts/ops/_diagnose_spi_v3.py
Normal file
149
scripts/ops/_diagnose_spi_v3.py
Normal file
@@ -0,0 +1,149 @@
|
||||
"""检查 ODS/DWD 数据为什么只到 2/14,以及 SPI canonical_member_id 映射"""
|
||||
import os
|
||||
from pathlib import Path
|
||||
from dotenv import load_dotenv
|
||||
|
||||
# Pull settings from the ".env" file that sits two directory levels above
# the folder containing this script.
load_dotenv(Path(__file__).resolve().parents[2].joinpath(".env"))

# The database DSN is mandatory; abort early when it is missing or empty.
PG_DSN = os.getenv("PG_DSN")
if PG_DSN is None or PG_DSN == "":
    raise RuntimeError("PG_DSN 未设置")
|
||||
|
||||
import psycopg2
|
||||
import psycopg2.extras
|
||||
|
||||
# Single connection shared by every query in this script; autocommit is
# enabled on it.
conn = psycopg2.connect(PG_DSN)
conn.autocommit = True


def q(sql, params=None):
    """Run one SQL statement on the shared connection and return all rows.

    Args:
        sql: Statement to execute (anything ``cursor.execute`` accepts).
        params: Optional bound parameters. ``None`` or an empty container
            means the statement carries no placeholders.

    Returns:
        The ``fetchall()`` result of a ``RealDictCursor``, i.e. a list of
        rows addressable by column name.
    """
    # psycopg2 scans the statement for "%" placeholders whenever the
    # parameter argument is anything other than None -- an empty tuple
    # included.  Passing None for parameterless statements keeps literal "%"
    # characters (e.g. in LIKE '%settle%') from being parsed as placeholders.
    bound = params if params else None
    with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur:
        cur.execute(sql, bound)
        return cur.fetchall()
|
||||
|
||||
# 1. Look up the real names of the settlement- and payment-related tables
#    in the "ods" schema.  Both result sets are printed under one heading.
print("ODS 结算相关表:")
table_lookup_statements = (
    """
    SELECT table_name FROM information_schema.tables
    WHERE table_schema = 'ods' AND table_name LIKE '%settle%'
    ORDER BY table_name
    """,
    """
    SELECT table_name FROM information_schema.tables
    WHERE table_schema = 'ods' AND table_name LIKE '%payment%'
    ORDER BY table_name
    """,
)
for statement in table_lookup_statements:
    for table_row in q(statement):
        print(f" {table_row['table_name']}")
|
||||
|
||||
# 2. List every table in the "ods" schema.  The result is kept in rows3
#    because the freshness probe near the end of the script iterates over it.
rows3 = q("""
    SELECT table_name FROM information_schema.tables
    WHERE table_schema = 'ods'
    ORDER BY table_name
""")
ods_table_count = len(rows3)
print(f"\nODS 全部表 ({ods_table_count} 张):")
for table_row in rows3:
    print(f" {table_row['table_name']}")
|
||||
|
||||
# 3. Inspect how settlement rows map onto canonical_member_id.
print("\n" + "=" * 60)
print("SPI canonical_member_id 映射分析")

# canonical_member_id is the settlement's own member_id unless that value is
# 0 or NULL, in which case the tenant_member_id of the joined card account is
# used.  Only settle_type 1 and 3 rows of the last 90 days are considered,
# restricted to a single site_id picked by the sub-select.
mapping_sql = """
    WITH consume_source AS (
        SELECT
            s.member_id AS raw_member_id,
            COALESCE(NULLIF(s.member_id, 0), mca.tenant_member_id) AS canonical_member_id,
            s.pay_time,
            COALESCE(s.pay_amount, 0) AS pay_amount
        FROM dwd.dwd_settlement_head s
        LEFT JOIN dwd.dim_member_card_account mca
            ON s.member_card_account_id = mca.member_card_id
            AND mca.scd2_is_current = 1
            AND mca.register_site_id = s.site_id
            AND COALESCE(mca.is_delete, 0) = 0
        WHERE s.site_id = (SELECT DISTINCT site_id FROM dwd.dwd_settlement_head LIMIT 1)
            AND s.settle_type IN (1, 3)
            AND s.pay_time >= NOW() - INTERVAL '90 days'
    )
    SELECT
        COUNT(*) AS total_records,
        COUNT(DISTINCT raw_member_id) AS raw_members,
        COUNT(DISTINCT canonical_member_id) AS canonical_members,
        COUNT(*) FILTER (WHERE canonical_member_id IS NULL OR canonical_member_id = 0) AS null_canonical,
        COUNT(*) FILTER (WHERE raw_member_id != canonical_member_id) AS remapped
    FROM consume_source
"""
# The query is a plain aggregate, so the first row holds all counters.
mapping_stats = q(mapping_sql)[0]

# (printed label, result column) pairs, in output order.
report_layout = (
    ("总记录", "total_records"),
    ("原始 member_id 去重", "raw_members"),
    ("canonical_member_id 去重", "canonical_members"),
    ("canonical 为 NULL/0", "null_canonical"),
    ("被重映射的记录", "remapped"),
)
for label, column in report_layout:
    print(f" {label}: {mapping_stats[column]}")
|
||||
|
||||
# 4. Per canonical member: total spend over the last 90 days (spend_90) and
#    the portion of it paid within the last 30 days (spend_30).  rows5 is
#    reused by the median calculation that follows this section.
rows5 = q("""
    WITH consume_source AS (
        SELECT
            COALESCE(NULLIF(s.member_id, 0), mca.tenant_member_id) AS canonical_member_id,
            s.pay_time,
            COALESCE(s.pay_amount, 0) AS pay_amount
        FROM dwd.dwd_settlement_head s
        LEFT JOIN dwd.dim_member_card_account mca
            ON s.member_card_account_id = mca.member_card_id
            AND mca.scd2_is_current = 1
            AND mca.register_site_id = s.site_id
            AND COALESCE(mca.is_delete, 0) = 0
        WHERE s.site_id = (SELECT DISTINCT site_id FROM dwd.dwd_settlement_head LIMIT 1)
            AND s.settle_type IN (1, 3)
            AND s.pay_time >= NOW() - INTERVAL '90 days'
    )
    SELECT
        canonical_member_id,
        SUM(pay_amount) AS spend_90,
        SUM(CASE WHEN pay_time >= NOW() - INTERVAL '30 days' THEN pay_amount ELSE 0 END) AS spend_30
    FROM consume_source
    WHERE canonical_member_id > 0
    GROUP BY canonical_member_id
""")

total = len(rows5)
has_30 = sum(1 for r in rows5 if float(r['spend_30']) > 0)
# Members whose 90-day sum is strictly positive; computed but not printed.
has_90 = sum(1 for r in rows5 if float(r['spend_90']) > 0)

# Guard the percentages: with no qualifying members the query returns zero
# rows, and dividing by `total` would raise ZeroDivisionError.
no_30 = total - has_30
pct_has_30 = has_30 / total * 100 if total else 0.0
pct_no_30 = no_30 / total * 100 if total else 0.0

print("\nSPI 视角(canonical_member_id):")
print(f" 90天有消费会员: {total}")
print(f" 30天有消费会员: {has_30} ({pct_has_30:.1f}%)")
print(f" 30天无消费会员: {no_30} ({pct_no_30:.1f}%)")
|
||||
|
||||
# 5. Median spend per member.  The value reported is the element at index
#    n // 2 of the sorted amounts, so for an even number of members it is the
#    upper of the two middle values (no averaging).  With no members at all,
#    0 is reported.
spend_30_vals = sorted(float(row['spend_30']) for row in rows5)
spend_90_vals = sorted(float(row['spend_90']) for row in rows5)
n = len(rows5)
if n:
    middle = n // 2
    median_30 = spend_30_vals[middle]
    median_90 = spend_90_vals[middle]
else:
    median_30 = median_90 = 0
print(f"\n spend_30 中位数: {median_30:.2f}")
print(f" spend_90 中位数: {median_90:.2f}")
|
||||
|
||||
# 6. Probe how recent the data in the ODS tables is, as a proxy for when the
#    API pull last delivered rows.  Only the first five tables of rows3 (in
#    table-name order) are probed.
from psycopg2 import sql as pgsql  # identifier quoting for dynamic SQL

print("\n" + "=" * 60)
print("ODS 各表最新数据时间:")
for r in rows3[:5]:
    tname = r['table_name']
    try:
        # The table name is passed as a bound parameter instead of being
        # spliced into the statement text.
        cols = q(
            """
            SELECT column_name FROM information_schema.columns
            WHERE table_schema = 'ods' AND table_name = %s
                AND column_name IN ('pay_time', 'create_time', 'updated_at', 'etl_loaded_at')
            ORDER BY column_name
            """,
            (tname,),
        )
        if not cols:
            # None of the candidate timestamp columns exists on this table.
            continue
        # Several candidates may exist; the alphabetically first one is used.
        col = cols[0]['column_name']
        # Table and column names cannot be bound as parameters, so they are
        # quoted as identifiers; this also copes with names that would need
        # quoting (mixed case, special characters).
        max_query = pgsql.SQL("SELECT MAX({col}) AS max_time FROM ods.{tbl}").format(
            col=pgsql.Identifier(col),
            tbl=pgsql.Identifier(tname),
        )
        maxr = q(max_query)
        if maxr and maxr[0]['max_time']:
            print(f" {tname}.{col}: {maxr[0]['max_time']}")
    except Exception as e:
        # Best effort: a failure on one table must not stop the remaining
        # probes, so it is reported and the loop carries on.
        print(f" {tname}: 查询失败 ({e})")

# Release the shared connection before announcing completion.
conn.close()
print("\n诊断完成。")
|
||||
Reference in New Issue
Block a user