Files
Neo-ZQYY/scripts/ops/_final_root_cause_analysis.py

250 lines
10 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
"""
最终根因分析ODS 重复数据和 DWD 处理逻辑
"""
import os
import psycopg2
from datetime import datetime
from dotenv import load_dotenv
def _analyze_ods_duplicates(cur):
    """Step 1: sample duplicated order ids in ODS and quantify the duplication rate.

    Guards the duplication-ratio print against an empty date window
    (the original code divided by COUNT(DISTINCT id) unconditionally,
    which raises ZeroDivisionError when no rows match).
    """
    print("\n📊 1. ODS 重复数据分析")
    cur.execute("""
        SELECT
            id,
            COUNT(*) as duplicate_count,
            ARRAY_AGG(DISTINCT paytime ORDER BY paytime) as pay_times,
            ARRAY_AGG(DISTINCT payamount ORDER BY payamount) as pay_amounts
        FROM ods.settlement_records
        WHERE paytime::date BETWEEN '2026-02-10' AND '2026-02-14'
        GROUP BY id
        HAVING COUNT(*) > 1
        ORDER BY duplicate_count DESC
        LIMIT 10
    """)
    duplicates = cur.fetchall()
    print(f"发现 {len(duplicates)} 个重复的订单ID (样本):")
    for oid, count, times, amounts in duplicates:
        print(f" ID {oid}: 重复 {count}")
        print(f" 时间: {times}")
        print(f" 金额: {amounts}")

    # Aggregate duplication statistics over the same window.
    cur.execute("""
        SELECT
            COUNT(*) as duplicate_count,
            COUNT(DISTINCT id) as unique_ids
        FROM ods.settlement_records
        WHERE paytime::date BETWEEN '2026-02-10' AND '2026-02-14'
    """)
    total_records, unique_ids = cur.fetchone()
    print(f"\n重复统计 (2026-02-10 到 2026-02-14):")
    print(f" 总记录数: {total_records:,}")
    print(f" 唯一ID数: {unique_ids:,}")
    # Avoid ZeroDivisionError when the window contains no rows at all.
    if unique_ids:
        print(f" 重复倍数: {total_records / unique_ids:.2f}")


def _analyze_dwd_dedup(cur):
    """Step 2: check whether DWD contains duplicate order_settle_ids.

    An empty result means the DWD layer deduplicates the ODS input.
    """
    print("\n🔄 2. DWD 重复处理策略")
    cur.execute("""
        SELECT
            order_settle_id,
            COUNT(*) as count
        FROM dwd.dwd_settlement_head
        GROUP BY order_settle_id
        HAVING COUNT(*) > 1
        LIMIT 5
    """)
    dwd_duplicates = cur.fetchall()
    if dwd_duplicates:
        print("DWD 中的重复记录:")
        for oid, count in dwd_duplicates:
            print(f" ID {oid}: {count}")
    else:
        print("DWD 中无重复记录 - 说明 DWD 有去重逻辑")


def _analyze_monthly_coverage(cur):
    """Step 3: compare ODS vs. DWD record counts month by month (last 6 months)."""
    print("\n📈 3. 历史数据处理分析")
    cur.execute("""
        WITH monthly_stats AS (
            SELECT
                DATE_TRUNC('month', paytime) as month,
                COUNT(*) as ods_count,
                COUNT(DISTINCT id) as ods_unique
            FROM ods.settlement_records
            GROUP BY DATE_TRUNC('month', paytime)
        ),
        dwd_monthly_stats AS (
            SELECT
                DATE_TRUNC('month', pay_time) as month,
                COUNT(*) as dwd_count,
                COUNT(DISTINCT order_settle_id) as dwd_unique
            FROM dwd.dwd_settlement_head
            GROUP BY DATE_TRUNC('month', pay_time)
        )
        SELECT
            o.month,
            o.ods_count,
            o.ods_unique,
            COALESCE(d.dwd_count, 0) as dwd_count,
            COALESCE(d.dwd_unique, 0) as dwd_unique,
            o.ods_count - COALESCE(d.dwd_count, 0) as missing_records,
            o.ods_unique - COALESCE(d.dwd_unique, 0) as missing_unique
        FROM monthly_stats o
        LEFT JOIN dwd_monthly_stats d ON o.month = d.month
        ORDER BY o.month DESC
        LIMIT 6
    """)
    monthly_data = cur.fetchall()
    print("按月数据处理情况:")
    for month, ods_count, ods_unique, dwd_count, dwd_unique, missing_records, missing_unique in monthly_data:
        print(f" {month.strftime('%Y-%m')}:")
        print(f" ODS: {ods_count:,} 条 ({ods_unique:,} 唯一)")
        print(f" DWD: {dwd_count:,} 条 ({dwd_unique:,} 唯一)")
        print(f" 缺失: {missing_records:,} 条 ({missing_unique:,} 唯一)")
        # Coverage is meaningless (and would divide by zero) for an empty month.
        if ods_unique > 0:
            coverage = (dwd_unique / ods_unique) * 100
            print(f" 覆盖率: {coverage:.1f}%")


def _analyze_spi_baseline(cur):
    """Step 4: recompute 30-day member-consumption stats excluding zero/negative amounts."""
    print("\n💰 4. SPI 计算基础数据验证")
    cur.execute("""
        WITH member_consumption AS (
            SELECT
                member_id,
                COUNT(*) as order_count_30d,
                SUM(pay_amount) as total_amount_30d,
                AVG(pay_amount) as avg_amount_30d,
                PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY pay_amount) as median_amount_30d
            FROM dwd.dwd_settlement_head
            WHERE member_id > 0
            AND pay_time >= CURRENT_DATE - INTERVAL '30 days'
            AND pay_amount > 0 -- 排除零和负数消费
            GROUP BY member_id
        )
        SELECT
            COUNT(*) as active_members_30d,
            AVG(total_amount_30d) as avg_total_30d,
            PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY total_amount_30d) as median_total_30d,
            AVG(avg_amount_30d) as avg_per_order_30d,
            PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY avg_amount_30d) as median_per_order_30d
        FROM member_consumption
    """)
    spi_stats = cur.fetchone()
    if spi_stats and spi_stats[0] > 0:
        print("修正后的 SPI 基础数据 (近30天排除零消费):")
        print(f" 活跃会员数: {spi_stats[0]:,}")
        print(f" 平均总消费: {spi_stats[1]:.2f}")
        print(f" 中位数总消费: {spi_stats[2]:.2f}")
        print(f" 平均单次消费: {spi_stats[3]:.2f}")
        print(f" 中位数单次消费: {spi_stats[4]:.2f}")
    else:
        print("近30天无有效消费数据")


def _analyze_amount_quality(cur):
    """Step 5: bucket 90-day pay_amount values into quality categories."""
    print("\n🔍 5. 数据质量问题检查")
    cur.execute("""
        SELECT
            CASE
                WHEN pay_amount < 0 THEN '负数消费'
                WHEN pay_amount = 0 THEN '零消费'
                WHEN pay_amount > 0 AND pay_amount <= 10 THEN '小额消费(≤10)'
                WHEN pay_amount > 10 THEN '正常消费(>10)'
            END as amount_category,
            COUNT(*) as record_count,
            COUNT(DISTINCT member_id) as member_count,
            AVG(pay_amount) as avg_amount
        FROM dwd.dwd_settlement_head
        WHERE pay_time >= CURRENT_DATE - INTERVAL '90 days'
        GROUP BY
            CASE
                WHEN pay_amount < 0 THEN '负数消费'
                WHEN pay_amount = 0 THEN '零消费'
                WHEN pay_amount > 0 AND pay_amount <= 10 THEN '小额消费(≤10)'
                WHEN pay_amount > 10 THEN '正常消费(>10)'
            END
        ORDER BY record_count DESC
    """)
    quality_stats = cur.fetchall()
    print("90天内消费金额质量分析:")
    for category, record_count, member_count, avg_amount in quality_stats:
        print(f" {category}: {record_count:,} 条, {member_count:,} 会员, 平均 {avg_amount:.2f}")


def _write_report(system_log_root):
    """Render the markdown root-cause report under *system_log_root*.

    Returns the path of the file that was written.
    """
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    report_path = f"{system_log_root}/final_root_cause_analysis_{timestamp}.md"
    # NOTE: the markdown body lines stay at column 0 so the written file is unchanged.
    report_content = f"""# ETL 数据问题最终根因分析报告
**生成时间**: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}
## 🎯 核心发现
### 1. ODS 数据重复问题
- ODS 表中存在大量重复记录每个订单ID平均重复2次
- 这导致 ODS 记录数看起来是实际订单数的2倍
### 2. DWD 去重处理
- DWD 层正确实现了去重逻辑,每个 order_settle_id 只保留一条记录
- 这解释了为什么 DWD 记录数约为 ODS 的50%
### 3. 历史数据缺失
- 总体上 DWD 缺失约60%的历史数据
- 这可能是由于历史 ETL 执行不完整导致的
### 4. SPI 警告根因
- 大量零消费和负数消费记录影响了中位数计算
- 近30天活跃会员数量极少导致统计基数不足
## 🔧 解决建议
1. **数据修复**: 运行完整的历史数据回填
2. **SPI 优化**: 在计算中排除零消费和负数消费
3. **监控改进**: 建立 ETL 数据完整性监控
4. **质量控制**: 加强数据质量检查和清洗
## 📊 影响评估
- **数据完整性**: 需要修复历史缺失数据
- **SPI 准确性**: 需要优化计算逻辑
- **业务影响**: 当前 SPI 指标可能不准确
"""
    os.makedirs(os.path.dirname(report_path), exist_ok=True)
    with open(report_path, 'w', encoding='utf-8') as f:
        f.write(report_content)
    return report_path


def main():
    """Run the full ODS/DWD root-cause analysis and write a markdown report.

    Requires the TEST_DB_DSN and SYSTEM_LOG_ROOT environment variables
    (loadable from a .env file); raises RuntimeError if either is missing.
    """
    # Load environment variables (.env supported via python-dotenv).
    load_dotenv()
    test_db_dsn = os.environ.get('TEST_DB_DSN')
    system_log_root = os.environ.get('SYSTEM_LOG_ROOT')
    if not test_db_dsn or not system_log_root:
        raise RuntimeError("环境变量未设置")

    print("🚨 最终根因分析")
    print("=" * 50)

    # Connection context manager commits on clean exit / rolls back on error;
    # all five analyses share one cursor.
    with psycopg2.connect(test_db_dsn) as conn:
        with conn.cursor() as cur:
            _analyze_ods_duplicates(cur)
            _analyze_dwd_dedup(cur)
            _analyze_monthly_coverage(cur)
            _analyze_spi_baseline(cur)
            _analyze_amount_quality(cur)

    # Report generation needs no DB access, so it runs after the connection closes.
    report_path = _write_report(system_log_root)
    print(f"\n📝 最终根因分析报告: {report_path}")
# Standard entry-point guard: run the analysis only when executed as a script,
# so importing this module has no side effects.
if __name__ == "__main__":
    main()