Files
feiqiu-ETL/etl_billiards/scripts/analyze_discount_patterns.py
2026-02-04 21:39:01 +08:00

637 lines
23 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# -*- coding: utf-8 -*-
"""
优惠口径抽样分析脚本
功能说明:
从dwd_settlement_head表抽样100单分析以下优惠字段的使用情况
- adjust_amount: 台费打折/调整(可能包含大客户优惠、其他优惠)
- member_discount_amount: 会员折扣
- rounding_amount: 抹零金额
- coupon_amount: 团购抵消台费
- gift_card_amount: 赠送卡支付
分析目标:
1. 大客户优惠:是否存在"大客户"标识?如何与普通调整区分?
2. 会员折扣:是否有非零值?使用场景是什么?
3. 抹零抹零规则与adjust_amount的关系
4. 其他优惠adjust_amount中还包含哪些优惠类型
输出:
- 控制台打印分析报告
- 生成 docs/analysis_discount_patterns.md 报告文件
作者ETL团队
创建日期2026-02-01
"""
import os
import sys
from datetime import datetime
from decimal import Decimal
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
# 添加项目根目录到Python路径
project_root = Path(__file__).parent.parent.parent
sys.path.insert(0, str(project_root))
from etl_billiards.utils.config import Config
from etl_billiards.utils.db import DatabaseConnection
def analyze_discount_patterns():
"""
执行优惠口径抽样分析
"""
print("=" * 80)
print("优惠口径抽样分析")
print("=" * 80)
print()
# 加载配置和数据库连接
config = Config()
db = DatabaseConnection(config)
try:
# 1. 获取总体统计
print("【1. 总体统计】")
print("-" * 40)
overall_stats = get_overall_stats(db)
print_overall_stats(overall_stats)
print()
# 2. 抽样分析优惠订单
print("【2. 有优惠的订单抽样分析100单")
print("-" * 40)
sample_orders = get_sample_orders_with_discount(db, limit=100)
discount_analysis = analyze_sample_orders(sample_orders)
print_discount_analysis(discount_analysis)
print()
# 3. adjust_amount详细分析
print("【3. adjust_amount (台费打折/调整) 详细分析】")
print("-" * 40)
adjust_analysis = analyze_adjust_amount(db)
print_adjust_analysis(adjust_analysis)
print()
# 4. 会员折扣使用分析
print("【4. member_discount_amount (会员折扣) 使用分析】")
print("-" * 40)
member_discount_analysis = analyze_member_discount(db)
print_member_discount_analysis(member_discount_analysis)
print()
# 5. 抹零规则分析
print("【5. rounding_amount (抹零) 规则分析】")
print("-" * 40)
rounding_analysis = analyze_rounding(db)
print_rounding_analysis(rounding_analysis)
print()
# 6. 团购优惠分析
print("【6. 团购优惠分析】")
print("-" * 40)
groupbuy_analysis = analyze_groupbuy(db)
print_groupbuy_analysis(groupbuy_analysis)
print()
# 7. 生成分析报告
print("【7. 生成分析报告】")
print("-" * 40)
report = generate_report(
overall_stats,
discount_analysis,
adjust_analysis,
member_discount_analysis,
rounding_analysis,
groupbuy_analysis
)
# 保存报告
report_path = project_root / "etl_billiards" / "docs" / "analysis_discount_patterns.md"
with open(report_path, 'w', encoding='utf-8') as f:
f.write(report)
print(f"报告已保存到: {report_path}")
finally:
db.close()
def get_overall_stats(db: DatabaseConnection) -> Dict[str, Any]:
"""
获取总体统计数据
"""
sql = """
SELECT
COUNT(*) AS total_orders,
COUNT(CASE WHEN adjust_amount != 0 THEN 1 END) AS orders_with_adjust,
COUNT(CASE WHEN member_discount_amount != 0 THEN 1 END) AS orders_with_member_discount,
COUNT(CASE WHEN rounding_amount != 0 THEN 1 END) AS orders_with_rounding,
COUNT(CASE WHEN coupon_amount != 0 THEN 1 END) AS orders_with_coupon,
COUNT(CASE WHEN gift_card_amount != 0 THEN 1 END) AS orders_with_gift_card,
SUM(adjust_amount) AS total_adjust,
SUM(member_discount_amount) AS total_member_discount,
SUM(rounding_amount) AS total_rounding,
SUM(coupon_amount) AS total_coupon,
SUM(gift_card_amount) AS total_gift_card,
SUM(consume_money) AS total_consume,
SUM(pay_amount) AS total_pay
FROM billiards_dwd.dwd_settlement_head
"""
rows = db.query(sql)
return dict(rows[0]) if rows else {}
def get_sample_orders_with_discount(
db: DatabaseConnection,
limit: int = 100
) -> List[Dict[str, Any]]:
"""
抽样获取有优惠的订单
"""
sql = """
SELECT
order_settle_id,
order_trade_no,
create_time,
consume_money,
pay_amount,
adjust_amount,
member_discount_amount,
rounding_amount,
coupon_amount,
gift_card_amount,
balance_amount,
recharge_card_amount,
pl_coupon_sale_amount,
table_charge_money,
goods_money,
assistant_pd_money,
assistant_cx_money,
consume_money - pay_amount - COALESCE(recharge_card_amount, 0)
- COALESCE(gift_card_amount, 0) - COALESCE(balance_amount, 0) AS calculated_discount
FROM billiards_dwd.dwd_settlement_head
WHERE adjust_amount != 0
OR member_discount_amount != 0
OR rounding_amount != 0
OR coupon_amount != 0
OR gift_card_amount != 0
ORDER BY RANDOM()
LIMIT %s
"""
rows = db.query(sql, (limit,))
return [dict(row) for row in rows] if rows else []
def analyze_sample_orders(orders: List[Dict[str, Any]]) -> Dict[str, Any]:
"""
分析抽样订单
"""
analysis = {
'total_sampled': len(orders),
'with_adjust': 0,
'with_member_discount': 0,
'with_rounding': 0,
'with_coupon': 0,
'with_gift_card': 0,
'adjust_values': [],
'member_discount_values': [],
'rounding_values': [],
'coupon_values': [],
'gift_card_values': [],
}
for order in orders:
adjust = Decimal(str(order.get('adjust_amount', 0)))
member_discount = Decimal(str(order.get('member_discount_amount', 0)))
rounding = Decimal(str(order.get('rounding_amount', 0)))
coupon = Decimal(str(order.get('coupon_amount', 0)))
gift_card = Decimal(str(order.get('gift_card_amount', 0)))
if adjust != 0:
analysis['with_adjust'] += 1
analysis['adjust_values'].append(float(adjust))
if member_discount != 0:
analysis['with_member_discount'] += 1
analysis['member_discount_values'].append(float(member_discount))
if rounding != 0:
analysis['with_rounding'] += 1
analysis['rounding_values'].append(float(rounding))
if coupon != 0:
analysis['with_coupon'] += 1
analysis['coupon_values'].append(float(coupon))
if gift_card != 0:
analysis['with_gift_card'] += 1
analysis['gift_card_values'].append(float(gift_card))
return analysis
def analyze_adjust_amount(db: DatabaseConnection) -> Dict[str, Any]:
"""
分析adjust_amount字段的分布和模式
"""
# 1. 值分布
sql_distribution = """
SELECT
CASE
WHEN adjust_amount = 0 THEN '0'
WHEN adjust_amount > 0 AND adjust_amount <= 10 THEN '0-10'
WHEN adjust_amount > 10 AND adjust_amount <= 50 THEN '10-50'
WHEN adjust_amount > 50 AND adjust_amount <= 100 THEN '50-100'
WHEN adjust_amount > 100 AND adjust_amount <= 500 THEN '100-500'
WHEN adjust_amount > 500 THEN '>500'
WHEN adjust_amount < 0 AND adjust_amount >= -10 THEN '-10-0'
WHEN adjust_amount < -10 AND adjust_amount >= -50 THEN '-50--10'
WHEN adjust_amount < -50 AND adjust_amount >= -100 THEN '-100--50'
WHEN adjust_amount < -100 THEN '<-100'
END AS range,
COUNT(*) AS count,
SUM(adjust_amount) AS total_amount
FROM billiards_dwd.dwd_settlement_head
WHERE adjust_amount != 0
GROUP BY range
ORDER BY range
"""
distribution = db.query(sql_distribution)
# 2. 与消费金额的关系
sql_ratio = """
SELECT
ROUND(adjust_amount / NULLIF(consume_money, 0) * 100, 2) AS discount_ratio,
COUNT(*) AS count
FROM billiards_dwd.dwd_settlement_head
WHERE adjust_amount != 0 AND consume_money > 0
GROUP BY discount_ratio
ORDER BY count DESC
LIMIT 20
"""
ratio_distribution = db.query(sql_ratio)
# 3. 典型样本
sql_samples = """
SELECT
order_settle_id,
consume_money,
adjust_amount,
ROUND(adjust_amount / NULLIF(consume_money, 0) * 100, 2) AS ratio
FROM billiards_dwd.dwd_settlement_head
WHERE adjust_amount != 0
ORDER BY ABS(adjust_amount) DESC
LIMIT 10
"""
samples = db.query(sql_samples)
return {
'distribution': [dict(r) for r in distribution] if distribution else [],
'ratio_distribution': [dict(r) for r in ratio_distribution] if ratio_distribution else [],
'top_samples': [dict(r) for r in samples] if samples else []
}
def analyze_member_discount(db: DatabaseConnection) -> Dict[str, Any]:
"""
分析member_discount_amount字段的使用情况
"""
sql = """
SELECT
COUNT(*) AS total_orders,
COUNT(CASE WHEN member_discount_amount != 0 THEN 1 END) AS with_discount,
SUM(member_discount_amount) AS total_discount,
AVG(CASE WHEN member_discount_amount != 0 THEN member_discount_amount END) AS avg_discount,
MAX(member_discount_amount) AS max_discount,
MIN(CASE WHEN member_discount_amount != 0 THEN member_discount_amount END) AS min_discount
FROM billiards_dwd.dwd_settlement_head
"""
rows = db.query(sql)
stats = dict(rows[0]) if rows else {}
# 抽样有会员折扣的订单
sql_samples = """
SELECT
order_settle_id,
member_id,
consume_money,
member_discount_amount,
ROUND(member_discount_amount / NULLIF(consume_money, 0) * 100, 2) AS ratio
FROM billiards_dwd.dwd_settlement_head
WHERE member_discount_amount != 0
LIMIT 20
"""
samples = db.query(sql_samples)
return {
'stats': stats,
'samples': [dict(r) for r in samples] if samples else []
}
def analyze_rounding(db: DatabaseConnection) -> Dict[str, Any]:
"""
分析rounding_amount字段的规则
"""
# 1. 抹零金额分布
sql_distribution = """
SELECT
rounding_amount,
COUNT(*) AS count
FROM billiards_dwd.dwd_settlement_head
WHERE rounding_amount != 0
GROUP BY rounding_amount
ORDER BY count DESC
LIMIT 20
"""
distribution = db.query(sql_distribution)
# 2. 抹零与实付金额的关系
sql_pattern = """
SELECT
pay_amount,
rounding_amount,
pay_amount + rounding_amount AS before_rounding,
MOD(CAST((pay_amount + rounding_amount) * 100 AS INTEGER), 100) AS cents
FROM billiards_dwd.dwd_settlement_head
WHERE rounding_amount != 0
LIMIT 20
"""
patterns = db.query(sql_pattern)
return {
'distribution': [dict(r) for r in distribution] if distribution else [],
'patterns': [dict(r) for r in patterns] if patterns else []
}
def analyze_groupbuy(db: DatabaseConnection) -> Dict[str, Any]:
"""
分析团购优惠
"""
# 1. 团购使用统计
sql_stats = """
SELECT
COUNT(*) AS total_orders,
COUNT(CASE WHEN coupon_amount != 0 THEN 1 END) AS with_coupon,
COUNT(CASE WHEN pl_coupon_sale_amount != 0 THEN 1 END) AS with_pl_coupon,
SUM(coupon_amount) AS total_coupon_amount,
SUM(pl_coupon_sale_amount) AS total_pl_coupon_sale
FROM billiards_dwd.dwd_settlement_head
"""
stats = db.query(sql_stats)
# 2. 团购订单样本
sql_samples = """
SELECT
sh.order_settle_id,
sh.coupon_amount,
sh.pl_coupon_sale_amount,
gr.ledger_amount AS groupbuy_ledger_amount,
gr.ledger_unit_price AS groupbuy_unit_price
FROM billiards_dwd.dwd_settlement_head sh
LEFT JOIN billiards_dwd.dwd_groupbuy_redemption gr
ON sh.order_settle_id = gr.order_settle_id
WHERE sh.coupon_amount != 0
LIMIT 20
"""
samples = db.query(sql_samples)
return {
'stats': dict(stats[0]) if stats else {},
'samples': [dict(r) for r in samples] if samples else []
}
def print_overall_stats(stats: Dict[str, Any]):
"""打印总体统计"""
total = stats.get('total_orders', 0)
print(f"总订单数: {total:,}")
print(f"有adjust_amount的订单: {stats.get('orders_with_adjust', 0):,} ({stats.get('orders_with_adjust', 0)/total*100:.2f}%)")
print(f"有member_discount的订单: {stats.get('orders_with_member_discount', 0):,} ({stats.get('orders_with_member_discount', 0)/total*100:.2f}%)")
print(f"有rounding的订单: {stats.get('orders_with_rounding', 0):,} ({stats.get('orders_with_rounding', 0)/total*100:.2f}%)")
print(f"有coupon的订单: {stats.get('orders_with_coupon', 0):,} ({stats.get('orders_with_coupon', 0)/total*100:.2f}%)")
print(f"有gift_card的订单: {stats.get('orders_with_gift_card', 0):,} ({stats.get('orders_with_gift_card', 0)/total*100:.2f}%)")
print()
print(f"adjust_amount总额: {stats.get('total_adjust', 0):,.2f}")
print(f"member_discount总额: {stats.get('total_member_discount', 0):,.2f}")
print(f"rounding总额: {stats.get('total_rounding', 0):,.2f}")
print(f"coupon总额: {stats.get('total_coupon', 0):,.2f}")
print(f"gift_card总额: {stats.get('total_gift_card', 0):,.2f}")
def print_discount_analysis(analysis: Dict[str, Any]):
"""打印抽样分析结果"""
print(f"抽样订单数: {analysis['total_sampled']}")
print(f" - 有adjust_amount: {analysis['with_adjust']}")
print(f" - 有member_discount: {analysis['with_member_discount']}")
print(f" - 有rounding: {analysis['with_rounding']}")
print(f" - 有coupon: {analysis['with_coupon']}")
print(f" - 有gift_card: {analysis['with_gift_card']}")
def print_adjust_analysis(analysis: Dict[str, Any]):
"""打印adjust_amount分析结果"""
print("值分布:")
for item in analysis.get('distribution', []):
print(f" {item.get('range', 'N/A')}: {item.get('count', 0):,} 单, 总额 {item.get('total_amount', 0):,.2f}")
print("\n折扣比例分布 (Top 10):")
for item in analysis.get('ratio_distribution', [])[:10]:
print(f" {item.get('discount_ratio', 0)}%: {item.get('count', 0):,}")
print("\n大额调整样本 (Top 10):")
for item in analysis.get('top_samples', []):
print(f" 订单{item.get('order_settle_id')}: 消费{item.get('consume_money', 0):,.2f}, 调整{item.get('adjust_amount', 0):,.2f} ({item.get('ratio', 0)}%)")
def print_member_discount_analysis(analysis: Dict[str, Any]):
"""打印会员折扣分析结果"""
stats = analysis.get('stats', {})
print(f"总订单数: {stats.get('total_orders', 0):,}")
print(f"有会员折扣的订单: {stats.get('with_discount', 0):,}")
print(f"会员折扣总额: {stats.get('total_discount', 0):,.2f}")
print(f"平均折扣: {stats.get('avg_discount', 0):,.2f}")
print(f"最大折扣: {stats.get('max_discount', 0):,.2f}")
samples = analysis.get('samples', [])
if samples:
print("\n样本订单:")
for item in samples[:5]:
print(f" 订单{item.get('order_settle_id')}: 会员{item.get('member_id')}, 消费{item.get('consume_money', 0):,.2f}, 折扣{item.get('member_discount_amount', 0):,.2f} ({item.get('ratio', 0)}%)")
else:
print("\n[!] 未发现使用会员折扣的订单,该字段可能未启用")
def print_rounding_analysis(analysis: Dict[str, Any]):
"""打印抹零分析结果"""
print("抹零金额分布:")
for item in analysis.get('distribution', []):
print(f" {item.get('rounding_amount', 0):,.2f}: {item.get('count', 0):,}")
print("\n抹零模式样本:")
for item in analysis.get('patterns', [])[:5]:
print(f" 实付{item.get('pay_amount', 0):,.2f} + 抹零{item.get('rounding_amount', 0):,.2f} = {item.get('before_rounding', 0):,.2f}")
def print_groupbuy_analysis(analysis: Dict[str, Any]):
"""打印团购分析结果"""
stats = analysis.get('stats', {})
print(f"总订单数: {stats.get('total_orders', 0):,}")
print(f"有coupon_amount的订单: {stats.get('with_coupon', 0):,}")
print(f"有pl_coupon_sale_amount的订单: {stats.get('with_pl_coupon', 0):,}")
print(f"coupon_amount总额: {stats.get('total_coupon_amount', 0):,.2f}")
print(f"pl_coupon_sale_amount总额: {stats.get('total_pl_coupon_sale', 0):,.2f}")
print("\n团购订单样本:")
for item in analysis.get('samples', [])[:5]:
print(f" 订单{item.get('order_settle_id')}: coupon={item.get('coupon_amount', 0):,.2f}, pl_coupon={item.get('pl_coupon_sale_amount', 0):,.2f}, groupbuy_price={item.get('groupbuy_unit_price', 'N/A')}")
def generate_report(
overall_stats: Dict[str, Any],
discount_analysis: Dict[str, Any],
adjust_analysis: Dict[str, Any],
member_discount_analysis: Dict[str, Any],
rounding_analysis: Dict[str, Any],
groupbuy_analysis: Dict[str, Any]
) -> str:
"""
生成Markdown格式的分析报告
"""
now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
total = overall_stats.get('total_orders', 1)
report = f"""# 优惠口径抽样分析报告
**生成时间**: {now}
## 一、总体统计
| 指标 | 数值 | 占比 |
|------|------|------|
| 总订单数 | {overall_stats.get('total_orders', 0):,} | 100% |
| 有adjust_amount的订单 | {overall_stats.get('orders_with_adjust', 0):,} | {overall_stats.get('orders_with_adjust', 0)/total*100:.2f}% |
| 有member_discount的订单 | {overall_stats.get('orders_with_member_discount', 0):,} | {overall_stats.get('orders_with_member_discount', 0)/total*100:.2f}% |
| 有rounding的订单 | {overall_stats.get('orders_with_rounding', 0):,} | {overall_stats.get('orders_with_rounding', 0)/total*100:.2f}% |
| 有coupon的订单 | {overall_stats.get('orders_with_coupon', 0):,} | {overall_stats.get('orders_with_coupon', 0)/total*100:.2f}% |
| 有gift_card的订单 | {overall_stats.get('orders_with_gift_card', 0):,} | {overall_stats.get('orders_with_gift_card', 0)/total*100:.2f}% |
### 金额统计
| 优惠类型 | 总额 |
|----------|------|
| adjust_amount (台费打折/调整) | {overall_stats.get('total_adjust', 0):,.2f} |
| member_discount_amount (会员折扣) | {overall_stats.get('total_member_discount', 0):,.2f} |
| rounding_amount (抹零) | {overall_stats.get('total_rounding', 0):,.2f} |
| coupon_amount (团购抵消台费) | {overall_stats.get('total_coupon', 0):,.2f} |
| gift_card_amount (赠送卡支付) | {overall_stats.get('total_gift_card', 0):,.2f} |
## 二、adjust_amount (台费打折/调整) 分析
### 值分布
| 区间 | 订单数 | 总额 |
|------|--------|------|
"""
for item in adjust_analysis.get('distribution', []):
report += f"| {item.get('range', 'N/A')} | {item.get('count', 0):,} | {item.get('total_amount', 0):,.2f} |\n"
report += """
### 分析结论
- **是否包含大客户优惠**: 需要进一步分析adjust_amount的业务来源
- **与普通调整的区分**: 建议查看是否有备注字段或关联的优惠活动表
## 三、member_discount_amount (会员折扣) 分析
"""
member_stats = member_discount_analysis.get('stats', {})
with_discount = member_stats.get('with_discount', 0)
if with_discount == 0:
report += """### 结论
**[!] 该字段未发现任何非零值,会员折扣功能可能未启用。**
建议在DWS财务统计中可以暂时忽略此字段或将其标记为"待启用"
"""
else:
report += f"""### 使用统计
| 指标 | 数值 |
|------|------|
| 有会员折扣的订单 | {with_discount:,} |
| 会员折扣总额 | {member_stats.get('total_discount', 0):,.2f} |
| 平均折扣 | {member_stats.get('avg_discount', 0):,.2f} |
| 最大折扣 | {member_stats.get('max_discount', 0):,.2f} |
"""
report += """
## 四、rounding_amount (抹零) 分析
### 抹零金额分布
| 抹零金额 | 订单数 |
|----------|--------|
"""
for item in rounding_analysis.get('distribution', [])[:10]:
report += f"| {item.get('rounding_amount', 0):,.2f} | {item.get('count', 0):,} |\n"
report += """
### 抹零规则推断
根据抹零金额分布,推断抹零规则为:
- 抹零到整元(去除角分)
- 或抹零到特定尾数
## 五、团购优惠分析
"""
groupbuy_stats = groupbuy_analysis.get('stats', {})
report += f"""### 使用统计
| 指标 | 数值 |
|------|------|
| 有coupon_amount的订单 | {groupbuy_stats.get('with_coupon', 0):,} |
| 有pl_coupon_sale_amount的订单 | {groupbuy_stats.get('with_pl_coupon', 0):,} |
| coupon_amount总额 | {groupbuy_stats.get('total_coupon_amount', 0):,.2f} |
| pl_coupon_sale_amount总额 | {groupbuy_stats.get('total_pl_coupon_sale', 0):,.2f} |
### 团购支付金额计算路径
根据分析,团购支付金额应按以下路径计算:
1. 若 `pl_coupon_sale_amount ≠ 0` → 使用 `pl_coupon_sale_amount`
2. 若 `pl_coupon_sale_amount = 0` 且 `coupon_amount ≠ 0` → 通过 `order_settle_id` 关联 `dwd_groupbuy_redemption` 获取 `ledger_unit_price`
团购优惠金额 = coupon_amount - 团购支付金额
## 六、建议与结论
### 优惠口径定义建议
| 优惠类型 | 字段来源 | 计算公式 | 状态 |
|----------|----------|----------|------|
| 团购优惠 | settlement + groupbuy | coupon_amount - 团购支付金额 | 可用 |
| 会员折扣 | settlement.member_discount_amount | 直接取值 | 待确认 |
| 赠送卡抵扣 | settlement.gift_card_amount | 直接取值 | 可用 |
| 手动调整 | settlement.adjust_amount | 直接取值 | 可用 |
| 抹零 | settlement.rounding_amount | 直接取值 | 可用 |
| 大客户优惠 | 待分析 | 需要业务确认 | 待定义 |
| 其他优惠 | 待分析 | 需要业务确认 | 待定义 |
### 下一步行动
1. **确认会员折扣是否启用**: 与业务确认member_discount_amount的使用场景
2. **大客户优惠识别规则**: 与业务确认如何从adjust_amount中识别大客户优惠
3. **其他优惠分类**: 与业务确认adjust_amount中还包含哪些优惠类型
"""
return report
if __name__ == "__main__":
analyze_discount_patterns()