This commit is contained in:
Neo
2026-02-04 21:39:01 +08:00
parent ee773a9b52
commit a3f4d04335
148 changed files with 31455 additions and 182 deletions

View File

@@ -0,0 +1,636 @@
# -*- coding: utf-8 -*-
"""
优惠口径抽样分析脚本
功能说明:
从dwd_settlement_head表抽样100单分析以下优惠字段的使用情况
- adjust_amount: 台费打折/调整(可能包含大客户优惠、其他优惠)
- member_discount_amount: 会员折扣
- rounding_amount: 抹零金额
- coupon_amount: 团购抵消台费
- gift_card_amount: 赠送卡支付
分析目标:
1. 大客户优惠:是否存在"大客户"标识?如何与普通调整区分?
2. 会员折扣:是否有非零值?使用场景是什么?
3. 抹零抹零规则与adjust_amount的关系
4. 其他优惠adjust_amount中还包含哪些优惠类型
输出:
- 控制台打印分析报告
- 生成 docs/analysis_discount_patterns.md 报告文件
作者ETL团队
创建日期2026-02-01
"""
import os
import sys
from datetime import datetime
from decimal import Decimal
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
# 添加项目根目录到Python路径
project_root = Path(__file__).parent.parent.parent
sys.path.insert(0, str(project_root))
from etl_billiards.utils.config import Config
from etl_billiards.utils.db import DatabaseConnection
def analyze_discount_patterns():
"""
执行优惠口径抽样分析
"""
print("=" * 80)
print("优惠口径抽样分析")
print("=" * 80)
print()
# 加载配置和数据库连接
config = Config()
db = DatabaseConnection(config)
try:
# 1. 获取总体统计
print("【1. 总体统计】")
print("-" * 40)
overall_stats = get_overall_stats(db)
print_overall_stats(overall_stats)
print()
# 2. 抽样分析优惠订单
print("【2. 有优惠的订单抽样分析100单")
print("-" * 40)
sample_orders = get_sample_orders_with_discount(db, limit=100)
discount_analysis = analyze_sample_orders(sample_orders)
print_discount_analysis(discount_analysis)
print()
# 3. adjust_amount详细分析
print("【3. adjust_amount (台费打折/调整) 详细分析】")
print("-" * 40)
adjust_analysis = analyze_adjust_amount(db)
print_adjust_analysis(adjust_analysis)
print()
# 4. 会员折扣使用分析
print("【4. member_discount_amount (会员折扣) 使用分析】")
print("-" * 40)
member_discount_analysis = analyze_member_discount(db)
print_member_discount_analysis(member_discount_analysis)
print()
# 5. 抹零规则分析
print("【5. rounding_amount (抹零) 规则分析】")
print("-" * 40)
rounding_analysis = analyze_rounding(db)
print_rounding_analysis(rounding_analysis)
print()
# 6. 团购优惠分析
print("【6. 团购优惠分析】")
print("-" * 40)
groupbuy_analysis = analyze_groupbuy(db)
print_groupbuy_analysis(groupbuy_analysis)
print()
# 7. 生成分析报告
print("【7. 生成分析报告】")
print("-" * 40)
report = generate_report(
overall_stats,
discount_analysis,
adjust_analysis,
member_discount_analysis,
rounding_analysis,
groupbuy_analysis
)
# 保存报告
report_path = project_root / "etl_billiards" / "docs" / "analysis_discount_patterns.md"
with open(report_path, 'w', encoding='utf-8') as f:
f.write(report)
print(f"报告已保存到: {report_path}")
finally:
db.close()
def get_overall_stats(db: DatabaseConnection) -> Dict[str, Any]:
"""
获取总体统计数据
"""
sql = """
SELECT
COUNT(*) AS total_orders,
COUNT(CASE WHEN adjust_amount != 0 THEN 1 END) AS orders_with_adjust,
COUNT(CASE WHEN member_discount_amount != 0 THEN 1 END) AS orders_with_member_discount,
COUNT(CASE WHEN rounding_amount != 0 THEN 1 END) AS orders_with_rounding,
COUNT(CASE WHEN coupon_amount != 0 THEN 1 END) AS orders_with_coupon,
COUNT(CASE WHEN gift_card_amount != 0 THEN 1 END) AS orders_with_gift_card,
SUM(adjust_amount) AS total_adjust,
SUM(member_discount_amount) AS total_member_discount,
SUM(rounding_amount) AS total_rounding,
SUM(coupon_amount) AS total_coupon,
SUM(gift_card_amount) AS total_gift_card,
SUM(consume_money) AS total_consume,
SUM(pay_amount) AS total_pay
FROM billiards_dwd.dwd_settlement_head
"""
rows = db.query(sql)
return dict(rows[0]) if rows else {}
def get_sample_orders_with_discount(
db: DatabaseConnection,
limit: int = 100
) -> List[Dict[str, Any]]:
"""
抽样获取有优惠的订单
"""
sql = """
SELECT
order_settle_id,
order_trade_no,
create_time,
consume_money,
pay_amount,
adjust_amount,
member_discount_amount,
rounding_amount,
coupon_amount,
gift_card_amount,
balance_amount,
recharge_card_amount,
pl_coupon_sale_amount,
table_charge_money,
goods_money,
assistant_pd_money,
assistant_cx_money,
consume_money - pay_amount - COALESCE(recharge_card_amount, 0)
- COALESCE(gift_card_amount, 0) - COALESCE(balance_amount, 0) AS calculated_discount
FROM billiards_dwd.dwd_settlement_head
WHERE adjust_amount != 0
OR member_discount_amount != 0
OR rounding_amount != 0
OR coupon_amount != 0
OR gift_card_amount != 0
ORDER BY RANDOM()
LIMIT %s
"""
rows = db.query(sql, (limit,))
return [dict(row) for row in rows] if rows else []
def analyze_sample_orders(orders: List[Dict[str, Any]]) -> Dict[str, Any]:
"""
分析抽样订单
"""
analysis = {
'total_sampled': len(orders),
'with_adjust': 0,
'with_member_discount': 0,
'with_rounding': 0,
'with_coupon': 0,
'with_gift_card': 0,
'adjust_values': [],
'member_discount_values': [],
'rounding_values': [],
'coupon_values': [],
'gift_card_values': [],
}
for order in orders:
adjust = Decimal(str(order.get('adjust_amount', 0)))
member_discount = Decimal(str(order.get('member_discount_amount', 0)))
rounding = Decimal(str(order.get('rounding_amount', 0)))
coupon = Decimal(str(order.get('coupon_amount', 0)))
gift_card = Decimal(str(order.get('gift_card_amount', 0)))
if adjust != 0:
analysis['with_adjust'] += 1
analysis['adjust_values'].append(float(adjust))
if member_discount != 0:
analysis['with_member_discount'] += 1
analysis['member_discount_values'].append(float(member_discount))
if rounding != 0:
analysis['with_rounding'] += 1
analysis['rounding_values'].append(float(rounding))
if coupon != 0:
analysis['with_coupon'] += 1
analysis['coupon_values'].append(float(coupon))
if gift_card != 0:
analysis['with_gift_card'] += 1
analysis['gift_card_values'].append(float(gift_card))
return analysis
def analyze_adjust_amount(db: DatabaseConnection) -> Dict[str, Any]:
"""
分析adjust_amount字段的分布和模式
"""
# 1. 值分布
sql_distribution = """
SELECT
CASE
WHEN adjust_amount = 0 THEN '0'
WHEN adjust_amount > 0 AND adjust_amount <= 10 THEN '0-10'
WHEN adjust_amount > 10 AND adjust_amount <= 50 THEN '10-50'
WHEN adjust_amount > 50 AND adjust_amount <= 100 THEN '50-100'
WHEN adjust_amount > 100 AND adjust_amount <= 500 THEN '100-500'
WHEN adjust_amount > 500 THEN '>500'
WHEN adjust_amount < 0 AND adjust_amount >= -10 THEN '-10-0'
WHEN adjust_amount < -10 AND adjust_amount >= -50 THEN '-50--10'
WHEN adjust_amount < -50 AND adjust_amount >= -100 THEN '-100--50'
WHEN adjust_amount < -100 THEN '<-100'
END AS range,
COUNT(*) AS count,
SUM(adjust_amount) AS total_amount
FROM billiards_dwd.dwd_settlement_head
WHERE adjust_amount != 0
GROUP BY range
ORDER BY range
"""
distribution = db.query(sql_distribution)
# 2. 与消费金额的关系
sql_ratio = """
SELECT
ROUND(adjust_amount / NULLIF(consume_money, 0) * 100, 2) AS discount_ratio,
COUNT(*) AS count
FROM billiards_dwd.dwd_settlement_head
WHERE adjust_amount != 0 AND consume_money > 0
GROUP BY discount_ratio
ORDER BY count DESC
LIMIT 20
"""
ratio_distribution = db.query(sql_ratio)
# 3. 典型样本
sql_samples = """
SELECT
order_settle_id,
consume_money,
adjust_amount,
ROUND(adjust_amount / NULLIF(consume_money, 0) * 100, 2) AS ratio
FROM billiards_dwd.dwd_settlement_head
WHERE adjust_amount != 0
ORDER BY ABS(adjust_amount) DESC
LIMIT 10
"""
samples = db.query(sql_samples)
return {
'distribution': [dict(r) for r in distribution] if distribution else [],
'ratio_distribution': [dict(r) for r in ratio_distribution] if ratio_distribution else [],
'top_samples': [dict(r) for r in samples] if samples else []
}
def analyze_member_discount(db: DatabaseConnection) -> Dict[str, Any]:
"""
分析member_discount_amount字段的使用情况
"""
sql = """
SELECT
COUNT(*) AS total_orders,
COUNT(CASE WHEN member_discount_amount != 0 THEN 1 END) AS with_discount,
SUM(member_discount_amount) AS total_discount,
AVG(CASE WHEN member_discount_amount != 0 THEN member_discount_amount END) AS avg_discount,
MAX(member_discount_amount) AS max_discount,
MIN(CASE WHEN member_discount_amount != 0 THEN member_discount_amount END) AS min_discount
FROM billiards_dwd.dwd_settlement_head
"""
rows = db.query(sql)
stats = dict(rows[0]) if rows else {}
# 抽样有会员折扣的订单
sql_samples = """
SELECT
order_settle_id,
member_id,
consume_money,
member_discount_amount,
ROUND(member_discount_amount / NULLIF(consume_money, 0) * 100, 2) AS ratio
FROM billiards_dwd.dwd_settlement_head
WHERE member_discount_amount != 0
LIMIT 20
"""
samples = db.query(sql_samples)
return {
'stats': stats,
'samples': [dict(r) for r in samples] if samples else []
}
def analyze_rounding(db: DatabaseConnection) -> Dict[str, Any]:
"""
分析rounding_amount字段的规则
"""
# 1. 抹零金额分布
sql_distribution = """
SELECT
rounding_amount,
COUNT(*) AS count
FROM billiards_dwd.dwd_settlement_head
WHERE rounding_amount != 0
GROUP BY rounding_amount
ORDER BY count DESC
LIMIT 20
"""
distribution = db.query(sql_distribution)
# 2. 抹零与实付金额的关系
sql_pattern = """
SELECT
pay_amount,
rounding_amount,
pay_amount + rounding_amount AS before_rounding,
MOD(CAST((pay_amount + rounding_amount) * 100 AS INTEGER), 100) AS cents
FROM billiards_dwd.dwd_settlement_head
WHERE rounding_amount != 0
LIMIT 20
"""
patterns = db.query(sql_pattern)
return {
'distribution': [dict(r) for r in distribution] if distribution else [],
'patterns': [dict(r) for r in patterns] if patterns else []
}
def analyze_groupbuy(db: DatabaseConnection) -> Dict[str, Any]:
"""
分析团购优惠
"""
# 1. 团购使用统计
sql_stats = """
SELECT
COUNT(*) AS total_orders,
COUNT(CASE WHEN coupon_amount != 0 THEN 1 END) AS with_coupon,
COUNT(CASE WHEN pl_coupon_sale_amount != 0 THEN 1 END) AS with_pl_coupon,
SUM(coupon_amount) AS total_coupon_amount,
SUM(pl_coupon_sale_amount) AS total_pl_coupon_sale
FROM billiards_dwd.dwd_settlement_head
"""
stats = db.query(sql_stats)
# 2. 团购订单样本
sql_samples = """
SELECT
sh.order_settle_id,
sh.coupon_amount,
sh.pl_coupon_sale_amount,
gr.ledger_amount AS groupbuy_ledger_amount,
gr.ledger_unit_price AS groupbuy_unit_price
FROM billiards_dwd.dwd_settlement_head sh
LEFT JOIN billiards_dwd.dwd_groupbuy_redemption gr
ON sh.order_settle_id = gr.order_settle_id
WHERE sh.coupon_amount != 0
LIMIT 20
"""
samples = db.query(sql_samples)
return {
'stats': dict(stats[0]) if stats else {},
'samples': [dict(r) for r in samples] if samples else []
}
def print_overall_stats(stats: Dict[str, Any]):
"""打印总体统计"""
total = stats.get('total_orders', 0)
print(f"总订单数: {total:,}")
print(f"有adjust_amount的订单: {stats.get('orders_with_adjust', 0):,} ({stats.get('orders_with_adjust', 0)/total*100:.2f}%)")
print(f"有member_discount的订单: {stats.get('orders_with_member_discount', 0):,} ({stats.get('orders_with_member_discount', 0)/total*100:.2f}%)")
print(f"有rounding的订单: {stats.get('orders_with_rounding', 0):,} ({stats.get('orders_with_rounding', 0)/total*100:.2f}%)")
print(f"有coupon的订单: {stats.get('orders_with_coupon', 0):,} ({stats.get('orders_with_coupon', 0)/total*100:.2f}%)")
print(f"有gift_card的订单: {stats.get('orders_with_gift_card', 0):,} ({stats.get('orders_with_gift_card', 0)/total*100:.2f}%)")
print()
print(f"adjust_amount总额: {stats.get('total_adjust', 0):,.2f}")
print(f"member_discount总额: {stats.get('total_member_discount', 0):,.2f}")
print(f"rounding总额: {stats.get('total_rounding', 0):,.2f}")
print(f"coupon总额: {stats.get('total_coupon', 0):,.2f}")
print(f"gift_card总额: {stats.get('total_gift_card', 0):,.2f}")
def print_discount_analysis(analysis: Dict[str, Any]):
"""打印抽样分析结果"""
print(f"抽样订单数: {analysis['total_sampled']}")
print(f" - 有adjust_amount: {analysis['with_adjust']}")
print(f" - 有member_discount: {analysis['with_member_discount']}")
print(f" - 有rounding: {analysis['with_rounding']}")
print(f" - 有coupon: {analysis['with_coupon']}")
print(f" - 有gift_card: {analysis['with_gift_card']}")
def print_adjust_analysis(analysis: Dict[str, Any]):
"""打印adjust_amount分析结果"""
print("值分布:")
for item in analysis.get('distribution', []):
print(f" {item.get('range', 'N/A')}: {item.get('count', 0):,} 单, 总额 {item.get('total_amount', 0):,.2f}")
print("\n折扣比例分布 (Top 10):")
for item in analysis.get('ratio_distribution', [])[:10]:
print(f" {item.get('discount_ratio', 0)}%: {item.get('count', 0):,}")
print("\n大额调整样本 (Top 10):")
for item in analysis.get('top_samples', []):
print(f" 订单{item.get('order_settle_id')}: 消费{item.get('consume_money', 0):,.2f}, 调整{item.get('adjust_amount', 0):,.2f} ({item.get('ratio', 0)}%)")
def print_member_discount_analysis(analysis: Dict[str, Any]):
"""打印会员折扣分析结果"""
stats = analysis.get('stats', {})
print(f"总订单数: {stats.get('total_orders', 0):,}")
print(f"有会员折扣的订单: {stats.get('with_discount', 0):,}")
print(f"会员折扣总额: {stats.get('total_discount', 0):,.2f}")
print(f"平均折扣: {stats.get('avg_discount', 0):,.2f}")
print(f"最大折扣: {stats.get('max_discount', 0):,.2f}")
samples = analysis.get('samples', [])
if samples:
print("\n样本订单:")
for item in samples[:5]:
print(f" 订单{item.get('order_settle_id')}: 会员{item.get('member_id')}, 消费{item.get('consume_money', 0):,.2f}, 折扣{item.get('member_discount_amount', 0):,.2f} ({item.get('ratio', 0)}%)")
else:
print("\n[!] 未发现使用会员折扣的订单,该字段可能未启用")
def print_rounding_analysis(analysis: Dict[str, Any]):
"""打印抹零分析结果"""
print("抹零金额分布:")
for item in analysis.get('distribution', []):
print(f" {item.get('rounding_amount', 0):,.2f}: {item.get('count', 0):,}")
print("\n抹零模式样本:")
for item in analysis.get('patterns', [])[:5]:
print(f" 实付{item.get('pay_amount', 0):,.2f} + 抹零{item.get('rounding_amount', 0):,.2f} = {item.get('before_rounding', 0):,.2f}")
def print_groupbuy_analysis(analysis: Dict[str, Any]):
"""打印团购分析结果"""
stats = analysis.get('stats', {})
print(f"总订单数: {stats.get('total_orders', 0):,}")
print(f"有coupon_amount的订单: {stats.get('with_coupon', 0):,}")
print(f"有pl_coupon_sale_amount的订单: {stats.get('with_pl_coupon', 0):,}")
print(f"coupon_amount总额: {stats.get('total_coupon_amount', 0):,.2f}")
print(f"pl_coupon_sale_amount总额: {stats.get('total_pl_coupon_sale', 0):,.2f}")
print("\n团购订单样本:")
for item in analysis.get('samples', [])[:5]:
print(f" 订单{item.get('order_settle_id')}: coupon={item.get('coupon_amount', 0):,.2f}, pl_coupon={item.get('pl_coupon_sale_amount', 0):,.2f}, groupbuy_price={item.get('groupbuy_unit_price', 'N/A')}")
def generate_report(
overall_stats: Dict[str, Any],
discount_analysis: Dict[str, Any],
adjust_analysis: Dict[str, Any],
member_discount_analysis: Dict[str, Any],
rounding_analysis: Dict[str, Any],
groupbuy_analysis: Dict[str, Any]
) -> str:
"""
生成Markdown格式的分析报告
"""
now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
total = overall_stats.get('total_orders', 1)
report = f"""# 优惠口径抽样分析报告
**生成时间**: {now}
## 一、总体统计
| 指标 | 数值 | 占比 |
|------|------|------|
| 总订单数 | {overall_stats.get('total_orders', 0):,} | 100% |
| 有adjust_amount的订单 | {overall_stats.get('orders_with_adjust', 0):,} | {overall_stats.get('orders_with_adjust', 0)/total*100:.2f}% |
| 有member_discount的订单 | {overall_stats.get('orders_with_member_discount', 0):,} | {overall_stats.get('orders_with_member_discount', 0)/total*100:.2f}% |
| 有rounding的订单 | {overall_stats.get('orders_with_rounding', 0):,} | {overall_stats.get('orders_with_rounding', 0)/total*100:.2f}% |
| 有coupon的订单 | {overall_stats.get('orders_with_coupon', 0):,} | {overall_stats.get('orders_with_coupon', 0)/total*100:.2f}% |
| 有gift_card的订单 | {overall_stats.get('orders_with_gift_card', 0):,} | {overall_stats.get('orders_with_gift_card', 0)/total*100:.2f}% |
### 金额统计
| 优惠类型 | 总额 |
|----------|------|
| adjust_amount (台费打折/调整) | {overall_stats.get('total_adjust', 0):,.2f} |
| member_discount_amount (会员折扣) | {overall_stats.get('total_member_discount', 0):,.2f} |
| rounding_amount (抹零) | {overall_stats.get('total_rounding', 0):,.2f} |
| coupon_amount (团购抵消台费) | {overall_stats.get('total_coupon', 0):,.2f} |
| gift_card_amount (赠送卡支付) | {overall_stats.get('total_gift_card', 0):,.2f} |
## 二、adjust_amount (台费打折/调整) 分析
### 值分布
| 区间 | 订单数 | 总额 |
|------|--------|------|
"""
for item in adjust_analysis.get('distribution', []):
report += f"| {item.get('range', 'N/A')} | {item.get('count', 0):,} | {item.get('total_amount', 0):,.2f} |\n"
report += """
### 分析结论
- **是否包含大客户优惠**: 需要进一步分析adjust_amount的业务来源
- **与普通调整的区分**: 建议查看是否有备注字段或关联的优惠活动表
## 三、member_discount_amount (会员折扣) 分析
"""
member_stats = member_discount_analysis.get('stats', {})
with_discount = member_stats.get('with_discount', 0)
if with_discount == 0:
report += """### 结论
**[!] 该字段未发现任何非零值,会员折扣功能可能未启用。**
建议在DWS财务统计中可以暂时忽略此字段或将其标记为"待启用"
"""
else:
report += f"""### 使用统计
| 指标 | 数值 |
|------|------|
| 有会员折扣的订单 | {with_discount:,} |
| 会员折扣总额 | {member_stats.get('total_discount', 0):,.2f} |
| 平均折扣 | {member_stats.get('avg_discount', 0):,.2f} |
| 最大折扣 | {member_stats.get('max_discount', 0):,.2f} |
"""
report += """
## 四、rounding_amount (抹零) 分析
### 抹零金额分布
| 抹零金额 | 订单数 |
|----------|--------|
"""
for item in rounding_analysis.get('distribution', [])[:10]:
report += f"| {item.get('rounding_amount', 0):,.2f} | {item.get('count', 0):,} |\n"
report += """
### 抹零规则推断
根据抹零金额分布,推断抹零规则为:
- 抹零到整元(去除角分)
- 或抹零到特定尾数
## 五、团购优惠分析
"""
groupbuy_stats = groupbuy_analysis.get('stats', {})
report += f"""### 使用统计
| 指标 | 数值 |
|------|------|
| 有coupon_amount的订单 | {groupbuy_stats.get('with_coupon', 0):,} |
| 有pl_coupon_sale_amount的订单 | {groupbuy_stats.get('with_pl_coupon', 0):,} |
| coupon_amount总额 | {groupbuy_stats.get('total_coupon_amount', 0):,.2f} |
| pl_coupon_sale_amount总额 | {groupbuy_stats.get('total_pl_coupon_sale', 0):,.2f} |
### 团购支付金额计算路径
根据分析,团购支付金额应按以下路径计算:
1. 若 `pl_coupon_sale_amount ≠ 0` → 使用 `pl_coupon_sale_amount`
2. 若 `pl_coupon_sale_amount = 0` 且 `coupon_amount ≠ 0` → 通过 `order_settle_id` 关联 `dwd_groupbuy_redemption` 获取 `ledger_unit_price`
团购优惠金额 = coupon_amount - 团购支付金额
## 六、建议与结论
### 优惠口径定义建议
| 优惠类型 | 字段来源 | 计算公式 | 状态 |
|----------|----------|----------|------|
| 团购优惠 | settlement + groupbuy | coupon_amount - 团购支付金额 | 可用 |
| 会员折扣 | settlement.member_discount_amount | 直接取值 | 待确认 |
| 赠送卡抵扣 | settlement.gift_card_amount | 直接取值 | 可用 |
| 手动调整 | settlement.adjust_amount | 直接取值 | 可用 |
| 抹零 | settlement.rounding_amount | 直接取值 | 可用 |
| 大客户优惠 | 待分析 | 需要业务确认 | 待定义 |
| 其他优惠 | 待分析 | 需要业务确认 | 待定义 |
### 下一步行动
1. **确认会员折扣是否启用**: 与业务确认member_discount_amount的使用场景
2. **大客户优惠识别规则**: 与业务确认如何从adjust_amount中识别大客户优惠
3. **其他优惠分类**: 与业务确认adjust_amount中还包含哪些优惠类型
"""
return report
if __name__ == "__main__":
analyze_discount_patterns()