# -*- coding: utf-8 -*-
"""
Discount-definition sampling analysis script.

What it does:
    Samples 100 orders from dwd_settlement_head and analyses how the
    following discount fields are used:
    - adjust_amount: table-fee discount / adjustment (may include
      key-account discounts and other discounts)
    - member_discount_amount: member discount
    - rounding_amount: rounding-off amount
    - coupon_amount: group-buy coupon offsetting the table fee
    - gift_card_amount: gift-card payment

Analysis goals:
    1. Key-account discount: is there a "key account" marker? How can it be
       told apart from an ordinary adjustment?
    2. Member discount: are there non-zero values? In which scenarios?
    3. Rounding: what is the rule? How does it relate to adjust_amount?
    4. Other discounts: which other discount types end up in adjust_amount?

Output:
    - Prints the analysis report to the console
    - Writes the report file docs/analysis_discount_patterns.md

Author: ETL team
Created: 2026-02-01
"""

import os
import sys
from datetime import datetime
from decimal import Decimal
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple

# Add the project root to the Python path so the package imports below work
# when this file is run directly as a script.
project_root = Path(__file__).parent.parent.parent
sys.path.insert(0, str(project_root))

from etl_billiards.utils.config import Config
from etl_billiards.utils.db import DatabaseConnection


# (label, key in the overall-stats row) for the per-field order counts.
# Shared by the console output and the Markdown report so they stay in sync.
_ORDER_COUNT_ROWS = (
    ("有adjust_amount的订单", 'orders_with_adjust'),
    ("有member_discount的订单", 'orders_with_member_discount'),
    ("有rounding的订单", 'orders_with_rounding'),
    ("有coupon的订单", 'orders_with_coupon'),
    ("有gift_card的订单", 'orders_with_gift_card'),
)

# (console label, report label, key in the overall-stats row) for the totals.
_AMOUNT_TOTAL_ROWS = (
    ("adjust_amount总额", "adjust_amount (台费打折/调整)", 'total_adjust'),
    ("member_discount总额", "member_discount_amount (会员折扣)", 'total_member_discount'),
    ("rounding总额", "rounding_amount (抹零)", 'total_rounding'),
    ("coupon总额", "coupon_amount (团购抵消台费)", 'total_coupon'),
    ("gift_card总额", "gift_card_amount (赠送卡支付)", 'total_gift_card'),
)

# (column, counter key, value-list key) for the sampled-order tally.
_SAMPLE_FIELDS = (
    ('adjust_amount', 'with_adjust', 'adjust_values'),
    ('member_discount_amount', 'with_member_discount', 'member_discount_values'),
    ('rounding_amount', 'with_rounding', 'rounding_values'),
    ('coupon_amount', 'with_coupon', 'coupon_values'),
    ('gift_card_amount', 'with_gift_card', 'gift_card_values'),
)


def _num(value: Any) -> Any:
    """Return *value*, or 0 when it is None, so it can be formatted numerically.

    Aggregates such as SUM/AVG/MAX yield SQL NULL (None) when they have no
    non-NULL input, and ``dict.get(key, 0)`` does not help because the key is
    present with a None value.
    """
    return 0 if value is None else value


def _pct(part: Any, total: Any) -> Any:
    """Return ``part / total * 100``, or 0.0 when *total* is zero or None."""
    part, total = _num(part), _num(total)
    if not total:
        return 0.0
    return part / total * 100


def analyze_discount_patterns():
    """Run the whole analysis: query, print each section, write the report.

    The database connection is always closed, even when a step fails.
    """
    print("=" * 80)
    print("优惠口径抽样分析")
    print("=" * 80)
    print()

    # Load configuration and open the database connection.
    config = Config()
    db = DatabaseConnection(config)

    try:
        # 1. Overall statistics
        print("【1. 总体统计】")
        print("-" * 40)
        overall_stats = get_overall_stats(db)
        print_overall_stats(overall_stats)
        print()

        # 2. Sample of discounted orders
        print("【2. 有优惠的订单抽样分析(100单)】")
        print("-" * 40)
        sample_orders = get_sample_orders_with_discount(db, limit=100)
        discount_analysis = analyze_sample_orders(sample_orders)
        print_discount_analysis(discount_analysis)
        print()

        # 3. adjust_amount in detail
        print("【3. adjust_amount (台费打折/调整) 详细分析】")
        print("-" * 40)
        adjust_analysis = analyze_adjust_amount(db)
        print_adjust_analysis(adjust_analysis)
        print()

        # 4. Member-discount usage
        print("【4. member_discount_amount (会员折扣) 使用分析】")
        print("-" * 40)
        member_discount_analysis = analyze_member_discount(db)
        print_member_discount_analysis(member_discount_analysis)
        print()

        # 5. Rounding rule
        print("【5. rounding_amount (抹零) 规则分析】")
        print("-" * 40)
        rounding_analysis = analyze_rounding(db)
        print_rounding_analysis(rounding_analysis)
        print()

        # 6. Group-buy discounts
        print("【6. 团购优惠分析】")
        print("-" * 40)
        groupbuy_analysis = analyze_groupbuy(db)
        print_groupbuy_analysis(groupbuy_analysis)
        print()

        # 7. Build the Markdown report
        print("【7. 生成分析报告】")
        print("-" * 40)
        report = generate_report(
            overall_stats,
            discount_analysis,
            adjust_analysis,
            member_discount_analysis,
            rounding_analysis,
            groupbuy_analysis,
        )

        # Save the report; create the docs directory first so a fresh
        # checkout without it does not fail with FileNotFoundError.
        report_path = project_root / "etl_billiards" / "docs" / "analysis_discount_patterns.md"
        report_path.parent.mkdir(parents=True, exist_ok=True)
        with open(report_path, 'w', encoding='utf-8') as f:
            f.write(report)
        print(f"报告已保存到: {report_path}")

    finally:
        db.close()


def get_overall_stats(db: DatabaseConnection) -> Dict[str, Any]:
    """Fetch table-wide order counts and totals for every discount field.

    Args:
        db: Database connection; only ``db.query(sql)`` is used.

    Returns:
        The single aggregate row as a dict, or ``{}`` when the query returns
        no rows. SUM columns can be None when there is nothing to sum.
    """
    sql = """
        SELECT
            COUNT(*) AS total_orders,
            COUNT(CASE WHEN adjust_amount != 0 THEN 1 END) AS orders_with_adjust,
            COUNT(CASE WHEN member_discount_amount != 0 THEN 1 END) AS orders_with_member_discount,
            COUNT(CASE WHEN rounding_amount != 0 THEN 1 END) AS orders_with_rounding,
            COUNT(CASE WHEN coupon_amount != 0 THEN 1 END) AS orders_with_coupon,
            COUNT(CASE WHEN gift_card_amount != 0 THEN 1 END) AS orders_with_gift_card,
            SUM(adjust_amount) AS total_adjust,
            SUM(member_discount_amount) AS total_member_discount,
            SUM(rounding_amount) AS total_rounding,
            SUM(coupon_amount) AS total_coupon,
            SUM(gift_card_amount) AS total_gift_card,
            SUM(consume_money) AS total_consume,
            SUM(pay_amount) AS total_pay
        FROM billiards_dwd.dwd_settlement_head
    """
    rows = db.query(sql)
    return dict(rows[0]) if rows else {}


def get_sample_orders_with_discount(
    db: DatabaseConnection,
    limit: int = 100
) -> List[Dict[str, Any]]:
    """Fetch a random sample of orders with at least one non-zero discount field.

    Args:
        db: Database connection; only ``db.query(sql, params)`` is used.
        limit: Maximum number of orders to sample.

    Returns:
        One dict per sampled order (empty list when nothing matches).
    """
    sql = """
        SELECT
            order_settle_id,
            order_trade_no,
            create_time,
            consume_money,
            pay_amount,
            adjust_amount,
            member_discount_amount,
            rounding_amount,
            coupon_amount,
            gift_card_amount,
            balance_amount,
            recharge_card_amount,
            pl_coupon_sale_amount,
            table_charge_money,
            goods_money,
            assistant_pd_money,
            assistant_cx_money,
            consume_money - pay_amount
                - COALESCE(recharge_card_amount, 0)
                - COALESCE(gift_card_amount, 0)
                - COALESCE(balance_amount, 0) AS calculated_discount
        FROM billiards_dwd.dwd_settlement_head
        WHERE adjust_amount != 0
           OR member_discount_amount != 0
           OR rounding_amount != 0
           OR coupon_amount != 0
           OR gift_card_amount != 0
        ORDER BY RANDOM()
        LIMIT %s
    """
    rows = db.query(sql, (limit,))
    return [dict(row) for row in rows] if rows else []


def analyze_sample_orders(orders: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Tally, per discount field, how many sampled orders use it and with what values.

    Args:
        orders: Order rows as dicts keyed by column name.

    Returns:
        A dict with ``total_sampled``, one ``with_*`` counter per field and
        one ``*_values`` list (floats, non-zero values only) per field.
    """
    analysis: Dict[str, Any] = {'total_sampled': len(orders)}
    for _, counter_key, _ in _SAMPLE_FIELDS:
        analysis[counter_key] = 0
    for _, _, values_key in _SAMPLE_FIELDS:
        analysis[values_key] = []

    for order in orders:
        for column, counter_key, values_key in _SAMPLE_FIELDS:
            # A NULL column arrives as None; Decimal("None") would raise
            # InvalidOperation, so treat NULL as zero (i.e. "not used").
            amount = Decimal(str(_num(order.get(column))))
            if amount != 0:
                analysis[counter_key] += 1
                analysis[values_key].append(float(amount))

    return analysis


def analyze_adjust_amount(db: DatabaseConnection) -> Dict[str, Any]:
    """Analyse the distribution and patterns of adjust_amount.

    Args:
        db: Database connection; only ``db.query(sql)`` is used.

    Returns:
        A dict with ``distribution`` (bucketed counts and totals),
        ``ratio_distribution`` (the 20 most frequent adjust/consume
        percentages) and ``top_samples`` (the 10 largest adjustments by
        absolute value), each a list of row dicts.
    """
    # 1. Value distribution.
    # NOTE: ORDER BY sorts the bucket labels as text, not numerically.
    sql_distribution = """
        SELECT
            CASE
                WHEN adjust_amount = 0 THEN '0'
                WHEN adjust_amount > 0 AND adjust_amount <= 10 THEN '0-10'
                WHEN adjust_amount > 10 AND adjust_amount <= 50 THEN '10-50'
                WHEN adjust_amount > 50 AND adjust_amount <= 100 THEN '50-100'
                WHEN adjust_amount > 100 AND adjust_amount <= 500 THEN '100-500'
                WHEN adjust_amount > 500 THEN '>500'
                WHEN adjust_amount < 0 AND adjust_amount >= -10 THEN '-10-0'
                WHEN adjust_amount < -10 AND adjust_amount >= -50 THEN '-50--10'
                WHEN adjust_amount < -50 AND adjust_amount >= -100 THEN '-100--50'
                WHEN adjust_amount < -100 THEN '<-100'
            END AS range,
            COUNT(*) AS count,
            SUM(adjust_amount) AS total_amount
        FROM billiards_dwd.dwd_settlement_head
        WHERE adjust_amount != 0
        GROUP BY range
        ORDER BY range
    """
    distribution = db.query(sql_distribution)

    # 2. Relation to the consumed amount.
    sql_ratio = """
        SELECT
            ROUND(adjust_amount / NULLIF(consume_money, 0) * 100, 2) AS discount_ratio,
            COUNT(*) AS count
        FROM billiards_dwd.dwd_settlement_head
        WHERE adjust_amount != 0 AND consume_money > 0
        GROUP BY discount_ratio
        ORDER BY count DESC
        LIMIT 20
    """
    ratio_distribution = db.query(sql_ratio)

    # 3. Typical samples (largest adjustments first).
    sql_samples = """
        SELECT
            order_settle_id,
            consume_money,
            adjust_amount,
            ROUND(adjust_amount / NULLIF(consume_money, 0) * 100, 2) AS ratio
        FROM billiards_dwd.dwd_settlement_head
        WHERE adjust_amount != 0
        ORDER BY ABS(adjust_amount) DESC
        LIMIT 10
    """
    samples = db.query(sql_samples)

    return {
        'distribution': [dict(r) for r in distribution] if distribution else [],
        'ratio_distribution': [dict(r) for r in ratio_distribution] if ratio_distribution else [],
        'top_samples': [dict(r) for r in samples] if samples else []
    }


def analyze_member_discount(db: DatabaseConnection) -> Dict[str, Any]:
    """Analyse how member_discount_amount is used.

    Args:
        db: Database connection; only ``db.query(sql)`` is used.

    Returns:
        A dict with ``stats`` (one aggregate row; avg/min are None when no
        order has a non-zero member discount) and ``samples`` (up to 20
        orders with a non-zero member discount).
    """
    sql = """
        SELECT
            COUNT(*) AS total_orders,
            COUNT(CASE WHEN member_discount_amount != 0 THEN 1 END) AS with_discount,
            SUM(member_discount_amount) AS total_discount,
            AVG(CASE WHEN member_discount_amount != 0 THEN member_discount_amount END) AS avg_discount,
            MAX(member_discount_amount) AS max_discount,
            MIN(CASE WHEN member_discount_amount != 0 THEN member_discount_amount END) AS min_discount
        FROM billiards_dwd.dwd_settlement_head
    """
    rows = db.query(sql)
    stats = dict(rows[0]) if rows else {}

    # Sample of orders that carry a member discount.
    sql_samples = """
        SELECT
            order_settle_id,
            member_id,
            consume_money,
            member_discount_amount,
            ROUND(member_discount_amount / NULLIF(consume_money, 0) * 100, 2) AS ratio
        FROM billiards_dwd.dwd_settlement_head
        WHERE member_discount_amount != 0
        LIMIT 20
    """
    samples = db.query(sql_samples)

    return {
        'stats': stats,
        'samples': [dict(r) for r in samples] if samples else []
    }


def analyze_rounding(db: DatabaseConnection) -> Dict[str, Any]:
    """Analyse the rule behind rounding_amount.

    Args:
        db: Database connection; only ``db.query(sql)`` is used.

    Returns:
        A dict with ``distribution`` (the 20 most frequent rounding amounts)
        and ``patterns`` (up to 20 rows pairing the paid amount with the
        amount before rounding and its cents part).
    """
    # 1. Distribution of rounding amounts.
    sql_distribution = """
        SELECT
            rounding_amount,
            COUNT(*) AS count
        FROM billiards_dwd.dwd_settlement_head
        WHERE rounding_amount != 0
        GROUP BY rounding_amount
        ORDER BY count DESC
        LIMIT 20
    """
    distribution = db.query(sql_distribution)

    # 2. Relation between rounding and the amount actually paid.
    sql_pattern = """
        SELECT
            pay_amount,
            rounding_amount,
            pay_amount + rounding_amount AS before_rounding,
            MOD(CAST((pay_amount + rounding_amount) * 100 AS INTEGER), 100) AS cents
        FROM billiards_dwd.dwd_settlement_head
        WHERE rounding_amount != 0
        LIMIT 20
    """
    patterns = db.query(sql_pattern)

    return {
        'distribution': [dict(r) for r in distribution] if distribution else [],
        'patterns': [dict(r) for r in patterns] if patterns else []
    }


def analyze_groupbuy(db: DatabaseConnection) -> Dict[str, Any]:
    """Analyse group-buy (coupon) discounts.

    Args:
        db: Database connection; only ``db.query(sql)`` is used.

    Returns:
        A dict with ``stats`` (one aggregate row) and ``samples`` (up to 20
        coupon orders LEFT JOINed to dwd_groupbuy_redemption, so the
        group-buy columns are None for orders without a redemption row).
    """
    # 1. Group-buy usage statistics.
    sql_stats = """
        SELECT
            COUNT(*) AS total_orders,
            COUNT(CASE WHEN coupon_amount != 0 THEN 1 END) AS with_coupon,
            COUNT(CASE WHEN pl_coupon_sale_amount != 0 THEN 1 END) AS with_pl_coupon,
            SUM(coupon_amount) AS total_coupon_amount,
            SUM(pl_coupon_sale_amount) AS total_pl_coupon_sale
        FROM billiards_dwd.dwd_settlement_head
    """
    stats = db.query(sql_stats)

    # 2. Sample of group-buy orders.
    sql_samples = """
        SELECT
            sh.order_settle_id,
            sh.coupon_amount,
            sh.pl_coupon_sale_amount,
            gr.ledger_amount AS groupbuy_ledger_amount,
            gr.ledger_unit_price AS groupbuy_unit_price
        FROM billiards_dwd.dwd_settlement_head sh
        LEFT JOIN billiards_dwd.dwd_groupbuy_redemption gr
            ON sh.order_settle_id = gr.order_settle_id
        WHERE sh.coupon_amount != 0
        LIMIT 20
    """
    samples = db.query(sql_samples)

    return {
        'stats': dict(stats[0]) if stats else {},
        'samples': [dict(r) for r in samples] if samples else []
    }


def print_overall_stats(stats: Dict[str, Any]):
    """Print the overall statistics.

    Tolerates an empty table: a zero order count prints 0.00% instead of
    dividing by zero, and NULL totals print as 0.00.
    """
    total = _num(stats.get('total_orders'))
    print(f"总订单数: {total:,}")
    for label, key in _ORDER_COUNT_ROWS:
        count = _num(stats.get(key))
        print(f"{label}: {count:,} ({_pct(count, total):.2f}%)")
    print()
    for label, _, key in _AMOUNT_TOTAL_ROWS:
        print(f"{label}: {_num(stats.get(key)):,.2f}")


def print_discount_analysis(analysis: Dict[str, Any]):
    """Print the per-field counts of the sampled orders."""
    print(f"抽样订单数: {analysis['total_sampled']}")
    print(f" - 有adjust_amount: {analysis['with_adjust']}")
    print(f" - 有member_discount: {analysis['with_member_discount']}")
    print(f" - 有rounding: {analysis['with_rounding']}")
    print(f" - 有coupon: {analysis['with_coupon']}")
    print(f" - 有gift_card: {analysis['with_gift_card']}")


def print_adjust_analysis(analysis: Dict[str, Any]):
    """Print the adjust_amount analysis (buckets, ratios, largest samples)."""
    print("值分布:")
    for item in analysis.get('distribution', []):
        print(
            f" {item.get('range', 'N/A')}: {_num(item.get('count')):,} 单, "
            f"总额 {_num(item.get('total_amount')):,.2f}"
        )

    print("\n折扣比例分布 (Top 10):")
    for item in analysis.get('ratio_distribution', [])[:10]:
        print(f" {item.get('discount_ratio', 0)}%: {_num(item.get('count')):,} 单")

    print("\n大额调整样本 (Top 10):")
    for item in analysis.get('top_samples', []):
        # consume_money may be NULL for a sampled row; format it as 0.00.
        print(
            f" 订单{item.get('order_settle_id')}: "
            f"消费{_num(item.get('consume_money')):,.2f}, "
            f"调整{_num(item.get('adjust_amount')):,.2f} "
            f"({item.get('ratio', 0)}%)"
        )


def print_member_discount_analysis(analysis: Dict[str, Any]):
    """Print the member-discount analysis.

    When no order carries a member discount the AVG aggregate is NULL; it is
    printed as 0.00 so the function reaches the "field may be unused" notice
    instead of failing on the format spec.
    """
    stats = analysis.get('stats', {})
    print(f"总订单数: {_num(stats.get('total_orders')):,}")
    print(f"有会员折扣的订单: {_num(stats.get('with_discount')):,}")
    print(f"会员折扣总额: {_num(stats.get('total_discount')):,.2f}")
    print(f"平均折扣: {_num(stats.get('avg_discount')):,.2f}")
    print(f"最大折扣: {_num(stats.get('max_discount')):,.2f}")

    samples = analysis.get('samples', [])
    if samples:
        print("\n样本订单:")
        for item in samples[:5]:
            print(
                f" 订单{item.get('order_settle_id')}: "
                f"会员{item.get('member_id')}, "
                f"消费{_num(item.get('consume_money')):,.2f}, "
                f"折扣{_num(item.get('member_discount_amount')):,.2f} "
                f"({item.get('ratio', 0)}%)"
            )
    else:
        print("\n[!] 未发现使用会员折扣的订单,该字段可能未启用")


def print_rounding_analysis(analysis: Dict[str, Any]):
    """Print the rounding analysis (amount distribution and sample patterns)."""
    print("抹零金额分布:")
    for item in analysis.get('distribution', []):
        print(f" {_num(item.get('rounding_amount')):,.2f}: {_num(item.get('count')):,} 单")

    print("\n抹零模式样本:")
    for item in analysis.get('patterns', [])[:5]:
        print(
            f" 实付{_num(item.get('pay_amount')):,.2f} + "
            f"抹零{_num(item.get('rounding_amount')):,.2f} = "
            f"{_num(item.get('before_rounding')):,.2f}"
        )


def print_groupbuy_analysis(analysis: Dict[str, Any]):
    """Print the group-buy analysis (usage statistics and sample orders)."""
    stats = analysis.get('stats', {})
    print(f"总订单数: {_num(stats.get('total_orders')):,}")
    print(f"有coupon_amount的订单: {_num(stats.get('with_coupon')):,}")
    print(f"有pl_coupon_sale_amount的订单: {_num(stats.get('with_pl_coupon')):,}")
    print(f"coupon_amount总额: {_num(stats.get('total_coupon_amount')):,.2f}")
    print(f"pl_coupon_sale_amount总额: {_num(stats.get('total_pl_coupon_sale')):,.2f}")

    print("\n团购订单样本:")
    for item in analysis.get('samples', [])[:5]:
        # The LEFT JOIN leaves the key present with a None value when there
        # is no redemption row, so a .get() default alone never applies.
        unit_price = item.get('groupbuy_unit_price')
        if unit_price is None:
            unit_price = 'N/A'
        print(
            f" 订单{item.get('order_settle_id')}: "
            f"coupon={_num(item.get('coupon_amount')):,.2f}, "
            f"pl_coupon={_num(item.get('pl_coupon_sale_amount')):,.2f}, "
            f"groupbuy_price={unit_price}"
        )


def generate_report(
    overall_stats: Dict[str, Any],
    discount_analysis: Dict[str, Any],
    adjust_analysis: Dict[str, Any],
    member_discount_analysis: Dict[str, Any],
    rounding_analysis: Dict[str, Any],
    groupbuy_analysis: Dict[str, Any]
) -> str:
    """Build the analysis report as Markdown.

    Args:
        overall_stats: Row returned by ``get_overall_stats``.
        discount_analysis: Result of ``analyze_sample_orders`` (accepted for
            interface compatibility; not used in the report body).
        adjust_analysis: Result of ``analyze_adjust_amount``.
        member_discount_analysis: Result of ``analyze_member_discount``.
        rounding_analysis: Result of ``analyze_rounding``.
        groupbuy_analysis: Result of ``analyze_groupbuy``.

    Returns:
        The full report text. NULL aggregates are rendered as 0 and a zero
        order count yields 0.00% shares rather than a division error.
    """
    now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    total = _num(overall_stats.get('total_orders'))

    count_rows = "".join(
        f"| {label} | {_num(overall_stats.get(key)):,} "
        f"| {_pct(overall_stats.get(key), total):.2f}% |\n"
        for label, key in _ORDER_COUNT_ROWS
    )
    amount_rows = "".join(
        f"| {label} | {_num(overall_stats.get(key)):,.2f} |\n"
        for _, label, key in _AMOUNT_TOTAL_ROWS
    )
    adjust_rows = "".join(
        f"| {item.get('range', 'N/A')} | {_num(item.get('count')):,} "
        f"| {_num(item.get('total_amount')):,.2f} |\n"
        for item in adjust_analysis.get('distribution', [])
    )
    rounding_rows = "".join(
        f"| {_num(item.get('rounding_amount')):,.2f} | {_num(item.get('count')):,} |\n"
        for item in rounding_analysis.get('distribution', [])[:10]
    )

    member_stats = member_discount_analysis.get('stats', {})
    with_discount = _num(member_stats.get('with_discount'))
    if with_discount == 0:
        member_section = """### 结论

**[!] 该字段未发现任何非零值,会员折扣功能可能未启用。**

建议:在DWS财务统计中,可以暂时忽略此字段,或将其标记为"待启用"。
"""
    else:
        member_section = f"""### 使用统计

| 指标 | 数值 |
|------|------|
| 有会员折扣的订单 | {with_discount:,} |
| 会员折扣总额 | {_num(member_stats.get('total_discount')):,.2f} |
| 平均折扣 | {_num(member_stats.get('avg_discount')):,.2f} |
| 最大折扣 | {_num(member_stats.get('max_discount')):,.2f} |
"""

    groupbuy_stats = groupbuy_analysis.get('stats', {})

    sections = [
        f"""# 优惠口径抽样分析报告

**生成时间**: {now}

## 一、总体统计

| 指标 | 数值 | 占比 |
|------|------|------|
| 总订单数 | {total:,} | 100% |
""",
        count_rows,
        """
### 金额统计

| 优惠类型 | 总额 |
|----------|------|
""",
        amount_rows,
        """
## 二、adjust_amount (台费打折/调整) 分析

### 值分布

| 区间 | 订单数 | 总额 |
|------|--------|------|
""",
        adjust_rows,
        """
### 分析结论

- **是否包含大客户优惠**: 需要进一步分析adjust_amount的业务来源
- **与普通调整的区分**: 建议查看是否有备注字段或关联的优惠活动表

## 三、member_discount_amount (会员折扣) 分析

""",
        member_section,
        """
## 四、rounding_amount (抹零) 分析

### 抹零金额分布

| 抹零金额 | 订单数 |
|----------|--------|
""",
        rounding_rows,
        """
### 抹零规则推断

根据抹零金额分布,推断抹零规则为:
- 抹零到整元(去除角分)
- 或抹零到特定尾数

## 五、团购优惠分析

""",
        f"""### 使用统计

| 指标 | 数值 |
|------|------|
| 有coupon_amount的订单 | {_num(groupbuy_stats.get('with_coupon')):,} |
| 有pl_coupon_sale_amount的订单 | {_num(groupbuy_stats.get('with_pl_coupon')):,} |
| coupon_amount总额 | {_num(groupbuy_stats.get('total_coupon_amount')):,.2f} |
| pl_coupon_sale_amount总额 | {_num(groupbuy_stats.get('total_pl_coupon_sale')):,.2f} |

### 团购支付金额计算路径

根据分析,团购支付金额应按以下路径计算:

1. 若 `pl_coupon_sale_amount ≠ 0` → 使用 `pl_coupon_sale_amount`
2. 若 `pl_coupon_sale_amount = 0` 且 `coupon_amount ≠ 0` → 通过 `order_settle_id` 关联 `dwd_groupbuy_redemption` 获取 `ledger_unit_price`

团购优惠金额 = coupon_amount - 团购支付金额

## 六、建议与结论

### 优惠口径定义建议

| 优惠类型 | 字段来源 | 计算公式 | 状态 |
|----------|----------|----------|------|
| 团购优惠 | settlement + groupbuy | coupon_amount - 团购支付金额 | 可用 |
| 会员折扣 | settlement.member_discount_amount | 直接取值 | 待确认 |
| 赠送卡抵扣 | settlement.gift_card_amount | 直接取值 | 可用 |
| 手动调整 | settlement.adjust_amount | 直接取值 | 可用 |
| 抹零 | settlement.rounding_amount | 直接取值 | 可用 |
| 大客户优惠 | 待分析 | 需要业务确认 | 待定义 |
| 其他优惠 | 待分析 | 需要业务确认 | 待定义 |

### 下一步行动

1. **确认会员折扣是否启用**: 与业务确认member_discount_amount的使用场景
2. **大客户优惠识别规则**: 与业务确认如何从adjust_amount中识别大客户优惠
3. **其他优惠分类**: 与业务确认adjust_amount中还包含哪些优惠类型
""",
    ]

    return "".join(sections)


if __name__ == "__main__":
    analyze_discount_patterns()