Update2
This commit is contained in:
636
etl_billiards/scripts/analyze_discount_patterns.py
Normal file
636
etl_billiards/scripts/analyze_discount_patterns.py
Normal file
@@ -0,0 +1,636 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
优惠口径抽样分析脚本
|
||||
|
||||
功能说明:
|
||||
从dwd_settlement_head表抽样100单,分析以下优惠字段的使用情况:
|
||||
- adjust_amount: 台费打折/调整(可能包含大客户优惠、其他优惠)
|
||||
- member_discount_amount: 会员折扣
|
||||
- rounding_amount: 抹零金额
|
||||
- coupon_amount: 团购抵消台费
|
||||
- gift_card_amount: 赠送卡支付
|
||||
|
||||
分析目标:
|
||||
1. 大客户优惠:是否存在"大客户"标识?如何与普通调整区分?
|
||||
2. 会员折扣:是否有非零值?使用场景是什么?
|
||||
3. 抹零:抹零规则?与adjust_amount的关系?
|
||||
4. 其他优惠:adjust_amount中还包含哪些优惠类型?
|
||||
|
||||
输出:
|
||||
- 控制台打印分析报告
|
||||
- 生成 docs/analysis_discount_patterns.md 报告文件
|
||||
|
||||
作者:ETL团队
|
||||
创建日期:2026-02-01
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
from datetime import datetime
|
||||
from decimal import Decimal
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
# 添加项目根目录到Python路径
|
||||
project_root = Path(__file__).parent.parent.parent
|
||||
sys.path.insert(0, str(project_root))
|
||||
|
||||
from etl_billiards.utils.config import Config
|
||||
from etl_billiards.utils.db import DatabaseConnection
|
||||
|
||||
|
||||
def analyze_discount_patterns():
    """
    Run the discount-pattern sampling analysis end to end.

    Each analysis section is fetched and printed to the console in turn, then
    the Markdown report is rendered and written to
    ``etl_billiards/docs/analysis_discount_patterns.md`` under the project
    root. The database connection is closed even when a step fails.
    """
    print("=" * 80)
    print("优惠口径抽样分析")
    print("=" * 80)
    print()

    # Load configuration and open the database connection
    config = Config()
    db = DatabaseConnection(config)

    try:
        # 1. Table-wide statistics
        print("【1. 总体统计】")
        print("-" * 40)
        overall_stats = get_overall_stats(db)
        print_overall_stats(overall_stats)
        print()

        # 2. Random sample of discounted orders
        print("【2. 有优惠的订单抽样分析(100单)】")
        print("-" * 40)
        sample_orders = get_sample_orders_with_discount(db, limit=100)
        discount_analysis = analyze_sample_orders(sample_orders)
        print_discount_analysis(discount_analysis)
        print()

        # 3. adjust_amount details
        print("【3. adjust_amount (台费打折/调整) 详细分析】")
        print("-" * 40)
        adjust_analysis = analyze_adjust_amount(db)
        print_adjust_analysis(adjust_analysis)
        print()

        # 4. member_discount_amount usage
        print("【4. member_discount_amount (会员折扣) 使用分析】")
        print("-" * 40)
        member_discount_analysis = analyze_member_discount(db)
        print_member_discount_analysis(member_discount_analysis)
        print()

        # 5. rounding_amount rules
        print("【5. rounding_amount (抹零) 规则分析】")
        print("-" * 40)
        rounding_analysis = analyze_rounding(db)
        print_rounding_analysis(rounding_analysis)
        print()

        # 6. Group-buy discounts
        print("【6. 团购优惠分析】")
        print("-" * 40)
        groupbuy_analysis = analyze_groupbuy(db)
        print_groupbuy_analysis(groupbuy_analysis)
        print()

        # 7. Render the report
        print("【7. 生成分析报告】")
        print("-" * 40)
        report = generate_report(
            overall_stats,
            discount_analysis,
            adjust_analysis,
            member_discount_analysis,
            rounding_analysis,
            groupbuy_analysis
        )

        # Save the report
        report_path = project_root / "etl_billiards" / "docs" / "analysis_discount_patterns.md"
        # The docs directory may be absent; without this, open() would fail
        # with FileNotFoundError only after every query has already run.
        report_path.parent.mkdir(parents=True, exist_ok=True)
        with open(report_path, 'w', encoding='utf-8') as f:
            f.write(report)
        print(f"报告已保存到: {report_path}")

    finally:
        db.close()
|
||||
|
||||
|
||||
def get_overall_stats(db: DatabaseConnection) -> Dict[str, Any]:
    """
    Fetch table-wide order counts and totals for each discount column.

    Returns the single aggregate row as a dict, or an empty dict when the
    query yields no rows.
    """
    overall_sql = """
        SELECT
            COUNT(*) AS total_orders,
            COUNT(CASE WHEN adjust_amount != 0 THEN 1 END) AS orders_with_adjust,
            COUNT(CASE WHEN member_discount_amount != 0 THEN 1 END) AS orders_with_member_discount,
            COUNT(CASE WHEN rounding_amount != 0 THEN 1 END) AS orders_with_rounding,
            COUNT(CASE WHEN coupon_amount != 0 THEN 1 END) AS orders_with_coupon,
            COUNT(CASE WHEN gift_card_amount != 0 THEN 1 END) AS orders_with_gift_card,
            SUM(adjust_amount) AS total_adjust,
            SUM(member_discount_amount) AS total_member_discount,
            SUM(rounding_amount) AS total_rounding,
            SUM(coupon_amount) AS total_coupon,
            SUM(gift_card_amount) AS total_gift_card,
            SUM(consume_money) AS total_consume,
            SUM(pay_amount) AS total_pay
        FROM billiards_dwd.dwd_settlement_head
    """
    result = db.query(overall_sql)
    if not result:
        return {}
    return dict(result[0])
|
||||
|
||||
|
||||
def get_sample_orders_with_discount(
    db: DatabaseConnection,
    limit: int = 100
) -> List[Dict[str, Any]]:
    """
    Draw a random sample of orders that carry at least one discount.

    An order qualifies when any of adjust_amount, member_discount_amount,
    rounding_amount, coupon_amount or gift_card_amount is non-zero. At most
    ``limit`` rows are returned, each as a dict; an empty list is returned
    when the query yields nothing.
    """
    sample_sql = """
        SELECT
            order_settle_id,
            order_trade_no,
            create_time,
            consume_money,
            pay_amount,
            adjust_amount,
            member_discount_amount,
            rounding_amount,
            coupon_amount,
            gift_card_amount,
            balance_amount,
            recharge_card_amount,
            pl_coupon_sale_amount,
            table_charge_money,
            goods_money,
            assistant_pd_money,
            assistant_cx_money,
            consume_money - pay_amount - COALESCE(recharge_card_amount, 0)
                - COALESCE(gift_card_amount, 0) - COALESCE(balance_amount, 0) AS calculated_discount
        FROM billiards_dwd.dwd_settlement_head
        WHERE adjust_amount != 0
            OR member_discount_amount != 0
            OR rounding_amount != 0
            OR coupon_amount != 0
            OR gift_card_amount != 0
        ORDER BY RANDOM()
        LIMIT %s
    """
    sampled = db.query(sample_sql, (limit,))
    if not sampled:
        return []
    return list(map(dict, sampled))
|
||||
|
||||
|
||||
def analyze_sample_orders(orders: List[Dict[str, Any]]) -> Dict[str, Any]:
    """
    Count how many sampled orders use each discount column.

    Args:
        orders: Order rows as dicts keyed by column name.

    Returns:
        A dict with ``total_sampled``, one ``with_*`` counter per discount
        column, and one ``*_values`` list holding the non-zero amounts as
        floats. Missing or None amounts are treated as zero.
    """
    # (source column, counter key, value-list key)
    tracked = (
        ('adjust_amount', 'with_adjust', 'adjust_values'),
        ('member_discount_amount', 'with_member_discount', 'member_discount_values'),
        ('rounding_amount', 'with_rounding', 'rounding_values'),
        ('coupon_amount', 'with_coupon', 'coupon_values'),
        ('gift_card_amount', 'with_gift_card', 'gift_card_values'),
    )

    analysis: Dict[str, Any] = {'total_sampled': len(orders)}
    for _, count_key, _ in tracked:
        analysis[count_key] = 0
    for _, _, values_key in tracked:
        analysis[values_key] = []

    for order in orders:
        for column, count_key, values_key in tracked:
            raw = order.get(column)
            # A NULL column arrives as None; Decimal("None") would raise
            # InvalidOperation, so treat it as a zero amount instead.
            amount = Decimal(0) if raw is None else Decimal(str(raw))
            if amount != 0:
                analysis[count_key] += 1
                analysis[values_key].append(float(amount))

    return analysis
|
||||
|
||||
|
||||
def analyze_adjust_amount(db: DatabaseConnection) -> Dict[str, Any]:
    """
    Profile the adjust_amount column with three queries.

    Returns a dict with:
        distribution: order count and total per value bucket (non-zero only).
        ratio_distribution: the 20 most frequent adjust/consume percentages.
        top_samples: the 10 orders with the largest absolute adjustment.
    Each entry is a list of row dicts, empty when its query yields nothing.
    """
    # 1. Value buckets
    bucket_sql = """
        SELECT
            CASE
                WHEN adjust_amount = 0 THEN '0'
                WHEN adjust_amount > 0 AND adjust_amount <= 10 THEN '0-10'
                WHEN adjust_amount > 10 AND adjust_amount <= 50 THEN '10-50'
                WHEN adjust_amount > 50 AND adjust_amount <= 100 THEN '50-100'
                WHEN adjust_amount > 100 AND adjust_amount <= 500 THEN '100-500'
                WHEN adjust_amount > 500 THEN '>500'
                WHEN adjust_amount < 0 AND adjust_amount >= -10 THEN '-10-0'
                WHEN adjust_amount < -10 AND adjust_amount >= -50 THEN '-50--10'
                WHEN adjust_amount < -50 AND adjust_amount >= -100 THEN '-100--50'
                WHEN adjust_amount < -100 THEN '<-100'
            END AS range,
            COUNT(*) AS count,
            SUM(adjust_amount) AS total_amount
        FROM billiards_dwd.dwd_settlement_head
        WHERE adjust_amount != 0
        GROUP BY range
        ORDER BY range
    """
    bucket_rows = db.query(bucket_sql)

    # 2. Adjustment as a percentage of the consumed amount
    percentage_sql = """
        SELECT
            ROUND(adjust_amount / NULLIF(consume_money, 0) * 100, 2) AS discount_ratio,
            COUNT(*) AS count
        FROM billiards_dwd.dwd_settlement_head
        WHERE adjust_amount != 0 AND consume_money > 0
        GROUP BY discount_ratio
        ORDER BY count DESC
        LIMIT 20
    """
    percentage_rows = db.query(percentage_sql)

    # 3. Largest adjustments by absolute value
    largest_sql = """
        SELECT
            order_settle_id,
            consume_money,
            adjust_amount,
            ROUND(adjust_amount / NULLIF(consume_money, 0) * 100, 2) AS ratio
        FROM billiards_dwd.dwd_settlement_head
        WHERE adjust_amount != 0
        ORDER BY ABS(adjust_amount) DESC
        LIMIT 10
    """
    largest_rows = db.query(largest_sql)

    outcome: Dict[str, Any] = {}
    outcome['distribution'] = list(map(dict, bucket_rows)) if bucket_rows else []
    outcome['ratio_distribution'] = list(map(dict, percentage_rows)) if percentage_rows else []
    outcome['top_samples'] = list(map(dict, largest_rows)) if largest_rows else []
    return outcome
|
||||
|
||||
|
||||
def analyze_member_discount(db: DatabaseConnection) -> Dict[str, Any]:
    """
    Summarise how the member_discount_amount column is used.

    Returns a dict with ``stats`` (one aggregate row as a dict, or an empty
    dict) and ``samples`` (up to 20 orders with a non-zero member discount,
    each as a dict).
    """
    summary_sql = """
        SELECT
            COUNT(*) AS total_orders,
            COUNT(CASE WHEN member_discount_amount != 0 THEN 1 END) AS with_discount,
            SUM(member_discount_amount) AS total_discount,
            AVG(CASE WHEN member_discount_amount != 0 THEN member_discount_amount END) AS avg_discount,
            MAX(member_discount_amount) AS max_discount,
            MIN(CASE WHEN member_discount_amount != 0 THEN member_discount_amount END) AS min_discount
        FROM billiards_dwd.dwd_settlement_head
    """
    summary_rows = db.query(summary_sql)
    if summary_rows:
        summary = dict(summary_rows[0])
    else:
        summary = {}

    # Orders that actually carry a member discount
    example_sql = """
        SELECT
            order_settle_id,
            member_id,
            consume_money,
            member_discount_amount,
            ROUND(member_discount_amount / NULLIF(consume_money, 0) * 100, 2) AS ratio
        FROM billiards_dwd.dwd_settlement_head
        WHERE member_discount_amount != 0
        LIMIT 20
    """
    example_rows = db.query(example_sql)
    examples = list(map(dict, example_rows)) if example_rows else []

    return {'stats': summary, 'samples': examples}
|
||||
|
||||
|
||||
def analyze_rounding(db: DatabaseConnection) -> Dict[str, Any]:
    """
    Collect data for inferring the rounding_amount rule.

    Returns a dict with ``distribution`` (the 20 most frequent non-zero
    rounding amounts with their order counts) and ``patterns`` (up to 20
    orders showing pay_amount, rounding_amount, their sum and its cents).
    Both are lists of row dicts.
    """
    # 1. Frequency of each rounding amount
    frequency_sql = """
        SELECT
            rounding_amount,
            COUNT(*) AS count
        FROM billiards_dwd.dwd_settlement_head
        WHERE rounding_amount != 0
        GROUP BY rounding_amount
        ORDER BY count DESC
        LIMIT 20
    """
    frequency_rows = db.query(frequency_sql)

    # 2. Rounding compared with the paid amount
    relation_sql = """
        SELECT
            pay_amount,
            rounding_amount,
            pay_amount + rounding_amount AS before_rounding,
            MOD(CAST((pay_amount + rounding_amount) * 100 AS INTEGER), 100) AS cents
        FROM billiards_dwd.dwd_settlement_head
        WHERE rounding_amount != 0
        LIMIT 20
    """
    relation_rows = db.query(relation_sql)

    frequencies = list(map(dict, frequency_rows)) if frequency_rows else []
    relations = list(map(dict, relation_rows)) if relation_rows else []
    return {'distribution': frequencies, 'patterns': relations}
|
||||
|
||||
|
||||
def analyze_groupbuy(db: DatabaseConnection) -> Dict[str, Any]:
    """
    Summarise group-buy (coupon) discounts.

    Returns a dict with ``stats`` (counts and totals for coupon_amount and
    pl_coupon_sale_amount, as one dict or an empty dict) and ``samples`` (up
    to 20 coupon orders left-joined to dwd_groupbuy_redemption, as dicts).
    """
    # 1. Usage totals
    usage_sql = """
        SELECT
            COUNT(*) AS total_orders,
            COUNT(CASE WHEN coupon_amount != 0 THEN 1 END) AS with_coupon,
            COUNT(CASE WHEN pl_coupon_sale_amount != 0 THEN 1 END) AS with_pl_coupon,
            SUM(coupon_amount) AS total_coupon_amount,
            SUM(pl_coupon_sale_amount) AS total_pl_coupon_sale
        FROM billiards_dwd.dwd_settlement_head
    """
    usage_rows = db.query(usage_sql)

    # 2. Example coupon orders with their redemption ledger values
    example_sql = """
        SELECT
            sh.order_settle_id,
            sh.coupon_amount,
            sh.pl_coupon_sale_amount,
            gr.ledger_amount AS groupbuy_ledger_amount,
            gr.ledger_unit_price AS groupbuy_unit_price
        FROM billiards_dwd.dwd_settlement_head sh
        LEFT JOIN billiards_dwd.dwd_groupbuy_redemption gr
            ON sh.order_settle_id = gr.order_settle_id
        WHERE sh.coupon_amount != 0
        LIMIT 20
    """
    example_rows = db.query(example_sql)

    usage = dict(usage_rows[0]) if usage_rows else {}
    examples = list(map(dict, example_rows)) if example_rows else []
    return {'stats': usage, 'samples': examples}
|
||||
|
||||
|
||||
def print_overall_stats(stats: Dict[str, Any]):
    """
    Print the overall order counts, their share of all orders, and totals.

    Missing or None values are printed as 0, and every percentage is 0.00
    when the total order count is zero, so an empty table does not raise.
    """
    total = stats.get('total_orders') or 0
    print(f"总订单数: {total:,}")

    count_rows = (
        ("adjust_amount", 'orders_with_adjust'),
        ("member_discount", 'orders_with_member_discount'),
        ("rounding", 'orders_with_rounding'),
        ("coupon", 'orders_with_coupon'),
        ("gift_card", 'orders_with_gift_card'),
    )
    for label, key in count_rows:
        count = stats.get(key) or 0
        # Guard the division: COUNT(*) is 0 on an empty table.
        share = count / total * 100 if total else 0.0
        print(f"有{label}的订单: {count:,} ({share:.2f}%)")

    print()

    amount_rows = (
        ("adjust_amount", 'total_adjust'),
        ("member_discount", 'total_member_discount'),
        ("rounding", 'total_rounding'),
        ("coupon", 'total_coupon'),
        ("gift_card", 'total_gift_card'),
    )
    for label, key in amount_rows:
        # SUM() yields None when there is nothing to add up.
        amount = stats.get(key) or 0
        print(f"{label}总额: {amount:,.2f}")
|
||||
|
||||
|
||||
def print_discount_analysis(analysis: Dict[str, Any]):
    """Print the sample size, then one counter line per discount column."""
    print(f"抽样订单数: {analysis['total_sampled']}")

    breakdown = (
        ("adjust_amount", 'with_adjust'),
        ("member_discount", 'with_member_discount'),
        ("rounding", 'with_rounding'),
        ("coupon", 'with_coupon'),
        ("gift_card", 'with_gift_card'),
    )
    for label, counter_key in breakdown:
        print(f" - 有{label}: {analysis[counter_key]}")
|
||||
|
||||
|
||||
def print_adjust_analysis(analysis: Dict[str, Any]):
    """
    Print the adjust_amount findings in three sections.

    Sections: value buckets, the first 10 discount-ratio entries, and every
    entry of ``top_samples``.
    """
    print("值分布:")
    for bucket in analysis.get('distribution', []):
        label = bucket.get('range', 'N/A')
        orders = bucket.get('count', 0)
        amount = bucket.get('total_amount', 0)
        print(f" {label}: {orders:,} 单, 总额 {amount:,.2f}")

    print("\n折扣比例分布 (Top 10):")
    leading_ratios = analysis.get('ratio_distribution', [])[:10]
    for entry in leading_ratios:
        percentage = entry.get('discount_ratio', 0)
        orders = entry.get('count', 0)
        print(f" {percentage}%: {orders:,} 单")

    print("\n大额调整样本 (Top 10):")
    for sample in analysis.get('top_samples', []):
        order_id = sample.get('order_settle_id')
        consumed = sample.get('consume_money', 0)
        adjusted = sample.get('adjust_amount', 0)
        percentage = sample.get('ratio', 0)
        print(f" 订单{order_id}: 消费{consumed:,.2f}, 调整{adjusted:,.2f} ({percentage}%)")
|
||||
|
||||
|
||||
def print_member_discount_analysis(analysis: Dict[str, Any]):
    """
    Print the member-discount summary and up to five sample orders.

    Aggregates that are None (for example the average when no order has a
    non-zero discount) are printed as 0 so that the "no discounted orders"
    notice at the end is still reached instead of raising a TypeError.
    """
    stats = analysis.get('stats', {})
    total_orders = stats.get('total_orders') or 0
    with_discount = stats.get('with_discount') or 0
    total_discount = stats.get('total_discount') or 0
    # avg_discount is computed over non-zero discounts only, so it is None
    # whenever there are none.
    avg_discount = stats.get('avg_discount') or 0
    max_discount = stats.get('max_discount') or 0

    print(f"总订单数: {total_orders:,}")
    print(f"有会员折扣的订单: {with_discount:,}")
    print(f"会员折扣总额: {total_discount:,.2f}")
    print(f"平均折扣: {avg_discount:,.2f}")
    print(f"最大折扣: {max_discount:,.2f}")

    samples = analysis.get('samples', [])
    if samples:
        print("\n样本订单:")
        for item in samples[:5]:
            consume = item.get('consume_money') or 0
            discount = item.get('member_discount_amount') or 0
            print(f" 订单{item.get('order_settle_id')}: 会员{item.get('member_id')}, 消费{consume:,.2f}, 折扣{discount:,.2f} ({item.get('ratio', 0)}%)")
    else:
        print("\n[!] 未发现使用会员折扣的订单,该字段可能未启用")
|
||||
|
||||
|
||||
def print_rounding_analysis(analysis: Dict[str, Any]):
    """Print every rounding-amount frequency, then the first five pattern rows."""
    print("抹零金额分布:")
    for bucket in analysis.get('distribution', []):
        amount = bucket.get('rounding_amount', 0)
        orders = bucket.get('count', 0)
        print(f" {amount:,.2f}: {orders:,} 单")

    print("\n抹零模式样本:")
    shown_patterns = analysis.get('patterns', [])[:5]
    for row in shown_patterns:
        paid = row.get('pay_amount', 0)
        rounded_off = row.get('rounding_amount', 0)
        original = row.get('before_rounding', 0)
        print(f" 实付{paid:,.2f} + 抹零{rounded_off:,.2f} = {original:,.2f}")
|
||||
|
||||
|
||||
def print_groupbuy_analysis(analysis: Dict[str, Any]):
    """Print the group-buy usage totals followed by the first five sample orders."""
    usage = analysis.get('stats', {})
    total_orders = usage.get('total_orders', 0)
    coupon_orders = usage.get('with_coupon', 0)
    pl_coupon_orders = usage.get('with_pl_coupon', 0)
    coupon_total = usage.get('total_coupon_amount', 0)
    pl_coupon_total = usage.get('total_pl_coupon_sale', 0)

    print(f"总订单数: {total_orders:,}")
    print(f"有coupon_amount的订单: {coupon_orders:,}")
    print(f"有pl_coupon_sale_amount的订单: {pl_coupon_orders:,}")
    print(f"coupon_amount总额: {coupon_total:,.2f}")
    print(f"pl_coupon_sale_amount总额: {pl_coupon_total:,.2f}")

    print("\n团购订单样本:")
    shown = analysis.get('samples', [])[:5]
    for row in shown:
        order_id = row.get('order_settle_id')
        coupon = row.get('coupon_amount', 0)
        pl_coupon = row.get('pl_coupon_sale_amount', 0)
        unit_price = row.get('groupbuy_unit_price', 'N/A')
        print(f" 订单{order_id}: coupon={coupon:,.2f}, pl_coupon={pl_coupon:,.2f}, groupbuy_price={unit_price}")
|
||||
|
||||
|
||||
def _report_number(mapping: Dict[str, Any], key: str) -> Any:
    """Return ``mapping[key]``, substituting 0 when the key is missing or None."""
    value = mapping.get(key)
    return 0 if value is None else value


def _report_share(count: Any, total: Any) -> float:
    """Return ``count`` as a percentage of ``total``, or 0.0 when total is zero."""
    return count / total * 100 if total else 0.0


def generate_report(
    overall_stats: Dict[str, Any],
    discount_analysis: Dict[str, Any],
    adjust_analysis: Dict[str, Any],
    member_discount_analysis: Dict[str, Any],
    rounding_analysis: Dict[str, Any],
    groupbuy_analysis: Dict[str, Any]
) -> str:
    """
    Render the analysis results as a Markdown report.

    Args:
        overall_stats: Table-wide counts and totals.
        discount_analysis: Sampled-order analysis; accepted for interface
            compatibility but not used in the report body.
        adjust_analysis: Dict whose ``distribution`` rows fill the bucket table.
        member_discount_analysis: Dict whose ``stats`` drive section three.
        rounding_analysis: Dict whose first 10 ``distribution`` rows are listed.
        groupbuy_analysis: Dict whose ``stats`` fill the group-buy table.

    Returns:
        The full report text. Missing or None numbers render as 0 and
        percentages render as 0.00% when the total order count is zero, so
        an empty table still produces a report instead of raising.
    """
    now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    total = _report_number(overall_stats, 'total_orders')

    count_keys = (
        'orders_with_adjust',
        'orders_with_member_discount',
        'orders_with_rounding',
        'orders_with_coupon',
        'orders_with_gift_card',
    )
    counts = {key: _report_number(overall_stats, key) for key in count_keys}
    shares = {key: _report_share(counts[key], total) for key in count_keys}

    sum_keys = (
        'total_adjust',
        'total_member_discount',
        'total_rounding',
        'total_coupon',
        'total_gift_card',
    )
    sums = {key: _report_number(overall_stats, key) for key in sum_keys}

    # Collect the pieces and join once instead of growing a string with +=.
    parts = [f"""# 优惠口径抽样分析报告

**生成时间**: {now}

## 一、总体统计

| 指标 | 数值 | 占比 |
|------|------|------|
| 总订单数 | {total:,} | 100% |
| 有adjust_amount的订单 | {counts['orders_with_adjust']:,} | {shares['orders_with_adjust']:.2f}% |
| 有member_discount的订单 | {counts['orders_with_member_discount']:,} | {shares['orders_with_member_discount']:.2f}% |
| 有rounding的订单 | {counts['orders_with_rounding']:,} | {shares['orders_with_rounding']:.2f}% |
| 有coupon的订单 | {counts['orders_with_coupon']:,} | {shares['orders_with_coupon']:.2f}% |
| 有gift_card的订单 | {counts['orders_with_gift_card']:,} | {shares['orders_with_gift_card']:.2f}% |

### 金额统计

| 优惠类型 | 总额 |
|----------|------|
| adjust_amount (台费打折/调整) | {sums['total_adjust']:,.2f} |
| member_discount_amount (会员折扣) | {sums['total_member_discount']:,.2f} |
| rounding_amount (抹零) | {sums['total_rounding']:,.2f} |
| coupon_amount (团购抵消台费) | {sums['total_coupon']:,.2f} |
| gift_card_amount (赠送卡支付) | {sums['total_gift_card']:,.2f} |

## 二、adjust_amount (台费打折/调整) 分析

### 值分布

| 区间 | 订单数 | 总额 |
|------|--------|------|
"""]

    for item in adjust_analysis.get('distribution', []):
        parts.append(
            f"| {item.get('range', 'N/A')} | {_report_number(item, 'count'):,} "
            f"| {_report_number(item, 'total_amount'):,.2f} |\n"
        )

    parts.append("""
### 分析结论

- **是否包含大客户优惠**: 需要进一步分析adjust_amount的业务来源
- **与普通调整的区分**: 建议查看是否有备注字段或关联的优惠活动表

## 三、member_discount_amount (会员折扣) 分析

""")

    member_stats = member_discount_analysis.get('stats', {})
    with_discount = _report_number(member_stats, 'with_discount')

    if with_discount == 0:
        parts.append("""### 结论

**[!] 该字段未发现任何非零值,会员折扣功能可能未启用。**

建议:在DWS财务统计中,可以暂时忽略此字段,或将其标记为"待启用"。
""")
    else:
        parts.append(f"""### 使用统计

| 指标 | 数值 |
|------|------|
| 有会员折扣的订单 | {with_discount:,} |
| 会员折扣总额 | {_report_number(member_stats, 'total_discount'):,.2f} |
| 平均折扣 | {_report_number(member_stats, 'avg_discount'):,.2f} |
| 最大折扣 | {_report_number(member_stats, 'max_discount'):,.2f} |
""")

    parts.append("""
## 四、rounding_amount (抹零) 分析

### 抹零金额分布

| 抹零金额 | 订单数 |
|----------|--------|
""")

    for item in rounding_analysis.get('distribution', [])[:10]:
        parts.append(
            f"| {_report_number(item, 'rounding_amount'):,.2f} "
            f"| {_report_number(item, 'count'):,} |\n"
        )

    parts.append("""
### 抹零规则推断

根据抹零金额分布,推断抹零规则为:
- 抹零到整元(去除角分)
- 或抹零到特定尾数

## 五、团购优惠分析

""")

    groupbuy_stats = groupbuy_analysis.get('stats', {})
    parts.append(f"""### 使用统计

| 指标 | 数值 |
|------|------|
| 有coupon_amount的订单 | {_report_number(groupbuy_stats, 'with_coupon'):,} |
| 有pl_coupon_sale_amount的订单 | {_report_number(groupbuy_stats, 'with_pl_coupon'):,} |
| coupon_amount总额 | {_report_number(groupbuy_stats, 'total_coupon_amount'):,.2f} |
| pl_coupon_sale_amount总额 | {_report_number(groupbuy_stats, 'total_pl_coupon_sale'):,.2f} |

### 团购支付金额计算路径

根据分析,团购支付金额应按以下路径计算:
1. 若 `pl_coupon_sale_amount ≠ 0` → 使用 `pl_coupon_sale_amount`
2. 若 `pl_coupon_sale_amount = 0` 且 `coupon_amount ≠ 0` → 通过 `order_settle_id` 关联 `dwd_groupbuy_redemption` 获取 `ledger_unit_price`

团购优惠金额 = coupon_amount - 团购支付金额

## 六、建议与结论

### 优惠口径定义建议

| 优惠类型 | 字段来源 | 计算公式 | 状态 |
|----------|----------|----------|------|
| 团购优惠 | settlement + groupbuy | coupon_amount - 团购支付金额 | 可用 |
| 会员折扣 | settlement.member_discount_amount | 直接取值 | 待确认 |
| 赠送卡抵扣 | settlement.gift_card_amount | 直接取值 | 可用 |
| 手动调整 | settlement.adjust_amount | 直接取值 | 可用 |
| 抹零 | settlement.rounding_amount | 直接取值 | 可用 |
| 大客户优惠 | 待分析 | 需要业务确认 | 待定义 |
| 其他优惠 | 待分析 | 需要业务确认 | 待定义 |

### 下一步行动

1. **确认会员折扣是否启用**: 与业务确认member_discount_amount的使用场景
2. **大客户优惠识别规则**: 与业务确认如何从adjust_amount中识别大客户优惠
3. **其他优惠分类**: 与业务确认adjust_amount中还包含哪些优惠类型
""")

    return "".join(parts)
|
||||
|
||||
|
||||
# Run the analysis only when this file is executed directly as a script.
if __name__ == "__main__":
    analyze_discount_patterns()
|
||||
287
etl_billiards/scripts/analyze_member_discount_usage.py
Normal file
287
etl_billiards/scripts/analyze_member_discount_usage.py
Normal file
@@ -0,0 +1,287 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
会员折扣启用分析脚本
|
||||
|
||||
功能说明:
|
||||
确认 dwd_settlement_head.member_discount_amount 字段是否已启用
|
||||
|
||||
分析内容:
|
||||
1. 统计非零记录数
|
||||
2. 按时间分布分析
|
||||
3. 按会员类型分析
|
||||
4. 与其他字段的关联分析
|
||||
|
||||
输出:
|
||||
- 控制台打印分析结果
|
||||
- 结论:字段是否已启用,使用场景
|
||||
|
||||
作者:ETL团队
|
||||
创建日期:2026-02-01
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List
|
||||
|
||||
# 添加项目根目录到Python路径
|
||||
project_root = Path(__file__).parent.parent.parent
|
||||
sys.path.insert(0, str(project_root))
|
||||
|
||||
from etl_billiards.utils.config import Config
|
||||
from etl_billiards.utils.db import DatabaseConnection
|
||||
|
||||
|
||||
def analyze_member_discount_usage():
    """
    Run the member-discount usage analysis and print every section.

    Four data sections are fetched and printed in order, followed by a
    conclusion derived from the basic statistics. The database connection is
    closed even when a step fails.
    """
    banner = "=" * 80
    rule = "-" * 40

    print(banner)
    print("会员折扣启用分析 (member_discount_amount)")
    print(banner)
    print()

    # Load configuration and open the database connection
    settings = Config()
    db = DatabaseConnection(settings)

    try:
        # (heading, fetch function, print function), executed top to bottom
        sections = (
            ("【1. 基础统计】", get_basic_stats, print_basic_stats),
            ("【2. 时间分布分析】", get_time_distribution, print_time_distribution),
            ("【3. 与会员的关联分析】", get_member_analysis, print_member_analysis),
            ("【4. 样本数据】", get_sample_data, print_samples),
        )
        fetched = []
        for heading, fetch, render in sections:
            print(heading)
            print(rule)
            data = fetch(db)
            render(data)
            print()
            fetched.append(data)

        # 5. Conclusion, based on the basic statistics from section 1
        print("【5. 分析结论】")
        print(rule)
        print_conclusion(fetched[0])

    finally:
        db.close()
|
||||
|
||||
|
||||
def get_basic_stats(db: DatabaseConnection) -> Dict[str, Any]:
    """
    Fetch aggregate statistics for member_discount_amount.

    Returns the single aggregate row as a dict (counts of non-zero, positive
    and negative values plus sum, average, max, min and standard deviation),
    or an empty dict when the query yields no rows.
    """
    stats_sql = """
        SELECT
            COUNT(*) AS total_orders,
            COUNT(CASE WHEN member_discount_amount != 0 THEN 1 END) AS with_member_discount,
            COUNT(CASE WHEN member_discount_amount > 0 THEN 1 END) AS positive_discount,
            COUNT(CASE WHEN member_discount_amount < 0 THEN 1 END) AS negative_discount,
            SUM(member_discount_amount) AS total_member_discount,
            AVG(CASE WHEN member_discount_amount != 0 THEN member_discount_amount END) AS avg_discount,
            MAX(member_discount_amount) AS max_discount,
            MIN(member_discount_amount) AS min_discount,
            STDDEV(CASE WHEN member_discount_amount != 0 THEN member_discount_amount END) AS stddev_discount
        FROM billiards_dwd.dwd_settlement_head
    """
    result = db.query(stats_sql)
    if not result:
        return {}
    return dict(result[0])
|
||||
|
||||
|
||||
def get_time_distribution(db: DatabaseConnection) -> List[Dict[str, Any]]:
    """
    Fetch per-month order and member-discount totals.

    Returns up to 12 month rows, newest first, each as a dict; an empty list
    when the query yields nothing.
    """
    monthly_sql = """
        SELECT
            DATE_TRUNC('month', create_time)::DATE AS month,
            COUNT(*) AS total_orders,
            COUNT(CASE WHEN member_discount_amount != 0 THEN 1 END) AS with_discount,
            SUM(member_discount_amount) AS total_discount
        FROM billiards_dwd.dwd_settlement_head
        GROUP BY DATE_TRUNC('month', create_time)
        ORDER BY month DESC
        LIMIT 12
    """
    monthly_rows = db.query(monthly_sql)
    if not monthly_rows:
        return []
    return list(map(dict, monthly_rows))
|
||||
|
||||
|
||||
def get_member_analysis(db: DatabaseConnection) -> Dict[str, Any]:
    """
    Break member-discount usage down by customer type and by card grade.

    Returns a dict with ``member_vs_guest`` (rows grouped on whether
    member_id equals 0) and ``by_grade`` (rows for orders with a non-zero
    member_id, grouped by the grade name from dim_member). Both are lists of
    row dicts.
    """
    # Members versus walk-in guests
    customer_type_sql = """
        SELECT
            CASE WHEN member_id = 0 THEN '散客' ELSE '会员' END AS customer_type,
            COUNT(*) AS total_orders,
            COUNT(CASE WHEN member_discount_amount != 0 THEN 1 END) AS with_discount,
            SUM(member_discount_amount) AS total_discount
        FROM billiards_dwd.dwd_settlement_head
        GROUP BY CASE WHEN member_id = 0 THEN '散客' ELSE '会员' END
    """
    customer_type_rows = db.query(customer_type_sql)

    # By membership card grade
    grade_sql = """
        SELECT
            COALESCE(m.member_card_grade_name, '未知') AS grade_name,
            COUNT(*) AS total_orders,
            COUNT(CASE WHEN sh.member_discount_amount != 0 THEN 1 END) AS with_discount,
            SUM(sh.member_discount_amount) AS total_discount
        FROM billiards_dwd.dwd_settlement_head sh
        LEFT JOIN billiards_dwd.dim_member m ON sh.member_id = m.member_id
        WHERE sh.member_id != 0
        GROUP BY COALESCE(m.member_card_grade_name, '未知')
        ORDER BY total_orders DESC
    """
    grade_rows = db.query(grade_sql)

    by_customer_type = list(map(dict, customer_type_rows)) if customer_type_rows else []
    by_card_grade = list(map(dict, grade_rows)) if grade_rows else []
    return {'member_vs_guest': by_customer_type, 'by_grade': by_card_grade}
|
||||
|
||||
|
||||
def get_sample_data(db: DatabaseConnection) -> List[Dict[str, Any]]:
    """
    Fetch the 20 most recent orders that carry a member discount.

    Each row is left-joined to dim_member for the nickname and card grade
    and returned as a dict; an empty list when the query yields nothing.
    """
    recent_sql = """
        SELECT
            sh.order_settle_id,
            sh.order_trade_no,
            sh.create_time,
            sh.member_id,
            m.nickname AS member_name,
            m.member_card_grade_name,
            sh.consume_money,
            sh.pay_amount,
            sh.member_discount_amount,
            ROUND(sh.member_discount_amount / NULLIF(sh.consume_money, 0) * 100, 2) AS discount_ratio
        FROM billiards_dwd.dwd_settlement_head sh
        LEFT JOIN billiards_dwd.dim_member m ON sh.member_id = m.member_id
        WHERE sh.member_discount_amount != 0
        ORDER BY sh.create_time DESC
        LIMIT 20
    """
    recent_rows = db.query(recent_sql)
    if not recent_rows:
        return []
    return list(map(dict, recent_rows))
|
||||
|
||||
|
||||
def print_basic_stats(stats: Dict[str, Any]):
    """
    Print the basic member-discount statistics.

    Missing or None values are printed as 0, and the usage percentage is
    0.0000 when the total order count is zero, so an empty table does not
    raise ZeroDivisionError or TypeError.
    """
    total = stats.get('total_orders') or 0
    with_discount = stats.get('with_member_discount') or 0
    # Guard the division: COUNT(*) is 0 on an empty table.
    share = with_discount / total * 100 if total else 0.0

    print(f"总订单数: {total:,}")
    print(f"有会员折扣的订单: {with_discount:,} ({share:.4f}%)")
    print(f" - 正值(折扣): {stats.get('positive_discount') or 0:,}")
    print(f" - 负值(加价?): {stats.get('negative_discount') or 0:,}")
    print()
    # SUM/AVG/MAX/MIN all yield None when they have no input values.
    print(f"会员折扣总额: {stats.get('total_member_discount') or 0:,.2f}")
    print(f"平均折扣: {stats.get('avg_discount') or 0:,.2f}")
    print(f"最大折扣: {stats.get('max_discount') or 0:,.2f}")
    print(f"最小折扣: {stats.get('min_discount') or 0:,.2f}")
|
||||
|
||||
|
||||
def print_time_distribution(distribution: List[Dict[str, Any]]):
    """
    Print the monthly distribution as an aligned table.

    Prints a "no data" line and returns when the list is empty. Only the
    first seven characters of each month value are shown.
    """
    if len(distribution) == 0 if isinstance(distribution, list) else not distribution:
        print("无数据")
        return

    header = f"{'月份':<12} {'总订单':>10} {'有折扣':>10} {'折扣总额':>15}"
    print(header)
    print("-" * 50)

    for row in distribution:
        month_label = str(row.get('month', 'N/A'))[:7]
        order_count = row.get('total_orders', 0)
        discounted = row.get('with_discount', 0)
        discount_sum = row.get('total_discount', 0)
        print(f"{month_label:<12} {order_count:>10,} {discounted:>10,} {discount_sum:>15,.2f}")
|
||||
|
||||
|
||||
def print_member_analysis(analysis: Dict[str, Any]):
    """Print the member-versus-guest breakdown, then the per-grade breakdown."""
    print("会员 vs 散客:")
    for row in analysis.get('member_vs_guest', []):
        customer_type = row.get('customer_type', 'N/A')
        order_count = row.get('total_orders', 0)
        discounted = row.get('with_discount', 0)
        discount_sum = row.get('total_discount', 0)
        print(f" {customer_type}: {order_count:,} 单, {discounted} 单有折扣, 折扣总额 {discount_sum:,.2f}")

    print("\n按会员卡等级:")
    for row in analysis.get('by_grade', []):
        grade = row.get('grade_name', 'N/A')
        order_count = row.get('total_orders', 0)
        discounted = row.get('with_discount', 0)
        print(f" {grade}: {order_count:,} 单, {discounted} 单有折扣")
|
||||
|
||||
|
||||
def print_samples(samples: List[Dict[str, Any]]):
    """
    Print up to ten sample orders as an aligned table.

    Prints a "none found" notice and returns when the list is empty. None
    values are handled explicitly: text columns and the ratio show ``N/A``
    and amounts show 0.00, so a None ratio or a None joined column no
    longer raises or prints the literal text "None".
    """
    if not samples:
        print("[!] 未发现使用会员折扣的订单")
        return

    def text(value: Any) -> str:
        # A default passed to dict.get() does not apply when the key holds
        # None, so map None to the placeholder here.
        return 'N/A' if value is None else str(value)

    print(f"{'订单ID':<20} {'会员':<15} {'等级':<10} {'消费':>12} {'折扣':>12} {'比例':>8}")
    print("-" * 80)
    for item in samples[:10]:
        order_id = text(item.get('order_settle_id', 'N/A'))[:18]
        member = text(item.get('member_name', 'N/A'))[:13]
        grade = text(item.get('member_card_grade_name', 'N/A'))[:8]
        consume = item.get('consume_money', 0) or 0
        discount = item.get('member_discount_amount', 0) or 0
        ratio = item.get('discount_ratio', 0)
        # Applying a width spec to None raises TypeError, so format the
        # placeholder separately.
        ratio_cell = f"{'N/A':>7}" if ratio is None else f"{ratio:>7}"
        print(f"{order_id:<20} {member:<15} {grade:<10} {consume:>12,.2f} {discount:>12,.2f} {ratio_cell}%")
|
||||
|
||||
|
||||
def print_conclusion(stats: Dict[str, Any]):
    """
    Print the verdict on whether member_discount_amount is in use.

    Three outcomes: not enabled (no order has a non-zero value), rarely used
    (under 1% of orders), or enabled. The usage ratio is computed only after
    the zero check, so an empty table (total_orders == 0) reports "not
    enabled" instead of raising ZeroDivisionError.
    """
    with_discount = stats.get('with_member_discount', 0) or 0
    total = stats.get('total_orders', 1) or 0

    if with_discount == 0:
        print("【结论】: member_discount_amount 字段 **未启用**")
        print()
        print("该字段在所有订单中均为0,表明:")
        print(" 1. 会员折扣功能在业务系统中未开启")
        print(" 2. 或会员折扣通过其他方式(如adjust_amount)记录")
        print()
        print("【建议】:")
        print(" 1. 在DWS财务统计中,暂时不处理此字段")
        print(" 2. 将此字段标记为'预留/待启用'")
        print(" 3. 后续如果业务启用,再更新统计逻辑")
        return

    ratio = with_discount / total * 100 if total else 0.0

    if ratio < 1:
        print(f"【结论】: member_discount_amount 字段 **极少使用** (仅{ratio:.4f}%订单)")
        print()
        print("该字段使用率极低,可能是:")
        print(" 1. 会员折扣功能刚启用不久")
        print(" 2. 仅特定场景使用")
        print()
        print("【建议】:")
        print(" 1. 在DWS财务统计中保留此字段的处理逻辑")
        print(" 2. 定期监控使用率变化")
    else:
        print(f"【结论】: member_discount_amount 字段 **已启用** ({ratio:.2f}%订单使用)")
        print()
        print("【建议】:")
        print(" 1. 在DWS财务优惠明细中正常统计此字段")
        print(" 2. 关注会员折扣与其他优惠的叠加规则")
|
||||
|
||||
|
||||
# Run the analysis only when this file is executed directly as a script.
if __name__ == "__main__":
    analyze_member_discount_usage()
|
||||
74
etl_billiards/scripts/check_assistant_dim.py
Normal file
74
etl_billiards/scripts/check_assistant_dim.py
Normal file
@@ -0,0 +1,74 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
import sys
|
||||
sys.path.insert(0, '.')
|
||||
from config.settings import AppConfig
|
||||
from database.connection import DatabaseConnection
|
||||
from database.operations import DatabaseOperations
|
||||
|
||||
# Ad-hoc diagnostic: inspect dim_assistant and the distribution of assistant
# service records. Everything runs at import/execution time and only prints.
config = AppConfig.load()
db_conn = DatabaseConnection(config.config['db']['dsn'])

try:
    db = DatabaseOperations(db_conn)

    # Inspect the dim_assistant table structure.
    print('=== dim_assistant columns ===')
    sql0 = """
        SELECT column_name FROM information_schema.columns
        WHERE table_schema = 'billiards_dwd' AND table_name = 'dim_assistant'
    """
    for row in db.query(sql0):
        print(f' {dict(row)["column_name"]}')

    # Count the current (SCD2) dim_assistant rows.
    print()
    print('=== dim_assistant ===')
    sql1 = 'SELECT COUNT(*) as cnt FROM billiards_dwd.dim_assistant WHERE scd2_is_current = 1'
    rows = db.query(sql1)
    print(f'dim_assistant current count: {dict(rows[0])["cnt"]}')

    # Distribution of service records by nickname.
    print()
    print('=== Service by nickname ===')
    sql2 = """
        SELECT nickname, COUNT(*) as service_count, COUNT(DISTINCT tenant_member_id) as member_count
        FROM billiards_dwd.dwd_assistant_service_log
        WHERE tenant_member_id > 0 AND is_delete = 0
        GROUP BY nickname
        ORDER BY service_count DESC
        LIMIT 10
    """
    for row in db.query(sql2):
        r = dict(row)
        print(f' {r["nickname"]}: {r["service_count"]} services, {r["member_count"]} members')

    # Distribution of service records by assistant_no.
    print()
    print('=== Service by assistant_no ===')
    sql3 = """
        SELECT assistant_no, nickname, COUNT(*) as service_count, COUNT(DISTINCT tenant_member_id) as member_count
        FROM billiards_dwd.dwd_assistant_service_log
        WHERE tenant_member_id > 0 AND is_delete = 0
        GROUP BY assistant_no, nickname
        ORDER BY service_count DESC
        LIMIT 10
    """
    for row in db.query(sql3):
        r = dict(row)
        print(f' {r["assistant_no"]} ({r["nickname"]}): {r["service_count"]} services, {r["member_count"]} members')

    # Same nickname distribution restricted to the last 60 days.
    print()
    print('=== Last 60 days by nickname ===')
    sql4 = """
        SELECT nickname, COUNT(*) as service_count, COUNT(DISTINCT tenant_member_id) as member_count
        FROM billiards_dwd.dwd_assistant_service_log
        WHERE tenant_member_id > 0 AND is_delete = 0
        AND last_use_time >= NOW() - INTERVAL '60 days'
        GROUP BY nickname
        ORDER BY service_count DESC
        LIMIT 15
    """
    for row in db.query(sql4):
        r = dict(row)
        print(f' {r["nickname"]}: {r["service_count"]} services, {r["member_count"]} members')
finally:
    # Release the connection even when one of the queries fails.
    db_conn.close()
|
||||
82
etl_billiards/scripts/check_dwd_service.py
Normal file
82
etl_billiards/scripts/check_dwd_service.py
Normal file
@@ -0,0 +1,82 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
import sys
|
||||
sys.path.insert(0, '.')
|
||||
from config.settings import AppConfig
|
||||
from database.connection import DatabaseConnection
|
||||
from database.operations import DatabaseOperations
|
||||
|
||||
# Ad-hoc diagnostic: summarise assistant service records in the DWD layer.
# Everything runs at import/execution time and only prints.
config = AppConfig.load()
db_conn = DatabaseConnection(config.config['db']['dsn'])

try:
    db = DatabaseOperations(db_conn)

    # Distribution of service records in the DWD layer.
    print("=== DWD层服务记录分析 ===")
    print()

    # 1. Overall statistics.
    sql1 = """
        SELECT
            COUNT(*) as total_records,
            COUNT(DISTINCT tenant_member_id) as unique_members,
            COUNT(DISTINCT site_assistant_id) as unique_assistants,
            COUNT(DISTINCT (tenant_member_id, site_assistant_id)) as unique_pairs
        FROM billiards_dwd.dwd_assistant_service_log
        WHERE tenant_member_id > 0 AND is_delete = 0
    """
    r = dict(db.query(sql1)[0])
    print("总体统计:")
    print(f" 总服务记录数: {r['total_records']}")
    print(f" 唯一会员数: {r['unique_members']}")
    print(f" 唯一助教数: {r['unique_assistants']}")
    print(f" 唯一客户-助教对: {r['unique_pairs']}")

    # 2. Number of distinct members served per assistant.
    print()
    print("助教服务会员数分布 (Top 10):")
    sql2 = """
        SELECT site_assistant_id, COUNT(DISTINCT tenant_member_id) as member_count
        FROM billiards_dwd.dwd_assistant_service_log
        WHERE tenant_member_id > 0 AND is_delete = 0
        GROUP BY site_assistant_id
        ORDER BY member_count DESC
        LIMIT 10
    """
    for row in db.query(sql2):
        r = dict(row)
        print(f" 助教 {r['site_assistant_id']}: 服务 {r['member_count']} 个会员")

    # 3. Number of services per member/assistant pair.
    print()
    print("客户-助教对 服务次数分布 (Top 10):")
    sql3 = """
        SELECT tenant_member_id, site_assistant_id, COUNT(*) as service_count
        FROM billiards_dwd.dwd_assistant_service_log
        WHERE tenant_member_id > 0 AND is_delete = 0
        GROUP BY tenant_member_id, site_assistant_id
        ORDER BY service_count DESC
        LIMIT 10
    """
    for row in db.query(sql3):
        r = dict(row)
        print(f" 会员 {r['tenant_member_id']} - 助教 {r['site_assistant_id']}: {r['service_count']} 次服务")

    # 4. Overall statistics restricted to the last 60 days.
    print()
    print("=== 近60天数据 ===")
    sql4 = """
        SELECT
            COUNT(*) as total_records,
            COUNT(DISTINCT tenant_member_id) as unique_members,
            COUNT(DISTINCT site_assistant_id) as unique_assistants,
            COUNT(DISTINCT (tenant_member_id, site_assistant_id)) as unique_pairs
        FROM billiards_dwd.dwd_assistant_service_log
        WHERE tenant_member_id > 0 AND is_delete = 0
        AND last_use_time >= NOW() - INTERVAL '60 days'
    """
    r4 = dict(db.query(sql4)[0])
    print(f" 总服务记录数: {r4['total_records']}")
    print(f" 唯一会员数: {r4['unique_members']}")
    print(f" 唯一助教数: {r4['unique_assistants']}")
    print(f" 唯一客户-助教对: {r4['unique_pairs']}")
finally:
    # Release the connection even when one of the queries fails.
    db_conn.close()
|
||||
57
etl_billiards/scripts/check_intimacy_stats.py
Normal file
57
etl_billiards/scripts/check_intimacy_stats.py
Normal file
@@ -0,0 +1,57 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
import sys
|
||||
sys.path.insert(0, '.')
|
||||
from config.settings import AppConfig
|
||||
from database.connection import DatabaseConnection
|
||||
from database.operations import DatabaseOperations
|
||||
|
||||
# Ad-hoc diagnostic: compare the DWS intimacy-index table with the raw DWD
# service log. Everything runs at import/execution time and only prints.
config = AppConfig.load()
db_conn = DatabaseConnection(config.config['db']['dsn'])

try:
    db = DatabaseOperations(db_conn)

    # Totals actually stored in the DWS intimacy table.
    sql = """
        SELECT
            COUNT(*) as total_pairs,
            COUNT(DISTINCT member_id) as unique_members,
            COUNT(DISTINCT assistant_id) as unique_assistants
        FROM billiards_dws.dws_member_assistant_intimacy
    """
    rows = db.query(sql)
    r = dict(rows[0])
    print("DWS亲密指数统计:")
    print(f" 总记录数(对): {r['total_pairs']}")
    print(f" 唯一会员数: {r['unique_members']}")
    print(f" 唯一助教数: {r['unique_assistants']}")

    # Distribution by assistant (number of rows, i.e. members, per assistant).
    sql2 = """
        SELECT assistant_id, COUNT(*) as member_count
        FROM billiards_dws.dws_member_assistant_intimacy
        GROUP BY assistant_id
        ORDER BY member_count DESC
        LIMIT 10
    """
    rows2 = db.query(sql2)
    print()
    print("Top 10 助教 (按服务会员数):")
    for row in rows2:
        r = dict(row)
        print(f" 助教 {r['assistant_id']}: 服务 {r['member_count']} 个会员")

    # Cross-check against the raw DWD service log.
    sql3 = """
        SELECT
            COUNT(DISTINCT site_assistant_id) as unique_assistants,
            COUNT(DISTINCT tenant_member_id) as unique_members
        FROM billiards_dwd.dwd_assistant_service_log
        WHERE tenant_member_id > 0 AND is_delete = 0
    """
    rows3 = db.query(sql3)
    r3 = dict(rows3[0])
    print()
    print("DWD层原始数据:")
    print(f" 唯一助教数: {r3['unique_assistants']}")
    print(f" 唯一会员数: {r3['unique_members']}")
finally:
    # Release the connection even when one of the queries fails.
    db_conn.close()
|
||||
@@ -702,6 +702,7 @@ def run_gap_check(
|
||||
content_sample_limit: int | None = None,
|
||||
window_split_unit: str | None = None,
|
||||
window_compensation_hours: int | None = None,
|
||||
tag: str = "",
|
||||
) -> dict:
|
||||
cfg = cfg or AppConfig.load({})
|
||||
tz = ZoneInfo(cfg.get("app.timezone", "Asia/Taipei"))
|
||||
@@ -800,7 +801,7 @@ def run_gap_check(
|
||||
if cutoff:
|
||||
logger.info("CUTOFF=%s overlap_hours=%s", cutoff.isoformat(), cutoff_overlap_hours)
|
||||
|
||||
tag_suffix = f"_{args.tag}" if args.tag else ""
|
||||
tag_suffix = f"_{tag}" if tag else ""
|
||||
client = build_recording_client(cfg, task_code=f"ODS_GAP_CHECK{tag_suffix}")
|
||||
|
||||
db_state = _init_db_state(cfg)
|
||||
|
||||
185
etl_billiards/scripts/create_index_tables.py
Normal file
185
etl_billiards/scripts/create_index_tables.py
Normal file
@@ -0,0 +1,185 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
创建指数算法相关表
|
||||
"""
|
||||
import sys
|
||||
import os
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from config.settings import AppConfig
|
||||
from database.connection import DatabaseConnection
|
||||
from database.operations import DatabaseOperations
|
||||
|
||||
# Table DDL.
# Each entry is one multi-statement SQL script executed as a unit by main().
# NOTE: every script starts with DROP TABLE IF EXISTS ... CASCADE, so running
# it discards the existing table (and its rows) before recreating it.
DDL_STATEMENTS = [
    # Parameter configuration table
    """
    DROP TABLE IF EXISTS billiards_dws.cfg_index_parameters CASCADE;
    CREATE TABLE billiards_dws.cfg_index_parameters (
        param_id SERIAL PRIMARY KEY,
        index_type VARCHAR(50) NOT NULL,
        param_name VARCHAR(100) NOT NULL,
        param_value NUMERIC(14,6) NOT NULL,
        description TEXT,
        effective_from DATE NOT NULL DEFAULT CURRENT_DATE,
        effective_to DATE,
        created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
        updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
        CONSTRAINT uk_cfg_index_parameters UNIQUE (index_type, param_name, effective_from)
    );
    CREATE INDEX idx_cfg_index_params_type ON billiards_dws.cfg_index_parameters (index_type);
    """,

    # Recall index table (one row per site/member)
    """
    DROP TABLE IF EXISTS billiards_dws.dws_member_recall_index CASCADE;
    CREATE TABLE billiards_dws.dws_member_recall_index (
        recall_id BIGSERIAL PRIMARY KEY,
        site_id BIGINT NOT NULL,
        tenant_id BIGINT NOT NULL,
        member_id BIGINT NOT NULL,
        days_since_last_visit INTEGER,
        visit_interval_median NUMERIC(10,2),
        visit_interval_mad NUMERIC(10,2),
        days_since_first_visit INTEGER,
        days_since_last_recharge INTEGER,
        visits_last_14_days INTEGER NOT NULL DEFAULT 0,
        visits_last_60_days INTEGER NOT NULL DEFAULT 0,
        score_overdue NUMERIC(10,4),
        score_new_bonus NUMERIC(10,4),
        score_recharge_bonus NUMERIC(10,4),
        score_hot_drop NUMERIC(10,4),
        raw_score NUMERIC(14,6),
        display_score NUMERIC(4,2),
        calc_time TIMESTAMPTZ NOT NULL DEFAULT NOW(),
        calc_version INTEGER NOT NULL DEFAULT 1,
        created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
        updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
        CONSTRAINT uk_dws_member_recall UNIQUE (site_id, member_id)
    );
    CREATE INDEX idx_dws_recall_display ON billiards_dws.dws_member_recall_index (site_id, display_score DESC);
    """,

    # Intimacy index table (one row per site/member/assistant)
    """
    DROP TABLE IF EXISTS billiards_dws.dws_member_assistant_intimacy CASCADE;
    CREATE TABLE billiards_dws.dws_member_assistant_intimacy (
        intimacy_id BIGSERIAL PRIMARY KEY,
        site_id BIGINT NOT NULL,
        tenant_id BIGINT NOT NULL,
        member_id BIGINT NOT NULL,
        assistant_id BIGINT NOT NULL,
        session_count INTEGER NOT NULL DEFAULT 0,
        total_duration_minutes INTEGER NOT NULL DEFAULT 0,
        basic_session_count INTEGER NOT NULL DEFAULT 0,
        incentive_session_count INTEGER NOT NULL DEFAULT 0,
        days_since_last_session INTEGER,
        attributed_recharge_count INTEGER NOT NULL DEFAULT 0,
        attributed_recharge_amount NUMERIC(14,2) NOT NULL DEFAULT 0,
        score_frequency NUMERIC(10,4),
        score_recency NUMERIC(10,4),
        score_recharge NUMERIC(10,4),
        score_duration NUMERIC(10,4),
        burst_multiplier NUMERIC(6,4),
        raw_score NUMERIC(14,6),
        display_score NUMERIC(4,2),
        calc_time TIMESTAMPTZ NOT NULL DEFAULT NOW(),
        calc_version INTEGER NOT NULL DEFAULT 1,
        created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
        updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
        CONSTRAINT uk_dws_member_assistant_intimacy UNIQUE (site_id, member_id, assistant_id)
    );
    CREATE INDEX idx_dws_intimacy_member ON billiards_dws.dws_member_assistant_intimacy (site_id, member_id, display_score DESC);
    CREATE INDEX idx_dws_intimacy_assistant ON billiards_dws.dws_member_assistant_intimacy (site_id, assistant_id, display_score DESC);
    """,

    # Percentile history table (one row per site/index type/calculation time)
    """
    DROP TABLE IF EXISTS billiards_dws.dws_index_percentile_history CASCADE;
    CREATE TABLE billiards_dws.dws_index_percentile_history (
        history_id BIGSERIAL PRIMARY KEY,
        site_id BIGINT NOT NULL,
        index_type VARCHAR(50) NOT NULL,
        calc_time TIMESTAMPTZ NOT NULL,
        percentile_5 NUMERIC(14,6),
        percentile_95 NUMERIC(14,6),
        percentile_5_smoothed NUMERIC(14,6),
        percentile_95_smoothed NUMERIC(14,6),
        record_count INTEGER,
        min_raw_score NUMERIC(14,6),
        max_raw_score NUMERIC(14,6),
        avg_raw_score NUMERIC(14,6),
        created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
        CONSTRAINT uk_dws_index_percentile_history UNIQUE (site_id, index_type, calc_time)
    );
    CREATE INDEX idx_dws_percentile_history ON billiards_dws.dws_index_percentile_history (site_id, index_type, calc_time DESC);
    """
]
|
||||
|
||||
# Seed parameters.
# Inserts the default RECALL and INTIMACY algorithm parameters, all effective
# from CURRENT_DATE. Rows that already exist for the same
# (index_type, param_name, effective_from) are left untouched (DO NOTHING).
SEED_PARAMS = """
INSERT INTO billiards_dws.cfg_index_parameters
(index_type, param_name, param_value, description, effective_from)
VALUES
('RECALL', 'lookback_days', 60, '回溯窗口(天)', CURRENT_DATE),
('RECALL', 'sigma_min', 2.0, '波动下限(天)', CURRENT_DATE),
('RECALL', 'halflife_new', 7, '新客户半衰期(天)', CURRENT_DATE),
('RECALL', 'halflife_recharge', 10, '刚充值半衰期(天)', CURRENT_DATE),
('RECALL', 'weight_overdue', 3.0, '超期紧急性权重', CURRENT_DATE),
('RECALL', 'weight_new', 1.0, '新客户权重', CURRENT_DATE),
('RECALL', 'weight_recharge', 1.0, '刚充值权重', CURRENT_DATE),
('RECALL', 'weight_hot', 1.0, '热度断档权重', CURRENT_DATE),
('RECALL', 'percentile_lower', 5, '下锚分位数', CURRENT_DATE),
('RECALL', 'percentile_upper', 95, '上锚分位数', CURRENT_DATE),
('RECALL', 'ewma_alpha', 0.2, 'EWMA平滑系数', CURRENT_DATE),
('INTIMACY', 'lookback_days', 60, '回溯窗口(天)', CURRENT_DATE),
('INTIMACY', 'session_merge_hours', 4, '会话合并间隔(小时)', CURRENT_DATE),
('INTIMACY', 'recharge_attribute_hours', 1, '充值归因窗口(小时)', CURRENT_DATE),
('INTIMACY', 'amount_base', 500, '金额压缩基准(元)', CURRENT_DATE),
('INTIMACY', 'incentive_weight', 1.5, '附加课权重倍数', CURRENT_DATE),
('INTIMACY', 'halflife_session', 14, '会话衰减半衰期(天)', CURRENT_DATE),
('INTIMACY', 'halflife_last', 10, '最近一次半衰期(天)', CURRENT_DATE),
('INTIMACY', 'halflife_recharge', 21, '充值衰减半衰期(天)', CURRENT_DATE),
('INTIMACY', 'halflife_short', 7, '短期激增检测半衰期(天)', CURRENT_DATE),
('INTIMACY', 'halflife_long', 30, '长期激增检测半衰期(天)', CURRENT_DATE),
('INTIMACY', 'weight_frequency', 2.0, '频次权重', CURRENT_DATE),
('INTIMACY', 'weight_recency', 1.5, '最近一次权重', CURRENT_DATE),
('INTIMACY', 'weight_recharge', 2.0, '归因充值权重', CURRENT_DATE),
('INTIMACY', 'weight_duration', 0.5, '时长权重', CURRENT_DATE),
('INTIMACY', 'burst_gamma', 0.6, '激增放大系数', CURRENT_DATE),
('INTIMACY', 'percentile_lower', 5, '下锚分位数', CURRENT_DATE),
('INTIMACY', 'percentile_upper', 95, '上锚分位数', CURRENT_DATE),
('INTIMACY', 'ewma_alpha', 0.2, 'EWMA平滑系数', CURRENT_DATE)
ON CONFLICT (index_type, param_name, effective_from) DO NOTHING;
"""
|
||||
|
||||
def main():
    """Create the index-algorithm tables and seed their default parameters.

    Runs every script in ``DDL_STATEMENTS`` and then ``SEED_PARAMS`` in a
    single transaction, commits, and prints the number of parameter rows.
    If any statement fails the transaction is rolled back explicitly and the
    exception is re-raised; the connection is closed in every case.
    """
    print("创建指数算法相关表...")

    config = AppConfig.load()
    db_conn = DatabaseConnection(config.config["db"]["dsn"])

    try:
        with db_conn.conn.cursor() as cur:
            # Create the tables.
            for i, ddl in enumerate(DDL_STATEMENTS, 1):
                print(f" 执行DDL {i}/{len(DDL_STATEMENTS)}...")
                cur.execute(ddl)

            # Seed the algorithm parameters.
            print(" 初始化算法参数...")
            cur.execute(SEED_PARAMS)

            db_conn.conn.commit()
            print("完成!")

            # Verify how many parameter rows are present.
            cur.execute("SELECT COUNT(*) FROM billiards_dws.cfg_index_parameters")
            count = cur.fetchone()[0]
            print(f" 已插入 {count} 个参数配置")
    except Exception:
        # Do not leave a half-applied DDL batch pending on the connection.
        db_conn.conn.rollback()
        raise
    finally:
        db_conn.close()


if __name__ == '__main__':
    main()
|
||||
602
etl_billiards/scripts/import_dws_excel.py
Normal file
602
etl_billiards/scripts/import_dws_excel.py
Normal file
@@ -0,0 +1,602 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
DWS Excel导入脚本
|
||||
|
||||
功能说明:
|
||||
支持三类Excel数据的导入:
|
||||
1. 支出结构(dws_finance_expense_summary)
|
||||
2. 平台结算(dws_platform_settlement)
|
||||
3. 充值提成(dws_assistant_recharge_commission)
|
||||
|
||||
导入规范:
|
||||
- 字段定义:按照目标表字段要求
|
||||
- 时间粒度:支出按月,平台结算按日,充值提成按月
|
||||
- 门店维度:使用配置的site_id
|
||||
- 去重规则:按import_batch_no去重
|
||||
- 校验规则:金额字段非负,日期格式校验
|
||||
|
||||
使用方式:
|
||||
python import_dws_excel.py --type expense --file expenses.xlsx
|
||||
python import_dws_excel.py --type platform --file platform_settlement.xlsx
|
||||
python import_dws_excel.py --type commission --file recharge_commission.xlsx
|
||||
|
||||
作者:ETL团队
|
||||
创建日期:2026-02-01
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import sys
|
||||
import uuid
|
||||
from datetime import date, datetime
|
||||
from decimal import Decimal, InvalidOperation
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
# 添加项目根目录到Python路径
|
||||
project_root = Path(__file__).parent.parent.parent
|
||||
sys.path.insert(0, str(project_root))
|
||||
|
||||
try:
|
||||
import pandas as pd
|
||||
except ImportError:
|
||||
print("请安装 pandas: pip install pandas openpyxl")
|
||||
sys.exit(1)
|
||||
|
||||
from etl_billiards.utils.config import Config
|
||||
from etl_billiards.utils.db import DatabaseConnection
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# 常量定义
|
||||
# =============================================================================
|
||||
|
||||
# Expense types: maps the Chinese label used in the Excel sheet to the
# expense type code stored in the target table.
EXPENSE_TYPES = {
    '房租': 'RENT',
    '水电费': 'UTILITY',
    '物业费': 'PROPERTY',
    '工资': 'SALARY',
    '报销': 'REIMBURSE',
    '平台服务费': 'PLATFORM_FEE',
    '其他': 'OTHER',
}

# Expense categories: maps each expense type code to its cost category.
EXPENSE_CATEGORIES = {
    'RENT': 'FIXED_COST',
    'UTILITY': 'VARIABLE_COST',
    'PROPERTY': 'FIXED_COST',
    'SALARY': 'FIXED_COST',
    'REIMBURSE': 'VARIABLE_COST',
    'PLATFORM_FEE': 'VARIABLE_COST',
    'OTHER': 'OTHER',
}

# Platform types: maps the Chinese platform label used in the Excel sheet to
# the platform type code stored in the target table.
PLATFORM_TYPES = {
    '美团': 'MEITUAN',
    '抖音': 'DOUYIN',
    '大众点评': 'DIANPING',
    '其他': 'OTHER',
}
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# 导入基类
|
||||
# =============================================================================
|
||||
|
||||
class BaseImporter:
    """Base class for the Excel importers.

    Holds the configuration, the database handle, the site/tenant identifiers
    read from the configuration and a freshly generated import batch number,
    and provides tolerant value converters for spreadsheet cells.
    Subclasses implement ``import_file``, ``transform_row`` and
    ``insert_records``.
    """

    def __init__(self, config: Config, db: DatabaseConnection):
        """Store the collaborators and derive site, tenant and batch number.

        ``tenant_id`` falls back to ``site_id`` when ``app.tenant_id`` is not
        configured.
        """
        self.config = config
        self.db = db
        self.site_id = config.get("app.store_id")
        self.tenant_id = config.get("app.tenant_id", self.site_id)
        self.batch_no = self._generate_batch_no()

    def _generate_batch_no(self) -> str:
        """Return a batch number of the form ``YYYYmmddHHMMSS_<8 uuid chars>``."""
        timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
        unique_id = str(uuid.uuid4())[:8]
        return f"{timestamp}_{unique_id}"

    def _safe_decimal(self, value: Any, default: Decimal = Decimal('0')) -> Decimal:
        """Convert a cell value to ``Decimal``, returning *default* on failure.

        *default* is returned for ``None``/NaN cells, for text that is not a
        number, and for non-finite results ("nan", "inf"): comparing a NaN
        Decimal (e.g. ``amount < 0``) would otherwise raise
        ``InvalidOperation`` in the callers.
        """
        if value is None or pd.isna(value):
            return default
        try:
            result = Decimal(str(value))
        except (ValueError, InvalidOperation):
            return default
        return result if result.is_finite() else default

    def _safe_date(self, value: Any) -> Optional[date]:
        """Convert a cell value to ``date``; return ``None`` when impossible.

        ``datetime``/``date`` instances are used directly, anything else is
        parsed with ``pd.to_datetime``. Unparseable or missing values (including
        a parse result of ``NaT``) yield ``None``.
        """
        if value is None or pd.isna(value):
            return None
        if isinstance(value, datetime):
            return value.date()
        if isinstance(value, date):
            return value
        try:
            parsed = pd.to_datetime(value)
            if pd.isna(parsed):
                return None
            return parsed.date()
        except (ValueError, TypeError, AttributeError, OverflowError):
            # Only conversion failures are swallowed; KeyboardInterrupt and
            # SystemExit now propagate (the original used a bare except).
            return None

    def _safe_month(self, value: Any) -> Optional[date]:
        """Convert a cell value to the first day of its month, or ``None``."""
        dt = self._safe_date(value)
        if dt:
            return dt.replace(day=1)
        return None

    def import_file(self, file_path: str) -> Dict[str, Any]:
        """Import one file and return a result summary (subclass hook)."""
        raise NotImplementedError

    def validate_row(self, row: Dict[str, Any], row_idx: int) -> List[str]:
        """Validate one row and return its error messages (none by default)."""
        return []

    def transform_row(self, row: Dict[str, Any]) -> Dict[str, Any]:
        """Convert one spreadsheet row into a target-table record (subclass hook)."""
        raise NotImplementedError

    def insert_records(self, records: List[Dict[str, Any]]) -> int:
        """Insert the records and return the inserted row count (subclass hook)."""
        raise NotImplementedError
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# 支出导入
|
||||
# =============================================================================
|
||||
|
||||
class ExpenseImporter(BaseImporter):
    """
    Expense importer.

    Expected Excel columns:
    - 月份: month, e.g. 2026-01 or 2026/01/01
    - 支出类型: one of the keys of EXPENSE_TYPES
    - 金额: number (must not be negative)
    - 明细 / 备注: optional
    """

    TARGET_TABLE = "billiards_dws.dws_finance_expense_summary"

    REQUIRED_COLUMNS = ['月份', '支出类型', '金额']
    OPTIONAL_COLUMNS = ['明细', '备注']

    @staticmethod
    def _clean(value: Any) -> Any:
        """Return ``None`` for a missing cell (None/NaN), else the value unchanged."""
        return None if value is None or pd.isna(value) else value

    @staticmethod
    def _text(value: Any) -> str:
        """Return the cell as stripped text; a missing cell becomes ''."""
        return '' if value is None or pd.isna(value) else str(value).strip()

    def _validate_amount(self, value: Any, label: str, row_idx: int) -> List[str]:
        """Return the validation errors for one amount cell (empty when valid)."""
        if value is None or pd.isna(value):
            return []  # blank cells keep importing as 0, as before
        amount = self._safe_decimal(value, None)
        if amount is None or not amount.is_finite():
            return [f"行{row_idx}: {label}格式错误 '{value}'"]
        if amount < 0:
            return [f"行{row_idx}: {label}不能为负数"]
        return []

    def import_file(self, file_path: str) -> Dict[str, Any]:
        """Import an expense workbook.

        Rows that fail validation are skipped and reported; valid rows are
        inserted. Returns a summary dict with ``status`` ("SUCCESS",
        "PARTIAL" or "ERROR"), the batch number, row counts and up to ten
        error messages.
        """
        print(f"开始导入支出文件: {file_path}")

        # Read the workbook.
        df = pd.read_excel(file_path)

        # Check that the required columns exist.
        missing_cols = [c for c in self.REQUIRED_COLUMNS if c not in df.columns]
        if missing_cols:
            return {"status": "ERROR", "message": f"缺少必要列: {missing_cols}"}

        records = []
        errors = []

        # Excel data rows start at line 2 (line 1 is the header).
        for row_no, (_, row) in enumerate(df.iterrows(), start=2):
            row_dict = row.to_dict()
            row_errors = self.validate_row(row_dict, row_no)
            if row_errors:
                errors.extend(row_errors)
                continue

            # transform_row derives import_file_name from this key.
            row_dict['_file_path'] = file_path
            records.append(self.transform_row(row_dict))

        if errors:
            print(f"校验错误: {len(errors)} 条")
            for err in errors[:10]:
                print(f" - {err}")

        inserted = 0
        if records:
            inserted = self.insert_records(records)

        return {
            "status": "SUCCESS" if not errors else "PARTIAL",
            "batch_no": self.batch_no,
            "total_rows": len(df),
            "inserted": inserted,
            "errors": len(errors),
            "error_messages": errors[:10]
        }

    def validate_row(self, row: Dict[str, Any], row_idx: int) -> List[str]:
        """Validate month, expense type and amount of one row."""
        errors = []

        # Month must be parseable.
        if not self._safe_month(row.get('月份')):
            errors.append(f"行{row_idx}: 月份格式错误")

        # Expense type must be a known label; empty/NaN cells are reported
        # as invalid instead of crashing on .strip().
        expense_type = self._text(row.get('支出类型'))
        if expense_type not in EXPENSE_TYPES:
            errors.append(f"行{row_idx}: 支出类型无效 '{expense_type}'")

        # Amount must be numeric and non-negative.
        errors.extend(self._validate_amount(row.get('金额'), '金额', row_idx))

        return errors

    def transform_row(self, row: Dict[str, Any]) -> Dict[str, Any]:
        """Build the target-table record for one validated row."""
        expense_type_name = self._text(row.get('支出类型'))
        expense_type_code = EXPENSE_TYPES.get(expense_type_name, 'OTHER')
        expense_category = EXPENSE_CATEGORIES.get(expense_type_code, 'OTHER')

        return {
            'site_id': self.site_id,
            'tenant_id': self.tenant_id,
            'expense_month': self._safe_month(row.get('月份')),
            'expense_type_code': expense_type_code,
            'expense_type_name': expense_type_name,
            'expense_category': expense_category,
            'expense_amount': self._safe_decimal(row.get('金额')),
            # Empty optional cells arrive as NaN; store them as NULL.
            'expense_detail': self._clean(row.get('明细')),
            'import_batch_no': self.batch_no,
            'import_file_name': os.path.basename(str(row.get('_file_path', ''))),
            'import_time': datetime.now(),
            'import_user': os.getenv('USERNAME', 'system'),
            'remark': self._clean(row.get('备注')),
        }

    def insert_records(self, records: List[Dict[str, Any]]) -> int:
        """Insert all records in one transaction and return the row count.

        On any failure the transaction is rolled back and the exception is
        re-raised, so no partial batch is left pending on the connection.
        """
        columns = [
            'site_id', 'tenant_id', 'expense_month', 'expense_type_code',
            'expense_type_name', 'expense_category', 'expense_amount',
            'expense_detail', 'import_batch_no', 'import_file_name',
            'import_time', 'import_user', 'remark'
        ]

        cols_str = ", ".join(columns)
        placeholders = ", ".join(["%s"] * len(columns))
        sql = f"INSERT INTO {self.TARGET_TABLE} ({cols_str}) VALUES ({placeholders})"

        inserted = 0
        try:
            with self.db.conn.cursor() as cur:
                for record in records:
                    cur.execute(sql, [record.get(col) for col in columns])
                    inserted += cur.rowcount
            self.db.commit()
        except Exception:
            self.db.conn.rollback()
            raise
        return inserted
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# 平台结算导入
|
||||
# =============================================================================
|
||||
|
||||
class PlatformSettlementImporter(BaseImporter):
    """
    Platform settlement importer.

    Expected Excel columns:
    - 回款日期: settlement date
    - 平台类型: one of the keys of PLATFORM_TYPES
    - 回款金额: number (must not be negative)
    - 平台订单号 / 订单原始金额 / 佣金 / 服务费 / 关联订单ID / 备注: optional
    """

    TARGET_TABLE = "billiards_dws.dws_platform_settlement"

    REQUIRED_COLUMNS = ['回款日期', '平台类型', '回款金额']
    OPTIONAL_COLUMNS = ['平台订单号', '订单原始金额', '佣金', '服务费', '关联订单ID', '备注']

    @staticmethod
    def _clean(value: Any) -> Any:
        """Return ``None`` for a missing cell (None/NaN), else the value unchanged."""
        return None if value is None or pd.isna(value) else value

    @staticmethod
    def _text(value: Any) -> str:
        """Return the cell as stripped text; a missing cell becomes ''."""
        return '' if value is None or pd.isna(value) else str(value).strip()

    def _validate_amount(self, value: Any, label: str, row_idx: int) -> List[str]:
        """Return the validation errors for one amount cell (empty when valid)."""
        if value is None or pd.isna(value):
            return []  # blank cells keep importing as 0, as before
        amount = self._safe_decimal(value, None)
        if amount is None or not amount.is_finite():
            return [f"行{row_idx}: {label}格式错误 '{value}'"]
        if amount < 0:
            return [f"行{row_idx}: {label}不能为负数"]
        return []

    def import_file(self, file_path: str) -> Dict[str, Any]:
        """Import a platform settlement workbook.

        Rows that fail validation are skipped and reported; valid rows are
        inserted. Returns a summary dict with ``status`` ("SUCCESS",
        "PARTIAL" or "ERROR"), the batch number, row counts and up to ten
        error messages.
        """
        print(f"开始导入平台结算文件: {file_path}")

        df = pd.read_excel(file_path)

        missing_cols = [c for c in self.REQUIRED_COLUMNS if c not in df.columns]
        if missing_cols:
            return {"status": "ERROR", "message": f"缺少必要列: {missing_cols}"}

        records = []
        errors = []

        # Excel data rows start at line 2 (line 1 is the header).
        for row_no, (_, row) in enumerate(df.iterrows(), start=2):
            row_dict = row.to_dict()
            row_errors = self.validate_row(row_dict, row_no)
            if row_errors:
                errors.extend(row_errors)
                continue

            # transform_row derives import_file_name from this key.
            row_dict['_file_path'] = file_path
            records.append(self.transform_row(row_dict))

        if errors:
            print(f"校验错误: {len(errors)} 条")
            for err in errors[:10]:
                print(f" - {err}")

        inserted = 0
        if records:
            inserted = self.insert_records(records)

        return {
            "status": "SUCCESS" if not errors else "PARTIAL",
            "batch_no": self.batch_no,
            "total_rows": len(df),
            "inserted": inserted,
            "errors": len(errors),
            # Same key ExpenseImporter returns, so callers can show details.
            "error_messages": errors[:10],
        }

    def validate_row(self, row: Dict[str, Any], row_idx: int) -> List[str]:
        """Validate settlement date, platform type and settlement amount."""
        errors = []

        if not self._safe_date(row.get('回款日期')):
            errors.append(f"行{row_idx}: 回款日期格式错误")

        # Empty/NaN cells are reported as invalid instead of crashing on .strip().
        platform_type = self._text(row.get('平台类型'))
        if platform_type not in PLATFORM_TYPES:
            errors.append(f"行{row_idx}: 平台类型无效 '{platform_type}'")

        errors.extend(self._validate_amount(row.get('回款金额'), '回款金额', row_idx))

        return errors

    def transform_row(self, row: Dict[str, Any]) -> Dict[str, Any]:
        """Build the target-table record for one validated row."""
        platform_name = self._text(row.get('平台类型'))
        platform_type = PLATFORM_TYPES.get(platform_name, 'OTHER')

        return {
            'site_id': self.site_id,
            'tenant_id': self.tenant_id,
            'settlement_date': self._safe_date(row.get('回款日期')),
            'platform_type': platform_type,
            'platform_name': platform_name,
            # Empty optional cells arrive as NaN; store them as NULL.
            'platform_order_no': self._clean(row.get('平台订单号')),
            'order_settle_id': self._clean(row.get('关联订单ID')),
            'settlement_amount': self._safe_decimal(row.get('回款金额')),
            'commission_amount': self._safe_decimal(row.get('佣金')),
            'service_fee': self._safe_decimal(row.get('服务费')),
            'gross_amount': self._safe_decimal(row.get('订单原始金额')),
            'import_batch_no': self.batch_no,
            'import_file_name': os.path.basename(str(row.get('_file_path', ''))),
            'import_time': datetime.now(),
            'import_user': os.getenv('USERNAME', 'system'),
            'remark': self._clean(row.get('备注')),
        }

    def insert_records(self, records: List[Dict[str, Any]]) -> int:
        """Insert all records in one transaction and return the row count.

        On any failure the transaction is rolled back and the exception is
        re-raised, so no partial batch is left pending on the connection.
        """
        columns = [
            'site_id', 'tenant_id', 'settlement_date', 'platform_type',
            'platform_name', 'platform_order_no', 'order_settle_id',
            'settlement_amount', 'commission_amount', 'service_fee',
            'gross_amount', 'import_batch_no', 'import_file_name',
            'import_time', 'import_user', 'remark'
        ]

        cols_str = ", ".join(columns)
        placeholders = ", ".join(["%s"] * len(columns))
        sql = f"INSERT INTO {self.TARGET_TABLE} ({cols_str}) VALUES ({placeholders})"

        inserted = 0
        try:
            with self.db.conn.cursor() as cur:
                for record in records:
                    cur.execute(sql, [record.get(col) for col in columns])
                    inserted += cur.rowcount
            self.db.commit()
        except Exception:
            self.db.conn.rollback()
            raise
        return inserted
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# 充值提成导入
|
||||
# =============================================================================
|
||||
|
||||
class RechargeCommissionImporter(BaseImporter):
|
||||
"""
|
||||
充值提成导入
|
||||
|
||||
Excel格式要求:
|
||||
- 月份: 2026-01 格式
|
||||
- 助教ID: 数字
|
||||
- 助教花名: 字符串
|
||||
- 充值订单金额: 数字
|
||||
- 提成金额: 数字
|
||||
- 充值订单号: 可选
|
||||
- 备注: 可选
|
||||
"""
|
||||
|
||||
TARGET_TABLE = "billiards_dws.dws_assistant_recharge_commission"
|
||||
|
||||
REQUIRED_COLUMNS = ['月份', '助教ID', '提成金额']
|
||||
OPTIONAL_COLUMNS = ['助教花名', '充值订单金额', '充值订单ID', '充值订单号', '备注']
|
||||
|
||||
def import_file(self, file_path: str) -> Dict[str, Any]:
|
||||
print(f"开始导入充值提成文件: {file_path}")
|
||||
|
||||
df = pd.read_excel(file_path)
|
||||
|
||||
missing_cols = [c for c in self.REQUIRED_COLUMNS if c not in df.columns]
|
||||
if missing_cols:
|
||||
return {"status": "ERROR", "message": f"缺少必要列: {missing_cols}"}
|
||||
|
||||
records = []
|
||||
errors = []
|
||||
|
||||
for idx, row in df.iterrows():
|
||||
row_dict = row.to_dict()
|
||||
row_errors = self.validate_row(row_dict, idx + 2)
|
||||
|
||||
if row_errors:
|
||||
errors.extend(row_errors)
|
||||
continue
|
||||
|
||||
record = self.transform_row(row_dict)
|
||||
records.append(record)
|
||||
|
||||
if errors:
|
||||
print(f"校验错误: {len(errors)} 条")
|
||||
for err in errors[:10]:
|
||||
print(f" - {err}")
|
||||
|
||||
inserted = 0
|
||||
if records:
|
||||
inserted = self.insert_records(records)
|
||||
|
||||
return {
|
||||
"status": "SUCCESS" if not errors else "PARTIAL",
|
||||
"batch_no": self.batch_no,
|
||||
"total_rows": len(df),
|
||||
"inserted": inserted,
|
||||
"errors": len(errors),
|
||||
}
|
||||
|
||||
def validate_row(self, row: Dict[str, Any], row_idx: int) -> List[str]:
|
||||
errors = []
|
||||
|
||||
month = self._safe_month(row.get('月份'))
|
||||
if not month:
|
||||
errors.append(f"行{row_idx}: 月份格式错误")
|
||||
|
||||
assistant_id = row.get('助教ID')
|
||||
if assistant_id is None or pd.isna(assistant_id):
|
||||
errors.append(f"行{row_idx}: 助教ID不能为空")
|
||||
|
||||
amount = self._safe_decimal(row.get('提成金额'))
|
||||
if amount < 0:
|
||||
errors.append(f"行{row_idx}: 提成金额不能为负数")
|
||||
|
||||
return errors
|
||||
|
||||
def transform_row(self, row: Dict[str, Any]) -> Dict[str, Any]:
    """Convert a validated spreadsheet row into a record for insertion.

    Optional cells that are empty in the workbook arrive from pandas as
    NaN; they are normalised to ``None`` so they are stored as SQL NULL
    rather than being passed to the database driver as a float NaN.

    Args:
        row: The row as a column-name -> value mapping. The ``_file_path``
            key, when present, supplies the imported file's path.

    Returns:
        A dict keyed by target-table column name. ``commission_ratio`` is
        commission / recharge amount, or ``None`` when the recharge amount
        is not positive.
    """
    def _optional(key: str) -> Any:
        # Map missing / NaN cells to None; leave real values untouched.
        value = row.get(key)
        if value is None or pd.isna(value):
            return None
        return value

    recharge_amount = self._safe_decimal(row.get('充值订单金额'))
    commission_amount = self._safe_decimal(row.get('提成金额'))
    # Guard against division by zero when no recharge amount was given.
    commission_ratio = commission_amount / recharge_amount if recharge_amount > 0 else None

    return {
        'site_id': self.site_id,
        'tenant_id': self.tenant_id,
        'assistant_id': int(row.get('助教ID')),
        'assistant_nickname': _optional('助教花名'),
        'commission_month': self._safe_month(row.get('月份')),
        'recharge_order_id': _optional('充值订单ID'),
        'recharge_order_no': _optional('充值订单号'),
        'recharge_amount': recharge_amount,
        'commission_amount': commission_amount,
        'commission_ratio': commission_ratio,
        'import_batch_no': self.batch_no,
        'import_file_name': os.path.basename(str(row.get('_file_path', ''))),
        'import_time': datetime.now(),
        'import_user': os.getenv('USERNAME', 'system'),
        'remark': _optional('备注'),
    }
|
||||
|
||||
def insert_records(self, records: List[Dict[str, Any]]) -> int:
    """Insert the given records into ``TARGET_TABLE`` and commit.

    Each record is written with its own parameterised INSERT; keys absent
    from a record are sent as ``None``.

    Args:
        records: Records keyed by target-table column name.

    Returns:
        The sum of the cursor's ``rowcount`` over all executed statements.
    """
    columns = [
        'site_id', 'tenant_id', 'assistant_id', 'assistant_nickname',
        'commission_month', 'recharge_order_id', 'recharge_order_no',
        'recharge_amount', 'commission_amount', 'commission_ratio',
        'import_batch_no', 'import_file_name', 'import_time',
        'import_user', 'remark'
    ]

    column_list = ", ".join(columns)
    value_marks = ", ".join("%s" for _ in columns)
    statement = f"INSERT INTO {self.TARGET_TABLE} ({column_list}) VALUES ({value_marks})"

    total = 0
    with self.db.conn.cursor() as cursor:
        for rec in records:
            params = [rec.get(name) for name in columns]
            cursor.execute(statement, params)
            total += cursor.rowcount

    self.db.commit()
    return total
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# 主函数
|
||||
# =============================================================================
|
||||
|
||||
def main():
    """Command-line entry point of the DWS Excel import tool.

    Parses ``--type`` and ``--file``, verifies that the file exists, builds
    the importer matching the requested type and runs it, then prints a
    summary of the result. Exits with status 1 when the file is missing,
    the type is unknown, the importer reports ``ERROR`` or any exception is
    raised (after rolling back). The database connection is always closed.
    """
    parser = argparse.ArgumentParser(description='DWS Excel导入工具')
    parser.add_argument(
        '--type', '-t',
        choices=['expense', 'platform', 'commission'],
        required=True,
        help='导入类型: expense(支出), platform(平台结算), commission(充值提成)',
    )
    parser.add_argument('--file', '-f', required=True, help='Excel文件路径')
    args = parser.parse_args()

    # Bail out early when the input file is missing.
    if not os.path.exists(args.file):
        print(f"文件不存在: {args.file}")
        sys.exit(1)

    config = Config()
    db = DatabaseConnection(config)

    try:
        # Map each import type to its importer class.
        importer_classes = {
            'expense': ExpenseImporter,
            'platform': PlatformSettlementImporter,
            'commission': RechargeCommissionImporter,
        }
        importer_cls = importer_classes.get(args.type)
        if importer_cls is None:
            print(f"未知的导入类型: {args.type}")
            sys.exit(1)

        outcome = importer_cls(config, db).import_file(args.file)

        # Report the outcome.
        print("\n" + "=" * 50)
        print("导入结果:")
        print(f" 状态: {outcome.get('status')}")
        print(f" 批次号: {outcome.get('batch_no')}")
        print(f" 总行数: {outcome.get('total_rows')}")
        print(f" 插入行数: {outcome.get('inserted')}")
        print(f" 错误行数: {outcome.get('errors')}")

        if outcome.get('status') == 'ERROR':
            print(f" 错误信息: {outcome.get('message')}")
            sys.exit(1)

    except Exception as exc:
        print(f"导入失败: {exc}")
        db.rollback()
        sys.exit(1)
    finally:
        db.close()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
35
etl_billiards/scripts/run_seed_dws_config.py
Normal file
35
etl_billiards/scripts/run_seed_dws_config.py
Normal file
@@ -0,0 +1,35 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""执行DWS配置数据导入"""
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
from dotenv import load_dotenv
|
||||
import psycopg2
|
||||
|
||||
def main():
    """Execute ``database/seed_dws_config.sql`` against the configured database.

    The connection string is read from ``PG_DSN`` after loading the ``.env``
    file located one directory above this script's directory. When the
    variable is missing an error is printed and nothing is executed. The
    SQL file is run as a single statement batch in autocommit mode, and the
    connection is closed even when execution fails.
    """
    # Load the .env configuration.
    env_path = Path(__file__).parent.parent / ".env"
    load_dotenv(env_path)

    dsn = os.getenv("PG_DSN")
    if not dsn:
        print("错误: 未找到 PG_DSN 配置")
        return

    # Read the SQL file.
    sql_file = Path(__file__).parent.parent / "database" / "seed_dws_config.sql"
    sql_content = sql_file.read_text(encoding="utf-8")

    print("连接数据库...")
    conn = psycopg2.connect(dsn)
    try:
        conn.autocommit = True

        with conn.cursor() as cur:
            print(f"执行SQL文件: {sql_file}")
            cur.execute(sql_content)
            print("DWS配置数据导入成功!")
    finally:
        # Release the connection even if the SQL batch raised.
        conn.close()
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
43
etl_billiards/scripts/show_area_category.py
Normal file
43
etl_billiards/scripts/show_area_category.py
Normal file
@@ -0,0 +1,43 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""显示台区分类映射数据"""
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
from dotenv import load_dotenv
|
||||
import psycopg2
|
||||
|
||||
def main():
    """Print the contents of ``billiards_dws.cfg_area_category``.

    Outputs every mapping rule ordered by priority, followed by the number
    of rules per category. The connection string comes from ``PG_DSN``
    (loaded from the ``.env`` file one directory above this script's
    directory); when it is missing an error is printed and nothing is
    queried. The connection is closed even when a query fails.
    """
    load_dotenv(Path(__file__).parent.parent / ".env")
    dsn = os.getenv("PG_DSN")
    if not dsn:
        print("错误: 未找到 PG_DSN 配置")
        return

    conn = psycopg2.connect(dsn)
    try:
        print("cfg_area_category 数据内容:")
        print("=" * 90)
        print(f"{'source_area_name':<15} {'category_code':<15} {'category_name':<12} {'match_type':<10} {'priority':<8}")
        print("-" * 90)

        with conn.cursor() as cur:
            cur.execute("""
                SELECT source_area_name, category_code, category_name, match_type, match_priority
                FROM billiards_dws.cfg_area_category
                ORDER BY match_priority, category_code, source_area_name
            """)
            for row in cur.fetchall():
                print(f"{row[0]:<15} {row[1]:<15} {row[2]:<12} {row[3]:<10} {row[4]:<8}")

        print("=" * 90)
        print("\n分类汇总:")
        with conn.cursor() as cur:
            cur.execute("""
                SELECT category_code, category_name, COUNT(*) as cnt
                FROM billiards_dws.cfg_area_category
                GROUP BY category_code, category_name
                ORDER BY category_code
            """)
            for row in cur.fetchall():
                print(f" {row[0]:<15} {row[1]:<12} {row[2]} 条规则")
    finally:
        # Release the connection even if a query or the formatting raised.
        conn.close()
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
48
etl_billiards/scripts/show_performance_tier.py
Normal file
48
etl_billiards/scripts/show_performance_tier.py
Normal file
@@ -0,0 +1,48 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""显示绩效档位配置数据"""
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
from dotenv import load_dotenv
|
||||
import psycopg2
|
||||
|
||||
def main():
    """Print the contents of ``billiards_dws.cfg_performance_tier``.

    Each tier is printed on one line ordered by ``tier_level``, followed by
    the reference table from the requirements document. The connection
    string comes from ``PG_DSN`` (loaded from the ``.env`` file one
    directory above this script's directory); when it is missing an error
    is printed and nothing is queried. The connection is closed even when
    the query fails.
    """
    load_dotenv(Path(__file__).parent.parent / ".env")
    dsn = os.getenv("PG_DSN")
    if not dsn:
        print("错误: 未找到 PG_DSN 配置")
        return

    conn = psycopg2.connect(dsn)
    try:
        print("cfg_performance_tier 数据内容:")
        print("=" * 110)
        print(f"{'tier_code':<8} {'tier_name':<18} {'min_hours':<10} {'max_hours':<10} {'base_ded':<10} {'bonus_ded':<10} {'vacation':<10}")
        print("-" * 110)

        with conn.cursor() as cur:
            cur.execute("""
                SELECT tier_code, tier_name, min_hours, max_hours,
                       base_deduction, bonus_deduction_ratio,
                       vacation_days, vacation_unlimited
                FROM billiards_dws.cfg_performance_tier
                ORDER BY tier_level
            """)
            for row in cur.fetchall():
                # Only a missing upper bound is shown as NULL; a bound of 0
                # is a real value and must be printed as such.
                max_h = str(row[3]) if row[3] is not None else "NULL"
                vac = "自由" if row[7] else str(row[6]) + "天"
                print(f"{row[0]:<8} {row[1]:<18} {row[2]:<10} {max_h:<10} {row[4]:<10} {row[5]*100:.0f}%{'':<7} {vac:<10}")

        print("=" * 110)
        print("\n数据来源依据: DWS 数据库处理需求.md 第35-41行")
        print("""
| 档位 | 总业绩小时数阈值 | 专业课抽成 | 打赏课抽成 | 次月休假 |
|------|------------------|-----------|-----------|----------|
| 0档 | H < 100 | 28元/小时 | 50% | 3天 |
| 1档 | 100 ≤ H < 130 | 18元/小时 | 40% | 4天 |
| 2档 | 130 ≤ H < 160 | 15元/小时 | 38% | 4天 |
| 3档 | 160 ≤ H < 190 | 13元/小时 | 35% | 5天 |
| 4档 | 190 ≤ H < 220 | 10元/小时 | 33% | 6天 |
| 5档 | H ≥ 220 | 8元/小时 | 30% | 休假自由 |
""")
    finally:
        # Release the connection even if the query or the formatting raised.
        conn.close()
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
222
etl_billiards/scripts/test_index_tasks.py
Normal file
222
etl_billiards/scripts/test_index_tasks.py
Normal file
@@ -0,0 +1,222 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
测试指数算法任务
|
||||
"""
|
||||
import sys
|
||||
import os
|
||||
|
||||
# 添加项目路径
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
import logging
|
||||
from config.settings import AppConfig
|
||||
from database.connection import DatabaseConnection
|
||||
from database.operations import DatabaseOperations
|
||||
from tasks.dws.index import RecallIndexTask, IntimacyIndexTask
|
||||
|
||||
# Configure logging: INFO level, each line carries timestamp, logger name,
# level and message.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
# Module-level logger used by the test functions in this script.
logger = logging.getLogger('test_index')
|
||||
|
||||
def test_recall_index():
    """Run the recall index task (DWS_RECALL_INDEX) and log a summary.

    Loads the application config, opens a database connection, executes
    ``RecallIndexTask`` and logs its result. When the result status is
    ``'success'`` it additionally logs aggregate statistics and the five
    highest ``display_score`` rows of
    ``billiards_dws.dws_member_recall_index``.

    Returns:
        The value returned by the task's ``execute`` call. The database
        connection is closed before returning.
    """
    banner = "=" * 60
    logger.info(banner)
    logger.info("测试客户召回指数任务 (DWS_RECALL_INDEX)")
    logger.info(banner)

    config = AppConfig.load()
    db_conn = DatabaseConnection(config.config["db"]["dsn"])
    db = DatabaseOperations(db_conn)

    try:
        task = RecallIndexTask(config, db, None, logger)
        result = task.execute(None)
        logger.info("任务执行结果: %s", result)

        # Nothing further to report unless the task succeeded.
        if result.get('status') != 'success':
            return result

        summary_sql = """
            SELECT
                COUNT(*) as total_count,
                ROUND(AVG(display_score)::numeric, 2) as avg_score,
                ROUND(MIN(display_score)::numeric, 2) as min_score,
                ROUND(MAX(display_score)::numeric, 2) as max_score,
                ROUND(AVG(raw_score)::numeric, 4) as avg_raw_score,
                ROUND(AVG(score_overdue)::numeric, 4) as avg_overdue,
                ROUND(AVG(score_new_bonus)::numeric, 4) as avg_new_bonus,
                ROUND(AVG(score_recharge_bonus)::numeric, 4) as avg_recharge_bonus,
                ROUND(AVG(score_hot_drop)::numeric, 4) as avg_hot_drop
            FROM billiards_dws.dws_member_recall_index
        """
        summary_rows = db.query(summary_sql)
        if summary_rows:
            stats = dict(summary_rows[0])
            divider = "-" * 40

            # Aggregates may be NULL (e.g. empty table), hence the "or 0".
            logger.info(divider)
            logger.info("召回指数统计:")
            logger.info(" 总记录数: %s", stats['total_count'])
            logger.info(" Display Score: 平均=%.2f, 最小=%.2f, 最大=%.2f",
                        stats['avg_score'] or 0, stats['min_score'] or 0, stats['max_score'] or 0)
            logger.info(" Raw Score 平均: %.4f", stats['avg_raw_score'] or 0)
            logger.info(" 分项得分平均:")
            logger.info(" - 超期紧急性: %.4f", stats['avg_overdue'] or 0)
            logger.info(" - 新客户加分: %.4f", stats['avg_new_bonus'] or 0)
            logger.info(" - 充值加分: %.4f", stats['avg_recharge_bonus'] or 0)
            logger.info(" - 热度断档: %.4f", stats['avg_hot_drop'] or 0)

            # Top 5 members by display score.
            logger.info(divider)
            logger.info("召回优先级 Top 5:")
            ranking_sql = """
                SELECT member_id, display_score, raw_score,
                       days_since_last_visit, visit_interval_median
                FROM billiards_dws.dws_member_recall_index
                ORDER BY display_score DESC
                LIMIT 5
            """
            ranking = db.query(ranking_sql) or []
            for rank, entry in enumerate(ranking, 1):
                item = dict(entry)
                logger.info(" %d. 会员%s: %.2f分 (Raw=%.4f, 最近到店=%s天前, 周期=%.1f天)",
                            rank, item['member_id'], item['display_score'] or 0, item['raw_score'] or 0,
                            item['days_since_last_visit'], item['visit_interval_median'] or 0)

        return result

    finally:
        db_conn.close()
|
||||
|
||||
|
||||
def test_intimacy_index():
    """Run the intimacy index task (DWS_INTIMACY_INDEX) and log a summary.

    Loads the application config, opens a database connection, executes
    ``IntimacyIndexTask`` and logs its result. When the result status is
    ``'success'`` it additionally logs aggregate statistics and the five
    highest ``display_score`` member/assistant pairs of
    ``billiards_dws.dws_member_assistant_intimacy``.

    Returns:
        The value returned by the task's ``execute`` call. The database
        connection is closed before returning.
    """
    banner = "=" * 60
    logger.info("")
    logger.info(banner)
    logger.info("测试客户-助教亲密指数任务 (DWS_INTIMACY_INDEX)")
    logger.info(banner)

    config = AppConfig.load()
    db_conn = DatabaseConnection(config.config["db"]["dsn"])
    db = DatabaseOperations(db_conn)

    try:
        task = IntimacyIndexTask(config, db, None, logger)
        result = task.execute(None)
        logger.info("任务执行结果: %s", result)

        # Nothing further to report unless the task succeeded.
        if result.get('status') != 'success':
            return result

        summary_sql = """
            SELECT
                COUNT(*) as total_count,
                COUNT(DISTINCT member_id) as unique_members,
                COUNT(DISTINCT assistant_id) as unique_assistants,
                ROUND(AVG(display_score)::numeric, 2) as avg_score,
                ROUND(MIN(display_score)::numeric, 2) as min_score,
                ROUND(MAX(display_score)::numeric, 2) as max_score,
                ROUND(AVG(raw_score)::numeric, 4) as avg_raw_score,
                ROUND(AVG(score_frequency)::numeric, 4) as avg_frequency,
                ROUND(AVG(score_recency)::numeric, 4) as avg_recency,
                ROUND(AVG(score_recharge)::numeric, 4) as avg_recharge,
                ROUND(AVG(burst_multiplier)::numeric, 4) as avg_burst
            FROM billiards_dws.dws_member_assistant_intimacy
        """
        summary_rows = db.query(summary_sql)
        if summary_rows:
            stats = dict(summary_rows[0])
            divider = "-" * 40

            # Aggregates may be NULL (e.g. empty table), hence the "or 0".
            logger.info(divider)
            logger.info("亲密指数统计:")
            logger.info(" 总记录数: %s (客户-助教对)", stats['total_count'])
            logger.info(" 唯一会员: %s, 唯一助教: %s", stats['unique_members'], stats['unique_assistants'])
            logger.info(" Display Score: 平均=%.2f, 最小=%.2f, 最大=%.2f",
                        stats['avg_score'] or 0, stats['min_score'] or 0, stats['max_score'] or 0)
            logger.info(" Raw Score 平均: %.4f", stats['avg_raw_score'] or 0)
            logger.info(" 分项得分平均:")
            logger.info(" - 频次强度: %.4f", stats['avg_frequency'] or 0)
            logger.info(" - 最近温度: %.4f", stats['avg_recency'] or 0)
            logger.info(" - 充值强度: %.4f", stats['avg_recharge'] or 0)
            logger.info(" - 激增放大: %.4f", stats['avg_burst'] or 0)

            # Top 5 member/assistant pairs by display score.
            logger.info(divider)
            logger.info("亲密度 Top 5 客户-助教对:")
            ranking_sql = """
                SELECT member_id, assistant_id, display_score, raw_score,
                       session_count, attributed_recharge_amount
                FROM billiards_dws.dws_member_assistant_intimacy
                ORDER BY display_score DESC
                LIMIT 5
            """
            ranking = db.query(ranking_sql) or []
            for rank, entry in enumerate(ranking, 1):
                item = dict(entry)
                logger.info(" %d. 会员%s-助教%s: %.2f分 (会话%d次, 归因充值%.2f元)",
                            rank, item['member_id'], item['assistant_id'],
                            item['display_score'] or 0, item['session_count'] or 0,
                            item['attributed_recharge_amount'] or 0)

        return result

    finally:
        db_conn.close()
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Script entry point: verify that the parameter table exists, then run
    # both index task tests and print their final status.
    print("=" * 60)
    print("指数算法任务测试")
    print("=" * 60)
    print()

    # Check up front which of the required tables exist.
    config = AppConfig.load()
    db_conn = DatabaseConnection(config.config["db"]["dsn"])
    check_sql = """
        SELECT table_name
        FROM information_schema.tables
        WHERE table_schema = 'billiards_dws'
          AND table_name IN ('dws_member_recall_index', 'dws_member_assistant_intimacy', 'cfg_index_parameters')
    """
    try:
        db = DatabaseOperations(db_conn)
        tables = db.query(check_sql)
        existing_tables = [dict(r)['table_name'] for r in (tables or [])]
    finally:
        # The pre-check connection is no longer needed; close it even if
        # the existence query raised.
        db_conn.close()

    if 'cfg_index_parameters' not in existing_tables:
        print("警告: cfg_index_parameters 表不存在,请先执行 schema_dws.sql")
        print("需要执行的表:")
        print(" - cfg_index_parameters")
        print(" - dws_member_recall_index")
        print(" - dws_member_assistant_intimacy")
        print(" - dws_index_percentile_history")
        sys.exit(1)

    # Each test opens and closes its own connection.
    recall_result = test_recall_index()
    intimacy_result = test_intimacy_index()

    print()
    print("=" * 60)
    print("测试完成")
    print("=" * 60)
    print(f"召回指数: {recall_result.get('status', 'unknown')}")
    print(f"亲密指数: {intimacy_result.get('status', 'unknown')}")
|
||||
34
etl_billiards/scripts/verify_dws_config.py
Normal file
34
etl_billiards/scripts/verify_dws_config.py
Normal file
@@ -0,0 +1,34 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""验证DWS配置数据"""
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
from dotenv import load_dotenv
|
||||
import psycopg2
|
||||
|
||||
def main():
    """Print the row count of each DWS configuration table.

    The connection string comes from ``PG_DSN`` (loaded from the ``.env``
    file one directory above this script's directory); when it is missing
    an error is printed and nothing is queried. The connection is closed
    even when a query fails.
    """
    load_dotenv(Path(__file__).parent.parent / ".env")
    dsn = os.getenv("PG_DSN")
    if not dsn:
        print("错误: 未找到 PG_DSN 配置")
        return

    tables = [
        "cfg_performance_tier",
        "cfg_assistant_level_price",
        "cfg_bonus_rules",
        "cfg_area_category",
        "cfg_skill_type"
    ]

    conn = psycopg2.connect(dsn)
    try:
        print("DWS 配置表数据统计:")
        print("-" * 40)

        with conn.cursor() as cur:
            for t in tables:
                # Table names come from the fixed list above, never from
                # user input, so interpolating them into the SQL is safe.
                cur.execute(f"SELECT COUNT(*) FROM billiards_dws.{t}")
                cnt = cur.fetchone()[0]
                print(f"{t}: {cnt} 行")
    finally:
        # Release the connection even if a query raised.
        conn.close()
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Reference in New Issue
Block a user