This commit is contained in:
Neo
2026-02-04 21:39:01 +08:00
parent ee773a9b52
commit a3f4d04335
148 changed files with 31455 additions and 182 deletions

View File

@@ -0,0 +1,636 @@
# -*- coding: utf-8 -*-
"""
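Discount-definition sampling analysis script.

Purpose:
    Sample 100 orders from the dwd_settlement_head table and analyze how the
    following discount fields are used:
    - adjust_amount: table-fee discount/adjustment (may include key-account
      discounts and other discounts)
    - member_discount_amount: member discount
    - rounding_amount: rounding-off amount
    - coupon_amount: group-buy amount offset against the table fee
    - gift_card_amount: payment by gift card

Analysis goals:
    1. Key-account discount: is there a "key account" marker? How is it told
       apart from an ordinary adjustment?
    2. Member discount: are there non-zero values? In which scenarios is it used?
    3. Rounding: how the rounding rule relates to adjust_amount.
    4. Other discounts: which other discount types adjust_amount contains.

Output:
    - Prints the analysis report to the console
    - Generates the docs/analysis_discount_patterns.md report file

Author: ETL team
Created: 2026-02-01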
"""
import os
import sys
from datetime import datetime
from decimal import Decimal
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
# 添加项目根目录到Python路径
project_root = Path(__file__).parent.parent.parent
sys.path.insert(0, str(project_root))
from etl_billiards.utils.config import Config
from etl_billiards.utils.db import DatabaseConnection
def analyze_discount_patterns():
    """Run the discount-field sampling analysis and write the Markdown report.

    Each section's result is printed to stdout as soon as it is computed.
    The combined report is then written to
    ``<project_root>/etl_billiards/docs/analysis_discount_patterns.md``; the
    target directory is created when it does not exist yet. The database
    connection is closed in every case, including when a step raises.
    """
    print("=" * 80)
    print("优惠口径抽样分析")
    print("=" * 80)
    print()

    # Load configuration and open the database connection.
    config = Config()
    db = DatabaseConnection(config)

    try:
        # 1. Overall statistics
        print("【1. 总体统计】")
        print("-" * 40)
        overall_stats = get_overall_stats(db)
        print_overall_stats(overall_stats)
        print()

        # 2. Random sample of discounted orders
        print("【2. 有优惠的订单抽样分析 (100单)】")
        print("-" * 40)
        sample_orders = get_sample_orders_with_discount(db, limit=100)
        discount_analysis = analyze_sample_orders(sample_orders)
        print_discount_analysis(discount_analysis)
        print()

        # 3. adjust_amount in detail
        print("【3. adjust_amount (台费打折/调整) 详细分析】")
        print("-" * 40)
        adjust_analysis = analyze_adjust_amount(db)
        print_adjust_analysis(adjust_analysis)
        print()

        # 4. Member discount usage
        print("【4. member_discount_amount (会员折扣) 使用分析】")
        print("-" * 40)
        member_discount_analysis = analyze_member_discount(db)
        print_member_discount_analysis(member_discount_analysis)
        print()

        # 5. Rounding rule
        print("【5. rounding_amount (抹零) 规则分析】")
        print("-" * 40)
        rounding_analysis = analyze_rounding(db)
        print_rounding_analysis(rounding_analysis)
        print()

        # 6. Group-buy discounts
        print("【6. 团购优惠分析】")
        print("-" * 40)
        groupbuy_analysis = analyze_groupbuy(db)
        print_groupbuy_analysis(groupbuy_analysis)
        print()

        # 7. Build the Markdown report
        print("【7. 生成分析报告】")
        print("-" * 40)
        report = generate_report(
            overall_stats,
            discount_analysis,
            adjust_analysis,
            member_discount_analysis,
            rounding_analysis,
            groupbuy_analysis
        )

        # Save the report. open(..., 'w') does not create missing parent
        # directories, so make sure the docs directory exists first.
        report_path = project_root / "etl_billiards" / "docs" / "analysis_discount_patterns.md"
        report_path.parent.mkdir(parents=True, exist_ok=True)
        with open(report_path, 'w', encoding='utf-8') as f:
            f.write(report)
        print(f"报告已保存到: {report_path}")
    finally:
        db.close()
def get_overall_stats(db: DatabaseConnection) -> Dict[str, Any]:
    """Query table-wide counts and sums for every discount column.

    Args:
        db: Connection object exposing ``query(sql)``.

    Returns:
        The single aggregate row converted to a dict, or an empty dict when
        the query returns nothing.
    """
    overall_sql = """
        SELECT
            COUNT(*) AS total_orders,
            COUNT(CASE WHEN adjust_amount != 0 THEN 1 END) AS orders_with_adjust,
            COUNT(CASE WHEN member_discount_amount != 0 THEN 1 END) AS orders_with_member_discount,
            COUNT(CASE WHEN rounding_amount != 0 THEN 1 END) AS orders_with_rounding,
            COUNT(CASE WHEN coupon_amount != 0 THEN 1 END) AS orders_with_coupon,
            COUNT(CASE WHEN gift_card_amount != 0 THEN 1 END) AS orders_with_gift_card,
            SUM(adjust_amount) AS total_adjust,
            SUM(member_discount_amount) AS total_member_discount,
            SUM(rounding_amount) AS total_rounding,
            SUM(coupon_amount) AS total_coupon,
            SUM(gift_card_amount) AS total_gift_card,
            SUM(consume_money) AS total_consume,
            SUM(pay_amount) AS total_pay
        FROM billiards_dwd.dwd_settlement_head
    """
    result = db.query(overall_sql)
    if not result:
        return {}
    return dict(result[0])
def get_sample_orders_with_discount(
    db: DatabaseConnection,
    limit: int = 100
) -> List[Dict[str, Any]]:
    """Draw a random sample of orders that carry at least one discount.

    An order qualifies when any of adjust_amount, member_discount_amount,
    rounding_amount, coupon_amount or gift_card_amount is non-zero.

    Args:
        db: Connection object exposing ``query(sql, params)``.
        limit: Maximum number of orders to return.

    Returns:
        One dict per sampled order; an empty list when nothing matches.
    """
    sampling_sql = """
        SELECT
            order_settle_id,
            order_trade_no,
            create_time,
            consume_money,
            pay_amount,
            adjust_amount,
            member_discount_amount,
            rounding_amount,
            coupon_amount,
            gift_card_amount,
            balance_amount,
            recharge_card_amount,
            pl_coupon_sale_amount,
            table_charge_money,
            goods_money,
            assistant_pd_money,
            assistant_cx_money,
            consume_money - pay_amount - COALESCE(recharge_card_amount, 0)
                - COALESCE(gift_card_amount, 0) - COALESCE(balance_amount, 0) AS calculated_discount
        FROM billiards_dwd.dwd_settlement_head
        WHERE adjust_amount != 0
           OR member_discount_amount != 0
           OR rounding_amount != 0
           OR coupon_amount != 0
           OR gift_card_amount != 0
        ORDER BY RANDOM()
        LIMIT %s
    """
    sampled = db.query(sampling_sql, (limit,))
    return [dict(record) for record in (sampled or [])]
def analyze_sample_orders(orders: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Count, per discount field, how many sampled orders use it.

    Args:
        orders: Order rows as dicts keyed by column name.

    Returns:
        A dict with ``total_sampled``, one ``with_*`` counter per discount
        field, and one ``*_values`` list holding the non-zero amounts as
        floats.
    """
    # (order column, counter key, collected-values key)
    tracked_fields = (
        ('adjust_amount', 'with_adjust', 'adjust_values'),
        ('member_discount_amount', 'with_member_discount', 'member_discount_values'),
        ('rounding_amount', 'with_rounding', 'rounding_values'),
        ('coupon_amount', 'with_coupon', 'coupon_values'),
        ('gift_card_amount', 'with_gift_card', 'gift_card_values'),
    )

    analysis: Dict[str, Any] = {'total_sampled': len(orders)}
    for _, counter_key, _ in tracked_fields:
        analysis[counter_key] = 0
    for _, _, values_key in tracked_fields:
        analysis[values_key] = []

    for order in orders:
        for column, counter_key, values_key in tracked_fields:
            raw = order.get(column)
            # A missing key or a None value counts as zero; Decimal(str(None))
            # would raise InvalidOperation.
            amount = Decimal(0) if raw is None else Decimal(str(raw))
            if amount != 0:
                analysis[counter_key] += 1
                analysis[values_key].append(float(amount))

    return analysis
def analyze_adjust_amount(db: DatabaseConnection) -> Dict[str, Any]:
    """Analyze the distribution and patterns of the adjust_amount field.

    Runs three queries against dwd_settlement_head, all restricted to rows
    with a non-zero adjust_amount:

    1. order count and total per value bucket,
    2. the 20 most frequent adjust/consume percentages,
    3. the 10 orders with the largest absolute adjustment.

    Args:
        db: Connection object exposing ``query(sql)``.

    Returns:
        Dict with ``distribution``, ``ratio_distribution`` and
        ``top_samples``, each a list of row dicts (empty when the query
        returns nothing).
    """
    # 1. Value distribution. Buckets are ordered by their smallest member so
    #    they appear in numeric order; ordering by the text label would sort
    #    them as strings.
    sql_distribution = """
        SELECT
            CASE
                WHEN adjust_amount = 0 THEN '0'
                WHEN adjust_amount > 0 AND adjust_amount <= 10 THEN '0-10'
                WHEN adjust_amount > 10 AND adjust_amount <= 50 THEN '10-50'
                WHEN adjust_amount > 50 AND adjust_amount <= 100 THEN '50-100'
                WHEN adjust_amount > 100 AND adjust_amount <= 500 THEN '100-500'
                WHEN adjust_amount > 500 THEN '>500'
                WHEN adjust_amount < 0 AND adjust_amount >= -10 THEN '-10-0'
                WHEN adjust_amount < -10 AND adjust_amount >= -50 THEN '-50--10'
                WHEN adjust_amount < -50 AND adjust_amount >= -100 THEN '-100--50'
                WHEN adjust_amount < -100 THEN '<-100'
            END AS range,
            COUNT(*) AS count,
            SUM(adjust_amount) AS total_amount
        FROM billiards_dwd.dwd_settlement_head
        WHERE adjust_amount != 0
        GROUP BY range
        ORDER BY MIN(adjust_amount)
    """
    distribution = db.query(sql_distribution)

    # 2. Adjustment as a percentage of the consumed amount.
    sql_ratio = """
        SELECT
            ROUND(adjust_amount / NULLIF(consume_money, 0) * 100, 2) AS discount_ratio,
            COUNT(*) AS count
        FROM billiards_dwd.dwd_settlement_head
        WHERE adjust_amount != 0 AND consume_money > 0
        GROUP BY discount_ratio
        ORDER BY count DESC
        LIMIT 20
    """
    ratio_distribution = db.query(sql_ratio)

    # 3. Largest adjustments by absolute value. consume_money is not filtered
    #    here, so ratio is NULL for orders whose consume_money is 0.
    sql_samples = """
        SELECT
            order_settle_id,
            consume_money,
            adjust_amount,
            ROUND(adjust_amount / NULLIF(consume_money, 0) * 100, 2) AS ratio
        FROM billiards_dwd.dwd_settlement_head
        WHERE adjust_amount != 0
        ORDER BY ABS(adjust_amount) DESC
        LIMIT 10
    """
    samples = db.query(sql_samples)

    return {
        'distribution': [dict(r) for r in distribution] if distribution else [],
        'ratio_distribution': [dict(r) for r in ratio_distribution] if ratio_distribution else [],
        'top_samples': [dict(r) for r in samples] if samples else []
    }
def analyze_member_discount(db: DatabaseConnection) -> Dict[str, Any]:
    """Summarize how the member_discount_amount field is used.

    Args:
        db: Connection object exposing ``query(sql)``.

    Returns:
        Dict with ``stats`` (the aggregate row as a dict, or {}) and
        ``samples`` (up to 20 orders with a non-zero member discount).
    """
    aggregate_sql = """
        SELECT
            COUNT(*) AS total_orders,
            COUNT(CASE WHEN member_discount_amount != 0 THEN 1 END) AS with_discount,
            SUM(member_discount_amount) AS total_discount,
            AVG(CASE WHEN member_discount_amount != 0 THEN member_discount_amount END) AS avg_discount,
            MAX(member_discount_amount) AS max_discount,
            MIN(CASE WHEN member_discount_amount != 0 THEN member_discount_amount END) AS min_discount
        FROM billiards_dwd.dwd_settlement_head
    """
    aggregate_rows = db.query(aggregate_sql)
    if aggregate_rows:
        stats = dict(aggregate_rows[0])
    else:
        stats = {}

    # Pull a handful of orders that actually carry a member discount.
    examples_sql = """
        SELECT
            order_settle_id,
            member_id,
            consume_money,
            member_discount_amount,
            ROUND(member_discount_amount / NULLIF(consume_money, 0) * 100, 2) AS ratio
        FROM billiards_dwd.dwd_settlement_head
        WHERE member_discount_amount != 0
        LIMIT 20
    """
    example_rows = db.query(examples_sql)

    return {
        'stats': stats,
        'samples': [dict(record) for record in (example_rows or [])]
    }
def analyze_rounding(db: DatabaseConnection) -> Dict[str, Any]:
    """Collect data for inferring the rounding_amount rule.

    Args:
        db: Connection object exposing ``query(sql)``.

    Returns:
        Dict with ``distribution`` (the 20 most frequent rounding amounts)
        and ``patterns`` (up to 20 orders showing the paid amount, the
        rounding amount, their sum, and the cents part of that sum).
    """
    # 1. Frequency of each distinct rounding amount.
    frequency_sql = """
        SELECT
            rounding_amount,
            COUNT(*) AS count
        FROM billiards_dwd.dwd_settlement_head
        WHERE rounding_amount != 0
        GROUP BY rounding_amount
        ORDER BY count DESC
        LIMIT 20
    """
    frequency_rows = db.query(frequency_sql)

    # 2. How the rounding amount relates to the amount actually paid.
    relation_sql = """
        SELECT
            pay_amount,
            rounding_amount,
            pay_amount + rounding_amount AS before_rounding,
            MOD(CAST((pay_amount + rounding_amount) * 100 AS INTEGER), 100) AS cents
        FROM billiards_dwd.dwd_settlement_head
        WHERE rounding_amount != 0
        LIMIT 20
    """
    relation_rows = db.query(relation_sql)

    result: Dict[str, Any] = {}
    result['distribution'] = [dict(record) for record in (frequency_rows or [])]
    result['patterns'] = [dict(record) for record in (relation_rows or [])]
    return result
def analyze_groupbuy(db: DatabaseConnection) -> Dict[str, Any]:
    """Summarize group-buy (coupon) discount usage.

    Args:
        db: Connection object exposing ``query(sql)``.

    Returns:
        Dict with ``stats`` (the aggregate row as a dict, or {}) and
        ``samples`` (up to 20 coupon orders left-joined to
        dwd_groupbuy_redemption on order_settle_id).
    """
    # 1. Usage counts and totals.
    usage_sql = """
        SELECT
            COUNT(*) AS total_orders,
            COUNT(CASE WHEN coupon_amount != 0 THEN 1 END) AS with_coupon,
            COUNT(CASE WHEN pl_coupon_sale_amount != 0 THEN 1 END) AS with_pl_coupon,
            SUM(coupon_amount) AS total_coupon_amount,
            SUM(pl_coupon_sale_amount) AS total_pl_coupon_sale
        FROM billiards_dwd.dwd_settlement_head
    """
    usage_rows = db.query(usage_sql)

    # 2. Example coupon orders with their redemption ledger values.
    examples_sql = """
        SELECT
            sh.order_settle_id,
            sh.coupon_amount,
            sh.pl_coupon_sale_amount,
            gr.ledger_amount AS groupbuy_ledger_amount,
            gr.ledger_unit_price AS groupbuy_unit_price
        FROM billiards_dwd.dwd_settlement_head sh
        LEFT JOIN billiards_dwd.dwd_groupbuy_redemption gr
            ON sh.order_settle_id = gr.order_settle_id
        WHERE sh.coupon_amount != 0
        LIMIT 20
    """
    example_rows = db.query(examples_sql)

    if usage_rows:
        usage = dict(usage_rows[0])
    else:
        usage = {}
    return {
        'stats': usage,
        'samples': [dict(record) for record in (example_rows or [])]
    }
def print_overall_stats(stats: Dict[str, Any]):
    """Print order counts, shares and totals for each discount field.

    Missing keys and None values (SQL NULL, e.g. SUM over an empty table)
    are shown as 0, and every share is 0.00% when there are no orders.

    Args:
        stats: Aggregate row with ``total_orders``, ``orders_with_*`` and
            ``total_*`` keys.
    """
    # (label used in the output, count key, sum key)
    fields = (
        ('adjust_amount', 'orders_with_adjust', 'total_adjust'),
        ('member_discount', 'orders_with_member_discount', 'total_member_discount'),
        ('rounding', 'orders_with_rounding', 'total_rounding'),
        ('coupon', 'orders_with_coupon', 'total_coupon'),
        ('gift_card', 'orders_with_gift_card', 'total_gift_card'),
    )

    total = stats.get('total_orders') or 0
    print(f"总订单数: {total:,}")
    for label, count_key, _ in fields:
        count = stats.get(count_key) or 0
        # Guard the division: an empty table yields total == 0.
        share = count / total * 100 if total else 0.0
        print(f"有{label}的订单: {count:,} ({share:.2f}%)")
    print()
    for label, _, sum_key in fields:
        print(f"{label}总额: {(stats.get(sum_key) or 0):,.2f}")
def print_discount_analysis(analysis: Dict[str, Any]):
    """Print the sample size and the per-field usage counters.

    Args:
        analysis: Result dict holding ``total_sampled`` and the ``with_*``
            counters; a missing key raises KeyError.
    """
    print(f"抽样订单数: {analysis['total_sampled']}")
    counters = (
        ('adjust_amount', 'with_adjust'),
        ('member_discount', 'with_member_discount'),
        ('rounding', 'with_rounding'),
        ('coupon', 'with_coupon'),
        ('gift_card', 'with_gift_card'),
    )
    for label, key in counters:
        print(f" - 有{label}: {analysis[key]}")
def print_adjust_analysis(analysis: Dict[str, Any]):
    """Print the adjust_amount buckets, top ratios and largest samples.

    None values (SQL NULL) are tolerated: amounts and counts print as 0 and
    a None ratio prints as ``N/A`` instead of ``None%``.

    Args:
        analysis: Dict with ``distribution``, ``ratio_distribution`` and
            ``top_samples`` lists of row dicts.
    """
    def _percent(value):
        """Render a ratio as ``<value>%``, or ``N/A`` when it is None."""
        return "N/A" if value is None else f"{value}%"

    print("值分布:")
    for item in analysis.get('distribution', []):
        count = item.get('count') or 0
        total_amount = item.get('total_amount') or 0
        print(f" {item.get('range', 'N/A')}: {count:,} 单, 总额 {total_amount:,.2f}")

    print("\n折扣比例分布 (Top 10):")
    for item in analysis.get('ratio_distribution', [])[:10]:
        count = item.get('count') or 0
        print(f" {_percent(item.get('discount_ratio', 0))}: {count:,}")

    print("\n大额调整样本 (Top 10):")
    for item in analysis.get('top_samples', []):
        consume = item.get('consume_money') or 0
        adjust = item.get('adjust_amount') or 0
        print(f" 订单{item.get('order_settle_id')}: 消费{consume:,.2f}, 调整{adjust:,.2f} ({_percent(item.get('ratio', 0))})")
def print_member_discount_analysis(analysis: Dict[str, Any]):
    """Print member-discount statistics and up to five sample orders.

    The aggregate query returns NULL for the average when no order has a
    non-zero member discount, so None values are printed as 0 rather than
    raising a formatting error. When there are no samples a notice is
    printed instead.

    Args:
        analysis: Dict with ``stats`` (aggregate row) and ``samples``
            (list of row dicts).
    """
    def _percent(value):
        """Render a ratio as ``<value>%``, or ``N/A`` when it is None."""
        return "N/A" if value is None else f"{value}%"

    stats = analysis.get('stats', {})
    print(f"总订单数: {(stats.get('total_orders') or 0):,}")
    print(f"有会员折扣的订单: {(stats.get('with_discount') or 0):,}")
    print(f"会员折扣总额: {(stats.get('total_discount') or 0):,.2f}")
    print(f"平均折扣: {(stats.get('avg_discount') or 0):,.2f}")
    print(f"最大折扣: {(stats.get('max_discount') or 0):,.2f}")

    samples = analysis.get('samples', [])
    if samples:
        print("\n样本订单:")
        for item in samples[:5]:
            consume = item.get('consume_money') or 0
            discount = item.get('member_discount_amount') or 0
            print(f" 订单{item.get('order_settle_id')}: 会员{item.get('member_id')}, 消费{consume:,.2f}, 折扣{discount:,.2f} ({_percent(item.get('ratio', 0))})")
    else:
        print("\n[!] 未发现使用会员折扣的订单,该字段可能未启用")
def print_rounding_analysis(analysis: Dict[str, Any]):
    """Print the rounding-amount frequencies and up to five sample orders.

    Args:
        analysis: Dict with ``distribution`` and ``patterns`` lists of row
            dicts.
    """
    print("抹零金额分布:")
    buckets = analysis.get('distribution', [])
    for bucket in buckets:
        amount = bucket.get('rounding_amount', 0)
        orders = bucket.get('count', 0)
        print(f" {amount:,.2f}: {orders:,}")

    print("\n抹零模式样本:")
    shown = analysis.get('patterns', [])[:5]
    for sample in shown:
        paid = sample.get('pay_amount', 0)
        rounded_off = sample.get('rounding_amount', 0)
        before = sample.get('before_rounding', 0)
        print(f" 实付{paid:,.2f} + 抹零{rounded_off:,.2f} = {before:,.2f}")
def print_groupbuy_analysis(analysis: Dict[str, Any]):
    """Print group-buy usage statistics and up to five sample orders.

    None values (SQL NULL) print as 0 for counts and amounts; a None
    group-buy unit price (no matching redemption row in the left join)
    prints as ``N/A``.

    Args:
        analysis: Dict with ``stats`` (aggregate row) and ``samples``
            (list of row dicts).
    """
    stats = analysis.get('stats', {})
    print(f"总订单数: {(stats.get('total_orders') or 0):,}")
    print(f"有coupon_amount的订单: {(stats.get('with_coupon') or 0):,}")
    print(f"有pl_coupon_sale_amount的订单: {(stats.get('with_pl_coupon') or 0):,}")
    print(f"coupon_amount总额: {(stats.get('total_coupon_amount') or 0):,.2f}")
    print(f"pl_coupon_sale_amount总额: {(stats.get('total_pl_coupon_sale') or 0):,.2f}")

    print("\n团购订单样本:")
    for item in analysis.get('samples', [])[:5]:
        coupon = item.get('coupon_amount') or 0
        pl_coupon = item.get('pl_coupon_sale_amount') or 0
        unit_price = item.get('groupbuy_unit_price')
        if unit_price is None:
            unit_price = 'N/A'
        print(f" 订单{item.get('order_settle_id')}: coupon={coupon:,.2f}, pl_coupon={pl_coupon:,.2f}, groupbuy_price={unit_price}")
def generate_report(
    overall_stats: Dict[str, Any],
    discount_analysis: Dict[str, Any],
    adjust_analysis: Dict[str, Any],
    member_discount_analysis: Dict[str, Any],
    rounding_analysis: Dict[str, Any],
    groupbuy_analysis: Dict[str, Any]
) -> str:
    """Build the Markdown analysis report.

    Missing keys and None values (SQL NULL) are rendered as 0, and all
    percentages are 0.00% when the order count is zero, so the report can be
    generated for an empty table.

    Args:
        overall_stats: Aggregate row with ``total_orders``, ``orders_with_*``
            and ``total_*`` keys.
        discount_analysis: Sample analysis result; accepted for interface
            compatibility but not used in the report body.
        adjust_analysis: Dict whose ``distribution`` list feeds the
            adjust_amount table.
        member_discount_analysis: Dict whose ``stats`` row feeds the member
            discount section.
        rounding_analysis: Dict whose ``distribution`` list feeds the
            rounding table (first 10 rows).
        groupbuy_analysis: Dict whose ``stats`` row feeds the group-buy
            section.

    Returns:
        The complete report as a single Markdown string.
    """
    def _val(stats, key):
        """Return stats[key], mapping a missing key or None to 0."""
        return stats.get(key) or 0

    now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    total = _val(overall_stats, 'total_orders')

    def _share(key):
        """Percentage of all orders counted under key; 0 without orders."""
        return _val(overall_stats, key) / total * 100 if total else 0.0

    # Report text is kept flush-left inside the literals: leading whitespace
    # is significant in Markdown.
    parts = [f"""# 优惠口径抽样分析报告
**生成时间**: {now}
## 一、总体统计
| 指标 | 数值 | 占比 |
|------|------|------|
| 总订单数 | {total:,} | 100% |
| 有adjust_amount的订单 | {_val(overall_stats, 'orders_with_adjust'):,} | {_share('orders_with_adjust'):.2f}% |
| 有member_discount的订单 | {_val(overall_stats, 'orders_with_member_discount'):,} | {_share('orders_with_member_discount'):.2f}% |
| 有rounding的订单 | {_val(overall_stats, 'orders_with_rounding'):,} | {_share('orders_with_rounding'):.2f}% |
| 有coupon的订单 | {_val(overall_stats, 'orders_with_coupon'):,} | {_share('orders_with_coupon'):.2f}% |
| 有gift_card的订单 | {_val(overall_stats, 'orders_with_gift_card'):,} | {_share('orders_with_gift_card'):.2f}% |
### 金额统计
| 优惠类型 | 总额 |
|----------|------|
| adjust_amount (台费打折/调整) | {_val(overall_stats, 'total_adjust'):,.2f} |
| member_discount_amount (会员折扣) | {_val(overall_stats, 'total_member_discount'):,.2f} |
| rounding_amount (抹零) | {_val(overall_stats, 'total_rounding'):,.2f} |
| coupon_amount (团购抵消台费) | {_val(overall_stats, 'total_coupon'):,.2f} |
| gift_card_amount (赠送卡支付) | {_val(overall_stats, 'total_gift_card'):,.2f} |
## 二、adjust_amount (台费打折/调整) 分析
### 值分布
| 区间 | 订单数 | 总额 |
|------|--------|------|
"""]

    for item in adjust_analysis.get('distribution', []):
        parts.append(
            f"| {item.get('range', 'N/A')} | {_val(item, 'count'):,} | {_val(item, 'total_amount'):,.2f} |\n"
        )

    parts.append("""
### 分析结论
- **是否包含大客户优惠**: 需要进一步分析adjust_amount的业务来源
- **与普通调整的区分**: 建议查看是否有备注字段或关联的优惠活动表
## 三、member_discount_amount (会员折扣) 分析
""")

    member_stats = member_discount_analysis.get('stats', {})
    with_discount = _val(member_stats, 'with_discount')
    if with_discount == 0:
        parts.append("""### 结论
**[!] 该字段未发现任何非零值,会员折扣功能可能未启用。**
建议在DWS财务统计中可以暂时忽略此字段或将其标记为"待启用"
""")
    else:
        parts.append(f"""### 使用统计
| 指标 | 数值 |
|------|------|
| 有会员折扣的订单 | {with_discount:,} |
| 会员折扣总额 | {_val(member_stats, 'total_discount'):,.2f} |
| 平均折扣 | {_val(member_stats, 'avg_discount'):,.2f} |
| 最大折扣 | {_val(member_stats, 'max_discount'):,.2f} |
""")

    parts.append("""
## 四、rounding_amount (抹零) 分析
### 抹零金额分布
| 抹零金额 | 订单数 |
|----------|--------|
""")

    for item in rounding_analysis.get('distribution', [])[:10]:
        parts.append(f"| {_val(item, 'rounding_amount'):,.2f} | {_val(item, 'count'):,} |\n")

    parts.append("""
### 抹零规则推断
根据抹零金额分布,推断抹零规则为:
- 抹零到整元(去除角分)
- 或抹零到特定尾数
## 五、团购优惠分析
""")

    groupbuy_stats = groupbuy_analysis.get('stats', {})
    parts.append(f"""### 使用统计
| 指标 | 数值 |
|------|------|
| 有coupon_amount的订单 | {_val(groupbuy_stats, 'with_coupon'):,} |
| 有pl_coupon_sale_amount的订单 | {_val(groupbuy_stats, 'with_pl_coupon'):,} |
| coupon_amount总额 | {_val(groupbuy_stats, 'total_coupon_amount'):,.2f} |
| pl_coupon_sale_amount总额 | {_val(groupbuy_stats, 'total_pl_coupon_sale'):,.2f} |
### 团购支付金额计算路径
根据分析,团购支付金额应按以下路径计算:
1. 若 `pl_coupon_sale_amount ≠ 0` → 使用 `pl_coupon_sale_amount`
2. 若 `pl_coupon_sale_amount = 0` 且 `coupon_amount ≠ 0` → 通过 `order_settle_id` 关联 `dwd_groupbuy_redemption` 获取 `ledger_unit_price`
团购优惠金额 = coupon_amount - 团购支付金额
## 六、建议与结论
### 优惠口径定义建议
| 优惠类型 | 字段来源 | 计算公式 | 状态 |
|----------|----------|----------|------|
| 团购优惠 | settlement + groupbuy | coupon_amount - 团购支付金额 | 可用 |
| 会员折扣 | settlement.member_discount_amount | 直接取值 | 待确认 |
| 赠送卡抵扣 | settlement.gift_card_amount | 直接取值 | 可用 |
| 手动调整 | settlement.adjust_amount | 直接取值 | 可用 |
| 抹零 | settlement.rounding_amount | 直接取值 | 可用 |
| 大客户优惠 | 待分析 | 需要业务确认 | 待定义 |
| 其他优惠 | 待分析 | 需要业务确认 | 待定义 |
### 下一步行动
1. **确认会员折扣是否启用**: 与业务确认member_discount_amount的使用场景
2. **大客户优惠识别规则**: 与业务确认如何从adjust_amount中识别大客户优惠
3. **其他优惠分类**: 与业务确认adjust_amount中还包含哪些优惠类型
""")

    return "".join(parts)
# Script entry point: run the analysis only when the file is executed directly.
if __name__ == "__main__":
    analyze_discount_patterns()

View File

@@ -0,0 +1,287 @@
# -*- coding: utf-8 -*-
"""
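Member-discount enablement analysis script.

Purpose:
    Confirm whether the dwd_settlement_head.member_discount_amount field is
    actually in use.

Analysis contents:
    1. Count the non-zero records
    2. Analyze the distribution over time
    3. Analyze by member type
    4. Analyze the relationship with other fields

Output:
    - Prints the analysis results to the console
    - Conclusion: whether the field is in use, and in which scenarios

Author: ETL team
Created: 2026-02-01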
"""
import os
import sys
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List
# 添加项目根目录到Python路径
project_root = Path(__file__).parent.parent.parent
sys.path.insert(0, str(project_root))
from etl_billiards.utils.config import Config
from etl_billiards.utils.db import DatabaseConnection
def analyze_member_discount_usage():
    """Run the member_discount_amount enablement analysis.

    Prints five sections to stdout (basic statistics, distribution over
    time, member breakdown, sample rows, conclusion) and closes the database
    connection afterwards, even when a step raises.
    """
    heavy_rule = "=" * 80
    light_rule = "-" * 40

    def open_section(title):
        """Print a section title followed by the thin separator line."""
        print(title)
        print(light_rule)

    print(heavy_rule)
    print("会员折扣启用分析 (member_discount_amount)")
    print(heavy_rule)
    print()

    # Load configuration and open the database connection.
    config = Config()
    db = DatabaseConnection(config)

    try:
        # 1. Basic statistics
        open_section("【1. 基础统计】")
        basic_stats = get_basic_stats(db)
        print_basic_stats(basic_stats)
        print()

        # 2. Distribution over time
        open_section("【2. 时间分布分析】")
        print_time_distribution(get_time_distribution(db))
        print()

        # 3. Breakdown by member
        open_section("【3. 与会员的关联分析】")
        print_member_analysis(get_member_analysis(db))
        print()

        # 4. Sample rows
        open_section("【4. 样本数据】")
        print_samples(get_sample_data(db))
        print()

        # 5. Conclusion, derived from the basic statistics
        open_section("【5. 分析结论】")
        print_conclusion(basic_stats)
    finally:
        db.close()
def get_basic_stats(db: DatabaseConnection) -> Dict[str, Any]:
    """Query table-wide statistics for member_discount_amount.

    Args:
        db: Connection object exposing ``query(sql)``.

    Returns:
        The aggregate row (counts of non-zero/positive/negative values, sum,
        average, max, min, standard deviation) as a dict, or {} when the
        query returns nothing.
    """
    stats_sql = """
        SELECT
            COUNT(*) AS total_orders,
            COUNT(CASE WHEN member_discount_amount != 0 THEN 1 END) AS with_member_discount,
            COUNT(CASE WHEN member_discount_amount > 0 THEN 1 END) AS positive_discount,
            COUNT(CASE WHEN member_discount_amount < 0 THEN 1 END) AS negative_discount,
            SUM(member_discount_amount) AS total_member_discount,
            AVG(CASE WHEN member_discount_amount != 0 THEN member_discount_amount END) AS avg_discount,
            MAX(member_discount_amount) AS max_discount,
            MIN(member_discount_amount) AS min_discount,
            STDDEV(CASE WHEN member_discount_amount != 0 THEN member_discount_amount END) AS stddev_discount
        FROM billiards_dwd.dwd_settlement_head
    """
    result = db.query(stats_sql)
    if not result:
        return {}
    return dict(result[0])
def get_time_distribution(db: DatabaseConnection) -> List[Dict[str, Any]]:
    """Query per-month order and member-discount totals.

    Args:
        db: Connection object exposing ``query(sql)``.

    Returns:
        Up to 12 row dicts, newest month first; an empty list when the query
        returns nothing.
    """
    monthly_sql = """
        SELECT
            DATE_TRUNC('month', create_time)::DATE AS month,
            COUNT(*) AS total_orders,
            COUNT(CASE WHEN member_discount_amount != 0 THEN 1 END) AS with_discount,
            SUM(member_discount_amount) AS total_discount
        FROM billiards_dwd.dwd_settlement_head
        GROUP BY DATE_TRUNC('month', create_time)
        ORDER BY month DESC
        LIMIT 12
    """
    monthly_rows = db.query(monthly_sql)
    return [dict(record) for record in (monthly_rows or [])]
def get_member_analysis(db: DatabaseConnection) -> Dict[str, Any]:
    """Break member-discount usage down by customer type and card grade.

    Orders with ``member_id = 0`` are labelled walk-in (散客); every other
    order is labelled member (会员).

    NOTE(review): the grade query joins dim_member on member_id alone. If
    dim_member keeps several rows per member (e.g. history versions), orders
    would be counted more than once - confirm against the table definition.

    Args:
        db: Connection object exposing ``query(sql)``.

    Returns:
        Dict with ``member_vs_guest`` and ``by_grade``, each a list of row
        dicts (empty when the query returns nothing).
    """
    # Members versus walk-in customers.
    customer_type_sql = """
        SELECT
            CASE WHEN member_id = 0 THEN '散客' ELSE '会员' END AS customer_type,
            COUNT(*) AS total_orders,
            COUNT(CASE WHEN member_discount_amount != 0 THEN 1 END) AS with_discount,
            SUM(member_discount_amount) AS total_discount
        FROM billiards_dwd.dwd_settlement_head
        GROUP BY CASE WHEN member_id = 0 THEN '散客' ELSE '会员' END
    """
    customer_type_rows = db.query(customer_type_sql)

    # Member orders grouped by card grade.
    grade_sql = """
        SELECT
            COALESCE(m.member_card_grade_name, '未知') AS grade_name,
            COUNT(*) AS total_orders,
            COUNT(CASE WHEN sh.member_discount_amount != 0 THEN 1 END) AS with_discount,
            SUM(sh.member_discount_amount) AS total_discount
        FROM billiards_dwd.dwd_settlement_head sh
        LEFT JOIN billiards_dwd.dim_member m ON sh.member_id = m.member_id
        WHERE sh.member_id != 0
        GROUP BY COALESCE(m.member_card_grade_name, '未知')
        ORDER BY total_orders DESC
    """
    grade_rows = db.query(grade_sql)

    breakdown: Dict[str, Any] = {}
    breakdown['member_vs_guest'] = [dict(record) for record in (customer_type_rows or [])]
    breakdown['by_grade'] = [dict(record) for record in (grade_rows or [])]
    return breakdown
def get_sample_data(db: DatabaseConnection) -> List[Dict[str, Any]]:
    """Fetch the 20 most recent orders with a non-zero member discount.

    Each row is left-joined to dim_member for the member's nickname and card
    grade; discount_ratio is NULL when consume_money is 0.

    Args:
        db: Connection object exposing ``query(sql)``.

    Returns:
        One dict per order; an empty list when nothing matches.
    """
    recent_sql = """
        SELECT
            sh.order_settle_id,
            sh.order_trade_no,
            sh.create_time,
            sh.member_id,
            m.nickname AS member_name,
            m.member_card_grade_name,
            sh.consume_money,
            sh.pay_amount,
            sh.member_discount_amount,
            ROUND(sh.member_discount_amount / NULLIF(sh.consume_money, 0) * 100, 2) AS discount_ratio
        FROM billiards_dwd.dwd_settlement_head sh
        LEFT JOIN billiards_dwd.dim_member m ON sh.member_id = m.member_id
        WHERE sh.member_discount_amount != 0
        ORDER BY sh.create_time DESC
        LIMIT 20
    """
    recent_rows = db.query(recent_sql)
    return [dict(record) for record in (recent_rows or [])]
def print_basic_stats(stats: Dict[str, Any]):
    """Print the basic member-discount statistics.

    Missing keys and None values (SQL NULL, e.g. SUM/MAX/MIN over an empty
    table) print as 0, and the usage share is 0 when there are no orders.

    Args:
        stats: Aggregate row with ``total_orders``, ``with_member_discount``,
            ``positive_discount``, ``negative_discount``,
            ``total_member_discount``, ``avg_discount``, ``max_discount``
            and ``min_discount``.
    """
    total = stats.get('total_orders') or 0
    with_discount = stats.get('with_member_discount') or 0
    # Guard the division: an empty table yields total == 0.
    share = with_discount / total * 100 if total else 0.0

    print(f"总订单数: {total:,}")
    print(f"有会员折扣的订单: {with_discount:,} ({share:.4f}%)")
    print(f" - 正值(折扣): {(stats.get('positive_discount') or 0):,}")
    print(f" - 负值(加价?): {(stats.get('negative_discount') or 0):,}")
    print()
    print(f"会员折扣总额: {(stats.get('total_member_discount') or 0):,.2f}")
    print(f"平均折扣: {(stats.get('avg_discount') or 0):,.2f}")
    print(f"最大折扣: {(stats.get('max_discount') or 0):,.2f}")
    print(f"最小折扣: {(stats.get('min_discount') or 0):,.2f}")
def print_time_distribution(distribution: List[Dict[str, Any]]):
    """Print the per-month distribution as a fixed-width table.

    Prints a "no data" notice for an empty list. None values (SQL NULL)
    print as 0 for numbers and as ``N/A`` for the month.

    Args:
        distribution: Row dicts with ``month``, ``total_orders``,
            ``with_discount`` and ``total_discount``.
    """
    if not distribution:
        print("无数据")
        return

    print(f"{'月份':<12} {'总订单':>10} {'有折扣':>10} {'折扣总额':>15}")
    print("-" * 50)
    for item in distribution:
        raw_month = item.get('month')
        # Keep only the YYYY-MM prefix of the date's string form.
        month = 'N/A' if raw_month is None else str(raw_month)[:7]
        total = item.get('total_orders') or 0
        with_discount = item.get('with_discount') or 0
        total_discount = item.get('total_discount') or 0
        print(f"{month:<12} {total:>10,} {with_discount:>10,} {total_discount:>15,.2f}")
def print_member_analysis(analysis: Dict[str, Any]):
    """Print the member/walk-in comparison and the per-grade breakdown.

    None values (SQL NULL) in the counts and totals print as 0 instead of
    raising a formatting error.

    Args:
        analysis: Dict with ``member_vs_guest`` and ``by_grade`` lists of
            row dicts.
    """
    print("会员 vs 散客:")
    for item in analysis.get('member_vs_guest', []):
        total_orders = item.get('total_orders') or 0
        with_discount = item.get('with_discount') or 0
        total_discount = item.get('total_discount') or 0
        print(f" {item.get('customer_type', 'N/A')}: {total_orders:,} 单, {with_discount} 单有折扣, 折扣总额 {total_discount:,.2f}")

    print("\n按会员卡等级:")
    for item in analysis.get('by_grade', []):
        total_orders = item.get('total_orders') or 0
        with_discount = item.get('with_discount') or 0
        print(f" {item.get('grade_name', 'N/A')}: {total_orders:,} 单, {with_discount} 单有折扣")
def print_samples(samples: List[Dict[str, Any]]):
    """Print up to ten sample orders as a fixed-width table.

    Prints a notice when there are no samples. None values are handled
    explicitly: text columns show ``N/A`` (e.g. no matching member row),
    amounts show 0, and a None discount ratio (consume_money of 0) shows
    ``N/A`` instead of raising a formatting error.

    Args:
        samples: Row dicts with ``order_settle_id``, ``member_name``,
            ``member_card_grade_name``, ``consume_money``,
            ``member_discount_amount`` and ``discount_ratio``.
    """
    if not samples:
        print("[!] 未发现使用会员折扣的订单")
        return

    def _text(value, width):
        """Return value as text truncated to width, or N/A when None."""
        return 'N/A' if value is None else str(value)[:width]

    print(f"{'订单ID':<20} {'会员':<15} {'等级':<10} {'消费':>12} {'折扣':>12} {'比例':>8}")
    print("-" * 80)
    for item in samples[:10]:
        order_id = _text(item.get('order_settle_id', 'N/A'), 18)
        member = _text(item.get('member_name', 'N/A'), 13)
        grade = _text(item.get('member_card_grade_name', 'N/A'), 8)
        consume = item.get('consume_money') or 0
        discount = item.get('member_discount_amount') or 0
        ratio = item.get('discount_ratio', 0)
        # Width 8 equals the former 7-wide number plus the trailing '%'.
        ratio_text = 'N/A' if ratio is None else f"{ratio}%"
        print(f"{order_id:<20} {member:<15} {grade:<10} {consume:>12,.2f} {discount:>12,.2f} {ratio_text:>8}")
def print_conclusion(stats: Dict[str, Any]):
    """Print the conclusion on whether member_discount_amount is in use.

    Three outcomes are distinguished by the share of orders with a non-zero
    member discount: none at all (not enabled), below 1% (rarely used), and
    1% or more (enabled). An order count of zero is treated as "not enabled"
    rather than dividing by zero.

    Args:
        stats: Aggregate row with ``total_orders`` and
            ``with_member_discount``.
    """
    with_discount = stats.get('with_member_discount') or 0
    total = stats.get('total_orders') or 0
    ratio = with_discount / total * 100 if total else 0.0

    if with_discount == 0:
        print("【结论】: member_discount_amount 字段 **未启用**")
        print()
        print("该字段在所有订单中均为0表明")
        print(" 1. 会员折扣功能在业务系统中未开启")
        print(" 2. 或会员折扣通过其他方式如adjust_amount记录")
        print()
        print("【建议】:")
        print(" 1. 在DWS财务统计中暂时不处理此字段")
        print(" 2. 将此字段标记为'预留/待启用'")
        print(" 3. 后续如果业务启用,再更新统计逻辑")
    elif ratio < 1:
        print(f"【结论】: member_discount_amount 字段 **极少使用** (仅{ratio:.4f}%订单)")
        print()
        print("该字段使用率极低,可能是:")
        print(" 1. 会员折扣功能刚启用不久")
        print(" 2. 仅特定场景使用")
        print()
        print("【建议】:")
        print(" 1. 在DWS财务统计中保留此字段的处理逻辑")
        print(" 2. 定期监控使用率变化")
    else:
        print(f"【结论】: member_discount_amount 字段 **已启用** ({ratio:.2f}%订单使用)")
        print()
        print("【建议】:")
        print(" 1. 在DWS财务优惠明细中正常统计此字段")
        print(" 2. 关注会员折扣与其他优惠的叠加规则")
# Script entry point: run the analysis only when the file is executed directly.
if __name__ == "__main__":
    analyze_member_discount_usage()

View File

@@ -0,0 +1,74 @@
# -*- coding: utf-8 -*-
import sys
sys.path.insert(0, '.')
from config.settings import AppConfig
from database.connection import DatabaseConnection
from database.operations import DatabaseOperations
# Ad-hoc check script: prints the dim_assistant column list and row count,
# then service-log counts grouped by nickname / assistant_no.
config = AppConfig.load()
db_conn = DatabaseConnection(config.config['db']['dsn'])
db = DatabaseOperations(db_conn)

# Run every query inside try/finally so the connection is closed even when
# one of them raises.
try:
    # Column list of dim_assistant.
    print('=== dim_assistant columns ===')
    sql0 = """
        SELECT column_name FROM information_schema.columns
        WHERE table_schema = 'billiards_dwd' AND table_name = 'dim_assistant'
    """
    for row in db.query(sql0):
        print(f' {dict(row)["column_name"]}')

    # Number of current rows in dim_assistant.
    print()
    print('=== dim_assistant ===')
    sql1 = 'SELECT COUNT(*) as cnt FROM billiards_dwd.dim_assistant WHERE scd2_is_current = 1'
    rows = db.query(sql1)
    print(f'dim_assistant current count: {dict(rows[0])["cnt"]}')

    # Service records grouped by nickname.
    print()
    print('=== Service by nickname ===')
    sql2 = """
        SELECT nickname, COUNT(*) as service_count, COUNT(DISTINCT tenant_member_id) as member_count
        FROM billiards_dwd.dwd_assistant_service_log
        WHERE tenant_member_id > 0 AND is_delete = 0
        GROUP BY nickname
        ORDER BY service_count DESC
        LIMIT 10
    """
    for row in db.query(sql2):
        r = dict(row)
        print(f' {r["nickname"]}: {r["service_count"]} services, {r["member_count"]} members')

    # Service records grouped by assistant_no and nickname.
    print()
    print('=== Service by assistant_no ===')
    sql3 = """
        SELECT assistant_no, nickname, COUNT(*) as service_count, COUNT(DISTINCT tenant_member_id) as member_count
        FROM billiards_dwd.dwd_assistant_service_log
        WHERE tenant_member_id > 0 AND is_delete = 0
        GROUP BY assistant_no, nickname
        ORDER BY service_count DESC
        LIMIT 10
    """
    for row in db.query(sql3):
        r = dict(row)
        print(f' {r["assistant_no"]} ({r["nickname"]}): {r["service_count"]} services, {r["member_count"]} members')

    # Last 60 days, grouped by nickname.
    print()
    print('=== Last 60 days by nickname ===')
    sql4 = """
        SELECT nickname, COUNT(*) as service_count, COUNT(DISTINCT tenant_member_id) as member_count
        FROM billiards_dwd.dwd_assistant_service_log
        WHERE tenant_member_id > 0 AND is_delete = 0
        AND last_use_time >= NOW() - INTERVAL '60 days'
        GROUP BY nickname
        ORDER BY service_count DESC
        LIMIT 15
    """
    for row in db.query(sql4):
        r = dict(row)
        print(f' {r["nickname"]}: {r["service_count"]} services, {r["member_count"]} members')
finally:
    db_conn.close()

View File

@@ -0,0 +1,82 @@
# -*- coding: utf-8 -*-
import sys
sys.path.insert(0, '.')
from config.settings import AppConfig
from database.connection import DatabaseConnection
from database.operations import DatabaseOperations
# Ad-hoc check script: prints distribution statistics of the DWD-layer
# assistant service log (overall, per assistant, per member-assistant pair,
# and for the last 60 days).
config = AppConfig.load()
db_conn = DatabaseConnection(config.config['db']['dsn'])
db = DatabaseOperations(db_conn)

# Run every query inside try/finally so the connection is closed even when
# one of them raises.
try:
    print("=== DWD层服务记录分析 ===")
    print()

    # 1. Overall statistics
    sql1 = """
        SELECT
            COUNT(*) as total_records,
            COUNT(DISTINCT tenant_member_id) as unique_members,
            COUNT(DISTINCT site_assistant_id) as unique_assistants,
            COUNT(DISTINCT (tenant_member_id, site_assistant_id)) as unique_pairs
        FROM billiards_dwd.dwd_assistant_service_log
        WHERE tenant_member_id > 0 AND is_delete = 0
    """
    r = dict(db.query(sql1)[0])
    print("总体统计:")
    print(f" 总服务记录数: {r['total_records']}")
    print(f" 唯一会员数: {r['unique_members']}")
    print(f" 唯一助教数: {r['unique_assistants']}")
    print(f" 唯一客户-助教对: {r['unique_pairs']}")

    # 2. Number of distinct members served per assistant
    print()
    print("助教服务会员数分布 (Top 10):")
    sql2 = """
        SELECT site_assistant_id, COUNT(DISTINCT tenant_member_id) as member_count
        FROM billiards_dwd.dwd_assistant_service_log
        WHERE tenant_member_id > 0 AND is_delete = 0
        GROUP BY site_assistant_id
        ORDER BY member_count DESC
        LIMIT 10
    """
    for row in db.query(sql2):
        r = dict(row)
        print(f" 助教 {r['site_assistant_id']}: 服务 {r['member_count']} 个会员")

    # 3. Number of services per member-assistant pair
    print()
    print("客户-助教对 服务次数分布 (Top 10):")
    sql3 = """
        SELECT tenant_member_id, site_assistant_id, COUNT(*) as service_count
        FROM billiards_dwd.dwd_assistant_service_log
        WHERE tenant_member_id > 0 AND is_delete = 0
        GROUP BY tenant_member_id, site_assistant_id
        ORDER BY service_count DESC
        LIMIT 10
    """
    for row in db.query(sql3):
        r = dict(row)
        print(f" 会员 {r['tenant_member_id']} - 助教 {r['site_assistant_id']}: {r['service_count']} 次服务")

    # 4. Same overall statistics restricted to the last 60 days
    print()
    print("=== 近60天数据 ===")
    sql4 = """
        SELECT
            COUNT(*) as total_records,
            COUNT(DISTINCT tenant_member_id) as unique_members,
            COUNT(DISTINCT site_assistant_id) as unique_assistants,
            COUNT(DISTINCT (tenant_member_id, site_assistant_id)) as unique_pairs
        FROM billiards_dwd.dwd_assistant_service_log
        WHERE tenant_member_id > 0 AND is_delete = 0
        AND last_use_time >= NOW() - INTERVAL '60 days'
    """
    r4 = dict(db.query(sql4)[0])
    print(f" 总服务记录数: {r4['total_records']}")
    print(f" 唯一会员数: {r4['unique_members']}")
    print(f" 唯一助教数: {r4['unique_assistants']}")
    print(f" 唯一客户-助教对: {r4['unique_pairs']}")
finally:
    db_conn.close()

View File

@@ -0,0 +1,57 @@
# -*- coding: utf-8 -*-
import sys
sys.path.insert(0, '.')
from config.settings import AppConfig
from database.connection import DatabaseConnection
from database.operations import DatabaseOperations
# Ad-hoc check script: compares the DWS member-assistant intimacy table with
# the DWD-layer service log it is expected to be derived from.
config = AppConfig.load()
db_conn = DatabaseConnection(config.config['db']['dsn'])
db = DatabaseOperations(db_conn)

# Run every query inside try/finally so the connection is closed even when
# one of them raises.
try:
    # Actual counts in the DWS intimacy table.
    sql = """
        SELECT
            COUNT(*) as total_pairs,
            COUNT(DISTINCT member_id) as unique_members,
            COUNT(DISTINCT assistant_id) as unique_assistants
        FROM billiards_dws.dws_member_assistant_intimacy
    """
    rows = db.query(sql)
    r = dict(rows[0])
    print("DWS亲密指数统计:")
    print(f" 总记录数(对): {r['total_pairs']}")
    print(f" 唯一会员数: {r['unique_members']}")
    print(f" 唯一助教数: {r['unique_assistants']}")

    # Distribution across assistants.
    sql2 = """
        SELECT assistant_id, COUNT(*) as member_count
        FROM billiards_dws.dws_member_assistant_intimacy
        GROUP BY assistant_id
        ORDER BY member_count DESC
        LIMIT 10
    """
    rows2 = db.query(sql2)
    print()
    print("Top 10 助教 (按服务会员数):")
    for row in rows2:
        r = dict(row)
        print(f" 助教 {r['assistant_id']}: 服务 {r['member_count']} 个会员")

    # Raw DWD-layer figures for comparison.
    sql3 = """
        SELECT
            COUNT(DISTINCT site_assistant_id) as unique_assistants,
            COUNT(DISTINCT tenant_member_id) as unique_members
        FROM billiards_dwd.dwd_assistant_service_log
        WHERE tenant_member_id > 0 AND is_delete = 0
    """
    rows3 = db.query(sql3)
    r3 = dict(rows3[0])
    print()
    print("DWD层原始数据:")
    print(f" 唯一助教数: {r3['unique_assistants']}")
    print(f" 唯一会员数: {r3['unique_members']}")
finally:
    db_conn.close()

View File

@@ -702,6 +702,7 @@ def run_gap_check(
content_sample_limit: int | None = None,
window_split_unit: str | None = None,
window_compensation_hours: int | None = None,
tag: str = "",
) -> dict:
cfg = cfg or AppConfig.load({})
tz = ZoneInfo(cfg.get("app.timezone", "Asia/Taipei"))
@@ -800,7 +801,7 @@ def run_gap_check(
if cutoff:
logger.info("CUTOFF=%s overlap_hours=%s", cutoff.isoformat(), cutoff_overlap_hours)
tag_suffix = f"_{args.tag}" if args.tag else ""
tag_suffix = f"_{tag}" if tag else ""
client = build_recording_client(cfg, task_code=f"ODS_GAP_CHECK{tag_suffix}")
db_state = _init_db_state(cfg)

View File

@@ -0,0 +1,185 @@
# -*- coding: utf-8 -*-
"""
创建指数算法相关表
"""
import sys
import os
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from config.settings import AppConfig
from database.connection import DatabaseConnection
from database.operations import DatabaseOperations
# Table DDL.
# Each entry is one multi-statement SQL script. Every script first runs
# DROP TABLE IF EXISTS ... CASCADE and then recreates the table and its
# indexes, so executing this list discards any rows already stored in
# these tables.
DDL_STATEMENTS = [
# Index-algorithm parameter configuration table
"""
DROP TABLE IF EXISTS billiards_dws.cfg_index_parameters CASCADE;
CREATE TABLE billiards_dws.cfg_index_parameters (
param_id SERIAL PRIMARY KEY,
index_type VARCHAR(50) NOT NULL,
param_name VARCHAR(100) NOT NULL,
param_value NUMERIC(14,6) NOT NULL,
description TEXT,
effective_from DATE NOT NULL DEFAULT CURRENT_DATE,
effective_to DATE,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
CONSTRAINT uk_cfg_index_parameters UNIQUE (index_type, param_name, effective_from)
);
CREATE INDEX idx_cfg_index_params_type ON billiards_dws.cfg_index_parameters (index_type);
""",
# Member recall index table (one row per site_id + member_id)
"""
DROP TABLE IF EXISTS billiards_dws.dws_member_recall_index CASCADE;
CREATE TABLE billiards_dws.dws_member_recall_index (
recall_id BIGSERIAL PRIMARY KEY,
site_id BIGINT NOT NULL,
tenant_id BIGINT NOT NULL,
member_id BIGINT NOT NULL,
days_since_last_visit INTEGER,
visit_interval_median NUMERIC(10,2),
visit_interval_mad NUMERIC(10,2),
days_since_first_visit INTEGER,
days_since_last_recharge INTEGER,
visits_last_14_days INTEGER NOT NULL DEFAULT 0,
visits_last_60_days INTEGER NOT NULL DEFAULT 0,
score_overdue NUMERIC(10,4),
score_new_bonus NUMERIC(10,4),
score_recharge_bonus NUMERIC(10,4),
score_hot_drop NUMERIC(10,4),
raw_score NUMERIC(14,6),
display_score NUMERIC(4,2),
calc_time TIMESTAMPTZ NOT NULL DEFAULT NOW(),
calc_version INTEGER NOT NULL DEFAULT 1,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
CONSTRAINT uk_dws_member_recall UNIQUE (site_id, member_id)
);
CREATE INDEX idx_dws_recall_display ON billiards_dws.dws_member_recall_index (site_id, display_score DESC);
""",
# Member-assistant intimacy index table (one row per site_id + member_id + assistant_id)
"""
DROP TABLE IF EXISTS billiards_dws.dws_member_assistant_intimacy CASCADE;
CREATE TABLE billiards_dws.dws_member_assistant_intimacy (
intimacy_id BIGSERIAL PRIMARY KEY,
site_id BIGINT NOT NULL,
tenant_id BIGINT NOT NULL,
member_id BIGINT NOT NULL,
assistant_id BIGINT NOT NULL,
session_count INTEGER NOT NULL DEFAULT 0,
total_duration_minutes INTEGER NOT NULL DEFAULT 0,
basic_session_count INTEGER NOT NULL DEFAULT 0,
incentive_session_count INTEGER NOT NULL DEFAULT 0,
days_since_last_session INTEGER,
attributed_recharge_count INTEGER NOT NULL DEFAULT 0,
attributed_recharge_amount NUMERIC(14,2) NOT NULL DEFAULT 0,
score_frequency NUMERIC(10,4),
score_recency NUMERIC(10,4),
score_recharge NUMERIC(10,4),
score_duration NUMERIC(10,4),
burst_multiplier NUMERIC(6,4),
raw_score NUMERIC(14,6),
display_score NUMERIC(4,2),
calc_time TIMESTAMPTZ NOT NULL DEFAULT NOW(),
calc_version INTEGER NOT NULL DEFAULT 1,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
CONSTRAINT uk_dws_member_assistant_intimacy UNIQUE (site_id, member_id, assistant_id)
);
CREATE INDEX idx_dws_intimacy_member ON billiards_dws.dws_member_assistant_intimacy (site_id, member_id, display_score DESC);
CREATE INDEX idx_dws_intimacy_assistant ON billiards_dws.dws_member_assistant_intimacy (site_id, assistant_id, display_score DESC);
""",
# Percentile history table (one row per site_id + index_type + calc_time)
"""
DROP TABLE IF EXISTS billiards_dws.dws_index_percentile_history CASCADE;
CREATE TABLE billiards_dws.dws_index_percentile_history (
history_id BIGSERIAL PRIMARY KEY,
site_id BIGINT NOT NULL,
index_type VARCHAR(50) NOT NULL,
calc_time TIMESTAMPTZ NOT NULL,
percentile_5 NUMERIC(14,6),
percentile_95 NUMERIC(14,6),
percentile_5_smoothed NUMERIC(14,6),
percentile_95_smoothed NUMERIC(14,6),
record_count INTEGER,
min_raw_score NUMERIC(14,6),
max_raw_score NUMERIC(14,6),
avg_raw_score NUMERIC(14,6),
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
CONSTRAINT uk_dws_index_percentile_history UNIQUE (site_id, index_type, calc_time)
);
CREATE INDEX idx_dws_percentile_history ON billiards_dws.dws_index_percentile_history (site_id, index_type, calc_time DESC);
"""
]
# Initial algorithm parameters.
# A single INSERT that seeds cfg_index_parameters with the RECALL and INTIMACY
# parameter sets, all effective from CURRENT_DATE. ON CONFLICT ... DO NOTHING
# leaves an existing (index_type, param_name, effective_from) row untouched.
SEED_PARAMS = """
INSERT INTO billiards_dws.cfg_index_parameters
(index_type, param_name, param_value, description, effective_from)
VALUES
('RECALL', 'lookback_days', 60, '回溯窗口(天)', CURRENT_DATE),
('RECALL', 'sigma_min', 2.0, '波动下限(天)', CURRENT_DATE),
('RECALL', 'halflife_new', 7, '新客户半衰期(天)', CURRENT_DATE),
('RECALL', 'halflife_recharge', 10, '刚充值半衰期(天)', CURRENT_DATE),
('RECALL', 'weight_overdue', 3.0, '超期紧急性权重', CURRENT_DATE),
('RECALL', 'weight_new', 1.0, '新客户权重', CURRENT_DATE),
('RECALL', 'weight_recharge', 1.0, '刚充值权重', CURRENT_DATE),
('RECALL', 'weight_hot', 1.0, '热度断档权重', CURRENT_DATE),
('RECALL', 'percentile_lower', 5, '下锚分位数', CURRENT_DATE),
('RECALL', 'percentile_upper', 95, '上锚分位数', CURRENT_DATE),
('RECALL', 'ewma_alpha', 0.2, 'EWMA平滑系数', CURRENT_DATE),
('INTIMACY', 'lookback_days', 60, '回溯窗口(天)', CURRENT_DATE),
('INTIMACY', 'session_merge_hours', 4, '会话合并间隔(小时)', CURRENT_DATE),
('INTIMACY', 'recharge_attribute_hours', 1, '充值归因窗口(小时)', CURRENT_DATE),
('INTIMACY', 'amount_base', 500, '金额压缩基准(元)', CURRENT_DATE),
('INTIMACY', 'incentive_weight', 1.5, '附加课权重倍数', CURRENT_DATE),
('INTIMACY', 'halflife_session', 14, '会话衰减半衰期(天)', CURRENT_DATE),
('INTIMACY', 'halflife_last', 10, '最近一次半衰期(天)', CURRENT_DATE),
('INTIMACY', 'halflife_recharge', 21, '充值衰减半衰期(天)', CURRENT_DATE),
('INTIMACY', 'halflife_short', 7, '短期激增检测半衰期(天)', CURRENT_DATE),
('INTIMACY', 'halflife_long', 30, '长期激增检测半衰期(天)', CURRENT_DATE),
('INTIMACY', 'weight_frequency', 2.0, '频次权重', CURRENT_DATE),
('INTIMACY', 'weight_recency', 1.5, '最近一次权重', CURRENT_DATE),
('INTIMACY', 'weight_recharge', 2.0, '归因充值权重', CURRENT_DATE),
('INTIMACY', 'weight_duration', 0.5, '时长权重', CURRENT_DATE),
('INTIMACY', 'burst_gamma', 0.6, '激增放大系数', CURRENT_DATE),
('INTIMACY', 'percentile_lower', 5, '下锚分位数', CURRENT_DATE),
('INTIMACY', 'percentile_upper', 95, '上锚分位数', CURRENT_DATE),
('INTIMACY', 'ewma_alpha', 0.2, 'EWMA平滑系数', CURRENT_DATE)
ON CONFLICT (index_type, param_name, effective_from) DO NOTHING;
"""
def main():
    """Recreate the index-algorithm tables and seed their parameters.

    Runs every script in DDL_STATEMENTS, then SEED_PARAMS, commits, and
    finally prints how many parameter rows the configuration table holds.
    The connection is closed whether or not any step fails.
    """
    print("创建指数算法相关表...")
    dsn = AppConfig.load().config["db"]["dsn"]
    connection = DatabaseConnection(dsn)
    try:
        with connection.conn.cursor() as cursor:
            # Create the tables, reporting progress as "step/total".
            step = 0
            for statement in DDL_STATEMENTS:
                step += 1
                print(f" 执行DDL {step}/{len(DDL_STATEMENTS)}...")
                cursor.execute(statement)

            # Seed the algorithm parameters and make everything durable.
            print(" 初始化算法参数...")
            cursor.execute(SEED_PARAMS)
            connection.conn.commit()
            print("完成!")

            # Verify by counting the rows now present in the config table.
            cursor.execute("SELECT COUNT(*) FROM billiards_dws.cfg_index_parameters")
            (count,) = cursor.fetchone()[:1]
            print(f" 已插入 {count} 个参数配置")
    finally:
        connection.close()


if __name__ == '__main__':
    main()

View File

@@ -0,0 +1,602 @@
# -*- coding: utf-8 -*-
"""
DWS Excel导入脚本
功能说明:
支持三类Excel数据的导入
1. 支出结构dws_finance_expense_summary
2. 平台结算dws_platform_settlement
3. 充值提成dws_assistant_recharge_commission
导入规范:
- 字段定义:按照目标表字段要求
- 时间粒度:支出按月,平台结算按日,充值提成按月
- 门店维度使用配置的site_id
- 去重规则按import_batch_no去重
- 校验规则:金额字段非负,日期格式校验
使用方式:
python import_dws_excel.py --type expense --file expenses.xlsx
python import_dws_excel.py --type platform --file platform_settlement.xlsx
python import_dws_excel.py --type commission --file recharge_commission.xlsx
作者ETL团队
创建日期2026-02-01
"""
import argparse
import os
import sys
import uuid
from datetime import date, datetime
from decimal import Decimal, InvalidOperation
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
# 添加项目根目录到Python路径
project_root = Path(__file__).parent.parent.parent
sys.path.insert(0, str(project_root))
try:
import pandas as pd
except ImportError:
print("请安装 pandas: pip install pandas openpyxl")
sys.exit(1)
from etl_billiards.utils.config import Config
from etl_billiards.utils.db import DatabaseConnection
# =============================================================================
# 常量定义
# =============================================================================
# Expense types: label as written in the Excel "支出类型" column -> internal type code.
EXPENSE_TYPES = {
'房租': 'RENT',
'水电费': 'UTILITY',
'物业费': 'PROPERTY',
'工资': 'SALARY',
'报销': 'REIMBURSE',
'平台服务费': 'PLATFORM_FEE',
'其他': 'OTHER',
}
# Expense category mapping: internal type code -> cost category.
EXPENSE_CATEGORIES = {
'RENT': 'FIXED_COST',
'UTILITY': 'VARIABLE_COST',
'PROPERTY': 'FIXED_COST',
'SALARY': 'FIXED_COST',
'REIMBURSE': 'VARIABLE_COST',
'PLATFORM_FEE': 'VARIABLE_COST',
'OTHER': 'OTHER',
}
# Platform types: label as written in the Excel "平台类型" column -> internal platform code.
PLATFORM_TYPES = {
'美团': 'MEITUAN',
'抖音': 'DOUYIN',
'大众点评': 'DIANPING',
'其他': 'OTHER',
}
# =============================================================================
# 导入基类
# =============================================================================
class BaseImporter:
    """Base class for the Excel importers.

    Holds the configuration, the database connection, the site/tenant ids
    read from the configuration and a batch number generated once per
    importer instance. Subclasses implement ``import_file``,
    ``transform_row`` and ``insert_records``.
    """

    def __init__(self, config: "Config", db: "DatabaseConnection"):
        self.config = config
        self.db = db
        self.site_id = config.get("app.store_id")
        # The tenant id falls back to the site id when it is not configured.
        self.tenant_id = config.get("app.tenant_id", self.site_id)
        self.batch_no = self._generate_batch_no()

    def _generate_batch_no(self) -> str:
        """Return a batch number: local timestamp plus 8 characters of a UUID4."""
        timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
        unique_id = str(uuid.uuid4())[:8]
        return f"{timestamp}_{unique_id}"

    def _safe_decimal(self, value: Any, default: Decimal = Decimal('0')) -> Decimal:
        """Convert *value* to ``Decimal``, returning *default* when impossible.

        *default* is returned for ``None``, missing values (NaN/NaT), text
        that is not a number, and non-finite results such as "nan" or "inf":
        ``Decimal`` accepts those spellings, but a NaN breaks the ``< 0``
        checks callers run on the result.
        """
        if value is None or pd.isna(value):
            return default
        try:
            number = Decimal(str(value))
        except (ValueError, InvalidOperation):
            return default
        if not number.is_finite():
            return default
        return number

    def _safe_date(self, value: Any) -> Optional[date]:
        """Convert *value* to a ``date``, or ``None`` when it cannot be parsed."""
        if value is None or pd.isna(value):
            return None
        # datetime must be tested first: it is a subclass of date.
        if isinstance(value, datetime):
            return value.date()
        if isinstance(value, date):
            return value
        try:
            parsed = pd.to_datetime(value)
        except (ValueError, TypeError, OverflowError):
            return None
        # Some inputs (e.g. an empty string) parse to NaT rather than raising.
        if pd.isna(parsed):
            return None
        return parsed.date()

    def _safe_month(self, value: Any) -> Optional[date]:
        """Convert *value* to the first day of its month, or ``None``."""
        dt = self._safe_date(value)
        if dt:
            return dt.replace(day=1)
        return None

    def import_file(self, file_path: str) -> Dict[str, Any]:
        """Import one file and return a result summary (subclass responsibility)."""
        raise NotImplementedError

    def validate_row(self, row: Dict[str, Any], row_idx: int) -> List[str]:
        """Validate one row and return its error messages (none by default)."""
        return []

    def transform_row(self, row: Dict[str, Any]) -> Dict[str, Any]:
        """Turn one Excel row into a record for the target table (subclass responsibility)."""
        raise NotImplementedError

    def insert_records(self, records: List[Dict[str, Any]]) -> int:
        """Insert the records and return how many were written (subclass responsibility)."""
        raise NotImplementedError
# =============================================================================
# 支出导入
# =============================================================================
class ExpenseImporter(BaseImporter):
    """
    Expense import.

    Expected Excel columns:
    - 月份: month, e.g. 2026-01 or 2026/01/01 (required)
    - 支出类型: one of the EXPENSE_TYPES labels (required)
    - 金额: number, must not be negative (required)
    - 明细 / 备注: optional
    """

    TARGET_TABLE = "billiards_dws.dws_finance_expense_summary"
    REQUIRED_COLUMNS = ['月份', '支出类型', '金额']
    OPTIONAL_COLUMNS = ['明细', '备注']

    @staticmethod
    def _blank_to_none(value: Any) -> Any:
        """Return ``None`` for an empty cell (None/NaN/NaT), else *value* unchanged.

        pandas represents empty Excel cells as NaN; mapping them to ``None``
        makes them reach the database as SQL NULL.
        """
        if value is None or pd.isna(value):
            return None
        return value

    def _expense_type_name(self, row: Dict[str, Any]) -> str:
        """Return the stripped expense-type label, or '' when the cell is empty."""
        raw = self._blank_to_none(row.get('支出类型'))
        # str() guards against non-text cells, which have no .strip().
        return '' if raw is None else str(raw).strip()

    def import_file(self, file_path: str) -> Dict[str, Any]:
        """Read the expense workbook, validate each row and insert the valid ones.

        Returns a summary dict. ``status`` is "ERROR" when a required column
        is missing (nothing is inserted), "PARTIAL" when some rows failed
        validation and were skipped, otherwise "SUCCESS".
        """
        print(f"开始导入支出文件: {file_path}")
        # Remembered so transform_row can stamp each record with its source file.
        self._import_file_name = os.path.basename(str(file_path))

        df = pd.read_excel(file_path)

        missing_cols = [c for c in self.REQUIRED_COLUMNS if c not in df.columns]
        if missing_cols:
            return {"status": "ERROR", "message": f"缺少必要列: {missing_cols}"}

        records = []
        errors = []
        # Number rows by position: the header is Excel row 1, data starts at row 2.
        for excel_row, (_, row) in enumerate(df.iterrows(), start=2):
            row_dict = row.to_dict()
            row_errors = self.validate_row(row_dict, excel_row)
            if row_errors:
                errors.extend(row_errors)
                continue
            records.append(self.transform_row(row_dict))

        if errors:
            print(f"校验错误: {len(errors)}")
            for err in errors[:10]:
                print(f" - {err}")

        inserted = 0
        if records:
            inserted = self.insert_records(records)

        return {
            "status": "SUCCESS" if not errors else "PARTIAL",
            "batch_no": self.batch_no,
            "total_rows": len(df),
            "inserted": inserted,
            "errors": len(errors),
            "error_messages": errors[:10]
        }

    def validate_row(self, row: Dict[str, Any], row_idx: int) -> List[str]:
        """Return the validation errors for one row (empty list when valid)."""
        errors = []

        month = self._safe_month(row.get('月份'))
        if not month:
            errors.append(f"{row_idx}: 月份格式错误")

        expense_type = self._expense_type_name(row)
        if expense_type not in EXPENSE_TYPES:
            errors.append(f"{row_idx}: 支出类型无效 '{expense_type}'")

        amount = self._safe_decimal(row.get('金额'))
        if amount < 0:
            errors.append(f"{row_idx}: 金额不能为负数")

        return errors

    def transform_row(self, row: Dict[str, Any]) -> Dict[str, Any]:
        """Build the target-table record for one validated row."""
        expense_type_name = self._expense_type_name(row)
        expense_type_code = EXPENSE_TYPES.get(expense_type_name, 'OTHER')
        expense_category = EXPENSE_CATEGORIES.get(expense_type_code, 'OTHER')

        # Prefer the file name captured by import_file; the row-level key is
        # only a fallback for callers that invoke transform_row directly.
        file_name = getattr(self, '_import_file_name', None)
        if file_name is None:
            file_name = os.path.basename(str(row.get('_file_path', '')))

        return {
            'site_id': self.site_id,
            'tenant_id': self.tenant_id,
            'expense_month': self._safe_month(row.get('月份')),
            'expense_type_code': expense_type_code,
            'expense_type_name': expense_type_name,
            'expense_category': expense_category,
            'expense_amount': self._safe_decimal(row.get('金额')),
            'expense_detail': self._blank_to_none(row.get('明细')),
            'import_batch_no': self.batch_no,
            'import_file_name': file_name,
            'import_time': datetime.now(),
            'import_user': os.getenv('USERNAME', 'system'),
            'remark': self._blank_to_none(row.get('备注')),
        }

    def insert_records(self, records: List[Dict[str, Any]]) -> int:
        """Insert the records one by one, commit, and return the inserted row count."""
        columns = [
            'site_id', 'tenant_id', 'expense_month', 'expense_type_code',
            'expense_type_name', 'expense_category', 'expense_amount',
            'expense_detail', 'import_batch_no', 'import_file_name',
            'import_time', 'import_user', 'remark'
        ]
        cols_str = ", ".join(columns)
        placeholders = ", ".join(["%s"] * len(columns))
        sql = f"INSERT INTO {self.TARGET_TABLE} ({cols_str}) VALUES ({placeholders})"

        inserted = 0
        with self.db.conn.cursor() as cur:
            for record in records:
                values = [record.get(col) for col in columns]
                cur.execute(sql, values)
                inserted += cur.rowcount
        self.db.commit()
        return inserted
# =============================================================================
# 平台结算导入
# =============================================================================
class PlatformSettlementImporter(BaseImporter):
    """
    Platform settlement import.

    Expected Excel columns:
    - 回款日期: settlement date (required)
    - 平台类型: one of the PLATFORM_TYPES labels (required)
    - 回款金额: number, must not be negative (required)
    - 平台订单号 / 订单原始金额 / 佣金 / 服务费 / 关联订单ID / 备注: optional
    """

    TARGET_TABLE = "billiards_dws.dws_platform_settlement"
    REQUIRED_COLUMNS = ['回款日期', '平台类型', '回款金额']
    OPTIONAL_COLUMNS = ['平台订单号', '订单原始金额', '佣金', '服务费', '关联订单ID', '备注']

    @staticmethod
    def _blank_to_none(value: Any) -> Any:
        """Return ``None`` for an empty cell (None/NaN/NaT), else *value* unchanged.

        pandas represents empty Excel cells as NaN; mapping them to ``None``
        makes them reach the database as SQL NULL.
        """
        if value is None or pd.isna(value):
            return None
        return value

    def _platform_name(self, row: Dict[str, Any]) -> str:
        """Return the stripped platform label, or '' when the cell is empty."""
        raw = self._blank_to_none(row.get('平台类型'))
        # str() guards against non-text cells, which have no .strip().
        return '' if raw is None else str(raw).strip()

    def import_file(self, file_path: str) -> Dict[str, Any]:
        """Read the settlement workbook, validate each row and insert the valid ones.

        Returns a summary dict. ``status`` is "ERROR" when a required column
        is missing (nothing is inserted), "PARTIAL" when some rows failed
        validation and were skipped, otherwise "SUCCESS".
        """
        print(f"开始导入平台结算文件: {file_path}")
        # Remembered so transform_row can stamp each record with its source file.
        self._import_file_name = os.path.basename(str(file_path))

        df = pd.read_excel(file_path)

        missing_cols = [c for c in self.REQUIRED_COLUMNS if c not in df.columns]
        if missing_cols:
            return {"status": "ERROR", "message": f"缺少必要列: {missing_cols}"}

        records = []
        errors = []
        # Number rows by position: the header is Excel row 1, data starts at row 2.
        for excel_row, (_, row) in enumerate(df.iterrows(), start=2):
            row_dict = row.to_dict()
            row_errors = self.validate_row(row_dict, excel_row)
            if row_errors:
                errors.extend(row_errors)
                continue
            records.append(self.transform_row(row_dict))

        if errors:
            print(f"校验错误: {len(errors)}")
            for err in errors[:10]:
                print(f" - {err}")

        inserted = 0
        if records:
            inserted = self.insert_records(records)

        return {
            "status": "SUCCESS" if not errors else "PARTIAL",
            "batch_no": self.batch_no,
            "total_rows": len(df),
            "inserted": inserted,
            "errors": len(errors),
            # Same key ExpenseImporter reports, so callers can treat results alike.
            "error_messages": errors[:10],
        }

    def validate_row(self, row: Dict[str, Any], row_idx: int) -> List[str]:
        """Return the validation errors for one row (empty list when valid)."""
        errors = []

        settlement_date = self._safe_date(row.get('回款日期'))
        if not settlement_date:
            errors.append(f"{row_idx}: 回款日期格式错误")

        platform_type = self._platform_name(row)
        if platform_type not in PLATFORM_TYPES:
            errors.append(f"{row_idx}: 平台类型无效 '{platform_type}'")

        amount = self._safe_decimal(row.get('回款金额'))
        if amount < 0:
            errors.append(f"{row_idx}: 回款金额不能为负数")

        return errors

    def transform_row(self, row: Dict[str, Any]) -> Dict[str, Any]:
        """Build the target-table record for one validated row."""
        platform_name = self._platform_name(row)
        platform_type = PLATFORM_TYPES.get(platform_name, 'OTHER')

        # Prefer the file name captured by import_file; the row-level key is
        # only a fallback for callers that invoke transform_row directly.
        file_name = getattr(self, '_import_file_name', None)
        if file_name is None:
            file_name = os.path.basename(str(row.get('_file_path', '')))

        return {
            'site_id': self.site_id,
            'tenant_id': self.tenant_id,
            'settlement_date': self._safe_date(row.get('回款日期')),
            'platform_type': platform_type,
            'platform_name': platform_name,
            'platform_order_no': self._blank_to_none(row.get('平台订单号')),
            'order_settle_id': self._blank_to_none(row.get('关联订单ID')),
            'settlement_amount': self._safe_decimal(row.get('回款金额')),
            'commission_amount': self._safe_decimal(row.get('佣金')),
            'service_fee': self._safe_decimal(row.get('服务费')),
            'gross_amount': self._safe_decimal(row.get('订单原始金额')),
            'import_batch_no': self.batch_no,
            'import_file_name': file_name,
            'import_time': datetime.now(),
            'import_user': os.getenv('USERNAME', 'system'),
            'remark': self._blank_to_none(row.get('备注')),
        }

    def insert_records(self, records: List[Dict[str, Any]]) -> int:
        """Insert the records one by one, commit, and return the inserted row count."""
        columns = [
            'site_id', 'tenant_id', 'settlement_date', 'platform_type',
            'platform_name', 'platform_order_no', 'order_settle_id',
            'settlement_amount', 'commission_amount', 'service_fee',
            'gross_amount', 'import_batch_no', 'import_file_name',
            'import_time', 'import_user', 'remark'
        ]
        cols_str = ", ".join(columns)
        placeholders = ", ".join(["%s"] * len(columns))
        sql = f"INSERT INTO {self.TARGET_TABLE} ({cols_str}) VALUES ({placeholders})"

        inserted = 0
        with self.db.conn.cursor() as cur:
            for record in records:
                values = [record.get(col) for col in columns]
                cur.execute(sql, values)
                inserted += cur.rowcount
        self.db.commit()
        return inserted
# =============================================================================
# 充值提成导入
# =============================================================================
class RechargeCommissionImporter(BaseImporter):
    """
    Recharge commission import.

    Expected Excel columns:
    - 月份: month, e.g. 2026-01 (required)
    - 助教ID: integer (required)
    - 提成金额: number, must not be negative (required)
    - 助教花名 / 充值订单金额 / 充值订单ID / 充值订单号 / 备注: optional
    """

    TARGET_TABLE = "billiards_dws.dws_assistant_recharge_commission"
    REQUIRED_COLUMNS = ['月份', '助教ID', '提成金额']
    OPTIONAL_COLUMNS = ['助教花名', '充值订单金额', '充值订单ID', '充值订单号', '备注']

    @staticmethod
    def _blank_to_none(value: Any) -> Any:
        """Return ``None`` for an empty cell (None/NaN/NaT), else *value* unchanged.

        pandas represents empty Excel cells as NaN; mapping them to ``None``
        makes them reach the database as SQL NULL.
        """
        if value is None or pd.isna(value):
            return None
        return value

    @staticmethod
    def _to_int(value: Any) -> Optional[int]:
        """Return ``int(value)``, or ``None`` when the conversion fails."""
        try:
            return int(value)
        except (TypeError, ValueError, OverflowError):
            return None

    def import_file(self, file_path: str) -> Dict[str, Any]:
        """Read the commission workbook, validate each row and insert the valid ones.

        Returns a summary dict. ``status`` is "ERROR" when a required column
        is missing (nothing is inserted), "PARTIAL" when some rows failed
        validation and were skipped, otherwise "SUCCESS".
        """
        print(f"开始导入充值提成文件: {file_path}")
        # Remembered so transform_row can stamp each record with its source file.
        self._import_file_name = os.path.basename(str(file_path))

        df = pd.read_excel(file_path)

        missing_cols = [c for c in self.REQUIRED_COLUMNS if c not in df.columns]
        if missing_cols:
            return {"status": "ERROR", "message": f"缺少必要列: {missing_cols}"}

        records = []
        errors = []
        # Number rows by position: the header is Excel row 1, data starts at row 2.
        for excel_row, (_, row) in enumerate(df.iterrows(), start=2):
            row_dict = row.to_dict()
            row_errors = self.validate_row(row_dict, excel_row)
            if row_errors:
                errors.extend(row_errors)
                continue
            records.append(self.transform_row(row_dict))

        if errors:
            print(f"校验错误: {len(errors)}")
            for err in errors[:10]:
                print(f" - {err}")

        inserted = 0
        if records:
            inserted = self.insert_records(records)

        return {
            "status": "SUCCESS" if not errors else "PARTIAL",
            "batch_no": self.batch_no,
            "total_rows": len(df),
            "inserted": inserted,
            "errors": len(errors),
            # Same key ExpenseImporter reports, so callers can treat results alike.
            "error_messages": errors[:10],
        }

    def validate_row(self, row: Dict[str, Any], row_idx: int) -> List[str]:
        """Return the validation errors for one row (empty list when valid)."""
        errors = []

        month = self._safe_month(row.get('月份'))
        if not month:
            errors.append(f"{row_idx}: 月份格式错误")

        assistant_id = self._blank_to_none(row.get('助教ID'))
        if assistant_id is None:
            errors.append(f"{row_idx}: 助教ID不能为空")
        elif self._to_int(assistant_id) is None:
            # transform_row calls int() on this cell; reject the row here
            # instead of letting that call abort the whole import.
            errors.append(f"{row_idx}: 助教ID必须为整数")

        amount = self._safe_decimal(row.get('提成金额'))
        if amount < 0:
            errors.append(f"{row_idx}: 提成金额不能为负数")

        return errors

    def transform_row(self, row: Dict[str, Any]) -> Dict[str, Any]:
        """Build the target-table record for one validated row."""
        recharge_amount = self._safe_decimal(row.get('充值订单金额'))
        commission_amount = self._safe_decimal(row.get('提成金额'))
        # The ratio is undefined without a positive recharge amount.
        commission_ratio = commission_amount / recharge_amount if recharge_amount > 0 else None

        # Prefer the file name captured by import_file; the row-level key is
        # only a fallback for callers that invoke transform_row directly.
        file_name = getattr(self, '_import_file_name', None)
        if file_name is None:
            file_name = os.path.basename(str(row.get('_file_path', '')))

        return {
            'site_id': self.site_id,
            'tenant_id': self.tenant_id,
            'assistant_id': int(row.get('助教ID')),
            'assistant_nickname': self._blank_to_none(row.get('助教花名')),
            'commission_month': self._safe_month(row.get('月份')),
            'recharge_order_id': self._blank_to_none(row.get('充值订单ID')),
            'recharge_order_no': self._blank_to_none(row.get('充值订单号')),
            'recharge_amount': recharge_amount,
            'commission_amount': commission_amount,
            'commission_ratio': commission_ratio,
            'import_batch_no': self.batch_no,
            'import_file_name': file_name,
            'import_time': datetime.now(),
            'import_user': os.getenv('USERNAME', 'system'),
            'remark': self._blank_to_none(row.get('备注')),
        }

    def insert_records(self, records: List[Dict[str, Any]]) -> int:
        """Insert the records one by one, commit, and return the inserted row count."""
        columns = [
            'site_id', 'tenant_id', 'assistant_id', 'assistant_nickname',
            'commission_month', 'recharge_order_id', 'recharge_order_no',
            'recharge_amount', 'commission_amount', 'commission_ratio',
            'import_batch_no', 'import_file_name', 'import_time',
            'import_user', 'remark'
        ]
        cols_str = ", ".join(columns)
        placeholders = ", ".join(["%s"] * len(columns))
        sql = f"INSERT INTO {self.TARGET_TABLE} ({cols_str}) VALUES ({placeholders})"

        inserted = 0
        with self.db.conn.cursor() as cur:
            for record in records:
                values = [record.get(col) for col in columns]
                cur.execute(sql, values)
                inserted += cur.rowcount
        self.db.commit()
        return inserted
# =============================================================================
# 主函数
# =============================================================================
def main():
    """Command-line entry point of the DWS Excel import tool.

    Parses ``--type`` and ``--file``, checks that the file exists, runs the
    importer matching the type and prints the result summary. Exits with
    status 1 when the file is missing, the import reports "ERROR", or an
    exception is raised (the transaction is rolled back in that case). The
    database connection is always closed.
    """
    cli = argparse.ArgumentParser(description='DWS Excel导入工具')
    cli.add_argument(
        '--type', '-t',
        choices=['expense', 'platform', 'commission'],
        required=True,
        help='导入类型: expense(支出), platform(平台结算), commission(充值提成)'
    )
    cli.add_argument(
        '--file', '-f',
        required=True,
        help='Excel文件路径'
    )
    args = cli.parse_args()

    # Bail out early when the workbook is not there.
    if not os.path.exists(args.file):
        print(f"文件不存在: {args.file}")
        sys.exit(1)

    config = Config()
    db = DatabaseConnection(config)

    try:
        # Import type -> importer class.
        importer_classes = {
            'expense': ExpenseImporter,
            'platform': PlatformSettlementImporter,
            'commission': RechargeCommissionImporter,
        }
        importer_cls = importer_classes.get(args.type)
        if importer_cls is None:
            print(f"未知的导入类型: {args.type}")
            sys.exit(1)
        importer = importer_cls(config, db)

        result = importer.import_file(args.file)

        # Report the outcome.
        print("\n" + "=" * 50)
        print("导入结果:")
        summary_lines = (
            (" 状态", 'status'),
            (" 批次号", 'batch_no'),
            (" 总行数", 'total_rows'),
            (" 插入行数", 'inserted'),
            (" 错误行数", 'errors'),
        )
        for label, key in summary_lines:
            print(f"{label}: {result.get(key)}")

        if result.get('status') == 'ERROR':
            print(f" 错误信息: {result.get('message')}")
            sys.exit(1)

    except Exception as e:
        print(f"导入失败: {e}")
        db.rollback()
        sys.exit(1)
    finally:
        db.close()


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,35 @@
# -*- coding: utf-8 -*-
"""执行DWS配置数据导入"""
import os
from pathlib import Path
from dotenv import load_dotenv
import psycopg2
def main():
    """Execute database/seed_dws_config.sql against the database named by PG_DSN.

    PG_DSN is read from the environment after loading the ``.env`` file one
    directory above this script's directory. Prints an error and returns
    when it is missing.
    """
    # Load the .env configuration.
    env_path = Path(__file__).parent.parent / ".env"
    load_dotenv(env_path)

    dsn = os.getenv("PG_DSN")
    if not dsn:
        print("错误: 未找到 PG_DSN 配置")
        return

    # Read the SQL file before connecting so a missing file opens no connection.
    sql_file = Path(__file__).parent.parent / "database" / "seed_dws_config.sql"
    sql_content = sql_file.read_text(encoding="utf-8")

    print("连接数据库...")
    conn = psycopg2.connect(dsn)
    # try/finally: the connection must be released even when the script fails.
    try:
        conn.autocommit = True
        with conn.cursor() as cur:
            print(f"执行SQL文件: {sql_file}")
            cur.execute(sql_content)
        print("DWS配置数据导入成功!")
    finally:
        conn.close()


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,43 @@
# -*- coding: utf-8 -*-
"""显示台区分类映射数据"""
import os
from pathlib import Path
from dotenv import load_dotenv
import psycopg2
def main():
    """Print the rows of cfg_area_category followed by a per-category rule count.

    PG_DSN is read from the environment after loading the ``.env`` file one
    directory above this script's directory. Prints an error and returns
    when it is missing.
    """
    load_dotenv(Path(__file__).parent.parent / ".env")
    dsn = os.getenv("PG_DSN")
    if not dsn:
        print("错误: 未找到 PG_DSN 配置")
        return

    conn = psycopg2.connect(dsn)
    # try/finally: the connection must be released even when a query or the
    # row formatting fails.
    try:
        print("cfg_area_category 数据内容:")
        print("=" * 90)
        print(f"{'source_area_name':<15} {'category_code':<15} {'category_name':<12} {'match_type':<10} {'priority':<8}")
        print("-" * 90)

        with conn.cursor() as cur:
            cur.execute("""
SELECT source_area_name, category_code, category_name, match_type, match_priority
FROM billiards_dws.cfg_area_category
ORDER BY match_priority, category_code, source_area_name
""")
            for row in cur.fetchall():
                print(f"{row[0]:<15} {row[1]:<15} {row[2]:<12} {row[3]:<10} {row[4]:<8}")

        print("=" * 90)
        print("\n分类汇总:")

        with conn.cursor() as cur:
            cur.execute("""
SELECT category_code, category_name, COUNT(*) as cnt
FROM billiards_dws.cfg_area_category
GROUP BY category_code, category_name
ORDER BY category_code
""")
            for row in cur.fetchall():
                print(f" {row[0]:<15} {row[1]:<12} {row[2]} 条规则")
    finally:
        conn.close()


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,48 @@
# -*- coding: utf-8 -*-
"""显示绩效档位配置数据"""
import os
from pathlib import Path
from dotenv import load_dotenv
import psycopg2
def main():
    """Print the rows of cfg_performance_tier and the requirement table they come from.

    PG_DSN is read from the environment after loading the ``.env`` file one
    directory above this script's directory. Prints an error and returns
    when it is missing.
    """
    load_dotenv(Path(__file__).parent.parent / ".env")
    dsn = os.getenv("PG_DSN")
    if not dsn:
        print("错误: 未找到 PG_DSN 配置")
        return

    conn = psycopg2.connect(dsn)
    # try/finally: the connection must be released even when the query or the
    # row formatting fails.
    try:
        print("cfg_performance_tier 数据内容:")
        print("=" * 110)
        print(f"{'tier_code':<8} {'tier_name':<18} {'min_hours':<10} {'max_hours':<10} {'base_ded':<10} {'bonus_ded':<10} {'vacation':<10}")
        print("-" * 110)

        with conn.cursor() as cur:
            cur.execute("""
SELECT tier_code, tier_name, min_hours, max_hours,
base_deduction, bonus_deduction_ratio,
vacation_days, vacation_unlimited
FROM billiards_dws.cfg_performance_tier
ORDER BY tier_level
""")
            for row in cur.fetchall():
                # Only a missing upper bound is shown as NULL; 0 is a real value.
                max_h = str(row[3]) if row[3] is not None else "NULL"
                vac = "自由" if row[7] else str(row[6])
                print(f"{row[0]:<8} {row[1]:<18} {row[2]:<10} {max_h:<10} {row[4]:<10} {row[5]*100:.0f}%{'':<7} {vac:<10}")

        print("=" * 110)
        print("\n数据来源依据: DWS 数据库处理需求.md 第35-41行")
        print("""
| 档位 | 总业绩小时数阈值 | 专业课抽成 | 打赏课抽成 | 次月休假 |
|------|------------------|-----------|-----------|----------|
| 0档 | H < 100 | 28元/小时 | 50% | 3天 |
| 1档 | 100 ≤ H < 130 | 18元/小时 | 40% | 4天 |
| 2档 | 130 ≤ H < 160 | 15元/小时 | 38% | 4天 |
| 3档 | 160 ≤ H < 190 | 13元/小时 | 35% | 5天 |
| 4档 | 190 ≤ H < 220 | 10元/小时 | 33% | 6天 |
| 5档 | H ≥ 220 | 8元/小时 | 30% | 休假自由 |
""")
    finally:
        conn.close()


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,222 @@
# -*- coding: utf-8 -*-
"""
测试指数算法任务
"""
import sys
import os
# 添加项目路径
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import logging
from config.settings import AppConfig
from database.connection import DatabaseConnection
from database.operations import DatabaseOperations
from tasks.dws.index import RecallIndexTask, IntimacyIndexTask
# 配置日志
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger('test_index')
def test_recall_index():
    """Run the recall-index task once and log a summary of what it wrote.

    When the task reports status 'success', logs aggregate statistics of
    dws_member_recall_index and its five highest display scores. Returns
    the task's result; the connection is closed in every case.
    """
    banner = "=" * 60
    logger.info(banner)
    logger.info("测试客户召回指数任务 (DWS_RECALL_INDEX)")
    logger.info(banner)

    app_config = AppConfig.load()
    connection = DatabaseConnection(app_config.config["db"]["dsn"])
    operations = DatabaseOperations(connection)

    try:
        task = RecallIndexTask(app_config, operations, None, logger)
        result = task.execute(None)
        logger.info("任务执行结果: %s", result)

        # Nothing to inspect unless the task succeeded.
        if result.get('status') != 'success':
            return result

        # Aggregate statistics over the whole table.
        summary_rows = operations.query("""
SELECT
COUNT(*) as total_count,
ROUND(AVG(display_score)::numeric, 2) as avg_score,
ROUND(MIN(display_score)::numeric, 2) as min_score,
ROUND(MAX(display_score)::numeric, 2) as max_score,
ROUND(AVG(raw_score)::numeric, 4) as avg_raw_score,
ROUND(AVG(score_overdue)::numeric, 4) as avg_overdue,
ROUND(AVG(score_new_bonus)::numeric, 4) as avg_new_bonus,
ROUND(AVG(score_recharge_bonus)::numeric, 4) as avg_recharge_bonus,
ROUND(AVG(score_hot_drop)::numeric, 4) as avg_hot_drop
FROM billiards_dws.dws_member_recall_index
""")
        if summary_rows:
            stats = dict(summary_rows[0])
            logger.info("-" * 40)
            logger.info("召回指数统计:")
            logger.info(" 总记录数: %s", stats['total_count'])
            logger.info(" Display Score: 平均=%.2f, 最小=%.2f, 最大=%.2f",
                        stats['avg_score'] or 0, stats['min_score'] or 0, stats['max_score'] or 0)
            logger.info(" Raw Score 平均: %.4f", stats['avg_raw_score'] or 0)
            logger.info(" 分项得分平均:")
            # Component averages; NULL averages are logged as 0.
            components = (
                (" - 超期紧急性: %.4f", 'avg_overdue'),
                (" - 新客户加分: %.4f", 'avg_new_bonus'),
                (" - 充值加分: %.4f", 'avg_recharge_bonus'),
                (" - 热度断档: %.4f", 'avg_hot_drop'),
            )
            for template, column in components:
                logger.info(template, stats[column] or 0)

        # Five members with the highest display score.
        logger.info("-" * 40)
        logger.info("召回优先级 Top 5:")
        leaders = operations.query("""
SELECT member_id, display_score, raw_score,
days_since_last_visit, visit_interval_median
FROM billiards_dws.dws_member_recall_index
ORDER BY display_score DESC
LIMIT 5
""")
        rank = 0
        for record in leaders or []:
            rank += 1
            entry = dict(record)
            logger.info(" %d. 会员%s: %.2f分 (Raw=%.4f, 最近到店=%s天前, 周期=%.1f天)",
                        rank, entry['member_id'], entry['display_score'] or 0, entry['raw_score'] or 0,
                        entry['days_since_last_visit'], entry['visit_interval_median'] or 0)

        return result
    finally:
        connection.close()
def test_intimacy_index():
    """Run the intimacy-index task once and log a summary of what it wrote.

    When the task reports status 'success', logs aggregate statistics of
    dws_member_assistant_intimacy and its five highest display scores.
    Returns the task's result; the connection is closed in every case.
    """
    banner = "=" * 60
    logger.info("")
    logger.info(banner)
    logger.info("测试客户-助教亲密指数任务 (DWS_INTIMACY_INDEX)")
    logger.info(banner)

    app_config = AppConfig.load()
    connection = DatabaseConnection(app_config.config["db"]["dsn"])
    operations = DatabaseOperations(connection)

    try:
        task = IntimacyIndexTask(app_config, operations, None, logger)
        result = task.execute(None)
        logger.info("任务执行结果: %s", result)

        # Nothing to inspect unless the task succeeded.
        if result.get('status') != 'success':
            return result

        # Aggregate statistics over the whole table.
        summary_rows = operations.query("""
SELECT
COUNT(*) as total_count,
COUNT(DISTINCT member_id) as unique_members,
COUNT(DISTINCT assistant_id) as unique_assistants,
ROUND(AVG(display_score)::numeric, 2) as avg_score,
ROUND(MIN(display_score)::numeric, 2) as min_score,
ROUND(MAX(display_score)::numeric, 2) as max_score,
ROUND(AVG(raw_score)::numeric, 4) as avg_raw_score,
ROUND(AVG(score_frequency)::numeric, 4) as avg_frequency,
ROUND(AVG(score_recency)::numeric, 4) as avg_recency,
ROUND(AVG(score_recharge)::numeric, 4) as avg_recharge,
ROUND(AVG(burst_multiplier)::numeric, 4) as avg_burst
FROM billiards_dws.dws_member_assistant_intimacy
""")
        if summary_rows:
            stats = dict(summary_rows[0])
            logger.info("-" * 40)
            logger.info("亲密指数统计:")
            logger.info(" 总记录数: %s (客户-助教对)", stats['total_count'])
            logger.info(" 唯一会员: %s, 唯一助教: %s", stats['unique_members'], stats['unique_assistants'])
            logger.info(" Display Score: 平均=%.2f, 最小=%.2f, 最大=%.2f",
                        stats['avg_score'] or 0, stats['min_score'] or 0, stats['max_score'] or 0)
            logger.info(" Raw Score 平均: %.4f", stats['avg_raw_score'] or 0)
            logger.info(" 分项得分平均:")
            # Component averages; NULL averages are logged as 0.
            components = (
                (" - 频次强度: %.4f", 'avg_frequency'),
                (" - 最近温度: %.4f", 'avg_recency'),
                (" - 充值强度: %.4f", 'avg_recharge'),
                (" - 激增放大: %.4f", 'avg_burst'),
            )
            for template, column in components:
                logger.info(template, stats[column] or 0)

        # Five member-assistant pairs with the highest display score.
        logger.info("-" * 40)
        logger.info("亲密度 Top 5 客户-助教对:")
        leaders = operations.query("""
SELECT member_id, assistant_id, display_score, raw_score,
session_count, attributed_recharge_amount
FROM billiards_dws.dws_member_assistant_intimacy
ORDER BY display_score DESC
LIMIT 5
""")
        rank = 0
        for record in leaders or []:
            rank += 1
            entry = dict(record)
            logger.info(" %d. 会员%s-助教%s: %.2f分 (会话%d次, 归因充值%.2f元)",
                        rank, entry['member_id'], entry['assistant_id'],
                        entry['display_score'] or 0, entry['session_count'] or 0,
                        entry['attributed_recharge_amount'] or 0)

        return result
    finally:
        connection.close()
if __name__ == '__main__':
    print("=" * 60)
    print("指数算法任务测试")
    print("=" * 60)
    print()

    # Pre-check: find out which of the index tables already exist.
    config = AppConfig.load()
    db_conn = DatabaseConnection(config.config["db"]["dsn"])
    # try/finally: release the pre-check connection even when the query fails.
    try:
        db = DatabaseOperations(db_conn)
        check_sql = """
SELECT table_name
FROM information_schema.tables
WHERE table_schema = 'billiards_dws'
AND table_name IN ('dws_member_recall_index', 'dws_member_assistant_intimacy', 'cfg_index_parameters')
"""
        tables = db.query(check_sql)
    finally:
        db_conn.close()

    existing_tables = [dict(r)['table_name'] for r in (tables or [])]
    # Only the parameter table is required before the tasks are run.
    if 'cfg_index_parameters' not in existing_tables:
        print("警告: cfg_index_parameters 表不存在,请先执行 schema_dws.sql")
        print("需要执行的表:")
        print(" - cfg_index_parameters")
        print(" - dws_member_recall_index")
        print(" - dws_member_assistant_intimacy")
        print(" - dws_index_percentile_history")
        sys.exit(1)

    # Run the recall index test.
    recall_result = test_recall_index()
    # Run the intimacy index test.
    intimacy_result = test_intimacy_index()

    print()
    print("=" * 60)
    print("测试完成")
    print("=" * 60)
    print(f"召回指数: {recall_result.get('status', 'unknown')}")
    print(f"亲密指数: {intimacy_result.get('status', 'unknown')}")

View File

@@ -0,0 +1,34 @@
# -*- coding: utf-8 -*-
"""验证DWS配置数据"""
import os
from pathlib import Path
from dotenv import load_dotenv
import psycopg2
def main():
    """Print the row count of each DWS configuration table.

    PG_DSN is read from the environment after loading the ``.env`` file one
    directory above this script's directory. Prints an error and returns
    when it is missing.
    """
    load_dotenv(Path(__file__).parent.parent / ".env")
    dsn = os.getenv("PG_DSN")
    if not dsn:
        print("错误: 未找到 PG_DSN 配置")
        return

    # Table names come from this fixed list only, never from outside input,
    # so interpolating them into the query text below is safe.
    tables = [
        "cfg_performance_tier",
        "cfg_assistant_level_price",
        "cfg_bonus_rules",
        "cfg_area_category",
        "cfg_skill_type"
    ]

    conn = psycopg2.connect(dsn)
    # try/finally: the connection must be released even when a table is
    # missing and its count query fails.
    try:
        print("DWS 配置表数据统计:")
        print("-" * 40)
        with conn.cursor() as cur:
            for t in tables:
                cur.execute(f"SELECT COUNT(*) FROM billiards_dws.{t}")
                cnt = cur.fetchone()[0]
                print(f"{t}: {cnt}")
    finally:
        conn.close()


if __name__ == "__main__":
    main()