Files
Neo-ZQYY/scripts/ops/sample_consumption_cases.py

539 lines
21 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# -*- coding: utf-8 -*-
"""
一次性脚本:从 test_etl_feiqiu DWD 层采样消费用例订单。
按 7 个维度分类采样,每个用例最多 10 个样本,
输出到 {EXPORT_ROOT}/ETL-Connectors/feiqiu/REPORTS/consumption_cases_sample.json
"""
from __future__ import annotations
import json
import os
from datetime import datetime, date
from decimal import Decimal
from pathlib import Path
# ── 环境加载 ──────────────────────────────────────────────
from _env_paths import get_output_path # 内部已 load_dotenv
# Fail fast at import time: every query in this script needs the test-database DSN.
TEST_DB_DSN = os.getenv("TEST_DB_DSN")
if TEST_DB_DSN is None or TEST_DB_DSN == "":
    raise RuntimeError(
        "环境变量 TEST_DB_DSN 未定义。请在根 .env 中配置测试库连接串。"
    )
import psycopg2
import psycopg2.extras
# ── 输出路径 ──────────────────────────────────────────────
# Resolve the export root through the project helper and make sure the
# REPORTS/ sub-directory exists before anything is written.
# NOTE(review): the module docstring mentions ETL-Connectors/feiqiu/REPORTS;
# presumably get_output_path("EXPORT_ROOT") already includes that prefix — confirm.
EXPORT_ROOT = get_output_path("EXPORT_ROOT")
OUTPUT_DIR = EXPORT_ROOT / "REPORTS"
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
OUTPUT_FILE = OUTPUT_DIR / "consumption_cases_sample.json"
# ── Time range ────────────────────────────────────────────
# SQL fragment interpolated into WHERE clauses; it references an unqualified
# create_time column, so callers prefix it with a table name when joining.
TIME_FILTER = "create_time >= NOW() - INTERVAL '5 months'"
# Maximum number of sample IDs fetched per case.
SAMPLE_LIMIT = 10
# ── JSON 序列化 ───────────────────────────────────────────
def _default(obj):
    """JSON ``default`` hook for values the stock encoder cannot serialise.

    ``Decimal`` becomes ``float``; ``datetime`` / ``date`` become ISO-8601
    strings. Any other type raises ``TypeError``.
    """
    if isinstance(obj, (datetime, date)):
        return obj.isoformat()
    if isinstance(obj, Decimal):
        return float(obj)
    raise TypeError(f"无法序列化类型: {type(obj)}")
# ── 数据库工具 ────────────────────────────────────────────
def get_conn():
    """Open a psycopg2 connection to the test database.

    On this connection, ``timestamp`` / ``timestamptz`` values are returned
    as raw text instead of ``datetime`` objects, so abnormal years
    (e.g. year=-1) do not break Python-side parsing.
    """
    from psycopg2 import extensions as pg_ext

    connection = psycopg2.connect(TEST_DB_DSN)
    timestamp_oids = (1114, 1184)  # timestamp, timestamptz OIDs

    def _passthrough(value, cursor):
        # Hand the server's text representation back untouched.
        return value

    pg_ext.register_type(
        pg_ext.new_type(timestamp_oids, "TEXT_TIMESTAMP", _passthrough),
        connection,
    )
    return connection
def query_rows(cur, sql, params=None) -> list[dict]:
    """Run *sql* on *cur* and return each result row as a ``{column: value}`` dict.

    An empty tuple is passed when *params* is falsy. Statements that produce
    no result set (``cur.description`` is ``None``) yield an empty list.
    When two columns share a name, the later one wins in the dict.
    """
    cur.execute(sql, params or ())
    description = cur.description
    if description is None:
        return []
    names = [column[0] for column in description]
    return [
        {name: value for name, value in zip(names, record)}
        for record in cur.fetchall()
    ]
def query_ids(cur, sql, params=None) -> list:
    """Run *sql* on *cur* and return the first column of every row as a list."""
    cur.execute(sql, params or ())
    first_column = []
    for record in cur.fetchall():
        first_column.append(record[0])
    return first_column
# ── 关联表查询 ────────────────────────────────────────────
def fetch_settlement_detail(cur, order_settle_id: int) -> dict:
    """Collect the DWD rows related to one settlement order.

    Args:
        cur: Open database cursor used for every lookup.
        order_settle_id: Settlement order to look up.

    Returns:
        A dict with ``order_settle_id``, the single ``settlement_head`` /
        ``settlement_head_ex`` rows (``None`` when absent) and lists of rows
        for table fees, fee adjustments, goods sales, assistant services,
        group-buy redemptions, platform coupon redemptions, balance changes,
        payments and refunds.
    """
    key = (order_settle_id,)

    def rows_for(table: str) -> list[dict]:
        # Table names are the fixed literals below, never caller input.
        return query_rows(
            cur, f"SELECT * FROM {table} WHERE order_settle_id = %s", key
        )

    head = rows_for("dwd.dwd_settlement_head")
    head_ex = rows_for("dwd.dwd_settlement_head_ex")
    table_fees = rows_for("dwd.dwd_table_fee_log")
    table_adjusts = rows_for("dwd.dwd_table_fee_adjust")
    goods = rows_for("dwd.dwd_store_goods_sale")
    assistants = rows_for("dwd.dwd_assistant_service_log")
    groupbuys = rows_for("dwd.dwd_groupbuy_redemption")

    # Platform coupon redemptions are linked through site_order_id, which
    # corresponds to settlement_head.order_trade_no.
    order_trade_no = head[0]["order_trade_no"] if head else None
    pl_coupons = []
    if order_trade_no:
        pl_coupons = query_rows(
            cur,
            "SELECT * FROM dwd.dwd_platform_coupon_redemption WHERE site_order_id = %s",
            (order_trade_no,),
        )

    # Member balance changes: relate_id lives in the _ex table, from_type in
    # the main table.
    balance_changes = query_rows(
        cur,
        """
        SELECT bc.*, bce.relate_id, bce.refund_amount AS ex_refund_amount,
        bce.operator_id AS ex_operator_id, bce.operator_name AS ex_operator_name,
        bce.principal_data
        FROM dwd.dwd_member_balance_change bc
        JOIN dwd.dwd_member_balance_change_ex bce USING (balance_change_id)
        WHERE bce.relate_id = %s AND bc.from_type = 1
        """,
        key,
    )

    # Payment records.
    payments = query_rows(
        cur,
        "SELECT * FROM dwd.dwd_payment WHERE relate_id = %s AND relate_type = 2",
        key,
    )

    # Refund records. Rows are folded into dicts, so for column names present
    # in both tables the LAST selected column wins. Selecting re.* first lets
    # dwd_refund values take precedence; otherwise a refund without an _ex row
    # (LEFT JOIN → all-NULL re.*) would have refund_id and any other shared
    # column overwritten with NULL.
    # NOTE(review): refunds are matched on relate_id alone, without a
    # relate_type filter — confirm ids cannot collide across relate types.
    refunds = query_rows(
        cur,
        "SELECT re.*, r.* FROM dwd.dwd_refund r LEFT JOIN dwd.dwd_refund_ex re USING (refund_id) WHERE r.relate_id = %s",
        key,
    )

    return {
        "order_settle_id": order_settle_id,
        "settlement_head": head[0] if head else None,
        "settlement_head_ex": head_ex[0] if head_ex else None,
        "table_fee_logs": table_fees,
        "table_fee_adjusts": table_adjusts,
        "goods_sales": goods,
        "assistant_services": assistants,
        "groupbuy_redemptions": groupbuys,
        "platform_coupon_redemptions": pl_coupons,
        "balance_changes": balance_changes,
        "payments": payments,
        "refunds": refunds,
    }
def fetch_recharge_detail(cur, recharge_order_id: int) -> dict:
    """Collect the DWD rows related to one recharge order.

    Returns a dict holding the recharge order row and its ``_ex`` row
    (``None`` when absent), the member balance changes with ``from_type = 2``
    whose ``relate_id`` matches, and the payments with ``relate_type = 5``.
    """
    key = (recharge_order_id,)

    order_rows = query_rows(cur, "SELECT * FROM dwd.dwd_recharge_order WHERE recharge_order_id = %s", key)
    order_ex_rows = query_rows(cur, "SELECT * FROM dwd.dwd_recharge_order_ex WHERE recharge_order_id = %s", key)

    detail = {"recharge_order_id": recharge_order_id}
    detail["recharge_order"] = order_rows[0] if order_rows else None
    detail["recharge_order_ex"] = order_ex_rows[0] if order_ex_rows else None
    detail["balance_changes"] = query_rows(
        cur,
        """
        SELECT bc.*, bce.relate_id, bce.refund_amount AS ex_refund_amount,
        bce.operator_id AS ex_operator_id, bce.operator_name AS ex_operator_name,
        bce.principal_data
        FROM dwd.dwd_member_balance_change bc
        JOIN dwd.dwd_member_balance_change_ex bce USING (balance_change_id)
        WHERE bce.relate_id = %s AND bc.from_type = 2
        """,
        key,
    )
    detail["payments"] = query_rows(
        cur,
        "SELECT * FROM dwd.dwd_payment WHERE relate_id = %s AND relate_type = 5",
        key,
    )
    return detail
def fetch_refund_detail(cur, refund_id: int) -> dict:
    """Collect the DWD rows related to one refund.

    Returns a dict holding the refund row and its ``_ex`` row (``None`` when
    absent) plus the member balance changes with ``from_type = 3`` whose
    ``relate_id`` matches the refund id.
    """
    key = (refund_id,)

    refund_rows = query_rows(cur, "SELECT * FROM dwd.dwd_refund WHERE refund_id = %s", key)
    refund_ex_rows = query_rows(cur, "SELECT * FROM dwd.dwd_refund_ex WHERE refund_id = %s", key)

    detail = {"refund_id": refund_id}
    detail["refund"] = refund_rows[0] if refund_rows else None
    detail["refund_ex"] = refund_ex_rows[0] if refund_ex_rows else None
    detail["balance_changes"] = query_rows(
        cur,
        """
        SELECT bc.*, bce.relate_id, bce.refund_amount AS ex_refund_amount,
        bce.operator_id AS ex_operator_id, bce.operator_name AS ex_operator_name,
        bce.principal_data
        FROM dwd.dwd_member_balance_change bc
        JOIN dwd.dwd_member_balance_change_ex bce USING (balance_change_id)
        WHERE bce.relate_id = %s AND bc.from_type = 3
        """,
        key,
    )
    return detail
# ── 用例采样函数 ──────────────────────────────────────────
def sample_case(cur, label: str, where_sql: str, id_col: str = "order_settle_id",
                table: str = "dwd.dwd_settlement_head",
                fetch_fn=None, join_clause: str = "") -> dict:
    """Count the rows matching a condition and fetch details for a few of them.

    The SQL is assembled from *table*, *join_clause* and *where_sql*; up to
    ``SAMPLE_LIMIT`` values of *id_col* are read and each is passed to
    *fetch_fn* (``fetch_settlement_detail`` when omitted). A one-line summary
    tagged with *label* is printed.

    Returns:
        ``{"count": <total matching rows>, "samples": [<detail dict>, ...]}``.
    """
    fetcher = fetch_settlement_detail if fetch_fn is None else fetch_fn

    # Both statements share the same FROM / JOIN / WHERE tail.
    source = f"{table} {join_clause} WHERE {where_sql}"

    cur.execute(f"SELECT COUNT(*) FROM {source}")
    count = cur.fetchone()[0]

    sampled_ids = query_ids(cur, f"SELECT {id_col} FROM {source} LIMIT {SAMPLE_LIMIT}")
    samples = [fetcher(cur, sampled_id) for sampled_id in sampled_ids]

    print(f" [{label}] count={count}, sampled={len(samples)}")
    return {"count": count, "samples": samples}
# ── 维度一:结算类型 ──────────────────────────────────────
def dim_settle_type(cur) -> dict:
    """Dimension 1: sample settlement heads for settle_type 1, 3 and 7."""
    print("\n=== 维度一:结算类型 ===")
    head_table = "dwd.dwd_settlement_head"
    # (result key, log label, settle_type value)
    cases = (
        ("消费结算(settle_type=1)", "消费结算", 1),
        ("商品结算(settle_type=3)", "商品结算", 3),
        ("充值撤销(settle_type=7)", "充值撤销", 7),
    )
    return {
        key: sample_case(
            cur, label, f"settle_type = {settle_type} AND {TIME_FILTER}", table=head_table
        )
        for key, label, settle_type in cases
    }
# ── 维度二:支付方式(仅 settle_type=1)──────────────────
def dim_payment_method(cur) -> dict:
    """Dimension 2: sample settle_type=1 orders by payment-method combination.

    The settlement head is joined to its ``_ex`` table to reach
    ``online_amount``, ``cash_amount`` and ``card_amount``. After the named
    combinations, an extra "其他组合" bucket samples rows matching none of them.
    """
    print("\n=== 维度二:支付方式 ===")
    head = "dwd.dwd_settlement_head"
    ex_join = "JOIN dwd.dwd_settlement_head_ex ex USING (order_settle_id)"
    base = f"{head}.settle_type = 1 AND {head}.{TIME_FILTER}"

    def run(label: str, condition: str) -> dict:
        # Columns are fully qualified because the query joins two tables.
        return sample_case(
            cur, label, f"{base} AND {condition}",
            id_col=f"{head}.order_settle_id",
            table=head, join_clause=ex_join,
        )

    # Conditions only (without base) so they can be negated for the
    # catch-all bucket afterwards.
    conditions = {
        "纯优惠券/团购核销": " AND ".join([
            f"{head}.coupon_amount > 0",
            f"{head}.pay_amount = 0",
            f"{head}.balance_amount = 0",
            f"{head}.point_amount = 0",
        ]),
        "纯会员折扣全免": " AND ".join([
            f"{head}.member_discount_amount = {head}.consume_money",
            f"{head}.consume_money > 0",
            f"{head}.pay_amount = 0",
            f"{head}.balance_amount = 0",
            f"{head}.coupon_amount = 0",
            f"{head}.point_amount = 0",
        ]),
        "纯余额": " AND ".join([
            f"{head}.balance_amount > 0",
            "ex.online_amount = 0",
            "ex.cash_amount = 0",
            "ex.card_amount = 0",
        ]),
        "纯积分抵扣": " AND ".join([
            f"{head}.point_amount > 0",
            f"{head}.balance_amount = 0",
            "ex.online_amount = 0",
            "ex.cash_amount = 0",
        ]),
        "余额+积分": f"{head}.balance_amount > 0 AND {head}.point_amount > 0",
        "纯现金": f"ex.cash_amount > 0 AND {head}.balance_amount = 0 AND ex.online_amount = 0",
        "余额+现金": f"{head}.balance_amount > 0 AND ex.cash_amount > 0",
        "券+积分": f"{head}.coupon_amount > 0 AND {head}.point_amount > 0",
        "零消费": f"{head}.consume_money = 0 AND {head}.pay_amount = 0",
    }

    result = {label: run(label, condition) for label, condition in conditions.items()}

    # Other combinations: rows matching none of the conditions above.
    negated = [f"NOT ({condition})" for condition in conditions.values()]
    result["其他组合"] = run("其他组合", " AND ".join(negated))
    return result
# ── 维度三:消费类目(仅 settle_type=1)──────────────────
def dim_consumption_category(cur) -> dict:
    """Dimension 3: sample settle_type=1 orders by which fee categories are non-zero.

    Categories are built from ``table_charge_money``, ``goods_money`` and the
    two assistant amounts (``assistant_pd_money`` / ``assistant_cx_money``).
    """
    print("\n=== 维度三:消费类目 ===")
    head_table = "dwd.dwd_settlement_head"
    base = f"settle_type = 1 AND {TIME_FILTER}"

    no_assistant = "assistant_pd_money = 0 AND assistant_cx_money = 0"
    has_assistant = "(assistant_pd_money > 0 OR assistant_cx_money > 0)"

    # (result key, log label, condition appended to base)
    cases = (
        ("纯台费", "纯台费",
         f"table_charge_money > 0 AND goods_money = 0 AND {no_assistant}"),
        ("台费+商品", "台费+商品",
         f"table_charge_money > 0 AND goods_money > 0 AND {no_assistant}"),
        ("台费+助教", "台费+助教",
         f"table_charge_money > 0 AND goods_money = 0 AND {has_assistant}"),
        ("台费+商品+助教", "台费+商品+助教",
         f"table_charge_money > 0 AND goods_money > 0 AND {has_assistant}"),
        ("纯商品", "纯商品",
         "table_charge_money = 0 AND goods_money > 0"),
        ("零消费", "零消费(类目)",
         f"table_charge_money = 0 AND goods_money = 0 AND {no_assistant}"),
    )
    return {
        key: sample_case(cur, label, f"{base} AND {condition}", table=head_table)
        for key, label, condition in cases
    }
# ── 维度四:优惠类型 ──────────────────────────────────────
def dim_discount_type(cur) -> dict:
    """Dimension 4: sample settle_type=1 orders by discount combination.

    Combinations are expressed over ``coupon_amount``,
    ``member_discount_amount``, ``adjust_amount`` and ``point_amount``.
    """
    print("\n=== 维度四:优惠类型 ===")
    head_table = "dwd.dwd_settlement_head"
    base = f"settle_type = 1 AND {TIME_FILTER}"

    # The result key doubles as the log label for every case here.
    conditions = {
        "仅优惠券/团购": (
            "coupon_amount > 0 AND member_discount_amount = 0 "
            "AND adjust_amount = 0 AND point_amount = 0"
        ),
        "仅会员折扣": (
            "member_discount_amount > 0 AND coupon_amount = 0 "
            "AND adjust_amount = 0 AND point_amount = 0"
        ),
        "仅台费调整": (
            "adjust_amount != 0 AND coupon_amount = 0 "
            "AND member_discount_amount = 0 AND point_amount = 0"
        ),
        "仅积分抵扣": (
            "point_amount > 0 AND coupon_amount = 0 "
            "AND member_discount_amount = 0 AND adjust_amount = 0"
        ),
        "券+积分": "coupon_amount > 0 AND point_amount > 0",
        "券+台费调整": "coupon_amount > 0 AND adjust_amount != 0",
        "会员折扣+积分": "member_discount_amount > 0 AND point_amount > 0",
        "无优惠": (
            "coupon_amount = 0 AND member_discount_amount = 0 "
            "AND adjust_amount = 0 AND point_amount = 0"
        ),
    }
    return {
        label: sample_case(cur, label, f"{base} AND {condition}", table=head_table)
        for label, condition in conditions.items()
    }
# ── 维度五:特殊场景 ──────────────────────────────────────
def dim_special_cases(cur) -> dict:
    """Dimension 5: sample special scenarios.

    Two grouped cases (orders with >= 2 distinct tables, orders with >= 2
    distinct assistants) are sampled from derived tables; the rest are
    settlement heads where a given amount column is non-zero.
    """
    print("\n=== 维度五:特殊场景 ===")
    head_table = "dwd.dwd_settlement_head"
    base = f"{TIME_FILTER}"

    def sample_grouped(label: str, log_table: str, distinct_col: str) -> dict:
        # The derived table already applies the time filter, hence WHERE TRUE.
        derived = (
            "(SELECT order_settle_id "
            f"FROM {log_table} "
            f"WHERE {TIME_FILTER} "
            "GROUP BY order_settle_id "
            f"HAVING COUNT(DISTINCT {distinct_col}) >= 2) AS sub"
        )
        return sample_case(
            cur, label,
            "TRUE",
            id_col="sub.order_settle_id",
            table=derived,
            join_clause="",
        )

    result = {}
    # Multi-table merge: one order_settle_id linked to >= 2 distinct site_table_id.
    result["多台桌合并"] = sample_grouped("多台桌合并", "dwd.dwd_table_fee_log", "site_table_id")
    # Several assistants on the same order.
    result["多助教同台"] = sample_grouped("多助教同台", "dwd.dwd_assistant_service_log", "site_assistant_id")

    non_zero_columns = (
        ("含抹零", "rounding_amount"),
        ("含礼品卡", "gift_card_amount"),
        ("含充值卡支付", "recharge_card_amount"),
        ("含平台券销售", "pl_coupon_sale_amount"),
    )
    for label, column in non_zero_columns:
        result[label] = sample_case(cur, label, f"{base} AND {column} != 0", table=head_table)
    return result
# ── 维度六:充值场景 ──────────────────────────────────────
def dim_recharge(cur) -> dict:
    """Dimension 6: sample recharge orders (is_first = 1 / 2, and non-zero refund_amount)."""
    print("\n=== 维度六:充值场景 ===")
    recharge_table = "dwd.dwd_recharge_order"

    def run(label: str, condition: str) -> dict:
        return sample_case(
            cur, label, f"{condition} AND {TIME_FILTER}",
            id_col="recharge_order_id", table=recharge_table,
            fetch_fn=fetch_recharge_detail,
        )

    # (result key, log label, condition)
    cases = (
        ("首充(is_first=1)", "首充", "is_first = 1"),
        ("非首充(is_first=2)", "非首充", "is_first = 2"),
        ("含退款的充值", "含退款充值", "refund_amount != 0"),
    )
    return {key: run(label, condition) for key, label, condition in cases}
# ── 维度七:退款场景 ──────────────────────────────────────
def dim_refund(cur) -> dict:
    """Dimension 7: sample refunds by relate_type (1, 2, 5) and payment_method (4, 2)."""
    print("\n=== 维度七:退款场景 ===")
    refund_table = "dwd.dwd_refund"

    def run(label: str, condition: str) -> dict:
        return sample_case(
            cur, label, f"{condition} AND {TIME_FILTER}",
            id_col="refund_id", table=refund_table,
            fetch_fn=fetch_refund_detail,
        )

    # (result key, log label, condition)
    cases = (
        ("relate_type=1(结算退款)", "结算退款", "relate_type = 1"),
        ("relate_type=2(充值退款)", "充值退款", "relate_type = 2"),
        ("relate_type=5(转账退款)", "转账退款", "relate_type = 5"),
        ("payment_method=4(余额退款)", "余额退款", "payment_method = 4"),
        ("payment_method=2(线上退款)", "线上退款", "payment_method = 2"),
    )
    return {key: run(label, condition) for key, label, condition in cases}
# ── 主流程 ────────────────────────────────────────────────
def main():
    """Sample all seven dimensions and write the result as JSON to OUTPUT_FILE.

    The connection is always closed, and the cursor is released even when a
    query fails. The JSON file is only written after every dimension has been
    collected successfully.
    """
    conn = get_conn()
    try:
        # Log the target without echoing the raw DSN: a prefix of a
        # URI-style DSN can contain the username and password.
        # get_dsn_parameters() omits the password.
        target = conn.get_dsn_parameters()
        print(f"连接测试库: {target.get('host', '?')}/{target.get('dbname', '?')}")

        with conn.cursor() as cur:
            result = {
                "generated_at": datetime.now().isoformat(),
                "time_range": "最近5个月",
                "cases": {
                    "结算类型": dim_settle_type(cur),
                    "支付方式": dim_payment_method(cur),
                    "消费类目": dim_consumption_category(cur),
                    "优惠类型": dim_discount_type(cur),
                    "特殊场景": dim_special_cases(cur),
                    "充值场景": dim_recharge(cur),
                    "退款场景": dim_refund(cur),
                },
            }
    finally:
        conn.close()

    # Write the JSON report.
    with open(OUTPUT_FILE, "w", encoding="utf-8") as f:
        json.dump(result, f, ensure_ascii=False, indent=2, default=_default)
    print(f"\n✅ 输出完成: {OUTPUT_FILE}")
    print(f" 文件大小: {OUTPUT_FILE.stat().st_size / 1024:.1f} KB")


if __name__ == "__main__":
    main()