# -*- coding: utf-8 -*-
"""
One-off script: sample consumption use-case orders from the test_etl_feiqiu DWD layer.

Orders are classified along 7 dimensions; every use case keeps at most
``SAMPLE_LIMIT`` (10) samples. The result is written to
``EXPORT_ROOT / "REPORTS" / "consumption_cases_sample.json"``.

NOTE(review): the original header described the target as
``{EXPORT_ROOT}/ETL-Connectors/feiqiu/REPORTS/consumption_cases_sample.json``;
presumably ``get_output_path("EXPORT_ROOT")`` already resolves into that
directory - confirm against ``_env_paths``.
"""
from __future__ import annotations

import json
import os
import re
from datetime import datetime, date
from decimal import Decimal
from pathlib import Path

# ── Environment loading ───────────────────────────────────
# Per the original author's note, _env_paths calls load_dotenv internally.
from _env_paths import get_output_path

TEST_DB_DSN = os.environ.get("TEST_DB_DSN")
if not TEST_DB_DSN:
    raise RuntimeError(
        "环境变量 TEST_DB_DSN 未定义。请在根 .env 中配置测试库连接串。"
    )

import psycopg2
import psycopg2.extras
import psycopg2.extensions as ext

# ── Output paths ──────────────────────────────────────────
EXPORT_ROOT = get_output_path("EXPORT_ROOT")
OUTPUT_DIR = EXPORT_ROOT / "REPORTS"
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
OUTPUT_FILE = OUTPUT_DIR / "consumption_cases_sample.json"

# ── Time range ────────────────────────────────────────────
TIME_FILTER = "create_time >= NOW() - INTERVAL '5 months'"
SAMPLE_LIMIT = 10

# PostgreSQL type OIDs for ``timestamp`` and ``timestamptz``.
_TIMESTAMP_OIDS = (1114, 1184)


# ── JSON serialisation ────────────────────────────────────
def _default(obj):
    """``json.dump`` fallback: Decimal -> float, date/datetime -> ISO-8601 string.

    Raises:
        TypeError: for any other type, so unexpected values are not silently
            stringified.
    """
    if isinstance(obj, Decimal):
        return float(obj)
    if isinstance(obj, (datetime, date)):
        return obj.isoformat()
    raise TypeError(f"无法序列化类型: {type(obj)}")


def _mask_dsn(dsn: str) -> str:
    """Return *dsn* with any password replaced by ``***`` so it is safe to print.

    Handles both the URI form (``scheme://user:password@host/db``) and the
    keyword form (``host=... password=...``).
    """
    # URI form: everything between "user:" and the last "@" before the path.
    masked = re.sub(r"(://[^:/@\s]+):[^/\s]*@", r"\1:***@", dsn)
    # Keyword form: password=value or password='quoted value'.
    masked = re.sub(
        r"(?i)(password\s*=\s*)('(?:[^'\\]|\\.)*'|\S+)", r"\1***", masked
    )
    return masked


# ── Database helpers ──────────────────────────────────────
def get_conn():
    """Open a connection to the test database.

    timestamp / timestamptz columns are returned as raw strings on this
    connection, so that abnormal years (e.g. year=-1) do not make Python's
    datetime parsing fail.
    """
    conn = psycopg2.connect(TEST_DB_DSN)
    text_caster = ext.new_type(
        _TIMESTAMP_OIDS,
        "TEXT_TIMESTAMP",
        lambda val, cur: val,  # pass the server's text representation through
    )
    ext.register_type(text_caster, conn)
    return conn


def query_rows(cur, sql, params=None) -> list[dict]:
    """Execute *sql* and return every row as a ``{column_name: value}`` dict.

    Returns an empty list when the statement produces no result set.

    When the result set contains the same column name more than once (for
    example ``SELECT r.*, re.* ... LEFT JOIN``), the FIRST occurrence wins.
    Otherwise the NULLs of an unmatched LEFT JOIN side would overwrite the
    driving table's real values (such as ``refund_id``).
    """
    cur.execute(sql, params or ())
    if cur.description is None:
        return []
    cols = [d[0] for d in cur.description]
    rows = []
    for record in cur.fetchall():
        item = {}
        for name, value in zip(cols, record):
            item.setdefault(name, value)
        rows.append(item)
    return rows


def query_ids(cur, sql, params=None) -> list:
    """Execute *sql* and return the first column of every row as a list of IDs."""
    cur.execute(sql, params or ())
    return [row[0] for row in cur.fetchall()]


def _first_or_none(rows: list[dict]):
    """Return the first row of *rows*, or ``None`` when it is empty."""
    return rows[0] if rows else None


def _fetch_balance_changes(cur, relate_id: int, from_type: int) -> list[dict]:
    """Return member balance changes linked to *relate_id* for one ``from_type``.

    ``relate_id`` lives in the ``_ex`` table while ``from_type`` lives in the
    main table, hence the join.
    """
    return query_rows(
        cur,
        """
        SELECT bc.*, bce.relate_id, bce.refund_amount AS ex_refund_amount,
               bce.operator_id AS ex_operator_id,
               bce.operator_name AS ex_operator_name,
               bce.principal_data
        FROM dwd.dwd_member_balance_change bc
        JOIN dwd.dwd_member_balance_change_ex bce USING (balance_change_id)
        WHERE bce.relate_id = %s AND bc.from_type = %s
        """,
        (relate_id, from_type),
    )


# ── Related-table lookups ─────────────────────────────────
def fetch_settlement_detail(cur, order_settle_id: int) -> dict:
    """Collect the rows of every table related to one ``order_settle_id``."""
    key = (order_settle_id,)
    head = query_rows(
        cur, "SELECT * FROM dwd.dwd_settlement_head WHERE order_settle_id = %s", key)
    head_ex = query_rows(
        cur, "SELECT * FROM dwd.dwd_settlement_head_ex WHERE order_settle_id = %s", key)
    table_fees = query_rows(
        cur, "SELECT * FROM dwd.dwd_table_fee_log WHERE order_settle_id = %s", key)
    table_adjusts = query_rows(
        cur, "SELECT * FROM dwd.dwd_table_fee_adjust WHERE order_settle_id = %s", key)
    goods = query_rows(
        cur, "SELECT * FROM dwd.dwd_store_goods_sale WHERE order_settle_id = %s", key)
    assistants = query_rows(
        cur, "SELECT * FROM dwd.dwd_assistant_service_log WHERE order_settle_id = %s", key)
    groupbuys = query_rows(
        cur, "SELECT * FROM dwd.dwd_groupbuy_redemption WHERE order_settle_id = %s", key)

    # platform_coupon_redemption is linked through site_order_id, which
    # corresponds to settlement_head.order_trade_no.
    order_trade_no = head[0]["order_trade_no"] if head else None
    if order_trade_no:
        pl_coupons = query_rows(
            cur,
            "SELECT * FROM dwd.dwd_platform_coupon_redemption WHERE site_order_id = %s",
            (order_trade_no,),
        )
    else:
        pl_coupons = []

    # Member balance changes caused by this settlement (from_type = 1).
    balance_changes = _fetch_balance_changes(cur, order_settle_id, 1)

    # Payment records.
    payments = query_rows(
        cur,
        "SELECT * FROM dwd.dwd_payment WHERE relate_id = %s AND relate_type = 2",
        key,
    )

    # Refund records (main row plus optional _ex row).
    refunds = query_rows(
        cur,
        "SELECT r.*, re.* FROM dwd.dwd_refund r LEFT JOIN dwd.dwd_refund_ex re USING (refund_id) WHERE r.relate_id = %s",
        key,
    )

    return {
        "order_settle_id": order_settle_id,
        "settlement_head": _first_or_none(head),
        "settlement_head_ex": _first_or_none(head_ex),
        "table_fee_logs": table_fees,
        "table_fee_adjusts": table_adjusts,
        "goods_sales": goods,
        "assistant_services": assistants,
        "groupbuy_redemptions": groupbuys,
        "platform_coupon_redemptions": pl_coupons,
        "balance_changes": balance_changes,
        "payments": payments,
        "refunds": refunds,
    }


def fetch_recharge_detail(cur, recharge_order_id: int) -> dict:
    """Collect the rows of every table related to one ``recharge_order_id``."""
    key = (recharge_order_id,)
    main = query_rows(
        cur, "SELECT * FROM dwd.dwd_recharge_order WHERE recharge_order_id = %s", key)
    ex = query_rows(
        cur, "SELECT * FROM dwd.dwd_recharge_order_ex WHERE recharge_order_id = %s", key)
    balance_changes = _fetch_balance_changes(cur, recharge_order_id, 2)
    payments = query_rows(
        cur,
        "SELECT * FROM dwd.dwd_payment WHERE relate_id = %s AND relate_type = 5",
        key,
    )
    return {
        "recharge_order_id": recharge_order_id,
        "recharge_order": _first_or_none(main),
        "recharge_order_ex": _first_or_none(ex),
        "balance_changes": balance_changes,
        "payments": payments,
    }


def fetch_refund_detail(cur, refund_id: int) -> dict:
    """Collect the rows of every table related to one ``refund_id``."""
    key = (refund_id,)
    main = query_rows(cur, "SELECT * FROM dwd.dwd_refund WHERE refund_id = %s", key)
    ex = query_rows(cur, "SELECT * FROM dwd.dwd_refund_ex WHERE refund_id = %s", key)
    balance_changes = _fetch_balance_changes(cur, refund_id, 3)
    return {
        "refund_id": refund_id,
        "refund": _first_or_none(main),
        "refund_ex": _first_or_none(ex),
        "balance_changes": balance_changes,
    }


# ── Generic use-case sampler ──────────────────────────────
def sample_case(cur, label: str, where_sql: str,
                id_col: str = "order_settle_id",
                table: str = "dwd.dwd_settlement_head",
                fetch_fn=None,
                join_clause: str = "") -> dict:
    """Count the rows matching a condition and fetch details for a few of them.

    Args:
        cur: open database cursor.
        label: short name printed in the progress line.
        where_sql: SQL boolean expression placed after ``WHERE``. It is
            interpolated verbatim, so it must only come from this script's
            own constants, never from external input.
        id_col: column whose values are passed to *fetch_fn*.
        table: table name (or parenthesised subquery with alias) for ``FROM``.
        fetch_fn: ``fetch_fn(cur, id) -> dict``; defaults to
            ``fetch_settlement_detail``.
        join_clause: optional JOIN text inserted between ``FROM`` and ``WHERE``.

    Returns:
        ``{"count": <total matching rows>, "samples": [<detail dict>, ...]}``
        with at most ``SAMPLE_LIMIT`` samples. There is no ORDER BY, so which
        rows are sampled is decided by the database.
    """
    if fetch_fn is None:
        fetch_fn = fetch_settlement_detail

    source = f"FROM {table} {join_clause} WHERE {where_sql}"

    cur.execute(f"SELECT COUNT(*) {source}")
    count = cur.fetchone()[0]

    ids = query_ids(cur, f"SELECT {id_col} {source} LIMIT {SAMPLE_LIMIT}")
    samples = [fetch_fn(cur, _id) for _id in ids]

    print(f" [{label}] count={count}, sampled={len(samples)}")
    return {"count": count, "samples": samples}


# ── Dimension 1: settlement type ──────────────────────────
def dim_settle_type(cur) -> dict:
    """Sample settlements by ``settle_type`` (1, 3 and 7)."""
    print("\n=== 维度一:结算类型 ===")
    h = "dwd.dwd_settlement_head"
    tf = TIME_FILTER
    return {
        "消费结算(settle_type=1)": sample_case(cur, "消费结算", f"settle_type = 1 AND {tf}", table=h),
        "商品结算(settle_type=3)": sample_case(cur, "商品结算", f"settle_type = 3 AND {tf}", table=h),
        "充值撤销(settle_type=7)": sample_case(cur, "充值撤销", f"settle_type = 7 AND {tf}", table=h),
    }


# ── Dimension 2: payment method (settle_type = 1 only) ────
def dim_payment_method(cur) -> dict:
    """Sample ``settle_type = 1`` settlements by payment-method combination.

    The named buckets are not mutually exclusive. The final "其他组合" bucket
    holds every row for which none of the named conditions is true.
    """
    print("\n=== 维度二:支付方式 ===")
    # Python-side shorthand for the fully qualified head table name; columns
    # must be qualified because the _ex table is joined in.
    s = "dwd.dwd_settlement_head"
    # The _ex table provides online_amount, cash_amount and card_amount.
    join = "JOIN dwd.dwd_settlement_head_ex ex USING (order_settle_id)"
    base = f"{s}.settle_type = 1 AND {s}.{TIME_FILTER}"

    # Per-bucket conditions (without base); reused below for the exclusion.
    cond_map = {
        "纯优惠券/团购核销": (
            f"{s}.coupon_amount > 0 AND {s}.pay_amount = 0 "
            f"AND {s}.balance_amount = 0 AND {s}.point_amount = 0"
        ),
        "纯会员折扣全免": (
            f"{s}.member_discount_amount = {s}.consume_money AND {s}.consume_money > 0 "
            f"AND {s}.pay_amount = 0 AND {s}.balance_amount = 0 "
            f"AND {s}.coupon_amount = 0 AND {s}.point_amount = 0"
        ),
        "纯余额": (
            f"{s}.balance_amount > 0 "
            f"AND ex.online_amount = 0 AND ex.cash_amount = 0 AND ex.card_amount = 0"
        ),
        "纯积分抵扣": (
            f"{s}.point_amount > 0 AND {s}.balance_amount = 0 "
            f"AND ex.online_amount = 0 AND ex.cash_amount = 0"
        ),
        "余额+积分": f"{s}.balance_amount > 0 AND {s}.point_amount > 0",
        "纯现金": f"ex.cash_amount > 0 AND {s}.balance_amount = 0 AND ex.online_amount = 0",
        "余额+现金": f"{s}.balance_amount > 0 AND ex.cash_amount > 0",
        "券+积分": f"{s}.coupon_amount > 0 AND {s}.point_amount > 0",
        "零消费": f"{s}.consume_money = 0 AND {s}.pay_amount = 0",
    }

    result = {}
    for label, cond in cond_map.items():
        result[label] = sample_case(
            cur, label, f"{base} AND {cond}",
            id_col=f"{s}.order_settle_id",
            table=s,
            join_clause=join,
        )

    # Other combinations: rows matching none of the conditions above.
    # "(cond) IS NOT TRUE" is used instead of "NOT (cond)" because the latter
    # evaluates to NULL when any referenced amount is NULL, which would drop
    # such rows from every bucket including this one.
    exclude_parts = " AND ".join(f"({cond}) IS NOT TRUE" for cond in cond_map.values())
    result["其他组合"] = sample_case(
        cur, "其他组合", f"{base} AND {exclude_parts}",
        id_col=f"{s}.order_settle_id",
        table=s,
        join_clause=join,
    )
    return result


# ── Dimension 3: consumption category (settle_type = 1 only) ──
def dim_consumption_category(cur) -> dict:
    """Sample ``settle_type = 1`` settlements by table-fee / goods / assistant mix."""
    print("\n=== 维度三:消费类目 ===")
    h = "dwd.dwd_settlement_head"
    base = f"settle_type = 1 AND {TIME_FILTER}"

    return {
        "纯台费": sample_case(
            cur, "纯台费",
            f"{base} AND table_charge_money > 0 AND goods_money = 0 "
            f"AND assistant_pd_money = 0 AND assistant_cx_money = 0",
            table=h,
        ),
        "台费+商品": sample_case(
            cur, "台费+商品",
            f"{base} AND table_charge_money > 0 AND goods_money > 0 "
            f"AND assistant_pd_money = 0 AND assistant_cx_money = 0",
            table=h,
        ),
        "台费+助教": sample_case(
            cur, "台费+助教",
            f"{base} AND table_charge_money > 0 AND goods_money = 0 "
            f"AND (assistant_pd_money > 0 OR assistant_cx_money > 0)",
            table=h,
        ),
        "台费+商品+助教": sample_case(
            cur, "台费+商品+助教",
            f"{base} AND table_charge_money > 0 AND goods_money > 0 "
            f"AND (assistant_pd_money > 0 OR assistant_cx_money > 0)",
            table=h,
        ),
        "纯商品": sample_case(
            cur, "纯商品",
            f"{base} AND table_charge_money = 0 AND goods_money > 0",
            table=h,
        ),
        "零消费": sample_case(
            cur, "零消费(类目)",
            f"{base} AND table_charge_money = 0 AND goods_money = 0 "
            f"AND assistant_pd_money = 0 AND assistant_cx_money = 0",
            table=h,
        ),
    }


# ── Dimension 4: discount type ────────────────────────────
def dim_discount_type(cur) -> dict:
    """Sample ``settle_type = 1`` settlements by which discount amounts are set."""
    print("\n=== 维度四:优惠类型 ===")
    h = "dwd.dwd_settlement_head"
    base = f"settle_type = 1 AND {TIME_FILTER}"

    return {
        "仅优惠券/团购": sample_case(
            cur, "仅优惠券/团购",
            f"{base} AND coupon_amount > 0 AND member_discount_amount = 0 "
            f"AND adjust_amount = 0 AND point_amount = 0",
            table=h,
        ),
        "仅会员折扣": sample_case(
            cur, "仅会员折扣",
            f"{base} AND member_discount_amount > 0 AND coupon_amount = 0 "
            f"AND adjust_amount = 0 AND point_amount = 0",
            table=h,
        ),
        "仅台费调整": sample_case(
            cur, "仅台费调整",
            f"{base} AND adjust_amount != 0 AND coupon_amount = 0 "
            f"AND member_discount_amount = 0 AND point_amount = 0",
            table=h,
        ),
        "仅积分抵扣": sample_case(
            cur, "仅积分抵扣",
            f"{base} AND point_amount > 0 AND coupon_amount = 0 "
            f"AND member_discount_amount = 0 AND adjust_amount = 0",
            table=h,
        ),
        "券+积分": sample_case(
            cur, "券+积分",
            f"{base} AND coupon_amount > 0 AND point_amount > 0",
            table=h,
        ),
        "券+台费调整": sample_case(
            cur, "券+台费调整",
            f"{base} AND coupon_amount > 0 AND adjust_amount != 0",
            table=h,
        ),
        "会员折扣+积分": sample_case(
            cur, "会员折扣+积分",
            f"{base} AND member_discount_amount > 0 AND point_amount > 0",
            table=h,
        ),
        "无优惠": sample_case(
            cur, "无优惠",
            f"{base} AND coupon_amount = 0 AND member_discount_amount = 0 "
            f"AND adjust_amount = 0 AND point_amount = 0",
            table=h,
        ),
    }


# ── Dimension 5: special scenarios ────────────────────────
def dim_special_cases(cur) -> dict:
    """Sample special scenarios: multi-table, multi-assistant and rare amounts."""
    print("\n=== 维度五:特殊场景 ===")
    h = "dwd.dwd_settlement_head"
    base = TIME_FILTER

    def grouped_subquery(source_table: str, distinct_col: str) -> str:
        # Settlements having >= 2 distinct values of distinct_col in source_table.
        return (
            "(SELECT order_settle_id "
            f"FROM {source_table} "
            f"WHERE {TIME_FILTER} "
            "GROUP BY order_settle_id "
            f"HAVING COUNT(DISTINCT {distinct_col}) >= 2) AS sub"
        )

    # Multi-table merge: one order_settle_id linked to >= 2 distinct site_table_id.
    multi_table = sample_case(
        cur, "多台桌合并",
        "TRUE",  # filtering already happens inside the subquery
        id_col="sub.order_settle_id",
        table=grouped_subquery("dwd.dwd_table_fee_log", "site_table_id"),
        join_clause="",
    )

    # Several assistants on the same settlement.
    multi_assistant = sample_case(
        cur, "多助教同台",
        "TRUE",  # filtering already happens inside the subquery
        id_col="sub.order_settle_id",
        table=grouped_subquery("dwd.dwd_assistant_service_log", "site_assistant_id"),
        join_clause="",
    )

    return {
        "多台桌合并": multi_table,
        "多助教同台": multi_assistant,
        "含抹零": sample_case(cur, "含抹零", f"{base} AND rounding_amount != 0", table=h),
        "含礼品卡": sample_case(cur, "含礼品卡", f"{base} AND gift_card_amount != 0", table=h),
        "含充值卡支付": sample_case(cur, "含充值卡支付", f"{base} AND recharge_card_amount != 0", table=h),
        "含平台券销售": sample_case(cur, "含平台券销售", f"{base} AND pl_coupon_sale_amount != 0", table=h),
    }


# ── Dimension 6: recharge scenarios ───────────────────────
def dim_recharge(cur) -> dict:
    """Sample recharge orders: first recharge, later recharge, with refund."""
    print("\n=== 维度六:充值场景 ===")
    r = "dwd.dwd_recharge_order"
    tf = TIME_FILTER
    cases = (
        ("首充(is_first=1)", "首充", f"is_first = 1 AND {tf}"),
        ("非首充(is_first=2)", "非首充", f"is_first = 2 AND {tf}"),
        ("含退款的充值", "含退款充值", f"refund_amount != 0 AND {tf}"),
    )
    return {
        key: sample_case(
            cur, label, where,
            id_col="recharge_order_id", table=r, fetch_fn=fetch_recharge_detail,
        )
        for key, label, where in cases
    }


# ── Dimension 7: refund scenarios ─────────────────────────
def dim_refund(cur) -> dict:
    """Sample refunds by ``relate_type`` and by ``payment_method``."""
    print("\n=== 维度七:退款场景 ===")
    rf = "dwd.dwd_refund"
    tf = TIME_FILTER
    cases = (
        ("relate_type=1(结算退款)", "结算退款", f"relate_type = 1 AND {tf}"),
        ("relate_type=2(充值退款)", "充值退款", f"relate_type = 2 AND {tf}"),
        ("relate_type=5(转账退款)", "转账退款", f"relate_type = 5 AND {tf}"),
        ("payment_method=4(余额退款)", "余额退款", f"payment_method = 4 AND {tf}"),
        ("payment_method=2(线上退款)", "线上退款", f"payment_method = 2 AND {tf}"),
    )
    return {
        key: sample_case(
            cur, label, where,
            id_col="refund_id", table=rf, fetch_fn=fetch_refund_detail,
        )
        for key, label, where in cases
    }


# ── Main flow ─────────────────────────────────────────────
def main():
    """Run all seven dimensions against the test DB and dump the result as JSON."""
    # Never print the raw DSN: a prefix of it usually contains the password.
    print(f"连接测试库: {_mask_dsn(TEST_DB_DSN)}")
    conn = get_conn()
    try:
        # The cursor is closed even when one of the dimension queries fails.
        with conn.cursor() as cur:
            result = {
                "generated_at": datetime.now().isoformat(),
                "time_range": "最近5个月",
                "cases": {
                    "结算类型": dim_settle_type(cur),
                    "支付方式": dim_payment_method(cur),
                    "消费类目": dim_consumption_category(cur),
                    "优惠类型": dim_discount_type(cur),
                    "特殊场景": dim_special_cases(cur),
                    "充值场景": dim_recharge(cur),
                    "退款场景": dim_refund(cur),
                },
            }
    finally:
        conn.close()

    # Write the JSON report.
    with open(OUTPUT_FILE, "w", encoding="utf-8") as f:
        json.dump(result, f, ensure_ascii=False, indent=2, default=_default)

    print(f"\n✅ 输出完成: {OUTPUT_FILE}")
    print(f" 文件大小: {OUTPUT_FILE.stat().st_size / 1024:.1f} KB")


if __name__ == "__main__":
    main()