# -*- coding: utf-8 -*- """生成 2025年10-12月 助教排行榜 + 助教详情表(CSV + MD)。 输出目录:etl_billiards/docs/table_2025-12-19 注意:客户流水/充值归因涉及“多助教/多订单命中”时按全额复制计入,会导致助教汇总>门店汇总,表格说明会写明。 """ from __future__ import annotations import csv import re from dataclasses import dataclass from decimal import Decimal from pathlib import Path from statistics import median from typing import Any import psycopg2 import psycopg2.extras SITE_ID = 2790685415443269 TZ = "Asia/Shanghai" WIN_OCT = ("2025-10-01 00:00:00+08", "2025-11-01 00:00:00+08") WIN_NOV = ("2025-11-01 00:00:00+08", "2025-12-01 00:00:00+08") WIN_DEC = ("2025-12-01 00:00:00+08", "2026-01-01 00:00:00+08") WIN_ALL = (WIN_OCT[0], WIN_DEC[1]) MONTHS = [ ("2025-10", "10月", WIN_OCT), ("2025-11", "11月", WIN_NOV), ("2025-12", "12月", WIN_DEC), ] REPO_ROOT = Path(__file__).resolve().parents[3] ENV_PATH = REPO_ROOT / "etl_billiards" / ".env" OUT_DIR = Path(__file__).resolve().parent @dataclass(frozen=True) class SqlBlock: title: str sql: str def read_pg_dsn() -> str: text = ENV_PATH.read_text(encoding="utf-8") m = re.search(r"^PG_DSN=(.+)$", text, re.M) if not m: raise RuntimeError(f"未在 {ENV_PATH} 中找到 PG_DSN") return m.group(1).strip() def conn(): return psycopg2.connect(read_pg_dsn(), connect_timeout=10) def sanitize_filename(name: str) -> str: name = name.strip() name = re.sub(r"[<>:\"/\\|?*]+", "_", name) name = re.sub(r"\s+", " ", name) return name def d(v: Any) -> Decimal: if v is None: return Decimal("0") if isinstance(v, Decimal): return v return Decimal(str(v)) def fmt_money(v: Any) -> str: return f"{d(v):.2f}" def fmt_hours(v: Any, digits: int = 2) -> str: q = Decimal("1").scaleb(-digits) return f"{d(v).quantize(q):f}h" def write_csv(path: Path, title: str, description: str, header_rows: list[list[str]], rows: list[list[Any]]) -> None: path.parent.mkdir(parents=True, exist_ok=True) with path.open("w", newline="", encoding="utf-8") as f: w = csv.writer(f) w.writerow([title]) w.writerow([description]) w.writerow([]) for hr in header_rows: w.writerow(hr) for r in rows: w.writerow(["" if v is None else v for v in r]) def write_csv_sections(path: Path, title: str, description: str, section_rows: list[list[Any]]) -> None: path.parent.mkdir(parents=True, exist_ok=True) with path.open("w", newline="", encoding="utf-8") as f: w = csv.writer(f) w.writerow([title]) w.writerow([description]) w.writerow([]) for r in section_rows: w.writerow(["" if v is None else v for v in r]) def write_md(path: Path, title: str, thinking: str, description: str, sql_blocks: list[SqlBlock]) -> None: parts: list[str] = [] parts.append(f"# {title}\n") parts.append("## 思考过程\n") parts.append(thinking.strip() + "\n") parts.append("\n## 查询说明\n") parts.append(description.strip() + "\n") parts.append("\n## SQL\n") for b in sql_blocks: parts.append(f"\n### {b.title}\n") parts.append("```sql\n") parts.append(b.sql.strip() + "\n") parts.append("```\n") path.parent.mkdir(parents=True, exist_ok=True) path.write_text("".join(parts), encoding="utf-8") def fetch_all(cur, sql: str, params: dict[str, Any]) -> list[dict[str, Any]]: cur.execute(sql, params) return list(cur.fetchall()) def month_case(ts_expr: str) -> str: parts = [] for month_key, _, (ws, we) in MONTHS: parts.append( f"when {ts_expr} >= '{ws}'::timestamptz and {ts_expr} < '{we}'::timestamptz then '{month_key}'" ) return "case " + " ".join(parts) + " else null end" def sql_order_base(window_start: str, window_end: str) -> str: return f""" with base_orders as ( select tfl.order_settle_id, max(tfl.member_id) as member_id, min(tfl.start_use_time) as order_start_time, max(tfl.ledger_end_time) as order_end_time, sum(tfl.ledger_amount) as table_amount from billiards_dwd.dwd_table_fee_log tfl where tfl.site_id = %(site_id)s and coalesce(tfl.is_delete,0) = 0 and tfl.start_use_time >= '{window_start}'::timestamptz and tfl.start_use_time < '{window_end}'::timestamptz group by tfl.order_settle_id ), assistant_amount as ( select asl.order_settle_id, sum(asl.ledger_amount) as assistant_amount from billiards_dwd.dwd_assistant_service_log asl join base_orders bo on bo.order_settle_id = asl.order_settle_id where asl.site_id = %(site_id)s and coalesce(asl.is_delete,0) = 0 group by asl.order_settle_id ), goods_amount as ( select g.order_settle_id, sum(g.ledger_amount) as goods_amount from billiards_dwd.dwd_store_goods_sale g join base_orders bo on bo.order_settle_id = g.order_settle_id where g.site_id = %(site_id)s and coalesce(g.is_delete,0) = 0 group by g.order_settle_id ), orders as ( select bo.order_settle_id, bo.member_id, bo.order_start_time, bo.order_end_time, coalesce(bo.table_amount,0) + coalesce(a.assistant_amount,0) + coalesce(g.goods_amount,0) as order_amount from base_orders bo left join assistant_amount a on a.order_settle_id = bo.order_settle_id left join goods_amount g on g.order_settle_id = bo.order_settle_id ) """ def dense_rank_desc(values: dict[str, Decimal]) -> dict[str, int]: uniq = sorted({v for v in values.values() if v > 0}, reverse=True) rank_map = {v: i + 1 for i, v in enumerate(uniq)} return {k: rank_map.get(v, 0) for k, v in values.items()} def calc_diff(all_values: dict[str, Decimal], current: Decimal) -> tuple[Decimal, Decimal]: xs = [v for v in all_values.values() if v > 0] if not xs or current <= 0: return Decimal("0"), Decimal("0") avg = sum(xs) / Decimal(len(xs)) med = Decimal(str(median([float(v) for v in xs]))) return current - avg, current - med def main() -> None: OUT_DIR.mkdir(parents=True, exist_ok=True) with conn() as c, c.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur: assistants_rows = fetch_all( cur, """ select distinct nickname as assistant from billiards_dwd.dwd_assistant_service_log where site_id=%(site_id)s and coalesce(is_delete,0)=0 and start_use_time >= %(window_start)s::timestamptz and start_use_time < %(window_end)s::timestamptz order by assistant; """, {"site_id": SITE_ID, "window_start": WIN_ALL[0], "window_end": WIN_ALL[1]}, ) assistants = [r["assistant"] for r in assistants_rows if r.get("assistant")] # 助教-客户-月份:服务时长 sql_svc = f""" with raw as ( select asl.nickname as assistant, asl.tenant_member_id as member_id, {month_case('asl.start_use_time')} as month_key, asl.order_assistant_type, asl.income_seconds from billiards_dwd.dwd_assistant_service_log asl where asl.site_id=%(site_id)s and coalesce(asl.is_delete,0)=0 and asl.start_use_time >= %(window_start)s::timestamptz and asl.start_use_time < %(window_end)s::timestamptz and asl.tenant_member_id is not null and asl.tenant_member_id<>0 ) select assistant, member_id, month_key, sum(case when order_assistant_type=1 then income_seconds else 0 end)/3600.0 as base_hours, sum(case when order_assistant_type=2 then income_seconds else 0 end)/3600.0 as extra_hours from raw where month_key is not null group by assistant, member_id, month_key; """ svc_rows = fetch_all(cur, sql_svc, {"site_id": SITE_ID, "window_start": WIN_ALL[0], "window_end": WIN_ALL[1]}) # 助教-客户-月份:客户流水 sql_rev = sql_order_base(WIN_ALL[0], WIN_ALL[1]) + f""" , assistant_orders as ( select distinct order_settle_id, nickname as assistant from billiards_dwd.dwd_assistant_service_log where site_id=%(site_id)s and coalesce(is_delete,0)=0 and start_use_time >= %(window_start)s::timestamptz and start_use_time < %(window_end)s::timestamptz ), raw as ( select ao.assistant, o.member_id, {month_case('o.order_start_time')} as month_key, o.order_amount from orders o join assistant_orders ao on ao.order_settle_id=o.order_settle_id where o.member_id is not null and o.member_id<>0 ) select assistant, member_id, month_key, sum(order_amount) as revenue_amount from raw where month_key is not null group by assistant, member_id, month_key; """ rev_rows = fetch_all(cur, sql_rev, {"site_id": SITE_ID, "window_start": WIN_ALL[0], "window_end": WIN_ALL[1]}) # 助教-客户-月份:充值归因 sql_rech = f""" with base_orders as ( select tfl.order_settle_id, max(tfl.member_id) as member_id, min(tfl.start_use_time) as table_start_time, max(tfl.ledger_end_time) as table_end_time from billiards_dwd.dwd_table_fee_log tfl where tfl.site_id=%(site_id)s and coalesce(tfl.is_delete,0)=0 and tfl.start_use_time >= %(window_start)s::timestamptz and tfl.start_use_time < %(window_end)s::timestamptz group by tfl.order_settle_id ), assistant_time as ( select asl.order_settle_id, min(asl.start_use_time) as assistant_start_time, max(asl.last_use_time) as assistant_end_time from billiards_dwd.dwd_assistant_service_log asl join base_orders bo on bo.order_settle_id=asl.order_settle_id where asl.site_id=%(site_id)s and coalesce(asl.is_delete,0)=0 group by asl.order_settle_id ), order_windows as ( select bo.order_settle_id, bo.member_id, least(bo.table_start_time, coalesce(at.assistant_start_time, bo.table_start_time)) as win_start, greatest(bo.table_end_time, coalesce(at.assistant_end_time, bo.table_end_time)) as win_end from base_orders bo left join assistant_time at on at.order_settle_id=bo.order_settle_id where bo.member_id is not null and bo.member_id<>0 ), assistant_orders as ( select distinct order_settle_id, nickname as assistant from billiards_dwd.dwd_assistant_service_log where site_id=%(site_id)s and coalesce(is_delete,0)=0 and start_use_time >= %(window_start)s::timestamptz and start_use_time < %(window_end)s::timestamptz ), recharge_pay as ( select p.pay_time, r.member_id, p.pay_amount from billiards_dwd.dwd_payment p join billiards_dwd.dwd_recharge_order r on r.recharge_order_id=p.relate_id where p.site_id=%(site_id)s and p.relate_type=5 and p.pay_status=2 and p.pay_amount>0 and p.pay_time >= %(window_start)s::timestamptz and p.pay_time < %(window_end)s::timestamptz ), matched as ( select rp.pay_time, ow.order_settle_id, ow.member_id, rp.pay_amount from recharge_pay rp join order_windows ow on ow.member_id=rp.member_id and rp.pay_time >= ow.win_start - interval '30 minutes' and rp.pay_time <= ow.win_end + interval '30 minutes' ), raw as ( select ao.assistant, m.member_id, {month_case('m.pay_time')} as month_key, m.pay_amount from matched m join assistant_orders ao on ao.order_settle_id=m.order_settle_id ) select assistant, member_id, month_key, sum(pay_amount) as recharge_amount from raw where month_key is not null group by assistant, member_id, month_key; """ rech_rows = fetch_all(cur, sql_rech, {"site_id": SITE_ID, "window_start": WIN_ALL[0], "window_end": WIN_ALL[1]}) # 汇总:月度助教指标 svc_map = {mk: {a: {"base": Decimal('0'), "extra": Decimal('0')} for a in assistants} for mk,_,_ in MONTHS} for r in svc_rows: mk = r["month_key"]; a = r["assistant"] if mk in svc_map and a in svc_map[mk]: svc_map[mk][a]["base"] += d(r["base_hours"]) svc_map[mk][a]["extra"] += d(r["extra_hours"]) revenue_map = {mk: {a: Decimal('0') for a in assistants} for mk,_,_ in MONTHS} for r in rev_rows: mk = r["month_key"]; a = r["assistant"] if mk in revenue_map and a in revenue_map[mk]: revenue_map[mk][a] += d(r["revenue_amount"]) recharge_map = {mk: {a: Decimal('0') for a in assistants} for mk,_,_ in MONTHS} for r in rech_rows: mk = r["month_key"]; a = r["assistant"] if mk in recharge_map and a in recharge_map[mk]: recharge_map[mk][a] += d(r["recharge_amount"]) # ====== 输出4张排行榜 ====== def write_rank(file_stem: str, title: str, desc: str, rows: list[list[Any]]): write_csv(OUT_DIR / f"{file_stem}.csv", title, desc, [["月份", "排名", "助教昵称", "指标"]], rows) write_md(OUT_DIR / f"{file_stem}.md", title, "按月聚合并做dense_rank排名。", desc, []) rows = [] for mk,_,_ in MONTHS: values = {a: svc_map[mk][a]["base"] for a in assistants} ranks = dense_rank_desc(values) for a in sorted(assistants, key=lambda x: (ranks[x] if ranks[x] else 999999, x)): v = values[a] if v > 0: rows.append([mk, ranks[a], a, fmt_hours(v, 2)]) write_rank( "助教_基础课时长排行_2025年10-12月", "2025年10-12月 助教基础课时长排行榜", "口径:order_assistant_type=1,时长=income_seconds/3600(小时),按月排名。", rows, ) rows = [] for mk,_,_ in MONTHS: values = {a: svc_map[mk][a]["extra"] for a in assistants} ranks = dense_rank_desc(values) for a in sorted(assistants, key=lambda x: (ranks[x] if ranks[x] else 999999, x)): v = values[a] if v > 0: rows.append([mk, ranks[a], a, fmt_hours(v, 2)]) write_rank( "助教_附加课时长排行_2025年10-12月", "2025年10-12月 助教附加课(超休)时长排行榜", "口径:order_assistant_type=2,超休时长=income_seconds/3600(小时),按月排名。", rows, ) rows = [] for mk,_,_ in MONTHS: values = revenue_map[mk] ranks = dense_rank_desc(values) for a in sorted(assistants, key=lambda x: (ranks[x] if ranks[x] else 999999, x)): v = values[a] if v > 0: rows.append([mk, ranks[a], a, fmt_money(v)]) write_rank( "助教_客户流水排行_2025年10-12月", "2025年10-12月 助教客户流水排行榜(全额复制口径)", "口径:客户流水=台费+助教+商品应付金额按订单归集后,全额计入订单内每位助教;多助教会导致汇总>门店总额。", rows, ) rows = [] for mk,_,_ in MONTHS: values = recharge_map[mk] ranks = dense_rank_desc(values) for a in sorted(assistants, key=lambda x: (ranks[x] if ranks[x] else 999999, x)): v = values[a] if v > 0: rows.append([mk, ranks[a], a, fmt_money(v)]) write_rank( "助教_客户充值归因排行_2025年10-12月", "2025年10-12月 助教客户充值归因排行榜(全额复制口径)", "口径:充值支付(dwd_payment.relate_type=5)在消费窗口±30分钟内命中且订单有助教,则全额计入助教;多助教/多订单命中会重复计入。", rows, ) # ====== 输出助教详情(每人一份) ====== # 会员昵称 cur.execute("select member_id, nickname from billiards_dwd.dim_member where scd2_is_current=1") member_name = {r["member_id"]: (r.get("nickname") or "") for r in cur.fetchall()} # 索引:assistant->member->month svc_idx = {a: {} for a in assistants} for r in svc_rows: a = r["assistant"]; mid = int(r["member_id"]); mk = r["month_key"] svc_idx.setdefault(a, {}).setdefault(mid, {})[mk] = {"base": d(r["base_hours"]), "extra": d(r["extra_hours"])} rev_idx = {a: {} for a in assistants} for r in rev_rows: a = r["assistant"]; mid = int(r["member_id"]); mk = r["month_key"] rev_idx.setdefault(a, {}).setdefault(mid, {})[mk] = d(r["revenue_amount"]) rech_idx = {a: {} for a in assistants} for r in rech_rows: a = r["assistant"]; mid = int(r["member_id"]); mk = r["month_key"] rech_idx.setdefault(a, {}).setdefault(mid, {})[mk] = d(r["recharge_amount"]) for a in assistants: safe = sanitize_filename(a) csv_path = OUT_DIR / f"助教详情_{safe}.csv" md_path = OUT_DIR / f"助教详情_{safe}.md" # 评价(简短) base_total = sum((svc_map[mk][a]["base"] for mk,_,_ in MONTHS), Decimal('0')) extra_total = sum((svc_map[mk][a]["extra"] for mk,_,_ in MONTHS), Decimal('0')) rev_total = sum((revenue_map[mk][a] for mk,_,_ in MONTHS), Decimal('0')) rech_total = sum((recharge_map[mk][a] for mk,_,_ in MONTHS), Decimal('0')) # 头部客户 Top100(按12月消费业绩) members = set(rev_idx.get(a, {}).keys()) | set(svc_idx.get(a, {}).keys()) | set(rech_idx.get(a, {}).keys()) def rev_dec(mid: int) -> Decimal: return rev_idx.get(a, {}).get(mid, {}).get('2025-12', Decimal('0')) top_members = sorted(members, key=lambda mid: rev_dec(mid), reverse=True)[:100] top3 = '、'.join([(member_name.get(mid) or str(mid)) for mid in top_members[:3]]) assistant_review = ( f"评价:基础{fmt_hours(base_total,1)},附加{fmt_hours(extra_total,1)};" f"客户流水¥{rev_total:.2f},充值归因¥{rech_total:.2f};" f"头部客户(12月)Top3:{top3 or '无'}。" ) # Part1-4 part1=[]; part2=[]; part3=[]; part4=[] for mk, mcn, _ in MONTHS: base_v = svc_map[mk][a]["base"] extra_v = svc_map[mk][a]["extra"] rev_v = revenue_map[mk][a] rech_v = recharge_map[mk][a] base_all = {x: svc_map[mk][x]["base"] for x in assistants} extra_all = {x: svc_map[mk][x]["extra"] for x in assistants} rev_all = {x: revenue_map[mk][x] for x in assistants} rech_all = {x: recharge_map[mk][x] for x in assistants} base_rank = dense_rank_desc(base_all).get(a, 0) extra_rank = dense_rank_desc(extra_all).get(a, 0) rev_rank = dense_rank_desc(rev_all).get(a, 0) rech_rank = dense_rank_desc(rech_all).get(a, 0) base_da, base_dm = calc_diff(base_all, base_v) extra_da, extra_dm = calc_diff(extra_all, extra_v) rev_da, rev_dm = calc_diff(rev_all, rev_v) rech_da, rech_dm = calc_diff(rech_all, rech_v) part1.append([mcn, fmt_hours(base_v,2), base_rank or "", fmt_hours(base_da,2), fmt_hours(base_dm,2)]) part2.append([mcn, fmt_hours(extra_v,2), extra_rank or "", fmt_hours(extra_da,2), fmt_hours(extra_dm,2)]) part3.append([mcn, fmt_money(rev_v), rev_rank or "", fmt_money(rev_da), fmt_money(rev_dm)]) part4.append([mcn, fmt_money(rech_v), rech_rank or "", fmt_money(rech_da), fmt_money(rech_dm)]) # Part5 rows part5=[] for i, mid in enumerate(top_members, start=1): def h_pair(month_key: str) -> str: v = svc_idx.get(a, {}).get(mid, {}).get(month_key, {}) return f"{fmt_hours(v.get('base',Decimal('0')),1)} / {fmt_hours(v.get('extra',Decimal('0')),1)}" def rev_m(month_key: str) -> Decimal: return rev_idx.get(a, {}).get(mid, {}).get(month_key, Decimal('0')) def rech_m(month_key: str) -> Decimal: return rech_idx.get(a, {}).get(mid, {}).get(month_key, Decimal('0')) name = member_name.get(mid) or str(mid) part5.append([ i, name, h_pair('2025-12'), fmt_money(rev_m('2025-12')), fmt_money(rech_m('2025-12')), h_pair('2025-11'), fmt_money(rev_m('2025-11')), fmt_money(rech_m('2025-11')), h_pair('2025-10'), fmt_money(rev_m('2025-10')), fmt_money(rech_m('2025-10')), ]) title = f"助教详情:{a}(2025年10-12月)" desc = ( "本表包含5个部分:基础课业绩、附加课业绩、客户消费业绩、客户充值业绩、头部客户情况。" "均值/中位数差值对比集合为当月该指标>0的助教。" "充值/客户流水多助教与多订单命中均按全额复制计入,故汇总可能大于门店总额。" ) rows=[] rows += [["一、基础课业绩"], ["说明:" + assistant_review], []] rows += [["月份", "基础课业绩", "基础课业绩", "基础课业绩", "基础课业绩"], ["月份", "小时数", "排名", "平均值差值小时数", "中位数值差值小时数"]] rows += part1 rows += [[], ["二、附加课业绩"], ["说明:附加课=order_assistant_type=2。"], []] rows += [["月份", "附加课业绩", "附加课业绩", "附加课业绩", "附加课业绩"], ["月份", "小时数", "排名", "平均值差值小时数", "中位数值差值小时数"]] rows += part2 rows += [[], ["三、客户消费业绩"], ["说明:订单台费+助教+商品应付金额全额计入订单内助教。"], []] rows += [["月份", "客户消费业绩", "客户消费业绩", "客户消费业绩", "客户消费业绩"], ["月份", "合计元", "排名", "平均值差值元", "中位数值差值元"]] rows += part3 rows += [[], ["四、客户充值业绩"], ["说明:充值命中消费窗口±30分钟且有助教则归因;全额复制。"], []] rows += [["月份", "客户充值业绩", "客户充值业绩", "客户充值业绩", "客户充值业绩"], ["月份", "合计元", "排名", "平均值差值元", "中位数值差值元"]] rows += part4 rows += [[], ["五、头部客户(按12月消费业绩排序,Top100)"], ["说明:基础/附加课时=基础h/附加h。"], []] rows += [["排名", "客户名称", "12月", "12月", "12月", "11月", "11月", "11月", "10月", "10月", "10月"], ["排名", "客户名称", "基础/附加课时", "消费业绩(元)", "客户充值(元)", "基础/附加课时", "消费业绩(元)", "客户充值(元)", "基础/附加课时", "消费业绩(元)", "客户充值(元)"]] rows += part5 write_csv_sections(csv_path, title, desc, rows) write_md( md_path, title, "按模板拆分5部分输出;月度排名采用dense_rank;均值/中位数在当月该指标>0助教集合上计算。", desc + "\n" + assistant_review, [ SqlBlock("服务时长(助教-客户-月份)", sql_svc), SqlBlock("客户流水(助教-客户-月份)", sql_rev), SqlBlock("充值归因(助教-客户-月份)", sql_rech), ], ) print(f"完成:{OUT_DIR}") if __name__ == "__main__": main()