Files
feiqiu-ETL/etl_billiards/docs/table_2025-12-19/_generate_assistant_tables.py
2026-01-18 22:37:38 +08:00

586 lines
23 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# -*- coding: utf-8 -*-
"""生成 2025年10-12月 助教排行榜 + 助教详情表CSV + MD
输出目录:etl_billiards/docs/table_2025-12-19
注意:客户流水/充值归因涉及“多助教/多订单命中”时按全额复制计入,会导致助教汇总>门店汇总,表格说明会写明。
"""
from __future__ import annotations
import csv
import re
from dataclasses import dataclass
from decimal import Decimal
from pathlib import Path
from statistics import median
from typing import Any
import psycopg2
import psycopg2.extras
# Store (site) identifier bound to the %(site_id)s parameter of the queries below.
SITE_ID = 2790685415443269
# Time-zone name; not referenced anywhere else in the visible part of this file.
TZ = "Asia/Shanghai"
# Monthly windows as (start, end) timestamp literals with a +08 offset.
# The queries compare with ">= start" and "< end", i.e. half-open intervals.
WIN_OCT = ("2025-10-01 00:00:00+08", "2025-11-01 00:00:00+08")
WIN_NOV = ("2025-11-01 00:00:00+08", "2025-12-01 00:00:00+08")
WIN_DEC = ("2025-12-01 00:00:00+08", "2026-01-01 00:00:00+08")
# Whole reporting window: start of October through the end of December.
WIN_ALL = (WIN_OCT[0], WIN_DEC[1])
# One entry per reported month: (month key, display label, window).
MONTHS = [
    ("2025-10", "10月", WIN_OCT),
    ("2025-11", "11月", WIN_NOV),
    ("2025-12", "12月", WIN_DEC),
]
# Fourth ancestor directory of this file, treated as the repository root.
REPO_ROOT = Path(__file__).resolve().parents[3]
# .env file from which read_pg_dsn() extracts the PG_DSN entry.
ENV_PATH = REPO_ROOT / "etl_billiards" / ".env"
# Generated CSV/MD files are written next to this script.
OUT_DIR = Path(__file__).resolve().parent
@dataclass(frozen=True)
class SqlBlock:
    """Immutable pairing of a heading with the SQL text rendered beneath it.

    write_md() emits ``title`` as a third-level heading followed by ``sql``
    inside a fenced sql code block.
    """

    # Heading text for the block.
    title: str
    # SQL statement text (stripped of surrounding whitespace when rendered).
    sql: str
def read_pg_dsn() -> str:
    """Return the PostgreSQL DSN stored as ``PG_DSN=...`` in the .env file.

    Returns:
        The text after ``PG_DSN=`` on its line, with surrounding whitespace removed.

    Raises:
        RuntimeError: if no line of ENV_PATH starts with ``PG_DSN=`` followed
            by at least one character.
    """
    env_text = ENV_PATH.read_text(encoding="utf-8")
    # Multi-line mode anchors the pattern to a single line of the file.
    found = re.search(r"^PG_DSN=(.+)$", env_text, re.M)
    if found is None:
        raise RuntimeError(f"未在 {ENV_PATH} 中找到 PG_DSN")
    raw_value = found.group(1)
    return raw_value.strip()
def conn():
    """Open a psycopg2 connection using the DSN from the .env file (10 s connect timeout)."""
    dsn = read_pg_dsn()
    return psycopg2.connect(dsn, connect_timeout=10)
def sanitize_filename(name: str) -> str:
    """Turn an arbitrary label into text usable as part of a file name.

    The label is trimmed, each run of the characters ``< > : " / \\ | ? *`` is
    replaced by a single underscore, and each run of whitespace is collapsed
    into one space.
    """
    without_reserved = re.sub(r"[<>:\"/\\|?*]+", "_", name.strip())
    return re.sub(r"\s+", " ", without_reserved)
def d(v: Any) -> Decimal:
    """Coerce a value to Decimal.

    Decimal instances are returned unchanged, None becomes Decimal("0"), and
    anything else is converted through its string representation.
    """
    if isinstance(v, Decimal):
        return v
    return Decimal("0") if v is None else Decimal(str(v))
def fmt_money(v: Any) -> str:
    """Format a value as an amount with exactly two decimal places (None is treated as 0)."""
    amount = d(v)
    return format(amount, ".2f")
def fmt_hours(v: Any, digits: int = 2) -> str:
    """Format a value as hours with ``digits`` decimal places and an ``h`` suffix."""
    # 10 ** -digits expressed as a Decimal, used as the quantization step.
    step = Decimal("1").scaleb(-digits)
    rounded = d(v).quantize(step)
    return format(rounded, "f") + "h"
def write_csv(path: Path, title: str, description: str, header_rows: list[list[str]], rows: list[list[Any]]) -> None:
    """Write a UTF-8 CSV made of a title row, a description row, a blank row,
    the header rows and finally the data rows.

    Missing parent directories are created. ``None`` cells in ``rows`` are
    written as empty strings; header rows are written as given.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    with path.open("w", newline="", encoding="utf-8") as handle:
        writer = csv.writer(handle)
        writer.writerows([[title], [description], []])
        writer.writerows(header_rows)
        writer.writerows(
            ["" if cell is None else cell for cell in record]
            for record in rows
        )
def write_csv_sections(path: Path, title: str, description: str, section_rows: list[list[Any]]) -> None:
    """Write a UTF-8 CSV made of a title row, a description row, a blank row
    and then ``section_rows`` verbatim.

    Missing parent directories are created. ``None`` cells are written as
    empty strings.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    with path.open("w", newline="", encoding="utf-8") as handle:
        writer = csv.writer(handle)
        writer.writerows([[title], [description], []])
        writer.writerows(
            ["" if cell is None else cell for cell in record]
            for record in section_rows
        )
def write_md(path: Path, title: str, thinking: str, description: str, sql_blocks: list[SqlBlock]) -> None:
    """Write a Markdown document with a title, a reasoning section, a query
    description section and one fenced SQL block per entry of ``sql_blocks``.

    ``thinking``, ``description`` and each block's SQL are stripped of
    surrounding whitespace. Missing parent directories are created and the
    file is written as UTF-8.
    """
    chunks: list[str] = [
        f"# {title}\n",
        "## 思考过程\n",
        thinking.strip() + "\n",
        "\n## 查询说明\n",
        description.strip() + "\n",
        "\n## SQL\n",
    ]
    for block in sql_blocks:
        chunks.extend(
            (
                f"\n### {block.title}\n",
                "```sql\n",
                block.sql.strip() + "\n",
                "```\n",
            )
        )
    document = "".join(chunks)
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text(document, encoding="utf-8")
def fetch_all(cur, sql: str, params: dict[str, Any]) -> list[dict[str, Any]]:
    """Execute ``sql`` with ``params`` on the cursor and return every fetched row as a list."""
    cur.execute(sql, params)
    fetched = cur.fetchall()
    return [*fetched]
def month_case(ts_expr: str) -> str:
    """Build a SQL CASE expression that maps ``ts_expr`` to a month key.

    One WHEN branch is produced per entry of MONTHS, testing
    ``start <= ts_expr < end`` for that month's window; timestamps outside
    every window yield NULL.
    """
    branches = " ".join(
        f"when {ts_expr} >= '{start}'::timestamptz and {ts_expr} < '{end}'::timestamptz then '{key}'"
        for key, _label, (start, end) in MONTHS
    )
    return f"case {branches} else null end"
def sql_order_base(window_start: str, window_end: str) -> str:
    """Return the leading WITH clause that derives per-order amounts.

    The returned text defines four CTEs and leaves the statement open so the
    caller can append further CTEs or the final SELECT:

    * ``base_orders``: table-fee rows whose start time lies in
      [window_start, window_end), grouped per settlement order.
    * ``assistant_amount`` / ``goods_amount``: assistant-service and goods
      amounts summed per order, restricted to orders in ``base_orders``.
    * ``orders``: table + assistant + goods amount per order.

    The window bounds are embedded as quoted literals, while ``%(site_id)s``
    is left as a named query parameter for the database driver.
    """
    template = """
with base_orders as (
select
tfl.order_settle_id,
max(tfl.member_id) as member_id,
min(tfl.start_use_time) as order_start_time,
max(tfl.ledger_end_time) as order_end_time,
sum(tfl.ledger_amount) as table_amount
from billiards_dwd.dwd_table_fee_log tfl
where tfl.site_id = %(site_id)s
and coalesce(tfl.is_delete,0) = 0
and tfl.start_use_time >= '{window_start}'::timestamptz
and tfl.start_use_time < '{window_end}'::timestamptz
group by tfl.order_settle_id
),
assistant_amount as (
select
asl.order_settle_id,
sum(asl.ledger_amount) as assistant_amount
from billiards_dwd.dwd_assistant_service_log asl
join base_orders bo on bo.order_settle_id = asl.order_settle_id
where asl.site_id = %(site_id)s
and coalesce(asl.is_delete,0) = 0
group by asl.order_settle_id
),
goods_amount as (
select
g.order_settle_id,
sum(g.ledger_amount) as goods_amount
from billiards_dwd.dwd_store_goods_sale g
join base_orders bo on bo.order_settle_id = g.order_settle_id
where g.site_id = %(site_id)s
and coalesce(g.is_delete,0) = 0
group by g.order_settle_id
),
orders as (
select
bo.order_settle_id,
bo.member_id,
bo.order_start_time,
bo.order_end_time,
coalesce(bo.table_amount,0) + coalesce(a.assistant_amount,0) + coalesce(g.goods_amount,0) as order_amount
from base_orders bo
left join assistant_amount a on a.order_settle_id = bo.order_settle_id
left join goods_amount g on g.order_settle_id = bo.order_settle_id
)
"""
    return template.format(window_start=window_start, window_end=window_end)
def dense_rank_desc(values: dict[str, Decimal]) -> dict[str, int]:
    """Assign dense ranks in descending order of value.

    The largest positive value gets rank 1, equal values share a rank and no
    rank numbers are skipped. Keys whose value is not greater than zero get
    rank 0.
    """
    positives = set()
    for amount in values.values():
        if amount > 0:
            positives.add(amount)
    ordered = sorted(positives, reverse=True)
    position = dict(zip(ordered, range(1, len(ordered) + 1)))
    ranking: dict[str, int] = {}
    for key, amount in values.items():
        ranking[key] = position.get(amount, 0)
    return ranking
def calc_diff(all_values: dict[str, Decimal], current: Decimal) -> tuple[Decimal, Decimal]:
    """Return how far ``current`` lies from the mean and the median of the peers.

    Only values greater than zero take part in the comparison set.

    Args:
        all_values: metric value per key; non-positive entries are ignored.
        current: the value being compared.

    Returns:
        ``(current - mean, current - median)``. Both are ``Decimal("0")`` when
        the comparison set is empty or ``current`` is not greater than zero.
    """
    positives = [v for v in all_values.values() if v > 0]
    if not positives or current <= 0:
        return Decimal("0"), Decimal("0")
    avg = sum(positives) / Decimal(len(positives))
    # statistics.median sorts the data and, for an even count, averages the
    # two middle items; both steps work on Decimal directly. Going through
    # float would add binary rounding noise (e.g. 0.15000000000000002).
    med = median(positives)
    return current - avg, current - med
def _monthly_rank_rows(values_by_month: dict[str, dict[str, Decimal]], fmt: Any) -> list[list[Any]]:
    """Build ranking rows ``[month key, rank, assistant, formatted value]`` for every month.

    Only assistants with a value greater than zero are listed, ordered by
    dense rank and then by nickname.
    """
    rows: list[list[Any]] = []
    for mk, _, _ in MONTHS:
        values = values_by_month[mk]
        ranks = dense_rank_desc(values)
        ranked = sorted((a for a, v in values.items() if v > 0), key=lambda a: (ranks[a], a))
        rows.extend([mk, ranks[a], a, fmt(values[a])] for a in ranked)
    return rows


def _index_by_member_month(rows: list[dict[str, Any]], assistants: list[str], value_of: Any) -> dict[Any, dict[int, dict[str, Any]]]:
    """Index query rows as ``assistant -> member id -> month key -> value_of(row)``."""
    index: dict[Any, dict[int, dict[str, Any]]] = {a: {} for a in assistants}
    for r in rows:
        index.setdefault(r["assistant"], {}).setdefault(int(r["member_id"]), {})[r["month_key"]] = value_of(r)
    return index


def main() -> None:
    """Generate the assistant ranking files and the per-assistant detail files.

    Steps:
      1. Query the assistant nicknames, service hours, order revenue,
         attributed recharge amounts and member nicknames.
      2. Aggregate the metrics per month and assistant.
      3. Write four ranking tables (CSV + MD) into OUT_DIR.
      4. Write one detail table (CSV + MD) per assistant into OUT_DIR.

    All queries run before any file is written, and the database connection
    is closed as soon as the data has been fetched.
    """
    OUT_DIR.mkdir(parents=True, exist_ok=True)
    query_params = {"site_id": SITE_ID, "window_start": WIN_ALL[0], "window_end": WIN_ALL[1]}
    month_keys = [mk for mk, _, _ in MONTHS]

    # assistant-member-month: service hours
    sql_svc = f"""
with raw as (
select
asl.nickname as assistant,
asl.tenant_member_id as member_id,
{month_case('asl.start_use_time')} as month_key,
asl.order_assistant_type,
asl.income_seconds
from billiards_dwd.dwd_assistant_service_log asl
where asl.site_id=%(site_id)s and coalesce(asl.is_delete,0)=0
and asl.start_use_time >= %(window_start)s::timestamptz
and asl.start_use_time < %(window_end)s::timestamptz
and asl.tenant_member_id is not null and asl.tenant_member_id<>0
)
select
assistant,
member_id,
month_key,
sum(case when order_assistant_type=1 then income_seconds else 0 end)/3600.0 as base_hours,
sum(case when order_assistant_type=2 then income_seconds else 0 end)/3600.0 as extra_hours
from raw
where month_key is not null
group by assistant, member_id, month_key;
"""
    # assistant-member-month: customer revenue
    sql_rev = sql_order_base(WIN_ALL[0], WIN_ALL[1]) + f"""
, assistant_orders as (
select distinct order_settle_id, nickname as assistant
from billiards_dwd.dwd_assistant_service_log
where site_id=%(site_id)s and coalesce(is_delete,0)=0
and start_use_time >= %(window_start)s::timestamptz
and start_use_time < %(window_end)s::timestamptz
),
raw as (
select
ao.assistant,
o.member_id,
{month_case('o.order_start_time')} as month_key,
o.order_amount
from orders o
join assistant_orders ao on ao.order_settle_id=o.order_settle_id
where o.member_id is not null and o.member_id<>0
)
select
assistant,
member_id,
month_key,
sum(order_amount) as revenue_amount
from raw
where month_key is not null
group by assistant, member_id, month_key;
"""
    # assistant-member-month: recharge attribution
    sql_rech = f"""
with base_orders as (
select
tfl.order_settle_id,
max(tfl.member_id) as member_id,
min(tfl.start_use_time) as table_start_time,
max(tfl.ledger_end_time) as table_end_time
from billiards_dwd.dwd_table_fee_log tfl
where tfl.site_id=%(site_id)s and coalesce(tfl.is_delete,0)=0
and tfl.start_use_time >= %(window_start)s::timestamptz
and tfl.start_use_time < %(window_end)s::timestamptz
group by tfl.order_settle_id
),
assistant_time as (
select
asl.order_settle_id,
min(asl.start_use_time) as assistant_start_time,
max(asl.last_use_time) as assistant_end_time
from billiards_dwd.dwd_assistant_service_log asl
join base_orders bo on bo.order_settle_id=asl.order_settle_id
where asl.site_id=%(site_id)s and coalesce(asl.is_delete,0)=0
group by asl.order_settle_id
),
order_windows as (
select
bo.order_settle_id,
bo.member_id,
least(bo.table_start_time, coalesce(at.assistant_start_time, bo.table_start_time)) as win_start,
greatest(bo.table_end_time, coalesce(at.assistant_end_time, bo.table_end_time)) as win_end
from base_orders bo
left join assistant_time at on at.order_settle_id=bo.order_settle_id
where bo.member_id is not null and bo.member_id<>0
),
assistant_orders as (
select distinct order_settle_id, nickname as assistant
from billiards_dwd.dwd_assistant_service_log
where site_id=%(site_id)s and coalesce(is_delete,0)=0
and start_use_time >= %(window_start)s::timestamptz
and start_use_time < %(window_end)s::timestamptz
),
recharge_pay as (
select
p.pay_time,
r.member_id,
p.pay_amount
from billiards_dwd.dwd_payment p
join billiards_dwd.dwd_recharge_order r on r.recharge_order_id=p.relate_id
where p.site_id=%(site_id)s
and p.relate_type=5
and p.pay_status=2
and p.pay_amount>0
and p.pay_time >= %(window_start)s::timestamptz
and p.pay_time < %(window_end)s::timestamptz
),
matched as (
select
rp.pay_time,
ow.order_settle_id,
ow.member_id,
rp.pay_amount
from recharge_pay rp
join order_windows ow
on ow.member_id=rp.member_id
and rp.pay_time >= ow.win_start - interval '30 minutes'
and rp.pay_time <= ow.win_end + interval '30 minutes'
),
raw as (
select
ao.assistant,
m.member_id,
{month_case('m.pay_time')} as month_key,
m.pay_amount
from matched m
join assistant_orders ao on ao.order_settle_id=m.order_settle_id
)
select
assistant,
member_id,
month_key,
sum(pay_amount) as recharge_amount
from raw
where month_key is not null
group by assistant, member_id, month_key;
"""

    # ---- Fetch everything, then release the connection ----
    # psycopg2's "with connection" only ends the transaction on exit; it does
    # not close the connection, hence the explicit close() in ``finally``.
    connection = conn()
    try:
        with connection, connection.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur:
            assistants_rows = fetch_all(
                cur,
                """
select distinct nickname as assistant
from billiards_dwd.dwd_assistant_service_log
where site_id=%(site_id)s and coalesce(is_delete,0)=0
and start_use_time >= %(window_start)s::timestamptz
and start_use_time < %(window_end)s::timestamptz
order by assistant;
""",
                query_params,
            )
            svc_rows = fetch_all(cur, sql_svc, query_params)
            rev_rows = fetch_all(cur, sql_rev, query_params)
            rech_rows = fetch_all(cur, sql_rech, query_params)
            # member nicknames
            cur.execute("select member_id, nickname from billiards_dwd.dim_member where scd2_is_current=1")
            member_rows = list(cur.fetchall())
    finally:
        connection.close()

    assistants = [r["assistant"] for r in assistants_rows if r.get("assistant")]
    member_name = {r["member_id"]: (r.get("nickname") or "") for r in member_rows}

    # ---- Aggregate: metric -> month key -> assistant -> value ----
    metrics: dict[str, dict[str, dict[str, Decimal]]] = {
        name: {mk: {a: Decimal("0") for a in assistants} for mk in month_keys}
        for name in ("base", "extra", "revenue", "recharge")
    }
    for r in svc_rows:
        mk, a = r["month_key"], r["assistant"]
        if mk in metrics["base"] and a in metrics["base"][mk]:
            metrics["base"][mk][a] += d(r["base_hours"])
            metrics["extra"][mk][a] += d(r["extra_hours"])
    for r in rev_rows:
        mk, a = r["month_key"], r["assistant"]
        if mk in metrics["revenue"] and a in metrics["revenue"][mk]:
            metrics["revenue"][mk][a] += d(r["revenue_amount"])
    for r in rech_rows:
        mk, a = r["month_key"], r["assistant"]
        if mk in metrics["recharge"] and a in metrics["recharge"][mk]:
            metrics["recharge"][mk][a] += d(r["recharge_amount"])

    # ====== Output: four ranking tables ======
    def write_rank(file_stem: str, title: str, desc: str, rows: list[list[Any]]):
        write_csv(OUT_DIR / f"{file_stem}.csv", title, desc, [["月份", "排名", "助教昵称", "指标"]], rows)
        write_md(OUT_DIR / f"{file_stem}.md", title, "按月聚合并做dense_rank排名。", desc, [])

    write_rank(
        "助教_基础课时长排行_2025年10-12月",
        "2025年10-12月 助教基础课时长排行榜",
        "口径order_assistant_type=1时长=income_seconds/3600小时按月排名。",
        _monthly_rank_rows(metrics["base"], fmt_hours),
    )
    write_rank(
        "助教_附加课时长排行_2025年10-12月",
        "2025年10-12月 助教附加课(超休)时长排行榜",
        "口径order_assistant_type=2超休时长=income_seconds/3600小时按月排名。",
        _monthly_rank_rows(metrics["extra"], fmt_hours),
    )
    write_rank(
        "助教_客户流水排行_2025年10-12月",
        "2025年10-12月 助教客户流水排行榜(全额复制口径)",
        "口径:客户流水=台费+助教+商品应付金额按订单归集后,全额计入订单内每位助教;多助教会导致汇总>门店总额。",
        _monthly_rank_rows(metrics["revenue"], fmt_money),
    )
    write_rank(
        "助教_客户充值归因排行_2025年10-12月",
        "2025年10-12月 助教客户充值归因排行榜(全额复制口径)",
        "口径:充值支付(dwd_payment.relate_type=5)在消费窗口±30分钟内命中且订单有助教则全额计入助教多助教/多订单命中会重复计入。",
        _monthly_rank_rows(metrics["recharge"], fmt_money),
    )

    # ====== Output: assistant details (one pair of files each) ======
    # Indexes: assistant -> member -> month
    svc_idx = _index_by_member_month(
        svc_rows, assistants, lambda r: {"base": d(r["base_hours"]), "extra": d(r["extra_hours"])}
    )
    rev_idx = _index_by_member_month(rev_rows, assistants, lambda r: d(r["revenue_amount"]))
    rech_idx = _index_by_member_month(rech_rows, assistants, lambda r: d(r["recharge_amount"]))

    # Monthly rank tables are the same for every assistant, so build them once
    # instead of once per assistant.
    ranks = {
        name: {mk: dense_rank_desc(by_month[mk]) for mk in month_keys}
        for name, by_month in metrics.items()
    }
    metric_fmt = (("base", fmt_hours), ("extra", fmt_hours), ("revenue", fmt_money), ("recharge", fmt_money))
    zero = Decimal("0")

    for a in assistants:
        safe = sanitize_filename(a)
        csv_path = OUT_DIR / f"助教详情_{safe}.csv"
        md_path = OUT_DIR / f"助教详情_{safe}.md"

        # Short review line: three-month totals
        base_total = sum((metrics["base"][mk][a] for mk in month_keys), Decimal("0"))
        extra_total = sum((metrics["extra"][mk][a] for mk in month_keys), Decimal("0"))
        rev_total = sum((metrics["revenue"][mk][a] for mk in month_keys), Decimal("0"))
        rech_total = sum((metrics["recharge"][mk][a] for mk in month_keys), Decimal("0"))

        # Top 100 customers, ordered by December revenue
        rev_by_member = rev_idx.get(a, {})
        svc_by_member = svc_idx.get(a, {})
        rech_by_member = rech_idx.get(a, {})
        members = set(rev_by_member.keys()) | set(svc_by_member.keys()) | set(rech_by_member.keys())
        top_members = sorted(
            members,
            key=lambda mid: rev_by_member.get(mid, {}).get("2025-12", zero),
            reverse=True,
        )[:100]
        top3 = "".join([(member_name.get(mid) or str(mid)) for mid in top_members[:3]])
        assistant_review = (
            f"评价:基础{fmt_hours(base_total,1)},附加{fmt_hours(extra_total,1)}"
            f"客户流水¥{rev_total:.2f},充值归因¥{rech_total:.2f}"
            f"头部客户(12月)Top3{top3 or ''}"
        )

        # Parts 1-4: value, rank and distance from mean/median per month
        parts: dict[str, list[list[Any]]] = {name: [] for name, _ in metric_fmt}
        for mk, mcn, _ in MONTHS:
            for name, fmt in metric_fmt:
                values = metrics[name][mk]
                current = values[a]
                rank = ranks[name][mk].get(a, 0)
                diff_avg, diff_med = calc_diff(values, current)
                parts[name].append([mcn, fmt(current), rank or "", fmt(diff_avg), fmt(diff_med)])

        # Part 5: top customers, newest month first
        part5: list[list[Any]] = []
        for i, mid in enumerate(top_members, start=1):
            row: list[Any] = [i, member_name.get(mid) or str(mid)]
            for mk in ("2025-12", "2025-11", "2025-10"):
                hours = svc_by_member.get(mid, {}).get(mk, {})
                row.append(f"{fmt_hours(hours.get('base', zero), 1)} / {fmt_hours(hours.get('extra', zero), 1)}")
                row.append(fmt_money(rev_by_member.get(mid, {}).get(mk, zero)))
                row.append(fmt_money(rech_by_member.get(mid, {}).get(mk, zero)))
            part5.append(row)

        title = f"助教详情:{a}2025年10-12月"
        desc = (
            "本表包含5个部分基础课业绩、附加课业绩、客户消费业绩、客户充值业绩、头部客户情况。"
            "均值/中位数差值对比集合为当月该指标>0的助教。"
            "充值/客户流水多助教与多订单命中均按全额复制计入,故汇总可能大于门店总额。"
        )
        rows: list[list[Any]] = []
        rows += [["一、基础课业绩"], ["说明:" + assistant_review], []]
        rows += [["月份", "基础课业绩", "基础课业绩", "基础课业绩", "基础课业绩"], ["月份", "小时数", "排名", "平均值差值小时数", "中位数值差值小时数"]]
        rows += parts["base"]
        rows += [[], ["二、附加课业绩"], ["说明:附加课=order_assistant_type=2。"], []]
        rows += [["月份", "附加课业绩", "附加课业绩", "附加课业绩", "附加课业绩"], ["月份", "小时数", "排名", "平均值差值小时数", "中位数值差值小时数"]]
        rows += parts["extra"]
        rows += [[], ["三、客户消费业绩"], ["说明:订单台费+助教+商品应付金额全额计入订单内助教。"], []]
        rows += [["月份", "客户消费业绩", "客户消费业绩", "客户消费业绩", "客户消费业绩"], ["月份", "合计元", "排名", "平均值差值元", "中位数值差值元"]]
        rows += parts["revenue"]
        rows += [[], ["四、客户充值业绩"], ["说明充值命中消费窗口±30分钟且有助教则归因全额复制。"], []]
        rows += [["月份", "客户充值业绩", "客户充值业绩", "客户充值业绩", "客户充值业绩"], ["月份", "合计元", "排名", "平均值差值元", "中位数值差值元"]]
        rows += parts["recharge"]
        rows += [[], ["五、头部客户按12月消费业绩排序Top100"], ["说明:基础/附加课时=基础h/附加h。"], []]
        rows += [["排名", "客户名称", "12月", "12月", "12月", "11月", "11月", "11月", "10月", "10月", "10月"],
                 ["排名", "客户名称", "基础/附加课时", "消费业绩(元)", "客户充值(元)", "基础/附加课时", "消费业绩(元)", "客户充值(元)", "基础/附加课时", "消费业绩(元)", "客户充值(元)"]]
        rows += part5
        write_csv_sections(csv_path, title, desc, rows)
        write_md(
            md_path,
            title,
            "按模板拆分5部分输出月度排名采用dense_rank均值/中位数在当月该指标>0助教集合上计算。",
            desc + "\n" + assistant_review,
            [
                SqlBlock("服务时长(助教-客户-月份)", sql_svc),
                SqlBlock("客户流水(助教-客户-月份)", sql_rev),
                SqlBlock("充值归因(助教-客户-月份)", sql_rech),
            ],
        )
    print(f"完成:{OUT_DIR}")
# Run the report generation only when this file is executed as a script.
if __name__ == "__main__":
    main()