Files
Neo-ZQYY/scripts/ops/find_complex_orders.py

213 lines
8.3 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# -*- coding: utf-8 -*-
"""
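Scan every ODS JSON file under EXPORT_ROOT, aggregate the records by
order_trade_no, compute the complexity of each parent order and print the Top 10.
Complexity dimensions:
- table usage records (table_fee_transactions)
- table fee discount records (table_fee_discount_records)
- assistant service records (assistant_service_records)
- goods sales records (store_goods_sales_records)
- group-buy redemption records (group_buy_redemption_records)
- payment records (payment_transactions, linked through relate_id)
- refund records (refund_transactions, linked through relate_id)
Total complexity = sum of the record counts of all dimensions.
Ranking score = total complexity + 2 * number of dimensions that have records.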
"""
from __future__ import annotations
import json
import sys
from collections import defaultdict
from pathlib import Path
sys.path.insert(0, str(Path(__file__).resolve().parent))
from _env_paths import get_output_path
def load_records_from_task_dirs(json_root: Path, dir_prefix: str, file_stem: str) -> list[dict]:
    """Load the records exported by every ODS task directory matching a prefix.

    Walks ``json_root/<task_dir>/<run_dir>/<file>`` and concatenates the records
    of each ``.json`` file whose stem starts with ``file_stem``.  Every run
    directory is read, not only the newest one, so the same record may be
    returned more than once; de-duplication is left to the caller.

    Args:
        json_root: Directory that contains the per-task export directories.
        dir_prefix: Only task directories whose name starts with this are scanned.
        file_stem: Only JSON files whose stem starts with this are parsed.

    Returns:
        The concatenated records, visiting task and run directories in sorted
        order.  An empty list when ``json_root`` is not an existing directory.
    """
    records: list[dict] = []
    # Treat a missing export root the same way load_archive_records treats a
    # missing archive directory: nothing to load instead of FileNotFoundError.
    if not json_root.is_dir():
        return records
    for task_dir in sorted(json_root.iterdir()):
        if not (task_dir.is_dir() and task_dir.name.startswith(dir_prefix)):
            continue
        for run_dir in sorted(task_dir.iterdir()):
            if not run_dir.is_dir():
                continue
            for json_file in run_dir.iterdir():
                if json_file.suffix == ".json" and json_file.stem.startswith(file_stem):
                    records.extend(_extract_records(json_file))
    return records
def load_archive_records(json_root: Path, file_stem: str) -> list[dict]:
    """Load paginated records from the ``ODS_JSON_ARCHIVE`` directory.

    Looks at ``json_root/ODS_JSON_ARCHIVE/<run_dir>/<file>`` and concatenates the
    records of each ``.json`` file whose stem starts with ``file_stem``.

    Args:
        json_root: Directory expected to contain ``ODS_JSON_ARCHIVE``.
        file_stem: Only JSON files whose stem starts with this are parsed.

    Returns:
        The concatenated records, or an empty list when the archive directory
        does not exist.
    """
    collected: list[dict] = []
    archive_root = json_root / "ODS_JSON_ARCHIVE"
    if archive_root.exists():
        # Only sub-directories of the archive are run directories; skip stray files.
        run_dirs = (entry for entry in archive_root.iterdir() if entry.is_dir())
        for run_dir in run_dirs:
            for candidate in run_dir.iterdir():
                if candidate.suffix == ".json" and candidate.stem.startswith(file_stem):
                    collected.extend(_extract_archive_records(candidate))
    return collected
def _extract_records(filepath: Path) -> list[dict]:
    """Extract records from a standard ODS JSON file (``pages[].response.data``).

    The list field inside ``data`` is named differently per endpoint, so every
    list-valued entry of ``data`` is collected.  Only dict items are kept, which
    is what the declared return type promises and what callers rely on when they
    call ``.get`` on each record.

    Args:
        filepath: Path of the JSON file to read (decoded as UTF-8).

    Returns:
        The records found on all pages, in page order.  An empty list when the
        file cannot be read or parsed, or when it does not have the expected
        ``{"pages": [{"response": {"data": {...}}}]}`` shape.
    """
    try:
        document = json.loads(filepath.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # Best-effort scan: an unreadable or malformed file contributes nothing.
        # ValueError covers both JSONDecodeError and UnicodeDecodeError.
        return []
    if not isinstance(document, dict):
        return []
    pages = document.get("pages")
    if not isinstance(pages, list):
        return []

    records: list[dict] = []
    for page in pages:
        if not isinstance(page, dict):
            continue
        response = page.get("response")
        if not isinstance(response, dict):
            continue
        payload = response.get("data")
        # "data" may be null or a bare list on error pages; only objects carry
        # the named record lists.
        if not isinstance(payload, dict):
            continue
        for value in payload.values():
            if isinstance(value, list):
                records.extend(item for item in value if isinstance(item, dict))
    return records
def _extract_archive_records(filepath: Path) -> list[dict]:
    """Extract records from an archive page JSON file (``{code, data: [...]}``).

    Args:
        filepath: Path of the JSON file to read (decoded as UTF-8).

    Returns:
        The dict items of the top-level ``data`` list.  An empty list when the
        file cannot be read or parsed, when the top level is not an object, or
        when ``data`` is not a list.
    """
    try:
        document = json.loads(filepath.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # Best-effort scan: an unreadable or malformed file contributes nothing.
        # ValueError covers both JSONDecodeError and UnicodeDecodeError.
        return []
    # A top-level array or scalar has no "data" member to look up.
    if not isinstance(document, dict):
        return []
    payload = document.get("data")
    if not isinstance(payload, list):
        return []
    # Callers call .get on every record, so drop anything that is not an object.
    return [item for item in payload if isinstance(item, dict)]
def _count_settle_linked_records(
    json_root: Path,
    dir_prefix: str,
    file_stem: str,
    dim_name: str,
    settle_to_trade: dict[int, int],
    order_complexity: dict[int, dict[str, int]],
) -> None:
    """Count records that reach an order through ``relate_id`` (payments, refunds).

    Each record's ``relate_id`` is looked up in ``settle_to_trade``; when it maps
    to an order, that order's ``dim_name`` counter in ``order_complexity`` is
    incremented in place.  Records repeating an already seen non-null ``id`` are
    ignored.
    """
    records = load_records_from_task_dirs(json_root, dir_prefix, file_stem)
    records += load_archive_records(json_root, file_stem)
    seen_ids = set()
    for rec in records:
        # De-duplicate only on a real id: a missing/null id says nothing about
        # whether two records are the same.
        rec_id = rec.get("id")
        if rec_id is not None:
            if rec_id in seen_ids:
                continue
            seen_ids.add(rec_id)
        relate_id = rec.get("relate_id")
        if not relate_id:
            continue
        trade_no = settle_to_trade.get(relate_id)
        if trade_no:
            order_complexity[trade_no][dim_name] += 1


def main():
    """Scan the exported ODS JSON files and print the ten most complex orders.

    Steps:
      1. Load the child records of every dimension and count them per
         ``order_trade_no``; payments and refunds are attached through
         ``relate_id`` -> ``order_settle_id`` -> ``order_trade_no``.
      2. Score each order as ``total + 2 * breadth`` where ``total`` is the
         number of child records and ``breadth`` the number of dimensions that
         have at least one record.
      3. Print the Top 10 and a short summary to standard output.
    """
    json_root = get_output_path("EXPORT_ROOT")

    # --- 1. Load the child records of every dimension ---
    # Directly linked sources: (task-dir prefix, file-name prefix, join field, dimension label)
    sources = [
        ("ODS_TABLE_USE", "table_fee_transactions", "order_trade_no", "台桌使用"),
        ("ODS_TABLE_FEE_DISCOUNT", "table_fee_discount_records", "order_trade_no", "台费折扣"),
        ("ODS_ASSISTANT_LEDGER", "assistant_service_records", "order_trade_no", "助教服务"),
        ("ODS_STORE_GOODS_SALES", "store_goods_sales_records", "order_trade_no", "商品销售"),
        ("ODS_GROUP_BUY_REDEMPTION", "group_buy_redemption_records", "order_trade_no", "团购核销"),
    ]
    # Payments and refunds carry relate_id (matched against order_settle_id), so
    # they need a second lookup: (task-dir prefix, file-name prefix, dimension label)
    settle_linked_sources = [
        ("ODS_PAYMENT", "payment_transactions", "支付记录"),
        ("ODS_REFUND", "refund_transactions", "退款记录"),
    ]

    # order_trade_no -> {dimension label: record count}
    order_complexity: dict[int, dict[str, int]] = defaultdict(lambda: defaultdict(int))
    # order_trade_no -> basic info taken from the first record seen (for display)
    order_info: dict[int, dict] = {}
    # order_settle_id -> order_trade_no, built from every directly linked record
    settle_to_trade: dict[int, int] = {}

    for dir_prefix, file_stem, key_field, dim_name in sources:
        records = load_records_from_task_dirs(json_root, dir_prefix, file_stem)
        records += load_archive_records(json_root, file_stem)
        seen_ids = set()
        for rec in records:
            trade_no = rec.get(key_field)
            if not trade_no:
                continue
            # The same record can appear in several runs; de-duplicate on a
            # real id only (a missing/null id is never treated as a duplicate).
            rec_id = rec.get("id")
            if rec_id is not None:
                if rec_id in seen_ids:
                    continue
                seen_ids.add(rec_id)
            order_complexity[trade_no][dim_name] += 1
            if trade_no not in order_info:
                order_info[trade_no] = {
                    "order_trade_no": trade_no,
                    "create_time": rec.get("create_time", ""),
                    "ledger_name": rec.get("ledger_name", rec.get("tableName", "")),
                }
            settle_id = rec.get("order_settle_id")
            if settle_id:
                settle_to_trade[settle_id] = trade_no

    # Must run after the loop above: it needs the complete settle_to_trade map.
    for dir_prefix, file_stem, dim_name in settle_linked_sources:
        _count_settle_linked_records(
            json_root, dir_prefix, file_stem, dim_name, settle_to_trade, order_complexity
        )

    # --- 2. Score and rank ---
    # Derived from the source tables so the display order cannot drift from them.
    all_dims = [src[3] for src in sources] + [src[2] for src in settle_linked_sources]
    scored = []
    for trade_no, dims in order_complexity.items():
        total = sum(dims.values())
        # Extra weight for the number of distinct dimensions involved, so that
        # "broad" orders rank above orders with many records of a single kind.
        breadth = sum(1 for d in all_dims if dims.get(d, 0) > 0)
        score = total + breadth * 2
        scored.append((trade_no, score, total, breadth, dims))
    scored.sort(key=lambda x: x[1], reverse=True)
    top10 = scored[:10]

    # --- 3. Report ---
    separator = "-" * 80
    print("=" * 100)
    print(f" 订单复杂度 Top 10 (共扫描 {len(order_complexity)} 个总订单)")
    print("=" * 100)
    for rank, (trade_no, score, total, breadth, dims) in enumerate(top10, 1):
        info = order_info.get(trade_no, {})
        print(f"\n{separator}")
        print(f" #{rank} order_trade_no = {trade_no}")
        # create_time is stored as "" when the record lacks it, so fall back on
        # emptiness rather than on a missing key.
        print(f" 创建时间: {info.get('create_time') or '未知'}")
        print(f" 复杂度得分: {score} (子记录总数={total}, 涉及维度={breadth})")
        print(f" 各维度明细:")
        for d in all_dims:
            cnt = dims.get(d, 0)
            if cnt > 0:
                # Bar length is capped so one huge dimension cannot wrap the line.
                bar = "#" * min(cnt, 40)
                print(f" {d:8s}: {cnt:4d} {bar}")
    print(f"\n{separator}")
    print(f"\n统计摘要:")
    print(f" 总订单数: {len(order_complexity)}")
    if scored:
        avg_score = sum(s[1] for s in scored) / len(scored)
        print(f" 平均复杂度得分: {avg_score:.1f}")
        print(f" 最高复杂度得分: {scored[0][1]}")
# Script entry point: run the scan only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()