"""应用 2a 区域财务洞察 Prompt 拼装(app2_finance 的区域派生版本)。 面向 72 组合中 area != 'all' 的 64 个组合(8 时间 × 8 业态)。 差异点(相较 app2_finance): - payload 新增顶层字段:「业态说明」「区域占比」 - 派生比率精简:仅「人力成本占成交收入比」「优惠侵蚀率」(其他比率区域级无法计算) - 单位经济区域级:支持客单价/日均订单数及环比(暂不输出会员占比,与 v1.2 system prompt H6 对齐) - 按星期聚合区域级:无「日均现金流入」(区域级无 cash_inflow 数据) - 日粒度异常区域级:仅对 gross_amount 做异常检测(无 cash_inflow) - 不注入:预收资产/现金流入/现金流出/储值卡余额变化(全店级字段,区域级无业务意义) 数据源: - 主数据:board_service.get_finance_board(time, area, compare=1) - 日粒度:etl 库 app.v_dws_finance_area_daily(按 area_code 过滤) - 区域占比:调用 board_service 两次(一次区域 + 一次 all)后派生 """ from __future__ import annotations import logging from collections import defaultdict from datetime import datetime from typing import Any from app.services.board_service import _calc_date_range, _calc_prev_range, get_finance_board # 复用 app2_finance_prompt 的公共常量与辅助函数 from app.ai.prompts.app2_finance_prompt import ( AREA_LABELS, DIMENSION_LABELS, DIMENSION_MAP, INDUSTRY_BASELINES, _aggregate_expense, _build_coach_kpi, _build_discount_kpi, _slim, _translate_keys, _WEEKDAY_MIN_DAYS, _ANOMALY_DEVIATION, _ANOMALY_MAX_ITEMS, _ANOMALY_MIN_DAYS, _ANOMALY_MIN_SAME_WEEKDAY, _WEEKDAY_ZH, ) logger = logging.getLogger(__name__) # 业态特征字典(与 v1.2 system prompt「三、业态特征」章节对齐) # trait:业态的数据表征(客单/订单密度/会员占比/周期规律) # peer:典型对比项(给 AI 做区域对比时的参照方向) AREA_INDUSTRY_TRAITS: dict[str, dict[str, str]] = { "hall": { "trait": "大厅(合并 hallA+B+C)· 散客主力 · 客单价中等 · 订单密度最高 · 会员占比相对低", "peer": "与 VIP 包厢对比单客贡献差异 · 与团购占比对比获客成本", }, "hallA": { "trait": "A 区大厅 · 散客主力 · 客单价中等 · 订单密度高", "peer": "与 hallB/hallC 对比识别区位差异 · 与 hall 合计对比看单区占比", }, "hallB": { "trait": "B 区大厅 · 散客主力 · 客单价中等 · 订单密度高", "peer": "与 hallA/hallC 对比识别区位差异", }, "hallC": { "trait": "C 区大厅(含 TV 台/美洲豹赛台)· 散客主力 · 客单价中等偏上 · 订单密度较高", "peer": "与 hallA/hallB 对比识别区位差异", }, "vip": { "trait": "VIP 台球包厢 · 会员主力 · 客单价显著高于大厅 2-3 倍 · 订单密度低 · 助教服务收入占比高", "peer": "与 hall 大厅对比单客贡献 · 与 snooker 对比高客单群体差异", }, "snooker": { "trait": "斯诺克 · 专业台球爱好者 · 客单价中高 · 会员占比较高 · 周末/夜场爆满", "peer": "与 VIP 对比高端群体结构 · 与 hall 对比专业 vs 大众", }, "mahjong": { "trait": "麻将房 · 散客 + 小团 · 客单价高(时长计费)· 停留久 · 订单密度低 · 助教参与度极低", "peer": "与 KTV 对比包间型业态 · 与 hall 对比客单价与时长", }, "ktv": { "trait": "团建房 · 团建场景 · 客单价集中在套餐 · 订单密度低 · 周末峰值明显 · 助教几乎不参与", "peer": "与 mahjong 对比包间型业态 · 与 vip 对比高客单群体", }, } def _fetch_area_daily_series( site_id: int, start_date: str, end_date: str, area_code: str, ) -> list[tuple] | None: """查区域级日粒度 [start, end],供单位经济/按星期/异常检测复用。 返回字段顺序:(stat_date, gross, order_count, member_order_count, confirmed) 注:区域级无 cash_inflow(对齐 v1.2 H6 降级),故与全店版 series 字段少一个 cash_in。 area_code 必须为非 "all" 的具体业态编码。 """ from app.database import get_connection from app.services.fdw_queries import _fdw_context try: conn = get_connection() except Exception: logger.debug("区域日粒度查询连接失败", exc_info=True) return None try: with _fdw_context(conn, site_id) as cur: cur.execute( """ SELECT stat_date, COALESCE(gross_amount, 0) AS gross, COALESCE(order_count, 0) AS order_count, COALESCE(member_order_count, 0) AS member_order_count, COALESCE(confirmed_income, 0) AS confirmed FROM app.v_dws_finance_area_daily WHERE area_code = %s AND stat_date >= %s::date AND stat_date <= %s::date ORDER BY stat_date """, (area_code, start_date, end_date), ) rows = cur.fetchall() except Exception: logger.debug( "区域日粒度查询失败: site_id=%s area=%s", site_id, area_code, exc_info=True, ) return None finally: try: conn.close() except Exception: pass active = [ (r[0], float(r[1]), int(r[2] or 0), int(r[3] or 0), float(r[4] or 0)) for r in rows if float(r[1] or 0) > 0 ] return active if active else None def _build_area_unit_economics( series: list[tuple] | None, prev_series: list[tuple] | None = None, ) -> dict | None: """区域级单位经济:客单价 + 日均订单数(含环比)。 与全店版差异: - 不输出「会员订单占比」(对齐 v1.2 system prompt H6 · 等 DWS 回填完成 + A/B 评估后再开放) - series 字段顺序:(stat_date, gross, order_count, member_order_count, confirmed) 月初场景(上期样本 < 5 天)附加"样本不足"后缀让 AI 降权引用。 """ if not series: return None total_orders = sum(r[2] for r in series) if total_orders <= 0: return None total_gross = sum(r[1] for r in series) total_confirmed = sum(r[4] for r in series) days = len(series) price_confirmed = total_confirmed / total_orders price_gross = total_gross / total_orders daily_orders = total_orders / days out: dict[str, Any] = { "总订单数": total_orders, "日均订单数": round(daily_orders, 1), "客单价_按成交收入": round(price_confirmed, 2), "客单价_按发生额": round(price_gross, 2), } if prev_series: prev_orders = sum(r[2] for r in prev_series) if prev_orders > 0: prev_days = len(prev_series) prev_gross = sum(r[1] for r in prev_series) prev_confirmed = sum(r[4] for r in prev_series) low_sample = prev_days < 5 def _pct_change(cur: float, prev: float) -> str: if prev <= 0: return "无上期数据" value = f"{(cur - prev) / prev * 100:+.1f}%" return f"{value}(上期仅 {prev_days} 天,样本不足仅供参考)" if low_sample else value out["客单价_按成交收入_环比"] = _pct_change(price_confirmed, prev_confirmed / prev_orders) out["客单价_按发生额_环比"] = _pct_change(price_gross, prev_gross / prev_orders) out["日均订单数_环比"] = _pct_change(daily_orders, prev_orders / prev_days) return out def _aggregate_by_weekday_area(series: list[tuple] | None) -> dict | None: """区域级按星期聚合(无现金流入字段)。 series 字段顺序:(stat_date, gross, order_count, member_order_count, confirmed) """ if not series or len(series) < _WEEKDAY_MIN_DAYS: return None buckets: dict[int, list[tuple]] = defaultdict(list) for row in series: buckets[row[0].weekday()].append(row) out: dict[str, dict] = {} for wd in range(7): rows = buckets.get(wd) or [] if not rows: continue n = len(rows) out[_WEEKDAY_ZH[wd]] = { "日均发生额": round(sum(r[1] for r in rows) / n, 2), "日均订单数": round(sum(r[2] for r in rows) / n, 1), "营业日数": n, } return out or None def _detect_anomaly_days_area( series: list[tuple] | None, ) -> list[dict] | None: """区域级日粒度异常(仅对 gross_amount 做,无现金流入)。 series 字段顺序:(stat_date, gross, order_count, member_order_count, confirmed) """ if not series or len(series) < _ANOMALY_MIN_DAYS: return None def _scan(idx: int, label: str) -> list[dict]: vals = [row[idx] for row in series] global_mean = sum(vals) / len(vals) if global_mean <= 0: return [] by_weekday: dict[int, list[float]] = defaultdict(list) for d, *metrics in series: by_weekday[d.weekday()].append(metrics[idx - 1]) weekday_mean: dict[int, float] = { wd: (sum(xs) / len(xs)) for wd, xs in by_weekday.items() } flagged: list[dict] = [] for d, *metrics in series: v = metrics[idx - 1] wd = d.weekday() same_count = len(by_weekday.get(wd, [])) if same_count >= _ANOMALY_MIN_SAME_WEEKDAY and weekday_mean[wd] > 0: base = weekday_mean[wd] base_label = f"同{_WEEKDAY_ZH[wd]}均值" else: base = global_mean base_label = "期均" deviation = (v - base) / base if abs(deviation) >= _ANOMALY_DEVIATION: flagged.append({ "日期": f"{d} {_WEEKDAY_ZH[wd]}", "指标": label, "当日": round(v, 2), "基线": round(base, 2), "基线类型": base_label, "偏离": f"{deviation * 100:+.1f}%", "_abs_dev": abs(deviation), }) return flagged candidates = _scan(1, "发生额") # 区域级仅发生额做异常(无现金流入) if not candidates: return None candidates.sort(key=lambda x: x["_abs_dev"], reverse=True) out = [] for c in candidates[:_ANOMALY_MAX_ITEMS]: c.pop("_abs_dev", None) out.append(c) return out async def _fetch_area_share( site_id: int, time_dimension: str, area_confirmed: float, ) -> dict | None: """查全店成交收入 + 上期全店成交收入,派生「区域占比」字段。 返回:{本区域成交收入, 占全店成交收入, 占比环比} 失败或数据不足返回 None。 """ board_time = DIMENSION_MAP.get(time_dimension) if not board_time: return None try: all_board = await get_finance_board( time=board_time, area="all", compare=1, site_id=site_id, ) except Exception: logger.debug("区域占比·全店数据查询失败", exc_info=True) return None all_overview = (all_board or {}).get("overview") or {} all_confirmed = float(all_overview.get("confirmed_revenue") or 0) if all_confirmed <= 0: return None share = area_confirmed / all_confirmed out: dict[str, Any] = { "本区域成交收入": round(area_confirmed, 2), "全店成交收入": round(all_confirmed, 2), "占全店成交收入": f"{share * 100:.1f}%", } # 环比:上期区域占比(本轮简化:若 all 的 confirmed_revenue_compare 可用,则给出"全店环比参照"让 AI 自己对比) # 本区域占比环比 = (本期区域占比 − 上期区域占比),需查上期 area board,为避免额外 DB 访问,暂只给出本期占比 return out def _build_area_derived_ratios( overview: dict | None, coach_kpi: dict | None, discount_kpi: dict | None, ) -> dict | None: """区域级派生比率:仅「人力成本占成交收入比」「优惠侵蚀率」。 其他比率(储值卡占比/结余率)区域级无数据,不输出。 """ if not isinstance(overview, dict): return None confirmed = float(overview.get("confirmed_revenue") or 0) ratios: dict[str, Any] = {} if coach_kpi and confirmed > 0: total_pay = float(coach_kpi.get("人力薪酬合计") or 0) if total_pay > 0: ratios["人力成本占成交收入比"] = round(total_pay / confirmed, 4) if discount_kpi and confirmed > 0: total_discount = float(discount_kpi.get("总优惠") or 0) gross = float(overview.get("occurrence") or 0) if gross > 0: ratios["优惠侵蚀率"] = round(total_discount / gross, 4) return ratios or None async def build_prompt( context: dict, cache_svc: Any | None = None, # 兼容统一签名 ) -> str: """构建 app2a 区域财务洞察 prompt 字符串。 Args: context: site_id, time_dimension, area(area != 'all') Returns: JSON 序列化的 prompt 字符串,字段已翻译为中文。 Raises: ValueError: time_dimension 不支持 · area 为 'all' · area 不在白名单 """ import json site_id = context["site_id"] time_dimension = context["time_dimension"] area = context.get("area") if area == "all": raise ValueError("app2a_finance_area 仅处理区域组合 · area='all' 应走 app2_finance") if area not in AREA_LABELS: raise ValueError(f"app2a_finance_area 不支持的区域: {area}") board_time = DIMENSION_MAP.get(time_dimension) if not board_time: raise ValueError(f"app2a_finance_area 不支持的时间维度: {time_dimension}") try: board_data = await get_finance_board( time=board_time, area=area, compare=1, site_id=site_id, ) except Exception: logger.warning( "app2a 财务看板查询失败: site_id=%s dimension=%s area=%s", site_id, time_dimension, area, exc_info=True, ) board_data = {} overview = board_data.get("overview") if isinstance(board_data, dict) else None revenue = board_data.get("revenue") if isinstance(board_data, dict) else None coach = board_data.get("coach_analysis") if isinstance(board_data, dict) else None expense = board_data.get("expense") if isinstance(board_data, dict) else None discount_kpi = _build_discount_kpi(revenue, overview) coach_kpi = _build_coach_kpi(coach) expense_kpi = _aggregate_expense(expense) ratios = _build_area_derived_ratios(overview, coach_kpi, discount_kpi) # 原始数据 slim 后翻译,供 AI 追溯细节 slim_data = _slim(board_data) or {} raw_cn = _translate_keys(slim_data) # 对比口径(所有环比字段的前置依赖 · H1) compare_caliber: dict[str, Any] | None = None try: cur_start, cur_end = _calc_date_range(board_time) prev_start, prev_end = _calc_prev_range(board_time, cur_start, cur_end) cur_days = (cur_end - cur_start).days + 1 prev_days = (prev_end - prev_start).days + 1 compare_caliber = { "当期范围": f"{cur_start} ~ {cur_end}({cur_days} 天)", "对比期范围": f"{prev_start} ~ {prev_end}({prev_days} 天)", "对齐方式": "上期同天数对齐(非整月/整周对比)", "说明": "所有 _环比 / _compare 字段均按上表口径计算;月中调用时对比期会自动截断到与当期相同天数", } except Exception: logger.debug("对比口径字段生成失败(不影响主流程)", exc_info=True) # 业态说明(v1.2 system prompt H7 引用依据) trait_info = AREA_INDUSTRY_TRAITS.get(area, {}) industry_brief = { "区域编码": area, "区域名称": AREA_LABELS.get(area, area), "业态特征": trait_info.get("trait", "—"), "典型对比项": trait_info.get("peer", "—"), } payload: dict[str, Any] = { "当前时间": datetime.now().strftime("%Y-%m-%d %H:%M"), "门店编号": site_id, "时间维度": DIMENSION_LABELS.get(time_dimension, time_dimension), "区域": AREA_LABELS.get(area, area), **({"对比口径": compare_caliber} if compare_caliber else {}), "业态说明": industry_brief, "核心KPI": { "发生额": float((overview or {}).get("occurrence") or 0), "发生额环比": (overview or {}).get("occurrence_compare") or "持平", "成交收入": float((overview or {}).get("confirmed_revenue") or 0), "成交收入环比": (overview or {}).get("confirmed_revenue_compare") or "持平", # 区域级无现金流入数据(v1.2 H6 降级),不输出现金相关 KPI }, } # 派生比率(仅 2 项) if ratios: payload["派生比率"] = ratios # 区域占比(需异步查全店) area_confirmed = float((overview or {}).get("confirmed_revenue") or 0) if area_confirmed > 0: area_share = await _fetch_area_share(site_id, time_dimension, area_confirmed) if area_share: payload["区域占比"] = area_share # 优惠构成(复用全店版逻辑) if discount_kpi: payload["优惠构成"] = discount_kpi # 助教成本画像(复用全店版逻辑 · 空则整块不注入 · 符合 v1.2 H6) if coach_kpi: payload["助教成本"] = coach_kpi # 支出概况(区域级仅助教支出有效,v1.2 禁谈运营/固定/平台支出 · 但注入给 AI 追溯) # 注:v1.2 system prompt 明确要求 D 板块禁谈这三类,AI 自会规避 if expense_kpi: payload["支出概况"] = expense_kpi # 日粒度派生(区域级) try: start_date, end_date = _calc_date_range(board_time) series = _fetch_area_daily_series( site_id, str(start_date), str(end_date), area_code=area, ) prev_series: list[tuple] | None = None try: prev_start, prev_end = _calc_prev_range(board_time, start_date, end_date) prev_series = _fetch_area_daily_series( site_id, str(prev_start), str(prev_end), area_code=area, ) except Exception: logger.debug("区域上期 series 查询失败,客单价环比字段将省略", exc_info=True) if series: unit_econ = _build_area_unit_economics(series, prev_series=prev_series) if unit_econ: payload["单位经济"] = unit_econ by_weekday = _aggregate_by_weekday_area(series) if by_weekday: payload["按星期聚合"] = by_weekday anomalies = _detect_anomaly_days_area(series) if anomalies: payload["日粒度异常"] = anomalies except Exception: logger.debug("区域日粒度派生字段注入失败(不影响主流程)", exc_info=True) # 行业基线 payload["行业基线"] = INDUSTRY_BASELINES # 原始指标(slim 后的区域子集) payload["原始指标"] = raw_cn if not board_data: payload["数据缺失提示"] = "区域财务看板数据获取失败,请基于已有缓存或常识分析" return json.dumps(payload, ensure_ascii=False, default=str)