#!/usr/bin/env python3 """ 以交车任务列表(交车许可=已交车)为正确基准, 比对生成环境-交车数据(delivery_status_text=已完成 视为已交车), 输出已交车状态不一致的车牌号。 """ import re import shutil from pathlib import Path import pandas as pd from openpyxl import load_workbook from openpyxl.styles import PatternFill FILE_PROD = Path( "/Users/sylvawong/Library/Containers/com.tencent.xinWeChat/Data/Documents/" "xwechat_files/wxid_l80gh7d3x7u012_385c/temp/drag/生成环境-交车数据.xlsx" ) FILE_TASK = Path( "/Users/sylvawong/Library/Containers/com.tencent.xinWeChat/Data/Documents/" "xwechat_files/wxid_l80gh7d3x7u012_385c/temp/drag/" "交车任务列表-导出-2026-05-26 (5).xlsx" ) OUT = Path("/Users/sylvawong/Desktop/生成环境交车状态差异报告.xlsx") OUT_PROJ = Path("/Users/sylvawong/Desktop/CURSOR/ONE-OS/生成环境交车状态差异报告.xlsx") RED = PatternFill(start_color="FF9999", end_color="FF9999", fill_type="solid") # 基准侧 TASK_DELIVERED = "已交车" # 生成环境侧:已完成 对应 已交车 PROD_DELIVERED = "已完成" def norm_plate(v) -> str: if pd.isna(v): return "" return re.sub(r"[\s\-·]", "", str(v).strip().upper()) def delivered_plates(df, plate_col, status_col, delivered_val): df = df.copy() df["_plate"] = df[plate_col].map(norm_plate) df = df[df["_plate"] != ""] df["_status"] = df[status_col].astype(str).str.strip() # 每车牌取一条代表状态(若多行状态不一致则标为混合) g = df.groupby("_plate")["_status"].agg(lambda s: "|".join(sorted(set(s)))) delivered = set(g[g == delivered_val].index) mixed = {p for p, st in g.items() if delivered_val in st and st != delivered_val} return delivered, mixed, df def display_plate(key, df_prod, df_task): m = df_prod["plate_number"].map(norm_plate) == key if m.any(): return str(df_prod.loc[m, "plate_number"].iloc[0]) m = df_task["车牌号"].map(norm_plate) == key if m.any(): return str(df_task.loc[m, "车牌号"].iloc[0]) return key def run(): df_prod = pd.read_excel(FILE_PROD, sheet_name=0) df_task = pd.read_excel(FILE_TASK, sheet_name=0) set_task, _, df_task_all = delivered_plates( df_task, "车牌号", "交车许可", TASK_DELIVERED ) set_prod, prod_mixed, df_prod_all = delivered_plates( df_prod, "plate_number", "delivery_status_text", PROD_DELIVERED ) # 基准=交车任务已交车;生成环境应对齐为已完成 # 不一致1:任务已交车,生成环境不是已完成(含无记录、其他状态) bad_vs_baseline = sorted(set_task - set_prod) # 不一致2:生成环境已完成,任务不是已交车(生成环境多记) extra_in_prod = sorted(set_prod - set_task) all_bad = sorted(set(bad_vs_baseline) | set(extra_in_prod)) both_ok = sorted(set_task & set_prod) def plate_rows(keys, diff_type): if not keys: return pd.DataFrame(columns=["差异类型", "车牌号"]) return pd.DataFrame( { "差异类型": diff_type, "车牌号": [display_plate(k, df_prod, df_task) for k in keys], } ) df_bad1 = plate_rows( bad_vs_baseline, "交车任务=已交车(基准),生成环境≠已完成或无记录", ) df_bad2 = plate_rows( extra_in_prod, "生成环境=已完成,交车任务≠已交车", ) df_all = pd.concat([df_bad1, df_bad2], ignore_index=True) df_ok = plate_rows(both_ok, "两边一致(任务已交车且生成环境已完成)") task_rows = len(df_task[df_task["交车许可"].astype(str).str.strip() == TASK_DELIVERED]) prod_rows = len( df_prod[df_prod["delivery_status_text"].astype(str).str.strip() == PROD_DELIVERED] ) summary = pd.DataFrame( [ ["正确数据基准", "交车任务列表 · 交车许可=已交车"], ["生成环境已交车判定", "delivery_status_text=已完成"], ["交车任务 已交车行数 / 唯一车牌", f"{task_rows} / {len(set_task)}"], ["生成环境 已完成行数 / 唯一车牌", f"{prod_rows} / {len(set_prod)}"], ["两边状态一致车牌", len(both_ok)], ["已交车状态不一致车牌合计", len(all_bad)], ["└ 基准有已交车,生成环境未对齐", len(bad_vs_baseline)], ["└ 生成环境已完成,基准无已交车", len(extra_in_prod)], ["生成环境同车牌多状态(含已完成)", len(prod_mixed)], ], columns=["项目", "结果"], ) with pd.ExcelWriter(OUT, engine="openpyxl") as w: summary.to_excel(w, sheet_name="比对汇总", index=False) df_all.to_excel(w, sheet_name="不一致车牌", index=False) if len(bad_vs_baseline): df_bad1.to_excel(w, sheet_name="基准有生成环境未对齐", index=False) if len(extra_in_prod): df_bad2.to_excel(w, sheet_name="生成环境多记已完成", index=False) df_ok.to_excel(w, sheet_name="一致车牌", index=False) wb = load_workbook(OUT) for sn in ["不一致车牌", "基准有生成环境未对齐", "生成环境多记已完成"]: if sn in wb.sheetnames: ws = wb[sn] for r in range(2, ws.max_row + 1): for c in range(1, ws.max_column + 1): ws.cell(row=r, column=c).fill = RED wb.save(OUT) shutil.copy(OUT, OUT_PROJ) print(f"已输出: {OUT}") print(summary.to_string(index=False)) if __name__ == "__main__": run()