#!/usr/bin/env python3 """比对生成环境交车数据 vs 交车任务列表(已交车) 车牌差异""" import re from pathlib import Path import pandas as pd from openpyxl import load_workbook from openpyxl.styles import Font, PatternFill FILE_PROD = Path( "/Users/sylvawong/Library/Containers/com.tencent.xinWeChat/Data/Documents/" "xwechat_files/wxid_l80gh7d3x7u012_385c/temp/drag/生成环境-交车数据.xlsx" ) FILE_TASK = Path( "/Users/sylvawong/Library/Containers/com.tencent.xinWeChat/Data/Documents/" "xwechat_files/wxid_l80gh7d3x7u012_385c/temp/drag/" "交车任务列表-导出-2026-05-26 (5).xlsx" ) OUT = Path("/Users/sylvawong/Desktop/交车数据车牌差异报告.xlsx") OUT_PROJ = Path("/Users/sylvawong/Desktop/CURSOR/ONE-OS/交车数据车牌差异报告.xlsx") RED = PatternFill(start_color="FF9999", end_color="FF9999", fill_type="solid") GREEN = PatternFill(start_color="C6EFCE", end_color="C6EFCE", fill_type="solid") YELLOW = PatternFill(start_color="FFFF00", end_color="FFFF00", fill_type="solid") def norm_plate(v) -> str: if pd.isna(v): return "" return re.sub(r"[\s\-·]", "", str(v).strip().upper()) def load_prod(): df = pd.read_excel(FILE_PROD, sheet_name=0) df = df.copy() df["_plate_key"] = df["plate_number"].map(norm_plate) df = df[df["_plate_key"] != ""] return df def load_task_delivered(): df = pd.read_excel(FILE_TASK, sheet_name=0) df = df[df["交车许可"].astype(str).str.strip() == "已交车"].copy() df["_plate_key"] = df["车牌号"].map(norm_plate) df = df[df["_plate_key"] != ""] return df def norm_str(v) -> str: if pd.isna(v): return "" return str(v).strip() def norm_contract(v) -> str: s = norm_str(v).upper().replace(" ", "") return re.sub(r"[^A-Z0-9]", "", s) def run_field_diff(both_prod, both_task): """匹配车牌上关键字段差异""" rows = [] for key in both_prod["_plate_key"].unique(): rp = both_prod[both_prod["_plate_key"] == key].iloc[0] rt = both_task[both_task["_plate_key"] == key].iloc[0] notes = [] if norm_contract(rp.get("contract_code")) != norm_contract(rt.get("合同编码")): notes.append( f"合同:生成[{norm_str(rp.get('contract_code'))}]≠任务[{norm_str(rt.get('合同编码'))}]" ) if norm_str(rp.get("vin")).upper() != norm_str(rt.get("车架号")).upper(): notes.append( f"车架:生成[{norm_str(rp.get('vin'))}]≠任务[{norm_str(rt.get('车架号'))}]" ) if norm_str(rp.get("customer_name")) != norm_str(rt.get("客户名称")): notes.append( f"客户:生成[{norm_str(rp.get('customer_name'))}]≠任务[{norm_str(rt.get('客户名称'))}]" ) if notes: rows.append( { "车牌号": rp.get("plate_number"), "生成环境_合同": rp.get("contract_code"), "交车任务_合同": rt.get("合同编码"), "生成环境_车架": rp.get("vin"), "交车任务_车架": rt.get("车架号"), "生成环境_客户": rp.get("customer_name"), "交车任务_客户": rt.get("客户名称"), "生成环境_交车时间": rp.get("delivery_time"), "交车任务_交车时间": rt.get("交车时间"), "差异说明": ";".join(notes), } ) return pd.DataFrame(rows) def dedupe_by_plate(df, plate_col): """同一车牌多行时保留首行,并标注重复数""" counts = df["_plate_key"].value_counts() df = df.copy() df["_dup_count"] = df["_plate_key"].map(counts) first_idx = df.drop_duplicates(subset=["_plate_key"], keep="first") return first_idx, counts[counts > 1] def run(): df_prod = load_prod() df_task = load_task_delivered() prod_unique, prod_dups = dedupe_by_plate(df_prod, "plate_number") task_unique, task_dups = dedupe_by_plate(df_task, "车牌号") set_prod = set(prod_unique["_plate_key"]) set_task = set(task_unique["_plate_key"]) only_prod_keys = sorted(set_prod - set_task) only_task_keys = sorted(set_task - set_prod) both_keys = sorted(set_prod & set_task) # 明细行 cols_prod_show = [ "plate_number", "vin", "delivery_status_text", "delivery_time", "contract_code", "customer_name", "delivery_no", "delivery_task_id", ] cols_task_show = [ "车牌号", "车架号", "交车许可", "交车时间", "合同编码", "客户名称", "任务类型", "车辆状态", ] def pick_cols(df, cols): exist = [c for c in cols if c in df.columns] extra = ["_plate_key", "_dup_count"] if "_dup_count" in df.columns else ["_plate_key"] return df[exist + [c for c in extra if c in df.columns]] only_prod_all = df_prod[df_prod["_plate_key"].isin(only_prod_keys)].copy() only_prod_all.insert(0, "差异类型", "仅生成环境有") only_prod_df = prod_unique[prod_unique["_plate_key"].isin(only_prod_keys)].copy() only_prod_df.insert(0, "差异类型", "仅生成环境有(去重)") only_task_df = task_unique[task_unique["_plate_key"].isin(only_task_keys)].copy() only_task_df.insert(0, "差异类型", "仅交车任务(已交车)有") # 两边都有 both_prod = prod_unique[prod_unique["_plate_key"].isin(both_keys)].copy() both_task = task_unique[task_unique["_plate_key"].isin(both_keys)].copy() both_merged = both_prod[cols_prod_show + ["_plate_key"]].merge( both_task[cols_task_show + ["_plate_key"]], on="_plate_key", how="inner", suffixes=("_生成环境", "_交车任务"), ) both_merged.insert(0, "差异类型", "两边均有") field_diff_df = run_field_diff(both_prod, both_task) summary = pd.DataFrame( [ ["生成环境-交车数据 总行数", len(df_prod)], ["生成环境 唯一车牌数", len(set_prod)], ["生成环境 重复车牌数(键)", len(prod_dups)], ["交车任务(交车许可=已交车) 总行数", len(df_task)], ["交车任务 唯一车牌数", len(set_task)], ["交车任务 重复车牌数(键)", len(task_dups)], ["—", "—"], ["仅生成环境有(车牌)", len(only_prod_keys)], ["仅交车任务已交车有(车牌)", len(only_task_keys)], ["两边均有(车牌)", len(both_keys)], ["匹配车牌但字段不一致", len(field_diff_df)], ], columns=["项目", "数量"], ) detail_prod = pick_cols(only_prod_all, cols_prod_show) detail_task = pick_cols(only_task_df, cols_task_show) OUT.parent.mkdir(parents=True, exist_ok=True) with pd.ExcelWriter(OUT, engine="openpyxl") as w: summary.to_excel(w, sheet_name="比对汇总", index=False) detail_prod.to_excel(w, sheet_name="仅生成环境有", index=False) detail_task.to_excel(w, sheet_name="仅交车任务已交车有", index=False) both_merged.to_excel(w, sheet_name="两边均有", index=False) field_diff_df.to_excel(w, sheet_name="匹配记录字段差异", index=False) if len(prod_dups): pd.DataFrame({"重复车牌": prod_dups.index, "行数": prod_dups.values}).to_excel( w, sheet_name="生成环境重复车牌", index=False ) if len(task_dups): pd.DataFrame({"重复车牌": task_dups.index, "行数": task_dups.values}).to_excel( w, sheet_name="交车任务重复车牌", index=False ) # 标色仅* sheet 整行红 wb = load_workbook(OUT) for sheet_name in ["仅生成环境有", "仅交车任务已交车有"]: if sheet_name in wb.sheetnames: ws = wb[sheet_name] for row in range(2, ws.max_row + 1): for col in range(1, ws.max_column + 1): ws.cell(row=row, column=col).fill = RED if "匹配记录字段差异" in wb.sheetnames and len(field_diff_df): ws = wb["匹配记录字段差异"] for row in range(2, ws.max_row + 1): for col in range(1, ws.max_column + 1): ws.cell(row=row, column=col).fill = YELLOW wb.save(OUT) import shutil shutil.copy(OUT, OUT_PROJ) print(f"已输出: {OUT}") print(summary.to_string(index=False)) if __name__ == "__main__": run()