Files
ONE-OS/scripts/compare_delivery_plates.py
王冕 a27e3b8e43 feat: sync full workspace including web modules, docs, and configurations to Gitea
Optimized the root .gitignore to exclude virtual environments, node modules,
and temp folders to ensure clean and lightweight version tracking.

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-06-09 18:12:25 +08:00

228 lines
8.5 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
"""比对生成环境交车数据 vs 交车任务列表(已交车) 车牌差异"""
import re
from pathlib import Path
import pandas as pd
from openpyxl import load_workbook
from openpyxl.styles import Font, PatternFill
FILE_PROD = Path(
"/Users/sylvawong/Library/Containers/com.tencent.xinWeChat/Data/Documents/"
"xwechat_files/wxid_l80gh7d3x7u012_385c/temp/drag/生成环境-交车数据.xlsx"
)
FILE_TASK = Path(
"/Users/sylvawong/Library/Containers/com.tencent.xinWeChat/Data/Documents/"
"xwechat_files/wxid_l80gh7d3x7u012_385c/temp/drag/"
"交车任务列表-导出-2026-05-26 (5).xlsx"
)
OUT = Path("/Users/sylvawong/Desktop/交车数据车牌差异报告.xlsx")
OUT_PROJ = Path("/Users/sylvawong/Desktop/CURSOR/ONE-OS/交车数据车牌差异报告.xlsx")
RED = PatternFill(start_color="FF9999", end_color="FF9999", fill_type="solid")
GREEN = PatternFill(start_color="C6EFCE", end_color="C6EFCE", fill_type="solid")
YELLOW = PatternFill(start_color="FFFF00", end_color="FFFF00", fill_type="solid")
def norm_plate(v) -> str:
if pd.isna(v):
return ""
return re.sub(r"[\s\-·]", "", str(v).strip().upper())
def load_prod():
df = pd.read_excel(FILE_PROD, sheet_name=0)
df = df.copy()
df["_plate_key"] = df["plate_number"].map(norm_plate)
df = df[df["_plate_key"] != ""]
return df
def load_task_delivered():
df = pd.read_excel(FILE_TASK, sheet_name=0)
df = df[df["交车许可"].astype(str).str.strip() == "已交车"].copy()
df["_plate_key"] = df["车牌号"].map(norm_plate)
df = df[df["_plate_key"] != ""]
return df
def norm_str(v) -> str:
if pd.isna(v):
return ""
return str(v).strip()
def norm_contract(v) -> str:
s = norm_str(v).upper().replace(" ", "")
return re.sub(r"[^A-Z0-9]", "", s)
def run_field_diff(both_prod, both_task):
"""匹配车牌上关键字段差异"""
rows = []
for key in both_prod["_plate_key"].unique():
rp = both_prod[both_prod["_plate_key"] == key].iloc[0]
rt = both_task[both_task["_plate_key"] == key].iloc[0]
notes = []
if norm_contract(rp.get("contract_code")) != norm_contract(rt.get("合同编码")):
notes.append(
f"合同:生成[{norm_str(rp.get('contract_code'))}]≠任务[{norm_str(rt.get('合同编码'))}]"
)
if norm_str(rp.get("vin")).upper() != norm_str(rt.get("车架号")).upper():
notes.append(
f"车架:生成[{norm_str(rp.get('vin'))}]≠任务[{norm_str(rt.get('车架号'))}]"
)
if norm_str(rp.get("customer_name")) != norm_str(rt.get("客户名称")):
notes.append(
f"客户:生成[{norm_str(rp.get('customer_name'))}]≠任务[{norm_str(rt.get('客户名称'))}]"
)
if notes:
rows.append(
{
"车牌号": rp.get("plate_number"),
"生成环境_合同": rp.get("contract_code"),
"交车任务_合同": rt.get("合同编码"),
"生成环境_车架": rp.get("vin"),
"交车任务_车架": rt.get("车架号"),
"生成环境_客户": rp.get("customer_name"),
"交车任务_客户": rt.get("客户名称"),
"生成环境_交车时间": rp.get("delivery_time"),
"交车任务_交车时间": rt.get("交车时间"),
"差异说明": "".join(notes),
}
)
return pd.DataFrame(rows)
def dedupe_by_plate(df, plate_col):
"""同一车牌多行时保留首行,并标注重复数"""
counts = df["_plate_key"].value_counts()
df = df.copy()
df["_dup_count"] = df["_plate_key"].map(counts)
first_idx = df.drop_duplicates(subset=["_plate_key"], keep="first")
return first_idx, counts[counts > 1]
def run():
df_prod = load_prod()
df_task = load_task_delivered()
prod_unique, prod_dups = dedupe_by_plate(df_prod, "plate_number")
task_unique, task_dups = dedupe_by_plate(df_task, "车牌号")
set_prod = set(prod_unique["_plate_key"])
set_task = set(task_unique["_plate_key"])
only_prod_keys = sorted(set_prod - set_task)
only_task_keys = sorted(set_task - set_prod)
both_keys = sorted(set_prod & set_task)
# 明细行
cols_prod_show = [
"plate_number",
"vin",
"delivery_status_text",
"delivery_time",
"contract_code",
"customer_name",
"delivery_no",
"delivery_task_id",
]
cols_task_show = [
"车牌号",
"车架号",
"交车许可",
"交车时间",
"合同编码",
"客户名称",
"任务类型",
"车辆状态",
]
def pick_cols(df, cols):
exist = [c for c in cols if c in df.columns]
extra = ["_plate_key", "_dup_count"] if "_dup_count" in df.columns else ["_plate_key"]
return df[exist + [c for c in extra if c in df.columns]]
only_prod_all = df_prod[df_prod["_plate_key"].isin(only_prod_keys)].copy()
only_prod_all.insert(0, "差异类型", "仅生成环境有")
only_prod_df = prod_unique[prod_unique["_plate_key"].isin(only_prod_keys)].copy()
only_prod_df.insert(0, "差异类型", "仅生成环境有(去重)")
only_task_df = task_unique[task_unique["_plate_key"].isin(only_task_keys)].copy()
only_task_df.insert(0, "差异类型", "仅交车任务(已交车)有")
# 两边都有
both_prod = prod_unique[prod_unique["_plate_key"].isin(both_keys)].copy()
both_task = task_unique[task_unique["_plate_key"].isin(both_keys)].copy()
both_merged = both_prod[cols_prod_show + ["_plate_key"]].merge(
both_task[cols_task_show + ["_plate_key"]],
on="_plate_key",
how="inner",
suffixes=("_生成环境", "_交车任务"),
)
both_merged.insert(0, "差异类型", "两边均有")
field_diff_df = run_field_diff(both_prod, both_task)
summary = pd.DataFrame(
[
["生成环境-交车数据 总行数", len(df_prod)],
["生成环境 唯一车牌数", len(set_prod)],
["生成环境 重复车牌数(键)", len(prod_dups)],
["交车任务(交车许可=已交车) 总行数", len(df_task)],
["交车任务 唯一车牌数", len(set_task)],
["交车任务 重复车牌数(键)", len(task_dups)],
["", ""],
["仅生成环境有(车牌)", len(only_prod_keys)],
["仅交车任务已交车有(车牌)", len(only_task_keys)],
["两边均有(车牌)", len(both_keys)],
["匹配车牌但字段不一致", len(field_diff_df)],
],
columns=["项目", "数量"],
)
detail_prod = pick_cols(only_prod_all, cols_prod_show)
detail_task = pick_cols(only_task_df, cols_task_show)
OUT.parent.mkdir(parents=True, exist_ok=True)
with pd.ExcelWriter(OUT, engine="openpyxl") as w:
summary.to_excel(w, sheet_name="比对汇总", index=False)
detail_prod.to_excel(w, sheet_name="仅生成环境有", index=False)
detail_task.to_excel(w, sheet_name="仅交车任务已交车有", index=False)
both_merged.to_excel(w, sheet_name="两边均有", index=False)
field_diff_df.to_excel(w, sheet_name="匹配记录字段差异", index=False)
if len(prod_dups):
pd.DataFrame({"重复车牌": prod_dups.index, "行数": prod_dups.values}).to_excel(
w, sheet_name="生成环境重复车牌", index=False
)
if len(task_dups):
pd.DataFrame({"重复车牌": task_dups.index, "行数": task_dups.values}).to_excel(
w, sheet_name="交车任务重复车牌", index=False
)
# 标色仅* sheet 整行红
wb = load_workbook(OUT)
for sheet_name in ["仅生成环境有", "仅交车任务已交车有"]:
if sheet_name in wb.sheetnames:
ws = wb[sheet_name]
for row in range(2, ws.max_row + 1):
for col in range(1, ws.max_column + 1):
ws.cell(row=row, column=col).fill = RED
if "匹配记录字段差异" in wb.sheetnames and len(field_diff_df):
ws = wb["匹配记录字段差异"]
for row in range(2, ws.max_row + 1):
for col in range(1, ws.max_column + 1):
ws.cell(row=row, column=col).fill = YELLOW
wb.save(OUT)
import shutil
shutil.copy(OUT, OUT_PROJ)
print(f"已输出: {OUT}")
print(summary.to_string(index=False))
if __name__ == "__main__":
run()