Optimized the root .gitignore to exclude virtual environments, node modules, and temp folders to ensure clean and lightweight version tracking. Co-authored-by: Cursor <cursoragent@cursor.com>
228 lines
8.5 KiB
Python
228 lines
8.5 KiB
Python
#!/usr/bin/env python3
|
||
"""比对生成环境交车数据 vs 交车任务列表(已交车) 车牌差异"""
|
||
|
||
import re
|
||
from pathlib import Path
|
||
|
||
import pandas as pd
|
||
from openpyxl import load_workbook
|
||
from openpyxl.styles import Font, PatternFill
|
||
|
||
FILE_PROD = Path(
|
||
"/Users/sylvawong/Library/Containers/com.tencent.xinWeChat/Data/Documents/"
|
||
"xwechat_files/wxid_l80gh7d3x7u012_385c/temp/drag/生成环境-交车数据.xlsx"
|
||
)
|
||
FILE_TASK = Path(
|
||
"/Users/sylvawong/Library/Containers/com.tencent.xinWeChat/Data/Documents/"
|
||
"xwechat_files/wxid_l80gh7d3x7u012_385c/temp/drag/"
|
||
"交车任务列表-导出-2026-05-26 (5).xlsx"
|
||
)
|
||
OUT = Path("/Users/sylvawong/Desktop/交车数据车牌差异报告.xlsx")
|
||
OUT_PROJ = Path("/Users/sylvawong/Desktop/CURSOR/ONE-OS/交车数据车牌差异报告.xlsx")
|
||
|
||
RED = PatternFill(start_color="FF9999", end_color="FF9999", fill_type="solid")
|
||
GREEN = PatternFill(start_color="C6EFCE", end_color="C6EFCE", fill_type="solid")
|
||
YELLOW = PatternFill(start_color="FFFF00", end_color="FFFF00", fill_type="solid")
|
||
|
||
|
||
def norm_plate(v) -> str:
|
||
if pd.isna(v):
|
||
return ""
|
||
return re.sub(r"[\s\-·]", "", str(v).strip().upper())
|
||
|
||
|
||
def load_prod():
|
||
df = pd.read_excel(FILE_PROD, sheet_name=0)
|
||
df = df.copy()
|
||
df["_plate_key"] = df["plate_number"].map(norm_plate)
|
||
df = df[df["_plate_key"] != ""]
|
||
return df
|
||
|
||
|
||
def load_task_delivered():
|
||
df = pd.read_excel(FILE_TASK, sheet_name=0)
|
||
df = df[df["交车许可"].astype(str).str.strip() == "已交车"].copy()
|
||
df["_plate_key"] = df["车牌号"].map(norm_plate)
|
||
df = df[df["_plate_key"] != ""]
|
||
return df
|
||
|
||
|
||
def norm_str(v) -> str:
|
||
if pd.isna(v):
|
||
return ""
|
||
return str(v).strip()
|
||
|
||
|
||
def norm_contract(v) -> str:
|
||
s = norm_str(v).upper().replace(" ", "")
|
||
return re.sub(r"[^A-Z0-9]", "", s)
|
||
|
||
|
||
def run_field_diff(both_prod, both_task):
|
||
"""匹配车牌上关键字段差异"""
|
||
rows = []
|
||
for key in both_prod["_plate_key"].unique():
|
||
rp = both_prod[both_prod["_plate_key"] == key].iloc[0]
|
||
rt = both_task[both_task["_plate_key"] == key].iloc[0]
|
||
notes = []
|
||
if norm_contract(rp.get("contract_code")) != norm_contract(rt.get("合同编码")):
|
||
notes.append(
|
||
f"合同:生成[{norm_str(rp.get('contract_code'))}]≠任务[{norm_str(rt.get('合同编码'))}]"
|
||
)
|
||
if norm_str(rp.get("vin")).upper() != norm_str(rt.get("车架号")).upper():
|
||
notes.append(
|
||
f"车架:生成[{norm_str(rp.get('vin'))}]≠任务[{norm_str(rt.get('车架号'))}]"
|
||
)
|
||
if norm_str(rp.get("customer_name")) != norm_str(rt.get("客户名称")):
|
||
notes.append(
|
||
f"客户:生成[{norm_str(rp.get('customer_name'))}]≠任务[{norm_str(rt.get('客户名称'))}]"
|
||
)
|
||
if notes:
|
||
rows.append(
|
||
{
|
||
"车牌号": rp.get("plate_number"),
|
||
"生成环境_合同": rp.get("contract_code"),
|
||
"交车任务_合同": rt.get("合同编码"),
|
||
"生成环境_车架": rp.get("vin"),
|
||
"交车任务_车架": rt.get("车架号"),
|
||
"生成环境_客户": rp.get("customer_name"),
|
||
"交车任务_客户": rt.get("客户名称"),
|
||
"生成环境_交车时间": rp.get("delivery_time"),
|
||
"交车任务_交车时间": rt.get("交车时间"),
|
||
"差异说明": ";".join(notes),
|
||
}
|
||
)
|
||
return pd.DataFrame(rows)
|
||
|
||
|
||
def dedupe_by_plate(df, plate_col):
|
||
"""同一车牌多行时保留首行,并标注重复数"""
|
||
counts = df["_plate_key"].value_counts()
|
||
df = df.copy()
|
||
df["_dup_count"] = df["_plate_key"].map(counts)
|
||
first_idx = df.drop_duplicates(subset=["_plate_key"], keep="first")
|
||
return first_idx, counts[counts > 1]
|
||
|
||
|
||
def run():
|
||
df_prod = load_prod()
|
||
df_task = load_task_delivered()
|
||
|
||
prod_unique, prod_dups = dedupe_by_plate(df_prod, "plate_number")
|
||
task_unique, task_dups = dedupe_by_plate(df_task, "车牌号")
|
||
|
||
set_prod = set(prod_unique["_plate_key"])
|
||
set_task = set(task_unique["_plate_key"])
|
||
|
||
only_prod_keys = sorted(set_prod - set_task)
|
||
only_task_keys = sorted(set_task - set_prod)
|
||
both_keys = sorted(set_prod & set_task)
|
||
|
||
# 明细行
|
||
cols_prod_show = [
|
||
"plate_number",
|
||
"vin",
|
||
"delivery_status_text",
|
||
"delivery_time",
|
||
"contract_code",
|
||
"customer_name",
|
||
"delivery_no",
|
||
"delivery_task_id",
|
||
]
|
||
cols_task_show = [
|
||
"车牌号",
|
||
"车架号",
|
||
"交车许可",
|
||
"交车时间",
|
||
"合同编码",
|
||
"客户名称",
|
||
"任务类型",
|
||
"车辆状态",
|
||
]
|
||
|
||
def pick_cols(df, cols):
|
||
exist = [c for c in cols if c in df.columns]
|
||
extra = ["_plate_key", "_dup_count"] if "_dup_count" in df.columns else ["_plate_key"]
|
||
return df[exist + [c for c in extra if c in df.columns]]
|
||
|
||
only_prod_all = df_prod[df_prod["_plate_key"].isin(only_prod_keys)].copy()
|
||
only_prod_all.insert(0, "差异类型", "仅生成环境有")
|
||
only_prod_df = prod_unique[prod_unique["_plate_key"].isin(only_prod_keys)].copy()
|
||
only_prod_df.insert(0, "差异类型", "仅生成环境有(去重)")
|
||
|
||
only_task_df = task_unique[task_unique["_plate_key"].isin(only_task_keys)].copy()
|
||
only_task_df.insert(0, "差异类型", "仅交车任务(已交车)有")
|
||
|
||
# 两边都有
|
||
both_prod = prod_unique[prod_unique["_plate_key"].isin(both_keys)].copy()
|
||
both_task = task_unique[task_unique["_plate_key"].isin(both_keys)].copy()
|
||
both_merged = both_prod[cols_prod_show + ["_plate_key"]].merge(
|
||
both_task[cols_task_show + ["_plate_key"]],
|
||
on="_plate_key",
|
||
how="inner",
|
||
suffixes=("_生成环境", "_交车任务"),
|
||
)
|
||
both_merged.insert(0, "差异类型", "两边均有")
|
||
field_diff_df = run_field_diff(both_prod, both_task)
|
||
|
||
summary = pd.DataFrame(
|
||
[
|
||
["生成环境-交车数据 总行数", len(df_prod)],
|
||
["生成环境 唯一车牌数", len(set_prod)],
|
||
["生成环境 重复车牌数(键)", len(prod_dups)],
|
||
["交车任务(交车许可=已交车) 总行数", len(df_task)],
|
||
["交车任务 唯一车牌数", len(set_task)],
|
||
["交车任务 重复车牌数(键)", len(task_dups)],
|
||
["—", "—"],
|
||
["仅生成环境有(车牌)", len(only_prod_keys)],
|
||
["仅交车任务已交车有(车牌)", len(only_task_keys)],
|
||
["两边均有(车牌)", len(both_keys)],
|
||
["匹配车牌但字段不一致", len(field_diff_df)],
|
||
],
|
||
columns=["项目", "数量"],
|
||
)
|
||
|
||
detail_prod = pick_cols(only_prod_all, cols_prod_show)
|
||
detail_task = pick_cols(only_task_df, cols_task_show)
|
||
|
||
OUT.parent.mkdir(parents=True, exist_ok=True)
|
||
with pd.ExcelWriter(OUT, engine="openpyxl") as w:
|
||
summary.to_excel(w, sheet_name="比对汇总", index=False)
|
||
detail_prod.to_excel(w, sheet_name="仅生成环境有", index=False)
|
||
detail_task.to_excel(w, sheet_name="仅交车任务已交车有", index=False)
|
||
both_merged.to_excel(w, sheet_name="两边均有", index=False)
|
||
field_diff_df.to_excel(w, sheet_name="匹配记录字段差异", index=False)
|
||
if len(prod_dups):
|
||
pd.DataFrame({"重复车牌": prod_dups.index, "行数": prod_dups.values}).to_excel(
|
||
w, sheet_name="生成环境重复车牌", index=False
|
||
)
|
||
if len(task_dups):
|
||
pd.DataFrame({"重复车牌": task_dups.index, "行数": task_dups.values}).to_excel(
|
||
w, sheet_name="交车任务重复车牌", index=False
|
||
)
|
||
|
||
# 标色仅* sheet 整行红
|
||
wb = load_workbook(OUT)
|
||
for sheet_name in ["仅生成环境有", "仅交车任务已交车有"]:
|
||
if sheet_name in wb.sheetnames:
|
||
ws = wb[sheet_name]
|
||
for row in range(2, ws.max_row + 1):
|
||
for col in range(1, ws.max_column + 1):
|
||
ws.cell(row=row, column=col).fill = RED
|
||
if "匹配记录字段差异" in wb.sheetnames and len(field_diff_df):
|
||
ws = wb["匹配记录字段差异"]
|
||
for row in range(2, ws.max_row + 1):
|
||
for col in range(1, ws.max_column + 1):
|
||
ws.cell(row=row, column=col).fill = YELLOW
|
||
wb.save(OUT)
|
||
|
||
import shutil
|
||
|
||
shutil.copy(OUT, OUT_PROJ)
|
||
print(f"已输出: {OUT}")
|
||
print(summary.to_string(index=False))
|
||
|
||
|
||
if __name__ == "__main__":
|
||
run()
|