#!/usr/bin/env python3 """Merge 表2(嘉锦明细) into 表1(车辆氢费明细), generate result + comparison report.""" import pandas as pd import numpy as np from datetime import datetime T1_PATH = "/Users/sylvawong/Desktop/车辆氢费明细_2026-06-06_嘉兴嘉锦亭桥北综合供能服务站.xlsx" T2_PATH = ( "/Users/sylvawong/Library/Containers/com.tencent.xinWeChat/Data/Documents/" "xwechat_files/wxid_l80gh7d3x7u012_385c/temp/drag/嘉锦 4月明细.xls" ) OUT_DATA = "/Users/sylvawong/Desktop/车辆氢费明细_嘉兴嘉锦亭_4月合并结果.xlsx" OUT_COMPARE = "/Users/sylvawong/Desktop/车辆氢费明细_嘉兴嘉锦亭_4月比对结果.xlsx" FILTER_YEAR = 2026 FILTER_MONTH = 4 def norm_date(val): return pd.to_datetime(val).strftime("%Y-%m-%d") def norm_plate(val): return str(val).strip().upper() def norm_kg(val): return round(float(val), 2) def make_key(date_val, plate_val, kg_val): return f"{norm_date(date_val)}|{norm_plate(plate_val)}|{norm_kg(kg_val)}" def fmt_datetime(val): ts = pd.to_datetime(val) if ts.hour == 0 and ts.minute == 0 and ts.second == 0: return ts.strftime("%Y-%m-%dT00:00") return ts.strftime("%Y-%m-%d %H:%M:%S") def parse_status(val): if pd.isna(val): return "未对账" s = str(val) if "已对账" in s: return "已对账" if "未对账" in s: return "未对账" return s def num_equal(a, b, tol=0.01): if pd.isna(a) and pd.isna(b): return True if pd.isna(a) or pd.isna(b): return False return abs(float(a) - float(b)) <= tol def str_equal(a, b): if pd.isna(a) and pd.isna(b): return True if pd.isna(a) or pd.isna(b): return False return str(a).strip() == str(b).strip() def load_table1(): return pd.read_excel(T1_PATH, sheet_name=0) def load_table2(): df = pd.read_excel(T2_PATH, sheet_name=0, header=1) df.columns = [ "日期", "车牌", "加气量kg", "公里数", "加氢站", "是否对账", "车架号", "成本单价", "成本金额", ] return df def filter_april(df, date_col): dt = pd.to_datetime(df[date_col]) mask = (dt.dt.year == FILTER_YEAR) & (dt.dt.month == FILTER_MONTH) return df.loc[mask].copy() def row_from_table2(r2, seq_no=None): dt = pd.to_datetime(r2["日期"]) return { "序号": seq_no, "年": int(dt.year), "月": int(dt.month), "加氢日期": norm_date(r2["日期"]), "加氢时间": fmt_datetime(r2["日期"]), "加氢站名称": r2["加氢站"], "客户名称": np.nan, "车牌号": norm_plate(r2["车牌"]), "加氢量(kg)": norm_kg(r2["加气量kg"]), "成本单价(元/kg)": float(r2["成本单价"]) if pd.notna(r2["成本单价"]) else np.nan, "成本总价(元)": float(r2["成本金额"]) if pd.notna(r2["成本金额"]) else np.nan, "加氢单价(元/kg)": 0, "加氢总价(元)": 0.0, "行驶里程(km)": float(r2["公里数"]) if pd.notna(r2["公里数"]) else np.nan, "备注": np.nan, "业务员": np.nan, "承担方式": np.nan, "状态": parse_status(r2["是否对账"]), "订单编号": np.nan, } def main(): df1_all = load_table1() df2_all = load_table2() df1 = filter_april(df1_all, "加氢日期") df2 = filter_april(df2_all, "日期") df2 = df2.copy() df2["_key"] = df2.apply( lambda r: make_key(r["日期"], r["车牌"], r["加气量kg"]), axis=1 ) df1["_key"] = df1.apply( lambda r: make_key(r["加氢日期"], r["车牌号"], r["加氢量(kg)"]), axis=1 ) df1_orig = df1.copy() t2_map = df2.set_index("_key", drop=False) keys1 = set(df1["_key"]) keys2 = set(df2["_key"]) matched_keys = keys1 & keys2 only_t2_keys = keys2 - keys1 only_t1_keys = keys1 - keys2 overwrite_rows = [] diff_rows = [] unchanged_matched = 0 for idx, row in df1.iterrows(): key = row["_key"] if key not in matched_keys: continue r2 = t2_map.loc[key] if isinstance(r2, pd.DataFrame): r2 = r2.iloc[0] before = row.copy() updates = { "加氢日期": norm_date(r2["日期"]), "加氢时间": fmt_datetime(r2["日期"]), "加氢站名称": r2["加氢站"], "车牌号": norm_plate(r2["车牌"]), "加氢量(kg)": norm_kg(r2["加气量kg"]), "成本单价(元/kg)": float(r2["成本单价"]) if pd.notna(r2["成本单价"]) else np.nan, "成本总价(元)": float(r2["成本金额"]) if pd.notna(r2["成本金额"]) else np.nan, "行驶里程(km)": float(r2["公里数"]) if pd.notna(r2["公里数"]) else np.nan, } changed_fields = [] for field, new_val in updates.items(): old_val = before[field] is_diff = False if field in ("成本单价(元/kg)", "成本总价(元)", "加氢量(kg)", "行驶里程(km)"): is_diff = not num_equal(old_val, new_val) else: is_diff = not str_equal(old_val, new_val) if is_diff: changed_fields.append(field) diff_rows.append( { "匹配键": key, "订单编号": before.get("订单编号"), "差异字段": field, "表1原值": old_val, "表2正确值": new_val, } ) df1.at[idx, field] = new_val overwrite_rows.append( { "操作": "覆盖更新", "匹配键": key, "加氢日期": updates["加氢日期"], "车牌号": updates["车牌号"], "加氢量(kg)": updates["加氢量(kg)"], "表1原客户名称": before["客户名称"], "表1原加氢站名称": before["加氢站名称"], "表1原成本总价(元)": before["成本总价(元)"], "表1原行驶里程(km)": before["行驶里程(km)"], "表2加氢站": r2["加氢站"], "表2成本金额": r2["成本金额"], "表2公里数": r2["公里数"], "变更字段数": len(changed_fields), "变更字段": "、".join(changed_fields) if changed_fields else "无", "订单编号": before.get("订单编号"), } ) if not changed_fields: unchanged_matched += 1 # 表2有、表1无 -> 新增(不生成订单编号) new_rows = [] next_seq = int(df1["序号"].max()) + 1 if len(df1) else 1 for key in sorted(only_t2_keys): r2 = t2_map.loc[key] if isinstance(r2, pd.DataFrame): r2 = r2.iloc[0] new_row = row_from_table2(r2, seq_no=next_seq) new_row["_key"] = key new_rows.append(new_row) next_seq += 1 df_new = pd.DataFrame(new_rows) if new_rows else pd.DataFrame() if len(df_new): df1 = pd.concat([df1, df_new], ignore_index=True) out_cols = [c for c in df1_orig.columns if c != "_key"] df1 = df1.drop(columns=["_key"], errors="ignore")[out_cols] # 比对报告 summary = pd.DataFrame( [ {"项目": "比对范围", "数量": f"{FILTER_YEAR}年{FILTER_MONTH}月"}, {"项目": "表1全量行数", "数量": len(df1_all)}, {"项目": "表1四月行数", "数量": len(df1_orig)}, {"项目": "表2全量行数", "数量": len(df2_all)}, {"项目": "表2四月行数", "数量": len(df2)}, {"项目": "三条件完全命中(覆盖更新)", "数量": len(matched_keys)}, {"项目": "命中且字段完全一致", "数量": unchanged_matched}, {"项目": "命中且存在字段差异", "数量": len(matched_keys) - unchanged_matched}, {"项目": "表2有表1无(新增)", "数量": len(only_t2_keys)}, {"项目": "表1有表2无(保留表1原样)", "数量": len(only_t1_keys)}, {"项目": "合并后四月总行数", "数量": len(df1)}, { "项目": "说明", "数量": "仅比对/合并四月数据;表2无客户名称列,匹配记录客户名称保留表1原值;新增记录客户名称为空", }, ] ) df_overwrite = pd.DataFrame(overwrite_rows) df_diff = pd.DataFrame(diff_rows) df_add = pd.DataFrame(new_rows)[ [ "序号", "加氢日期", "加氢时间", "加氢站名称", "车牌号", "加氢量(kg)", "成本单价(元/kg)", "成本总价(元)", "行驶里程(km)", "状态", "订单编号", ] ] if len(new_rows) else pd.DataFrame( columns=[ "序号", "加氢日期", "加氢时间", "加氢站名称", "车牌号", "加氢量(kg)", "成本单价(元/kg)", "成本总价(元)", "行驶里程(km)", "状态", "订单编号", ] ) only_t2_detail = df2[df2["_key"].isin(only_t2_keys)][ ["日期", "车牌", "加气量kg", "公里数", "加氢站", "是否对账", "成本单价", "成本金额"] ].copy() only_t1_detail = df1_orig[df1_orig["_key"].isin(only_t1_keys)][ ["加氢日期", "车牌号", "加氢量(kg)", "加氢站名称", "客户名称", "成本总价(元)", "订单编号"] ].copy() field_map = pd.DataFrame( [ {"表1字段": "加氢日期", "表2字段": "日期", "处理方式": "覆盖"}, {"表1字段": "加氢时间", "表2字段": "日期", "处理方式": "覆盖(与加氢日期对应)"}, {"表1字段": "加氢站名称", "表2字段": "加氢站", "处理方式": "覆盖"}, {"表1字段": "客户名称", "表2字段": "(表2无此列)", "处理方式": "匹配记录保留表1原值"}, {"表1字段": "车牌号", "表2字段": "车牌", "处理方式": "覆盖"}, {"表1字段": "加氢量(kg)", "表2字段": "加气量(kg)", "处理方式": "覆盖"}, {"表1字段": "成本单价(元/kg)", "表2字段": "成本单价", "处理方式": "覆盖"}, {"表1字段": "成本总价(元)", "表2字段": "成本金额", "处理方式": "覆盖"}, {"表1字段": "加氢单价(元/kg)", "表2字段": "-", "处理方式": "保留表1"}, {"表1字段": "加氢总价(元)", "表2字段": "-", "处理方式": "保留表1"}, {"表1字段": "行驶里程(km)", "表2字段": "公里数", "处理方式": "覆盖"}, {"表1字段": "备注", "表2字段": "-", "处理方式": "保留表1"}, {"表1字段": "业务员", "表2字段": "-", "处理方式": "保留表1"}, {"表1字段": "承担方式", "表2字段": "-", "处理方式": "保留表1"}, {"表1字段": "订单编号", "表2字段": "-", "处理方式": "保留表1;新增记录不生成"}, ] ) with pd.ExcelWriter(OUT_DATA, engine="openpyxl") as writer: df1.to_excel(writer, sheet_name="车辆氢费明细", index=False) with pd.ExcelWriter(OUT_COMPARE, engine="openpyxl") as writer: summary.to_excel(writer, sheet_name="比对总览", index=False) field_map.to_excel(writer, sheet_name="字段映射规则", index=False) df_overwrite.to_excel(writer, sheet_name="覆盖更新明细", index=False) df_diff.to_excel(writer, sheet_name="字段差异明细", index=False) df_add.to_excel(writer, sheet_name="新增记录", index=False) only_t2_detail.to_excel(writer, sheet_name="仅表2有", index=False) only_t1_detail.to_excel(writer, sheet_name="仅表1有", index=False) print("合并结果:", OUT_DATA) print("比对结果:", OUT_COMPARE) print(summary.to_string(index=False)) if __name__ == "__main__": main()