feat: sync full workspace including web modules, docs, and configurations to Gitea
Optimized the root .gitignore to exclude virtual environments, node modules, and temp folders to ensure clean and lightweight version tracking. Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
361
scripts/compare_h2_records.py
Normal file
361
scripts/compare_h2_records.py
Normal file
@@ -0,0 +1,361 @@
|
||||
#!/usr/bin/env python3
|
||||
"""比对「加氢记录」与「氢费台账-加氢订单」,输出标黄/标红 Excel。"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from collections import defaultdict
|
||||
from pathlib import Path
|
||||
|
||||
import pandas as pd
|
||||
from openpyxl import load_workbook
|
||||
from openpyxl.styles import Font, PatternFill
|
||||
|
||||
FILE_RECORD = Path(
|
||||
"/Users/sylvawong/Library/Containers/com.tencent.xinWeChat/Data/Documents/"
|
||||
"xwechat_files/wxid_l80gh7d3x7u012_385c/msg/file/2026-05/"
|
||||
"加氢记录-(2026.1月-4月).xlsx"
|
||||
)
|
||||
FILE_LEDGER = Path(
|
||||
"/Users/sylvawong/Library/Containers/com.tencent.xinWeChat/Data/Documents/"
|
||||
"xwechat_files/wxid_l80gh7d3x7u012_385c/msg/file/2026-05/"
|
||||
"氢费台账总表2026.5.21(1).xlsx"
|
||||
)
|
||||
OUT_DIR = Path("/Users/sylvawong/Desktop/CURSOR/ONE-OS")
|
||||
OUT_FILE = OUT_DIR / "加氢记录与台账比对结果.xlsx"
|
||||
|
||||
COMPARE_FIELDS = [
|
||||
("加氢站", "加氢站名称", "加氢站"),
|
||||
("成本单价", "成本单价(元/kg)", "成本单价"),
|
||||
("成本金额", "成本费用(元)", "成本金额"),
|
||||
("加氢单价", "加氢单价(元/kg)", "加氢单价"),
|
||||
("加氢金额", "加氢金额(元)", "加氢金额"),
|
||||
("客户名", "客户名称", "客户名"),
|
||||
("氢费计算方式", "结算类别", "氢费计算方式"),
|
||||
]
|
||||
|
||||
YELLOW = PatternFill(start_color="FFFF00", end_color="FFFF00", fill_type="solid")
|
||||
RED = PatternFill(start_color="FF9999", end_color="FF9999", fill_type="solid")
|
||||
RED_FONT = Font(color="9C0006", bold=True)
|
||||
|
||||
|
||||
def read_record_sheet(path: Path) -> pd.DataFrame:
|
||||
raw = pd.read_excel(path, sheet_name=0, header=None)
|
||||
# 第 0 行为汇总行,第 1 行为表头
|
||||
raw.columns = [
|
||||
str(c).strip() if pd.notna(c) else f"_col{i}" for i, c in enumerate(raw.iloc[1])
|
||||
]
|
||||
df = raw.iloc[2:].copy().reset_index(drop=True)
|
||||
rename = {
|
||||
"序号": "序号",
|
||||
"日期": "日期",
|
||||
"车牌": "车牌",
|
||||
"加气量(kg)": "加气量(kg)",
|
||||
"加气量(kg)": "加气量(kg)",
|
||||
"加氢站": "加氢站",
|
||||
"成本单价": "成本单价",
|
||||
"成本金额": "成本金额",
|
||||
"加氢单价": "加氢单价",
|
||||
"加氢金额": "加氢金额",
|
||||
"客户名": "客户名",
|
||||
"氢费计算方式": "氢费计算方式",
|
||||
}
|
||||
for old, new in rename.items():
|
||||
if old in df.columns:
|
||||
df = df.rename(columns={old: new})
|
||||
return df
|
||||
|
||||
|
||||
def parse_date(val) -> str:
|
||||
if pd.isna(val):
|
||||
return ""
|
||||
if isinstance(val, pd.Timestamp):
|
||||
return val.strftime("%Y-%m-%d")
|
||||
if isinstance(val, (int, float)) and val > 40000:
|
||||
try:
|
||||
return (pd.Timestamp("1899-12-30") + pd.Timedelta(days=int(val))).strftime("%Y-%m-%d")
|
||||
except Exception:
|
||||
pass
|
||||
s = str(val).strip()
|
||||
for fmt in ("%Y-%m-%d", "%Y/%m/%d", "%Y-%m-%d %H:%M:%S"):
|
||||
try:
|
||||
return pd.to_datetime(s).strftime("%Y-%m-%d")
|
||||
except Exception:
|
||||
continue
|
||||
try:
|
||||
return pd.to_datetime(val).strftime("%Y-%m-%d")
|
||||
except Exception:
|
||||
return s[:10] if len(s) >= 10 else s
|
||||
|
||||
|
||||
def norm_plate(val) -> str:
|
||||
if pd.isna(val):
|
||||
return ""
|
||||
return re.sub(r"[\s\-·]", "", str(val).strip().upper())
|
||||
|
||||
|
||||
def norm_kg(val, tol_decimals=3) -> str:
|
||||
if pd.isna(val) or str(val).strip() in ("", "-", "nan"):
|
||||
return ""
|
||||
try:
|
||||
return f"{round(float(val), tol_decimals):.{tol_decimals}f}"
|
||||
except Exception:
|
||||
return str(val).strip()
|
||||
|
||||
|
||||
def norm_str(val) -> str:
|
||||
if pd.isna(val):
|
||||
return ""
|
||||
return str(val).strip()
|
||||
|
||||
|
||||
def norm_num(val, tol=0.02) -> float | None:
|
||||
if pd.isna(val) or str(val).strip() in ("", "-", "nan"):
|
||||
return None
|
||||
try:
|
||||
return float(val)
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
def values_equal(a, b, numeric: bool) -> bool:
|
||||
if numeric:
|
||||
na, nb = norm_num(a), norm_num(b)
|
||||
if na is None and nb is None:
|
||||
return True
|
||||
if na is None or nb is None:
|
||||
return False
|
||||
return abs(na - nb) <= 0.02
|
||||
return norm_str(a) == norm_str(b)
|
||||
|
||||
|
||||
def make_key(row, date_col, plate_col, kg_col) -> str:
|
||||
return f"{parse_date(row[date_col])}|{norm_plate(row[plate_col])}|{norm_kg(row[kg_col])}"
|
||||
|
||||
|
||||
def build_index(df, date_col, plate_col, kg_col):
|
||||
"""同一 key 可能多行,用列表保存行号。"""
|
||||
idx: dict[str, list[int]] = defaultdict(list)
|
||||
for i, row in df.iterrows():
|
||||
k = make_key(row, date_col, plate_col, kg_col)
|
||||
parts = k.split("|")
|
||||
if parts[0] and parts[1] and parts[2]:
|
||||
idx[k].append(i)
|
||||
return idx
|
||||
|
||||
|
||||
def pair_rows(keys_a: list[int], keys_b: list[int]):
|
||||
"""按顺序一对一配对,多余行不配。"""
|
||||
n = min(len(keys_a), len(keys_b))
|
||||
return list(zip(keys_a[:n], keys_b[:n]))
|
||||
|
||||
|
||||
def compare_pair(row_a: pd.Series, row_b: pd.Series) -> tuple[list[str], list[str]]:
|
||||
"""返回 (差异字段名列表, 备注片段列表)。"""
|
||||
diffs = []
|
||||
notes = []
|
||||
for col_a, col_b, label in COMPARE_FIELDS:
|
||||
numeric = label in ("成本单价", "成本金额", "加氢单价", "加氢金额")
|
||||
va = row_a.get(col_a)
|
||||
vb = row_b.get(col_b)
|
||||
if not values_equal(va, vb, numeric):
|
||||
diffs.append(label)
|
||||
notes.append(f"{label}:记录[{norm_str(va)}]≠台账[{norm_str(vb)}]")
|
||||
return diffs, notes
|
||||
|
||||
|
||||
def add_ledger_cols(df: pd.DataFrame, row_b: pd.Series | None) -> dict:
|
||||
if row_b is None:
|
||||
return {f"台账_{label}": "" for _, _, label in COMPARE_FIELDS}
|
||||
out = {}
|
||||
for col_a, col_b, label in COMPARE_FIELDS:
|
||||
out[f"台账_{label}"] = row_b.get(col_b, "")
|
||||
return out
|
||||
|
||||
|
||||
def run():
|
||||
df_rec = read_record_sheet(FILE_RECORD)
|
||||
df_led = pd.read_excel(FILE_LEDGER, sheet_name="加氢订单")
|
||||
|
||||
# 台账限定 2026-01 ~ 2026-04,与加氢记录范围一致
|
||||
df_led["_date_parsed"] = df_led["加氢时间"].apply(parse_date)
|
||||
df_led_14 = df_led[
|
||||
(df_led["_date_parsed"] >= "2026-01-01") & (df_led["_date_parsed"] < "2026-05-01")
|
||||
].copy()
|
||||
|
||||
idx_rec = build_index(df_rec, "日期", "车牌", "加气量(kg)")
|
||||
idx_led = build_index(df_led_14, "加氢时间", "车牌号", "加氢量(kg)")
|
||||
|
||||
all_keys = set(idx_rec) | set(idx_led)
|
||||
pairs: list[tuple[int | None, int | None, str]] = []
|
||||
for k in sorted(all_keys):
|
||||
ra = idx_rec.get(k, [])
|
||||
rb = idx_led.get(k, [])
|
||||
paired = pair_rows(ra, rb)
|
||||
for ia, ib in paired:
|
||||
pairs.append((ia, ib, k))
|
||||
for ia in ra[len(paired) :]:
|
||||
pairs.append((ia, None, k))
|
||||
for ib in rb[len(paired) :]:
|
||||
pairs.append((None, ib, k))
|
||||
|
||||
# --- 加氢记录侧输出 ---
|
||||
rows_rec_out = []
|
||||
rec_diff_cells: dict[int, set[str]] = defaultdict(set) # excel row -> col names
|
||||
rec_red_rows: set[int] = set()
|
||||
|
||||
for ia, ib, k in pairs:
|
||||
if ia is None:
|
||||
continue
|
||||
row_a = df_rec.loc[ia]
|
||||
row_b = df_led_14.loc[ib] if ib is not None else None
|
||||
extra = add_ledger_cols(df_rec, row_b)
|
||||
remark_parts = []
|
||||
|
||||
if ib is None:
|
||||
if k in idx_led:
|
||||
remark_parts.append("同键重复:加氢记录行数多于台账,本条未配对")
|
||||
else:
|
||||
remark_parts.append("台账缺失:加氢订单(1-4月)中无匹配记录")
|
||||
rec_red_rows.add(len(rows_rec_out) + 2) # +2: header + 1-based
|
||||
else:
|
||||
diffs, notes = compare_pair(row_a, row_b)
|
||||
if diffs:
|
||||
remark_parts.append("字段差异:" + ";".join(notes))
|
||||
for label in diffs:
|
||||
rec_diff_cells[len(rows_rec_out) + 2].add(label)
|
||||
rec_diff_cells[len(rows_rec_out) + 2].add(f"台账_{label}")
|
||||
|
||||
out_row = row_a.to_dict()
|
||||
out_row.update(extra)
|
||||
out_row["比对备注"] = ";".join(remark_parts) if remark_parts else "一致"
|
||||
rows_rec_out.append(out_row)
|
||||
|
||||
df_rec_out = pd.DataFrame(rows_rec_out)
|
||||
|
||||
# --- 加氢订单侧输出(含仅台账有、加氢记录无)---
|
||||
rows_led_out = []
|
||||
led_diff_cells: dict[int, set[str]] = defaultdict(set)
|
||||
led_red_rows: set[int] = set()
|
||||
|
||||
for ia, ib, k in pairs:
|
||||
if ib is None:
|
||||
continue
|
||||
row_b = df_led_14.loc[ib]
|
||||
row_a = df_rec.loc[ia] if ia is not None else None
|
||||
out_row = row_b.drop(labels=["_date_parsed"], errors="ignore").to_dict()
|
||||
remark_parts = []
|
||||
|
||||
if ia is None:
|
||||
if k in idx_rec:
|
||||
remark_parts.append("同键重复:台账行数多于加氢记录,本条未配对")
|
||||
else:
|
||||
remark_parts.append("加氢记录缺失:加氢记录表中无匹配记录")
|
||||
led_red_rows.add(len(rows_led_out) + 2)
|
||||
else:
|
||||
diffs, notes = compare_pair(row_a, row_b)
|
||||
if diffs:
|
||||
remark_parts.append("字段差异:" + ";".join(notes))
|
||||
col_map = {label: col_b for _, col_b, label in COMPARE_FIELDS}
|
||||
for label in diffs:
|
||||
led_diff_cells[len(rows_led_out) + 2].add(col_map[label])
|
||||
|
||||
out_row["比对备注"] = ";".join(remark_parts) if remark_parts else "一致"
|
||||
rows_led_out.append(out_row)
|
||||
|
||||
df_led_out = pd.DataFrame(rows_led_out)
|
||||
|
||||
# 汇总
|
||||
matched = sum(1 for ia, ib, _ in pairs if ia is not None and ib is not None)
|
||||
only_rec = sum(1 for ia, ib, _ in pairs if ia is not None and ib is None)
|
||||
only_led = sum(1 for ia, ib, _ in pairs if ia is None and ib is not None)
|
||||
only_rec_true = sum(
|
||||
1 for ia, ib, k in pairs if ia is not None and ib is None and k not in idx_led
|
||||
)
|
||||
only_rec_dup = only_rec - only_rec_true
|
||||
only_led_true = sum(
|
||||
1 for ia, ib, k in pairs if ia is None and ib is not None and k not in idx_rec
|
||||
)
|
||||
only_led_dup = only_led - only_led_true
|
||||
diff_count = sum(
|
||||
1
|
||||
for ia, ib, _ in pairs
|
||||
if ia is not None and ib is not None and compare_pair(df_rec.loc[ia], df_led_14.loc[ib])[0]
|
||||
)
|
||||
|
||||
summary = pd.DataFrame(
|
||||
[
|
||||
["加氢记录行数", len(df_rec)],
|
||||
["台账加氢订单(1-4月)行数", len(df_led_14)],
|
||||
["匹配键(日期+车牌+加氢量)对数", matched],
|
||||
["仅加氢记录有-合计", only_rec],
|
||||
[" 其中台账真缺失", only_rec_true],
|
||||
[" 其中同键重复多出", only_rec_dup],
|
||||
["仅台账有-合计", only_led],
|
||||
[" 其中加氢记录真缺失", only_led_true],
|
||||
[" 其中同键重复多出", only_led_dup],
|
||||
["匹配但字段有差异", diff_count],
|
||||
],
|
||||
columns=["项目", "数量"],
|
||||
)
|
||||
|
||||
OUT_DIR.mkdir(parents=True, exist_ok=True)
|
||||
with pd.ExcelWriter(OUT_FILE, engine="openpyxl") as writer:
|
||||
summary.to_excel(writer, sheet_name="比对汇总", index=False)
|
||||
df_rec_out.to_excel(writer, sheet_name="加氢记录-比对", index=False)
|
||||
df_led_out.to_excel(writer, sheet_name="加氢订单-比对", index=False)
|
||||
|
||||
apply_styles_rec(df_rec_out, rec_diff_cells, rec_red_rows)
|
||||
apply_styles_led(df_led_out, led_diff_cells, led_red_rows)
|
||||
|
||||
print(f"已输出: {OUT_FILE}")
|
||||
print(summary.to_string(index=False))
|
||||
|
||||
|
||||
def col_letter_map(columns: list) -> dict[str, int]:
|
||||
return {str(c): i + 1 for i, c in enumerate(columns)}
|
||||
|
||||
|
||||
def apply_styles_rec(df: pd.DataFrame, diff_cells: dict, red_rows: set):
|
||||
wb = load_workbook(OUT_FILE)
|
||||
ws = wb["加氢记录-比对"]
|
||||
cmap = col_letter_map(list(df.columns))
|
||||
label_to_col = {label: label for _, _, label in COMPARE_FIELDS}
|
||||
label_to_col.update({label: f"台账_{label}" for _, _, label in COMPARE_FIELDS})
|
||||
|
||||
for row_idx, labels in diff_cells.items():
|
||||
for label in labels:
|
||||
col = label_to_col.get(label, label)
|
||||
if col in cmap:
|
||||
ws.cell(row=row_idx, column=cmap[col]).fill = YELLOW
|
||||
|
||||
for row_idx in red_rows:
|
||||
for c in range(1, ws.max_column + 1):
|
||||
ws.cell(row=row_idx, column=c).fill = RED
|
||||
if "比对备注" in cmap:
|
||||
ws.cell(row=row_idx, column=cmap["比对备注"]).font = RED_FONT
|
||||
|
||||
wb.save(OUT_FILE)
|
||||
|
||||
|
||||
def apply_styles_led(df: pd.DataFrame, diff_cells: dict, red_rows: set):
|
||||
wb = load_workbook(OUT_FILE)
|
||||
ws = wb["加氢订单-比对"]
|
||||
cmap = col_letter_map(list(df.columns))
|
||||
|
||||
for row_idx, labels in diff_cells.items():
|
||||
for col_name in labels:
|
||||
if col_name in cmap:
|
||||
ws.cell(row=row_idx, column=cmap[col_name]).fill = YELLOW
|
||||
|
||||
for row_idx in red_rows:
|
||||
for c in range(1, ws.max_column + 1):
|
||||
ws.cell(row=row_idx, column=c).fill = RED
|
||||
if "比对备注" in cmap:
|
||||
ws.cell(row=row_idx, column=cmap["比对备注"]).font = RED_FONT
|
||||
|
||||
wb.save(OUT_FILE)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
run()
|
||||
Reference in New Issue
Block a user