feat: sync full workspace including web modules, docs, and configurations to Gitea

Optimized the root .gitignore to exclude virtual environments, node modules,
and temp folders to ensure clean and lightweight version tracking.

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
王冕
2026-06-09 18:12:25 +08:00
parent 351688006e
commit a27e3b8e43
1510 changed files with 162044 additions and 1517 deletions

View File

@@ -0,0 +1,227 @@
#!/usr/bin/env python3
"""比对生成环境交车数据 vs 交车任务列表(已交车) 车牌差异"""
import re
from pathlib import Path
import pandas as pd
from openpyxl import load_workbook
from openpyxl.styles import Font, PatternFill
# Input workbook: production-environment delivery export.
FILE_PROD = Path(
    "/Users/sylvawong/Library/Containers/com.tencent.xinWeChat/Data/Documents/"
    "xwechat_files/wxid_l80gh7d3x7u012_385c/temp/drag/生成环境-交车数据.xlsx"
)
# Input workbook: delivery-task list export.
FILE_TASK = Path(
    "/Users/sylvawong/Library/Containers/com.tencent.xinWeChat/Data/Documents/"
    "xwechat_files/wxid_l80gh7d3x7u012_385c/temp/drag/"
    "交车任务列表-导出-2026-05-26 (5).xlsx"
)
# Report destination, plus a second location the finished report is copied to.
OUT = Path("/Users/sylvawong/Desktop/交车数据车牌差异报告.xlsx")
OUT_PROJ = Path("/Users/sylvawong/Desktop/CURSOR/ONE-OS/交车数据车牌差异报告.xlsx")
# Solid cell fills (light red, light green, yellow) for highlighting report rows.
RED = PatternFill(start_color="FF9999", end_color="FF9999", fill_type="solid")
GREEN = PatternFill(start_color="C6EFCE", end_color="C6EFCE", fill_type="solid")
YELLOW = PatternFill(start_color="FFFF00", end_color="FFFF00", fill_type="solid")
def norm_plate(v) -> str:
    """Return a canonical licence-plate string used as the matching key.

    Missing values (as reported by ``pd.isna``) yield ``""``. Anything else is
    stringified, trimmed, upper-cased and stripped of whitespace, hyphens and
    middle dots.
    """
    if not pd.isna(v):
        cleaned = str(v).strip().upper()
        return re.sub(r"[\s\-·]", "", cleaned)
    return ""
def load_prod():
    """Load the production delivery export and attach a normalised plate key.

    Reads the first sheet of ``FILE_PROD``, adds a ``_plate_key`` column
    derived from ``plate_number`` and drops rows whose key is empty.
    """
    frame = pd.read_excel(FILE_PROD, sheet_name=0).copy()
    frame["_plate_key"] = frame["plate_number"].map(norm_plate)
    has_plate = frame["_plate_key"] != ""
    return frame[has_plate]
def load_task_delivered():
    """Load delivery-task rows whose permit status is "已交车" (delivered).

    Reads the first sheet of ``FILE_TASK``, keeps rows whose trimmed
    ``交车许可`` text equals ``已交车``, adds a ``_plate_key`` column derived
    from ``车牌号`` and drops rows whose key is empty.
    """
    sheet = pd.read_excel(FILE_TASK, sheet_name=0)
    permit = sheet["交车许可"].astype(str).str.strip()
    delivered = sheet[permit == "已交车"].copy()
    delivered["_plate_key"] = delivered["车牌号"].map(norm_plate)
    return delivered[delivered["_plate_key"] != ""]
def norm_str(v) -> str:
    """Stringify and trim *v*; missing values become the empty string."""
    return "" if pd.isna(v) else str(v).strip()
def norm_contract(v) -> str:
    """Reduce a contract code to its upper-case ASCII letters and digits.

    The value is trimmed via ``norm_str``, upper-cased, has spaces removed and
    finally loses every character outside ``A-Z`` / ``0-9``.
    """
    compact = norm_str(v).upper().replace(" ", "")
    alnum_only = re.sub(r"[^A-Z0-9]", "", compact)
    return alnum_only
def run_field_diff(both_prod, both_task):
    """Report key-field mismatches for plates present in both sources.

    For every plate key in *both_prod* the first production row is compared
    with the first delivery-task row on contract code, VIN and customer name.

    Args:
        both_prod: Production rows; must contain a ``_plate_key`` column.
        both_task: Delivery-task rows; must contain a ``_plate_key`` column.

    Returns:
        A DataFrame with one row per plate that has at least one mismatch,
        showing both sides' values and a ``差异说明`` column listing the
        mismatches. An empty DataFrame when nothing differs.
    """
    # Index each side by plate once so the loop does a direct lookup instead of
    # re-filtering the whole frame for every plate.
    prod_by_key = both_prod.drop_duplicates(subset=["_plate_key"], keep="first").set_index(
        "_plate_key"
    )
    task_by_key = both_task.drop_duplicates(subset=["_plate_key"], keep="first").set_index(
        "_plate_key"
    )

    def vin_key(v):
        # VINs are compared case-insensitively.
        return norm_str(v).upper()

    # (note label, production column, task column, normaliser used to compare)
    checks = (
        ("合同", "contract_code", "合同编码", norm_contract),
        ("车架", "vin", "车架号", vin_key),
        ("客户", "customer_name", "客户名称", norm_str),
    )

    rows = []
    for key in prod_by_key.index:
        if key not in task_by_key.index:
            # No counterpart row means there is nothing to compare field-wise.
            continue
        rp = prod_by_key.loc[key]
        rt = task_by_key.loc[key]
        notes = [
            f"{label}:生成[{norm_str(rp.get(prod_col))}]≠任务[{norm_str(rt.get(task_col))}]"
            for label, prod_col, task_col, normalise in checks
            if normalise(rp.get(prod_col)) != normalise(rt.get(task_col))
        ]
        if not notes:
            continue
        rows.append(
            {
                "车牌号": rp.get("plate_number"),
                "生成环境_合同": rp.get("contract_code"),
                "交车任务_合同": rt.get("合同编码"),
                "生成环境_车架": rp.get("vin"),
                "交车任务_车架": rt.get("车架号"),
                "生成环境_客户": rp.get("customer_name"),
                "交车任务_客户": rt.get("客户名称"),
                "生成环境_交车时间": rp.get("delivery_time"),
                "交车任务_交车时间": rt.get("交车时间"),
                # Separate the notes so several mismatches stay readable.
                "差异说明": ";".join(notes),
            }
        )
    return pd.DataFrame(rows)
def dedupe_by_plate(df, plate_col):
    """Keep the first row per plate key and report which keys repeat.

    Args:
        df: Frame with a ``_plate_key`` column; it is not modified.
        plate_col: Accepted for call-site symmetry; not used by the body.

    Returns:
        ``(unique_rows, repeated)`` where *unique_rows* is a copy of *df* with
        an added ``_dup_count`` column reduced to the first row per key, and
        *repeated* is the slice of the key value counts that exceed 1.
    """
    per_key = df["_plate_key"].value_counts()
    tagged = df.assign(_dup_count=df["_plate_key"].map(per_key))
    is_first = ~tagged.duplicated(subset=["_plate_key"], keep="first")
    repeated = per_key[per_key > 1]
    return tagged[is_first], repeated
def run():
    """Build the plate-difference report and write it as a coloured workbook.

    Loads both exports, reduces each to one row per normalised plate, splits
    the plates into "production only", "delivery task only" and "both",
    diffs key fields on the shared plates, writes each result set to its own
    sheet of ``OUT``, colours the difference sheets, copies the file to
    ``OUT_PROJ`` and prints the summary table.
    """
    import shutil  # only this entry point copies files

    def existing(df, cols):
        """Return the names in *cols* that are columns of *df*, order kept."""
        return [c for c in cols if c in df.columns]

    def pick_cols(df, cols):
        """Project *df* onto the display columns plus the helper key columns."""
        return df[existing(df, list(cols) + ["_plate_key", "_dup_count"])]

    def fill_data_rows(ws, fill):
        """Apply *fill* to every cell below the header row of *ws*."""
        for row in range(2, ws.max_row + 1):
            for col in range(1, ws.max_column + 1):
                ws.cell(row=row, column=col).fill = fill

    df_prod = load_prod()
    df_task = load_task_delivered()
    prod_unique, prod_dups = dedupe_by_plate(df_prod, "plate_number")
    task_unique, task_dups = dedupe_by_plate(df_task, "车牌号")

    set_prod = set(prod_unique["_plate_key"])
    set_task = set(task_unique["_plate_key"])
    only_prod_keys = sorted(set_prod - set_task)
    only_task_keys = sorted(set_task - set_prod)
    both_keys = sorted(set_prod & set_task)

    # Columns shown on the detail sheets.
    cols_prod_show = [
        "plate_number",
        "vin",
        "delivery_status_text",
        "delivery_time",
        "contract_code",
        "customer_name",
        "delivery_no",
        "delivery_task_id",
    ]
    cols_task_show = [
        "车牌号",
        "车架号",
        "交车许可",
        "交车时间",
        "合同编码",
        "客户名称",
        "任务类型",
        "车辆状态",
    ]

    # Production-only sheet lists every source row (duplicates included);
    # task-only sheet lists one row per plate.
    # NOTE(review): earlier code also inserted a "差异类型" label column into
    # these two frames, but pick_cols never selected it, so the written sheets
    # are the same without it.
    only_prod_all = df_prod[df_prod["_plate_key"].isin(only_prod_keys)]
    only_task_df = task_unique[task_unique["_plate_key"].isin(only_task_keys)]

    # Plates present on both sides.
    both_prod = prod_unique[prod_unique["_plate_key"].isin(both_keys)].copy()
    both_task = task_unique[task_unique["_plate_key"].isin(both_keys)].copy()
    # Select display columns tolerantly, like the detail sheets do, so a
    # missing optional column does not abort the whole report.
    both_merged = both_prod[existing(both_prod, cols_prod_show) + ["_plate_key"]].merge(
        both_task[existing(both_task, cols_task_show) + ["_plate_key"]],
        on="_plate_key",
        how="inner",
        suffixes=("_生成环境", "_交车任务"),
    )
    both_merged.insert(0, "差异类型", "两边均有")
    field_diff_df = run_field_diff(both_prod, both_task)

    summary = pd.DataFrame(
        [
            ["生成环境-交车数据 总行数", len(df_prod)],
            ["生成环境 唯一车牌数", len(set_prod)],
            ["生成环境 重复车牌数(键)", len(prod_dups)],
            ["交车任务(交车许可=已交车) 总行数", len(df_task)],
            ["交车任务 唯一车牌数", len(set_task)],
            ["交车任务 重复车牌数(键)", len(task_dups)],
            ["", ""],
            ["仅生成环境有(车牌)", len(only_prod_keys)],
            ["仅交车任务已交车有(车牌)", len(only_task_keys)],
            ["两边均有(车牌)", len(both_keys)],
            ["匹配车牌但字段不一致", len(field_diff_df)],
        ],
        columns=["项目", "数量"],
    )
    detail_prod = pick_cols(only_prod_all, cols_prod_show)
    detail_task = pick_cols(only_task_df, cols_task_show)

    OUT.parent.mkdir(parents=True, exist_ok=True)
    with pd.ExcelWriter(OUT, engine="openpyxl") as w:
        summary.to_excel(w, sheet_name="比对汇总", index=False)
        detail_prod.to_excel(w, sheet_name="仅生成环境有", index=False)
        detail_task.to_excel(w, sheet_name="仅交车任务已交车有", index=False)
        both_merged.to_excel(w, sheet_name="两边均有", index=False)
        field_diff_df.to_excel(w, sheet_name="匹配记录字段差异", index=False)
        # Duplicate-plate sheets are only written when duplicates exist.
        for dups, sheet in (
            (prod_dups, "生成环境重复车牌"),
            (task_dups, "交车任务重复车牌"),
        ):
            if len(dups):
                pd.DataFrame({"重复车牌": dups.index, "行数": dups.values}).to_excel(
                    w, sheet_name=sheet, index=False
                )

    # Re-open the written file to colour the difference sheets:
    # one-sided plates red, field mismatches yellow.
    wb = load_workbook(OUT)
    for sheet_name in ("仅生成环境有", "仅交车任务已交车有"):
        if sheet_name in wb.sheetnames:
            fill_data_rows(wb[sheet_name], RED)
    if "匹配记录字段差异" in wb.sheetnames and len(field_diff_df):
        fill_data_rows(wb["匹配记录字段差异"], YELLOW)
    wb.save(OUT)

    # The copy target may live in a directory that does not exist yet.
    OUT_PROJ.parent.mkdir(parents=True, exist_ok=True)
    shutil.copy(OUT, OUT_PROJ)
    print(f"已输出: {OUT}")
    print(summary.to_string(index=False))
# Run the comparison only when this file is executed as a script.
if __name__ == "__main__":
    run()

View File

@@ -0,0 +1,361 @@
#!/usr/bin/env python3
"""比对「加氢记录」与「氢费台账-加氢订单」,输出标黄/标红 Excel。"""
from __future__ import annotations
import re
from collections import defaultdict
from pathlib import Path
import pandas as pd
from openpyxl import load_workbook
from openpyxl.styles import Font, PatternFill
# Input workbook: hydrogen refuelling record.
FILE_RECORD = Path(
    "/Users/sylvawong/Library/Containers/com.tencent.xinWeChat/Data/Documents/"
    "xwechat_files/wxid_l80gh7d3x7u012_385c/msg/file/2026-05/"
    "加氢记录-2026.1月-4月.xlsx"
)
# Input workbook: hydrogen fee ledger (its "加氢订单" sheet is the one compared).
FILE_LEDGER = Path(
    "/Users/sylvawong/Library/Containers/com.tencent.xinWeChat/Data/Documents/"
    "xwechat_files/wxid_l80gh7d3x7u012_385c/msg/file/2026-05/"
    "氢费台账总表2026.5.21(1).xlsx"
)
# Output location of the comparison workbook.
OUT_DIR = Path("/Users/sylvawong/Desktop/CURSOR/ONE-OS")
OUT_FILE = OUT_DIR / "加氢记录与台账比对结果.xlsx"
# Fields compared on matched rows: (record column, ledger column, display label).
COMPARE_FIELDS = [
    ("加氢站", "加氢站名称", "加氢站"),
    ("成本单价", "成本单价(元/kg)", "成本单价"),
    ("成本金额", "成本费用(元)", "成本金额"),
    ("加氢单价", "加氢单价(元/kg)", "加氢单价"),
    ("加氢金额", "加氢金额(元)", "加氢金额"),
    ("客户名", "客户名称", "客户名"),
    ("氢费计算方式", "结算类别", "氢费计算方式"),
]
# Cell styles: yellow for differing cells, light red for unmatched rows,
# bold dark-red font for the remark cell of unmatched rows.
YELLOW = PatternFill(start_color="FFFF00", end_color="FFFF00", fill_type="solid")
RED = PatternFill(start_color="FF9999", end_color="FF9999", fill_type="solid")
RED_FONT = Font(color="9C0006", bold=True)
def read_record_sheet(path: Path) -> pd.DataFrame:
    """Read the refuelling record sheet into a frame with canonical headers.

    The first sheet is read without a header; sheet row 1 supplies the column
    names (blank header cells become ``_col<i>``) and data starts at row 2.
    Known header spellings are then mapped onto the names the comparison uses.
    """
    raw = pd.read_excel(path, sheet_name=0, header=None)
    # Row 0 is a summary row, row 1 holds the header.
    header_cells = raw.iloc[1]
    raw.columns = [
        f"_col{pos}" if pd.isna(cell) else str(cell).strip()
        for pos, cell in enumerate(header_cells)
    ]
    body = raw.iloc[2:].copy().reset_index(drop=True)
    canonical = {
        "序号": "序号",
        "日期": "日期",
        "车牌": "车牌",
        "加气量kg)": "加气量kg)",
        "加气量kg": "加气量kg)",
        "加氢站": "加氢站",
        "成本单价": "成本单价",
        "成本金额": "成本金额",
        "加氢单价": "加氢单价",
        "加氢金额": "加氢金额",
        "客户名": "客户名",
        "氢费计算方式": "氢费计算方式",
    }
    # rename() skips keys that are not present, so one call covers every entry.
    return body.rename(columns=canonical)
def parse_date(val) -> str:
    """Normalise a date-like cell value to a ``YYYY-MM-DD`` string.

    Handling, in order:
      * missing values -> ``""``
      * ``pd.Timestamp`` -> formatted directly
      * int/float above 40000 -> treated as an Excel serial day number
        (day 0 = 1899-12-30), fractional part discarded
      * anything else -> parsed with ``pd.to_datetime``; when parsing fails
        the first 10 characters of the trimmed text are returned as-is.
    """
    if pd.isna(val):
        return ""
    if isinstance(val, pd.Timestamp):
        return val.strftime("%Y-%m-%d")
    if isinstance(val, (int, float)) and val > 40000:
        try:
            serial_day = pd.Timestamp("1899-12-30") + pd.Timedelta(days=int(val))
            return serial_day.strftime("%Y-%m-%d")
        except Exception:
            # Best effort: an out-of-range serial falls through to text parsing.
            pass
    s = str(val).strip()
    # Previously the text parse sat inside a loop over format strings that were
    # never passed to the parser, so the identical call ran three times.
    # Try the trimmed text once, then the raw value once.
    for candidate in (s, val):
        try:
            return pd.to_datetime(candidate).strftime("%Y-%m-%d")
        except Exception:
            continue
    # Unparseable: keep at most the first 10 characters of the text.
    return s[:10]
def norm_plate(val) -> str:
    """Canonicalise a licence plate for use in a matching key.

    Missing values give ``""``; otherwise the text is trimmed, upper-cased and
    has whitespace, hyphens and middle dots removed.
    """
    text = "" if pd.isna(val) else str(val).strip().upper()
    return re.sub(r"[\s\-·]", "", text) if text else ""
def norm_kg(val, tol_decimals=3) -> str:
    """Format a quantity as fixed-point text for use in a matching key.

    Missing values and the placeholders ``""``, ``"-"`` and ``"nan"`` give
    ``""``. Numeric input is rounded to *tol_decimals* places and rendered with
    exactly that many decimals; non-numeric input is returned trimmed.
    """
    if pd.isna(val):
        return ""
    trimmed = str(val).strip()
    if trimmed in ("", "-", "nan"):
        return ""
    try:
        rounded = round(float(val), tol_decimals)
    except Exception:
        return trimmed
    return f"{rounded:.{tol_decimals}f}"
def norm_str(val) -> str:
    """Return *val* as trimmed text, or ``""`` when it is missing."""
    if not pd.isna(val):
        return str(val).strip()
    return ""
def norm_num(val, tol=0.02) -> float | None:
    """Convert a cell value to ``float``, or ``None`` when it is not a number.

    Missing values, the placeholders ``""``, ``"-"`` and ``"nan"``, and
    anything ``float()`` rejects all give ``None``. *tol* is accepted but not
    used by this function.
    """
    if pd.isna(val):
        return None
    if str(val).strip() in ("", "-", "nan"):
        return None
    try:
        number = float(val)
    except Exception:
        return None
    return number
def values_equal(a, b, numeric: bool, tol: float = 0.02) -> bool:
    """Decide whether two cell values count as equal.

    Args:
        a, b: Raw cell values from the two sources.
        numeric: When true, compare as numbers within *tol*; otherwise compare
            the trimmed text exactly.
        tol: Largest absolute difference still treated as equal for numeric
            comparison. Defaults to 0.02, the value that used to be fixed
            inside this function.

    Returns:
        ``True`` when the values match. For numeric comparison two
        non-numeric values match each other, and a numeric value never
        matches a non-numeric one.
    """
    if not numeric:
        return norm_str(a) == norm_str(b)
    na, nb = norm_num(a), norm_num(b)
    if na is None or nb is None:
        # Equal only when both sides are blank / non-numeric.
        return na is None and nb is None
    return abs(na - nb) <= tol
def make_key(row, date_col, plate_col, kg_col) -> str:
    """Build the ``date|plate|kg`` matching key for one row.

    Each part is normalised by ``parse_date``, ``norm_plate`` and ``norm_kg``
    respectively; a part that cannot be derived is left empty.
    """
    key_parts = (
        parse_date(row[date_col]),
        norm_plate(row[plate_col]),
        norm_kg(row[kg_col]),
    )
    return "|".join(key_parts)
def build_index(df, date_col, plate_col, kg_col):
    """Group row labels of *df* by their matching key.

    Returns a ``defaultdict`` mapping key -> list of row labels in iteration
    order. Several rows may share a key. Rows whose date, plate or quantity
    part is empty are left out.
    """
    grouped: dict[str, list[int]] = defaultdict(list)
    for label, record in df.iterrows():
        key = make_key(record, date_col, plate_col, kg_col)
        if not all(key.split("|")[:3]):
            continue
        grouped[key].append(label)
    return grouped
def pair_rows(keys_a: list[int], keys_b: list[int]):
    """Pair the two lists positionally; surplus entries of the longer list are dropped."""
    # zip() already stops at the shorter input.
    return list(zip(keys_a, keys_b))
def compare_pair(row_a: pd.Series, row_b: pd.Series) -> tuple[list[str], list[str]]:
    """Compare one record row with one ledger row over ``COMPARE_FIELDS``.

    Returns ``(labels, notes)``: the display labels of the fields that differ
    and, in the same order, a human-readable note for each of them.
    """
    numeric_labels = ("成本单价", "成本金额", "加氢单价", "加氢金额")
    diffs: list[str] = []
    notes: list[str] = []
    for col_a, col_b, label in COMPARE_FIELDS:
        va, vb = row_a.get(col_a), row_b.get(col_b)
        if values_equal(va, vb, label in numeric_labels):
            continue
        diffs.append(label)
        notes.append(f"{label}:记录[{norm_str(va)}]≠台账[{norm_str(vb)}]")
    return diffs, notes
def add_ledger_cols(df: pd.DataFrame, row_b: pd.Series | None) -> dict:
    """Return the ``台账_<label>`` columns for one output row.

    With no ledger row every column is ``""``; otherwise each column carries
    the ledger value of the corresponding compared field (``""`` when the
    ledger row lacks it). *df* is not used by the body.
    """
    return {
        f"台账_{label}": "" if row_b is None else row_b.get(col_b, "")
        for _, col_b, label in COMPARE_FIELDS
    }
def run():
    """Compare the refuelling record with the ledger orders and write the report.

    Rows are matched on the date|plate|quantity key built by ``build_index``.
    Rows sharing a key are paired positionally; leftovers are reported as
    unmatched. Two detail sheets are produced (record-side view and
    ledger-side view) plus a summary sheet, then styled and saved to
    ``OUT_FILE``.
    """
    df_rec = read_record_sheet(FILE_RECORD)
    df_led = pd.read_excel(FILE_LEDGER, sheet_name="加氢订单")
    # Restrict the ledger to 2026-01 .. 2026-04, matching the record file's range.
    # The comparison is on YYYY-MM-DD strings, so it is lexicographic.
    df_led["_date_parsed"] = df_led["加氢时间"].apply(parse_date)
    df_led_14 = df_led[
        (df_led["_date_parsed"] >= "2026-01-01") & (df_led["_date_parsed"] < "2026-05-01")
    ].copy()
    idx_rec = build_index(df_rec, "日期", "车牌", "加气量kg)")
    idx_led = build_index(df_led_14, "加氢时间", "车牌号", "加氢量(kg)")
    all_keys = set(idx_rec) | set(idx_led)
    # Each entry is (record row label | None, ledger row label | None, key).
    pairs: list[tuple[int | None, int | None, str]] = []
    for k in sorted(all_keys):
        ra = idx_rec.get(k, [])
        rb = idx_led.get(k, [])
        paired = pair_rows(ra, rb)
        for ia, ib in paired:
            pairs.append((ia, ib, k))
        # Whatever is left on either side after positional pairing is unmatched.
        for ia in ra[len(paired) :]:
            pairs.append((ia, None, k))
        for ib in rb[len(paired) :]:
            pairs.append((None, ib, k))
    # --- Record-side output ---
    rows_rec_out = []
    rec_diff_cells: dict[int, set[str]] = defaultdict(set)  # excel row -> col names
    rec_red_rows: set[int] = set()
    for ia, ib, k in pairs:
        if ia is None:
            continue
        row_a = df_rec.loc[ia]
        row_b = df_led_14.loc[ib] if ib is not None else None
        extra = add_ledger_cols(df_rec, row_b)
        remark_parts = []
        if ib is None:
            # The key exists in the ledger but with fewer rows than the record,
            # versus the key being absent from the ledger altogether.
            if k in idx_led:
                remark_parts.append("同键重复:加氢记录行数多于台账,本条未配对")
            else:
                remark_parts.append("台账缺失:加氢订单(1-4月)中无匹配记录")
            rec_red_rows.add(len(rows_rec_out) + 2)  # +2: header + 1-based
        else:
            diffs, notes = compare_pair(row_a, row_b)
            if diffs:
                remark_parts.append("字段差异:" + "".join(notes))
                # Mark both the record cell and its ledger counterpart.
                for label in diffs:
                    rec_diff_cells[len(rows_rec_out) + 2].add(label)
                    rec_diff_cells[len(rows_rec_out) + 2].add(f"台账_{label}")
        out_row = row_a.to_dict()
        out_row.update(extra)
        out_row["比对备注"] = "".join(remark_parts) if remark_parts else "一致"
        rows_rec_out.append(out_row)
    df_rec_out = pd.DataFrame(rows_rec_out)
    # --- Ledger-side output (includes rows only the ledger has) ---
    rows_led_out = []
    led_diff_cells: dict[int, set[str]] = defaultdict(set)
    led_red_rows: set[int] = set()
    for ia, ib, k in pairs:
        if ib is None:
            continue
        row_b = df_led_14.loc[ib]
        row_a = df_rec.loc[ia] if ia is not None else None
        # The helper date column is internal and is left out of the output row.
        out_row = row_b.drop(labels=["_date_parsed"], errors="ignore").to_dict()
        remark_parts = []
        if ia is None:
            if k in idx_rec:
                remark_parts.append("同键重复:台账行数多于加氢记录,本条未配对")
            else:
                remark_parts.append("加氢记录缺失:加氢记录表中无匹配记录")
            led_red_rows.add(len(rows_led_out) + 2)
        else:
            diffs, notes = compare_pair(row_a, row_b)
            if diffs:
                remark_parts.append("字段差异:" + "".join(notes))
                # Translate display labels back to the ledger's own column names.
                col_map = {label: col_b for _, col_b, label in COMPARE_FIELDS}
                for label in diffs:
                    led_diff_cells[len(rows_led_out) + 2].add(col_map[label])
        out_row["比对备注"] = "".join(remark_parts) if remark_parts else "一致"
        rows_led_out.append(out_row)
    df_led_out = pd.DataFrame(rows_led_out)
    # Summary counts
    matched = sum(1 for ia, ib, _ in pairs if ia is not None and ib is not None)
    only_rec = sum(1 for ia, ib, _ in pairs if ia is not None and ib is None)
    only_led = sum(1 for ia, ib, _ in pairs if ia is None and ib is not None)
    # "true" = the key is absent on the other side; the remainder are surplus
    # rows of a key that exists on both sides.
    only_rec_true = sum(
        1 for ia, ib, k in pairs if ia is not None and ib is None and k not in idx_led
    )
    only_rec_dup = only_rec - only_rec_true
    only_led_true = sum(
        1 for ia, ib, k in pairs if ia is None and ib is not None and k not in idx_rec
    )
    only_led_dup = only_led - only_led_true
    diff_count = sum(
        1
        for ia, ib, _ in pairs
        if ia is not None and ib is not None and compare_pair(df_rec.loc[ia], df_led_14.loc[ib])[0]
    )
    summary = pd.DataFrame(
        [
            ["加氢记录行数", len(df_rec)],
            ["台账加氢订单(1-4月)行数", len(df_led_14)],
            ["匹配键(日期+车牌+加氢量)对数", matched],
            ["仅加氢记录有-合计", only_rec],
            [" 其中台账真缺失", only_rec_true],
            [" 其中同键重复多出", only_rec_dup],
            ["仅台账有-合计", only_led],
            [" 其中加氢记录真缺失", only_led_true],
            [" 其中同键重复多出", only_led_dup],
            ["匹配但字段有差异", diff_count],
        ],
        columns=["项目", "数量"],
    )
    OUT_DIR.mkdir(parents=True, exist_ok=True)
    with pd.ExcelWriter(OUT_FILE, engine="openpyxl") as writer:
        summary.to_excel(writer, sheet_name="比对汇总", index=False)
        df_rec_out.to_excel(writer, sheet_name="加氢记录-比对", index=False)
        df_led_out.to_excel(writer, sheet_name="加氢订单-比对", index=False)
    # Styling re-opens the saved workbook, so it runs after the writer closes.
    apply_styles_rec(df_rec_out, rec_diff_cells, rec_red_rows)
    apply_styles_led(df_led_out, led_diff_cells, led_red_rows)
    print(f"已输出: {OUT_FILE}")
    print(summary.to_string(index=False))
def col_letter_map(columns: list) -> dict[str, int]:
    """Map each column name (as text) to its 1-based position in *columns*."""
    positions: dict[str, int] = {}
    for position, column in enumerate(columns, start=1):
        positions[str(column)] = position
    return positions
def apply_styles_rec(df: pd.DataFrame, diff_cells: dict, red_rows: set):
    """Colour the record-side sheet of the saved workbook.

    Args:
        df: The frame written to the ``加氢记录-比对`` sheet; its column order
            gives the worksheet column numbers.
        diff_cells: Worksheet row number -> names of the columns to fill
            yellow. The names are output column names: both a record column
            and its ``台账_`` counterpart.
        red_rows: Worksheet row numbers to fill red entirely; their remark
            cell also gets the red bold font.

    The workbook at ``OUT_FILE`` is loaded, modified and saved in place.
    """
    wb = load_workbook(OUT_FILE)
    ws = wb["加氢记录-比对"]
    cmap = col_letter_map(list(df.columns))
    # diff_cells already holds output column names, so look them up directly.
    # The earlier label -> column table overwrote each plain label with its
    # "台账_" form, so the record-side cell of a differing field was never
    # highlighted.
    for row_idx, col_names in diff_cells.items():
        for col_name in col_names:
            if col_name in cmap:
                ws.cell(row=row_idx, column=cmap[col_name]).fill = YELLOW
    for row_idx in red_rows:
        for c in range(1, ws.max_column + 1):
            ws.cell(row=row_idx, column=c).fill = RED
        if "比对备注" in cmap:
            ws.cell(row=row_idx, column=cmap["比对备注"]).font = RED_FONT
    wb.save(OUT_FILE)
def apply_styles_led(df: pd.DataFrame, diff_cells: dict, red_rows: set):
    """Colour the ledger-side sheet of the saved workbook.

    Cells named in *diff_cells* (worksheet row -> column names) are filled
    yellow; every row in *red_rows* is filled red and its remark cell gets the
    red bold font. The workbook at ``OUT_FILE`` is loaded and saved in place.
    """
    book = load_workbook(OUT_FILE)
    sheet = book["加氢订单-比对"]
    positions = col_letter_map(list(df.columns))
    for excel_row, names in diff_cells.items():
        for name in names:
            if name not in positions:
                continue
            sheet.cell(row=excel_row, column=positions[name]).fill = YELLOW
    for excel_row in red_rows:
        for col_no in range(1, sheet.max_column + 1):
            sheet.cell(row=excel_row, column=col_no).fill = RED
        if "比对备注" in positions:
            remark_cell = sheet.cell(row=excel_row, column=positions["比对备注"])
            remark_cell.font = RED_FONT
    book.save(OUT_FILE)
# Run the comparison only when this file is executed as a script.
if __name__ == "__main__":
    run()

View File

@@ -0,0 +1,333 @@
#!/usr/bin/env python3
"""比对「桃子表格」与「新表」(匹配键:加氢日期+车牌号),输出标黄/标红 Excel。"""
from __future__ import annotations
import re
import shutil
from collections import defaultdict
from pathlib import Path
import pandas as pd
from openpyxl import load_workbook
from openpyxl.styles import Font, PatternFill
# Display names of the two tables, used in remarks and output column names.
NAME_PEACH = "桃子表格"
NAME_NEW = "新表"
# Input workbook for the "peach" table.
FILE_PEACH = Path(
    "/Users/sylvawong/Library/Containers/com.tencent.xinWeChat/Data/Documents/"
    "xwechat_files/wxid_l80gh7d3x7u012_385c/temp/drag/"
    "加氢记录-2026.1月-4月(2)(1).xlsx"
)
# Input workbook for the "new" table (its "加氢订单" sheet is read).
FILE_NEW = Path(
    "/Users/sylvawong/Library/Containers/com.tencent.xinWeChat/Data/Documents/"
    "xwechat_files/wxid_l80gh7d3x7u012_385c/temp/drag/"
    "氢费台账总表2026.5.21(1)(2)(2).xlsx"
)
# Report destination and the desktop location it is copied to afterwards.
OUT_FILE = Path("/Users/sylvawong/Desktop/CURSOR/ONE-OS/桃子表格与新表比对结果.xlsx")
OUT_DESKTOP = Path("/Users/sylvawong/Desktop/桃子表格与新表比对结果.xlsx")
# (peach-table column, new-table column, display label)
COMPARE_FIELDS = [
    ("加氢站", "加氢站名称", "加氢站"),
    ("加气量kg)", "加氢量(kg)", "加气量kg)"),
    ("成本单价", "成本单价(元/kg)", "成本单价"),
    ("成本金额", "成本费用(元)", "成本金额"),
    ("加氢单价", "加氢单价(元/kg)", "加氢单价"),
    ("加氢金额", "加氢金额(元)", "加氢金额"),
    ("客户名", "客户名称", "客户名"),
    ("氢费计算方式", "结算类别", "氢费计算方式"),
]
# Cell styles: yellow for differing cells, light red for one-sided rows,
# bold dark-red font for the remark cell of one-sided rows.
YELLOW = PatternFill(start_color="FFFF00", end_color="FFFF00", fill_type="solid")
RED = PatternFill(start_color="FF9999", end_color="FF9999", fill_type="solid")
RED_FONT = Font(color="9C0006", bold=True)
# Name of the output column that holds the per-row remark.
REMARK_COL = "差异说明"
def new_col_prefix(label: str) -> str:
    """Return the output column name that carries the new table's *label* value."""
    return "{}_{}".format(NAME_NEW, label)
def read_peach_sheet(path: Path) -> pd.DataFrame:
    """Read the peach table's first sheet.

    Sheet row 1 supplies the column names (blank header cells become
    ``_col<i>``), data starts at row 2, and the header spelling ``加气量kg``
    is mapped onto ``加气量kg)``.
    """
    raw = pd.read_excel(path, sheet_name=0, header=None)
    headers = []
    for pos, cell in enumerate(raw.iloc[1]):
        headers.append(f"_col{pos}" if pd.isna(cell) else str(cell).strip())
    raw.columns = headers
    body = raw.iloc[2:].copy().reset_index(drop=True)
    if "加气量kg" not in body.columns:
        return body
    return body.rename(columns={"加气量kg": "加气量kg)"})
def read_new_sheet(path: Path) -> pd.DataFrame:
    """Read the ``加氢订单`` sheet of the new table with unique column names.

    Sheet row 1 supplies the column names (blank header cells become
    ``_col<i>``) and data starts at row 2. A header that repeats gets a
    ``_<k>`` suffix, where *k* counts the repeats so far (second occurrence
    ``_1``, third ``_2``, ...).
    """
    raw = pd.read_excel(path, sheet_name="加氢订单", header=None)
    repeats: dict[str, int] = {}
    unique_names = []
    for pos, cell in enumerate(list(raw.iloc[1])):
        base = f"_col{pos}" if pd.isna(cell) else str(cell).strip()
        previous = repeats.get(base)
        if previous is None:
            repeats[base] = 0
            unique_names.append(base)
        else:
            repeats[base] = previous + 1
            unique_names.append(f"{base}_{previous + 1}")
    body = raw.iloc[2:].copy().reset_index(drop=True)
    body.columns = unique_names
    return body
def parse_date(val) -> str:
    """Normalise a date-like cell value to a ``YYYY-MM-DD`` string.

    Missing values give ``""``. Timestamps are formatted directly; numbers
    above 40000 are read as Excel serial days (day 0 = 1899-12-30); other
    values are parsed from their trimmed text. Unparseable text is returned
    cut to its first 10 characters.
    """
    iso = "%Y-%m-%d"
    if pd.isna(val):
        return ""
    if isinstance(val, pd.Timestamp):
        return val.strftime(iso)
    looks_like_serial = isinstance(val, (int, float)) and val > 40000
    if looks_like_serial:
        try:
            excel_epoch = pd.Timestamp("1899-12-30")
            return (excel_epoch + pd.Timedelta(days=int(val))).strftime(iso)
        except Exception:
            pass
    text = str(val).strip()
    try:
        parsed = pd.to_datetime(text)
        return parsed.strftime(iso)
    except Exception:
        # Slicing is a no-op for text shorter than 10 characters.
        return text[:10]
def norm_plate(val) -> str:
    """Canonicalise a licence plate: trim, upper-case, drop whitespace/hyphens/middle dots.

    Missing values give ``""``.
    """
    if pd.isna(val):
        return ""
    separators = re.compile(r"[\s\-·]")
    upper = str(val).strip().upper()
    return separators.sub("", upper)
def norm_str(val) -> str:
    """Trimmed text form of *val*; ``""`` for missing values."""
    missing = pd.isna(val)
    return "" if missing else str(val).strip()
def norm_num(val) -> float | None:
    """Convert a cell value to ``float``; ``None`` when blank or not numeric.

    Missing values and the placeholders ``""``, ``"-"`` and ``"nan"`` count as
    blank.
    """
    placeholders = ("", "-", "nan")
    is_blank = pd.isna(val) or str(val).strip() in placeholders
    if is_blank:
        return None
    try:
        return float(val)
    except Exception:
        return None
def values_equal(a, b, numeric: bool) -> bool:
    """Decide whether two cell values count as equal.

    Text comparison is exact on the trimmed strings. Numeric comparison allows
    an absolute difference of up to 0.02; two non-numeric values are equal to
    each other, and a number never equals a non-number.
    """
    if not numeric:
        return norm_str(a) == norm_str(b)
    left, right = norm_num(a), norm_num(b)
    if left is None or right is None:
        return left is None and right is None
    return abs(left - right) <= 0.02
def make_key(row, date_col, plate_col) -> str:
    """Build the ``date|plate`` matching key, or ``""`` if either part is empty."""
    date_part = parse_date(row[date_col])
    plate_part = norm_plate(row[plate_col])
    if not (date_part and plate_part):
        return ""
    return f"{date_part}|{plate_part}"
def build_index(df, date_col, plate_col):
    """Group row labels of *df* by matching key; rows without a key are skipped.

    Returns a ``defaultdict`` mapping key -> list of row labels in iteration
    order.
    """
    grouped: dict[str, list[int]] = defaultdict(list)
    for label, record in df.iterrows():
        key = make_key(record, date_col, plate_col)
        if not key:
            continue
        grouped[key].append(label)
    return grouped
def pair_rows(a_list: list[int], b_list: list[int]):
    """Pair entries of the two lists by position, stopping at the shorter one."""
    return [(left, right) for left, right in zip(a_list, b_list)]
def compare_pair(row_peach: pd.Series, row_new: pd.Series) -> tuple[list[str], list[str]]:
    """Compare a peach-table row with a new-table row over ``COMPARE_FIELDS``.

    Returns ``(labels, notes)``: display labels of the differing fields and a
    matching list of human-readable notes.
    """
    numeric_labels = ("加气量kg)", "成本单价", "成本金额", "加氢单价", "加氢金额")
    diffs: list[str] = []
    notes: list[str] = []
    for col_p, col_n, label in COMPARE_FIELDS:
        vp, vn = row_peach.get(col_p), row_new.get(col_n)
        if values_equal(vp, vn, label in numeric_labels):
            continue
        diffs.append(label)
        notes.append(f"{label}:{NAME_PEACH}[{norm_str(vp)}]≠{NAME_NEW}[{norm_str(vn)}]")
    return diffs, notes
def new_to_peach_fields(row_new: pd.Series) -> dict:
    """Express a new-table row under the peach table's column names.

    ``序号`` takes ``订单编号``, falling back to ``订单编号_1`` and then ``""``;
    every other field is ``None`` when the source column is absent.
    """
    # (peach-table column, new-table column)
    column_pairs = (
        ("日期", "加氢时间"),
        ("车牌", "车牌号"),
        ("加气量kg)", "加氢量(kg)"),
        ("加氢站", "加氢站名称"),
        ("成本单价", "成本单价(元/kg)"),
        ("成本金额", "成本费用(元)"),
        ("加氢单价", "加氢单价(元/kg)"),
        ("加氢金额", "加氢金额(元)"),
        ("客户名", "客户名称"),
        ("氢费计算方式", "结算类别"),
    )
    fallback_no = row_new.get("订单编号_1", "")
    mapped = {"序号": row_new.get("订单编号", fallback_no)}
    for peach_col, new_col in column_pairs:
        mapped[peach_col] = row_new.get(new_col)
    return mapped
def add_new_cols(row_new: pd.Series | None) -> dict:
    """Return the new-table side columns for one output row.

    Keys come from ``new_col_prefix``. Values are the new-table values of the
    compared fields, or ``""`` when there is no new-table row or it lacks the
    column.
    """
    return {
        new_col_prefix(label): "" if row_new is None else row_new.get(col_n, "")
        for _p, col_n, label in COMPARE_FIELDS
    }
def run():
    """Compare the peach table with the new table and write the report.

    Rows are matched on the date|plate key built by ``build_index``. Rows
    sharing a key are paired positionally. The single result sheet is laid
    out in the peach table's columns, followed by the new table's values and
    a remark. Rows that exist only in the new table are added as filled-in
    rows. The workbook is styled, saved to ``OUT_FILE`` and copied to
    ``OUT_DESKTOP``.
    """
    df_peach = read_peach_sheet(FILE_PEACH)
    df_new = read_new_sheet(FILE_NEW)
    # Keep new-table rows dated 2026-01-01 .. 2026-04-30; the comparison is on
    # YYYY-MM-DD strings, so it is lexicographic.
    df_new["_date_parsed"] = df_new["加氢时间"].apply(parse_date)
    df_new_14 = df_new[
        (df_new["_date_parsed"] >= "2026-01-01") & (df_new["_date_parsed"] < "2026-05-01")
    ].copy()
    idx_peach = build_index(df_peach, "日期", "车牌")
    idx_new = build_index(df_new_14, "加氢时间", "车牌号")
    all_keys = set(idx_peach) | set(idx_new)
    # Each entry is (peach row label | None, new row label | None, key).
    pairs: list[tuple[int | None, int | None, str]] = []
    for k in sorted(all_keys):
        pa, pb = idx_peach.get(k, []), idx_new.get(k, [])
        paired = pair_rows(pa, pb)
        for ia, ib in paired:
            pairs.append((ia, ib, k))
        # Whatever is left on either side after positional pairing is unmatched.
        for ia in pa[len(paired) :]:
            pairs.append((ia, None, k))
        for ib in pb[len(paired) :]:
            pairs.append((None, ib, k))
    rows_out = []
    diff_cells: dict[int, set[str]] = defaultdict(set)
    red_rows: set[int] = set()
    peach_cols = list(df_peach.columns)
    for ia, ib, k in pairs:
        # Worksheet row of this output row: +1 for the header, +1 for 1-based.
        excel_row = len(rows_out) + 2
        remark_parts = []
        is_red = False
        if ia is not None:
            base = df_peach.loc[ia].to_dict()
            base["数据来源"] = NAME_PEACH
        else:
            # No peach row: synthesise one from the new-table row.
            row_new = df_new_14.loc[ib]
            base = {c: "" for c in peach_cols}
            base.update(new_to_peach_fields(row_new))
            base["数据来源"] = f"{NAME_NEW}补录"
            is_red = True
            remark_parts.append(
                f"{NAME_PEACH}缺失:本条由【{NAME_NEW}】补录添加(日期+车牌在新表中存在、桃子表格中无对应行)"
            )
        row_new = df_new_14.loc[ib] if ib is not None else None
        if ib is None and ia is not None:
            is_red = True
            # The key exists in the new table but with fewer rows, versus the
            # key being absent from the new table altogether.
            if k in idx_new:
                remark_parts.append(
                    f"同键重复:{NAME_PEACH}行数多于{NAME_NEW},本条在{NAME_PEACH}有但未与{NAME_NEW}配对"
                )
            else:
                remark_parts.append(
                    f"{NAME_NEW}缺失:【{NAME_NEW}】加氢订单(1-4月)中无相同加氢日期+车牌"
                )
        if ia is not None and ib is not None:
            diffs, notes = compare_pair(df_peach.loc[ia], row_new)
            if diffs:
                remark_parts.append("字段差异:" + "".join(notes))
                # Mark both the peach cell and its new-table counterpart.
                for label in diffs:
                    diff_cells[excel_row].add(label)
                    diff_cells[excel_row].add(new_col_prefix(label))
        out_row = base
        out_row.update(add_new_cols(row_new))
        out_row[REMARK_COL] = "".join(remark_parts) if remark_parts else "一致"
        if is_red:
            red_rows.add(excel_row)
        rows_out.append(out_row)
    df_out = pd.DataFrame(rows_out)
    # Summary counts
    matched = sum(1 for ia, ib, _ in pairs if ia is not None and ib is not None)
    only_peach = sum(1 for ia, ib, _ in pairs if ia is not None and ib is None)
    only_new = sum(1 for ia, ib, _ in pairs if ia is None and ib is not None)
    diff_count = sum(
        1
        for ia, ib, _ in pairs
        if ia is not None
        and ib is not None
        and compare_pair(df_peach.loc[ia], df_new_14.loc[ib])[0]
    )
    # NOTE(review): the label of the only_peach row is just the table name;
    # presumably it was meant to read "only in <peach table>" - confirm.
    summary = pd.DataFrame(
        [
            [f"{NAME_PEACH}行数", len(df_peach)],
            [f"{NAME_NEW}行数(加氢订单1-4月)", len(df_new_14)],
            ["匹配键", "加氢日期 + 车牌号"],
            ["成功配对", matched],
            [f"{NAME_PEACH}", only_peach],
            [f"{NAME_NEW}有(已补录)", only_new],
            ["配对但字段有差异", diff_count],
        ],
        columns=["项目", "数量/说明"],
    )
    OUT_FILE.parent.mkdir(parents=True, exist_ok=True)
    with pd.ExcelWriter(OUT_FILE, engine="openpyxl") as writer:
        summary.to_excel(writer, sheet_name="比对汇总", index=False)
        df_out.to_excel(writer, sheet_name="比对结果", index=False)
    # Styling re-opens the saved workbook, so it runs after the writer closes.
    apply_styles(df_out, diff_cells, red_rows)
    shutil.copy(OUT_FILE, OUT_DESKTOP)
    print(f"已输出: {OUT_FILE}")
    print(f"已复制: {OUT_DESKTOP}")
    print(summary.to_string(index=False))
def col_letter_map(columns: list) -> dict[str, int]:
    """Map each column name (as text) to its 1-based position in *columns*."""
    names = [str(column) for column in columns]
    return dict(zip(names, range(1, len(names) + 1)))
def apply_styles(df: pd.DataFrame, diff_cells: dict, red_rows: set):
    """Colour the ``比对结果`` sheet of the saved workbook.

    Cells named in *diff_cells* (worksheet row -> output column names) are
    filled yellow; every row in *red_rows* is filled red and its remark cell
    gets the red bold font. The workbook at ``OUT_FILE`` is loaded and saved
    in place.
    """
    book = load_workbook(OUT_FILE)
    sheet = book["比对结果"]
    positions = col_letter_map(list(df.columns))
    # The names in diff_cells are already output column names, so they are
    # looked up directly.
    for excel_row, names in diff_cells.items():
        for name in names:
            if name not in positions:
                continue
            sheet.cell(row=excel_row, column=positions[name]).fill = YELLOW
    for excel_row in red_rows:
        for col_no in range(1, sheet.max_column + 1):
            sheet.cell(row=excel_row, column=col_no).fill = RED
        if REMARK_COL in positions:
            sheet.cell(row=excel_row, column=positions[REMARK_COL]).font = RED_FONT
    book.save(OUT_FILE)
# Run the comparison only when this file is executed as a script.
if __name__ == "__main__":
    run()

View File

@@ -0,0 +1,352 @@
#!/usr/bin/env python3
"""比对「小许表格」与「新表」(匹配键:加氢日期+车牌号+加氢站5月范围"""
from __future__ import annotations
import re
import shutil
from collections import defaultdict
from pathlib import Path
import pandas as pd
from openpyxl import load_workbook
from openpyxl.styles import Font, PatternFill
# Display names of the two tables, used in remarks and output column names.
NAME_XU = "小许表格"
NAME_NEW = "新表"
# Input workbook for the "Xu" table.
FILE_XU = Path(
    "/Users/sylvawong/Library/Containers/com.tencent.xinWeChat/Data/Documents/"
    "xwechat_files/wxid_l80gh7d3x7u012_385c/temp/drag/最新版加氢记录5.13(2).xlsx"
)
# Input workbook for the "new" table.
FILE_NEW = Path(
    "/Users/sylvawong/Library/Containers/com.tencent.xinWeChat/Data/Documents/"
    "xwechat_files/wxid_l80gh7d3x7u012_385c/temp/drag/"
    "氢费台账总表2026.5.21(1)(2)(2).xlsx"
)
# Report destination and a desktop path for the same report.
OUT_FILE = Path("/Users/sylvawong/Desktop/CURSOR/ONE-OS/小许表格与新表比对结果_5月.xlsx")
OUT_DESKTOP = Path("/Users/sylvawong/Desktop/小许表格与新表比对结果_5月.xlsx")
# New table: May 2026 only; Xu table: May 2026 onwards.
NEW_MONTH_START = "2026-05-01"
NEW_MONTH_END = "2026-06-01"
XU_MONTH_START = "2026-05-01"
# Fields compared on matched rows: (Xu column, new-table column, display label).
COMPARE_FIELDS = [
    ("加氢站", "加氢站名称", "加氢站"),
    ("加气量kg)", "加氢量(kg)", "加气量kg)"),
    ("成本单价", "成本单价(元/kg)", "成本单价"),
    ("成本金额", "成本费用(元)", "成本金额"),
    ("加氢单价", "加氢单价(元/kg)", "加氢单价"),
    ("加氢金额", "加氢金额(元)", "加氢金额"),
    ("客户名", "客户名称", "客户名"),
    ("氢费计算方式", "结算类别", "氢费计算方式"),
]
# Cell styles: yellow fill, light-red fill, bold dark-red font.
YELLOW = PatternFill(start_color="FFFF00", end_color="FFFF00", fill_type="solid")
RED = PatternFill(start_color="FF9999", end_color="FF9999", fill_type="solid")
RED_FONT = Font(color="9C0006", bold=True)
# Name of the output column that holds the per-row remark.
REMARK_COL = "差异说明"
def new_col_prefix(label: str) -> str:
    """Build the output column name holding the new table's value for *label*."""
    prefixed = f"{NAME_NEW}_" + f"{label}"
    return prefixed
def read_xu_sheet(path: Path) -> pd.DataFrame:
    """Read the ``明细台账`` sheet of the Xu workbook.

    Sheet row 0 is a summary row, row 1 is the header and data starts at
    row 2. Blank header cells become ``_col<i>``, and the header spelling
    ``加气量kg`` is mapped onto ``加气量kg)``.
    """
    raw = pd.read_excel(path, sheet_name="明细台账", header=None)
    header_row = raw.iloc[1]
    raw.columns = [
        f"_col{pos}" if pd.isna(cell) else str(cell).strip()
        for pos, cell in enumerate(header_row)
    ]
    body = raw.iloc[2:].copy().reset_index(drop=True)
    needs_rename = "加气量kg" in body.columns
    return body.rename(columns={"加气量kg": "加气量kg)"}) if needs_rename else body
def read_new_sheet(path: Path) -> pd.DataFrame:
    """Read the ``加氢订单`` sheet of the new table with unique column names.

    Sheet row 1 supplies the column names (blank header cells become
    ``_col<i>``) and data starts at row 2. The n-th repeat of a header is
    suffixed ``_<n>`` (second occurrence ``_1``, third ``_2``, ...).
    """
    raw = pd.read_excel(path, sheet_name="加氢订单", header=None)
    times_seen: dict[str, int] = defaultdict(int)
    labels = []
    for pos, cell in enumerate(list(raw.iloc[1])):
        base = str(cell).strip() if pd.notna(cell) else f"_col{pos}"
        earlier = times_seen[base]
        times_seen[base] = earlier + 1
        # First occurrence keeps the bare name; later ones get the repeat count.
        labels.append(base if earlier == 0 else f"{base}_{earlier}")
    data = raw.iloc[2:].copy().reset_index(drop=True)
    data.columns = labels
    return data
def parse_date(val) -> str:
    """Normalise a date-like cell value to a ``YYYY-MM-DD`` string.

    Missing values give ``""``. Timestamps are formatted directly; numbers
    above 40000 are read as Excel serial days counted from 1899-12-30; other
    values are parsed from their trimmed text, and text that cannot be parsed
    is returned cut to its first 10 characters.
    """
    if pd.isna(val):
        return ""

    def as_iso(moment) -> str:
        return moment.strftime("%Y-%m-%d")

    if isinstance(val, pd.Timestamp):
        return as_iso(val)
    if isinstance(val, (int, float)) and val > 40000:
        try:
            return as_iso(pd.Timestamp("1899-12-30") + pd.Timedelta(days=int(val)))
        except Exception:
            pass
    raw_text = str(val).strip()
    try:
        return as_iso(pd.to_datetime(raw_text))
    except Exception:
        return raw_text if len(raw_text) < 10 else raw_text[:10]
def norm_plate(val) -> str:
    """Return the plate trimmed, upper-cased and without whitespace, hyphens or middle dots.

    Missing values give ``""``.
    """
    if pd.isna(val):
        return ""
    plate = str(val)
    plate = plate.strip()
    plate = plate.upper()
    return re.sub(r"[\s\-·]", "", plate)
def norm_station(val) -> str:
    """Return the station name with all whitespace removed; ``""`` when missing."""
    if not pd.isna(val):
        trimmed = str(val).strip()
        return re.sub(r"\s+", "", trimmed)
    return ""
def norm_str(val) -> str:
    """Return *val* as trimmed text; missing values map to ``""``."""
    if pd.isna(val):
        return ""
    as_text = str(val)
    return as_text.strip()
def norm_num(val) -> float | None:
    """Return *val* as ``float``, or ``None`` for blanks and non-numeric input.

    Missing values and the placeholders ``""``, ``"-"`` and ``"nan"`` count as
    blank.
    """
    if pd.isna(val):
        return None
    if str(val).strip() in ("", "-", "nan"):
        return None
    result = None
    try:
        result = float(val)
    except Exception:
        result = None
    return result
def values_equal(a, b, numeric: bool) -> bool:
    """Decide whether two cell values count as equal.

    Non-numeric mode compares the trimmed text exactly. Numeric mode treats
    values within 0.02 of each other as equal; two non-numeric values are
    equal, one numeric and one non-numeric are not.
    """
    if numeric:
        first = norm_num(a)
        second = norm_num(b)
        both_blank = first is None and second is None
        any_blank = first is None or second is None
        if any_blank:
            return both_blank
        return abs(first - second) <= 0.02
    return norm_str(a) == norm_str(b)
def make_key(row, date_col, plate_col, station_col) -> str:
    """Build the ``date|plate|station`` matching key for one row.

    The station is part of the key so that several refuels of the same plate
    on the same day are told apart instead of being misreported as missing.
    A blank station is replaced by a fixed placeholder. Returns ``""`` when
    the date or the plate is empty.
    """
    date_part = parse_date(row[date_col])
    plate_part = norm_plate(row[plate_col])
    station_part = norm_station(row[station_col]) or "_未填加氢站_"
    if date_part and plate_part:
        return "|".join((date_part, plate_part, station_part))
    return ""
def filter_by_date_range(df, date_col, start: str, end: str | None) -> pd.DataFrame:
    """Return the rows of *df* whose parsed date lies in ``[start, end)``.

    Works on a copy of *df* and adds a ``_date_parsed`` column holding the
    ``parse_date`` result of *date_col*. Bounds are compared as strings. A
    falsy *end* leaves the range open-ended. The result has a fresh 0-based
    index.
    """
    tagged = df.copy()
    tagged["_date_parsed"] = tagged[date_col].apply(parse_date)
    on_or_after = tagged["_date_parsed"] >= start
    if not end:
        in_range = on_or_after
    else:
        in_range = on_or_after & (tagged["_date_parsed"] < end)
    return tagged[in_range].reset_index(drop=True)
def build_index(df, date_col, plate_col, station_col):
    """Group row labels of *df* by matching key; rows with an empty key are skipped.

    Returns a ``defaultdict`` mapping key -> list of row labels in iteration
    order.
    """
    by_key: dict[str, list[int]] = defaultdict(list)
    for row_label, row in df.iterrows():
        key = make_key(row, date_col, plate_col, station_col)
        if key == "":
            continue
        by_key[key].append(row_label)
    return by_key
def pair_rows(a_list: list[int], b_list: list[int]):
    """Pair entries of the two lists by position; extras of the longer list are ignored."""
    shared = min(len(a_list), len(b_list))
    return [(a_list[pos], b_list[pos]) for pos in range(shared)]
def compare_pair(row_xu: pd.Series, row_new: pd.Series) -> tuple[list[str], list[str]]:
    """Compare a Xu-table row with a new-table row over ``COMPARE_FIELDS``.

    Returns ``(labels, notes)``: display labels of the differing fields and a
    matching list of human-readable notes.
    """
    numeric_labels = ("加气量kg)", "成本单价", "成本金额", "加氢单价", "加氢金额")
    mismatches = []
    for col_x, col_n, label in COMPARE_FIELDS:
        vx = row_xu.get(col_x)
        vn = row_new.get(col_n)
        if not values_equal(vx, vn, label in numeric_labels):
            mismatches.append(
                (label, f"{label}:{NAME_XU}[{norm_str(vx)}]≠{NAME_NEW}[{norm_str(vn)}]")
            )
    diffs = [label for label, _note in mismatches]
    notes = [note for _label, note in mismatches]
    return diffs, notes
def new_to_xu_fields(row_new: pd.Series) -> dict:
    """Express a new-table row under the Xu table's column names.

    ``序号`` takes ``订单编号``, falling back to ``订单编号_1`` and then ``""``;
    every other field is ``None`` when the source column is absent.
    """
    order_no = row_new.get("订单编号", row_new.get("订单编号_1", ""))
    # Xu column -> new-table column, in output order.
    source_of = {
        "日期": "加氢时间",
        "车牌": "车牌号",
        "加气量kg)": "加氢量(kg)",
        "加氢站": "加氢站名称",
        "成本单价": "成本单价(元/kg)",
        "成本金额": "成本费用(元)",
        "加氢单价": "加氢单价(元/kg)",
        "加氢金额": "加氢金额(元)",
        "客户名": "客户名称",
        "氢费计算方式": "结算类别",
    }
    return {"序号": order_no, **{xu_col: row_new.get(src) for xu_col, src in source_of.items()}}
def add_new_cols(row_new: pd.Series | None) -> dict:
    """Return the new-table side columns for one output row.

    Keys come from ``new_col_prefix``. Values are the new-table values of the
    compared fields, or ``""`` when there is no new-table row or it lacks the
    column.
    """
    columns = {}
    for _x, col_n, label in COMPARE_FIELDS:
        value = "" if row_new is None else row_new.get(col_n, "")
        columns[new_col_prefix(label)] = value
    return columns
def run():
    """Compare the two refuelling ledgers and write the difference workbook.

    Steps:
      1. Load both sheets and restrict each to its configured date range.
      2. Index both sides by date + plate + station and pair rows that share
         a key; unpaired rows are kept as one-sided entries.
      3. Emit one output row per pair or one-sided entry, with a remark, the
         new export's values alongside, and bookkeeping of which rows/cells
         must be highlighted.
      4. Write the summary and result sheets, apply the highlighting, and
         copy the workbook to the second output location.

    The per-category counters are accumulated while the rows are built, so
    every matched pair is compared exactly once.
    """
    df_xu_all = read_xu_sheet(FILE_XU)
    df_new_all = read_new_sheet(FILE_NEW)
    df_xu = filter_by_date_range(df_xu_all, "日期", XU_MONTH_START, None)
    df_new = filter_by_date_range(df_new_all, "加氢时间", NEW_MONTH_START, NEW_MONTH_END)
    idx_xu = build_index(df_xu, "日期", "车牌", "加氢站")
    idx_new = build_index(df_new, "加氢时间", "车牌号", "加氢站名称")
    all_keys = set(idx_xu) | set(idx_new)

    # (row label in df_xu or None, row label in df_new or None, key)
    pairs: list[tuple[int | None, int | None, str]] = []
    for k in sorted(all_keys):
        xa, xb = idx_xu.get(k, []), idx_new.get(k, [])
        paired = pair_rows(xa, xb)
        for ia, ib in paired:
            pairs.append((ia, ib, k))
        for ia in xa[len(paired):]:
            pairs.append((ia, None, k))
        for ib in xb[len(paired):]:
            pairs.append((None, ib, k))

    rows_out = []
    diff_cells: dict[int, set[str]] = defaultdict(set)
    red_rows: set[int] = set()
    xu_cols = list(df_xu.columns)
    matched = only_xu = only_new = diff_count = 0

    for ia, ib, k in pairs:
        # Row 1 of the sheet is the header, so data starts at Excel row 2.
        excel_row = len(rows_out) + 2
        remark_parts = []
        is_red = False
        row_new = df_new.loc[ib] if ib is not None else None

        if ia is not None:
            base = df_xu.loc[ia].to_dict()
            base["数据来源"] = NAME_XU
        else:
            # Only the new export has this record: back-fill a ledger-shaped row.
            only_new += 1
            base = {c: "" for c in xu_cols}
            base.update(new_to_xu_fields(row_new))
            base["数据来源"] = f"{NAME_NEW}补录"
            is_red = True
            remark_parts.append(
                f"{NAME_XU}缺失:本条由【{NAME_NEW}】补录({NAME_NEW}有相同日期+车牌+加氢站,{NAME_XU}无)"
            )

        if ib is None and ia is not None:
            only_xu += 1
            is_red = True
            if k in idx_new:
                # The key exists on the other side, but all its rows are already paired.
                remark_parts.append(
                    f"同键重复:{NAME_XU}行数多于{NAME_NEW}(日期+车牌+加氢站相同,本条未配对)"
                )
            else:
                remark_parts.append(
                    f"{NAME_NEW}缺失:【{NAME_NEW}】5月订单中无相同日期+车牌+加氢站"
                )

        if ia is not None and ib is not None:
            matched += 1
            diffs, notes = compare_pair(df_xu.loc[ia], row_new)
            if diffs:
                diff_count += 1
                remark_parts.append("字段差异:" + "".join(notes))
                for label in diffs:
                    # Highlight both the ledger cell and its new-export twin.
                    diff_cells[excel_row].add(label)
                    diff_cells[excel_row].add(new_col_prefix(label))

        out_row = base
        out_row.update(add_new_cols(row_new))
        out_row[REMARK_COL] = "".join(remark_parts) if remark_parts else "一致"
        if is_red:
            red_rows.add(excel_row)
        rows_out.append(out_row)

    df_out = pd.DataFrame(rows_out)
    summary = pd.DataFrame(
        [
            [f"{NAME_XU}范围", f"日期 >= {XU_MONTH_START}5月及之后"],
            [f"{NAME_NEW}范围", f"{NEW_MONTH_START} <= 加氢时间 < {NEW_MONTH_END}仅5月"],
            [f"{NAME_XU}行数", len(df_xu)],
            [f"{NAME_NEW}行数", len(df_new)],
            ["匹配键", "加氢日期 + 车牌号 + 加氢站(同站不判缺失)"],
            ["成功配对", matched],
            [f"{NAME_XU}", only_xu],
            [f"{NAME_NEW}有(已补录)", only_new],
            ["配对但字段有差异", diff_count],
        ],
        columns=["项目", "数量/说明"],
    )

    OUT_FILE.parent.mkdir(parents=True, exist_ok=True)
    with pd.ExcelWriter(OUT_FILE, engine="openpyxl") as writer:
        summary.to_excel(writer, sheet_name="比对汇总", index=False)
        df_out.to_excel(writer, sheet_name="比对结果", index=False)
    # Styling re-opens the saved workbook, so it must run after the writer closes.
    apply_styles(df_out, diff_cells, red_rows)
    shutil.copy(OUT_FILE, OUT_DESKTOP)
    print(f"已输出: {OUT_FILE}")
    print(f"已复制: {OUT_DESKTOP}")
    print(summary.to_string(index=False))
def col_letter_map(columns: list) -> dict[str, int]:
    """Map each column name (stringified) to its 1-based column position."""
    positions: dict[str, int] = {}
    for position, column in enumerate(columns, start=1):
        positions[str(column)] = position
    return positions
def apply_styles(df: pd.DataFrame, diff_cells: dict, red_rows: set):
    """Highlight the result sheet of the already-written workbook in place.

    Args:
        df: The frame that was written to the ``比对结果`` sheet; its column
            order is used to find sheet columns by name.
        diff_cells: Excel row number -> column names whose cells get a yellow
            fill. Names that are not columns of *df* are ignored.
        red_rows: Excel row numbers filled red across every used column; the
            remark cell of those rows also gets the red font.

    Red rows are painted after the yellow cells, so a red row overrides any
    yellow fill on the same row.
    """
    wb = load_workbook(OUT_FILE)
    ws = wb["比对结果"]
    cmap = col_letter_map(list(df.columns))
    # The labels in diff_cells already are the column names, so no
    # translation table is needed.
    for row_idx, labels in diff_cells.items():
        for label in labels:
            if label in cmap:
                ws.cell(row=row_idx, column=cmap[label]).fill = YELLOW
    last_col = ws.max_column
    for row_idx in red_rows:
        for c in range(1, last_col + 1):
            ws.cell(row=row_idx, column=c).fill = RED
        if REMARK_COL in cmap:
            ws.cell(row=row_idx, column=cmap[REMARK_COL]).font = RED_FONT
    wb.save(OUT_FILE)
# Run the comparison only when this file is executed as a script.
if __name__ == "__main__":
    run()

View File

@@ -0,0 +1,148 @@
#!/usr/bin/env python3
"""
以交车任务列表(交车许可=已交车)为正确基准,
比对生成环境-交车数据(delivery_status_text=已完成 视为已交车)
输出已交车状态不一致的车牌号。
"""
import re
import shutil
from pathlib import Path
import pandas as pd
from openpyxl import load_workbook
from openpyxl.styles import PatternFill
# Input: delivery data export from the production environment (the side being checked).
FILE_PROD = Path(
    "/Users/sylvawong/Library/Containers/com.tencent.xinWeChat/Data/Documents/"
    "xwechat_files/wxid_l80gh7d3x7u012_385c/temp/drag/生成环境-交车数据.xlsx"
)
# Input: delivery task list export (the baseline treated as correct).
FILE_TASK = Path(
    "/Users/sylvawong/Library/Containers/com.tencent.xinWeChat/Data/Documents/"
    "xwechat_files/wxid_l80gh7d3x7u012_385c/temp/drag/"
    "交车任务列表-导出-2026-05-26 (5).xlsx"
)
# Output report, plus the location it is copied to afterwards.
OUT = Path("/Users/sylvawong/Desktop/生成环境交车状态差异报告.xlsx")
OUT_PROJ = Path("/Users/sylvawong/Desktop/CURSOR/ONE-OS/生成环境交车状态差异报告.xlsx")
# Fill used to highlight the data rows of the mismatch sheets.
RED = PatternFill(start_color="FF9999", end_color="FF9999", fill_type="solid")
# Baseline side: status value that means "delivered".
TASK_DELIVERED = "已交车"
# Production side: "已完成" (completed) corresponds to "已交车" (delivered).
PROD_DELIVERED = "已完成"
def norm_plate(v) -> str:
    """Normalise a licence plate so that spelling variants compare equal.

    Missing values (as reported by ``pd.isna``) become "". Anything else is
    stringified, trimmed and upper-cased, then every whitespace character,
    hyphen and middle dot is removed.
    """
    if not pd.isna(v):
        text = str(v).strip().upper()
        return re.sub(r"[\s\-·]", "", text)
    return ""
def delivered_plates(df, plate_col, status_col, delivered_val):
    """Classify plates by whether all, or only some, of their rows are delivered.

    Args:
        df: Source frame; it is not modified.
        plate_col: Column holding the raw plate.
        status_col: Column holding the status text.
        delivered_val: Status value that counts as delivered.

    Returns:
        ``(delivered, mixed, rows)`` where *delivered* is the set of
        normalised plates whose rows all carry exactly *delivered_val*,
        *mixed* is the set of plates that have *delivered_val* on some rows
        and a different status on others, and *rows* is a copy of *df*
        limited to rows with a non-empty plate, extended with the helper
        columns ``_plate`` and ``_status``.
    """
    df = df.copy()
    df["_plate"] = df[plate_col].map(norm_plate)
    # Explicit copy: a new column is assigned on the filtered frame below.
    df = df[df["_plate"] != ""].copy()
    df["_status"] = df[status_col].astype(str).str.strip()

    # Collect the distinct statuses per plate. Membership is tested on whole
    # status values, so a status that merely contains delivered_val as a
    # substring is never mistaken for it.
    statuses_by_plate: dict = {}
    for plate, status in zip(df["_plate"], df["_status"]):
        statuses_by_plate.setdefault(plate, set()).add(status)

    delivered = {p for p, seen in statuses_by_plate.items() if seen == {delivered_val}}
    mixed = {
        p
        for p, seen in statuses_by_plate.items()
        if delivered_val in seen and len(seen) > 1
    }
    return delivered, mixed, df
def display_plate(key, df_prod, df_task):
    """Return the original spelling of a normalised plate for display.

    The production frame is searched first, then the task frame; the first
    row whose normalised plate equals *key* supplies the text. When neither
    frame has such a row, *key* itself is returned.
    """
    for frame, column in ((df_prod, "plate_number"), (df_task, "车牌号")):
        hits = frame[column].map(norm_plate) == key
        if hits.any():
            return str(frame.loc[hits, column].iloc[0])
    return key
def run():
    """Report plates whose delivered status differs between the two exports.

    The task list (status ``TASK_DELIVERED``) is the baseline; production
    rows count as delivered when their status is ``PROD_DELIVERED``. The
    report contains a summary sheet, the mismatching plates (all together and
    per direction) and the plates on which both sides agree. Data rows of the
    mismatch sheets are filled red, and the workbook is copied to
    ``OUT_PROJ``.
    """
    df_prod = pd.read_excel(FILE_PROD, sheet_name=0)
    df_task = pd.read_excel(FILE_TASK, sheet_name=0)
    set_task, _, task_keyed = delivered_plates(
        df_task, "车牌号", "交车许可", TASK_DELIVERED
    )
    set_prod, prod_mixed, prod_keyed = delivered_plates(
        df_prod, "plate_number", "delivery_status_text", PROD_DELIVERED
    )

    def first_spelling(frame, column):
        """Map each normalised plate to the raw text of its first row."""
        lookup = {}
        for key, raw in zip(frame["_plate"], frame[column]):
            lookup.setdefault(key, str(raw))
        return lookup

    # Built once instead of re-normalising both columns for every plate.
    shown_prod = first_spelling(prod_keyed, "plate_number")
    shown_task = first_spelling(task_keyed, "车牌号")

    def shown(key):
        """Production spelling first, then task spelling, else the key."""
        if key in shown_prod:
            return shown_prod[key]
        return shown_task.get(key, key)

    # Baseline = task list delivered; production should show it as completed.
    # Mismatch 1: delivered in the task list, but production is not
    # "completed" (including no record or any other status).
    bad_vs_baseline = sorted(set_task - set_prod)
    # Mismatch 2: production says completed, the task list does not say
    # delivered (extra records in production).
    extra_in_prod = sorted(set_prod - set_task)
    all_bad = sorted(set(bad_vs_baseline) | set(extra_in_prod))
    both_ok = sorted(set_task & set_prod)

    def plate_rows(keys, diff_type):
        """Build a two-column frame (difference type, plate) for *keys*."""
        if not keys:
            return pd.DataFrame(columns=["差异类型", "车牌号"])
        return pd.DataFrame(
            {
                "差异类型": diff_type,
                "车牌号": [shown(k) for k in keys],
            }
        )

    df_bad1 = plate_rows(
        bad_vs_baseline,
        "交车任务=已交车(基准),生成环境≠已完成或无记录",
    )
    df_bad2 = plate_rows(
        extra_in_prod,
        "生成环境=已完成,交车任务≠已交车",
    )
    df_all = pd.concat([df_bad1, df_bad2], ignore_index=True)
    df_ok = plate_rows(both_ok, "两边一致(任务已交车且生成环境已完成)")

    # Raw row counts, reported next to the unique-plate counts.
    task_rows = len(df_task[df_task["交车许可"].astype(str).str.strip() == TASK_DELIVERED])
    prod_rows = len(
        df_prod[df_prod["delivery_status_text"].astype(str).str.strip() == PROD_DELIVERED]
    )
    summary = pd.DataFrame(
        [
            ["正确数据基准", "交车任务列表 · 交车许可=已交车"],
            ["生成环境已交车判定", "delivery_status_text=已完成"],
            ["交车任务 已交车行数 / 唯一车牌", f"{task_rows} / {len(set_task)}"],
            ["生成环境 已完成行数 / 唯一车牌", f"{prod_rows} / {len(set_prod)}"],
            ["两边状态一致车牌", len(both_ok)],
            ["已交车状态不一致车牌合计", len(all_bad)],
            ["└ 基准有已交车,生成环境未对齐", len(bad_vs_baseline)],
            ["└ 生成环境已完成,基准无已交车", len(extra_in_prod)],
            ["生成环境同车牌多状态(含已完成)", len(prod_mixed)],
        ],
        columns=["项目", "结果"],
    )

    with pd.ExcelWriter(OUT, engine="openpyxl") as w:
        summary.to_excel(w, sheet_name="比对汇总", index=False)
        df_all.to_excel(w, sheet_name="不一致车牌", index=False)
        # The per-direction sheets are only written when they have content.
        if len(bad_vs_baseline):
            df_bad1.to_excel(w, sheet_name="基准有生成环境未对齐", index=False)
        if len(extra_in_prod):
            df_bad2.to_excel(w, sheet_name="生成环境多记已完成", index=False)
        df_ok.to_excel(w, sheet_name="一致车牌", index=False)

    # Re-open the saved file to paint every data row of the mismatch sheets.
    wb = load_workbook(OUT)
    for sn in ["不一致车牌", "基准有生成环境未对齐", "生成环境多记已完成"]:
        if sn in wb.sheetnames:
            ws = wb[sn]
            for r in range(2, ws.max_row + 1):
                for c in range(1, ws.max_column + 1):
                    ws.cell(row=r, column=c).fill = RED
    wb.save(OUT)
    shutil.copy(OUT, OUT_PROJ)
    print(f"已输出: {OUT}")
    print(summary.to_string(index=False))
# Build the report only when this file is executed as a script.
if __name__ == "__main__":
    run()

View File

@@ -0,0 +1,120 @@
#!/usr/bin/env python3
"""车辆信息(车辆状态) vs 交车任务(交车许可):同车牌「已交车」是否一致,不一致仅输出车牌"""
import re
import shutil
from pathlib import Path
import pandas as pd
from openpyxl import load_workbook
from openpyxl.styles import PatternFill
# Input: vehicle information export (status column "车辆状态").
FILE_VEHICLE = Path("/Users/sylvawong/Downloads/车辆信息-1779792181510.xlsx")
# Input: delivery task list export (status column "交车许可").
FILE_TASK = Path(
    "/Users/sylvawong/Library/Containers/com.tencent.xinWeChat/Data/Documents/"
    "xwechat_files/wxid_l80gh7d3x7u012_385c/temp/drag/"
    "交车任务列表-导出-2026-05-26 (5).xlsx"
)
# Output report, plus the location it is copied to afterwards.
OUT = Path("/Users/sylvawong/Desktop/车辆信息与交车任务差异报告.xlsx")
OUT_PROJ = Path("/Users/sylvawong/Desktop/CURSOR/ONE-OS/车辆信息与交车任务差异报告.xlsx")
# Fill used to highlight the data rows of the mismatch sheets.
RED = PatternFill(start_color="FF9999", end_color="FF9999", fill_type="solid")
# Status value that means "delivered" in both files.
STATUS = "已交车"
def norm_plate(v) -> str:
    """Return the canonical form of a licence plate used for matching.

    NA values map to "". Other values are converted to text, stripped and
    upper-cased, and all whitespace, hyphens and middle dots are dropped.
    """
    if pd.isna(v):
        return ""
    cleaned = str(v).strip().upper()
    cleaned = re.sub(r"[\s\-·]", "", cleaned)
    return cleaned
def plate_sets(df, plate_col, status_col):
    """Return the delivered plates and the plate -> status map of one table.

    Rows with an empty normalised plate are ignored. When a plate occurs on
    several rows, only the status of its first row is used.

    Returns:
        ``(delivered, status_map)``: the set of normalised plates whose
        status equals ``STATUS``, and a Series indexed by normalised plate
        holding the stripped status text.
    """
    work = df.copy()
    work["_plate"] = work[plate_col].map(norm_plate)
    work = work[work["_plate"] != ""]
    first_rows = work.drop_duplicates(subset=["_plate"], keep="first")
    status_map = first_rows.set_index("_plate")[status_col].astype(str).str.strip()
    is_delivered = status_map == STATUS
    delivered = set(status_map[is_delivered].index)
    return delivered, status_map
def run():
    """Check that "delivered" agrees between vehicle info and the task list.

    A plate is consistent when both tables mark it ``STATUS``. The report
    holds a summary sheet, the inconsistent plates (all together and per
    direction) and the consistent plates. Data rows of the mismatch sheets
    are filled red, and the workbook is copied to ``OUT_PROJ``.
    """
    df_v = pd.read_excel(FILE_VEHICLE, sheet_name=0)
    df_t = pd.read_excel(FILE_TASK, sheet_name=0)
    set_v, _ = plate_sets(df_v, "车牌号", "车辆状态")
    set_t, _ = plate_sets(df_t, "车牌号", "交车许可")
    both_ok = sorted(set_v & set_t)
    only_v = sorted(set_v - set_t)  # delivered in vehicle info, not in the task list
    only_t = sorted(set_t - set_v)  # delivered in the task list, not in vehicle info
    inconsistent = sorted(set(only_v) | set(only_t))
    row_v = len(df_v[df_v["车辆状态"].astype(str).str.strip() == STATUS])
    row_t = len(df_t[df_t["交车许可"].astype(str).str.strip() == STATUS])
    consistent = len(only_v) == 0 and len(only_t) == 0
    summary = pd.DataFrame(
        [
            ["比对说明", "同车牌:车辆信息「车辆状态=已交车」与交车任务「交车许可=已交车」是否一致"],
            ["车辆信息 已交车行数", row_v],
            ["交车任务 已交车行数", row_t],
            ["车辆信息 已交车唯一车牌", len(set_v)],
            ["交车任务 已交车唯一车牌", len(set_t)],
            ["两边均已交车(一致)", len(both_ok)],
            # Yes/no answer; both branches were previously the empty string,
            # so this row never reported the result.
            ["是否完全一致", "是" if consistent else "否"],
            ["不一致车牌合计", len(inconsistent)],
            ["仅车辆信息为已交车(任务侧非已交车)", len(only_v)],
            ["仅交车任务为已交车(车辆信息侧非已交车)", len(only_t)],
        ],
        columns=["项目", "结果"],
    )

    def first_spelling(frame):
        """Map each normalised plate to the raw text of its first row."""
        lookup = {}
        for key, raw in zip(frame["车牌号"].map(norm_plate), frame["车牌号"]):
            lookup.setdefault(key, str(raw))
        return lookup

    # Built once instead of re-normalising both columns for every plate.
    shown_v = first_spelling(df_v)
    shown_t = first_spelling(df_t)

    def display_plate(key: str) -> str:
        """Vehicle-info spelling first, then task-list spelling, else the key."""
        if key in shown_v:
            return shown_v[key]
        return shown_t.get(key, key)

    def plates_df(keys, diff_type):
        """Build a two-column frame (difference type, plate) for *keys*."""
        if not keys:
            return pd.DataFrame(columns=["差异类型", "车牌号"])
        return pd.DataFrame(
            {"差异类型": diff_type, "车牌号": [display_plate(k) for k in keys]}
        )

    df_only_v = plates_df(only_v, "车辆信息=已交车,交车任务≠已交车")
    df_only_t = plates_df(only_t, "交车任务=已交车,车辆信息≠已交车")
    df_all_bad = pd.concat([df_only_v, df_only_t], ignore_index=True)
    df_both_ok = plates_df(both_ok, "两边均为已交车(一致)")

    with pd.ExcelWriter(OUT, engine="openpyxl") as w:
        summary.to_excel(w, sheet_name="比对汇总", index=False)
        df_all_bad.to_excel(w, sheet_name="不一致车牌", index=False)
        # The per-direction sheets are only written when they have content.
        if len(only_v):
            df_only_v.to_excel(w, sheet_name="仅车辆信息已交车", index=False)
        if len(only_t):
            df_only_t.to_excel(w, sheet_name="仅交车任务已交车", index=False)
        df_both_ok.to_excel(w, sheet_name="一致车牌", index=False)

    # Re-open the saved file to paint every data row of the mismatch sheets.
    wb = load_workbook(OUT)
    for sn in ["不一致车牌", "仅车辆信息已交车", "仅交车任务已交车"]:
        if sn in wb.sheetnames:
            ws = wb[sn]
            for r in range(2, ws.max_row + 1):
                for c in range(1, ws.max_column + 1):
                    ws.cell(row=r, column=c).fill = RED
    wb.save(OUT)
    shutil.copy(OUT, OUT_PROJ)
    print(f"已输出: {OUT}")
    print(summary.to_string(index=False))
# Build the report only when this file is executed as a script.
if __name__ == "__main__":
    run()

View File

@@ -0,0 +1,183 @@
#!/usr/bin/env python3
"""加氢订单按合同编号匹配客户名称,并生成「月份-客户-项目」汇总子表。"""
from __future__ import annotations
import argparse
from pathlib import Path
import pandas as pd
from openpyxl import load_workbook
from openpyxl.styles import Alignment
def _line_total_yuan(row: pd.Series) -> float:
"""单行金额:对客费用非 0 时取对客费用,否则取成本费用(与台账常见口径一致)。"""
c = row.get("对客费用(元)")
if pd.notna(c) and float(c) != 0:
return float(c)
v = row.get("成本费用(元)")
return float(v) if pd.notna(v) else 0.0
def _merge_month_customer_cells(
    workbook_path: Path,
    sheet_name: str = "月度客户项目汇总",
    col_month: int = 1,
    col_customer: int = 2,
    header_rows: int = 1,
) -> None:
    """Vertically merge runs of equal (month, customer) cells in a sheet.

    Consecutive data rows whose month and customer both match (compared as
    stripped text, with empty cells as "") are merged in the month column and
    in the customer column. The top cell of each merged run is vertically
    centred; the month is centred horizontally and the customer
    left-aligned. The workbook is saved back to *workbook_path*.

    Raises:
        SystemExit: If *sheet_name* does not exist in the workbook.
    """
    wb = load_workbook(workbook_path)
    if sheet_name not in wb.sheetnames:
        wb.close()
        raise SystemExit(f"工作簿中未找到工作表: {sheet_name}")
    ws = wb[sheet_name]
    last_row = ws.max_row
    first_data_row = header_rows + 1

    def cell_text(row_no: int, column: int) -> str:
        value = ws.cell(row_no, column).value
        return "" if value is None else str(value).strip()

    def group_key(row_no: int):
        return cell_text(row_no, col_month), cell_text(row_no, col_customer)

    if last_row >= first_data_row:
        month_align = Alignment(vertical="center", horizontal="center", wrap_text=True)
        customer_align = Alignment(vertical="center", horizontal="left", wrap_text=True)
        top = first_data_row
        while top <= last_row:
            key = group_key(top)
            # Extend the run downwards while the next row carries the same key.
            bottom = top
            while bottom < last_row and group_key(bottom + 1) == key:
                bottom += 1
            if bottom > top:
                for column in (col_month, col_customer):
                    ws.merge_cells(
                        start_row=top,
                        start_column=column,
                        end_row=bottom,
                        end_column=column,
                    )
                ws.cell(top, col_month).alignment = month_align
                ws.cell(top, col_customer).alignment = customer_align
            top = bottom + 1

    wb.save(workbook_path)
    wb.close()
def run(
    orders_path: Path,
    contract_map_path: Path,
    out_path: Path,
    orders_sheet: str = "加氢订单",
    map_sheet: str = "Sheet1",
    merge_summary_cells: bool = True,
) -> None:
    """Attach customer names to orders and write a month/customer/project summary.

    Args:
        orders_path: Workbook containing the order sheet.
        contract_map_path: Workbook mapping ``contract_no`` to ``customer_name``.
        out_path: Workbook to write (detail sheet + summary sheet).
        orders_sheet: Name of the order sheet; also the output detail sheet name.
        map_sheet: Name of the mapping sheet.
        merge_summary_cells: When true, equal month/customer cells of the
            summary sheet are merged after writing.

    Raises:
        SystemExit: If a required column is missing from either input.

    The mapping is reduced to one row per non-blank contract number (first
    occurrence wins) before joining, so a repeated or blank mapping row can
    neither duplicate order lines nor attach a customer to orders that have
    no contract number.
    """
    orders = pd.read_excel(orders_path, sheet_name=orders_sheet)
    cmap = pd.read_excel(contract_map_path, sheet_name=map_sheet)
    need_o = {"合同编号", "项目名称", "加氢时间", "加氢量(kg)", "对客费用(元)", "成本费用(元)", "订单编号"}
    miss_o = need_o - set(orders.columns)
    if miss_o:
        raise SystemExit(f"加氢订单表缺少列: {miss_o}")
    need_m = {"contract_no", "customer_name"}
    miss_m = need_m - set(cmap.columns)
    if miss_m:
        raise SystemExit(f"合同映射表缺少列: {miss_m},当前为 {list(cmap.columns)}")

    cmap = cmap[["contract_no", "customer_name"]].copy()
    # Drop blank keys first: astype(str) would turn them into "nan", which
    # would then match orders whose contract number is blank as well.
    cmap = cmap.dropna(subset=["contract_no"])
    cmap["contract_no"] = cmap["contract_no"].astype(str).str.strip()
    cmap = cmap[cmap["contract_no"] != ""]
    # One row per contract: a left merge repeats the order row once per
    # matching mapping row, which would inflate the counts and sums below.
    cmap = cmap.drop_duplicates(subset=["contract_no"], keep="first")

    o = orders.copy()
    o["_合同键"] = o["合同编号"].astype(str).str.strip()
    merged = o.merge(
        cmap,
        left_on="_合同键",
        right_on="contract_no",
        how="left",
        suffixes=("", "_映射表"),
    )
    merged["客户名称"] = merged["customer_name"].fillna("")
    merged = merged.drop(columns=["customer_name", "contract_no"], errors="ignore")

    # Month bucket; rows with an unparseable time get an empty month.
    ts = pd.to_datetime(merged["加氢时间"], errors="coerce")
    merged["月份"] = ts.dt.strftime("%Y-%m")
    merged.loc[ts.isna(), "月份"] = ""
    merged["行加氢总额(元)"] = merged.apply(_line_total_yuan, axis=1)

    # Summary: month, customer, project -> refuel count, quantity, amount.
    gcols = ["月份", "客户名称", "项目名称"]
    sub = (
        merged.groupby(gcols, dropna=False)
        .agg(
            加氢次数=("订单编号", "count"),
            **{"加气量kg": ("加氢量(kg)", "sum")},
            **{"加氢总额(元)": ("行加氢总额(元)", "sum")},
        )
        .reset_index()
    )
    sub = sub.sort_values(gcols).reset_index(drop=True)
    sub["客户名称"] = sub["客户名称"].fillna("").astype(str)
    sub["项目名称"] = sub["项目名称"].fillna("").astype(str)
    sub["月份"] = sub["月份"].fillna("").astype(str)

    # Detail sheet: helper columns removed, customer name placed right after
    # the project name (or last when there is no project column).
    drop_from_detail = {"_合同键", "行加氢总额(元)", "月份"}
    base = [c for c in merged.columns if c not in drop_from_detail and c != "客户名称"]
    if "项目名称" in base:
        i = base.index("项目名称") + 1
        detail_out = base[:i] + ["客户名称"] + base[i:]
    else:
        detail_out = base + ["客户名称"]
    detail_df = merged[detail_out].copy()
    detail_df["客户名称"] = detail_df["客户名称"].fillna("").astype(str)

    with pd.ExcelWriter(out_path, engine="openpyxl") as w:
        detail_df.to_excel(w, sheet_name=orders_sheet, index=False)
        sub.to_excel(w, sheet_name="月度客户项目汇总", index=False)
    if merge_summary_cells:
        _merge_month_customer_cells(out_path, sheet_name="月度客户项目汇总")
    print(f"已写入: {out_path}")
    print(f" 明细行数: {len(detail_df)},已匹配客户名称: {(detail_df['客户名称'] != '').sum()}")
    print(f" 汇总行数: {len(sub)}")
def main() -> None:
    """Parse the command line and run the order/customer summary.

    ``--orders``, ``--contracts`` and ``-o/--out`` are required paths and are
    resolved to absolute paths before use. ``--no-merge-summary-cells``
    switches off the cell merging of the summary sheet.
    """
    parser = argparse.ArgumentParser()
    for flag in ("--orders", "--contracts"):
        parser.add_argument(flag, type=Path, required=True)
    parser.add_argument("-o", "--out", type=Path, required=True)
    parser.add_argument(
        "--no-merge-summary-cells",
        action="store_true",
        help="不对「月度客户项目汇总」做月份/客户名称列合并",
    )
    opts = parser.parse_args()
    orders_path = opts.orders.resolve()
    contracts_path = opts.contracts.resolve()
    out_path = opts.out.resolve()
    run(
        orders_path,
        contracts_path,
        out_path,
        merge_summary_cells=not opts.no_merge_summary_cells,
    )
# Command-line entry point.
if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,237 @@
#!/usr/bin/env python3
"""将加氢订单 Excel 数据填入导入模板(列名可自动匹配多种写法)。"""
from __future__ import annotations
import argparse
import re
from datetime import datetime
from pathlib import Path
import pandas as pd
from openpyxl import load_workbook
# Template column -> column names it may have in the source sheet, in
# priority order (the first name present in the source wins).
SOURCE_CANDIDATES: dict[str, list[str]] = {
    "加氢站名称": ["加氢站", "加氢站名称", "站点", "站点名称"],
    "订单编号": ["序号", "订单编号", "订单号"],
    "加氢时间": ["日期", "加氢时间", "加氢日期", "时间"],
    "加氢量": [
        "加气量kg",
        "加气量kg)",  # half-width right parenthesis, common in ledger exports
        "加气量(kg)",
        "加氢量kg",
        "加氢量(kg)",
        "加氢量",
        "加气量",
    ],
    "车牌": ["车牌", "车牌号"],
    "行驶里程数": ["行驶里程(km)", "行驶里程km", "行驶里程", "里程", "公里数"],
}
def _pick_column(df: pd.DataFrame, template_col: str) -> str | None:
    """Return the source column to use for *template_col*, or ``None``.

    The candidates listed in ``SOURCE_CANDIDATES`` are tried in order; a
    template column without an entry is looked up under its own name.
    """
    candidates = SOURCE_CANDIDATES.get(template_col, [template_col])
    return next((name for name in candidates if name in df.columns), None)
def _is_unknown_station(val) -> bool:
if val is None or (isinstance(val, float) and pd.isna(val)):
return True
x = str(val).strip()
if not x or x.lower() in ("nan", "none"):
return True
return x in ("未知加氢站", "未知")
def _repair_unknown_stations(
    df: pd.DataFrame,
    station_col: str,
    order_col: str,
    plate_col: str,
    time_col: str,
    qty_col: str,
) -> pd.DataFrame:
    """Back-fill unknown station names from duplicate rows of the same refuel.

    Rows are grouped by order, plate, time (formatted to the second) and
    quantity (rounded to 4 decimals). Within a group, when the known station
    names agree on a single value after stripping, the unknown cells take the
    first known name. Groups with no known name, or with conflicting names,
    are left untouched.

    Typical case: an Excel import produced two rows for one refuel, one with
    the correct station and one marked unknown.

    Returns:
        A copy of *df* with the station column repaired; the helper grouping
        columns are removed again.
    """
    work = df.copy()
    stamps = pd.to_datetime(work[time_col], errors="coerce")
    work["_g_t"] = stamps.dt.strftime("%Y-%m-%d %H:%M:%S")
    work["_g_v"] = pd.to_numeric(work[qty_col], errors="coerce").round(4)
    group_cols = [order_col, plate_col, "_g_t", "_g_v"]

    def consensus_fill(stations: pd.Series) -> pd.Series:
        unknown = stations.map(_is_unknown_station)
        known = stations[~unknown]
        if known.empty:
            return stations
        distinct = pd.unique(known.astype(str).str.strip())
        if len(distinct) != 1:
            return stations
        repaired = stations.copy()
        repaired[unknown] = known.iloc[0]
        return repaired

    grouped = work.groupby(group_cols, dropna=False)[station_col]
    work[station_col] = grouped.transform(consensus_fill)
    return work.drop(columns=["_g_t", "_g_v"])
def normalize_datetime_cell(val) -> str:
    """Format a cell value as ``YYYY-MM-DD HH:MM:SS``.

    Handled inputs:
      * ``None``, float NaN, ``pd.NaT`` and ``pd.NA`` -> "".
      * ``pd.Timestamp`` / ``datetime`` -> formatted directly.
      * ``int`` / ``float`` (not ``bool``) -> read as an Excel serial date
        (days since 1899-12-30).
      * Text -> parsed by pandas; if that fails, a ``YYYY年M月D日`` style date
        (``年``/``月`` may also be ``/`` or ``-``), optionally followed by a
        time written with ``:``, ``:``, ``时`` / ``分``, is tried.

    Returns:
        The formatted text, "" for missing values and for the texts
        "nan"/"none", "-" unchanged, and otherwise the stripped original text
        when nothing could be parsed.
    """
    # pd.NaT is a datetime subclass whose strftime raises, so it has to be
    # caught before the datetime branch below. Missing dates read from a
    # datetime column arrive here as NaT.
    if val is None or val is pd.NaT or val is pd.NA:
        return ""
    if isinstance(val, float) and pd.isna(val):
        return ""
    if isinstance(val, pd.Timestamp):
        return val.to_pydatetime().strftime("%Y-%m-%d %H:%M:%S")
    if isinstance(val, datetime):
        return val.strftime("%Y-%m-%d %H:%M:%S")
    if isinstance(val, (int, float)) and not isinstance(val, bool):
        # Excel serial date.
        try:
            ts = pd.to_datetime(val, unit="D", origin="1899-12-30", errors="coerce")
            if pd.isna(ts):
                return str(val)
            return ts.strftime("%Y-%m-%d %H:%M:%S")
        except Exception:
            # Deliberate best effort: fall through to the text handling below.
            pass
    s = str(val).strip()
    if not s or s.lower() in ("nan", "none", "-"):
        return s if s == "-" else ""
    ts = pd.to_datetime(s, errors="coerce")
    if not pd.isna(ts):
        return ts.strftime("%Y-%m-%d %H:%M:%S")
    # Common Chinese / slash formats that pandas could not parse.
    m = re.search(r"(\d{4})[年/-](\d{1,2})[月/-](\d{1,2})", s)
    if m:
        y, mo, d = int(m.group(1)), int(m.group(2)), int(m.group(3))
        rest = s[m.end():].strip()
        h = mi = se = 0
        # Accept ASCII and full-width colons as well as 时 / 分.
        m2 = re.search(r"(\d{1,2})\s*[::时]\s*(\d{1,2})(?:\s*[::分]\s*(\d{1,2}))?", rest)
        if m2:
            h, mi = int(m2.group(1)), int(m2.group(2))
            se = int(m2.group(3) or 0)
        try:
            return datetime(y, mo, d, h, mi, se).strftime("%Y-%m-%d %H:%M:%S")
        except ValueError:
            # Impossible date or time: keep the original text.
            pass
    return s
def fill_template(
    template_path: Path,
    source_path: Path,
    out_path: Path,
    source_sheet: str | int = 0,
    repair_unknown_station: bool = True,
    source_header: int = 0,
) -> None:
    """Copy order rows from a source workbook into the import template.

    Args:
        template_path: Template workbook; it must contain a sheet ``加氢订单``
            whose first row is the header.
        source_path: Workbook holding the order data.
        out_path: Where the filled template is saved.
        source_sheet: Sheet name or index of the source data.
        repair_unknown_station: When true, unknown station names are
            back-filled from duplicate rows before copying.
        source_header: Row (pandas ``header``) holding the source column names.

    Raises:
        SystemExit: If the template sheet or header is missing/invalid, or a
            required template column has no matching source column.

    Existing data rows of the template sheet are removed. Template columns
    without a source match are written as "", and the time column is
    normalised with ``normalize_datetime_cell``.
    """
    template_wb = load_workbook(template_path)
    if "加氢订单" not in template_wb.sheetnames:
        raise SystemExit("模板中未找到工作表「加氢订单」")
    tpl_ws = template_wb["加氢订单"]
    header = [c.value for c in next(tpl_ws.iter_rows(min_row=1, max_row=1))]
    if not header or any(x is None for x in header):
        raise SystemExit("模板首行表头异常")
    # Every header cell is non-None from here on; normalise the names once.
    header_names = [str(h).strip() for h in header]

    df = pd.read_excel(source_path, sheet_name=source_sheet, header=source_header)
    col_src: dict[str, str] = {}
    missing = []
    for tpl_col in header_names:
        src = _pick_column(df, tpl_col)
        if src is None:
            missing.append(tpl_col)
        else:
            col_src[tpl_col] = src

    required = ["加氢站名称", "订单编号", "加氢时间", "加氢量", "车牌"]
    for k in required:
        if k not in col_src:
            raise SystemExit(
                f"源表缺少可映射列: 模板「{k}」需要以下之一 {SOURCE_CANDIDATES.get(k, [])}"
                f"当前源表列: {list(df.columns)}"
            )

    if repair_unknown_station:
        df = _repair_unknown_stations(
            df,
            station_col=col_src["加氢站名称"],
            order_col=col_src["订单编号"],
            plate_col=col_src["车牌"],
            time_col=col_src["加氢时间"],
            qty_col=col_src["加氢量"],
        )

    # Clear the template's data rows, keeping the header.
    if tpl_ws.max_row > 1:
        tpl_ws.delete_rows(2, tpl_ws.max_row - 1)

    for _, row in df.iterrows():
        out_row: list = []
        for tpl_col in header_names:
            src_col = col_src.get(tpl_col)
            if src_col is None:
                # No source column for this template column: leave it blank.
                out_row.append("")
                continue
            v = row[src_col]
            if tpl_col == "加氢时间":
                out_row.append(normalize_datetime_cell(v))
            elif pd.isna(v):
                out_row.append("")
            else:
                out_row.append(v)
        tpl_ws.append(out_row)

    template_wb.save(out_path)
    print(f"已写入 {out_path},共 {len(df)} 行数据。")
    if missing:
        print("(以下模板列在源表中未匹配到,已留空)", ", ".join(missing))
def main() -> None:
    """Parse the command line and fill the import template.

    ``--template``, ``--source`` and ``--out`` are required paths. ``--sheet``
    names the source sheet; a value made only of digits is passed on as a
    sheet index. ``--source-header`` and ``--no-repair-unknown-station`` are
    forwarded to ``fill_template``.
    """
    parser = argparse.ArgumentParser(description="加氢订单 -> 导入模板")
    for flag in ("--template", "--source", "--out"):
        parser.add_argument(flag, type=Path, required=True)
    parser.add_argument("--sheet", default="加氢订单", help="源 Excel 工作表名,默认「加氢订单」")
    parser.add_argument(
        "--source-header",
        type=int,
        default=0,
        help="源表表头所在行pandas 的 header0 为第一行;台账「明细台账」一般为 1",
    )
    parser.add_argument(
        "--no-repair-unknown-station",
        action="store_true",
        help="关闭「同笔交易未知加氢站」回填(默认开启,用于修正源表重复行)",
    )
    opts = parser.parse_args()
    # An all-digit sheet argument is treated as a sheet index.
    source_sheet = int(opts.sheet) if str(opts.sheet).isdigit() else opts.sheet
    fill_template(
        opts.template,
        opts.source,
        opts.out,
        source_sheet=source_sheet,
        repair_unknown_station=not opts.no_repair_unknown_station,
        source_header=opts.source_header,
    )
# Command-line entry point.
if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,208 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""扫描商业/交强/超赔目录下保单文件,按导入模板 15 列结构生成已填报 Excel。"""
from __future__ import annotations
import calendar
import os
import re
from typing import Any, Dict, List, Optional, Tuple
import pandas as pd
# Inputs: the empty import template and the vehicle information export.
TEMPLATE_IN = "/Users/sylvawong/Downloads/保险采购信息导入模板-1776319598586.xlsx"
VEHICLE_XLSX = "/Users/sylvawong/Downloads/车辆信息-1776270214730.xlsx"
# Root folder of the policy files, with one sub-folder per insurance product.
BASE_POLICY_DIR = "/Users/sylvawong/Desktop/保单-2025.7-2026.4"
DIR_COMMERCIAL = os.path.join(BASE_POLICY_DIR, "商业")
DIR_COMPULSORY = os.path.join(BASE_POLICY_DIR, "交强")
DIR_EXCESS = os.path.join(BASE_POLICY_DIR, "超赔")
# Output workbook and the sheet name it is written under.
OUTPUT_XLSX = "/Users/sylvawong/Desktop/CURSOR/ONE-OS/保险采购信息导入模板-已填报.xlsx"
SHEET_NAME = "保险采购信息导入模板"
# Product names written to the "保险产品" column.
PRODUCT_COMMERCIAL = "商业险"
PRODUCT_COMPULSORY = "交强险"
PRODUCT_EXCESS = "超赔险"
# Plate fragment: one CJK character followed by 5-14 plate characters.
PLATE_CORE = r"[\u4e00-\u9fa5][A-Z0-9\u4e00-\u9fa5·挂]{5,14}"
# File-name patterns per product; group 1 is the plate.
PAT_COMMERCIAL = re.compile(rf"^({PLATE_CORE})[_\-]?商业", re.I)
PAT_COMPULSORY = re.compile(rf"交强险[_\-]({PLATE_CORE})[_\-]", re.I)
PAT_COMPULSORY_ALT = re.compile(rf"\d{{4}}交强险[_]({PLATE_CORE})[_]", re.I)
PAT_COMPULSORY_TIGHT = re.compile(rf"^\d{{4}}交强险({PLATE_CORE})[_]", re.I)
PAT_COMPULSORY_PLATE_FIRST = re.compile(rf"^({PLATE_CORE})_交强险", re.I)
PAT_EXCESS = re.compile(rf"^({PLATE_CORE})[_\-]?超赔", re.I)
# Coverage period in a file name, e.g. "2025.7-2026.4" (year.month-year.month).
PAT_PERIOD = re.compile(r"(\d{4})\.(\d{1,2})-(\d{4})\.(\d{1,2})")
# New-car files: group 1 is the text between the prefix and the extension.
PAT_NEW_CAR_COMM = re.compile(r"^新车_商业_(.+)\.(pdf|jpg|jpeg|png)$", re.I)
PAT_NEW_CAR_JQ = re.compile(r"^新车_交强_(.+)\.(pdf|jpg|jpeg|png)$", re.I)
def iter_policy_files(root: str) -> List[str]:
    """List all policy files below *root*, sorted by path.

    The tree is walked recursively and files ending in .pdf, .jpg, .jpeg or
    .png (any letter case) are returned. A missing or non-directory *root*
    yields an empty list.
    """
    if not os.path.isdir(root):
        return []
    wanted = (".pdf", ".jpg", ".jpeg", ".png")
    found = [
        os.path.join(folder, name)
        for folder, _subdirs, names in os.walk(root)
        for name in names
        if name.lower().endswith(wanted)
    ]
    found.sort()
    return found
def plate_from_filename(path: str, kind: str) -> Optional[str]:
    """Extract the licence plate from a policy file name.

    *kind* selects the naming scheme: "commercial" and "excess" expect the
    plate at the start of the name, while "compulsory" tries four patterns in
    order and takes the first that matches anywhere in the name. Any other
    *kind*, or a name matching no pattern, gives ``None``.
    """
    name = os.path.basename(path)
    if kind == "commercial":
        hit = PAT_COMMERCIAL.match(name)
    elif kind == "excess":
        hit = PAT_EXCESS.match(name)
    elif kind == "compulsory":
        hit = None
        for pattern in (
            PAT_COMPULSORY,
            PAT_COMPULSORY_ALT,
            PAT_COMPULSORY_TIGHT,
            PAT_COMPULSORY_PLATE_FIRST,
        ):
            hit = pattern.search(name)
            if hit:
                break
    else:
        return None
    return hit.group(1) if hit else None
def period_from_filename(fn: str) -> Tuple[Optional[pd.Timestamp], Optional[pd.Timestamp]]:
    """Read the coverage period ``YYYY.M-YYYY.M`` from a file name.

    Returns:
        ``(start, end)`` where *start* is the first day of the first month
        and *end* the last day of the second month, or ``(None, None)`` when
        the name contains no period or the matched numbers do not form valid
        dates (for example a month of 0 or 13).
    """
    m = PAT_PERIOD.search(fn)
    if not m:
        return None, None
    y1, mo1, y2, mo2 = map(int, m.groups())
    try:
        start = pd.Timestamp(year=y1, month=mo1, day=1)
        last = calendar.monthrange(y2, mo2)[1]
        end = pd.Timestamp(year=y2, month=mo2, day=last)
    except ValueError:
        # The pattern also matches digit groups that are not a real period;
        # treat those as "no period" instead of aborting the whole scan.
        return None, None
    return start, end
def policy_no_from_filename(path: str, product: str) -> Optional[str]:
    """Return the policy number embedded in a "new car" file name.

    Only the commercial and compulsory products have such a naming scheme;
    the text between the prefix and the extension is returned with
    surrounding whitespace removed. Other products or names give ``None``.
    """
    patterns = {
        PRODUCT_COMMERCIAL: PAT_NEW_CAR_COMM,
        PRODUCT_COMPULSORY: PAT_NEW_CAR_JQ,
    }
    pattern = patterns.get(product)
    if pattern is None:
        return None
    hit = pattern.match(os.path.basename(path))
    if hit is None:
        return None
    return hit.group(1).strip()
def load_vehicle_index() -> Dict[str, pd.Series]:
    """Load the vehicle workbook and index its rows by plate.

    Duplicate values of the plate column are dropped first, keeping the first
    row. Rows whose plate is missing or blank after stripping are skipped.

    Returns:
        Mapping of stripped plate text to the full vehicle row.
    """
    frame = pd.read_excel(VEHICLE_XLSX, sheet_name=0, header=0, engine="openpyxl")
    frame = frame.drop_duplicates(subset=["车牌号"], keep="first")
    index: Dict[str, pd.Series] = {}
    for _, record in frame.iterrows():
        raw = record["车牌号"]
        if pd.isna(raw):
            continue
        plate = str(raw).strip()
        if plate:
            index[plate] = record
    return index
def split_city(运营城市: Any, 所属省份: Any) -> Tuple[str, str]:
    """Split an operating-city value into ``(province, city)``.

    A value of the form "province-city" is split at the first hyphen; its
    province part is used only when no province was given explicitly. A value
    without a hyphen is taken as the city. Missing inputs give "".
    """
    province = "" if pd.isna(所属省份) else str(所属省份).strip()
    if pd.isna(运营城市):
        return province, ""
    text = str(运营城市).strip()
    head, hyphen, tail = text.partition("-")
    if not hyphen:
        return province, text
    if not province:
        province = head.strip()
    return province, tail.strip()
def empty_row_template(columns: List[str]) -> Dict[str, Any]:
    """Return a fresh row dict with every template column set to ``None``."""
    return dict.fromkeys(columns)
def build_rows_for_dir(
    root: str,
    kind: str,
    product: str,
    vidx: Dict[str, pd.Series],
    columns: List[str],
) -> List[Dict[str, Any]]:
    """Build one template row per policy file found below *root*.

    Args:
        root: Folder to scan.
        kind: File-naming scheme passed to ``plate_from_filename``.
        product: Product name written to the ``保险产品`` column.
        vidx: Vehicle rows indexed by plate, used to add vehicle details.
        columns: Template column names; every row starts with all of them
            set to ``None``.

    Returns:
        The rows in sorted file order. Policy number, plate and period are
        filled when the file name provides them; province and city are filled
        when the plate is found in *vidx*.
    """
    rows: List[Dict[str, Any]] = []
    for path in iter_policy_files(root):
        fn = os.path.basename(path)
        plate = plate_from_filename(path, kind) or ""
        row = empty_row_template(columns)
        pno = policy_no_from_filename(path, product)
        row["保单号"] = pno if pno else None
        row["车牌号"] = plate if plate else None
        row["保险产品"] = product
        st, en = period_from_filename(fn)
        if st is not None:
            row["保险开始时间"] = st
        if en is not None:
            row["保险结束时间"] = en
        if plate and plate in vidx:
            vr = vidx[plate]
            # NOTE(review): the vehicle's VIN is written into the insurer
            # column (保险公司) - confirm that this is intended.
            vin = vr.get("VIN")
            row["保险公司"] = vin if pd.notna(vin) else None
            prov, city = split_city(vr.get("运营城市"), vr.get("所属省份"))
            # Province and city need distinct keys; with one shared key the
            # city overwrote the province.
            row["省"] = prov or None
            row["市"] = city or None
        rows.append(row)
    return rows
def main() -> None:
    """Generate the filled insurance import workbook from the policy folders.

    The template's columns must equal the expected 15-column layout exactly.
    The three product folders are then scanned, one row is produced per
    policy file, and the result is written to ``OUTPUT_XLSX`` followed by a
    short console summary.

    Raises:
        SystemExit: If the template columns differ from the expected layout.
    """
    tpl = pd.read_excel(TEMPLATE_IN, sheet_name=0, header=0)
    columns = tpl.columns.tolist()
    required = [
        "保单号",
        "车牌号",
        "保险公司",
        "保险产品",
        "保险费用金额",
        "保险额度",
        # Province and city: two distinct, named columns.
        "省",
        "市",
        "保险开始时间",
        "保险结束时间",
        "缴费时间",
        "保险公司联系人",
        "保险公司联系电话",
        "保险采购人",
        "保险采购人电话",
    ]
    if columns != required:
        raise SystemExit(f"模板列与预期不一致: {columns}")

    vidx = load_vehicle_index()
    all_rows: List[Dict[str, Any]] = []
    for root, kind, product in (
        (DIR_COMMERCIAL, "commercial", PRODUCT_COMMERCIAL),
        (DIR_COMPULSORY, "compulsory", PRODUCT_COMPULSORY),
        (DIR_EXCESS, "excess", PRODUCT_EXCESS),
    ):
        all_rows.extend(build_rows_for_dir(root, kind, product, vidx, columns))

    out_df = pd.DataFrame(all_rows, columns=columns)
    os.makedirs(os.path.dirname(OUTPUT_XLSX), exist_ok=True)
    with pd.ExcelWriter(OUTPUT_XLSX, engine="openpyxl") as w:
        out_df.to_excel(w, index=False, sheet_name=SHEET_NAME)
    print(f"已生成: {OUTPUT_XLSX}")
    print(f"总行数(每个保单文件一行): {len(out_df)}")
    for p, n in out_df["保险产品"].value_counts().items():
        print(f" {p}: {int(n)}")
    miss_plate = out_df["车牌号"].isna() | (out_df["车牌号"].astype(str).str.strip() == "")
    print(f" 未解析到车牌的行: {int(miss_plate.sum())}")
# Script entry point.
if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,255 @@
#!/usr/bin/env python3
"""从桌面「车辆行驶证」图片 OCR 提取字段,写入证照信息导入模板(仅填 AE 列)。"""
import re
import calendar
import shutil
import sys
from collections import Counter
from datetime import date
from pathlib import Path
from typing import List, Optional, Tuple
import openpyxl
from rapidocr_onnxruntime import RapidOCR
# Folder with the driving-licence images, the import template, and the result file.
IMG_DIR = Path("/Users/sylvawong/Desktop/车辆行驶证")
TEMPLATE = Path("/Users/sylvawong/Desktop/证照信息导入模板-1776082388558.xlsx")
OUT = Path("/Users/sylvawong/Desktop/证照信息导入模板-OCR导入结果.xlsx")
# Date at import time of this module.
TODAY = date.today()
# Characters a plate may start with.
PLATE_CHARS = r"京津沪渝冀晋蒙辽吉黑苏浙皖闽赣鲁豫鄂湘粤桂琼川贵云藏陕甘青宁新"
# One leading character, 4-12 of A-Z / 0-9 / middle dot, optional trailing "挂".
PLATE_RE = re.compile(
    rf"[{PLATE_CHARS}][A-Z0-9·]{{4,12}}(?:挂)?"
)
# Standard VINs contain no I, O or Q, but OCR often misreads characters, so
# the pattern is relaxed to any 17 upper-case letters or digits.
VIN_RE = re.compile(r"[A-Z0-9]{17}")
# Characters allowed in a strict VIN (upper-case letters without I, O, Q, plus digits).
VIN_BODY = set("ABCDEFGHJKLMNPRSTUVWXYZ0123456789")
def normalize_stem(stem: str) -> str:
    """Reduce an image file stem to its grouping key.

    A trailing " <number>" suffix is removed first, then everything from
    "行驶证年审" onwards; the result is stripped after each step.
    """
    without_counter = re.sub(r"\s+\d+$", "", stem).strip()
    without_review = re.sub(r"行驶证年审.*$", "", without_counter)
    return without_review.strip()
def end_of_month(y: int, m: int) -> date:
    """Return the last calendar day of month *m* in year *y*."""
    _first_weekday, days_in_month = calendar.monthrange(y, m)
    return date(y, m, days_in_month)
def parse_inspection_dates(text: str) -> List[date]:
    """Collect every inspection-validity date found in OCR text.

    Two notations after "检验有效期至/到" are recognised:
      * year + month, optionally with a day. Without a usable day, or when
        the day does not exist in that month, the end of the month is used.
      * year only (not followed by a month), giving the end of December.

    Years outside 2000-2040 and months outside 1-12 are ignored. Year+month
    hits come first in the result, then year-only hits, each in text order.
    """
    found: List[date] = []

    with_month = re.compile(
        r"检验有效期[至到]\s*(\d{4})\s*年\s*(\d{1,2})\s*月(?:\s*(\d{1,2})\s*日)?"
    )
    for hit in with_month.finditer(text):
        year, month = int(hit.group(1)), int(hit.group(2))
        if not (2000 <= year <= 2040 and 1 <= month <= 12):
            continue
        day_text = hit.group(3)
        day = int(day_text) if day_text else None
        exact = None
        if day and 1 <= day <= 31:
            try:
                exact = date(year, month, day)
            except ValueError:
                # e.g. 31 April: fall back to the month end below.
                exact = None
        found.append(exact if exact is not None else end_of_month(year, month))

    year_only = re.compile(r"检验有效期[至到]\s*(\d{4})\s*年(?!\s*\d{1,2}\s*月)")
    for hit in year_only.finditer(text):
        year = int(hit.group(1))
        if 2000 <= year <= 2040:
            found.append(end_of_month(year, 12))
    return found
def extract_plate_ocr(text: str, filename_plate: Optional[str]) -> Optional[str]:
    """Pick the licence plate from OCR text, preferring the file-name plate.

    Candidates are all ``PLATE_RE`` matches of at most 12 characters that are
    not on the small exclusion list. With no candidate the file-name plate is
    returned as is. When the file-name plate is among the candidates it wins;
    otherwise the most frequent candidate is returned.
    """
    excluded = {"中华人民共和国", "中国人民共和国"}
    candidates = [
        hit
        for hit in PLATE_RE.findall(text)
        if hit not in excluded and len(hit) <= 12
    ]
    if not candidates:
        return filename_plate
    frequency = Counter(candidates)
    if filename_plate and filename_plate in frequency:
        return filename_plate
    top_plate, _count = frequency.most_common(1)[0]
    return top_plate
def _vin_from_l_window(window: str) -> Optional[str]:
    """Find a 17-character VIN starting with "L" inside *window*.

    OCR often glues the VIN to the neighbouring model text, so every "L" is
    tried as a start. The first 17-character slice that consists only of
    ``VIN_BODY`` characters and contains both a digit and an upper-case
    letter is returned; ``None`` when there is none.
    """
    start = window.find("L")
    while start != -1:
        chunk = window[start : start + 17]
        if len(chunk) < 17:
            # Any later start would give an even shorter slice.
            break
        if (
            all(ch in VIN_BODY for ch in chunk)
            and re.search(r"\d", chunk)
            and re.search(r"[A-Z]", chunk)
        ):
            return chunk
        start = window.find("L", start + 1)
    return None
def extract_vin(text: str) -> Optional[str]:
    """Extract the vehicle identification number from OCR text.

    All whitespace is removed first. The search then proceeds in order:
      1. In the 260 characters starting at the "车辆识别代号" label (or "VIN"
         when that label is absent): a strict "L"-anchored VIN, then the
         first 17-character alphanumeric run with a digit and a letter that
         starts with "L", then the first such run with any start.
      2. In the whole text: a strict "L"-anchored VIN, then the first
         17-character run that starts with "L", mixes digits and letters and
         uses only ``VIN_BODY`` characters.

    Returns ``None`` when nothing qualifies.
    """

    def mixes_digits_and_letters(candidate: str) -> bool:
        return bool(re.search(r"\d", candidate) and re.search(r"[A-Z]", candidate))

    compact = re.sub(r"[\s\r\n ]+", "", text)
    label_at = compact.find("车辆识别代号")
    if label_at < 0:
        label_at = compact.find("VIN")

    if label_at >= 0:
        window = compact[label_at : label_at + 260]
        anchored = _vin_from_l_window(window)
        if anchored:
            return anchored
        mixed = [
            hit.group(0)
            for hit in VIN_RE.finditer(window)
            if mixes_digits_and_letters(hit.group(0))
        ]
        for candidate in mixed:
            if candidate[0] == "L":
                return candidate
        if mixed:
            return mixed[0]

    anchored = _vin_from_l_window(compact)
    if anchored:
        return anchored
    for hit in VIN_RE.finditer(compact):
        candidate = hit.group(0)
        if not mixes_digits_and_letters(candidate):
            continue
        if candidate[0] == "L" and all(ch in VIN_BODY for ch in candidate):
            return candidate
    return None
def extract_register_date(text: str) -> Optional[date]:
    """Extract the registration date from OCR text.

    Three layouts are tried in order and the first one that yields a valid
    calendar date wins:
      1. the "注册日期" and "发证日期" labels followed by ``YYYY-M-D``;
      2. "注册日期" or "RegisterDate" (any case), up to 120 non-digits, then
         a date separated by ``-``, ``/`` or ``.``;
      3. "注册日期", up to 60 non-digits, then a date whose separators may
         also be "年" / "月".

    A layout that matches but gives an impossible date is skipped. Returns
    ``None`` when no layout produces a date.
    """
    layouts = (
        (r"注册日期\s*[\n\r\s]*发证日期\s*[\n\r\s]*(\d{4})-(\d{1,2})-(\d{1,2})", 0),
        (
            r"(?:注册日期|RegisterDate)\D{0,120}?(\d{4})[-/.](\d{1,2})[-/.](\d{1,2})",
            re.DOTALL | re.IGNORECASE,
        ),
        (r"注册日期\D{0,60}?(\d{4})[-/.年](\d{1,2})[-/.月](\d{1,2})", re.DOTALL),
    )
    for pattern, flags in layouts:
        hit = re.search(pattern, text, flags)
        if not hit:
            continue
        year, month, day = (int(part) for part in hit.groups())
        try:
            return date(year, month, day)
        except ValueError:
            continue
    return None
def extract_scrap_date(text: str) -> Optional[date]:
    """Extract the mandatory-scrap deadline from OCR text.

    Several notations after "强制报废期止" / "报废期止" / "强制报废" are tried
    in order: a colon (ASCII ``:`` or full-width ``:``) followed by a
    ``-``/``/``/``.`` separated date, the same without a colon, and
    year/month/day written with "年" / "月" / "日".

    Returns:
        The first valid calendar date found, or ``None``. A notation that
        matches but gives an impossible date is skipped.
    """
    patterns = [
        # The full-width colon is accepted too, so "强制报废期止:2035-06-30"
        # is recognised.
        r"强制报废期止\s*[::]\s*(\d{4})[-/.](\d{1,2})[-/.](\d{1,2})",
        r"强制报废期止\s*(\d{4})[-/.](\d{1,2})[-/.](\d{1,2})",
        r"强制报废期止\s*(\d{4})\s*年\s*(\d{1,2})\s*月\s*(\d{1,2})\s*日?",
        r"报废期止\s*(\d{4})\s*年\s*(\d{1,2})\s*月\s*(\d{1,2})",
        r"强制报废[^0-9]{0,20}(\d{4})\s*年\s*(\d{1,2})\s*月\s*(\d{1,2})",
    ]
    for pat in patterns:
        m = re.search(pat, text)
        if not m:
            continue
        try:
            return date(int(m.group(1)), int(m.group(2)), int(m.group(3)))
        except ValueError:
            continue
    return None
def pick_largest_per_plate(folder: Path):
    """Map each normalized file stem to the largest image file carrying it.

    Only regular files with a .jpg/.jpeg/.png/.webp suffix (case-insensitive)
    directly inside ``folder`` are considered.  Files whose stem normalizes
    to an empty value are ignored.  On equal size the first file seen stays.
    """
    image_exts = (".jpg", ".jpeg", ".png", ".webp")
    best = {}
    for entry in folder.iterdir():
        if not entry.is_file():
            continue
        if entry.suffix.lower() not in image_exts:
            continue
        plate_key = normalize_stem(entry.stem)
        if not plate_key:
            continue
        if plate_key not in best:
            best[plate_key] = entry
        elif entry.stat().st_size > best[plate_key].stat().st_size:
            best[plate_key] = entry
    return best
def main() -> int:
    """OCR the selected images and write the extracted fields into the template.

    Returns 1 (after a message on stderr) when ``IMG_DIR`` or ``TEMPLATE``
    is missing, otherwise 0.  An image whose OCR call raises is reported on
    stderr and produces no row.  The filled workbook is saved to ``OUT`` and
    then copied over ``TEMPLATE``.
    """
    if not IMG_DIR.is_dir():
        print("图片目录不存在:", IMG_DIR, file=sys.stderr)
        return 1
    if not TEMPLATE.is_file():
        print("模板不存在:", TEMPLATE, file=sys.stderr)
        return 1

    images = pick_largest_per_plate(IMG_DIR)
    engine = RapidOCR()
    records: List[Tuple[str, Optional[str], Optional[date], Optional[date], Optional[date]]] = []
    stats = {"no_inspection": 0, "no_vin": 0, "no_register": 0, "no_scrap": 0}

    for seq, (fn_plate, path) in enumerate(sorted(images.items()), 1):
        if seq % 80 == 0:
            print(f"OCR {seq}/{len(images)}", flush=True)
        try:
            result, _ = engine(str(path))
        except Exception as e:
            print("OCR失败", fn_plate, e, file=sys.stderr)
            continue
        text = "\n".join([item[1] for item in result]) if result else ""

        plate = extract_plate_ocr(text, fn_plate) or fn_plate
        vin = extract_vin(text)
        reg = extract_register_date(text)
        scrap = extract_scrap_date(text)
        ins_dates = parse_inspection_dates(text)
        inspection_end = max(ins_dates) if ins_dates else None

        # Count every field that came back empty for this image.
        for stat_key, value in (
            ("no_inspection", inspection_end),
            ("no_vin", vin),
            ("no_register", reg),
            ("no_scrap", scrap),
        ):
            if not value:
                stats[stat_key] += 1
        records.append((plate, vin, reg, scrap, inspection_end))

    wb = openpyxl.load_workbook(TEMPLATE)
    ws = wb.active
    # Data starts on row 2; drop whatever rows the template already holds below the header.
    existing_rows = ws.max_row
    if existing_rows > 1:
        ws.delete_rows(2, existing_rows - 1)
    for row_no, (plate, vin, reg, scrap, insp) in enumerate(records, start=2):
        for col_no, value in enumerate((plate, vin or "", reg, scrap, insp), start=1):
            ws.cell(row=row_no, column=col_no, value=value)

    wb.save(OUT)
    shutil.copy2(OUT, TEMPLATE)
    print("写入:", OUT)
    print("已同步覆盖模板:", TEMPLATE)
    print("行数（含表头）:", ws.max_row)
    print("统计（空字段数）:", stats)
    return 0
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())

View File

@@ -0,0 +1,95 @@
#!/usr/bin/env python3
"""筛选导入失败行(站名未匹配 / 加氢量为空),并按规则生成「修正后加氢站名称」。"""
from __future__ import annotations
import argparse
from pathlib import Path
import pandas as pd
# 原名称 -> 导入系统应使用的站点名称
STATION_RENAME: dict[str, str] = {
"张家港港城加氢站": "江苏嘉化氢能港城加氢站",
"重庆销售渝中分公司丝路加油站": "重庆丝路加氢站",
"武汉双龙站": "武汉中石化双龙站",
"皖能集团龙塘综合能源港": "皖能集团龙塘综合能源港(CNG、H2、充换电)",
"上海嘉氢实业加氢站(江桥重塑)": "上海嘉氢实业加氢站(江桥重塑)-暂停营业",
"如皋神华加氢站": "神华加氢站",
"宁波慈溪加氢站": "国家电投宁波慈溪加氢站",
"江苏花桥开发区加氢站": "昆山市中国石油昆山花桥加油站(花安路北50米)",
"江南西彭综合能源站": "中国石化江南西彭综合能源站",
"江南半山环道加能站": "中国石化江南半山环道加能站",
"海德利森(丹灶)加氢站": "丹灶海德利森加氢站",
"广州开泰北加油站": "中国石化广州开泰北加油加氢站",
"广州金坑加油站": "中国石化广州金坑加氢站",
"广东中恒东明三路油站": "东明三路综合能源站",
"诚志AP银河路加氢站": "诚志空气产品银河路加氢站",
"成都天府机场高速南站(含网点)": "成都天府机场高速南站",
"成都天府机场高速北站(含网点)": "成都天府机场高速北站",
}
def _fix_station_name(val) -> str:
if pd.isna(val):
return ""
s = str(val).strip()
return STATION_RENAME.get(s, s)
def process(in_path: Path, out_path: Path, sheet: str = "失败记录") -> None:
    """Split failed-import rows by reason and write them to a four-sheet workbook.

    Reads ``sheet`` from ``in_path`` and keeps rows whose "导入错误原因"
    either starts with "加氢站名称未匹配到站点" or equals "加氢量为空".
    Each output frame gains a "修正后加氢站名称" column computed with
    ``_fix_station_name``.  The fourth sheet lists, one row per distinct
    name, the unmatched station names that have no entry in
    ``STATION_RENAME``.  Row counts are printed at the end.
    """
    df = pd.read_excel(in_path, sheet_name=sheet)
    reason = df["导入错误原因"].astype(str)

    mask_nomatch = reason.str.startswith("加氢站名称未匹配到站点")
    mask_empty_qty = reason == "加氢量为空"

    df_nomatch = df.loc[mask_nomatch].copy()
    df_empty_qty = df.loc[mask_empty_qty].copy()
    merged = df.loc[mask_nomatch | mask_empty_qty].copy()

    # Label each merged row with every filter it satisfied.
    tagged = (
        (mask_nomatch, "加氢站名称未匹配到站点"),
        (mask_empty_qty, "加氢量为空"),
    )
    merged["筛选原因"] = [
        "".join([label for mask, label in tagged if bool(mask.loc[idx])])
        for idx in merged.index
    ]

    for frame in (df_nomatch, df_empty_qty, merged):
        frame["修正后加氢站名称"] = frame["加氢站名称"].map(_fix_station_name)

    known = set(STATION_RENAME)
    stripped = df_nomatch["加氢站名称"].map(
        lambda cell: str(cell).strip() if pd.notna(cell) else ""
    )
    unmapped_mask = ~stripped.isin(known) & (stripped != "")
    report_cols = ["加氢站名称", "订单编号", "加氢时间", "车牌", "导入错误原因"]
    unmapped = df_nomatch.loc[unmapped_mask, report_cols]
    unmapped = unmapped.drop_duplicates(subset=["加氢站名称"])
    unmapped = unmapped.sort_values("加氢站名称")

    sheets = (
        ("未匹配站点", df_nomatch),
        ("加氢量为空", df_empty_qty),
        ("合并_含筛选原因", merged),
        ("未匹配_无映射规则", unmapped),
    )
    with pd.ExcelWriter(out_path, engine="openpyxl") as w:
        for sheet_name, frame in sheets:
            frame.to_excel(w, sheet_name=sheet_name, index=False)

    print(f"已写出: {out_path}")
    print(f" 未匹配站点: {len(df_nomatch)}")
    print(f" 加氢量为空: {len(df_empty_qty)}")
    print(f" 合并: {len(merged)}")
    print(f" 未匹配中尚无重命名规则的站名种类: {len(unmapped)}见工作表「未匹配_无映射规则」")
def main() -> None:
    """Parse the command line and run ``process`` on the given workbook.

    Arguments: the input .xlsx path (positional), ``-o/--out`` (required
    output path) and ``--sheet`` (defaults to "失败记录").
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("input_xlsx", type=Path)
    parser.add_argument("-o", "--out", type=Path, required=True)
    parser.add_argument("--sheet", default="失败记录")
    opts = parser.parse_args()

    source = opts.input_xlsx.resolve()
    target = opts.out.resolve()
    process(source, target, sheet=opts.sheet)
# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,91 @@
#!/usr/bin/env python3
"""为「明细台账」生成唯一序号JQ + YYMMDD(6) + 当日序号(5位),每日从 00001 起。"""
from __future__ import annotations
import argparse
from datetime import datetime
from pathlib import Path
import pandas as pd
from openpyxl import load_workbook
def parse_cell_date(val) -> pd.Timestamp:
    """Convert one spreadsheet cell to a ``pd.Timestamp`` (``pd.NaT`` if impossible).

    * missing values -> ``NaT``
    * ``pd.Timestamp`` / ``datetime`` -> returned as a Timestamp
    * numbers strictly between 20000 and 80000 -> treated as Excel serial
      day numbers counted from 1899-12-30
    * any other number (and booleans) -> ``NaT``
    * everything else (typically text) -> ``pd.to_datetime(..., errors="coerce")``

    Numbers outside the serial window are rejected explicitly: passing a
    bare number to ``pd.to_datetime`` interprets it as nanoseconds since
    1970-01-01 and would produce a plausible-looking but wrong date.
    """
    if pd.isna(val):
        return pd.NaT
    if isinstance(val, (pd.Timestamp, datetime)):
        return pd.Timestamp(val)
    if isinstance(val, bool):
        # bool is an int subclass but never a date.
        return pd.NaT
    if isinstance(val, (int, float)):
        serial = float(val)
        if 20000 < serial < 80000:
            return pd.to_datetime(serial, unit="D", origin="1899-12-30")
        return pd.NaT
    return pd.to_datetime(val, errors="coerce")
def build_jq_ids(df: pd.DataFrame, date_col: str = "日期") -> pd.Series:
    """Build per-day serial IDs of the form ``JQ`` + ``YYMMDD`` + 5-digit counter.

    The result shares ``df``'s index.  Rows whose date cannot be parsed get
    an empty string.  Within a day, rows are numbered from 00001 in order
    of timestamp, ties broken by original index order.

    Raises:
        SystemExit: if a single day holds more than 99999 rows.
    """
    ts = df[date_col].map(parse_cell_date)
    valid = ts.notna()
    out = pd.Series([""] * len(df), index=df.index, dtype=object)
    if not valid.any():
        # Nothing to number.  Returning here also avoids using `.dt` on a
        # column that is not datetime-typed (e.g. an empty sheet).
        return out

    sub = df.loc[valid].copy()
    sub["_ts"] = ts[valid]
    sub["_day"] = sub["_ts"].dt.normalize()
    sub["_orig"] = sub.index
    sub = sub.sort_values(["_day", "_ts", "_orig"])
    sub["_seq"] = sub.groupby("_day", sort=False).cumcount() + 1
    if (sub["_seq"] > 99999).any():
        raise SystemExit("单日记录超过 99999 条，无法使用 5 位序号，请调整规则或拆分数据。")
    yy_mm_dd = sub["_day"].dt.strftime("%y%m%d")
    out.loc[sub.index] = "JQ" + yy_mm_dd + sub["_seq"].astype(int).astype(str).str.zfill(5)
    return out
def main() -> None:
    """Fill the "序号" column of a ledger sheet with generated JQ serial IDs.

    The sheet's header is expected on row 2 and data from row 3.  The
    workbook is saved to ``--out`` or, by default, back over the input.

    The workbook structure (sheet, "序号" column, "日期" column) is checked
    before any IDs are generated, so a wrong sheet name or header produces
    a short message instead of a pandas traceback.

    Raises:
        SystemExit: on a missing sheet/column or if duplicate IDs were generated.
    """
    p = argparse.ArgumentParser()
    p.add_argument("xlsx", type=Path, help="加氢记录 xlsx 路径")
    p.add_argument(
        "--sheet",
        default="明细台账",
        help="工作表名，默认「明细台账」",
    )
    p.add_argument(
        "--out",
        type=Path,
        default=None,
        help="输出路径；默认覆盖输入文件",
    )
    args = p.parse_args()
    path = args.xlsx.expanduser().resolve()
    out_path = args.out.expanduser().resolve() if args.out else path

    # Validate with openpyxl first: pd.read_excel raises its own error for a
    # missing sheet, which would make the friendly message below unreachable.
    wb = load_workbook(path)
    if args.sheet not in wb.sheetnames:
        raise SystemExit(f"未找到工作表: {args.sheet}")
    ws = wb[args.sheet]
    headers = [ws.cell(2, c).value for c in range(1, ws.max_column + 1)]
    if "序号" not in headers:
        raise SystemExit(f"第 2 行表头中未找到「序号」列，当前为: {headers[:40]}")
    col_序号 = headers.index("序号") + 1

    df = pd.read_excel(path, sheet_name=args.sheet, header=1)
    if "日期" not in df.columns:
        raise SystemExit(f"第 2 行表头中未找到「日期」列，当前为: {list(df.columns)[:40]}")
    ids = build_jq_ids(df)
    if ids[ids != ""].duplicated().any():
        raise SystemExit("内部错误：生成了重复序号")

    n = len(df)
    # DataFrame row i corresponds to worksheet row 3 + i (two header rows above).
    for i, serial in enumerate(ids):
        ws.cell(3 + i, col_序号).value = serial if serial else None
    wb.save(out_path)

    empty_n = (ids == "").sum()
    print(f"已写入 {out_path}，共 {n} 行；序号为空（日期缺失）: {empty_n} 行。")
    if empty_n:
        print("请为日期为空的行补全日期后重新运行，或手工填写序号。")
# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,372 @@
#!/usr/bin/env python3
"""羚牛数智中心建设规划 — 赛博朋克风格 PPT"""
from pptx import Presentation
from pptx.util import Inches, Pt
from pptx.dml.color import RGBColor
from pptx.enum.text import PP_ALIGN, MSO_ANCHOR
from pptx.enum.shapes import MSO_SHAPE
# Destination of the generated deck (absolute path on the author's machine).
OUT = "/Users/sylvawong/Desktop/羚牛数智中心建设规划_赛博朋克.pptx"
# Cyberpunk colour palette
BG_DARK = RGBColor(6, 10, 28)
BG_PANEL = RGBColor(12, 18, 48)
CYAN = RGBColor(0, 245, 255)
MAGENTA = RGBColor(255, 0, 140)
PURPLE = RGBColor(140, 80, 255)
GOLD = RGBColor(255, 210, 60)
WHITE = RGBColor(235, 245, 255)
SILVER = RGBColor(160, 180, 210)
DIM = RGBColor(90, 110, 150)
# Slide size: 13.333 in x 7.5 in.
SLIDE_W = Inches(13.333)
SLIDE_H = Inches(7.5)
def set_bg(slide, color=BG_DARK):
    """Give ``slide`` a solid background of ``color`` (``BG_DARK`` by default)."""
    bg_fill = slide.background.fill
    bg_fill.solid()
    bg_fill.fore_color.rgb = color
def add_glow_bar(slide, left, top, width, height, color):
    """Add a borderless rectangle filled with ``color`` and return the shape."""
    bar = slide.shapes.add_shape(MSO_SHAPE.RECTANGLE, left, top, width, height)
    bar_fill = bar.fill
    bar_fill.solid()
    bar_fill.fore_color.rgb = color
    # No outline, so the bar reads as a flat strip of colour.
    bar.line.fill.background()
    return bar
def add_corner_accents(slide):
    """Draw the frame accents: cyan left edge, magenta right edge, purple bottom edge."""
    accents = (
        (Inches(0), Inches(0), Inches(0.12), SLIDE_H, CYAN),
        (SLIDE_W - Inches(0.08), Inches(0), Inches(0.08), SLIDE_H, MAGENTA),
        (Inches(0), SLIDE_H - Inches(0.06), SLIDE_W, Inches(0.06), PURPLE),
    )
    for left, top, width, height, color in accents:
        add_glow_bar(slide, left, top, width, height, color)
def add_header(slide, title, subtitle=None):
    """Add the frame accents, a cyan underline, a 32 pt bold title and an optional 14 pt cyan subtitle."""
    add_corner_accents(slide)
    add_glow_bar(slide, Inches(0.8), Inches(1.05), Inches(3.5), Pt(4), CYAN)

    title_box = slide.shapes.add_textbox(Inches(0.8), Inches(0.45), Inches(11.5), Inches(0.9))
    title_par = title_box.text_frame.paragraphs[0]
    title_par.text = title
    title_par.font.size = Pt(32)
    title_par.font.bold = True
    title_par.font.color.rgb = WHITE

    if not subtitle:
        return
    sub_box = slide.shapes.add_textbox(Inches(0.8), Inches(1.15), Inches(11), Inches(0.5))
    sub_par = sub_box.text_frame.paragraphs[0]
    sub_par.text = subtitle
    sub_par.font.size = Pt(14)
    sub_par.font.color.rgb = CYAN
def add_bullets(slide, items, top=Inches(1.6), left=Inches(0.9), width=Inches(11.2)):
    """Add one text box with a paragraph per ``(text, accent)`` item.

    ``accent`` is the paragraph colour; a falsy accent means ``SILVER``.
    Text of 80 characters or more is set in 14 pt instead of 16 pt.
    """
    frame = slide.shapes.add_textbox(left, top, width, Inches(5.2)).text_frame
    frame.word_wrap = True
    use_existing = True  # a new text frame already holds one empty paragraph
    for text, accent in items:
        par = frame.paragraphs[0] if use_existing else frame.add_paragraph()
        use_existing = False
        par.text = "" + text
        if len(text) < 80:
            par.font.size = Pt(16)
        else:
            par.font.size = Pt(14)
        par.font.color.rgb = accent if accent else SILVER
        par.space_after = Pt(10)
        par.level = 0
def add_cards(slide, cards, top=Inches(1.7)):
    """Lay ``(title, body, color)`` cards out in one row of equal-width panels.

    The row is 11.5 in wide starting at 0.8 in, with 0.25 in between cards.
    Each card is a rounded panel outlined in its colour, holding a bold
    18 pt title and a 12 pt silver body.
    """
    count = len(cards)
    gutter = Inches(0.25)
    card_w = (Inches(11.5) - gutter * (count - 1)) / count
    for slot, (title, body, color) in enumerate(cards):
        x = Inches(0.8) + (card_w + gutter) * slot

        panel = slide.shapes.add_shape(MSO_SHAPE.ROUNDED_RECTANGLE, x, top, card_w, Inches(4.8))
        panel.fill.solid()
        panel.fill.fore_color.rgb = BG_PANEL
        panel.line.color.rgb = color
        panel.line.width = Pt(1.5)

        text_box = slide.shapes.add_textbox(
            x + Inches(0.2), top + Inches(0.25), card_w - Inches(0.4), Inches(4.3)
        )
        frame = text_box.text_frame
        frame.word_wrap = True

        heading = frame.paragraphs[0]
        heading.text = title
        heading.font.size = Pt(18)
        heading.font.bold = True
        heading.font.color.rgb = color

        detail = frame.add_paragraph()
        detail.text = body
        detail.font.size = Pt(12)
        detail.font.color.rgb = SILVER
        detail.space_before = Pt(12)
def slide_title(prs):
    """Append the cover slide: dark background, accents, grid lines and four centred text lines."""
    slide = prs.slides.add_slide(prs.slide_layouts[6])
    set_bg(slide)
    add_corner_accents(slide)
    # Faint vertical rules to suggest a grid.
    for column in range(3):
        add_glow_bar(
            slide, Inches(0.5 + column * 4), Inches(2.2), Inches(0.02), Inches(3), RGBColor(30, 50, 90)
        )
    add_glow_bar(slide, Inches(1.2), Inches(3.8), Inches(10.8), Pt(2), MAGENTA)

    # (top, height, text, point size, bold, colour) for each centred line.
    centred_lines = (
        (Inches(1.8), Inches(1.2), "羚牛数智中心", 48, True, CYAN),
        (Inches(2.9), Inches(0.8), "建设规划报告", 36, False, WHITE),
        (Inches(4.2), Inches(0.6), "LINGNIU DIGITAL INTELLIGENCE CENTER · V1.0", 14, False, MAGENTA),
        (Inches(5.2), Inches(0.8), "羚牛氢能 One OS · 氢能交通的商业操作系统", 16, False, GOLD),
    )
    for top, height, text, size, bold, color in centred_lines:
        box = slide.shapes.add_textbox(Inches(0.9), top, Inches(11.5), height)
        par = box.text_frame.paragraphs[0]
        par.text = text
        par.font.size = Pt(size)
        if bold:
            par.font.bold = True
        par.font.color.rgb = color
        par.alignment = PP_ALIGN.CENTER
def build(prs):
    """Append the thirteen content slides (background through closing slide) to ``prs``.

    Every slide uses layout index 6 and the dark background; content is
    added with ``add_header`` plus ``add_bullets`` / ``add_cards``.
    """
    # 1 Background
    s = prs.slides.add_slide(prs.slide_layouts[6])
    set_bg(s)
    add_header(s, "一、报告背景与目标", "REPORT BACKGROUND & OBJECTIVES")
    add_bullets(
        s,
        [
            ("业务规模扩大，人工台账与分散系统模式难以为继", None),
            ("数据分散 · 流程依赖人工 · 单点建设难复用 · 多能源场景难支撑", MAGENTA),
            ("建设统一「数智中心」是稳定运营与战略发展的必然选择", CYAN),
        ],
    )
    # 2 Pain points
    s = prs.slides.add_slide(prs.slide_layouts[6])
    set_bg(s)
    add_header(s, "现状痛点", "CURRENT CHALLENGES")
    add_cards(
        s,
        [
            ("数据孤岛", "业务数据分散\n难以形成统一视图", CYAN),
            ("流程非标", "依赖人工经验\n标准化与可追溯不足", MAGENTA),
            ("重复建设", "单点需求驱动\n难以复用与扩展", PURPLE),
            ("复杂场景", "多能源多业务并行\n现有模式难支撑", GOLD),
        ],
    )
    # 3 Problems to solve
    s = prs.slides.add_slide(prs.slide_layouts[6])
    set_bg(s)
    add_header(s, "数智中心要解决的问题", "PROBLEMS TO SOLVE")
    add_cards(
        s,
        [
            (
                "当前必须解决",
                "手工台账与人工对账比例高\n业务系统割裂，资产/能源/运维无法贯通\n任务分配追踪依赖人工\n管理决策缺乏数据支撑",
                CYAN,
            ),
            (
                "未来必须应对",
                "业务体量扩大后人效下降\n多能源多业务管理复杂度上升\n无统一平台则系统不可持续\nAI 无法在非标准数据上有效落地",
                MAGENTA,
            ),
        ],
        top=Inches(1.65),
    )
    # 4 Four objectives
    s = prs.slides.add_slide(prs.slide_layouts[6])
    set_bg(s)
    add_header(s, "四大建设目标", "CORE OBJECTIVES")
    add_cards(
        s,
        [
            ("稳定运行", "解决手工台账与流程混乱\n支撑当前业务稳定运行", CYAN),
            ("持续演进", "架构灵活支持未来5年发展\n具备扩展能力", MAGENTA),
            ("数据资产", "数据可沉淀、可复用、可变现\n以平台与数据为核心", PURPLE),
            ("对内对外", "内部降本增效\n对外输出 SaaS 能力", GOLD),
        ],
    )
    # 5 Strategic positioning
    s = prs.slides.add_slide(prs.slide_layouts[6])
    set_bg(s)
    add_header(s, "二、战略定位", "STRATEGIC POSITIONING")
    add_bullets(
        s,
        [
            # NOTE(review): "35 年" may be a garbled "3–5 年" — confirm against the source report.
            ("数智中心是羚牛未来 35 年最核心的数字基础设施与能力中枢", CYAN),
            ("不是单一 IT 项目，而是公司长期战略资产", WHITE),
            ("构建「羚牛氢能 One OS」— 面向氢能交通的一体化商业操作系统", GOLD),
            ("核心产品：小羚羚 — 统一运营平台与服务终端", MAGENTA),
        ],
        top=Inches(1.7),
    )
    # 6 Three-phase overview
    s = prs.slides.add_slide(prs.slide_layouts[6])
    set_bg(s)
    add_header(s, "战略阶段：三步走", "THREE-PHASE ROADMAP")
    add_cards(
        s,
        [
            ("Phase 01", "统一 · 标准 · 稳定\n业务在线 · 数据留痕\n筑牢数据底座", CYAN),
            ("Phase 02", "平台化 · 复用 · 协同\n用数据驱动管理\n降本增效可复制", MAGENTA),
            ("Phase 03", "智能化 · 开放 · 商业化\nAI 深度驱动生态\n利润中心转型", GOLD),
        ],
        top=Inches(1.75),
    )
    # 7 Phase 1
    s = prs.slides.add_slide(prs.slide_layouts[6])
    set_bg(s)
    add_header(s, "Phase 01 · 统一 · 标准 · 稳定", "FOUNDATION")
    add_bullets(
        s,
        [
            ("全面迁移至线上平台：业务在线、数据留痕、流程可控", CYAN),
            ("统一：构建统一运营平台与统一服务终端（小羚羚），打破系统孤岛", None),
            ("标准建立标准化数据结构、业务流程、接口规范AI 辅助", None),
            ("稳定：高可用、安全合规，支撑日常业务连续运行", None),
            ("此阶段不追求先进功能，而为数字化与智能化筑牢底座", DIM),
        ],
    )
    # 8 Phase 2
    s = prs.slides.add_slide(prs.slide_layouts[6])
    set_bg(s)
    add_header(s, "Phase 02 · 平台化 · 复用 · 协同", "PLATFORM")
    add_bullets(
        s,
        [
            ("从「有数据」走向「用数据」— 业务可复制、管理可量化、决策有依据", MAGENTA),
            ("平台化：任务调度、能源管理、资产视图等抽象为平台服务", None),
            ("复用：避免重复建设，新业务快速接入，降低边际成本", None),
            ("协同：打通资产设备、能源、运维、服务，跨部门高效协作", None),
            ("核心价值：降本增效，为规模化扩张提供可复制的运营模板", GOLD),
        ],
    )
    # 9 Phase 3
    s = prs.slides.add_slide(prs.slide_layouts[6])
    set_bg(s)
    add_header(s, "Phase 03 · 智能化 · 开放 · 商业化", "INTELLIGENCE & ECOSYSTEM")
    add_bullets(
        s,
        [
            ("构建 AI 深度驱动、多方参与、商业闭环的绿色智能生态平台", GOLD),
            ("智能化AI 从辅助走向决策，运维/调度/安全/财务自主优化", None),
            ("开放：通过 API、SaaS 向能源企业、物流商、装备商输出能力", None),
            ("商业化：平台+服务+数据多元收入，成本中心 → 利润中心", CYAN),
            ("成为氢能交通领域的「操作系统提供商」", MAGENTA),
        ],
    )
    # 10 Evolution logic
    s = prs.slides.add_slide(prs.slide_layouts[6])
    set_bg(s)
    add_header(s, "演进逻辑", "EVOLUTION LOGIC")
    add_cards(
        s,
        [
            ("信息化", "解决「有没有」\n从 0 到 1", CYAN),
            ("数字化", "解决「好不好」\n从 1 到 N", MAGENTA),
            ("智能化", "解决「赚不赚」\n从 N 到 ∞", GOLD),
        ],
        top=Inches(2.0),
    )
    tb = s.shapes.add_textbox(Inches(0.8), Inches(5.5), Inches(11.5), Inches(0.8))
    p = tb.text_frame.paragraphs[0]
    p.text = "成本中心 ──▶ 利润中心 · 数字基础设施驱动绿色交通变革"
    p.font.size = Pt(18)
    p.font.color.rgb = WHITE
    p.alignment = PP_ALIGN.CENTER
    # 11 Implementation approach
    s = prs.slides.add_slide(prs.slide_layouts[6])
    set_bg(s)
    add_header(s, "三、整体建设方案", "IMPLEMENTATION")
    add_bullets(
        s,
        [
            ("分阶段实施 — 按业务节奏推进，避免一次性重投入", CYAN),
            ("基础优先 — 系统能力先于智能能力", None),
            ("核心内控 — 需求、架构、数据由内部掌控", None),
            ("敏捷迭代 — MVP 验证，快速反馈，持续优化", None),
        ],
        top=Inches(1.65),
    )
    # 12 The "1 + N + X" concept
    s = prs.slides.add_slide(prs.slide_layouts[6])
    set_bg(s)
    add_header(s, "「1 + N + X」核心理念", "ARCHITECTURE PHILOSOPHY")
    add_cards(
        s,
        [
            ("1", "以车辆/设备/能源资产\n为唯一主键\n贯穿全生命周期", CYAN),
            ("N", "覆盖电、氢、充换氢\n储能等多能源形态", MAGENTA),
            ("X", "运营、物流、金融\nESG、碳管理、智能调度", GOLD),
        ],
        top=Inches(1.85),
    )
    add_bullets(
        s,
        [
            ("一车/一设备看全局 · 一个平台管到底 · 一套能力可复用、可扩展、可对外输出", WHITE),
        ],
        top=Inches(5.6),
    )
    # 13 Closing slide
    s = prs.slides.add_slide(prs.slide_layouts[6])
    set_bg(s)
    add_corner_accents(s)
    add_glow_bar(s, Inches(2), Inches(3.2), Inches(9.3), Pt(3), CYAN)
    tb = s.shapes.add_textbox(Inches(0.9), Inches(2.5), Inches(11.5), Inches(1.5))
    p = tb.text_frame.paragraphs[0]
    p.text = "羚牛氢能 One OS"
    p.font.size = Pt(44)
    p.font.bold = True
    p.font.color.rgb = CYAN
    p.alignment = PP_ALIGN.CENTER
    tb2 = s.shapes.add_textbox(Inches(0.9), Inches(3.8), Inches(11.5), Inches(1))
    p2 = tb2.text_frame.paragraphs[0]
    p2.text = "连接产业各方 · 驱动绿色交通变革"
    p2.font.size = Pt(20)
    p2.font.color.rgb = WHITE
    p2.alignment = PP_ALIGN.CENTER
    tb3 = s.shapes.add_textbox(Inches(0.9), Inches(5.0), Inches(11.5), Inches(0.6))
    p3 = tb3.text_frame.paragraphs[0]
    p3.text = "数智中心 · 氢能交通商业操作系统"
    p3.font.size = Pt(14)
    p3.font.color.rgb = MAGENTA
    p3.alignment = PP_ALIGN.CENTER
def main():
    """Build the deck (cover slide plus content slides), save it to ``OUT`` and report."""
    deck = Presentation()
    deck.slide_width = SLIDE_W
    deck.slide_height = SLIDE_H

    slide_title(deck)
    build(deck)
    deck.save(OUT)

    slide_count = len(deck.slides)
    print(f"已生成: {OUT}")
    print(f"{slide_count}")
# Generate the deck only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,610 @@
#!/usr/bin/env python3
"""羚牛数智中心 — 赛博朋克增强版 PPT背景图/图示/科幻字体/动画标记)"""
from __future__ import annotations
import shutil
import zipfile
from pathlib import Path
from lxml import etree
from pptx import Presentation
from pptx.dml.color import RGBColor
from pptx.enum.shapes import MSO_SHAPE
from pptx.enum.text import PP_ALIGN
from pptx.util import Inches, Pt
# Directory two levels above this script; images and the output deck live under it.
BASE = Path(__file__).resolve().parent.parent
IMG = BASE / "assets" / "ppt-images"
OUT = BASE / "羚牛数智中心建设规划_赛博朋克_增强版.pptx"
# Second copy of the deck (absolute path on the author's machine).
OUT_DESKTOP = Path("/Users/sylvawong/Desktop/羚牛数智中心建设规划_赛博朋克_增强版.pptx")
# Sci-fi fonts (common on macOS; Orbitron is used when it is installed).
# NOTE(review): the *_FB fallback names are not referenced elsewhere in this script.
FONT_EN = "Orbitron"
FONT_EN_FB = "Avenir Next Demi Bold"
FONT_CN = "PingFang SC Light"
FONT_CN_FB = "Heiti SC Light"
# Colour palette
BG_DARK = RGBColor(6, 10, 28)
BG_PANEL = RGBColor(12, 18, 48)
CYAN = RGBColor(0, 245, 255)
MAGENTA = RGBColor(255, 0, 140)
PURPLE = RGBColor(140, 80, 255)
GOLD = RGBColor(255, 210, 60)
WHITE = RGBColor(235, 245, 255)
SILVER = RGBColor(160, 180, 210)
DIM = RGBColor(90, 110, 150)
OVERLAY = RGBColor(8, 12, 32)
# Slide size: 13.333 in x 7.5 in.
SLIDE_W = Inches(13.333)
SLIDE_H = Inches(7.5)
# Shapes registered for animation, per slide: {slide_index: [(shape_id, anim_type)]}
ANIM_REGISTRY: dict[int, list[tuple[int, str]]] = {}
# Background images, picked by slide index modulo the list length.
SLIDE_BG_CYCLE = ["bg-city.png", "bg-hydrogen.png", "bg-dataflow.png", "bg-city.png"]
def font_en():
    """Return the font name used for English/Latin text (``FONT_EN``)."""
    return FONT_EN
def font_cn():
    """Return the font name used for Chinese text (``FONT_CN``)."""
    return FONT_CN
def set_font(p, en=False, size=16, bold=False, color=SILVER):
    """Apply size (pt), weight, colour and typeface to paragraph ``p``.

    ``en=True`` selects the English typeface from ``font_en()``; otherwise
    the Chinese typeface from ``font_cn()`` is used.
    """
    typeface = font_en() if en else font_cn()
    font = p.font
    font.size = Pt(size)
    font.bold = bold
    font.color.rgb = color
    font.name = typeface
def register_anim(slide_idx: int, shape_id: int, anim: str):
    """Record in ``ANIM_REGISTRY`` that ``shape_id`` on slide ``slide_idx`` gets animation ``anim``."""
    pending = ANIM_REGISTRY.get(slide_idx)
    if pending is None:
        pending = []
        ANIM_REGISTRY[slide_idx] = pending
    pending.append((shape_id, anim))
def add_bg_image(slide, filename: str, overlay_alpha=0.72):
    """Put a full-slide background picture under a translucent dark overlay.

    Args:
        slide: the slide to decorate.
        filename: image file name inside ``IMG``.
        overlay_alpha: opacity of the dark overlay, 0.0 (invisible) to
            1.0 (fully opaque); values outside that range are clamped.

    If the image file does not exist, the slide gets a solid ``BG_DARK``
    background instead, so the light text stays readable.
    """
    path = IMG / filename
    if not path.exists():
        slide.background.fill.solid()
        slide.background.fill.fore_color.rgb = BG_DARK
        return
    pic = slide.shapes.add_picture(str(path), 0, 0, width=SLIDE_W, height=SLIDE_H)
    # Dark overlay to keep text readable on top of the picture.
    mask = slide.shapes.add_shape(MSO_SHAPE.RECTANGLE, 0, 0, SLIDE_W, SLIDE_H)
    mask.fill.solid()
    mask.fill.fore_color.rgb = OVERLAY
    # python-pptx's FillFormat has no transparency property (assigning one is
    # silently ignored), so write the DrawingML <a:alpha> child directly.
    # Its value is opacity in 1/1000 of a percent: 100000 means fully opaque.
    drawing_ns = "http://schemas.openxmlformats.org/drawingml/2006/main"
    srgb = mask._element.find(f".//{{{drawing_ns}}}solidFill/{{{drawing_ns}}}srgbClr")
    if srgb is not None:
        opacity = min(max(float(overlay_alpha), 0.0), 1.0)
        alpha = etree.SubElement(srgb, f"{{{drawing_ns}}}alpha")
        alpha.set("val", str(int(round(opacity * 100000))))
    mask.line.fill.background()
    # Send both to the back: positions 0 and 1 of the shape tree hold the
    # group properties, so index 2 is the bottom-most shape slot.
    sp_tree = slide.shapes._spTree
    sp_tree.remove(pic._element)
    sp_tree.insert(2, pic._element)
    sp_tree.remove(mask._element)
    sp_tree.insert(3, mask._element)
def add_glow_bar(slide, left, top, width, height, color):
    """Add a borderless rectangle filled with ``color`` and return the shape."""
    bar = slide.shapes.add_shape(MSO_SHAPE.RECTANGLE, left, top, width, height)
    bar_fill = bar.fill
    bar_fill.solid()
    bar_fill.fore_color.rgb = color
    # No outline, so the bar reads as a flat strip of colour.
    bar.line.fill.background()
    return bar
def add_corner_accents(slide):
    """Draw the frame accents: a cyan strip on the left edge and a magenta strip on the right."""
    accents = (
        (Inches(0), Inches(0.12), CYAN),
        (SLIDE_W - Inches(0.08), Inches(0.08), MAGENTA),
    )
    for left, width, color in accents:
        add_glow_bar(slide, left, Inches(0), width, SLIDE_H, color)
def add_header(slide, slide_idx, title, subtitle=None, anim_title="fly", anim_sub="fade"):
    """Add accents, a cyan underline, the title and an optional subtitle.

    The title box (and the subtitle box, when present) is registered for
    animation on slide ``slide_idx`` with ``anim_title`` / ``anim_sub``.
    """
    add_corner_accents(slide)
    add_glow_bar(slide, Inches(0.8), Inches(1.05), Inches(3.5), Pt(4), CYAN)

    title_box = slide.shapes.add_textbox(Inches(0.8), Inches(0.45), Inches(11.5), Inches(0.9))
    title_par = title_box.text_frame.paragraphs[0]
    title_par.text = title
    set_font(title_par, en=False, size=32, bold=True, color=WHITE)
    register_anim(slide_idx, title_box.shape_id, anim_title)

    if not subtitle:
        return
    sub_box = slide.shapes.add_textbox(Inches(0.8), Inches(1.12), Inches(11), Inches(0.5))
    sub_par = sub_box.text_frame.paragraphs[0]
    sub_par.text = subtitle
    set_font(sub_par, en=True, size=13, color=CYAN)
    register_anim(slide_idx, sub_box.shape_id, anim_sub)
def add_bullets(slide, slide_idx, items, top=Inches(1.6)):
    """Add one text box with a paragraph per ``(text, accent)`` item and register it for a fade.

    A falsy ``accent`` means ``SILVER``; text of 80 characters or more is
    set in 14 pt instead of 16 pt.
    """
    box = slide.shapes.add_textbox(Inches(0.9), top, Inches(11.2), Inches(5.2))
    frame = box.text_frame
    frame.word_wrap = True
    use_existing = True  # a new text frame already holds one empty paragraph
    for text, accent in items:
        par = frame.paragraphs[0] if use_existing else frame.add_paragraph()
        use_existing = False
        par.text = "" + text
        point_size = 16 if len(text) < 80 else 14
        set_font(par, size=point_size, color=accent or SILVER)
        par.space_after = Pt(10)
    register_anim(slide_idx, box.shape_id, "fade")
def add_cards(slide, slide_idx, cards, top=Inches(1.7)):
    """Lay ``(title, body, color)`` cards out in one row of equal-width panels.

    The row is 11.5 in wide starting at 0.8 in, with 0.25 in between cards.
    Each panel is registered for a fade animation on slide ``slide_idx``.
    """
    count = len(cards)
    gutter = Inches(0.25)
    card_w = (Inches(11.5) - gutter * (count - 1)) / count
    for slot, (title, body, color) in enumerate(cards):
        x = Inches(0.8) + (card_w + gutter) * slot

        panel = slide.shapes.add_shape(MSO_SHAPE.ROUNDED_RECTANGLE, x, top, card_w, Inches(4.8))
        panel.fill.solid()
        panel.fill.fore_color.rgb = BG_PANEL
        panel.line.color.rgb = color
        panel.line.width = Pt(1.5)
        register_anim(slide_idx, panel.shape_id, "fade")

        text_box = slide.shapes.add_textbox(
            x + Inches(0.2), top + Inches(0.25), card_w - Inches(0.4), Inches(4.3)
        )
        frame = text_box.text_frame
        frame.word_wrap = True

        heading = frame.paragraphs[0]
        heading.text = title
        set_font(heading, en=True, size=17, bold=True, color=color)

        detail = frame.add_paragraph()
        detail.text = body
        set_font(detail, size=12, color=SILVER)
        detail.space_before = Pt(12)
def add_flow_diagram(slide, slide_idx, labels, colors, top=Inches(2.2)):
    """Draw a left-to-right chain of labelled rounded boxes joined by arrows.

    ``labels`` and ``colors`` are paired positionally.  Each box is
    registered for a "fly" animation; an arrow in the box's colour follows
    every box whose position is before the last label.
    """
    last_slot = len(labels) - 1
    box_w = Inches(3.2)
    spacing = Inches(0.55)
    origin_x = Inches(0.9)
    for slot, (label, color) in enumerate(zip(labels, colors)):
        x = origin_x + (box_w + spacing) * slot

        box = slide.shapes.add_shape(MSO_SHAPE.ROUNDED_RECTANGLE, x, top, box_w, Inches(1.4))
        box.fill.solid()
        box.fill.fore_color.rgb = BG_PANEL
        box.line.color.rgb = color
        box.line.width = Pt(2)
        register_anim(slide_idx, box.shape_id, "fly")

        caption = slide.shapes.add_textbox(x, top + Inches(0.35), box_w, Inches(0.8))
        par = caption.text_frame.paragraphs[0]
        par.text = label
        par.alignment = PP_ALIGN.CENTER
        set_font(par, size=14, bold=True, color=color)

        if slot >= last_slot:
            continue
        # Arrow centred in the gap, with 0.08 in of clearance on each side.
        arrow_x = x + box_w + Inches(0.08)
        arrow = slide.shapes.add_shape(
            MSO_SHAPE.RIGHT_ARROW, arrow_x, top + Inches(0.45), spacing - Inches(0.16), Inches(0.5)
        )
        arrow.fill.solid()
        arrow.fill.fore_color.rgb = color
        arrow.line.fill.background()
def add_hex_diagram(slide, slide_idx, items, top=Inches(2.0)):
    """Draw one hexagon per ``(title, body, color)`` item in a row ("1 + N + X" diagram).

    Each hexagon is registered for a fade animation and carries a centred
    22 pt bold title with an 11 pt silver body underneath.
    """
    len(items)  # kept from the original: rejects unsized iterables up front
    hex_w = Inches(3.5)
    spacing = Inches(0.35)
    for slot, (title, body, color) in enumerate(items):
        x = Inches(0.85) + (hex_w + spacing) * slot

        hexagon = slide.shapes.add_shape(MSO_SHAPE.HEXAGON, x, top, hex_w, Inches(2.8))
        hexagon.fill.solid()
        hexagon.fill.fore_color.rgb = BG_PANEL
        hexagon.line.color.rgb = color
        hexagon.line.width = Pt(2)
        register_anim(slide_idx, hexagon.shape_id, "fade")

        text_box = slide.shapes.add_textbox(
            x + Inches(0.15), top + Inches(0.5), hex_w - Inches(0.3), Inches(2)
        )
        frame = text_box.text_frame
        frame.word_wrap = True

        heading = frame.paragraphs[0]
        heading.text = title
        heading.alignment = PP_ALIGN.CENTER
        set_font(heading, en=True, size=22, bold=True, color=color)

        detail = frame.add_paragraph()
        detail.text = body
        detail.alignment = PP_ALIGN.CENTER
        set_font(detail, size=11, color=SILVER)
        detail.space_before = Pt(8)
def new_slide(prs, bg_file: str):
    """Append a slide using layout index 6, add the background image at overlay 0.68, and return it."""
    layout = prs.slide_layouts[6]
    fresh = prs.slides.add_slide(layout)
    add_bg_image(fresh, bg_file, overlay_alpha=0.68)
    return fresh
def slide_title(prs, idx):
    """Append the cover slide; ``idx`` is the slide index used to register animations."""
    slide = new_slide(prs, "bg-city.png")
    add_corner_accents(slide)
    add_glow_bar(slide, Inches(1.2), Inches(3.85), Inches(10.8), Pt(2), MAGENTA)

    # (top, height, text, english font?, point size, bold, colour, animation)
    centred_lines = (
        (Inches(1.7), Inches(1.2), "羚牛数智中心", False, 48, True, CYAN, "fly"),
        (Inches(2.85), Inches(0.8), "建设规划报告", False, 34, False, WHITE, "fade"),
        (Inches(4.1), Inches(0.6), "LINGNIU DIGITAL INTELLIGENCE CENTER · V1.0", True, 13, False, MAGENTA, "fade"),
        (Inches(5.0), Inches(0.8), "羚牛氢能 One OS · 氢能交通商业操作系统", False, 16, False, GOLD, "fade"),
    )
    for top, height, text, english, size, bold, color, anim in centred_lines:
        box = slide.shapes.add_textbox(Inches(0.9), top, Inches(11.5), height)
        par = box.text_frame.paragraphs[0]
        par.text = text
        set_font(par, en=english, size=size, bold=bold, color=color)
        par.alignment = PP_ALIGN.CENTER
        register_anim(idx, box.shape_id, anim)
def slide_architecture(prs, idx):
    """Append the architecture slide.

    If ``architecture.png`` exists in ``IMG`` it is placed under the
    header; four labelled layer boxes are then drawn on top as vector
    shapes, each registered for a "fly" animation.
    """
    slide = new_slide(prs, "bg-dataflow.png")
    add_header(slide, idx, "羚牛氢能 One OS · 架构示意图", "SYSTEM ARCHITECTURE", "fly", "fade")

    diagram = IMG / "architecture.png"
    if diagram.exists():
        picture = slide.shapes.add_picture(
            str(diagram), Inches(0.55), Inches(1.45), width=Inches(12.2), height=Inches(5.7)
        )
        register_anim(idx, picture.shape_id, "fade")

    # Vector overlay: one box per core layer (title, subtitle, colour, left, top).
    layer_boxes = [
        ("应用层", "小羚羚 · 运营终端 · 客户服务", CYAN, Inches(1.0), Inches(1.55)),
        ("平台层", "任务调度 · 能源管理 · 资产视图", MAGENTA, Inches(4.6), Inches(1.55)),
        ("数据层", "主数据 · 指标仓 · AI 特征", PURPLE, Inches(8.2), Inches(1.55)),
        ("生态层", "API · SaaS · 产业开放", GOLD, Inches(4.6), Inches(5.85)),
    ]
    for name, caption, color, x, y in layer_boxes:
        frame_shape = slide.shapes.add_shape(
            MSO_SHAPE.ROUNDED_RECTANGLE, x, y, Inches(3.8), Inches(0.95)
        )
        frame_shape.fill.solid()
        frame_shape.fill.fore_color.rgb = BG_PANEL
        frame_shape.line.color.rgb = color
        frame_shape.line.width = Pt(1.5)
        register_anim(idx, frame_shape.shape_id, "fly")

        label_box = slide.shapes.add_textbox(
            x + Inches(0.1), y + Inches(0.08), Inches(3.6), Inches(0.8)
        )
        text_frame = label_box.text_frame
        name_par = text_frame.paragraphs[0]
        name_par.text = name
        set_font(name_par, en=True, size=12, bold=True, color=color)
        caption_par = text_frame.add_paragraph()
        caption_par.text = caption
        set_font(caption_par, size=9, color=SILVER)
def build_slides(prs):
    """Append every slide of the enhanced deck to ``prs`` in presentation order.

    ``si`` is the running zero-based slide index.  It selects the
    background image (``SLIDE_BG_CYCLE[si % 4]``) and is the key under
    which each slide's shapes are registered for animation, so it must be
    incremented exactly once per slide added.
    """
    si = 0
    # Cover slide
    slide_title(prs, si)
    si += 1
    # NOTE(review): the literal 4 matches len(SLIDE_BG_CYCLE) as defined in this script.
    s = new_slide(prs, SLIDE_BG_CYCLE[si % 4])
    add_header(s, si, "一、报告背景与目标", "BACKGROUND & OBJECTIVES")
    add_bullets(
        s,
        si,
        [
            ("业务规模扩大，人工台账与分散系统难以为继", None),
            ("数据分散 · 流程依赖人工 · 单点建设难复用", MAGENTA),
            ("建设统一「数智中心」是战略发展的必然选择", CYAN),
        ],
    )
    si += 1
    s = new_slide(prs, SLIDE_BG_CYCLE[si % 4])
    add_header(s, si, "现状痛点", "CURRENT CHALLENGES")
    add_cards(
        s,
        si,
        [
            ("数据孤岛", "难以统一视图", CYAN),
            ("流程非标", "可追溯性不足", MAGENTA),
            ("重复建设", "难以复用扩展", PURPLE),
            ("复杂场景", "多能源难支撑", GOLD),
        ],
    )
    si += 1
    s = new_slide(prs, SLIDE_BG_CYCLE[si % 4])
    add_header(s, si, "要解决的问题", "PROBLEMS TO SOLVE")
    add_cards(
        s,
        si,
        [
            ("当前必须解决", "手工台账 · 系统割裂\n任务依赖人工 · 缺乏数据支撑", CYAN),
            ("未来必须应对", "人效下降 · 复杂度上升\n平台不可持续 · AI 难落地", MAGENTA),
        ],
    )
    si += 1
    s = new_slide(prs, SLIDE_BG_CYCLE[si % 4])
    add_header(s, si, "四大建设目标", "CORE OBJECTIVES")
    add_cards(
        s,
        si,
        [
            ("稳定运行", "支撑当前业务", CYAN),
            ("持续演进", "支持5年发展", MAGENTA),
            ("数据资产", "可沉淀可变现", PURPLE),
            ("对内对外", "降本+SaaS输出", GOLD),
        ],
    )
    si += 1
    s = new_slide(prs, SLIDE_BG_CYCLE[si % 4])
    add_header(s, si, "二、战略定位", "STRATEGIC POSITIONING")
    add_bullets(
        s,
        si,
        [
            ("数智中心未来35年核心数字基础设施", CYAN),
            ("长期战略资产非单一IT项目", WHITE),
            ("羚牛氢能 One OS — 氢能交通商业操作系统", GOLD),
            ("核心产品：小羚羚统一运营与服务终端", MAGENTA),
        ],
    )
    si += 1
    # Architecture diagram slide
    slide_architecture(prs, si)
    si += 1
    s = new_slide(prs, SLIDE_BG_CYCLE[si % 4])
    add_header(s, si, "战略阶段：三步走", "THREE-PHASE ROADMAP")
    add_flow_diagram(
        s,
        si,
        ["Phase 01\n统一·标准·稳定", "Phase 02\n平台·复用·协同", "Phase 03\n智能·开放·商业"],
        [CYAN, MAGENTA, GOLD],
    )
    si += 1
    # One bullet slide per phase: (title, subtitle, bullets).
    for title, sub, bullets in [
        (
            "Phase 01 · 统一 · 标准 · 稳定",
            "FOUNDATION",
            [
                ("业务在线、数据留痕、流程可控", CYAN),
                ("统一运营平台与小羚羚终端", None),
                ("标准化数据、流程、接口", None),
                ("高可用安全合规", None),
            ],
        ),
        (
            "Phase 02 · 平台化 · 复用 · 协同",
            "PLATFORM",
            [
                ("从有数据到用数据", MAGENTA),
                ("平台化抽象通用能力", None),
                ("复用降低边际成本", None),
                ("跨部门协同打通全链路", GOLD),
            ],
        ),
        (
            "Phase 03 · 智能化 · 开放 · 商业化",
            "INTELLIGENCE",
            [
                ("AI驱动绿色智能生态", GOLD),
                ("智能化自主优化", None),
                ("开放API与SaaS", None),
                ("成本中心→利润中心", CYAN),
            ],
        ),
    ]:
        s = new_slide(prs, SLIDE_BG_CYCLE[si % 4])
        add_header(s, si, title, sub)
        add_bullets(s, si, bullets)
        si += 1
    s = new_slide(prs, SLIDE_BG_CYCLE[si % 4])
    add_header(s, si, "演进逻辑", "EVOLUTION LOGIC")
    add_flow_diagram(s, si, ["信息化\n0 → 1", "数字化\n1 → N", "智能化\nN → ∞"], [CYAN, MAGENTA, GOLD], top=Inches(2.4))
    tb = s.shapes.add_textbox(Inches(0.8), Inches(5.4), Inches(11.5), Inches(0.8))
    p = tb.text_frame.paragraphs[0]
    p.text = "成本中心 ──▶ 利润中心 · 驱动绿色交通变革"
    set_font(p, size=18, color=WHITE)
    p.alignment = PP_ALIGN.CENTER
    register_anim(si, tb.shape_id, "fade")
    si += 1
    s = new_slide(prs, SLIDE_BG_CYCLE[si % 4])
    add_header(s, si, "三、整体建设方案", "IMPLEMENTATION")
    add_bullets(
        s,
        si,
        [
            ("分阶段实施，按业务节奏推进", CYAN),
            ("基础优先：系统能力先于智能", None),
            ("核心内控：需求架构数据内控", None),
            ("敏捷迭代MVP快速验证", None),
        ],
    )
    si += 1
    s = new_slide(prs, SLIDE_BG_CYCLE[si % 4])
    add_header(s, si, "「1 + N + X」核心理念", "1 + N + X")
    add_hex_diagram(
        s,
        si,
        [
            ("1", "车辆/设备/能源\n唯一主键", CYAN),
            ("N", "电·氢·充换氢\n多能源", MAGENTA),
            ("X", "运营·物流·金融\nESG·调度", GOLD),
        ],
    )
    si += 1
    # Closing slide
    s = new_slide(prs, "bg-hydrogen.png")
    add_corner_accents(s)
    add_glow_bar(s, Inches(2), Inches(3.2), Inches(9.3), Pt(3), CYAN)
    tb = s.shapes.add_textbox(Inches(0.9), Inches(2.5), Inches(11.5), Inches(1.5))
    p = tb.text_frame.paragraphs[0]
    p.text = "羚牛氢能 One OS"
    set_font(p, size=44, bold=True, color=CYAN)
    p.alignment = PP_ALIGN.CENTER
    register_anim(si, tb.shape_id, "fly")
    tb2 = s.shapes.add_textbox(Inches(0.9), Inches(3.8), Inches(11.5), Inches(1))
    p2 = tb2.text_frame.paragraphs[0]
    p2.text = "连接产业各方 · 驱动绿色交通变革"
    set_font(p2, size=20, color=WHITE)
    p2.alignment = PP_ALIGN.CENTER
    register_anim(si, tb2.shape_id, "fade")
# ---------- Animation injection (OOXML) ----------
# Prefix -> namespace URI map used for XPath lookups in slide XML.
NSMAP = {
    "p": "http://schemas.openxmlformats.org/presentationml/2006/main",
    "a": "http://schemas.openxmlformats.org/drawingml/2006/main",
}
def _timing_xml(shape_id: int, preset_id: int, preset_subtype: int, dur_ms: int) -> etree._Element:
    """Build the ``<p:par>`` timing fragment for one shape's entrance animation.

    ``preset_id`` / ``preset_subtype`` are written into the node's
    attributes (callers pass 10 for fade and 2 for fly).  Whatever preset
    is given, the effect emitted is the same: make the shape visible, then
    apply an ``animEffect`` whose filter is ``fade(dur_ms)``.

    The three time-node ids are ``shape_id`` + 1000 / 2000 / 3000.
    """
    # Simplified: relies on a preset entrance animation.
    return etree.fromstring(
        f"""
<p:par xmlns:p="http://schemas.openxmlformats.org/presentationml/2006/main"
xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main">
<p:cTn id="{shape_id + 1000}" presetID="{preset_id}" presetClass="entr"
presetSubtype="{preset_subtype}" fill="hold" nodeType="clickEffect">
<p:stCondLst><p:cond delay="0"/></p:stCondLst>
<p:childTnLst>
<p:set>
<p:cBhvr>
<p:cTn id="{shape_id + 2000}" dur="1" fill="hold">
<p:stCondLst><p:cond delay="0"/></p:stCondLst>
</p:cTn>
<p:tgtEl><p:spTgt spid="{shape_id}"/></p:tgtEl>
<p:attributeNameLst><p:attributeName>style.visibility</p:attributeName></p:attributeNameLst>
</p:cBhvr>
<p:to><p:strVal val="visible"/></p:to>
</p:set>
<p:animEffect transition="in" filter="fade({dur_ms})">
<p:cBhvr>
<p:cTn id="{shape_id + 3000}" dur="{dur_ms}"/>
<p:tgtEl><p:spTgt spid="{shape_id}"/></p:tgtEl>
</p:cBhvr>
</p:animEffect>
</p:childTnLst>
</p:cTn>
</p:par>
""".encode()
    )
def inject_animations(pptx_path: Path, registry: dict[int, list[tuple[int, str]]]):
    """Rewrite ``pptx_path`` in place, adding entrance animations to registered shapes.

    Args:
        pptx_path: the .pptx file to modify.
        registry: ``{slide_index: [(shape_id, anim_type), ...]}``.  Slide
            indexes are zero-based positions in the numerically sorted
            ``ppt/slides/slideN.xml`` list; indexes past the end are
            ignored.  ``anim_type`` "fly" is emitted as preset 2/subtype 4
            over 500 ms, anything else as preset 10/subtype 0 over 400 ms.

    Any existing ``<p:timing>`` on an affected slide is replaced.  The
    package is unpacked into ``_pptx_anim_tmp`` next to the file and
    re-zipped; the scratch directory and any half-written archive are
    removed even when an error occurs, and the error is re-raised.
    """
    pml = "{http://schemas.openxmlformats.org/presentationml/2006/main}"
    tmp = pptx_path.parent / "_pptx_anim_tmp"
    out_zip = pptx_path.parent / "_pptx_anim_out.pptx"
    if tmp.exists():
        shutil.rmtree(tmp)
    try:
        with zipfile.ZipFile(pptx_path, "r") as z:
            z.extractall(tmp)
        slides_dir = tmp / "ppt" / "slides"
        slide_files = sorted(
            slides_dir.glob("slide*.xml"),
            key=lambda p: int(p.stem.replace("slide", "")),
        )
        for slide_idx, anims in registry.items():
            if slide_idx >= len(slide_files):
                continue
            slide_path = slide_files[slide_idx]
            tree = etree.parse(str(slide_path))
            root = tree.getroot()
            # Drop any timing tree already present.
            for old in root.findall("p:timing", NSMAP):
                root.remove(old)

            # Skeleton: timing > tnLst > par > cTn(tmRoot) > childTnLst
            #           > seq > cTn(mainSeq) > childTnLst
            timing = etree.Element(pml + "timing")
            tn_lst = etree.SubElement(timing, pml + "tnLst")
            par = etree.SubElement(tn_lst, pml + "par")
            ctn = etree.SubElement(
                par, pml + "cTn", id="1", dur="indefinite", restart="never", nodeType="tmRoot"
            )
            ctn_child = etree.SubElement(ctn, pml + "childTnLst")
            seq = etree.SubElement(ctn_child, pml + "seq", concurrent="1", nextAc="seek")
            seq_ctn = etree.SubElement(
                seq, pml + "cTn", id="2", dur="indefinite", nodeType="mainSeq"
            )
            seq_child = etree.SubElement(seq_ctn, pml + "childTnLst")

            # One wrapper <p:par> per shape, built directly in place so no
            # element has to be moved out of a container while iterating it.
            for spid, anim_type in anims:
                if anim_type == "fly":
                    # fly in from the bottom: presetID 2, subtype 4
                    node = _timing_xml(spid, 2, 4, 500)
                else:
                    node = _timing_xml(spid, 10, 0, 400)
                wrap = etree.SubElement(seq_child, pml + "par")
                wrap_ctn = etree.SubElement(wrap, pml + "cTn", fill="hold")
                wrap_child = etree.SubElement(wrap_ctn, pml + "childTnLst")
                wrap_child.append(node)

            root.append(timing)
            tree.write(str(slide_path), xml_declaration=True, encoding="UTF-8", standalone=True)

        if out_zip.exists():
            out_zip.unlink()
        with zipfile.ZipFile(out_zip, "w", zipfile.ZIP_DEFLATED) as zout:
            for fp in tmp.rglob("*"):
                if fp.is_file():
                    zout.write(fp, fp.relative_to(tmp))
        shutil.move(str(out_zip), str(pptx_path))
    finally:
        shutil.rmtree(tmp, ignore_errors=True)
        # Present only if re-zipping failed part-way; never leave it behind.
        if out_zip.exists():
            out_zip.unlink()
def main():
    """Build the deck, save it, attempt animation injection, and copy it out.

    Exits with an error message when the image resource directory ``IMG`` is
    missing. A failure while injecting animations is not fatal: the saved deck
    is still copied and the failure is reported in the summary line.
    """
    if not IMG.exists():
        raise SystemExit(f"缺少图片资源目录: {IMG}")

    deck = Presentation()
    deck.slide_width = SLIDE_W
    deck.slide_height = SLIDE_H
    build_slides(deck)
    deck.save(OUT)

    # Best effort: the deck is already on disk, so an injection failure only
    # changes the note printed at the end.
    try:
        inject_animations(OUT, ANIM_REGISTRY)
    except Exception as err:
        anim_note = f"动画注入部分失败({err})可在PPT中全选标题批量添加「飞入/淡入」"
    else:
        anim_note = "已注入标题飞入/内容淡入动画"

    shutil.copy(OUT, OUT_DESKTOP)

    print(f"已生成: {OUT}")
    print(f"已复制: {OUT_DESKTOP}")
    print(f"{len(deck.slides)} 页 | {anim_note}")
    print(f"字体: 英文 {font_en()} / 中文 {font_cn()}(若 Orbitron 未安装将回退系统字体)")
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,225 @@
#!/usr/bin/env python3
"""生成 ONEOS 业务迁移通知 Word 文档(桌面)"""
from pathlib import Path
from docx import Document
from docx.enum.text import WD_ALIGN_PARAGRAPH, WD_LINE_SPACING
from docx.oxml.ns import qn
from docx.shared import Cm, Pt, RGBColor
OUT = Path("/Users/sylvawong/Desktop/ONEOS业务全面迁移通知.docx")
FONT_CN = "PingFang SC"
FONT_CN_FB = "Microsoft YaHei"
ACCENT = RGBColor(0, 82, 155)
def set_run_font(run, size=11, bold=False, color=None, name=None):
    """Set size, weight, typeface and (optionally) colour on a run.

    ``name`` overrides both font slots when given; otherwise ``FONT_CN`` is
    used for ``font.name`` and ``FONT_CN_FB`` for the ``w:eastAsia`` slot.
    ``color`` is applied only when truthy.
    """
    primary_face = name if name else FONT_CN
    east_asian_face = name if name else FONT_CN_FB

    font = run.font
    font.size = Pt(size)
    font.bold = bold
    font.name = primary_face
    # The East Asian slot is written directly on the run's rFonts element.
    run._element.rPr.rFonts.set(qn("w:eastAsia"), east_asian_face)
    if color:
        font.color.rgb = color
def add_para(doc, text, size=11, bold=False, align=None, space_after=6, color=None):
    """Append a body paragraph with 1.35x line spacing and return it.

    ``align`` is applied only when it is not None; ``space_after`` is given
    in points. The text is added as a single run styled via ``set_run_font``.
    """
    paragraph = doc.add_paragraph()
    if align is not None:
        paragraph.alignment = align

    fmt = paragraph.paragraph_format
    fmt.space_after = Pt(space_after)
    fmt.line_spacing_rule = WD_LINE_SPACING.MULTIPLE
    fmt.line_spacing = 1.35

    set_run_font(paragraph.add_run(text), size=size, bold=bold, color=color)
    return paragraph
def add_heading(doc, text, level=1):
    """Append a heading of the given level in the accent colour and return it.

    Font size is 16/14/12 pt for levels 1/2/3 (12 pt for any other level).
    Level-1 headings get 14 pt of space before, the rest 10 pt; all get 8 pt
    of space after.
    """
    point_size = {1: 16, 2: 14, 3: 12}.get(level, 12)

    heading = doc.add_heading(level=level)
    heading.clear()
    set_run_font(heading.add_run(text), size=point_size, bold=True, color=ACCENT)

    fmt = heading.paragraph_format
    fmt.space_before = Pt(14 if level == 1 else 10)
    fmt.space_after = Pt(8)
    return heading
def add_bullet(doc, text, level=0):
    """Append a "List Bullet" paragraph containing ``text`` at 11 pt.

    The left indent is 0.75 cm plus 0.5 cm per nesting ``level``.
    """
    bullet = doc.add_paragraph(style="List Bullet")
    bullet.clear()

    fmt = bullet.paragraph_format
    fmt.left_indent = Cm(0.75 + level * 0.5)
    fmt.space_after = Pt(4)
    fmt.line_spacing = 1.35

    set_run_font(bullet.add_run(text), size=11)
def build():
    """Compose the ONEOS migration notice and save it to ``OUT``.

    The document consists of a centred title, an introduction, a login
    information table, per-department responsibility lists, a feedback
    section and a right-aligned signature block.
    """
    from docx.oxml import OxmlElement  # used only for the table header shading

    doc = Document()

    def bullets(lines):
        # Emit one bullet paragraph per entry, in order.
        for line in lines:
            add_bullet(doc, line)

    page = doc.sections[0]
    page.page_height = Cm(29.7)
    page.page_width = Cm(21)
    page.left_margin = Cm(2.5)
    page.right_margin = Cm(2.5)
    page.top_margin = Cm(2.2)
    page.bottom_margin = Cm(2.2)

    # Title
    title = doc.add_paragraph()
    title.alignment = WD_ALIGN_PARAGRAPH.CENTER
    title_run = title.add_run("关于业务全面迁移至 ONEOS 系统的通知")
    set_run_font(title_run, size=22, bold=True, color=ACCENT)
    title.paragraph_format.space_after = Pt(16)

    add_para(doc, "各部门同事:", size=12, bold=True, space_after=8)
    add_para(
        doc,
        "为统一业务流程、提升数据准确性与协同效率,公司自 2025年5月25日 起,相关业务全面迁移至 ONEOS 系统执行。"
        "请各部门按要求完成系统登录、密码修改及本部门功能模块的日常维护与操作。",
        space_after=12,
    )

    add_heading(doc, "一、系统登录信息", 1)
    login_rows = [
        ("项目", "说明"),
        ("系统地址", "https://oneos.lnh2e.com/"),
        ("用户名", "姓名拼音"),
        ("默认密码", "admin123请第一时间修改"),
    ]
    table = doc.add_table(rows=4, cols=2)
    table.style = "Table Grid"
    for row_no, values in enumerate(login_rows):
        table_row = table.rows[row_no]
        is_header = row_no == 0
        for col_no, value in enumerate(values):
            cell = table_row.cells[col_no]
            cell.text = ""
            para = cell.paragraphs[0]
            run = para.add_run(value)
            # Header row and first column are bold.
            set_run_font(run, size=11, bold=(is_header or col_no == 0))
            if is_header:
                # White text on a filled background for the header cells.
                run.font.color.rgb = RGBColor(255, 255, 255)
                shade = OxmlElement("w:shd")
                shade.set(qn("w:fill"), "0052A3")
                cell._tc.get_or_add_tcPr().append(shade)
            para.paragraph_format.space_after = Pt(2)

    doc.add_paragraph()
    add_para(
        doc,
        "重要提醒:请大家在第一时间登录并修改默认密码,勿将账号密码告知他人。",
        bold=True,
        space_after=14,
        color=RGBColor(180, 0, 0),
    )

    add_heading(doc, "二、各部门职责与功能模块", 1)

    # Customer service
    add_heading(doc, "(一)客服服务部", 2)
    bullets(
        [
            "业务管理 → 客户管理:客户信息录入准确;创建合同时须正确关联客户信息。",
            "业务管理 → 租赁费用模板:按车型维护各类费用明细,确保模板项目准确。",
            "财务管理 → 提车应收款:实收录入准确。",
            "业务管理 → 交还车任务:按客户约定交车日期生成交车任务(生成交车任务后,对应区域运维方可收到任务)。",
            "业务管理 → 租赁账单:运维交车后,对系统自动生成的租赁账单实收情况进行维护。",
            "业务管理 → 交还车任务:在合同下已交车任务操作列点击「查看」→ 按客户要求选择还车车辆 → 点击「还车」并填写预计还车时间,生成还车任务(生成还车任务后,对应区域运维方可收到任务)。",
            "财务管理 → 还车应结款:协同客服组、运维组、安全组、能源组完成还车应结款录入。",
            "车辆氢费明细:加氢记录录入准确。",
            "车辆成本维护:维护各车型日成本(用于租赁账单中车辆成本统计)。",
        ]
    )

    # Operations
    add_heading(doc, "(二)运维部", 2)
    add_heading(doc, "网页版 ONEOS", 3)
    bullets(
        [
            "运维管理 → 车辆业务 → 证照管理:按车辆实际证照维护信息(证照信息须完整,否则影响交车)。",
            "运维管理 → 基本数据维护 → 停车场:按业务实际维护停车场信息。",
            "运维管理 → 基本数据维护 → 型号参数:按车辆实际维护型号参数。",
            "维修站管理 → 维修站信息:按合作情况维护维修站信息。",
            "检测服务站 → 检测服务站信息:按合作情况维护检测服务站信息。",
            "运维管理 → 车辆管理:新增车辆时导入车辆信息,并同步通知相关人员维护证照信息。",
        ]
    )
    add_heading(doc, "微信小程序「小羚羚」", 3)
    bullets(
        [
            "备车:按业务实际完成备车(新系统仅已备车车辆方可交车)。",
            "交车:按交车任务执行交车。",
            "还车:按还车任务执行还车。",
            "异动:按实际提交异动申请并记录异动情况。",
            "调拨:按实际提交调拨申请并记录调拨情况。",
            "年审:按实际录入年审记录(可多轮保存,全部完成后再点击提交闭环)。",
            "故障:按实际填写故障信息(可多轮保存,全部完成后再点击提交闭环)。",
        ]
    )

    # Procurement
    add_heading(doc, "(三)采购部", 2)
    bullets(
        [
            "加氢站管理 → 站点信息:维护全部合作加氢站信息(作为氢费账单基础依据)。",
            "能源管理 → 能源账户:按实际业务创建客户账户。",
            "业务管理 → 保险采购:按实际业务导入保单数据。",
            "加氢站管理 → 加氢站打款管理(即将上线):按实际业务导入加氢站打款情况,将自动同步至「加氢站余额管理」。",
        ]
    )

    # Finance
    add_heading(doc, "(四)财务部", 2)
    bullets(
        [
            "审批中心 → 我的待办:及时处理各类审批任务。",
            "能源管理 → 充值单:按实际业务导入客户账户充值数据。",
        ]
    )

    # Legal
    add_heading(doc, "(五)法务部", 2)
    add_bullet(doc, "审批中心 → 我的待办:及时处理各类审批任务。")

    add_heading(doc, "三、优化迭代与意见反馈", 1)
    add_para(
        doc,
        "各部门此前提出的优化意见均已收到。后续将结合业务紧迫程度分批迭代上线,"
        "也欢迎在使用过程中继续提出宝贵建议。",
        space_after=8,
    )
    add_para(doc, "意见与建议反馈方式:", bold=True, space_after=6)
    add_bullet(doc, "邮箱wangmian@lingniutech.com")
    add_bullet(doc, "钉钉联系数智部:王冕、秦蔚")

    doc.add_paragraph()
    add_para(
        doc,
        "请各部门负责人传达至本部门全员,自 5月25日 起严格执行。如有系统操作问题,请优先联系数智部协调处理。",
        space_after=20,
    )
    add_para(doc, "特此通知。", bold=True, space_after=24)

    # Right-aligned signature: department on the first line, date on the second.
    signature = doc.add_paragraph()
    signature.alignment = WD_ALIGN_PARAGRAPH.RIGHT
    set_run_font(signature.add_run("数智部\n"), size=12, bold=True)
    set_run_font(signature.add_run("2025年5月24日"), size=12)

    doc.save(OUT)
    print(f"已生成: {OUT}")
if __name__ == "__main__":
build()

View File

@@ -0,0 +1,198 @@
#!/usr/bin/env python3
"""生成 2026年5月26日 ONEOS 试运行问题报告 Word 文档"""
from pathlib import Path
from docx import Document
from docx.enum.text import WD_ALIGN_PARAGRAPH, WD_LINE_SPACING
from docx.oxml.ns import qn
from docx.shared import Cm, Pt, RGBColor
from docx.enum.table import WD_TABLE_ALIGNMENT
OUT = Path("/Users/sylvawong/Desktop/ONEOS试运行问题报告_20260526.docx")
OUT_PROJ = Path("/Users/sylvawong/Desktop/CURSOR/ONE-OS/docs/ONEOS试运行问题报告_20260526.docx")
FONT_CN = "PingFang SC"
FONT_CN_FB = "Microsoft YaHei"
ACCENT = RGBColor(0, 82, 155)
GRAY = RGBColor(80, 80, 80)
def set_run_font(run, size=11, bold=False, color=None):
    """Set size, weight, typeface and (optionally) colour on a run.

    ``FONT_CN`` is assigned to ``font.name`` and ``FONT_CN_FB`` to the
    ``w:eastAsia`` slot. ``color`` is applied only when truthy.
    """
    font = run.font
    font.size = Pt(size)
    font.bold = bold
    font.name = FONT_CN
    # The East Asian slot is written directly on the run's rFonts element.
    run._element.rPr.rFonts.set(qn("w:eastAsia"), FONT_CN_FB)
    if color:
        font.color.rgb = color
def add_para(doc, text, size=11, bold=False, align=None, space_after=6, color=None):
    """Append a body paragraph with 1.35x line spacing and return it.

    ``align`` is applied only when it is not None; ``space_after`` is given
    in points. The text is added as a single run styled via ``set_run_font``.
    """
    paragraph = doc.add_paragraph()
    if align is not None:
        paragraph.alignment = align

    fmt = paragraph.paragraph_format
    fmt.space_after = Pt(space_after)
    fmt.line_spacing_rule = WD_LINE_SPACING.MULTIPLE
    fmt.line_spacing = 1.35

    set_run_font(paragraph.add_run(text), size=size, bold=bold, color=color)
    return paragraph
def add_heading(doc, text, level=1):
    """Append a heading of the given level in the accent colour and return it.

    Font size is 16/14/12 pt for levels 1/2/3 (12 pt for any other level).
    Level-1 headings get 14 pt of space before, the rest 10 pt; all get 8 pt
    of space after.
    """
    point_size = {1: 16, 2: 14, 3: 12}.get(level, 12)

    heading = doc.add_heading(level=level)
    heading.clear()
    set_run_font(heading.add_run(text), size=point_size, bold=True, color=ACCENT)

    fmt = heading.paragraph_format
    fmt.space_before = Pt(14 if level == 1 else 10)
    fmt.space_after = Pt(8)
    return heading
def add_bullet(doc, text, level=0):
    """Append a "List Bullet" paragraph containing ``text`` at 11 pt.

    The left indent is 0.75 cm plus 0.5 cm per nesting ``level``.
    """
    bullet = doc.add_paragraph(style="List Bullet")
    bullet.clear()

    fmt = bullet.paragraph_format
    fmt.left_indent = Cm(0.75 + level * 0.5)
    fmt.space_after = Pt(4)
    fmt.line_spacing = 1.35

    set_run_font(bullet.add_run(text), size=11)
def set_cell_text(cell, text, bold=False, size=10):
    """Replace a table cell's content with ``str(text)`` as one styled run."""
    cell.text = ""  # reset the cell before adding the styled run
    first_paragraph = cell.paragraphs[0]
    set_run_font(first_paragraph.add_run(str(text)), size=size, bold=bold)
def add_issue_table(doc, rows):
    """Append a centred six-column issue table followed by an empty paragraph.

    ``rows`` is a sequence of value sequences; values are written left to
    right into the columns below the bold header row.
    """
    column_titles = ["序号", "模块", "问题描述", "影响", "处理状态", "备注/后续"]

    grid = doc.add_table(rows=1 + len(rows), cols=len(column_titles))
    grid.style = "Table Grid"
    grid.alignment = WD_TABLE_ALIGNMENT.CENTER

    header_cells = grid.rows[0].cells
    for col_no, title in enumerate(column_titles):
        set_cell_text(header_cells[col_no], title, bold=True, size=10)

    # Data rows start directly below the header.
    row_no = 1
    for record in rows:
        body_cells = grid.rows[row_no].cells
        for col_no, value in enumerate(record):
            set_cell_text(body_cells[col_no], value, size=9)
        row_no += 1

    doc.add_paragraph()
def build():
    """Compose the ONEOS trial-run issue report and save it to two locations.

    The report contains a title block, an overview, per-area findings, a
    consolidated issue table, a follow-up plan and an appendix. It is written
    to ``OUT`` and then to ``OUT_PROJ``; parent directories are created first.
    """
    doc = Document()

    def bullets(lines, level=None):
        # Emit one bullet per entry; pass ``level`` only when one is requested.
        for line in lines:
            if level is None:
                add_bullet(doc, line)
            else:
                add_bullet(doc, line, level=level)

    page = doc.sections[0]
    page.top_margin = Cm(2.5)
    page.bottom_margin = Cm(2.5)
    page.left_margin = Cm(2.8)
    page.right_margin = Cm(2.8)

    heading = doc.add_paragraph()
    heading.alignment = WD_ALIGN_PARAGRAPH.CENTER
    set_run_font(heading.add_run("ONEOS 系统试运行问题报告"), size=20, bold=True, color=ACCENT)
    heading.paragraph_format.space_after = Pt(6)

    subtitle = doc.add_paragraph()
    subtitle.alignment = WD_ALIGN_PARAGRAPH.CENTER
    set_run_font(
        subtitle.add_run("报告日期2026年5月26日  试运行阶段业务全面迁移后首日验证"),
        size=11,
        color=GRAY,
    )
    subtitle.paragraph_format.space_after = Pt(18)

    add_heading(doc, "一、试运行概述", 1)
    add_para(
        doc,
        "本次为 ONEOS 业务全面迁移上线后的首日试运行,重点验证租赁业务主流程(合同发起→提车应收款→交车→还车→还车应结款)"
        "及客服组车辆管理、交车管理等支撑能力。整体主流程可跑通,但迁移数据质量、照片上传机制、"
        "权限配置及部分业务规则仍存在需跟进的问题。",
    )
    add_para(doc, "总体结论:", bold=True, space_after=4)
    bullets(
        [
            "租赁业务主链路:基本跑通,存在局部阻塞与体验类问题。",
            "权限与路由:财务出纳 404 问题已临时解决,需规范默认工作台权限配置。",
            "照片环节:交车/还车均存在上传未即时保存导致照片丢失风险,已提优化需求。",
            "迁移数据:车辆状态、停车场、交车数量、还车应结款费用明细等存在批量准确性风险。",
        ]
    )

    add_heading(doc, "二、租赁业务试运行情况", 1)
    add_heading(doc, "2.1 流程验证结论", 2)
    add_issue_table(
        doc,
        [
            ("1", "租赁合同发起", "流程正常,可顺利完成发起。", "", "通过", ""),
            ("2", "提车应收款", "整体可走通;个别合同无法生成应收款。", "", "部分异常", "4 辆车合同需排查"),
            ("3", "交车", "主流程可走通。", "", "通过(有小问题)", "照片上传见 2.3"),
            ("4", "还车", "主流程可走通。", "", "通过(有小问题)", "照片及迁移展示见 2.3"),
            ("5", "还车应结款", "流程可走通,数据与展示异常较多。", "", "待处理", "见 2.4"),
        ],
    )

    add_heading(doc, "2.2 提车应收款", 2)
    bullets(
        [
            "问题:出现一个包含 4 辆车的合同无法正常生成提车应收款。",
            "问题:财务出纳账号登录后出现 404。原因为财务出纳未配置工作台权限登录默认路由指向工作台导致。",
            "处理:已通过分配权限临时解决;建议统一梳理财务类角色默认路由与菜单权限模板。",
        ]
    )

    add_heading(doc, "2.3 交车 / 还车", 2)
    bullets(
        [
            "共性问题(交车、还车):拍摄照片环节存在「上传后未即时保存」问题;异常返回后已拍照片全部丢失。",
            "处理:已提交产品优化需求,建议增加分张即时落库/断点续传能力。",
            "还车专项:照片页「交车照片(迁移数据)」部位匹配不一致(如左后方显示为右前方),需开发修复映射关系。",
        ]
    )

    add_heading(doc, "2.4 还车应结款", 2)
    bullets(
        [
            "业务服务组、运维组、能源部办理情况中,人员均错误显示为「黄桂球」。",
            "迁移数据问题:运维组下错误迁移了部分费用明细(旧版与新版名称不一致导致误迁)。",
            "处理admin 已上线删除功能,可手工处理异常数据;暂缺批量排查迁移数据的工具或规则。",
            "逻辑问题:未做还车费用的提车应结款单,错误显示为「审核完成」,需核查状态机与数据校验。",
        ]
    )

    add_heading(doc, "三、客服组试运行情况", 1)
    add_heading(doc, "3.1 车辆管理", 2)
    bullets(
        [
            "问题:存在大量停车场未正确匹配或错误标记为「呆滞车」。",
            "说明:「呆滞车」状态此前已暂时删除,不应再出现。",
            "影响:运维无法正常选择车辆。",
            "建议方案:",
        ]
    )
    bullets(
        [
            "核对所有车辆停车场迁移准确性;排除运营状态为「租赁」外的车辆,检查停车场是否为空,空数据需补全或修正。",
            "在车辆管理增加修改功能(停车场等字段可编辑),需求待提。",
        ],
        level=1,
    )

    add_heading(doc, "3.2 交车管理", 2)
    add_bullet(doc, "问题:实际交车数量显示不正确,判断为迁移数据问题,需与交车任务/车辆状态数据交叉核对。")

    add_heading(doc, "四、问题汇总清单", 1)
    add_issue_table(
        doc,
        [
            ("1", "提车应收款", "4 辆车合同无法生成提车应收款", "", "待排查", "需定位合同/车辆绑定规则"),
            ("2", "权限", "财务出纳登录 404", "", "已解决", "已分配工作台权限,建议固化模板"),
            ("3", "交车", "照片上传未即时保存致丢失", "", "已提优化", "产品优化中"),
            ("4", "还车", "照片上传未即时保存致丢失", "", "已提优化", "同交车"),
            ("5", "还车", "迁移交车照片部位映射错误", "", "待开发", "左后/右前等错位"),
            ("6", "还车应结款", "办理人错误显示为黄桂球", "", "待修复", "多部门均受影响"),
            ("7", "还车应结款", "运维组费用明细误迁移", "", "可手工删", "缺批量排查方案"),
            ("8", "还车应结款", "无还车费用单却显示审核完成", "", "待修复", "状态逻辑异常"),
            ("9", "车辆管理", "停车场空/错、呆滞车误标", "", "待处理", "影响运维选车"),
            ("10", "车辆管理", "缺少停车场等字段修改入口", "", "需求待提", ""),
            ("11", "交车管理", "实际交车数量不正确", "", "待核对", "迁移数据问题"),
        ],
    )

    add_heading(doc, "五、后续工作计划(建议)", 1)
    bullets(
        [
            "【P0】修复还车应结款状态展示异常无费用却审核完成及办理人显示错误。",
            "【P0】开展车辆停车场、车辆状态含呆滞车、交车数量迁移数据专项核对与修正。",
            "【P1】排查 4 辆车合同无法生成提车应收款的具体原因并修复。",
            "【P1】修复还车迁移照片部位映射推进交车/还车照片分张即时保存方案落地。",
            "【P1】制定还车应结款迁移数据批量校验规则旧版/新版费用名称映射表 + 异常清单导出)。",
            "【P2】车辆管理增加停车场等字段编辑能力统一财务类角色权限与默认路由配置。",
        ]
    )

    add_heading(doc, "六、附录", 1)
    add_para(doc, "本报告依据 2026 年 5 月 26 日试运行现场反馈整理,问题状态随修复进展动态更新。", color=GRAY, size=10)
    add_para(doc, "编制试运行项目组   日期2026年5月26日", color=GRAY, size=10)

    # Desktop copy first, then the project copy.
    OUT.parent.mkdir(parents=True, exist_ok=True)
    doc.save(OUT)
    OUT_PROJ.parent.mkdir(parents=True, exist_ok=True)
    doc.save(OUT_PROJ)

    print(f"已生成: {OUT}")
    print(f"副本: {OUT_PROJ}")
if __name__ == "__main__":
build()

View File

@@ -0,0 +1,335 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""从行驶证影像 OCR 提取检验日期,与车辆表合并导出 Excel。
有照片的车辆:先对影像做 OCR若未识别到「检验有效期」再用表内「行驶证检验有效期」补全第三列。
"""
from __future__ import annotations
import calendar
import os
import re
import subprocess
import tempfile
from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import date
from typing import Dict, List, Optional, Tuple
import fitz # pymupdf
import pandas as pd
from PIL import Image, ImageEnhance
# 配置路径(可按需修改)
EXCEL_IN = "/Users/sylvawong/Downloads/车辆信息-1776270214730.xlsx"
PHOTO_DIR = "/Users/sylvawong/Desktop/证件信息梳理/行驶证"
EXCEL_OUT = "/Users/sylvawong/Desktop/CURSOR/ONE-OS/车辆行驶证_OCR核对.xlsx"
EXCEL_INSPECTION_COL = "行驶证检验有效期"
TESSERACT = "/opt/homebrew/bin/tesseract"
OCR_LANG = "chi_sim+eng"
MAX_IMAGE_SIDE = 2200
MAX_WORKERS = 3
MAX_PDF_PAGES = 3
PDF_ZOOM = 2.5
def extract_plate_from_filename(basename_no_ext: str) -> Optional[str]:
    """Extract a licence plate from a file name stem, or return None.

    Steps: drop a trailing copy counter such as `` (2)``, cut everything from
    the first ``行驶证`` onward, strip trailing separators, then take the text
    before the first ``-``. That candidate must start with a CJK character
    followed by 1-14 characters from ``A-Z``, digits, CJK or ``·``, and the
    resulting plate must be at least 6 characters long.

    Args:
        basename_no_ext: File name without directory and extension.

    Returns:
        The plate string, or None when the stem does not yield a valid plate.
    """
    stem = re.sub(r"\s*\(\d+\)\s*$", "", basename_no_ext)
    if "行驶证" in stem:
        stem = stem.split("行驶证")[0]
    stem = stem.rstrip("-_· ")
    if not stem:
        return None

    # The plate is always the first dash-separated part, whether or not a
    # 17-character VIN follows it.
    candidate = stem.split("-")[0]
    match = re.match(r"^([\u4e00-\u9fa5][A-Z0-9\u4e00-\u9fa5·]{1,14})$", candidate)
    if not match:
        return None

    plate = match.group(1).rstrip("-_·")
    return plate if len(plate) >= 6 else None
def build_plate_files() -> Dict[str, List[str]]:
    """Group the regular files in ``PHOTO_DIR`` by the plate in their name.

    Entries that are not regular files, or whose name yields no plate, are
    ignored.

    Returns:
        Mapping from plate to the list of full file paths for that plate.
    """
    plate_to_paths: Dict[str, List[str]] = {}
    for entry in os.listdir(PHOTO_DIR):
        full_path = os.path.join(PHOTO_DIR, entry)
        if os.path.isfile(full_path):
            stem = os.path.splitext(entry)[0]
            plate = extract_plate_from_filename(stem)
            if plate:
                plate_to_paths.setdefault(plate, []).append(full_path)
    return plate_to_paths
def file_try_order(paths: List[str]) -> List[str]:
    """Return ``paths`` sorted by the order in which they should be OCR'd.

    Ranking uses the lower-cased base name: names containing ``年审`` come
    first, then names containing ``行驶``, then everything else. PDFs rank
    slightly after images in the same tier; ties are broken by name.
    """

    def rank(path: str) -> Tuple[float, str]:
        name = os.path.basename(path).lower()
        # Annual-inspection pages first (they usually carry the inspection
        # record), then the main licence page; PDFs a little later because
        # they need rendering.
        if "年审" in name:
            base = 0.0
        elif "行驶" in name:  # also covers names containing "行驶证"
            base = 1.0
        else:
            base = 1.5
        pdf_penalty = 0.25 if name.endswith(".pdf") else 0.0
        return (base + pdf_penalty, name)

    return sorted(paths, key=rank)
def load_image_for_ocr(path: str) -> Image.Image:
    """Open an image as RGB, downscaling so its longer side fits the limit.

    Images whose longer side is at most ``MAX_IMAGE_SIDE`` are returned
    unchanged; larger ones are resized proportionally with LANCZOS.
    """
    picture = Image.open(path).convert("RGB")
    width, height = picture.size
    longest = max(width, height)
    if longest <= MAX_IMAGE_SIDE:
        return picture
    scale = MAX_IMAGE_SIDE / longest
    return picture.resize((int(width * scale), int(height * scale)), Image.LANCZOS)
def ocr_image(img: Image.Image) -> str:
    """Run Tesseract on a contrast-boosted grayscale copy of ``img``.

    The image is written to a temporary PNG, which is removed afterwards
    whether or not OCR succeeds.

    Returns:
        Tesseract's stdout, or ``""`` when it exits with a non-zero status.
    """
    prepared = ImageEnhance.Contrast(img.convert("L")).enhance(1.35)

    handle, png_path = tempfile.mkstemp(suffix=".png")
    os.close(handle)  # only the path is needed; the image is saved by name
    try:
        prepared.save(png_path, format="PNG")
        proc = subprocess.run(
            [
                TESSERACT,
                png_path,
                "stdout",
                "-l",
                OCR_LANG,
                "--psm",
                "6",
                "-c",
                "preserve_interword_spaces=1",
            ],
            capture_output=True,
            text=True,
            timeout=180,
        )
        return (proc.stdout or "") if proc.returncode == 0 else ""
    finally:
        # Cleanup is best effort; a leftover temp file must not mask the result.
        try:
            os.unlink(png_path)
        except OSError:
            pass
def ocr_file(path: str) -> str:
    """OCR an image file, returning ``""`` if loading or OCR raises."""
    try:
        return ocr_image(load_image_for_ocr(path))
    except Exception:
        # Best effort: an unreadable file simply contributes no text.
        return ""
def month_end(y: int, m: int) -> date:
    """Return the last calendar day of month ``m`` in year ``y``."""
    _first_weekday, days_in_month = calendar.monthrange(y, m)
    return date(y, m, days_in_month)
# Patterns are tried in order; the first one yielding a usable date wins.
# Separators accept both ASCII and full-width punctuation.
_INSPECTION_DATE_PATTERNS = tuple(
    re.compile(pattern)
    for pattern in (
        # e.g. "检验有效期至 2027年06月04日" / "检验有效期至 2026-03-31"
        r"检验有效期至\s*[::]?\s*(\d{4})\s*年\s*(\d{1,2})\s*月\s*(\d{1,2})\s*日",
        r"检验有效期至\s*[::]?\s*(\d{4})年(\d{1,2})月(\d{1,2})日",
        r"检验有效期至\s*[::]?\s*(\d{4})\s*[-/.]\s*(\d{1,2})\s*[-/.]\s*(\d{1,2})",
        r"检验有效期至\s*[::]?\s*(\d{4})[-/.](\d{1,2})[-/.](\d{1,2})",
        r"检验有效期\s*[::]?\s*(\d{4})\s*年\s*(\d{1,2})\s*月\s*(\d{1,2})\s*日",
        r"有效期至\s*[::]?\s*(\d{4})\s*年\s*(\d{1,2})\s*月\s*(\d{1,2})\s*日",
        # Year and month only: resolved to the last day of that month.
        r"检验有效期至\s*[::]?\s*(\d{4})\s*年\s*(\d{1,2})\s*月(?!\s*\d{1,2}\s*日)",
        r"检验有效期至\s*[::]?\s*(\d{4})年(\d{1,2})月(?!\d{1,2}日)",
        r"检验有效期至\s*[::]?\s*(\d{4})\s*年\s*(\d{1,2})\s*月\s*",
        r"(\d{4})\s*年\s*(\d{1,2})\s*月\s*(\d{1,2})\s*日\s*[((]?\s*检验",
    )
)


def parse_inspection_date(text: str) -> Optional[date]:
    """Extract the inspection-validity date from OCR text.

    The letters ``O``/``o`` are first replaced by ``0`` to undo a common OCR
    confusion. Matches whose year is outside 2000-2100 or whose month is
    outside 1-12 are skipped. A match without a day, or with a day outside
    1-31, resolves to the last day of the month; a day that does not exist in
    that month makes the pattern be skipped.

    Args:
        text: Raw OCR output; may be empty.

    Returns:
        The parsed date, or None when no pattern yields one.
    """
    if not text:
        return None

    normalized = text.replace("O", "0").replace("o", "0")
    for pattern in _INSPECTION_DATE_PATTERNS:
        match = pattern.search(normalized)
        if not match:
            continue

        year, month = int(match.group(1)), int(match.group(2))
        if not (1 <= month <= 12 and 2000 <= year <= 2100):
            continue

        day_text = match.group(3) if pattern.groups >= 3 else None
        if day_text is not None and day_text.strip() != "":
            try:
                day = int(day_text)
            except ValueError:
                continue
            if 1 <= day <= 31:
                try:
                    return date(year, month, day)
                except ValueError:
                    # e.g. 30 February: let the remaining patterns try.
                    continue
        return month_end(year, month)
    return None
def ocr_inspection_from_path(path: str) -> Optional[date]:
    """OCR one file and return the inspection date found in it, if any.

    Non-PDF files are OCR'd directly. For PDFs, up to ``MAX_PDF_PAGES`` pages
    are rendered at ``PDF_ZOOM``, downscaled to ``MAX_IMAGE_SIDE`` when
    needed, and OCR'd in order; the first page yielding a date wins.
    """
    if os.path.splitext(path)[1].lower() != ".pdf":
        return parse_inspection_date(ocr_file(path))

    pdf = fitz.open(path)
    try:
        for page_no in range(min(len(pdf), MAX_PDF_PAGES)):
            zoom = fitz.Matrix(PDF_ZOOM, PDF_ZOOM)
            pix = pdf.load_page(page_no).get_pixmap(matrix=zoom, alpha=False)

            has_alpha = pix.n >= 4
            rendered = Image.frombytes(
                "RGBA" if has_alpha else "RGB", [pix.width, pix.height], pix.samples
            )
            if has_alpha:
                rendered = rendered.convert("RGB")

            width, height = rendered.size
            longest = max(width, height)
            if longest > MAX_IMAGE_SIDE:
                scale = MAX_IMAGE_SIDE / longest
                rendered = rendered.resize(
                    (int(width * scale), int(height * scale)), Image.LANCZOS
                )

            found = parse_inspection_date(ocr_image(rendered))
            if found:
                return found
    finally:
        pdf.close()
    return None
def parse_excel_inspection_date(val) -> Optional[date]:
    """Convert a spreadsheet cell value to a ``date``, or None if unparseable.

    None and float NaN return None immediately; anything else goes through
    ``pd.to_datetime`` with coercion, and a resulting NaT also returns None.
    """
    is_nan_float = isinstance(val, float) and pd.isna(val)
    if val is None or is_nan_float:
        return None
    stamp = pd.to_datetime(val, errors="coerce")
    return None if pd.isna(stamp) else stamp.date()
def next_inspection_for_plate(paths: List[str]) -> Optional[date]:
    """Return the first inspection date found among a plate's files.

    At most the four best-ranked files (per ``file_try_order``) are OCR'd,
    stopping at the first one that yields a date.
    """
    candidates = file_try_order(paths)[:4]
    # The generator is lazy, so files after the first hit are never OCR'd.
    return next((d for d in map(ocr_inspection_from_path, candidates) if d), None)
def main() -> None:
    """OCR licence photos per vehicle and export the inspection-date sheet.

    For every plate in the vehicle workbook that has photos, the inspection
    date is taken from OCR and, when OCR finds none, from the workbook column
    ``EXCEL_INSPECTION_COL``. The yes/no columns are written as ``是``/``否``
    so that rows with and without photos can be told apart and counted.

    Raises:
        SystemExit: when the Tesseract binary is not found at ``TESSERACT``.
    """
    if not os.path.isfile(TESSERACT):
        raise SystemExit(f"未找到 tesseract: {TESSERACT}")

    yes, no = "是", "否"
    today = date.today()
    plate_files = build_plate_files()

    df = pd.read_excel(EXCEL_IN, sheet_name="车辆信息", header=0, engine="openpyxl")
    plates = [str(x).strip() if pd.notna(x) else "" for x in df["车牌号"]]
    excel_inspection = (
        df[EXCEL_INSPECTION_COL]
        if EXCEL_INSPECTION_COL in df.columns
        else pd.Series([pd.NA] * len(df))
    )

    # Only plates that actually have photos are sent to OCR.
    tasks: List[Tuple[int, str, List[str]]] = [
        (idx, plate, plate_files[plate])
        for idx, plate in enumerate(plates)
        if plate and plate_files.get(plate)
    ]

    results: Dict[int, Optional[date]] = {}
    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as ex:
        fut_to_idx = {
            ex.submit(next_inspection_for_plate, fl): idx for idx, _p, fl in tasks
        }
        for fut in as_completed(fut_to_idx):
            idx = fut_to_idx[fut]
            try:
                results[idx] = fut.result()
            except Exception:
                # Best effort: a failed OCR job falls back to the workbook date.
                results[idx] = None

    out_rows = []
    excel_only = 0
    for idx, plate in enumerate(plates):
        row = {
            "车牌号": plate,
            "行驶证照片是否存在": "",
            "行驶证下次检验日期": "",
            "行驶证是否过期": "",
        }
        if plate:
            if plate_files.get(plate, []):
                row["行驶证照片是否存在"] = yes
                ocr_d = results.get(idx)
                excel_d = parse_excel_inspection_date(excel_inspection.iloc[idx])
                final_d = ocr_d or excel_d
                if final_d:
                    row["行驶证下次检验日期"] = final_d.strftime("%Y-%m-%d")
                    row["行驶证是否过期"] = yes if final_d < today else no
                if ocr_d is None and excel_d is not None:
                    excel_only += 1
            else:
                row["行驶证照片是否存在"] = no
        out_rows.append(row)

    out_df = pd.DataFrame(out_rows)
    os.makedirs(os.path.dirname(EXCEL_OUT), exist_ok=True)
    with pd.ExcelWriter(EXCEL_OUT, engine="openpyxl") as w:
        out_df.to_excel(w, index=False, sheet_name="行驶证OCR")

    has_date = out_df["行驶证下次检验日期"] != ""
    with_photo = out_df["行驶证照片是否存在"] == yes
    ocr_ok = sum(1 for idx, _, _ in tasks if results.get(idx))
    print(
        f"完成: {EXCEL_OUT}\n"
        f"今日日期: {today.isoformat()}\n"
        f"有照片: {int(with_photo.sum())}\n"
        f"OCR 识别到检验日期: {ocr_ok}\n"
        f"OCR 未识别、第三列由表内「{EXCEL_INSPECTION_COL}」补全: {excel_only}\n"
        f"有检验日期(第三列非空): {int(has_date.sum())}"
    )
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,109 @@
#!/usr/bin/env python3
"""Generate Excel from transcribed 车用气瓶充装记录表 data (OCR/image description)."""
from pathlib import Path
from openpyxl import Workbook
from openpyxl.styles import Alignment, Font
from openpyxl.utils import get_column_letter
TITLE = (
"广东顺兴石油燃料有限公司2024年车用气瓶充装前、后检查和充装记录表-跨年"
)
HEADERS = [
"日期/班次",
"时间",
"车牌号",
"气瓶编号",
"是否持气瓶使用登记证",
"气瓶下次检验日期",
"充装介质",
"车辆外观、气瓶附件是否完好",
"气瓶附件是否正常",
"车辆电源是否关闭、清漏",
"充装前气瓶余压(MPa)",
"充装量(kg)",
"充装金额(元)",
"充装后压力(MPa)",
"充装后检查封条是否良好",
"公里数",
"驾驶员签名",
"检查员",
"充装员",
]
# Transcribed from image; empty cells where illegible/blank in source
ROWS = [
["5.9", "9:10", "粤A RT078", "2400718", "", "2029.6", "H2", "", "", "", 9, 7.94, 277.9, 35, "", 4215, "李建辉", "", ""],
["5.9", "9:34", "粤A GP722", "2400886", "", "2029.7", "H2", "", "", "", 17, 5.06, 177.22, 35, "", 4207, "", "", ""],
["5.9", "9:49", "粤A GP708", "2400681", "", "2029.7", "H2", "", "", "", 18, 4.79, 177.03, 35, "", 10122, "罗继荣", "", ""],
["5.9", "14:06", "粤A GW1936", "2401145", "", "2029.6", "H2", "", "", "", 12, 7.42, 259.38, 35, "", 5243, "曾庆平", "", ""],
["5.9", "16:02", "粤A GP518", "2400849", "", "2029.7", "H2", "", "", "", 13, 6.86, 240.10, 35, "", 2008, "马庆军", "", ""],
["5.9", "16:32", "粤A P9719", "2400666", "", "2029.7", "H2", "", "", "", 15, 7.82, 304.98, 35, "", 10960, "孙红兵", "", ""],
["5.9", "17:50", "粤A V7749", "2400572", "", "2029.6", "H2", "", "", "", 16, 4.05, 149.85, 35, "", 61928, "王建波", "", ""],
["5.9", "18:11", "粤A FH492", "2300493", "", "2029.9", "H2", "", "", "", 12, 4.87, 169.45, 35, "", 1981, "纪晓", "", ""],
["5.9", "19:55", "粤A H9878", "", "", "", "H2", "", "", "", 12, 8.77, 302.03, 35, "", 9172, "王晓", "", ""],
["5.9", "20:12", "粤A Q0826", "", "", "", "H2", "", "", "", 13, 8.36, 326.04, 35, "", 19414, "", "", ""],
["5.10", "6:28", "粤A R1586", "2400749", "", "2029.6", "H2", "", "", "", 6, 9.86, 384.54, 35, "", 4221, "", "", ""],
["5.10", "8:25", "粤A R5058", "2400768", "", "2029.6", "H2", "", "", "", 17, 3.08, 107.96, 35, "", 5218, "李建辉", "", ""],
["5.10", "8:44", "粤A GP367", "2400754", "", "2029.6", "H2", "", "", "", 6, 8.75, 323.75, 35, "", 1227, "曾庆平", "", ""],
["5.10", "9:58", "粤A GP217", "2400764", "", "2029.6", "H2", "", "", "", 8, 7.85, 306.15, 35, "", 4833, "马庆军", "", ""],
["5.10", "14:44", "粤A 02239F", "250082", "", "", "H2", "", "", "", 8, 19.21, 710.77, 35, "", 1371, "纪晓", "", ""],
["5.10", "14:19", "粤A H30131", "2501164", "", "2029.6", "H2", "", "", "", 5, 9.23, 359.97, 35, "", 3740, "王晓", "", ""],
]
FOOTNOTES = [
"1. 符合的打\"\",不符合的打\"X\"",
"2. 充装前必须逐项内容进行逐项检查,检查员在填表后签字确认。",
"3. 检查中发现有不符合的,不予充装。",
"4. (原表底部关于车辆外观、后车顶高度、气瓶登记证等可通过公交公司网络终端核对的说明,以纸质原件为准。)",
]
def main() -> None:
    """Write the transcribed filling records to an Excel workbook.

    Layout: merged title in row 1, headers in row 2, data from row 3, then a
    blank row, a bold notes label, and one merged row per footnote. The file
    is saved one directory above this script and its path is printed.
    """
    target = Path(__file__).resolve().parent.parent / "车用气瓶充装记录_手写表转录.xlsx"
    n_cols = len(HEADERS)
    centered = Alignment(horizontal="center", vertical="center", wrap_text=True)

    wb = Workbook()
    sheet = wb.active
    sheet.title = "充装记录"

    # Row 1: title spanning all columns.
    sheet.merge_cells(start_row=1, start_column=1, end_row=1, end_column=n_cols)
    title_cell = sheet.cell(row=1, column=1, value=TITLE)
    title_cell.font = Font(bold=True, size=11)
    title_cell.alignment = centered

    # Row 2: column headers.
    for col_no, header in enumerate(HEADERS, start=1):
        header_cell = sheet.cell(row=2, column=col_no, value=header)
        header_cell.font = Font(bold=True, size=10)
        header_cell.alignment = centered

    # Rows 3..: transcribed records.
    for row_no, record in enumerate(ROWS, start=3):
        for col_no, value in enumerate(record, start=1):
            sheet.cell(row=row_no, column=col_no, value=value)

    # Notes start one blank row below the data.
    notes_row = 3 + len(ROWS) + 1
    sheet.cell(row=notes_row, column=1, value="备注说明:")
    sheet.cell(row=notes_row, column=1).font = Font(bold=True)
    for offset, note in enumerate(FOOTNOTES, start=1):
        line_row = notes_row + offset
        sheet.cell(row=line_row, column=1, value=note)
        sheet.merge_cells(
            start_row=line_row, start_column=1, end_row=line_row, end_column=n_cols
        )
        sheet.cell(row=line_row, column=1).alignment = Alignment(
            wrap_text=True, vertical="top"
        )

    # Column widths (readable for Chinese text).
    column_widths = [10, 8, 14, 12, 14, 14, 8, 18, 14, 18, 14, 10, 12, 12, 18, 8, 10, 8, 8]
    for col_no, width in enumerate(column_widths, start=1):
        sheet.column_dimensions[get_column_letter(col_no)].width = width
    for tall_row in (1, 2):
        sheet.row_dimensions[tall_row].height = 36

    wb.save(target)
    print(target)
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,216 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""根据车辆表 + 保险采购导入模板,匹配本地商业/交强/超赔保单文件夹中的 PDF导出分类 Excel。"""
from __future__ import annotations
import os
import re
from typing import Dict, List, Optional, Tuple
import pandas as pd
# 可按需修改
VEHICLE_XLSX = "/Users/sylvawong/Downloads/车辆信息-1776270214730.xlsx"
IMPORT_TEMPLATE = "/Users/sylvawong/Downloads/保险采购信息导入模板-1776306962989.xlsx"
BASE_POLICY_DIR = "/Users/sylvawong/Desktop/保单-2025.7-2026.4"
DIR_COMMERCIAL = os.path.join(BASE_POLICY_DIR, "商业")
DIR_COMPULSORY = os.path.join(BASE_POLICY_DIR, "交强")
DIR_EXCESS = os.path.join(BASE_POLICY_DIR, "超赔")
OUTPUT_XLSX = "/Users/sylvawong/Desktop/CURSOR/ONE-OS/保险文件匹配结果.xlsx"
PRODUCT_COMMERCIAL = "商业险"
PRODUCT_COMPULSORY = "交强险"
PRODUCT_EXCESS = "超赔险"
# 车牌:省份简称 + 字母/数字/挂 等(与行驶证解析类似,略宽松)
PLATE_CORE = r"[\u4e00-\u9fa5][A-Z0-9\u4e00-\u9fa5·挂]{5,14}"
PAT_COMMERCIAL = re.compile(rf"^({PLATE_CORE})[_\-]?商业", re.I)
PAT_COMPULSORY = re.compile(rf"交强险[_\-]({PLATE_CORE})[_\-]", re.I)
PAT_COMPULSORY_ALT = re.compile(rf"\d{{4}}交强险[_]({PLATE_CORE})[_]", re.I)
# 2025交强险粤AGP4597_公司.pdf交强险与车牌之间无下划线
PAT_COMPULSORY_TIGHT = re.compile(rf"^\d{{4}}交强险({PLATE_CORE})[_]", re.I)
# 京A13691F_交强险2025.7-2026.7.pdf
PAT_COMPULSORY_PLATE_FIRST = re.compile(rf"^({PLATE_CORE})_交强险", re.I)
PAT_EXCESS = re.compile(rf"^({PLATE_CORE})[_\-]?超赔", re.I)
def iter_policy_files(root: str) -> List[str]:
    """Return the sorted paths of all PDF/JPG/JPEG/PNG files under ``root``.

    The search is recursive and the extension check is case-insensitive.
    An empty list is returned when ``root`` is not a directory.
    """
    if not os.path.isdir(root):
        return []
    wanted_suffixes = (".pdf", ".jpg", ".jpeg", ".png")
    return sorted(
        os.path.join(dirpath, filename)
        for dirpath, _dirnames, filenames in os.walk(root)
        for filename in filenames
        if filename.lower().endswith(wanted_suffixes)
    )
def plate_from_filename(path: str, kind: str) -> Optional[str]:
    """Extract the plate from a policy file name for the given policy kind.

    ``kind`` is ``"commercial"``, ``"compulsory"`` or ``"excess"``; any other
    value returns None. Commercial and excess names must start with the
    plate; compulsory names are tried against four layouts in order.
    """
    filename = os.path.basename(path)

    if kind == "commercial":
        hit = PAT_COMMERCIAL.match(filename)
    elif kind == "excess":
        hit = PAT_EXCESS.match(filename)
    elif kind == "compulsory":
        hit = None
        for pattern in (
            PAT_COMPULSORY,
            PAT_COMPULSORY_ALT,
            PAT_COMPULSORY_TIGHT,
            PAT_COMPULSORY_PLATE_FIRST,
        ):
            hit = pattern.search(filename)
            if hit:
                break
    else:
        return None

    return hit.group(1) if hit else None
def build_plate_files(root: str, kind: str) -> Tuple[Dict[str, List[str]], List[str]]:
    """Index the policy files under ``root`` by the plate in their file name.

    Args:
        root: Directory to scan (recursively, via ``iter_policy_files``).
        kind: Policy kind passed through to ``plate_from_filename``.

    Returns:
        A pair ``(by_plate, unmapped)``: a mapping from plate to its file
        paths, and the list of files whose name yielded no plate.
    """
    by_plate: Dict[str, List[str]] = {}
    unmapped: List[str] = []
    for path in iter_policy_files(root):
        plate = plate_from_filename(path, kind)
        if plate:
            by_plate.setdefault(plate, []).append(path)
        else:
            unmapped.append(path)
    return by_plate, unmapped
def normalize_plate(s: str) -> str:
    """Return the plate as a stripped string, or ``""`` for a missing value."""
    if pd.notna(s):
        return str(s).strip()
    return ""
def load_template_by_product() -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
    """Load the import template and split it by insurance product.

    Fully empty rows and rows without a ``保险产品`` value are discarded.

    Returns:
        Three independent frames in the order (compulsory, commercial, excess).
    """
    template = pd.read_excel(IMPORT_TEMPLATE, sheet_name=0, header=0)
    template = template.dropna(how="all")
    template = template[template["保险产品"].notna()]

    def rows_for(product: str) -> pd.DataFrame:
        return template[template["保险产品"] == product].copy()

    return (
        rows_for(PRODUCT_COMPULSORY),
        rows_for(PRODUCT_COMMERCIAL),
        rows_for(PRODUCT_EXCESS),
    )
def vehicle_subset(vdf: pd.DataFrame) -> pd.DataFrame:
    """Keep the known vehicle columns and one row per plate.

    Columns absent from ``vdf`` are skipped; duplicates are dropped on
    ``车牌号``, keeping the first occurrence.
    """
    wanted = (
        "车牌号",
        "VIN",
        "所属省份",
        "运营城市",
        "登记所有权",
        "运营公司",
        "车辆类型",
        "品牌",
        "型号",
        "保险状态",
    )
    present = [column for column in wanted if column in vdf.columns]
    trimmed = vdf[present]
    return trimmed.drop_duplicates(subset=["车牌号"])
def merge_sheet(
    v_base: pd.DataFrame,
    tpl: pd.DataFrame,
    plate_files: Dict[str, List[str]],
    product_name: str,
) -> pd.DataFrame:
    """Build one output sheet: vehicles joined to template rows and policy files.

    The vehicle table is the left side. For each vehicle with a non-empty
    plate, the first template row with the same normalised plate supplies the
    template columns; vehicle columns fill in whatever the template leaves
    unset. Every row starts with all template columns in template order, so
    the sheet layout does not depend on which vehicle comes first.

    Args:
        v_base: Vehicle rows; must contain ``车牌号``.
        tpl: Template rows for this product; may be empty or None.
        plate_files: Mapping from plate to local policy file paths.
        product_name: Value written to ``保险产品`` on every row.

    Returns:
        One row per vehicle, with ``保单文件路径`` (paths joined by ``"; "``)
        and ``保单文件数量`` appended.
    """
    if tpl is None:
        tpl = pd.DataFrame()
    tpl_columns = list(tpl.columns)

    # First template row per normalised plate; blank plates are ignored.
    tpl_by_plate: Dict[str, dict] = {}
    if len(tpl) > 0:
        for record in tpl.to_dict("records"):
            key = normalize_plate(record["车牌号"])
            if key:
                tpl_by_plate.setdefault(key, record)

    rows = []
    for vehicle in v_base.to_dict("records"):
        plate = normalize_plate(vehicle["车牌号"])
        if not plate:
            continue
        files = plate_files.get(plate, [])

        row = dict.fromkeys(tpl_columns)
        row.update(tpl_by_plate.get(plate, {}))
        row["车牌号"] = plate
        row["保险产品"] = product_name
        for column, value in vehicle.items():
            # Only fill gaps: a value from the template row takes precedence,
            # but a bare placeholder must not hide the vehicle's own value.
            if row.get(column) is None:
                row[column] = value
        row["保单文件路径"] = "; ".join(files) if files else ""
        row["保单文件数量"] = len(files)
        rows.append(row)

    return pd.DataFrame(rows)
def unmapped_sheet(
    commercial_um: List[str],
    compulsory_um: List[str],
    excess_um: List[str],
) -> pd.DataFrame:
    """List the files whose plate could not be parsed, tagged by folder.

    Rows appear in the order commercial, compulsory, excess; each row holds
    the folder label, the full path and the base file name.
    """
    labelled_groups = (
        ("商业", commercial_um),
        ("交强", compulsory_um),
        ("超赔", excess_um),
    )
    records = [
        {"险种文件夹": label, "文件路径": path, "文件名": os.path.basename(path)}
        for label, paths in labelled_groups
        for path in paths
    ]
    return pd.DataFrame(records)
def main() -> None:
    """Match local policy files to vehicles and export one sheet per product.

    Writes four sheets to ``OUTPUT_XLSX`` (compulsory, commercial, excess,
    and files whose plate could not be parsed) and prints per-sheet counts.
    """
    vehicles = vehicle_subset(
        pd.read_excel(VEHICLE_XLSX, sheet_name=0, header=0, engine="openpyxl")
    )
    jq_tpl, sy_tpl, cp_tpl = load_template_by_product()

    sy_map, sy_um = build_plate_files(DIR_COMMERCIAL, "commercial")
    jq_map, jq_um = build_plate_files(DIR_COMPULSORY, "compulsory")
    cp_map, cp_um = build_plate_files(DIR_EXCESS, "excess")

    df_sy = merge_sheet(vehicles, sy_tpl, sy_map, PRODUCT_COMMERCIAL)
    df_jq = merge_sheet(vehicles, jq_tpl, jq_map, PRODUCT_COMPULSORY)

    # With no excess rows in the template, borrow the column layout of the
    # commercial template (or the compulsory one if that has no columns).
    if len(sy_tpl.columns):
        fallback_cols = sy_tpl.columns.tolist()
    else:
        fallback_cols = jq_tpl.columns.tolist()
    excess_tpl = cp_tpl if len(cp_tpl) > 0 else pd.DataFrame(columns=fallback_cols)
    df_cp = merge_sheet(vehicles, excess_tpl, cp_map, PRODUCT_EXCESS)

    df_um = unmapped_sheet(sy_um, jq_um, cp_um)

    os.makedirs(os.path.dirname(OUTPUT_XLSX), exist_ok=True)
    sheets = (
        ("交强险", df_jq),
        ("商业险", df_sy),
        ("超赔险", df_cp),
        ("未能解析车牌的文件", df_um),
    )
    with pd.ExcelWriter(OUTPUT_XLSX, engine="openpyxl") as writer:
        for sheet_name, frame in sheets:
            frame.to_excel(writer, index=False, sheet_name=sheet_name)

    def count_rows_and_matches(frame: pd.DataFrame) -> Tuple[int, int]:
        # Second value: rows whose file-path cell is non-empty.
        if "保单文件路径" in frame:
            matched = (frame["保单文件路径"].astype(str).str.len() > 0).sum()
        else:
            matched = 0
        return len(frame), int(matched)

    print(f"已生成: {OUTPUT_XLSX}")
    for label, frame in (("交强险", df_jq), ("商业险", df_sy), ("超赔险", df_cp)):
        total, matched = count_rows_and_matches(frame)
        print(f" {label}: 车辆行数 {total}, 已匹配到本地保单文件 {matched}")
    print(f" 未能解析车牌的文件: {len(df_um)}")
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,331 @@
#!/usr/bin/env python3
"""Merge 表2(嘉锦明细) into 表1(车辆氢费明细), generate result + comparison report."""
import pandas as pd
import numpy as np
from datetime import datetime
T1_PATH = "/Users/sylvawong/Desktop/车辆氢费明细_2026-06-06_嘉兴嘉锦亭桥北综合供能服务站.xlsx"
T2_PATH = (
"/Users/sylvawong/Library/Containers/com.tencent.xinWeChat/Data/Documents/"
"xwechat_files/wxid_l80gh7d3x7u012_385c/temp/drag/嘉锦 4月明细.xls"
)
OUT_DATA = "/Users/sylvawong/Desktop/车辆氢费明细_嘉兴嘉锦亭_4月合并结果.xlsx"
OUT_COMPARE = "/Users/sylvawong/Desktop/车辆氢费明细_嘉兴嘉锦亭_4月比对结果.xlsx"
FILTER_YEAR = 2026
FILTER_MONTH = 4
def norm_date(val):
    """Return ``val`` parsed as a datetime and formatted as ``YYYY-MM-DD``."""
    stamp = pd.to_datetime(val)
    return stamp.strftime("%Y-%m-%d")
def norm_plate(val):
    """Return ``val`` as a string with outer whitespace removed, upper-cased."""
    text = str(val)
    return text.strip().upper()
def norm_kg(val):
    """Return ``val`` converted to float and rounded to two decimals."""
    quantity = float(val)
    return round(quantity, 2)
def make_key(date_val, plate_val, kg_val):
    """Build the ``date|plate|kg`` key used to match rows across both tables."""
    parts = (norm_date(date_val), norm_plate(plate_val), str(norm_kg(kg_val)))
    return "|".join(parts)
def fmt_datetime(val):
    """Format a timestamp for the refuelling-time column.

    Exact midnight is rendered as ``YYYY-MM-DDT00:00``; any other time as
    ``YYYY-MM-DD HH:MM:SS``.
    """
    stamp = pd.to_datetime(val)
    at_midnight = (stamp.hour, stamp.minute, stamp.second) == (0, 0, 0)
    pattern = "%Y-%m-%dT00:00" if at_midnight else "%Y-%m-%d %H:%M:%S"
    return stamp.strftime(pattern)
def parse_status(val):
    """Map a reconciliation cell to ``已对账`` or ``未对账``.

    Missing values count as ``未对账``. Otherwise the first of the two
    markers found in the text (checked in the order 已对账, 未对账) is
    returned; text containing neither is returned unchanged as a string.
    """
    if pd.isna(val):
        return "未对账"
    text = str(val)
    for marker in ("已对账", "未对账"):
        if marker in text:
            return marker
    return text
def num_equal(a, b, tol=0.01):
    """Compare two numeric cells, treating two missing values as equal.

    Returns True when both are missing, False when exactly one is, and
    otherwise whether the absolute difference is at most ``tol``.
    """
    a_missing = bool(pd.isna(a))
    b_missing = bool(pd.isna(b))
    if a_missing or b_missing:
        return a_missing and b_missing
    return abs(float(a) - float(b)) <= tol
def str_equal(a, b):
    """Compare two cells as stripped strings, treating two missing as equal.

    Returns True when both are missing, False when exactly one is, and
    otherwise whether their stripped string forms are identical.
    """
    a_missing = bool(pd.isna(a))
    b_missing = bool(pd.isna(b))
    if a_missing or b_missing:
        return a_missing and b_missing
    return str(a).strip() == str(b).strip()
def load_table1():
    """Read the first sheet of the table-1 workbook at ``T1_PATH``."""
    table1 = pd.read_excel(T1_PATH, sheet_name=0)
    return table1
def load_table2():
    """Read the first sheet of the table-2 workbook and rename its columns.

    The header is taken from the second row of the sheet, then replaced by
    nine fixed column names.
    """
    fixed_names = (
        "日期",
        "车牌",
        "加气量kg",
        "公里数",
        "加氢站",
        "是否对账",
        "车架号",
        "成本单价",
        "成本金额",
    )
    table2 = pd.read_excel(T2_PATH, sheet_name=0, header=1)
    table2.columns = list(fixed_names)
    return table2
def filter_april(df, date_col):
    """Return a copy of the rows dated in ``FILTER_YEAR``/``FILTER_MONTH``.

    ``date_col`` names the column parsed as datetimes for the comparison.
    """
    stamps = pd.to_datetime(df[date_col])
    in_year = stamps.dt.year == FILTER_YEAR
    in_month = stamps.dt.month == FILTER_MONTH
    return df.loc[in_year & in_month].copy()
# Build one table-1 style record (a dict keyed by table-1 column names) from a
# single table-2 row `r2`. `seq_no` is stored under "序号".
def row_from_table2(r2, seq_no=None):
dt = pd.to_datetime(r2["日期"])
return {
"序号": seq_no,
# NOTE(review): as seen here the next two entries share the same empty-string
# key, so the month value overwrites the year value in the resulting dict.
# Confirm the intended column names for year and month.
"": int(dt.year),
"": int(dt.month),
"加氢日期": norm_date(r2["日期"]),
"加氢时间": fmt_datetime(r2["日期"]),
"加氢站名称": r2["加氢站"],
# Not taken from r2: left empty for new rows.
"客户名称": np.nan,
"车牌号": norm_plate(r2["车牌"]),
"加氢量(kg)": norm_kg(r2["加气量kg"]),
# Missing cost / mileage cells stay NaN instead of being coerced to a number.
"成本单价(元/kg)": float(r2["成本单价"]) if pd.notna(r2["成本单价"]) else np.nan,
"成本总价(元)": float(r2["成本金额"]) if pd.notna(r2["成本金额"]) else np.nan,
# Sale price fields are filled with zero placeholders.
"加氢单价(元/kg)": 0,
"加氢总价(元)": 0.0,
"行驶里程(km)": float(r2["公里数"]) if pd.notna(r2["公里数"]) else np.nan,
"备注": np.nan,
"业务员": np.nan,
"承担方式": np.nan,
"状态": parse_status(r2["是否对账"]),
# No order number is generated for rows created from table 2.
"订单编号": np.nan,
}
# Merge table 2 into table 1 for the filtered month and write two workbooks:
# OUT_DATA (merged rows) and OUT_COMPARE (summary, field mapping, overwrite
# details, field differences, added rows, rows only in one table).
# Rows are matched on the key built by make_key (date | plate | kg).
def main():
df1_all = load_table1()
df2_all = load_table2()
df1 = filter_april(df1_all, "加氢日期")
df2 = filter_april(df2_all, "日期")
df2 = df2.copy()
df2["_key"] = df2.apply(
lambda r: make_key(r["日期"], r["车牌"], r["加气量kg"]), axis=1
)
df1["_key"] = df1.apply(
lambda r: make_key(r["加氢日期"], r["车牌号"], r["加氢量(kg)"]), axis=1
)
# Snapshot of table 1 (with keys) before any overwrite; used for the report
# and for the output column order.
df1_orig = df1.copy()
t2_map = df2.set_index("_key", drop=False)
keys1 = set(df1["_key"])
keys2 = set(df2["_key"])
matched_keys = keys1 & keys2
only_t2_keys = keys2 - keys1
only_t1_keys = keys1 - keys2
overwrite_rows = []
diff_rows = []
unchanged_matched = 0
# Pass 1: for every table-1 row whose key also exists in table 2, compare the
# mapped fields and overwrite table 1 with the table-2 values.
for idx, row in df1.iterrows():
key = row["_key"]
if key not in matched_keys:
continue
r2 = t2_map.loc[key]
# .loc returns a DataFrame when the key occurs more than once in table 2;
# only the first such row is used.
if isinstance(r2, pd.DataFrame):
r2 = r2.iloc[0]
before = row.copy()
updates = {
"加氢日期": norm_date(r2["日期"]),
"加氢时间": fmt_datetime(r2["日期"]),
"加氢站名称": r2["加氢站"],
"车牌号": norm_plate(r2["车牌"]),
"加氢量(kg)": norm_kg(r2["加气量kg"]),
"成本单价(元/kg)": float(r2["成本单价"]) if pd.notna(r2["成本单价"]) else np.nan,
"成本总价(元)": float(r2["成本金额"]) if pd.notna(r2["成本金额"]) else np.nan,
"行驶里程(km)": float(r2["公里数"]) if pd.notna(r2["公里数"]) else np.nan,
}
changed_fields = []
for field, new_val in updates.items():
old_val = before[field]
is_diff = False
# Numeric fields are compared with a tolerance, everything else as
# stripped strings.
if field in ("成本单价(元/kg)", "成本总价(元)", "加氢量(kg)", "行驶里程(km)"):
is_diff = not num_equal(old_val, new_val)
else:
is_diff = not str_equal(old_val, new_val)
if is_diff:
changed_fields.append(field)
diff_rows.append(
{
"匹配键": key,
"订单编号": before.get("订单编号"),
"差异字段": field,
"表1原值": old_val,
"表2正确值": new_val,
}
)
df1.at[idx, field] = new_val
overwrite_rows.append(
{
"操作": "覆盖更新",
"匹配键": key,
"加氢日期": updates["加氢日期"],
"车牌号": updates["车牌号"],
"加氢量(kg)": updates["加氢量(kg)"],
"表1原客户名称": before["客户名称"],
"表1原加氢站名称": before["加氢站名称"],
"表1原成本总价(元)": before["成本总价(元)"],
"表1原行驶里程(km)": before["行驶里程(km)"],
"表2加氢站": r2["加氢站"],
"表2成本金额": r2["成本金额"],
"表2公里数": r2["公里数"],
"变更字段数": len(changed_fields),
"变更字段": "".join(changed_fields) if changed_fields else "",
"订单编号": before.get("订单编号"),
}
)
if not changed_fields:
unchanged_matched += 1
# In table 2 but not in table 1 -> append as new rows (no order number is generated)
new_rows = []
# NOTE(review): int() raises if the "序号" maximum is NaN — confirm the column
# is always populated.
next_seq = int(df1["序号"].max()) + 1 if len(df1) else 1
# Iterates unique keys, so several table-2 rows sharing one key produce a
# single new row (the first one).
for key in sorted(only_t2_keys):
r2 = t2_map.loc[key]
if isinstance(r2, pd.DataFrame):
r2 = r2.iloc[0]
new_row = row_from_table2(r2, seq_no=next_seq)
new_row["_key"] = key
new_rows.append(new_row)
next_seq += 1
df_new = pd.DataFrame(new_rows) if new_rows else pd.DataFrame()
if len(df_new):
df1 = pd.concat([df1, df_new], ignore_index=True)
# Restrict the output to the original table-1 columns, in their original order.
out_cols = [c for c in df1_orig.columns if c != "_key"]
df1 = df1.drop(columns=["_key"], errors="ignore")[out_cols]
# Comparison report
# NOTE(review): the matched counts below use unique keys (len(matched_keys))
# while unchanged_matched counts table-1 rows; the two can disagree when table 1
# repeats a key.
# NOTE(review): the "比对范围" value renders year and month back to back with no
# separator as written here.
summary = pd.DataFrame(
[
{"项目": "比对范围", "数量": f"{FILTER_YEAR}{FILTER_MONTH}"},
{"项目": "表1全量行数", "数量": len(df1_all)},
{"项目": "表1四月行数", "数量": len(df1_orig)},
{"项目": "表2全量行数", "数量": len(df2_all)},
{"项目": "表2四月行数", "数量": len(df2)},
{"项目": "三条件完全命中(覆盖更新)", "数量": len(matched_keys)},
{"项目": "命中且字段完全一致", "数量": unchanged_matched},
{"项目": "命中且存在字段差异", "数量": len(matched_keys) - unchanged_matched},
{"项目": "表2有表1无新增", "数量": len(only_t2_keys)},
{"项目": "表1有表2无保留表1原样", "数量": len(only_t1_keys)},
{"项目": "合并后四月总行数", "数量": len(df1)},
{
"项目": "说明",
"数量": "仅比对/合并四月数据表2无客户名称列匹配记录客户名称保留表1原值新增记录客户名称为空",
},
]
)
df_overwrite = pd.DataFrame(overwrite_rows)
df_diff = pd.DataFrame(diff_rows)
# Added-rows sheet: a fixed column subset, or an empty frame with the same
# columns when nothing was added.
df_add = pd.DataFrame(new_rows)[
[
"序号",
"加氢日期",
"加氢时间",
"加氢站名称",
"车牌号",
"加氢量(kg)",
"成本单价(元/kg)",
"成本总价(元)",
"行驶里程(km)",
"状态",
"订单编号",
]
] if len(new_rows) else pd.DataFrame(
columns=[
"序号",
"加氢日期",
"加氢时间",
"加氢站名称",
"车牌号",
"加氢量(kg)",
"成本单价(元/kg)",
"成本总价(元)",
"行驶里程(km)",
"状态",
"订单编号",
]
)
only_t2_detail = df2[df2["_key"].isin(only_t2_keys)][
["日期", "车牌", "加气量kg", "公里数", "加氢站", "是否对账", "成本单价", "成本金额"]
].copy()
only_t1_detail = df1_orig[df1_orig["_key"].isin(only_t1_keys)][
["加氢日期", "车牌号", "加氢量(kg)", "加氢站名称", "客户名称", "成本总价(元)", "订单编号"]
].copy()
# Static description of how each table-1 field is treated; written to the
# report as documentation only.
field_map = pd.DataFrame(
[
{"表1字段": "加氢日期", "表2字段": "日期", "处理方式": "覆盖"},
{"表1字段": "加氢时间", "表2字段": "日期", "处理方式": "覆盖(与加氢日期对应)"},
{"表1字段": "加氢站名称", "表2字段": "加氢站", "处理方式": "覆盖"},
{"表1字段": "客户名称", "表2字段": "表2无此列", "处理方式": "匹配记录保留表1原值"},
{"表1字段": "车牌号", "表2字段": "车牌", "处理方式": "覆盖"},
{"表1字段": "加氢量(kg)", "表2字段": "加气量kg", "处理方式": "覆盖"},
{"表1字段": "成本单价(元/kg)", "表2字段": "成本单价", "处理方式": "覆盖"},
{"表1字段": "成本总价(元)", "表2字段": "成本金额", "处理方式": "覆盖"},
{"表1字段": "加氢单价(元/kg)", "表2字段": "-", "处理方式": "保留表1"},
{"表1字段": "加氢总价(元)", "表2字段": "-", "处理方式": "保留表1"},
{"表1字段": "行驶里程(km)", "表2字段": "公里数", "处理方式": "覆盖"},
{"表1字段": "备注", "表2字段": "-", "处理方式": "保留表1"},
{"表1字段": "业务员", "表2字段": "-", "处理方式": "保留表1"},
{"表1字段": "承担方式", "表2字段": "-", "处理方式": "保留表1"},
{"表1字段": "订单编号", "表2字段": "-", "处理方式": "保留表1新增记录不生成"},
]
)
with pd.ExcelWriter(OUT_DATA, engine="openpyxl") as writer:
df1.to_excel(writer, sheet_name="车辆氢费明细", index=False)
with pd.ExcelWriter(OUT_COMPARE, engine="openpyxl") as writer:
summary.to_excel(writer, sheet_name="比对总览", index=False)
field_map.to_excel(writer, sheet_name="字段映射规则", index=False)
df_overwrite.to_excel(writer, sheet_name="覆盖更新明细", index=False)
df_diff.to_excel(writer, sheet_name="字段差异明细", index=False)
df_add.to_excel(writer, sheet_name="新增记录", index=False)
only_t2_detail.to_excel(writer, sheet_name="仅表2有", index=False)
only_t1_detail.to_excel(writer, sheet_name="仅表1有", index=False)
print("合并结果:", OUT_DATA)
print("比对结果:", OUT_COMPARE)
print(summary.to_string(index=False))
# Script entry point: run the merge only when executed directly.
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,586 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""One-off: merge repair Excel sources into 羚牛公司车辆运维成本台账.xlsx and dedupe."""
from __future__ import annotations
import os
import re
from dataclasses import dataclass
from datetime import datetime
from pathlib import Path
from typing import Iterable, Iterator, List, Optional, Tuple
import pandas as pd
from openpyxl import load_workbook
# Ledger workbook that main() reads, merges into and rewrites in place.
LEDGER = Path("/Users/sylvawong/Desktop/羚牛公司车辆运维成本台账.xlsx")
# Directories walked recursively for source .xlsx/.xls workbooks; entries that
# are not directories are skipped by iter_xlsx_files.
ROOTS = [
Path("/Users/sylvawong/Desktop/26年维修费/沈帅/2026年2月常州维修明细"),
Path("/Users/sylvawong/Desktop/26年维修费/沈帅/2026年2月金华维修费"),
Path("/Users/sylvawong/Desktop/26年维修费/沈帅/2026年3-4月金华(中顺维修费)"),
Path("/Users/sylvawong/Desktop/26年维修费/沈帅/2026年3月常州维修费"),
Path("/Users/sylvawong/Desktop/26年维修费/沈帅/2026年3月开封维修费"),
Path("/Users/sylvawong/Desktop/26年维修费/沈帅/2026年4月河南开封维修费"),
Path("/Users/sylvawong/Desktop/26年维修费/沈帅/上海昱巷2026年1月维修费"),
Path("/Users/sylvawong/Desktop/26年维修费/沈帅/上海昱巷2026年2月维修费"),
Path("/Users/sylvawong/Desktop/26年维修费/沈帅/上海昱巷2026年3月维修费"),
]
@dataclass(frozen=True)
class Row:
    """One immutable ledger record."""

    y: int  # year
    m: int  # month
    d: int  # day of month
    plate: str
    fee: float
    remark: str
    source: str = ""  # origin tag; not part of the dedupe key

    def key(self) -> Tuple:
        """Return the tuple used to detect duplicate records.

        The fee is rounded to two decimals and all whitespace is removed from
        the remark (a missing remark counts as empty).
        """
        compact_remark = re.sub(r"\s+", "", self.remark or "")
        rounded_fee = round(float(self.fee), 2)
        return (self.y, self.m, self.d, self.plate, rounded_fee, compact_remark)
def iter_xlsx_files(roots: Iterable[Path]) -> Iterator[Path]:
    """Yield every ``.xlsx``/``.xls`` file found under the given directories.

    Roots that are not directories are skipped, as are files whose name
    starts with ``~$``. The extension match is case-insensitive.
    """
    for base in roots:
        if base.is_dir():
            for folder, _subdirs, names in os.walk(base):
                for name in names:
                    if name.startswith("~$"):
                        continue
                    lowered = name.lower()
                    if lowered.endswith(".xlsx") or lowered.endswith(".xls"):
                        yield Path(folder) / name
def parse_cn_date_yy(s: object) -> Optional[Tuple[int, int, int]]:
    """Parse a two-digit-year Chinese date such as ``26年3月5日``.

    All whitespace is removed before matching and 2000 is added to the year.
    Returns ``(year, month, day)``, or ``None`` for ``None``/NaN input or text
    that does not start with that pattern.
    """
    is_missing = s is None or (isinstance(s, float) and pd.isna(s))
    if is_missing:
        return None
    compact = re.sub(r"\s+", "", str(s).strip())
    found = re.match(r"^(\d{2})年(\d{1,2})月(\d{1,2})日", compact)
    if found is None:
        return None
    year, month, day = (int(part) for part in found.groups())
    if year < 100:
        year += 2000
    return year, month, day
def parse_cn_date_yyyy(s: object) -> Optional[Tuple[int, int, int]]:
    """Parse a four-digit-year Chinese date such as ``2026年3月5日``.

    Whitespace between the numbers and the 年/月/日 markers is tolerated.
    Returns ``(year, month, day)``, or ``None`` for ``None``/NaN input or text
    that does not start with that pattern.
    """
    is_missing = s is None or (isinstance(s, float) and pd.isna(s))
    if is_missing:
        return None
    found = re.match(
        r"^(\d{4})\s*年\s*(\d{1,2})\s*月\s*(\d{1,2})\s*日", str(s).strip()
    )
    if found is None:
        return None
    year, month, day = found.groups()
    return int(year), int(month), int(day)
def parse_any_date_cell(s: object) -> Optional[Tuple[int, int, int]]:
    """Best-effort conversion of a date cell to ``(year, month, day)``.

    Tried in order: a ``datetime`` instance, anything ``pd.to_datetime`` can
    parse, a four-digit-year Chinese date, then a two-digit-year Chinese date.

    Returns:
        The date parts, or ``None`` when the cell is missing (``None``, NaN or
        ``pd.NaT``) or cannot be parsed.
    """
    # pd.NaT is an instance of datetime whose year/month/day are NaN, so it
    # must be rejected before the isinstance check below.
    if s is None or s is pd.NaT or (isinstance(s, float) and pd.isna(s)):
        return None
    if isinstance(s, datetime):
        return s.year, s.month, s.day
    t = pd.to_datetime(s, errors="coerce")
    if pd.notna(t):
        tt = t.to_pydatetime()
        return tt.year, tt.month, tt.day
    x = parse_cn_date_yyyy(s)
    if x:
        return x
    return parse_cn_date_yy(s)
def month_from_parent_dir(path: Path) -> Optional[Tuple[int, int]]:
    """Return (year, month) found in the parent folder name, else ``None``.

    The folder name must contain a ``YYYY年M月`` fragment, for example
    '2026年4月河南开封维修费'.
    """
    hit = re.search(r"(\d{4})年(\d{1,2})月", path.parent.name)
    if hit is None:
        return None
    year_text, month_text = hit.groups()
    return int(year_text), int(month_text)
# Parse one headerless feedback-form sheet into
# (year, month, day, plate, total_fee, remark).
# Returns None when the sheet has fewer than 3 rows, when the date cell
# (row 1, column 0) cannot be parsed, or when no plate or no fee total is found.
def parse_standard_feedback_df(df: pd.DataFrame) -> Optional[Tuple[int, int, int, str, float, str]]:
if df.shape[0] < 3:
return None
# Date: two-digit-year form first, then the four-digit-year form.
ymd = parse_cn_date_yy(df.iloc[1, 0])
if ymd is None or ymd[0] is None:
ymd = parse_cn_date_yyyy(df.iloc[1, 0])
if ymd is None:
return None
y, mo, d = ymd
plate = None
info_fallback = ""
# Scan the first 30 rows of column 0 for the plate line and the free-text
# repair description (used as the remark when no project rows are found).
for i in range(min(30, df.shape[0])):
v = df.iloc[i, 0]
if pd.isna(v):
continue
s = str(v)
# NOTE(review): both operands of each `or` below are identical as seen here;
# possibly a second colon variant was intended — confirm.
if "车牌号:" in s or "车牌号:" in s:
m = re.search(r"车牌号[:]\s*([^\s]+)", s)
if m:
plate = m.group(1).strip()
if "车辆维修信息:" in s or "车辆维修信息:" in s:
info_fallback = re.split(r"车辆维修信息[:]", s, maxsplit=1)[-1].strip()
# Locate the item table header: column 0 is "序号" and column 1 contains
# "维修项目".
header_idx = None
for i in range(df.shape[0]):
c0 = df.iloc[i, 0]
c1 = df.iloc[i, 1] if df.shape[1] > 1 else None
if pd.isna(c0):
continue
if str(c0).strip() == "序号" and pd.notna(c1) and "维修项目" in str(c1):
header_idx = i
break
projects: List[str] = []
total_fee: Optional[float] = None
if header_idx is not None:
for j in range(header_idx + 1, df.shape[0]):
c0 = df.iloc[j, 0]
s0 = "" if pd.isna(c0) else str(c0)
# A total row in column 0 supplies the fee and ends the scan.
if "费用总计" in s0 or "费用共计" in s0:
m = re.search(r"(?:费用总计|费用共计)[:]\s*([\d.]+)", s0)
if m:
total_fee = float(m.group(1))
break
# Only rows whose first cell is numeric (a sequence number) are item rows.
try:
float(c0)
int(float(c0))
except (TypeError, ValueError):
continue
proj = df.iloc[j, 1] if df.shape[1] > 1 else None
if pd.notna(proj):
p = str(proj).strip()
if p:
projects.append(p)
# Remark: the project names joined together, else the free-text description.
remark = "".join(projects) if projects else info_fallback
if plate is None or total_fee is None:
return None
return y, mo, d, plate, float(total_fee), remark
def parse_yuyu_feedback_df(df: pd.DataFrame) -> Optional[Tuple[int, int, int, str, float, str]]:
"""Parse one sheet of the Shanghai Yuxiang fault-repair feedback form (故障车辆维修信息反馈单).

Returns (year, month, day, plate, total_fee, remark), or None when the sheet
has fewer than 3 rows, the date cell (row 1, column 0) cannot be parsed, or no
plate or no fee total is found.
"""
if df.shape[0] < 3:
return None
# Date: four-digit-year Chinese form first, then the generic cell parser.
ymd = parse_cn_date_yyyy(df.iloc[1, 0])
if ymd is None:
ymd = parse_any_date_cell(df.iloc[1, 0])
if ymd is None or ymd[1] == 0 or ymd[2] == 0:
return None
y, mo, d = ymd
plate = None
symptom = ""
# Scan the first 25 rows of column 0 for the plate line and the fault
# symptom text (used as the remark when no project rows are found).
for i in range(min(25, df.shape[0])):
v = df.iloc[i, 0]
if pd.isna(v):
continue
s = str(v)
# NOTE(review): both operands of each `or` below are identical as seen here;
# possibly a second colon variant was intended — confirm.
if "车牌号:" in s or "车牌号:" in s:
m = re.search(r"车牌号[:]\s*([^\s]+)", s)
if m:
p = m.group(1).strip()
if p:
plate = p
if "故障现象:" in s or "故障现象:" in s:
symptom = re.split(r"故障现象[:]", s, maxsplit=1)[-1].strip()
# Locate the item table header: column 0 is "序号" and column 1 contains
# "维修项目".
header_idx = None
for i in range(df.shape[0]):
c0 = df.iloc[i, 0]
c1 = df.iloc[i, 1] if df.shape[1] > 1 else None
if pd.isna(c0):
continue
if str(c0).strip() == "序号" and pd.notna(c1) and "维修项目" in str(c1):
header_idx = i
break
projects: List[str] = []
total_fee: Optional[float] = None
if header_idx is not None:
for j in range(header_idx + 1, df.shape[0]):
c0 = df.iloc[j, 0]
c1 = df.iloc[j, 1] if df.shape[1] > 1 else None
s0 = "" if pd.isna(c0) else str(c0)
s1 = "" if pd.isna(c1) else str(c1)
# The total may sit in column 0 or column 1; column 1 wins when both match.
if "费用共计" in s0 or "费用共计" in s1:
cell = s1 if "费用共计" in s1 else s0
m = re.search(r"费用共计[:]\s*([\d.]+)", cell)
if m:
total_fee = float(m.group(1))
break
# Only rows whose first cell is numeric (a sequence number) are item rows.
try:
float(c0)
int(float(c0))
except (TypeError, ValueError):
continue
if pd.notna(c1):
p = str(c1).strip()
if p:
projects.append(p)
# Remark: the project names joined together, else the symptom text.
remark = "".join(projects) if projects else symptom
if plate is None or total_fee is None:
return None
return y, mo, d, plate, float(total_fee), remark
def parse_mingrui_chaidui(path: Path, df: pd.DataFrame) -> List[Row]:
    """Parse an itemised sheet whose header sits on the second row.

    Args:
        path: Workbook to re-read with its second row as the header.
        df: The same sheet read without a header; used only to confirm the
            layout (at least 3 rows, "序号" in row 1 / column 0).

    Returns:
        One ``Row`` per data line that has a plate and a parseable
        two-digit-year date. Lines whose project text contains "合计" are
        skipped. An unparseable or missing price is recorded as 0.0. The
        result is empty when the layout or the required columns are missing.
    """
    out: List[Row] = []
    if df.shape[0] < 3:
        return out
    if str(df.iloc[1, 0]).strip() != "序号":
        return out
    sub = pd.read_excel(path, sheet_name=0, header=1)
    # Map stripped header text to the real column label so that row lookups
    # still work when a header cell carries stray whitespace.
    cols = {str(c).strip(): c for c in sub.columns}
    need = ["车牌号", "时间", "维修项目", "价格"]
    if not all(k in cols for k in need):
        return out
    plate_c, time_c, proj_c, price_c = (cols[k] for k in need)
    for _, r in sub.iterrows():
        plate = r.get(plate_c)
        if pd.isna(plate) or str(plate).strip() == "":
            continue
        proj = r.get(proj_c)
        if pd.notna(proj) and "合计" in str(proj):
            continue
        ymd = parse_cn_date_yy(r.get(time_c))
        if ymd is None:
            continue
        y, mo, d = ymd
        fee = r.get(price_c)
        try:
            fee_f = float(fee) if pd.notna(fee) else 0.0
        except (TypeError, ValueError):
            fee_f = 0.0
        remark = "" if pd.isna(proj) else str(proj).strip()
        out.append(Row(y, mo, d, str(plate).strip(), fee_f, remark, path.name))
    return out
def parse_jinhua_chaidui(path: Path, df: pd.DataFrame) -> List[Row]:
    """Parse an itemised sheet whose header row starts with "车牌号".

    The header is searched for in the first five rows of *df* (the sheet read
    without a header); *path* is then re-read with that row as the header.
    A "时间" column, a "项目"/"维修项目" column and a column starting with
    "价格" are required. Rows without a plate, without a parseable
    two-digit-year date, or whose project text contains "合计" are skipped.
    An unparseable or missing price is recorded as 0.0.
    """
    out: List[Row] = []
    if df.shape[0] < 3:
        return out
    hdr = next(
        (i for i in range(min(5, df.shape[0])) if str(df.iloc[i, 0]).strip() == "车牌号"),
        None,
    )
    if hdr is None:
        return out
    sub = pd.read_excel(path, sheet_name=0, header=hdr)
    time_col = "时间" if "时间" in sub.columns else None
    proj_col = next(
        (c for c in sub.columns if str(c).strip() in ("项目", "维修项目")), None
    )
    price_col = next(
        (c for c in sub.columns if str(c).strip().startswith("价格")), None
    )
    if time_col is None or proj_col is None or price_col is None:
        return out
    for _, rec in sub.iterrows():
        plate = rec.get("车牌号")
        if pd.isna(plate) or str(plate).strip() == "":
            continue
        ymd = parse_cn_date_yy(rec.get(time_col))
        if ymd is None:
            continue
        y, mo, d = ymd
        proj = rec.get(proj_col)
        if pd.notna(proj) and "合计" in str(proj):
            continue
        price = rec.get(price_col)
        try:
            fee_f = float(price) if pd.notna(price) else 0.0
        except (TypeError, ValueError):
            fee_f = 0.0
        remark = "" if pd.isna(proj) else str(proj).strip()
        out.append(Row(y, mo, d, str(plate).strip(), fee_f, remark, path.name))
    return out
def parse_kaifeng_chaidui(path: Path, df: pd.DataFrame) -> List[Row]:
    """Parse a sheet that has no date column.

    Year and month come from the parent folder name; the day is fixed at 1
    and the remark at "开封拆堆". The header row ("序号" in column 0) is
    searched for in the first six rows of *df*. Only rows whose first cell is
    numeric are read; the plate comes from the "车牌" column when present,
    otherwise from the second column. Rows without a "价格" column value that
    converts to float, or whose plate contains "合计", are skipped.
    """
    out: List[Row] = []
    year_month = month_from_parent_dir(path)
    if year_month is None:
        return out
    y0, m0 = year_month
    hdr = None
    for i in range(min(6, df.shape[0])):
        first = df.iloc[i, 0]
        label = str(first).strip() if pd.notna(first) else ""
        if label == "序号":
            hdr = i
            break
    if hdr is None:
        return out
    sub = pd.read_excel(path, sheet_name=0, header=hdr)
    # Both lookups depend only on the header, so resolve them once.
    fee_col = next((c for c in sub.columns if str(c).strip() == "价格"), None)
    has_plate_col = "车牌" in sub.columns
    for _, rec in sub.iterrows():
        try:
            seq = rec.iloc[0]
            if pd.isna(seq):
                continue
            float(seq)
        except (TypeError, ValueError):
            continue
        plate = rec.get("车牌") if has_plate_col else rec.get(sub.columns[1])
        if plate is None or (isinstance(plate, float) and pd.isna(plate)):
            continue
        if fee_col is None:
            continue
        price = rec.get(fee_col)
        try:
            fee_f = float(price) if pd.notna(price) else 0.0
        except (TypeError, ValueError):
            continue
        if "合计" in str(plate):
            continue
        out.append(Row(y0, m0, 1, str(plate).strip(), fee_f, "开封拆堆", path.name))
    return out
def find_col(df: pd.DataFrame, names: Tuple[str, ...]) -> Optional[str]:
    """Return the first column label whose stripped text is in *names*.

    The label is returned exactly as it appears in ``df.columns``; ``None``
    is returned when no column matches.
    """
    matches = (c for c in df.columns if str(c).strip() in names)
    return next(matches, None)  # type: ignore[return-value]
def parse_system_table_df(df: pd.DataFrame, source: str) -> List[Row]:
    """Convert a system-export table into ledger rows.

    Args:
        df: Table read with its first row as the header. A "车牌号" or
            "车牌号码" column is required.
        source: Tag stored on every produced ``Row``.

    Returns:
        One ``Row`` per data line that has a plate and a parseable
        "故障上报时间" date. The fee is the parts amount plus the labour
        amount; blank or non-numeric amounts count as 0.0. Lines whose
        solution or parts cell contains "合计" are treated as summary lines
        and skipped. The remark is the stripped "解决方案" text.
    """
    out: List[Row] = []
    if df.shape[0] < 1:
        return out
    plate_c = find_col(df, ("车牌号", "车牌号码"))
    if plate_c is None:
        return out
    date_c = find_col(df, ("故障上报时间",))
    sol_c = find_col(df, ("解决方案",))
    part_c = find_col(df, ("配件价格", "配件", "配件费用", "配件费"))
    labor_c = find_col(df, ("工时", "人工", "工时费"))

    def to_amount(value: object) -> float:
        """Return *value* as a float, or 0.0 when blank or non-numeric."""
        if value is None or pd.isna(value):
            return 0.0
        try:
            return float(value)
        except (TypeError, ValueError):
            return 0.0

    for _, r in df.iterrows():
        plate = r.get(plate_c)
        if plate is None or (isinstance(plate, float) and pd.isna(plate)):
            continue
        plate_s = str(plate).strip()
        # str(float("nan")) is "nan", so compare case-insensitively.
        if plate_s == "" or plate_s.lower() == "nan":
            continue
        sol = r.get(sol_c) if sol_c else None
        if sol is not None and "合计" in str(sol):
            continue
        part_v = r.get(part_c) if part_c else None
        # Skip summary rows BEFORE converting the parts cell: converting the
        # text "合计" to float would raise instead of skipping the row.
        if part_v is not None and pd.notna(part_v) and "合计" in str(part_v):
            continue
        ymd = parse_any_date_cell(r.get(date_c)) if date_c else None
        if ymd is None:
            continue
        fee = to_amount(part_v)
        if labor_c:
            fee += to_amount(r.get(labor_c))
        remark = "" if sol is None or pd.isna(sol) else str(sol).strip()
        y, mo, d = ymd
        out.append(Row(y, mo, d, plate_s, float(fee), remark, source))
    return out
def parse_system_workbook(path: Path) -> List[Row]:
    """Parse every system-table sheet of a workbook.

    A sheet is used only when it has at least one data row and its first
    header cell is "车牌号" or "车辆编号". Sheets that fail to load are
    skipped (best effort).

    Returns:
        The rows of all accepted sheets, each tagged ``"<file>:<sheet>"``.
    """
    out: List[Row] = []
    # The context manager closes the workbook handle once all sheets are read.
    with pd.ExcelFile(path) as xl:
        for sn in xl.sheet_names:
            try:
                df = pd.read_excel(xl, sheet_name=sn, header=0)
            except Exception:
                # Deliberate best effort: an unreadable sheet is ignored.
                continue
            if df.shape[0] == 0:
                continue
            c0 = str(df.columns[0]).strip() if df.columns[0] is not None else ""
            if c0 not in ("车牌号", "车辆编号"):
                continue
            out.extend(parse_system_table_df(df, f"{path.name}:{sn}"))
    return out
def classify_and_parse(path: Path) -> List[Row]:
    """Detect the layout of a workbook and parse it with the matching parser.

    The layout is chosen from the first cell of the first sheet. Layouts are
    tried in this order: 开封拆堆, 铭瑞-style itemised sheet, 金华维修明细, the
    two feedback-form variants (every sheet is parsed), the system table
    export, and finally the system-table parser as a fallback.

    Returns:
        The parsed rows; an empty list when the first sheet cannot be read,
        is empty, or nothing matches.
    """
    rows: List[Row] = []
    try:
        df0 = pd.read_excel(path, sheet_name=0, header=None)
    except Exception:
        # Deliberate best effort: an unreadable workbook contributes nothing.
        return rows
    if df0.shape[0] == 0:
        return rows
    c00 = str(df0.iloc[0, 0]) if pd.notna(df0.iloc[0, 0]) else ""
    # Kaifeng itemised sheet (no date column).
    if "开封拆堆" in c00 or ("拆堆明细" in c00 and "开封" in c00):
        rows.extend(parse_kaifeng_chaidui(path, df0))
        return rows
    # Mingrui-style itemised sheet. The column-4 probe is guarded so sheets
    # with fewer than five columns no longer raise IndexError.
    looks_like_mingrui = (
        df0.shape[0] > 2
        and df0.shape[1] > 4
        and str(df0.iloc[1, 0]).strip() == "序号"
        and "维修项目" in str(df0.iloc[1, 4])
    )
    if "铭瑞" in c00 or looks_like_mingrui:
        mr = parse_mingrui_chaidui(path, df0)
        if mr:
            return mr
    # Jinhua itemised sheet.
    if "金华维修明细" in c00:
        jh = parse_jinhua_chaidui(path, df0)
        if jh:
            return jh
    # Feedback forms, one record per sheet. The longer marker must be tested
    # first because it contains the shorter one.
    feedback_parsers = (
        ("故障车辆维修信息反馈单", parse_yuyu_feedback_df),
        ("车辆维修信息反馈单", parse_standard_feedback_df),
    )
    for marker, parser in feedback_parsers:
        if marker not in c00:
            continue
        # The context manager closes the workbook handle after the last sheet.
        with pd.ExcelFile(path) as xl:
            for sn in xl.sheet_names:
                df = pd.read_excel(xl, sheet_name=sn, header=None)
                rec = parser(df)
                if rec:
                    y, mo, d, plate, fee, remark = rec
                    rows.append(Row(y, mo, d, plate, fee, remark, f"{path.name}:{sn}"))
        return rows
    # System fault / system repair / Yuxiang system detail tables.
    c0h = str(df0.iloc[0, 0]).strip() if pd.notna(df0.iloc[0, 0]) else ""
    if c0h == "车牌号" or c0h == "车辆编号":
        rows.extend(parse_system_workbook(path))
        return rows
    # Fallback: retry as a system table (some files start with a merged cell).
    alt = parse_system_workbook(path)
    if alt:
        return alt
    return rows
def read_existing_ledger(path: Path) -> Tuple[int, List[Row]]:
    """Read the current ledger rows from the active sheet of *path*.

    The header row is the first of the top 40 rows containing a "年份" cell.
    Rows without a plate are skipped, as are rows whose year, month, day or
    fee cannot be converted to a number. A missing fee counts as 0.0.

    Returns:
        ``(header_row, rows)`` where every row is tagged "ledger:existing".

    Raises:
        SystemExit: when no header row is found.
    """
    sheet = load_workbook(path).active
    header_row = None
    top_rows = sheet.iter_rows(min_row=1, max_row=40, values_only=True)
    for row_no, values in enumerate(top_rows, start=1):
        if values and any(c == "年份" for c in values if c is not None):
            header_row = row_no
            break
    if header_row is None:
        raise SystemExit("ledger: 年份 header not found")
    col_map: dict = {
        str(cell.value).strip(): cell.column
        for cell in sheet[header_row]
        if cell.value
    }

    def value_at(row_no: int, name: str) -> object:
        return sheet.cell(row=row_no, column=col_map[name]).value

    out: List[Row] = []
    for row_no in range(header_row + 1, sheet.max_row + 1):
        plate = value_at(row_no, "车牌号")
        if plate is None or str(plate).strip() == "":
            continue
        y = value_at(row_no, "年份")
        mo = value_at(row_no, "月份")
        d = value_at(row_no, "日期")
        fee = value_at(row_no, "修理费")
        remark = value_at(row_no, "备注")
        try:
            yy, mm, dd = int(y), int(mo), int(d)
            ff = float(fee) if fee is not None else 0.0
        except (TypeError, ValueError):
            continue
        remark_text = "" if remark is None else str(remark)
        out.append(Row(yy, mm, dd, str(plate).strip(), ff, remark_text, "ledger:existing"))
    return header_row, out
def write_ledger(path: Path, header_row: int, rows: List[Row]) -> None:
    """Replace all data rows below the header with *rows* and save in place.

    For each row the date parts, plate and fee are written; the fee is also
    written to "小计". The "保养费", "年审费", "轮胎费", "其他" and
    "费用是否为公司承担" cells are written as ``None`` when those columns
    exist. An empty remark is written as ``None``.
    """
    wb = load_workbook(path)
    ws = wb.active
    col_map: dict = {
        str(cell.value).strip(): cell.column for cell in ws[header_row] if cell.value
    }
    if ws.max_row > header_row:
        ws.delete_rows(header_row + 1, ws.max_row - header_row)
    optional_blank = ("保养费", "年审费", "轮胎费", "其他")
    for line_no, row in enumerate(rows, start=header_row + 1):
        leading = (
            ("年份", row.y),
            ("月份", row.m),
            ("日期", row.d),
            ("车牌号", row.plate),
            ("修理费", row.fee),
        )
        for name, value in leading:
            ws.cell(row=line_no, column=col_map[name], value=value)
        for name in optional_blank:
            if name in col_map:
                ws.cell(row=line_no, column=col_map[name], value=None)
        ws.cell(row=line_no, column=col_map["小计"], value=row.fee)
        if "费用是否为公司承担" in col_map:
            ws.cell(row=line_no, column=col_map["费用是否为公司承担"], value=None)
        ws.cell(row=line_no, column=col_map["备注"], value=row.remark or None)
    wb.save(path)
def main() -> None:
    """Import all source workbooks into the ledger, dedupe, sort and save.

    Existing ledger rows come first, so on a duplicate key the existing row
    is the one that is kept. Counts are printed at the end.
    """
    files = sorted({p.resolve() for p in iter_xlsx_files(ROOTS)})
    imported: List[Row] = [row for p in files for row in classify_and_parse(p)]
    header_row, existing = read_existing_ledger(LEDGER)
    merged = existing + imported
    before = len(merged)
    # A dict keeps insertion order, so setdefault retains the first row per key.
    first_by_key: dict = {}
    for row in merged:
        first_by_key.setdefault(row.key(), row)
    deduped: List[Row] = sorted(
        first_by_key.values(), key=lambda x: (x.y, x.m, x.d, x.plate, x.remark)
    )
    write_ledger(LEDGER, header_row, deduped)
    print("files scanned:", len(files))
    print("rows imported from files:", len(imported))
    print("existing ledger rows:", len(existing))
    print("merged before dedupe:", before)
    print("after dedupe:", len(deduped))
    print("removed duplicates:", before - len(deduped))


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,462 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Import 赵伟军 batch Excel files into 羚牛公司车辆运维成本台账.xlsx and dedupe."""
from __future__ import annotations
import re
from dataclasses import dataclass
from datetime import datetime, timedelta
from pathlib import Path
from typing import Iterable, List, Optional, Tuple
import pandas as pd
from openpyxl import load_workbook
# Ledger workbook that is read, merged into and rewritten in place.
LEDGER = Path("/Users/sylvawong/Desktop/羚牛公司车辆运维成本台账.xlsx")
# Source workbooks imported by main(); each is routed to a parser by its file
# name in classify_file, and missing files are reported and skipped.
SOURCES: List[Path] = [
Path("/Users/sylvawong/Desktop/26年维修费/赵伟军/26-1氢能源点检明细及照片(1).xlsx"),
Path("/Users/sylvawong/Desktop/26年维修费/赵伟军/26-2氢能源点检明细及照片(1).xlsx"),
Path("/Users/sylvawong/Desktop/26年维修费/赵伟军/2026-2服务站提供的明细表-羚牛(1).xlsx"),
Path("/Users/sylvawong/Desktop/26年维修费/赵伟军/2026-2故障车辆维修信息反馈单-羚牛.xlsx"),
Path("/Users/sylvawong/Desktop/26年维修费/赵伟军/2026-3车辆润滑保养结算申请单羚牛(1).xlsx"),
Path("/Users/sylvawong/Desktop/26年维修费/赵伟军/2026-3打黄油明细(1).xlsx"),
Path("/Users/sylvawong/Desktop/26年维修费/赵伟军/2026-3服务站提供的明细表-羚牛(1).xlsx"),
Path("/Users/sylvawong/Desktop/26年维修费/赵伟军/2026-3故障车辆维修信息反馈单-羚牛.xlsx"),
]
# Plates accepted by normalize_plate: a 沪/浙/粤 prefix, one letter, then 5-6
# (沪, 浙) or 5-7 (粤) letters or digits. Matching is case-insensitive.
PLATE_RE = re.compile(
    r"(沪[A-Z][A-Z0-9]{5,6}|"
    r"浙[A-Z][A-Z0-9]{5,6}|"
    r"粤[A-Z][A-Z0-9]{5,7})",
    re.I,
)


def normalize_plate(raw: object) -> Optional[str]:
    """Extract the first plate found in *raw* and return it upper-cased.

    Spaces, apostrophes and backticks are removed before searching.
    Returns ``None`` for ``None``/NaN input or when no plate is found.
    """
    if raw is None or (isinstance(raw, float) and pd.isna(raw)):
        return None
    cleaned = str(raw).strip()
    for junk in (" ", "'", "`"):
        cleaned = cleaned.replace(junk, "")
    hit = PLATE_RE.search(cleaned)
    if hit is None:
        return None
    return hit.group(1).upper().replace(" ", "")
def excel_date_to_ymd(v: object) -> Optional[Tuple[int, int, int]]:
    """Convert an Excel date cell to ``(year, month, day)``.

    Accepted inputs: ``datetime``/``pd.Timestamp`` values, numbers or numeric
    text (read as Excel serial day counts from 1899-12-30, fraction dropped),
    and any other text ``pd.to_datetime`` can parse.

    Returns:
        The date parts, or ``None`` when the cell is missing (``None``, NaN or
        ``pd.NaT``), unparseable, or a serial number outside the date range.
    """
    # pd.NaT is an instance of datetime whose year/month/day are NaN, so it
    # must be rejected before the isinstance check below.
    if v is None or v is pd.NaT or (isinstance(v, float) and pd.isna(v)):
        return None
    # pd.Timestamp subclasses datetime, so this covers both types.
    if isinstance(v, datetime):
        return v.year, v.month, v.day
    try:
        n = float(v)
    except (TypeError, ValueError):
        t = pd.to_datetime(v, errors="coerce")
        if pd.isna(t):
            return None
        tt = t.to_pydatetime()
        return tt.year, tt.month, tt.day
    base = datetime(1899, 12, 30)
    try:
        dt = base + timedelta(days=int(n))
    except (OverflowError, ValueError):
        # int() fails for NaN/inf parsed from text such as "nan"; the addition
        # overflows for serial numbers outside datetime's range.
        return None
    return dt.year, dt.month, dt.day
@dataclass
class Row:
    """One ledger record."""

    y: int  # year
    m: int  # month
    d: int  # day of month
    plate: str
    fee: float
    remark: str

    def key(self) -> Tuple:
        """Return the tuple used to detect duplicate records.

        The fee is rounded to two decimals and all whitespace is removed from
        the remark (a missing remark counts as empty).
        """
        compact_remark = re.sub(r"\s+", "", self.remark or "")
        rounded_fee = round(float(self.fee), 2)
        return (self.y, self.m, self.d, self.plate, rounded_fee, compact_remark)
def parse_dianjian_sheet(path: Path, df: pd.DataFrame, tag: str) -> List[Row]:
    """Parse an inspection settlement summary sheet read without a header.

    Data starts at row index 3. Per row: column 0 must be a number, column 1
    is the date, column 2 the plate, column 4 the remark (default
    "氢能源点检" when blank) and column 13 the fee (0.0 when blank or
    non-numeric). Rows without a recognised plate or date are skipped.
    Sheets with fewer than 14 columns yield nothing. *path* and *tag* are
    not used.
    """
    out: List[Row] = []
    if df.shape[1] < 14:
        return out
    for pos in range(3, len(df)):
        rec = df.iloc[pos]
        try:
            int(float(rec[0]))
        except (TypeError, ValueError):
            continue
        plate = normalize_plate(rec[2])
        if not plate:
            continue
        ymd = excel_date_to_ymd(rec[1])
        if not ymd:
            continue
        raw_remark = rec[4] if pd.notna(rec[4]) else ""
        remark = str(raw_remark).strip() or "氢能源点检"
        raw_fee = rec[13]
        try:
            fee = float(raw_fee) if pd.notna(raw_fee) else 0.0
        except (TypeError, ValueError):
            fee = 0.0
        year, month, day = ymd
        out.append(Row(year, month, day, plate, fee, remark))
    return out
def parse_dianjian_workbook(path: Path) -> List[Row]:
    """Parse the "点检明细" sheet of an inspection workbook.

    Returns an empty list unless the sheet's first cell contains
    "点检结算汇总明细表".
    """
    df = pd.read_excel(path, sheet_name="点检明细", header=None)
    first = df.iloc[0, 0]
    title = str(first) if pd.notna(first) else ""
    if "点检结算汇总明细表" in title:
        return parse_dianjian_sheet(path, df, path.name)
    return []
def _amount_cols_for_huizong(df: pd.DataFrame) -> Tuple[int, int]:
"""服务站版:两笔「金额」在列 8、12打黄油等窄表在列 7、11。"""
hdr = df.iloc[1].tolist() if len(df) > 1 else []
flat = " ".join(str(x) for x in hdr if pd.notna(x))
if "车辆所属公司" in flat:
return 8, 12
return 7, 11
def parse_weixiu_huizong_sheet(df: pd.DataFrame, tag: str) -> List[Row]:
"""Parse a repair settlement summary sheet (service station / greasing).

Rows are grouped by sequence number: a numbered row opens a group and the
following unnumbered rows are folded into it, summing the two amount columns
and collecting the item texts into the remark. Returns an empty list when the
sheet has fewer than 13 columns or its first cell lacks "维修结算汇总明细表".
`tag` is not used.
"""
out: List[Row] = []
if df.shape[1] < 13:
return out
c00 = str(df.iloc[0, 0]) if pd.notna(df.iloc[0, 0]) else ""
if "维修结算汇总明细表" not in c00:
return []
# Indexes of the two amount columns; they depend on the sheet layout.
c8, c12 = _amount_cols_for_huizong(df)
# Sum the two amount cells of one row; cells that are not int/float are ignored.
def add_amounts(rr) -> float:
s = 0.0
for c in (c8, c12):
if c >= len(rr):
continue
v = rr[c]
if pd.notna(v) and isinstance(v, (int, float)):
s += float(v)
return s
# Data starts at row index 3.
i = 3
while i < len(df):
r = df.iloc[i]
try:
int(float(r[0]))
has_seq = True
except (TypeError, ValueError):
has_seq = False
if not has_seq:
# Row without a sequence number: if it carries its own plate, book it as a
# single-vehicle row (batch lubrication etc.).
p = normalize_plate(r[2]) if len(r) > 2 else None
if p and len(r) > 4:
ymd = excel_date_to_ymd(r[1])
if ymd:
total = add_amounts(r)
remark = str(r[4]).strip() if pd.notna(r[4]) else "维修"
y, mo, d = ymd
out.append(Row(y, mo, d, p, total, remark))
i += 1
continue
# Numbered row: it opens a group and must carry a date and a plate.
ymd = excel_date_to_ymd(r[1])
plate = normalize_plate(r[2])
if not ymd or not plate:
i += 1
continue
parts: List[str] = []
total = 0.0
# The item text of the opening row is in column 4 for the wide layout and in
# column 3 for the narrow one.
if c8 == 8:
if len(r) > 4 and pd.notna(r[4]) and str(r[4]).strip():
parts.append(str(r[4]).strip())
else:
if len(r) > 3 and pd.notna(r[3]) and str(r[3]).strip():
parts.append(str(r[3]).strip())
total += add_amounts(r)
anchor = plate
i += 1
# Fold continuation rows into the group until the next numbered row or a row
# carrying a different plate.
# NOTE(review): continuation rows always read the item text from column 4,
# even for the narrow layout — confirm this is intended.
while i < len(df):
r2 = df.iloc[i]
try:
int(float(r2[0]))
break
except (TypeError, ValueError):
pass
p2 = normalize_plate(r2[2]) if len(r2) > 2 else None
if p2 is not None and p2 != anchor:
break
if len(r2) > 4 and pd.notna(r2[4]) and str(r2[4]).strip():
parts.append(str(r2[4]).strip())
total += add_amounts(r2)
i += 1
remark = "".join(parts) if parts else "维修"
y, mo, d = ymd
out.append(Row(y, mo, d, anchor, total, remark))
return out
def parse_weixiu_workbook(path: Path) -> List[Row]:
    """Parse every sheet of a repair settlement workbook.

    Each sheet is read without a header and handed to
    ``parse_weixiu_huizong_sheet``, which returns nothing for sheets that are
    not settlement summaries. Sheets that fail to load are skipped.

    Returns:
        The rows collected from all sheets, in sheet order.
    """
    out: List[Row] = []
    # The context manager closes the workbook handle once all sheets are read.
    with pd.ExcelFile(path) as xl:
        for sn in xl.sheet_names:
            try:
                df = pd.read_excel(xl, sheet_name=sn, header=None)
            except Exception:
                # Deliberate best effort: an unreadable sheet is ignored.
                continue
            out.extend(parse_weixiu_huizong_sheet(df, f"{path.name}:{sn}"))
    return out
def parse_dahuangyou_only(path: Path) -> List[Row]:
    """Parse only the "Sheet1" sheet of a greasing detail workbook."""
    sheet = pd.read_excel(path, sheet_name="Sheet1", header=None)
    rows = parse_weixiu_huizong_sheet(sheet, path.name)
    return rows
# Parse one fault-repair feedback-form sheet (read without a header) into a Row.
# Returns None when the sheet has fewer than 10 rows, its first cell lacks the
# form title, the date cell (row 1, column 0) cannot be parsed, or no plate or
# no fee total is found.
def parse_zhaowei_fault_df(df: pd.DataFrame) -> Optional[Row]:
if df.shape[0] < 10:
return None
c00 = str(df.iloc[0, 0]) if pd.notna(df.iloc[0, 0]) else ""
if "故障车辆维修信息反馈单" not in c00:
return None
ymd = excel_date_to_ymd(df.iloc[1, 0])
if not ymd:
return None
plate = None
symptom = ""
# Scan the first 25 rows of column 0 for the plate line and the fault symptom
# text (used as the remark when no project rows are found).
for i in range(min(25, len(df))):
v = df.iloc[i, 0]
if pd.isna(v):
continue
s = str(v)
# NOTE(review): both operands of each `or` below are identical as seen here;
# possibly a second colon variant was intended — confirm.
if "车牌号:" in s or "车牌号:" in s:
m = re.search(r"车牌号[:]\s*([^\s]+)", s)
if m:
plate = normalize_plate(m.group(1))
if "故障现象:" in s or "故障现象:" in s:
symptom = re.split(r"故障现象[:]", s, maxsplit=1)[-1].strip()
# Fee total: search the first six columns of every row for a "费用共计" cell
# followed by a number; the outer loop stops once a total has been read.
total: Optional[float] = None
for i in range(len(df)):
for j in range(min(6, df.shape[1])):
cell = df.iloc[i, j]
if pd.isna(cell):
continue
st = str(cell)
if "费用共计" in st:
m = re.search(r"费用共计[:]\s*([\d.]+)", st)
if m:
total = float(m.group(1))
break
if total is not None:
break
# Locate the item table header: column 0 is "序号" and column 1 contains
# "维修项目".
projects: List[str] = []
header_at = None
for i in range(len(df)):
c0 = df.iloc[i, 0]
c1 = df.iloc[i, 1] if df.shape[1] > 1 else None
if str(c0).strip() == "序号" and pd.notna(c1) and "维修项目" in str(c1):
header_at = i
break
if header_at is not None:
for j in range(header_at + 1, len(df)):
c0 = df.iloc[j, 0]
# Only rows whose first cell is numeric (a sequence number) are item rows.
try:
int(float(c0))
except (TypeError, ValueError):
continue
proj = df.iloc[j, 1] if df.shape[1] > 1 else None
if pd.notna(proj) and str(proj).strip():
projects.append(str(proj).strip())
if plate is None or total is None:
return None
# Remark: the project names joined together, else the symptom text, else a
# fixed default.
remark = "".join(projects) if projects else symptom
if not remark:
remark = "故障维修"
y, mo, d = ymd
return Row(y, mo, d, plate, float(total), remark)
def parse_fault_workbook(path: Path) -> List[Row]:
    """Parse every sheet of a fault-repair feedback workbook.

    Each sheet is read without a header and yields at most one ``Row``.
    Sheets that fail to load or that are not feedback forms are skipped.

    Returns:
        The rows collected from all sheets, in sheet order.
    """
    out: List[Row] = []
    # The context manager closes the workbook handle once all sheets are read.
    with pd.ExcelFile(path) as xl:
        for sn in xl.sheet_names:
            try:
                df = pd.read_excel(xl, sheet_name=sn, header=None)
            except Exception:
                # Deliberate best effort: an unreadable sheet is ignored.
                continue
            rec = parse_zhaowei_fault_df(df)
            if rec:
                out.append(rec)
    return out
def parse_lubrication_front_axle(path: Path) -> List[Row]:
    """Parse only the "前 轮 保养" sheet of a lubrication settlement workbook.

    Every distinct plate found in column 2 (either a bare plate or text after
    a "车牌号:" label) becomes one row with a fixed fee of 70.0 and the date
    taken from row 1, column 0. Returns an empty list when the sheet cannot
    be read, its first cell lacks "车辆维修保养结算申请单", or the date is
    missing.
    """
    out: List[Row] = []
    try:
        df = pd.read_excel(path, sheet_name="前 轮 保养", header=None)
    except Exception:
        return out
    first = df.iloc[0, 0]
    title = str(first) if pd.notna(first) else ""
    if "车辆维修保养结算申请单" not in title:
        return out
    ymd = excel_date_to_ymd(df.iloc[1, 0])
    if not ymd:
        return out
    fee = 70.0
    remark = "润滑保养(打黄油)"
    # A dict keeps first-seen order while dropping repeated plates.
    plates: dict = {}
    row_count = len(df) if df.shape[1] >= 3 else 0
    for pos in range(row_count):
        cell = df.iloc[pos, 2]
        if pd.isna(cell):
            continue
        text = str(cell)
        if "车牌号:" in text:
            labelled = re.search(r"车牌号[:]\s*([^\s(]+)", text)
            if labelled:
                text = labelled.group(1)
        plate = normalize_plate(text)
        if plate and plate not in plates:
            plates[plate] = None
    year, month, day = ymd
    for plate in plates:
        out.append(Row(year, month, day, plate, fee, remark))
    return out
def classify_file(path: Path) -> List[Row]:
    """Route a source workbook to its parser based on the file name.

    Checks are made in this order: inspection workbook (only when it has a
    "点检明细" sheet), greasing detail, lubrication settlement, service
    station detail, fault-repair feedback form.

    Returns:
        The parsed rows, or an empty list when no rule matches.
    """
    name = path.name
    if "氢能源点检" in name:
        # Open the workbook only to list its sheets, and close it right away.
        with pd.ExcelFile(path) as xl:
            has_detail_sheet = "点检明细" in xl.sheet_names
        if has_detail_sheet:
            return parse_dianjian_workbook(path)
    if "打黄油明细" in name:
        return parse_dahuangyou_only(path)
    if "润滑保养结算申请单" in name:
        return parse_lubrication_front_axle(path)
    if "服务站提供的明细表" in name:
        return parse_weixiu_workbook(path)
    if "故障车辆维修信息反馈单" in name:
        return parse_fault_workbook(path)
    return []
def _drop_corrupt_lubrication_rows(rows: List[Row]) -> List[Row]:
"""剔除异常合并行(多段「润滑保养」串在一条且金额畸高)。"""
out: List[Row] = []
for r in rows:
rm = r.remark or ""
if rm.count("润滑保养") > 2 and r.fee > 500:
continue
out.append(r)
return out
def read_ledger_rows() -> Tuple[int, List[Row]]:
    """Read the current rows from the active sheet of ``LEDGER``.

    The header row is the first of the top 40 rows containing a "年份" cell.
    Rows without a plate are skipped, as are rows whose year, month, day or
    fee cannot be converted to a number. An empty fee counts as 0. Plates
    are upper-cased and abnormal lubrication rows are removed.

    Returns:
        ``(header_row, rows)``.

    Raises:
        SystemExit: when no header row is found.
    """
    sheet = load_workbook(LEDGER).active
    header_row = None
    top_rows = sheet.iter_rows(min_row=1, max_row=40, values_only=True)
    for row_no, values in enumerate(top_rows, start=1):
        if values and any(c == "年份" for c in values if c is not None):
            header_row = row_no
            break
    if header_row is None:
        raise SystemExit("ledger: 未找到表头")
    col_map = {
        str(cell.value).strip(): cell.column
        for cell in sheet[header_row]
        if cell.value
    }

    def value_at(row_no: int, name: str) -> object:
        return sheet.cell(row=row_no, column=col_map[name]).value

    existing: List[Row] = []
    for row_no in range(header_row + 1, sheet.max_row + 1):
        plate = value_at(row_no, "车牌号")
        if plate is None or str(plate).strip() == "":
            continue
        try:
            yy = int(value_at(row_no, "年份"))
            mm = int(value_at(row_no, "月份"))
            dd = int(value_at(row_no, "日期"))
            fee = float(value_at(row_no, "修理费") or 0)
        except (TypeError, ValueError):
            continue
        remark = value_at(row_no, "备注")
        remark_text = "" if remark is None else str(remark)
        existing.append(Row(yy, mm, dd, str(plate).strip().upper(), fee, remark_text))
    return header_row, _drop_corrupt_lubrication_rows(existing)
def write_ledger(header_row: int, rows: List[Row]) -> None:
    """Replace all data rows below the header of ``LEDGER`` and save in place.

    For each row the date parts, plate and fee are written; the fee is also
    written to "小计". The "保养费", "年审费", "轮胎费", "其他" and
    "费用是否为公司承担" cells are written as ``None`` when those columns
    exist. An empty remark is written as ``None``.
    """
    wb = load_workbook(LEDGER)
    ws = wb.active
    col_map = {
        str(cell.value).strip(): cell.column for cell in ws[header_row] if cell.value
    }
    if ws.max_row > header_row:
        ws.delete_rows(header_row + 1, ws.max_row - header_row)
    optional_blank = ("保养费", "年审费", "轮胎费", "其他")
    for line_no, row in enumerate(rows, start=header_row + 1):
        leading = (
            ("年份", row.y),
            ("月份", row.m),
            ("日期", row.d),
            ("车牌号", row.plate),
            ("修理费", row.fee),
        )
        for name, value in leading:
            ws.cell(row=line_no, column=col_map[name], value=value)
        for name in optional_blank:
            if name in col_map:
                ws.cell(row=line_no, column=col_map[name], value=None)
        ws.cell(row=line_no, column=col_map["小计"], value=row.fee)
        if "费用是否为公司承担" in col_map:
            ws.cell(row=line_no, column=col_map["费用是否为公司承担"], value=None)
        ws.cell(row=line_no, column=col_map["备注"], value=row.remark or None)
    wb.save(LEDGER)
def main() -> None:
    """Import every available source file, merge with the ledger, and rewrite it.

    Each path in ``SOURCES`` that exists is parsed with ``classify_file``;
    missing paths are reported and skipped. Imported rows are appended to the
    rows already in the ledger, duplicates (same ``Row.key()``) are dropped
    keeping the first occurrence, and the result is sorted by date, plate and
    remark before being written back. A short summary is printed at the end.
    """
    imported: List[Row] = []
    for src in SOURCES:
        if not src.is_file():
            print("missing:", src)
            continue
        parsed = classify_file(src)
        print(f"{src.name}: +{len(parsed)}")
        imported.extend(parsed)

    header_row, existing = read_ledger_rows()
    merged = existing + imported
    before = len(merged)

    # Existing ledger rows come first, so they win over re-imported duplicates.
    first_by_key = {}
    for row in merged:
        first_by_key.setdefault(row.key(), row)
    deduped: List[Row] = sorted(
        first_by_key.values(),
        key=lambda x: (x.y, x.m, x.d, x.plate, x.remark),
    )

    write_ledger(header_row, deduped)

    print("---")
    print("existing ledger:", len(existing))
    print("imported:", len(imported))
    print("merged before dedupe:", before)
    print("after dedupe:", len(deduped))
    print("removed duplicates:", before - len(deduped))


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,336 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""赵小峰 维修明细 → 羚牛公司车辆运维成本台账(一车一条、合计入修理费、项目合并备注、去重)."""
from __future__ import annotations
import re
from dataclasses import dataclass
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Optional, Tuple
import pandas as pd
from openpyxl import load_workbook
# Ledger workbook that read_ledger() loads and write_ledger() saves in place.
LEDGER = Path("/Users/sylvawong/Desktop/羚牛公司车辆运维成本台账.xlsx")
# Source workbook handed to parse_january_xlsx() by main().
JAN = Path("/Users/sylvawong/Desktop/26年维修费/赵小峰/2026年1月维修明细(1).xlsx")
# Source workbook handed to parse_february_xls() by main().
FEB = Path("/Users/sylvawong/Desktop/26年维修费/赵小峰/羚牛26.2月(1).xls")
# Case-insensitive matcher for licence plates that start with 沪, 浙 or 粤.
# The single capture group holds the whole plate; 浙 plates may carry a
# trailing "挂", and 粤 plates have two accepted shapes.
PLATE_RE = re.compile(
    r"(沪[A-Z][A-Z0-9]{5,6}|"
    r"浙[A-Z][A-Z0-9]{4,6}挂?|"
    r"粤[A-Z][A-Z0-9]{5,7}|"
    r"粤[A-Z]{2,3}\d{4,5})",
    re.I,
)
def normalize_plate(raw: object) -> Optional[str]:
    """Extract an upper-cased licence plate from a cell value.

    Args:
        raw: Cell content; ``None`` and float NaN are treated as missing.

    Returns:
        The first ``PLATE_RE`` match found in the text (after stripping and
        removing ASCII spaces), upper-cased, or ``None`` when the value is
        missing or contains no plate.
    """
    is_missing = raw is None or (isinstance(raw, float) and pd.isna(raw))
    if is_missing:
        return None
    compact = str(raw).strip().replace(" ", "")
    found = PLATE_RE.search(compact)
    return found.group(1).upper() if found else None
def parse_date_jan(s: object) -> Optional[Tuple[int, int, int]]:
    """Convert a January-sheet date cell into a ``(year, month, day)`` tuple.

    Args:
        s: Cell content: a ``datetime``, something ``pd.to_datetime`` can
            parse, or text such as ``2026年1月5日`` / ``2026.1.5``.

    Returns:
        ``(year, month, day)``, or ``None`` when the cell is missing
        (``None``, NaN, ``pd.NaT``) or no date can be recognised.
    """
    # pd.NaT would otherwise pass the datetime isinstance check below and
    # yield NaN components, so it is rejected together with None/NaN.
    if s is None or s is pd.NaT or (isinstance(s, float) and pd.isna(s)):
        return None
    if isinstance(s, datetime):
        return s.year, s.month, s.day
    t = pd.to_datetime(s, errors="coerce")
    if pd.notna(t):
        tt = t.to_pydatetime()
        return tt.year, tt.month, tt.day
    # Fallback for text pandas cannot parse. The previous code first ran
    # str.replace("", "."), which inserts a dot between every character and
    # made this pattern unmatchable; \D already accepts any single
    # non-digit separator, so no pre-normalisation is needed.
    m = re.match(r"^(\d{4})\D(\d{1,2})\D(\d{1,2})", str(s).strip())
    if m:
        return int(m.group(1)), int(m.group(2)), int(m.group(3))
    return None
def parse_date_feb(v: object) -> Optional[Tuple[int, int, int]]:
    """Convert a February-sheet date cell into a ``(year, month, day)`` tuple.

    Args:
        v: Cell content: a ``datetime`` or anything ``pd.to_datetime`` can
            parse.

    Returns:
        ``(year, month, day)``, or ``None`` when the cell is missing
        (``None``, NaN, ``pd.NaT``) or cannot be parsed as a date.
    """
    # pd.NaT would otherwise pass the datetime isinstance check below and
    # yield (nan, nan, nan), so it is rejected together with None/NaN.
    if v is None or v is pd.NaT or (isinstance(v, float) and pd.isna(v)):
        return None
    if isinstance(v, datetime):
        return v.year, v.month, v.day
    t = pd.to_datetime(v, errors="coerce")
    if pd.isna(t):
        return None
    tt = t.to_pydatetime()
    return tt.year, tt.month, tt.day
def add_line_amounts(rr: pd.Series, c_amt_a: int, c_amt_b: int) -> float:
    """Sum the numeric amounts found in two columns of one sheet row.

    Args:
        rr: One row of the sheet, indexed by column label.
        c_amt_a: Label of the first amount column.
        c_amt_b: Label of the second amount column.

    Returns:
        The sum of the two cells as a float. A column whose label is not
        below ``len(rr)`` is skipped, as is any cell that is missing or is
        not an ``int``/``float`` instance (e.g. header text).
    """
    total = 0.0
    width = len(rr)
    for col in (c_amt_a, c_amt_b):
        if col < width:
            cell = rr[col]
            if pd.notna(cell) and isinstance(cell, (int, float)):
                total += float(cell)
    return total
@dataclass
class Row:
    """One ledger entry: a dated repair fee for a single vehicle."""

    # Calendar date of the entry, stored as separate year / month / day.
    y: int
    m: int
    d: int
    # Licence plate of the vehicle.
    plate: str
    # Repair fee for this entry.
    fee: float
    # Free-text remark (project names).
    remark: str

    def key(self) -> Tuple:
        """Return the tuple used to detect duplicate entries.

        The fee is rounded to two decimals and all whitespace is removed from
        the remark, so rows differing only in spacing or sub-cent noise
        compare equal.
        """
        amount = round(float(self.fee), 2)
        note = self.remark if self.remark else ""
        squeezed = re.sub(r"\s+", "", note)
        return (self.y, self.m, self.d, self.plate, amount, squeezed)
def parse_january_xlsx(path: Path) -> List[Row]:
    """Parse the January repair settlement sheet into one row per plate.

    The first sheet is read without a header. It must have at least 14
    columns and its top-left cell must contain "维修结算汇总明细表"; otherwise
    an empty list is returned. Scanning starts at row index 3.

    An entry starts on a row whose column 0 converts to a number (the sequence
    number). Its date is in column 1, plate in column 2 and project name in
    column 3. Following rows without a sequence number belong to the same
    entry until a "合计" row, a new sequence number, or a different plate is
    met. The fee is the first usable value in column 13 (total); if none is
    present, the sum of the amount columns 7 and 11 over the entry is used.

    Args:
        path: Workbook to read.

    Returns:
        The entries passed through ``consolidate_by_plate``.
    """
    df = pd.read_excel(path, sheet_name=0, header=None)
    if df.shape[1] < 14:
        return []
    title = str(df.iloc[0, 0]) if pd.notna(df.iloc[0, 0]) else ""
    if "维修结算汇总明细表" not in title:
        return []

    # Amount (tax included) columns and the per-entry total column.
    c_amt_a, c_amt_b = 7, 11
    c_total = 13

    def starts_entry(line) -> bool:
        # An entry row carries a numeric sequence number in column 0.
        try:
            int(float(line[0]))
        except (TypeError, ValueError):
            return False
        return True

    def project_text(line) -> str:
        # Stripped column-3 text, or "" when the cell is absent or missing.
        if len(line) > 3 and pd.notna(line[3]):
            return str(line[3]).strip()
        return ""

    def total_of(line) -> Optional[float]:
        # Column-13 value as float, or None when absent/missing/non-numeric.
        if c_total >= len(line):
            return None
        raw = line[c_total]
        if not pd.notna(raw):
            return None
        try:
            return float(raw)
        except (TypeError, ValueError):
            return None

    out: List[Row] = []
    n_rows = len(df)
    i = 3
    while i < n_rows:
        head = df.iloc[i]
        if not starts_entry(head):
            i += 1
            continue
        head_text = project_text(head)
        if head_text == "合计":
            # Grand-total row: nothing after it is parsed.
            break
        ymd = parse_date_jan(head[1])
        plate = normalize_plate(head[2])
        if not ymd or not plate:
            i += 1
            continue

        parts: List[str] = [head_text] if head_text else []
        fee_from_total = total_of(head)
        sum_lines = add_line_amounts(head, c_amt_a, c_amt_b)
        i += 1

        # Absorb continuation rows belonging to the same entry.
        while i < n_rows:
            cont = df.iloc[i]
            cont_text = project_text(cont)
            if cont_text == "合计":
                i += 1
                break
            if starts_entry(cont):
                break
            other_plate = normalize_plate(cont[2]) if len(cont) > 2 else None
            if other_plate is not None and other_plate != plate:
                break
            if cont_text:
                parts.append(cont_text)
            if fee_from_total is None:
                fee_from_total = total_of(cont)
            sum_lines += add_line_amounts(cont, c_amt_a, c_amt_b)
            i += 1

        fee_total = sum_lines if fee_from_total is None else fee_from_total
        # NOTE(review): the join separator is an empty string, so project
        # names run together in the remark -- confirm the intended delimiter.
        remark = "".join(parts) if parts else "维修"
        y, mo, d = ymd
        out.append(Row(y, mo, d, plate, float(fee_total or 0), remark))

    return consolidate_by_plate(out)
def parse_february_xls(path: Path) -> List[Row]:
    """Parse the February statement workbook into one row per plate.

    The first sheet is read without a header and scanned from row index 5.
    A row whose column 0 holds a plate (or text starting with 浙/沪/粤, at most
    15 characters) opens a vehicle section and supplies the fee from column 7.
    The rows that follow contribute project names (column 3) and dates
    (column 1) until a row whose column 0 contains "Page"/"打印" or another
    plate. The section date is the earliest date found, or 2026-02-01 when
    none is found.

    Args:
        path: Workbook to read.

    Returns:
        The sections passed through ``consolidate_by_plate``.
    """
    df = pd.read_excel(path, sheet_name=0, header=None)
    out: List[Row] = []
    i = 5
    while i < len(df):
        c0 = df.iloc[i, 0]
        if pd.isna(c0):
            i += 1
            continue
        s0 = str(c0).strip()
        # Skip header, page-footer, owner and company-total rows. The
        # parentheses spell out the precedence the unparenthesised
        # expression already had (``and`` binds tighter than ``or``).
        if (
            s0 in ("车牌", "Page 2 of 2")
            or "车主" in s0
            or ("公司" in s0 and "合计" in str(df.iloc[i, 5]))
        ):
            i += 1
            continue
        plate = normalize_plate(s0) or (s0 if re.match(r"^[浙沪粤]", s0) else None)
        if not plate:
            i += 1
            continue
        if len(s0) > 15:
            i += 1
            continue
        try:
            fee = float(df.iloc[i, 7])
        except (TypeError, ValueError, IndexError):
            fee = 0.0
        # float(NaN) does not raise, so a blank amount cell (read by pandas
        # as NaN) must be mapped to the 0.0 fallback explicitly; otherwise
        # NaN would flow into the summed fee and the dedupe key.
        if pd.isna(fee):
            fee = 0.0
        i += 1

        projects: List[str] = []
        dates: List[Tuple[int, int, int]] = []
        while i < len(df):
            r = df.iloc[i]
            n0 = r[0]
            if pd.notna(n0):
                s = str(n0).strip()
                if "Page" in s or "打印" in s:
                    # Page/print marker ends the section and is consumed.
                    i += 1
                    break
                if normalize_plate(s) and s not in ("车牌",):
                    # Next vehicle section; leave the row for the outer loop.
                    break
            if pd.notna(r[3]):
                proj = str(r[3]).strip()
                if proj and proj not in ("项目名称", "NaN"):
                    projects.append(proj)
            dt = parse_date_feb(r[1])
            if dt:
                dates.append(dt)
            i += 1

        ymd = min(dates) if dates else (2026, 2, 1)
        y, mo, d = ymd
        # NOTE(review): the join separator is an empty string, so project
        # names run together in the remark -- confirm the intended delimiter.
        remark = "".join(projects) if projects else "维修"
        out.append(Row(y, mo, d, plate.upper(), fee, remark))
    return consolidate_by_plate(out)
def consolidate_by_plate(rows: List[Row], sep: str = "") -> List[Row]:
    """Merge rows of one file that share a plate into a single row per plate.

    Fees are added, the earliest date is kept, and remarks are combined
    without repeating a piece that was already recorded. Plates keep the
    order of their first appearance. A plate that occurs only once is
    returned as a copy with its remark untouched.

    The previous implementation split remarks with ``str.split("")``, which
    raises ``ValueError`` (empty separator) as soon as a plate repeats. The
    pieces are now tracked per plate instead of being re-split from the
    joined text.

    Args:
        rows: Parsed rows, possibly with repeated plates.
        sep: Delimiter between remark pieces. With the default ``""`` each
            incoming remark is treated as one piece and merged pieces are
            concatenated directly; pass a non-empty delimiter to split
            incoming remarks on it and join merged pieces with it.

    Returns:
        One ``Row`` per distinct plate.
    """
    merged: Dict[str, Row] = {}
    pieces_by_plate: Dict[str, List[str]] = {}

    def pieces(text: Optional[str]) -> List[str]:
        # Non-empty, stripped remark pieces in their original order.
        raw = text or ""
        chunks = raw.split(sep) if sep else [raw]
        return [c.strip() for c in chunks if c.strip()]

    for r in rows:
        cur = merged.get(r.plate)
        if cur is None:
            merged[r.plate] = Row(r.y, r.m, r.d, r.plate, r.fee, r.remark)
            pieces_by_plate[r.plate] = list(dict.fromkeys(pieces(r.remark)))
            continue
        known = pieces_by_plate[r.plate]
        for piece in pieces(r.remark):
            if piece not in known:
                known.append(piece)
        # Tuple comparison orders dates like datetime does, without raising
        # on a day/month combination that is not a real calendar date.
        y, mo, d = min((cur.y, cur.m, cur.d), (r.y, r.m, r.d))
        merged[r.plate] = Row(y, mo, d, r.plate, cur.fee + r.fee, sep.join(known))

    # dicts preserve insertion order, i.e. first appearance of each plate.
    return list(merged.values())
def read_ledger() -> Tuple[int, List[Row]]:
    """Read the existing data rows from the active sheet of ``LEDGER``.

    The header row is the first of the top 40 sheet rows containing a cell
    equal to "年份"; columns are then addressed by their (stripped) header
    text.

    Returns:
        ``(header_row, rows)``: the 1-based header row number and the parsed
        data rows in sheet order.

    Raises:
        SystemExit: if no header row is found in the first 40 rows.
        KeyError: if a required header is missing when a data row is read.

    NOTE(review): rows with a blank plate, or whose year/month/day/fee cannot
    be converted, are skipped silently. main() passes the returned list to
    write_ledger(), which deletes and rewrites every data row, so skipped rows
    would disappear from the workbook -- confirm this is acceptable.
    """
    book = load_workbook(LEDGER)
    sheet = book.active

    header_row = None
    top_rows = sheet.iter_rows(min_row=1, max_row=40, values_only=True)
    for idx, values in enumerate(top_rows, start=1):
        if "年份" in values:
            header_row = idx
            break
    if header_row is None:
        raise SystemExit("未找到表头")

    # Header text -> 1-based column index; empty header cells are ignored.
    col_map = {
        str(cell.value).strip(): cell.column
        for cell in sheet[header_row]
        if cell.value
    }

    def value_at(row_idx, title):
        # Column lookup stays lazy so a missing header only fails when used.
        return sheet.cell(row=row_idx, column=col_map[title]).value

    rows: List[Row] = []
    for row_idx in range(header_row + 1, sheet.max_row + 1):
        plate = value_at(row_idx, "车牌号")
        if plate is None or not str(plate).strip():
            continue
        try:
            year = int(value_at(row_idx, "年份"))
            month = int(value_at(row_idx, "月份"))
            day = int(value_at(row_idx, "日期"))
            fee = float(value_at(row_idx, "修理费") or 0)
        except (TypeError, ValueError):
            continue
        note = value_at(row_idx, "备注")
        note_text = "" if note is None else str(note)
        rows.append(Row(year, month, day, str(plate).strip().upper(), fee, note_text))
    return header_row, rows
def write_ledger(header_row: int, rows: List[Row]) -> None:
    """Replace every data row below the header in ``LEDGER`` with ``rows``.

    Args:
        header_row: 1-based sheet row holding the column headers.
        rows: Ledger rows to write, one sheet row each, in the given order.

    For each row the year, month, day, plate and repair fee are written; the
    subtotal column ("小计") receives the same fee; the remark is written only
    when non-empty. The other cost columns and "费用是否为公司承担" are passed
    ``None`` when those headers exist. The workbook is saved back to
    ``LEDGER`` in place.

    Raises:
        KeyError: if a required header is missing and ``rows`` is non-empty.
    """
    book = load_workbook(LEDGER)
    sheet = book.active
    col_map = {
        str(cell.value).strip(): cell.column
        for cell in sheet[header_row]
        if cell.value
    }

    # Drop all existing data rows before writing the new set.
    stale_rows = sheet.max_row - header_row
    if stale_rows > 0:
        sheet.delete_rows(header_row + 1, stale_rows)

    def put(line_no, title, value):
        sheet.cell(row=line_no, column=col_map[title], value=value)

    other_cost_titles = ("保养费", "年审费", "轮胎费", "其他")
    for line_no, entry in enumerate(rows, start=header_row + 1):
        put(line_no, "年份", entry.y)
        put(line_no, "月份", entry.m)
        put(line_no, "日期", entry.d)
        put(line_no, "车牌号", entry.plate)
        put(line_no, "修理费", entry.fee)
        for title in other_cost_titles:
            if title in col_map:
                put(line_no, title, None)
        put(line_no, "小计", entry.fee)
        if "费用是否为公司承担" in col_map:
            put(line_no, "费用是否为公司承担", None)
        put(line_no, "备注", entry.remark or None)

    book.save(LEDGER)
def main() -> None:
    """Import the January and February files, merge with the ledger, rewrite it.

    Each source that exists is parsed with its own parser; a missing source is
    reported and skipped. Imported rows are appended to the rows already in
    the ledger, duplicates (same ``Row.key()``) are dropped keeping the first
    occurrence, and the result is sorted by date, plate and remark before
    being written back. A one-line summary is printed at the end.
    """
    sources = (
        (JAN, parse_january_xlsx, "1月明细:"),
        (FEB, parse_february_xls, "2月结算单:"),
    )
    imported: List[Row] = []
    for src, parser, label in sources:
        if not src.is_file():
            print("missing", src)
            continue
        parsed = parser(src)
        print(label, len(parsed), "条(按车合并后)")
        imported.extend(parsed)

    header_row, existing = read_ledger()
    merged = existing + imported
    before = len(merged)

    # Existing ledger rows come first, so they win over re-imported duplicates.
    first_by_key = {}
    for row in merged:
        first_by_key.setdefault(row.key(), row)
    deduped: List[Row] = sorted(
        first_by_key.values(),
        key=lambda x: (x.y, x.m, x.d, x.plate, x.remark),
    )

    write_ledger(header_row, deduped)
    print("原台账", len(existing), "合并前", before, "去重后", len(deduped), "剔除重复", before - len(deduped))


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,47 @@
import pandas as pd
import numpy as np
# Rebuild "Sheet3" of the driver commission workbook from "Sheet2":
# per month / driver / plate / project detail lines, each driver-month
# followed by a "【同月合计】" total line, then write all three sheets back.
out = "/Users/sylvawong/Desktop/CURSOR/ONE-OS/驾驶员提成与运营数据表_含排序.xlsx"

s1 = pd.read_excel(out, sheet_name="Sheet1")
sheet2 = pd.read_excel(out, sheet_name="Sheet2")

# Work on a copy so Sheet2 is written back exactly as it was read.
df = sheet2.assign(日期=pd.to_datetime(sheet2["日期"]))
df["月份"] = df["日期"].dt.to_period("M").astype(str)

# Detail level: one line per month / driver / plate / project combination.
# NOTE(review): groupby is called without dropna, so pandas' default applies
# to rows with a missing group key -- confirm Sheet2 has no such rows.
detail_keys = ["月份", "驾驶员", "车牌号码", "运营项目", "归属项目"]
detail = df.groupby(detail_keys, as_index=False).agg(
    总运费=("运费", "sum"),
    提成金额=("提成金额", "sum"),
)
detail_money = ["总运费", "提成金额"]
detail[detail_money] = detail[detail_money].round(2)

# Summary level: per month / driver totals, computed from the rounded detail.
summary = detail.groupby(["月份", "驾驶员"], as_index=False).agg(
    总运费=("总运费", "sum"),
    同月提成合计=("提成金额", "sum"),
)
summary_money = ["总运费", "同月提成合计"]
summary[summary_money] = summary[summary_money].round(2)

# Detail lines leave the monthly-total column empty.
detail["同月提成合计"] = np.nan

# Total lines reuse the detail layout, with a marker in the project column.
summary_rows = summary.assign(
    车牌号码="",
    运营项目="【同月合计】",
    归属项目="",
    提成金额=summary["同月提成合计"],
)

cols = ["月份", "驾驶员", "车牌号码", "运营项目", "归属项目", "总运费", "提成金额", "同月提成合计"]
# _sort puts each driver-month total line after that group's detail lines.
detail = detail[cols].assign(_sort=0)
summary_rows = summary_rows[cols].assign(_sort=1)

sheet3 = (
    pd.concat([detail, summary_rows], ignore_index=True)
    .sort_values(["月份", "驾驶员", "_sort", "车牌号码", "运营项目"], kind="stable")
    .drop(columns="_sort")
)

# NOTE(review): no mode is passed to ExcelWriter, so pandas' default write
# mode applies and the workbook is recreated from these three frames --
# confirm no other sheet or cell formatting in the file needs to survive.
with pd.ExcelWriter(out, engine="openpyxl") as writer:
    for sheet_name, frame in (("Sheet1", s1), ("Sheet2", sheet2), ("Sheet3", sheet3)):
        frame.to_excel(writer, sheet_name=sheet_name, index=False)

print(f"updated {out}, rows={len(sheet3)}")