Optimized the root .gitignore to exclude virtual environments, node modules, and temp folders to ensure clean and lightweight version tracking. Co-authored-by: Cursor <cursoragent@cursor.com>
217 lines
7.5 KiB
Python
217 lines
7.5 KiB
Python
#!/usr/bin/env python3
|
||
# -*- coding: utf-8 -*-
|
||
"""根据车辆表 + 保险采购导入模板,匹配本地商业/交强/超赔保单文件夹中的 PDF,导出分类 Excel。"""
|
||
from __future__ import annotations
|
||
|
||
import os
|
||
import re
|
||
from typing import Dict, List, Optional, Tuple
|
||
|
||
import pandas as pd
|
||
|
||
# 可按需修改
|
||
VEHICLE_XLSX = "/Users/sylvawong/Downloads/车辆信息-1776270214730.xlsx"
|
||
IMPORT_TEMPLATE = "/Users/sylvawong/Downloads/保险采购信息导入模板-1776306962989.xlsx"
|
||
BASE_POLICY_DIR = "/Users/sylvawong/Desktop/保单-2025.7-2026.4"
|
||
DIR_COMMERCIAL = os.path.join(BASE_POLICY_DIR, "商业")
|
||
DIR_COMPULSORY = os.path.join(BASE_POLICY_DIR, "交强")
|
||
DIR_EXCESS = os.path.join(BASE_POLICY_DIR, "超赔")
|
||
OUTPUT_XLSX = "/Users/sylvawong/Desktop/CURSOR/ONE-OS/保险文件匹配结果.xlsx"
|
||
|
||
PRODUCT_COMMERCIAL = "商业险"
|
||
PRODUCT_COMPULSORY = "交强险"
|
||
PRODUCT_EXCESS = "超赔险"
|
||
|
||
# 车牌:省份简称 + 字母/数字/挂 等(与行驶证解析类似,略宽松)
|
||
PLATE_CORE = r"[\u4e00-\u9fa5][A-Z0-9\u4e00-\u9fa5·挂]{5,14}"
|
||
|
||
PAT_COMMERCIAL = re.compile(rf"^({PLATE_CORE})[_\-]?商业", re.I)
|
||
PAT_COMPULSORY = re.compile(rf"交强险[_\-]({PLATE_CORE})[_\-]", re.I)
|
||
PAT_COMPULSORY_ALT = re.compile(rf"\d{{4}}交强险[_]({PLATE_CORE})[_]", re.I)
|
||
# 2025交强险粤AGP4597_公司.pdf(交强险与车牌之间无下划线)
|
||
PAT_COMPULSORY_TIGHT = re.compile(rf"^\d{{4}}交强险({PLATE_CORE})[_]", re.I)
|
||
# 京A13691F_交强险2025.7-2026.7.pdf
|
||
PAT_COMPULSORY_PLATE_FIRST = re.compile(rf"^({PLATE_CORE})_交强险", re.I)
|
||
PAT_EXCESS = re.compile(rf"^({PLATE_CORE})[_\-]?超赔", re.I)
|
||
|
||
|
||
def iter_policy_files(root: str) -> List[str]:
|
||
out: List[str] = []
|
||
if not os.path.isdir(root):
|
||
return out
|
||
for dp, _, fns in os.walk(root):
|
||
for fn in fns:
|
||
low = fn.lower()
|
||
if low.endswith(".pdf") or low.endswith((".jpg", ".jpeg", ".png")):
|
||
out.append(os.path.join(dp, fn))
|
||
return sorted(out)
|
||
|
||
|
||
def plate_from_filename(path: str, kind: str) -> Optional[str]:
|
||
fn = os.path.basename(path)
|
||
if kind == "commercial":
|
||
m = PAT_COMMERCIAL.match(fn)
|
||
return m.group(1) if m else None
|
||
if kind == "compulsory":
|
||
m = (
|
||
PAT_COMPULSORY.search(fn)
|
||
or PAT_COMPULSORY_ALT.search(fn)
|
||
or PAT_COMPULSORY_TIGHT.search(fn)
|
||
or PAT_COMPULSORY_PLATE_FIRST.search(fn)
|
||
)
|
||
return m.group(1) if m else None
|
||
if kind == "excess":
|
||
m = PAT_EXCESS.match(fn)
|
||
return m.group(1) if m else None
|
||
return None
|
||
|
||
|
||
def build_plate_files(root: str, kind: str) -> Dict[str, List[str]]:
|
||
d: Dict[str, List[str]] = {}
|
||
unmapped: List[str] = []
|
||
for p in iter_policy_files(root):
|
||
pl = plate_from_filename(p, kind)
|
||
if pl:
|
||
d.setdefault(pl, []).append(p)
|
||
else:
|
||
unmapped.append(p)
|
||
return d, unmapped
|
||
|
||
|
||
def normalize_plate(s: str) -> str:
|
||
return str(s).strip() if pd.notna(s) else ""
|
||
|
||
|
||
def load_template_by_product() -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
|
||
df = pd.read_excel(IMPORT_TEMPLATE, sheet_name=0, header=0)
|
||
df = df.dropna(how="all")
|
||
df = df[df["保险产品"].notna()]
|
||
jq = df[df["保险产品"] == PRODUCT_COMPULSORY].copy()
|
||
sy = df[df["保险产品"] == PRODUCT_COMMERCIAL].copy()
|
||
cp = df[df["保险产品"] == PRODUCT_EXCESS].copy()
|
||
return jq, sy, cp
|
||
|
||
|
||
def vehicle_subset(vdf: pd.DataFrame) -> pd.DataFrame:
|
||
cols = [
|
||
"车牌号",
|
||
"VIN",
|
||
"所属省份",
|
||
"运营城市",
|
||
"登记所有权",
|
||
"运营公司",
|
||
"车辆类型",
|
||
"品牌",
|
||
"型号",
|
||
"保险状态",
|
||
]
|
||
keep = [c for c in cols if c in vdf.columns]
|
||
return vdf[keep].drop_duplicates(subset=["车牌号"])
|
||
|
||
|
||
def merge_sheet(
|
||
v_base: pd.DataFrame,
|
||
tpl: pd.DataFrame,
|
||
plate_files: Dict[str, List[str]],
|
||
product_name: str,
|
||
) -> pd.DataFrame:
|
||
"""以车辆表车牌为左表;模板与保单文件按车牌关联。"""
|
||
if tpl is not None and len(tpl) > 0:
|
||
tpl_idx = tpl.drop_duplicates(subset=["车牌号"], keep="first").set_index("车牌号")
|
||
else:
|
||
tpl_idx = pd.DataFrame().set_index(pd.Index([], name="车牌号"))
|
||
|
||
rows = []
|
||
for _, vr in v_base.iterrows():
|
||
plate = normalize_plate(vr["车牌号"])
|
||
if not plate:
|
||
continue
|
||
files = plate_files.get(plate, [])
|
||
file_cell = "; ".join(files) if files else ""
|
||
|
||
if len(tpl_idx.index) and plate in tpl_idx.index:
|
||
tr = tpl_idx.loc[plate]
|
||
if isinstance(tr, pd.DataFrame):
|
||
tr = tr.iloc[0]
|
||
row = tr.to_dict()
|
||
else:
|
||
row = {c: None for c in tpl.columns} if len(tpl.columns) else {}
|
||
row["车牌号"] = plate
|
||
row["保险产品"] = product_name
|
||
|
||
for k, val in vr.to_dict().items():
|
||
row.setdefault(k, val)
|
||
row["保单文件路径"] = file_cell
|
||
row["保单文件数量"] = len(files)
|
||
rows.append(row)
|
||
|
||
out = pd.DataFrame(rows)
|
||
if "保险产品" in out.columns:
|
||
out["保险产品"] = product_name
|
||
return out
|
||
|
||
|
||
def unmapped_sheet(
|
||
commercial_um: List[str],
|
||
compulsory_um: List[str],
|
||
excess_um: List[str],
|
||
) -> pd.DataFrame:
|
||
rows = []
|
||
for p in commercial_um:
|
||
rows.append({"险种文件夹": "商业", "文件路径": p, "文件名": os.path.basename(p)})
|
||
for p in compulsory_um:
|
||
rows.append({"险种文件夹": "交强", "文件路径": p, "文件名": os.path.basename(p)})
|
||
for p in excess_um:
|
||
rows.append({"险种文件夹": "超赔", "文件路径": p, "文件名": os.path.basename(p)})
|
||
return pd.DataFrame(rows)
|
||
|
||
|
||
def main() -> None:
|
||
vdf = pd.read_excel(VEHICLE_XLSX, sheet_name=0, header=0, engine="openpyxl")
|
||
v_base = vehicle_subset(vdf)
|
||
|
||
jq_tpl, sy_tpl, cp_tpl = load_template_by_product()
|
||
|
||
sy_map, sy_um = build_plate_files(DIR_COMMERCIAL, "commercial")
|
||
jq_map, jq_um = build_plate_files(DIR_COMPULSORY, "compulsory")
|
||
cp_map, cp_um = build_plate_files(DIR_EXCESS, "excess")
|
||
|
||
df_sy = merge_sheet(v_base, sy_tpl, sy_map, PRODUCT_COMMERCIAL)
|
||
df_jq = merge_sheet(v_base, jq_tpl, jq_map, PRODUCT_COMPULSORY)
|
||
|
||
tpl_cols = sy_tpl.columns.tolist() if len(sy_tpl.columns) else jq_tpl.columns.tolist()
|
||
if len(cp_tpl) > 0:
|
||
df_cp = merge_sheet(v_base, cp_tpl, cp_map, PRODUCT_EXCESS)
|
||
else:
|
||
df_cp = merge_sheet(
|
||
v_base, pd.DataFrame(columns=tpl_cols), cp_map, PRODUCT_EXCESS
|
||
)
|
||
|
||
df_um = unmapped_sheet(sy_um, jq_um, cp_um)
|
||
|
||
os.makedirs(os.path.dirname(OUTPUT_XLSX), exist_ok=True)
|
||
with pd.ExcelWriter(OUTPUT_XLSX, engine="openpyxl") as w:
|
||
df_jq.to_excel(w, index=False, sheet_name="交强险")
|
||
df_sy.to_excel(w, index=False, sheet_name="商业险")
|
||
df_cp.to_excel(w, index=False, sheet_name="超赔险")
|
||
df_um.to_excel(w, index=False, sheet_name="未能解析车牌的文件")
|
||
|
||
def stat(df: pd.DataFrame) -> Tuple[int, int]:
|
||
n = len(df)
|
||
m = (df["保单文件路径"].astype(str).str.len() > 0).sum() if "保单文件路径" in df else 0
|
||
return n, int(m)
|
||
|
||
print(f"已生成: {OUTPUT_XLSX}")
|
||
for name, df in [
|
||
("交强险", df_jq),
|
||
("商业险", df_sy),
|
||
("超赔险", df_cp),
|
||
]:
|
||
n, m = stat(df)
|
||
print(f" {name}: 车辆行数 {n}, 已匹配到本地保单文件 {m}")
|
||
print(f" 未能解析车牌的文件: {len(df_um)} 个")
|
||
|
||
|
||
if __name__ == "__main__":
|
||
main()
|