Optimized the root .gitignore to exclude virtual environments, node modules, and temp folders to ensure clean and lightweight version tracking. Co-authored-by: Cursor <cursoragent@cursor.com>
209 lines
7.0 KiB
Python
209 lines
7.0 KiB
Python
#!/usr/bin/env python3
|
|
# -*- coding: utf-8 -*-
|
|
"""扫描商业/交强/超赔目录下保单文件,按导入模板 15 列结构生成已填报 Excel。"""
|
|
from __future__ import annotations
|
|
|
|
import calendar
|
|
import os
|
|
import re
|
|
from typing import Any, Dict, List, Optional, Tuple
|
|
|
|
import pandas as pd
|
|
|
|
TEMPLATE_IN = "/Users/sylvawong/Downloads/保险采购信息导入模板-1776319598586.xlsx"
|
|
VEHICLE_XLSX = "/Users/sylvawong/Downloads/车辆信息-1776270214730.xlsx"
|
|
BASE_POLICY_DIR = "/Users/sylvawong/Desktop/保单-2025.7-2026.4"
|
|
DIR_COMMERCIAL = os.path.join(BASE_POLICY_DIR, "商业")
|
|
DIR_COMPULSORY = os.path.join(BASE_POLICY_DIR, "交强")
|
|
DIR_EXCESS = os.path.join(BASE_POLICY_DIR, "超赔")
|
|
OUTPUT_XLSX = "/Users/sylvawong/Desktop/CURSOR/ONE-OS/保险采购信息导入模板-已填报.xlsx"
|
|
SHEET_NAME = "保险采购信息导入模板"
|
|
|
|
PRODUCT_COMMERCIAL = "商业险"
|
|
PRODUCT_COMPULSORY = "交强险"
|
|
PRODUCT_EXCESS = "超赔险"
|
|
|
|
PLATE_CORE = r"[\u4e00-\u9fa5][A-Z0-9\u4e00-\u9fa5·挂]{5,14}"
|
|
|
|
PAT_COMMERCIAL = re.compile(rf"^({PLATE_CORE})[_\-]?商业", re.I)
|
|
PAT_COMPULSORY = re.compile(rf"交强险[_\-]({PLATE_CORE})[_\-]", re.I)
|
|
PAT_COMPULSORY_ALT = re.compile(rf"\d{{4}}交强险[_]({PLATE_CORE})[_]", re.I)
|
|
PAT_COMPULSORY_TIGHT = re.compile(rf"^\d{{4}}交强险({PLATE_CORE})[_]", re.I)
|
|
PAT_COMPULSORY_PLATE_FIRST = re.compile(rf"^({PLATE_CORE})_交强险", re.I)
|
|
PAT_EXCESS = re.compile(rf"^({PLATE_CORE})[_\-]?超赔", re.I)
|
|
PAT_PERIOD = re.compile(r"(\d{4})\.(\d{1,2})-(\d{4})\.(\d{1,2})")
|
|
PAT_NEW_CAR_COMM = re.compile(r"^新车_商业_(.+)\.(pdf|jpg|jpeg|png)$", re.I)
|
|
PAT_NEW_CAR_JQ = re.compile(r"^新车_交强_(.+)\.(pdf|jpg|jpeg|png)$", re.I)
|
|
|
|
|
|
def iter_policy_files(root: str) -> List[str]:
|
|
out: List[str] = []
|
|
if not os.path.isdir(root):
|
|
return out
|
|
for dp, _, fns in os.walk(root):
|
|
for fn in fns:
|
|
low = fn.lower()
|
|
if low.endswith((".pdf", ".jpg", ".jpeg", ".png")):
|
|
out.append(os.path.join(dp, fn))
|
|
return sorted(out)
|
|
|
|
|
|
def plate_from_filename(path: str, kind: str) -> Optional[str]:
|
|
fn = os.path.basename(path)
|
|
if kind == "commercial":
|
|
m = PAT_COMMERCIAL.match(fn)
|
|
return m.group(1) if m else None
|
|
if kind == "compulsory":
|
|
m = (
|
|
PAT_COMPULSORY.search(fn)
|
|
or PAT_COMPULSORY_ALT.search(fn)
|
|
or PAT_COMPULSORY_TIGHT.search(fn)
|
|
or PAT_COMPULSORY_PLATE_FIRST.search(fn)
|
|
)
|
|
return m.group(1) if m else None
|
|
if kind == "excess":
|
|
m = PAT_EXCESS.match(fn)
|
|
return m.group(1) if m else None
|
|
return None
|
|
|
|
|
|
def period_from_filename(fn: str) -> Tuple[Optional[pd.Timestamp], Optional[pd.Timestamp]]:
|
|
m = PAT_PERIOD.search(fn)
|
|
if not m:
|
|
return None, None
|
|
y1, mo1, y2, mo2 = map(int, m.groups())
|
|
start = pd.Timestamp(year=y1, month=mo1, day=1)
|
|
last = calendar.monthrange(y2, mo2)[1]
|
|
end = pd.Timestamp(year=y2, month=mo2, day=last)
|
|
return start, end
|
|
|
|
|
|
def policy_no_from_filename(path: str, product: str) -> Optional[str]:
|
|
fn = os.path.basename(path)
|
|
if product == PRODUCT_COMMERCIAL:
|
|
m = PAT_NEW_CAR_COMM.match(fn)
|
|
if m:
|
|
return m.group(1).strip()
|
|
if product == PRODUCT_COMPULSORY:
|
|
m = PAT_NEW_CAR_JQ.match(fn)
|
|
if m:
|
|
return m.group(1).strip()
|
|
return None
|
|
|
|
|
|
def load_vehicle_index() -> Dict[str, pd.Series]:
|
|
vdf = pd.read_excel(VEHICLE_XLSX, sheet_name=0, header=0, engine="openpyxl")
|
|
vdf = vdf.drop_duplicates(subset=["车牌号"], keep="first")
|
|
idx: Dict[str, pd.Series] = {}
|
|
for _, row in vdf.iterrows():
|
|
p = str(row["车牌号"]).strip() if pd.notna(row["车牌号"]) else ""
|
|
if p:
|
|
idx[p] = row
|
|
return idx
|
|
|
|
|
|
def split_city(运营城市: Any, 所属省份: Any) -> Tuple[str, str]:
|
|
prov = str(所属省份).strip() if pd.notna(所属省份) else ""
|
|
city = ""
|
|
if pd.notna(运营城市):
|
|
s = str(运营城市).strip()
|
|
if "-" in s:
|
|
parts = s.split("-", 1)
|
|
if not prov:
|
|
prov = parts[0].strip()
|
|
city = parts[1].strip() if len(parts) > 1 else ""
|
|
else:
|
|
city = s
|
|
return prov, city
|
|
|
|
|
|
def empty_row_template(columns: List[str]) -> Dict[str, Any]:
|
|
return {c: None for c in columns}
|
|
|
|
|
|
def build_rows_for_dir(
|
|
root: str,
|
|
kind: str,
|
|
product: str,
|
|
vidx: Dict[str, pd.Series],
|
|
columns: List[str],
|
|
) -> List[Dict[str, Any]]:
|
|
rows: List[Dict[str, Any]] = []
|
|
for path in iter_policy_files(root):
|
|
fn = os.path.basename(path)
|
|
plate = plate_from_filename(path, kind) or ""
|
|
row = empty_row_template(columns)
|
|
|
|
pno = policy_no_from_filename(path, product)
|
|
row["保单号"] = pno if pno else None
|
|
row["车牌号"] = plate if plate else None
|
|
row["保险产品"] = product
|
|
|
|
st, en = period_from_filename(fn)
|
|
if st is not None:
|
|
row["保险开始时间"] = st
|
|
if en is not None:
|
|
row["保险结束时间"] = en
|
|
|
|
if plate and plate in vidx:
|
|
vr = vidx[plate]
|
|
vin = vr.get("VIN")
|
|
row["保险公司"] = vin if pd.notna(vin) else None
|
|
prov, city = split_city(vr.get("运营城市"), vr.get("所属省份"))
|
|
row["省"] = prov or None
|
|
row["市"] = city or None
|
|
rows.append(row)
|
|
return rows
|
|
|
|
|
|
def main() -> None:
|
|
tpl = pd.read_excel(TEMPLATE_IN, sheet_name=0, header=0)
|
|
columns = tpl.columns.tolist()
|
|
required = [
|
|
"保单号",
|
|
"车牌号",
|
|
"保险公司",
|
|
"保险产品",
|
|
"保险费用金额",
|
|
"保险额度",
|
|
"省",
|
|
"市",
|
|
"保险开始时间",
|
|
"保险结束时间",
|
|
"缴费时间",
|
|
"保险公司联系人",
|
|
"保险公司联系电话",
|
|
"保险采购人",
|
|
"保险采购人电话",
|
|
]
|
|
if columns != required:
|
|
raise SystemExit(f"模板列与预期不一致: {columns}")
|
|
|
|
vidx = load_vehicle_index()
|
|
|
|
all_rows: List[Dict[str, Any]] = []
|
|
all_rows.extend(
|
|
build_rows_for_dir(DIR_COMMERCIAL, "commercial", PRODUCT_COMMERCIAL, vidx, columns)
|
|
)
|
|
all_rows.extend(
|
|
build_rows_for_dir(DIR_COMPULSORY, "compulsory", PRODUCT_COMPULSORY, vidx, columns)
|
|
)
|
|
all_rows.extend(build_rows_for_dir(DIR_EXCESS, "excess", PRODUCT_EXCESS, vidx, columns))
|
|
|
|
out_df = pd.DataFrame(all_rows, columns=columns)
|
|
|
|
os.makedirs(os.path.dirname(OUTPUT_XLSX), exist_ok=True)
|
|
with pd.ExcelWriter(OUTPUT_XLSX, engine="openpyxl") as w:
|
|
out_df.to_excel(w, index=False, sheet_name=SHEET_NAME)
|
|
|
|
print(f"已生成: {OUTPUT_XLSX}")
|
|
print(f"总行数(每个保单文件一行): {len(out_df)}")
|
|
for p, n in out_df["保险产品"].value_counts().items():
|
|
print(f" {p}: {int(n)}")
|
|
miss_plate = out_df["车牌号"].isna() | (out_df["车牌号"].astype(str).str.strip() == "")
|
|
print(f" 未解析到车牌的行: {int(miss_plate.sum())}")
|
|
|
|
|
|
if __name__ == "__main__":
|
|
main()
|