feat: sync full workspace including web modules, docs, and configurations to Gitea
Optimized the root .gitignore to exclude virtual environments, node modules, and temp folders to ensure clean and lightweight version tracking. Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
208
scripts/fill_insurance_import_from_policies.py
Normal file
208
scripts/fill_insurance_import_from_policies.py
Normal file
@@ -0,0 +1,208 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""扫描商业/交强/超赔目录下保单文件,按导入模板 15 列结构生成已填报 Excel。"""
|
||||
from __future__ import annotations
|
||||
|
||||
import calendar
|
||||
import os
|
||||
import re
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
import pandas as pd
|
||||
|
||||
TEMPLATE_IN = "/Users/sylvawong/Downloads/保险采购信息导入模板-1776319598586.xlsx"
|
||||
VEHICLE_XLSX = "/Users/sylvawong/Downloads/车辆信息-1776270214730.xlsx"
|
||||
BASE_POLICY_DIR = "/Users/sylvawong/Desktop/保单-2025.7-2026.4"
|
||||
DIR_COMMERCIAL = os.path.join(BASE_POLICY_DIR, "商业")
|
||||
DIR_COMPULSORY = os.path.join(BASE_POLICY_DIR, "交强")
|
||||
DIR_EXCESS = os.path.join(BASE_POLICY_DIR, "超赔")
|
||||
OUTPUT_XLSX = "/Users/sylvawong/Desktop/CURSOR/ONE-OS/保险采购信息导入模板-已填报.xlsx"
|
||||
SHEET_NAME = "保险采购信息导入模板"
|
||||
|
||||
PRODUCT_COMMERCIAL = "商业险"
|
||||
PRODUCT_COMPULSORY = "交强险"
|
||||
PRODUCT_EXCESS = "超赔险"
|
||||
|
||||
PLATE_CORE = r"[\u4e00-\u9fa5][A-Z0-9\u4e00-\u9fa5·挂]{5,14}"
|
||||
|
||||
PAT_COMMERCIAL = re.compile(rf"^({PLATE_CORE})[_\-]?商业", re.I)
|
||||
PAT_COMPULSORY = re.compile(rf"交强险[_\-]({PLATE_CORE})[_\-]", re.I)
|
||||
PAT_COMPULSORY_ALT = re.compile(rf"\d{{4}}交强险[_]({PLATE_CORE})[_]", re.I)
|
||||
PAT_COMPULSORY_TIGHT = re.compile(rf"^\d{{4}}交强险({PLATE_CORE})[_]", re.I)
|
||||
PAT_COMPULSORY_PLATE_FIRST = re.compile(rf"^({PLATE_CORE})_交强险", re.I)
|
||||
PAT_EXCESS = re.compile(rf"^({PLATE_CORE})[_\-]?超赔", re.I)
|
||||
PAT_PERIOD = re.compile(r"(\d{4})\.(\d{1,2})-(\d{4})\.(\d{1,2})")
|
||||
PAT_NEW_CAR_COMM = re.compile(r"^新车_商业_(.+)\.(pdf|jpg|jpeg|png)$", re.I)
|
||||
PAT_NEW_CAR_JQ = re.compile(r"^新车_交强_(.+)\.(pdf|jpg|jpeg|png)$", re.I)
|
||||
|
||||
|
||||
def iter_policy_files(root: str) -> List[str]:
|
||||
out: List[str] = []
|
||||
if not os.path.isdir(root):
|
||||
return out
|
||||
for dp, _, fns in os.walk(root):
|
||||
for fn in fns:
|
||||
low = fn.lower()
|
||||
if low.endswith((".pdf", ".jpg", ".jpeg", ".png")):
|
||||
out.append(os.path.join(dp, fn))
|
||||
return sorted(out)
|
||||
|
||||
|
||||
def plate_from_filename(path: str, kind: str) -> Optional[str]:
|
||||
fn = os.path.basename(path)
|
||||
if kind == "commercial":
|
||||
m = PAT_COMMERCIAL.match(fn)
|
||||
return m.group(1) if m else None
|
||||
if kind == "compulsory":
|
||||
m = (
|
||||
PAT_COMPULSORY.search(fn)
|
||||
or PAT_COMPULSORY_ALT.search(fn)
|
||||
or PAT_COMPULSORY_TIGHT.search(fn)
|
||||
or PAT_COMPULSORY_PLATE_FIRST.search(fn)
|
||||
)
|
||||
return m.group(1) if m else None
|
||||
if kind == "excess":
|
||||
m = PAT_EXCESS.match(fn)
|
||||
return m.group(1) if m else None
|
||||
return None
|
||||
|
||||
|
||||
def period_from_filename(fn: str) -> Tuple[Optional[pd.Timestamp], Optional[pd.Timestamp]]:
|
||||
m = PAT_PERIOD.search(fn)
|
||||
if not m:
|
||||
return None, None
|
||||
y1, mo1, y2, mo2 = map(int, m.groups())
|
||||
start = pd.Timestamp(year=y1, month=mo1, day=1)
|
||||
last = calendar.monthrange(y2, mo2)[1]
|
||||
end = pd.Timestamp(year=y2, month=mo2, day=last)
|
||||
return start, end
|
||||
|
||||
|
||||
def policy_no_from_filename(path: str, product: str) -> Optional[str]:
|
||||
fn = os.path.basename(path)
|
||||
if product == PRODUCT_COMMERCIAL:
|
||||
m = PAT_NEW_CAR_COMM.match(fn)
|
||||
if m:
|
||||
return m.group(1).strip()
|
||||
if product == PRODUCT_COMPULSORY:
|
||||
m = PAT_NEW_CAR_JQ.match(fn)
|
||||
if m:
|
||||
return m.group(1).strip()
|
||||
return None
|
||||
|
||||
|
||||
def load_vehicle_index() -> Dict[str, pd.Series]:
|
||||
vdf = pd.read_excel(VEHICLE_XLSX, sheet_name=0, header=0, engine="openpyxl")
|
||||
vdf = vdf.drop_duplicates(subset=["车牌号"], keep="first")
|
||||
idx: Dict[str, pd.Series] = {}
|
||||
for _, row in vdf.iterrows():
|
||||
p = str(row["车牌号"]).strip() if pd.notna(row["车牌号"]) else ""
|
||||
if p:
|
||||
idx[p] = row
|
||||
return idx
|
||||
|
||||
|
||||
def split_city(运营城市: Any, 所属省份: Any) -> Tuple[str, str]:
|
||||
prov = str(所属省份).strip() if pd.notna(所属省份) else ""
|
||||
city = ""
|
||||
if pd.notna(运营城市):
|
||||
s = str(运营城市).strip()
|
||||
if "-" in s:
|
||||
parts = s.split("-", 1)
|
||||
if not prov:
|
||||
prov = parts[0].strip()
|
||||
city = parts[1].strip() if len(parts) > 1 else ""
|
||||
else:
|
||||
city = s
|
||||
return prov, city
|
||||
|
||||
|
||||
def empty_row_template(columns: List[str]) -> Dict[str, Any]:
|
||||
return {c: None for c in columns}
|
||||
|
||||
|
||||
def build_rows_for_dir(
|
||||
root: str,
|
||||
kind: str,
|
||||
product: str,
|
||||
vidx: Dict[str, pd.Series],
|
||||
columns: List[str],
|
||||
) -> List[Dict[str, Any]]:
|
||||
rows: List[Dict[str, Any]] = []
|
||||
for path in iter_policy_files(root):
|
||||
fn = os.path.basename(path)
|
||||
plate = plate_from_filename(path, kind) or ""
|
||||
row = empty_row_template(columns)
|
||||
|
||||
pno = policy_no_from_filename(path, product)
|
||||
row["保单号"] = pno if pno else None
|
||||
row["车牌号"] = plate if plate else None
|
||||
row["保险产品"] = product
|
||||
|
||||
st, en = period_from_filename(fn)
|
||||
if st is not None:
|
||||
row["保险开始时间"] = st
|
||||
if en is not None:
|
||||
row["保险结束时间"] = en
|
||||
|
||||
if plate and plate in vidx:
|
||||
vr = vidx[plate]
|
||||
vin = vr.get("VIN")
|
||||
row["保险公司"] = vin if pd.notna(vin) else None
|
||||
prov, city = split_city(vr.get("运营城市"), vr.get("所属省份"))
|
||||
row["省"] = prov or None
|
||||
row["市"] = city or None
|
||||
rows.append(row)
|
||||
return rows
|
||||
|
||||
|
||||
def main() -> None:
|
||||
tpl = pd.read_excel(TEMPLATE_IN, sheet_name=0, header=0)
|
||||
columns = tpl.columns.tolist()
|
||||
required = [
|
||||
"保单号",
|
||||
"车牌号",
|
||||
"保险公司",
|
||||
"保险产品",
|
||||
"保险费用金额",
|
||||
"保险额度",
|
||||
"省",
|
||||
"市",
|
||||
"保险开始时间",
|
||||
"保险结束时间",
|
||||
"缴费时间",
|
||||
"保险公司联系人",
|
||||
"保险公司联系电话",
|
||||
"保险采购人",
|
||||
"保险采购人电话",
|
||||
]
|
||||
if columns != required:
|
||||
raise SystemExit(f"模板列与预期不一致: {columns}")
|
||||
|
||||
vidx = load_vehicle_index()
|
||||
|
||||
all_rows: List[Dict[str, Any]] = []
|
||||
all_rows.extend(
|
||||
build_rows_for_dir(DIR_COMMERCIAL, "commercial", PRODUCT_COMMERCIAL, vidx, columns)
|
||||
)
|
||||
all_rows.extend(
|
||||
build_rows_for_dir(DIR_COMPULSORY, "compulsory", PRODUCT_COMPULSORY, vidx, columns)
|
||||
)
|
||||
all_rows.extend(build_rows_for_dir(DIR_EXCESS, "excess", PRODUCT_EXCESS, vidx, columns))
|
||||
|
||||
out_df = pd.DataFrame(all_rows, columns=columns)
|
||||
|
||||
os.makedirs(os.path.dirname(OUTPUT_XLSX), exist_ok=True)
|
||||
with pd.ExcelWriter(OUTPUT_XLSX, engine="openpyxl") as w:
|
||||
out_df.to_excel(w, index=False, sheet_name=SHEET_NAME)
|
||||
|
||||
print(f"已生成: {OUTPUT_XLSX}")
|
||||
print(f"总行数(每个保单文件一行): {len(out_df)}")
|
||||
for p, n in out_df["保险产品"].value_counts().items():
|
||||
print(f" {p}: {int(n)}")
|
||||
miss_plate = out_df["车牌号"].isna() | (out_df["车牌号"].astype(str).str.strip() == "")
|
||||
print(f" 未解析到车牌的行: {int(miss_plate.sum())}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Reference in New Issue
Block a user