#!/usr/bin/env python3 """为「明细台账」生成唯一序号:JQ + YYMMDD(6) + 当日序号(5位),每日从 00001 起。""" from __future__ import annotations import argparse from datetime import datetime from pathlib import Path import pandas as pd from openpyxl import load_workbook def parse_cell_date(val) -> pd.Timestamp: if pd.isna(val): return pd.NaT if isinstance(val, (pd.Timestamp, datetime)): return pd.Timestamp(val) if isinstance(val, (int, float)) and not isinstance(val, bool): v = float(val) if 20000 < v < 80000: return pd.to_datetime(v, unit="D", origin="1899-12-30") return pd.to_datetime(val, errors="coerce") def build_jq_ids(df: pd.DataFrame, date_col: str = "日期") -> pd.Series: """与 df 同索引;无法解析日期的行为空字符串。""" ts = df[date_col].map(parse_cell_date) valid = ts.notna() out = pd.Series([""] * len(df), index=df.index, dtype=object) sub = df.loc[valid].copy() sub["_ts"] = ts[valid] sub["_day"] = sub["_ts"].dt.normalize() sub["_orig"] = sub.index sub = sub.sort_values(["_day", "_ts", "_orig"]) sub["_seq"] = sub.groupby("_day", sort=False).cumcount() + 1 if (sub["_seq"] > 99999).any(): raise SystemExit("单日记录超过 99999 条,无法使用 5 位序号,请调整规则或拆分数据。") yy_mm_dd = sub["_day"].dt.strftime("%y%m%d") out.loc[sub.index] = "JQ" + yy_mm_dd + sub["_seq"].astype(int).astype(str).str.zfill(5) return out def main() -> None: p = argparse.ArgumentParser() p.add_argument("xlsx", type=Path, help="加氢记录 xlsx 路径") p.add_argument( "--sheet", default="明细台账", help="工作表名,默认「明细台账」", ) p.add_argument( "--out", type=Path, default=None, help="输出路径;默认覆盖输入文件", ) args = p.parse_args() path = args.xlsx.expanduser().resolve() out_path = args.out.expanduser().resolve() if args.out else path df = pd.read_excel(path, sheet_name=args.sheet, header=1) ids = build_jq_ids(df) if ids[ids != ""].duplicated().any(): raise SystemExit("内部错误:生成了重复序号") wb = load_workbook(path) if args.sheet not in wb.sheetnames: raise SystemExit(f"未找到工作表: {args.sheet}") ws = wb[args.sheet] headers = [ws.cell(2, c).value for c in range(1, ws.max_column + 1)] try: col_序号 = headers.index("序号") + 1 except ValueError: raise SystemExit(f"第 2 行表头中未找到「序号」列,当前为: {headers[:40]}") n = len(df) for i in range(n): ws.cell(3 + i, col_序号).value = ids.iloc[i] if ids.iloc[i] else None wb.save(out_path) empty_n = (ids == "").sum() print(f"已写入 {out_path},共 {n} 行;序号为空(日期缺失): {empty_n} 行。") if empty_n: print("请为日期为空的行补全日期后重新运行,或手工填写序号。") if __name__ == "__main__": main()