Optimized the root .gitignore to exclude virtual environments, node modules, and temp folders to ensure clean and lightweight version tracking. Co-authored-by: Cursor <cursoragent@cursor.com>
92 lines
3.0 KiB
Python
92 lines
3.0 KiB
Python
#!/usr/bin/env python3
|
||
"""为「明细台账」生成唯一序号:JQ + YYMMDD(6) + 当日序号(5位),每日从 00001 起。"""
|
||
|
||
from __future__ import annotations
|
||
|
||
import argparse
|
||
from datetime import datetime
|
||
from pathlib import Path
|
||
|
||
import pandas as pd
|
||
from openpyxl import load_workbook
|
||
|
||
|
||
def parse_cell_date(val) -> pd.Timestamp:
|
||
if pd.isna(val):
|
||
return pd.NaT
|
||
if isinstance(val, (pd.Timestamp, datetime)):
|
||
return pd.Timestamp(val)
|
||
if isinstance(val, (int, float)) and not isinstance(val, bool):
|
||
v = float(val)
|
||
if 20000 < v < 80000:
|
||
return pd.to_datetime(v, unit="D", origin="1899-12-30")
|
||
return pd.to_datetime(val, errors="coerce")
|
||
|
||
|
||
def build_jq_ids(df: pd.DataFrame, date_col: str = "日期") -> pd.Series:
|
||
"""与 df 同索引;无法解析日期的行为空字符串。"""
|
||
ts = df[date_col].map(parse_cell_date)
|
||
valid = ts.notna()
|
||
out = pd.Series([""] * len(df), index=df.index, dtype=object)
|
||
|
||
sub = df.loc[valid].copy()
|
||
sub["_ts"] = ts[valid]
|
||
sub["_day"] = sub["_ts"].dt.normalize()
|
||
sub["_orig"] = sub.index
|
||
sub = sub.sort_values(["_day", "_ts", "_orig"])
|
||
sub["_seq"] = sub.groupby("_day", sort=False).cumcount() + 1
|
||
if (sub["_seq"] > 99999).any():
|
||
raise SystemExit("单日记录超过 99999 条,无法使用 5 位序号,请调整规则或拆分数据。")
|
||
yy_mm_dd = sub["_day"].dt.strftime("%y%m%d")
|
||
out.loc[sub.index] = "JQ" + yy_mm_dd + sub["_seq"].astype(int).astype(str).str.zfill(5)
|
||
return out
|
||
|
||
|
||
def main() -> None:
|
||
p = argparse.ArgumentParser()
|
||
p.add_argument("xlsx", type=Path, help="加氢记录 xlsx 路径")
|
||
p.add_argument(
|
||
"--sheet",
|
||
default="明细台账",
|
||
help="工作表名,默认「明细台账」",
|
||
)
|
||
p.add_argument(
|
||
"--out",
|
||
type=Path,
|
||
default=None,
|
||
help="输出路径;默认覆盖输入文件",
|
||
)
|
||
args = p.parse_args()
|
||
path = args.xlsx.expanduser().resolve()
|
||
out_path = args.out.expanduser().resolve() if args.out else path
|
||
|
||
df = pd.read_excel(path, sheet_name=args.sheet, header=1)
|
||
ids = build_jq_ids(df)
|
||
|
||
if ids[ids != ""].duplicated().any():
|
||
raise SystemExit("内部错误:生成了重复序号")
|
||
|
||
wb = load_workbook(path)
|
||
if args.sheet not in wb.sheetnames:
|
||
raise SystemExit(f"未找到工作表: {args.sheet}")
|
||
ws = wb[args.sheet]
|
||
headers = [ws.cell(2, c).value for c in range(1, ws.max_column + 1)]
|
||
try:
|
||
col_序号 = headers.index("序号") + 1
|
||
except ValueError:
|
||
raise SystemExit(f"第 2 行表头中未找到「序号」列,当前为: {headers[:40]}")
|
||
|
||
n = len(df)
|
||
for i in range(n):
|
||
ws.cell(3 + i, col_序号).value = ids.iloc[i] if ids.iloc[i] else None
|
||
|
||
wb.save(out_path)
|
||
empty_n = (ids == "").sum()
|
||
print(f"已写入 {out_path},共 {n} 行;序号为空(日期缺失): {empty_n} 行。")
|
||
if empty_n:
|
||
print("请为日期为空的行补全日期后重新运行,或手工填写序号。")
|
||
|
||
|
||
if __name__ == "__main__":
|
||
main()
|