Files
ONE-OS/scripts/generate_jq_serial_for_h2_ledger.py
王冕 a27e3b8e43 feat: sync full workspace including web modules, docs, and configurations to Gitea
Optimized the root .gitignore to exclude virtual environments, node modules,
and temp folders to ensure clean and lightweight version tracking.

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-06-09 18:12:25 +08:00

92 lines
3.0 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
"""为「明细台账」生成唯一序号JQ + YYMMDD(6) + 当日序号(5位),每日从 00001 起。"""
from __future__ import annotations
import argparse
from datetime import datetime
from pathlib import Path
import pandas as pd
from openpyxl import load_workbook
def parse_cell_date(val) -> pd.Timestamp:
if pd.isna(val):
return pd.NaT
if isinstance(val, (pd.Timestamp, datetime)):
return pd.Timestamp(val)
if isinstance(val, (int, float)) and not isinstance(val, bool):
v = float(val)
if 20000 < v < 80000:
return pd.to_datetime(v, unit="D", origin="1899-12-30")
return pd.to_datetime(val, errors="coerce")
def build_jq_ids(df: pd.DataFrame, date_col: str = "日期") -> pd.Series:
"""与 df 同索引;无法解析日期的行为空字符串。"""
ts = df[date_col].map(parse_cell_date)
valid = ts.notna()
out = pd.Series([""] * len(df), index=df.index, dtype=object)
sub = df.loc[valid].copy()
sub["_ts"] = ts[valid]
sub["_day"] = sub["_ts"].dt.normalize()
sub["_orig"] = sub.index
sub = sub.sort_values(["_day", "_ts", "_orig"])
sub["_seq"] = sub.groupby("_day", sort=False).cumcount() + 1
if (sub["_seq"] > 99999).any():
raise SystemExit("单日记录超过 99999 条,无法使用 5 位序号,请调整规则或拆分数据。")
yy_mm_dd = sub["_day"].dt.strftime("%y%m%d")
out.loc[sub.index] = "JQ" + yy_mm_dd + sub["_seq"].astype(int).astype(str).str.zfill(5)
return out
def main() -> None:
p = argparse.ArgumentParser()
p.add_argument("xlsx", type=Path, help="加氢记录 xlsx 路径")
p.add_argument(
"--sheet",
default="明细台账",
help="工作表名,默认「明细台账」",
)
p.add_argument(
"--out",
type=Path,
default=None,
help="输出路径;默认覆盖输入文件",
)
args = p.parse_args()
path = args.xlsx.expanduser().resolve()
out_path = args.out.expanduser().resolve() if args.out else path
df = pd.read_excel(path, sheet_name=args.sheet, header=1)
ids = build_jq_ids(df)
if ids[ids != ""].duplicated().any():
raise SystemExit("内部错误:生成了重复序号")
wb = load_workbook(path)
if args.sheet not in wb.sheetnames:
raise SystemExit(f"未找到工作表: {args.sheet}")
ws = wb[args.sheet]
headers = [ws.cell(2, c).value for c in range(1, ws.max_column + 1)]
try:
col_序号 = headers.index("序号") + 1
except ValueError:
raise SystemExit(f"第 2 行表头中未找到「序号」列,当前为: {headers[:40]}")
n = len(df)
for i in range(n):
ws.cell(3 + i, col_序号).value = ids.iloc[i] if ids.iloc[i] else None
wb.save(out_path)
empty_n = (ids == "").sum()
print(f"已写入 {out_path},共 {n} 行;序号为空(日期缺失): {empty_n} 行。")
if empty_n:
print("请为日期为空的行补全日期后重新运行,或手工填写序号。")
if __name__ == "__main__":
main()