feat: sync full workspace including web modules, docs, and configurations to Gitea
Optimized the root .gitignore to exclude virtual environments, node modules, and temp folders to ensure clean and lightweight version tracking. Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
91
scripts/generate_jq_serial_for_h2_ledger.py
Normal file
91
scripts/generate_jq_serial_for_h2_ledger.py
Normal file
@@ -0,0 +1,91 @@
|
||||
#!/usr/bin/env python3
|
||||
"""为「明细台账」生成唯一序号:JQ + YYMMDD(6) + 当日序号(5位),每日从 00001 起。"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
import pandas as pd
|
||||
from openpyxl import load_workbook
|
||||
|
||||
|
||||
def parse_cell_date(val) -> pd.Timestamp:
|
||||
if pd.isna(val):
|
||||
return pd.NaT
|
||||
if isinstance(val, (pd.Timestamp, datetime)):
|
||||
return pd.Timestamp(val)
|
||||
if isinstance(val, (int, float)) and not isinstance(val, bool):
|
||||
v = float(val)
|
||||
if 20000 < v < 80000:
|
||||
return pd.to_datetime(v, unit="D", origin="1899-12-30")
|
||||
return pd.to_datetime(val, errors="coerce")
|
||||
|
||||
|
||||
def build_jq_ids(df: pd.DataFrame, date_col: str = "日期") -> pd.Series:
|
||||
"""与 df 同索引;无法解析日期的行为空字符串。"""
|
||||
ts = df[date_col].map(parse_cell_date)
|
||||
valid = ts.notna()
|
||||
out = pd.Series([""] * len(df), index=df.index, dtype=object)
|
||||
|
||||
sub = df.loc[valid].copy()
|
||||
sub["_ts"] = ts[valid]
|
||||
sub["_day"] = sub["_ts"].dt.normalize()
|
||||
sub["_orig"] = sub.index
|
||||
sub = sub.sort_values(["_day", "_ts", "_orig"])
|
||||
sub["_seq"] = sub.groupby("_day", sort=False).cumcount() + 1
|
||||
if (sub["_seq"] > 99999).any():
|
||||
raise SystemExit("单日记录超过 99999 条,无法使用 5 位序号,请调整规则或拆分数据。")
|
||||
yy_mm_dd = sub["_day"].dt.strftime("%y%m%d")
|
||||
out.loc[sub.index] = "JQ" + yy_mm_dd + sub["_seq"].astype(int).astype(str).str.zfill(5)
|
||||
return out
|
||||
|
||||
|
||||
def main() -> None:
|
||||
p = argparse.ArgumentParser()
|
||||
p.add_argument("xlsx", type=Path, help="加氢记录 xlsx 路径")
|
||||
p.add_argument(
|
||||
"--sheet",
|
||||
default="明细台账",
|
||||
help="工作表名,默认「明细台账」",
|
||||
)
|
||||
p.add_argument(
|
||||
"--out",
|
||||
type=Path,
|
||||
default=None,
|
||||
help="输出路径;默认覆盖输入文件",
|
||||
)
|
||||
args = p.parse_args()
|
||||
path = args.xlsx.expanduser().resolve()
|
||||
out_path = args.out.expanduser().resolve() if args.out else path
|
||||
|
||||
df = pd.read_excel(path, sheet_name=args.sheet, header=1)
|
||||
ids = build_jq_ids(df)
|
||||
|
||||
if ids[ids != ""].duplicated().any():
|
||||
raise SystemExit("内部错误:生成了重复序号")
|
||||
|
||||
wb = load_workbook(path)
|
||||
if args.sheet not in wb.sheetnames:
|
||||
raise SystemExit(f"未找到工作表: {args.sheet}")
|
||||
ws = wb[args.sheet]
|
||||
headers = [ws.cell(2, c).value for c in range(1, ws.max_column + 1)]
|
||||
try:
|
||||
col_序号 = headers.index("序号") + 1
|
||||
except ValueError:
|
||||
raise SystemExit(f"第 2 行表头中未找到「序号」列,当前为: {headers[:40]}")
|
||||
|
||||
n = len(df)
|
||||
for i in range(n):
|
||||
ws.cell(3 + i, col_序号).value = ids.iloc[i] if ids.iloc[i] else None
|
||||
|
||||
wb.save(out_path)
|
||||
empty_n = (ids == "").sum()
|
||||
print(f"已写入 {out_path},共 {n} 行;序号为空(日期缺失): {empty_n} 行。")
|
||||
if empty_n:
|
||||
print("请为日期为空的行补全日期后重新运行,或手工填写序号。")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Reference in New Issue
Block a user