feat: sync full workspace including web modules, docs, and configurations to Gitea
Optimized the root .gitignore to exclude virtual environments, node modules, and temp folders to ensure clean and lightweight version tracking. Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
354
axhub-make/skills/third-party/deep-research/scripts/validate_report.py
vendored
Normal file
354
axhub-make/skills/third-party/deep-research/scripts/validate_report.py
vendored
Normal file
@@ -0,0 +1,354 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Report Validation Script
|
||||
Ensures research reports meet quality standards before delivery
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import re
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import List, Tuple, Dict
|
||||
|
||||
|
||||
class ReportValidator:
    """Validate a markdown research report against a fixed set of quality checks.

    The report text is read once at construction time. ``validate()`` runs
    every check, collecting blocking problems in ``errors`` and advisory ones
    in ``warnings``, and prints a human-readable summary to stdout.

    Citation numbers are handled as integers, and "in-text" citations are the
    bracketed numbers found *outside* the ``## Bibliography`` section, so the
    bibliography's own entry labels are not mistaken for uses of a source.
    """

    # Body of the "## Bibliography" section, up to the next "##" or end of text.
    _BIBLIOGRAPHY_RE = re.compile(r'## Bibliography(.*?)(?=##|\Z)', re.DOTALL | re.IGNORECASE)
    # Any bracketed number such as "[12]".
    _CITATION_RE = re.compile(r'\[(\d+)\]')
    # A bracketed number at the start of a line, i.e. a bibliography entry label.
    _BIB_ENTRY_RE = re.compile(r'^\[(\d+)\]', re.MULTILINE)

    def __init__(self, report_path: Path):
        """Load the report at *report_path*; exits the process if it cannot be read."""
        self.report_path = report_path
        self.content = self._read_report()
        self.errors: List[str] = []
        self.warnings: List[str] = []

    def _read_report(self) -> str:
        """Return the report text decoded as UTF-8.

        Prints an error and exits with status 1 when the file cannot be
        opened or decoded.
        """
        try:
            with open(self.report_path, 'r', encoding='utf-8') as f:
                return f.read()
        except (OSError, ValueError) as e:
            # ValueError covers UnicodeDecodeError (invalid UTF-8) as well.
            print(f"❌ ERROR: Cannot read report: {e}")
            sys.exit(1)

    def validate(self) -> bool:
        """Run all validation checks and print a summary.

        Returns:
            True when no errors were recorded (warnings do not fail the run).
        """
        # Start from a clean slate so repeated calls do not accumulate
        # duplicate messages from earlier runs.
        self.errors = []
        self.warnings = []

        print(f"\n{'='*60}")
        print(f"VALIDATING REPORT: {self.report_path.name}")
        print(f"{'='*60}\n")

        checks = [
            ("Executive Summary", self._check_executive_summary),
            ("Required Sections", self._check_required_sections),
            ("Citations", self._check_citations),
            ("Bibliography", self._check_bibliography),
            ("Placeholder Text", self._check_placeholders),
            ("Content Truncation", self._check_content_truncation),
            ("Word Count", self._check_word_count),
            ("Source Count", self._check_source_count),
            ("Broken Links", self._check_broken_references),
        ]

        for check_name, check_func in checks:
            print(f"⏳ Checking: {check_name}...", end=" ")
            print("✅ PASS" if check_func() else "❌ FAIL")

        self._print_summary()

        return not self.errors

    # ------------------------------------------------------------------
    # Shared helpers
    # ------------------------------------------------------------------

    def _find_bibliography(self):
        """Return the regex match for the bibliography section, or None."""
        return self._BIBLIOGRAPHY_RE.search(self.content)

    def _body_citation_numbers(self) -> set:
        """Return the set of citation numbers (ints) used outside the bibliography."""
        body = self.content
        match = self._find_bibliography()
        if match:
            # Cut the bibliography out so its "[n]" entry labels are not
            # counted as in-text citations.
            body = body[:match.start()] + body[match.end():]
        return {int(num) for num in self._CITATION_RE.findall(body)}

    def _missing_sections(self, names: List[str]) -> List[str]:
        """Return the entries of *names* that have no matching ``##`` heading."""
        return [
            name for name in names
            if not re.search(rf'##.*{re.escape(name)}', self.content, re.IGNORECASE)
        ]

    # ------------------------------------------------------------------
    # Individual checks (each returns True on pass, False on fail)
    # ------------------------------------------------------------------

    def _check_executive_summary(self) -> bool:
        """Check the executive summary exists; warn if it is over 250 or under 50 words."""
        pattern = r'## Executive Summary(.*?)(?=##|\Z)'
        match = re.search(pattern, self.content, re.DOTALL | re.IGNORECASE)

        if not match:
            self.errors.append("Missing 'Executive Summary' section")
            return False

        word_count = len(match.group(1).strip().split())

        if word_count > 250:
            self.warnings.append(f"Executive summary too long: {word_count} words (should be ≤250)")

        if word_count < 50:
            self.warnings.append(f"Executive summary too short: {word_count} words (should be ≥50)")

        return True

    def _check_required_sections(self) -> bool:
        """Check all required sections are present; warn about missing recommended ones."""
        required = [
            "Executive Summary",
            "Introduction",
            "Main Analysis",
            "Synthesis",
            "Limitations",
            "Recommendations",
            "Bibliography",
            "Methodology",
        ]

        # Recommended sections (warnings if missing, not errors)
        recommended = [
            "Counterevidence Register",
            "Claims-Evidence Table",
        ]

        missing = self._missing_sections(required)
        if missing:
            self.errors.append(f"Missing sections: {', '.join(missing)}")
            return False

        # Recommended sections are only examined once all required ones exist.
        missing_recommended = self._missing_sections(recommended)
        if missing_recommended:
            self.warnings.append(f"Missing recommended sections (for academic rigor): {', '.join(missing_recommended)}")

        return True

    def _check_citations(self) -> bool:
        """Check that the report body cites sources as [1], [2], ...

        Fails when the text outside the bibliography contains no citation.
        Warns when fewer than 10 distinct sources are cited or when the cited
        numbers are not consecutive starting from 1.
        """
        cited = self._body_citation_numbers()

        if not cited:
            self.errors.append("No citations found in report")
            return False

        if len(cited) < 10:
            self.warnings.append(f"Only {len(cited)} unique sources cited (recommended: ≥10)")

        # Check for consecutive citation numbers
        missing = sorted(set(range(1, max(cited) + 1)) - cited)
        if missing:
            self.warnings.append(f"Non-consecutive citation numbers, missing: {missing}")

        return True

    def _check_bibliography(self) -> bool:
        """Check bibliography exists, matches citations, and has no truncation placeholders."""
        match = self._find_bibliography()

        if not match:
            self.errors.append("Missing 'Bibliography' section")
            return False

        bib_section = match.group(1)

        # CRITICAL: Check for truncation placeholders (2025 CiteGuard enhancement)
        truncation_patterns = [
            (r'\[\d+-\d+\]', 'Citation range (e.g., [8-75])'),
            (r'Additional.*citations', 'Phrase "Additional citations"'),
            (r'would be included', 'Phrase "would be included"'),
            (r'\[\.\.\.continue', 'Pattern "[...continue"'),
            (r'\[Continue with', 'Pattern "[Continue with"'),
            (r'etc\.(?!\w)', 'Standalone "etc."'),
            (r'and so on', 'Phrase "and so on"'),
        ]

        for pattern_re, description in truncation_patterns:
            if re.search(pattern_re, bib_section, re.IGNORECASE):
                self.errors.append(f"⚠️ CRITICAL: Bibliography contains truncation placeholder: {description}")
                self.errors.append(" This makes the report UNUSABLE - complete bibliography required")
                return False

        # Bibliography entries are lines starting with [1], [2], etc.
        bib_numbers = {int(num) for num in self._BIB_ENTRY_RE.findall(bib_section)}

        if not bib_numbers:
            self.errors.append("Bibliography has no entries")
            return False

        # Check citation number continuity (no gaps)
        gaps = [n for n in range(1, max(bib_numbers) + 1) if n not in bib_numbers]
        if gaps:
            self.errors.append(f"Bibliography has gaps in numbering: missing {gaps}")
            return False

        # In-text citations exclude the bibliography's own entry labels.
        cited = self._body_citation_numbers()

        # Check all citations have bibliography entries
        missing_in_bib = cited - bib_numbers
        if missing_in_bib:
            self.errors.append(f"Citations missing from bibliography: {sorted(missing_in_bib)}")
            return False

        # Check for unused bibliography entries
        unused = bib_numbers - cited
        if unused:
            self.warnings.append(f"Unused bibliography entries: {sorted(unused)}")

        return True

    def _check_placeholders(self) -> bool:
        """Check for placeholder text that shouldn't be in a final report.

        Bare tokens (``TBD``, ``TODO``, ...) must stand alone as words so that
        ordinary text such as the Roman numeral ``XXXIV`` is not flagged;
        bracketed markers are matched literally. Matching is case-sensitive.
        """
        placeholders = [
            'TBD', 'TODO', 'FIXME', 'XXX',
            '[citation needed]', '[needs citation]',
            '[placeholder]', '[TODO]', '[TBD]',
        ]

        found_placeholders = []
        for placeholder in placeholders:
            if placeholder.startswith('['):
                hit = placeholder in self.content
            else:
                token_re = rf'(?<!\w){re.escape(placeholder)}(?!\w)'
                hit = re.search(token_re, self.content) is not None
            if hit:
                found_placeholders.append(placeholder)

        if found_placeholders:
            self.errors.append(f"Found placeholder text: {', '.join(found_placeholders)}")
            return False

        return True

    def _check_content_truncation(self) -> bool:
        """Check for content truncation patterns (2025 Progressive Assembly enhancement)."""
        truncation_patterns = [
            (r'Content continues', 'Phrase "Content continues"'),
            (r'Due to length', 'Phrase "Due to length"'),
            (r'would continue', 'Phrase "would continue"'),
            (r'\[Sections \d+-\d+', 'Pattern "[Sections X-Y"'),
            (r'Additional sections', 'Phrase "Additional sections"'),
            (r'comprehensive.*word document that continues', 'Pattern "comprehensive...document that continues"'),
        ]

        # Only the first matching pattern is reported.
        for pattern_re, description in truncation_patterns:
            if re.search(pattern_re, self.content, re.IGNORECASE):
                self.errors.append(f"⚠️ CRITICAL: Content truncation detected: {description}")
                self.errors.append(" Report is INCOMPLETE and UNUSABLE - regenerate with progressive assembly")
                return False

        return True

    def _check_word_count(self) -> bool:
        """Warn when the whole report is under 500 words; never fails."""
        word_count = len(self.content.split())

        if word_count < 500:
            self.warnings.append(f"Report is very short: {word_count} words (consider expanding)")
        # No upper limit warning - progressive assembly supports unlimited lengths

        return True

    def _check_source_count(self) -> bool:
        """Warn when the bibliography lists fewer than 10 distinct sources; never fails."""
        match = self._find_bibliography()

        if not match:
            return True  # Already caught in bibliography check

        source_count = len({int(num) for num in self._BIB_ENTRY_RE.findall(match.group(1))})

        if source_count < 10:
            self.warnings.append(f"Only {source_count} sources (recommended: ≥10)")

        return True

    def _check_broken_references(self) -> bool:
        """Check that relative markdown links of the form [text](./path) point to existing files."""
        internal_links = re.findall(r'\[.*?\]\((\.\/.*?)\)', self.content)

        broken = []
        for link in internal_links:
            # Drop any "#anchor" suffix before resolving against the report's directory.
            link_path = link.split('#')[0]
            if not (self.report_path.parent / link_path).exists():
                broken.append(link)

        if broken:
            self.errors.append(f"Broken internal links: {', '.join(broken)}")
            return False

        return True

    def _print_summary(self) -> None:
        """Print collected errors and warnings followed by an overall verdict."""
        print(f"\n{'='*60}")
        print("VALIDATION SUMMARY")
        print(f"{'='*60}\n")

        if self.errors:
            print(f"❌ ERRORS ({len(self.errors)}):")
            for error in self.errors:
                print(f" • {error}")
            print()

        if self.warnings:
            print(f"⚠️ WARNINGS ({len(self.warnings)}):")
            for warning in self.warnings:
                print(f" • {warning}")
            print()

        if not self.errors and not self.warnings:
            print("✅ ALL CHECKS PASSED - Report meets quality standards!\n")
        elif not self.errors:
            print("✅ VALIDATION PASSED (with warnings)\n")
        else:
            print("❌ VALIDATION FAILED - Please fix errors before delivery\n")
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: validate one report and exit 0 on pass, 1 otherwise."""
    usage_examples = """
Examples:
  python validate_report.py --report report.md
  python validate_report.py -r ~/.claude/research_output/research_report_20251104_153045.md
"""

    cli = argparse.ArgumentParser(
        description="Validate research report quality",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=usage_examples,
    )
    cli.add_argument(
        '--report', '-r',
        type=str,
        required=True,
        help='Path to research report markdown file',
    )

    target = Path(cli.parse_args().report)

    # Bail out early with a clear message when the path does not exist.
    if not target.exists():
        print(f"❌ ERROR: Report file not found: {target}")
        sys.exit(1)

    ok = ReportValidator(target).validate()

    # Exit status mirrors the validation result for use in scripts/CI.
    sys.exit(0 if ok else 1)
|
||||
|
||||
|
||||
# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user