#!/usr/bin/env python3
"""
Markdown to HTML converter for research reports
Properly converts markdown sections to HTML while preserving structure and formatting
"""

import re
from typing import Tuple
from pathlib import Path


def convert_markdown_to_html(markdown_text: str) -> Tuple[str, str]:
    """
    Convert markdown to HTML in two parts: content and bibliography

    The report is cut at the first occurrence of the literal marker
    ``## Bibliography``. Everything before the marker is passed to
    ``_convert_content_section``; everything after it is passed to
    ``_convert_bibliography_section``. When the marker is absent, the
    whole text is treated as content and the bibliography converter
    receives an empty string.

    Args:
        markdown_text: Full markdown report text

    Returns:
        Tuple of (content_html, bibliography_html)
    """
    # partition() cuts at the first marker only. A plain split() would cut at
    # every occurrence (including inside a '### Bibliography' sub-heading) and
    # anything after a second marker would be silently discarded.
    content_md, _marker, bibliography_md = markdown_text.partition('## Bibliography')

    # Convert content (everything except bibliography)
    content_html = _convert_content_section(content_md)

    # Convert bibliography separately
    bibliography_html = _convert_bibliography_section(bibliography_md)

    return content_html, bibliography_html


def _convert_content_section(markdown: str) -> str:
    """Convert main content sections to HTML"""
    html = markdown

    # Remove title and front matter (first ## heading is handled separately)
    lines = html.split('\n')
    processed_lines = []
    skip_until_first_section = True

    for line in lines:
        # Skip everything until we hit "## Executive Summary" or first major section
        if skip_until_first_section:
            if line.startswith('## ') and not line.startswith('### '):
                skip_until_first_section = False
                processed_lines.append(line)
            continue
        processed_lines.append(line)

    html = '\n'.join(processed_lines)

    # Convert headers
    # ## Section Title →
\1', html)
# Convert unordered lists
html = _convert_lists(html)
# Convert tables
html = _convert_tables(html)
# Convert paragraphs (wrap non-HTML lines in tags) html = _convert_paragraphs(html) # Close all open sections html = _close_sections(html) # Wrap executive summary if present html = html.replace( '
| {cell} | ') result.append('
|---|
| {cell} | ') result.append('
' + line) in_paragraph = True else: result.append(line) if in_paragraph: result.append('
') return '\n'.join(result) def _close_sections(html: str) -> str: """Close all open section divs""" # Count open and closed divs open_divs = html.count('