\1

', html, flags=re.MULTILINE ) # ### Subsection →

Subsection

html = re.sub( r'^### (.+)$', r'

\1

', html, flags=re.MULTILINE ) # #### Subsubsection →

Title

html = re.sub( r'^#### (.+)$', r'

\1

', html, flags=re.MULTILINE ) # Convert **bold** text html = re.sub(r'\*\*(.+?)\*\*', r'\1', html) # Convert *italic* text html = re.sub(r'\*(.+?)\*', r'\1', html) # Convert inline code `code` html = re.sub(r'`(.+?)`', r'\1', html) # Convert unordered lists html = _convert_lists(html) # Convert tables html = _convert_tables(html) # Convert paragraphs (wrap non-HTML lines in

tags) html = _convert_paragraphs(html) # Close all open sections html = _close_sections(html) # Wrap executive summary if present html = html.replace( '

Executive Summary

', '

Executive Summary

' ) if '

' in html: # Close executive summary at the next section html = html.replace( '\n

', '

', 1 ) return html def _convert_bibliography_section(markdown: str) -> str: """Convert bibliography section to HTML""" if not markdown.strip(): return "" html = markdown # Convert each [N] citation to a proper bibliography entry # Look for patterns like [1] Title - URL html = re.sub( r'\[(\d+)\]\s*(.+?)\s*-\s*(https?://[^\s\)]+)', r'

[\1] \2

', html ) # Convert any remaining **bold** sections html = re.sub(r'\*\*(.+?)\*\*', r'\1', html) # Wrap in bibliography content div html = f'

{html}

' return html def _convert_lists(html: str) -> str: """Convert markdown lists to HTML lists""" lines = html.split('\n') result = [] in_list = False list_level = 0 for i, line in enumerate(lines): stripped = line.strip() # Check for unordered list item if stripped.startswith('- ') or stripped.startswith('* '): if not in_list: result.append('

{content}

{content}

' if '

' in '\n'.join(result[-10:]) else '') in_list = False list_level = 0 result.append(line) # Close any remaining open list if in_list: result.append('' if '

{cell}
{cell}

' in stripped or '' in stripped: if in_paragraph: result.append('

') in_paragraph = False result.append(line) continue # Regular text line - wrap in paragraph if not in_paragraph: result.append('

' + line) in_paragraph = True else: result.append(line) if in_paragraph: result.append('

') return '\n'.join(result) def _close_sections(html: str) -> str: """Close all open section divs""" # Count open and closed divs open_divs = html.count('

') closed_divs = html.count('

') # Add closing divs for sections # Each section should be closed before the next section starts lines = html.split('\n') result = [] section_open = False for i, line in enumerate(lines): if '

' in line: if section_open: result.append('

') # Close previous section section_open = True result.append(line) # Close final section if still open if section_open: result.append('

') return '\n'.join(result) def main(): """Test the converter with a sample markdown file""" import sys if len(sys.argv) < 2: print("Usage: python md_to_html.py ") sys.exit(1) md_file = Path(sys.argv[1]) if not md_file.exists(): print(f"Error: File {md_file} not found") sys.exit(1) markdown_text = md_file.read_text() content_html, bib_html = convert_markdown_to_html(markdown_text) print("=== CONTENT HTML ===") print(content_html[:1000]) print("\n=== BIBLIOGRAPHY HTML ===") print(bib_html[:500]) if __name__ == "__main__": main()

Section Title

\1

Subsection

\1

Title

\1

Executive Summary

Executive Summary