Source code for pyfcstm.utils.doc
"""
Multiline comment formatting utilities for extracted source documentation.
This module provides a single utility function used to clean and normalize
multiline comments that are commonly extracted from source code by tools such
as ANTLR4. It focuses on removing C-style comment delimiters, trimming
unnecessary whitespace, and normalizing indentation to produce readable text.
The module contains the following main components:
* :func:`format_multiline_comment` - Normalize and clean multiline comment text
.. note::
Line separators are normalized to ``'\\n'`` when the ``UNITTEST`` environment
variable is set, enabling deterministic behavior in test environments.
Example::
>>> raw = '/* Example\\n * multiline comment\\n */'
>>> format_multiline_comment(raw)  # with '\\n' as the line separator
'Example\\n* multiline comment'
"""
import os
import re
import textwrap
[docs]
def format_multiline_comment(raw_doc: str) -> str:
    """
    Format multiline comments parsed by ANTLR4 by removing comment markers
    and aligning indentation.

    The function strips the opening ``/*`` (with any number of extra
    asterisks) and the closing ``*/`` markers, drops leading and trailing
    blank lines, removes trailing whitespace from every line and finally
    removes the indentation shared by all remaining lines.

    Leading ``*`` decorations on continuation lines are *not* removed; they
    are kept as part of the text.

    Lines are joined with ``'\\n'`` when the ``UNITTEST`` environment variable
    is set to a non-empty value, and with ``os.linesep`` otherwise.

    :param raw_doc: Raw comment text including ``/* */`` markers
    :type raw_doc: str
    :return: Comment text with markers removed and common indentation
        stripped; an empty string when the comment has no content
    :rtype: str

    Example (with ``'\\n'`` as the line separator)::

        >>> format_multiline_comment('/* Single line */')
        'Single line'
        >>> format_multiline_comment('/* This is a\\n * multiline comment\\n */')
        'This is a\\n* multiline comment'
        >>> format_multiline_comment('/**/')
        ''
    """
    stripped = raw_doc.strip()

    # A comment consisting solely of its delimiters (e.g. ``/**/``) is empty.
    if re.fullmatch(r'\s*/\*+/\s*', stripped):
        return ""

    # Remove the opening marker (``/`` followed by one or more asterisks) ...
    content = re.sub(r'^\s*/\*+', '', stripped)
    # ... and the closing marker (one or more asterisks followed by ``/``).
    content = re.sub(r'\*+/\s*$', '', content)

    # Trailing whitespace never carries meaning here; removing it up front
    # also turns whitespace-only lines into empty strings.
    lines = [line.rstrip() for line in content.splitlines()]

    # Locate the first and last non-blank lines.
    start = 0
    end = len(lines)
    while start < end and not lines[start]:
        start += 1
    while end > start and not lines[end - 1]:
        end -= 1

    # Dedent while the text is still '\n'-separated. Joining with '\r\n'
    # first would leave a stray '\r' on interior blank lines, which
    # textwrap.dedent does not handle as an empty line.
    formatted_text = textwrap.dedent('\n'.join(lines[start:end]))

    linesep = '\n' if os.environ.get('UNITTEST') else os.linesep
    if linesep != '\n':
        formatted_text = formatted_text.replace('\n', linesep)
    return formatted_text