# Source code for pyfcstm.utils.doc

"""
Multiline comment formatting utilities for extracted source documentation.

This module provides a single utility function used to clean and normalize
multiline comments that are commonly extracted from source code by tools such
as ANTLR4. It focuses on removing C-style comment delimiters, trimming
unnecessary whitespace, and normalizing indentation to produce readable text.

The module contains the following main components:

* :func:`format_multiline_comment` - Normalize and clean multiline comment text

.. note::
   Line separators are normalized to ``'\\n'`` when the ``UNITTEST`` environment
   variable is set, enabling deterministic behavior in test environments.

Example::

    >>> raw = \"\"\"/*
    ...     Example
    ...     multiline comment
    ... */\"\"\"
    >>> format_multiline_comment(raw)
    'Example\\nmultiline comment'
"""

import os
import re
import textwrap


def format_multiline_comment(raw_doc: str) -> str:
    """
    Format multiline comments parsed by ANTLR4 by removing comment markers
    and aligning indentation.

    The surrounding ``/* ... */`` delimiters (with any number of asterisks,
    e.g. ``/** ... */``) are removed, leading and trailing blank lines are
    dropped, trailing whitespace is stripped from every line, and the common
    leading indentation is removed with :func:`textwrap.dedent`.

    :param raw_doc: Raw comment text including ``/* */`` markers
    :type raw_doc: str

    :return: Formatted comment text with markers removed and indentation
        normalized; an empty string when the comment has no body
    :rtype: str

    .. note::
       Only the opening and closing delimiters are removed. A leading ``*``
       on interior lines (the ``" * text"`` continuation style) is kept as
       part of the text.

    .. note::
       Lines are joined with ``'\\n'`` when the ``UNITTEST`` environment
       variable is set, and with :data:`os.linesep` otherwise.

    Example::

        >>> raw = \"\"\"/*
        ...     This is a
        ...     multiline comment
        ... */\"\"\"
        >>> format_multiline_comment(raw)
        'This is a\\nmultiline comment'
    """
    stripped = raw_doc.strip()

    # A comment made only of delimiters ("/**/", "/***/", ...) has no body.
    if re.fullmatch(r'\s*/\*+/\s*', stripped):
        return ""

    # Remove the opening marker ("/" followed by one or more asterisks) ...
    content = re.sub(r'^\s*/\*+', '', stripped)
    # ... and the closing marker (one or more asterisks followed by "/").
    content = re.sub(r'\*+/\s*$', '', content)

    # Trailing whitespace is never meaningful; stripping it up front also
    # turns whitespace-only lines into empty strings, which keeps the
    # blank-line trimming below trivial.
    lines = [line.rstrip() for line in content.splitlines()]
    while lines and not lines[0]:
        lines.pop(0)
    while lines and not lines[-1]:
        lines.pop()

    # Dedent BEFORE applying the platform line separator. textwrap.dedent
    # only understands '\n': with '\r\n' an interior blank line becomes a
    # lone '\r', which older Python versions count as unindented content
    # (so nothing gets dedented) and newer ones silently drop.
    dedented = textwrap.dedent('\n'.join(lines))

    linesep = '\n' if os.environ.get('UNITTEST') else os.linesep
    if linesep == '\n':
        return dedented
    return dedented.replace('\n', linesep)