"""
String value parsing utilities for CLI option handling.
This module provides utilities for parsing string values from command-line options
into appropriate Python types. It supports automatic type inference and explicit
type hints for robust CLI argument parsing.
The main public components are:
* :func:`parse_value` - Parse a string value to appropriate Python type
* :func:`parse_key_value_pairs` - Parse multiple key=value pairs into a dictionary
Supported types:
* Primitive types: ``None``, ``bool``, ``int``, ``float``, ``str``, ``auto``
* Container types: ``tuple[T, ...]`` (variable length), ``tuple[T1, T2, ...]`` (fixed length)
* Optional types: ``optional`` (equivalent to ``optional[auto]``), ``optional[T]``
Example::
>>> from pyfcstm.utils.parse import parse_value, parse_key_value_pairs
>>>
>>> # Auto mode - infer type from value
>>> parse_value('42')
42
>>> parse_value('true')
True
>>> parse_value('"hello world"')
'hello world'
>>>
>>> # Explicit type hints
>>> parse_value('42', 'int')
42
>>> parse_value('name,path', 'tuple[str, ...]')
('name', 'path')
>>>
>>> # Optional types
>>> parse_value('none', 'optional[str]')
None
>>> parse_value('hello', 'optional[str]')
'hello'
>>> parse_value('42', 'optional')
42
>>>
>>> # Parse multiple options
>>> parse_key_value_pairs(
... ('show_events=true', 'max_depth=2', 'format=name,path'),
... type_hints={'format': 'tuple[str, ...]', 'max_depth': 'int'}
... )
{'show_events': True, 'max_depth': 2, 'format': ('name', 'path')}
"""
import codecs
from typing import Any, Dict, Optional, Tuple, Union
# Public API of this module; the underscore-prefixed helpers defined in this
# file are deliberately not exported.
__all__ = [
    'parse_value',
    'parse_key_value_pairs',
]
def parse_value(
        value_str: str,
        expected_type: Union[type, str, None] = 'auto'
) -> Any:
    """
    Parse a string value to appropriate Python type.

    Supports: None, bool, int, float, str, auto, tuple, and optional types.

    **Type inference (auto mode)**

    When ``expected_type`` is ``'auto'`` (default), the function attempts to infer
    the type using the following priority:

    1. **int**: Try parsing as integer (e.g., ``'42'`` → ``42``)
    2. **float**: Try parsing as float (e.g., ``'3.14'`` → ``3.14``)
    3. **str (quoted)**: Check for quoted strings with escape sequences
       (e.g., ``'"hello\\nworld"'`` → ``'hello\\nworld'``)
    4. **None**: Check for ``'none'`` or ``'null'`` (case-insensitive)
    5. **bool**: Check for ``'true'``/``'yes'`` or ``'false'``/``'no'`` (case-insensitive)
    6. **str (unquoted)**: Default to string for everything else

    **Explicit type hints**

    When ``expected_type`` is specified, the value is parsed according to that type.
    String type hints are matched case-insensitively and surrounding whitespace
    is ignored.

    * **Primitive types**: Pass the type directly (``int``, ``float``, ``str``, ``bool``,
      ``None``, ``type(None)``) or as string (``'int'``, ``'float'``, ``'str'``,
      ``'bool'``, ``'none'``, ``'auto'``)
    * **Tuple types**: Use string notation (the value is split on commas):

      * ``'tuple[str, ...]'`` - Variable length tuple with single element type
      * ``'tuple[str, int]'`` - Fixed length tuple with specific types for each element
      * ``'tuple[auto, ...]'`` - Variable length tuple with auto type inference for each element

    * **Optional types**: Use string notation. The values ``'none'``/``'null'``
      (case-insensitive) always yield ``None``; any other value is parsed with
      the inner type:

      * ``'optional'`` - Equivalent to ``'optional[auto]'``
      * ``'optional[str]'`` - ``None`` for 'none'/'null', otherwise a string
      * ``'optional[int]'`` - ``None`` for 'none'/'null', otherwise an int

    **String handling**

    * **Quoted strings**: Strings enclosed in a matching pair of single or double quotes
      have the quotes removed and escape sequences processed
      (``\\n``, ``\\t``, ``\\r``, ``\\\\``, ``\\'``, ``\\"``, etc.). A lone quote
      character is not a quoted string and is kept as-is.
    * **Unquoted strings**: Used as-is when type is ``str`` or in auto mode as fallback

    :param value_str: String value to parse
    :type value_str: str
    :param expected_type: Expected type or ``'auto'`` for automatic inference (default).
        Can be a type (``int``, ``float``, ``str``, ``bool``, ``None``, ``type(None)``) or
        a string representation (``'int'``, ``'float'``, ``'str'``, ``'bool'``, ``'none'``,
        ``'auto'``, ``'tuple[str, ...]'``, ``'tuple[auto, ...]'``, ``'optional'``,
        ``'optional[str]'``).
    :type expected_type: Union[type, str, None]
    :return: Parsed value with appropriate type
    :rtype: Any
    :raises ValueError: If the value cannot be parsed as the expected type, or the
        type hint itself is not recognized

    Example::

        >>> # Auto mode - type inference
        >>> parse_value('42')
        42
        >>> parse_value('3.14')
        3.14
        >>> parse_value('true')
        True
        >>> parse_value('none')
        None
        >>> parse_value('hello')
        'hello'
        >>> parse_value('"hello world"')
        'hello world'
        >>> parse_value('"hello\\nworld"')
        'hello\\nworld'
        >>>
        >>> # Explicit type hints (type objects)
        >>> parse_value('42', int)
        42
        >>> parse_value('true', bool)
        True
        >>> parse_value('none', None)
        None
        >>> parse_value('none', type(None))
        None
        >>> parse_value('"hello world"', str)
        'hello world'
        >>>
        >>> # Explicit type hints (string representations)
        >>> parse_value('42', 'int')
        42
        >>> parse_value('3.14', 'float')
        3.14
        >>> parse_value('true', 'bool')
        True
        >>> parse_value('none', 'none')
        None
        >>> parse_value('hello', 'str')
        'hello'
        >>>
        >>> # Tuple types
        >>> parse_value('name,path', 'tuple[str, ...]')
        ('name', 'path')
        >>> parse_value('name,42', 'tuple[str, int]')
        ('name', 42)
        >>> parse_value('a,b,c', 'tuple[str, ...]')
        ('a', 'b', 'c')
    """
    # None type: both ``None`` and ``type(None)`` are accepted as the hint.
    if expected_type is None or expected_type is type(None):
        if value_str.lower() in ('none', 'null'):
            return None
        raise ValueError(f"Expected None, got: {value_str}")

    # Any other non-string hint is a concrete Python type object.
    if not isinstance(expected_type, str):
        return _parse_single_value(value_str, expected_type)

    # String hints: normalise once so every family below is matched the same
    # way (case-insensitive, surrounding whitespace ignored).
    type_spec = expected_type.strip()
    spec_lower = type_spec.lower()

    if spec_lower == 'none':
        if value_str.lower() in ('none', 'null'):
            return None
        raise ValueError(f"Expected None, got: {value_str}")

    if spec_lower.startswith('optional'):
        return _parse_optional_value(value_str, type_spec)

    if spec_lower.startswith('tuple['):
        return _parse_tuple_value(value_str, type_spec)

    if spec_lower != 'auto':
        return _parse_single_value(value_str, _parse_type_string(type_spec))

    # Auto mode: try to infer type.
    # Priority: int -> float -> quoted string -> none -> bool -> unquoted string
    try:
        return int(value_str)
    except ValueError:
        pass

    try:
        return float(value_str)
    except ValueError:
        pass

    # A quoted string needs an opening AND a closing quote, hence the length
    # check: a lone '"' would otherwise match both ends and decode to ''.
    if len(value_str) >= 2 and value_str[0] == value_str[-1] and value_str[0] in ('"', "'"):
        return _decode_string(value_str[1:-1])

    lowered = value_str.lower()
    if lowered in ('none', 'null'):
        return None
    if lowered in ('true', 'yes'):
        return True
    if lowered in ('false', 'no'):
        return False

    # Default to string
    return value_str


def _parse_optional_value(value_str: str, type_spec: str) -> Any:
    """
    Parse a value against an ``optional`` / ``optional[T]`` type hint.

    :param value_str: String value to parse
    :type value_str: str
    :param type_spec: Whitespace-stripped hint that starts with ``optional``
        (any letter case)
    :type type_spec: str
    :return: ``None`` for 'none'/'null', otherwise the value parsed as ``T``
    :rtype: Any
    :raises ValueError: If the hint is malformed or the value cannot be parsed
    """
    spec_lower = type_spec.lower()
    if spec_lower == 'optional':
        # optional without type parameter is equivalent to optional[auto]
        inner_type = 'auto'
    elif spec_lower.startswith('optional[') and type_spec.endswith(']'):
        inner_type = type_spec[len('optional['):-1].strip()
    else:
        # e.g. 'optionalint' or 'optional[int' - reject instead of slicing blindly
        raise ValueError(f"Unknown type: {type_spec}")

    # The none/null literals win over the inner type, so that
    # optional[str] maps 'none' to None rather than to the string 'none'.
    if value_str.lower() in ('none', 'null'):
        return None
    return parse_value(value_str, inner_type)


def _parse_tuple_value(value_str: str, type_spec: str) -> Tuple[Any, ...]:
    """
    Parse a comma-separated value against a ``tuple[...]`` type hint.

    :param value_str: String value to parse; it is split on every comma
    :type value_str: str
    :param type_spec: Whitespace-stripped hint that starts with ``tuple[``
        (any letter case)
    :type type_spec: str
    :return: Tuple of parsed elements
    :rtype: Tuple[Any, ...]
    :raises ValueError: If the hint is malformed, an element type is unknown,
        the element count does not match a fixed-length hint, or an element
        cannot be parsed
    """
    if not type_spec.endswith(']'):
        raise ValueError(f"Unknown type: {type_spec}")

    def resolve(element_spec: str) -> Union[type, str]:
        # 'auto' is passed through as the auto-inference marker.
        if element_spec.lower() == 'auto':
            return 'auto'
        return _parse_type_string(element_spec)

    inner = type_spec[len('tuple['):-1].strip()
    parts = [part.strip() for part in value_str.split(',')]

    # Variable-length form 'T, ...' - detected on a copy with all spaces
    # removed so that any spacing around the comma/ellipsis is accepted.
    compact = inner.replace(' ', '')
    if compact.endswith(',...'):
        element_type = resolve(compact[:-len(',...')])
        return tuple(parse_value(part, element_type) for part in parts)

    # Fixed-length form 'T1, T2, ...': one type per element. Types are
    # resolved before the length check so an unknown type is reported first.
    element_types = [resolve(t.strip()) for t in inner.split(',')]
    if len(parts) != len(element_types):
        raise ValueError(
            f"Expected {len(element_types)} elements for {type_spec}, got {len(parts)}"
        )
    return tuple(
        parse_value(part, element_type)
        for part, element_type in zip(parts, element_types)
    )
def _decode_string(s: str) -> str:
"""
Decode escape sequences in a string.
Supports all Python escape sequences: \\n, \\t, \\r, \\\\, \\', \\", \\xhh, \\uhhhh, \\Uhhhhhhhh, etc.
:param s: String with potential escape sequences
:type s: str
:return: Decoded string
:rtype: str
"""
try:
# Use Python's built-in codec to decode escape sequences
return codecs.decode(s, 'unicode_escape')
except (UnicodeDecodeError, ValueError):
# UnicodeDecodeError: codecs.decode raises this for malformed escape
# bytes (e.g. truncated ``\xZ`` or invalid UTF-8 after decoding).
# ValueError: raised for malformed ``\N{...}`` named escapes and
# other codec-level violations.
# Both indicate user-authored escape strings that can't be decoded
# — degrade gracefully by returning the original text. Anything
# outside this set (TypeError on a non-str, AttributeError, etc.)
# is a programmer bug and must surface.
return s
def _parse_type_string(type_str: str) -> Union[type, str]:
"""
Parse a type string to a Python type or special string.
:param type_str: Type string like 'int', 'float', 'str', 'bool', 'none', 'auto'
:type type_str: str
:return: Python type or 'auto' string
:rtype: Union[type, str]
:raises ValueError: If the type string is not recognized
"""
type_map = {
'int': int,
'float': float,
'str': str,
'bool': bool,
'none': type(None),
'auto': 'auto',
}
type_str_lower = type_str.lower()
if type_str_lower not in type_map:
raise ValueError(f"Unknown type: {type_str}")
return type_map[type_str_lower]
def _parse_single_value(value_str: str, expected_type: Union[type, str]) -> Any:
"""
Parse a single value with a specific expected type.
:param value_str: String value to parse
:type value_str: str
:param expected_type: Expected Python type or 'auto'
:type expected_type: Union[type, str]
:return: Parsed value
:rtype: Any
:raises ValueError: If the value cannot be parsed as the expected type
"""
# Handle 'auto' type - use auto mode inference
if expected_type == 'auto':
return parse_value(value_str, 'auto')
# Handle None
if expected_type is type(None):
if value_str.lower() in ('none', 'null'):
return None
raise ValueError(f"Expected None, got: {value_str}")
# Handle bool
if expected_type is bool:
if value_str.lower() in ('true', 'yes', '1'):
return True
if value_str.lower() in ('false', 'no', '0'):
return False
raise ValueError(f"Expected bool, got: {value_str}")
# Handle str with quote removal and escape sequence decoding
if expected_type is str:
if (value_str.startswith('"') and value_str.endswith('"')) or \
(value_str.startswith("'") and value_str.endswith("'")):
return _decode_string(value_str[1:-1])
return value_str
# Handle int
if expected_type is int:
try:
return int(value_str)
except ValueError:
raise ValueError(f"Expected int, got: {value_str}")
# Handle float
if expected_type is float:
try:
return float(value_str)
except ValueError:
raise ValueError(f"Expected float, got: {value_str}")
raise ValueError(f"Unsupported type: {expected_type}")
def parse_key_value_pairs(
        option_pairs: Tuple[str, ...],
        type_hints: Optional[Dict[str, Union[type, str]]] = None
) -> Dict[str, Any]:
    """
    Parse multiple key=value pairs into a dictionary.

    Each pair must be in the format ``'key=value'`` with a non-empty key. The pair
    is split on the first ``=`` only, so the value itself may contain ``=``.
    Whitespace around the key and the value is stripped. Values are parsed
    according to the type hints provided in ``type_hints``. If a key is not in
    ``type_hints``, its value is parsed in auto mode (type inference). If the same
    key appears more than once, the last occurrence wins.

    :param option_pairs: Tuple of 'key=value' strings
    :type option_pairs: Tuple[str, ...]
    :param type_hints: Optional dictionary mapping field names to expected types.
        If a field is not in type_hints, it will be parsed with 'auto' mode.
        Type hints can be Python types (``int``, ``float``, ``str``, ``bool``, ``None``,
        ``type(None)``) or string representations (``'int'``, ``'float'``, ``'str'``,
        ``'bool'``, ``'none'``, ``'tuple[str, ...]'``, ``'tuple[str, int]'``).
    :type type_hints: Optional[Dict[str, Union[type, str]]]
    :return: Dictionary of parsed options
    :rtype: Dict[str, Any]
    :raises ValueError: If a pair is not in 'key=value' format (missing ``=`` or
        empty key) or parsing fails

    Example::

        >>> # Auto mode for all fields
        >>> parse_key_value_pairs(('show_events=true', 'max_depth=2'))
        {'show_events': True, 'max_depth': 2}
        >>>
        >>> # With type hints (type objects)
        >>> parse_key_value_pairs(
        ...     ('state_name_format=name,path', 'max_depth=2', 'enabled=true'),
        ...     type_hints={'state_name_format': 'tuple[str, ...]', 'max_depth': int}
        ... )
        {'state_name_format': ('name', 'path'), 'max_depth': 2, 'enabled': True}
        >>>
        >>> # With type hints (string representations)
        >>> parse_key_value_pairs(
        ...     ('state_name_format=name,path', 'max_depth=2', 'enabled=true'),
        ...     type_hints={'state_name_format': 'tuple[str, ...]', 'max_depth': 'int', 'enabled': 'bool'}
        ... )
        {'state_name_format': ('name', 'path'), 'max_depth': 2, 'enabled': True}
        >>>
        >>> # Complex example with multiple types
        >>> parse_key_value_pairs(
        ...     ('name="My App"', 'version=1.0', 'debug=false', 'ports=8080,8081,8082'),
        ...     type_hints={'name': str, 'version': float, 'debug': bool, 'ports': 'tuple[int, ...]'}
        ... )
        {'name': 'My App', 'version': 1.0, 'debug': False, 'ports': (8080, 8081, 8082)}
    """
    type_hints = type_hints or {}
    options = {}

    for pair in option_pairs:
        # partition() splits on the first '=' only; ``separator`` is empty
        # when the pair contains no '=' at all.
        key, separator, value = pair.partition('=')
        key = key.strip()
        if not separator or not key:
            # '=value' has no usable key and is rejected like a missing '='.
            raise ValueError(
                f"Option must be in 'key=value' format, got: {pair}"
            )

        # Fields without an explicit hint fall back to auto inference.
        expected_type = type_hints.get(key, 'auto')
        try:
            options[key] = parse_value(value.strip(), expected_type)
        except ValueError as e:
            # Add the option name for context and keep the original error
            # reachable as the explicit cause.
            raise ValueError(
                f"Failed to parse option '{key}': {e}"
            ) from e

    return options