"""
Binary file detection utilities.
This module provides functionality to determine whether a given file is a binary
file or a text file by reading the first 1024 bytes of the file and checking for
the presence of non-text characters.
The module contains the following main components:
* :func:`is_binary_file` - Determine whether a file is binary based on byte content.
.. note::
Inspired by https://stackoverflow.com/a/7392391/6995899
Example::
>>> from pyfcstm.utils.binary import is_binary_file
>>> is_binary_file('example.txt')
False
>>> is_binary_file('example.bin')
True
"""
from __future__ import annotations
# Byte values considered "text": the control characters BEL, BS, TAB, LF, FF,
# CR and ESC, plus every value from 0x20 through 0xff except DEL (0x7f).
_TEXT_CHARS = bytearray(
    {7, 8, 9, 10, 12, 13, 27} | (set(range(0x20, 0x100)) - {0x7f})
)


def is_binary_file(file: str) -> bool:
    """
    Check if a given file is binary.

    Only the first 1024 bytes of the file are inspected. Every byte listed in
    ``_TEXT_CHARS`` is stripped from that prefix; if anything remains, the file
    is reported as binary. Bytes in the range ``0x80``-``0xff`` count as text,
    so non-ASCII encoded text is not flagged. An empty file is reported as
    text, and non-text bytes located after the first 1024 bytes are not seen.

    :param file: The path to the file to be checked.
    :type file: str

    :return: ``True`` if the file is binary, ``False`` if it is a text file.
    :rtype: bool

    :raises FileNotFoundError: If the specified file does not exist.
    :raises OSError: If there is an error reading the file.

    Example::

        >>> is_binary_file('example.txt')
        False
        >>> is_binary_file('example.bin')
        True
    """
    with open(file, 'rb') as f:
        prefix = f.read(1024)

    # Delete every text byte from the prefix; any leftover byte is non-text.
    return bool(prefix.translate(None, _TEXT_CHARS))