Initial commit

René Mathieu
2026-01-17 13:49:51 +01:00
commit 0fef8d96c5
1897 changed files with 396119 additions and 0 deletions

# Copyright 2008-2024 pydicom authors. See LICENSE file for details.
"""Functions for reading to certain bytes, e.g. delimiters."""
from collections.abc import Generator, Iterator
from contextlib import contextmanager
from io import BufferedIOBase
import os
from struct import pack, unpack
from typing import BinaryIO, cast
from pydicom.misc import size_in_bytes
from pydicom.tag import TupleTag, Tag, SequenceDelimiterTag, ItemTag, BaseTag
from pydicom.datadict import dictionary_description
from pydicom.filebase import ReadableBuffer, WriteableBuffer
from pydicom.config import logger, settings
PathType = str | bytes | os.PathLike
def absorb_delimiter_item(
fp: BinaryIO, is_little_endian: bool, delimiter: BaseTag
) -> None:
"""Read (and ignore) undefined length sequence or item terminators."""
if is_little_endian:
struct_format = "<HHL"
else:
struct_format = ">HHL"
group, elem, length = unpack(struct_format, fp.read(8))
tag = TupleTag((group, elem))
if tag != delimiter:
        logger.warning(
"Did not find expected delimiter "
f"'{dictionary_description(delimiter)}', instead found "
f"{tag} at file position 0x{fp.tell() - 8:X}"
)
fp.seek(fp.tell() - 8)
return
logger.debug(
"%04x: Found Delimiter '%s'", fp.tell() - 8, dictionary_description(delimiter)
)
if length == 0:
logger.debug("%04x: Read 0 bytes after delimiter", fp.tell() - 4)
else:
logger.debug(
"%04x: Expected 0x00000000 after delimiter, found 0x%x",
fp.tell() - 4,
length,
)
def find_bytes(
fp: BinaryIO, bytes_to_find: bytes, read_size: int = 128, rewind: bool = True
) -> int | None:
"""Read in the file until a specific byte sequence found.
Parameters
----------
fp : file-like
The file-like to search.
bytes_to_find : bytes
Contains the bytes to find. Must be in correct endian order already.
read_size : int
Number of bytes to read at a time.
rewind : bool
Flag to rewind file reading position.
Returns
-------
found_at : int or None
Position where byte sequence was found, else ``None``.
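    Examples
    --------
    A minimal sketch using an in-memory buffer with arbitrary bytes in place
    of a real file:
    >>> from io import BytesIO
    >>> fp = BytesIO(bytes([0, 1, 2, 3, 0xFE, 0xFF, 0xDD, 0xE0]))
    >>> find_bytes(fp, bytes([0xFE, 0xFF]))
    4
    >>> fp.tell()  # rewound to the starting position by default
    0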
"""
data_start = fp.tell()
search_rewind = len(bytes_to_find) - 1
found = False
eof = False
while not found:
chunk_start = fp.tell()
bytes_read = fp.read(read_size)
if len(bytes_read) < read_size:
# try again - if still don't get required amount,
# this is the last block
new_bytes = fp.read(read_size - len(bytes_read))
bytes_read += new_bytes
if len(bytes_read) < read_size:
eof = True # but will still check whatever we did get
index = bytes_read.find(bytes_to_find)
if index != -1:
found = True
elif eof:
if rewind:
fp.seek(data_start)
return None
else:
# rewind a bit in case delimiter crossed read_size boundary
fp.seek(fp.tell() - search_rewind)
# if get here then have found the byte string
found_at = chunk_start + index
if rewind:
fp.seek(data_start)
else:
fp.seek(found_at + len(bytes_to_find))
return found_at
def read_undefined_length_value(
fp: BinaryIO,
is_little_endian: bool,
delimiter_tag: BaseTag,
defer_size: int | float | None = None,
read_size: int = 1024 * 8,
) -> bytes | None:
"""Read until `delimiter_tag` and return the value up to that point.
On completion, the file will be set to the first byte after the delimiter
and its following four zero bytes.
Parameters
----------
fp : file-like
The file-like to read.
is_little_endian : bool
``True`` if file transfer syntax is little endian, else ``False``.
delimiter_tag : BaseTag
        The tag used as the end marker for reading.
defer_size : int or None, optional
Size to avoid loading large elements in memory. See
:func:`~pydicom.filereader.dcmread` for more parameter info.
read_size : int, optional
Number of bytes to read at one time.
Returns
-------
    value : bytes or None
        The element value read up to (but not including) the delimiter, or
        ``None`` if the value is deferred due to `defer_size`.
Raises
------
EOFError
        If EOF is reached before the delimiter is found.
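    Examples
    --------
    A minimal sketch using an in-memory buffer: three arbitrary value bytes
    followed by the Sequence Delimitation Item tag (FFFE,E0DD) and a 4-byte
    zero length (not encapsulated pixel data, so the byte-by-byte scan is
    used):
    >>> from io import BytesIO
    >>> buf = BytesIO(b"abc" + bytes([0xFE, 0xFF, 0xDD, 0xE0]) + bytes(4))
    >>> read_undefined_length_value(buf, True, SequenceDelimiterTag)
    b'abc'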
"""
data_start = fp.tell()
defer_size = size_in_bytes(defer_size)
# It's common for an undefined length value item to be an
# encapsulated pixel data as defined in PS3.5 section A.4.
# Attempt to parse the data under that assumption, since the method
# 1. is proof against coincidental embedded sequence delimiter tags
# 2. avoids accumulating any data in memory if the element is large
# enough to be deferred
# 3. does not double-accumulate data (in chunks and then joined)
#
# Unfortunately, some implementations deviate from the standard and the
# encapsulated pixel data-parsing algorithm fails. In that case, we fall
# back to a method of scanning the entire element value for the
# sequence delimiter, as was done historically.
if delimiter_tag == SequenceDelimiterTag:
was_value_found, value = _try_read_encapsulated_pixel_data(
fp, is_little_endian, defer_size
)
if was_value_found:
return value
search_rewind = 3
if is_little_endian:
bytes_format = b"<HH"
else:
bytes_format = b">HH"
bytes_to_find = pack(bytes_format, delimiter_tag.group, delimiter_tag.elem)
found = False
eof = False
value_chunks = []
byte_count = 0 # for defer_size checks
while not found:
chunk_start = fp.tell()
bytes_read = fp.read(read_size)
if len(bytes_read) < read_size:
# try again - if still don't get required amount,
# this is the last block
new_bytes = fp.read(read_size - len(bytes_read))
bytes_read += new_bytes
if len(bytes_read) < read_size:
eof = True # but will still check whatever we did get
index = bytes_read.find(bytes_to_find)
if index != -1:
found = True
new_bytes = bytes_read[:index]
byte_count += len(new_bytes)
if defer_size is None or byte_count < defer_size:
value_chunks.append(new_bytes)
fp.seek(chunk_start + index + 4) # rewind to end of delimiter
length = fp.read(4)
if length != b"\0\0\0\0":
msg = (
"Expected 4 zero bytes after undefined length delimiter"
" at pos {0:04x}"
)
logger.error(msg.format(fp.tell() - 4))
elif eof:
fp.seek(data_start)
raise EOFError(
f"End of file reached before delimiter {delimiter_tag!r} found"
)
else:
# rewind a bit in case delimiter crossed read_size boundary
fp.seek(fp.tell() - search_rewind)
# accumulate the bytes read (not including the rewind)
new_bytes = bytes_read[:-search_rewind]
byte_count += len(new_bytes)
if defer_size is None or byte_count < defer_size:
value_chunks.append(new_bytes)
# if get here then have found the byte string
if defer_size is not None and byte_count >= defer_size:
return None
else:
return b"".join(value_chunks)
def _try_read_encapsulated_pixel_data(
fp: BinaryIO,
is_little_endian: bool,
defer_size: float | int | None = None,
) -> tuple[bool, bytes | None]:
"""Attempt to read an undefined length value item as if it were
encapsulated pixel data as defined in PS3.5 section A.4.
On success, the file will be set to the first byte after the delimiter
and its following four zero bytes. If unsuccessful, the file will be left
in its original position.
Parameters
----------
fp : file-like
The file-like to read.
is_little_endian : bool
``True`` if the file transfer syntax is little endian, else ``False``.
defer_size : int or None, optional
Size to avoid loading large elements in memory. See
:func:`~pydicom.filereader.dcmread` for more parameter info.
Returns
-------
    bool, bytes or None
        Whether or not the value was parsed as encapsulated pixel data and,
        if it was, the value (``None`` if deferred due to `defer_size`).
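    Examples
    --------
    A minimal sketch using an in-memory buffer: a single Item (FFFE,E000)
    with a 2-byte payload, then the Sequence Delimitation Item (FFFE,E0DD)
    and a zero length:
    >>> from io import BytesIO
    >>> item = bytes([0xFE, 0xFF, 0x00, 0xE0]) + pack("<L", 2) + b"OK"
    >>> buf = BytesIO(item + bytes([0xFE, 0xFF, 0xDD, 0xE0]) + bytes(4))
    >>> found, value = _try_read_encapsulated_pixel_data(buf, True)
    >>> found, value == item
    (True, True)
    >>> buf.tell()  # positioned after the delimiter and its zero length
    18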
"""
if is_little_endian:
tag_format = b"<HH"
length_format = b"<L"
else:
tag_format = b">HH"
length_format = b">L"
sequence_delimiter_bytes = pack(
tag_format, SequenceDelimiterTag.group, SequenceDelimiterTag.elem
)
item_bytes = pack(tag_format, ItemTag.group, ItemTag.elem)
data_start = fp.tell()
byte_count = 0
while True:
tag_bytes = fp.read(4)
if len(tag_bytes) < 4:
# End of file reached while scanning.
            # Maybe the sequence delimiter is missing or maybe we read past
            # it due to an inaccurate length indicator for an element
logger.debug(
"End of input encountered while parsing undefined length "
"value as encapsulated pixel data. Unable to find tag at "
"position 0x%x. Falling back to byte by byte scan.",
fp.tell() - len(tag_bytes),
)
fp.seek(data_start)
return (False, None)
byte_count += 4
if tag_bytes == sequence_delimiter_bytes:
break
if tag_bytes == item_bytes:
length_bytes = fp.read(4)
if len(length_bytes) < 4:
# End of file reached while scanning.
                # Maybe the sequence delimiter is missing or maybe we read
                # past it due to an inaccurate length indicator for an element
logger.debug(
"End of input encountered while parsing undefined length "
"value as encapsulated pixel data. Unable to find length "
"for tag %s at position 0x%x. Falling back to byte by "
"byte scan.",
ItemTag,
fp.tell() - len(length_bytes),
)
fp.seek(data_start)
return (False, None)
byte_count += 4
length = unpack(length_format, length_bytes)[0]
try:
fp.seek(length, os.SEEK_CUR)
except OverflowError:
logger.debug(
"Too-long length %04x for tag %s at position 0x%x found "
"while parsing undefined length value as encapsulated "
"pixel data. Falling back to byte-by-byte scan.",
length,
ItemTag,
fp.tell() - 8,
)
fp.seek(data_start)
return (False, None)
byte_count += length
else:
logger.debug(
"Unknown tag bytes %s at position 0x%x found "
"while parsing undefined length value as encapsulated "
"pixel data. Falling back to byte-by-byte scan.",
tag_bytes.hex(),
fp.tell() - 4,
)
fp.seek(data_start)
return (False, None)
length = fp.read(4)
if length != b"\0\0\0\0":
msg = "Expected 4 zero bytes after undefined length delimiter at pos {0:04x}"
logger.debug(msg.format(fp.tell() - 4))
if defer_size is not None and defer_size <= byte_count:
value = None
else:
fp.seek(data_start)
value = fp.read(byte_count - 4)
fp.seek(data_start + byte_count + 4)
return (True, value)
def find_delimiter(
fp: BinaryIO,
delimiter: BaseTag,
is_little_endian: bool,
read_size: int = 128,
rewind: bool = True,
) -> int | None:
"""Return file position where 4-byte delimiter is located.
Parameters
----------
    fp : file-like
        The file-like to search.
    delimiter : BaseTag
        The delimiter to search for.
is_little_endian : bool
``True`` if little endian, ``False`` otherwise.
read_size : int
See :func:`find_bytes` for parameter info.
rewind : bool
Flag to rewind to initial position after searching.
Returns
-------
int or None
Returns ``None`` if end of file is reached without finding the
delimiter, otherwise the byte offset to the delimiter.
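    Examples
    --------
    A minimal sketch using an in-memory buffer containing the Sequence
    Delimitation Item (FFFE,E0DD) encoded as little endian:
    >>> from io import BytesIO
    >>> fp = BytesIO(b"spam" + bytes([0xFE, 0xFF, 0xDD, 0xE0]) + bytes(4))
    >>> find_delimiter(fp, SequenceDelimiterTag, is_little_endian=True)
    4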
"""
struct_format = "<H"
if not is_little_endian:
struct_format = ">H"
delimiter = Tag(delimiter)
bytes_to_find = pack(struct_format, delimiter.group) + pack(
struct_format, delimiter.elem
)
return find_bytes(fp, bytes_to_find, read_size=read_size, rewind=rewind)
def length_of_undefined_length(
fp: BinaryIO,
delimiter: BaseTag,
is_little_endian: bool,
read_size: int = 128,
rewind: bool = True,
) -> int | None:
"""Search through the file to find the delimiter and return the length
of the data element.
Parameters
----------
fp : file-like
The file-like to read.
    delimiter : BaseTag
See :func:`find_delimiter` for parameter info.
is_little_endian : bool
``True`` if little endian, ``False`` otherwise.
read_size : int
See :func:`find_bytes` for parameter info.
rewind : bool
Flag to rewind to initial position after searching.
Returns
-------
int
Byte offset to the delimiter.
Notes
-----
    Note the data element that the delimiter starts is not read here; the
    calling routine must handle that. The delimiter must be 4 bytes long.
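    Examples
    --------
    A minimal sketch using an in-memory buffer whose value bytes are followed
    by the little endian Sequence Delimitation Item (FFFE,E0DD):
    >>> from io import BytesIO
    >>> fp = BytesIO(b"value bytes" + bytes([0xFE, 0xFF, 0xDD, 0xE0]) + bytes(4))
    >>> length_of_undefined_length(fp, SequenceDelimiterTag, True)
    11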
"""
data_start = fp.tell()
delimiter_pos = find_delimiter(fp, delimiter, is_little_endian, rewind=rewind)
if delimiter_pos is not None:
return delimiter_pos - data_start
return None
def path_from_pathlike(
file_object: PathType | BinaryIO | ReadableBuffer | WriteableBuffer,
) -> str | BinaryIO:
"""Returns the path if `file_object` is a path-like object, otherwise the
original `file_object`.
Parameters
----------
file_object: str or PathLike or file-like
Returns
-------
str or file-like
        The string representation of the given path object, or the object
        itself if it does not represent a path.
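    Examples
    --------
    A minimal sketch with a hypothetical path and an in-memory buffer:
    >>> from pathlib import Path
    >>> path_from_pathlike(Path("ct.dcm"))
    'ct.dcm'
    >>> from io import BytesIO
    >>> buf = BytesIO()
    >>> path_from_pathlike(buf) is buf
    True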
"""
try:
return os.fspath(file_object) # type: ignore[arg-type]
except TypeError:
return cast(BinaryIO, file_object)
def _unpack_tag(b: bytes, endianness: str) -> BaseTag:
return TupleTag(cast(tuple[int, int], unpack(f"{endianness}HH", b)))
def check_buffer(buffer: BufferedIOBase) -> None:
"""Raise an exception if `buffer` is not usable as an element value.
Parameters
----------
buffer : io.BufferedIOBase
The buffer to check, must be :meth:`~io.IOBase.readable`,
:meth:`~io.IOBase.seekable` and not be :attr:`io.IOBase.closed`.
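    Examples
    --------
    A minimal sketch using an in-memory buffer:
    >>> from io import BytesIO
    >>> buffer = BytesIO(b"data")
    >>> check_buffer(buffer)  # readable, seekable and open - no exception
    >>> buffer.close()
    >>> check_buffer(buffer)
    Traceback (most recent call last):
    ...
    ValueError: the buffer has been closed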
"""
if not isinstance(buffer, BufferedIOBase):
raise TypeError("the buffer must inherit from 'io.BufferedIOBase'")
if buffer.closed:
raise ValueError("the buffer has been closed")
# readable() covers read(), seekable() covers seek() and tell()
if not buffer.readable() or not buffer.seekable():
raise ValueError("the buffer must be readable and seekable")
@contextmanager
def reset_buffer_position(buffer: BufferedIOBase) -> Generator[int, None, None]:
"""Yields the initial position of the buffer and return to that position on exiting
the context.
Parameters
----------
buffer : io.BufferedIOBase
The buffer to use.
Yields
------
int
The initial position of the buffer.
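    Examples
    --------
    A minimal sketch using an in-memory buffer:
    >>> from io import BytesIO
    >>> buffer = BytesIO(b"abcdef")
    >>> buffer.seek(2)
    2
    >>> with reset_buffer_position(buffer) as offset:
    ...     data = buffer.read()
    >>> offset, data, buffer.tell()
    (2, b'cdef', 2)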
"""
check_buffer(buffer)
initial_offset = buffer.tell()
yield initial_offset
buffer.seek(initial_offset)
def read_buffer(
buffer: BufferedIOBase, *, chunk_size: int | None = None
) -> Iterator[bytes]:
"""Read data from `buffer`.
The buffer is NOT returned to its starting position.
Parameters
----------
buffer : io.BufferedIOBase
The buffer to read from.
chunk_size : int, optional
        The number of bytes to read per iteration (default 8192). Fewer bytes may be
yielded if there is insufficient remaining data in `buffer`.
Yields
    ------
bytes
        Data read from the buffer, up to `chunk_size` bytes in length.
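    Examples
    --------
    A minimal sketch using an in-memory buffer and a small chunk size:
    >>> from io import BytesIO
    >>> buffer = BytesIO(b"abcdef")
    >>> list(read_buffer(buffer, chunk_size=4))
    [b'abcd', b'ef']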
"""
chunk_size = settings.buffered_read_size if chunk_size is None else chunk_size
if chunk_size <= 0:
raise ValueError(
f"Invalid 'chunk_size' value '{chunk_size}', must be greater than 0"
)
check_buffer(buffer)
    while chunk := buffer.read(chunk_size):
        yield chunk
def buffer_length(buffer: BufferedIOBase) -> int:
"""Return the total length of the buffer.
Parameters
----------
buffer : io.BufferedIOBase
        The buffer to return the total length for.
Returns
-------
int
The total length of the buffer.
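    Examples
    --------
    A minimal sketch using an in-memory buffer:
    >>> from io import BytesIO
    >>> buffer_length(BytesIO(b"abcdef"))
    6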
"""
with reset_buffer_position(buffer):
return buffer.seek(0, os.SEEK_END)
def buffer_remaining(buffer: BufferedIOBase) -> int:
"""Return the remaining length of the buffer with respect to the current position.
Parameters
----------
buffer : io.BufferedIOBase
The buffer to return the remaining length for.
Returns
-------
int
The remaining length of the buffer from the current position.
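    Examples
    --------
    A minimal sketch using an in-memory buffer:
    >>> from io import BytesIO
    >>> buffer = BytesIO(b"abcdef")
    >>> buffer.seek(2)
    2
    >>> buffer_remaining(buffer)
    4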
"""
with reset_buffer_position(buffer) as current_offset:
return buffer.seek(0, os.SEEK_END) - current_offset
def buffer_equality(
buffer: BufferedIOBase,
other: bytes | bytearray | BufferedIOBase,
) -> bool:
"""Return ``True`` if `buffer` and `other` are equal, ``False`` otherwise."""
if not isinstance(other, bytes | bytearray | BufferedIOBase):
return False
# Avoid reading the entire buffer object into memory
with reset_buffer_position(buffer):
buffer.seek(0)
if isinstance(other, bytes | bytearray):
start = 0
for data in read_buffer(buffer):
nr_read = len(data)
if other[start : start + nr_read] != data:
return False
start += nr_read
return len(other) == start
if buffer_length(buffer) != buffer_length(other):
return False
with reset_buffer_position(other):
other.seek(0)
for data_a, data_b in zip(read_buffer(buffer), read_buffer(other)):
if data_a != data_b:
return False
return True