Files
René Mathieu 0fef8d96c5 Initial commit
2026-01-17 13:49:51 +01:00

1505 lines
52 KiB
Python
Executable File

# Copyright 2008-2021 pydicom authors. See LICENSE file for details.
"""Functions related to writing DICOM data."""
from collections.abc import Sequence, MutableSequence, Iterable
from copy import deepcopy
from io import BufferedIOBase
from struct import pack
from typing import BinaryIO, Any, cast
from collections.abc import Callable
import zlib
from pydicom import config
from pydicom.charset import default_encoding, convert_encodings, encode_string
from pydicom.dataelem import (
convert_raw_data_element,
DataElement,
RawDataElement,
)
from pydicom.dataset import Dataset, validate_file_meta, FileMetaDataset
from pydicom.filebase import DicomFile, DicomBytesIO, DicomIO, WriteableBuffer
from pydicom.fileutil import (
path_from_pathlike,
PathType,
buffer_remaining,
read_buffer,
reset_buffer_position,
)
from pydicom.misc import warn_and_log
from pydicom.multival import MultiValue
from pydicom.tag import (
Tag,
BaseTag,
ItemTag,
ItemDelimiterTag,
SequenceDelimiterTag,
tag_in_exception,
_LUT_DESCRIPTOR_TAGS,
)
from pydicom.uid import (
DeflatedExplicitVRLittleEndian,
UID,
ImplicitVRLittleEndian,
ExplicitVRBigEndian,
)
from pydicom.valuerep import (
PersonName,
IS,
DSclass,
DA,
DT,
TM,
EXPLICIT_VR_LENGTH_32,
VR,
AMBIGUOUS_VR,
CUSTOMIZABLE_CHARSET_VR,
)
from pydicom.values import convert_numbers
if config.have_numpy:
import numpy
# Ambiguous VR Correction
# (0018,9810) Zero Velocity Pixel Value
# (0022,1452) Mapped Pixel Value
# (0028,0104)/(0028,0105) Smallest/Largest Valid Pixel Value
# (0028,0106)/(0028,0107) Smallest/Largest Image Pixel Value
# (0028,0108)/(0028,0109) Smallest/Largest Pixel Value in Series
# (0028,0110)/(0028,0111) Smallest/Largest Image Pixel Value in Plane
# (0028,0120) Pixel Padding Value
# (0028,0121) Pixel Padding Range Limit
# (0028,1101-1103) Red/Green/Blue Palette Color Lookup Table Descriptor
# (0028,3002) LUT Descriptor
# (0040,9216)/(0040,9211) Real World Value First/Last Value Mapped
# (0060,3004)/(0060,3006) Histogram First/Last Bin Value
_AMBIGUOUS_US_SS_TAGS = {
0x00189810,
0x00221452,
0x00280104,
0x00280105,
0x00280106,
0x00280107,
0x00280108,
0x00280109,
0x00280110,
0x00280111,
0x00280120,
0x00280121,
0x00281101,
0x00281102,
0x00281103,
0x00283002,
0x00409211,
0x00409216,
0x00603004,
0x00603006,
}
# (5400,0110) Channel Minimum Value
# (5400,0112) Channel Maximum Value
# (5400,100A) Waveform Padding Data
# (5400,1010) Waveform Data
_AMBIGUOUS_OB_OW_TAGS = {0x54000110, 0x54000112, 0x5400100A, 0x54001010}
# (60xx,3000) Overlay Data
_OVERLAY_DATA_TAGS = {x << 16 | 0x3000 for x in range(0x6000, 0x601F, 2)}
def _correct_ambiguous_vr_element(
elem: DataElement,
ancestors: list[Dataset],
is_little_endian: bool,
) -> DataElement:
"""Implementation for `correct_ambiguous_vr_element`.
See `correct_ambiguous_vr_element` for description.
"""
# The zeroth dataset is the nearest, the last is the root dataset
ds = ancestors[0]
# 'OB or OW': 7fe0,0010 PixelData
if elem.tag == 0x7FE00010:
# Compressed Pixel Data
# PS3.5 Annex A.4
# If encapsulated, VR is OB and length is undefined
if elem.is_undefined_length:
elem.VR = VR.OB
elif ds.original_encoding[0]:
# Non-compressed Pixel Data - Implicit Little Endian
# PS3.5 Annex A1: VR is always OW
elem.VR = VR.OW
else:
# Non-compressed Pixel Data - Explicit VR
# PS3.5 Annex A.2:
# If BitsAllocated is > 8 then VR shall be OW,
# else may be OB or OW.
# If we get here, the data has not been written before
# or has been converted from Implicit Little Endian,
# so we default to OB for BitsAllocated 1 or 8
elem.VR = VR.OW if cast(int, ds.BitsAllocated) > 8 else VR.OB
# 'US or SS' and dependent on PixelRepresentation
elif elem.tag in _AMBIGUOUS_US_SS_TAGS:
# US if PixelRepresentation value is 0x0000, else SS
# For references, see the list at
# https://github.com/pydicom/pydicom/pull/298
# PixelRepresentation is usually set in the root dataset
# If correcting during write, or after implicit read when the
# element is on the same level as pixel representation
pixel_rep = next(
(
cast(int, x.PixelRepresentation)
for x in ancestors
if getattr(x, "PixelRepresentation", None) is not None
),
None,
)
if pixel_rep is None:
# If correcting after implicit read when the element isn't
# on the same level as pixel representation
pixel_rep = next(
(x._pixel_rep for x in ancestors if hasattr(x, "_pixel_rep")),
None,
)
if pixel_rep is None:
# If no pixel data is present, none if these tags is used,
# so we can just ignore a missing PixelRepresentation in this case
pixel_rep = 1
if (
"PixelRepresentation" not in ds
and "PixelData" not in ds
or ds.PixelRepresentation == 0
):
pixel_rep = 0
elem.VR = VR.US if pixel_rep == 0 else VR.SS
byte_type = "H" if pixel_rep == 0 else "h"
if elem.VM == 0:
return elem
# Need to handle type check for elements with VM > 1
elem_value = elem.value if elem.VM == 1 else cast(Sequence[Any], elem.value)[0]
if not isinstance(elem_value, int):
elem.value = convert_numbers(
cast(bytes, elem.value), is_little_endian, byte_type
)
# 'OB or OW' and dependent on WaveformBitsAllocated
elif elem.tag in _AMBIGUOUS_OB_OW_TAGS:
# If WaveformBitsAllocated is > 8 then OW, otherwise may be
# OB or OW.
# See PS3.3 C.10.9.1.
if ds.original_encoding[0]:
elem.VR = VR.OW
else:
elem.VR = VR.OW if cast(int, ds.WaveformBitsAllocated) > 8 else VR.OB
# 'US or OW': 0028,3006 LUTData
elif elem.tag == 0x00283006:
# First value in LUT Descriptor is how many values in
# LUTData, if there's only one value then must be US
# As per PS3.3 C.11.1.1.1
if cast(Sequence[int], ds.LUTDescriptor)[0] == 1:
elem.VR = VR.US
if elem.VM == 0:
return elem
elem_value = (
elem.value if elem.VM == 1 else cast(Sequence[Any], elem.value)[0]
)
if not isinstance(elem_value, int):
elem.value = convert_numbers(
cast(bytes, elem.value), is_little_endian, "H"
)
else:
elem.VR = VR.OW
# 'OB or OW': 60xx,3000 OverlayData and dependent on Transfer Syntax
elif elem.tag in _OVERLAY_DATA_TAGS:
# Implicit VR must be OW, explicit VR may be OB or OW
# as per PS3.5 Section 8.1.2 and Annex A
elem.VR = VR.OW
return elem
def correct_ambiguous_vr_element(
elem: DataElement | RawDataElement,
ds: Dataset,
is_little_endian: bool,
ancestors: list[Dataset] | None = None,
) -> DataElement | RawDataElement:
"""Attempt to correct the ambiguous VR element `elem`.
When it's not possible to correct the VR, the element will be returned
unchanged. Currently the only ambiguous VR elements not corrected for are
all retired or part of DICONDE.
If the VR is corrected and is 'US' or 'SS' then the value will be updated
using the :func:`~pydicom.values.convert_numbers` function.
.. versionchanged:: 3.0
The `ancestors` keyword argument was added.
Parameters
----------
elem : dataelem.DataElement or dataelem.RawDataElement
The element with an ambiguous VR.
ds : dataset.Dataset
The dataset containing `elem`.
is_little_endian : bool
The byte ordering of the values in the dataset.
ancestors : list[pydicom.dataset.Dataset] | None
A list of the ancestor datasets to look through when trying to find
the relevant element value to use in VR correction. Should be ordered
from closest to furthest. If ``None`` then will build itself
automatically starting at `ds` (default).
Returns
-------
dataelem.DataElement or dataelem.RawDataElement
The corrected element
"""
ancestors = [ds] if ancestors is None else ancestors
if elem.VR in AMBIGUOUS_VR:
# convert raw data elements before handling them
if isinstance(elem, RawDataElement):
elem = convert_raw_data_element(elem, ds=ds)
ds.__setitem__(elem.tag, elem)
try:
_correct_ambiguous_vr_element(elem, ancestors, is_little_endian)
except AttributeError as e:
raise AttributeError(
f"Failed to resolve ambiguous VR for tag {elem.tag}: {e}"
)
return elem
def correct_ambiguous_vr(
ds: Dataset,
is_little_endian: bool,
ancestors: list[Dataset] | None = None,
) -> Dataset:
"""Iterate through `ds` correcting ambiguous VR elements (if possible).
When it's not possible to correct the VR, the element will be returned
unchanged. Currently the only ambiguous VR elements not corrected for are
all retired or part of DICONDE.
If the VR is corrected and is 'US' or 'SS' then the value will be updated
using the :func:`~pydicom.values.convert_numbers` function.
.. versionchanged:: 3.0
The `ancestors` keyword argument was added.
Parameters
----------
ds : pydicom.dataset.Dataset
The dataset containing ambiguous VR elements.
is_little_endian : bool
The byte ordering of the values in the dataset.
ancestors : list[pydicom.dataset.Dataset] | None
A list of the ancestor datasets to look through when trying to find
the relevant element value to use in VR correction. Should be ordered
from closest to furthest. If ``None`` then will build itself
automatically starting at `ds` (default).
Returns
-------
ds : dataset.Dataset
The corrected dataset
Raises
------
AttributeError
If a tag is missing in `ds` that is required to resolve the ambiguity.
"""
# Construct the tree if `ds` is the root level dataset
# tree = Tree(ds) if tree is None else tree
ancestors = [ds] if ancestors is None else ancestors
# Iterate through the elements
for elem in ds.elements():
# raw data element sequences can be written as they are, because we
# have ensured that the transfer syntax has not changed at this point
if elem.VR == VR.SQ:
elem = ds[elem.tag]
for item in cast(MutableSequence["Dataset"], elem.value):
ancestors.insert(0, item)
correct_ambiguous_vr(item, is_little_endian, ancestors)
elif elem.VR in AMBIGUOUS_VR:
correct_ambiguous_vr_element(elem, ds, is_little_endian, ancestors)
del ancestors[0]
return ds
def write_numbers(fp: DicomIO, elem: DataElement, struct_format: str) -> None:
"""Write a "value" of type struct_format from the dicom file.
"Value" can be more than one number.
Parameters
----------
fp : file-like
The file-like to write the encoded data to.
elem : dataelem.DataElement
The element to encode.
struct_format : str
The character format as used by the struct module.
"""
value = elem.value
if value is None or value == "":
return # don't need to write anything for no or empty value
endianChar = "><"[fp.is_little_endian]
format_string = endianChar + struct_format
try:
try:
# works only if list, not if string or number
value.append
except AttributeError: # is a single value - the usual case
fp.write(pack(format_string, value))
else:
# Some ambiguous VR elements ignore the VR for part of the value
# e.g. LUT Descriptor is 'US or SS' and VM 3, but the first and
# third values are always US (the third should be <= 16, so SS is OK)
if struct_format == "h" and elem.tag in _LUT_DESCRIPTOR_TAGS and value:
fp.write(pack(f"{endianChar}H", value[0]))
value = value[1:]
fp.write(pack(f"{endianChar}{len(value)}{struct_format}", *value))
except Exception as exc:
raise OSError(f"{exc}\nfor data_element:\n{elem}")
def write_OBvalue(fp: DicomIO, elem: DataElement) -> None:
"""Write a data_element with VR of 'other byte' (OB)."""
if elem.is_buffered:
bytes_written = 0
buffer = cast(BufferedIOBase, elem.value)
with reset_buffer_position(buffer):
for chunk in read_buffer(buffer):
bytes_written += fp.write(chunk)
else:
bytes_written = fp.write(cast(bytes, elem.value))
if bytes_written % 2:
fp.write(b"\x00")
def write_OWvalue(fp: DicomIO, elem: DataElement) -> None:
"""Write a data_element with VR of 'other word' (OW).
Note: This **does not currently do the byte swapping** for Endian state.
"""
if elem.is_buffered:
bytes_written = 0
buffer = cast(BufferedIOBase, elem.value)
with reset_buffer_position(buffer):
for chunk in read_buffer(buffer):
bytes_written += fp.write(chunk)
else:
bytes_written = fp.write(cast(bytes, elem.value))
if bytes_written % 2:
fp.write(b"\x00")
def write_UI(fp: DicomIO, elem: DataElement) -> None:
"""Write a data_element with VR of 'unique identifier' (UI)."""
write_string(fp, elem, "\0") # pad with 0-byte to even length
def _is_multi_value(val: Any) -> bool:
"""Return True if `val` is a multi-value container."""
if config.have_numpy and isinstance(val, numpy.ndarray):
return True
return isinstance(val, MultiValue | list | tuple)
def multi_string(val: str | Iterable[str]) -> str:
"""Put a string together with delimiter if has more than one value"""
if _is_multi_value(val):
return "\\".join(val)
return cast(str, val)
def write_PN(
fp: DicomIO, elem: DataElement, encodings: list[str] | None = None
) -> None:
if not encodings:
encodings = [default_encoding]
val: list[PersonName]
if elem.VM == 1:
val = [cast(PersonName, elem.value)]
else:
val = cast(list[PersonName], elem.value)
enc = b"\\".join([elem.encode(encodings) for elem in val])
if len(enc) % 2 != 0:
enc += b" "
fp.write(enc)
def write_string(fp: DicomIO, elem: DataElement, padding: str = " ") -> None:
"""Write a single or multivalued ASCII string."""
val = multi_string(cast(str | Iterable[str], elem.value))
if val is not None:
if len(val) % 2 != 0:
val += padding # pad to even length
if isinstance(val, str):
val = val.encode(default_encoding) # type: ignore[assignment]
fp.write(val) # type: ignore[arg-type]
def write_text(
fp: DicomIO, elem: DataElement, encodings: list[str] | None = None
) -> None:
"""Write a single or multivalued text string."""
encodings = encodings or [default_encoding]
val = elem.value
if val is not None:
if _is_multi_value(val):
val = cast(Sequence[bytes] | Sequence[str], val)
if isinstance(val[0], str):
val = cast(Sequence[str], val)
val = b"\\".join([encode_string(val, encodings) for val in val])
else:
val = cast(Sequence[bytes], val)
val = b"\\".join([val for val in val])
else:
val = cast(bytes | str, val)
if isinstance(val, str):
val = encode_string(val, encodings)
if len(val) % 2 != 0:
val = val + b" " # pad to even length
fp.write(val)
def write_number_string(fp: DicomIO, elem: DataElement) -> None:
"""Handle IS or DS VR - write a number stored as a string of digits."""
# If the DS or IS has an original_string attribute, use that, so that
# unchanged data elements are written with exact string as when read from
# file
val = elem.value
if _is_multi_value(val):
val = cast(Sequence[IS] | Sequence[DSclass], val)
val = "\\".join(
x.original_string if hasattr(x, "original_string") else str(x) for x in val
)
else:
val = cast(IS | DSclass, val)
if hasattr(val, "original_string"):
val = val.original_string
else:
val = str(val)
if len(val) % 2 != 0:
val = val + " " # pad to even length
val = bytes(val, default_encoding)
fp.write(val)
def _format_DA(val: DA | None) -> str:
if val is None:
return ""
if hasattr(val, "original_string"):
return val.original_string
return val.strftime("%Y%m%d")
def write_DA(fp: DicomIO, elem: DataElement) -> None:
val = elem.value
if isinstance(val, str):
write_string(fp, elem)
else:
if _is_multi_value(val):
val = cast(Sequence[DA], val)
val = "\\".join(x if isinstance(x, str) else _format_DA(x) for x in val)
else:
val = _format_DA(cast(DA, val))
if len(val) % 2 != 0:
val = val + " " # pad to even length
if isinstance(val, str):
val = val.encode(default_encoding)
fp.write(val)
def _format_DT(val: DT | None) -> str:
if val is None:
return ""
if hasattr(val, "original_string"):
return val.original_string
if val.microsecond > 0:
return val.strftime("%Y%m%d%H%M%S.%f%z")
return val.strftime("%Y%m%d%H%M%S%z")
def write_DT(fp: DicomIO, elem: DataElement) -> None:
val = elem.value
if isinstance(val, str):
write_string(fp, elem)
else:
if _is_multi_value(val):
val = cast(Sequence[DT], val)
val = "\\".join(x if isinstance(x, str) else _format_DT(x) for x in val)
else:
val = _format_DT(cast(DT, val))
if len(val) % 2 != 0:
val = val + " " # pad to even length
if isinstance(val, str):
val = val.encode(default_encoding)
fp.write(val)
def _format_TM(val: TM | None) -> str:
if val is None:
return ""
if hasattr(val, "original_string"):
return val.original_string
if val.microsecond > 0:
return val.strftime("%H%M%S.%f")
return val.strftime("%H%M%S")
def write_TM(fp: DicomIO, elem: DataElement) -> None:
val = elem.value
if isinstance(val, str):
write_string(fp, elem)
else:
if _is_multi_value(val):
val = cast(Sequence[TM], val)
val = "\\".join(x if isinstance(x, str) else _format_TM(x) for x in val)
else:
val = _format_TM(cast(TM, val))
if len(val) % 2 != 0:
val = val + " " # pad to even length
if isinstance(val, str):
val = val.encode(default_encoding)
fp.write(val)
def write_data_element(
fp: DicomIO,
elem: DataElement | RawDataElement,
encodings: str | list[str] | None = None,
) -> None:
"""Write the data_element to file fp according to
dicom media storage rules.
"""
# Write element's tag
fp.write_tag(elem.tag)
# write into a buffer to avoid seeking back which can be expansive
buffer = DicomBytesIO()
buffer.is_little_endian = fp.is_little_endian
buffer.is_implicit_VR = fp.is_implicit_VR
vr: str | None = elem.VR
if not fp.is_implicit_VR and vr and len(vr) != 2:
msg = (
f"Cannot write ambiguous VR of '{vr}' for data element with "
f"tag {elem.tag!r}.\nSet the correct VR before "
f"writing, or use an implicit VR transfer syntax"
)
raise ValueError(msg)
if elem.is_raw:
elem = cast(RawDataElement, elem)
# raw data element values can be written as they are
buffer.write(cast(bytes, elem.value))
is_undefined_length = elem.length == 0xFFFFFFFF
else:
elem = cast(DataElement, elem)
if vr not in writers:
raise NotImplementedError(
f"write_data_element: unknown Value Representation '{vr}'"
)
encodings = encodings or [default_encoding]
encodings = convert_encodings(encodings)
fn, param = writers[cast(VR, vr)]
is_undefined_length = elem.is_undefined_length
if not elem.is_empty:
if vr in CUSTOMIZABLE_CHARSET_VR or vr == VR.SQ:
fn(buffer, elem, encodings=encodings) # type: ignore[operator]
else:
# Many numeric types use the same writer but with
# numeric format parameter
if param is not None:
fn(buffer, elem, param) # type: ignore[operator]
elif not elem.is_buffered:
# defer writing a buffered value until after we have written the
# tag and length in the file
fn(buffer, elem) # type: ignore[operator]
# valid pixel data with undefined length shall contain encapsulated
# data, e.g. sequence items - raise ValueError otherwise (see #238)
if is_undefined_length and elem.tag == 0x7FE00010:
if elem.is_buffered:
value = cast(BufferedIOBase, elem.value)
with reset_buffer_position(value):
pixel_data_bytes = value.read(4)
else:
pixel_data_bytes = cast(bytes, elem.value)[:4]
# Big endian encapsulation is non-conformant
tag = b"\xFE\xFF\x00\xE0" if fp.is_little_endian else b"\xFF\xFE\xE0\x00"
if not pixel_data_bytes.startswith(tag):
raise ValueError(
"The (7FE0,0010) 'Pixel Data' element value hasn't been "
"encapsulated as required for a compressed transfer syntax - "
"see pydicom.encaps.encapsulate() for more information"
)
value_length = (
buffer.tell()
if not elem.is_buffered
else buffer_remaining(cast(BufferedIOBase, elem.value))
)
if (
not fp.is_implicit_VR
and vr not in EXPLICIT_VR_LENGTH_32
and not is_undefined_length
and value_length > 0xFFFF
):
# see PS 3.5, section 6.2.2 for handling of this case
warn_and_log(
f"The value for the data element {elem.tag} exceeds the "
f"size of 64 kByte and cannot be written in an explicit transfer "
f"syntax. The data element VR is changed from '{vr}' to 'UN' "
f"to allow saving the data."
)
vr = VR.UN
# write the VR for explicit transfer syntax
if not fp.is_implicit_VR:
vr = cast(str, vr)
fp.write(bytes(vr, default_encoding))
if vr in EXPLICIT_VR_LENGTH_32:
fp.write_US(0) # reserved 2 bytes
if (
not fp.is_implicit_VR
and vr not in EXPLICIT_VR_LENGTH_32
and not is_undefined_length
):
fp.write_US(value_length) # Explicit VR length field is 2 bytes
else:
# write the proper length of the data_element in the length slot,
# unless is SQ with undefined length.
fp.write_UL(0xFFFFFFFF if is_undefined_length else value_length)
# if the value is buffered, now we want to write the value directly to the fp
if elem.is_buffered:
fn(fp, elem) # type: ignore[operator]
else:
fp.write(buffer.getvalue())
if is_undefined_length:
fp.write_tag(SequenceDelimiterTag)
fp.write_UL(0) # 4-byte 'length' of delimiter data item
EncodingType = tuple[bool | None, bool | None]
def write_dataset(
fp: DicomIO, dataset: Dataset, parent_encoding: str | list[str] = default_encoding
) -> int:
"""Encode `dataset` and write the encoded data to `fp`.
*Encoding*
The `dataset` is encoded as specified by (in order of priority):
* ``fp.is_implicit_VR`` and ``fp.is_little_endian``.
* ``dataset.is_implicit_VR`` and ``dataset.is_little_endian``
* If `dataset` has been decoded from a file or buffer then
:attr:`~pydicom.dataset.Dataset.original_encoding`.
Parameters
----------
fp : pydicom.filebase.DicomIO
The file-like to write the encoded dataset to.
dataset : pydicom.dataset.Dataset
The dataset to be encoded.
parent_encoding : str | List[str], optional
The character set to use for encoding strings, defaults to ``"iso8859"``.
Returns
-------
int
The number of bytes written to `fp`.
"""
# TODO: Update in v4.0
# In order of encoding priority
fp_encoding: EncodingType = (
getattr(fp, "_implicit_vr", None),
getattr(fp, "_little_endian", None),
)
ds_encoding: EncodingType = (None, None)
if not config._use_future:
ds_encoding = (dataset.is_implicit_VR, dataset.is_little_endian)
or_encoding = dataset.original_encoding
if None in fp_encoding and None in ds_encoding and None in or_encoding:
raise AttributeError(
"'fp.is_implicit_VR' and 'fp.is_little_endian' attributes are required "
)
if None in fp_encoding:
if None not in ds_encoding:
fp_encoding = ds_encoding
elif None not in or_encoding:
fp_encoding = or_encoding
fp.is_implicit_VR, fp.is_little_endian = cast(tuple[bool, bool], fp_encoding)
get_item: Callable[[BaseTag], DataElement | RawDataElement] = dataset.get_item
# This function is doing some heavy lifting:
# If implicit -> explicit, runs ambiguous VR correction
# If implicit -> explicit, RawDataElements -> DataElement (VR lookup)
# If charset changed, RawDataElements -> DataElement
if (
fp_encoding != or_encoding
or dataset.original_character_set != dataset._character_set
):
dataset = correct_ambiguous_vr(dataset, fp.is_little_endian)
# Use __getitem__ instead or get_item to force parsing of RawDataElements into DataElements,
# so we can re-encode them with the correct charset and encoding
get_item = dataset.__getitem__
dataset_encoding = cast(
None | str | list[str], dataset.get("SpecificCharacterSet", parent_encoding)
)
fpStart = fp.tell()
# data_elements must be written in tag order
for tag in sorted(dataset.keys()):
# do not write retired Group Length (see PS3.5, 7.2)
if tag.element == 0 and tag.group > 6:
continue
with tag_in_exception(tag):
write_data_element(fp, get_item(tag), dataset_encoding)
return fp.tell() - fpStart
def write_sequence(fp: DicomIO, elem: DataElement, encodings: list[str]) -> None:
"""Write a sequence contained in `data_element` to the file-like `fp`.
Parameters
----------
fp : file-like
The file-like to write the encoded data to.
data_element : dataelem.DataElement
The sequence element to write to `fp`.
encodings : list of str
The character encodings to use on text values.
"""
# write_data_element has already written the VR='SQ' (if needed) and
# a placeholder for length"""
for ds in cast(Iterable[Dataset], elem.value):
write_sequence_item(fp, ds, encodings)
def write_sequence_item(fp: DicomIO, dataset: Dataset, encodings: list[str]) -> None:
"""Write a `dataset` in a sequence to the file-like `fp`.
This is similar to writing a data_element, but with a specific tag for
Sequence Item.
See DICOM Standard, Part 5, :dcm:`Section 7.5<sect_7.5.html>`.
Parameters
----------
fp : file-like
The file-like to write the encoded data to.
dataset : Dataset
The :class:`Dataset<pydicom.dataset.Dataset>` to write to `fp`.
encodings : list of str
The character encodings to use on text values.
"""
fp.write_tag(ItemTag) # marker for start of Sequence Item
length_location = fp.tell() # save location for later.
# will fill in real value later if not undefined length
fp.write_UL(0xFFFFFFFF)
write_dataset(fp, dataset, parent_encoding=encodings)
if getattr(dataset, "is_undefined_length_sequence_item", False):
fp.write_tag(ItemDelimiterTag)
fp.write_UL(0) # 4-bytes 'length' field for delimiter item
else: # we will be nice and set the lengths for the reader of this file
location = fp.tell()
fp.seek(length_location)
fp.write_UL(location - length_location - 4) # 4 is length of UL
fp.seek(location) # ready for next data_element
def write_UN(fp: DicomIO, elem: DataElement) -> None:
"""Write a byte string for an DataElement of value 'UN' (unknown)."""
fp.write(cast(bytes, elem.value))
def write_ATvalue(fp: DicomIO, elem: DataElement) -> None:
"""Write a data_element tag to a file."""
try:
iter(cast(Sequence[Any], elem.value)) # see if is multi-valued AT;
# Note will fail if Tag ever derived from true tuple rather than being
# a long
except TypeError:
# make sure is expressed as a Tag instance
tag = Tag(cast(int, elem.value))
fp.write_tag(tag)
else:
tags = [Tag(tag) for tag in cast(Sequence[int], elem.value)]
for tag in tags:
fp.write_tag(tag)
def write_file_meta_info(
fp: DicomIO, file_meta: FileMetaDataset, enforce_standard: bool = True
) -> None:
"""Write the File Meta Information elements in `file_meta` to `fp`.
If `enforce_standard` is ``True`` then the file-like `fp` should be
positioned past the 128 byte preamble + 4 byte prefix (which should
already have been written).
**DICOM File Meta Information Group Elements**
From the DICOM standard, Part 10,
:dcm:`Section 7.1<part10/chapter_7.html#sect_7.1>`, any DICOM file shall
contain a 128-byte preamble, a 4-byte DICOM prefix 'DICM' and (at a
minimum) the following Type 1 DICOM Elements (from
:dcm:`Table 7.1-1<part10/chapter_7.html#table_7.1-1>`):
* (0002,0000) *File Meta Information Group Length*, UL, 4
* (0002,0001) *File Meta Information Version*, OB, 2
* (0002,0002) *Media Storage SOP Class UID*, UI, N
* (0002,0003) *Media Storage SOP Instance UID*, UI, N
* (0002,0010) *Transfer Syntax UID*, UI, N
* (0002,0012) *Implementation Class UID*, UI, N
If `enforce_standard` is ``True`` then (0002,0000) will be added/updated,
(0002,0001) and (0002,0012) will be added if not already present and the
other required elements will be checked to see if they exist. If
`enforce_standard` is ``False`` then `file_meta` will be written as is
after minimal validation checking.
The following Type 3/1C Elements may also be present:
* (0002,0013) *Implementation Version Name*, SH, N
* (0002,0016) *Source Application Entity Title*, AE, N
* (0002,0017) *Sending Application Entity Title*, AE, N
* (0002,0018) *Receiving Application Entity Title*, AE, N
* (0002,0102) *Private Information*, OB, N
* (0002,0100) *Private Information Creator UID*, UI, N
If `enforce_standard` is ``True`` then (0002,0013) will be added/updated.
*Encoding*
The encoding of the *File Meta Information* shall be *Explicit VR Little
Endian*.
Parameters
----------
fp : file-like
The file-like to write the File Meta Information to.
file_meta : pydicom.dataset.Dataset
The File Meta Information elements.
enforce_standard : bool
If ``False``, then only the *File Meta Information* elements already in
`file_meta` will be written to `fp`. If ``True`` (default) then a DICOM
Standards conformant File Meta will be written to `fp`.
Raises
------
ValueError
If `enforce_standard` is ``True`` and any of the required *File Meta
Information* elements are missing from `file_meta`, with the
exception of (0002,0000), (0002,0001) and (0002,0012).
ValueError
If any non-Group 2 Elements are present in `file_meta`.
"""
validate_file_meta(file_meta, enforce_standard)
if enforce_standard and "FileMetaInformationGroupLength" not in file_meta:
# Will be updated with the actual length later
file_meta.FileMetaInformationGroupLength = 0
# Write the File Meta Information Group elements
# first write into a buffer to avoid seeking back, that can be
# expansive and is not allowed if writing into a zip file
buffer = DicomBytesIO()
buffer.is_little_endian = True
buffer.is_implicit_VR = False
write_dataset(buffer, file_meta)
# If FileMetaInformationGroupLength is present it will be the first written
# element and we must update its value to the correct length.
if "FileMetaInformationGroupLength" in file_meta:
# Update the FileMetaInformationGroupLength value, which is the number
# of bytes from the end of the FileMetaInformationGroupLength element
# to the end of all the File Meta Information elements.
# FileMetaInformationGroupLength has a VR of 'UL' and so has a value
# that is 4 bytes fixed. The total length of when encoded as
# Explicit VR must therefore be 12 bytes.
file_meta.FileMetaInformationGroupLength = buffer.tell() - 12
buffer.seek(0)
write_data_element(buffer, file_meta[0x00020000])
fp.write(buffer.getvalue())
def _determine_encoding(
ds: Dataset,
tsyntax: UID | None,
implicit_vr: bool | None,
little_endian: bool | None,
force_encoding: bool,
) -> tuple[bool, bool]:
"""Return the encoding to use for `ds`.
If `force_encoding` isn't ``True`` the priority is:
1. The encoding corresponding to `tsyntax`
2. The encoding set by `implicit_vr` and `little_endian`
3. `Dataset.is_implicit_VR` and `Dataset.is_little_endian`
4. `Dataset.original_encoding`
If none of those are valid, raise an exception.
If `force_encoding` is ``True`` then `implicit_vr` and `little_endian` are
required.
Parameters
----------
ds : pydicom.dataset.Dataset
The dataset that is to be encoded.
tsyntax : pydicom.uid.UID | None
The dataset's public or private transfer syntax (if any). Private
transfer syntaxes require `implicit_vr` and `little_endian` be used.
implicit_vr : bool | None
The VR encoding method (if supplied)
little_endian : bool | None
The encoding endianness (if supplied).
force_encoding : bool
If ``True`` then force the encoding to use `implicit_vr` and
`little_endian`. Default ``False``.
Returns
-------
tuple[bool, bool]
The encoding to use as ``[use implicit VR, use little endian]``.
Raises
------
ValueError
If unable to determine the encoding to use, if `transfer_syntax` is
not a transfer syntax, or if there's an inconsistency between
`transfer_syntax` and `implicit_vr` or `little_endian`.
"""
arg_encoding = (implicit_vr, little_endian)
if force_encoding:
if None in arg_encoding:
raise ValueError(
"'implicit_vr' and 'little_endian' are required if "
"'force_encoding' is used"
)
return cast(tuple[bool, bool], arg_encoding)
# The default for little_endian is `None` so we can require the use of
# args with `force_encoding`, but we actually default it to `True`
# when `implicit_vr` is used as a fallback
if implicit_vr is not None and little_endian is None:
arg_encoding = (implicit_vr, True)
ds_encoding: EncodingType = (None, None)
if not config._use_future:
ds_encoding = (ds.is_implicit_VR, ds.is_little_endian)
fallback_encoding: EncodingType = (None, None)
if None not in arg_encoding:
fallback_encoding = arg_encoding
elif None not in ds_encoding:
fallback_encoding = ds_encoding
elif None not in ds.original_encoding:
fallback_encoding = ds.original_encoding
if tsyntax is None:
if None not in fallback_encoding:
return cast(tuple[bool, bool], fallback_encoding)
raise ValueError(
"Unable to determine the encoding to use for writing the dataset, "
"please set the file meta's Transfer Syntax UID or use the "
"'implicit_vr' and 'little_endian' arguments"
)
if tsyntax.is_private and not tsyntax.is_transfer_syntax:
if None in fallback_encoding:
raise ValueError(
"The 'implicit_vr' and 'little_endian' arguments are required "
"when using a private transfer syntax"
)
return cast(tuple[bool, bool], fallback_encoding)
if not tsyntax.is_transfer_syntax:
raise ValueError(
f"The Transfer Syntax UID '{tsyntax.name}' is not a valid "
"transfer syntax"
)
# Check that supplied args match transfer syntax
if implicit_vr is not None and implicit_vr != tsyntax.is_implicit_VR:
raise ValueError(
f"The 'implicit_vr' value is not consistent with the required "
f"VR encoding for the '{tsyntax.name}' transfer syntax"
)
if little_endian is not None and little_endian != tsyntax.is_little_endian:
raise ValueError(
f"The 'little_endian' value is not consistent with the required "
f"endianness for the '{tsyntax.name}' transfer syntax"
)
return (tsyntax.is_implicit_VR, tsyntax.is_little_endian)
def dcmwrite(
filename: PathType | BinaryIO | WriteableBuffer,
dataset: Dataset,
/,
__write_like_original: bool | None = None,
*,
implicit_vr: bool | None = None,
little_endian: bool | None = None,
enforce_file_format: bool = False,
force_encoding: bool = False,
overwrite: bool = True,
**kwargs: Any,
) -> None:
"""Write `dataset` to `filename`, which can be a path, a file-like or a
writeable buffer.
.. versionchanged:: 3.0
Added the `enforce_file_format` and `overwrite` keyword arguments.
.. deprecated:: 3.0
`write_like_original` is deprecated and will be removed in v4.0, use
`enforce_file_format` instead.
If `enforce_file_format` is ``True`` then an attempt will be made to write
`dataset` using the :dcm:`DICOM File Format <part10/chapter_7.html>`, or
raise an exception if unable to do so.
If `enforce_file_format` is ``False`` (default) then `dataset` will be
written as-is (after minimal validation checking) and may or may not
contain all or parts of the *File Meta Information* and hence may or may
not be conformant with the DICOM File Format.
**DICOM File Format**
The *DICOM File Format* consists of a 128-byte preamble, a 4 byte
``b'DICM'`` prefix, the *File Meta Information Group* elements and finally
the encoded `dataset`.
**Preamble and Prefix**
The ``dataset.preamble`` attribute shall be 128-bytes long or ``None``. The
actual preamble written depends on `enforce_file_format` and
``dataset.preamble`` (see the table below).
+------------------+------------------------------+
| | enforce_file_format |
+------------------+-------------+----------------+
| dataset.preamble | False | True |
+==================+=============+================+
| None | no preamble | 128 0x00 bytes |
+------------------+-------------+----------------+
| 128 bytes | dataset.preamble |
+------------------+------------------------------+
The prefix shall be the bytestring ``b'DICM'`` and will be written if and
only if the preamble is present.
**File Meta Information Group Elements**
The preamble and prefix are followed by a set of DICOM elements from the
(0002,eeee) group. Some of these elements are required (Type 1) while
others are optional (Type 3/1C). If `enforce_file_format` is ``False``
then the *File Meta Information Group* elements are all optional, otherwise
an attempt will be made to add the required elements using `dataset`. See
:func:`~pydicom.filewriter.write_file_meta_info` for more information on
which elements are required.
The *File Meta Information Group* elements must be included within their
own :class:`~pydicom.dataset.FileMetaDataset` in the ``dataset.file_meta``
attribute.
*Encoding*
The preamble and prefix are encoding independent. The *File Meta
Information Group* elements are encoded as *Explicit VR Little Endian* as
required by the DICOM Standard.
**Dataset**
A DICOM Dataset representing a SOP Instance related to a DICOM Information
Object Definition (IOD). It's up to the user to ensure `dataset` conforms
to the requirements of the IOD.
*Encoding*
.. versionchanged:: 3.0
Added the `implicit_vr` and `little_endian` arguments.
The `dataset` is encoded as specified by (in order of priority):
* The encoding corresponding to the set *Transfer Syntax UID* in
:attr:`~pydicom.dataset.FileDataset.file_meta`.
* The `implicit_vr` and `little_endian` arguments
* :attr:`~pydicom.dataset.Dataset.is_implicit_VR` and
:attr:`~pydicom.dataset.Dataset.is_little_endian`
* :attr:`~pydicom.dataset.Dataset.original_encoding`
.. warning::
This function does not automatically convert `dataset` from little
to big endian encoding (or vice versa). The endianness of values for
elements with a VR of **OD**, **OF**, **OL**, **OW**, **OV** and
**UN** must be converted manually prior to calling
:func:`~pydicom.filewriter.dcmwrite`.
Parameters
----------
filename : str, PathLike, file-like or writeable buffer
File path, file-like or writeable buffer to write the encoded `dataset`
to. If using a writeable buffer it must have ``write()``, ``seek()``
and ``tell()`` methods.
dataset : pydicom.dataset.FileDataset
The dataset to be encoded.
write_like_original : bool, optional
If ``True`` (default) then write `dataset` as-is, otherwise
ensure that `dataset` is written in the DICOM File Format or
raise an exception if that isn't possible. This parameter is
deprecated, please use `enforce_file_format` instead.
implicit_vr : bool, optional
Required if `dataset` has no valid public *Transfer Syntax UID*
set in the file meta and `dataset` has been created from scratch. If
``True`` then encode `dataset` using implicit VR, otherwise use
explicit VR.
little_endian : bool, optional
Required if `dataset` has no valid public *Transfer Syntax UID*
set in the file meta and `dataset` has been created from scratch. If
``True`` (default) then use little endian byte order when encoding
`dataset`, otherwise use big endian.
enforce_file_format : bool, optional
If ``True`` then ensure `dataset` is written in the DICOM File
Format or raise an exception if that isn't possible.
If ``False`` (default) then write `dataset` as-is, preserving the
following - which may result in a non-conformant file:
- ``dataset.preamble``: if `dataset` has no preamble then none will
be written
- ``dataset.file_meta``: if `dataset` is missing any required *File
Meta Information Group* elements then they will not be written.
force_encoding : bool, optional
If ``True`` then force the encoding to follow `implicit_vr` and
`little_endian`. Cannot be used with `enforce_file_format`. Default
``False``.
overwrite : bool, optional
If ``False`` and `filename` is a :class:`str` or PathLike, then raise a
:class:`FileExistsError` if a file already exists with the given filename
(default ``True``).
Raises
------
ValueError
* If group ``0x0000`` *Command Set* elements are present in `dataset`.
* If group ``0x0002`` *File Meta Information Group* elements are present
in `dataset`.
* If ``dataset.preamble`` exists but is not 128 bytes long.
See Also
--------
pydicom.dataset.Dataset
Dataset class with relevant attributes and information.
pydicom.dataset.Dataset.save_as
Encode a dataset and write it to file, wraps ``dcmwrite()``.
"""
# TODO: Remove in v4.0
# Cover use of `write_like_original` as:
# optional arg - dcmwrite(fp, ds, write_like_original=bool)
# positional arg - dcmwrite(fp, ds, False)
write_like_original: bool | None = kwargs.get("write_like_original", None)
if None not in (__write_like_original, write_like_original):
if config._use_future:
raise TypeError(
"'write_like_original' is no longer accepted as a positional "
"or keyword argument, use 'enforce_file_format' instead"
)
raise TypeError(
"'write_like_original' cannot be used as both a positional "
"and keyword argument"
)
if write_like_original is None:
write_like_original = __write_like_original
if write_like_original is not None:
if config._use_future:
raise TypeError(
"'write_like_original' is no longer accepted as a positional "
"or keyword argument, use 'enforce_file_format' instead"
)
warn_and_log(
(
"'write_like_original' is deprecated and will be removed in "
"v4.0, please use 'enforce_file_format' instead"
),
DeprecationWarning,
)
enforce_file_format = not write_like_original
# Ensure kwargs only contains `write_like_original`
keys = [x for x in kwargs.keys() if x != "write_like_original"]
if keys:
raise TypeError(
f"Invalid keyword argument(s) for dcmwrite(): {', '.join(keys)}"
)
cls_name = dataset.__class__.__name__
# Check for disallowed tags
bad_tags = [x >> 16 for x in dataset._dict if x >> 16 in (0, 2)]
if bad_tags:
if 0 in bad_tags:
raise ValueError(
"Command Set elements (0000,eeee) are not allowed when using "
"dcmwrite(), use write_dataset() instead"
)
else:
raise ValueError(
"File Meta Information Group elements (0002,eeee) must be in a "
f"FileMetaDataset instance in the '{cls_name}.file_meta' attribute"
)
if force_encoding and enforce_file_format:
raise ValueError("'force_encoding' cannot be used with 'enforce_file_format'")
# Avoid making changes to the original File Meta Information
file_meta = FileMetaDataset()
if hasattr(dataset, "file_meta"):
file_meta = deepcopy(dataset.file_meta)
tsyntax: UID | None = file_meta.get("TransferSyntaxUID", None)
# The dataset encoding method
encoding = _determine_encoding(
dataset,
tsyntax,
implicit_vr,
little_endian,
force_encoding,
)
if not force_encoding and encoding == (True, False):
raise ValueError(
"Implicit VR and big endian is not a valid encoding combination"
)
preamble = getattr(dataset, "preamble", None)
if preamble and len(preamble) != 128:
raise ValueError(f"'{cls_name}.preamble' must be 128-bytes long")
if enforce_file_format:
# A valid File Meta Information is required
if tsyntax is None:
if encoding == (True, True):
file_meta.TransferSyntaxUID = ImplicitVRLittleEndian
elif encoding == (False, False):
file_meta.TransferSyntaxUID = ExplicitVRBigEndian
tsyntax = file_meta.get("TransferSyntaxUID", None)
# Ensure the file_meta Class and Instance UIDs are up to date
# but don't overwrite if nothing is set in the dataset
meta_class = file_meta.get("MediaStorageSOPClassUID", None)
ds_class = dataset.get("SOPClassUID", None)
if meta_class is None or (ds_class and ds_class != meta_class):
file_meta.MediaStorageSOPClassUID = ds_class
meta_instance = file_meta.get("MediaStorageSOPInstanceUID", None)
ds_instance = dataset.get("SOPInstanceUID", None)
if meta_instance is None or (ds_instance and ds_instance != meta_instance):
file_meta.MediaStorageSOPInstanceUID = ds_instance
# Will raise if the file meta isn't valid
validate_file_meta(file_meta, enforce_standard=True)
# A preamble is required
if not preamble:
preamble = b"\x00" * 128
if tsyntax and not tsyntax.is_private and tsyntax.is_transfer_syntax:
# PS3.5 Annex A.4 - the length of encapsulated pixel data is undefined
# and native pixel data uses actual length
if "PixelData" in dataset:
dataset["PixelData"].is_undefined_length = tsyntax.is_compressed
caller_owns_file = True
# Open file if not already a file object
filename = path_from_pathlike(filename)
if isinstance(filename, str):
# A path-like to be written to
file_mode = "xb" if not overwrite else "wb"
fp: DicomIO = DicomFile(filename, file_mode)
# caller provided a file name; we own the file handle
caller_owns_file = False
elif isinstance(filename, DicomIO):
# A wrapped writeable buffer, don't wrap it again
fp = filename
else:
# Anything else
try:
fp = DicomIO(filename)
except AttributeError as exc:
raise TypeError(
"dcmwrite: Expected a file path, file-like or writeable buffer, "
f"but got {type(filename).__name__}"
) from exc
# Set the encoding of the buffer/file-like
fp.is_implicit_VR, fp.is_little_endian = encoding
try:
if preamble:
# Write the 'DICM' prefix if and only if we write the preamble
fp.write(preamble)
fp.write(b"DICM")
if file_meta: # May be empty
write_file_meta_info(fp, file_meta, enforce_standard=enforce_file_format)
if tsyntax == DeflatedExplicitVRLittleEndian:
# See PS3.5 section A.5
# When writing, the entire dataset following the file meta data
# is encoded normally, then "deflate" compression applied
buffer = DicomBytesIO()
buffer.is_implicit_VR, buffer.is_little_endian = encoding
write_dataset(buffer, dataset)
# Compress the encoded data and write to file
compressor = zlib.compressobj(wbits=-zlib.MAX_WBITS)
deflated = bytearray(compressor.compress(buffer.getvalue()))
deflated += compressor.flush()
fp.write(deflated)
if len(deflated) % 2:
fp.write(b"\x00")
else:
write_dataset(fp, dataset)
finally:
if not caller_owns_file:
fp.close()
# Map each VR to a function which can write it
# for write_numbers, the Writer maps to a tuple (function, struct_format)
# (struct_format is python's struct module format)
writers = {
VR.AE: (write_string, None),
VR.AS: (write_string, None),
VR.AT: (write_ATvalue, None),
VR.CS: (write_string, None),
VR.DA: (write_DA, None),
VR.DS: (write_number_string, None),
VR.DT: (write_DT, None),
VR.FD: (write_numbers, "d"),
VR.FL: (write_numbers, "f"),
VR.IS: (write_number_string, None),
VR.LO: (write_text, None),
VR.LT: (write_text, None),
VR.OB: (write_OBvalue, None),
VR.OD: (write_OWvalue, None),
VR.OF: (write_OWvalue, None),
VR.OL: (write_OWvalue, None),
VR.OW: (write_OWvalue, None),
VR.OV: (write_OWvalue, None),
VR.PN: (write_PN, None),
VR.SH: (write_text, None),
VR.SL: (write_numbers, "l"),
VR.SQ: (write_sequence, None),
VR.SS: (write_numbers, "h"),
VR.ST: (write_text, None),
VR.SV: (write_numbers, "q"),
VR.TM: (write_TM, None),
VR.UC: (write_text, None),
VR.UI: (write_UI, None),
VR.UL: (write_numbers, "L"),
VR.UN: (write_UN, None),
VR.UR: (write_string, None),
VR.US: (write_numbers, "H"),
VR.UT: (write_text, None),
VR.UV: (write_numbers, "Q"),
VR.US_SS: (write_OWvalue, None),
VR.US_OW: (write_OWvalue, None),
VR.US_SS_OW: (write_OWvalue, None),
VR.OB_OW: (write_OBvalue, None),
}