Files
dicom2pacs/dist/dicom2pacs.app/Contents/Resources/lib/python3.13/pydicom/util/codify.py
René Mathieu 0fef8d96c5 Initial commit
2026-01-17 13:49:51 +01:00

530 lines
17 KiB
Python
Executable File

# Copyright 2008-2021 pydicom authors. See LICENSE file for details.
"""
Produce runnable python code which can recreate DICOM objects or files.
Can run as a script to produce code for an entire file,
or import and use specific functions to provide code for pydicom DICOM classes
"""
# Run this from the same directory as a "base" dicom file and
# this code will output to screen the dicom parameters like:
# ds.PatientName = 'TEST'
# etc for all parameters in the file.
# This can then be pasted into a python file and parameters edited as necessary
# to create a DICOM file from scratch
import argparse
import os.path
import re
import sys
from typing import cast
from collections.abc import Callable
from collections import deque
import pydicom
from pydicom.datadict import dictionary_keyword
from pydicom.dataelem import DataElement
from pydicom.dataset import Dataset
from pydicom.tag import BaseTag
from pydicom.valuerep import BYTES_VR, AMBIGUOUS_VR, VR
from pydicom.cli.main import filespec_help, filespec_parser
# Separator used when joining generated code lines into a single string
line_term = "\n"
# Patterns used by camel_to_underscore(), compiled once at import time.
# `first_cap_re` finds a capitalised word preceded by any character;
# `all_cap_re` finds a lower-case letter or digit directly before a capital.
first_cap_re = re.compile(r"(.)([A-Z][a-z]+)")
all_cap_re = re.compile(r"([a-z0-9])([A-Z])")


def camel_to_underscore(name: str) -> str:
    """Return `name` converted from CamelCase to lower_case_with_underscores.

    Parameters
    ----------
    name : str
        The CamelCase identifier to convert.

    Returns
    -------
    str
        The identifier with underscores inserted at word boundaries, in
        lower case.
    """
    # Two-pass approach from https://stackoverflow.com/questions/1175208
    word_boundaries_marked = first_cap_re.sub(r"\1_\2", name)
    fully_marked = all_cap_re.sub(r"\1_\2", word_boundaries_marked)
    return fully_marked.lower()
def tag_repr(tag: BaseTag) -> str:
    """Return the tag formatted as the string ``(0xgggg, 0xeeee)``.

    Parameters
    ----------
    tag : BaseTag
        The tag whose ``group`` and ``element`` attributes are formatted.

    Returns
    -------
    str
        Group and element as 4-digit, zero-padded, upper-case hex numbers.
    """
    group, element = tag.group, tag.element
    return "(0x{:04X}, 0x{:04X})".format(group, element)
def default_name_filter(name: str) -> str:
    """Callable to shorten some names in generated code to a more readable form

    :arg name: a sequence variable name or sequence item name
    :return: the snake_case version of name, with known long phrases
             replaced by their abbreviations
    """
    # Applied in this order, after conversion to snake_case
    abbreviations = (
        ("control_point", "cp"),
        ("reference", "ref"),
        ("fraction_group", "frxn_gp"),
    )
    shortened = camel_to_underscore(name)
    for long_form, short_form in abbreviations:
        shortened = shortened.replace(long_form, short_form)
    return shortened
# Functions to produce python code
def code_imports() -> str:
    """Code the import statements needed by other codify results

    :return: a string of import statement lines, joined by ``line_term``
    """
    import_lines = (
        "import pydicom",
        "from pydicom.dataset import Dataset, FileMetaDataset",
        "from pydicom.sequence import Sequence",
    )
    return line_term.join(import_lines)
def _code_at_value(value: object) -> str:
    """Return source text for the value of an AT (Attribute Tag) element.

    Handles a single tag, several tags (coded as a list) and an empty or
    otherwise non-tag value (coded with ``repr``).
    """
    if isinstance(value, BaseTag):
        return tag_repr(value)
    if value is None or isinstance(value, (str, bytes)):
        # Empty element value - nothing to format as a tag
        return repr(value)
    try:
        items = list(value)
    except TypeError:
        # Not a tag and not iterable - fall back to the plain representation
        return repr(value)
    parts = [tag_repr(v) if isinstance(v, BaseTag) else repr(v) for v in items]
    return "[" + ", ".join(parts) + "]"


def code_dataelem(
    dataelem: DataElement,
    dataset_name: str = "ds",
    exclude_size: int | None = None,
    include_private: bool = False,
    var_names: deque | None = None,
) -> str:
    """Code lines for a single DICOM data element

    Parameters
    ----------
    dataelem : DataElement
        The DataElement instance to turn into code
    dataset_name : str
        The variable name of the Dataset containing `dataelem`
    exclude_size : int | None
        If specified, values longer than this (in bytes)
        will only have a commented string for a value,
        causing a syntax error when the code is run,
        and thus prompting the user to remove or fix that line.
    include_private : bool
        Only used when `dataelem` is a sequence, in which case it is passed
        on to code_sequence.
    var_names: deque | None
        Used internally to ensure unique variable names in nested sequences.

    Returns
    -------
    str
        A string containing code to recreate the data element
        If the data element is a sequence, calls code_sequence
    """
    if dataelem.VR == VR.SQ:
        return code_sequence(
            dataelem, dataset_name, exclude_size, include_private, var_names=var_names
        )

    # If in DICOM dictionary, set using the keyword
    # If not (e.g. is private element), set using add_new method
    have_keyword = True
    try:
        keyword = dictionary_keyword(dataelem.tag)
    except KeyError:
        have_keyword = False

    # If the value representation of the data element is AT (Attribute Tag),
    # then format it as a tag (or a list of tags if multi-valued)
    if dataelem.VR == "AT":
        valuerep = _code_at_value(dataelem.value)
    else:
        valuerep = repr(dataelem.value)

    if exclude_size:
        if (
            dataelem.VR in (BYTES_VR | AMBIGUOUS_VR) - {VR.US_SS}
            and not isinstance(dataelem.value, int | float)
            and len(dataelem.value) > exclude_size
        ):
            # Deliberately not valid as a value: the resulting line is a
            # syntax error the user has to fix or remove
            valuerep = f"# XXX Array of {len(dataelem.value)} bytes excluded"

    if have_keyword:
        line = f"{dataset_name}.{keyword} = {valuerep}"
    else:
        tag = tag_repr(dataelem.tag)
        vr = dataelem.VR
        line = f"{dataset_name}.add_new({tag}, '{vr}', {valuerep})"

    return line
def code_sequence(
    dataelem: DataElement,
    dataset_name: str = "ds",
    exclude_size: int | None = None,
    include_private: bool = False,
    name_filter: Callable[[str], str] = default_name_filter,
    var_names: deque | None = None,
) -> str:
    """Code lines for recreating a Sequence data element

    Parameters
    ----------
    dataelem : DataElement
        The DataElement instance whose value is the Sequence
    dataset_name : str
        Variable name of the dataset containing the Sequence
    exclude_size : int, optional
        If not ``None``, values longer than this (in bytes) will only have a
        commented string for a value, causing a syntax error when the code is
        run, and thus prompting the user to remove or fix that line.
    include_private: bool
        If ``False`` (default) private elements are skipped, otherwise private
        data elements will be coded.
    name_filter: Callable[[str], str]
        A callable taking a sequence name or sequence item name, and returning
        a shorter name for easier code reading
    var_names: deque | None
        Used internally to ensure unique variable names in nested sequences.

    Returns
    -------
    str
        A string containing code lines to recreate a DICOM sequence
    """
    # Normally var_names is given from code_dataset, but for some tests need
    # to initialize it
    if var_names is None:
        var_names = deque()

    def unique_name(name: str) -> str:
        # `name` has already been pushed onto var_names by the caller, so a
        # count of 1 means no enclosing sequence or item uses the same name
        name_count = (
            cast(deque, var_names).count(name) - 1
        )  # type:ignore[redundant-cast]
        return name if name_count == 0 else name + f"_{name_count}"

    lines = []
    seq = dataelem.value
    seq_name = dataelem.name
    seq_item_name = seq_name.replace(" Sequence", "")
    have_keyword = True
    try:
        seq_keyword = dictionary_keyword(dataelem.tag)
    except KeyError:
        # No keyword (e.g. private sequence): the fallback below is only used
        # to build variable names, never as a dataset attribute
        have_keyword = False
        seq_keyword = f"Tag{dataelem.tag:08x}"

    # Create comment line to document the start of Sequence
    lines.append("")
    lines.append("# " + seq_name)

    # Code line to create a new Sequence object
    seq_var = name_filter(seq_keyword)
    var_names.append(seq_var)
    orig_seq_var = seq_var
    seq_var = unique_name(seq_var)

    lines.append(seq_var + " = Sequence()")

    # Code line to add the sequence to its parent
    if have_keyword:
        lines.append(dataset_name + "." + seq_keyword + " = " + seq_var)
    else:
        # Attribute assignment with an unknown keyword would not create a
        # data element, so add by tag as code_dataelem does
        tag = tag_repr(dataelem.tag)
        lines.append(f"{dataset_name}.add_new({tag}, 'SQ', {seq_var})")

    # These do not depend on the individual item, so compute them once
    keyword_stem = seq_keyword.replace("Sequence", "")
    index_keyword = keyword_stem + "Index"
    number_keyword = keyword_stem + "Number"
    item_name_stem = orig_seq_var.replace("_sequence", "")

    # Code lines to add sequence items to the Sequence
    for i, ds in enumerate(seq):
        # Determine index to use. If seq item has a data element with 'Index',
        # use that; if one with 'Number', use that, else start at 1
        if hasattr(ds, index_keyword):
            index_str = str(getattr(ds, index_keyword))
        elif hasattr(ds, number_keyword):
            index_str = str(getattr(ds, number_keyword))
        else:
            index_str = str(i + 1)

        # Code comment line to mark start of sequence item
        lines.append("")
        lines.append("# " + seq_name + ": " + seq_item_name + " " + index_str)

        # Determine the variable name to use for the sequence item (dataset)
        ds_name = item_name_stem + index_str

        # Append "_#" if name already in use (in parent sequences)
        var_names.append(ds_name)
        ds_name = unique_name(ds_name)

        # Code the sequence item dataset
        code_item = code_dataset(
            ds, ds_name, exclude_size, include_private, var_names=var_names
        )

        # Remove variable name from stored list, this dataset complete
        var_names.pop()

        # Code dataset creation and appending that to sequence, then the rest
        # This keeps the logic close together, rather than after many items set
        code_split = code_item.splitlines()
        lines.append(code_split[0])  # "<ds_name> = Dataset()"
        lines.append(f"{seq_var}.append({ds_name})")
        lines.extend(code_split[1:])

    # Remove sequence variable name we've used
    var_names.pop()

    # Join the lines and return a single string
    return line_term.join(lines)
def code_dataset(
    ds: Dataset,
    dataset_name: str = "ds",
    exclude_size: int | None = None,
    include_private: bool = False,
    is_file_meta: bool = False,
    var_names: deque | None = None,
) -> str:
    """Return Python code for creating `ds`.

    Parameters
    ----------
    ds : pydicom.dataset.Dataset
        The dataset to codify.
    dataset_name : str, optional
        The Python variable name to use for the dataset, default ``'ds'``.
    exclude_size : int, optional
        If not ``None``, values longer than this (in bytes) will only have a
        commented string for a value, causing a syntax error when the code is
        run, and thus prompting the user to remove or fix that line.
    include_private : bool, optional
        If ``False`` (default) private elements are skipped, otherwise private
        data elements will be coded.
    is_file_meta : bool, optional
        ``True`` if `ds` contains file meta information elements; the
        generated code then creates a ``FileMetaDataset``.
    var_names: deque, optional
        Used internally to ensure unique variable names in nested sequences.

    Returns
    -------
    str
        The codified dataset.
    """
    names_in_use = deque() if var_names is None else var_names

    # First line creates the (file meta) dataset object itself
    constructor = "FileMetaDataset()" if is_file_meta else "Dataset()"
    code_lines = [dataset_name + " = " + constructor]

    for elem in ds:
        # Private elements are only coded when explicitly requested
        if include_private or not elem.tag.is_private:
            code_lines.append(
                code_dataelem(
                    elem,
                    dataset_name,
                    exclude_size,
                    include_private,
                    var_names=names_in_use,
                )
            )
            # Separate a coded sequence from whatever follows it
            if elem.VR == VR.SQ:
                code_lines.append("")

    # A sequence at the very end leaves a dangling blank line - drop it
    if code_lines and code_lines[-1] == "":
        code_lines.pop()

    return line_term.join(code_lines)
def code_file(
    filename: str, exclude_size: int | None = None, include_private: bool = False
) -> str:
    """Write a complete source code file to recreate a DICOM file

    The file is read with ``force=True`` and the resulting dataset is handed
    to code_file_from_dataset.

    Parameters
    ----------
    filename : str
        Complete path and filename of a DICOM file to convert
    exclude_size : int | None
        If not None, values longer than this (in bytes)
        will only have a commented string for a value,
        causing a syntax error when the code is run,
        and thus prompting the user to remove or fix that line.
    include_private : bool
        If ``False`` (default), private elements are skipped
        If ``True``, private data elements will be coded.

    Returns
    -------
    str
        A string containing code lines to recreate the entire DICOM file
    """
    return code_file_from_dataset(
        pydicom.dcmread(filename, force=True), exclude_size, include_private
    )
def code_file_from_dataset(
    ds: Dataset, exclude_size: int | None = None, include_private: bool = False
) -> str:
    """Write a complete source code file to recreate a DICOM file

    Parameters
    ----------
    ds : Dataset
        A pydicom Dataset to convert
    exclude_size : int | None
        If not None, values longer than this (in bytes)
        will only have a commented string for a value,
        causing a syntax error when the code is run,
        and thus prompting the user to remove or fix that line.
    include_private : bool
        If ``False`` (default), private elements are skipped
        If ``True``, private data elements will be coded.

    Returns
    -------
    str
        A string containing code lines to recreate the entire DICOM file
    """
    # Header comments, naming the source file when the dataset has one
    source_name = ds.get("filename")
    if source_name:
        identifier = f"DICOM file '{source_name}'"
    else:
        identifier = "non-file dataset"

    output = [
        "# -*- coding: utf-8 -*-",
        f"# Coded version of {identifier}",
        "# Produced by pydicom codify utility script",
        # The imports the generated code relies on
        code_imports(),
        "",
    ]

    # File meta information, if the dataset carries any
    has_file_meta = hasattr(ds, "file_meta")
    if has_file_meta:
        output.append("# File meta info data elements")
        output.append(
            code_dataset(
                ds.file_meta,
                "file_meta",
                exclude_size,
                include_private,
                is_file_meta=True,
            )
        )
        output.append("")

    # The main dataset
    output.append("# Main data elements")
    output.append(
        code_dataset(ds, exclude_size=exclude_size, include_private=include_private)
    )
    output.append("")

    # Attach the file meta to the dataset, and restore the original encoding
    if has_file_meta:
        output.append("ds.file_meta = file_meta")

    implicit_vr, little_endian = ds.original_encoding
    output.append(f"ds.set_original_encoding({implicit_vr}, {little_endian})")

    return line_term.join(output)
def set_parser_arguments(
    parser: argparse.ArgumentParser, default_exclude_size: int
) -> None:
    """Add the codify command-line arguments to `parser`.

    Parameters
    ----------
    parser : argparse.ArgumentParser
        The parser to add the arguments to.
    default_exclude_size : int
        Default for ``--exclude-size``; also shown in that option's help.
    """
    outfile_help = (
        "Filename to write Python code to, if not specified then code is "
        "written to stdout"
    )
    exclude_size_help = (
        "Exclude binary data larger than specified (default: "
        f"{default_exclude_size} bytes)"
    )
    save_as_help = (
        "Specify the filename for ds.save_as(save_filename); "
        "otherwise the input name + '_from_codify' will be used"
    )

    # Positional arguments: the input file specification, then the optional
    # output file (standard output when omitted)
    parser.add_argument("filespec", help=filespec_help, type=filespec_parser)
    parser.add_argument(
        "outfile",
        nargs="?",
        type=argparse.FileType("w", encoding="UTF-8"),
        help=outfile_help,
        default=sys.stdout,
    )

    # Options
    parser.add_argument(
        "-e",
        "--exclude-size",
        type=int,
        default=default_exclude_size,
        help=exclude_size_help,
    )
    parser.add_argument(
        "-p",
        "--include-private",
        action="store_true",
        help="Include private data elements (default is to exclude them)",
    )
    parser.add_argument("-s", "--save-as", help=save_as_help)
def _path_literal(path: object) -> str:
    """Return a Python string literal for `path`, raw-quoted when that is safe."""
    text = str(path)
    # A single-quoted raw literal cannot hold a quote or a line break, and
    # cannot end in a backslash; use the escaped repr() form for those
    if "'" in text or "\n" in text or "\r" in text or text.endswith("\\"):
        return repr(text)
    return f"r'{text}'"


def do_codify(args: argparse.Namespace) -> None:
    """Codify the dataset selected by `args` and write the resulting code.

    Parameters
    ----------
    args : argparse.Namespace
        Parsed arguments providing ``filespec``, ``outfile``,
        ``exclude_size``, ``include_private`` and ``save_as``.

    Raises
    ------
    NotImplementedError
        If more than one file specification was given, or if the selected
        element is not a Dataset.
    """
    # Convert the requested dataset to python/pydicom code lines
    if len(args.filespec) != 1:
        raise NotImplementedError("Codify can only work on a single DICOM file input")

    ds, element = args.filespec[0]
    filename = ds.filename

    if element and not isinstance(element, Dataset):
        raise NotImplementedError(
            f"Codify can only code a Dataset, not a {type(element)}"
        )

    code_str = code_file_from_dataset(
        element or ds, args.exclude_size, args.include_private
    )

    # Always add a code line to save the dataset, using the requested name
    # or else one derived from the input filename
    if args.save_as:
        save_as_filename = args.save_as
    else:
        base, _ = os.path.splitext(filename)
        save_as_filename = base + "_from_codify" + ".dcm"

    save_line = (
        f"\nds.save_as({_path_literal(save_as_filename)}, enforce_file_format=True)"
    )
    code_str += save_line

    # Write the code lines to specified file or to standard output
    # For test_util, captured output .name throws error, ignore it:
    try:
        if args.outfile.name != "<stdout>":
            print(f"Writing code to file '{args.outfile.name}'")
    except AttributeError:
        pass

    args.outfile.write(code_str)
def main(default_exclude_size: int, args: list[str] | None = None) -> None:
    """Create Python code according to user options

    Parameters:
    -----------
    default_exclude_size : int
        Values longer than this will be coded as a commented syntax error
    args : List[str], optional
        Command-line arguments to parse. If ``None`` then :attr:`sys.argv` is
        used.
    """
    epilog_text = (
        "Binary data (e.g. pixels) larger than --exclude-size "
        f"(default {default_exclude_size} bytes) is not included. A "
        "dummy line with a syntax error is produced. "
        "Private data elements are not included by default."
    )
    parser = argparse.ArgumentParser(
        description="Produce python/pydicom code from a DICOM file",
        epilog=epilog_text,
    )
    set_parser_arguments(parser, default_exclude_size)

    parsed_args = parser.parse_args(args)
    do_codify(parsed_args)
# When run as a script, codify using a default exclude size of 100
if __name__ == "__main__": # pragma: no cover
    main(default_exclude_size=100)