Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for reading DPI information from JPEG2000 images #5568

Merged
merged 8 commits into from
Aug 2, 2021
Binary file added Tests/images/expected_to_read.jp2
Binary file not shown.
Binary file added Tests/images/invalid_header_length.jp2
Binary file not shown.
Binary file added Tests/images/not_enough_data.jp2
Binary file not shown.
Binary file added Tests/images/zero_dpi.jp2
Binary file not shown.
24 changes: 23 additions & 1 deletion Tests/test_file_jpeg2k.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

import pytest

from PIL import Image, ImageFile, Jpeg2KImagePlugin, features
from PIL import Image, ImageFile, Jpeg2KImagePlugin, UnidentifiedImageError, features

from .helper import (
assert_image_equal,
Expand Down Expand Up @@ -151,6 +151,28 @@ def test_reduce():
assert im.size == (40, 30)


def test_load_dpi():
with Image.open("Tests/images/test-card-lossless.jp2") as im:
assert im.info["dpi"] == (71.9836, 71.9836)

with Image.open("Tests/images/zero_dpi.jp2") as im:
assert "dpi" not in im.info


def test_header_errors():
for path in (
"Tests/images/invalid_header_length.jp2",
"Tests/images/not_enough_data.jp2",
):
with pytest.raises(UnidentifiedImageError):
with Image.open(path):
pass

with pytest.raises(OSError):
with Image.open("Tests/images/expected_to_read.jp2"):
pass


def test_layers_type(tmp_path):
outfile = str(tmp_path / "temp_layers.jp2")
for quality_layers in [[100, 50, 10], (100, 50, 10), None]:
Expand Down
153 changes: 113 additions & 40 deletions src/PIL/Jpeg2KImagePlugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#
# History:
# 2014-03-12 ajh Created
# 2021-06-30 rogermb Extract dpi information from the 'resc' header box
#
# Copyright (c) 2014 Coriolis Systems Limited
# Copyright (c) 2014 Alastair Houghton
Expand All @@ -19,6 +20,79 @@
from . import Image, ImageFile


class BoxReader:
"""
A small helper class to read fields stored in JPEG2000 header boxes
and to easily step into and read sub-boxes.
"""

def __init__(self, fp, length=-1):
self.fp = fp
self.has_length = length >= 0
self.length = length
self.remaining_in_box = -1

def _can_read(self, num_bytes):
if self.has_length and self.fp.tell() + num_bytes > self.length:
# Outside box: ensure we don't read past the known file length
return False
if self.remaining_in_box >= 0:
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This could technically also be an elif for symmetry and whatnot -- not that it matters, since we return from the previous if branch anyway. 👍 on spotting and fixing the bug where we could read past the parent box length!

# Inside box contents: ensure read does not go past box boundaries
return num_bytes <= self.remaining_in_box
else:
return True # No length known, just read

def _read_bytes(self, num_bytes):
if not self._can_read(num_bytes):
raise SyntaxError("Not enough data in header")

data = self.fp.read(num_bytes)
if len(data) < num_bytes:
raise OSError(
f"Expected to read {num_bytes} bytes but only got {len(data)}."
)

if self.remaining_in_box > 0:
self.remaining_in_box -= num_bytes
return data

def read_fields(self, field_format):
size = struct.calcsize(field_format)
data = self._read_bytes(size)
return struct.unpack(field_format, data)

def read_boxes(self):
size = self.remaining_in_box
data = self._read_bytes(size)
return BoxReader(io.BytesIO(data), size)

def has_next_box(self):
if self.has_length:
return self.fp.tell() + self.remaining_in_box < self.length
else:
return True

def next_box_type(self):
# Skip the rest of the box if it has not been read
if self.remaining_in_box > 0:
self.fp.seek(self.remaining_in_box, os.SEEK_CUR)
self.remaining_in_box = -1

# Read the length and type of the next box
lbox, tbox = self.read_fields(">I4s")
if lbox == 1:
lbox = self.read_fields(">Q")[0]
hlen = 16
else:
hlen = 8

if lbox < hlen or not self._can_read(lbox - hlen):
raise SyntaxError("Invalid header length")

self.remaining_in_box = lbox - hlen
return tbox


def _parse_codestream(fp):
"""Parse the JPEG 2000 codestream to extract the size and component
count from the SIZ marker segment, returning a PIL (size, mode) tuple."""
Expand Down Expand Up @@ -53,55 +127,45 @@ def _parse_codestream(fp):
return (size, mode)


def _res_to_dpi(num, denom, exp):
"""Convert JPEG2000's (numerator, denominator, exponent-base-10) resolution,
calculated as (num / denom) * 10^exp and stored in dots per meter,
to floating-point dots per inch."""
if denom != 0:
return (254 * num * (10 ** exp)) / (10000 * denom)


def _parse_jp2_header(fp):
"""Parse the JP2 header box to extract size, component count and
color space information, returning a (size, mode, mimetype) tuple."""
"""Parse the JP2 header box to extract size, component count,
color space information, and optionally DPI information,
returning a (size, mode, mimetype, dpi) tuple."""

# Find the JP2 header box
reader = BoxReader(fp)
header = None
mimetype = None
while True:
lbox, tbox = struct.unpack(">I4s", fp.read(8))
if lbox == 1:
lbox = struct.unpack(">Q", fp.read(8))[0]
hlen = 16
else:
hlen = 8

if lbox < hlen:
raise SyntaxError("Invalid JP2 header length")
while reader.has_next_box():
tbox = reader.next_box_type()

if tbox == b"jp2h":
header = fp.read(lbox - hlen)
header = reader.read_boxes()
break
elif tbox == b"ftyp":
if fp.read(4) == b"jpx ":
if reader.read_fields(">4s")[0] == b"jpx ":
mimetype = "image/jpx"
fp.seek(lbox - hlen - 4, os.SEEK_CUR)
else:
fp.seek(lbox - hlen, os.SEEK_CUR)

if header is None:
raise SyntaxError("could not find JP2 header")

size = None
mode = None
bpc = None
nc = None
dpi = None # 2-tuple of DPI info, or None
unkc = 0 # Colorspace information unknown

hio = io.BytesIO(header)
while True:
lbox, tbox = struct.unpack(">I4s", hio.read(8))
if lbox == 1:
lbox = struct.unpack(">Q", hio.read(8))[0]
hlen = 16
else:
hlen = 8

content = hio.read(lbox - hlen)
while header.has_next_box():
tbox = header.next_box_type()

if tbox == b"ihdr":
height, width, nc, bpc, c, unkc, ipr = struct.unpack(">IIHBBBB", content)
height, width, nc, bpc, c, unkc, ipr = header.read_fields(">IIHBBBB")
size = (width, height)
if unkc:
if nc == 1 and (bpc & 0x7F) > 8:
Expand All @@ -114,11 +178,10 @@ def _parse_jp2_header(fp):
mode = "RGB"
elif nc == 4:
mode = "RGBA"
break
elif tbox == b"colr":
meth, prec, approx = struct.unpack_from(">BBB", content)
if meth == 1:
cs = struct.unpack_from(">I", content, 3)[0]
meth, prec, approx = header.read_fields(">BBB")
if meth == 1 and unkc == 0:
radarhere marked this conversation as resolved.
Show resolved Hide resolved
cs = header.read_fields(">I")[0]
if cs == 16: # sRGB
if nc == 1 and (bpc & 0x7F) > 8:
mode = "I;16"
Expand All @@ -128,26 +191,34 @@ def _parse_jp2_header(fp):
mode = "RGB"
elif nc == 4:
mode = "RGBA"
break
elif cs == 17: # grayscale
if nc == 1 and (bpc & 0x7F) > 8:
mode = "I;16"
elif nc == 1:
mode = "L"
elif nc == 2:
mode = "LA"
break
elif cs == 18: # sYCC
if nc == 3:
mode = "RGB"
elif nc == 4:
mode = "RGBA"
elif tbox == b"res ":
res = header.read_boxes()
while res.has_next_box():
tres = res.next_box_type()
if tres == b"resc":
vrcn, vrcd, hrcn, hrcd, vrce, hrce = res.read_fields(">HHHHBB")
hres = _res_to_dpi(hrcn, hrcd, hrce)
vres = _res_to_dpi(vrcn, vrcd, vrce)
if hres is not None and vres is not None:
dpi = (hres, vres)
break

if size is None or mode is None:
raise SyntaxError("Malformed jp2 header")
raise SyntaxError("Malformed JP2 header")

return (size, mode, mimetype)
return (size, mode, mimetype, dpi)


##
Expand All @@ -169,7 +240,9 @@ def _open(self):
if sig == b"\x00\x00\x00\x0cjP \x0d\x0a\x87\x0a":
self.codec = "jp2"
header = _parse_jp2_header(self.fp)
self._size, self.mode, self.custom_mimetype = header
self._size, self.mode, self.custom_mimetype, dpi = header
if dpi is not None:
self.info["dpi"] = dpi
else:
raise SyntaxError("not a JPEG 2000 file")

Expand Down