Skip to content

Commit

Permalink
pdf: Use explicit palette when saving indexed images
Browse files Browse the repository at this point in the history
Asking Pillow for an "adaptive palette" does not appear to guarantee
that the chosen colours will be the same as those in the image, even when
asking for exactly as many colours as exist in the image. And asking Pillow
to quantize with an explicit palette does not work either, as Pillow uses a
cache that trims the last two bits from the colour and never makes an exact
match.
python-pillow/Pillow#1852 (comment)

So instead, manually calculate the indexed image using some NumPy
tricks.

Additionally, since now the palette may be smaller than 256 colours,
Pillow may choose to encode the image data with fewer than 8 bits per
component, so we need to properly reflect that in the decode parameters
(this was already done for the image parameters).

The effect on test images with _many_ colours is small, with a maximum
RMS of 1.024, but for images with few colours, the result can be
completely wrong as in the reported matplotlib#25806.
  • Loading branch information
QuLogic committed Jun 10, 2023
1 parent 3b30f47 commit 96fad22
Show file tree
Hide file tree
Showing 21 changed files with 46 additions and 22 deletions.
38 changes: 21 additions & 17 deletions lib/matplotlib/backends/backend_pdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -1731,39 +1731,43 @@ def _writeImg(self, data, id, smask=None):
'Subtype': Name('Image'),
'Width': width,
'Height': height,
'ColorSpace': Name({1: 'DeviceGray',
3: 'DeviceRGB'}[color_channels]),
'ColorSpace': Name({1: 'DeviceGray', 3: 'DeviceRGB'}[color_channels]),
'BitsPerComponent': 8}
if smask:
obj['SMask'] = smask
if mpl.rcParams['pdf.compression']:
if data.shape[-1] == 1:
data = data.squeeze(axis=-1)
png = {'Predictor': 10, 'Colors': color_channels, 'Columns': width}
img = Image.fromarray(data)
img_colors = img.getcolors(maxcolors=256)
if color_channels == 3 and img_colors is not None:
# Convert to indexed color if there are 256 colors or fewer
# This can significantly reduce the file size
# Convert to indexed color if there are 256 colors or fewer. This can
# significantly reduce the file size.
num_colors = len(img_colors)
# These constants were converted to IntEnums and deprecated in
# Pillow 9.2
dither = getattr(Image, 'Dither', Image).NONE
pmode = getattr(Image, 'Palette', Image).ADAPTIVE
img = img.convert(
mode='P', dither=dither, palette=pmode, colors=num_colors
)
palette = np.array([comp for _, color in img_colors for comp in color],
dtype=np.uint8)
palette24 = ((palette[0::3].astype(np.uint32) << 16) |
(palette[1::3].astype(np.uint32) << 8) |
palette[2::3])
rgb24 = ((data[:, :, 0].astype(np.uint32) << 16) |
(data[:, :, 1].astype(np.uint32) << 8) |
data[:, :, 2])
indices = np.argsort(palette24).astype(np.uint8)
rgb8 = indices[np.searchsorted(palette24, rgb24, sorter=indices)]
img = Image.fromarray(rgb8, mode='P')
img.putpalette(palette)
png_data, bit_depth, palette = self._writePng(img)
if bit_depth is None or palette is None:
raise RuntimeError("invalid PNG header")
palette = palette[:num_colors * 3] # Trim padding
obj['ColorSpace'] = Verbatim(
b'[/Indexed /DeviceRGB %d %s]'
% (num_colors - 1, pdfRepr(palette)))
palette = palette[:num_colors * 3] # Trim padding; remove for Pillow>=9
obj['ColorSpace'] = [Name('Indexed'), Name('DeviceRGB'),
num_colors - 1, palette]
obj['BitsPerComponent'] = bit_depth
color_channels = 1
png['Colors'] = 1
png['BitsPerComponent'] = bit_depth
else:
png_data, _, _ = self._writePng(img)
png = {'Predictor': 10, 'Colors': color_channels, 'Columns': width}
else:
png = None
self.beginStream(
Expand Down
Binary file not shown.
Binary file modified lib/matplotlib/tests/baseline_images/test_axes/imshow.pdf
Binary file not shown.
Binary file modified lib/matplotlib/tests/baseline_images/test_axes/imshow_clip.pdf
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file modified lib/matplotlib/tests/baseline_images/test_image/figimage.pdf
Binary file not shown.
Binary file modified lib/matplotlib/tests/baseline_images/test_image/image_alpha.pdf
Binary file not shown.
Binary file modified lib/matplotlib/tests/baseline_images/test_image/image_interps.pdf
Binary file not shown.
Binary file not shown.
Binary file modified lib/matplotlib/tests/baseline_images/test_image/image_shift.pdf
Binary file not shown.
Binary file modified lib/matplotlib/tests/baseline_images/test_image/imshow.pdf
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file modified lib/matplotlib/tests/baseline_images/test_image/rotate_image.pdf
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
24 changes: 24 additions & 0 deletions lib/matplotlib/tests/test_backend_pdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,30 @@ def test_composite_image():
assert len(pdf._file._images) == 2


def test_indexed_image():
    """Check that a low-colour RGB image round-trips as an indexed PDF image.

    A 256x1 image with 256 distinct colours is saved to an in-memory PDF with
    compression enabled. The single embedded image is then read back with
    pikepdf, must report itself as indexed, and must decode to exactly the
    original pixel values.
    """
    # Skip rather than fail when the optional PDF reader is unavailable.
    pikepdf = pytest.importorskip('pikepdf')

    # Red ramps over 0..255 while green and blue stay zero, giving 256 unique
    # colours: the maximum number for an indexed image.
    expected = np.zeros((256, 1, 3), dtype=np.uint8)
    expected[:, 0, 0] = np.arange(256)

    rcParams['pdf.compression'] = True
    figure = plt.figure()
    figure.figimage(expected, resize=True)
    stream = io.BytesIO()
    figure.savefig(stream, format='pdf', dpi='figure')

    with pikepdf.Pdf.open(stream) as document:
        # The unpacking doubles as a check for exactly one page and one image.
        (page,) = document.pages
        (raw_image,) = page.images.values()
        wrapped = pikepdf.PdfImage(raw_image)
        assert wrapped.indexed
        decoded = np.asarray(wrapped.as_pil_image().convert('RGB'))

    np.testing.assert_array_equal(expected, decoded)


def test_savefig_metadata(monkeypatch):
pikepdf = pytest.importorskip('pikepdf')
monkeypatch.setenv('SOURCE_DATE_EPOCH', '0')
Expand Down
6 changes: 1 addition & 5 deletions lib/matplotlib/tests/test_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -754,11 +754,7 @@ def test_log_scale_image():
ax.set(yscale='log')


# Increased tolerance is needed for PDF test to avoid failure. After the PDF
# backend was modified to use indexed color, there are ten pixels that differ
# due to how the subpixel calculation is done when converting the PDF files to
# PNG images.
@image_comparison(['rotate_image'], remove_text=True, tol=0.35)
@image_comparison(['rotate_image'], remove_text=True)
def test_rotate_image():
delta = 0.25
x = y = np.arange(-3.0, 3.0, delta)
Expand Down

0 comments on commit 96fad22

Please sign in to comment.