pdf: Use explicit palette when saving indexed images

Asking Pillow for an "adaptive palette" does not appear to guarantee that the chosen colours will be the same, even when asking for exactly the same number of colours as exist in the image. Asking Pillow to quantize with an explicit palette does not work either, as Pillow uses a cache that trims the last two bits from the colour and so never makes an exact match (see python-pillow/Pillow#1852 (comment)). So instead, manually calculate the indexed image using some NumPy tricks. Additionally, since the palette may now be smaller than 256 colours, Pillow may choose to encode the image data with fewer than 8 bits per component, so we need to properly reflect that in the decode parameters (this was already done for the image parameters). The effect on test images with _many_ colours is small, with a maximum RMS of 1.024, but for images with few colours, the result can be completely wrong, as in the reported matplotlib#25806.
QuLogic · Jun 10, 2023 · 96fad22 · 96fad22
1 parent 3b30f47
commit 96fad22
Show file tree

Hide file tree

Showing 21 changed files with 46 additions and 22 deletions.
diff --git a/lib/matplotlib/backends/backend_pdf.py b/lib/matplotlib/backends/backend_pdf.py
@@ -1731,39 +1731,43 @@ def _writeImg(self, data, id, smask=None):
                'Subtype': Name('Image'),
                'Width': width,
                'Height': height,
-               'ColorSpace': Name({1: 'DeviceGray',
-                                   3: 'DeviceRGB'}[color_channels]),
+               'ColorSpace': Name({1: 'DeviceGray', 3: 'DeviceRGB'}[color_channels]),
                'BitsPerComponent': 8}
         if smask:
             obj['SMask'] = smask
         if mpl.rcParams['pdf.compression']:
             if data.shape[-1] == 1:
                 data = data.squeeze(axis=-1)
+            png = {'Predictor': 10, 'Colors': color_channels, 'Columns': width}
             img = Image.fromarray(data)
             img_colors = img.getcolors(maxcolors=256)
             if color_channels == 3 and img_colors is not None:
-                # Convert to indexed color if there are 256 colors or fewer
-                # This can significantly reduce the file size
+                # Convert to indexed color if there are 256 colors or fewer. This can
+                # significantly reduce the file size.
                 num_colors = len(img_colors)
-                # These constants were converted to IntEnums and deprecated in
-                # Pillow 9.2
-                dither = getattr(Image, 'Dither', Image).NONE
-                pmode = getattr(Image, 'Palette', Image).ADAPTIVE
-                img = img.convert(
-                    mode='P', dither=dither, palette=pmode, colors=num_colors
-                )
+                palette = np.array([comp for _, color in img_colors for comp in color],
+                                   dtype=np.uint8)
+                palette24 = ((palette[0::3].astype(np.uint32) << 16) |
+                             (palette[1::3].astype(np.uint32) << 8) |
+                             palette[2::3])
+                rgb24 = ((data[:, :, 0].astype(np.uint32) << 16) |
+                         (data[:, :, 1].astype(np.uint32) << 8) |
+                         data[:, :, 2])
+                indices = np.argsort(palette24).astype(np.uint8)
+                rgb8 = indices[np.searchsorted(palette24, rgb24, sorter=indices)]
+                img = Image.fromarray(rgb8, mode='P')
+                img.putpalette(palette)
                 png_data, bit_depth, palette = self._writePng(img)
                 if bit_depth is None or palette is None:
                     raise RuntimeError("invalid PNG header")
-                palette = palette[:num_colors * 3]  # Trim padding
-                obj['ColorSpace'] = Verbatim(
-                    b'[/Indexed /DeviceRGB %d %s]'
-                    % (num_colors - 1, pdfRepr(palette)))
+                palette = palette[:num_colors * 3]  # Trim padding; remove for Pillow>=9
+                obj['ColorSpace'] = [Name('Indexed'), Name('DeviceRGB'),
+                                     num_colors - 1, palette]
                 obj['BitsPerComponent'] = bit_depth
-                color_channels = 1
+                png['Colors'] = 1
+                png['BitsPerComponent'] = bit_depth
             else:
                 png_data, _, _ = self._writePng(img)
-            png = {'Predictor': 10, 'Colors': color_channels, 'Columns': width}
         else:
             png = None
         self.beginStream(

diff --git a/lib/matplotlib/tests/baseline_images/test_agg_filter/agg_filter_alpha.pdf b/lib/matplotlib/tests/baseline_images/test_agg_filter/agg_filter_alpha.pdf
diff --git a/lib/matplotlib/tests/baseline_images/test_axes/imshow.pdf b/lib/matplotlib/tests/baseline_images/test_axes/imshow.pdf
diff --git a/lib/matplotlib/tests/baseline_images/test_axes/imshow_clip.pdf b/lib/matplotlib/tests/baseline_images/test_axes/imshow_clip.pdf
diff --git a/lib/matplotlib/tests/baseline_images/test_backend_pdf/grayscale_alpha.pdf b/lib/matplotlib/tests/baseline_images/test_backend_pdf/grayscale_alpha.pdf
diff --git a/lib/matplotlib/tests/baseline_images/test_image/bbox_image_inverted.pdf b/lib/matplotlib/tests/baseline_images/test_image/bbox_image_inverted.pdf
diff --git a/lib/matplotlib/tests/baseline_images/test_image/figimage.pdf b/lib/matplotlib/tests/baseline_images/test_image/figimage.pdf
diff --git a/lib/matplotlib/tests/baseline_images/test_image/image_alpha.pdf b/lib/matplotlib/tests/baseline_images/test_image/image_alpha.pdf
diff --git a/lib/matplotlib/tests/baseline_images/test_image/image_interps.pdf b/lib/matplotlib/tests/baseline_images/test_image/image_interps.pdf
diff --git a/lib/matplotlib/tests/baseline_images/test_image/image_placement.pdf b/lib/matplotlib/tests/baseline_images/test_image/image_placement.pdf
diff --git a/lib/matplotlib/tests/baseline_images/test_image/image_shift.pdf b/lib/matplotlib/tests/baseline_images/test_image/image_shift.pdf
diff --git a/lib/matplotlib/tests/baseline_images/test_image/imshow.pdf b/lib/matplotlib/tests/baseline_images/test_image/imshow.pdf
diff --git a/lib/matplotlib/tests/baseline_images/test_image/imshow_masked_interpolation.pdf b/lib/matplotlib/tests/baseline_images/test_image/imshow_masked_interpolation.pdf
diff --git a/lib/matplotlib/tests/baseline_images/test_image/log_scale_image.pdf b/lib/matplotlib/tests/baseline_images/test_image/log_scale_image.pdf
diff --git a/lib/matplotlib/tests/baseline_images/test_image/no_interpolation_origin.pdf b/lib/matplotlib/tests/baseline_images/test_image/no_interpolation_origin.pdf
diff --git a/lib/matplotlib/tests/baseline_images/test_image/rotate_image.pdf b/lib/matplotlib/tests/baseline_images/test_image/rotate_image.pdf
diff --git a/lib/matplotlib/tests/baseline_images/test_patheffects/patheffect1.pdf b/lib/matplotlib/tests/baseline_images/test_patheffects/patheffect1.pdf
diff --git a/lib/matplotlib/tests/baseline_images/test_streamplot/streamplot_colormap.pdf b/lib/matplotlib/tests/baseline_images/test_streamplot/streamplot_colormap.pdf
diff --git a/lib/matplotlib/tests/baseline_images/test_tightlayout/tight_layout5.pdf b/lib/matplotlib/tests/baseline_images/test_tightlayout/tight_layout5.pdf
diff --git a/lib/matplotlib/tests/test_backend_pdf.py b/lib/matplotlib/tests/test_backend_pdf.py
@@ -131,6 +131,30 @@ def test_composite_image():
         assert len(pdf._file._images) == 2
 
 
+def test_indexed_image():
+    # An image with low color count should compress to a palette-indexed format.
+    pikepdf = pytest.importorskip('pikepdf')
+
+    data = np.zeros((256, 1, 3), dtype=np.uint8)
+    data[:, 0, 0] = np.arange(256)  # Maximum unique colours for an indexed image.
+
+    rcParams['pdf.compression'] = True
+    fig = plt.figure()
+    fig.figimage(data, resize=True)
+    buf = io.BytesIO()
+    fig.savefig(buf, format='pdf', dpi='figure')
+
+    with pikepdf.Pdf.open(buf) as pdf:
+        page, = pdf.pages
+        image, = page.images.values()
+        pdf_image = pikepdf.PdfImage(image)
+        assert pdf_image.indexed
+        pil_image = pdf_image.as_pil_image()
+        rgb = np.asarray(pil_image.convert('RGB'))
+
+    np.testing.assert_array_equal(data, rgb)
+
+
 def test_savefig_metadata(monkeypatch):
     pikepdf = pytest.importorskip('pikepdf')
     monkeypatch.setenv('SOURCE_DATE_EPOCH', '0')

diff --git a/lib/matplotlib/tests/test_image.py b/lib/matplotlib/tests/test_image.py
@@ -754,11 +754,7 @@ def test_log_scale_image():
     ax.set(yscale='log')
 
 
-# Increased tolerance is needed for PDF test to avoid failure. After the PDF
-# backend was modified to use indexed color, there are ten pixels that differ
-# due to how the subpixel calculation is done when converting the PDF files to
-# PNG images.
-@image_comparison(['rotate_image'], remove_text=True, tol=0.35)
+@image_comparison(['rotate_image'], remove_text=True)
 def test_rotate_image():
     delta = 0.25
     x = y = np.arange(-3.0, 3.0, delta)