feat: Added Export Images functionality (#96)

Co-authored-by: Gal Zahavi <38544478+galz10@users.noreply.github.com>
googleapis · Apr 6, 2023 · 383e105 · 383e105
1 parent c607136
commit 383e105
Show file tree

Hide file tree

Showing 9 changed files with 224 additions and 4 deletions.
diff --git a/google/cloud/documentai_toolbox/constants.py b/google/cloud/documentai_toolbox/constants.py
@@ -37,3 +37,5 @@
     "image/tiff",
     "image/webp",
 }
+
+# Entity types that are treated as image regions: `Entity.crop_image` only
+# crops entities whose `type_` is a member of this set.
+IMAGE_ENTITIES = {"Portrait"}
diff --git a/google/cloud/documentai_toolbox/wrappers/document.py b/google/cloud/documentai_toolbox/wrappers/document.py
@@ -60,10 +60,17 @@ def _entities_from_shards(
     """
     result = []
     for shard in shards:
-        for entity in shard.entities:
-            result.append(Entity(documentai_entity=entity))
-            for prop in entity.properties:
-                result.append(Entity(documentai_entity=prop))
+        for documentai_entity in shard.entities:
+            entity = Entity(documentai_entity=documentai_entity)
+            entity.crop_image(shard)
+            result.append(entity)
+            for documentai_prop in documentai_entity.properties:
+                prop = Entity(documentai_entity=documentai_prop)
+                prop.crop_image(shard)
+                result.append(prop)
+
+    if len(result) > 1 and result[0].documentai_entity.id:
+        result.sort(key=lambda x: int(x.documentai_entity.id))
     return result
 
 
@@ -504,3 +511,37 @@ def convert_document_to_annotate_file_response(self) -> AnnotateFileResponse:
                 Proto with TextAnnotations.
         """
         return _convert_to_vision_annotate_file_response(self.text, self.pages)
+
+    def export_images(
+        self, output_path: str, output_file_prefix: str, output_file_extension: str
+    ) -> List[str]:
+        r"""Exports images from `Document` to files.
+
+        Every entity whose `image` is set is saved into `output_path`.
+        Entities without an image are skipped and do not consume an index.
+
+        Args:
+            output_path (str):
+                Required. The path to the output directory.
+            output_file_prefix (str):
+                Required. The output file name prefix.
+            output_file_extension (str):
+                Required. The output file extension.
+
+                Format: `png`, `jpg`, etc.
+        Returns:
+            List[str]:
+                A list of output image file names. The names do not include
+                `output_path`; join them with it to get the saved location.
+                Format: `{output_file_prefix}_{index}_{Entity.type_}.{output_file_extension}`
+        """
+        # Filter first so that `index` counts exported images only.
+        entities_with_images = [entity for entity in self.entities if entity.image]
+
+        output_filenames: List[str] = []
+        for index, entity in enumerate(entities_with_images):
+            output_filename = (
+                f"{output_file_prefix}_{index}_{entity.type_}.{output_file_extension}"
+            )
+            entity.image.save(os.path.join(output_path, output_filename))
+            output_filenames.append(output_filename)
+
+        return output_filenames
diff --git a/google/cloud/documentai_toolbox/wrappers/entity.py b/google/cloud/documentai_toolbox/wrappers/entity.py
@@ -17,7 +17,11 @@
 
 import dataclasses
 
+from io import BytesIO
+
 from google.cloud import documentai
+from google.cloud.documentai_toolbox import constants
+from PIL import Image
 
 
 @dataclasses.dataclass
@@ -38,10 +42,14 @@ class Entity:
     type_: str = dataclasses.field(init=False)
     mention_text: str = dataclasses.field(init=False, default="")
     normalized_text: str = dataclasses.field(init=False, default="")
+
     # Only Populated for Splitter/Classifier Output
     start_page: int = dataclasses.field(init=False)
     end_page: int = dataclasses.field(init=False)
 
+    # Only Populated for Identity Documents
+    image: Image.Image = dataclasses.field(init=False, default=None)
+
     def __post_init__(self):
         self.type_ = self.documentai_entity.type_
         self.mention_text = self.documentai_entity.mention_text
@@ -54,3 +62,29 @@ def __post_init__(self):
         if self.documentai_entity.page_anchor.page_refs:
             self.start_page = int(self.documentai_entity.page_anchor.page_refs[0].page)
             self.end_page = int(self.documentai_entity.page_anchor.page_refs[-1].page)
+
+    def crop_image(self, documentai_document: documentai.Document) -> None:
+        r"""Crops the entity's region from its page image and stores it in `self.image`.
+
+        Nothing is cropped, and `self.image` is left unchanged, when the entity
+        type is not in `constants.IMAGE_ENTITIES`, when the entity has mention
+        text, or when the page reference, the referenced page, the page image
+        content or the bounding polygon is missing.
+
+        Args:
+            documentai_document (documentai.Document):
+                Required. The `Document` containing the `Entity`.
+        Returns:
+            None. The cropped `PIL.Image.Image` is assigned to `self.image`.
+        """
+        if self.type_ not in constants.IMAGE_ENTITIES or self.mention_text:
+            return
+
+        page_refs = self.documentai_entity.page_anchor.page_refs
+        if not page_refs:
+            return
+        page_ref = page_refs[0]
+
+        # NOTE(review): `page_ref.page` is used as an index into the pages of the
+        # document that was passed in. If a caller passes one shard of a
+        # multi-shard document the index may not line up with `pages` -- confirm.
+        pages = documentai_document.pages
+        if not 0 <= page_ref.page < len(pages):
+            return
+
+        image_content = pages[page_ref.page].image.content
+        normalized_vertices = page_ref.bounding_poly.normalized_vertices
+        if not image_content or not normalized_vertices:
+            return
+
+        doc_image = Image.open(BytesIO(image_content))
+        w, h = doc_image.size
+        # Scale the normalized vertices to pixel coordinates; adding 0.5 before
+        # truncating rounds to the nearest pixel.
+        xs = [int(v.x * w + 0.5) for v in normalized_vertices]
+        ys = [int(v.y * h + 0.5) for v in normalized_vertices]
+        # PIL's crop box is (left, upper, right, lower). Taking min/max yields the
+        # bounding box regardless of the order the vertices are listed in.
+        self.image = doc_image.crop((min(xs), min(ys), max(xs), max(ys)))
diff --git a/samples/snippets/export_images_sample.py b/samples/snippets/export_images_sample.py
@@ -0,0 +1,47 @@
+# Copyright 2023 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
+# [START documentai_toolbox_export_images]
+
+from google.cloud.documentai_toolbox import document
+
+# TODO(developer): Uncomment these variables before running the sample.
+# `document_path` points to a local document.proto or sharded document.proto from an identity processor
+# document_path = "path/to/local/document.json"
+# output_path = "resources/output/"
+# output_file_prefix = "exported_photo"
+# output_file_extension = "png"
+
+
+def export_images_sample(
+    document_path: str,
+    output_path: str,
+    output_file_prefix: str,
+    output_file_extension: str,
+) -> None:
+    """Export the images of the document at `document_path` and print each file name."""
+    # Wrap the document found at `document_path`.
+    toolbox_document = document.Document.from_document_path(document_path=document_path)
+
+    exported_names = toolbox_document.export_images(
+        output_file_extension=output_file_extension,
+        output_file_prefix=output_file_prefix,
+        output_path=output_path,
+    )
+
+    print("Images Successfully Exported")
+    for exported_name in exported_names:
+        print(exported_name)
+
+
+# [END documentai_toolbox_export_images]
diff --git a/samples/snippets/test_export_images_sample.py b/samples/snippets/test_export_images_sample.py
@@ -0,0 +1,46 @@
+# Copyright 2023 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import os
+import shutil
+
+import pytest
+from samples.snippets import export_images_sample
+
+document_path = "../../tests/unit/resources/images/dl3-0.json"
+output_path = "resources/output/"
+output_file_prefix = "exported_photo"
+output_file_extension = "png"
+
+
+def test_export_images_sample(capsys: pytest.CaptureFixture) -> None:
+    """Run the export sample and check both its printed output and the written file."""
+    os.makedirs(output_path)
+    # Clean up in `finally` so a failing assertion does not leave the directory
+    # behind, which would make the next run's `os.makedirs` raise.
+    try:
+        current_directory = os.path.dirname(__file__)
+        rel_document_path = os.path.relpath(document_path, current_directory)
+
+        export_images_sample.export_images_sample(
+            document_path=rel_document_path,
+            output_path=output_path,
+            output_file_prefix=output_file_prefix,
+            output_file_extension=output_file_extension,
+        )
+
+        out, _ = capsys.readouterr()
+
+        assert "Images Successfully Exported" in out
+        assert "exported_photo_0_Portrait.png" in out
+
+        # Check the exported file itself; the directory always exists because
+        # this test created it.
+        assert os.path.exists(
+            os.path.join(output_path, "exported_photo_0_Portrait.png")
+        )
+    finally:
+        shutil.rmtree(output_path)
diff --git a/setup.py b/setup.py
@@ -59,6 +59,7 @@
         "pikepdf >= 6.2.9, < 8.0.0",
         "pikepdf >= 6.2.9, < 7.0.0; python_version<'3.8'",
         "immutabledict >= 2.0.0, < 3.0.0dev",
+        "Pillow >= 9.5.0, < 10.0.0",
     ),
     python_requires=">=3.7",
     classifiers=[

diff --git a/tests/unit/resources/images/dl3-0.json b/tests/unit/resources/images/dl3-0.json
diff --git a/tests/unit/test_document.py b/tests/unit/test_document.py
@@ -15,6 +15,7 @@
 # limitations under the License.
 
 import os
+import shutil
 
 # try/except added for compatibility with python < 3.8
 try:
@@ -75,6 +76,13 @@ def get_bytes_splitter_mock():
         yield byte_factory
 
 
+@pytest.fixture
+def get_bytes_images_mock():
+    """Patch `document._get_bytes` to return `get_bytes("tests/unit/resources/images")`."""
+    patcher = mock.patch.object(document, "_get_bytes")
+    with patcher as patched_get_bytes:
+        patched_get_bytes.return_value = get_bytes("tests/unit/resources/images")
+        yield patched_get_bytes
+
+
 def test_get_shards_with_gcs_uri_contains_file_type():
     with pytest.raises(ValueError, match="gcs_prefix cannot contain file types"):
         document._get_shards(
@@ -379,3 +387,26 @@ def test_convert_document_to_annotate_file_response():
     actual = doc.convert_document_to_annotate_file_response()
 
     assert actual != AnnotateFileResponse()
+
+
+def test_export_images(get_bytes_images_mock):
+    """`export_images` writes the Portrait image and returns its file name."""
+    doc = document.Document.from_gcs(
+        gcs_bucket_name="test-directory", gcs_prefix="documentai/output/123456789/0"
+    )
+    output_path = "resources/output/"
+
+    os.makedirs(output_path)
+    # Clean up in `finally` so a failing assertion does not leave the directory
+    # behind, which would make the next run's `os.makedirs` raise.
+    try:
+        actual = doc.export_images(
+            output_path=output_path,
+            output_file_prefix="exported_photo",
+            output_file_extension="png",
+        )
+        get_bytes_images_mock.assert_called_once()
+
+        assert actual == [
+            "exported_photo_0_Portrait.png",
+        ]
+        # Check the exported file itself; the directory always exists because
+        # this test created it.
+        assert os.path.exists(
+            os.path.join(output_path, "exported_photo_0_Portrait.png")
+        )
+    finally:
+        shutil.rmtree(output_path)
diff --git a/tests/unit/test_entity.py b/tests/unit/test_entity.py
@@ -14,10 +14,20 @@
 # limitations under the License.
 #
 
+import pytest
+
 from google.cloud import documentai
+
+from google.cloud.documentai_toolbox import document
 from google.cloud.documentai_toolbox import entity
 
 
+@pytest.fixture
+def docproto():
+    """Parse the dl3-0.json test resource into a `documentai.Document`."""
+    resource_path = "tests/unit/resources/images/dl3-0.json"
+    with open(resource_path, "r", encoding="utf-8") as json_file:
+        document_json = json_file.read()
+    return documentai.Document.from_json(document_json)
+
+
 def test_Entity():
     documentai_entity = documentai.Document.Entity(
         type_="some_entity_type", mention_text="some_mention_text"
@@ -58,3 +68,10 @@ def test_Entity_splitter():
     assert wrapper_entity.type_ == "invoice_statement"
     assert wrapper_entity.start_page == 0
     assert wrapper_entity.end_page == 2
+
+
+def test_crop_image(docproto):
+    """After `crop_image`, the first entity of the wrapped document has a truthy image."""
+    wrapped_document = document.Document.from_documentai_document(docproto)
+
+    wrapped_document.entities[0].crop_image(documentai_document=docproto)
+
+    assert wrapped_document.entities[0].image