Skip to content

Commit

Permalink
feat: Added Export Images functionality (#96)
Browse files Browse the repository at this point in the history
Co-authored-by: Gal Zahavi <38544478+galz10@users.noreply.github.com>
  • Loading branch information
holtskinner and galz10 committed Apr 6, 2023
1 parent c607136 commit 383e105
Show file tree
Hide file tree
Showing 9 changed files with 224 additions and 4 deletions.
2 changes: 2 additions & 0 deletions google/cloud/documentai_toolbox/constants.py
Expand Up @@ -37,3 +37,5 @@
"image/tiff",
"image/webp",
}

# Entity types for which `Entity.crop_image` crops a region out of the page image.
IMAGE_ENTITIES = {"Portrait"}
49 changes: 45 additions & 4 deletions google/cloud/documentai_toolbox/wrappers/document.py
Expand Up @@ -60,10 +60,17 @@ def _entities_from_shards(
"""
result = []
for shard in shards:
for entity in shard.entities:
result.append(Entity(documentai_entity=entity))
for prop in entity.properties:
result.append(Entity(documentai_entity=prop))
for documentai_entity in shard.entities:
entity = Entity(documentai_entity=documentai_entity)
entity.crop_image(shard)
result.append(entity)
for documentai_prop in documentai_entity.properties:
prop = Entity(documentai_entity=documentai_prop)
prop.crop_image(shard)
result.append(prop)

if len(result) > 1 and result[0].documentai_entity.id:
result.sort(key=lambda x: int(x.documentai_entity.id))
return result


Expand Down Expand Up @@ -504,3 +511,37 @@ def convert_document_to_annotate_file_response(self) -> AnnotateFileResponse:
Proto with TextAnnotations.
"""
return _convert_to_vision_annotate_file_response(self.text, self.pages)

def export_images(
    self, output_path: str, output_file_prefix: str, output_file_extension: str
) -> List[str]:
    r"""Saves every entity image of the `Document` to a file.

    Entities without an image are skipped and do not consume an index.

    Args:
        output_path (str):
            Required. The path to the output directory.
        output_file_prefix (str):
            Required. The output file name prefix.
        output_file_extension (str):
            Required. The output file extension.
            Format: `png`, `jpg`, etc.
    Returns:
        List[str]:
            The names of the files written inside `output_path`
            (the directory itself is not part of the returned names).
            Format: `{output_file_prefix}_{index}_{Entity.type_}.{output_file_extension}`
    """
    # Lazy filter: each entity is checked right before it is saved.
    entities_with_image = (
        candidate for candidate in self.entities if candidate.image
    )

    exported: List[str] = []
    for position, image_entity in enumerate(entities_with_image):
        file_name = f"{output_file_prefix}_{position}_{image_entity.type_}.{output_file_extension}"
        destination = os.path.join(output_path, file_name)
        image_entity.image.save(destination)
        exported.append(file_name)

    return exported
34 changes: 34 additions & 0 deletions google/cloud/documentai_toolbox/wrappers/entity.py
Expand Up @@ -17,7 +17,11 @@

import dataclasses

from io import BytesIO

from google.cloud import documentai
from google.cloud.documentai_toolbox import constants
from PIL import Image


@dataclasses.dataclass
Expand All @@ -38,10 +42,14 @@ class Entity:
type_: str = dataclasses.field(init=False)
mention_text: str = dataclasses.field(init=False, default="")
normalized_text: str = dataclasses.field(init=False, default="")

# Only Populated for Splitter/Classifier Output
start_page: int = dataclasses.field(init=False)
end_page: int = dataclasses.field(init=False)

# Only Populated for Identity Documents
image: Image.Image = dataclasses.field(init=False, default=None)

def __post_init__(self):
self.type_ = self.documentai_entity.type_
self.mention_text = self.documentai_entity.mention_text
Expand All @@ -54,3 +62,29 @@ def __post_init__(self):
if self.documentai_entity.page_anchor.page_refs:
self.start_page = int(self.documentai_entity.page_anchor.page_refs[0].page)
self.end_page = int(self.documentai_entity.page_anchor.page_refs[-1].page)

def crop_image(self, documentai_document: documentai.Document):
    r"""Crops the entity's region out of its page image and stores it in `self.image`.

    Nothing is cropped (and `self.image` is left untouched) when the entity
    type is not in `constants.IMAGE_ENTITIES`, when the entity has
    `mention_text`, when it has no page reference, when its bounding polygon
    has fewer than three normalized vertices, or when the page has no image
    content.

    Args:
        documentai_document (documentai.Document):
            Required. The `Document` containing the `Entity`.
    Returns:
        PIL.Image.Image:
            Image from `Document.Entity`. Returns `None` if there is no image.
    """
    if self.type_ not in constants.IMAGE_ENTITIES or self.mention_text:
        return None

    page_refs = self.documentai_entity.page_anchor.page_refs
    if not page_refs:
        return None

    page_ref = page_refs[0]
    normalized_vertices = page_ref.bounding_poly.normalized_vertices
    if len(normalized_vertices) < 3:
        # Not enough corners to describe a region.
        return None

    # NOTE(review): `page_ref.page` is used as an index into the pages of the
    # document passed in; confirm this holds when that document is one shard
    # of a multi-shard output.
    doc_page = documentai_document.pages[page_ref.page]
    image_content = doc_page.image.content
    if not image_content:
        return None

    doc_image = Image.open(BytesIO(image_content))
    width, height = doc_image.size

    # Scale normalized coordinates to pixels, rounding to the nearest integer.
    xs = [int(vertex.x * width + 0.5) for vertex in normalized_vertices]
    ys = [int(vertex.y * height + 0.5) for vertex in normalized_vertices]

    # Build the (left, upper, right, lower) box from the extremes so the
    # result does not depend on the order in which the vertices are listed.
    self.image = doc_image.crop((min(xs), min(ys), max(xs), max(ys)))
    return self.image
47 changes: 47 additions & 0 deletions samples/snippets/export_images_sample.py
@@ -0,0 +1,47 @@
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#


# [START documentai_toolbox_export_images]

from google.cloud.documentai_toolbox import document

# TODO(developer): Uncomment these variables before running the sample.
# Given the path to a local document.proto (or sharded document.proto) produced by an identity processor
# document_path = "path/to/local/document.json"
# output_path = "resources/output/"
# output_file_prefix = "exported_photo"
# output_file_extension = "png"


def export_images_sample(
    document_path: str,
    output_path: str,
    output_file_prefix: str,
    output_file_extension: str,
) -> None:
    """Export the images of the document at `document_path` and print their file names.

    Args:
        document_path: Path passed to `Document.from_document_path`.
        output_path: Directory the images are written to.
        output_file_prefix: Prefix of every exported file name.
        output_file_extension: Extension of every exported file, e.g. `png`.
    """
    wrapped_document = document.Document.from_document_path(document_path=document_path)

    exported_names = wrapped_document.export_images(
        output_path=output_path,
        output_file_prefix=output_file_prefix,
        output_file_extension=output_file_extension,
    )

    # One line for the banner, then one line per exported file.
    report_lines = ["Images Successfully Exported", *exported_names]
    print("\n".join(report_lines))


# [END documentai_toolbox_export_images]
46 changes: 46 additions & 0 deletions samples/snippets/test_export_images_sample.py
@@ -0,0 +1,46 @@
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import os
import shutil

import pytest
from samples.snippets import export_images_sample

document_path = "../../tests/unit/resources/images/dl3-0.json"
output_path = "resources/output/"
output_file_prefix = "exported_photo"
output_file_extension = "png"


def test_export_images_sample(capsys: pytest.CaptureFixture) -> None:
    """Run the export sample and check both its printed output and the written file."""
    current_directory = os.path.dirname(__file__)
    rel_document_path = os.path.relpath(document_path, current_directory)

    # Start from a clean directory so leftovers from an interrupted run can
    # neither fail `makedirs` nor satisfy the file-existence check below.
    shutil.rmtree(output_path, ignore_errors=True)
    os.makedirs(output_path)
    try:
        export_images_sample.export_images_sample(
            document_path=rel_document_path,
            output_path=output_path,
            output_file_prefix=output_file_prefix,
            output_file_extension=output_file_extension,
        )

        out, _ = capsys.readouterr()

        assert "Images Successfully Exported" in out
        assert "exported_photo_0_Portrait.png" in out

        assert os.path.exists(
            os.path.join(output_path, "exported_photo_0_Portrait.png")
        )
    finally:
        # Always clean up, even when an assertion above fails.
        shutil.rmtree(output_path, ignore_errors=True)
1 change: 1 addition & 0 deletions setup.py
Expand Up @@ -59,6 +59,7 @@
"pikepdf >= 6.2.9, < 8.0.0",
"pikepdf >= 6.2.9, < 7.0.0; python_version<'3.8'",
"immutabledict >= 2.0.0, < 3.0.0dev",
"Pillow >= 9.5.0, < 10.0.0",
),
python_requires=">=3.7",
classifiers=[
Expand Down
1 change: 1 addition & 0 deletions tests/unit/resources/images/dl3-0.json

Large diffs are not rendered by default.

31 changes: 31 additions & 0 deletions tests/unit/test_document.py
Expand Up @@ -15,6 +15,7 @@
# limitations under the License.

import os
import shutil

# try/except added for compatibility with python < 3.8
try:
Expand Down Expand Up @@ -75,6 +76,13 @@ def get_bytes_splitter_mock():
yield byte_factory


@pytest.fixture
def get_bytes_images_mock():
    """Patch `document._get_bytes` to return `get_bytes("tests/unit/resources/images")`.

    Yields the mock so tests can assert on how it was called.
    """
    with mock.patch.object(document, "_get_bytes") as patched_get_bytes:
        images_payload = get_bytes("tests/unit/resources/images")
        patched_get_bytes.return_value = images_payload
        yield patched_get_bytes


def test_get_shards_with_gcs_uri_contains_file_type():
with pytest.raises(ValueError, match="gcs_prefix cannot contain file types"):
document._get_shards(
Expand Down Expand Up @@ -379,3 +387,26 @@ def test_convert_document_to_annotate_file_response():
actual = doc.convert_document_to_annotate_file_response()

assert actual != AnnotateFileResponse()


def test_export_images(get_bytes_images_mock):
    """Export images from a mocked GCS document and check the returned file names."""
    doc = document.Document.from_gcs(
        gcs_bucket_name="test-directory", gcs_prefix="documentai/output/123456789/0"
    )
    output_path = "resources/output/"

    # Start from a clean directory so leftovers from an interrupted run can
    # neither fail `makedirs` nor satisfy the file-existence check below.
    shutil.rmtree(output_path, ignore_errors=True)
    os.makedirs(output_path)
    try:
        actual = doc.export_images(
            output_path=output_path,
            output_file_prefix="exported_photo",
            output_file_extension="png",
        )
        get_bytes_images_mock.assert_called_once()

        assert actual == [
            "exported_photo_0_Portrait.png",
        ]
        assert os.path.exists(
            os.path.join(output_path, "exported_photo_0_Portrait.png")
        )
    finally:
        # Always clean up, even when an assertion above fails.
        shutil.rmtree(output_path, ignore_errors=True)
17 changes: 17 additions & 0 deletions tests/unit/test_entity.py
Expand Up @@ -14,10 +14,20 @@
# limitations under the License.
#

import pytest

from google.cloud import documentai

from google.cloud.documentai_toolbox import document
from google.cloud.documentai_toolbox import entity


@pytest.fixture
def docproto():
    """Parse `tests/unit/resources/images/dl3-0.json` into a `documentai.Document`."""
    fixture_path = "tests/unit/resources/images/dl3-0.json"
    with open(fixture_path, "r", encoding="utf-8") as json_file:
        document_json = json_file.read()
        return documentai.Document.from_json(document_json)


def test_Entity():
documentai_entity = documentai.Document.Entity(
type_="some_entity_type", mention_text="some_mention_text"
Expand Down Expand Up @@ -58,3 +68,10 @@ def test_Entity_splitter():
assert wrapper_entity.type_ == "invoice_statement"
assert wrapper_entity.start_page == 0
assert wrapper_entity.end_page == 2


def test_crop_image(docproto):
    """The first entity of the wrapped fixture document has an image after cropping."""
    wrapped_document = document.Document.from_documentai_document(docproto)
    wrapped_document.entities[0].crop_image(documentai_document=docproto)

    cropped = wrapped_document.entities[0].image
    assert cropped

0 comments on commit 383e105

Please sign in to comment.