From ff06e23d239bd851504e936114ecc54e8997f7e3 Mon Sep 17 00:00:00 2001 From: "David L. Jones" Date: Mon, 31 Aug 2020 10:40:57 -0700 Subject: [PATCH] Add a setuptools extension for generating Python protobufs (#7783) This extension allows Python code to be generated from setup.py files, so they are created as part of a normal Python build. The extension uses an already-existing protoc binary, which can be explicitly specified if needed. --- python/protobuf_distutils/README.md | 106 +++++++++++++ .../protobuf_distutils/__init__.py | 0 .../generate_py_protobufs.py | 145 ++++++++++++++++++ python/protobuf_distutils/setup.py | 133 ++++++++++++++++ python/setup.py | 1 + tests.sh | 2 +- 6 files changed, 386 insertions(+), 1 deletion(-) create mode 100644 python/protobuf_distutils/README.md create mode 100644 python/protobuf_distutils/protobuf_distutils/__init__.py create mode 100644 python/protobuf_distutils/protobuf_distutils/generate_py_protobufs.py create mode 100644 python/protobuf_distutils/setup.py diff --git a/python/protobuf_distutils/README.md b/python/protobuf_distutils/README.md new file mode 100644 index 000000000000..f3f2009f3b36 --- /dev/null +++ b/python/protobuf_distutils/README.md @@ -0,0 +1,106 @@ +# Python setuptools extension + +This is an extension for Python setuptools which uses an installed protobuf +compiler (`protoc`) to generate Python sources. + +## Installing + +To use this extension, it needs to be installed so it can be imported by other +projects' setup.py. + +```shell +$ python setup.py build +$ python setup.py install +``` + +(If you want to test changes to the extension, you can use `python setup.py +develop`.) + +## Usage + +### Example setup.py configuration + +```python +from setuptools import setup +setup( + # ... + name='example_project', + + # Require this package, but only for setup (not installation): + setup_requires=['protobuf_distutils'], + + options={ + # See below for details. 
+ 'generate_py_protobufs': { + 'source_dir': 'path/to/protos', + 'extra_proto_paths': ['path/to/other/project/protos'], + 'output_dir': 'path/to/project/sources', # default '.' + 'proto_files': ['relative/path/to/just_this_file.proto'], + 'protoc': 'path/to/protoc.exe', + }, + }, +) +``` + +### Example build invocation + +These steps will generate protobuf sources so they are included when building +and installing `example_project` (see above): + +```shell +$ python setup.py generate_py_protobufs +$ python setup.py build +$ python setup.py install +``` + +## Options + +- `source_dir`: + + This is the directory holding .proto files to be processed. + + The default behavior is to generate sources for all .proto files found under + `source_dir`, recursively. This behavior can be controlled with options below. + +- `proto_root_path`: + + This is the root path for resolving imports in source .proto files. + + The default is the shortest prefix of `source_dir` among `[source_dir] + + self.extra_proto_paths`. + +- `extra_proto_paths`: + + Specifies additional paths that should be used to find imports, in + addition to `source_dir`. + + This option can be used to specify the path to other protobuf sources, + which are imported by files under `source_dir`. No Python code will + be generated for .proto files under `extra_proto_paths`. + +- `output_dir`: + + Specifies where generated code should be placed. + + Typically, this should be the root package that generated Python modules + should be below. + + The generated files will be placed under `output_dir` according to the + relative source paths under `proto_root_path`. For example, the source file + `${proto_root_path}/subdir/message.proto` will be generated as the Python + module `${output_dir}/subdir/message_pb2.py`. + +- `proto_files`: + + A list of strings, specific .proto file paths for generating code, instead of + searching for all .proto files under `source_dir`. + + These paths are relative to `source_dir`. 
For example, to generate code + for just `${source_dir}/subdir/message.proto`, specify + `['subdir/message.proto']`. + +- `protoc`: + + By default, the protoc binary (the Protobuf compiler) is found by + searching the environment path. To use a specific protoc binary, its + path can be specified. diff --git a/python/protobuf_distutils/protobuf_distutils/__init__.py b/python/protobuf_distutils/protobuf_distutils/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/python/protobuf_distutils/protobuf_distutils/generate_py_protobufs.py b/python/protobuf_distutils/protobuf_distutils/generate_py_protobufs.py new file mode 100644 index 000000000000..452b5d7e1e26 --- /dev/null +++ b/python/protobuf_distutils/protobuf_distutils/generate_py_protobufs.py @@ -0,0 +1,145 @@ +# Protocol Buffers - Google's data interchange format +# Copyright 2008 Google Inc. All rights reserved. +# https://developers.google.com/protocol-buffers/ +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
# IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

"""Implements the generate_py_protobufs command."""

__author__ = 'dlj@google.com (David L. Jones)'

import glob
import sys
import os
import distutils.spawn as spawn
from distutils.cmd import Command
from distutils.errors import DistutilsOptionError, DistutilsExecError


def _is_same_or_subpath(path, root):
    """Returns True if `path` equals `root` or lies below it.

    Both arguments must already be normalized with os.path.normpath. The
    comparison is purely lexical, but it only matches on whole path
    components, so 'a/bc' is not considered to be under 'a/b'.
    """
    if path == root:
        return True
    # Strip a trailing separator (only a filesystem root keeps one after
    # normpath) so that exactly one separator is appended below.
    return path.startswith(root.rstrip(os.path.sep) + os.path.sep)


def _find_proto_files(source_dir, recurse):
    """Returns the paths of the .proto files found under `source_dir`.

    Args:
      source_dir: directory to search.
      recurse: if true, every non-hidden subdirectory is searched as well;
        otherwise only files immediately inside `source_dir` are returned.

    Returns:
      A list of paths (each prefixed with `source_dir`), in a deterministic
      order. Hidden files and directories (leading '.') are skipped, which is
      what a glob pattern such as '*.proto' would do.
    """
    found = []
    for dirpath, dirnames, filenames in os.walk(source_dir):
        if recurse:
            # Editing dirnames in place steers os.walk: prune hidden
            # directories and visit the rest in sorted order.
            dirnames[:] = sorted(d for d in dirnames if not d.startswith('.'))
        else:
            del dirnames[:]
        found.extend(
            os.path.join(dirpath, name) for name in sorted(filenames)
            if name.endswith('.proto') and not name.startswith('.'))
    return found


class generate_py_protobufs(Command):
    """Generates Python sources for .proto files.

    The options can be given on the command line, in setup.cfg, or through the
    `options` argument of `setup()`. After `finalize_options`, `proto_files`
    holds paths relative to `proto_root_path`, which is how they are passed
    to protoc by `run`.
    """

    description = 'Generate Python sources for .proto files'
    user_options = [
        ('source-dir=', None,
         'Directory holding the .proto files to be processed.'),

        ('proto-root-path=', None,
         'Root path for resolving imports in source .proto files.'),

        ('extra-proto-paths=', None,
         'Additional paths to resolve imports in .proto files.'),

        ('output-dir=', None,
         "Directory where generated code is placed (default '.')."),

        ('proto-files=', None,
         'Specific .proto files, relative to source-dir, to generate.'),

        # 'recurse' must be listed before its negative alias.
        ('recurse', None,
         'Search source-dir recursively for .proto files (default).'),

        ('no-recurse', None,
         'Only use .proto files immediately under source-dir.'),

        # The trailing '=' marks the option as taking a value.
        ('protoc=', None,
         'Path to a specific `protoc` command to use.'),
    ]
    boolean_options = ['recurse']
    negative_opt = {'no-recurse': 'recurse'}

    def initialize_options(self):
        """Sets the defaults for the command options."""
        self.source_dir = None
        self.proto_root_path = None
        self.extra_proto_paths = []
        self.output_dir = '.'
        self.proto_files = None
        self.recurse = True
        self.protoc = None

    def finalize_options(self):
        """Sets the final values for the command options.

        Defaults were set in `initialize_options`, but could have been changed
        by command-line options or by other commands.

        Raises:
          DistutilsOptionError: if `source_dir` is missing or is not a
            directory, if `output_dir` is not a directory, if `source_dir` is
            not under `proto_root_path`, or if no .proto files are found.
        """
        self.ensure_dirname('source_dir')
        if self.source_dir is None:
            # ensure_dirname lets None through; fail with a clear message
            # instead of a TypeError from os.path.normpath below.
            raise DistutilsOptionError('source_dir must be specified')

        self.ensure_string_list('extra_proto_paths')
        if self.extra_proto_paths is None:
            self.extra_proto_paths = []

        if self.output_dir is None:
            self.output_dir = '.'
        self.ensure_dirname('output_dir')

        # SUBTLE: if 'source_dir' is a subdirectory of any entry in
        # 'extra_proto_paths', then in general, the shortest --proto_path
        # prefix (and the longest relative .proto filenames) must be used for
        # correctness. For example, consider:
        #
        #     source_dir = 'a/b/c'
        #     extra_proto_paths = ['a/b', 'x/y']
        #
        # In this case, we must ensure that a/b/c/d/foo.proto resolves
        # canonically as c/d/foo.proto, not just d/foo.proto. Otherwise, this
        # import:
        #
        #     import "c/d/foo.proto";
        #
        # would result in different FileDescriptor.name keys from
        # "d/foo.proto". That will cause all the definitions in the file to be
        # flagged as duplicates, with an error similar to:
        #
        #     c/d/foo.proto: "packagename.MessageName" is already defined in
        #     file "d/foo.proto"
        #
        # For paths in self.proto_files, we transform them to be relative to
        # self.proto_root_path, which may be different from self.source_dir.
        #
        # Although the order of --proto_paths is significant, shadowed
        # filenames are errors: if 'a/b/c.proto' resolves to different files
        # under two different --proto_path arguments, then the path is
        # rejected as an error. (Implementation note: this is enforced in
        # protoc's DiskSourceTree class.)

        source_dir = os.path.normpath(self.source_dir)

        if self.proto_root_path is None:
            root = source_dir
            for candidate in self.extra_proto_paths:
                candidate = os.path.normpath(candidate)
                if _is_same_or_subpath(root, candidate):
                    root = candidate
            self.proto_root_path = root
            # Compare normalized paths, so that a differently spelled
            # source_dir (e.g. './protos') is not reported as a new root.
            if root != source_dir:
                self.announce('using computed proto_root_path: ' + root,
                              level=2)
        else:
            root = os.path.normpath(self.proto_root_path)

        if not _is_same_or_subpath(source_dir, root):
            raise DistutilsOptionError(
                'source_dir ' + self.source_dir +
                ' is not under proto_root_path ' + self.proto_root_path)

        if self.proto_files is None:
            found = _find_proto_files(source_dir, self.recurse)
            if not found:
                raise DistutilsOptionError(
                    'no .proto files were found under ' + self.source_dir)
            self.proto_files = [os.path.relpath(f, root) for f in found]
        else:
            self.ensure_string_list('proto_files')
            # Explicit files are given relative to source_dir, but protoc is
            # invoked with paths relative to proto_root_path. Absolute paths
            # are passed through untouched.
            self.proto_files = [
                name if os.path.isabs(name)
                else os.path.relpath(os.path.join(source_dir, name), root)
                for name in self.proto_files]

        if self.protoc is None:
            self.protoc = spawn.find_executable('protoc')

    def run(self):
        """Invokes protoc to generate the Python sources.

        Raises:
          DistutilsExecError: if no protoc binary was configured or found on
            the environment path, or if running protoc fails.
        """
        if not self.protoc:
            raise DistutilsExecError(
                'could not find `protoc`; install it or set the `protoc` '
                'option')

        # All proto file paths were adjusted in finalize_options to be
        # relative to self.proto_root_path.
        proto_paths = ['--proto_path=' + self.proto_root_path]
        proto_paths.extend(
            '--proto_path=' + x for x in self.extra_proto_paths)

        # Run protoc. It was already resolved, so don't try to resolve
        # through PATH.
        spawn.spawn(
            [self.protoc, '--python_out=' + self.output_dir]
            + proto_paths + self.proto_files,
            search_path=0)

# diff --git a/python/protobuf_distutils/setup.py b/python/protobuf_distutils/setup.py
# new file mode 100644
# index 000000000000..bfc6d7f6cd3d
# --- /dev/null
# +++ b/python/protobuf_distutils/setup.py
# @@ -0,0 +1,133 @@
# +# Protocol Buffers - Google's data interchange format
# +# Copyright 2008 Google Inc. All rights reserved.
+# https://developers.google.com/protocol-buffers/ +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +"""Setuptools/distutils extension for generating Python protobuf code. + +This extension uses a prebuilt 'protoc' binary to generate Python types for +protobuf sources. By default, it will use a system-installed protoc binary, but +a custom protoc can be specified by flag. + +This command should usually be run before the 'build' command, so that the +generated sources are treated the same way as the rest of the Python +sources. 
+ +Options: + + source_dir: + This is the directory holding .proto files to be processed. + + The default behavior is to generate sources for all .proto files found + under `source_dir`, recursively. This behavior can be controlled with + options below. + + proto_root_path: + This is the root path for resolving imports in source .proto files. + + The default is the shortest prefix of `source_dir` among: + [source_dir] + self.extra_proto_paths + + extra_proto_paths: + Specifies additional paths that should be used to find imports, in + addition to `source_dir`. + + This option can be used to specify the path to other protobuf sources, + which are imported by files under `source_dir`. No Python code will be + generated for .proto files under `extra_proto_paths`. + + output_dir: + Specifies where generated code should be placed. + + Typically, this should be the root package that generated Python modules + should be below. + + The generated files will be named according to the relative source paths + under `proto_root_path`. For example, this source .proto file: + ${proto_root_path}/subdir/message.proto + will correspond to this generated Python module: + ${output_dir}/subdir/message_pb2.py + + proto_files: + Specific .proto files can be specified for generating code, instead of + searching for all .proto files under `source_dir`. + + These paths are relative to `source_dir`. For example, to generate code + for just ${source_dir}/subdir/message.proto, specify + ['subdir/message.proto']. + + protoc: + By default, the protoc binary (the Protobuf compiler) is found by + searching the environment path. To use a specific protoc binary, its + path can be specified. + + recurse: + If `proto_files` are not specified, then the default behavior is to + search `source_dir` recursively. This option controls the recursive + search; if it is False, only .proto files immediately under `source_dir` + will be used to generate sources. + +""" + +__author__ = 'dlj@google.com (David L. 
Jones)' + +from setuptools import setup, find_packages + +setup( + name='protobuf_distutils', + version='1.0', + packages=find_packages(), + maintainer='protobuf@googlegroups.com', + maintainer_email='protobuf@googlegroups.com', + license='3-Clause BSD License', + classifiers=[ + "Framework :: Setuptools Plugin", + "Operating System :: OS Independent", + # These Python versions should match the protobuf package: + "Programming Language :: Python", + "Programming Language :: Python :: 2", + "Programming Language :: Python :: 2.7", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.3", + "Programming Language :: Python :: 3.4", + "Programming Language :: Python :: 3.5", + "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Topic :: Software Development :: Code Generators", + ], + description=('This is a distutils extension to generate Python code for ' + '.proto files using an installed protoc binary.'), + url='https://github.com/protocolbuffers/protobuf/', + entry_points={ + 'distutils.commands': [ + ('generate_py_protobufs = ' + 'protobuf_distutils.generate_py_protobufs:generate_py_protobufs'), + ], + }, +) diff --git a/python/setup.py b/python/setup.py index d29ff22820b7..b44e88b2f927 100755 --- a/python/setup.py +++ b/python/setup.py @@ -284,6 +284,7 @@ def get_option_from_sys_argv(option_str): packages=find_packages( exclude=[ 'import_test_package', + 'protobuf_distutils', ], ), test_suite='google.protobuf.internal', diff --git a/tests.sh b/tests.sh index 3d47b6cbf268..8b499d0a3e11 100755 --- a/tests.sh +++ b/tests.sh @@ -64,7 +64,7 @@ build_cpp_distcheck() { git ls-files | grep "^\(java\|python\|objectivec\|csharp\|js\|ruby\|php\|cmake\|examples\|src/google/protobuf/.*\.proto\)" |\ grep -v ".gitignore" | grep -v "java/compatibility_tests" | grep -v "java/lite/proguard.pgcfg" |\ grep -v "python/compatibility_tests" | grep -v "python/docs" | grep -v 
"python/.repo-metadata.json" |\ - grep -v "csharp/compatibility_tests" > dist.lst + grep -v "python/protobuf_distutils" | grep -v "csharp/compatibility_tests" > dist.lst # Unzip the dist tar file. DIST=`ls *.tar.gz` tar -xf $DIST