Skip to content

Commit

Permalink
fix: building on windows (#12)
Browse files Browse the repository at this point in the history
  • Loading branch information
kboroszko committed Dec 20, 2021
1 parent 7701dc6 commit fb09814
Show file tree
Hide file tree
Showing 4 changed files with 280 additions and 21 deletions.
12 changes: 12 additions & 0 deletions WORKSPACE
Expand Up @@ -59,9 +59,21 @@ http_archive(
)

# Note com_google_googleapis is placed earlier as we need to adjust switched_rules_by_language option
# Note we have to change one word in the field_behavior.proto so it compiles on WINDOWS
# for more info, please refer to https://github.com/protocolbuffers/protobuf/issues/7076
# Because of a bug in protocol buffers (protocolbuffers/protobuf#7076), new versions of this project
# fail to compile on Windows. The problem hinges on OPTIONAL being defined as an empty string under
# Windows. This makes the preprocessor remove every mention of OPTIONAL from the code, which causes
# compilation failures. This temporary workaround renames the protobuf value OPTIONAL to
# OPIONAL. This should be safe as it does not affect the generated protobufs.
http_archive(
name = "com_google_googleapis",
build_file = "@com_github_googleapis_google_cloud_cpp//bazel:googleapis.BUILD",
patch_cmds = [
"""sed -i.bak 's/OPTIONAL/OPIONAL/g' google/api/field_behavior.proto""",
"""sed -i.bak 's/OPTIONAL/OPIONAL/g' google/pubsub/v1beta2/pubsub.proto""",
"""sed -i.bak 's/OPTIONAL/OPIONAL/g' google/pubsub/v1/pubsub.proto""",
],
sha256 = "a53e15405f81d5a32594d7f6486e649131fadda5431cf28377dff4ae54d45d16",
strip_prefix = "googleapis-d4d09eb3aec152015f35717102f9b423988b94f7",
urls = [
Expand Down
98 changes: 81 additions & 17 deletions tensorflow_io/core/kernels/bigtable/serialization.cc
Expand Up @@ -16,14 +16,73 @@ limitations under the License.

#include "tensorflow_io/core/kernels/bigtable/serialization.h"

#include "rpc/xdr.h"
#include "tensorflow/core/platform/errors.h"
#include "tensorflow/core/platform/statusor.h"

namespace cbt = ::google::cloud::bigtable;

namespace tensorflow {
namespace io {
namespace {

#ifdef _WIN32

#include <winsock.h>

// Decodes the cell value as a 4-byte big-endian (network order) int32.
//
// Returns InvalidArgument when the value is not exactly 4 bytes long.
// The bytes are assembled most-significant first with shifts, so the result
// does not depend on host endianness, on ntohl() from winsock, or on reading
// an inactive union member (undefined behaviour in C++).
inline StatusOr<int32_t> BytesToInt32(const cbt::Cell& cell) {
  std::string const& bytes = cell.value();
  if (bytes.size() != sizeof(int32_t)) {
    return errors::InvalidArgument("Invalid int32 representation.");
  }
  uint32_t raw = 0;
  for (char const c : bytes) {
    // Go through unsigned char so bytes >= 0x80 are not sign-extended.
    raw = (raw << 8) | static_cast<unsigned char>(c);
  }
  // Reinterpret the assembled bit pattern as a two's-complement value.
  return static_cast<int32_t>(raw);
}

// Decodes the cell value as a big-endian int64 using the Bigtable client's
// own decoder (Cell::decode_big_endian_integer).
//
// Returns InvalidArgument when the client decoder reports a failure.
inline StatusOr<int64_t> BytesToInt64(const cbt::Cell& cell) {
  auto maybe_value = cell.decode_big_endian_integer<int64_t>();
  if (!maybe_value.ok()) {
    // The message previously said "int32", which misreported the failing type.
    return errors::InvalidArgument("Invalid int64 representation.");
  }
  return maybe_value.value();
}

// Decodes the cell value as a float: the value is first decoded as an int32
// via BytesToInt32, then its bit pattern is reinterpreted as a float.
//
// Any error from BytesToInt32 is propagated unchanged. This assumes that
// float and int32 share the same byte order on the host.
inline StatusOr<float> BytesToFloat(const cbt::Cell& cell) {
  auto const int_rep = BytesToInt32(cell);
  if (!int_rep.ok()) {
    // Forward only the status; do not rely on StatusOr<int32_t> converting
    // implicitly to StatusOr<float>.
    return int_rep.status();
  }
  static_assert(sizeof(float) == sizeof(int32_t),
                "float must be exactly 32 bits wide");
  int32_t const bits = *int_rep;
  float res;
  // memcpy is the well-defined way to reinterpret the bits; reading the
  // inactive member of a union is undefined behaviour in C++.
  memcpy(&res, &bits, sizeof(res));
  return res;
}

// Decodes the cell value as a double: the value is first decoded as an int64
// via BytesToInt64, then its bit pattern is reinterpreted as a double.
//
// Any error from BytesToInt64 is propagated unchanged. This assumes that
// double and int64 share the same byte order on the host.
inline StatusOr<double> BytesToDouble(const cbt::Cell& cell) {
  auto const int_rep = BytesToInt64(cell);
  if (!int_rep.ok()) {
    // Forward only the status; do not rely on StatusOr<int64_t> converting
    // implicitly to StatusOr<double>.
    return int_rep.status();
  }
  static_assert(sizeof(double) == sizeof(int64_t),
                "double must be exactly 64 bits wide");
  int64_t const bits = *int_rep;
  double res;
  // memcpy is the well-defined way to reinterpret the bits; reading the
  // inactive member of a union is undefined behaviour in C++.
  memcpy(&res, &bits, sizeof(res));
  return res;
}

#else // _WIN32

#include "rpc/types.h"
#include "rpc/xdr.h"

inline StatusOr<float> BytesToFloat(std::string const& s) {
inline StatusOr<float> BytesToFloat(const cbt::Cell& cell) {
std::string const& s = cell.value();
float v;
XDR xdrs;
xdrmem_create(&xdrs, const_cast<char*>(s.data()), sizeof(v), XDR_DECODE);
Expand All @@ -33,7 +92,8 @@ inline StatusOr<float> BytesToFloat(std::string const& s) {
return v;
}

inline StatusOr<double> BytesToDouble(std::string const& s) {
inline StatusOr<double> BytesToDouble(const cbt::Cell& cell) {
std::string const& s = cell.value();
double v;
XDR xdrs;
xdrmem_create(&xdrs, const_cast<char*>(s.data()), sizeof(v), XDR_DECODE);
Expand All @@ -43,7 +103,8 @@ inline StatusOr<double> BytesToDouble(std::string const& s) {
return v;
}

inline StatusOr<int64_t> BytesToInt64(std::string const& s) {
inline StatusOr<int64_t> BytesToInt64(const cbt::Cell& cell) {
std::string const& s = cell.value();
int64_t v;
XDR xdrs;
xdrmem_create(&xdrs, const_cast<char*>(s.data()), sizeof(v), XDR_DECODE);
Expand All @@ -53,7 +114,8 @@ inline StatusOr<int64_t> BytesToInt64(std::string const& s) {
return v;
}

inline StatusOr<int32_t> BytesToInt32(std::string const& s) {
inline StatusOr<int32_t> BytesToInt32(const cbt::Cell& cell) {
std::string const& s = cell.value();
int32_t v;
XDR xdrs;
xdrmem_create(&xdrs, const_cast<char*>(s.data()), sizeof(v), XDR_DECODE);
Expand All @@ -63,16 +125,18 @@ inline StatusOr<int32_t> BytesToInt32(std::string const& s) {
return v;
}

inline StatusOr<bool_t> BytesToBool(std::string const& s) {
bool_t v;
XDR xdrs;
xdrmem_create(&xdrs, const_cast<char*>(s.data()), sizeof(v), XDR_DECODE);
if (!xdr_bool(&xdrs, &v)) {
return errors::InvalidArgument("Error reading bool from byte array.");
#endif // _WIN32

// Decodes the cell value as a bool stored in a single byte: zero is false,
// any other byte value is true.
//
// Returns InvalidArgument when the value is not exactly one byte long.
inline StatusOr<bool> BytesToBool(const cbt::Cell& cell) {
  std::string const& bytes = cell.value();
  if (bytes.size() != 1U) {
    return errors::InvalidArgument("Invalid bool representation.");
  }
  // The size check above guarantees that index 0 is valid.
  return bytes[0] != 0;
}

} // namespace

Status PutCellValueInTensor(Tensor& tensor, size_t index, DataType cell_type,
google::cloud::bigtable::Cell const& cell) {
switch (cell_type) {
Expand All @@ -82,39 +146,39 @@ Status PutCellValueInTensor(Tensor& tensor, size_t index, DataType cell_type,
} break;
case DT_BOOL: {
auto tensor_data = tensor.tensor<bool, 1>();
auto maybe_parsed_data = BytesToBool(cell.value());
auto maybe_parsed_data = BytesToBool(cell);
if (!maybe_parsed_data.ok()) {
return maybe_parsed_data.status();
}
tensor_data(index) = maybe_parsed_data.ValueOrDie();
} break;
case DT_INT32: {
auto tensor_data = tensor.tensor<int32_t, 1>();
auto maybe_parsed_data = BytesToInt32(cell.value());
auto maybe_parsed_data = BytesToInt32(cell);
if (!maybe_parsed_data.ok()) {
return maybe_parsed_data.status();
}
tensor_data(index) = maybe_parsed_data.ValueOrDie();
} break;
case DT_INT64: {
auto tensor_data = tensor.tensor<int64_t, 1>();
auto maybe_parsed_data = BytesToInt64(cell.value());
auto maybe_parsed_data = BytesToInt64(cell);
if (!maybe_parsed_data.ok()) {
return maybe_parsed_data.status();
}
tensor_data(index) = maybe_parsed_data.ValueOrDie();
} break;
case DT_FLOAT: {
auto tensor_data = tensor.tensor<float, 1>();
auto maybe_parsed_data = BytesToFloat(cell.value());
auto maybe_parsed_data = BytesToFloat(cell);
if (!maybe_parsed_data.ok()) {
return maybe_parsed_data.status();
}
tensor_data(index) = maybe_parsed_data.ValueOrDie();
} break;
case DT_DOUBLE: {
auto tensor_data = tensor.tensor<double, 1>();
auto maybe_parsed_data = BytesToDouble(cell.value());
auto maybe_parsed_data = BytesToDouble(cell);
if (!maybe_parsed_data.ok()) {
return maybe_parsed_data.status();
}
Expand Down
17 changes: 13 additions & 4 deletions tensorflow_io/core/kernels/bigtable/serialization.h
Expand Up @@ -18,15 +18,24 @@ limitations under the License.

#include "google/cloud/bigtable/table.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/platform/statusor.h"

namespace tensorflow {
namespace io {

// Bigtable only stores values as byte buffers - except for int64 the server
// Bigtable only stores byte buffers as values - except for int64 the server
// side does not have any notion of types. TensorFlow needs to store shorter
// integers, floats, doubles, so we needed to decide on how. We chose to follow
// what HBase does, since there is a path for migrating from HBase to Bigtable.
// XDR seems to match what HBase does.
// integers, floats, doubles, so we needed to decide on how. We chose to
// follow what HBase does, since there is a path for migrating from HBase to
// Bigtable. HBase stores integers as big-endian and floats as IEEE754
// (also big-endian). Given that integer endianness does not always match
// float endianness, and the fact that there are architectures where it is
// neither little nor big (BE-32), implementing this properly is non-trivial.
// Ideally, we would use a library to do that. XDR matches what HBase does,
// but it is not easily available on Windows, so we decided to go with a
// hybrid approach. On Windows we assume that integer endianness matches float
// endianness and implement the deserialization ourselves and everywhere else
// we use XDR. For that reason we provide two implementations.
Status PutCellValueInTensor(Tensor& tensor, size_t index, DataType cell_type,
google::cloud::bigtable::Cell const& cell);

Expand Down

0 comments on commit fb09814

Please sign in to comment.