Skip to content

Commit

Permalink
internal/impl: enable fully lazy extensions (over Size and Marshal)
Browse files Browse the repository at this point in the history
Extensions will be kept in wire format over proto.Size and proto.Marshal.

This change is a significant performance optimization for jobs that read and
write Protobuf messages of the same type, but do not need to process extensions.

This change is based on work by Patrik Nyblom.

Note that the proto.Size semantics for lazy messages might be surprising;
see https://protobuf.dev/reference/go/size/ for details.

We have been running this change for about two weeks in Google,
all known breakages have already been addressed with CL 579995.

related to golang/protobuf#1609

Change-Id: I16be78d15304d775bb30e76356a1a61d61300b43
Reviewed-on: https://go-review.googlesource.com/c/protobuf/+/580015
Reviewed-by: Lasse Folger <lassefolger@google.com>
Auto-Submit: Michael Stapelberg <stapelberg@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
  • Loading branch information
stapelberg authored and gopherbot committed May 15, 2024
1 parent 15d7b13 commit 0e93293
Show file tree
Hide file tree
Showing 11 changed files with 2,026 additions and 1 deletion.
2 changes: 1 addition & 1 deletion internal/impl/checkinit.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ func (mi *MessageInfo) isInitExtensions(ext *map[int32]ExtensionField) error {
}
for _, x := range *ext {
ei := getExtensionFieldInfo(x.Type())
if ei.funcs.isInit == nil {
if ei.funcs.isInit == nil || x.isUnexpandedLazy() {
continue
}
v := x.Value()
Expand Down
22 changes: 22 additions & 0 deletions internal/impl/codec_extension.go
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,28 @@ func (f *ExtensionField) canLazy(xt protoreflect.ExtensionType) bool {
return false
}

// isUnexpandedLazy returns true if the ExensionField is lazy and not
// yet expanded, which means it's present and already checked for
// initialized required fields.
func (f *ExtensionField) isUnexpandedLazy() bool {
return f.lazy != nil && atomic.LoadUint32(&f.lazy.atomicOnce) == 0
}

// lazyBuffer retrieves the buffer for a lazy extension if it's not yet expanded.
//
// The returned buffer has to be kept over whatever operation we're planning,
// as re-retrieving it will fail after the message is lazily decoded.
func (f *ExtensionField) lazyBuffer() []byte {
// This function might be in the critical path, so check the atomic without
// taking a look first, then only take the lock if needed.
if !f.isUnexpandedLazy() {
return nil
}
f.lazy.mu.Lock()
defer f.lazy.mu.Unlock()
return f.lazy.b
}

func (f *ExtensionField) lazyInit() {
f.lazy.mu.Lock()
defer f.lazy.mu.Unlock()
Expand Down
22 changes: 22 additions & 0 deletions internal/impl/codec_messageset.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,15 @@ func sizeMessageSet(mi *MessageInfo, p pointer, opts marshalOptions) (size int)
}
num, _ := protowire.DecodeTag(xi.wiretag)
size += messageset.SizeField(num)
if fullyLazyExtensions(opts) {
// Don't expand the extension, instead use the buffer to calculate size
if lb := x.lazyBuffer(); lb != nil {
// We got hold of the buffer, so it's still lazy.
// Don't count the tag size in the extension buffer, it's already added.
size += protowire.SizeTag(messageset.FieldMessage) + len(lb) - xi.tagsize
continue
}
}
size += xi.funcs.size(x.Value(), protowire.SizeTag(messageset.FieldMessage), opts)
}

Expand Down Expand Up @@ -85,6 +94,19 @@ func marshalMessageSetField(mi *MessageInfo, b []byte, x ExtensionField, opts ma
xi := getExtensionFieldInfo(x.Type())
num, _ := protowire.DecodeTag(xi.wiretag)
b = messageset.AppendFieldStart(b, num)

if fullyLazyExtensions(opts) {
// Don't expand the extension if it's still in wire format, instead use the buffer content.
if lb := x.lazyBuffer(); lb != nil {
// The tag inside the lazy buffer is a different tag (the extension
// number), but what we need here is the tag for FieldMessage:
b = protowire.AppendVarint(b, protowire.EncodeTag(messageset.FieldMessage, protowire.BytesType))
b = append(b, lb[xi.tagsize:]...)
b = messageset.AppendFieldEnd(b)
return b, nil
}
}

b, err := xi.funcs.marshal(b, x.Value(), protowire.EncodeTag(messageset.FieldMessage, protowire.BytesType), opts)
if err != nil {
return b, err
Expand Down
30 changes: 30 additions & 0 deletions internal/impl/encode.go
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,14 @@ func (mi *MessageInfo) marshalAppendPointer(b []byte, p pointer, opts marshalOpt
return b, nil
}

// fullyLazyExtensions returns true if we should attempt to keep extensions lazy over size and marshal.
func fullyLazyExtensions(opts marshalOptions) bool {
// When deterministic marshaling is requested, force an unmarshal for lazy
// extensions to produce a deterministic result, instead of passing through
// bytes lazily that may or may not match what Go Protobuf would produce.
return opts.flags&piface.MarshalDeterministic == 0
}

func (mi *MessageInfo) sizeExtensions(ext *map[int32]ExtensionField, opts marshalOptions) (n int) {
if ext == nil {
return 0
Expand All @@ -158,6 +166,14 @@ func (mi *MessageInfo) sizeExtensions(ext *map[int32]ExtensionField, opts marsha
if xi.funcs.size == nil {
continue
}
if fullyLazyExtensions(opts) {
// Don't expand the extension, instead use the buffer to calculate size
if lb := x.lazyBuffer(); lb != nil {
// We got hold of the buffer, so it's still lazy.
n += len(lb)
continue
}
}
n += xi.funcs.size(x.Value(), xi.tagsize, opts)
}
return n
Expand All @@ -176,6 +192,13 @@ func (mi *MessageInfo) appendExtensions(b []byte, ext *map[int32]ExtensionField,
var err error
for _, x := range *ext {
xi := getExtensionFieldInfo(x.Type())
if fullyLazyExtensions(opts) {
// Don't expand the extension if it's still in wire format, instead use the buffer content.
if lb := x.lazyBuffer(); lb != nil {
b = append(b, lb...)
continue
}
}
b, err = xi.funcs.marshal(b, x.Value(), xi.wiretag, opts)
}
return b, err
Expand All @@ -191,6 +214,13 @@ func (mi *MessageInfo) appendExtensions(b []byte, ext *map[int32]ExtensionField,
for _, k := range keys {
x := (*ext)[int32(k)]
xi := getExtensionFieldInfo(x.Type())
if fullyLazyExtensions(opts) {
// Don't expand the extension if it's still in wire format, instead use the buffer content.
if lb := x.lazyBuffer(); lb != nil {
b = append(b, lb...)
continue
}
}
b, err = xi.funcs.marshal(b, x.Value(), xi.wiretag, opts)
if err != nil {
return b, err
Expand Down
88 changes: 88 additions & 0 deletions internal/impl/lazy_normalized_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package impl_test

import (
"testing"

"google.golang.org/protobuf/proto"
"google.golang.org/protobuf/testing/protopack"

lazytestpb "google.golang.org/protobuf/internal/testprotos/lazy"
)

// Constructs a message encoded in denormalized (non-minimal) wire format, but
// using two levels of nesting: A top-level message with a child message which
// in turn has a grandchild message.
func denormalizedTwoLevel(t *testing.T) ([]byte, *lazytestpb.Top, error) {
// Construct a message with denormalized (non-minimal) wire format:
// 1. Encode a top-level message with submessage B (ext) + C (field)
// 2. Replace the encoding of submessage C (field) with
// another instance of submessage B (ext)
//
// This modification of the wire format is spec'd in Protobuf:
// https://github.com/protocolbuffers/protobuf/issues/9257
grandchild := &lazytestpb.Sub{}
proto.SetExtension(grandchild, lazytestpb.E_Ext_B, &lazytestpb.Ext{
SomeFlag: proto.Bool(true),
})
expectedMessage := &lazytestpb.Top{
Child: &lazytestpb.Sub{
Grandchild: grandchild,
},
A: proto.Uint32(2342),
}

fullMessage := protopack.Message{
protopack.Tag{1, protopack.VarintType}, protopack.Varint(2342),
// Child
protopack.Tag{2, protopack.BytesType}, protopack.LengthPrefix(protopack.Message{
// Grandchild
protopack.Tag{4, protopack.BytesType}, protopack.LengthPrefix(protopack.Message{
// The first occurrence of B matches expectedMessage:
protopack.Tag{2, protopack.BytesType}, protopack.LengthPrefix(protopack.Message{
protopack.Tag{1, protopack.VarintType}, protopack.Varint(1),
}),
// This second duplicative occurrence of B is spec'd in Protobuf:
// https://github.com/protocolbuffers/protobuf/issues/9257
protopack.Tag{2, protopack.BytesType}, protopack.LengthPrefix(protopack.Message{
protopack.Tag{1, protopack.VarintType}, protopack.Varint(1),
}),
}),
}),
}.Marshal()

return fullMessage, expectedMessage, nil
}

func TestNoInvalidWireFormatWithDeterministicLazy(t *testing.T) {
fullMessage, _, err := denormalizedTwoLevel(t)
if err != nil {
t.Fatal(err)
}

top := &lazytestpb.Top{}
if err := proto.Unmarshal(fullMessage, top); err != nil {
t.Fatal(err)
}

// Requesting deterministic marshaling should result in unmarshaling (and
// thereby normalizing the non-minimal encoding) when sizing.
//
// If the deterministic flag is dropped (like before cl/624951104), the size
// cache is populated with the non-minimal size. The Marshal call below
// lazily unmarshals (due to the Deterministic flag), which includes
// normalization, and will then report a size mismatch error (instead of
// producing invalid wire format).
proto.MarshalOptions{Deterministic: true}.Size(top)

_, err = proto.MarshalOptions{
Deterministic: true,
UseCachedSize: true,
}.Marshal(top)
if err != nil {
t.Fatal(err)
}
}

0 comments on commit 0e93293

Please sign in to comment.