From 4a0ff347d688bdd9eebb2afbc274f0d62224b9fb Mon Sep 17 00:00:00 2001 From: noahdietz Date: Wed, 5 Oct 2022 16:04:31 -0700 Subject: [PATCH 1/6] feat(v2): copy DetermineContentType functionality --- v2/media.go | 112 +++++++++++++++++++++++++++++ v2/media_test.go | 178 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 290 insertions(+) create mode 100644 v2/media.go create mode 100644 v2/media_test.go diff --git a/v2/media.go b/v2/media.go new file mode 100644 index 0000000..4ea8578 --- /dev/null +++ b/v2/media.go @@ -0,0 +1,112 @@ +// Copyright 2022, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +package gax + +import ( + "io" + "net/http" + + "google.golang.org/api/googleapi" +) + +const sniffBuffSize = 512 + +func newContentSniffer(r io.Reader) *contentSniffer { + return &contentSniffer{r: r} +} + +// contentSniffer wraps a Reader, and reports the content type determined by sniffing up to 512 bytes from the Reader. +type contentSniffer struct { + r io.Reader + start []byte // buffer for the sniffed bytes. + err error // set to any error encountered while reading bytes to be sniffed. + + ctype string // set on first sniff. + sniffed bool // set to true on first sniff. +} + +func (cs *contentSniffer) Read(p []byte) (n int, err error) { + // Ensure that the content type is sniffed before any data is consumed from Reader. + _, _ = cs.ContentType() + + if len(cs.start) > 0 { + n := copy(p, cs.start) + cs.start = cs.start[n:] + return n, nil + } + + // We may have read some bytes into start while sniffing, even if the read ended in an error. + // We should first return those bytes, then the error. + if cs.err != nil { + return 0, cs.err + } + + // Now we have handled all bytes that were buffered while sniffing. Now just delegate to the underlying reader. + return cs.r.Read(p) +} + +// ContentType returns the sniffed content type, and whether the content type was successfully sniffed. +func (cs *contentSniffer) ContentType() (string, bool) { + if cs.sniffed { + return cs.ctype, cs.ctype != "" + } + cs.sniffed = true + // If ReadAll hits EOF, it returns err==nil. + cs.start, cs.err = io.ReadAll(io.LimitReader(cs.r, sniffBuffSize)) + + // Don't try to detect the content type based on possibly incomplete data. + if cs.err != nil { + return "", false + } + + cs.ctype = http.DetectContentType(cs.start) + return cs.ctype, true +} + +// DetermineContentType determines the content type of the supplied reader. +// The content of media will be sniffed to determine the content type. +// If media implements googleapi.ContentTyper (deprecated), this will be used +// instead of sniffing the content. +// After calling DetectContentType the caller must not perform further reads on +// media, but rather read from the Reader that is returned. +func DetermineContentType(media io.Reader) (io.Reader, string) { + // For backwards compatibility, allow clients to set content + // type by providing a ContentTyper for media. + if typer, ok := media.(googleapi.ContentTyper); ok { + return media, typer.ContentType() + } + + sniffer := newContentSniffer(media) + if ctype, ok := sniffer.ContentType(); ok { + return sniffer, ctype + } + // If content type could not be sniffed, reads from sniffer will eventually fail with an error. + return sniffer, "" +} diff --git a/v2/media_test.go b/v2/media_test.go new file mode 100644 index 0000000..8b41281 --- /dev/null +++ b/v2/media_test.go @@ -0,0 +1,178 @@ +// Copyright 2022, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +package gax + +import ( + "bytes" + "io" + "reflect" + "testing" +) + +// errReader reads out of a buffer until it is empty, then returns the specified error. +type errReader struct { + buf []byte + err error +} + +func (er *errReader) Read(p []byte) (int, error) { + if len(er.buf) == 0 { + if er.err == nil { + return 0, io.EOF + } + return 0, er.err + } + n := copy(p, er.buf) + er.buf = er.buf[n:] + return n, nil +} + +func TestContentSniffing(t *testing.T) { + type testCase struct { + data []byte // the data to read from the Reader + finalErr error // error to return after data has been read + + wantContentType string + wantContentTypeResult bool + } + + for _, tc := range []testCase{ + { + data: []byte{0, 0, 0, 0}, + finalErr: nil, + wantContentType: "application/octet-stream", + wantContentTypeResult: true, + }, + { + data: []byte(""), + finalErr: nil, + wantContentType: "text/plain; charset=utf-8", + wantContentTypeResult: true, + }, + { + data: []byte(""), + finalErr: io.ErrUnexpectedEOF, + wantContentType: "text/plain; charset=utf-8", + wantContentTypeResult: false, + }, + { + data: []byte("abc"), + finalErr: nil, + wantContentType: "text/plain; charset=utf-8", + wantContentTypeResult: true, + }, + { + data: []byte("abc"), + finalErr: io.ErrUnexpectedEOF, + wantContentType: "text/plain; charset=utf-8", + wantContentTypeResult: false, + }, + // The following examples contain more bytes than are buffered for sniffing. + { + data: bytes.Repeat([]byte("a"), 513), + finalErr: nil, + wantContentType: "text/plain; charset=utf-8", + wantContentTypeResult: true, + }, + { + data: bytes.Repeat([]byte("a"), 513), + finalErr: io.ErrUnexpectedEOF, + wantContentType: "text/plain; charset=utf-8", + wantContentTypeResult: true, // true because error is after first 512 bytes. + }, + } { + er := &errReader{buf: tc.data, err: tc.finalErr} + + sct := newContentSniffer(er) + + // Even if was an error during the first 512 bytes, we should still be able to read those bytes. + buf, err := io.ReadAll(sct) + + if !reflect.DeepEqual(buf, tc.data) { + t.Fatalf("Failed reading buffer: got: %q; want:%q", buf, tc.data) + } + + if err != tc.finalErr { + t.Fatalf("Reading buffer error: got: %v; want: %v", err, tc.finalErr) + } + + ct, ok := sct.ContentType() + if ok != tc.wantContentTypeResult { + t.Fatalf("Content type result got: %v; want: %v", ok, tc.wantContentTypeResult) + } + if ok && ct != tc.wantContentType { + t.Fatalf("Content type got: %q; want: %q", ct, tc.wantContentType) + } + } +} + +type staticContentTyper struct { + io.Reader +} + +func (sct staticContentTyper) ContentType() string { + return "static content type" +} + +func TestDetermineContentType(t *testing.T) { + data := []byte("abc") + rdr := func() io.Reader { + return bytes.NewBuffer(data) + } + + type testCase struct { + r io.Reader + wantContentType string + } + + for _, tc := range []testCase{ + { + r: rdr(), + wantContentType: "text/plain; charset=utf-8", + }, + { + r: staticContentTyper{rdr()}, + wantContentType: "static content type", + }, + } { + r, ctype := DetermineContentType(tc.r) + got, err := io.ReadAll(r) + if err != nil { + t.Fatalf("Failed reading buffer: %v", err) + } + if !reflect.DeepEqual(got, data) { + t.Fatalf("Failed reading buffer: got: %q; want:%q", got, data) + } + + if ctype != tc.wantContentType { + t.Fatalf("Content type got: %q; want: %q", ctype, tc.wantContentType) + } + } +} From f1646c180a26d3f54afa708b44762ec3d73932fe Mon Sep 17 00:00:00 2001 From: noahdietz Date: Wed, 5 Oct 2022 16:07:06 -0700 Subject: [PATCH 2/6] retain ctype --- v2/media.go | 12 ++++++++++-- v2/media_test.go | 12 +++++++++--- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/v2/media.go b/v2/media.go index 4ea8578..6af70a1 100644 --- a/v2/media.go +++ b/v2/media.go @@ -91,12 +91,20 @@ func (cs *contentSniffer) ContentType() (string, bool) { } // DetermineContentType determines the content type of the supplied reader. -// The content of media will be sniffed to determine the content type. +// If the content type is already known, it can be specified via ctype. +// Otherwise, the content of media will be sniffed to determine the content type. // If media implements googleapi.ContentTyper (deprecated), this will be used // instead of sniffing the content. // After calling DetectContentType the caller must not perform further reads on // media, but rather read from the Reader that is returned. -func DetermineContentType(media io.Reader) (io.Reader, string) { +func DetermineContentType(media io.Reader, ctype string) (io.Reader, string) { + // Note: callers could avoid calling DetectContentType if ctype != "", + // but doing the check inside this function reduces the amount of + // generated code. + if ctype != "" { + return media, ctype + } + // For backwards compatibility, allow clients to set content // type by providing a ContentTyper for media. if typer, ok := media.(googleapi.ContentTyper); ok { diff --git a/v2/media_test.go b/v2/media_test.go index 8b41281..b5bd498 100644 --- a/v2/media_test.go +++ b/v2/media_test.go @@ -148,8 +148,9 @@ func TestDetermineContentType(t *testing.T) { } type testCase struct { - r io.Reader - wantContentType string + r io.Reader + explicitConentType string + wantContentType string } for _, tc := range []testCase{ @@ -161,8 +162,13 @@ func TestDetermineContentType(t *testing.T) { r: staticContentTyper{rdr()}, wantContentType: "static content type", }, + { + r: staticContentTyper{rdr()}, + explicitConentType: "explicit", + wantContentType: "explicit", + }, } { - r, ctype := DetermineContentType(tc.r) + r, ctype := DetermineContentType(tc.r, tc.explicitConentType) got, err := io.ReadAll(r) if err != nil { t.Fatalf("Failed reading buffer: %v", err) From 15a97638f9bf4ed457597b5bb89244e0d938171f Mon Sep 17 00:00:00 2001 From: noahdietz Date: Wed, 5 Oct 2022 16:32:06 -0700 Subject: [PATCH 3/6] address feedback --- v2/{media.go => content_type.go} | 7 ++++--- v2/{media_test.go => content_type_test.go} | 0 2 files changed, 4 insertions(+), 3 deletions(-) rename v2/{media.go => content_type.go} (96%) rename v2/{media_test.go => content_type_test.go} (100%) diff --git a/v2/media.go b/v2/content_type.go similarity index 96% rename from v2/media.go rename to v2/content_type.go index 6af70a1..08e97ce 100644 --- a/v2/media.go +++ b/v2/content_type.go @@ -32,8 +32,6 @@ package gax import ( "io" "net/http" - - "google.golang.org/api/googleapi" ) const sniffBuffSize = 512 @@ -107,7 +105,10 @@ func DetermineContentType(media io.Reader, ctype string) (io.Reader, string) { // For backwards compatibility, allow clients to set content // type by providing a ContentTyper for media. - if typer, ok := media.(googleapi.ContentTyper); ok { + // Note: This is an anonymous interface definition copied from googleapi.ContentTyper. + if typer, ok := media.(interface { + ContentType() string + }); ok { return media, typer.ContentType() } diff --git a/v2/media_test.go b/v2/content_type_test.go similarity index 100% rename from v2/media_test.go rename to v2/content_type_test.go From b5cc9a7e1a20b3671a4628ad134abc16d63b6dc3 Mon Sep 17 00:00:00 2001 From: noahdietz Date: Wed, 5 Oct 2022 16:34:04 -0700 Subject: [PATCH 4/6] use ioutil for go1.15 compat --- v2/content_type.go | 3 ++- v2/content_type_test.go | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/v2/content_type.go b/v2/content_type.go index 08e97ce..ec42a7b 100644 --- a/v2/content_type.go +++ b/v2/content_type.go @@ -31,6 +31,7 @@ package gax import ( "io" + "io/ioutil" "net/http" ) @@ -77,7 +78,7 @@ func (cs *contentSniffer) ContentType() (string, bool) { } cs.sniffed = true // If ReadAll hits EOF, it returns err==nil. - cs.start, cs.err = io.ReadAll(io.LimitReader(cs.r, sniffBuffSize)) + cs.start, cs.err = ioutil.ReadAll(io.LimitReader(cs.r, sniffBuffSize)) // Don't try to detect the content type based on possibly incomplete data. if cs.err != nil { diff --git a/v2/content_type_test.go b/v2/content_type_test.go index b5bd498..ae58d2f 100644 --- a/v2/content_type_test.go +++ b/v2/content_type_test.go @@ -32,6 +32,7 @@ package gax import ( "bytes" "io" + "io/ioutil" "reflect" "testing" ) @@ -113,7 +114,7 @@ func TestContentSniffing(t *testing.T) { sct := newContentSniffer(er) // Even if was an error during the first 512 bytes, we should still be able to read those bytes. - buf, err := io.ReadAll(sct) + buf, err := ioutil.ReadAll(sct) if !reflect.DeepEqual(buf, tc.data) { t.Fatalf("Failed reading buffer: got: %q; want:%q", buf, tc.data) @@ -169,7 +170,7 @@ func TestDetermineContentType(t *testing.T) { }, } { r, ctype := DetermineContentType(tc.r, tc.explicitConentType) - got, err := io.ReadAll(r) + got, err := ioutil.ReadAll(r) if err != nil { t.Fatalf("Failed reading buffer: %v", err) } From 70df01e2d026a8cf60ad5eae0b951004313d1d9d Mon Sep 17 00:00:00 2001 From: noahdietz Date: Wed, 5 Oct 2022 16:38:09 -0700 Subject: [PATCH 5/6] address feedback --- v2/content_type.go | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/v2/content_type.go b/v2/content_type.go index ec42a7b..a3c470d 100644 --- a/v2/content_type.go +++ b/v2/content_type.go @@ -90,16 +90,14 @@ func (cs *contentSniffer) ContentType() (string, bool) { } // DetermineContentType determines the content type of the supplied reader. -// If the content type is already known, it can be specified via ctype. +// If ctype is not empty, because the content type is already available, media and ctype +// will be returned without modification. This is useful to avoid an external check of ctype. // Otherwise, the content of media will be sniffed to determine the content type. // If media implements googleapi.ContentTyper (deprecated), this will be used // instead of sniffing the content. // After calling DetectContentType the caller must not perform further reads on // media, but rather read from the Reader that is returned. func DetermineContentType(media io.Reader, ctype string) (io.Reader, string) { - // Note: callers could avoid calling DetectContentType if ctype != "", - // but doing the check inside this function reduces the amount of - // generated code. if ctype != "" { return media, ctype } From 4404067beeb5a99db095d12e422123c1d98b00b7 Mon Sep 17 00:00:00 2001 From: noahdietz Date: Mon, 10 Oct 2022 15:02:01 -0700 Subject: [PATCH 6/6] address feedback --- v2/content_type.go | 12 ++---------- v2/content_type_test.go | 12 +++--------- 2 files changed, 5 insertions(+), 19 deletions(-) diff --git a/v2/content_type.go b/v2/content_type.go index a3c470d..1b53d0a 100644 --- a/v2/content_type.go +++ b/v2/content_type.go @@ -90,18 +90,10 @@ func (cs *contentSniffer) ContentType() (string, bool) { } // DetermineContentType determines the content type of the supplied reader. -// If ctype is not empty, because the content type is already available, media and ctype -// will be returned without modification. This is useful to avoid an external check of ctype. -// Otherwise, the content of media will be sniffed to determine the content type. -// If media implements googleapi.ContentTyper (deprecated), this will be used -// instead of sniffing the content. +// The content of media will be sniffed to determine the content type. // After calling DetectContentType the caller must not perform further reads on // media, but rather read from the Reader that is returned. -func DetermineContentType(media io.Reader, ctype string) (io.Reader, string) { - if ctype != "" { - return media, ctype - } - +func DetermineContentType(media io.Reader) (io.Reader, string) { // For backwards compatibility, allow clients to set content // type by providing a ContentTyper for media. // Note: This is an anonymous interface definition copied from googleapi.ContentTyper. diff --git a/v2/content_type_test.go b/v2/content_type_test.go index ae58d2f..92b53f3 100644 --- a/v2/content_type_test.go +++ b/v2/content_type_test.go @@ -149,9 +149,8 @@ func TestDetermineContentType(t *testing.T) { } type testCase struct { - r io.Reader - explicitConentType string - wantContentType string + r io.Reader + wantContentType string } for _, tc := range []testCase{ @@ -163,13 +162,8 @@ func TestDetermineContentType(t *testing.T) { r: staticContentTyper{rdr()}, wantContentType: "static content type", }, - { - r: staticContentTyper{rdr()}, - explicitConentType: "explicit", - wantContentType: "explicit", - }, } { - r, ctype := DetermineContentType(tc.r, tc.explicitConentType) + r, ctype := DetermineContentType(tc.r) got, err := ioutil.ReadAll(r) if err != nil { t.Fatalf("Failed reading buffer: %v", err)