From 7bd40f548987b3f0ce544902b9c4fbdf8d310882 Mon Sep 17 00:00:00 2001 From: Alexander Block Date: Tue, 2 Apr 2024 11:31:17 +0200 Subject: [PATCH 1/5] plumbing: Properly detect EOF when reading index extensions Before this, go-git was relying on the peeked header to not include a valid 4 char string header. While doing this, it did not differentiate between the errournously read final hash and an unknown extension. This made it impossible to properly skip unknown optional extensions while detecting EOF early enough. --- plumbing/format/index/decoder.go | 70 ++++++++++++++++---------------- storage/filesystem/index.go | 2 +- 2 files changed, 37 insertions(+), 35 deletions(-) diff --git a/plumbing/format/index/decoder.go b/plumbing/format/index/decoder.go index 6778cf74e..6f3241844 100644 --- a/plumbing/format/index/decoder.go +++ b/plumbing/format/index/decoder.go @@ -39,6 +39,7 @@ const ( // A Decoder reads and decodes index files from an input stream. type Decoder struct { + buf *bufio.Reader r io.Reader hash hash.Hash lastEntry *Entry @@ -49,8 +50,10 @@ type Decoder struct { // NewDecoder returns a new decoder that reads from r. func NewDecoder(r io.Reader) *Decoder { h := hash.New(hash.CryptoType) + buf := bufio.NewReader(r) return &Decoder{ - r: io.TeeReader(r, h), + buf: buf, + r: io.TeeReader(buf, h), hash: h, extReader: bufio.NewReader(nil), } @@ -210,64 +213,64 @@ func (d *Decoder) readExtensions(idx *Index) error { // count that they are not supported by jgit or libgit var expected []byte + var peeked []byte var err error - var header [4]byte + // we should always be able to peek for 4 bytes (header) + 4 bytes (extlen) + final hash + // if this fails, we know that we're at the end of the index + peekLen := 4 + 4 + d.hash.Size() + for { expected = d.hash.Sum(nil) - - var n int - if n, err = io.ReadFull(d.r, header[:]); err != nil { - if n == 0 { - err = io.EOF - } - + peeked, err = d.buf.Peek(peekLen) + if len(peeked) < peekLen { + // there can't be an extension at this point, so let's bail out + err = nil break } + if err != nil { + return err + } - err = d.readExtension(idx, header[:]) + err = d.readExtension(idx) if err != nil { break } } - if err != errUnknownExtension { + if err != nil && err != errUnknownExtension { return err } - return d.readChecksum(expected, header) + return d.readChecksum(expected) } -func (d *Decoder) readExtension(idx *Index, header []byte) error { - switch { - case bytes.Equal(header, treeExtSignature): - r, err := d.getExtensionReader() - if err != nil { - return err - } +func (d *Decoder) readExtension(idx *Index) error { + var header [4]byte + if _, err := io.ReadFull(d.r, header[:]); err != nil { + return err + } + + r, err := d.getExtensionReader() + if err != nil { + return err + } + + switch { + case bytes.Equal(header[:], treeExtSignature): idx.Cache = &Tree{} d := &treeExtensionDecoder{r} if err := d.Decode(idx.Cache); err != nil { return err } - case bytes.Equal(header, resolveUndoExtSignature): - r, err := d.getExtensionReader() - if err != nil { - return err - } - + case bytes.Equal(header[:], resolveUndoExtSignature): idx.ResolveUndo = &ResolveUndo{} d := &resolveUndoDecoder{r} if err := d.Decode(idx.ResolveUndo); err != nil { return err } - case bytes.Equal(header, endOfIndexEntryExtSignature): - r, err := d.getExtensionReader() - if err != nil { - return err - } - + case bytes.Equal(header[:], endOfIndexEntryExtSignature): idx.EndOfIndexEntry = &EndOfIndexEntry{} d := &endOfIndexEntryDecoder{r} if err := d.Decode(idx.EndOfIndexEntry); err != nil { @@ -290,11 +293,10 @@ func (d *Decoder) getExtensionReader() (*bufio.Reader, error) { return d.extReader, nil } -func (d *Decoder) readChecksum(expected []byte, alreadyRead [4]byte) error { +func (d *Decoder) readChecksum(expected []byte) error { var h plumbing.Hash - copy(h[:4], alreadyRead[:]) - if _, err := io.ReadFull(d.r, h[4:]); err != nil { + if _, err := io.ReadFull(d.r, h[:]); err != nil { return err } diff --git a/storage/filesystem/index.go b/storage/filesystem/index.go index a19176f83..a86ef3e2e 100644 --- a/storage/filesystem/index.go +++ b/storage/filesystem/index.go @@ -48,7 +48,7 @@ func (s *IndexStorage) Index() (i *index.Index, err error) { defer ioutil.CheckClose(f, &err) - d := index.NewDecoder(bufio.NewReader(f)) + d := index.NewDecoder(f) err = d.Decode(idx) return idx, err } From 8a3757ce0b63e842f1e466955f0a4cc2eed429eb Mon Sep 17 00:00:00 2001 From: Alexander Block Date: Tue, 2 Apr 2024 11:42:32 +0200 Subject: [PATCH 2/5] plumbing: Properly support skipping of non-mandatory extensions Before this, go-git would prematurely bail out of extensions processing when an unknown extension was encountered. This had two issues: 1. It did not account for mandatory (lower case header) extensions 2. It did not properly update the calculated hash, leading to an "invalid checksum" error. --- plumbing/format/index/decoder.go | 38 +++++++++++++++++++++++++------- 1 file changed, 30 insertions(+), 8 deletions(-) diff --git a/plumbing/format/index/decoder.go b/plumbing/format/index/decoder.go index 6f3241844..6634688b7 100644 --- a/plumbing/format/index/decoder.go +++ b/plumbing/format/index/decoder.go @@ -24,8 +24,8 @@ var ( // ErrInvalidChecksum is returned by Decode if the SHA1 hash mismatch with // the read content ErrInvalidChecksum = errors.New("invalid checksum") - - errUnknownExtension = errors.New("unknown extension") + // ErrUnknownExtension is returned when an index extension is encountered that is considered mandatory + ErrUnknownExtension = errors.New("unknown extension") ) const ( @@ -234,14 +234,10 @@ func (d *Decoder) readExtensions(idx *Index) error { err = d.readExtension(idx) if err != nil { - break + return err } } - if err != nil && err != errUnknownExtension { - return err - } - return d.readChecksum(expected) } @@ -277,7 +273,14 @@ func (d *Decoder) readExtension(idx *Index) error { return err } default: - return errUnknownExtension + if header[0] < 'A' || header[0] > 'Z' { + return ErrUnknownExtension + } + + d := &unknownExtensionDecoder{r} + if err := d.Decode(); err != nil { + return err + } } return nil @@ -478,3 +481,22 @@ func (d *endOfIndexEntryDecoder) Decode(e *EndOfIndexEntry) error { _, err = io.ReadFull(d.r, e.Hash[:]) return err } + +type unknownExtensionDecoder struct { + r *bufio.Reader +} + +func (d *unknownExtensionDecoder) Decode() error { + var buf [1024]byte + + for { + _, err := d.r.Read(buf[:]) + if err == io.EOF { + break + } + if err != nil { + return err + } + } + return nil +} From 4cf29e8ac6ad5a7861a6f8eb70cff57fda6ebefe Mon Sep 17 00:00:00 2001 From: Alexander Block Date: Sat, 6 Apr 2024 09:26:42 +0200 Subject: [PATCH 3/5] plumbing: Implement encodeRawExtension that can be used by tests --- plumbing/format/index/encoder.go | 34 +++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/plumbing/format/index/encoder.go b/plumbing/format/index/encoder.go index fa2d81445..c292c2cd6 100644 --- a/plumbing/format/index/encoder.go +++ b/plumbing/format/index/encoder.go @@ -3,6 +3,7 @@ package index import ( "bytes" "errors" + "fmt" "io" "sort" "time" @@ -35,6 +36,11 @@ func NewEncoder(w io.Writer) *Encoder { // Encode writes the Index to the stream of the encoder. func (e *Encoder) Encode(idx *Index) error { + return e.encode(idx, true) +} + +func (e *Encoder) encode(idx *Index, footer bool) error { + // TODO: support v4 // TODO: support extensions if idx.Version > EncodeVersionSupported { @@ -49,7 +55,10 @@ func (e *Encoder) Encode(idx *Index) error { return err } - return e.encodeFooter() + if footer { + return e.encodeFooter() + } + return nil } func (e *Encoder) encodeHeader(idx *Index) error { @@ -135,6 +144,29 @@ func (e *Encoder) encodeEntry(entry *Entry) error { return binary.Write(e.w, []byte(entry.Name)) } +func (e *Encoder) encodeRawExtension(signature string, data []byte) error { + if len(signature) != 4 { + return fmt.Errorf("invalid signature length") + } + + _, err := e.w.Write([]byte(signature)) + if err != nil { + return err + } + + err = binary.WriteUint32(e.w, uint32(len(data))) + if err != nil { + return err + } + + _, err = e.w.Write(data) + if err != nil { + return err + } + + return nil +} + func (e *Encoder) timeToUint32(t *time.Time) (uint32, uint32, error) { if t.IsZero() { return 0, 0, nil From f763fd3baf2677698937e7e864489b1440136b84 Mon Sep 17 00:00:00 2001 From: Alexander Block Date: Sat, 6 Apr 2024 09:27:42 +0200 Subject: [PATCH 4/5] plumbing: Add tests for unknown extensions, truncated extensions and invalid hashes --- plumbing/format/index/decoder_test.go | 102 ++++++++++++++++++++++++++ 1 file changed, 102 insertions(+) diff --git a/plumbing/format/index/decoder_test.go b/plumbing/format/index/decoder_test.go index 39ab3361f..4adddda09 100644 --- a/plumbing/format/index/decoder_test.go +++ b/plumbing/format/index/decoder_test.go @@ -1,6 +1,11 @@ package index import ( + "bytes" + "crypto" + "github.com/go-git/go-git/v5/plumbing/hash" + "github.com/go-git/go-git/v5/utils/binary" + "io" "testing" "github.com/go-git/go-git/v5/plumbing" @@ -218,3 +223,100 @@ func (s *IndexSuite) TestDecodeEndOfIndexEntry(c *C) { c.Assert(idx.EndOfIndexEntry.Offset, Equals, uint32(716)) c.Assert(idx.EndOfIndexEntry.Hash.String(), Equals, "922e89d9ffd7cefce93a211615b2053c0f42bd78") } + +func (s *IndexSuite) readSimpleIndex(c *C) *Index { + f, err := fixtures.Basic().One().DotGit().Open("index") + c.Assert(err, IsNil) + defer func() { c.Assert(f.Close(), IsNil) }() + + idx := &Index{} + d := NewDecoder(f) + err = d.Decode(idx) + c.Assert(err, IsNil) + + return idx +} + +func (s *IndexSuite) buildIndexWithExtension(c *C, signature string, data string) []byte { + idx := s.readSimpleIndex(c) + + buf := bytes.NewBuffer(nil) + e := NewEncoder(buf) + + err := e.encode(idx, false) + c.Assert(err, IsNil) + err = e.encodeRawExtension(signature, []byte(data)) + c.Assert(err, IsNil) + + err = e.encodeFooter() + c.Assert(err, IsNil) + + return buf.Bytes() +} + +func (s *IndexSuite) TestDecodeUnknownOptionalExt(c *C) { + f := bytes.NewReader(s.buildIndexWithExtension(c, "TEST", "testdata")) + + idx := &Index{} + d := NewDecoder(f) + err := d.Decode(idx) + c.Assert(err, IsNil) +} + +func (s *IndexSuite) TestDecodeUnknownMandatoryExt(c *C) { + f := bytes.NewReader(s.buildIndexWithExtension(c, "test", "testdata")) + + idx := &Index{} + d := NewDecoder(f) + err := d.Decode(idx) + c.Assert(err, ErrorMatches, ErrUnknownExtension.Error()) +} + +func (s *IndexSuite) TestDecodeTruncatedExt(c *C) { + idx := s.readSimpleIndex(c) + + buf := bytes.NewBuffer(nil) + e := NewEncoder(buf) + + err := e.encode(idx, false) + c.Assert(err, IsNil) + + _, err = e.w.Write([]byte("TEST")) + c.Assert(err, IsNil) + + err = binary.WriteUint32(e.w, uint32(100)) + c.Assert(err, IsNil) + + _, err = e.w.Write([]byte("truncated")) + c.Assert(err, IsNil) + + err = e.encodeFooter() + c.Assert(err, IsNil) + + idx = &Index{} + d := NewDecoder(buf) + err = d.Decode(idx) + c.Assert(err, ErrorMatches, io.EOF.Error()) +} + +func (s *IndexSuite) TestDecodeInvalidHash(c *C) { + idx := s.readSimpleIndex(c) + + buf := bytes.NewBuffer(nil) + e := NewEncoder(buf) + + err := e.encode(idx, false) + c.Assert(err, IsNil) + + err = e.encodeRawExtension("TEST", []byte("testdata")) + c.Assert(err, IsNil) + + h := hash.New(crypto.SHA1) + err = binary.Write(e.w, h.Sum(nil)) + c.Assert(err, IsNil) + + idx = &Index{} + d := NewDecoder(buf) + err = d.Decode(idx) + c.Assert(err, ErrorMatches, ErrInvalidChecksum.Error()) +} From 23fa5899766dd023a3b6d341796b166efac56441 Mon Sep 17 00:00:00 2001 From: Alexander Block Date: Tue, 9 Apr 2024 00:23:26 +0200 Subject: [PATCH 5/5] plumbing: Add link to index-format docs --- plumbing/format/index/decoder.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/plumbing/format/index/decoder.go b/plumbing/format/index/decoder.go index 6634688b7..f43b1c5d3 100644 --- a/plumbing/format/index/decoder.go +++ b/plumbing/format/index/decoder.go @@ -273,6 +273,8 @@ func (d *Decoder) readExtension(idx *Index) error { return err } default: + // See https://git-scm.com/docs/index-format, which says: + // If the first byte is 'A'..'Z' the extension is optional and can be ignored. if header[0] < 'A' || header[0] > 'Z' { return ErrUnknownExtension }