Skip to content

Commit

Permalink
Fix CSV render error (#17406) (#17431)
Browse files Browse the repository at this point in the history
Backport #17406.

Closes #17378 

Both errors from #17378 were caused by  #15175.

Problem 1 (error with added file):
`ToUTF8WithFallbackReader` creates a `MultiReader` from a `byte[2048]` and the remaining reader. `CreateReaderAndGuessDelimiter` tries to read 10000 bytes from this reader but only gets 2048 because that's the first reader in the `MultiReader`. Then the `if size < 1e4` thinks the input is at EOF and just returns that.

Problem 2 (error with changed file):
The blob reader gets defer closed. That was fine because the old version reads the whole file into memory. Now with the streaming version the close needs to defer after the method.

Co-authored-by: zeripath <art27@cantab.net>
  • Loading branch information
KN4CK3R and zeripath committed Oct 25, 2021
1 parent 5159055 commit 1fbdf96
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 22 deletions.
20 changes: 6 additions & 14 deletions modules/csv/csv.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,32 +28,24 @@ func CreateReader(input io.Reader, delimiter rune) *stdcsv.Reader {
}

// CreateReaderAndGuessDelimiter tries to guess the field delimiter from the content and creates a csv.Reader.
// Reads at most 10k bytes.
func CreateReaderAndGuessDelimiter(rd io.Reader) (*stdcsv.Reader, error) {
var data = make([]byte, 1e4)
size, err := util.ReadAtMost(rd, data)
if err != nil {
return nil, err
}

delimiter := guessDelimiter(data[:size])

var newInput io.Reader
if size < 1e4 {
newInput = bytes.NewReader(data[:size])
} else {
newInput = io.MultiReader(bytes.NewReader(data), rd)
}

return CreateReader(newInput, delimiter), nil
return CreateReader(
io.MultiReader(bytes.NewReader(data[:size]), rd),
guessDelimiter(data[:size]),
), nil
}

// guessDelimiter scores the input CSV data against delimiters, and returns the best match.
// Reads at most 10k bytes & 10 lines.
func guessDelimiter(data []byte) rune {
maxLines := 10
maxBytes := util.Min(len(data), 1e4)
text := string(data[:maxBytes])
text = quoteRegexp.ReplaceAllLiteralString(text, "")
text := quoteRegexp.ReplaceAllLiteralString(string(data), "")
lines := strings.SplitN(text, "\n", maxLines+1)
lines = lines[:util.Min(maxLines, len(lines))]

Expand Down
23 changes: 15 additions & 8 deletions routers/web/repo/compare.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"errors"
"fmt"
"html"
"io"
"net/http"
"path"
"path/filepath"
Expand Down Expand Up @@ -104,30 +105,36 @@ func setCsvCompareContext(ctx *context.Context) {

errTooLarge := errors.New(ctx.Locale.Tr("repo.error.csv.too_large"))

csvReaderFromCommit := func(c *git.Commit) (*csv.Reader, error) {
csvReaderFromCommit := func(c *git.Commit) (*csv.Reader, io.Closer, error) {
blob, err := c.GetBlobByPath(diffFile.Name)
if err != nil {
return nil, err
return nil, nil, err
}

if setting.UI.CSV.MaxFileSize != 0 && setting.UI.CSV.MaxFileSize < blob.Size() {
return nil, errTooLarge
return nil, nil, errTooLarge
}

reader, err := blob.DataAsync()
if err != nil {
return nil, err
return nil, nil, err
}
defer reader.Close()

return csv_module.CreateReaderAndGuessDelimiter(charset.ToUTF8WithFallbackReader(reader))
csvReader, err := csv_module.CreateReaderAndGuessDelimiter(charset.ToUTF8WithFallbackReader(reader))
return csvReader, reader, err
}

baseReader, err := csvReaderFromCommit(baseCommit)
baseReader, baseBlobCloser, err := csvReaderFromCommit(baseCommit)
if baseBlobCloser != nil {
defer baseBlobCloser.Close()
}
if err == errTooLarge {
return CsvDiffResult{nil, err.Error()}
}
headReader, err := csvReaderFromCommit(headCommit)
headReader, headBlobCloser, err := csvReaderFromCommit(headCommit)
if headBlobCloser != nil {
defer headBlobCloser.Close()
}
if err == errTooLarge {
return CsvDiffResult{nil, err.Error()}
}
Expand Down

0 comments on commit 1fbdf96

Please sign in to comment.