From cab94f36070ae2367293d3943eb277d16aa35957 Mon Sep 17 00:00:00 2001 From: krassowski Date: Tue, 5 Jan 2021 19:09:16 +0000 Subject: [PATCH 1/4] Reproduce failure of DSVModel to parse empty file --- packages/csvviewer/test/model.spec.ts | 4 +++- packages/csvviewer/test/parse.spec.ts | 15 +++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/packages/csvviewer/test/model.spec.ts b/packages/csvviewer/test/model.spec.ts index 85187cdc232f..743b2615a397 100644 --- a/packages/csvviewer/test/model.spec.ts +++ b/packages/csvviewer/test/model.spec.ts @@ -19,7 +19,7 @@ const CSV_TEST_FILES = [ ], [ - 'empty', + 'empty_values', readCSV('csv-spectrum/csvs/empty.csv'), require('csv-spectrum/json/empty.json') ], @@ -30,6 +30,8 @@ const CSV_TEST_FILES = [ require('csv-spectrum/json/empty_crlf.json') ], + ['empty_file', '', []], + [ 'escaped_quotes', readCSV('csv-spectrum/csvs/escaped_quotes.csv'), diff --git a/packages/csvviewer/test/parse.spec.ts b/packages/csvviewer/test/parse.spec.ts index c671a78a99f4..7266c0392438 100644 --- a/packages/csvviewer/test/parse.spec.ts +++ b/packages/csvviewer/test/parse.spec.ts @@ -436,6 +436,21 @@ describe('csvviewer/parse', () => { expect(results.ncols).toEqual(3); expect(results.offsets).toEqual([0, 2, 5]); }); + + it('handles empty file', () => { + const data = ``; + const options = { data, rowDelimiter: '\n' }; + let results; + + results = parser({ ...options, columnOffsets: false }); + expect(results.nrows).toEqual(0); + expect(results.offsets).toEqual([]); + + results = parser({ ...options, columnOffsets: true }); + expect(results.nrows).toEqual(0); + expect(results.ncols).toEqual(0); + expect(results.offsets).toEqual([]); + }); }); }); From 45df2777e4076cc0a0eaab50030f1cb4ce15f45f Mon Sep 17 00:00:00 2001 From: krassowski Date: Tue, 5 Jan 2021 19:11:52 +0000 Subject: [PATCH 2/4] Fix parsing of empty CSV files --- packages/csvviewer/src/model.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/packages/csvviewer/src/model.ts b/packages/csvviewer/src/model.ts index 5a1cd6faea8b..f89d4f93ed73 100644 --- a/packages/csvviewer/src/model.ts +++ b/packages/csvviewer/src/model.ts @@ -457,7 +457,8 @@ export class DSVModel extends DataModel implements IDisposable { // Copy the new offsets into a new row offset array. const oldRowOffsets = this._rowOffsets; - this._rowOffsets = new Uint32Array(this._rowCount); + // Always store at least one offset, even if data is empty + this._rowOffsets = new Uint32Array(this._rowCount || 1); this._rowOffsets.set(oldRowOffsets); this._rowOffsets.set(offsets, oldRowCount - 1); From bcb5d048b9ef0d8bb574a9dc0ce46d8316cc3bd8 Mon Sep 17 00:00:00 2001 From: Jason Grout Date: Tue, 5 Jan 2021 15:22:37 -0800 Subject: [PATCH 3/4] Make the initial rowCount 0 in the CSV viewer model. Also, handle the case when we get zero rows back from the parser, for the cases of empty files. --- packages/csvviewer/src/model.ts | 45 ++++++++++++++++++++------------- 1 file changed, 27 insertions(+), 18 deletions(-) diff --git a/packages/csvviewer/src/model.ts b/packages/csvviewer/src/model.ts index f89d4f93ed73..f7950db7e6b6 100644 --- a/packages/csvviewer/src/model.ts +++ b/packages/csvviewer/src/model.ts @@ -424,19 +424,25 @@ export class DSVModel extends DataModel implements IDisposable { } // Parse the data up to and including the requested row, starting from the - // last row offset we have. + // beginning of the last row offset. The first row offset returned will be + // the same as the last row offset we already have. + const reparse = this._rowCount! > 0; const { nrows, offsets } = PARSERS[this._parser]({ data: this._rawData, - startIndex: this._rowOffsets[this._rowCount! - 1], + startIndex: reparse + ? this._rowOffsets[this._rowCount! - 1] + : 0, delimiter: this._delimiter, rowDelimiter: this._rowDelimiter, quote: this._quote, columnOffsets: false, - maxRows: endRow - this._rowCount! + 1 + // We are reparsing the last row if we've already started, so add one + maxRows: endRow - this._rowCount! + (reparse ? 1 : 0) }); - // Return if we didn't actually get any new rows beyond the one we've - // already parsed. + // If we have already set up our initial bookkeeping, return early if we + // did not get any new rows beyond the last row that we've parsed, i.e., + // nrows===1. if (this._startedParsing && nrows <= 1) { this._doneParsing = true; this._ready.resolve(undefined); @@ -445,9 +451,12 @@ export class DSVModel extends DataModel implements IDisposable { this._startedParsing = true; - // Update the row count. + // Update the row count. If we requested to reparse a row, and we received + // at least one new row, then we should correct the count for that + // duplicate row. const oldRowCount = this._rowCount!; - this._rowCount = oldRowCount + nrows - 1; + const duplicateRows = reparse && nrows > 0 ? 1 : 0; + this._rowCount = oldRowCount + nrows - duplicateRows; // If we didn't reach the requested row, we must be done. if (this._rowCount < endRow) { @@ -455,12 +464,13 @@ export class DSVModel extends DataModel implements IDisposable { this._ready.resolve(undefined); } - // Copy the new offsets into a new row offset array. - const oldRowOffsets = this._rowOffsets; - // Always store at least one offset, even if data is empty - this._rowOffsets = new Uint32Array(this._rowCount || 1); - this._rowOffsets.set(oldRowOffsets); - this._rowOffsets.set(offsets, oldRowCount - 1); + // Copy the new offsets into a new row offset array if needed. + if (this._rowCount > oldRowCount) { + const oldRowOffsets = this._rowOffsets; + this._rowOffsets = new Uint32Array(this._rowCount); + this._rowOffsets.set(oldRowOffsets); + this._rowOffsets.set(offsets, oldRowCount - duplicateRows); + } // Expand the column offsets array if needed @@ -597,9 +607,8 @@ export class DSVModel extends DataModel implements IDisposable { private _resetParser(): void { this._columnCount = undefined; - // First row offset is *always* 0, so we always have the first row offset. - this._rowOffsets = new Uint32Array(1); - this._rowCount = 1; + this._rowOffsets = new Uint32Array(0); + this._rowCount = 0; this._startedParsing = false; this._columnOffsets = new Uint32Array(0); @@ -632,7 +641,7 @@ export class DSVModel extends DataModel implements IDisposable { // Data values private _rawData: string; - private _rowCount: number | undefined = 1; + private _rowCount: number | undefined = 0; private _columnCount: number | undefined; // Cache information @@ -659,7 +668,7 @@ export class DSVModel extends DataModel implements IDisposable { /** * The index for the start of each row. */ - private _rowOffsets: Uint32Array = new Uint32Array(1); + private _rowOffsets: Uint32Array = new Uint32Array(0); /** * The number of rows to parse initially before doing a delayed parse of the * entire data. From c0f4a8aedafdb176f0427d9a9ae328f8639ebdeb Mon Sep 17 00:00:00 2001 From: Jason Grout Date: Tue, 5 Jan 2021 15:59:12 -0800 Subject: [PATCH 4/4] =?UTF-8?q?Simplify=20logic=20by=20changing=20?= =?UTF-8?q?=E2=80=98reparse=E2=80=99=20to=20the=20number=20of=20rows=20we?= =?UTF-8?q?=20request=20to=20reparse.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- packages/csvviewer/src/model.ts | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/packages/csvviewer/src/model.ts b/packages/csvviewer/src/model.ts index f7950db7e6b6..36b5b1b35620 100644 --- a/packages/csvviewer/src/model.ts +++ b/packages/csvviewer/src/model.ts @@ -423,27 +423,25 @@ export class DSVModel extends DataModel implements IDisposable { }).ncols; } - // Parse the data up to and including the requested row, starting from the - // beginning of the last row offset. The first row offset returned will be - // the same as the last row offset we already have. - const reparse = this._rowCount! > 0; + // `reparse` is the number of rows we are requesting to parse over again. + // We generally start at the beginning of the last row offset, so that the + // first row offset returned is the same as the last row offset we already + // have. We parse the data up to and including the requested row. + const reparse = this._rowCount! > 0 ? 1 : 0; const { nrows, offsets } = PARSERS[this._parser]({ data: this._rawData, - startIndex: reparse - ? this._rowOffsets[this._rowCount! - 1] - : 0, + startIndex: this._rowOffsets[this._rowCount! - reparse] ?? 0, delimiter: this._delimiter, rowDelimiter: this._rowDelimiter, quote: this._quote, columnOffsets: false, - // We are reparsing the last row if we've already started, so add one - maxRows: endRow - this._rowCount! + (reparse ? 1 : 0) + maxRows: endRow - this._rowCount! + reparse }); // If we have already set up our initial bookkeeping, return early if we // did not get any new rows beyond the last row that we've parsed, i.e., // nrows===1. - if (this._startedParsing && nrows <= 1) { + if (this._startedParsing && (nrows <= reparse)) { this._doneParsing = true; this._ready.resolve(undefined); return; @@ -451,11 +449,9 @@ export class DSVModel extends DataModel implements IDisposable { this._startedParsing = true; - // Update the row count. If we requested to reparse a row, and we received - // at least one new row, then we should correct the count for that - // duplicate row. + // Update the row count, accounting for how many rows were reparsed. const oldRowCount = this._rowCount!; - const duplicateRows = reparse && nrows > 0 ? 1 : 0; + const duplicateRows = Math.min(nrows, reparse); this._rowCount = oldRowCount + nrows - duplicateRows; // If we didn't reach the requested row, we must be done.