From 4f56409e5ddeaf9aa1796135008869dec9d7c690 Mon Sep 17 00:00:00 2001 From: Durran Jordan Date: Mon, 13 Dec 2021 22:07:10 +0100 Subject: [PATCH] feat(NODE-3784): Add `enableUtf8Validation` option (#3074) Co-authored-by: Bailey Pearson --- src/bson.ts | 13 ++- src/cmap/commands.ts | 14 ++- src/cmap/connection.ts | 2 + src/cmap/wire_protocol/shared.ts | 4 +- src/connection_string.ts | 1 + src/db.ts | 1 + src/operations/create_collection.ts | 3 +- src/operations/map_reduce.ts | 1 + test/functional/cmap/commands.test.ts | 122 ++++++++++++++++++++++++ test/types/bson.test-d.ts | 1 + test/unit/commands.test.ts | 128 ++++++++++++++------------ 11 files changed, 222 insertions(+), 68 deletions(-) create mode 100644 test/functional/cmap/commands.test.ts diff --git a/src/bson.ts b/src/bson.ts index 356c158c16..d516ffb303 100644 --- a/src/bson.ts +++ b/src/bson.ts @@ -55,6 +55,9 @@ export interface BSONSerializeOptions > { /** Return BSON filled buffers from operations */ raw?: boolean; + + /** Enable utf8 validation when deserializing BSON documents. Defaults to true. */ + enableUtf8Validation?: boolean; } export function pluckBSONSerializeOptions(options: BSONSerializeOptions): BSONSerializeOptions { @@ -66,7 +69,8 @@ export function pluckBSONSerializeOptions(options: BSONSerializeOptions): BSONSe serializeFunctions, ignoreUndefined, bsonRegExp, - raw + raw, + enableUtf8Validation } = options; return { fieldsAsRaw, @@ -76,7 +80,8 @@ export function pluckBSONSerializeOptions(options: BSONSerializeOptions): BSONSe serializeFunctions, ignoreUndefined, bsonRegExp, - raw + raw, + enableUtf8Validation }; } @@ -99,6 +104,8 @@ export function resolveBSONOptions( ignoreUndefined: options?.ignoreUndefined ?? parentOptions?.ignoreUndefined ?? false, bsonRegExp: options?.bsonRegExp ?? parentOptions?.bsonRegExp ?? false, serializeFunctions: options?.serializeFunctions ?? parentOptions?.serializeFunctions ?? false, - fieldsAsRaw: options?.fieldsAsRaw ?? parentOptions?.fieldsAsRaw ?? {} + fieldsAsRaw: options?.fieldsAsRaw ?? parentOptions?.fieldsAsRaw ?? {}, + enableUtf8Validation: + options?.enableUtf8Validation ?? parentOptions?.enableUtf8Validation ?? true }; } diff --git a/src/cmap/commands.ts b/src/cmap/commands.ts index 322ca736a3..29791ebccf 100644 --- a/src/cmap/commands.ts +++ b/src/cmap/commands.ts @@ -469,8 +469,6 @@ export interface MessageHeader { export interface OpResponseOptions extends BSONSerializeOptions { raw?: boolean; documentsReturnedIn?: string | null; - // For now we use this internally to only prevent writeErrors from crashing the driver - validation?: { utf8: { writeErrors: boolean } }; } /** @internal */ @@ -839,7 +837,7 @@ export class BinMsg { const promoteValues = options.promoteValues ?? this.opts.promoteValues; const promoteBuffers = options.promoteBuffers ?? this.opts.promoteBuffers; const bsonRegExp = options.bsonRegExp ?? this.opts.bsonRegExp; - const validation = options.validation ?? { utf8: { writeErrors: false } }; + const validation = this.parseBsonSerializationOptions(options); // Set up the options const bsonOptions: BSONSerializeOptions = { @@ -876,4 +874,14 @@ export class BinMsg { this.parsed = true; } + + parseBsonSerializationOptions({ enableUtf8Validation }: BSONSerializeOptions): { + utf8: { writeErrors: false } | false; + } { + if (enableUtf8Validation === false) { + return { utf8: false }; + } + + return { utf8: { writeErrors: false } }; + } } diff --git a/src/cmap/connection.ts b/src/cmap/connection.ts index ccee670fc0..b3ff7f9c0b 100644 --- a/src/cmap/connection.ts +++ b/src/cmap/connection.ts @@ -787,6 +787,8 @@ function write( promoteValues: typeof options.promoteValues === 'boolean' ? options.promoteValues : true, promoteBuffers: typeof options.promoteBuffers === 'boolean' ? options.promoteBuffers : false, bsonRegExp: typeof options.bsonRegExp === 'boolean' ? options.bsonRegExp : false, + enableUtf8Validation: + typeof options.enableUtf8Validation === 'boolean' ? options.enableUtf8Validation : true, raw: typeof options.raw === 'boolean' ? options.raw : false, started: 0 }; diff --git a/src/cmap/wire_protocol/shared.ts b/src/cmap/wire_protocol/shared.ts index 0fd624df80..53db9b85b7 100644 --- a/src/cmap/wire_protocol/shared.ts +++ b/src/cmap/wire_protocol/shared.ts @@ -45,7 +45,9 @@ export function applyCommonQueryOptions( promoteLongs: typeof options.promoteLongs === 'boolean' ? options.promoteLongs : true, promoteValues: typeof options.promoteValues === 'boolean' ? options.promoteValues : true, promoteBuffers: typeof options.promoteBuffers === 'boolean' ? options.promoteBuffers : false, - bsonRegExp: typeof options.bsonRegExp === 'boolean' ? options.bsonRegExp : false + bsonRegExp: typeof options.bsonRegExp === 'boolean' ? options.bsonRegExp : false, + enableUtf8Validation: + typeof options.enableUtf8Validation === 'boolean' ? options.enableUtf8Validation : true }); if (options.session) { diff --git a/src/connection_string.ts b/src/connection_string.ts index 4fc5d817e0..e33765e322 100644 --- a/src/connection_string.ts +++ b/src/connection_string.ts @@ -688,6 +688,7 @@ export const OPTIONS = { }); } }, + enableUtf8Validation: { type: 'boolean', default: true }, family: { transform({ name, values: [value] }): 4 | 6 { const transformValue = getInt(name, value); diff --git a/src/db.ts b/src/db.ts index aed434a24e..0fa9dad2a8 100644 --- a/src/db.ts +++ b/src/db.ts @@ -73,6 +73,7 @@ const DB_OPTIONS_ALLOW_LIST = [ 'promoteBuffers', 'promoteLongs', 'bsonRegExp', + 'enableUtf8Validation', 'promoteValues', 'compression', 'retryWrites' diff --git a/src/operations/create_collection.ts b/src/operations/create_collection.ts index 5fcc33682c..7f350e48a8 100644 --- a/src/operations/create_collection.ts +++ b/src/operations/create_collection.ts @@ -27,7 +27,8 @@ const ILLEGAL_COMMAND_FIELDS = new Set([ 'promoteBuffers', 'bsonRegExp', 'serializeFunctions', - 'ignoreUndefined' + 'ignoreUndefined', + 'enableUtf8Validation' ]); /** @public diff --git a/src/operations/map_reduce.ts b/src/operations/map_reduce.ts index 3e84aeb0e1..5d73ee7a94 100644 --- a/src/operations/map_reduce.ts +++ b/src/operations/map_reduce.ts @@ -33,6 +33,7 @@ const exclusionList = [ 'bsonRegExp', 'serializeFunctions', 'ignoreUndefined', + 'enableUtf8Validation', 'scope' // this option is reformatted thus exclude the original ]; diff --git a/test/functional/cmap/commands.test.ts b/test/functional/cmap/commands.test.ts new file mode 100644 index 0000000000..a7557af013 --- /dev/null +++ b/test/functional/cmap/commands.test.ts @@ -0,0 +1,122 @@ +import { spy } from 'sinon'; +import { expect } from 'chai'; +import * as BSON from '../../../src/bson'; + +const deserializeSpy = spy(BSON, 'deserialize'); + +const EXPECTED_VALIDATION_DISABLED_ARGUMENT = { + utf8: false +}; + +const EXPECTED_VALIDATION_ENABLED_ARGUMENT = { + utf8: { + writeErrors: false + } +}; + +describe('class BinMsg', () => { + beforeEach(() => { + deserializeSpy.resetHistory(); + }); + + describe('enableUtf8Validation option set to false', () => { + let client; + const option = { enableUtf8Validation: false }; + + for (const passOptionTo of ['client', 'db', 'collection', 'operation']) { + it(`should disable validation with option passed to ${passOptionTo}`, async function () { + try { + client = this.configuration.newClient(passOptionTo === 'client' ? option : undefined); + await client.connect(); + + const db = client.db( + 'bson_utf8Validation_db', + passOptionTo === 'db' ? option : undefined + ); + const collection = db.collection( + 'bson_utf8Validation_coll', + passOptionTo === 'collection' ? option : undefined + ); + + await collection.insertOne( + { name: 'John Doe' }, + passOptionTo === 'operation' ? option : {} + ); + + expect(deserializeSpy.called).to.be.true; + const validationArgument = deserializeSpy.lastCall.lastArg.validation; + expect(validationArgument).to.deep.equal(EXPECTED_VALIDATION_DISABLED_ARGUMENT); + } finally { + await client.close(); + } + }); + } + }); + + describe('enableUtf8Validation option set to true', () => { + // define client and option for tests to use + let client; + const option = { enableUtf8Validation: true }; + for (const passOptionTo of ['client', 'db', 'collection', 'operation']) { + it(`should enable validation with option passed to ${passOptionTo}`, async function () { + try { + client = this.configuration.newClient(passOptionTo === 'client' ? option : undefined); + await client.connect(); + + const db = client.db( + 'bson_utf8Validation_db', + passOptionTo === 'db' ? option : undefined + ); + const collection = db.collection( + 'bson_utf8Validation_coll', + passOptionTo === 'collection' ? option : undefined + ); + + await collection.insertOne( + { name: 'John Doe' }, + passOptionTo === 'operation' ? option : {} + ); + + expect(deserializeSpy.called).to.be.true; + const validationArgument = deserializeSpy.lastCall.lastArg.validation; + expect(validationArgument).to.deep.equal(EXPECTED_VALIDATION_ENABLED_ARGUMENT); + } finally { + await client.close(); + } + }); + } + }); + + describe('enableUtf8Validation option not set', () => { + let client; + const option = { enableUtf8Validation: true }; + for (const passOptionTo of ['client', 'db', 'collection', 'operation']) { + it(`should default to enabled with option passed to ${passOptionTo}`, async function () { + try { + client = this.configuration.newClient(passOptionTo === 'client' ? option : undefined); + await client.connect(); + + const db = client.db( + 'bson_utf8Validation_db', + passOptionTo === 'db' ? option : undefined + ); + const collection = db.collection( + 'bson_utf8Validation_coll', + passOptionTo === 'collection' ? option : undefined + ); + + await collection.insertOne( + { name: 'John Doe' }, + passOptionTo === 'operation' ? option : {} + ); + + expect(deserializeSpy.called).to.be.true; + const validationArgument = deserializeSpy.lastCall.lastArg.validation; + expect(validationArgument).to.deep.equal(EXPECTED_VALIDATION_ENABLED_ARGUMENT); + } finally { + await client.close(); + } + }); + } + }); +}); diff --git a/test/types/bson.test-d.ts b/test/types/bson.test-d.ts index a98699c15e..8c5381ee2b 100644 --- a/test/types/bson.test-d.ts +++ b/test/types/bson.test-d.ts @@ -21,6 +21,7 @@ type PermittedBSONOptionKeys = | 'promoteValues' | 'bsonRegExp' | 'fieldsAsRaw' + | 'enableUtf8Validation' | 'raw'; const keys = null as unknown as PermittedBSONOptionKeys; diff --git a/test/unit/commands.test.ts b/test/unit/commands.test.ts index a8156abc73..ccc44eb5e8 100644 --- a/test/unit/commands.test.ts +++ b/test/unit/commands.test.ts @@ -2,6 +2,7 @@ import { expect } from 'chai'; import { BinMsg, MessageHeader } from '../../src/cmap/commands'; import { BSONError } from 'bson'; import * as BSON from '../../src/bson'; +import { test } from 'mocha'; const msgHeader: MessageHeader = { length: 735, @@ -37,45 +38,66 @@ const invalidUtf8InWriteErrorsJSON = { // when another top-level key besides writeErrors has invalid utf8 const nKeyWithInvalidUtf8 = '0000000000cc020000026e0005000000f09f98ff000477726974654572726f727300a60200000330009e02000010696e646578000000000010636f646500f82a0000036b65795061747465726e000f0000001074657874000100000000036b657956616c756500610100000274657874005201000064e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e298830000026572726d736700f2000000453131303030206475706c6963617465206b6579206572726f7220636f6c6c656374696f6e3a20626967646174612e7465737420696e6465783a20746578745f3120647570206b65793a207b20746578743a202264e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883e29883efbfbd2e2e2e22207d000000106f6b000100000000'; -const nKeyWithInvalidUtf8DeserializeInput = Buffer.from(nKeyWithInvalidUtf8.substring(10), 'hex'); const msgBodyNKeyWithInvalidUtf8 = Buffer.from(nKeyWithInvalidUtf8, 'hex'); -const invalidUtf8InNKeyJSON = { - n: '��', - writeErrors: [ - { - index: 0, - code: 11000, - keyPattern: { - text: 1 - }, - keyValue: { - text: 'd☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃' - }, - errmsg: - 'E11000 duplicate key error collection: bigdata.test index: text_1 dup key: { text: "d☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃�..." }' - } - ], - ok: 1 -}; describe('BinMsg BSON utf8 validation', () => { - context('when validation is disabled for writeErrors', () => { + test('bson correctly deserializes data with replacement characters for invalid utf8 in writeErrors object', () => { + // this is a sanity check to make sure nothing unexpected is happening in the deserialize method itself + + const options = { validation: { utf8: { writeErrors: false } as const } }; + expect(BSON.deserialize(invalidUtf8ErrorMsgDeserializeInput, options)).to.deep.equals( + invalidUtf8InWriteErrorsJSON + ); + }); + + context('when enableUtf8Validation option is not specified', () => { const binMsgInvalidUtf8ErrorMsg = new BinMsg( Buffer.alloc(0), msgHeader, msgBodyInvalidUtf8WriteErrors ); - const options = { validation: { utf8: { writeErrors: false } as const } }; - it('contains replacement characters for invalid utf8 in writeError object', () => { - expect(BSON.deserialize(invalidUtf8ErrorMsgDeserializeInput, options)).to.deep.equals( - invalidUtf8InWriteErrorsJSON + const options = {}; + it('does not validate the writeErrors key', () => { + expect(() => binMsgInvalidUtf8ErrorMsg.parse(options)).to.not.throw(); + }); + + it('should validate keys other than the writeErrors key', () => { + const binMsgAnotherKeyWithInvalidUtf8 = new BinMsg( + Buffer.alloc(0), + msgHeader, + msgBodyNKeyWithInvalidUtf8 + ); + expect(() => binMsgAnotherKeyWithInvalidUtf8.parse(options)).to.throw( + BSONError, + 'Invalid UTF-8 string in BSON document' ); }); + }); + + context('when validation is disabled', () => { + const binMsgInvalidUtf8ErrorMsg = new BinMsg( + Buffer.alloc(0), + msgHeader, + msgBodyInvalidUtf8WriteErrors + ); - it('should not throw invalid utf8 error', () => { + const options = { enableUtf8Validation: false }; + it('should not validate the writeErrors key', () => { expect(() => binMsgInvalidUtf8ErrorMsg.parse(options)).to.not.throw(); }); + + it('should not validate keys other than the writeErrors key', () => { + const binMsgAnotherKeyWithInvalidUtf8 = new BinMsg( + Buffer.alloc(0), + msgHeader, + msgBodyNKeyWithInvalidUtf8 + ); + expect(() => binMsgAnotherKeyWithInvalidUtf8.parse(options)).to.not.throw( + BSONError, + 'Invalid UTF-8 string in BSON document' + ); + }); }); it('should by default disable validation for writeErrors if no validation specified', () => { @@ -93,44 +115,30 @@ describe('BinMsg BSON utf8 validation', () => { expect(() => binMsgInvalidUtf8ErrorMsg.parse(options)).to.not.throw(); }); - context('when another key has invalid utf8 and validation is enabled for writeErrors', () => { - const binMsgAnotherKeyWithInvalidUtf8 = new BinMsg( - Buffer.alloc(0), - msgHeader, - msgBodyNKeyWithInvalidUtf8 - ); - const options = { validation: { utf8: { writeErrors: true } as const } }; - - it('should not throw invalid utf8 error', () => { - expect(() => binMsgAnotherKeyWithInvalidUtf8.parse(options)).to.not.throw(); + context('utf8 validation enabled', () => { + const options = { enableUtf8Validation: true }; + it('should not validate the writeErrors key', () => { + const binMsgInvalidUtf8ErrorMsg = new BinMsg( + Buffer.alloc(0), + msgHeader, + msgBodyInvalidUtf8WriteErrors + ); + expect(() => binMsgInvalidUtf8ErrorMsg.parse(options)).not.to.throw( + BSONError, + 'Invalid UTF-8 string in BSON document' + ); }); - it('contains replacement characters for invalid utf8 key', () => { - expect(BSON.deserialize(nKeyWithInvalidUtf8DeserializeInput, options)).to.deep.equals( - invalidUtf8InNKeyJSON + it('should validate keys other than the writeErrors key', () => { + const binMsgAnotherKeyWithInvalidUtf8 = new BinMsg( + Buffer.alloc(0), + msgHeader, + msgBodyNKeyWithInvalidUtf8 + ); + expect(() => binMsgAnotherKeyWithInvalidUtf8.parse(options)).to.throw( + BSONError, + 'Invalid UTF-8 string in BSON document' ); }); }); - - it('should throw invalid utf8 error when validation enabled for writeErrors', () => { - const binMsgInvalidUtf8ErrorMsg = new BinMsg( - Buffer.alloc(0), - msgHeader, - msgBodyInvalidUtf8WriteErrors - ); - expect(() => - binMsgInvalidUtf8ErrorMsg.parse({ validation: { utf8: { writeErrors: true } } }) - ).to.throw(BSONError, 'Invalid UTF-8 string in BSON document'); - }); - - it('should throw error when another key has invalid utf8 and writeErrors is not validated', () => { - const binMsgAnotherKeyWithInvalidUtf8 = new BinMsg( - Buffer.alloc(0), - msgHeader, - msgBodyNKeyWithInvalidUtf8 - ); - expect(() => - binMsgAnotherKeyWithInvalidUtf8.parse({ validation: { utf8: { writeErrors: false } } }) - ).to.throw(BSONError, 'Invalid UTF-8 string in BSON document'); - }); });