diff --git a/node/.eslintrc.js b/node/.eslintrc.js index 743f74f39..fe7d61e1c 100644 --- a/node/.eslintrc.js +++ b/node/.eslintrc.js @@ -13,5 +13,10 @@ module.exports = { }, rules: { "@typescript-eslint/method-signature-style": "off", + "@typescript-eslint/quotes": "off", + "@typescript-eslint/semi": "off", + "@typescript-eslint/explicit-function-return-type": "off", + "@typescript-eslint/space-before-function-paren": "off", + "@typescript-eslint/indent": "off", } } diff --git a/node/package.json b/node/package.json index ec18d6293..3952a5df3 100644 --- a/node/package.json +++ b/node/package.json @@ -41,6 +41,7 @@ "@types/temp": "^0.9.1", "@types/uuid": "^9.0.3", "@typescript-eslint/eslint-plugin": "^5.59.1", + "apache-arrow-old": "npm:apache-arrow@13.0.0", "cargo-cp-artifact": "^0.1", "chai": "^4.3.7", "chai-as-promised": "^7.1.1", @@ -93,4 +94,4 @@ "@lancedb/vectordb-linux-x64-gnu": "0.4.11", "@lancedb/vectordb-win32-x64-msvc": "0.4.11" } -} \ No newline at end of file +} diff --git a/node/src/arrow.ts b/node/src/arrow.ts index bdf20cf49..792c68f28 100644 --- a/node/src/arrow.ts +++ b/node/src/arrow.ts @@ -20,19 +20,20 @@ import { type Vector, FixedSizeList, vectorFromArray, - type Schema, + Schema, Table as ArrowTable, RecordBatchStreamWriter, List, RecordBatch, makeData, Struct, - type Float, + Float, DataType, Binary, Float32 } from 'apache-arrow' import { type EmbeddingFunction } from './index' +import { sanitizeSchema } from './sanitize' /* * Options to control how a column should be converted to a vector array @@ -201,10 +202,13 @@ export function makeArrowTable ( } const opt = new MakeArrowTableOptions(options !== undefined ? options : {}) + if (opt.schema !== undefined && opt.schema !== null) { + opt.schema = sanitizeSchema(opt.schema) + } const columns: Record = {} // TODO: sample dataset to find missing columns // Prefer the field ordering of the schema, if present - const columnNames = ((options?.schema) != null) ? 
(options?.schema?.names as string[]) : Object.keys(data[0]) + const columnNames = ((opt.schema) != null) ? (opt.schema.names as string[]) : Object.keys(data[0]) for (const colName of columnNames) { if (data.length !== 0 && !Object.prototype.hasOwnProperty.call(data[0], colName)) { // The field is present in the schema, but not in the data, skip it @@ -329,6 +333,9 @@ async function applyEmbeddings (table: ArrowTable, embeddings?: EmbeddingFunc if (embeddings == null) { return table } + if (schema !== undefined && schema !== null) { + schema = sanitizeSchema(schema) + } // Convert from ArrowTable to Record const colEntries = [...Array(table.numCols).keys()].map((_, idx) => { @@ -439,6 +446,9 @@ export async function fromRecordsToBuffer ( embeddings?: EmbeddingFunction, schema?: Schema ): Promise { + if (schema !== undefined && schema !== null) { + schema = sanitizeSchema(schema) + } const table = await convertToTable(data, embeddings, { schema }) const writer = RecordBatchFileWriter.writeAll(table) return Buffer.from(await writer.toUint8Array()) @@ -456,6 +466,9 @@ export async function fromRecordsToStreamBuffer ( embeddings?: EmbeddingFunction, schema?: Schema ): Promise { + if (schema !== null && schema !== undefined) { + schema = sanitizeSchema(schema) + } const table = await convertToTable(data, embeddings, { schema }) const writer = RecordBatchStreamWriter.writeAll(table) return Buffer.from(await writer.toUint8Array()) @@ -474,6 +487,9 @@ export async function fromTableToBuffer ( embeddings?: EmbeddingFunction, schema?: Schema ): Promise { + if (schema !== null && schema !== undefined) { + schema = sanitizeSchema(schema) + } const tableWithEmbeddings = await applyEmbeddings(table, embeddings, schema) const writer = RecordBatchFileWriter.writeAll(tableWithEmbeddings) return Buffer.from(await writer.toUint8Array()) @@ -492,6 +508,9 @@ export async function fromTableToStreamBuffer ( embeddings?: EmbeddingFunction, schema?: Schema ): Promise { + if (schema !== null 
&& schema !== undefined) { + schema = sanitizeSchema(schema) + } const tableWithEmbeddings = await applyEmbeddings(table, embeddings, schema) const writer = RecordBatchStreamWriter.writeAll(tableWithEmbeddings) return Buffer.from(await writer.toUint8Array()) @@ -528,5 +547,5 @@ function alignTable (table: ArrowTable, schema: Schema): ArrowTable { // Creates an empty Arrow Table export function createEmptyTable (schema: Schema): ArrowTable { - return new ArrowTable(schema) + return new ArrowTable(sanitizeSchema(schema)) } diff --git a/node/src/sanitize.ts b/node/src/sanitize.ts new file mode 100644 index 000000000..0788f41e8 --- /dev/null +++ b/node/src/sanitize.ts @@ -0,0 +1,501 @@ +// Copyright 2023 LanceDB Developers. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// The utilities in this file help sanitize data from the user's arrow +// library into the types expected by vectordb's arrow library. Node +// generally allows for multiple versions of the same library (and sometimes +// even multiple copies of the same version) to be installed at the same +// time. However, arrow-js uses instanceof which expects that the input +// comes from the exact same library instance. This is not always the case +// and so we must sanitize the input to ensure that it is compatible.
+ +import { + Field, + Utf8, + FixedSizeBinary, + FixedSizeList, + Schema, + List, + Struct, + Float, + Bool, + Date_, + Decimal, + DataType, + Dictionary, + Binary, + Float32, + Interval, + Map_, + Duration, + Union, + Time, + Timestamp, + Type, + Null, + Int, + type Precision, + type DateUnit, + Int8, + Int16, + Int32, + Int64, + Uint8, + Uint16, + Uint32, + Uint64, + Float16, + Float64, + DateDay, + DateMillisecond, + DenseUnion, + SparseUnion, + TimeNanosecond, + TimeMicrosecond, + TimeMillisecond, + TimeSecond, + TimestampNanosecond, + TimestampMicrosecond, + TimestampMillisecond, + TimestampSecond, + IntervalDayTime, + IntervalYearMonth, + DurationNanosecond, + DurationMicrosecond, + DurationMillisecond, + DurationSecond, +} from "apache-arrow"; +import type { IntBitWidth, TimeBitWidth } from "apache-arrow/type"; + +function sanitizeMetadata( + metadataLike?: unknown +): Map<string, string> | undefined { + if (metadataLike === undefined || metadataLike === null) { + return undefined; + } + if (!(metadataLike instanceof Map)) { + throw Error("Expected metadata, if present, to be a Map"); + } + for (const item of metadataLike) { + if (!(typeof item[0] === "string" && typeof item[1] === "string")) { + throw Error( + "Expected metadata, if present, to be a Map but it had non-string keys or values" + ); + } + } + return metadataLike as Map<string, string>; +} + +function sanitizeInt(typeLike: object) { + if ( + !("bitWidth" in typeLike) || + typeof typeLike.bitWidth !== "number" || + !("isSigned" in typeLike) || + typeof typeLike.isSigned !== "boolean" + ) { + throw Error( + "Expected an Int Type to have a `bitWidth` and `isSigned` property" + ); + } + return new Int(typeLike.isSigned, typeLike.bitWidth as IntBitWidth); +} + +function sanitizeFloat(typeLike: object) { + if (!("precision" in typeLike) || typeof typeLike.precision !== "number") { + throw Error("Expected a Float Type to have a `precision` property"); + } + return new Float(typeLike.precision as Precision); +} + +function
sanitizeDecimal(typeLike: object) { + if ( + !("scale" in typeLike) || + typeof typeLike.scale !== "number" || + !("precision" in typeLike) || + typeof typeLike.precision !== "number" || + !("bitWidth" in typeLike) || + typeof typeLike.bitWidth !== "number" + ) { + throw Error( + "Expected a Decimal Type to have `scale`, `precision`, and `bitWidth` properties" + ); + } + return new Decimal(typeLike.scale, typeLike.precision, typeLike.bitWidth); +} + +function sanitizeDate(typeLike: object) { + if (!("unit" in typeLike) || typeof typeLike.unit !== "number") { + throw Error("Expected a Date type to have a `unit` property"); + } + return new Date_(typeLike.unit as DateUnit); +} + +function sanitizeTime(typeLike: object) { + if ( + !("unit" in typeLike) || + typeof typeLike.unit !== "number" || + !("bitWidth" in typeLike) || + typeof typeLike.bitWidth !== "number" + ) { + throw Error( + "Expected a Time type to have `unit` and `bitWidth` properties" + ); + } + return new Time(typeLike.unit, typeLike.bitWidth as TimeBitWidth); +} + +function sanitizeTimestamp(typeLike: object) { + if (!("unit" in typeLike) || typeof typeLike.unit !== "number") { + throw Error("Expected a Timestamp type to have a `unit` property"); + } + let timezone = null; + if ("timezone" in typeLike && typeof typeLike.timezone === "string") { + timezone = typeLike.timezone; + } + return new Timestamp(typeLike.unit, timezone); +} + +function sanitizeTypedTimestamp( + typeLike: object, + Datatype: + | typeof TimestampNanosecond + | typeof TimestampMicrosecond + | typeof TimestampMillisecond + | typeof TimestampSecond +) { + let timezone = null; + if ("timezone" in typeLike && typeof typeLike.timezone === "string") { + timezone = typeLike.timezone; + } + return new Datatype(timezone); +} + +function sanitizeInterval(typeLike: object) { + if (!("unit" in typeLike) || typeof typeLike.unit !== "number") { + throw Error("Expected an Interval type to have a `unit` property"); + } + return new 
Interval(typeLike.unit); +} + +function sanitizeList(typeLike: object) { + if (!("children" in typeLike) || !Array.isArray(typeLike.children)) { + throw Error( + "Expected a List type to have an array-like `children` property" + ); + } + if (typeLike.children.length !== 1) { + throw Error("Expected a List type to have exactly one child"); + } + return new List(sanitizeField(typeLike.children[0])); +} + +function sanitizeStruct(typeLike: object) { + if (!("children" in typeLike) || !Array.isArray(typeLike.children)) { + throw Error( + "Expected a Struct type to have an array-like `children` property" + ); + } + return new Struct(typeLike.children.map((child) => sanitizeField(child))); +} + +function sanitizeUnion(typeLike: object) { + if ( + !("typeIds" in typeLike) || + !("mode" in typeLike) || + typeof typeLike.mode !== "number" + ) { + throw Error( + "Expected a Union type to have `typeIds` and `mode` properties" + ); + } + if (!("children" in typeLike) || !Array.isArray(typeLike.children)) { + throw Error( + "Expected a Union type to have an array-like `children` property" + ); + } + + return new Union( + typeLike.mode, + typeLike.typeIds as any, + typeLike.children.map((child) => sanitizeField(child)) + ); +} + +function sanitizeTypedUnion( + typeLike: object, + UnionType: typeof DenseUnion | typeof SparseUnion +) { + if (!("typeIds" in typeLike)) { + throw Error( + "Expected a DenseUnion/SparseUnion type to have a `typeIds` property" + ); + } + if (!("children" in typeLike) || !Array.isArray(typeLike.children)) { + throw Error( + "Expected a DenseUnion/SparseUnion type to have an array-like `children` property" + ); + } + + return new UnionType( + typeLike.typeIds as any, + typeLike.children.map((child) => sanitizeField(child)) + ); +} + +function sanitizeFixedSizeBinary(typeLike: object) { + if (!("byteWidth" in typeLike) || typeof typeLike.byteWidth !== "number") { + throw Error( + "Expected a FixedSizeBinary type to have a `byteWidth` property" + ); + } + 
return new FixedSizeBinary(typeLike.byteWidth); +} + +function sanitizeFixedSizeList(typeLike: object) { + if (!("listSize" in typeLike) || typeof typeLike.listSize !== "number") { + throw Error("Expected a FixedSizeList type to have a `listSize` property"); + } + if (!("children" in typeLike) || !Array.isArray(typeLike.children)) { + throw Error( + "Expected a FixedSizeList type to have an array-like `children` property" + ); + } + if (typeLike.children.length !== 1) { + throw Error("Expected a FixedSizeList type to have exactly one child"); + } + return new FixedSizeList( + typeLike.listSize, + sanitizeField(typeLike.children[0]) + ); +} + +function sanitizeMap(typeLike: object) { + if (!("children" in typeLike) || !Array.isArray(typeLike.children)) { + throw Error( + "Expected a Map type to have an array-like `children` property" + ); + } + if (!("keysSorted" in typeLike) || typeof typeLike.keysSorted !== "boolean") { + throw Error("Expected a Map type to have a `keysSorted` property"); + } + return new Map_( + typeLike.children.map((field) => sanitizeField(field)) as any, + typeLike.keysSorted + ); +} + +function sanitizeDuration(typeLike: object) { + if (!("unit" in typeLike) || typeof typeLike.unit !== "number") { + throw Error("Expected a Duration type to have a `unit` property"); + } + return new Duration(typeLike.unit); +} + +function sanitizeDictionary(typeLike: object) { + if (!("id" in typeLike) || typeof typeLike.id !== "number") { + throw Error("Expected a Dictionary type to have an `id` property"); + } + if (!("indices" in typeLike) || typeof typeLike.indices !== "object") { + throw Error("Expected a Dictionary type to have an `indices` property"); + } + if (!("dictionary" in typeLike) || typeof typeLike.dictionary !== "object") { + throw Error("Expected a Dictionary type to have an `dictionary` property"); + } + if (!("isOrdered" in typeLike) || typeof typeLike.isOrdered !== "boolean") { + throw Error("Expected a Dictionary type to have an 
`isOrdered` property"); + } + return new Dictionary( + sanitizeType(typeLike.dictionary), + sanitizeType(typeLike.indices) as any, + typeLike.id, + typeLike.isOrdered + ); +} + +function sanitizeType(typeLike: unknown): DataType { + if (typeof typeLike !== "object" || typeLike === null) { + throw Error("Expected a Type but object was null/undefined"); + } + if (!("typeId" in typeLike)) { + throw Error("Expected a Type to have a typeId function"); + } + let typeId: Type; + if (typeof typeLike.typeId === "function") { + typeId = (typeLike.typeId as () => unknown)() as Type; + } else if (typeof typeLike.typeId === "number") { + typeId = typeLike.typeId as Type; + } else { + throw Error("Type's typeId property was not a function or number"); + } + + switch (typeId) { + case Type.NONE: + throw Error("Received a Type with a typeId of NONE"); + case Type.Null: + return new Null(); + case Type.Int: + return sanitizeInt(typeLike); + case Type.Float: + return sanitizeFloat(typeLike); + case Type.Binary: + return new Binary(); + case Type.Utf8: + return new Utf8(); + case Type.Bool: + return new Bool(); + case Type.Decimal: + return sanitizeDecimal(typeLike); + case Type.Date: + return sanitizeDate(typeLike); + case Type.Time: + return sanitizeTime(typeLike); + case Type.Timestamp: + return sanitizeTimestamp(typeLike); + case Type.Interval: + return sanitizeInterval(typeLike); + case Type.List: + return sanitizeList(typeLike); + case Type.Struct: + return sanitizeStruct(typeLike); + case Type.Union: + return sanitizeUnion(typeLike); + case Type.FixedSizeBinary: + return sanitizeFixedSizeBinary(typeLike); + case Type.FixedSizeList: + return sanitizeFixedSizeList(typeLike); + case Type.Map: + return sanitizeMap(typeLike); + case Type.Duration: + return sanitizeDuration(typeLike); + case Type.Dictionary: + return sanitizeDictionary(typeLike); + case Type.Int8: + return new Int8(); + case Type.Int16: + return new Int16(); + case
Type.Int32: + return new Int32(); + case Type.Int64: + return new Int64(); + case Type.Uint8: + return new Uint8(); + case Type.Uint16: + return new Uint16(); + case Type.Uint32: + return new Uint32(); + case Type.Uint64: + return new Uint64(); + case Type.Float16: + return new Float16(); + case Type.Float32: + return new Float32(); + case Type.Float64: + return new Float64(); + case Type.DateMillisecond: + return new DateMillisecond(); + case Type.DateDay: + return new DateDay(); + case Type.TimeNanosecond: + return new TimeNanosecond(); + case Type.TimeMicrosecond: + return new TimeMicrosecond(); + case Type.TimeMillisecond: + return new TimeMillisecond(); + case Type.TimeSecond: + return new TimeSecond(); + case Type.TimestampNanosecond: + return sanitizeTypedTimestamp(typeLike, TimestampNanosecond); + case Type.TimestampMicrosecond: + return sanitizeTypedTimestamp(typeLike, TimestampMicrosecond); + case Type.TimestampMillisecond: + return sanitizeTypedTimestamp(typeLike, TimestampMillisecond); + case Type.TimestampSecond: + return sanitizeTypedTimestamp(typeLike, TimestampSecond); + case Type.DenseUnion: + return sanitizeTypedUnion(typeLike, DenseUnion); + case Type.SparseUnion: + return sanitizeTypedUnion(typeLike, SparseUnion); + case Type.IntervalDayTime: + return new IntervalDayTime(); + case Type.IntervalYearMonth: + return new IntervalYearMonth(); + case Type.DurationNanosecond: + return new DurationNanosecond(); + case Type.DurationMicrosecond: + return new DurationMicrosecond(); + case Type.DurationMillisecond: + return new DurationMillisecond(); + case Type.DurationSecond: + return new DurationSecond(); + } +} + +function sanitizeField(fieldLike: unknown): Field { + if (fieldLike instanceof Field) { + return fieldLike; + } + if (typeof fieldLike !== "object" || fieldLike === null) { + throw Error("Expected a Field but object was null/undefined"); + } + if ( + !("type" in fieldLike) || + !("name" in fieldLike) || + !("nullable" in fieldLike) + ) { + 
throw Error( + "The field passed in is missing a `type`/`name`/`nullable` property" + ); + } + const type = sanitizeType(fieldLike.type); + const name = fieldLike.name; + if (!(typeof name === "string")) { + throw Error("The field passed in had a non-string `name` property"); + } + const nullable = fieldLike.nullable; + if (!(typeof nullable === "boolean")) { + throw Error("The field passed in had a non-boolean `nullable` property"); + } + let metadata; + if ("metadata" in fieldLike) { + metadata = sanitizeMetadata(fieldLike.metadata); + } + return new Field(name, type, nullable, metadata); +} + +export function sanitizeSchema(schemaLike: unknown): Schema { + if (schemaLike instanceof Schema) { + return schemaLike; + } + if (typeof schemaLike !== "object" || schemaLike === null) { + throw Error("Expected a Schema but object was null/undefined"); + } + if (!("fields" in schemaLike)) { + throw Error( + "The schema passed in does not appear to be a schema (no 'fields' property)" + ); + } + let metadata; + if ("metadata" in schemaLike) { + metadata = sanitizeMetadata(schemaLike.metadata); + } + if (!Array.isArray(schemaLike.fields)) { + throw Error( + "The schema passed in had a 'fields' property but it was not an array" + ); + } + const sanitizedFields = schemaLike.fields.map((field) => + sanitizeField(field) + ); + return new Schema(sanitizedFields, metadata); +} diff --git a/node/src/test/arrow.test.ts b/node/src/test/arrow.test.ts index c356c9d8a..38005e6a7 100644 --- a/node/src/test/arrow.test.ts +++ b/node/src/test/arrow.test.ts @@ -34,8 +34,20 @@ import { List, DataType, Dictionary, - Int64 + Int64, + MetadataVersion } from 'apache-arrow' +import { + Dictionary as OldDictionary, + Field as OldField, + FixedSizeList as OldFixedSizeList, + Float32 as OldFloat32, + Int32 as OldInt32, + Struct as OldStruct, + Schema as OldSchema, + TimestampNanosecond as OldTimestampNanosecond, + Utf8 as OldUtf8 +} from 'apache-arrow-old' import { type EmbeddingFunction } from 
'../embedding/embedding_function' chaiUse(chaiAsPromised) @@ -318,3 +330,31 @@ describe('makeEmptyTable', function () { await checkTableCreation(async (_, __, schema) => makeEmptyTable(schema)) }) }) + +describe('when using two versions of arrow', function () { + it('can still import data', async function() { + const schema = new OldSchema([ + new OldField('id', new OldInt32()), + new OldField('vector', new OldFixedSizeList(1024, new OldField("item", new OldFloat32(), true))), + new OldField('struct', new OldStruct([ + new OldField('nested', new OldDictionary(new OldUtf8(), new OldInt32(), 1, true)), + new OldField('ts_with_tz', new OldTimestampNanosecond("some_tz")), + new OldField('ts_no_tz', new OldTimestampNanosecond(null)) + ])) + ]) as any + // We use arrow version 13 to emulate a "foreign arrow" and this version doesn't have metadataVersion + // In theory, this wouldn't matter. We don't rely on that property. However, it causes deepEqual to + // fail so we patch it back in + schema.metadataVersion = MetadataVersion.V5 + const table = makeArrowTable( + [], + { schema } + ) + + const buf = await fromTableToBuffer(table) + assert.isAbove(buf.byteLength, 0) + const actual = tableFromIPC(buf) + const actualSchema = actual.schema + assert.deepEqual(actualSchema, schema) + }) +}) diff --git a/nodejs/__test__/arrow.test.ts b/nodejs/__test__/arrow.test.ts index 66c8ccfb3..4a267c57e 100644 --- a/nodejs/__test__/arrow.test.ts +++ b/nodejs/__test__/arrow.test.ts @@ -38,9 +38,22 @@ import { Int64, Float, Precision, + MetadataVersion, } from "apache-arrow"; +import { + Dictionary as OldDictionary, + Field as OldField, + FixedSizeList as OldFixedSizeList, + Float32 as OldFloat32, + Int32 as OldInt32, + Struct as OldStruct, + Schema as OldSchema, + TimestampNanosecond as OldTimestampNanosecond, + Utf8 as OldUtf8, +} from "apache-arrow-old"; import { type EmbeddingFunction } from "../dist/embedding/embedding_function"; +// eslint-disable-next-line 
@typescript-eslint/no-explicit-any function sampleRecords(): Array> { return [ { @@ -57,8 +70,8 @@ function sampleRecords(): Array> { // Helper method to verify various ways to create a table async function checkTableCreation( tableCreationMethod: ( - records: any, - recordsReversed: any, + records: Record[], + recordsReversed: Record[], schema: Schema, ) => Promise, infersTypes: boolean, @@ -402,3 +415,56 @@ describe("makeEmptyTable", function () { ); }); }); + +describe("when using two versions of arrow", function () { + it("can still import data", async function () { + const schema = new OldSchema([ + new OldField("id", new OldInt32()), + new OldField( + "vector", + new OldFixedSizeList( + 1024, + new OldField("item", new OldFloat32(), true), + ), + ), + new OldField( + "struct", + new OldStruct([ + new OldField( + "nested", + new OldDictionary(new OldUtf8(), new OldInt32(), 1, true), + ), + new OldField("ts_with_tz", new OldTimestampNanosecond("some_tz")), + new OldField("ts_no_tz", new OldTimestampNanosecond(null)), + ]), + ), + // eslint-disable-next-line @typescript-eslint/no-explicit-any + ]) as any; + schema.metadataVersion = MetadataVersion.V5; + const table = makeArrowTable([], { schema }); + + const buf = await fromTableToBuffer(table); + expect(buf.byteLength).toBeGreaterThan(0); + const actual = tableFromIPC(buf); + const actualSchema = actual.schema; + expect(actualSchema.fields.length).toBe(3); + + // Deep equality gets hung up on some very minor unimportant differences + // between arrow version 13 and 15 which isn't really what we're testing for + // and so we do our own comparison that just checks name/type/nullability + function compareFields(lhs: Field, rhs: Field) { + expect(lhs.name).toEqual(rhs.name); + expect(lhs.nullable).toEqual(rhs.nullable); + expect(lhs.typeId).toEqual(rhs.typeId); + if ("children" in lhs.type && lhs.type.children !== null) { + const lhs_children = lhs.type.children as Field[]; + lhs_children.forEach((child: Field, 
idx) => { + compareFields(child, rhs.type.children[idx]); + }); + } + } + actualSchema.fields.forEach((field, idx) => { + compareFields(field, schema.fields[idx]); + }); + }); +}); diff --git a/nodejs/__test__/table.test.ts b/nodejs/__test__/table.test.ts index 74080fa5e..c06fccaee 100644 --- a/nodejs/__test__/table.test.ts +++ b/nodejs/__test__/table.test.ts @@ -103,12 +103,12 @@ describe("Test creating index", () => { // TODO: check index type. // Search without specifying the column - let query_vector = data.toArray()[5].vec.toJSON(); - let rst = await tbl.query().nearestTo(query_vector).limit(2).toArrow(); + const query_vector = data.toArray()[5].vec.toJSON(); + const rst = await tbl.query().nearestTo(query_vector).limit(2).toArrow(); expect(rst.numRows).toBe(2); // Search with specifying the column - let rst2 = await tbl.search(query_vector, "vec").limit(2).toArrow(); + const rst2 = await tbl.search(query_vector, "vec").limit(2).toArrow(); expect(rst2.numRows).toBe(2); expect(rst.toString()).toEqual(rst2.toString()); }); diff --git a/nodejs/lancedb/arrow.ts b/nodejs/lancedb/arrow.ts index a86804ff1..21c84b53a 100644 --- a/nodejs/lancedb/arrow.ts +++ b/nodejs/lancedb/arrow.ts @@ -33,6 +33,7 @@ import { Float32, } from "apache-arrow"; import { type EmbeddingFunction } from "./embedding/embedding_function"; +import { sanitizeSchema } from "./sanitize"; /** Data type accepted by NodeJS SDK */ export type Data = Record[] | ArrowTable; @@ -208,13 +209,14 @@ export function makeArrowTable( } const opt = new MakeArrowTableOptions(options !== undefined ? options : {}); + if (opt.schema !== undefined && opt.schema !== null) { + opt.schema = sanitizeSchema(opt.schema); + } const columns: Record = {}; // TODO: sample dataset to find missing columns // Prefer the field ordering of the schema, if present const columnNames = - options?.schema != null - ? (options?.schema?.names as string[]) - : Object.keys(data[0]); + opt.schema != null ?
(opt.schema.names as string[]) : Object.keys(data[0]); for (const colName of columnNames) { if ( data.length !== 0 && @@ -381,6 +383,10 @@ async function applyEmbeddings( return table; } + if (schema !== undefined && schema !== null) { + schema = sanitizeSchema(schema); + } + // Convert from ArrowTable to Record const colEntries = [...Array(table.numCols).keys()].map((_, idx) => { const name = table.schema.fields[idx].name; @@ -510,6 +516,9 @@ export async function fromRecordsToBuffer( embeddings?: EmbeddingFunction, schema?: Schema, ): Promise { + if (schema !== undefined && schema !== null) { + schema = sanitizeSchema(schema); + } const table = await convertToTable(data, embeddings, { schema }); const writer = RecordBatchFileWriter.writeAll(table); return Buffer.from(await writer.toUint8Array()); @@ -527,6 +536,9 @@ export async function fromRecordsToStreamBuffer( embeddings?: EmbeddingFunction, schema?: Schema, ): Promise { + if (schema !== undefined && schema !== null) { + schema = sanitizeSchema(schema); + } const table = await convertToTable(data, embeddings, { schema }); const writer = RecordBatchStreamWriter.writeAll(table); return Buffer.from(await writer.toUint8Array()); @@ -545,6 +557,9 @@ export async function fromTableToBuffer( embeddings?: EmbeddingFunction, schema?: Schema, ): Promise { + if (schema !== undefined && schema !== null) { + schema = sanitizeSchema(schema); + } const tableWithEmbeddings = await applyEmbeddings(table, embeddings, schema); const writer = RecordBatchFileWriter.writeAll(tableWithEmbeddings); return Buffer.from(await writer.toUint8Array()); @@ -555,6 +570,9 @@ export async function fromDataToBuffer( embeddings?: EmbeddingFunction, schema?: Schema, ): Promise { + if (schema !== undefined && schema !== null) { + schema = sanitizeSchema(schema); + } if (data instanceof ArrowTable) { return fromTableToBuffer(data, embeddings, schema); } else { @@ -612,5 +630,5 @@ function alignTable(table: ArrowTable, schema: Schema): ArrowTable { 
// Creates an empty Arrow Table export function createEmptyTable(schema: Schema): ArrowTable { - return new ArrowTable(schema); + return new ArrowTable(sanitizeSchema(schema)); } diff --git a/nodejs/lancedb/sanitize.ts b/nodejs/lancedb/sanitize.ts new file mode 100644 index 000000000..9a5face30 --- /dev/null +++ b/nodejs/lancedb/sanitize.ts @@ -0,0 +1,507 @@ +// Copyright 2023 LanceDB Developers. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// The utilities in this file help sanitize data from the user's arrow +// library into the types expected by vectordb's arrow library. Node +// generally allows for multiple versions of the same library (and sometimes +// even multiple copies of the same version) to be installed at the same +// time. However, arrow-js uses instanceof, which expects that the input +// comes from the exact same library instance. This is not always the case +// and so we must sanitize the input to ensure that it is compatible.
+ +import { + Field, + Utf8, + FixedSizeBinary, + FixedSizeList, + Schema, + List, + Struct, + Float, + Bool, + Date_, + Decimal, + DataType, + Dictionary, + Binary, + Float32, + Interval, + Map_, + Duration, + Union, + Time, + Timestamp, + Type, + Null, + Int, + type Precision, + type DateUnit, + Int8, + Int16, + Int32, + Int64, + Uint8, + Uint16, + Uint32, + Uint64, + Float16, + Float64, + DateDay, + DateMillisecond, + DenseUnion, + SparseUnion, + TimeNanosecond, + TimeMicrosecond, + TimeMillisecond, + TimeSecond, + TimestampNanosecond, + TimestampMicrosecond, + TimestampMillisecond, + TimestampSecond, + IntervalDayTime, + IntervalYearMonth, + DurationNanosecond, + DurationMicrosecond, + DurationMillisecond, + DurationSecond, +} from "apache-arrow"; +import type { IntBitWidth, TKeys, TimeBitWidth } from "apache-arrow/type"; + +function sanitizeMetadata( + metadataLike?: unknown, +): Map<string, string> | undefined { + if (metadataLike === undefined || metadataLike === null) { + return undefined; + } + if (!(metadataLike instanceof Map)) { + throw Error("Expected metadata, if present, to be a Map"); + } + for (const item of metadataLike) { + if (!(typeof item[0] === "string" && typeof item[1] === "string")) { + throw Error( + "Expected metadata, if present, to be a Map but it had non-string keys or values", + ); + } + } + return metadataLike as Map<string, string>; +} + +function sanitizeInt(typeLike: object) { + if ( + !("bitWidth" in typeLike) || + typeof typeLike.bitWidth !== "number" || + !("isSigned" in typeLike) || + typeof typeLike.isSigned !== "boolean" + ) { + throw Error( + "Expected an Int Type to have a `bitWidth` and `isSigned` property", + ); + } + return new Int(typeLike.isSigned, typeLike.bitWidth as IntBitWidth); +} + +function sanitizeFloat(typeLike: object) { + if (!("precision" in typeLike) || typeof typeLike.precision !== "number") { + throw Error("Expected a Float Type to have a `precision` property"); + } + return new Float(typeLike.precision as Precision); +} +
+function sanitizeDecimal(typeLike: object) { + if ( + !("scale" in typeLike) || + typeof typeLike.scale !== "number" || + !("precision" in typeLike) || + typeof typeLike.precision !== "number" || + !("bitWidth" in typeLike) || + typeof typeLike.bitWidth !== "number" + ) { + throw Error( + "Expected a Decimal Type to have `scale`, `precision`, and `bitWidth` properties", + ); + } + return new Decimal(typeLike.scale, typeLike.precision, typeLike.bitWidth); +} + +function sanitizeDate(typeLike: object) { + if (!("unit" in typeLike) || typeof typeLike.unit !== "number") { + throw Error("Expected a Date type to have a `unit` property"); + } + return new Date_(typeLike.unit as DateUnit); +} + +function sanitizeTime(typeLike: object) { + if ( + !("unit" in typeLike) || + typeof typeLike.unit !== "number" || + !("bitWidth" in typeLike) || + typeof typeLike.bitWidth !== "number" + ) { + throw Error( + "Expected a Time type to have `unit` and `bitWidth` properties", + ); + } + return new Time(typeLike.unit, typeLike.bitWidth as TimeBitWidth); +} + +function sanitizeTimestamp(typeLike: object) { + if (!("unit" in typeLike) || typeof typeLike.unit !== "number") { + throw Error("Expected a Timestamp type to have a `unit` property"); + } + let timezone = null; + if ("timezone" in typeLike && typeof typeLike.timezone === "string") { + timezone = typeLike.timezone; + } + return new Timestamp(typeLike.unit, timezone); +} + +function sanitizeTypedTimestamp( + typeLike: object, + Datatype: + | typeof TimestampNanosecond + | typeof TimestampMicrosecond + | typeof TimestampMillisecond + | typeof TimestampSecond, +) { + let timezone = null; + if ("timezone" in typeLike && typeof typeLike.timezone === "string") { + timezone = typeLike.timezone; + } + return new Datatype(timezone); +} + +function sanitizeInterval(typeLike: object) { + if (!("unit" in typeLike) || typeof typeLike.unit !== "number") { + throw Error("Expected an Interval type to have a `unit` property"); + } + return new 
Interval(typeLike.unit); +} + +function sanitizeList(typeLike: object) { + if (!("children" in typeLike) || !Array.isArray(typeLike.children)) { + throw Error( + "Expected a List type to have an array-like `children` property", + ); + } + if (typeLike.children.length !== 1) { + throw Error("Expected a List type to have exactly one child"); + } + return new List(sanitizeField(typeLike.children[0])); +} + +function sanitizeStruct(typeLike: object) { + if (!("children" in typeLike) || !Array.isArray(typeLike.children)) { + throw Error( + "Expected a Struct type to have an array-like `children` property", + ); + } + return new Struct(typeLike.children.map((child) => sanitizeField(child))); +} + +function sanitizeUnion(typeLike: object) { + if ( + !("typeIds" in typeLike) || + !("mode" in typeLike) || + typeof typeLike.mode !== "number" + ) { + throw Error( + "Expected a Union type to have `typeIds` and `mode` properties", + ); + } + if (!("children" in typeLike) || !Array.isArray(typeLike.children)) { + throw Error( + "Expected a Union type to have an array-like `children` property", + ); + } + + return new Union( + typeLike.mode, + // eslint-disable-next-line @typescript-eslint/no-explicit-any + typeLike.typeIds as any, + typeLike.children.map((child) => sanitizeField(child)), + ); +} + +function sanitizeTypedUnion( + typeLike: object, + UnionType: typeof DenseUnion | typeof SparseUnion, +) { + if (!("typeIds" in typeLike)) { + throw Error( + "Expected a DenseUnion/SparseUnion type to have a `typeIds` property", + ); + } + if (!("children" in typeLike) || !Array.isArray(typeLike.children)) { + throw Error( + "Expected a DenseUnion/SparseUnion type to have an array-like `children` property", + ); + } + + return new UnionType( + typeLike.typeIds as Int32Array | number[], + typeLike.children.map((child) => sanitizeField(child)), + ); +} + +function sanitizeFixedSizeBinary(typeLike: object) { + if (!("byteWidth" in typeLike) || typeof typeLike.byteWidth !== "number") { + 
throw Error( + "Expected a FixedSizeBinary type to have a `byteWidth` property", + ); + } + return new FixedSizeBinary(typeLike.byteWidth); +} + +function sanitizeFixedSizeList(typeLike: object) { + if (!("listSize" in typeLike) || typeof typeLike.listSize !== "number") { + throw Error("Expected a FixedSizeList type to have a `listSize` property"); + } + if (!("children" in typeLike) || !Array.isArray(typeLike.children)) { + throw Error( + "Expected a FixedSizeList type to have an array-like `children` property", + ); + } + if (typeLike.children.length !== 1) { + throw Error("Expected a FixedSizeList type to have exactly one child"); + } + return new FixedSizeList( + typeLike.listSize, + sanitizeField(typeLike.children[0]), + ); +} + +function sanitizeMap(typeLike: object) { + if (!("children" in typeLike) || !Array.isArray(typeLike.children)) { + throw Error( + "Expected a Map type to have an array-like `children` property", + ); + } + if (!("keysSorted" in typeLike) || typeof typeLike.keysSorted !== "boolean") { + throw Error("Expected a Map type to have a `keysSorted` property"); + } + + return new Map_( + // eslint-disable-next-line @typescript-eslint/no-explicit-any + typeLike.children.map((field) => sanitizeField(field)) as any, + typeLike.keysSorted, + ); +} + +function sanitizeDuration(typeLike: object) { + if (!("unit" in typeLike) || typeof typeLike.unit !== "number") { + throw Error("Expected a Duration type to have a `unit` property"); + } + return new Duration(typeLike.unit); +} + +function sanitizeDictionary(typeLike: object) { + if (!("id" in typeLike) || typeof typeLike.id !== "number") { + throw Error("Expected a Dictionary type to have an `id` property"); + } + if (!("indices" in typeLike) || typeof typeLike.indices !== "object") { + throw Error("Expected a Dictionary type to have an `indices` property"); + } + if (!("dictionary" in typeLike) || typeof typeLike.dictionary !== "object") { + throw Error("Expected a Dictionary type to have an 
`dictionary` property"); + } + if (!("isOrdered" in typeLike) || typeof typeLike.isOrdered !== "boolean") { + throw Error("Expected a Dictionary type to have an `isOrdered` property"); + } + return new Dictionary( + sanitizeType(typeLike.dictionary), + sanitizeType(typeLike.indices) as TKeys, + typeLike.id, + typeLike.isOrdered, + ); +} + +// eslint-disable-next-line @typescript-eslint/no-explicit-any +function sanitizeType(typeLike: unknown): DataType { + if (typeof typeLike !== "object" || typeLike === null) { + throw Error("Expected a Type but object was null/undefined"); + } + if (!("typeId" in typeLike)) { + throw Error("Expected a Type to have a typeId function"); + } + let typeId: Type; + if (typeof typeLike.typeId === "function") { + typeId = (typeLike.typeId as () => unknown)() as Type; + } else if (typeof typeLike.typeId === "number") { + typeId = typeLike.typeId as Type; + } else { + throw Error("Type's typeId property was not a function or number"); + } + + switch (typeId) { + case Type.NONE: + throw Error("Received a Type with a typeId of NONE"); + case Type.Null: + return new Null(); + case Type.Int: + return sanitizeInt(typeLike); + case Type.Float: + return sanitizeFloat(typeLike); + case Type.Binary: + return new Binary(); + case Type.Utf8: + return new Utf8(); + case Type.Bool: + return new Bool(); + case Type.Decimal: + return sanitizeDecimal(typeLike); + case Type.Date: + return sanitizeDate(typeLike); + case Type.Time: + return sanitizeTime(typeLike); + case Type.Timestamp: + return sanitizeTimestamp(typeLike); + case Type.Interval: + return sanitizeInterval(typeLike); + case Type.List: + return sanitizeList(typeLike); + case Type.Struct: + return sanitizeStruct(typeLike); + case Type.Union: + return sanitizeUnion(typeLike); + case Type.FixedSizeBinary: + return sanitizeFixedSizeBinary(typeLike); + case Type.FixedSizeList: + return sanitizeFixedSizeList(typeLike); + case Type.Map: + return
sanitizeMap(typeLike); + case Type.Duration: + return sanitizeDuration(typeLike); + case Type.Dictionary: + return sanitizeDictionary(typeLike); + case Type.Int8: + return new Int8(); + case Type.Int16: + return new Int16(); + case Type.Int32: + return new Int32(); + case Type.Int64: + return new Int64(); + case Type.Uint8: + return new Uint8(); + case Type.Uint16: + return new Uint16(); + case Type.Uint32: + return new Uint32(); + case Type.Uint64: + return new Uint64(); + case Type.Float16: + return new Float16(); + case Type.Float32: + return new Float32(); + case Type.Float64: + return new Float64(); + case Type.DateMillisecond: + return new DateMillisecond(); + case Type.DateDay: + return new DateDay(); + case Type.TimeNanosecond: + return new TimeNanosecond(); + case Type.TimeMicrosecond: + return new TimeMicrosecond(); + case Type.TimeMillisecond: + return new TimeMillisecond(); + case Type.TimeSecond: + return new TimeSecond(); + case Type.TimestampNanosecond: + return sanitizeTypedTimestamp(typeLike, TimestampNanosecond); + case Type.TimestampMicrosecond: + return sanitizeTypedTimestamp(typeLike, TimestampMicrosecond); + case Type.TimestampMillisecond: + return sanitizeTypedTimestamp(typeLike, TimestampMillisecond); + case Type.TimestampSecond: + return sanitizeTypedTimestamp(typeLike, TimestampSecond); + case Type.DenseUnion: + return sanitizeTypedUnion(typeLike, DenseUnion); + case Type.SparseUnion: + return sanitizeTypedUnion(typeLike, SparseUnion); + case Type.IntervalDayTime: + return new IntervalDayTime(); + case Type.IntervalYearMonth: + return new IntervalYearMonth(); + case Type.DurationNanosecond: + return new DurationNanosecond(); + case Type.DurationMicrosecond: + return new DurationMicrosecond(); + case Type.DurationMillisecond: + return new DurationMillisecond(); + case Type.DurationSecond: + return new DurationSecond(); + default: + throw new Error("Unrecoginized type id in schema: " + typeId); + } +} + +function sanitizeField(fieldLike:
unknown): Field { + if (fieldLike instanceof Field) { + return fieldLike; + } + if (typeof fieldLike !== "object" || fieldLike === null) { + throw Error("Expected a Field but object was null/undefined"); + } + if ( + !("type" in fieldLike) || + !("name" in fieldLike) || + !("nullable" in fieldLike) + ) { + throw Error( + "The field passed in is missing a `type`/`name`/`nullable` property", + ); + } + const type = sanitizeType(fieldLike.type); + const name = fieldLike.name; + if (!(typeof name === "string")) { + throw Error("The field passed in had a non-string `name` property"); + } + const nullable = fieldLike.nullable; + if (!(typeof nullable === "boolean")) { + throw Error("The field passed in had a non-boolean `nullable` property"); + } + let metadata; + if ("metadata" in fieldLike) { + metadata = sanitizeMetadata(fieldLike.metadata); + } + return new Field(name, type, nullable, metadata); +} + +export function sanitizeSchema(schemaLike: unknown): Schema { + if (schemaLike instanceof Schema) { + return schemaLike; + } + if (typeof schemaLike !== "object" || schemaLike === null) { + throw Error("Expected a Schema but object was null/undefined"); + } + if (!("fields" in schemaLike)) { + throw Error( + "The schema passed in does not appear to be a schema (no 'fields' property)", + ); + } + let metadata; + if ("metadata" in schemaLike) { + metadata = sanitizeMetadata(schemaLike.metadata); + } + if (!Array.isArray(schemaLike.fields)) { + throw Error( + "The schema passed in had a 'fields' property but it was not an array", + ); + } + const sanitizedFields = schemaLike.fields.map((field) => + sanitizeField(field), + ); + return new Schema(sanitizedFields, metadata); +} diff --git a/nodejs/package-lock.json b/nodejs/package-lock.json index 3fc7eb2ff..6f3938cd0 100644 --- a/nodejs/package-lock.json +++ b/nodejs/package-lock.json @@ -23,6 +23,7 @@ "@types/tmp": "^0.2.6", "@typescript-eslint/eslint-plugin": "^6.19.0", "@typescript-eslint/parser": "^6.19.0", + 
"apache-arrow-old": "npm:apache-arrow@13.0.0", "eslint": "^8.57.0", "eslint-config-prettier": "^9.1.0", "jest": "^29.7.0", @@ -52,7 +53,6 @@ "version": "1.1.1", "resolved": "https://registry.npmjs.org/@75lb/deep-merge/-/deep-merge-1.1.1.tgz", "integrity": "sha512-xvgv6pkMGBA6GwdyJbNAnDmfAIR/DfWhrj9jgWh3TY7gRm3KO46x/GPjRg6wJ0nOepwqrNxFfojebh0Df4h4Tw==", - "peer": true, "dependencies": { "lodash.assignwith": "^4.2.0", "typical": "^7.1.1" @@ -65,7 +65,6 @@ "version": "7.1.1", "resolved": "https://registry.npmjs.org/typical/-/typical-7.1.1.tgz", "integrity": "sha512-T+tKVNs6Wu7IWiAce5BgMd7OZfNYUndHwc5MknN+UHOudi7sGZzuHdCadllRuqJ3fPtgFtIH9+lt9qRv6lmpfA==", - "peer": true, "engines": { "node": ">=12.17" } @@ -1437,8 +1436,7 @@ "node_modules/@types/command-line-usage": { "version": "5.0.2", "resolved": "https://registry.npmjs.org/@types/command-line-usage/-/command-line-usage-5.0.2.tgz", - "integrity": "sha512-n7RlEEJ+4x4TS7ZQddTmNSxP+zziEG0TNsMfiRIxcIVXt71ENJ9ojeXmGO3wPoTdn7pJcU2xc3CJYMktNT6DPg==", - "peer": true + "integrity": "sha512-n7RlEEJ+4x4TS7ZQddTmNSxP+zziEG0TNsMfiRIxcIVXt71ENJ9ojeXmGO3wPoTdn7pJcU2xc3CJYMktNT6DPg==" }, "node_modules/@types/graceful-fs": { "version": "4.1.9", @@ -1507,6 +1505,12 @@ "form-data": "^4.0.0" } }, + "node_modules/@types/pad-left": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/@types/pad-left/-/pad-left-2.1.1.tgz", + "integrity": "sha512-Xd22WCRBydkGSApl5Bw0PhAOHKSVjNL3E3AwzKaps96IMraPqy5BvZIsBVK6JLwdybUzjHnuWVwpDd0JjTfHXA==", + "dev": true + }, "node_modules/@types/semver": { "version": "7.5.6", "resolved": "https://registry.npmjs.org/@types/semver/-/semver-7.5.6.tgz", @@ -1910,6 +1914,40 @@ "arrow2csv": "bin/arrow2csv.cjs" } }, + "node_modules/apache-arrow-old": { + "name": "apache-arrow", + "version": "13.0.0", + "resolved": "https://registry.npmjs.org/apache-arrow/-/apache-arrow-13.0.0.tgz", + "integrity": "sha512-3gvCX0GDawWz6KFNC28p65U+zGh/LZ6ZNKWNu74N6CQlKzxeoWHpi4CgEQsgRSEMuyrIIXi1Ea2syja7dwcHvw==", + "dev": 
true, + "dependencies": { + "@types/command-line-args": "5.2.0", + "@types/command-line-usage": "5.0.2", + "@types/node": "20.3.0", + "@types/pad-left": "2.1.1", + "command-line-args": "5.2.1", + "command-line-usage": "7.0.1", + "flatbuffers": "23.5.26", + "json-bignum": "^0.0.3", + "pad-left": "^2.1.0", + "tslib": "^2.5.3" + }, + "bin": { + "arrow2csv": "bin/arrow2csv.js" + } + }, + "node_modules/apache-arrow-old/node_modules/@types/command-line-args": { + "version": "5.2.0", + "resolved": "https://registry.npmjs.org/@types/command-line-args/-/command-line-args-5.2.0.tgz", + "integrity": "sha512-UuKzKpJJ/Ief6ufIaIzr3A/0XnluX7RvFgwkV89Yzvm77wCh1kFaFmqN8XEnGcN62EuHdedQjEMb8mYxFLGPyA==", + "dev": true + }, + "node_modules/apache-arrow-old/node_modules/@types/node": { + "version": "20.3.0", + "resolved": "https://registry.npmjs.org/@types/node/-/node-20.3.0.tgz", + "integrity": "sha512-cumHmIAf6On83X7yP+LrsEyUOf/YlociZelmpRYaGFydoaPdxdt80MAbu6vWerQT2COCp2nPvHdsbD7tHn/YlQ==", + "dev": true + }, "node_modules/argparse": { "version": "1.0.10", "resolved": "https://registry.npmjs.org/argparse/-/argparse-1.0.10.tgz", @@ -1923,7 +1961,6 @@ "version": "3.1.0", "resolved": "https://registry.npmjs.org/array-back/-/array-back-3.1.0.tgz", "integrity": "sha512-TkuxA4UCOvxuDK6NZYXCalszEzj+TLszyASooky+i742l9TqsOdYCMJJupxRic61hwquNtppB3hgcuq9SVSH1Q==", - "peer": true, "engines": { "node": ">=6" } @@ -2200,7 +2237,6 @@ "version": "0.4.0", "resolved": "https://registry.npmjs.org/chalk-template/-/chalk-template-0.4.0.tgz", "integrity": "sha512-/ghrgmhfY8RaSdeo43hNXxpoHAtxdbskUHjPpfqUWGttFgycUhYPGx3YZBCnUCvOa7Doivn1IZec3DEGFoMgLg==", - "peer": true, "dependencies": { "chalk": "^4.1.2" }, @@ -2297,7 +2333,6 @@ "version": "5.2.1", "resolved": "https://registry.npmjs.org/command-line-args/-/command-line-args-5.2.1.tgz", "integrity": "sha512-H4UfQhZyakIjC74I9d34fGYDwk3XpSr17QhEd0Q3I9Xq1CETHo4Hcuo87WyWHpAF1aSLjLRf5lD9ZGX2qStUvg==", - "peer": true, "dependencies": { "array-back": "^3.1.0", 
"find-replace": "^3.0.0", @@ -2312,7 +2347,6 @@ "version": "7.0.1", "resolved": "https://registry.npmjs.org/command-line-usage/-/command-line-usage-7.0.1.tgz", "integrity": "sha512-NCyznE//MuTjwi3y84QVUGEOT+P5oto1e1Pk/jFPVdPPfsG03qpTIl3yw6etR+v73d0lXsoojRpvbru2sqePxQ==", - "peer": true, "dependencies": { "array-back": "^6.2.2", "chalk-template": "^0.4.0", @@ -2327,7 +2361,6 @@ "version": "6.2.2", "resolved": "https://registry.npmjs.org/array-back/-/array-back-6.2.2.tgz", "integrity": "sha512-gUAZ7HPyb4SJczXAMUXMGAvI976JoK3qEx9v1FTmeYuJj0IBiaKttG1ydtGKdkfqWkIkouke7nG8ufGy77+Cvw==", - "peer": true, "engines": { "node": ">=12.17" } @@ -2336,7 +2369,6 @@ "version": "7.1.1", "resolved": "https://registry.npmjs.org/typical/-/typical-7.1.1.tgz", "integrity": "sha512-T+tKVNs6Wu7IWiAce5BgMd7OZfNYUndHwc5MknN+UHOudi7sGZzuHdCadllRuqJ3fPtgFtIH9+lt9qRv6lmpfA==", - "peer": true, "engines": { "node": ">=12.17" } @@ -2872,7 +2904,6 @@ "version": "3.0.0", "resolved": "https://registry.npmjs.org/find-replace/-/find-replace-3.0.0.tgz", "integrity": "sha512-6Tb2myMioCAgv5kfvP5/PkZZ/ntTpVK39fHY7WkWBgvbeE+VHd/tZuZ4mrC+bxh4cfOZeYKVPaJIZtZXV7GNCQ==", - "peer": true, "dependencies": { "array-back": "^3.0.1" }, @@ -2913,8 +2944,7 @@ "node_modules/flatbuffers": { "version": "23.5.26", "resolved": "https://registry.npmjs.org/flatbuffers/-/flatbuffers-23.5.26.tgz", - "integrity": "sha512-vE+SI9vrJDwi1oETtTIFldC/o9GsVKRM+s6EL0nQgxXlYV1Vc4Tk30hj4xGICftInKQKj1F3up2n8UbIVobISQ==", - "peer": true + "integrity": "sha512-vE+SI9vrJDwi1oETtTIFldC/o9GsVKRM+s6EL0nQgxXlYV1Vc4Tk30hj4xGICftInKQKj1F3up2n8UbIVobISQ==" }, "node_modules/flatted": { "version": "3.2.9", @@ -4158,7 +4188,6 @@ "version": "0.0.3", "resolved": "https://registry.npmjs.org/json-bignum/-/json-bignum-0.0.3.tgz", "integrity": "sha512-2WHyXj3OfHSgNyuzDbSxI1w2jgw5gkWSWhS7Qg4bWXx1nLk3jnbwfUeS0PSba3IzpTUWdHxBieELUzXRjQB2zg==", - "peer": true, "engines": { "node": ">=0.8" } @@ -4269,14 +4298,12 @@ "node_modules/lodash.assignwith": { "version": 
"4.2.0", "resolved": "https://registry.npmjs.org/lodash.assignwith/-/lodash.assignwith-4.2.0.tgz", - "integrity": "sha512-ZznplvbvtjK2gMvnQ1BR/zqPFZmS6jbK4p+6Up4xcRYA7yMIwxHCfbTcrYxXKzzqLsQ05eJPVznEW3tuwV7k1g==", - "peer": true + "integrity": "sha512-ZznplvbvtjK2gMvnQ1BR/zqPFZmS6jbK4p+6Up4xcRYA7yMIwxHCfbTcrYxXKzzqLsQ05eJPVznEW3tuwV7k1g==" }, "node_modules/lodash.camelcase": { "version": "4.3.0", "resolved": "https://registry.npmjs.org/lodash.camelcase/-/lodash.camelcase-4.3.0.tgz", - "integrity": "sha512-TwuEnCnxbc3rAvhf/LbG7tJUDzhqXyFnv3dtzLOPgCG/hODL7WFnsbwktkD7yUV0RrreP/l1PALq/YSg6VvjlA==", - "peer": true + "integrity": "sha512-TwuEnCnxbc3rAvhf/LbG7tJUDzhqXyFnv3dtzLOPgCG/hODL7WFnsbwktkD7yUV0RrreP/l1PALq/YSg6VvjlA==" }, "node_modules/lodash.memoize": { "version": "4.1.2", @@ -4588,6 +4615,18 @@ "node": ">=6" } }, + "node_modules/pad-left": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/pad-left/-/pad-left-2.1.0.tgz", + "integrity": "sha512-HJxs9K9AztdIQIAIa/OIazRAUW/L6B9hbQDxO4X07roW3eo9XqZc2ur9bn1StH9CnbbI9EgvejHQX7CBpCF1QA==", + "dev": true, + "dependencies": { + "repeat-string": "^1.5.4" + }, + "engines": { + "node": ">=0.10.0" + } + }, "node_modules/parent-module": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/parent-module/-/parent-module-1.0.1.tgz", @@ -4865,6 +4904,15 @@ "integrity": "sha512-xWGDIW6x921xtzPkhiULtthJHoJvBbF3q26fzloPCK0hsvxtPVelvftw3zjbHWSkR2km9Z+4uxbDDK/6Zw9B8w==", "dev": true }, + "node_modules/repeat-string": { + "version": "1.6.1", + "resolved": "https://registry.npmjs.org/repeat-string/-/repeat-string-1.6.1.tgz", + "integrity": "sha512-PV0dzCYDNfRi1jCDbJzpW7jNNDRuCOG/jI5ctQcGKt/clZD+YcPS3yIlWuTJMmESC8aevCFmWJy5wjAFgNqN6w==", + "dev": true, + "engines": { + "node": ">=0.10" + } + }, "node_modules/require-directory": { "version": "2.1.1", "resolved": "https://registry.npmjs.org/require-directory/-/require-directory-2.1.1.tgz", @@ -5088,7 +5136,6 @@ "version": "3.0.1", "resolved": 
"https://registry.npmjs.org/stream-read-all/-/stream-read-all-3.0.1.tgz", "integrity": "sha512-EWZT9XOceBPlVJRrYcykW8jyRSZYbkb/0ZK36uLEmoWVO5gxBOnntNTseNzfREsqxqdfEGQrD8SXQ3QWbBmq8A==", - "peer": true, "engines": { "node": ">=10" } @@ -5180,7 +5227,6 @@ "version": "3.0.2", "resolved": "https://registry.npmjs.org/table-layout/-/table-layout-3.0.2.tgz", "integrity": "sha512-rpyNZYRw+/C+dYkcQ3Pr+rLxW4CfHpXjPDnG7lYhdRoUcZTUt+KEsX+94RGp/aVp/MQU35JCITv2T/beY4m+hw==", - "peer": true, "dependencies": { "@75lb/deep-merge": "^1.1.1", "array-back": "^6.2.2", @@ -5201,7 +5247,6 @@ "version": "6.2.2", "resolved": "https://registry.npmjs.org/array-back/-/array-back-6.2.2.tgz", "integrity": "sha512-gUAZ7HPyb4SJczXAMUXMGAvI976JoK3qEx9v1FTmeYuJj0IBiaKttG1ydtGKdkfqWkIkouke7nG8ufGy77+Cvw==", - "peer": true, "engines": { "node": ">=12.17" } @@ -5210,7 +5255,6 @@ "version": "7.1.1", "resolved": "https://registry.npmjs.org/typical/-/typical-7.1.1.tgz", "integrity": "sha512-T+tKVNs6Wu7IWiAce5BgMd7OZfNYUndHwc5MknN+UHOudi7sGZzuHdCadllRuqJ3fPtgFtIH9+lt9qRv6lmpfA==", - "peer": true, "engines": { "node": ">=12.17" } @@ -5335,8 +5379,7 @@ "node_modules/tslib": { "version": "2.6.2", "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.6.2.tgz", - "integrity": "sha512-AEYxH93jGFPn/a2iVAwW87VuUIkR1FVUKB77NwMF7nBTDkDrrT/Hpt/IrCJ0QXhW27jTBDcf5ZY7w6RiqTMw2Q==", - "peer": true + "integrity": "sha512-AEYxH93jGFPn/a2iVAwW87VuUIkR1FVUKB77NwMF7nBTDkDrrT/Hpt/IrCJ0QXhW27jTBDcf5ZY7w6RiqTMw2Q==" }, "node_modules/type-check": { "version": "0.4.0", @@ -5672,7 +5715,6 @@ "version": "4.0.0", "resolved": "https://registry.npmjs.org/typical/-/typical-4.0.0.tgz", "integrity": "sha512-VAH4IvQ7BDFYglMd7BPRDfLgxZZX4O4TFcRDA6EN5X7erNJJq+McIEp8np9aVtxrCJ6qx4GTYVfOWNjcqwZgRw==", - "peer": true, "engines": { "node": ">=8" } @@ -5819,7 +5861,6 @@ "version": "5.1.0", "resolved": "https://registry.npmjs.org/wordwrapjs/-/wordwrapjs-5.1.0.tgz", "integrity": 
"sha512-JNjcULU2e4KJwUNv6CHgI46UvDGitb6dGryHajXTDiLgg1/RiGoPSDw4kZfYnwGtEXf2ZMeIewDQgFGzkCB2Sg==", - "peer": true, "engines": { "node": ">=12.17" } diff --git a/nodejs/package.json b/nodejs/package.json index 0d2cecb64..e023e3560 100644 --- a/nodejs/package.json +++ b/nodejs/package.json @@ -22,6 +22,7 @@ "@types/tmp": "^0.2.6", "@typescript-eslint/eslint-plugin": "^6.19.0", "@typescript-eslint/parser": "^6.19.0", + "apache-arrow-old": "npm:apache-arrow@13.0.0", "eslint": "^8.57.0", "eslint-config-prettier": "^9.1.0", "jest": "^29.7.0", @@ -55,7 +56,7 @@ "build": "npm run build:debug && tsc -b", "chkformat": "prettier . --check", "docs": "typedoc --plugin typedoc-plugin-markdown lancedb/index.ts", - "lint": "eslint lancedb", + "lint": "eslint lancedb && eslint __test__", "prepublishOnly": "napi prepublish -t npm", "test": "npm run build && jest --verbose", "universal": "napi universal",