Commit 3a21030

Authored May 16, 2024
feat (ai/core): add embedMany function (#1617)
1 parent 339aafa commit 3a21030

File tree: 17 files changed (+444 −24 lines)
Diff for: .changeset/five-knives-deny.md (+5)

````diff
@@ -0,0 +1,5 @@
+---
+'ai': patch
+---
+
+feat (ai/core): add embedMany function
````

Diff for: content/docs/03-ai-sdk-core/30-embeddings.mdx (+29 −2)

````diff
@@ -10,15 +10,42 @@ In this space, similar words are close to each other, and the distance between w
 
 ## Embedding a Single Value
 
-The Vercel AI SDK provides the `embed` function to embed single values, which is useful for tasks such as finding similar words
-or phrases or clustering text. You can use it with embeddings models, e.g. `openai.embedding('text-embedding-3-large')` or `mistral.embedding('mistral-embed')`.
+The Vercel AI SDK provides the [`embed`](/docs/reference/ai-sdk-core/embed) function to embed single values, which is useful for tasks such as finding similar words
+or phrases or clustering text.
+You can use it with embeddings models, e.g. `openai.embedding('text-embedding-3-large')` or `mistral.embedding('mistral-embed')`.
 
 ```tsx
 import { embed } from 'ai';
 import { openai } from '@ai-sdk/openai';
 
+// 'embedding' is a single embedding object (number[])
 const { embedding } = await embed({
   model: openai.embedding('text-embedding-3-small'),
   value: 'sunny day at the beach',
 });
 ```
+
+## Embedding Many Values
+
+When loading data, e.g. when preparing a data store for retrieval-augmented generation (RAG),
+it is often useful to embed many values at once (batch embedding).
+
+The Vercel AI SDK provides the `embedMany` function for this purpose.
+Similar to `embed`, you can use it with embeddings models,
+e.g. `openai.embedding('text-embedding-3-large')` or `mistral.embedding('mistral-embed')`.
+
+```tsx
+import { openai } from '@ai-sdk/openai';
+import { embedMany } from 'ai';
+
+// 'embeddings' is an array of embedding objects (number[][]).
+// It is sorted in the same order as the input values.
+const { embeddings } = await embedMany({
+  model: openai.embedding('text-embedding-3-small'),
+  values: [
+    'sunny day at the beach',
+    'rainy afternoon in the city',
+    'snowy night in the mountains',
+  ],
+});
+```
````
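The embeddings returned by `embed` and `embedMany` are plain `number[]` vectors, so similarity between two embedded values can be computed directly. As a minimal sketch (the `cosineSimilarity` helper below is illustrative, not an export shown in this commit):

```typescript
// Illustrative helper (not part of the diff above): cosine similarity between
// two embedding vectors, usable for ranking embedMany results against a query.
function cosineSimilarity(a: number[], b: number[]): number {
  let dot = 0;
  let normA = 0;
  let normB = 0;
  for (let i = 0; i < a.length; i++) {
    dot += a[i] * b[i];
    normA += a[i] * a[i];
    normB += b[i] * b[i];
  }
  return dot / (Math.sqrt(normA) * Math.sqrt(normB));
}

// identical vectors → 1, orthogonal vectors → 0
console.log(cosineSimilarity([1, 0], [1, 0])); // 1
console.log(cosineSimilarity([1, 0], [0, 1])); // 0
```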
Diff for: (unnamed file) (+18)

````diff
@@ -0,0 +1,18 @@
+---
+title: embedMany
+description: Embed several values using the AI SDK Core (batch embedding)
+---
+
+# `embedMany`
+
+Embed several values using an embedding model. The type of the value is defined
+by the embedding model.
+
+`embedMany` automatically splits large requests into smaller chunks if the model
+has a limit on how many embeddings can be generated in a single call.
+
+## Import
+
+<Snippet text={`import { embedMany } from "ai"`} prompt={false} />
+
+<ReferenceTable packageName="core" functionName="embedMany" />
````

Diff for: content/docs/07-reference/ai-sdk-core/index.mdx (+6)

````diff
@@ -33,5 +33,11 @@ description: Reference documentation for the AI SDK Core
         'Generate an embedding for a single value using an embedding model.',
       href: '/docs/reference/ai-sdk-core/embed',
     },
+    {
+      title: 'embedMany',
+      description:
+        'Generate embeddings for several values using an embedding model (batch embedding).',
+      href: '/docs/reference/ai-sdk-core/embed-many',
+    },
   ]}
 />
````

Diff for: content/providers/01-ai-sdk-providers/01-openai.mdx (+46 −8)

````diff
@@ -72,9 +72,9 @@ You can use the following optional settings to customize the OpenAI provider ins
   and `compatible` when using 3rd party providers. In `compatible` mode, newer
   information such as streamOptions are not being sent. Defaults to 'compatible'.
 
-## Models
+## Language Models
 
-The OpenAI provider instance is a function that you can invoke to create a model:
+The OpenAI provider instance is a function that you can invoke to create a language model:
 
 ```ts
 const model = openai('gpt-3.5-turbo');
@@ -92,6 +92,14 @@ const model = openai('gpt-3.5-turbo', {
 The available options depend on the API that's automatically chosen for the model (see below).
 If you want to explicitly select a specific model API, you can use `.chat` or `.completion`.
 
+### Model Capabilities
+
+| Model           | Image Input         | Object Generation   | Tool Usage          | Tool Streaming      |
+| --------------- | ------------------- | ------------------- | ------------------- | ------------------- |
+| `gpt-4-turbo`   | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> |
+| `gpt-4`         | <Cross size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> |
+| `gpt-3.5-turbo` | <Cross size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> |
+
 ### Chat Models
 
 You can create models that call the [OpenAI chat API](https://platform.openai.com/docs/api-reference/chat) using the `.chat()` factory method.
@@ -215,10 +223,40 @@ The following optional settings are available for OpenAI completion models:
   A unique identifier representing your end-user, which can help OpenAI to
   monitor and detect abuse. Learn more.
 
-## Model Capabilities
+## Embedding Models
 
-| Model           | Image Input         | Object Generation   | Tool Usage          | Tool Streaming      |
-| --------------- | ------------------- | ------------------- | ------------------- | ------------------- |
-| `gpt-4-turbo`   | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> |
-| `gpt-4`         | <Cross size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> |
-| `gpt-3.5-turbo` | <Cross size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> |
+You can create models that call the [OpenAI embeddings API](https://platform.openai.com/docs/api-reference/embeddings)
+using the `.embedding()` factory method.
+
+```ts
+const model = openai.embedding('text-embedding-3-large');
+```
+
+OpenAI embedding models support several additional settings.
+You can pass them as an options argument:
+
+```ts
+const model = openai.embedding('text-embedding-3-large', {
+  dimensions: 512, // optional, number of dimensions for the embedding
+  user: 'test-user', // optional unique user identifier
+});
+```
+
+The following optional settings are available for OpenAI embedding models:
+
+- **dimensions**: _number_
+
+  The number of dimensions the resulting output embeddings should have. Only supported in `text-embedding-3` and later models.
+
+- **user** _string_
+
+  A unique identifier representing your end-user, which can help OpenAI to
+  monitor and detect abuse. Learn more.
+
+### Model Capabilities
+
+| Model                    | Default Dimensions | Custom Dimensions   |
+| ------------------------ | ------------------ | ------------------- |
+| `text-embedding-3-large` | 3072               | <Check size={18} /> |
+| `text-embedding-3-small` | 1536               | <Check size={18} /> |
+| `text-embedding-ada-002` | 1536               | <Cross size={18} /> |
````
Diff for: content/providers/01-ai-sdk-providers/02-anthropic.mdx (+2 −2)

````diff
@@ -60,7 +60,7 @@ You can use the following optional settings to customize the Google Generative A
 
   Custom headers to include in the requests.
 
-## Models
+## Language Models
 
 You can create models that call the [Anthropic Messages API](https://docs.anthropic.com/claude/reference/messages_post) using the provider instance.
 The first argument is the model id, e.g. `claude-3-haiku-20240307`.
@@ -88,7 +88,7 @@ The following optional settings are available for Anthropic models:
   Used to remove "long tail" low probability responses.
   Recommended for advanced use cases only. You usually only need to use temperature.
 
-## Model Capabilities
+### Model Capabilities
 
 | Model                      | Image Input         | Object Generation   | Tool Usage          | Tool Streaming      |
 | -------------------------- | ------------------- | ------------------- | ------------------- | ------------------- |
````

Diff for: content/providers/01-ai-sdk-providers/03-google-generative-ai.mdx (+2 −2)

````diff
@@ -58,7 +58,7 @@ You can use the following optional settings to customize the Google Generative A
 
   Custom headers to include in the requests.
 
-## Models
+## Language Models
 
 You can create models that call the [Google Generative AI API](https://ai.google.dev/api/rest) using the provider instance.
 The first argument is the model id, e.g. `models/gemini-pro`.
@@ -87,7 +87,7 @@ The following optional settings are available for Google Generative AI models:
   Top-k sampling considers the set of topK most probable tokens.
   Models running with nucleus sampling don't allow topK setting.
 
-## Model Capabilities
+### Model Capabilities
 
 | Model                          | Image Input         | Object Generation   | Tool Usage          | Tool Streaming      |
 | ------------------------------ | ------------------- | ------------------- | ------------------- | ------------------- |
````

Diff for: content/providers/01-ai-sdk-providers/04-mistral.mdx (+12 −2)

````diff
@@ -23,6 +23,7 @@ The Mistral provider is available in the `@ai-sdk/mistral` module. You can insta
     <Snippet text="yarn add @ai-sdk/mistral" dark />
   </Tab>
 </Tabs>
+
 ## Provider Instance
 
 You can import the default provider instance `mistral` from `@ai-sdk/mistral`:
@@ -58,7 +59,7 @@ You can use the following optional settings to customize the Mistral provider in
 
   Custom headers to include in the requests.
 
-## Models
+## Language Models
 
 You can create models that call the [Mistral chat API](https://docs.mistral.ai/api/#operation/createChatCompletion) using provider instance.
 The first argument is the model id, e.g. `mistral-large-latest`.
@@ -85,9 +86,18 @@ The following optional settings are available for Mistral models:
 
   Defaults to `false`.
 
-## Model Capabilities
+### Model Capabilities
 
 | Model                  | Image Input         | Object Generation   | Tool Usage          | Tool Streaming      |
 | ---------------------- | ------------------- | ------------------- | ------------------- | ------------------- |
 | `mistral-large-latest` | <Cross size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> |
 | `mistral-small-latest` | <Cross size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> |
+
+## Embedding Models
+
+You can create models that call the [Mistral embeddings API](https://docs.mistral.ai/api/#operation/createEmbedding)
+using the `.embedding()` factory method.
+
+```ts
+const model = mistral.embedding('mistral-embed');
+```
````

Diff for: examples/ai-core/src/embed-many/mistral.ts (+20)

````diff
@@ -0,0 +1,20 @@
+import { mistral } from '@ai-sdk/mistral';
+import { embedMany } from 'ai';
+import dotenv from 'dotenv';
+
+dotenv.config();
+
+async function main() {
+  const { embeddings } = await embedMany({
+    model: mistral.embedding('mistral-embed'),
+    values: [
+      'sunny day at the beach',
+      'rainy afternoon in the city',
+      'snowy night in the mountains',
+    ],
+  });
+
+  console.log(embeddings);
+}
+
+main().catch(console.error);
````

Diff for: examples/ai-core/src/embed-many/openai.ts (+20)

````diff
@@ -0,0 +1,20 @@
+import { openai } from '@ai-sdk/openai';
+import { embedMany } from 'ai';
+import dotenv from 'dotenv';
+
+dotenv.config();
+
+async function main() {
+  const { embeddings } = await embedMany({
+    model: openai.embedding('text-embedding-3-small'),
+    values: [
+      'sunny day at the beach',
+      'rainy afternoon in the city',
+      'snowy night in the mountains',
+    ],
+  });
+
+  console.log(embeddings);
+}
+
+main().catch(console.error);
````

Diff for: packages/core/core/embed/embed-many.test.ts (+74)

````diff
@@ -0,0 +1,74 @@
+import assert from 'node:assert';
+import {
+  MockEmbeddingModelV1,
+  mockEmbed,
+} from '../test/mock-embedding-model-v1';
+import { embedMany } from './embed-many';
+
+const dummyEmbeddings = [
+  [0.1, 0.2, 0.3],
+  [0.4, 0.5, 0.6],
+  [0.7, 0.8, 0.9],
+];
+
+const testValues = [
+  'sunny day at the beach',
+  'rainy afternoon in the city',
+  'snowy night in the mountains',
+];
+
+describe('result.embedding', () => {
+  it('should generate embeddings', async () => {
+    const result = await embedMany({
+      model: new MockEmbeddingModelV1({
+        maxEmbeddingsPerCall: 5,
+        doEmbed: mockEmbed(testValues, dummyEmbeddings),
+      }),
+      values: testValues,
+    });
+
+    assert.deepStrictEqual(result.embeddings, dummyEmbeddings);
+  });
+
+  it('should generate embeddings when several calls are required', async () => {
+    let callCount = 0;
+
+    const result = await embedMany({
+      model: new MockEmbeddingModelV1({
+        maxEmbeddingsPerCall: 2,
+        doEmbed: async ({ values }) => {
+          if (callCount === 0) {
+            assert.deepStrictEqual(values, testValues.slice(0, 2));
+            callCount++;
+            return { embeddings: dummyEmbeddings.slice(0, 2) };
+          }
+
+          if (callCount === 1) {
+            assert.deepStrictEqual(values, testValues.slice(2));
+            callCount++;
+            return { embeddings: dummyEmbeddings.slice(2) };
+          }
+
+          throw new Error('Unexpected call');
+        },
+      }),
+      values: testValues,
+    });
+
+    assert.deepStrictEqual(result.embeddings, dummyEmbeddings);
+  });
+});
+
+describe('result.values', () => {
+  it('should include values in the result', async () => {
+    const result = await embedMany({
+      model: new MockEmbeddingModelV1({
+        maxEmbeddingsPerCall: 5,
+        doEmbed: mockEmbed(testValues, dummyEmbeddings),
+      }),
+      values: testValues,
+    });
+
+    assert.deepStrictEqual(result.values, testValues);
+  });
+});
````

Diff for: packages/core/core/embed/embed-many.ts (+98)

````diff
@@ -0,0 +1,98 @@
+import { Embedding, EmbeddingModel } from '../types';
+import { retryWithExponentialBackoff } from '../util/retry-with-exponential-backoff';
+import { splitArray } from '../util/split-array';
+
+/**
+Embed several values using an embedding model. The type of the value is defined
+by the embedding model.
+
+`embedMany` automatically splits large requests into smaller chunks if the model
+has a limit on how many embeddings can be generated in a single call.
+
+@param model - The embedding model to use.
+@param values - The values that should be embedded.
+
+@param maxRetries - Maximum number of retries. Set to 0 to disable retries. Default: 2.
+@param abortSignal - An optional abort signal that can be used to cancel the call.
+
+@returns A result object that contains the embeddings, the values, and additional information.
+ */
+export async function embedMany<VALUE>({
+  model,
+  values,
+  maxRetries,
+  abortSignal,
+}: {
+  /**
+The embedding model to use.
+   */
+  model: EmbeddingModel<VALUE>;
+
+  /**
+The values that should be embedded.
+   */
+  values: Array<VALUE>;
+
+  /**
+Maximum number of retries per embedding model call. Set to 0 to disable retries.
+
+@default 2
+   */
+  maxRetries?: number;
+
+  /**
+Abort signal.
+   */
+  abortSignal?: AbortSignal;
+}): Promise<EmbedManyResult<VALUE>> {
+  const retry = retryWithExponentialBackoff({ maxRetries });
+  const maxEmbeddingsPerCall = model.maxEmbeddingsPerCall;
+
+  // the model has not specified limits on
+  // how many embeddings can be generated in a single call
+  if (maxEmbeddingsPerCall == null) {
+    const modelResponse = await retry(() =>
+      model.doEmbed({ values, abortSignal }),
+    );
+
+    return new EmbedManyResult({
+      values,
+      embeddings: modelResponse.embeddings,
+    });
+  }
+
+  // split the values into chunks that are small enough for the model:
+  const valueChunks = splitArray(values, maxEmbeddingsPerCall);
+
+  // serially embed the chunks:
+  const embeddings: Array<Embedding> = [];
+  for (const chunk of valueChunks) {
+    const modelResponse = await retry(() =>
+      model.doEmbed({ values: chunk, abortSignal }),
+    );
+    embeddings.push(...modelResponse.embeddings);
+  }
+
+  return new EmbedManyResult({ values, embeddings });
+}
+
+/**
+The result of an `embedMany` call.
+It contains the embeddings, the values, and additional information.
+ */
+export class EmbedManyResult<VALUE> {
+  /**
+The values that were embedded.
+   */
+  readonly values: Array<VALUE>;
+
+  /**
+The embeddings. They are in the same order as the values.
+   */
+  readonly embeddings: Array<Embedding>;
+
+  constructor(options: { values: Array<VALUE>; embeddings: Array<Embedding> }) {
+    this.values = options.values;
+    this.embeddings = options.embeddings;
+  }
+}
````
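The implementation above wraps every `doEmbed` call in `retryWithExponentialBackoff({ maxRetries })`. The real helper lives in `../util/retry-with-exponential-backoff` and is not shown in this diff; a simplified sketch of the shape it must have (a factory returning an async `retry` function that re-invokes a thunk, doubling the delay between failed attempts) could look like:

```typescript
// Simplified sketch only; the actual helper in ../util may differ in
// signature and error handling.
function retryWithExponentialBackoff({
  maxRetries = 2,
  initialDelayInMs = 0, // 0 so the sketch runs instantly; a real delay would start higher
}: { maxRetries?: number; initialDelayInMs?: number } = {}) {
  return async function retry<T>(fn: () => PromiseLike<T>): Promise<T> {
    let delayInMs = initialDelayInMs;
    for (let attempt = 0; ; attempt++) {
      try {
        return await fn();
      } catch (error) {
        if (attempt >= maxRetries) throw error; // retries exhausted
        await new Promise(resolve => setTimeout(resolve, delayInMs));
        delayInMs = delayInMs === 0 ? 1 : delayInMs * 2; // exponential backoff
      }
    }
  };
}

// usage mirroring embedMany: retry a flaky async call
const retry = retryWithExponentialBackoff({ maxRetries: 2 });
let attempts = 0;
retry(async () => {
  attempts++;
  if (attempts < 3) throw new Error('transient failure');
  return 'ok';
}).then(result => console.log(result, attempts)); // ok 3
```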

Diff for: packages/core/core/embed/embed.test.ts (+18 −8)

````diff
@@ -1,5 +1,8 @@
 import assert from 'node:assert';
-import { MockEmbeddingModelV1 } from '../test/mock-embedding-model-v1';
+import {
+  MockEmbeddingModelV1,
+  mockEmbed,
+} from '../test/mock-embedding-model-v1';
 import { embed } from './embed';
 
 const dummyEmbedding = [0.1, 0.2, 0.3];
@@ -9,17 +12,24 @@ describe('result.embedding', () => {
   it('should generate embedding', async () => {
     const result = await embed({
       model: new MockEmbeddingModelV1({
-        doEmbed: async ({ values }) => {
-          assert.deepStrictEqual(values, [testValue]);
-
-          return {
-            embeddings: [dummyEmbedding],
-          };
-        },
+        doEmbed: mockEmbed([testValue], [dummyEmbedding]),
       }),
       value: testValue,
     });
 
     assert.deepStrictEqual(result.embedding, dummyEmbedding);
   });
 });
+
+describe('result.value', () => {
+  it('should include value in the result', async () => {
+    const result = await embed({
+      model: new MockEmbeddingModelV1({
+        doEmbed: mockEmbed([testValue], [dummyEmbedding]),
+      }),
+      value: testValue,
+    });
+
+    assert.deepStrictEqual(result.value, testValue);
+  });
+});
````

Diff for: packages/core/core/embed/index.ts (+1)

````diff
@@ -1 +1,2 @@
 export * from './embed';
+export * from './embed-many';
````

Diff for: packages/core/core/test/mock-embedding-model-v1.ts (+11)

````diff
@@ -1,4 +1,5 @@
 import { EmbeddingModelV1 } from '@ai-sdk/provider';
+import { Embedding } from '../types';
 
 export class MockEmbeddingModelV1<VALUE> implements EmbeddingModelV1<VALUE> {
   readonly specificationVersion = 'v1';
@@ -31,6 +32,16 @@ export class MockEmbeddingModelV1<VALUE> implements EmbeddingModelV1<VALUE> {
   }
 }
 
+export function mockEmbed<VALUE>(
+  expectedValues: Array<VALUE>,
+  embeddings: Array<Embedding>,
+): EmbeddingModelV1<VALUE>['doEmbed'] {
+  return async ({ values }) => {
+    assert.deepStrictEqual(expectedValues, values);
+    return { embeddings };
+  };
+}
+
 function notImplemented(): never {
   throw new Error('Not implemented');
 }
````

Diff for: packages/core/core/util/split-array.test.ts (+62)

````diff
@@ -0,0 +1,62 @@
+import { describe, it, expect } from 'vitest';
+import { splitArray } from './split-array';
+
+describe('splitArray', () => {
+  it('should split an array into chunks of the specified size', () => {
+    const array = [1, 2, 3, 4, 5];
+    const size = 2;
+    const result = splitArray(array, size);
+    expect(result).toEqual([[1, 2], [3, 4], [5]]);
+  });
+
+  it('should return an empty array when the input array is empty', () => {
+    const array: number[] = [];
+    const size = 2;
+    const result = splitArray(array, size);
+    expect(result).toEqual([]);
+  });
+
+  it('should return the original array when the chunk size is greater than the array length', () => {
+    const array = [1, 2, 3];
+    const size = 5;
+    const result = splitArray(array, size);
+    expect(result).toEqual([[1, 2, 3]]);
+  });
+
+  it('should return the original array when the chunk size is equal to the array length', () => {
+    const array = [1, 2, 3];
+    const size = 3;
+    const result = splitArray(array, size);
+    expect(result).toEqual([[1, 2, 3]]);
+  });
+
+  it('should handle chunk size of 1 correctly', () => {
+    const array = [1, 2, 3];
+    const size = 1;
+    const result = splitArray(array, size);
+    expect(result).toEqual([[1], [2], [3]]);
+  });
+
+  it('should throw an error for chunk size of 0', () => {
+    const array = [1, 2, 3];
+    const size = 0;
+    expect(() => splitArray(array, size)).toThrow(
+      'chunkSize must be greater than 0',
+    );
+  });
+
+  it('should throw an error for negative chunk size', () => {
+    const array = [1, 2, 3];
+    const size = -1;
+    expect(() => splitArray(array, size)).toThrow(
+      'chunkSize must be greater than 0',
+    );
+  });
+
+  it('should handle non-integer chunk size by flooring the size', () => {
+    const array = [1, 2, 3, 4, 5];
+    const size = 2.5;
+    const result = splitArray(array, Math.floor(size));
+    expect(result).toEqual([[1, 2], [3, 4], [5]]);
+  });
+});
````

Diff for: packages/core/core/util/split-array.ts (+20)

````diff
@@ -0,0 +1,20 @@
+/**
+ * Splits an array into chunks of a specified size.
+ *
+ * @template T - The type of elements in the array.
+ * @param {T[]} array - The array to split.
+ * @param {number} chunkSize - The size of each chunk.
+ * @returns {T[][]} - A new array containing the chunks.
+ */
+export function splitArray<T>(array: T[], chunkSize: number): T[][] {
+  if (chunkSize <= 0) {
+    throw new Error('chunkSize must be greater than 0');
+  }
+
+  const result = [];
+  for (let i = 0; i < array.length; i += chunkSize) {
+    result.push(array.slice(i, i + chunkSize));
+  }
+
+  return result;
+}
````
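This helper is what lets `embedMany` respect a model's `maxEmbeddingsPerCall`: chunk the inputs, call the model once per chunk, and flatten the per-chunk results back into input order. A self-contained sketch of that flow (the `doEmbed` stand-in below is illustrative, not a real model call):

```typescript
// splitArray as added in this commit:
function splitArray<T>(array: T[], chunkSize: number): T[][] {
  if (chunkSize <= 0) {
    throw new Error('chunkSize must be greater than 0');
  }
  const result: T[][] = [];
  for (let i = 0; i < array.length; i += chunkSize) {
    result.push(array.slice(i, i + chunkSize));
  }
  return result;
}

// Stand-in "model": embeds each string as [its length] so the flow is visible.
const maxEmbeddingsPerCall = 2;
const doEmbed = (values: string[]): number[][] => values.map(v => [v.length]);

// Mirrors the chunked path of embedMany: one model call per chunk,
// results flattened in input order.
function embedInChunks(values: string[]): number[][] {
  const embeddings: number[][] = [];
  for (const chunk of splitArray(values, maxEmbeddingsPerCall)) {
    embeddings.push(...doEmbed(chunk)); // two calls for three values here
  }
  return embeddings;
}

console.log(embedInChunks(['a', 'bb', 'ccc'])); // [ [ 1 ], [ 2 ], [ 3 ] ]
```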
