feat (core): add object promise to streamObject result (#1858)

lgrammel · web-flow · commit 102ca22f6c37 · 2024-06-06T09:58:13.000+02:00
diff --git a/.changeset/metal-ducks-compete.md b/.changeset/metal-ducks-compete.md
@@ -0,0 +1,5 @@
+---
+'@ai-sdk/provider': patch
+---
+
+fix (@ai-sdk/provider): fix TypeValidationError.isTypeValidationError
diff --git a/.changeset/orange-bananas-roll.md b/.changeset/orange-bananas-roll.md
@@ -0,0 +1,5 @@
+---
+'ai': patch
+---
+
+feat (core): add object promise to streamObject result
diff --git a/content/docs/03-ai-sdk-core/10-generating-structured-data.mdx b/content/docs/03-ai-sdk-core/10-generating-structured-data.mdx
@@ -47,7 +47,7 @@ While some models (like OpenAI) natively support object generation, others requi
 - `json`: The JSON schema and an instruction is injected into the prompt. If the provider supports JSON mode, it is enabled.
 - `grammar`: The provider is instructed to convert the JSON schema into a provider specific grammar and use it to select the output tokens.
 
-<Note>Please note that most providers do not support all modes.</Note>
+<Note>Please note that not every provider supports all generation modes.</Note>
 
 ## Streaming Objects
 
diff --git a/content/docs/07-reference/ai-sdk-core/04-stream-object.mdx b/content/docs/07-reference/ai-sdk-core/04-stream-object.mdx
@@ -342,6 +342,12 @@ for await (const partialObject of partialObjectStream) {
         },
       ],
     },
+    {
+      name: 'object',
+      type: 'Promise<T>',
+      description:
+        'The generated object (typed according to the schema). Resolved when the response is finished.',
+    },
     {
       name: 'partialObjectStream',
       type: 'AsyncIterableStream<DeepPartial<T>>',
@@ -450,5 +456,9 @@ for await (const partialObject of partialObjectStream) {
       title: 'Recording Token Usage',
       link: '/examples/node/streaming-structured-data/token-usage',
     },
+    {
+      title: 'Recording Final Object',
+      link: '/examples/node/streaming-structured-data/object',
+    },
   ]}
 />
diff --git a/content/examples/03-node/02-streaming-structured-data/10-token-usage.mdx b/content/examples/03-node/02-streaming-structured-data/10-token-usage.mdx
@@ -42,4 +42,8 @@ result.usage.then(recordTokenUsage);
 
 // use with async/await:
 recordTokenUsage(await result.usage);
+
+// note: the stream needs to be consumed because of backpressure
+for await (const partialObject of result.partialObjectStream) {
+}
 ```
diff --git a/content/examples/03-node/02-streaming-structured-data/12-object.mdx b/content/examples/03-node/02-streaming-structured-data/12-object.mdx
@@ -0,0 +1,44 @@
+---
+title: Recording Final Object
+description: Examples of how to record the final object when streaming structured data.
+---
+
+# Recording Final Object
+
+When you're streaming structured data, you may want to record the final object for logging or other purposes.
+
+The [`streamObject`](/docs/reference/ai-sdk-core/stream-object) result contains an `object` promise that resolves to the final object.
+The object is fully typed. If it fails validation against the schema, the promise is rejected with a `TypeValidationError`.
+
+```ts file='index.ts' highlight={"17-26"}
+import { openai } from '@ai-sdk/openai';
+import { streamObject } from 'ai';
+import { z } from 'zod';
+
+const result = await streamObject({
+  model: openai('gpt-4-turbo'),
+  schema: z.object({
+    recipe: z.object({
+      name: z.string(),
+      ingredients: z.array(z.string()),
+      steps: z.array(z.string()),
+    }),
+  }),
+  prompt: 'Generate a lasagna recipe.',
+});
+
+result.object
+  .then(({ recipe }) => {
+    // do something with the fully typed, final object:
+    console.log('Recipe:', JSON.stringify(recipe, null, 2));
+  })
+  .catch(error => {
+    // handle type validation failure
+    // (when the object does not match the schema):
+    console.error(error);
+  });
+
+// note: the stream needs to be consumed because of backpressure
+for await (const partialObject of result.partialObjectStream) {
+}
+```
diff --git a/content/examples/03-node/02-streaming-structured-data/index.mdx b/content/examples/03-node/02-streaming-structured-data/index.mdx
@@ -17,5 +17,9 @@ The following sections will guide you through streaming structured data with Nod
       title: 'Recording Token Usage',
       href: '/examples/node/streaming-structured-data/token-usage',
     },
+    {
+      title: 'Recording Final Object',
+      href: '/examples/node/streaming-structured-data/object',
+    },
   ]}
 />
diff --git a/examples/ai-core/src/stream-object/openai-object.ts b/examples/ai-core/src/stream-object/openai-object.ts
@@ -0,0 +1,37 @@
+import { openai } from '@ai-sdk/openai';
+import { streamObject } from 'ai';
+import dotenv from 'dotenv';
+import { z } from 'zod';
+
+dotenv.config();
+
+async function main() {
+  const result = await streamObject({
+    model: openai('gpt-4-turbo'),
+    schema: z.object({
+      recipe: z.object({
+        name: z.string(),
+        ingredients: z.array(z.string()),
+        steps: z.array(z.string()),
+      }),
+    }),
+    prompt: 'Generate a lasagna recipe.',
+  });
+
+  result.object
+    .then(({ recipe }) => {
+      // do something with the fully typed, final object:
+      console.log('Recipe:', JSON.stringify(recipe, null, 2));
+    })
+    .catch(error => {
+      // handle type validation failure
+      // (when the object does not match the schema):
+      console.error(error);
+    });
+
+  // note: the stream needs to be consumed because of backpressure
+  for await (const partialObject of result.partialObjectStream) {
+  }
+}
+
+main().catch(console.error);
diff --git a/examples/ai-core/src/stream-object/openai-token-usage.ts b/examples/ai-core/src/stream-object/openai-token-usage.ts
@@ -0,0 +1,43 @@
+import { openai } from '@ai-sdk/openai';
+import { streamObject, TokenUsage } from 'ai';
+import dotenv from 'dotenv';
+import { z } from 'zod';
+
+dotenv.config();
+
+async function main() {
+  const result = await streamObject({
+    model: openai('gpt-4-turbo'),
+    schema: z.object({
+      recipe: z.object({
+        name: z.string(),
+        ingredients: z.array(z.string()),
+        steps: z.array(z.string()),
+      }),
+    }),
+    prompt: 'Generate a lasagna recipe.',
+  });
+
+  // your custom function to record token usage:
+  function recordTokenUsage({
+    promptTokens,
+    completionTokens,
+    totalTokens,
+  }: TokenUsage) {
+    console.log('Prompt tokens:', promptTokens);
+    console.log('Completion tokens:', completionTokens);
+    console.log('Total tokens:', totalTokens);
+  }
+
+  // use as promise:
+  result.usage.then(recordTokenUsage);
+
+  // use with async/await:
+  recordTokenUsage(await result.usage);
+
+  // note: the stream needs to be consumed because of backpressure
+  for await (const partialObject of result.partialObjectStream) {
+  }
+}
+
+main().catch(console.error);
diff --git a/examples/ai-core/src/stream-object/token-usage.ts b/examples/ai-core/src/stream-object/token-usage.ts
diff --git a/packages/core/core/generate-object/stream-object.test.ts b/packages/core/core/generate-object/stream-object.test.ts
@@ -4,6 +4,7 @@ import { convertArrayToReadableStream } from '../test/convert-array-to-readable-
 import { convertAsyncIterableToArray } from '../test/convert-async-iterable-to-array';
 import { MockLanguageModelV1 } from '../test/mock-language-model-v1';
 import { streamObject } from './stream-object';
+import { TypeValidationError } from '@ai-sdk/provider';
 
 describe('result.objectStream', () => {
   it('should send object deltas with json mode', async () => {
@@ -255,3 +256,103 @@ describe('result.usage', () => {
     });
   });
 });
+
+describe('result.object', () => {
+  it('should resolve with typed object', async () => {
+    const result = await streamObject({
+      model: new MockLanguageModelV1({
+        doStream: async ({ prompt, mode }) => {
+          assert.deepStrictEqual(mode, { type: 'object-json' });
+          assert.deepStrictEqual(prompt, [
+            {
+              role: 'system',
+              content:
+                'JSON schema:\n' +
+                '{"type":"object","properties":{"content":{"type":"string"}},"required":["content"],"additionalProperties":false,"$schema":"http://json-schema.org/draft-07/schema#"}\n' +
+                'You MUST answer with a JSON object that matches the JSON schema above.',
+            },
+            { role: 'user', content: [{ type: 'text', text: 'prompt' }] },
+          ]);
+
+          return {
+            stream: convertArrayToReadableStream([
+              { type: 'text-delta', textDelta: '{ ' },
+              { type: 'text-delta', textDelta: '"content": ' },
+              { type: 'text-delta', textDelta: `"Hello, ` },
+              { type: 'text-delta', textDelta: `world` },
+              { type: 'text-delta', textDelta: `!"` },
+              { type: 'text-delta', textDelta: ' }' },
+              {
+                type: 'finish',
+                finishReason: 'stop',
+                usage: { completionTokens: 10, promptTokens: 3 },
+              },
+            ]),
+            rawCall: { rawPrompt: 'prompt', rawSettings: {} },
+          };
+        },
+      }),
+      schema: z.object({ content: z.string() }),
+      mode: 'json',
+      prompt: 'prompt',
+    });
+
+    // consume stream (runs in parallel)
+    convertAsyncIterableToArray(result.partialObjectStream);
+
+    assert.deepStrictEqual(await result.object, {
+      content: 'Hello, world!',
+    });
+  });
+
+  it('should reject object promise when the streamed object does not match the schema', async () => {
+    const result = await streamObject({
+      model: new MockLanguageModelV1({
+        doStream: async ({ prompt, mode }) => {
+          assert.deepStrictEqual(mode, { type: 'object-json' });
+          assert.deepStrictEqual(prompt, [
+            {
+              role: 'system',
+              content:
+                'JSON schema:\n' +
+                '{"type":"object","properties":{"content":{"type":"string"}},"required":["content"],"additionalProperties":false,"$schema":"http://json-schema.org/draft-07/schema#"}\n' +
+                'You MUST answer with a JSON object that matches the JSON schema above.',
+            },
+            { role: 'user', content: [{ type: 'text', text: 'prompt' }] },
+          ]);
+
+          return {
+            stream: convertArrayToReadableStream([
+              { type: 'text-delta', textDelta: '{ ' },
+              { type: 'text-delta', textDelta: '"invalid": ' },
+              { type: 'text-delta', textDelta: `"Hello, ` },
+              { type: 'text-delta', textDelta: `world` },
+              { type: 'text-delta', textDelta: `!"` },
+              { type: 'text-delta', textDelta: ' }' },
+              {
+                type: 'finish',
+                finishReason: 'stop',
+                usage: { completionTokens: 10, promptTokens: 3 },
+              },
+            ]),
+            rawCall: { rawPrompt: 'prompt', rawSettings: {} },
+          };
+        },
+      }),
+      schema: z.object({ content: z.string() }),
+      mode: 'json',
+      prompt: 'prompt',
+    });
+
+    // consume stream (runs in parallel)
+    convertAsyncIterableToArray(result.partialObjectStream);
+
+    await result.object
+      .then(() => {
+        assert.fail('Expected object promise to be rejected');
+      })
+      .catch(error => {
+        expect(TypeValidationError.isTypeValidationError(error)).toBeTruthy();
+      });
+  });
+});
diff --git a/packages/core/core/generate-object/stream-object.ts b/packages/core/core/generate-object/stream-object.ts
diff --git a/packages/provider/src/errors/type-validation-error.ts b/packages/provider/src/errors/type-validation-error.ts

-Original file line number
+Diff line change
 import { parsePartialJson } from '../util/parse-partial-json';
 import { retryWithExponentialBackoff } from '../util/retry-with-exponential-backoff';
 import { injectJsonSchemaIntoSystem } from './inject-json-schema-into-system';
 +import { safeValidateTypes } from '@ai-sdk/provider-utils';
 /**
 Generate a structured, typed object for a given prompt and schema using a language model.
     stream: result.stream.pipeThrough(new TransformStream(transformer)),
     warnings: result.warnings,
     rawResponse: result.rawResponse,
 +    schema,
   });
+}
 The result of a `streamObject` call that contains the partial object stream and additional information.
  */
 export class StreamObjectResult<T> {
 -  readonly originalStream: ReadableStream<ObjectStreamPart<T>>;
 +  private readonly originalStream: ReadableStream<ObjectStreamPart<T>>;
   /**
 Warnings from the model provider (e.g. unsupported settings)
    */
   readonly warnings: CallWarning[] | undefined;
 +  /**
 +The generated object (typed according to the schema). Resolved when the response is finished.
 +   */
 +  readonly object: Promise<T>;
++
   /**
 The token usage of the generated response. Resolved when the response is finished.
    */
     stream,
     warnings,
     rawResponse,
 +    schema,
   }: {
     stream: ReadableStream<string | ObjectStreamInputPart>;
     warnings: CallWarning[] | undefined;
     rawResponse?: {
       headers?: Record<string, string>;
     };
 +    schema: z.Schema<T>;
   }) {
     this.warnings = warnings;
     this.rawResponse = rawResponse;
 +    // initialize object promise
 +    let resolveObject: (value: T | PromiseLike<T>) => void;
 +    let rejectObject: (reason?: any) => void;
 +    this.object = new Promise<T>((resolve, reject) => {
 +      resolveObject = resolve;
 +      rejectObject = reject;
 +    });
++
     // initialize usage promise
     let resolveUsage: (value: TokenUsage | PromiseLike<TokenUsage>) => void;
     this.usage = new Promise<TokenUsage>(resolve => {
               // store usage for promises and onFinish callback:
               usage = calculateTokenUsage(chunk.usage);
 -              controller.enqueue({
 -                ...chunk,
 -                usage,
 -              });
 +              controller.enqueue({ ...chunk, usage });
               // resolve promises that can be resolved now:
               resolveUsage(usage);
 +              // resolve the object promise with the latest object:
 +              const validationResult = safeValidateTypes({
 +                value: latestObject,
 +                schema,
 +              });
++
 +              if (validationResult.success) {
 +                resolveObject(validationResult.value);
 +              } else {
 +                rejectObject(validationResult.error);
 +              }
++
               break;
+            }
@@ -18,12 +18,7 @@ export class TypeValidationError extends Error {
   }
 
   static isTypeValidationError(error: unknown): error is TypeValidationError {
-    return (
-      error instanceof Error &&
-      error.name === 'AI_TypeValidationError' &&
-      typeof (error as TypeValidationError).value === 'string' &&
-      typeof (error as TypeValidationError).cause === 'string'
-    );
+    return error instanceof Error && error.name === 'AI_TypeValidationError';
   }
 
   toJSON() {