Commit 08b5c50

feat (core): add tokenUsage to streamObject result. (#1815)

Authored Jun 4, 2024 · 1 parent ed08236

12 files changed: +320 -91 lines
 
New changeset file (+5):

@@ -0,0 +1,5 @@
+---
+'ai': patch
+---
+
+feat (core): add tokenUsage to streamObject result

‎content/docs/07-reference/ai-sdk-core/04-stream-object.mdx

+33 -1

@@ -314,9 +314,37 @@ for await (const partialObject of partialObjectStream) {
 
 <PropertiesTable
   content={[
+    {
+      name: 'usage',
+      type: 'Promise<TokenUsage>',
+      description:
+        'The token usage of the generated text. Resolved when the response is finished.',
+      properties: [
+        {
+          type: 'TokenUsage',
+          parameters: [
+            {
+              name: 'promptTokens',
+              type: 'number',
+              description: 'The total number of tokens in the prompt.',
+            },
+            {
+              name: 'completionTokens',
+              type: 'number',
+              description: 'The total number of tokens in the completion.',
+            },
+            {
+              name: 'totalTokens',
+              type: 'number',
+              description: 'The total number of tokens generated.',
+            },
+          ],
+        },
+      ],
+    },
     {
       name: 'partialObjectStream',
-      type: 'AsyncIterable<DeepPartial<T>> & ReadableStream<DeepPartial<T>>',
+      type: 'AsyncIterableStream<DeepPartial<T>>',
       description:
         'Note that the partial object is not validated. If you want to be certain that the actual content matches your schema, you need to implement your own validation for partial results.',
     },
@@ -356,6 +384,10 @@ for await (const partialObject of partialObjectStream) {
     {
       type: 'FinishPart',
       parameters: [
+        {
+          name: 'type',
+          type: "'finish'",
+        },
        {
          name: 'finishReason',
          type: 'FinishReason',
‎content/examples/03-node/02-generating-structured-data/02-stream-object.mdx

-39
This file was deleted.

‎content/examples/03-node/02-generating-structured-data/index.mdx

-4
@@ -13,10 +13,6 @@ The following sections will guide you through generating structured data with No
       title: 'Generating an Object',
       href: '/examples/node/generating-structured-data/generate-object',
     },
-    {
-      title: 'Stream Object Generation',
-      href: '/examples/node/generating-structured-data/stream-object',
-    },
     {
       title: 'Generate Object with Image Prompt',
       href: '/examples/node/generating-structured-data/add-images-to-prompt',
New file (+36): "Streaming Partial Objects" example page

@@ -0,0 +1,36 @@
+---
+title: Streaming Partial Objects
+description: Examples of how to stream partial objects.
+---
+
+# Streaming Partial Objects
+
+Object generation can sometimes take a long time to complete,
+especially when you're generating a large schema.
+
+In Generative UI use cases, it is useful to stream the object to the client in real-time
+to render UIs as the object is being generated.
+You can use the [`streamObject`](/docs/reference/ai-sdk-core/stream-object) function to generate partial object streams.
+
+```ts file='index.ts'
+import { openai } from '@ai-sdk/openai';
+import { streamObject } from 'ai';
+import { z } from 'zod';
+
+const { partialObjectStream } = await streamObject({
+  model: openai('gpt-4-turbo'),
+  schema: z.object({
+    recipe: z.object({
+      name: z.string(),
+      ingredients: z.array(z.string()),
+      steps: z.array(z.string()),
+    }),
+  }),
+  prompt: 'Generate a lasagna recipe.',
+});
+
+for await (const partialObject of partialObjectStream) {
+  console.clear();
+  console.log(partialObject);
+}
+```
New file (+45): "Recording Token Usage" example page

@@ -0,0 +1,45 @@
+---
+title: Recording Token Usage
+description: Examples of how to record token usage when streaming structured data.
+---
+
+# Recording Token Usage
+
+When you're streaming structured data, you may want to record the token usage for billing purposes.
+
+The [`streamObject`](/docs/reference/ai-sdk-core/stream-object) result contains a `usage` promise that resolves to the total token usage.
+
+```ts file='index.ts'
+import { openai } from '@ai-sdk/openai';
+import { streamObject, TokenUsage } from 'ai';
+import { z } from 'zod';
+
+const result = await streamObject({
+  model: openai('gpt-4-turbo'),
+  schema: z.object({
+    recipe: z.object({
+      name: z.string(),
+      ingredients: z.array(z.string()),
+      steps: z.array(z.string()),
+    }),
+  }),
+  prompt: 'Generate a lasagna recipe.',
+});
+
+// your custom function to record token usage:
+function recordTokenUsage({
+  promptTokens,
+  completionTokens,
+  totalTokens,
+}: TokenUsage) {
+  console.log('Prompt tokens:', promptTokens);
+  console.log('Completion tokens:', completionTokens);
+  console.log('Total tokens:', totalTokens);
+}
+
+// use as promise:
+result.usage.then(recordTokenUsage);
+
+// use with async/await:
+recordTokenUsage(await result.usage);
+```
New file (+21): "Streaming Structured Data" examples index

@@ -0,0 +1,21 @@
+---
+title: Streaming Structured Data
+description: Examples of how to stream structured data.
+---
+
+# Streaming Structured Data
+
+The following sections will guide you through streaming structured data with Node.js and the Vercel AI SDK.
+
+<IndexCards
+  cards={[
+    {
+      title: 'Streaming Partial Objects',
+      href: '/examples/node/streaming-structured-data/stream-object',
+    },
+    {
+      title: 'Recording Token Usage',
+      href: '/examples/node/streaming-structured-data/token-usage',
+    },
+  ]}
+/>
New file (+32): standalone token-usage example script

@@ -0,0 +1,32 @@
+import { openai } from '@ai-sdk/openai';
+import { streamObject, TokenUsage } from 'ai';
+import { z } from 'zod';
+
+const result = await streamObject({
+  model: openai('gpt-4-turbo'),
+  schema: z.object({
+    recipe: z.object({
+      name: z.string(),
+      ingredients: z.array(z.string()),
+      steps: z.array(z.string()),
+    }),
+  }),
+  prompt: 'Generate a lasagna recipe.',
+});
+
+// your custom function to record token usage:
+function recordTokenUsage({
+  promptTokens,
+  completionTokens,
+  totalTokens,
+}: TokenUsage) {
+  console.log('Prompt tokens:', promptTokens);
+  console.log('Completion tokens:', completionTokens);
+  console.log('Total tokens:', totalTokens);
+}
+
+// use as promise:
+result.usage.then(recordTokenUsage);
+
+// use with async/await:
+recordTokenUsage(await result.usage);

‎packages/core/core/generate-object/stream-object.test.ts

+53
@@ -138,7 +138,9 @@ describe('result.objectStream', () => {
       ],
     );
   });
+});
 
+describe('result.fullStream', () => {
   it('should send full stream data', async () => {
     const result = await streamObject({
       model: new MockLanguageModelV1({
@@ -202,3 +204,54 @@ describe('result.objectStream', () => {
     );
   });
 });
+
+describe('result.usage', () => {
+  it('should resolve with token usage', async () => {
+    const result = await streamObject({
+      model: new MockLanguageModelV1({
+        doStream: async ({ prompt, mode }) => {
+          assert.deepStrictEqual(mode, { type: 'object-json' });
+          assert.deepStrictEqual(prompt, [
+            {
+              role: 'system',
+              content:
+                'JSON schema:\n' +
+                '{"type":"object","properties":{"content":{"type":"string"}},"required":["content"],"additionalProperties":false,"$schema":"http://json-schema.org/draft-07/schema#"}\n' +
+                'You MUST answer with a JSON object that matches the JSON schema above.',
+            },
+            { role: 'user', content: [{ type: 'text', text: 'prompt' }] },
+          ]);
+
+          return {
+            stream: convertArrayToReadableStream([
+              { type: 'text-delta', textDelta: '{ ' },
+              { type: 'text-delta', textDelta: '"content": ' },
+              { type: 'text-delta', textDelta: `"Hello, ` },
+              { type: 'text-delta', textDelta: `world` },
+              { type: 'text-delta', textDelta: `!"` },
+              { type: 'text-delta', textDelta: ' }' },
+              {
+                type: 'finish',
+                finishReason: 'stop',
+                usage: { completionTokens: 10, promptTokens: 3 },
+              },
+            ]),
+            rawCall: { rawPrompt: 'prompt', rawSettings: {} },
+          };
+        },
+      }),
+      schema: z.object({ content: z.string() }),
+      mode: 'json',
+      prompt: 'prompt',
+    });
+
+    // consume stream (runs in parallel)
+    convertAsyncIterableToArray(result.partialObjectStream);
+
+    assert.deepStrictEqual(await result.usage, {
+      completionTokens: 10,
+      promptTokens: 3,
+      totalTokens: 13,
+    });
+  });
+});
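The `// consume stream (runs in parallel)` line in the test is load-bearing: the usage promise is resolved inside the stream transform, so it settles only after the finish chunk has flowed through. A hedged sketch of what that implies for callers (illustrative model, schema, and prompt):

```ts
import { openai } from '@ai-sdk/openai';
import { streamObject } from 'ai';
import { z } from 'zod';

const result = await streamObject({
  model: openai('gpt-4-turbo'),
  schema: z.object({ content: z.string() }),
  prompt: 'Say hello.',
});

// If partialObjectStream were never read, the transform would never see
// the finish chunk and result.usage would stay pending.
for await (const partialObject of result.partialObjectStream) {
  console.log(partialObject);
}

// settles once the finish chunk has passed through the stream:
const { promptTokens, completionTokens, totalTokens } = await result.usage;
console.log({ promptTokens, completionTokens, totalTokens });
```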

‎packages/core/core/generate-object/stream-object.ts

+80 -47

@@ -3,7 +3,7 @@ import {
   LanguageModelV1StreamPart,
 } from '@ai-sdk/provider';
 import { z } from 'zod';
-import { calculateTokenUsage } from '../generate-text/token-usage';
+import { TokenUsage, calculateTokenUsage } from '../generate-text/token-usage';
 import { CallSettings } from '../prompt/call-settings';
 import { convertToLanguageModelPrompt } from '../prompt/convert-to-language-model-prompt';
 import { getValidatedPrompt } from '../prompt/get-validated-prompt';
@@ -230,7 +230,7 @@ Default and recommended: 'auto' (best mode for the model).
   });
 }
 
-export type ObjectStreamPartInput =
+export type ObjectStreamInputPart =
   | {
       type: 'error';
       error: unknown;
@@ -247,7 +247,7 @@ export type ObjectStreamPartInput =
     };
 
 export type ObjectStreamPart<T> =
-  | ObjectStreamPartInput
+  | ObjectStreamInputPart
   | {
       type: 'object';
       object: DeepPartial<T>;
@@ -257,15 +257,18 @@ export type ObjectStreamPart<T> =
 The result of a `streamObject` call that contains the partial object stream and additional information.
  */
 export class StreamObjectResult<T> {
-  private readonly originalStream: ReadableStream<
-    string | ObjectStreamPartInput
-  >;
+  readonly originalStream: ReadableStream<ObjectStreamPart<T>>;
 
   /**
 Warnings from the model provider (e.g. unsupported settings)
   */
   readonly warnings: CallWarning[] | undefined;
 
+  /**
+The token usage of the generated response. Resolved when the response is finished.
+  */
+  readonly usage: Promise<TokenUsage>;
+
   /**
 Optional raw response data.
   */
@@ -281,75 +284,105 @@ Response headers.
     warnings,
     rawResponse,
   }: {
-    stream: ReadableStream<string | ObjectStreamPartInput>;
+    stream: ReadableStream<string | ObjectStreamInputPart>;
     warnings: CallWarning[] | undefined;
     rawResponse?: {
       headers?: Record<string, string>;
     };
   }) {
-    this.originalStream = stream;
     this.warnings = warnings;
    this.rawResponse = rawResponse;
-  }
-
-  get partialObjectStream(): AsyncIterableStream<DeepPartial<T>> {
-    let accumulatedText = '';
-    let latestObject: DeepPartial<T> | undefined = undefined;
-
-    return createAsyncIterableStream(this.originalStream, {
-      transform(chunk, controller) {
-        if (typeof chunk === 'string') {
-          accumulatedText += chunk;
-
-          const currentObject = parsePartialJson(
-            accumulatedText,
-          ) as DeepPartial<T>;
 
-          if (!isDeepEqualData(latestObject, currentObject)) {
-            latestObject = currentObject;
-
-            controller.enqueue(currentObject);
-          }
-        } else if (chunk.type === 'error') {
-          throw chunk.error;
-        }
-      },
+    // initialize usage promise
+    let resolveUsage: (value: TokenUsage | PromiseLike<TokenUsage>) => void;
+    this.usage = new Promise<TokenUsage>(resolve => {
+      resolveUsage = resolve;
     });
-  }
 
-  get fullStream(): AsyncIterableStream<ObjectStreamPart<T>> {
+    // store information for onFinish callback:
+    let usage: TokenUsage | undefined;
+
+    // pipe chunks through a transformation stream that extracts metadata:
     let accumulatedText = '';
     let latestObject: DeepPartial<T> | undefined = undefined;
 
-    return createAsyncIterableStream(this.originalStream, {
-      transform(chunk, controller) {
-        if (typeof chunk === 'string') {
-          accumulatedText += chunk;
-          const currentObject = parsePartialJson(
-            accumulatedText,
-          ) as DeepPartial<T>;
+    this.originalStream = stream.pipeThrough(
+      new TransformStream<string | ObjectStreamInputPart, ObjectStreamPart<T>>({
+        async transform(chunk, controller): Promise<void> {
+          // process partial text chunks
+          if (typeof chunk === 'string') {
+            accumulatedText += chunk;
+
+            const currentObject = parsePartialJson(
+              accumulatedText,
+            ) as DeepPartial<T>;
+
+            if (!isDeepEqualData(latestObject, currentObject)) {
+              latestObject = currentObject;
 
-          if (!isDeepEqualData(latestObject, currentObject)) {
-            latestObject = currentObject;
+              controller.enqueue({ type: 'object', object: currentObject });
+            }
 
-            controller.enqueue({ type: 'object', object: currentObject });
+            return;
           }
-        } else {
+
           switch (chunk.type) {
-            case 'finish':
+            case 'finish': {
+              // store usage for promises and onFinish callback:
+              usage = calculateTokenUsage(chunk.usage);
+
               controller.enqueue({
                 ...chunk,
-                usage: calculateTokenUsage(chunk.usage),
+                usage,
               });
+
+              // resolve promises that can be resolved now:
+              resolveUsage(usage);
+
               break;
-            default:
+            }
+
+            default: {
               controller.enqueue(chunk);
               break;
+            }
+          }
+        },
+      }),
+    );
+  }
+
+  get partialObjectStream(): AsyncIterableStream<DeepPartial<T>> {
+    return createAsyncIterableStream(this.originalStream, {
+      transform(chunk, controller) {
+        switch (chunk.type) {
+          case 'object':
+            controller.enqueue(chunk.object);
+            break;
+
+          case 'finish':
+            break;
+
+          case 'error':
+            controller.error(chunk.error);
+            break;
+
+          default: {
+            const _exhaustiveCheck: never = chunk;
+            throw new Error(`Unsupported chunk type: ${_exhaustiveCheck}`);
          }
        }
      },
    });
  }
+
+  get fullStream(): AsyncIterableStream<ObjectStreamPart<T>> {
+    return createAsyncIterableStream(this.originalStream, {
+      transform(chunk, controller) {
+        controller.enqueue(chunk);
+      },
+    });
+  }
 }
 
 /**
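The pattern at the heart of this change is a deferred promise resolved from inside a TransformStream: the `resolveUsage` callback is captured outside the promise executor and invoked when the `finish` chunk passes through. A self-contained sketch of just that pattern, with illustrative names (`Usage`, `Chunk`, and `withUsagePromise` are not SDK exports):

```ts
// Minimal sketch of the deferred-promise-over-TransformStream pattern above.
type Usage = { promptTokens: number; completionTokens: number; totalTokens: number };
type Chunk =
  | { type: 'text-delta'; textDelta: string }
  | { type: 'finish'; usage: Usage };

function withUsagePromise(source: ReadableStream<Chunk>): {
  stream: ReadableStream<Chunk>;
  usage: Promise<Usage>;
} {
  // capture the resolver so the transform can settle the promise later
  let resolveUsage!: (value: Usage) => void;
  const usage = new Promise<Usage>(resolve => {
    resolveUsage = resolve;
  });

  const stream = source.pipeThrough(
    new TransformStream<Chunk, Chunk>({
      transform(chunk, controller) {
        if (chunk.type === 'finish') {
          resolveUsage(chunk.usage); // side channel: resolve on finish
        }
        controller.enqueue(chunk); // pass every chunk downstream unchanged
      },
    }),
  );

  return { stream, usage };
}
```

Because the transform only runs as the stream is pulled, the promise settles once a consumer reads past the finish chunk, which is the same reason the commit's test drains `partialObjectStream` before awaiting `result.usage`.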
Generate-text barrel file (+1):

@@ -1,2 +1,3 @@
 export * from './generate-text';
 export * from './stream-text';
+export type { TokenUsage } from './token-usage';

‎packages/core/core/generate-text/token-usage.ts

+14
@@ -1,6 +1,20 @@
+/**
+Represents the number of tokens used in a prompt and completion.
+ */
 export type TokenUsage = {
+  /**
+The number of tokens used in the prompt
+   */
   promptTokens: number;
+
+  /**
+The number of tokens used in the completion.
+   */
   completionTokens: number;
+
+  /**
+The total number of tokens used (promptTokens + completionTokens).
+   */
   totalTokens: number;
 };
 
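The doc comment pins down the invariant totalTokens = promptTokens + completionTokens, and the test earlier (3 prompt + 10 completion resolving to 13 total) shows `calculateTokenUsage` deriving the total from a provider usage object that lacks it. The commit does not show that helper's body; a plausible sketch under that reading, not the commit's code:

```ts
import { TokenUsage } from 'ai';

// Inferred shape, not the actual implementation: providers report prompt
// and completion counts, and the total is derived by summing them.
function calculateTokenUsage(usage: {
  promptTokens: number;
  completionTokens: number;
}): TokenUsage {
  return {
    promptTokens: usage.promptTokens,
    completionTokens: usage.completionTokens,
    totalTokens: usage.promptTokens + usage.completionTokens,
  };
}
```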
