Commit 08b5c50

feat (core): add tokenUsage to streamObject result. (#1815)

Authored Jun 4, 2024 · 1 parent ed08236

12 files changed: +320 -91 lines
 
New changeset file (+5):

@@ -0,0 +1,5 @@
+---
+'ai': patch
+---
+
+feat (core): add tokenUsage to streamObject result

‎content/docs/07-reference/ai-sdk-core/04-stream-object.mdx

+33 -1

@@ -314,9 +314,37 @@ for await (const partialObject of partialObjectStream) {
 
 <PropertiesTable
   content={[
+    {
+      name: 'usage',
+      type: 'Promise<TokenUsage>',
+      description:
+        'The token usage of the generated text. Resolved when the response is finished.',
+      properties: [
+        {
+          type: 'TokenUsage',
+          parameters: [
+            {
+              name: 'promptTokens',
+              type: 'number',
+              description: 'The total number of tokens in the prompt.',
+            },
+            {
+              name: 'completionTokens',
+              type: 'number',
+              description: 'The total number of tokens in the completion.',
+            },
+            {
+              name: 'totalTokens',
+              type: 'number',
+              description: 'The total number of tokens generated.',
+            },
+          ],
+        },
+      ],
+    },
     {
       name: 'partialObjectStream',
-      type: 'AsyncIterable<DeepPartial<T>> & ReadableStream<DeepPartial<T>>',
+      type: 'AsyncIterableStream<DeepPartial<T>>',
       description:
         'Note that the partial object is not validated. If you want to be certain that the actual content matches your schema, you need to implement your own validation for partial results.',
     },
@@ -356,6 +384,10 @@ for await (const partialObject of partialObjectStream) {
     {
       type: 'FinishPart',
       parameters: [
+        {
+          name: 'type',
+          type: "'finish'",
+        },
        {
          name: 'finishReason',
          type: 'FinishReason',
‎content/examples/03-node/02-generating-structured-data/02-stream-object.mdx

-39
This file was deleted.

‎content/examples/03-node/02-generating-structured-data/index.mdx

-4
@@ -13,10 +13,6 @@ The following sections will guide you through generating structured data with No
       title: 'Generating an Object',
       href: '/examples/node/generating-structured-data/generate-object',
     },
-    {
-      title: 'Stream Object Generation',
-      href: '/examples/node/generating-structured-data/stream-object',
-    },
     {
       title: 'Generate Object with Image Prompt',
       href: '/examples/node/generating-structured-data/add-images-to-prompt',
New file (+36): "Streaming Partial Objects" example page

@@ -0,0 +1,36 @@
+---
+title: Streaming Partial Objects
+description: Examples of how to stream partial objects.
+---
+
+# Streaming Partial Objects
+
+Object generation can sometimes take a long time to complete,
+especially when you're generating a large schema.
+
+In Generative UI use cases, it is useful to stream the object to the client in real-time
+to render UIs as the object is being generated.
+You can use the [`streamObject`](/docs/reference/ai-sdk-core/stream-object) function to generate partial object streams.
+
+```ts file='index.ts'
+import { openai } from '@ai-sdk/openai';
+import { streamObject } from 'ai';
+import { z } from 'zod';
+
+const { partialObjectStream } = await streamObject({
+  model: openai('gpt-4-turbo'),
+  schema: z.object({
+    recipe: z.object({
+      name: z.string(),
+      ingredients: z.array(z.string()),
+      steps: z.array(z.string()),
+    }),
+  }),
+  prompt: 'Generate a lasagna recipe.',
+});
+
+for await (const partialObject of partialObjectStream) {
+  console.clear();
+  console.log(partialObject);
+}
+```
New file (+45): "Recording Token Usage" example page

@@ -0,0 +1,45 @@
+---
+title: Recording Token Usage
+description: Examples of how to record token usage when streaming structured data.
+---
+
+# Recording Token Usage
+
+When you're streaming structured data, you may want to record the token usage for billing purposes.
+
+The [`streamObject`](/docs/reference/ai-sdk-core/stream-object) result contains a `usage` promise that resolves to the total token usage.
+
+```ts file='index.ts'
+import { openai } from '@ai-sdk/openai';
+import { streamObject, TokenUsage } from 'ai';
+import { z } from 'zod';
+
+const result = await streamObject({
+  model: openai('gpt-4-turbo'),
+  schema: z.object({
+    recipe: z.object({
+      name: z.string(),
+      ingredients: z.array(z.string()),
+      steps: z.array(z.string()),
+    }),
+  }),
+  prompt: 'Generate a lasagna recipe.',
+});
+
+// your custom function to record token usage:
+function recordTokenUsage({
+  promptTokens,
+  completionTokens,
+  totalTokens,
+}: TokenUsage) {
+  console.log('Prompt tokens:', promptTokens);
+  console.log('Completion tokens:', completionTokens);
+  console.log('Total tokens:', totalTokens);
+}
+
+// use as promise:
+result.usage.then(recordTokenUsage);
+
+// use with async/await:
+recordTokenUsage(await result.usage);
+```
New file (+21): "Streaming Structured Data" examples index

@@ -0,0 +1,21 @@
+---
+title: Streaming Structured Data
+description: Examples of how to stream structured data.
+---
+
+# Streaming Structured Data
+
+The following sections will guide you through streaming structured data with Node.js and the Vercel AI SDK.
+
+<IndexCards
+  cards={[
+    {
+      title: 'Streaming Partial Objects',
+      href: '/examples/node/streaming-structured-data/stream-object',
+    },
+    {
+      title: 'Recording Token Usage',
+      href: '/examples/node/streaming-structured-data/token-usage',
+    },
+  ]}
+/>
New file (+32): standalone token-usage example script

@@ -0,0 +1,32 @@
+import { openai } from '@ai-sdk/openai';
+import { streamObject, TokenUsage } from 'ai';
+import { z } from 'zod';
+
+const result = await streamObject({
+  model: openai('gpt-4-turbo'),
+  schema: z.object({
+    recipe: z.object({
+      name: z.string(),
+      ingredients: z.array(z.string()),
+      steps: z.array(z.string()),
+    }),
+  }),
+  prompt: 'Generate a lasagna recipe.',
+});
+
+// your custom function to record token usage:
+function recordTokenUsage({
+  promptTokens,
+  completionTokens,
+  totalTokens,
+}: TokenUsage) {
+  console.log('Prompt tokens:', promptTokens);
+  console.log('Completion tokens:', completionTokens);
+  console.log('Total tokens:', totalTokens);
+}
+
+// use as promise:
+result.usage.then(recordTokenUsage);
+
+// use with async/await:
+recordTokenUsage(await result.usage);

‎packages/core/core/generate-object/stream-object.test.ts

+53
@@ -138,7 +138,9 @@ describe('result.objectStream', () => {
       ],
     );
   });
+});
 
+describe('result.fullStream', () => {
   it('should send full stream data', async () => {
     const result = await streamObject({
       model: new MockLanguageModelV1({
@@ -202,3 +204,54 @@ describe('result.objectStream', () => {
     );
   });
 });
+
+describe('result.usage', () => {
+  it('should resolve with token usage', async () => {
+    const result = await streamObject({
+      model: new MockLanguageModelV1({
+        doStream: async ({ prompt, mode }) => {
+          assert.deepStrictEqual(mode, { type: 'object-json' });
+          assert.deepStrictEqual(prompt, [
+            {
+              role: 'system',
+              content:
+                'JSON schema:\n' +
+                '{"type":"object","properties":{"content":{"type":"string"}},"required":["content"],"additionalProperties":false,"$schema":"http://json-schema.org/draft-07/schema#"}\n' +
+                'You MUST answer with a JSON object that matches the JSON schema above.',
+            },
+            { role: 'user', content: [{ type: 'text', text: 'prompt' }] },
+          ]);
+
+          return {
+            stream: convertArrayToReadableStream([
+              { type: 'text-delta', textDelta: '{ ' },
+              { type: 'text-delta', textDelta: '"content": ' },
+              { type: 'text-delta', textDelta: `"Hello, ` },
+              { type: 'text-delta', textDelta: `world` },
+              { type: 'text-delta', textDelta: `!"` },
+              { type: 'text-delta', textDelta: ' }' },
+              {
+                type: 'finish',
+                finishReason: 'stop',
+                usage: { completionTokens: 10, promptTokens: 3 },
+              },
+            ]),
+            rawCall: { rawPrompt: 'prompt', rawSettings: {} },
+          };
+        },
+      }),
+      schema: z.object({ content: z.string() }),
+      mode: 'json',
+      prompt: 'prompt',
+    });
+
+    // consume stream (runs in parallel)
+    convertAsyncIterableToArray(result.partialObjectStream);
+
+    assert.deepStrictEqual(await result.usage, {
+      completionTokens: 10,
+      promptTokens: 3,
+      totalTokens: 13,
+    });
+  });
+});
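The `// consume stream (runs in parallel)` line in the test is load-bearing: the usage promise is resolved inside the stream transform, so it settles only after the finish chunk has flowed through. A hedged sketch of what that implies for callers (illustrative model, schema, and prompt):

```ts
import { openai } from '@ai-sdk/openai';
import { streamObject } from 'ai';
import { z } from 'zod';

const result = await streamObject({
  model: openai('gpt-4-turbo'),
  schema: z.object({ content: z.string() }),
  prompt: 'Say hello.',
});

// If partialObjectStream were never read, the transform would never see
// the finish chunk and result.usage would stay pending.
for await (const partialObject of result.partialObjectStream) {
  console.log(partialObject);
}

// settles once the finish chunk has passed through the stream:
const { promptTokens, completionTokens, totalTokens } = await result.usage;
console.log({ promptTokens, completionTokens, totalTokens });
```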

‎packages/core/core/generate-object/stream-object.ts

+80 -47

@@ -3,7 +3,7 @@ import {
   LanguageModelV1StreamPart,
 } from '@ai-sdk/provider';
 import { z } from 'zod';
-import { calculateTokenUsage } from '../generate-text/token-usage';
+import { TokenUsage, calculateTokenUsage } from '../generate-text/token-usage';
 import { CallSettings } from '../prompt/call-settings';
 import { convertToLanguageModelPrompt } from '../prompt/convert-to-language-model-prompt';
 import { getValidatedPrompt } from '../prompt/get-validated-prompt';
@@ -230,7 +230,7 @@ Default and recommended: 'auto' (best mode for the model).
   });
 }
 
-export type ObjectStreamPartInput =
+export type ObjectStreamInputPart =
   | {
       type: 'error';
       error: unknown;
@@ -247,7 +247,7 @@ export type ObjectStreamPartInput =
     };
 
 export type ObjectStreamPart<T> =
-  | ObjectStreamPartInput
+  | ObjectStreamInputPart
   | {
       type: 'object';
       object: DeepPartial<T>;
@@ -257,15 +257,18 @@ export type ObjectStreamPart<T> =
 The result of a `streamObject` call that contains the partial object stream and additional information.
  */
 export class StreamObjectResult<T> {
-  private readonly originalStream: ReadableStream<
-    string | ObjectStreamPartInput
-  >;
+  readonly originalStream: ReadableStream<ObjectStreamPart<T>>;
 
   /**
 Warnings from the model provider (e.g. unsupported settings)
   */
   readonly warnings: CallWarning[] | undefined;
 
+  /**
+The token usage of the generated response. Resolved when the response is finished.
+  */
+  readonly usage: Promise<TokenUsage>;
+
   /**
 Optional raw response data.
   */
@@ -281,75 +284,105 @@ Response headers.
     warnings,
     rawResponse,
   }: {
-    stream: ReadableStream<string | ObjectStreamPartInput>;
+    stream: ReadableStream<string | ObjectStreamInputPart>;
     warnings: CallWarning[] | undefined;
     rawResponse?: {
       headers?: Record<string, string>;
     };
   }) {
-    this.originalStream = stream;
     this.warnings = warnings;
    this.rawResponse = rawResponse;
-  }
-
-  get partialObjectStream(): AsyncIterableStream<DeepPartial<T>> {
-    let accumulatedText = '';
-    let latestObject: DeepPartial<T> | undefined = undefined;
-
-    return createAsyncIterableStream(this.originalStream, {
-      transform(chunk, controller) {
-        if (typeof chunk === 'string') {
-          accumulatedText += chunk;
-
-          const currentObject = parsePartialJson(
-            accumulatedText,
-          ) as DeepPartial<T>;
 
-          if (!isDeepEqualData(latestObject, currentObject)) {
-            latestObject = currentObject;
-
-            controller.enqueue(currentObject);
-          }
-        } else if (chunk.type === 'error') {
-          throw chunk.error;
-        }
-      },
+    // initialize usage promise
+    let resolveUsage: (value: TokenUsage | PromiseLike<TokenUsage>) => void;
+    this.usage = new Promise<TokenUsage>(resolve => {
+      resolveUsage = resolve;
     });
-  }
 
-  get fullStream(): AsyncIterableStream<ObjectStreamPart<T>> {
+    // store information for onFinish callback:
+    let usage: TokenUsage | undefined;
+
+    // pipe chunks through a transformation stream that extracts metadata:
     let accumulatedText = '';
     let latestObject: DeepPartial<T> | undefined = undefined;
 
-    return createAsyncIterableStream(this.originalStream, {
-      transform(chunk, controller) {
-        if (typeof chunk === 'string') {
-          accumulatedText += chunk;
-          const currentObject = parsePartialJson(
-            accumulatedText,
-          ) as DeepPartial<T>;
+    this.originalStream = stream.pipeThrough(
+      new TransformStream<string | ObjectStreamInputPart, ObjectStreamPart<T>>({
+        async transform(chunk, controller): Promise<void> {
+          // process partial text chunks
+          if (typeof chunk === 'string') {
+            accumulatedText += chunk;
+
+            const currentObject = parsePartialJson(
+              accumulatedText,
+            ) as DeepPartial<T>;
+
+            if (!isDeepEqualData(latestObject, currentObject)) {
+              latestObject = currentObject;
 
-          if (!isDeepEqualData(latestObject, currentObject)) {
-            latestObject = currentObject;
+              controller.enqueue({ type: 'object', object: currentObject });
+            }
 
-            controller.enqueue({ type: 'object', object: currentObject });
+            return;
           }
-        } else {
+
           switch (chunk.type) {
-            case 'finish':
+            case 'finish': {
+              // store usage for promises and onFinish callback:
+              usage = calculateTokenUsage(chunk.usage);
+
               controller.enqueue({
                 ...chunk,
-                usage: calculateTokenUsage(chunk.usage),
+                usage,
               });
+
+              // resolve promises that can be resolved now:
+              resolveUsage(usage);
+
               break;
-            default:
+            }
+
+            default: {
               controller.enqueue(chunk);
               break;
+            }
+          }
+        },
+      }),
+    );
+  }
+
+  get partialObjectStream(): AsyncIterableStream<DeepPartial<T>> {
+    return createAsyncIterableStream(this.originalStream, {
+      transform(chunk, controller) {
+        switch (chunk.type) {
+          case 'object':
+            controller.enqueue(chunk.object);
+            break;
+
+          case 'finish':
+            break;
+
+          case 'error':
+            controller.error(chunk.error);
+            break;
+
+          default: {
+            const _exhaustiveCheck: never = chunk;
+            throw new Error(`Unsupported chunk type: ${_exhaustiveCheck}`);
          }
        }
      },
    });
  }
+
+  get fullStream(): AsyncIterableStream<ObjectStreamPart<T>> {
+    return createAsyncIterableStream(this.originalStream, {
+      transform(chunk, controller) {
+        controller.enqueue(chunk);
+      },
+    });
+  }
 }
 
 /**
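The pattern at the heart of this change is a deferred promise resolved from inside a TransformStream: the `resolveUsage` callback is captured outside the promise executor and invoked when the `finish` chunk passes through. A self-contained sketch of just that pattern, with illustrative names (`Usage`, `Chunk`, and `withUsagePromise` are not SDK exports):

```ts
// Minimal sketch of the deferred-promise-over-TransformStream pattern above.
type Usage = { promptTokens: number; completionTokens: number; totalTokens: number };
type Chunk =
  | { type: 'text-delta'; textDelta: string }
  | { type: 'finish'; usage: Usage };

function withUsagePromise(source: ReadableStream<Chunk>): {
  stream: ReadableStream<Chunk>;
  usage: Promise<Usage>;
} {
  // capture the resolver so the transform can settle the promise later
  let resolveUsage!: (value: Usage) => void;
  const usage = new Promise<Usage>(resolve => {
    resolveUsage = resolve;
  });

  const stream = source.pipeThrough(
    new TransformStream<Chunk, Chunk>({
      transform(chunk, controller) {
        if (chunk.type === 'finish') {
          resolveUsage(chunk.usage); // side channel: resolve on finish
        }
        controller.enqueue(chunk); // pass every chunk downstream unchanged
      },
    }),
  );

  return { stream, usage };
}
```

Because the transform only runs as the stream is pulled, the promise settles once a consumer reads past the finish chunk, which is the same reason the commit's test drains `partialObjectStream` before awaiting `result.usage`.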
Generate-text barrel file (+1):

@@ -1,2 +1,3 @@
 export * from './generate-text';
 export * from './stream-text';
+export type { TokenUsage } from './token-usage';

‎packages/core/core/generate-text/token-usage.ts

+14
@@ -1,6 +1,20 @@
+/**
+Represents the number of tokens used in a prompt and completion.
+ */
 export type TokenUsage = {
+  /**
+The number of tokens used in the prompt
+   */
   promptTokens: number;
+
+  /**
+The number of tokens used in the completion.
+   */
   completionTokens: number;
+
+  /**
+The total number of tokens used (promptTokens + completionTokens).
+   */
   totalTokens: number;
 };
 
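The doc comment pins down the invariant totalTokens = promptTokens + completionTokens, and the test earlier (3 prompt + 10 completion resolving to 13 total) shows `calculateTokenUsage` deriving the total from a provider usage object that lacks it. The commit does not show that helper's body; a plausible sketch under that reading, not the commit's code:

```ts
import { TokenUsage } from 'ai';

// Inferred shape, not the actual implementation: providers report prompt
// and completion counts, and the total is derived by summing them.
function calculateTokenUsage(usage: {
  promptTokens: number;
  completionTokens: number;
}): TokenUsage {
  return {
    promptTokens: usage.promptTokens,
    completionTokens: usage.completionTokens,
    totalTokens: usage.promptTokens + usage.completionTokens,
  };
}
```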
