Skip to content

Commit 09295e2

Browse files
authored Jun 5, 2024
feat (provider): download of image urls (#1849)
1 parent fe7f4eb commit 09295e2

26 files changed, with 698 additions and 169 deletions.
 

‎.changeset/happy-cougars-drum.md

+5
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,5 @@
1+
---
2+
'@ai-sdk/anthropic': patch
3+
---
4+
5+
feat (@ai-sdk/anthropic): automatically download image URLs

‎.changeset/heavy-camels-laugh.md

+5
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,5 @@
1+
---
2+
'@ai-sdk/google-vertex': patch
3+
---
4+
5+
feat (@ai-sdk/google-vertex): automatically download image URLs

‎.changeset/loud-seahorses-camp.md

+5
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,5 @@
1+
---
2+
'@ai-sdk/provider': patch
3+
---
4+
5+
feat (@ai-sdk/provider): add DownloadError

‎.changeset/rude-rockets-stare.md

+5
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,5 @@
1+
---
2+
'@ai-sdk/provider-utils': patch
3+
---
4+
5+
feat (@ai-sdk/provider-utils): add download helper

‎.changeset/strong-experts-punch.md

+5
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,5 @@
1+
---
2+
'@ai-sdk/google': patch
3+
---
4+
5+
feat (@ai-sdk/google): automatically download image URLs
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,30 @@
1+
import { anthropic } from '@ai-sdk/anthropic';
2+
import { generateText } from 'ai';
3+
import dotenv from 'dotenv';
4+
5+
dotenv.config();
6+
7+
async function main() {
8+
const result = await generateText({
9+
model: anthropic('claude-3-haiku-20240307'),
10+
maxTokens: 512,
11+
messages: [
12+
{
13+
role: 'user',
14+
content: [
15+
{ type: 'text', text: 'Describe the image in detail.' },
16+
{
17+
type: 'image',
18+
image: new URL(
19+
'https://github.com/vercel/ai/blob/main/examples/ai-core/data/comic-cat.png?raw=true',
20+
),
21+
},
22+
],
23+
},
24+
],
25+
});
26+
27+
console.log(result.text);
28+
}
29+
30+
main().catch(console.error);
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,30 @@
1+
import { google } from '@ai-sdk/google';
2+
import { generateText } from 'ai';
3+
import dotenv from 'dotenv';
4+
5+
dotenv.config();
6+
7+
async function main() {
8+
const result = await generateText({
9+
model: google('models/gemini-pro-vision'),
10+
maxTokens: 512,
11+
messages: [
12+
{
13+
role: 'user',
14+
content: [
15+
{ type: 'text', text: 'Describe the image in detail.' },
16+
{
17+
type: 'image',
18+
image: new URL(
19+
'https://github.com/vercel/ai/blob/main/examples/ai-core/data/comic-cat.png?raw=true',
20+
),
21+
},
22+
],
23+
},
24+
],
25+
});
26+
27+
console.log(result.text);
28+
}
29+
30+
main().catch(console.error);

‎examples/ai-core/src/generate-text/google-vertex-multimodal-base64.ts

-1
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,6 @@ dotenv.config();
88
async function main() {
99
const result = await generateText({
1010
model: vertex('gemini-1.0-pro-vision'),
11-
maxTokens: 512,
1211
messages: [
1312
{
1413
role: 'user',
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,29 @@
1+
import { vertex } from '@ai-sdk/google-vertex';
2+
import { generateText } from 'ai';
3+
import dotenv from 'dotenv';
4+
5+
dotenv.config();
6+
7+
async function main() {
8+
const result = await generateText({
9+
model: vertex('gemini-1.0-pro-vision'),
10+
messages: [
11+
{
12+
role: 'user',
13+
content: [
14+
{ type: 'text', text: 'Describe the image in detail.' },
15+
{
16+
type: 'image',
17+
image: new URL(
18+
'https://github.com/vercel/ai/blob/main/examples/ai-core/data/comic-cat.png?raw=true',
19+
),
20+
},
21+
],
22+
},
23+
],
24+
});
25+
26+
console.log(result.text);
27+
}
28+
29+
main().catch(console.error);

‎packages/anthropic/src/anthropic-messages-language-model.ts

+4-4
Original file line number | Diff line number | Diff line change
@@ -50,7 +50,7 @@ export class AnthropicMessagesLanguageModel implements LanguageModelV1 {
5050
return this.config.provider;
5151
}
5252

53-
private getArgs({
53+
private async getArgs({
5454
mode,
5555
prompt,
5656
maxTokens,
@@ -85,7 +85,7 @@ export class AnthropicMessagesLanguageModel implements LanguageModelV1 {
8585
});
8686
}
8787

88-
const messagesPrompt = convertToAnthropicMessagesPrompt(prompt);
88+
const messagesPrompt = await convertToAnthropicMessagesPrompt({ prompt });
8989

9090
const baseArgs = {
9191
// model id:
@@ -152,7 +152,7 @@ export class AnthropicMessagesLanguageModel implements LanguageModelV1 {
152152
async doGenerate(
153153
options: Parameters<LanguageModelV1['doGenerate']>[0],
154154
): Promise<Awaited<ReturnType<LanguageModelV1['doGenerate']>>> {
155-
const { args, warnings } = this.getArgs(options);
155+
const { args, warnings } = await this.getArgs(options);
156156

157157
const { responseHeaders, value: response } = await postJsonToApi({
158158
url: `${this.config.baseURL}/messages`,
@@ -208,7 +208,7 @@ export class AnthropicMessagesLanguageModel implements LanguageModelV1 {
208208
async doStream(
209209
options: Parameters<LanguageModelV1['doStream']>[0],
210210
): Promise<Awaited<ReturnType<LanguageModelV1['doStream']>>> {
211-
const { args, warnings } = this.getArgs(options);
211+
const { args, warnings } = await this.getArgs(options);
212212

213213
const { responseHeaders, value: response } = await postJsonToApi({
214214
url: `${this.config.baseURL}/messages`,
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,86 @@
1+
import { convertToAnthropicMessagesPrompt } from './convert-to-anthropic-messages-prompt';
2+
3+
describe('user messages', () => {
4+
it('should download images for user image parts with URLs', async () => {
5+
const result = await convertToAnthropicMessagesPrompt({
6+
prompt: [
7+
{
8+
role: 'user',
9+
content: [
10+
{
11+
type: 'image',
12+
image: new URL('https://example.com/image.png'),
13+
},
14+
],
15+
},
16+
],
17+
downloadImplementation: async ({ url }) => {
18+
expect(url).toEqual(new URL('https://example.com/image.png'));
19+
20+
return {
21+
data: new Uint8Array([0, 1, 2, 3]),
22+
mimeType: 'image/png',
23+
};
24+
},
25+
});
26+
27+
expect(result).toEqual({
28+
messages: [
29+
{
30+
role: 'user',
31+
content: [
32+
{
33+
type: 'image',
34+
source: {
35+
data: 'AAECAw==',
36+
media_type: 'image/png',
37+
type: 'base64',
38+
},
39+
},
40+
],
41+
},
42+
],
43+
system: undefined,
44+
});
45+
});
46+
47+
it('should add image parts for UInt8Array images', async () => {
48+
const result = await convertToAnthropicMessagesPrompt({
49+
prompt: [
50+
{
51+
role: 'user',
52+
content: [
53+
{
54+
type: 'image',
55+
image: new Uint8Array([0, 1, 2, 3]),
56+
mimeType: 'image/png',
57+
},
58+
],
59+
},
60+
],
61+
62+
downloadImplementation: async ({ url }) => {
63+
throw new Error('Unexpected download call');
64+
},
65+
});
66+
67+
expect(result).toEqual({
68+
messages: [
69+
{
70+
role: 'user',
71+
content: [
72+
{
73+
type: 'image',
74+
source: {
75+
data: 'AAECAw==',
76+
media_type: 'image/png',
77+
type: 'base64',
78+
},
79+
},
80+
],
81+
},
82+
],
83+
system: undefined,
84+
});
85+
});
86+
});

‎packages/anthropic/src/convert-to-anthropic-messages-prompt.ts

+46-28
Original file line number | Diff line number | Diff line change
@@ -2,15 +2,20 @@ import {
22
LanguageModelV1Prompt,
33
UnsupportedFunctionalityError,
44
} from '@ai-sdk/provider';
5-
import { convertUint8ArrayToBase64 } from '@ai-sdk/provider-utils';
5+
import { convertUint8ArrayToBase64, download } from '@ai-sdk/provider-utils';
66
import {
77
AnthropicMessage,
88
AnthropicMessagesPrompt,
9+
AnthropicUserMessage,
910
} from './anthropic-messages-prompt';
1011

11-
export function convertToAnthropicMessagesPrompt(
12-
prompt: LanguageModelV1Prompt,
13-
): AnthropicMessagesPrompt {
12+
export async function convertToAnthropicMessagesPrompt({
13+
prompt,
14+
downloadImplementation = download,
15+
}: {
16+
prompt: LanguageModelV1Prompt;
17+
downloadImplementation?: typeof download;
18+
}): Promise<AnthropicMessagesPrompt> {
1419
let system: string | undefined = undefined;
1520
const messages: AnthropicMessage[] = [];
1621

@@ -28,32 +33,45 @@ export function convertToAnthropicMessagesPrompt(
2833
}
2934

3035
case 'user': {
31-
messages.push({
32-
role: 'user',
33-
content: content.map(part => {
34-
switch (part.type) {
35-
case 'text': {
36-
return { type: 'text', text: part.text };
37-
}
38-
case 'image': {
39-
if (part.image instanceof URL) {
40-
throw new UnsupportedFunctionalityError({
41-
functionality: 'URL image parts',
42-
});
43-
} else {
44-
return {
45-
type: 'image',
46-
source: {
47-
type: 'base64',
48-
media_type: part.mimeType ?? 'image/jpeg',
49-
data: convertUint8ArrayToBase64(part.image),
50-
},
51-
};
52-
}
36+
const anthropicContent: AnthropicUserMessage['content'] = [];
37+
38+
for (const part of content) {
39+
switch (part.type) {
40+
case 'text': {
41+
anthropicContent.push({ type: 'text', text: part.text });
42+
break;
43+
}
44+
case 'image': {
45+
let data: Uint8Array;
46+
let mimeType: string | undefined;
47+
48+
if (part.image instanceof URL) {
49+
const downloadResult = await downloadImplementation({
50+
url: part.image,
51+
});
52+
53+
data = downloadResult.data;
54+
mimeType = downloadResult.mimeType;
55+
} else {
56+
data = part.image;
57+
mimeType = part.mimeType;
5358
}
59+
60+
anthropicContent.push({
61+
type: 'image',
62+
source: {
63+
type: 'base64',
64+
media_type: mimeType ?? 'image/jpeg',
65+
data: convertUint8ArrayToBase64(data),
66+
},
67+
});
68+
69+
break;
5470
}
55-
}),
56-
});
71+
}
72+
}
73+
74+
messages.push({ role: 'user', content: anthropicContent });
5775
break;
5876
}
5977

0 commit comments

Comments
 (0)
Please sign in to comment.