improve docs & error handling for image parts (#1569)

lgrammel · web-flow · commit 325ca55e84de · 2024-05-13T18:21:09.000+02:00
diff --git a/.changeset/tricky-hotels-check.md b/.changeset/tricky-hotels-check.md
@@ -0,0 +1,6 @@
+---
+'@ai-sdk/provider': patch
+'ai': patch
+---
+
+feat (ai/core): improve image content part error message
diff --git a/content/docs/03-ai-sdk-core/03-prompts.mdx b/content/docs/03-ai-sdk-core/03-prompts.mdx
@@ -70,21 +70,78 @@ const result = await generateText({
 
 <Note>
   Multi-modal refers to interacting with a model across different data types
-  (text, images, sound etc.).
+  such as text, image, or audio data.
 </Note>
 
+Instead of sending plain text in the `content` property, you can send an array of parts that include text and other data types.
+Currently, text and image parts are supported.
+
 For models that support multi-modal inputs, user messages can include images. An `image` can be a base64-encoded image (`string`), an `ArrayBuffer`, a `Uint8Array`,
 a `Buffer`, or a `URL` object. It is possible to mix text and multiple images.
 
-```ts highlight="3-11"
+<Note type="warning">
+  Not all models support all types of multi-modal inputs. Check the model's
+  capabilities before using this feature.
+</Note>
+
+#### Example: Buffer images
+
+```ts highlight="8-11"
 const result = await generateText({
   model,
   messages: [
     {
       role: 'user',
       content: [
         { type: 'text', text: 'Describe the image in detail.' },
-        { type: 'image', image: fs.readFileSync('./data/comic-cat.png') },
+        {
+          type: 'image',
+          image: fs.readFileSync('./data/comic-cat.png'),
+        },
+      ],
+    },
+  ],
+});
+```
+
+#### Example: Base64-encoded images
+
+<Note>You do not need a `data:...` prefix for the base64-encoded image.</Note>
+
+```ts highlight="8-11"
+const result = await generateText({
+  model,
+  messages: [
+    {
+      role: 'user',
+      content: [
+        { type: 'text', text: 'Describe the image in detail.' },
+        {
+          type: 'image',
+          image: fs.readFileSync('./data/comic-cat.png').toString('base64'),
+        },
+      ],
+    },
+  ],
+});
+```
+
+#### Example: Image URLs
+
+```ts highlight="8-13"
+const result = await generateText({
+  model,
+  messages: [
+    {
+      role: 'user',
+      content: [
+        { type: 'text', text: 'Describe the image in detail.' },
+        {
+          type: 'image',
+          image: new URL(
+            'https://github.com/vercel/ai/blob/main/examples/ai-core/data/comic-cat.png?raw=true',
+          ),
+        },
       ],
     },
   ],
diff --git a/content/docs/08-troubleshooting/01-common-issues/index.mdx b/content/docs/08-troubleshooting/01-common-issues/index.mdx
@@ -1,6 +1,6 @@
 ---
 title: Common Issues
-description: Welcome to the Vercel AI SDK documentation!
+description: Troubleshooting information for common issues encountered with the Vercel AI SDK.
 ---
 
 # Common Issues
diff --git a/content/docs/08-troubleshooting/05-ai-sdk-errors/ai-invalid-data-content-error.mdx b/content/docs/08-troubleshooting/05-ai-sdk-errors/ai-invalid-data-content-error.mdx
@@ -0,0 +1,10 @@
+---
+title: AI_InvalidDataContentError
+description: How to fix AI_InvalidDataContentError
+---
+
+# AI_InvalidDataContentError
+
+The data content that you have sent in a multi-modal message part is invalid.
+
+Check out the [prompt examples for multi-modal messages](/docs/ai-sdk-core/prompts#multi-modal-messages).
diff --git a/content/docs/08-troubleshooting/05-ai-sdk-errors/index.mdx b/content/docs/08-troubleshooting/05-ai-sdk-errors/index.mdx
@@ -0,0 +1,8 @@
+---
+title: AI SDK Errors
+description: Troubleshooting information for common AI SDK errors.
+---
+
+# AI SDK Errors
+
+- [AI_InvalidDataContentError](/docs/troubleshooting/ai-sdk-errors/ai-invalid-data-content-error)
diff --git a/examples/ai-core/src/generate-text/openai-multimodal-base64.ts b/examples/ai-core/src/generate-text/openai-multimodal-base64.ts
@@ -0,0 +1,29 @@
+import { openai } from '@ai-sdk/openai';
+import { generateText } from 'ai';
+import dotenv from 'dotenv';
+import fs from 'node:fs';
+
+dotenv.config();
+
+async function main() {
+  const result = await generateText({
+    model: openai.chat('gpt-4-turbo'),
+    maxTokens: 512,
+    messages: [
+      {
+        role: 'user',
+        content: [
+          { type: 'text', text: 'Describe the image in detail.' },
+          {
+            type: 'image',
+            image: fs.readFileSync('./data/comic-cat.png').toString('base64'),
+          },
+        ],
+      },
+    ],
+  });
+
+  console.log(result.text);
+}
+
+main().catch(console.error);
diff --git a/packages/core/core/prompt/data-content.ts b/packages/core/core/prompt/data-content.ts
@@ -41,7 +41,16 @@ export function convertDataContentToUint8Array(
   }
 
   if (typeof content === 'string') {
-    return convertBase64ToUint8Array(content);
+    try {
+      return convertBase64ToUint8Array(content);
+    } catch (error) {
+      throw new InvalidDataContentError({
+        message:
+          'Invalid data content. Content string is not a base64-encoded image.',
+        content,
+        cause: error,
+      });
+    }
   }
 
   if (content instanceof ArrayBuffer) {
diff --git a/packages/provider/src/errors/invalid-data-content-error.ts b/packages/provider/src/errors/invalid-data-content-error.ts
@@ -1,17 +1,21 @@
 export class InvalidDataContentError extends Error {
   readonly content: unknown;
+  readonly cause?: unknown;
 
   constructor({
     content,
-    message = `Invalid data content. Expected a string, Uint8Array, ArrayBuffer, or Buffer, but got ${typeof content}.`,
+    cause,
+    message = `Invalid data content. Expected a base64 string, Uint8Array, ArrayBuffer, or Buffer, but got ${typeof content}.`,
   }: {
     content: unknown;
+    cause?: unknown;
     message?: string;
   }) {
     super(message);
 
     this.name = 'AI_InvalidDataContentError';
 
+    this.cause = cause;
     this.content = content;
   }
 
@@ -30,7 +34,7 @@ export class InvalidDataContentError extends Error {
       name: this.name,
       message: this.message,
       stack: this.stack,
-
+      cause: this.cause,
       content: this.content,
     };
   }