
Commit ff2e509
Authored Jun 27, 2024
feat: Added AIM support for Meta Llama3 models in AWS Bedrock (#2306)
1 parent 0bf8908 · commit ff2e509

11 files changed, +85 -37 lines

‎ai-support.json

(+13 -1)

@@ -64,7 +64,19 @@
       }
     ]
   },
-
+  {
+    "name": "Meta Llama3",
+    "features": [
+      {
+        "title": "Text",
+        "supported": true
+      },
+      {
+        "title": "Image",
+        "supported": false
+      }
+    ]
+  },
   {
     "name": "Amazon Titan",
     "features": [

‎lib/llm-events/aws-bedrock/bedrock-command.js

(+5 -5)

@@ -37,7 +37,7 @@ class BedrockCommand {
       result = this.#body.max_tokens_to_sample
     } else if (this.isClaude3() === true || this.isCohere() === true) {
       result = this.#body.max_tokens
-    } else if (this.isLlama2() === true) {
+    } else if (this.isLlama() === true) {
       result = this.#body.max_gen_length
     } else if (this.isTitan() === true) {
       result = this.#body.textGenerationConfig?.maxTokenCount
@@ -80,7 +80,7 @@
       this.isClaude() === true ||
       this.isAi21() === true ||
       this.isCohere() === true ||
-      this.isLlama2() === true
+      this.isLlama() === true
     ) {
       result = this.#body.prompt
     } else if (this.isClaude3() === true) {
@@ -104,7 +104,7 @@
       this.isClaude3() === true ||
       this.isAi21() === true ||
       this.isCohere() === true ||
-      this.isLlama2() === true
+      this.isLlama() === true
     ) {
       result = this.#body.temperature
     }
@@ -131,8 +131,8 @@
     return this.#modelId.startsWith('cohere.embed')
   }

-  isLlama2() {
-    return this.#modelId.startsWith('meta.llama2')
+  isLlama() {
+    return this.#modelId.startsWith('meta.llama')
   }

   isTitan() {
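
The rename works because both Llama generations share the meta.llama model-ID prefix. Below is a minimal, standalone sketch of the check; the class shape is simplified for illustration, and only the isLlama() body mirrors the diff.

// Sketch: the prefix-based matcher after the rename. The real
// BedrockCommand class holds the parsed request; here it is reduced
// to just the model ID.
class BedrockCommandSketch {
  #modelId
  constructor(modelId) {
    this.#modelId = modelId
  }

  // Matches meta.llama2-* and meta.llama3-* model IDs alike.
  isLlama() {
    return this.#modelId.startsWith('meta.llama')
  }
}

console.log(new BedrockCommandSketch('meta.llama2-13b-chat-v1').isLlama()) // true
console.log(new BedrockCommandSketch('meta.llama3-8b-instruct-v1:0').isLlama()) // true
console.log(new BedrockCommandSketch('amazon.titan-text-lite-v1').isLlama()) // false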

‎lib/llm-events/aws-bedrock/bedrock-response.js

(+2 -2)

@@ -70,7 +70,7 @@
     } else if (cmd.isCohere() === true) {
       this.#completions = body.generations?.map((g) => g.text) ?? []
       this.#id = body.id
-    } else if (cmd.isLlama2() === true) {
+    } else if (cmd.isLlama() === true) {
       body.generation && this.#completions.push(body.generation)
     } else if (cmd.isTitan() === true) {
       this.#completions = body.results?.map((r) => r.outputText) ?? []
@@ -107,7 +107,7 @@
       result = this.#parsedBody.stop_reason
     } else if (cmd.isCohere() === true) {
       result = this.#parsedBody.generations?.find((r) => r.finish_reason !== null)?.finish_reason
-    } else if (cmd.isLlama2() === true) {
+    } else if (cmd.isLlama() === true) {
       result = this.#parsedBody.stop_reason
     } else if (cmd.isTitan() === true) {
       result = this.#parsedBody.results?.find((r) => r.completionReason !== null)?.completionReason
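
Llama 2 and Llama 3 return the same non-streaming body shape, which is why a single branch now serves both. A hedged sketch of the extraction above, assuming the { generation, stop_reason } body used by this commit's test fixtures:

// Sketch: pulling the completion text and finish reason out of a
// Llama response body. The body shape follows the fixtures in this
// commit's unit tests, not an authoritative Bedrock schema.
const body = JSON.parse('{"generation":"llama-response","stop_reason":"done"}')

const completions = []
body.generation && completions.push(body.generation)
const finishReason = body.stop_reason

console.log(completions) // [ 'llama-response' ]
console.log(finishReason) // 'done'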

‎lib/llm-events/aws-bedrock/stream-handler.js

(+3 -3)

@@ -114,9 +114,9 @@
     } else if (bedrockCommand.isCohereEmbed() === true) {
       this.stopReasonKey = 'nr_none'
       this.generator = handleCohereEmbed
-    } else if (bedrockCommand.isLlama2() === true) {
+    } else if (bedrockCommand.isLlama() === true) {
       this.stopReasonKey = 'stop_reason'
-      this.generator = handleLlama2
+      this.generator = handleLlama
     } else if (bedrockCommand.isTitan() === true) {
       this.stopReasonKey = 'completionReason'
       this.generator = handleTitan
@@ -271,7 +271,7 @@ async function* handleCohereEmbed() {
   }
 }

-async function* handleLlama2() {
+async function* handleLlama() {
   let currentBody = {}
   let generation = ''
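
Streamed Llama chunks carry the same keys, with stop_reason populated only on the final chunk. A rough, self-contained sketch of folding such chunks into one completion (chunk data is borrowed from the unit-test fixtures further down; this is not the handler's actual implementation):

// Sketch: accumulating Llama stream chunks. Chunk shape matches the
// { generation, stop_reason } fixtures used in stream-handler.test.js.
async function* llamaChunks() {
  yield { generation: '1', stop_reason: null }
  yield { generation: '2', stop_reason: 'done' }
}

async function collect() {
  let generation = ''
  let stopReason = null
  for await (const chunk of llamaChunks()) {
    generation += chunk.generation ?? ''
    if (chunk.stop_reason) stopReason = chunk.stop_reason
  }
  console.log(generation, stopReason) // 12 done
}

collect()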

‎test/lib/aws-server-stubs/ai-server/index.js

(+5 -2)

@@ -114,8 +114,11 @@ function handler(req, res) {
     }

     case 'meta.llama2-13b-chat-v1':
-    case 'meta.llama2-70b-chat-v1': {
-      response = responses.llama2.get(payload.prompt)
+    case 'meta.llama2-70b-chat-v1':
+    // llama3 responses are identical, just return llama2 data
+    case 'meta.llama3-8b-instruct-v1:0':
+    case 'meta.llama3-70b-instruct-v1:0': {
+      response = responses.llama.get(payload.prompt)
       break
     }

‎test/lib/aws-server-stubs/ai-server/responses/index.js

(+2 -2)

@@ -10,13 +10,13 @@ const amazon = require('./amazon')
 const claude = require('./claude')
 const claude3 = require('./claude3')
 const cohere = require('./cohere')
-const llama2 = require('./llama2')
+const llama = require('./llama')

 module.exports = {
   ai21,
   amazon,
   claude,
   claude3,
   cohere,
-  llama2
+  llama
 }

‎test/lib/aws-server-stubs/ai-server/responses/llama2.js ‎test/lib/aws-server-stubs/ai-server/responses/llama.js

(file renamed: llama2.js → llama.js; +3 -3)

@@ -8,7 +8,7 @@
 const responses = new Map()
 const { contentType, reqId } = require('./constants')

-responses.set('text llama2 ultimate question', {
+responses.set('text llama ultimate question', {
   headers: {
     'content-type': contentType,
     'x-amzn-requestid': reqId,
@@ -25,7 +25,7 @@ responses.set('text llama2 ultimate question', {
   }
 })

-responses.set('text llama2 ultimate question streamed', {
+responses.set('text llama ultimate question streamed', {
   headers: {
     'content-type': 'application/vnd.amazon.eventstream',
     'x-amzn-requestid': reqId,
@@ -68,7 +68,7 @@ responses.set('text llama2 ultimate question streamed', {
   ]
 })

-responses.set('text llama2 ultimate question error', {
+responses.set('text llama ultimate question error', {
   headers: {
     'content-type': contentType,
     'x-amzn-requestid': reqId,

‎test/unit/llm-events/aws-bedrock/bedrock-command.test.js

(+35 -3)

@@ -52,6 +52,13 @@ const llama2 = {
   }
 }

+const llama3 = {
+  modelId: 'meta.llama3-8b-instruct-v1:0',
+  body: {
+    prompt: 'who are you'
+  }
+}
+
 const titan = {
   modelId: 'amazon.titan-text-lite-v1',
   body: {
@@ -85,7 +92,7 @@ tap.test('non-conforming command is handled gracefully', async (t) => {
     'Claude3',
     'Cohere',
     'CohereEmbed',
-    'Llama2',
+    'Llama',
     'Titan',
     'TitanEmbed'
   ]) {
@@ -212,7 +219,7 @@ tap.test('cohere embed minimal command works', async (t) => {
 tap.test('llama2 minimal command works', async (t) => {
   t.context.updatePayload(structuredClone(llama2))
   const cmd = new BedrockCommand(t.context.input)
-  t.equal(cmd.isLlama2(), true)
+  t.equal(cmd.isLlama(), true)
   t.equal(cmd.maxTokens, undefined)
   t.equal(cmd.modelId, llama2.modelId)
   t.equal(cmd.modelType, 'completion')
@@ -226,7 +233,32 @@ tap.test('llama2 complete command works', async (t) => {
   payload.body.temperature = 0.5
   t.context.updatePayload(payload)
   const cmd = new BedrockCommand(t.context.input)
-  t.equal(cmd.isLlama2(), true)
+  t.equal(cmd.isLlama(), true)
+  t.equal(cmd.maxTokens, 25)
+  t.equal(cmd.modelId, payload.modelId)
+  t.equal(cmd.modelType, 'completion')
+  t.equal(cmd.prompt, payload.body.prompt)
+  t.equal(cmd.temperature, payload.body.temperature)
+})
+
+tap.test('llama3 minimal command works', async (t) => {
+  t.context.updatePayload(structuredClone(llama3))
+  const cmd = new BedrockCommand(t.context.input)
+  t.equal(cmd.isLlama(), true)
+  t.equal(cmd.maxTokens, undefined)
+  t.equal(cmd.modelId, llama3.modelId)
+  t.equal(cmd.modelType, 'completion')
+  t.equal(cmd.prompt, llama3.body.prompt)
+  t.equal(cmd.temperature, undefined)
+})
+
+tap.test('llama3 complete command works', async (t) => {
+  const payload = structuredClone(llama3)
+  payload.body.max_gen_length = 25
+  payload.body.temperature = 0.5
+  t.context.updatePayload(payload)
+  const cmd = new BedrockCommand(t.context.input)
+  t.equal(cmd.isLlama(), true)
   t.equal(cmd.maxTokens, 25)
   t.equal(cmd.modelId, payload.modelId)
   t.equal(cmd.modelType, 'completion')

‎test/unit/llm-events/aws-bedrock/bedrock-response.test.js

(+9 -9)

@@ -38,8 +38,8 @@ const cohere = {
   ]
 }

-const llama2 = {
-  generation: 'llama2-response',
+const llama = {
+  generation: 'llama-response',
   stop_reason: 'done'
 }

@@ -79,7 +79,7 @@ tap.beforeEach((t) => {
     isCohere() {
       return false
     },
-    isLlama2() {
+    isLlama() {
       return false
     },
     isTitan() {
@@ -172,8 +172,8 @@ tap.test('cohere complete responses work', async (t) => {
   t.equal(res.statusCode, 200)
 })

-tap.test('llama2 malformed responses work', async (t) => {
-  t.context.bedrockCommand.isLlama2 = () => true
+tap.test('llama malformed responses work', async (t) => {
+  t.context.bedrockCommand.isLlama = () => true
   const res = new BedrockResponse(t.context)
   t.same(res.completions, [])
   t.equal(res.finishReason, undefined)
@@ -183,11 +183,11 @@ tap.test('llama2 malformed responses work', async (t) => {
   t.equal(res.statusCode, 200)
 })

-tap.test('llama2 complete responses work', async (t) => {
-  t.context.bedrockCommand.isLlama2 = () => true
-  t.context.updatePayload(structuredClone(llama2))
+tap.test('llama complete responses work', async (t) => {
+  t.context.bedrockCommand.isLlama = () => true
+  t.context.updatePayload(structuredClone(llama))
   const res = new BedrockResponse(t.context)
-  t.same(res.completions, ['llama2-response'])
+  t.same(res.completions, ['llama-response'])
   t.equal(res.finishReason, 'done')
   t.same(res.headers, t.context.response.response.headers)
   t.equal(res.id, undefined)

‎test/unit/llm-events/aws-bedrock/stream-handler.test.js

(+5 -5)

@@ -45,7 +45,7 @@ tap.beforeEach((t) => {
     isClaude3() {
       return false
     },
-    isLlama2() {
+    isLlama() {
       return false
     },
     isTitan() {
@@ -242,15 +242,15 @@ tap.test('handles cohere embedding streams', async (t) => {
   t.equal(br.statusCode, 200)
 })

-tap.test('handles llama2 streams', async (t) => {
-  t.context.passThroughParams.bedrockCommand.isLlama2 = () => true
+tap.test('handles llama streams', async (t) => {
+  t.context.passThroughParams.bedrockCommand.isLlama = () => true
   t.context.chunks = [
     { generation: '1', stop_reason: null },
     { generation: '2', stop_reason: 'done', ...t.context.metrics }
   ]
   const handler = new StreamHandler(t.context)

-  t.equal(handler.generator.name, 'handleLlama2')
+  t.equal(handler.generator.name, 'handleLlama')
   for await (const event of handler.generator()) {
     t.type(event.chunk.bytes, Uint8Array)
   }
@@ -267,7 +267,7 @@ tap.test('handles llama2 streams', async (t) => {
   })

   const bc = new BedrockCommand({
-    modelId: 'meta.llama2',
+    modelId: 'meta.llama',
     body: JSON.stringify({
       prompt: 'prompt',
       max_gen_length: 5

‎test/versioned/aws-sdk-v3/bedrock-chat-completions.tap.js

(+3 -2)

@@ -48,7 +48,7 @@ const requests = {
     body: JSON.stringify({ prompt, temperature: 0.5, max_tokens: 100 }),
     modelId
   }),
-  llama2: (prompt, modelId) => ({
+  llama: (prompt, modelId) => ({
     body: JSON.stringify({ prompt, max_gen_length: 100, temperature: 0.5 }),
     modelId
   })
@@ -98,7 +98,8 @@ tap.afterEach(async (t) => {
   { modelId: 'anthropic.claude-v2', resKey: 'claude' },
   { modelId: 'anthropic.claude-3-haiku-20240307-v1:0', resKey: 'claude3' },
   { modelId: 'cohere.command-text-v14', resKey: 'cohere' },
-  { modelId: 'meta.llama2-13b-chat-v1', resKey: 'llama2' }
+  { modelId: 'meta.llama2-13b-chat-v1', resKey: 'llama' },
+  { modelId: 'meta.llama3-8b-instruct-v1:0', resKey: 'llama' }
 ].forEach(({ modelId, resKey }) => {
   tap.test(`${modelId}: should properly create completion segment`, (t) => {
     const { bedrock, client, responses, agent, expectedExternalPath } = t.context
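
For orientation, each request these versioned tests send corresponds to a plain InvokeModelCommand call. A minimal sketch of invoking the newly supported Llama 3 model directly; the region and prompt are placeholders, and the body keys follow this commit's fixtures rather than a canonical Bedrock schema:

// Sketch: calling a Llama 3 model through @aws-sdk/client-bedrock-runtime,
// the client these versioned tests exercise.
const { BedrockRuntimeClient, InvokeModelCommand } = require('@aws-sdk/client-bedrock-runtime')

async function main() {
  const client = new BedrockRuntimeClient({ region: 'us-east-1' }) // placeholder region
  const response = await client.send(
    new InvokeModelCommand({
      modelId: 'meta.llama3-8b-instruct-v1:0',
      contentType: 'application/json',
      accept: 'application/json',
      // Body keys mirror the test fixtures above.
      body: JSON.stringify({ prompt: 'who are you', max_gen_length: 100, temperature: 0.5 })
    })
  )
  // response.body arrives as bytes containing JSON.
  const parsed = JSON.parse(Buffer.from(response.body).toString('utf8'))
  console.log(parsed.generation, parsed.stop_reason)
}

main().catch(console.error)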
