ai/core: remove scaling of setting values. (#1418)
lgrammel committed Apr 23, 2024
1 parent 202f9ca commit eb150a6
Showing 16 changed files with 46 additions and 240 deletions.
10 changes: 10 additions & 0 deletions .changeset/fair-avocados-itch.md
@@ -0,0 +1,10 @@
---
'@ai-sdk/provider-utils': patch
'@ai-sdk/anthropic': patch
'@ai-sdk/provider': patch
'@ai-sdk/mistral': patch
'@ai-sdk/openai': patch
'ai': patch
---

ai/core: remove scaling of setting values (breaking change). If you were using the temperature, frequency penalty, or presence penalty settings, you need to update the providers and adjust the setting values.
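
For orientation, a hedged migration sketch follows (the provider import, model id, and prompt are illustrative assumptions, not part of this commit); the numeric conversions mirror the rescaling that the OpenAI provider applied before this change (see the deleted tests further down):

```ts
import { generateText } from 'ai';
// Provider setup is assumed for illustration; adjust to your SDK version.
import { openai } from '@ai-sdk/openai';

// Before this commit, settings used unified scales (temperature 0..1,
// penalties -1..1) and were rescaled internally for each provider.
// After this commit, values are sent to the provider verbatim, so they
// must be given in OpenAI's native ranges (temperature 0..2, penalties -2..2).
const { text } = await generateText({
  model: openai.chat('gpt-3.5-turbo'),
  prompt: 'Write a haiku about breaking changes.',
  temperature: 1, // was 0.5 on the old unified 0..1 scale
  frequencyPenalty: 0.4, // was 0.2 on the old unified -1..1 scale
  presencePenalty: -1.8, // was -0.9 on the old unified -1..1 scale
});
```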
12 changes: 5 additions & 7 deletions docs/pages/docs/ai-core/settings.mdx
@@ -10,19 +10,17 @@ All AI functions (`generateText`, `streamText`, `generateObject`, `streamObject`

- **maxTokens** - Maximum number of tokens to generate.
- **temperature** - Temperature setting.
This is a number between 0 (almost no randomness) and 1 (very random).
The value is passed through to the provider. The range depends on the provider and model.
It is recommended to set either `temperature` or `topP`, but not both.
- **topP** - Nucleus sampling. This is a number between 0 and 1.
E.g. 0.1 would mean that only tokens with the top 10% probability mass are considered.
- **topP** - Nucleus sampling.
The value is passed through to the provider. The range depends on the provider and model.
It is recommended to set either `temperature` or `topP`, but not both.
- **presencePenalty** - Presence penalty setting.
It affects the likelihood of the model to repeat information that is already in the prompt.
The presence penalty is a number between -1 (increase repetition) and 1 (maximum penalty, decrease repetition).
0 means no penalty.
The value is passed through to the provider. The range depends on the provider and model.
- **frequencyPenalty** - Frequency penalty setting.
It affects the likelihood of the model to repeatedly use the same words or phrases.
The frequency penalty is a number between -1 (increase repetition) and 1 (maximum penalty, decrease repetition).
0 means no penalty.
The value is passed through to the provider. The range depends on the provider and model.
- **seed** - The seed (integer) to use for random sampling.
If set and supported by the model, calls will generate deterministic results.
- **maxRetries** - Maximum number of retries. Set to 0 to disable retries. Default: 2.
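
A minimal usage sketch of these shared settings with `streamText` (provider wiring, model id, and prompt are assumptions; the accepted ranges are whatever your provider and model support):

```ts
import { streamText } from 'ai';
// Provider setup is assumed for illustration.
import { openai } from '@ai-sdk/openai';

const result = await streamText({
  model: openai.chat('gpt-3.5-turbo'),
  prompt: 'Explain nucleus sampling in one paragraph.',
  maxTokens: 256, // cap on generated tokens
  temperature: 0.7, // passed through unchanged; do not combine with topP
  presencePenalty: 0.5, // passed through unchanged
  frequencyPenalty: 0.5, // passed through unchanged
  seed: 42, // deterministic sampling where the model supports it
  maxRetries: 2, // the default, shown explicitly
});

for await (const delta of result.textStream) {
  process.stdout.write(delta);
}
```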
@@ -96,7 +96,7 @@ export class AnthropicMessagesLanguageModel implements LanguageModelV1 {

// standardized settings:
max_tokens: maxTokens ?? 4096, // 4096: max model output tokens
temperature, // uses 0..1 scale
temperature,
top_p: topP,

// prompt:
12 changes: 5 additions & 7 deletions packages/core/core/generate-object/generate-object.ts
@@ -32,19 +32,17 @@ This function does not stream the output. If you want to stream the output, use
@param maxTokens - Maximum number of tokens to generate.
@param temperature - Temperature setting.
This is a number between 0 (almost no randomness) and 1 (very random).
The value is passed through to the provider. The range depends on the provider and model.
It is recommended to set either `temperature` or `topP`, but not both.
@param topP - Nucleus sampling. This is a number between 0 and 1.
E.g. 0.1 would mean that only tokens with the top 10% probability mass are considered.
@param topP - Nucleus sampling.
The value is passed through to the provider. The range depends on the provider and model.
It is recommended to set either `temperature` or `topP`, but not both.
@param presencePenalty - Presence penalty setting.
It affects the likelihood of the model to repeat information that is already in the prompt.
The presence penalty is a number between -1 (increase repetition) and 1 (maximum penalty, decrease repetition).
0 means no penalty.
The value is passed through to the provider. The range depends on the provider and model.
@param frequencyPenalty - Frequency penalty setting.
It affects the likelihood of the model to repeatedly use the same words or phrases.
The frequency penalty is a number between -1 (increase repetition) and 1 (maximum penalty, decrease repetition).
0 means no penalty.
The value is passed through to the provider. The range depends on the provider and model.
@param seed - The seed (integer) to use for random sampling.
If set and supported by the model, calls will generate deterministic results.
12 changes: 5 additions & 7 deletions packages/core/core/generate-object/stream-object.ts
@@ -37,19 +37,17 @@ This function streams the output. If you do not want to stream the output, use `
@param maxTokens - Maximum number of tokens to generate.
@param temperature - Temperature setting.
This is a number between 0 (almost no randomness) and 1 (very random).
The value is passed through to the provider. The range depends on the provider and model.
It is recommended to set either `temperature` or `topP`, but not both.
@param topP - Nucleus sampling. This is a number between 0 and 1.
E.g. 0.1 would mean that only tokens with the top 10% probability mass are considered.
@param topP - Nucleus sampling.
The value is passed through to the provider. The range depends on the provider and model.
It is recommended to set either `temperature` or `topP`, but not both.
@param presencePenalty - Presence penalty setting.
It affects the likelihood of the model to repeat information that is already in the prompt.
The presence penalty is a number between -1 (increase repetition) and 1 (maximum penalty, decrease repetition).
0 means no penalty.
The value is passed through to the provider. The range depends on the provider and model.
@param frequencyPenalty - Frequency penalty setting.
It affects the likelihood of the model to repeatedly use the same words or phrases.
The frequency penalty is a number between -1 (increase repetition) and 1 (maximum penalty, decrease repetition).
0 means no penalty.
The value is passed through to the provider. The range depends on the provider and model.
@param seed - The seed (integer) to use for random sampling.
If set and supported by the model, calls will generate deterministic results.
12 changes: 5 additions & 7 deletions packages/core/core/generate-text/generate-text.ts
@@ -29,19 +29,17 @@ This function does not stream the output. If you want to stream the output, use
@param maxTokens - Maximum number of tokens to generate.
@param temperature - Temperature setting.
This is a number between 0 (almost no randomness) and 1 (very random).
The value is passed through to the provider. The range depends on the provider and model.
It is recommended to set either `temperature` or `topP`, but not both.
@param topP - Nucleus sampling. This is a number between 0 and 1.
E.g. 0.1 would mean that only tokens with the top 10% probability mass are considered.
@param topP - Nucleus sampling.
The value is passed through to the provider. The range depends on the provider and model.
It is recommended to set either `temperature` or `topP`, but not both.
@param presencePenalty - Presence penalty setting.
It affects the likelihood of the model to repeat information that is already in the prompt.
The presence penalty is a number between -1 (increase repetition) and 1 (maximum penalty, decrease repetition).
0 means no penalty.
The value is passed through to the provider. The range depends on the provider and model.
@param frequencyPenalty - Frequency penalty setting.
It affects the likelihood of the model to repeatedly use the same words or phrases.
The frequency penalty is a number between -1 (increase repetition) and 1 (maximum penalty, decrease repetition).
0 means no penalty.
The value is passed through to the provider. The range depends on the provider and model.
@param seed - The seed (integer) to use for random sampling.
If set and supported by the model, calls will generate deterministic results.
12 changes: 5 additions & 7 deletions packages/core/core/generate-text/stream-text.ts
@@ -39,19 +39,17 @@ This function streams the output. If you do not want to stream the output, use `
@param maxTokens - Maximum number of tokens to generate.
@param temperature - Temperature setting.
This is a number between 0 (almost no randomness) and 1 (very random).
The value is passed through to the provider. The range depends on the provider and model.
It is recommended to set either `temperature` or `topP`, but not both.
@param topP - Nucleus sampling. This is a number between 0 and 1.
E.g. 0.1 would mean that only tokens with the top 10% probability mass are considered.
@param topP - Nucleus sampling.
The value is passed through to the provider. The range depends on the provider and model.
It is recommended to set either `temperature` or `topP`, but not both.
@param presencePenalty - Presence penalty setting.
It affects the likelihood of the model to repeat information that is already in the prompt.
The presence penalty is a number between -1 (increase repetition) and 1 (maximum penalty, decrease repetition).
0 means no penalty.
The value is passed through to the provider. The range depends on the provider and model.
@param frequencyPenalty - Frequency penalty setting.
It affects the likelihood of the model to repeatedly use the same words or phrases.
The frequency penalty is a number between -1 (increase repetition) and 1 (maximum penalty, decrease repetition).
0 means no penalty.
The value is passed through to the provider. The range depends on the provider and model.
@param seed - The seed (integer) to use for random sampling.
If set and supported by the model, calls will generate deterministic results.
32 changes: 0 additions & 32 deletions packages/core/core/prompt/prepare-call-settings.ts
@@ -39,14 +39,6 @@ export function prepareCallSettings({
message: 'temperature must be a number',
});
}

if (temperature < 0 || temperature > 1) {
throw new InvalidArgumentError({
parameter: 'temperature',
value: temperature,
message: 'temperature must be between 0 and 1 (inclusive)',
});
}
}

if (topP != null) {
@@ -57,14 +49,6 @@
message: 'topP must be a number',
});
}

if (topP < 0 || topP > 1) {
throw new InvalidArgumentError({
parameter: 'topP',
value: topP,
message: 'topP must be between 0 and 1 (inclusive)',
});
}
}

if (presencePenalty != null) {
@@ -75,14 +59,6 @@
message: 'presencePenalty must be a number',
});
}

if (presencePenalty < -1 || presencePenalty > 1) {
throw new InvalidArgumentError({
parameter: 'presencePenalty',
value: presencePenalty,
message: 'presencePenalty must be between -1 and 1 (inclusive)',
});
}
}

if (frequencyPenalty != null) {
@@ -93,14 +69,6 @@
message: 'frequencyPenalty must be a number',
});
}

if (frequencyPenalty < -1 || frequencyPenalty > 1) {
throw new InvalidArgumentError({
parameter: 'frequencyPenalty',
value: frequencyPenalty,
message: 'frequencyPenalty must be between -1 and 1 (inclusive)',
});
}
}

if (seed != null) {
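
After these removals, `prepareCallSettings` keeps only type checks; a condensed sketch of the remaining pattern (not a verbatim copy of the file):

```ts
// Sketch: each setting still gets a "must be a number" check, but the old
// range checks (temperature 0..1, topP 0..1, penalties -1..1) are gone;
// acceptable ranges are now the provider's and model's concern.
if (temperature != null && typeof temperature !== 'number') {
  throw new InvalidArgumentError({
    parameter: 'temperature',
    value: temperature,
    message: 'temperature must be a number',
  });
}

if (topP != null && typeof topP !== 'number') {
  throw new InvalidArgumentError({
    parameter: 'topP',
    value: topP,
    message: 'topP must be a number',
  });
}

// The same pattern applies to presencePenalty, frequencyPenalty, and seed.
```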
2 changes: 1 addition & 1 deletion packages/mistral/src/mistral-chat-language-model.ts
@@ -87,7 +87,7 @@ export class MistralChatLanguageModel implements LanguageModelV1 {

// standardized settings:
max_tokens: maxTokens,
temperature, // uses 0..1 scale
temperature,
top_p: topP,
random_seed: seed,

45 changes: 0 additions & 45 deletions packages/openai/src/openai-chat-language-model.test.ts
@@ -330,51 +330,6 @@ describe('doStream', () => {
});
});

it('should scale the temperature', async () => {
prepareStreamResponse({ content: [] });

await provider.chat('gpt-3.5-turbo').doStream({
inputFormat: 'prompt',
mode: { type: 'regular' },
prompt: TEST_PROMPT,
temperature: 0.5,
});

expect((await server.getRequestBodyJson()).temperature).toBeCloseTo(1, 5);
});

it('should scale the frequency penalty', async () => {
prepareStreamResponse({ content: [] });

await provider.chat('gpt-3.5-turbo').doStream({
inputFormat: 'prompt',
mode: { type: 'regular' },
prompt: TEST_PROMPT,
frequencyPenalty: 0.2,
});

expect((await server.getRequestBodyJson()).frequency_penalty).toBeCloseTo(
0.4,
5,
);
});

it('should scale the presence penalty', async () => {
prepareStreamResponse({ content: [] });

await provider.chat('gpt-3.5-turbo').doStream({
inputFormat: 'prompt',
mode: { type: 'regular' },
prompt: TEST_PROMPT,
presencePenalty: -0.9,
});

expect((await server.getRequestBodyJson()).presence_penalty).toBeCloseTo(
-1.8,
5,
);
});

it('should pass custom headers', async () => {
prepareStreamResponse({ content: [] });

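
A hypothetical replacement test (a sketch reusing the helpers already present in this file: `prepareStreamResponse`, `provider`, `server`, and `TEST_PROMPT`) would assert pass-through instead of rescaling:

```ts
it('should pass the temperature through unchanged', async () => {
  prepareStreamResponse({ content: [] });

  await provider.chat('gpt-3.5-turbo').doStream({
    inputFormat: 'prompt',
    mode: { type: 'regular' },
    prompt: TEST_PROMPT,
    temperature: 0.5,
  });

  // With scaling removed, the value is forwarded verbatim (no longer rescaled to 1).
  expect((await server.getRequestBodyJson()).temperature).toBe(0.5);
});
```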
23 changes: 3 additions & 20 deletions packages/openai/src/openai-chat-language-model.ts
@@ -12,7 +12,6 @@ import {
generateId,
isParseableJson,
postJsonToApi,
scale,
} from '@ai-sdk/provider-utils';
import { z } from 'zod';
import { convertToOpenAIChatMessages } from './convert-to-openai-chat-messages';
@@ -71,26 +70,10 @@ export class OpenAIChatLanguageModel implements LanguageModelV1 {

// standardized settings:
max_tokens: maxTokens,
temperature: scale({
value: temperature,
outputMin: 0,
outputMax: 2,
}),
temperature,
top_p: topP,
frequency_penalty: scale({
value: frequencyPenalty,
inputMin: -1,
inputMax: 1,
outputMin: -2,
outputMax: 2,
}),
presence_penalty: scale({
value: presencePenalty,
inputMin: -1,
inputMax: 1,
outputMin: -2,
outputMax: 2,
}),
frequency_penalty: frequencyPenalty,
presence_penalty: presencePenalty,
seed,

// messages:
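
Judging by the removed arguments and the deleted test expectations (0.5 to 1, 0.2 to 0.4, -0.9 to -1.8), the dropped `scale` helper performed a plain linear interpolation between input and output ranges. A sketch of that mapping, useful when converting existing setting values by hand (the function name and defaults here are assumptions; the real helper lives in `@ai-sdk/provider-utils`):

```ts
// Linear rescale sketch: maps inputMin..inputMax onto outputMin..outputMax.
// Defaults of 0..1 match the old unified scale for temperature and topP.
function rescale({
  value,
  inputMin = 0,
  inputMax = 1,
  outputMin,
  outputMax,
}: {
  value: number;
  inputMin?: number;
  inputMax?: number;
  outputMin: number;
  outputMax: number;
}): number {
  return (
    ((value - inputMin) / (inputMax - inputMin)) * (outputMax - outputMin) +
    outputMin
  );
}

// Old unified values mapped to OpenAI-native values
// (matches the expectations in the deleted tests):
rescale({ value: 0.5, outputMin: 0, outputMax: 2 }); // 1
rescale({ value: 0.2, inputMin: -1, inputMax: 1, outputMin: -2, outputMax: 2 }); // 0.4
rescale({ value: -0.9, inputMin: -1, inputMax: 1, outputMin: -2, outputMax: 2 }); // -1.8
```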
45 changes: 0 additions & 45 deletions packages/openai/src/openai-completion-language-model.test.ts
@@ -208,51 +208,6 @@ describe('doStream', () => {
});
});

it('should scale the temperature', async () => {
prepareStreamResponse({ content: [] });

await provider.completion('gpt-3.5-turbo-instruct').doStream({
inputFormat: 'prompt',
mode: { type: 'regular' },
prompt: TEST_PROMPT,
temperature: 0.5,
});

expect((await server.getRequestBodyJson()).temperature).toBeCloseTo(1, 5);
});

it('should scale the frequency penalty', async () => {
prepareStreamResponse({ content: [] });

await provider.completion('gpt-3.5-turbo-instruct').doStream({
inputFormat: 'prompt',
mode: { type: 'regular' },
prompt: TEST_PROMPT,
frequencyPenalty: 0.2,
});

expect((await server.getRequestBodyJson()).frequency_penalty).toBeCloseTo(
0.4,
5,
);
});

it('should scale the presence penalty', async () => {
prepareStreamResponse({ content: [] });

await provider.completion('gpt-3.5-turbo-instruct').doStream({
inputFormat: 'prompt',
mode: { type: 'regular' },
prompt: TEST_PROMPT,
presencePenalty: -0.9,
});

expect((await server.getRequestBodyJson()).presence_penalty).toBeCloseTo(
-1.8,
5,
);
});

it('should pass custom headers', async () => {
prepareStreamResponse({ content: [] });

23 changes: 3 additions & 20 deletions packages/openai/src/openai-completion-language-model.ts
@@ -9,7 +9,6 @@ import {
createEventSourceResponseHandler,
createJsonResponseHandler,
postJsonToApi,
scale,
} from '@ai-sdk/provider-utils';
import { z } from 'zod';
import { convertToOpenAICompletionPrompt } from './convert-to-openai-completion-prompt';
@@ -77,26 +76,10 @@ export class OpenAICompletionLanguageModel implements LanguageModelV1 {

// standardized settings:
max_tokens: maxTokens,
temperature: scale({
value: temperature,
outputMin: 0,
outputMax: 2,
}),
temperature,
top_p: topP,
frequency_penalty: scale({
value: frequencyPenalty,
inputMin: -1,
inputMax: 1,
outputMin: -2,
outputMax: 2,
}),
presence_penalty: scale({
value: presencePenalty,
inputMin: -1,
inputMax: 1,
outputMin: -2,
outputMax: 2,
}),
frequency_penalty: frequencyPenalty,
presence_penalty: presencePenalty,
seed,

// prompt:
