Skip to content

Commit d25566a

Browse files
authoredJun 13, 2024··
feat (core): add cosineSimilarity helper function (#1939)
1 parent f9db8fd commit d25566a

File tree

10 files changed

+183
-23
lines changed

10 files changed

+183
-23
lines changed
 

‎.changeset/curly-taxis-warn.md

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
'ai': patch
3+
---
4+
5+
feat (core): add cosineSimilarity helper function

‎content/docs/03-ai-sdk-core/30-embeddings.mdx

+20
Original file line numberDiff line numberDiff line change
@@ -49,3 +49,23 @@ const { embeddings } = await embedMany({
4949
],
5050
});
5151
```
52+
53+
## Embedding Similarity
54+
55+
After embedding values, you can calculate the similarity between them using the [`cosineSimilarity`](/docs/reference/ai-sdk-core/cosine-similarity) function.
56+
This is useful, for example, for finding similar words or phrases in a dataset.
57+
You can also rank and filter related items based on their similarity.
58+
59+
```ts highlight={"2,10"}
60+
import { openai } from '@ai-sdk/openai';
61+
import { cosineSimilarity, embedMany } from 'ai';
62+
63+
const { embeddings } = await embedMany({
64+
model: openai.embedding('text-embedding-3-small'),
65+
values: ['sunny day at the beach', 'rainy afternoon in the city'],
66+
});
67+
68+
console.log(
69+
`cosine similarity: ${cosineSimilarity(embeddings[0], embeddings[1])}`,
70+
);
71+
```
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
---
2+
title: cosineSimilarity
3+
description: Calculate the cosine similarity between two vectors (API Reference)
4+
---
5+
6+
# `cosineSimilarity()`
7+
8+
When you want to compare the similarity of embeddings, standard vector similarity metrics
9+
like cosine similarity are often used.
10+
11+
`cosineSimilarity` calculates the cosine similarity between two vectors.
12+
A high value (close to 1) indicates that the vectors are very similar, a value close to 0 indicates that they are unrelated, and a low value (close to -1) indicates that they point in opposite directions.
13+
14+
```ts
15+
import { openai } from '@ai-sdk/openai';
16+
import { cosineSimilarity, embedMany } from 'ai';
17+
18+
const { embeddings } = await embedMany({
19+
model: openai.embedding('text-embedding-3-small'),
20+
values: ['sunny day at the beach', 'rainy afternoon in the city'],
21+
});
22+
23+
console.log(
24+
`cosine similarity: ${cosineSimilarity(embeddings[0], embeddings[1])}`,
25+
);
26+
```
27+
28+
## Import
29+
30+
<Snippet text={`import { cosineSimilarity } from "ai"`} prompt={false} />
31+
32+
## API Signature
33+
34+
### Parameters
35+
36+
<PropertiesTable
37+
content={[
38+
{
39+
name: 'vector1',
40+
type: 'number[]',
41+
description: `The first vector to compare`,
42+
},
43+
{
44+
name: 'vector2',
45+
type: 'number[]',
46+
description: `The second vector to compare`,
47+
},
48+
]}
49+
/>
50+
51+
### Returns
52+
53+
A number between -1 and 1 representing the cosine similarity between the two vectors.

‎content/docs/07-reference/ai-sdk-core/index.mdx

+6
Original file line numberDiff line numberDiff line change
@@ -63,5 +63,11 @@ It also contains the following helper functions:
6363
'Creates a registry for using models from multiple providers.',
6464
href: '/docs/reference/ai-sdk-core/model-registry',
6565
},
66+
{
67+
title: 'cosineSimilarity()',
68+
description:
69+
'Calculates the cosine similarity between two vectors, e.g. embeddings.',
70+
href: '/docs/reference/ai-sdk-core/cosine-similarity',
71+
},
6672
]}
6773
/>

‎examples/ai-core/src/complex/semantic-router/cosine-similarity.ts

-20
This file was deleted.

‎examples/ai-core/src/complex/semantic-router/semantic-router.ts

+7-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,10 @@
1-
import { Embedding, EmbeddingModel, embed, embedMany } from 'ai';
2-
import { cosineSimilarity } from './cosine-similarity';
1+
import {
2+
Embedding,
3+
EmbeddingModel,
4+
embed,
5+
embedMany,
6+
cosineSimilarity,
7+
} from 'ai';
38

49
export interface Route<NAME extends string> {
510
name: NAME;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
import { openai } from '@ai-sdk/openai';
2+
import { cosineSimilarity, embedMany } from 'ai';
3+
import dotenv from 'dotenv';
4+
5+
dotenv.config();
6+
7+
/**
 * Embeds two sample phrases with the OpenAI `text-embedding-3-small` model
 * and prints the cosine similarity of the resulting embeddings.
 */
async function main() {
  const model = openai.embedding('text-embedding-3-small');
  const values = ['sunny day at the beach', 'rainy afternoon in the city'];

  const { embeddings } = await embedMany({ model, values });

  const similarity = cosineSimilarity(embeddings[0], embeddings[1]);
  console.log(`cosine similarity: ${similarity}`);
}

// Report any failure (e.g. a rejected embedding request) on stderr.
main().catch(console.error);

‎packages/core/core/index.ts

+2-1
Original file line numberDiff line numberDiff line change
@@ -5,4 +5,5 @@ export * from './prompt';
55
export * from './registry';
66
export * from './tool';
77
export * from './types';
8-
export * from './util/deep-partial';
8+
export type { DeepPartial } from './util/deep-partial';
9+
export { cosineSimilarity } from './util/cosine-similarity';
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
import { cosineSimilarity } from './cosine-similarity';
2+
3+
it('should calculate cosine similarity correctly', () => {
  // Pre-calculated value: 32 / (sqrt(14) * sqrt(77)).
  expect(cosineSimilarity([1, 2, 3], [4, 5, 6])).toBeCloseTo(
    0.9746318461970762,
    5,
  );
});

it('should calculate negative cosine similarity correctly', () => {
  // Vectors pointing in exactly opposite directions have similarity -1.
  const similarity = cosineSimilarity([1, 0], [-1, 0]);

  expect(similarity).toBeCloseTo(-1, 5);
});

it('should throw an error when vectors have different lengths', () => {
  const compareMismatched = () => cosineSimilarity([1, 2, 3], [4, 5]);

  expect(compareMismatched).toThrowError();
});
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
/**
 * Calculates the cosine similarity between two vectors. This is a useful metric for
 * comparing the similarity of two vectors such as embeddings.
 *
 * @param vector1 - The first vector.
 * @param vector2 - The second vector.
 *
 * @returns The cosine similarity between vector1 and vector2, or 0 when either
 * vector has zero magnitude (all-zero or empty), because the similarity is
 * mathematically undefined in that case.
 * @throws {Error} If the vectors do not have the same length.
 */
export function cosineSimilarity(vector1: number[], vector2: number[]): number {
  if (vector1.length !== vector2.length) {
    throw new Error(
      `Vectors must have the same length (vector1: ${vector1.length} elements, vector2: ${vector2.length} elements)`,
    );
  }

  const denominator = magnitude(vector1) * magnitude(vector2);

  // A zero-magnitude vector would make the division below yield NaN, which
  // silently breaks sorting and threshold comparisons downstream.
  if (denominator === 0) {
    return 0;
  }

  return dotProduct(vector1, vector2) / denominator;
}

/**
 * Calculates the dot product of two vectors.
 *
 * @param vector1 - The first vector.
 * @param vector2 - The second vector; must be at least as long as vector1.
 * @returns The dot product of vector1 and vector2.
 */
function dotProduct(vector1: number[], vector2: number[]): number {
  return vector1.reduce(
    (accumulator: number, value: number, index: number) =>
      // Non-null assertion: cosineSimilarity validates that both vectors have
      // the same length before calling, so the index is always in range.
      accumulator + value * vector2[index]!,
    0,
  );
}

/**
 * Calculates the magnitude (Euclidean norm) of a vector.
 *
 * @param vector - The vector.
 * @returns The magnitude of the vector.
 */
function magnitude(vector: number[]): number {
  return Math.sqrt(dotProduct(vector, vector));
}

0 commit comments

Comments
 (0)
Please sign in to comment.