diff --git a/cspell.schema.json b/cspell.schema.json index 66c53f7872b..84479e36364 100644 --- a/cspell.schema.json +++ b/cspell.schema.json @@ -42,9 +42,13 @@ "type": "number" }, "map": { - "description": "The set of substrings to map, these are generally single character strings.\n\nMultiple sets can be defined by using a `|` to separate them.\n\nExample: `\"eéê|aåá\"` contains two different sets.\n\nTo add a multi-character substring use `()`.\n\nExample: `\"f(ph)(gh)\"` results in the following set: `f`, `ph`, `gh`.", + "description": "The set of substrings to map, these are generally single character strings.\n\nMultiple sets can be defined by using a `|` to separate them.\n\nExample: `\"eéê|aåá\"` contains two different sets.\n\nTo add a multi-character substring use `()`.\n\nExample: `\"f(ph)(gh)\"` results in the following set: `f`, `ph`, `gh`.\n\n- To match the beginning of a word, use `^`: `\"(^I)\"\"`.\n- To match the end of a word, use `$`: `\"(e$)(ing$)\"`.", "type": "string" }, + "penalty": { + "description": "Add a penalty to the final cost. This is used to discourage certain suggestions.\n\nExample: ```yaml # Match adding/removing `-` to the end of a word. map: \"$(-$)\" replace: 50 penalty: 100 ```\n\nThis makes adding a `-` to the end of a word more expensive.\n\nThink of it as taking the toll way for speed but getting the bill later.", + "type": "number" + }, "replace": { "description": "The cost to replace of of the substrings in the map with another substring in the map. 
Example: Map['a', 'i'] This would be the cost to substitute `a` with `i`: Like `bat` to `bit` or the reverse.", "type": "number" @@ -72,9 +76,13 @@ "type": "number" }, "map": { - "description": "The set of substrings to map, these are generally single character strings.\n\nMultiple sets can be defined by using a `|` to separate them.\n\nExample: `\"eéê|aåá\"` contains two different sets.\n\nTo add a multi-character substring use `()`.\n\nExample: `\"f(ph)(gh)\"` results in the following set: `f`, `ph`, `gh`.", + "description": "The set of substrings to map, these are generally single character strings.\n\nMultiple sets can be defined by using a `|` to separate them.\n\nExample: `\"eéê|aåá\"` contains two different sets.\n\nTo add a multi-character substring use `()`.\n\nExample: `\"f(ph)(gh)\"` results in the following set: `f`, `ph`, `gh`.\n\n- To match the beginning of a word, use `^`: `\"(^I)\"\"`.\n- To match the end of a word, use `$`: `\"(e$)(ing$)\"`.", "type": "string" }, + "penalty": { + "description": "Add a penalty to the final cost. This is used to discourage certain suggestions.\n\nExample: ```yaml # Match adding/removing `-` to the end of a word. map: \"$(-$)\" replace: 50 penalty: 100 ```\n\nThis makes adding a `-` to the end of a word more expensive.\n\nThink of it as taking the toll way for speed but getting the bill later.", + "type": "number" + }, "replace": { "description": "The cost to replace of of the substrings in the map with another substring in the map. 
Example: Map['a', 'i'] This would be the cost to substitute `a` with `i`: Like `bat` to `bit` or the reverse.", "type": "number" @@ -102,9 +110,13 @@ "type": "number" }, "map": { - "description": "The set of substrings to map, these are generally single character strings.\n\nMultiple sets can be defined by using a `|` to separate them.\n\nExample: `\"eéê|aåá\"` contains two different sets.\n\nTo add a multi-character substring use `()`.\n\nExample: `\"f(ph)(gh)\"` results in the following set: `f`, `ph`, `gh`.", + "description": "The set of substrings to map, these are generally single character strings.\n\nMultiple sets can be defined by using a `|` to separate them.\n\nExample: `\"eéê|aåá\"` contains two different sets.\n\nTo add a multi-character substring use `()`.\n\nExample: `\"f(ph)(gh)\"` results in the following set: `f`, `ph`, `gh`.\n\n- To match the beginning of a word, use `^`: `\"(^I)\"\"`.\n- To match the end of a word, use `$`: `\"(e$)(ing$)\"`.", "type": "string" }, + "penalty": { + "description": "Add a penalty to the final cost. This is used to discourage certain suggestions.\n\nExample: ```yaml # Match adding/removing `-` to the end of a word. map: \"$(-$)\" replace: 50 penalty: 100 ```\n\nThis makes adding a `-` to the end of a word more expensive.\n\nThink of it as taking the toll way for speed but getting the bill later.", + "type": "number" + }, "replace": { "description": "The cost to replace of of the substrings in the map with another substring in the map. Example: Map['a', 'i'] This would be the cost to substitute `a` with `i`: Like `bat` to `bit` or the reverse.", "type": "number" @@ -870,7 +882,7 @@ "$ref": "#/definitions/CostMapDefSwap" } ], - "description": "A WeightedMapDef enables setting weights for edits between related characters and substrings.\n\nMultiple groups can be defined using a `|`. 
A multi-character substring is defined using `()`.\n\nFor example, in some languages, some letters sound alike.\n\n```ts { map: 'sc(sh)(sch)(ss)|t(tt)', // two groups. replace: 50, // Make it 1/2 the cost of a normal edit to replace a `t` with `tt`. } ```\n\nThe following could be used to make inserting, removing, or replacing vowels cheaper. ```ts { map: 'aeiouy', //. insDel: 50, // Make it is cheaper to insert or delete a vowel. replace: 45, // It is even cheaper to replace one with another. } ```\n\nNote: the default edit distance is 100." + "description": "A WeightedMapDef enables setting weights for edits between related characters and substrings.\n\nMultiple groups can be defined using a `|`. A multi-character substring is defined using `()`.\n\nFor example, in some languages, some letters sound alike.\n\n```yaml map: 'sc(sh)(sch)(ss)|t(tt)' # two groups. replace: 50 # Make it 1/2 the cost of a normal edit to replace a `t` with `tt`. ```\n\nThe following could be used to make inserting, removing, or replacing vowels cheaper. ```yaml map: 'aeiouy' insDel: 50 # Make it is cheaper to insert or delete a vowel. replace: 45 # It is even cheaper to replace one with another. ```\n\nNote: the default edit distance is 100." 
}, "SuggestionCostsDefs": { "items": { diff --git a/packages/cspell-trie-lib/package-lock.json b/packages/cspell-trie-lib/package-lock.json index b35a0c61fb6..37dc7843fb2 100644 --- a/packages/cspell-trie-lib/package-lock.json +++ b/packages/cspell-trie-lib/package-lock.json @@ -13,6 +13,7 @@ "gensequence": "^3.1.1" }, "devDependencies": { + "@cspell/cspell-types": "^5.15.3", "@cspell/dict-en_us": "^2.1.4", "@cspell/dict-es-es": "^2.1.0", "@types/fs-extra": "^9.0.13", @@ -585,6 +586,15 @@ "integrity": "sha512-0hYQ8SB4Db5zvZB4axdMHGwEaQjkZzFjQiN9LVYvIFB2nSUHW9tYpxWriPrWDASIxiaXax83REcLxuSdnGPZtw==", "dev": true }, + "node_modules/@cspell/cspell-types": { + "version": "5.15.3", + "resolved": "https://registry.npmjs.org/@cspell/cspell-types/-/cspell-types-5.15.3.tgz", + "integrity": "sha512-9SdGHfOlcI86NBkLnCMuJYE1+//686HTNETbISyPSvKyrX8IyNLEN9rCiSQp542UAixG5sRBD96V6MQ1ByZIAw==", + "dev": true, + "engines": { + "node": ">=12.13.0" + } + }, "node_modules/@cspell/dict-en_us": { "version": "2.1.4", "resolved": "https://registry.npmjs.org/@cspell/dict-en_us/-/dict-en_us-2.1.4.tgz", @@ -4444,6 +4454,12 @@ "integrity": "sha512-0hYQ8SB4Db5zvZB4axdMHGwEaQjkZzFjQiN9LVYvIFB2nSUHW9tYpxWriPrWDASIxiaXax83REcLxuSdnGPZtw==", "dev": true }, + "@cspell/cspell-types": { + "version": "5.15.3", + "resolved": "https://registry.npmjs.org/@cspell/cspell-types/-/cspell-types-5.15.3.tgz", + "integrity": "sha512-9SdGHfOlcI86NBkLnCMuJYE1+//686HTNETbISyPSvKyrX8IyNLEN9rCiSQp542UAixG5sRBD96V6MQ1ByZIAw==", + "dev": true + }, "@cspell/dict-en_us": { "version": "2.1.4", "resolved": "https://registry.npmjs.org/@cspell/dict-en_us/-/dict-en_us-2.1.4.tgz", diff --git a/packages/cspell-trie-lib/package.json b/packages/cspell-trie-lib/package.json index 1783bc90831..eea693aea75 100644 --- a/packages/cspell-trie-lib/package.json +++ b/packages/cspell-trie-lib/package.json @@ -44,6 +44,7 @@ "node": ">=12.13.0" }, "devDependencies": { + "@cspell/cspell-types": "^5.15.3", "@cspell/dict-en_us": "^2.1.4", 
"@cspell/dict-es-es": "^2.1.0", "@types/fs-extra": "^9.0.13", diff --git a/packages/cspell-trie-lib/src/lib/distance/distanceAStarWeighted.test.ts b/packages/cspell-trie-lib/src/lib/distance/distanceAStarWeighted.test.ts index 37d75bd1210..5bec54b7008 100644 --- a/packages/cspell-trie-lib/src/lib/distance/distanceAStarWeighted.test.ts +++ b/packages/cspell-trie-lib/src/lib/distance/distanceAStarWeighted.test.ts @@ -51,14 +51,18 @@ describe('distanceAStar', () => { ${''} | ${''} | ${undefined} | ${0} ${'walk'} | ${'walking'} | ${undefined} | ${300} ${'walk'} | ${''} | ${calcWeightMap(mapLetters())} | ${200} - ${'1234'} | ${''} | ${calcWeightMap(mapLetters())} | ${400} + ${'1234'} | ${''} | ${calcWeightMap(mapLetters())} | ${804} ${'walk'} | ${'walking'} | ${calcWeightMap()} | ${50} ${'wake up'} | ${'woken up'} | ${calcWeightMap()} | ${145} ${'definition'} | ${'defunishun'} | ${calcWeightMap()} | ${45 + 40} ${'reputation'} | ${'repetition'} | ${calcWeightMap()} | ${45 + 45} + ${'gr8'} | ${'great'} | ${calcWeightMap()} | ${250} + ${'read'} | ${'read7'} | ${calcWeightMap()} | ${201} ${'airplane'} | ${'aeroplane'} | ${calcWeightMap()} | ${60} ${'talked'} | ${'walking'} | ${calcWeightMap()} | ${150} ${'kings'} | ${'king'} | ${calcWeightMap()} | ${50} + ${'re-wind'} | ${'rewind'} | ${calcWeightMap()} | ${202} + ${'re-'} | ${'re'} | ${calcWeightMap()} | ${201} ${"I'm talk'n to u"} | ${'I am talking to you'} | ${calcWeightMap()} | ${302} ${"wear'd u go?"} | ${'where did you go?'} | ${calcWeightMap()} | ${304} `( @@ -127,6 +131,24 @@ function calcWeightMap(...defs: SuggestionCostMapDef[]): WeightMap { { map: '(air)(aer)(err)|(oar)(or)(hor)|(or)(our)', replace: 40, + }, + { + description: 'Penalty for inserting numbers', + map: '0123456789', + insDel: 1, // Cheap to insert, + penalty: 200, // Costly later + }, + { + description: 'Discourage leading and trailing `-`', + map: '(^-)(^)|($)(-$)', + replace: 1, // Cheap to insert, + penalty: 200, // Costly later + }, + { + 
description: 'Discourage inserting special characters `-`', + map: '-._', + insDel: 2, // Cheap to insert, + penalty: 200, // Costly later } ); } diff --git a/packages/cspell-trie-lib/src/lib/distance/distanceAStarWeighted.ts b/packages/cspell-trie-lib/src/lib/distance/distanceAStarWeighted.ts index 1ffb662e906..ebcaf5aec3c 100644 --- a/packages/cspell-trie-lib/src/lib/distance/distanceAStarWeighted.ts +++ b/packages/cspell-trie-lib/src/lib/distance/distanceAStarWeighted.ts @@ -15,40 +15,44 @@ export function distanceAStarWeighted(wordA: string, wordB: string, map: WeightM const candidates = new PairingHeap(compare); - candidates.add({ ai: 0, bi: 0, c: 0 }); + candidates.add({ ai: 0, bi: 0, c: 0, p: 0 }); + /** Substitute / Replace */ function opSub(n: Node) { - const { ai, bi, c } = n; + const { ai, bi, c, p } = n; if (ai < aN && bi < bN) { const cc = a[ai] === b[bi] ? c : c + cost; - candidates.add({ ai: ai + 1, bi: bi + 1, c: cc }); + candidates.add({ ai: ai + 1, bi: bi + 1, c: cc, p }); } } + /** Insert */ function opIns(n: Node) { - const { ai, bi, c } = n; + const { ai, bi, c, p } = n; if (bi < bN) { - candidates.add({ ai: ai, bi: bi + 1, c: c + cost }); + candidates.add({ ai: ai, bi: bi + 1, c: c + cost, p }); } } + /** Delete */ function opDel(n: Node) { - const { ai, bi, c } = n; + const { ai, bi, c, p } = n; if (ai < aN) { - candidates.add({ ai: ai + 1, bi: bi, c: c + cost }); + candidates.add({ ai: ai + 1, bi: bi, c: c + cost, p }); } } + /** Swap adjacent letters */ function opSwap(n: Node) { - const { ai, bi, c } = n; + const { ai, bi, c, p } = n; if (a[ai] === b[bi + 1] && a[ai + 1] === b[bi]) { - candidates.add({ ai: ai + 2, bi: bi + 2, c: c + cost }); + candidates.add({ ai: ai + 2, bi: bi + 2, c: c + cost, p }); } } function opMap(n: Node) { - const { ai, bi, c } = n; - const pos = { a, b, ai, bi, c }; + const { ai, bi, c, p } = n; + const pos = { a, b, ai, bi, c, p }; const costCalculations = [map.calcInsDelCosts(pos), map.calcSwapCosts(pos), 
map.calcReplaceCosts(pos)]; costCalculations.forEach((iter) => { for (const nn of iter) { @@ -70,7 +74,7 @@ export function distanceAStarWeighted(wordA: string, wordB: string, map: WeightM } // istanbul ignore else - return best ? best.c : -1; + return best ? best.c + best.p : -1; } interface Pos { @@ -83,6 +87,8 @@ interface Pos { interface Node extends Pos { /** the current cost */ c: number; + /** the current penalty */ + p: number; } function compare(a: Node, b: Node): number { diff --git a/packages/cspell-trie-lib/src/lib/distance/suggestionCostsDef.ts b/packages/cspell-trie-lib/src/lib/distance/suggestionCostsDef.ts index d04f7025dce..4fa50a958ff 100644 --- a/packages/cspell-trie-lib/src/lib/distance/suggestionCostsDef.ts +++ b/packages/cspell-trie-lib/src/lib/distance/suggestionCostsDef.ts @@ -1,73 +1 @@ -// cspell:ignore aeiouy -/** - * A WeightedMapDef enables setting weights for edits between related characters and substrings. - * - * Multiple groups can be defined using a `|`. - * A multi-character substring is defined using `()`. - * - * For example, in some languages, some letters sound alike. - * - * ```ts - * { - * map: 'sc(sh)(sch)(ss)|t(tt)', // two groups. - * replace: 50, // Make it 1/2 the cost of a normal edit to replace a `t` with `tt`. - * } - * ``` - * - * The following could be used to make inserting, removing, or replacing vowels cheaper. - * ```ts - * { - * map: 'aeiouy', //. - * insDel: 50, // Make it is cheaper to insert or delete a vowel. - * replace: 45, // It is even cheaper to replace one with another. - * } - * ``` - * - * Note: the default edit distance is 100. - */ -export type SuggestionCostMapDef = CostMapDefReplace | CostMapDefInsDel | CostMapDefSwap; - -interface CostMapDefBase { - /** - * The set of substrings to map, these are generally single character strings. - * - * Multiple sets can be defined by using a `|` to separate them. - * - * Example: `"eéê|aåá"` contains two different sets. 
- * - * To add a multi-character substring use `()`. - * - * Example: `"f(ph)(gh)"` results in the following set: `f`, `ph`, `gh`. - */ - map: string; - /** The cost to insert/delete one of the substrings in the map. Note: insert/delete costs are symmetrical. */ - insDel?: number; - /** - * The cost to replace of of the substrings in the map with another substring in the map. - * Example: Map['a', 'i'] - * This would be the cost to substitute `a` with `i`: Like `bat` to `bit` or the reverse. - */ - replace?: number; - /** - * The cost to swap two adjacent substrings found in the map. - * Example: Map['e', 'i'] - * This represents the cost to change `ei` to `ie` or the reverse. - */ - swap?: number; - /** - * A description to describe the purpose of the map. - */ - description?: string; -} - -export interface CostMapDefReplace extends CostMapDefBase { - replace: number; -} - -export interface CostMapDefInsDel extends CostMapDefBase { - insDel: number; -} - -export interface CostMapDefSwap extends CostMapDefBase { - swap: number; -} +export type { SuggestionCostMapDef } from '@cspell/cspell-types'; diff --git a/packages/cspell-trie-lib/src/lib/distance/weightedMaps.test.ts b/packages/cspell-trie-lib/src/lib/distance/weightedMaps.test.ts index 7ed3e866821..d9420a994d2 100644 --- a/packages/cspell-trie-lib/src/lib/distance/weightedMaps.test.ts +++ b/packages/cspell-trie-lib/src/lib/distance/weightedMaps.test.ts @@ -10,7 +10,7 @@ import { const { splitMapSubstrings, splitMap, findTrieCostPrefixes } = __testing__; -// const u = undefined; +// const u = undefined; cspell: describe('Validate weightedMaps', () => { test.each` @@ -63,10 +63,10 @@ describe('Validate weightedMaps', () => { }); test.each` - defs | word | offset | expected - ${[]} | ${''} | ${1} | ${[]} - ${[defIns('aeiou', 7), defIns('(ae)(ea)(ou)(ei)(ie)', 11)]} | ${'read'} | ${1} | ${[{ i: 2, c: 7 }, { i: 3, c: 11 }] /* cspell:disable-line */} - ${[defIns('aeiou', 7), defIns('(ae)(ea)(ou)(ei)(ie)', 11)]} | 
${'read'} | ${2} | ${[{ i: 3, c: 7 }] /* cspell:disable-line */} + defs | word | offset | expected + ${[]} | ${''} | ${1} | ${[]} + ${[defIns('aeiou', 7, penalty(5)), defIns('(ae)(ea)(ou)(ei)(ie)', 11)]} | ${'read'} | ${1} | ${[{ i: 2, c: 7, p: 5 }, { i: 3, c: 11, p: 0 }] /* cspell:disable-line */} + ${[defIns('aeiou', 7, penalty(5)), defIns('(ae)(ea)(ou)(ei)(ie)', 11)]} | ${'read'} | ${2} | ${[{ i: 3, c: 7, p: 5 }] /* cspell:disable-line */} `('findTrieCostPrefixes with insDel $defs.0 $defs.1 $word $offset', ({ defs, word, offset, expected }) => { const map = createWeightMap(...defs); expect([...findTrieCostPrefixes(map.insDel, word, offset)]).toEqual(expected); @@ -74,12 +74,13 @@ describe('Validate weightedMaps', () => { // cspell:ignore aeiou test.each` - defs | wordA | ai | wordB | bi | expected - ${[]} | ${''} | ${1} | ${''} | ${0} | ${[]} - ${[defIns('aeiou', 7), defIns('(ae)(ea)(ou)(ei)(ie)', 11)]} | ${'read'} | ${1} | ${''} | ${0} | ${[{ ai: 2, bi: 0, c: 1007 }, { ai: 3, bi: 0, c: 1011 }]} - ${[defIns('aeiou', 7), defIns('(ae)(ea)(ou)(ei)(ie)', 11)]} | ${'read'} | ${1} | ${'ride'} | ${1} | ${[{ ai: 2, bi: 1, c: 1007 /* del e */ }, { ai: 3, bi: 1, c: 1011 /* del ea */ }, { ai: 1, bi: 2, c: 1007 /* ins i */ }]} - ${[defIns('aeiou', 7), defIns('(ae)(ea)(ou)(ei)(ie)', 11)]} | ${'red'} | ${1} | ${'read'} | ${1} | ${[{ ai: 2, bi: 1, c: 1007 /* del e */ }, { ai: 1, bi: 2, c: 1007 /* ins */ }, { ai: 1, bi: 3, c: 1011 /* ins ea */ }]} - ${[defIns('aeiou', 7), defIns('(ae)(ea)(ou)(ei)(ie)', 11)]} | ${'red'} | ${2} | ${'read'} | ${2} | ${[{ ai: 2, bi: 3, c: 1007 /* ins a */ }]} + defs | wordA | ai | wordB | bi | expected + ${[]} | ${''} | ${1} | ${''} | ${0} | ${[]} + ${[defIns('aeiou', 7), defIns('(ae)(ea)(ou)(ei)(ie)', 11)]} | ${'read'} | ${1} | ${''} | ${0} | ${[{ ai: 2, bi: 0, c: 1007, p: 1000 }, { ai: 3, bi: 0, c: 1011, p: 1000 }]} + ${[defIns('aeiou', 7), defIns('(ae)(ea)(ou)(ei)(ie)', 11)]} | ${'read'} | ${1} | ${'ride'} | ${1} | ${[{ ai: 2, bi: 1, c: 1007 /* 
del e */, p: 1000 }, { ai: 3, bi: 1, c: 1011 /* del ea */, p: 1000 }, { ai: 1, bi: 2, c: 1007 /* ins i */, p: 1000 }]} + ${[defIns('aeiou', 7), defIns('(ae)(ea)(ou)(ei)(ie)', 11)]} | ${'red'} | ${1} | ${'read'} | ${1} | ${[{ ai: 2, bi: 1, c: 1007 /* del e */, p: 1000 }, { ai: 1, bi: 2, c: 1007 /* ins */, p: 1000 }, { ai: 1, bi: 3, c: 1011 /* ins ea */, p: 1000 }]} + ${[defIns('aeiou', 7), defIns('(ae)(ea)(ou)(ei)(ie)', 11)]} | ${'red'} | ${2} | ${'read'} | ${2} | ${[{ ai: 2, bi: 3, c: 1007 /* ins a */, p: 1000 }]} + ${[defIns('1234567890', 7, penalty(20))]} | ${'cost'} | ${4} | ${'cost8'} | ${4} | ${[{ ai: 4, bi: 5, c: 1007 /* insert 8 */, p: 1020 }]} `( 'calcInsDelCosts with $defs.0 $defs.1 "$wordA"@$ai, "$wordB"@$bi', ({ @@ -98,7 +99,7 @@ describe('Validate weightedMaps', () => { expected: CostPosition[]; }) => { const map = createWeightMap(...defs); - const results = [...map.calcInsDelCosts({ a: wordA, b: wordB, ai, bi, c: 1000 })]; + const results = [...map.calcInsDelCosts({ a: wordA, b: wordB, ai, bi, c: 1000, p: 1000 })]; expected.forEach((p) => { (p.a = p.a ?? wordA), (p.b = p.b ?? 
wordB); }); @@ -110,12 +111,12 @@ describe('Validate weightedMaps', () => { test.each` defs | wordA | ai | wordB | bi | expected ${[]} | ${''} | ${0} | ${''} | ${0} | ${[]} - ${[defRep('aeiou', 5), defRep('ae(ae)(ea)', 7)]} | ${'read'} | ${1} | ${'red'} | ${1} | ${[{ ai: 3, bi: 2, c: 1007 /* ea -> a */ }]} - ${[defRep('aeiou', 5), defRep('ae(ae)(ea)', 7)]} | ${'read'} | ${1} | ${'road'} | ${1} | ${[{ ai: 2, bi: 2, c: 1005 /* e -> o */ }]} - ${[defRep('aeiou', 5), defRep('o(oo)|e(ee)', 7)]} | ${'met'} | ${1} | ${'meet'} | ${1} | ${[{ ai: 2, bi: 3, c: 1007 /* e -> ee */ }]} - ${[defRep('aeiou', 5), defRep('o(oo)|e(ee)', 7)]} | ${'meet'} | ${1} | ${'met'} | ${1} | ${[{ ai: 3, bi: 2, c: 1007 /* ee -> e */ }]} - ${[defRep('aeiou', 5), defRep('(ei)(ie)', 7)]} | ${'believe'} | ${3} | ${'receive'} | ${3} | ${[{ ai: 4, bi: 4, c: 1005 /* i => e */ }, { ai: 5, bi: 5, c: 1007 /* ie => ei */ }]} - ${[defRep('(sk)(sch)', 5), defRep('(sch)(sk)', 7)]} | ${'school'} | ${0} | ${'skull'} | ${0} | ${[{ ai: 3, bi: 2, c: 1005 /* sch => sk */ }]} + ${[defRep('aeiou', 5), defRep('ae(ae)(ea)', 7)]} | ${'read'} | ${1} | ${'red'} | ${1} | ${[{ ai: 3, bi: 2, c: 1007 /* ea -> a */, p: 1000 }]} + ${[defRep('aeiou', 5), defRep('ae(ae)(ea)', 7)]} | ${'read'} | ${1} | ${'road'} | ${1} | ${[{ ai: 2, bi: 2, c: 1005 /* e -> o */, p: 1000 }]} + ${[defRep('aeiou', 5), defRep('o(oo)|e(ee)', 7)]} | ${'met'} | ${1} | ${'meet'} | ${1} | ${[{ ai: 2, bi: 3, c: 1007 /* e -> ee */, p: 1000 }]} + ${[defRep('aeiou', 5), defRep('o(oo)|e(ee)', 7)]} | ${'meet'} | ${1} | ${'met'} | ${1} | ${[{ ai: 3, bi: 2, c: 1007 /* ee -> e */, p: 1000 }]} + ${[defRep('aeiou', 5), defRep('(ei)(ie)', 7)]} | ${'believe'} | ${3} | ${'receive'} | ${3} | ${[{ ai: 4, bi: 4, c: 1005 /* i => e */, p: 1000 }, { ai: 5, bi: 5, c: 1007 /* ie => ei */, p: 1000 }]} + ${[defRep('(sk)(sch)', 5), defRep('(sch)(sk)', 7)]} | ${'school'} | ${0} | ${'skull'} | ${0} | ${[{ ai: 3, bi: 2, c: 1005 /* sch => sk */, p: 1000 }]} `( 'calcReplaceCosts with 
$defs.0 $defs.1 "$wordA"@$ai, "$wordB"@$bi', ({ @@ -134,7 +135,7 @@ describe('Validate weightedMaps', () => { expected: CostPosition[]; }) => { const map = createWeightMap(...defs); - const results = [...map.calcReplaceCosts({ a: wordA, b: wordB, ai, bi, c: 1000 })]; + const results = [...map.calcReplaceCosts({ a: wordA, b: wordB, ai, bi, c: 1000, p: 1000 })]; expected.forEach((p) => { (p.a = p.a ?? wordA), (p.b = p.b ?? wordB); }); @@ -146,7 +147,8 @@ describe('Validate weightedMaps', () => { test.each` defs | wordA | ai | wordB | bi | expected ${[defSwap('ae', 9), defSwap('ei', 7)]} | ${'believe'} | ${1} | ${'receive'} | ${1} | ${[]} - ${[defSwap('ae', 9), defSwap('ei', 7)]} | ${'believe'} | ${3} | ${'receive'} | ${3} | ${[{ ai: 5, bi: 5, c: 1007 /* swap ei -> ie */ }]} + ${[defSwap('ae', 9), defSwap('ei', 7)]} | ${'believe'} | ${3} | ${'receive'} | ${3} | ${[{ ai: 5, bi: 5, c: 1007 /* swap ei -> ie */, p: 1000 }]} + ${[defSwap('ei', 7, penalty(20))]} | ${'believe'} | ${3} | ${'receive'} | ${3} | ${[{ ai: 5, bi: 5, c: 1007 /* swap ei -> ie */, p: 1020 }]} `( 'calcSwapCosts with $defs.0 $defs.1 "$wordA"@$ai, "$wordB"@$bi', ({ @@ -165,7 +167,7 @@ describe('Validate weightedMaps', () => { expected: CostPosition[]; }) => { const map = createWeightMap(...defs); - const results = [...map.calcSwapCosts({ a: wordA, b: wordB, ai, bi, c: 1000 })]; + const results = [...map.calcSwapCosts({ a: wordA, b: wordB, ai, bi, c: 1000, p: 1000 })]; expected.forEach((p) => { (p.a = p.a ?? wordA), (p.b = p.b ?? 
wordB); }); @@ -187,14 +189,26 @@ describe('Validate weightedMaps', () => { }); }); -function defIns(map: string, insDel: number, opt: Partial = {}): SuggestionCostMapDef { - return { ...opt, map, insDel }; +// function mo(...opts: Partial[]): Partial { +// return mergeOps(opts); +// } + +function penalty(penalty: number): Partial { + return { penalty }; +} + +function defIns(map: string, insDel: number, ...opts: Partial[]): SuggestionCostMapDef { + return { ...mergeOps(opts), map, insDel }; +} + +function defRep(map: string, replace: number, ...opts: Partial[]): SuggestionCostMapDef { + return { ...mergeOps(opts), map, replace }; } -function defRep(map: string, replace: number, opt: Partial = {}): SuggestionCostMapDef { - return { ...opt, map, replace }; +function defSwap(map: string, swap: number, ...opts: Partial[]): SuggestionCostMapDef { + return { ...mergeOps(opts), map, swap }; } -function defSwap(map: string, swap: number, opt: Partial = {}): SuggestionCostMapDef { - return { ...opt, map, swap }; +function mergeOps(opts: Partial[]): Partial { + return opts.reduce((acc, opt) => ({ ...acc, ...opt }), {} as Partial); } diff --git a/packages/cspell-trie-lib/src/lib/distance/weightedMaps.ts b/packages/cspell-trie-lib/src/lib/distance/weightedMaps.ts index 50026dfaac3..80d3ddf7119 100644 --- a/packages/cspell-trie-lib/src/lib/distance/weightedMaps.ts +++ b/packages/cspell-trie-lib/src/lib/distance/weightedMaps.ts @@ -11,11 +11,25 @@ interface WeightedRepTrieNode { swap?: number | undefined; } -interface TrieCost { +/** + * Costs are minimized while penalties are maximized. 
+ */ +interface Cost { + /** + * The cost of an operation + * `c'' = min(c, c')` + */ + c?: number | undefined; + /** + * The penalties applied + * `p'' = max(p, p')` + */ + p?: number | undefined; +} + +interface TrieCost extends Cost { /** nested trie nodes */ n?: Record; - /** the cost to insert/delete */ - c?: number | undefined; } interface TrieTrieCost { @@ -36,6 +50,8 @@ export interface CostPosition { bi: number; // accumulated cost to this point. c: number; + // accumulated penalties to this point. + p: number; } export interface WeightMap { @@ -65,9 +81,9 @@ export function addDefToWeightMap(map: WeightMap, ...defs: SuggestionCostMapDef[ function _addDefToWeightMap(map: WeightMap, ...defs: SuggestionCostMapDef[]): WeightMap { function addSet(set: string[], def: SuggestionCostMapDef) { - addSetToTrieCost(map.insDel, set, def.insDel); - addSetToTrieTrieCost(map.replace, set, def.replace); - addSetToTrieTrieCost(map.swap, set, def.swap); + addSetToTrieCost(map.insDel, set, def.insDel, def.penalty); + addSetToTrieTrieCost(map.replace, set, def.replace, def.penalty); + addSetToTrieTrieCost(map.swap, set, def.swap, def.penalty); } for (const def of defs) { @@ -86,6 +102,12 @@ function lowest(a: number | undefined, b: number | undefined): number | undefine return a <= b ? a : b; } +function highest(a: number | undefined, b: number | undefined): number | undefined { + if (a === undefined) return b; + if (b === undefined) return a; + return a >= b ? 
a : b; +} + /** * Splits a WeightedMapDef.map * @param map @@ -117,7 +139,7 @@ function splitMapSubstrings(map: string): string[] { return values.map((s) => s.trim()).filter((s) => !!s); } -function addToTrieCost(trie: TrieCost, str: string, cost: number): void { +function addToTrieCost(trie: TrieCost, str: string, cost: number, penalties: number | undefined): void { if (!str) return; let t = trie; for (const c of str) { @@ -125,31 +147,43 @@ function addToTrieCost(trie: TrieCost, str: string, cost: number): void { t = n[c] = n[c] || Object.create(null); } t.c = lowest(t.c, cost); + t.p = highest(t.p, penalties); } -function addToTrieTrieCost(trie: TrieTrieCost, left: string, right: string, cost: number): void { +function addToTrieTrieCost( + trie: TrieTrieCost, + left: string, + right: string, + cost: number, + penalties: number | undefined +): void { let t = trie; for (const c of left) { const n = (t.n = t.n || Object.create(null)); t = n[c] = n[c] || Object.create(null); } const trieCost = (t.t = t.t || Object.create(null)); - addToTrieCost(trieCost, right, cost); + addToTrieCost(trieCost, right, cost, penalties); } -function addSetToTrieCost(trie: TrieCost, set: string[], cost: number | undefined) { +function addSetToTrieCost(trie: TrieCost, set: string[], cost: number | undefined, penalties: number | undefined) { if (cost === undefined) return; for (const str of set) { - addToTrieCost(trie, str, cost); + addToTrieCost(trie, str, cost, penalties); } } -function addSetToTrieTrieCost(trie: TrieTrieCost, set: string[], cost: number | undefined) { +function addSetToTrieTrieCost( + trie: TrieTrieCost, + set: string[], + cost: number | undefined, + penalties: number | undefined +) { if (cost === undefined) return; for (const left of set) { for (const right of set) { if (left === right) continue; - addToTrieTrieCost(trie, left, right, cost); + addToTrieTrieCost(trie, left, right, cost, penalties); } } } @@ -181,19 +215,19 @@ function* walkTrieNodes }>( } } -function* 
walkTrieCost(trie: TrieCost): Generator<{ s: string; c: number }> { +function* walkTrieCost(trie: TrieCost): Generator<{ s: string; c: number; p: number | undefined }> { for (const { s, t } of walkTrieNodes(trie, '')) { if (t.c) { - yield { s, c: t.c }; + yield { s, c: t.c, p: t.p }; } } } -function* walkTrieTrieCost(trie: TrieTrieCost): Generator<{ a: string; b: string; c: number }> { +function* walkTrieTrieCost(trie: TrieTrieCost): Generator<{ a: string; b: string; c: number; p: number | undefined }> { for (const { s: a, t } of walkTrieNodes(trie, '')) { if (t.t) { - for (const { s: b, c } of walkTrieCost(t.t)) { - yield { a, b, c }; + for (const { s: b, c, p } of walkTrieCost(t.t)) { + yield { a, b, c, p }; } } } @@ -202,13 +236,14 @@ function* walkTrieTrieCost(trie: TrieTrieCost): Generator<{ a: string; b: string interface MatchTrieCost { i: number; c: number; + p: number; } function* findTrieCostPrefixes(trie: TrieCost, str: string, i: number): Iterable { for (const n of searchTrieNodes(trie, str, i)) { - const c = n.t.c; + const { c, p } = n.t; if (c !== undefined) { - yield { i: n.i, c }; + yield { i: n.i, c, p: p || 0 }; } } } @@ -233,12 +268,12 @@ class _WeightedMap implements WeightMap { swap: TrieTrieCost = {}; *calcInsDelCosts(pos: CostPosition): Iterable { - const { a, ai, b, bi, c } = pos; + const { a, ai, b, bi, c, p } = pos; for (const del of findTrieCostPrefixes(this.insDel, a, ai)) { - yield { a, b, ai: del.i, bi, c: c + del.c }; + yield { a, b, ai: del.i, bi, c: c + del.c, p: p + del.p }; } for (const ins of findTrieCostPrefixes(this.insDel, b, bi)) { - yield { a, b, ai, bi: ins.i, c: c + ins.c }; + yield { a, b, ai, bi: ins.i, c: c + ins.c, p: p + ins.p }; } } @@ -246,16 +281,16 @@ class _WeightedMap implements WeightMap { // Search for matching substrings in `a` to be replaced by // matching substrings from `b`. All substrings start at their // respective `ai`/`bi` positions. 
- const { a, ai, b, bi, c } = pos; + const { a, ai, b, bi, c, p } = pos; for (const del of findTrieTrieCostPrefixes(this.replace, a, ai)) { for (const ins of findTrieCostPrefixes(del.t, b, bi)) { - yield { a, b, ai: del.i, bi: ins.i, c: c + ins.c }; + yield { a, b, ai: del.i, bi: ins.i, c: c + ins.c, p: p + ins.p }; } } } *calcSwapCosts(pos: CostPosition): Iterable { - const { a, ai, b, bi, c } = pos; + const { a, ai, b, bi, c, p } = pos; const swap = this.swap; for (const left of findTrieTrieCostPrefixes(swap, a, ai)) { @@ -263,7 +298,7 @@ class _WeightedMap implements WeightMap { const sw = a.slice(left.i, right.i) + a.slice(ai, left.i); if (b.slice(bi).startsWith(sw)) { const len = sw.length; - yield { a, b, ai: ai + len, bi: bi + len, c: c + right.c }; + yield { a, b, ai: ai + len, bi: bi + len, c: c + right.c, p: p + right.p }; } } } @@ -272,8 +307,9 @@ class _WeightedMap implements WeightMap { function prettyPrintInsDel(trie: TrieCost, pfx = '', indent = ' '): string { function* walk() { - for (const { s, c } of walkTrieCost(trie)) { - yield indent + `(${s}) = ${c}`; + for (const { s, c, p } of walkTrieCost(trie)) { + const pm = p ? ` + ${p}` : ''; + yield indent + `(${s}) = ${c}${pm}`; } } return ['InsDel:', ...[...walk()].sort()].map((line) => pfx + line + '\n').join(''); @@ -281,8 +317,9 @@ function prettyPrintInsDel(trie: TrieCost, pfx = '', indent = ' '): string { export function prettyPrintReplace(trie: TrieTrieCost, pfx = '', indent = ' '): string { function* walk() { - for (const { a, b, c } of walkTrieTrieCost(trie)) { - yield indent + `(${a}) -> (${b}) = ${c}`; + for (const { a, b, c, p } of walkTrieTrieCost(trie)) { + const pm = p ? 
` + ${p}` : ''; + yield indent + `(${a}) -> (${b}) = ${c}${pm}`; } } return ['Replace:', ...[...walk()].sort()].map((line) => pfx + line + '\n').join(''); @@ -290,8 +327,9 @@ export function prettyPrintReplace(trie: TrieTrieCost, pfx = '', indent = ' '): export function prettyPrintSwap(trie: TrieTrieCost, pfx = '', indent = ' '): string { function* walk() { - for (const { a, b, c } of walkTrieTrieCost(trie)) { - yield indent + `(${a}) <-> (${b}) = ${c}`; + for (const { a, b, c, p } of walkTrieTrieCost(trie)) { + const pm = p ? ` + ${p}` : ''; + yield indent + `(${a}) <-> (${b}) = ${c}${pm}`; } } return ['Swap:', ...[...walk()].sort()].map((line) => pfx + line + '\n').join(''); diff --git a/packages/cspell-trie-lib/src/lib/suggestions/suggest-nl.test.ts b/packages/cspell-trie-lib/src/lib/suggestions/suggest-nl.test.ts index d0b1e310120..d9df0f8c20a 100644 --- a/packages/cspell-trie-lib/src/lib/suggestions/suggest-nl.test.ts +++ b/packages/cspell-trie-lib/src/lib/suggestions/suggest-nl.test.ts @@ -128,8 +128,9 @@ function weightMap(): WeightMap { replace: 30, }, { - map: '1234567890', - insDel: 100, + map: '1234567890-.', + insDel: 1, + penalty: 200, } ); } diff --git a/packages/cspell-types/cspell.schema.json b/packages/cspell-types/cspell.schema.json index 66c53f7872b..84479e36364 100644 --- a/packages/cspell-types/cspell.schema.json +++ b/packages/cspell-types/cspell.schema.json @@ -42,9 +42,13 @@ "type": "number" }, "map": { - "description": "The set of substrings to map, these are generally single character strings.\n\nMultiple sets can be defined by using a `|` to separate them.\n\nExample: `\"eéê|aåá\"` contains two different sets.\n\nTo add a multi-character substring use `()`.\n\nExample: `\"f(ph)(gh)\"` results in the following set: `f`, `ph`, `gh`.", + "description": "The set of substrings to map, these are generally single character strings.\n\nMultiple sets can be defined by using a `|` to separate them.\n\nExample: `\"eéê|aåá\"` contains two different 
sets.\n\nTo add a multi-character substring use `()`.\n\nExample: `\"f(ph)(gh)\"` results in the following set: `f`, `ph`, `gh`.\n\n- To match the beginning of a word, use `^`: `\"(^I)\"\"`.\n- To match the end of a word, use `$`: `\"(e$)(ing$)\"`.", "type": "string" }, + "penalty": { + "description": "Add a penalty to the final cost. This is used to discourage certain suggestions.\n\nExample: ```yaml # Match adding/removing `-` to the end of a word. map: \"$(-$)\" replace: 50 penalty: 100 ```\n\nThis makes adding a `-` to the end of a word more expensive.\n\nThink of it as taking the toll way for speed but getting the bill later.", + "type": "number" + }, "replace": { "description": "The cost to replace of of the substrings in the map with another substring in the map. Example: Map['a', 'i'] This would be the cost to substitute `a` with `i`: Like `bat` to `bit` or the reverse.", "type": "number" @@ -72,9 +76,13 @@ "type": "number" }, "map": { - "description": "The set of substrings to map, these are generally single character strings.\n\nMultiple sets can be defined by using a `|` to separate them.\n\nExample: `\"eéê|aåá\"` contains two different sets.\n\nTo add a multi-character substring use `()`.\n\nExample: `\"f(ph)(gh)\"` results in the following set: `f`, `ph`, `gh`.", + "description": "The set of substrings to map, these are generally single character strings.\n\nMultiple sets can be defined by using a `|` to separate them.\n\nExample: `\"eéê|aåá\"` contains two different sets.\n\nTo add a multi-character substring use `()`.\n\nExample: `\"f(ph)(gh)\"` results in the following set: `f`, `ph`, `gh`.\n\n- To match the beginning of a word, use `^`: `\"(^I)\"\"`.\n- To match the end of a word, use `$`: `\"(e$)(ing$)\"`.", "type": "string" }, + "penalty": { + "description": "Add a penalty to the final cost. This is used to discourage certain suggestions.\n\nExample: ```yaml # Match adding/removing `-` to the end of a word. 
map: \"$(-$)\" replace: 50 penalty: 100 ```\n\nThis makes adding a `-` to the end of a word more expensive.\n\nThink of it as taking the toll way for speed but getting the bill later.", + "type": "number" + }, "replace": { "description": "The cost to replace of of the substrings in the map with another substring in the map. Example: Map['a', 'i'] This would be the cost to substitute `a` with `i`: Like `bat` to `bit` or the reverse.", "type": "number" @@ -102,9 +110,13 @@ "type": "number" }, "map": { - "description": "The set of substrings to map, these are generally single character strings.\n\nMultiple sets can be defined by using a `|` to separate them.\n\nExample: `\"eéê|aåá\"` contains two different sets.\n\nTo add a multi-character substring use `()`.\n\nExample: `\"f(ph)(gh)\"` results in the following set: `f`, `ph`, `gh`.", + "description": "The set of substrings to map, these are generally single character strings.\n\nMultiple sets can be defined by using a `|` to separate them.\n\nExample: `\"eéê|aåá\"` contains two different sets.\n\nTo add a multi-character substring use `()`.\n\nExample: `\"f(ph)(gh)\"` results in the following set: `f`, `ph`, `gh`.\n\n- To match the beginning of a word, use `^`: `\"(^I)\"\"`.\n- To match the end of a word, use `$`: `\"(e$)(ing$)\"`.", "type": "string" }, + "penalty": { + "description": "Add a penalty to the final cost. This is used to discourage certain suggestions.\n\nExample: ```yaml # Match adding/removing `-` to the end of a word. map: \"$(-$)\" replace: 50 penalty: 100 ```\n\nThis makes adding a `-` to the end of a word more expensive.\n\nThink of it as taking the toll way for speed but getting the bill later.", + "type": "number" + }, "replace": { "description": "The cost to replace of of the substrings in the map with another substring in the map. 
Example: Map['a', 'i'] This would be the cost to substitute `a` with `i`: Like `bat` to `bit` or the reverse.", "type": "number" @@ -870,7 +882,7 @@ "$ref": "#/definitions/CostMapDefSwap" } ], - "description": "A WeightedMapDef enables setting weights for edits between related characters and substrings.\n\nMultiple groups can be defined using a `|`. A multi-character substring is defined using `()`.\n\nFor example, in some languages, some letters sound alike.\n\n```ts { map: 'sc(sh)(sch)(ss)|t(tt)', // two groups. replace: 50, // Make it 1/2 the cost of a normal edit to replace a `t` with `tt`. } ```\n\nThe following could be used to make inserting, removing, or replacing vowels cheaper. ```ts { map: 'aeiouy', //. insDel: 50, // Make it is cheaper to insert or delete a vowel. replace: 45, // It is even cheaper to replace one with another. } ```\n\nNote: the default edit distance is 100." + "description": "A WeightedMapDef enables setting weights for edits between related characters and substrings.\n\nMultiple groups can be defined using a `|`. A multi-character substring is defined using `()`.\n\nFor example, in some languages, some letters sound alike.\n\n```yaml map: 'sc(sh)(sch)(ss)|t(tt)' # two groups. replace: 50 # Make it 1/2 the cost of a normal edit to replace a `t` with `tt`. ```\n\nThe following could be used to make inserting, removing, or replacing vowels cheaper. ```yaml map: 'aeiouy' insDel: 50 # Make it is cheaper to insert or delete a vowel. replace: 45 # It is even cheaper to replace one with another. ```\n\nNote: the default edit distance is 100." }, "SuggestionCostsDefs": { "items": { diff --git a/packages/cspell-types/src/suggestionCostsDef.ts b/packages/cspell-types/src/suggestionCostsDef.ts index c45177bf2e7..72937d723db 100644 --- a/packages/cspell-types/src/suggestionCostsDef.ts +++ b/packages/cspell-types/src/suggestionCostsDef.ts @@ -7,20 +7,16 @@ * * For example, in some languages, some letters sound alike. 
* - * ```ts - * { - * map: 'sc(sh)(sch)(ss)|t(tt)', // two groups. - * replace: 50, // Make it 1/2 the cost of a normal edit to replace a `t` with `tt`. - * } + * ```yaml + * map: 'sc(sh)(sch)(ss)|t(tt)' # two groups. + * replace: 50 # Make it 1/2 the cost of a normal edit to replace a `t` with `tt`. * ``` * * The following could be used to make inserting, removing, or replacing vowels cheaper. - * ```ts - * { - * map: 'aeiouy', //. - * insDel: 50, // Make it is cheaper to insert or delete a vowel. - * replace: 45, // It is even cheaper to replace one with another. - * } + * ```yaml + * map: 'aeiouy' + * insDel: 50 # Make it is cheaper to insert or delete a vowel. + * replace: 45 # It is even cheaper to replace one with another. * ``` * * Note: the default edit distance is 100. @@ -40,6 +36,10 @@ interface CostMapDefBase { * To add a multi-character substring use `()`. * * Example: `"f(ph)(gh)"` results in the following set: `f`, `ph`, `gh`. + * + * - To match the beginning of a word, use `^`: `"(^I)""`. + * - To match the end of a word, use `$`: `"(e$)(ing$)"`. + * */ map: string; /** The cost to insert/delete one of the substrings in the map. Note: insert/delete costs are symmetrical. */ @@ -60,6 +60,23 @@ interface CostMapDefBase { * A description to describe the purpose of the map. */ description?: string; + /** + * Add a penalty to the final cost. + * This is used to discourage certain suggestions. + * + * Example: + * ```yaml + * # Match adding/removing `-` to the end of a word. + * map: "$(-$)" + * replace: 50 + * penalty: 100 + * ``` + * + * This makes adding a `-` to the end of a word more expensive. + * + * Think of it as taking the toll way for speed but getting the bill later. 
+ */ + penalty?: number; } export interface CostMapDefReplace extends CostMapDefBase { diff --git a/rfc/rfc-0002 improve dictionary suggestions/README.md b/rfc/rfc-0002 improve dictionary suggestions/README.md index e56f828f91c..50f3c154186 100644 --- a/rfc/rfc-0002 improve dictionary suggestions/README.md +++ b/rfc/rfc-0002 improve dictionary suggestions/README.md @@ -19,42 +19,60 @@ In the current implementation: `1 edit = 100 cost`. This was done to allow for p #### Proposed Structure -```ts +````ts interface SuggestionCosts { /** - * A map is a set of characters to be considered together - * Each individual character is an entry in the set. - * To have multiple character entry use `()` around the characters. - * Because multiple sets of characters can have the same intention - * It is possible to use a single map to define multiple sets by separating - * each set with a `|`. + * The set of substrings to map, these are generally single character strings. + * + * Multiple sets can be defined by using a `|` to separate them. + * + * Example: `"eéê|aåá"` contains two different sets. + * + * To add a multi-character substring use `()`. + * + * Example: `"f(ph)(gh)"` results in the following set: `f`, `ph`, `gh`. + * + * To match the beginning of a word, use `^`: `(^I)`. + * To match the end of a word, use `$`: `(e$)(ing$)`. */ map: string; + /** The cost to insert/delete one of the substrings in the map. Note: insert/delete costs are symmetrical. */ + insDel?: number; /** - * The cost to insert/delete a character from the map into a word. - * @default 100 + * The cost to replace one of the substrings in the map with another substring in the map. + * Example: Map['a', 'i'] + * This would be the cost to substitute `a` with `i`: Like `bat` to `bit` or the reverse. */ - insDel?: number; + replace?: number; /** - * The cost to replace a character in a set with another from the same set.
- * - * Example: - * - * Give: `map: 'you|tkb', swap: 50` - * - To swap `a` with `y` is 100 - * - To swap `y` with `u` is 50 - * - to swap `y` with `t` is 100 - * - to swap `t` with `k` is 50 - * - * @default 100 + * The cost to swap two adjacent substrings found in the map. + * Example: Map['e', 'i'] + * This represents the cost to change `ei` to `ie` or the reverse. */ swap?: number; /** - * A comment about why a cost is defined. + * A description to describe the purpose of the map. */ description?: string; + /** + * Add a penalty to the final cost. + * This is used to discourage certain suggestions. + * + * Example: + * ```yaml + * # Match adding/removing `-` to the end of a word. + * map: "$(-$)" + * replace: 50 + * penalty: 100 + * ``` + * + * This makes adding a `-` to the end of a word more expensive. + * + * Think of it as taking the toll way for speed but getting the bill later. + */ + penalty?: number; } -``` +```` #### Example of costs: