diff --git a/cspell-dict.txt b/cspell-dict.txt
index 8e1518d10a3..27184c4cea3 100644
--- a/cspell-dict.txt
+++ b/cspell-dict.txt
@@ -2,14 +2,15 @@ alexiosc
 backreference
 bitjson
 cheatsheets
-Codecov
 codecov
+Codecov
 codeql
 COMPOUNDFLAG
 coverallsapp
 cspellcache
-DAWG
+Damerau
 Dawg
+DAWG
 deserializers
 exonum
 gimu
diff --git a/packages/cspell-trie-lib/src/lib/suggestions/distanceAStar.test.ts b/packages/cspell-trie-lib/src/lib/suggestions/distanceAStar.test.ts
new file mode 100644
index 00000000000..d554db616d5
--- /dev/null
+++ b/packages/cspell-trie-lib/src/lib/suggestions/distanceAStar.test.ts
@@ -0,0 +1,18 @@
+import { distanceAStar } from './distanceAStar';
+import { levenshteinDistance } from './levenshtein';
+
+describe('distanceAStar', () => {
+    test.each`
+        wordA        | wordB
+        ${''}        | ${''}
+        ${'apple'}   | ${'apple'}
+        ${'apple'}   | ${''}
+        ${'apple'}   | ${'apples'}
+        ${'apple'}   | ${'maple'}
+        ${'grapple'} | ${'maples'}
+    `('distanceAStar vs Levenshtein "$wordA" "$wordB"', ({ wordA, wordB }) => {
+        const expected = levenshteinDistance(wordA, wordB) * 100;
+        expect(distanceAStar(wordA, wordB)).toBe(expected);
+        expect(distanceAStar(wordB, wordA)).toBe(expected);
+    });
+});
diff --git a/packages/cspell-trie-lib/src/lib/suggestions/distanceAStar.ts b/packages/cspell-trie-lib/src/lib/suggestions/distanceAStar.ts
new file mode 100644
index 00000000000..c87fbb95718
--- /dev/null
+++ b/packages/cspell-trie-lib/src/lib/suggestions/distanceAStar.ts
@@ -0,0 +1,78 @@
+import { PairingHeap } from '../utils/PairingHeap';
+
+/**
+ * Calculate the edit distance between two words using an A* algorithm.
+ *
+ * Using basic weights, this algorithm has the same results as the Damerau-Levenshtein algorithm.
+ * @param a - the first word
+ * @param b - the second word
+ * @returns the edit distance, where every insert, delete, substitute, or swap costs 100
+ */
+export function distanceAStar(a: string, b: string): number {
+    const aN = a.length;
+    const bN = b.length;
+    const cost = 100;
+
+    const candidates = new PairingHeap(compare);
+
+    candidates.add({ ai: 0, bi: 0, c: 0 });
+
+    function opSub(n: Node) {
+        const { ai, bi, c } = n;
+        if (ai < aN && bi < bN) {
+            const cc = a[ai] === b[bi] ? c : c + cost;
+            candidates.add({ ai: ai + 1, bi: bi + 1, c: cc });
+        }
+    }
+
+    function opIns(n: Node) {
+        const { ai, bi, c } = n;
+        if (bi < bN) {
+            candidates.add({ ai: ai, bi: bi + 1, c: c + cost });
+        }
+    }
+
+    function opDel(n: Node) {
+        const { ai, bi, c } = n;
+        if (ai < aN) {
+            candidates.add({ ai: ai + 1, bi: bi, c: c + cost });
+        }
+    }
+
+    function opSwap(n: Node) {
+        const { ai, bi, c } = n;
+        if (a[ai] === b[bi + 1] && a[ai + 1] === b[bi]) {
+            candidates.add({ ai: ai + 2, bi: bi + 2, c: c + cost });
+        }
+    }
+
+    let best: Node | undefined;
+    // Expand the cheapest candidate until one has consumed both words.
+    while ((best = candidates.dequeue())) {
+        if (best.ai === aN && best.bi === bN) break;
+
+        opSwap(best);
+        opIns(best);
+        opDel(best);
+        opSub(best);
+    }
+
+    return best?.c ?? -1;
+}
+
+interface Pos {
+    /** the offset in string `a` */
+    ai: number;
+    /** the offset in string `b` */
+    bi: number;
+}
+
+interface Node extends Pos {
+    /** the current cost */
+    c: number;
+}
+
+function compare(a: Node, b: Node): number {
+    // Choose lowest cost or farthest Manhattan distance.
+    return a.c - b.c || b.ai + b.bi - a.ai - a.bi;
+}
diff --git a/packages/cspell-trie-lib/src/lib/suggestions/distanceAStarWeighted.test.ts b/packages/cspell-trie-lib/src/lib/suggestions/distanceAStarWeighted.test.ts
new file mode 100644
index 00000000000..42f6b132142
--- /dev/null
+++ b/packages/cspell-trie-lib/src/lib/suggestions/distanceAStarWeighted.test.ts
@@ -0,0 +1,43 @@
+import { distanceAStarWeighted } from './distanceAStarWeighted';
+import { levenshteinDistance } from './levenshtein';
+import { buildWeightedMapTrie } from './weightedMaps';
+
+describe('distanceAStarWeighted', () => {
+    test.each`
+        wordA        | wordB
+        ${''}        | ${''}
+        ${'apple'}   | ${'apple'}
+        ${'apple'}   | ${''}
+        ${'apple'}   | ${'apples'}
+        ${'apple'}   | ${'maple'}
+        ${'grapple'} | ${'maples'}
+    `('distanceAStarWeighted vs Levenshtein "$wordA" "$wordB"', ({ wordA, wordB }) => {
+        const expected = levenshteinDistance(wordA, wordB) * 100;
+        expect(distanceAStarWeighted(wordA, wordB, {})).toBe(expected);
+        expect(distanceAStarWeighted(wordB, wordA, {})).toBe(expected);
+    });
+
+    // cspell:ignore aeiou xreceive yrecieve
+    test.each`
+        wordA | wordB | map | expected
+        ${''} | ${''} | ${undefined} | ${0}
+        ${'apple'} | ${'apple'} | ${{ map: 'ae', insDel: 75 }} | ${0}
+        ${'apple'} | ${''} | ${{ map: 'ae', insDel: 75 }} | ${450}
+        ${'apple'} | ${''} | ${{ map: 'ae|(ap)', insDel: 75 }} | ${350}
+        ${'apple'} | ${''} | ${{ map: '(ap)', insDel: 1 }} | ${301}
+        ${'apple'} | ${'apples'} | ${{ map: '(les)(le)', replace: 50 }} | ${50}
+        ${'apple'} | ${'maple'} | ${{ map: '(pp)p', replace: 50 }} | ${150}
+        ${'grapple'} | ${'maples'} | ${{ map: '(pp)p', replace: 50 }} | ${350}
+        ${'bite'} | ${'bate'} | ${{ map: 'aei', replace: 25 }} | ${25}
+        ${'receive'} | ${'recieve' /* cspell:ignore recieve */} | ${{ map: 'ei', swap: 25 }} | ${25}
+        ${'xreceive'} | ${'yrecieve'} | ${{ map: 'ei', swap: 25 }} | ${125}
+        ${'airplane'} | ${'aeroplane'} | ${{ map: '(ai)(ae)', replace: 25 }} | ${125}
+        ${'airplane'} | ${'aeroplane'} | ${{ map: '(air)(aero)|aeiou', replace: 25 }} | ${25}
+        ${'airplane'} | ${'aeroplane'} | ${{ map: 'aeiou', replace: 25 }} | ${125}
+        ${'plain'} | ${'plane'} | ${{ map: '(ane)(ain)', replace: 100 }} | ${100}
+    `('distanceAStarWeighted "$wordA" "$wordB" $map', ({ wordA, wordB, map, expected }) => {
+        const trie = map ? buildWeightedMapTrie([map]) : buildWeightedMapTrie([]);
+        expect(distanceAStarWeighted(wordA, wordB, trie)).toBe(expected);
+        expect(distanceAStarWeighted(wordB, wordA, trie)).toBe(expected);
+    });
+});
diff --git a/packages/cspell-trie-lib/src/lib/suggestions/distanceAStarWeighted.ts b/packages/cspell-trie-lib/src/lib/suggestions/distanceAStarWeighted.ts
new file mode 100644
index 00000000000..60d84ee6042
--- /dev/null
+++ b/packages/cspell-trie-lib/src/lib/suggestions/distanceAStarWeighted.ts
@@ -0,0 +1,173 @@
+import { PairingHeap } from '../utils/PairingHeap';
+import { WeightedMapTrie, WeightedRepMapTrie } from './weightedMaps';
+
+/**
+ * Calculate the edit distance between two words using an A* algorithm.
+ *
+ * With an empty map, this algorithm has the same results as the Damerau-Levenshtein algorithm (times 100).
+ * @param a - the first word
+ * @param b - the second word
+ * @param map - weighted insert/delete, replace, and swap costs that are tried in addition to the basic cost of 100
+ * @returns the lowest cost found to change `a` into `b`
+ */
+export function distanceAStarWeighted(a: string, b: string, map: WeightedMapTrie): number {
+    const aN = a.length;
+    const bN = b.length;
+    const cost = 100;
+
+    const candidates = new PairingHeap(compare);
+
+    candidates.add({ ai: 0, bi: 0, c: 0 });
+
+    function opSub(n: Node) {
+        const { ai, bi, c } = n;
+        if (ai < aN && bi < bN) {
+            const cc = a[ai] === b[bi] ? c : c + cost;
+            candidates.add({ ai: ai + 1, bi: bi + 1, c: cc });
+        }
+    }
+
+    function opIns(n: Node) {
+        const { ai, bi, c } = n;
+        if (bi < bN) {
+            candidates.add({ ai: ai, bi: bi + 1, c: c + cost });
+        }
+    }
+
+    function opDel(n: Node) {
+        const { ai, bi, c } = n;
+        if (ai < aN) {
+            candidates.add({ ai: ai + 1, bi: bi, c: c + cost });
+        }
+    }
+
+    function opSwap(n: Node) {
+        const { ai, bi, c } = n;
+        if (a[ai] === b[bi + 1] && a[ai + 1] === b[bi]) {
+            candidates.add({ ai: ai + 2, bi: bi + 2, c: c + cost });
+        }
+    }
+
+    function opMap(n: Node) {
+        const { ai, bi, c } = n;
+
+        function ins(ai: number, bi: number, m: WeightedMapTrie | undefined) {
+            if (bi >= bN || !m) return;
+            const n = m[b[bi]];
+            if (!n) return;
+            const cost = n.insDel;
+            ++bi;
+            if (cost !== undefined) {
+                candidates.add({ ai, bi, c: c + cost });
+            }
+            ins(ai, bi, n.t);
+        }
+
+        function del(ai: number, bi: number, m: WeightedMapTrie | undefined) {
+            if (ai >= aN || !m) return;
+            const n = m[a[ai]];
+            if (!n) return;
+            ++ai;
+            const cost = n.insDel;
+            if (cost !== undefined) {
+                candidates.add({ ai, bi, c: c + cost });
+            }
+            del(ai, bi, n.t);
+        }
+
+        function repApply(ai: number, bi: number, m: WeightedRepMapTrie | undefined) {
+            if (!m || bi >= bN) return;
+            const char = b[bi];
+            const n = m[char];
+            if (!n) return;
+            ++bi;
+            const cost = n.rep;
+            if (cost !== undefined) {
+                candidates.add({ ai, bi, c: c + cost });
+            }
+            repApply(ai, bi, n.r);
+        }
+
+        function rep(ai: number, bi: number, m: WeightedMapTrie | undefined) {
+            if (!m || ai >= aN || bi >= bN) return;
+            const n = m[a[ai]];
+            if (!n) return;
+            ++ai;
+            repApply(ai, bi, n.r);
+            rep(ai, bi, n.t);
+        }
+
+        function swap(ai: number, bi: number, m: WeightedMapTrie | undefined) {
+            if (!m || ai >= aN || bi >= bN) return;
+
+            function apply(mid: number, right: number, cost: number | undefined) {
+                if (cost === undefined) return;
+                const swap = a.slice(mid, right) + a.slice(ai, mid);
+                const len = swap.length;
+
+                const subB = b.slice(bi, bi + len);
+                if (swap === subB) {
+                    // `cost` is only the weight of the swap; add it to the cost accumulated so far.
+                    candidates.add({ ai: ai + len, bi: bi + len, c: c + cost });
+                }
+            }
+
+            function right(aim: number, ail: number, m: WeightedRepMapTrie | undefined) {
+                if (!m || ail >= aN) return;
+                const n = m[a[ail]];
+                if (!n) return;
+                ++ail;
+                apply(aim, ail, n.swap);
+                right(aim, ail, n.r);
+            }
+
+            function left(aim: number, m: WeightedMapTrie | undefined) {
+                if (!m || aim >= aN) return;
+                const n = m[a[aim]];
+                if (!n) return;
+                ++aim;
+                right(aim, aim, n.r);
+                left(aim, n.t);
+            }
+
+            left(ai, m);
+        }
+
+        ins(ai, bi, map);
+        del(ai, bi, map);
+        rep(ai, bi, map);
+        swap(ai, bi, map);
+    }
+
+    let best: Node | undefined;
+    // Expand the cheapest candidate until one has consumed both words.
+    while ((best = candidates.dequeue())) {
+        if (best.ai === aN && best.bi === bN) break;
+
+        opSwap(best);
+        opIns(best);
+        opDel(best);
+        opMap(best);
+        opSub(best);
+    }
+
+    // istanbul ignore else
+    return best ? best.c : -1;
+}
+
+interface Pos {
+    /** the offset in string `a` */
+    ai: number;
+    /** the offset in string `b` */
+    bi: number;
+}
+
+interface Node extends Pos {
+    /** the current cost */
+    c: number;
+}
+
+function compare(a: Node, b: Node): number {
+    // Choose lowest cost or farthest Manhattan distance.
+    return a.c - b.c || b.ai + b.bi - a.ai - a.bi;
+}
diff --git a/packages/cspell-trie-lib/src/lib/suggestions/levenshtein.test.ts b/packages/cspell-trie-lib/src/lib/suggestions/levenshtein.test.ts
new file mode 100644
index 00000000000..79d9b248ea9
--- /dev/null
+++ b/packages/cspell-trie-lib/src/lib/suggestions/levenshtein.test.ts
@@ -0,0 +1,23 @@
+import { levenshteinDistance } from './levenshtein';
+
+describe('levenshtein', () => {
+    test.each`
+        left          | right        | expected
+        ${'abc'}      | ${'abc'}     | ${0}
+        ${'abc'}      | ${'ab'}      | ${1}
+        ${'abc'}      | ${''}        | ${3}
+        ${'kitten'}   | ${'sitting'} | ${3}
+        ${'Saturday'} | ${'Sunday'}  | ${3}
+        ${'ab'}       | ${'ba'}      | ${1}
+        ${'aba'}      | ${'bab'}     | ${2}
+        ${'abab'}     | ${'baba'}    | ${2}
+        ${'abab'}     | ${'ababa'}   | ${1}
+        ${'appear'}   | ${'apple'}   | ${3}
+        ${'appease'}  | ${'apple'}   | ${3}
+    `('levenshteinDistance "$left" vs "$right"', ({ left, right, expected }) => {
+        expect(levenshteinDistance(left, right)).toBe(expected);
+        expect(levenshteinDistance(right, left)).toBe(expected);
+    });
+});
+
+// cspell:ignore ababa
diff --git a/packages/cspell-trie-lib/src/lib/suggestions/levenshtein.ts b/packages/cspell-trie-lib/src/lib/suggestions/levenshtein.ts
new file mode 100644
index 00000000000..9acf9ba65c0
--- /dev/null
+++ b/packages/cspell-trie-lib/src/lib/suggestions/levenshtein.ts
@@ -0,0 +1,52 @@
+/**
+ * Damerau–Levenshtein distance
+ * [Damerau–Levenshtein distance - Wikipedia](https://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance)
+ * @param a - first word
+ * @param b - second word
+ * @returns Distance value
+ */
+export function levenshteinDistance(a: string, b: string): number {
+    // By prefixing with two spaces, no out of bounds checks are necessary.
+    const aa = '  ' + a;
+    const bb = '  ' + b;
+
+    const nA = a.length + 1;
+    const nB = b.length + 1;
+
+    const firstRow: number[] = [];
+    for (let i = 0; i <= nA; ++i) {
+        firstRow[i] = i;
+    }
+
+    const matrix = [firstRow, [1].concat(firstRow), [2, 1].concat(firstRow)];
+    let ppRow = matrix[0];
+    let pRow = matrix[1];
+
+    for (let j = 2; j <= nB; ++j) {
+        const row = matrix[j % 3];
+        row[0] = pRow[0] + 1;
+        row[1] = pRow[1] + 1;
+
+        const bp = bb[j - 1];
+        const bc = bb[j];
+
+        let ap = aa[0];
+
+        for (let i = 2, i1 = 1; i <= nA; i1 = i, ++i) {
+            const ac = aa[i];
+            const c = pRow[i1] + (ac === bc ? 0 : 1);
+            const ct = ac === bp && ap === bc ? ppRow[i1 - 1] + 1 : c;
+            row[i] = Math.min(
+                c, // substitute
+                ct, // transpose
+                pRow[i] + 1, // insert
+                row[i1] + 1 // delete
+            );
+            ap = ac;
+        }
+        ppRow = pRow;
+        pRow = row;
+    }
+
+    return pRow[nA];
+}
diff --git a/packages/cspell-trie-lib/src/lib/suggestions/suggest.ts b/packages/cspell-trie-lib/src/lib/suggestions/suggest.ts
index dd2f362051c..5e3fba7fd86 100644
--- a/packages/cspell-trie-lib/src/lib/suggestions/suggest.ts
+++ b/packages/cspell-trie-lib/src/lib/suggestions/suggest.ts
@@ -110,7 +110,9 @@ export function* genCompoundableSuggestions(
     for (let r = iWalk.next({ goDeeper }); !stopNow && !r.done; r = iWalk.next({ goDeeper })) {
         const { text, node, depth } = r.value;
         let { a, b } = stack[depth];
+        /** Current character from word */
         const w = text.slice(-1);
+        /** Current character visual letter group */
         const wG = visualLetterMaskMap[w] || 0;
         if (setOfSeparators.has(w)) {
             const mxRange = matrix[depth].slice(a, b + 1);
@@ -146,9 +148,12 @@ export function* genCompoundableSuggestions(
                 historyTags.set(tag, { w: text, i: history.length, m: mxMin });
             }
         }
+        /** current depth */
         const d = depth + 1;
         const lastSugLetter = d > 1 ? text[d - 2] : '';
+        /** standard cost */
         const c = bc - d + (specialSubCosts[w] || 0);
+        /** insert cost */
         const ci = c + (specialInsCosts[w] || 0);
 
         // Setup first column
@@ -161,6 +166,7 @@ export function* genCompoundableSuggestions(
         // calc the core letters
         for (i = a + 1; i <= b; ++i) {
             const curLetter = x[i];
+            /** current group */
             const cG = visualLetterMaskMap[curLetter] || 0;
             const subCost =
                 w === curLetter
diff --git a/packages/cspell-trie-lib/src/lib/suggestions/weightedMaps.test.ts b/packages/cspell-trie-lib/src/lib/suggestions/weightedMaps.test.ts
new file mode 100644
index 00000000000..6c0c84b7636
--- /dev/null
+++ b/packages/cspell-trie-lib/src/lib/suggestions/weightedMaps.test.ts
@@ -0,0 +1,66 @@
+import { __testing__, addWeightedDefMapToTrie, WeightedMapDef } from './weightedMaps';
+
+const { splitMapSubstrings, splitMap } = __testing__;
+
+// const u = undefined;
+
+describe('Validate weightedMaps', () => {
+    test.each`
+        map             | expected
+        ${''}           | ${[]}
+        ${'abc'}        | ${'abc'.split('')}
+        ${'f(ph)(gh)v'} | ${['f', 'ph', 'gh', 'v']}
+    `('splitMapSubstrings "$map"', ({ map, expected }) => {
+        expect(splitMapSubstrings(map)).toEqual(expected);
+    });
+
+    test.each`
+        map                | expected
+        ${''}              | ${[[]]}
+        ${'||'}            | ${[[], [], []]}
+        ${'abc'}           | ${['abc'.split('')]}
+        ${'f(ph)(gh)v|eé'} | ${[['f', 'ph', 'gh', 'v'], ['e', 'é']]}
+    `('splitMap "$map"', ({ map, expected }) => {
+        expect(splitMap({ map })).toEqual(expected);
+    });
+
+    test.each`
+        map | insDel | replace | swap | expected
+        ${''} | ${undefined} | ${undefined} | ${undefined} | ${{}}
+        ${''} | ${1} | ${1} | ${1} | ${{}}
+        ${'a'} | ${1} | ${undefined} | ${undefined} | ${{ a: { insDel: 1 } }}
+        ${'ab'} | ${1} | ${undefined} | ${undefined} | ${{ a: { insDel: 1 }, b: { insDel: 1 } }}
+        ${'a'} | ${1} | ${2} | ${3} | ${{ a: { insDel: 1, r: { a: { rep: 2, swap: 3 } } } }}
+        ${'a'} | ${0} | ${0} | ${0} | ${{ a: { insDel: 0, r: { a: { rep: 0, swap: 0 } } } }}
+        ${'ab'} | ${undefined} | ${2} | ${undefined} | ${{ a: { r: { a: { rep: 2 }, b: { rep: 2 } } }, b: { r: { a: { rep: 2 }, b: { rep: 2 } } } }}
+        ${'a|b'} | ${undefined} | ${2} | ${undefined} | ${{ a: { r: { a: { rep: 2 } } }, b: { r: { b: { rep: 2 } } } }}
+    `('addWeightedDefMapToTrie "$map"', ({ map, insDel, replace, swap, expected }) => {
+        const def: WeightedMapDef = {
+            map,
+            insDel,
+            replace,
+            swap,
+        };
+        expect(addWeightedDefMapToTrie(def)).toEqual(expected);
+    });
+
+    test.each`
+        defA | defB | expected
+        ${{ map: '' }} | ${{ map: '' }} | ${{}}
+        ${{ map: '' }} | ${{ map: 'b' }} | ${{ b: {} }}
+        ${{ map: 'a' }} | ${{ map: 'b' }} | ${{ a: {}, b: {} }}
+        ${{ map: '(ab)' }} | ${{ map: 'b' }} | ${{ a: { t: { b: {} } }, b: {} }}
+        ${{ map: 'a', insDel: 5 }} | ${{ map: 'a', insDel: 10 }} | ${{ a: { insDel: 5 } }}
+        ${{ map: 'a', insDel: 5 }} | ${{ map: 'ab', insDel: 10 }} | ${{ a: { insDel: 5 }, b: { insDel: 10 } }}
+        ${{ map: 'a', replace: 5 }} | ${{ map: 'ab', insDel: 10, replace: 3 }} | ${{ a: { insDel: 10, r: { a: { rep: 3 }, b: { rep: 3 } } }, b: { insDel: 10, r: { a: { rep: 3 }, b: { rep: 3 } } } }}
+        ${{ map: '(ab)', insDel: 3 }} | ${{ map: 'b' }} | ${{ a: { t: { b: { insDel: 3 } } }, b: {} }}
+    `('addWeightedDefMapToTrie $defA $defB', ({ defA, defB, expected }) => {
+        const tAB = addWeightedDefMapToTrie(defA);
+        addWeightedDefMapToTrie(defB, tAB);
+        expect(tAB).toEqual(expected);
+
+        const tBA = addWeightedDefMapToTrie(defB);
+        addWeightedDefMapToTrie(defA, tBA);
+        expect(tBA).toEqual(expected);
+    });
+});
diff --git a/packages/cspell-trie-lib/src/lib/suggestions/weightedMaps.ts b/packages/cspell-trie-lib/src/lib/suggestions/weightedMaps.ts
new file mode 100644
index 00000000000..be1380212e8
--- /dev/null
+++ b/packages/cspell-trie-lib/src/lib/suggestions/weightedMaps.ts
@@ -0,0 +1,179 @@
+export type WeightedMapTrie = Record<string, WeightedMapTrieNode>;
+
+interface WeightedMapTrieNode {
+    /** The nested Trie nodes */
+    t?: WeightedMapTrie | undefined;
+    /** the cost to insert/delete this string */
+    insDel?: number | undefined;
+    /** the related replacement Trie Map */
+    r?: WeightedRepMapTrie | undefined;
+}
+
+export type WeightedRepMapTrie = Record<string, WeightedRepTrieNode>;
+
+interface WeightedRepTrieNode {
+    /** The nested Trie nodes */
+    r?: WeightedRepMapTrie | undefined;
+    /** The cost to replace */
+    rep?: number | undefined;
+    /** The cost to swap */
+    swap?: number | undefined;
+}
+
+export interface WeightedMapDef {
+    /**
+     * The set of substrings to map, these are generally single character strings.
+     *
+     * Multiple sets can be defined by using a `|` to separate them.
+     *
+     * Example: `"eéê|aåá"` contains two different sets.
+     *
+     * To add a multi-character substring use `()`.
+     *
+     * Example: `"f(ph)(gh)"` results in the following set: `f`, `ph`, `gh`.
+     */
+    map: string;
+    /** The cost to insert/delete one of the substrings in the map. Note: insert/delete costs are symmetrical. */
+    insDel?: number;
+    /**
+     * The cost to replace one of the substrings in the map with another substring in the map.
+     * Example: Map['a', 'i']
+     * This would be the cost to substitute `a` with `i`: Like `bat` to `bit` or the reverse.
+     */
+    replace?: number;
+    /**
+     * The cost to swap two adjacent substrings found in the map.
+     * Example: Map['e', 'i']
+     * This represents the cost to change `ei` to `ie` or the reverse.
+     */
+    swap?: number;
+}
+
+export function buildWeightedMapTrie(defs: WeightedMapDef[]): WeightedMapTrie {
+    const trie: WeightedMapTrie = createMapTrie();
+    defs.forEach((def) => addWeightedDefMapToTrie(def, trie));
+    return trie;
+}
+
+/**
+ * Add weighted map definitions to a WeightedMapTrie
+ * @param def - the def to add
+ * @param trie - the trie to add it to. NOTE: this trie is modified!
+ * @returns the modified trie
+ */
+export function addWeightedDefMapToTrie(def: WeightedMapDef, trie: WeightedMapTrie = createMapTrie()): WeightedMapTrie {
+    const mapSets = splitMap(def);
+
+    function addRepToNode(mapSet: string[], n: WeightedMapTrieNode) {
+        const root = createRepTrieNode((n.r = n.r || createRepTrie()));
+        for (const s of mapSet) {
+            let n = root;
+            for (const c of s) {
+                const r = (n.r = n.r || createRepTrie());
+                n = r[c] = r[c] || createRepTrieNode();
+            }
+            addWeightsToRepNode(n, def);
+        }
+    }
+
+    function addSet(mapSet: string[]) {
+        const r = createMapTrieNode(trie);
+        for (const s of mapSet) {
+            let n = r;
+            for (const c of s) {
+                const t = (n.t = n.t || createMapTrie());
+                n = t[c] = t[c] || createMapTrieNode();
+            }
+            addWeightsToNode(n, def);
+            if (def.replace !== undefined || def.swap !== undefined) {
+                addRepToNode(mapSet, n);
+            }
+        }
+    }
+
+    mapSets.forEach(addSet);
+
+    return trie;
+}
+
+function createMapTrie(): WeightedMapTrie {
+    return Object.create(null);
+}
+
+function createMapTrieNode(t?: WeightedMapTrie): WeightedMapTrieNode {
+    const n: WeightedMapTrieNode = {};
+    return assignIfDefined(n, 't', t);
+}
+
+function createRepTrie(): WeightedRepMapTrie {
+    return Object.create(null);
+}
+
+function createRepTrieNode(r?: WeightedRepMapTrie): WeightedRepTrieNode {
+    const n: WeightedRepTrieNode = {};
+    return assignIfDefined(n, 'r', r);
+}
+
+function addWeightsToNode(n: WeightedMapTrieNode, def: WeightedMapDef) {
+    assignIfDefined(n, 'insDel', lowest(n.insDel, def.insDel));
+}
+
+function addWeightsToRepNode(n: WeightedRepTrieNode, def: WeightedMapDef) {
+    assignIfDefined(n, 'rep', lowest(n.rep, def.replace));
+    assignIfDefined(n, 'swap', lowest(n.swap, def.swap));
+}
+
+function lowest(a: number | undefined, b: number | undefined): number | undefined {
+    if (a === undefined) return b;
+    if (b === undefined) return a;
+    return a <= b ? a : b;
+}
+
+/**
+ * Splits a WeightedMapDef.map into its `|` separated sets.
+ * @param def - the definition whose `map` is split
+ * @returns one array of substrings per set
+ */
+function splitMap(def: WeightedMapDef): string[][] {
+    const { map } = def;
+
+    const sets = map.split('|');
+    return sets.map(splitMapSubstrings);
+}
+
+/**
+ * Splits a single set into its substrings: each character is its own substring unless grouped by `()`.
+ * An unterminated `(` takes the rest of the set as one substring.
+ * @param map - a single set, i.e. one `|` separated part of a `WeightedMapDef.map`
+ * @returns the substrings in the set
+ */
+function splitMapSubstrings(map: string): string[] {
+    const values: string[] = [];
+    const len = map.length;
+
+    for (let i = 0; i < len; ++i) {
+        const c = map[i];
+        if (c !== '(') {
+            values.push(c);
+            continue;
+        }
+        const s = i + 1;
+        while (map[++i] !== ')' && i < len) {
+            // empty
+        }
+        values.push(map.slice(s, i));
+    }
+
+    return values;
+}
+
+function assignIfDefined<T, K extends keyof T>(r: T, key: K, v: T[K] | undefined): T {
+    if (v === undefined) return r;
+    r[key] = v;
+    return r;
+}
+
+export const __testing__ = {
+    splitMap,
+    splitMapSubstrings,
+};
diff --git a/packages/cspell-trie-lib/src/lib/utils/PairingHeap.ts b/packages/cspell-trie-lib/src/lib/utils/PairingHeap.ts
index 2669bb558e4..9ab085d1a4d 100644
--- a/packages/cspell-trie-lib/src/lib/utils/PairingHeap.ts
+++ b/packages/cspell-trie-lib/src/lib/utils/PairingHeap.ts
@@ -7,6 +7,16 @@ export interface PairHeapNode<T> {
     c: PairHeapNode<T> | undefined;
 }
 
+/**
+ * Compare Function
+ * Compares two values a and b.
+ * Meaning of return value:
+ *   `v <= 0`: `a` is ahead of `b`
+ *   `v > 0`: `b` is ahead of `a`
+ * @param a - item a
+ * @param b - item b
+ * @returns a number
+ */
 export type CompareFn<T> = (a: T, b: T) => number;
 
 export class PairingHeap<T> implements IterableIterator<T> {
diff --git a/rfc/rfc-0002 improve dictionary suggestions/README.md b/rfc/rfc-0002 improve dictionary suggestions/README.md
index d96abcad024..6f32df16917 100644
--- a/rfc/rfc-0002 improve dictionary suggestions/README.md
+++ b/rfc/rfc-0002 improve dictionary suggestions/README.md
@@ -31,15 +31,10 @@ interface SuggestionCosts {
      */
     map: string;
     /**
-     * The cost to insert a character from the map into a word.
+     * The cost to insert a character from the map into a word, or to delete it from a word.
      * @default 100
      */
-    insert?: number;
-    /**
-     * The cost to delete a character from the map from a word.
-     * @default 100
-     */
-    delete?: number;
+    insDel?: number;
     /**
      * The cost to replace a character in a set with another from the same set.
      *
@@ -67,13 +62,11 @@ interface SuggestionCosts {
 costs:
   - description: Accented Vowel Letters
     map: 'aáâäãå|eéêë|iíîï|oóôöõ|uúûü|yÿ'
-    insert: 50
-    delete: 50
+    insDel: 50
     replace: 10
   - description: Vowels
     map: 'aáâäãåeéêëiíîïoóôöõuúûüyÿ'
-    insert: 50
-    delete: 50
+    insDel: 50
     replace: 25 # Replacing one vowel with another is cheap
     swap: 25 # Swapping vowels are cheap
   - description: Multi Character example
@@ -81,12 +74,10 @@ costs:
     replace: 10
   - description: Appending / Removing Accent Marks
     map: '\u0641' # Shadda
-    insert: 10
-    delete: 10
+    insDel: 10
   - description: Arabic Vowels
     map: '\u064f\u0648\u064e\u0627\u0650\u64a\u0652' # Damma, Wāw, Fatha, Alif, Kasra, Ya', Sukūn
-    insert: 20
-    delete: 20
+    insDel: 20
     replace: 20
   - description: Keyboard Adjacency
     map: 'qwas|aszx|wesd|sdxc|erdf|dfcv|rtfg|fgvb|tygh|ghbn|yuhj|hjnm|uijk|jkm|iokl|opl'