Skip to content

Commit

Permalink
fix: Add ability to discourage certain types of suggestions. (#2291)
Browse files Browse the repository at this point in the history
* fix: Add ability to discourage certain types of suggestions.
  • Loading branch information
Jason3S committed Jan 20, 2022
1 parent 404760b commit a6e1bf6
Show file tree
Hide file tree
Showing 12 changed files with 275 additions and 190 deletions.
20 changes: 16 additions & 4 deletions cspell.schema.json
Expand Up @@ -42,9 +42,13 @@
"type": "number"
},
"map": {
"description": "The set of substrings to map, these are generally single character strings.\n\nMultiple sets can be defined by using a `|` to separate them.\n\nExample: `\"eéê|aåá\"` contains two different sets.\n\nTo add a multi-character substring use `()`.\n\nExample: `\"f(ph)(gh)\"` results in the following set: `f`, `ph`, `gh`.",
"description": "The set of substrings to map, these are generally single character strings.\n\nMultiple sets can be defined by using a `|` to separate them.\n\nExample: `\"eéê|aåá\"` contains two different sets.\n\nTo add a multi-character substring use `()`.\n\nExample: `\"f(ph)(gh)\"` results in the following set: `f`, `ph`, `gh`.\n\n- To match the beginning of a word, use `^`: `\"(^I)\"`.\n- To match the end of a word, use `$`: `\"(e$)(ing$)\"`.",
"type": "string"
},
"penalty": {
"description": "Add a penalty to the final cost. This is used to discourage certain suggestions.\n\nExample: ```yaml # Match adding/removing `-` to the end of a word. map: \"$(-$)\" replace: 50 penalty: 100 ```\n\nThis makes adding a `-` to the end of a word more expensive.\n\nThink of it as taking the toll way for speed but getting the bill later.",
"type": "number"
},
"replace": {
"description": "The cost to replace one of the substrings in the map with another substring in the map. Example: Map['a', 'i'] This would be the cost to substitute `a` with `i`: Like `bat` to `bit` or the reverse.",
"type": "number"
Expand Down Expand Up @@ -72,9 +76,13 @@
"type": "number"
},
"map": {
"description": "The set of substrings to map, these are generally single character strings.\n\nMultiple sets can be defined by using a `|` to separate them.\n\nExample: `\"eéê|aåá\"` contains two different sets.\n\nTo add a multi-character substring use `()`.\n\nExample: `\"f(ph)(gh)\"` results in the following set: `f`, `ph`, `gh`.",
"description": "The set of substrings to map, these are generally single character strings.\n\nMultiple sets can be defined by using a `|` to separate them.\n\nExample: `\"eéê|aåá\"` contains two different sets.\n\nTo add a multi-character substring use `()`.\n\nExample: `\"f(ph)(gh)\"` results in the following set: `f`, `ph`, `gh`.\n\n- To match the beginning of a word, use `^`: `\"(^I)\"`.\n- To match the end of a word, use `$`: `\"(e$)(ing$)\"`.",
"type": "string"
},
"penalty": {
"description": "Add a penalty to the final cost. This is used to discourage certain suggestions.\n\nExample: ```yaml # Match adding/removing `-` to the end of a word. map: \"$(-$)\" replace: 50 penalty: 100 ```\n\nThis makes adding a `-` to the end of a word more expensive.\n\nThink of it as taking the toll way for speed but getting the bill later.",
"type": "number"
},
"replace": {
"description": "The cost to replace one of the substrings in the map with another substring in the map. Example: Map['a', 'i'] This would be the cost to substitute `a` with `i`: Like `bat` to `bit` or the reverse.",
"type": "number"
Expand Down Expand Up @@ -102,9 +110,13 @@
"type": "number"
},
"map": {
"description": "The set of substrings to map, these are generally single character strings.\n\nMultiple sets can be defined by using a `|` to separate them.\n\nExample: `\"eéê|aåá\"` contains two different sets.\n\nTo add a multi-character substring use `()`.\n\nExample: `\"f(ph)(gh)\"` results in the following set: `f`, `ph`, `gh`.",
"description": "The set of substrings to map, these are generally single character strings.\n\nMultiple sets can be defined by using a `|` to separate them.\n\nExample: `\"eéê|aåá\"` contains two different sets.\n\nTo add a multi-character substring use `()`.\n\nExample: `\"f(ph)(gh)\"` results in the following set: `f`, `ph`, `gh`.\n\n- To match the beginning of a word, use `^`: `\"(^I)\"`.\n- To match the end of a word, use `$`: `\"(e$)(ing$)\"`.",
"type": "string"
},
"penalty": {
"description": "Add a penalty to the final cost. This is used to discourage certain suggestions.\n\nExample: ```yaml # Match adding/removing `-` to the end of a word. map: \"$(-$)\" replace: 50 penalty: 100 ```\n\nThis makes adding a `-` to the end of a word more expensive.\n\nThink of it as taking the toll way for speed but getting the bill later.",
"type": "number"
},
"replace": {
"description": "The cost to replace one of the substrings in the map with another substring in the map. Example: Map['a', 'i'] This would be the cost to substitute `a` with `i`: Like `bat` to `bit` or the reverse.",
"type": "number"
Expand Down Expand Up @@ -870,7 +882,7 @@
"$ref": "#/definitions/CostMapDefSwap"
}
],
"description": "A WeightedMapDef enables setting weights for edits between related characters and substrings.\n\nMultiple groups can be defined using a `|`. A multi-character substring is defined using `()`.\n\nFor example, in some languages, some letters sound alike.\n\n```ts { map: 'sc(sh)(sch)(ss)|t(tt)', // two groups. replace: 50, // Make it 1/2 the cost of a normal edit to replace a `t` with `tt`. } ```\n\nThe following could be used to make inserting, removing, or replacing vowels cheaper. ```ts { map: 'aeiouy', //. insDel: 50, // Make it is cheaper to insert or delete a vowel. replace: 45, // It is even cheaper to replace one with another. } ```\n\nNote: the default edit distance is 100."
"description": "A WeightedMapDef enables setting weights for edits between related characters and substrings.\n\nMultiple groups can be defined using a `|`. A multi-character substring is defined using `()`.\n\nFor example, in some languages, some letters sound alike.\n\n```yaml map: 'sc(sh)(sch)(ss)|t(tt)' # two groups. replace: 50 # Make it 1/2 the cost of a normal edit to replace a `t` with `tt`. ```\n\nThe following could be used to make inserting, removing, or replacing vowels cheaper. ```yaml map: 'aeiouy' insDel: 50 # Make it cheaper to insert or delete a vowel. replace: 45 # It is even cheaper to replace one with another. ```\n\nNote: the default edit distance is 100."
},
"SuggestionCostsDefs": {
"items": {
Expand Down
16 changes: 16 additions & 0 deletions packages/cspell-trie-lib/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions packages/cspell-trie-lib/package.json
Expand Up @@ -44,6 +44,7 @@
"node": ">=12.13.0"
},
"devDependencies": {
"@cspell/cspell-types": "^5.15.3",
"@cspell/dict-en_us": "^2.1.4",
"@cspell/dict-es-es": "^2.1.0",
"@types/fs-extra": "^9.0.13",
Expand Down
Expand Up @@ -51,14 +51,18 @@ describe('distanceAStar', () => {
${''} | ${''} | ${undefined} | ${0}
${'walk'} | ${'walking'} | ${undefined} | ${300}
${'walk'} | ${''} | ${calcWeightMap(mapLetters())} | ${200}
${'1234'} | ${''} | ${calcWeightMap(mapLetters())} | ${400}
${'1234'} | ${''} | ${calcWeightMap(mapLetters())} | ${804}
${'walk'} | ${'walking'} | ${calcWeightMap()} | ${50}
${'wake up'} | ${'woken up'} | ${calcWeightMap()} | ${145}
${'definition'} | ${'defunishun'} | ${calcWeightMap()} | ${45 + 40}
${'reputation'} | ${'repetition'} | ${calcWeightMap()} | ${45 + 45}
${'gr8'} | ${'great'} | ${calcWeightMap()} | ${250}
${'read'} | ${'read7'} | ${calcWeightMap()} | ${201}
${'airplane'} | ${'aeroplane'} | ${calcWeightMap()} | ${60}
${'talked'} | ${'walking'} | ${calcWeightMap()} | ${150}
${'kings'} | ${'king'} | ${calcWeightMap()} | ${50}
${'re-wind'} | ${'rewind'} | ${calcWeightMap()} | ${202}
${'re-'} | ${'re'} | ${calcWeightMap()} | ${201}
${"I'm talk'n to u"} | ${'I am talking to you'} | ${calcWeightMap()} | ${302}
${"wear'd u go?"} | ${'where did you go?'} | ${calcWeightMap()} | ${304}
`(
Expand Down Expand Up @@ -127,6 +131,24 @@ function calcWeightMap(...defs: SuggestionCostMapDef[]): WeightMap {
{
map: '(air)(aer)(err)|(oar)(or)(hor)|(or)(our)',
replace: 40,
},
{
description: 'Penalty for inserting numbers',
map: '0123456789',
insDel: 1, // Cheap to insert,
penalty: 200, // Costly later
},
{
description: 'Discourage leading and trailing `-`',
map: '(^-)(^)|($)(-$)',
replace: 1, // Cheap to insert,
penalty: 200, // Costly later
},
{
description: 'Discourage inserting special characters `-`',
map: '-._',
insDel: 2, // Cheap to insert,
penalty: 200, // Costly later
}
);
}
30 changes: 18 additions & 12 deletions packages/cspell-trie-lib/src/lib/distance/distanceAStarWeighted.ts
Expand Up @@ -15,40 +15,44 @@ export function distanceAStarWeighted(wordA: string, wordB: string, map: WeightM

const candidates = new PairingHeap(compare);

candidates.add({ ai: 0, bi: 0, c: 0 });
candidates.add({ ai: 0, bi: 0, c: 0, p: 0 });

/** Substitute / Replace */
function opSub(n: Node) {
const { ai, bi, c } = n;
const { ai, bi, c, p } = n;
if (ai < aN && bi < bN) {
const cc = a[ai] === b[bi] ? c : c + cost;
candidates.add({ ai: ai + 1, bi: bi + 1, c: cc });
candidates.add({ ai: ai + 1, bi: bi + 1, c: cc, p });
}
}

/** Insert */
function opIns(n: Node) {
const { ai, bi, c } = n;
const { ai, bi, c, p } = n;
if (bi < bN) {
candidates.add({ ai: ai, bi: bi + 1, c: c + cost });
candidates.add({ ai: ai, bi: bi + 1, c: c + cost, p });
}
}

/** Delete */
function opDel(n: Node) {
const { ai, bi, c } = n;
const { ai, bi, c, p } = n;
if (ai < aN) {
candidates.add({ ai: ai + 1, bi: bi, c: c + cost });
candidates.add({ ai: ai + 1, bi: bi, c: c + cost, p });
}
}

/** Swap adjacent letters */
function opSwap(n: Node) {
const { ai, bi, c } = n;
const { ai, bi, c, p } = n;
if (a[ai] === b[bi + 1] && a[ai + 1] === b[bi]) {
candidates.add({ ai: ai + 2, bi: bi + 2, c: c + cost });
candidates.add({ ai: ai + 2, bi: bi + 2, c: c + cost, p });
}
}

function opMap(n: Node) {
const { ai, bi, c } = n;
const pos = { a, b, ai, bi, c };
const { ai, bi, c, p } = n;
const pos = { a, b, ai, bi, c, p };
const costCalculations = [map.calcInsDelCosts(pos), map.calcSwapCosts(pos), map.calcReplaceCosts(pos)];
costCalculations.forEach((iter) => {
for (const nn of iter) {
Expand All @@ -70,7 +74,7 @@ export function distanceAStarWeighted(wordA: string, wordB: string, map: WeightM
}

// istanbul ignore else
return best ? best.c : -1;
return best ? best.c + best.p : -1;
}

interface Pos {
Expand All @@ -83,6 +87,8 @@ interface Pos {
/** A search candidate: a `Pos` extended with the cost and penalty carried by that candidate. */
interface Node extends Pos {
/** The current cost of this candidate. */
c: number;
/** The current penalty of this candidate; it is added to `c` when the final distance is returned. */
p: number;
}

function compare(a: Node, b: Node): number {
Expand Down
74 changes: 1 addition & 73 deletions packages/cspell-trie-lib/src/lib/distance/suggestionCostsDef.ts
@@ -1,73 +1 @@
// cspell:ignore aeiouy
/**
* A WeightedMapDef enables setting weights for edits between related characters and substrings.
*
* Multiple groups can be defined using a `|`.
* A multi-character substring is defined using `()`.
*
* For example, in some languages, some letters sound alike.
*
* ```ts
* {
* map: 'sc(sh)(sch)(ss)|t(tt)', // two groups.
* replace: 50, // Make it 1/2 the cost of a normal edit to replace a `t` with `tt`.
* }
* ```
*
* The following could be used to make inserting, removing, or replacing vowels cheaper.
* ```ts
* {
* map: 'aeiouy',
* insDel: 50, // Make it cheaper to insert or delete a vowel.
* replace: 45, // It is even cheaper to replace one with another.
* }
* ```
*
* Note: the default edit distance is 100.
*/
export type SuggestionCostMapDef = CostMapDefReplace | CostMapDefInsDel | CostMapDefSwap;

/** Fields shared by every cost map definition; each cost field is optional here. */
interface CostMapDefBase {
/**
* The set of substrings to map, these are generally single character strings.
*
* Multiple sets can be defined by using a `|` to separate them.
*
* Example: `"eéê|aåá"` contains two different sets.
*
* To add a multi-character substring use `()`.
*
* Example: `"f(ph)(gh)"` results in the following set: `f`, `ph`, `gh`.
*/
map: string;
/** The cost to insert/delete one of the substrings in the map. Note: insert/delete costs are symmetrical. */
insDel?: number;
/**
* The cost to replace one of the substrings in the map with another substring in the map.
* Example: Map['a', 'i']
* This would be the cost to substitute `a` with `i`: Like `bat` to `bit` or the reverse.
*/
replace?: number;
/**
* The cost to swap two adjacent substrings found in the map.
* Example: Map['e', 'i']
* This represents the cost to change `ei` to `ie` or the reverse.
*/
swap?: number;
/**
* A description of the purpose of the map.
*/
description?: string;
}

/** A cost map definition in which the `replace` cost is required. */
export interface CostMapDefReplace extends CostMapDefBase {
replace: number;
}

/** A cost map definition in which the `insDel` (insert/delete) cost is required. */
export interface CostMapDefInsDel extends CostMapDefBase {
insDel: number;
}

/** A cost map definition in which the `swap` cost is required. */
export interface CostMapDefSwap extends CostMapDefBase {
swap: number;
}
export type { SuggestionCostMapDef } from '@cspell/cspell-types';

0 comments on commit a6e1bf6

Please sign in to comment.