Skip to content

Commit

Permalink
fix: cspell-tools - support excluding words (#5140)
Browse files Browse the repository at this point in the history
  • Loading branch information
Jason3S committed Jan 2, 2024
1 parent 7136c39 commit 3fcdd89
Show file tree
Hide file tree
Showing 12 changed files with 140 additions and 6 deletions.
@@ -0,0 +1,10 @@
# yaml-language-server: $schema=./../../cspell-tools.config.schema.json

targets:
- name: colors
targetDirectory: ../../temp/builds/build-exclude
sources:
- filename: src/words.txt
split: true
- ../dicts/colors.txt
format: plaintext
5 changes: 5 additions & 0 deletions packages/cspell-tools/fixtures/build-exclude/src/exclude.txt
@@ -0,0 +1,5 @@
# Words to exclude
green
purple
London
paris
4 changes: 4 additions & 0 deletions packages/cspell-tools/fixtures/build-exclude/src/words.txt
@@ -0,0 +1,4 @@
apple
banana
apple‌banana
grape
21 changes: 21 additions & 0 deletions packages/cspell-tools/src/__snapshots__/build.test.ts.snap
Expand Up @@ -131,6 +131,27 @@ strawberry
"
`;

exports[`build action > build 6 1`] = `
"
# cspell-tools: keep-case no-split
apple
banana
black
blue
cyan
grape
green
magenta
orange
pink
purple
red
white
yellow
"
`;

exports[`build action > build multi 0 1`] = `
"
# cspell-tools: keep-case no-split
Expand Down
1 change: 1 addition & 0 deletions packages/cspell-tools/src/build.test.ts
Expand Up @@ -27,6 +27,7 @@ describe('build action', () => {
${f('build-source-list')} | ${undefined} | ${tBuilds('build-source-list/source-list.txt')}
${'.'} | ${f('build-combo/cspell-tools.config.yaml')} | ${'color-cities-code.txt'}
${f('build-split-source')} | ${undefined} | ${tBuilds('build-split-source/split-colors.txt')}
${f('build-exclude')} | ${undefined} | ${tBuilds('build-exclude/colors.txt')}
`('build %#', async ({ sourceRoot, config, target }) => {
await expect(build(undefined, { config, root: t(sourceRoot), cwd: t() })).resolves.toBeUndefined();
const content = await readTextFile(t(target));
Expand Down
7 changes: 7 additions & 0 deletions packages/cspell-tools/src/compiler/CompileOptions.ts
Expand Up @@ -9,4 +9,11 @@ export interface CompileOptions {
* Generate lower case / accent free versions of words.
*/
generateNonStrict: boolean;

/**
* Optional filter function to filter out words.
* @param word the word to test
* @returns `true` to keep the word, `false` to exclude it.
*/
filter?: (word: string) => boolean;
}
Expand Up @@ -305,3 +305,33 @@ Error+
msg
"
`;

exports[`compile > compile filtered 'dicts/cities.txt', excludeWordsFrom: [ 'build-exclude/src/exclude.txt' ] 1`] = `
"
# cspell-tools: keep-case no-split
Los Angeles
Mexico City
New Amsterdam
New Delhi
New York
Paris
San Francisco
"
`;

exports[`compile > compile filtered 'dicts/colors.txt', excludeWordsFrom: [ 'build-exclude/src/exclude.txt' ] 1`] = `
"
# cspell-tools: keep-case no-split
black
blue
cyan
magenta
orange
pink
red
white
yellow
"
`;
39 changes: 39 additions & 0 deletions packages/cspell-tools/src/compiler/compile.test.ts
Expand Up @@ -16,6 +16,10 @@ function sample(...parts: string[]): string {
return path.resolve(pathSamples, ...parts);
}

/**
 * Shorthand for `testHelper.resolveFixture`.
 * @param parts path parts, forwarded unchanged and in order.
 * @returns the string returned by `testHelper.resolveFixture` for those parts.
 */
function fix(...parts: string[]): string {
    const fixturePath = testHelper.resolveFixture(...parts);
    return fixturePath;
}

const consoleSpy = spyOnConsole();

describe('compile', () => {
Expand Down Expand Up @@ -104,6 +108,41 @@ describe('compile', () => {
expect(check2.passed).toBe(true);
},
);

// Compile each listed source file with `excludeWordsFrom` set, snapshot the generated
// dictionary, and check the checksum file after the first build and after a repeated build.
test.each`
file | excludeWordsFrom
${'dicts/colors.txt'} | ${['build-exclude/src/exclude.txt']}
${'dicts/cities.txt'} | ${['build-exclude/src/exclude.txt']}
`('compile filtered $file, excludeWordsFrom: $excludeWordsFrom', async ({ file, excludeWordsFrom }) => {
const targetDirectory = t(`.`);
const target: Target = {
name: 'myDictionary',
targetDirectory,
format: 'plaintext',
// The source file is resolved through the `fix` helper.
sources: [fix(file)],
compress: false,
trieBase: 10,
sort: true,
// Exclude files are resolved with the same `fix` helper as the source.
excludeWordsFrom: excludeWordsFrom.map((f: string) => fix(f)),
};
const req: CompileRequest = {
targets: [target],
rootDir: targetDirectory,
checksumFile: true,
};

// First conditional build; the output file and checksum file are read back below.
await compile(req, { conditionalBuild: true });

const ext = '.txt';
// The output is read from `<targetDirectory>/myDictionary.txt` (target name + `ext`).
const content = await readTextFile(`${targetDirectory}/myDictionary${ext}`);
expect(content).toMatchSnapshot();
const check = await checkShasumFile(path.join(targetDirectory, 'checksum.txt'), [], targetDirectory);
expect(check.passed).toBe(true);

// Repeat the identical request; the checksum check must still pass afterwards.
await compile(req, { conditionalBuild: true });
const check2 = await checkShasumFile(path.join(targetDirectory, 'checksum.txt'), [], targetDirectory);
expect(check2.passed).toBe(true);
});
});

function t(...parts: string[]): string {
Expand Down
24 changes: 19 additions & 5 deletions packages/cspell-tools/src/compiler/compile.ts
Expand Up @@ -14,7 +14,7 @@ import type {
} from '../config/index.js';
import { isFileListSource, isFilePath, isFileSource } from '../config/index.js';
import { checkShasumFile, updateChecksumForFiles } from '../shasum/index.js';
import { createAllowedSplitWordsFromFiles } from './createWordsCollection.js';
import { createAllowedSplitWordsFromFiles, createWordsCollectionFromFiles } from './createWordsCollection.js';
import { logWithTimestamp } from './logWithTimestamp.js';
import { readTextFile } from './readers/readTextFile.js';
import type { SourceReaderOptions } from './SourceReader.js';
Expand Down Expand Up @@ -95,9 +95,11 @@ export async function compileTarget(
): Promise<string[]> {
logWithTimestamp(`Start compile: ${target.name}`);
const { rootDir, cwd, checksumFile, conditional } = compileOptions;
const { format, sources, trieBase, sort = true, generateNonStrict = false } = target;
const { format, sources, trieBase, sort = true, generateNonStrict = false, excludeWordsFrom } = target;
const targetDirectory = path.resolve(rootDir, target.targetDirectory ?? cwd ?? process.cwd());

const excludeFilter = await createExcludeFilter(excludeWordsFrom);

const generateNonStrictTrie = target.generateNonStrict ?? true;

const name = normalizeTargetName(target.name);
Expand All @@ -111,10 +113,10 @@ export async function compileTarget(
opAwaitAsync(),
);
const filesToProcess: FileToProcess[] = await toArray(filesToProcessAsync);
const normalizer = normalizeTargetWords({ sort: useTrie || sort, generateNonStrict });
const normalizer = normalizeTargetWords({ sort: useTrie || sort, generateNonStrict, filter: excludeFilter });
const checksumRoot = (checksumFile && path.dirname(checksumFile)) || rootDir;

const deps = [...calculateDependencies(filename, filesToProcess, checksumRoot)];
const deps = [...calculateDependencies(filename, filesToProcess, excludeWordsFrom, checksumRoot)];

if (conditional && checksumFile) {
const check = await checkShasumFile(checksumFile, deps, checksumRoot).catch(() => undefined);
Expand Down Expand Up @@ -145,10 +147,16 @@ export async function compileTarget(
return deps;
}

function calculateDependencies(targetFile: string, filesToProcess: FileToProcess[], rootDir: string): Set<string> {
function calculateDependencies(
targetFile: string,
filesToProcess: FileToProcess[],
excludeFiles: string[] | undefined,
rootDir: string,
): Set<string> {
const dependencies = new Set<string>();

addDependency(targetFile);
excludeFiles?.forEach((f) => addDependency(f));
filesToProcess.forEach((f) => addDependency(f.src));

return dependencies;
Expand Down Expand Up @@ -288,3 +296,9 @@ function logProgress<T>(freq = 100000): (iter: Iterable<T>) => Iterable<T> {

return logProgress;
}

/**
 * Build the predicate used to drop words that appear in the exclude files.
 *
 * Matching is an exact `has(word)` lookup against the collection returned by
 * `createWordsCollectionFromFiles`.
 *
 * @param excludeWordsFrom files listing the words to exclude; may be `undefined` or empty.
 * @returns a predicate that is `true` for words to keep and `false` for words to exclude.
 */
async function createExcludeFilter(excludeWordsFrom: FilePath[] | undefined): Promise<(word: string) => boolean> {
    if (!excludeWordsFrom?.length) {
        // No exclude files were given: every word is kept.
        return () => true;
    }

    const excluded = await createWordsCollectionFromFiles(excludeWordsFrom);
    const keepWord = (candidate: string): boolean => !excluded.has(candidate);
    return keepWord;
}
1 change: 1 addition & 0 deletions packages/cspell-tools/src/compiler/wordListParser.ts
Expand Up @@ -17,6 +17,7 @@ export function normalizeTargetWords(options: CompileOptions): Operator<string>
lineParser,
options.sort ? createInlineBufferedSort(10000) : undefined,
opFilter<string>(uniqueFilter(10000)),
options.filter ? opFilter<string>(options.filter) : undefined,
].filter(isDefined);
return opCombine(...operations);
}
Expand Down
2 changes: 1 addition & 1 deletion packages/cspell-tools/src/config/config.ts
Expand Up @@ -100,7 +100,7 @@ export interface Target extends CompileTargetOptions {
* Words from the sources that are found in `excludeWordsFrom` files
* will not be added to the dictionary.
*
* @version TBD
* @version 8.3.2
*/
excludeWordsFrom?: FilePath[] | undefined;

Expand Down
2 changes: 2 additions & 0 deletions packages/cspell-tools/src/test/TestHelper.ts
Expand Up @@ -12,7 +12,9 @@ const tempDirBase = path.join(packageRoot, 'temp');
const repoSamples = path.join(repoRoot, 'packages/Samples');

export interface TestHelper {
/** Path to `.../cspell/packages/cspell-tools/`. */
readonly packageRoot: string;
/** Path to `.../cspell/`. */
readonly repoRoot: string;
readonly tempDir: string;

Expand Down

0 comments on commit 3fcdd89

Please sign in to comment.