From cb43872fb6c05366a8fc25a8bd889b95918f45a3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Iiro=20J=C3=A4ppinen?= <iiro@jappinen.fi>
Date: Wed, 20 Nov 2019 11:56:44 +0200
Subject: [PATCH] feat: split tasks into chunks to support shells with limited
 max argument length (#732)

* feat: split tasks into chunks to support shells with limited max argument length

* refactor: use a function for getting max arg lenth

* refactor: simplify spreading of listrTasks into main runner tasks
---
 README.md                              |   6 ++
 bin/lint-staged                        |  19 +++++
 lib/chunkFiles.js                      |  40 +++++++++
 lib/index.js                           |  13 ++-
 lib/runAll.js                          | 112 ++++++++++++++-----------
 test/__snapshots__/runAll.spec.js.snap |  21 -----
 test/chunkFiles.spec.js                |  29 +++++++
 test/runAll.spec.js                    |  12 ---
 test/runAll.unmocked.spec.js           |  23 +++++
 9 files changed, 192 insertions(+), 83 deletions(-)
 create mode 100644 lib/chunkFiles.js
 create mode 100644 test/chunkFiles.spec.js
diff --git a/README.md b/README.md
index a171e46e6..fe953ead0 100644
--- a/README.md
+++ b/README.md
@@ -394,6 +394,8 @@ Parameters to `lintStaged` are equivalent to their CLI counterparts:
 ```js
 const success = await lintStaged({
   configPath: './path/to/configuration/file',
+  maxArgLength: null,
+  relative: false,
   shell: false,
   quiet: false,
   debug: false
@@ -407,12 +409,16 @@ const success = await lintStaged({
   config: {
     '*.js': 'eslint --fix'
   },
+  maxArgLength: null,
+  relative: false,
   shell: false,
   quiet: false,
   debug: false
 })
 ```
 
+The `maxArgLength` option configures chunking of tasks into multiple parts that are run one after the other. This is to avoid issues on Windows platforms where the maximum length of the command line argument string is limited to 8192 characters. Lint-staged might generate a very long argument string when there are many staged files. This option is set automatically from the cli, but not via the Node.js API by default.
+
 ### Using with JetBrains IDEs _(WebStorm, PyCharm, IntelliJ IDEA, RubyMine, etc.)_
 
 _**Update**_: The latest version of JetBrains IDEs now support running hooks as you would expect.
diff --git a/bin/lint-staged b/bin/lint-staged
index 277f1b681..eea0d69fc 100755
--- a/bin/lint-staged
+++ b/bin/lint-staged
@@ -42,8 +42,27 @@ if (cmdline.debug) {
 
 debug('Running `lint-staged@%s`', pkg.version)
 
+/**
+ * Get the maximum length of a command-line argument string based on current platform
+ *
+ * https://serverfault.com/questions/69430/what-is-the-maximum-length-of-a-command-line-in-mac-os-x
+ * https://support.microsoft.com/en-us/help/830473/command-prompt-cmd-exe-command-line-string-limitation
+ * https://unix.stackexchange.com/a/120652
+ */
+const getMaxArgLength = () => {
+  switch (process.platform) {
+    case 'darwin':
+      return 262144
+    case 'win32':
+      return 8191
+    default:
+      return 131072
+  }
+}
+
 lintStaged({
   configPath: cmdline.config,
+  maxArgLength: getMaxArgLength(),
   relative: !!cmdline.relative,
   shell: !!cmdline.shell,
   quiet: !!cmdline.quiet,
diff --git a/lib/chunkFiles.js b/lib/chunkFiles.js
new file mode 100644
index 000000000..824c383d7
--- /dev/null
+++ b/lib/chunkFiles.js
@@ -0,0 +1,40 @@
+'use strict'
+
+const normalize = require('normalize-path')
+const path = require('path')
+
+/**
+ * Chunk array into sub-arrays
+ * @param {Array} arr
+ * @param {Number} chunkCount
+ * @retuns {Array<Array>}
+ */
+function chunkArray(arr, chunkCount) {
+  if (chunkCount === 1) return [arr]
+  const chunked = []
+  let position = 0
+  for (let i = 0; i < chunkCount; i++) {
+    const chunkLength = Math.ceil((arr.length - position) / (chunkCount - i))
+    chunked.push([])
+    chunked[i] = arr.slice(position, chunkLength + position)
+    position += chunkLength
+  }
+  return chunked
+}
+
+/**
+ * Chunk files into sub-arrays based on the length of the resulting argument string
+ * @param {Object} opts
+ * @param {Array<String>} opts.files
+ * @param {String} opts.gitDir
+ * @param {number} [opts.maxArgLength] the maximum argument string length
+ * @param {Boolean} [opts.relative] whether files are relative to `gitDir` or should be resolved as absolute
+ * @returns {Array<Array<String>>}
+ */
+module.exports = function chunkFiles({ files, gitDir, maxArgLength = null, relative = false }) {
+  if (!maxArgLength) return [files]
+  const normalizedFiles = files.map(file => normalize(relative ? file : path.resolve(gitDir, file)))
+  const fileListLength = normalizedFiles.join(' ').length
+  const chunkCount = Math.min(Math.ceil(fileListLength / maxArgLength), files.length)
+  return chunkArray(files, chunkCount)
+}
diff --git a/lib/index.js b/lib/index.js
index bdfd92bba..53827a493 100644
--- a/lib/index.js
+++ b/lib/index.js
@@ -44,6 +44,7 @@ function loadConfig(configPath) {
  * @param {object} options
  * @param {string} [options.configPath] - Path to configuration file
  * @param {object}  [options.config] - Object with configuration for programmatic API
+ * @param {number} [options.maxArgLength] - Maximum argument string length
  * @param {boolean} [options.relative] - Pass relative filepaths to tasks
  * @param {boolean} [options.shell] - Skip parsing of tasks for better shell support
  * @param {boolean} [options.quiet] - Disable lint-staged’s own console output
@@ -53,7 +54,15 @@ function loadConfig(configPath) {
  * @returns {Promise<boolean>} Promise of whether the linting passed or failed
  */
 module.exports = function lintStaged(
-  { configPath, config, relative = false, shell = false, quiet = false, debug = false } = {},
+  {
+    configPath,
+    config,
+    maxArgLength,
+    relative = false,
+    shell = false,
+    quiet = false,
+    debug = false
+  } = {},
   logger = console
 ) {
   debugLog('Loading config using `cosmiconfig`')
@@ -76,7 +85,7 @@ module.exports = function lintStaged(
         debugLog('lint-staged config:\n%O', config)
       }
 
-      return runAll({ config, relative, shell, quiet, debug }, logger)
+      return runAll({ config, maxArgLength, relative, shell, quiet, debug }, logger)
         .then(() => {
           debugLog('tasks were executed successfully!')
           return Promise.resolve(true)
diff --git a/lib/runAll.js b/lib/runAll.js
index 21c1cef61..cbc6adaae 100644
--- a/lib/runAll.js
+++ b/lib/runAll.js
@@ -6,6 +6,7 @@ const chalk = require('chalk')
 const Listr = require('listr')
 const symbols = require('log-symbols')
 
+const chunkFiles = require('./chunkFiles')
 const generateTasks = require('./generateTasks')
 const getStagedFiles = require('./getStagedFiles')
 const GitWorkflow = require('./gitWorkflow')
@@ -14,20 +15,13 @@ const resolveGitDir = require('./resolveGitDir')
 
 const debugLog = require('debug')('lint-staged:run')
 
-/**
- * https://serverfault.com/questions/69430/what-is-the-maximum-length-of-a-command-line-in-mac-os-x
- * https://support.microsoft.com/en-us/help/830473/command-prompt-cmd-exe-command-line-string-limitation
- * https://unix.stackexchange.com/a/120652
- */
-const MAX_ARG_LENGTH =
-  (process.platform === 'darwin' && 262144) || (process.platform === 'win32' && 8191) || 131072
-
 /**
  * Executes all tasks and either resolves or rejects the promise
  *
  * @param {object} options
  * @param {Object} [options.config] - Task configuration
  * @param {Object} [options.cwd] - Current working directory
+ * @param {number} [options.maxArgLength] - Maximum argument string length
  * @param {boolean} [options.relative] - Pass relative filepaths to tasks
  * @param {boolean} [options.shell] - Skip parsing of tasks for better shell support
  * @param {boolean} [options.quiet] - Disable lint-staged’s own console output
@@ -36,7 +30,15 @@ const MAX_ARG_LENGTH =
  * @returns {Promise}
  */
 module.exports = async function runAll(
-  { config, cwd = process.cwd(), debug = false, quiet = false, relative = false, shell = false },
+  {
+    config,
+    cwd = process.cwd(),
+    debug = false,
+    maxArgLength,
+    quiet = false,
+    relative = false,
+    shell = false
+  },
   logger = console
 ) {
   debugLog('Running all linter scripts')
@@ -57,48 +59,70 @@ module.exports = async function runAll(
 
   debugLog('Loaded list of staged files in git:\n%O', files)
 
-  const argLength = files.join(' ').length
-  if (argLength > MAX_ARG_LENGTH) {
-    logger.warn(`
- ${symbols.warning}  ${chalk.yellow(
-      `lint-staged generated an argument string of ${argLength} characters, and commands might not run correctly on your platform.
-    It is recommended to use functions as linters and split your command based on the number of staged files. For more info, please visit:
-    https://github.com/okonet/lint-staged#using-js-functions-to-customize-linter-commands`
-    )}
-`)
+  const chunkedFiles = chunkFiles({ files, gitDir, maxArgLength, relative })
+  const chunkCount = chunkedFiles.length
+  if (chunkCount > 1) {
+    debugLog(`Chunked staged files into ${chunkCount} part`, chunkCount)
   }
 
-  const tasks = generateTasks({ config, cwd, gitDir, files, relative })
-
   // lint-staged 10 will automatically add modifications to index
   // Warn user when their command includes `git add`
   let hasDeprecatedGitAdd = false
 
-  const listrTasks = tasks.map(task => {
-    const subTasks = makeCmdTasks({ commands: task.commands, files: task.fileList, gitDir, shell })
+  const listrOptions = {
+    dateFormat: false,
+    renderer: (quiet && 'silent') || (debug && 'verbose') || 'update'
+  }
 
-    if (subTasks.some(subTask => subTask.command.includes('git add'))) {
-      hasDeprecatedGitAdd = true
-    }
+  const listrTasks = []
+
+  for (const [index, files] of chunkedFiles.entries()) {
+    const chunkTasks = generateTasks({ config, cwd, gitDir, files, relative })
+    const chunkListrTasks = chunkTasks.map(task => {
+      const subTasks = makeCmdTasks({
+        commands: task.commands,
+        files: task.fileList,
+        gitDir,
+        shell
+      })
 
-    return {
-      title: `Running tasks for ${task.pattern}`,
-      task: async () =>
-        new Listr(subTasks, {
-          // In sub-tasks we don't want to run concurrently
-          // and we want to abort on errors
-          dateFormat: false,
-          concurrent: false,
-          exitOnError: true
-        }),
+      if (subTasks.some(subTask => subTask.command.includes('git add'))) {
+        hasDeprecatedGitAdd = true
+      }
+
+      return {
+        title: `Running tasks for ${task.pattern}`,
+        task: async () =>
+          new Listr(subTasks, {
+            // In sub-tasks we don't want to run concurrently
+            // and we want to abort on errors
+            dateFormat: false,
+            concurrent: false,
+            exitOnError: true
+          }),
+        skip: () => {
+          if (task.fileList.length === 0) {
+            return `No staged files match ${task.pattern}`
+          }
+          return false
+        }
+      }
+    })
+
+    listrTasks.push({
+      // No need to show number of task chunks when there's only one
+      title:
+        chunkCount > 1 ? `Running tasks (chunk ${index}/${chunkCount})...` : 'Running tasks...',
+      task: () =>
+        new Listr(chunkListrTasks, { ...listrOptions, concurrent: false, exitOnError: false }),
       skip: () => {
-        if (task.fileList.length === 0) {
-          return `No staged files match ${task.pattern}`
+        if (chunkListrTasks.every(task => task.skip())) {
+          return 'No tasks to run'
         }
         return false
       }
-    }
-  })
+    })
+  }
 
   if (hasDeprecatedGitAdd) {
     logger.warn(`${symbols.warning} ${chalk.yellow(
@@ -114,11 +138,6 @@ module.exports = async function runAll(
     return 'No tasks to run.'
   }
 
-  const listrOptions = {
-    dateFormat: false,
-    renderer: (quiet && 'silent') || (debug && 'verbose') || 'update'
-  }
-
   const git = new GitWorkflow(gitDir)
 
   const runner = new Listr(
@@ -127,10 +146,7 @@ module.exports = async function runAll(
         title: 'Preparing...',
         task: () => git.stashBackup()
       },
-      {
-        title: 'Running tasks...',
-        task: () => new Listr(listrTasks, { ...listrOptions, concurrent: true, exitOnError: false })
-      },
+      ...listrTasks,
       {
         title: 'Applying modifications...',
         skip: ctx => ctx.hasErrors && 'Skipped because of errors from tasks',
diff --git a/test/__snapshots__/runAll.spec.js.snap b/test/__snapshots__/runAll.spec.js.snap
index 130ed3b91..3ce7f0e71 100644
--- a/test/__snapshots__/runAll.spec.js.snap
+++ b/test/__snapshots__/runAll.spec.js.snap
@@ -87,24 +87,3 @@ exports[`runAll should use an injected logger 1`] = `
 "
 LOG No staged files match any of provided globs."
 `;
-
-exports[`runAll should warn if the argument length is longer than what the platform can handle 1`] = `
-"
-WARN 
- ‼  lint-staged generated an argument string of 999999 characters, and commands might not run correctly on your platform.
-    It is recommended to use functions as linters and split your command based on the number of staged files. For more info, please visit:
-    https://github.com/okonet/lint-staged#using-js-functions-to-customize-linter-commands
-
-LOG Preparing... [started]
-LOG Preparing... [completed]
-LOG Running tasks... [started]
-LOG Running tasks for *.js [started]
-LOG echo \\"sample\\" [started]
-LOG echo \\"sample\\" [completed]
-LOG Running tasks for *.js [completed]
-LOG Running tasks... [completed]
-LOG Applying modifications... [started]
-LOG Applying modifications... [completed]
-LOG Cleaning up... [started]
-LOG Cleaning up... [completed]"
-`;
diff --git a/test/chunkFiles.spec.js b/test/chunkFiles.spec.js
new file mode 100644
index 000000000..7686d448b
--- /dev/null
+++ b/test/chunkFiles.spec.js
@@ -0,0 +1,29 @@
+import chunkFiles from '../lib/chunkFiles'
+
+describe('chunkFiles', () => {
+  const files = ['example.js', 'foo.js', 'bar.js', 'foo/bar.js']
+  const gitDir = '/opt/git/example.git'
+
+  it('should default to sane value', () => {
+    const chunkedFiles = chunkFiles({ files: ['foo.js'], gitDir, relative: true })
+    expect(chunkedFiles).toEqual([['foo.js']])
+  })
+
+  it('should not chunk short argument string', () => {
+    const chunkedFiles = chunkFiles({ files, gitDir, maxArgLength: 1000 })
+    expect(chunkedFiles).toEqual([files])
+  })
+
+  it('should chunk too long argument string', () => {
+    const chunkedFiles = chunkFiles({ files, gitDir, maxArgLength: 20 })
+    expect(chunkedFiles).toEqual(files.map(file => [file]))
+  })
+
+  it('should take into account relative setting', () => {
+    const chunkedFiles = chunkFiles({ files, gitDir, maxArgLength: 20, relative: true })
+    expect(chunkedFiles).toEqual([
+      [files[0], files[1]],
+      [files[2], files[3]]
+    ])
+  })
+})
diff --git a/test/runAll.spec.js b/test/runAll.spec.js
index c72939970..c5d32b91b 100644
--- a/test/runAll.spec.js
+++ b/test/runAll.spec.js
@@ -38,18 +38,6 @@ describe('runAll', () => {
     expect(console.printHistory()).toMatchSnapshot()
   })
 
-  it('should warn if the argument length is longer than what the platform can handle', async () => {
-    getStagedFiles.mockImplementationOnce(async () => new Array(100000).fill('sample.js'))
-
-    try {
-      await runAll({ config: { '*.js': () => 'echo "sample"' } })
-    } catch (err) {
-      console.log(err)
-    }
-
-    expect(console.printHistory()).toMatchSnapshot()
-  })
-
   it('should use an injected logger', async () => {
     expect.assertions(1)
     const logger = makeConsoleMock()
diff --git a/test/runAll.unmocked.spec.js b/test/runAll.unmocked.spec.js
index affb64255..52c6f9078 100644
--- a/test/runAll.unmocked.spec.js
+++ b/test/runAll.unmocked.spec.js
@@ -82,6 +82,11 @@ describe('runAll', () => {
     )
     await removeTempDir(nonGitDir)
   })
+
+  it('should short-circuit with no staged files', async () => {
+    const status = await runAll({ config: { '*.js': 'echo success' }, cwd })
+    expect(status).toEqual('No tasks to run.')
+  })
 })
 
 const globalConsoleTemp = console
@@ -533,4 +538,22 @@ describe('runAll', () => {
     expect(await execGit(['log', '-1', '--pretty=%B'])).toMatch('test')
     expect(await readFile('test.js')).toEqual(testJsFilePretty)
   })
+
+  it('should run chunked tasks when necessary', async () => {
+    // Stage two files
+    await appendFile('test.js', testJsFilePretty)
+    await execGit(['add', 'test.js'])
+    await appendFile('test2.js', testJsFilePretty)
+    await execGit(['add', 'test2.js'])
+
+    // Run lint-staged with `prettier --list-different` and commit pretty file
+    // Set maxArgLength low enough so that chunking is used
+    await gitCommit({ config: { '*.js': 'prettier --list-different' }, maxArgLength: 10 })
+
+    // Nothing is wrong, so a new commit is created
+    expect(await execGit(['rev-list', '--count', 'HEAD'])).toEqual('2')
+    expect(await execGit(['log', '-1', '--pretty=%B'])).toMatch('test')
+    expect(await readFile('test.js')).toEqual(testJsFilePretty)
+    expect(await readFile('test2.js')).toEqual(testJsFilePretty)
+  })
 })