diff --git a/.pre-commit-hooks.yaml b/.pre-commit-hooks.yaml index fa617b97..0e089bca 100644 --- a/.pre-commit-hooks.yaml +++ b/.pre-commit-hooks.yaml @@ -52,6 +52,13 @@ entry: check-json language: python types: [json] +- id: check-shebang-scripts-are-executable + name: Check that scripts with shebangs are executable + description: Ensures that (non-binary) files with a shebang are executable. + entry: check-shebang-scripts-are-executable + language: python + types: [text] + stages: [commit, push, manual] - id: pretty-format-json name: Pretty format JSON description: This hook sets a standard for formatting JSON files. diff --git a/README.md b/README.md index bf36ecf1..d61d4576 100644 --- a/README.md +++ b/README.md @@ -58,6 +58,9 @@ Attempts to load all json files to verify syntax. #### `check-merge-conflict` Check for files that contain merge conflict strings. +#### `check-shebang-scripts-are-executable` +Checks that scripts with shebangs are executable. + #### `check-symlinks` Checks for symlinks which do not point to anything. diff --git a/pre_commit_hooks/check_executables_have_shebangs.py b/pre_commit_hooks/check_executables_have_shebangs.py index 19f7ed27..e271c662 100644 --- a/pre_commit_hooks/check_executables_have_shebangs.py +++ b/pre_commit_hooks/check_executables_have_shebangs.py @@ -2,7 +2,9 @@ import argparse import shlex import sys +from typing import Generator from typing import List +from typing import NamedTuple from typing import Optional from typing import Sequence from typing import Set @@ -19,29 +21,38 @@ def check_executables(paths: List[str]) -> int: else: # pragma: win32 no cover retv = 0 for path in paths: - if not _check_has_shebang(path): + if not has_shebang(path): _message(path) retv = 1 return retv -def _check_git_filemode(paths: Sequence[str]) -> int: +class GitLsFile(NamedTuple): + mode: str + filename: str + + +def git_ls_files(paths: Sequence[str]) -> Generator[GitLsFile, None, None]: outs = cmd_output('git', 'ls-files', '-z', '--stage', '--', *paths) - seen: Set[str] = set() for out in zsplit(outs): - metadata, path = out.split('\t') - tagmode = metadata.split(' ', 1)[0] + metadata, filename = out.split('\t') + mode, _, _ = metadata.split() + yield GitLsFile(mode, filename) - is_executable = any(b in EXECUTABLE_VALUES for b in tagmode[-3:]) - if is_executable and not _check_has_shebang(path): - _message(path) - seen.add(path) + +def _check_git_filemode(paths: Sequence[str]) -> int: + seen: Set[str] = set() + for ls_file in git_ls_files(paths): + is_executable = any(b in EXECUTABLE_VALUES for b in ls_file.mode[-3:]) + if is_executable and not has_shebang(ls_file.filename): + _message(ls_file.filename) + seen.add(ls_file.filename) return int(bool(seen)) -def _check_has_shebang(path: str) -> int: +def has_shebang(path: str) -> int: with open(path, 'rb') as f: first_bytes = f.read(2) diff --git a/pre_commit_hooks/check_shebang_scripts_are_executable.py b/pre_commit_hooks/check_shebang_scripts_are_executable.py new file mode 100644 index 00000000..dce8c59d --- /dev/null +++ b/pre_commit_hooks/check_shebang_scripts_are_executable.py @@ -0,0 +1,53 @@ +"""Check that text files with a shebang are executable.""" +import argparse +import shlex +import sys +from typing import List +from typing import Optional +from typing import Sequence +from typing import Set + +from pre_commit_hooks.check_executables_have_shebangs import EXECUTABLE_VALUES +from pre_commit_hooks.check_executables_have_shebangs import git_ls_files +from pre_commit_hooks.check_executables_have_shebangs import has_shebang + + +def check_shebangs(paths: List[str]) -> int: + # Cannot optimize on non-executability here if we intend this check to + # work on win32 -- and that's where problems caused by non-executability + # (elsewhere) are most likely to arise from. + return _check_git_filemode(paths) + + +def _check_git_filemode(paths: Sequence[str]) -> int: + seen: Set[str] = set() + for ls_file in git_ls_files(paths): + is_executable = any(b in EXECUTABLE_VALUES for b in ls_file.mode[-3:]) + if not is_executable and has_shebang(ls_file.filename): + _message(ls_file.filename) + seen.add(ls_file.filename) + + return int(bool(seen)) + + +def _message(path: str) -> None: + print( + f'{path}: has a shebang but is not marked executable!\n' + f' If it is supposed to be executable, try: ' + f'`chmod +x {shlex.quote(path)}`\n' + f' If it not supposed to be executable, double-check its shebang ' + f'is wanted.\n', + file=sys.stderr, + ) + + +def main(argv: Optional[Sequence[str]] = None) -> int: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument('filenames', nargs='*') + args = parser.parse_args(argv) + + return check_shebangs(args.filenames) + + +if __name__ == '__main__': + exit(main()) diff --git a/setup.cfg b/setup.cfg index 631faabb..dbe151b5 100644 --- a/setup.cfg +++ b/setup.cfg @@ -43,6 +43,7 @@ console_scripts = check-executables-have-shebangs = pre_commit_hooks.check_executables_have_shebangs:main check-json = pre_commit_hooks.check_json:main check-merge-conflict = pre_commit_hooks.check_merge_conflict:main + check-shebang-scripts-are-executable = pre_commit_hooks.check_executables_have_shebangs:main_reverse check-symlinks = pre_commit_hooks.check_symlinks:main check-toml = pre_commit_hooks.check_toml:main check-vcs-permalinks = pre_commit_hooks.check_vcs_permalinks:main diff --git a/tests/check_shebang_scripts_are_executable_test.py b/tests/check_shebang_scripts_are_executable_test.py new file mode 100644 index 00000000..9e78b06c --- /dev/null +++ b/tests/check_shebang_scripts_are_executable_test.py @@ -0,0 +1,87 @@ +import os + +import pytest + +from pre_commit_hooks.check_shebang_scripts_are_executable import \ + _check_git_filemode +from pre_commit_hooks.check_shebang_scripts_are_executable import main +from pre_commit_hooks.util import cmd_output + + +def test_check_git_filemode_passing(tmpdir): + with tmpdir.as_cwd(): + cmd_output('git', 'init', '.') + + f = tmpdir.join('f') + f.write('#!/usr/bin/env bash') + f_path = str(f) + cmd_output('chmod', '+x', f_path) + cmd_output('git', 'add', f_path) + cmd_output('git', 'update-index', '--chmod=+x', f_path) + + g = tmpdir.join('g').ensure() + g_path = str(g) + cmd_output('git', 'add', g_path) + + files = [f_path, g_path] + assert _check_git_filemode(files) == 0 + + # this is the one we should trigger on + h = tmpdir.join('h') + h.write('#!/usr/bin/env bash') + h_path = str(h) + cmd_output('git', 'add', h_path) + + files = [h_path] + assert _check_git_filemode(files) == 1 + + +def test_check_git_filemode_passing_unusual_characters(tmpdir): + with tmpdir.as_cwd(): + cmd_output('git', 'init', '.') + + f = tmpdir.join('maƱana.txt') + f.write('#!/usr/bin/env bash') + f_path = str(f) + cmd_output('chmod', '+x', f_path) + cmd_output('git', 'add', f_path) + cmd_output('git', 'update-index', '--chmod=+x', f_path) + + files = (f_path,) + assert _check_git_filemode(files) == 0 + + +def test_check_git_filemode_failing(tmpdir): + with tmpdir.as_cwd(): + cmd_output('git', 'init', '.') + + f = tmpdir.join('f').ensure() + f.write('#!/usr/bin/env bash') + f_path = str(f) + cmd_output('git', 'add', f_path) + + files = (f_path,) + assert _check_git_filemode(files) == 1 + + +@pytest.mark.parametrize( + ('content', 'mode', 'expected'), + ( + pytest.param('#!python', '+x', 0, id='shebang with executable'), + pytest.param('#!python', '-x', 1, id='shebang without executable'), + pytest.param('', '+x', 0, id='no shebang with executable'), + pytest.param('', '-x', 0, id='no shebang without executable'), + ), +) +def test_git_executable_shebang(temp_git_dir, content, mode, expected): + with temp_git_dir.as_cwd(): + path = temp_git_dir.join('path') + path.write(content) + cmd_output('git', 'add', str(path)) + cmd_output('chmod', mode, str(path)) + cmd_output('git', 'update-index', f'--chmod={mode}', str(path)) + + # simulate how identify chooses that something is executable + filenames = [path for path in [str(path)] if os.access(path, os.X_OK)] + + assert main(filenames) == expected