Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New hook 'destroyed-symlinks' #511

Merged
merged 1 commit into from Nov 18, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
6 changes: 6 additions & 0 deletions .pre-commit-hooks.yaml
Expand Up @@ -100,6 +100,12 @@
entry: debug-statement-hook
language: python
types: [python]
- id: destroyed-symlinks
name: Detect Destroyed Symlinks
description: Detects symlinks which are changed to regular files with a content of a path which that symlink was pointing to.
entry: destroyed-symlinks
language: python
types: [file]
- id: detect-aws-credentials
name: Detect AWS Credentials
description: Detects *your* aws credentials from the aws cli credentials file
Expand Down
6 changes: 6 additions & 0 deletions README.md
Expand Up @@ -87,6 +87,12 @@ Attempts to load all yaml files to verify syntax.
#### `debug-statements`
Check for debugger imports and py37+ `breakpoint()` calls in python source.

#### `destroyed-symlinks`
Detects symlinks which are changed to regular files with a content of a path
which that symlink was pointing to.
This usually happens on Windows when a user clones a repository that has
symlinks but they do not have the permission to create symlinks.

#### `detect-aws-credentials`
Checks for the existence of AWS secrets that you have set up with the AWS CLI.
The following arguments are available:
Expand Down
9 changes: 1 addition & 8 deletions pre_commit_hooks/check_executables_have_shebangs.py
Expand Up @@ -8,18 +8,11 @@
from typing import Set

from pre_commit_hooks.util import cmd_output
from pre_commit_hooks.util import zsplit

EXECUTABLE_VALUES = frozenset(('1', '3', '5', '7'))


def zsplit(s: str) -> List[str]:
s = s.strip('\0')
if s:
return s.split('\0')
else:
return []


def check_executables(paths: List[str]) -> int:
if sys.platform == 'win32': # pragma: win32 cover
return _check_git_filemode(paths)
Expand Down
96 changes: 96 additions & 0 deletions pre_commit_hooks/destroyed_symlinks.py
@@ -0,0 +1,96 @@
import argparse
import shlex
import subprocess
from typing import List
from typing import Optional
from typing import Sequence

from pre_commit_hooks.util import cmd_output
from pre_commit_hooks.util import zsplit

ORDINARY_CHANGED_ENTRIES_MARKER = '1'
PERMS_LINK = '120000'
PERMS_NONEXIST = '000000'


def find_destroyed_symlinks(files: Sequence[str]) -> List[str]:
destroyed_links: List[str] = []
if not files:
return destroyed_links
for line in zsplit(
cmd_output('git', 'status', '--porcelain=v2', '-z', '--', *files),
):
splitted = line.split(' ')
if splitted and splitted[0] == ORDINARY_CHANGED_ENTRIES_MARKER:
# https://git-scm.com/docs/git-status#_changed_tracked_entries
(
_, _, _,
mode_HEAD,
mode_index,
_,
hash_HEAD,
hash_index,
*path_splitted,
) = splitted
path = ' '.join(path_splitted)
if (
mode_HEAD == PERMS_LINK and
mode_index != PERMS_LINK and
mode_index != PERMS_NONEXIST
):
if hash_HEAD == hash_index:
# if old and new hashes are equal, it's not needed to check
# anything more, we've found a destroyed symlink for sure
destroyed_links.append(path)
else:
# if old and new hashes are *not* equal, it doesn't mean
# that everything is OK - new file may be altered
# by something like trailing-whitespace and/or
# mixed-line-ending hooks so we need to go deeper
SIZE_CMD = ('git', 'cat-file', '-s')
size_index = int(cmd_output(*SIZE_CMD, hash_index).strip())
size_HEAD = int(cmd_output(*SIZE_CMD, hash_HEAD).strip())

# in the worst case new file may have CRLF added
# so check content only if new file is bigger
# not more than 2 bytes compared to the old one
if size_index <= size_HEAD + 2:
head_content = subprocess.check_output(
('git', 'cat-file', '-p', hash_HEAD),
).rstrip()
index_content = subprocess.check_output(
('git', 'cat-file', '-p', hash_index),
).rstrip()
if head_content == index_content:
destroyed_links.append(path)
return destroyed_links


def main(argv: Optional[Sequence[str]] = None) -> int:
parser = argparse.ArgumentParser()
parser.add_argument('filenames', nargs='*', help='Filenames to check.')
args = parser.parse_args(argv)
destroyed_links = find_destroyed_symlinks(files=args.filenames)
if destroyed_links:
print('Destroyed symlinks:')
for destroyed_link in destroyed_links:
print(f'- {destroyed_link}')
print('You should unstage affected files:')
print(
'\tgit reset HEAD -- {}'.format(
' '.join(shlex.quote(link) for link in destroyed_links),
),
)
print(
'And retry commit. As a long term solution '
'you may try to explicitly tell git that your '
'environment does not support symlinks:',
)
print('\tgit config core.symlinks false')
return 1
else:
return 0


if __name__ == '__main__':
exit(main())
9 changes: 9 additions & 0 deletions pre_commit_hooks/util.py
@@ -1,5 +1,6 @@
import subprocess
from typing import Any
from typing import List
from typing import Optional
from typing import Set

Expand All @@ -22,3 +23,11 @@ def cmd_output(*cmd: str, retcode: Optional[int] = 0, **kwargs: Any) -> str:
if retcode is not None and proc.returncode != retcode:
raise CalledProcessError(cmd, retcode, proc.returncode, stdout, stderr)
return stdout


def zsplit(s: str) -> List[str]:
s = s.strip('\0')
if s:
return s.split('\0')
else:
return []
1 change: 1 addition & 0 deletions setup.cfg
Expand Up @@ -44,6 +44,7 @@ console_scripts =
check-xml = pre_commit_hooks.check_xml:main
check-yaml = pre_commit_hooks.check_yaml:main
debug-statement-hook = pre_commit_hooks.debug_statement_hook:main
destroyed-symlinks = pre_commit_hooks.destroyed_symlinks:main
detect-aws-credentials = pre_commit_hooks.detect_aws_credentials:main
detect-private-key = pre_commit_hooks.detect_private_key:main
double-quote-string-fixer = pre_commit_hooks.string_fixer:main
Expand Down
10 changes: 0 additions & 10 deletions tests/check_executables_have_shebangs_test.py
Expand Up @@ -102,16 +102,6 @@ def test_check_git_filemode_failing(tmpdir):
assert check_executables_have_shebangs._check_git_filemode(files) == 1


@pytest.mark.parametrize('out', ('\0f1\0f2\0', '\0f1\0f2', 'f1\0f2\0'))
def test_check_zsplits_correctly(out):
assert check_executables_have_shebangs.zsplit(out) == ['f1', 'f2']


@pytest.mark.parametrize('out', ('\0\0', '\0', ''))
def test_check_zsplit_returns_empty(out):
assert check_executables_have_shebangs.zsplit(out) == []


@pytest.mark.parametrize(
('content', 'mode', 'expected'),
(
Expand Down
74 changes: 74 additions & 0 deletions tests/destroyed_symlinks_test.py
@@ -0,0 +1,74 @@
import os
import subprocess

import pytest

from pre_commit_hooks.destroyed_symlinks import find_destroyed_symlinks
from pre_commit_hooks.destroyed_symlinks import main

TEST_SYMLINK = 'test_symlink'
TEST_SYMLINK_TARGET = '/doesnt/really/matters'
TEST_FILE = 'test_file'
TEST_FILE_RENAMED = f'{TEST_FILE}_renamed'


@pytest.fixture
def repo_with_destroyed_symlink(tmpdir):
source_repo = tmpdir.join('src')
os.makedirs(source_repo, exist_ok=True)
test_repo = tmpdir.join('test')
with source_repo.as_cwd():
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

note that you can use git -C source_repo ... instead of as_cwd()

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I know but this way it's a bit more readable IMO.

subprocess.check_call(('git', 'init'))
os.symlink(TEST_SYMLINK_TARGET, TEST_SYMLINK)
with open(TEST_FILE, 'w') as f:
print('some random content', file=f)
subprocess.check_call(('git', 'add', '.'))
subprocess.check_call(
('git', 'commit', '--no-gpg-sign', '-m', 'initial'),
)
assert b'120000 ' in subprocess.check_output(
('git', 'cat-file', '-p', 'HEAD^{tree}'),
)
subprocess.check_call(
('git', '-c', 'core.symlinks=false', 'clone', source_repo, test_repo),
)
with test_repo.as_cwd():
subprocess.check_call(
('git', 'config', '--local', 'core.symlinks', 'true'),
)
subprocess.check_call(('git', 'mv', TEST_FILE, TEST_FILE_RENAMED))
assert not os.path.islink(test_repo.join(TEST_SYMLINK))
yield test_repo


def test_find_destroyed_symlinks(repo_with_destroyed_symlink):
with repo_with_destroyed_symlink.as_cwd():
assert find_destroyed_symlinks([]) == []
assert main([]) == 0

subprocess.check_call(('git', 'add', TEST_SYMLINK))
assert find_destroyed_symlinks([TEST_SYMLINK]) == [TEST_SYMLINK]
assert find_destroyed_symlinks([]) == []
assert main([]) == 0
assert find_destroyed_symlinks([TEST_FILE_RENAMED, TEST_FILE]) == []
ALL_STAGED = [TEST_SYMLINK, TEST_FILE_RENAMED]
assert find_destroyed_symlinks(ALL_STAGED) == [TEST_SYMLINK]
assert main(ALL_STAGED) != 0

with open(TEST_SYMLINK, 'a') as f:
print(file=f) # add trailing newline
subprocess.check_call(['git', 'add', TEST_SYMLINK])
assert find_destroyed_symlinks(ALL_STAGED) == [TEST_SYMLINK]
assert main(ALL_STAGED) != 0

with open(TEST_SYMLINK, 'w') as f:
print('0' * len(TEST_SYMLINK_TARGET), file=f)
subprocess.check_call(('git', 'add', TEST_SYMLINK))
assert find_destroyed_symlinks(ALL_STAGED) == []
assert main(ALL_STAGED) == 0

with open(TEST_SYMLINK, 'w') as f:
print('0' * (len(TEST_SYMLINK_TARGET) + 3), file=f)
subprocess.check_call(('git', 'add', TEST_SYMLINK))
assert find_destroyed_symlinks(ALL_STAGED) == []
assert main(ALL_STAGED) == 0
11 changes: 11 additions & 0 deletions tests/util_test.py
Expand Up @@ -2,6 +2,7 @@

from pre_commit_hooks.util import CalledProcessError
from pre_commit_hooks.util import cmd_output
from pre_commit_hooks.util import zsplit


def test_raises_on_error():
Expand All @@ -12,3 +13,13 @@ def test_raises_on_error():
def test_output():
ret = cmd_output('sh', '-c', 'echo hi')
assert ret == 'hi\n'


@pytest.mark.parametrize('out', ('\0f1\0f2\0', '\0f1\0f2', 'f1\0f2\0'))
def test_check_zsplits_str_correctly(out):
assert zsplit(out) == ['f1', 'f2']


@pytest.mark.parametrize('out', ('\0\0', '\0', ''))
def test_check_zsplit_returns_empty(out):
assert zsplit(out) == []