Skip to content

Commit

Permalink
New hook 'destroyed-symlinks' to detect symlinks which are changed to…
Browse files Browse the repository at this point in the history
… regular files with a content of a path which that symlink was pointing to
  • Loading branch information
m-khvoinitsky committed Aug 2, 2020
1 parent e1668fe commit 433aa14
Show file tree
Hide file tree
Showing 5 changed files with 173 additions and 0 deletions.
7 changes: 7 additions & 0 deletions .pre-commit-hooks.yaml
Expand Up @@ -100,6 +100,13 @@
entry: debug-statement-hook
language: python
types: [python]
- id: destroyed-symlinks
name: Detect Destroyed Symlinks
description: Detects symlinks which are changed to regular files with a content of a path which that symlink was pointing to.
entry: destroyed-symlinks
language: python
types: [file]
pass_filenames: false
- id: detect-aws-credentials
name: Detect AWS Credentials
description: Detects *your* aws credentials from the aws cli credentials file
Expand Down
9 changes: 9 additions & 0 deletions README.md
Expand Up @@ -82,6 +82,15 @@ Attempts to load all yaml files to verify syntax.
#### `debug-statements`
Check for debugger imports and py37+ `breakpoint()` calls in python source.

#### `destroyed-symlinks`
Detects symlinks that have been replaced by regular files whose content is the path the symlink pointed to.
This usually happens on Windows when a user without permission to create symlinks clones a repository that contains symlinks.
The following argument is available:
- `--autofix` - unstage detected broken symlinks so they won't be committed.
Note: this option won't fix the symlinks on the filesystem because,
if the symlink has been destroyed in the first place, there is some reason for that
(see above) which this hook most likely won't be able to fix.

#### `detect-aws-credentials`
Checks for the existence of AWS secrets that you have set up with the AWS CLI.
The following arguments are available:
Expand Down
90 changes: 90 additions & 0 deletions pre_commit_hooks/destroyed_symlinks.py
@@ -0,0 +1,90 @@
import argparse
import sys
from operator import methodcaller
from subprocess import check_call
from subprocess import check_output
from typing import Optional
from typing import Sequence

# First space-separated field of an "ordinary changed entry" record in the
# output of `git status --porcelain=v2`.
ORDINARY_CHANGED_ENTRIES_MARKER = b'1'
# File mode git reports for a symbolic link.
PERMS_LINK = b'120000'
# NOTE(review): presumably the all-zero mode git reports when the entry does
# not exist on that side (e.g. it was deleted from the index) - confirm
# against the git-status documentation.
PERMS_NONEXIST = b'000000'


def normalize_content(content: bytes) -> bytes:
    """Return *content* with per-line whitespace and blank lines removed.

    Every line is stripped of leading/trailing whitespace, lines that end up
    empty are discarded, and the survivors are joined with a single LF, so
    inputs that differ only in line endings or padding compare equal.
    """
    strip = methodcaller('strip')
    stripped_lines = (strip(raw_line) for raw_line in content.splitlines())
    return b'\n'.join(line for line in stripped_lines if line)


def find_destroyed_symlinks(autofix: bool) -> Sequence[bytes]:
    """Return paths of staged symlinks that were turned into regular files.

    Parses ``git status --porcelain=v2 -z`` and reports every ordinary
    changed entry whose mode in HEAD is a symlink while the index holds
    something else (but not a missing entry), provided the staged blob is
    either the very same object as in HEAD or differs from it only in
    whitespace / line endings (see ``normalize_content``).

    Args:
        autofix: when true, each detected entry is written back to the index
            as a symlink pointing at the HEAD blob via
            ``git update-index --cacheinfo``. Files on disk are not touched.

    Returns:
        The affected paths as raw bytes, in the order git reported them.

    Raises:
        subprocess.CalledProcessError: if a git invocation exits non-zero.
    """
    # Marker of "renamed or copied" records in porcelain v2. Kept local so
    # this function does not rely on any additional module-level name.
    renamed_or_copied_marker = b'2'

    destroyed_links = []
    records = iter(
        check_output(['git', 'status', '--porcelain=v2', '-z']).split(b'\0'),
    )
    for record in records:
        fields = record.split(b' ')
        marker = fields[0]

        if marker == renamed_or_copied_marker:
            # With -z the *original* path of a rename/copy is emitted as a
            # separate NUL-terminated record. It is free-form text, so a
            # name such as "1 notes.txt" would otherwise be mistaken for an
            # ordinary entry and break the unpacking below. Consume it here.
            next(records, None)
            continue
        if marker != ORDINARY_CHANGED_ENTRIES_MARKER:
            continue

        # field layout: https://git-scm.com/docs/git-status#_changed_tracked_entries
        # 1 <XY> <sub> <mH> <mI> <mW> <hH> <hI> <path>
        _, _, _, mode_head, mode_index, _, hash_head, hash_index, *path_parts = fields
        # the path itself may contain spaces, so glue the remainder back together
        path = b' '.join(path_parts)

        # only interested in: symlink in HEAD, existing non-symlink in the index
        if mode_head != PERMS_LINK or mode_index in (PERMS_LINK, PERMS_NONEXIST):
            continue

        if hash_head == hash_index:
            # identical blob under a different mode: a destroyed symlink for sure
            is_destroyed = True
        else:
            # Different hashes do not prove the file is legitimate: hooks like
            # trailing-whitespace or mixed-line-ending may have rewritten the
            # stored link target, so the contents have to be compared.
            index_size = int(check_output(['git', 'cat-file', '-s', hash_index]).strip())
            # Most filesystems limit path length to 4096 bytes. Even in the
            # worst case (a target made of newlines and slashes converted to
            # Windows line breaks) the blob cannot exceed 4096*2 bytes, so
            # anything bigger is a genuine new file replacing the symlink.
            if index_size <= 8192:
                head_content = normalize_content(check_output(['git', 'cat-file', '-p', hash_head]))
                index_content = normalize_content(check_output(['git', 'cat-file', '-p', hash_index]))
                is_destroyed = head_content == index_content
            else:
                is_destroyed = False

        if not is_destroyed:
            continue

        destroyed_links.append(path)
        if autofix:
            # restore the index entry to "symlink pointing at the HEAD blob"
            check_call([
                'git',
                'update-index',
                '--cacheinfo',
                b','.join((PERMS_LINK, hash_head, path)),
            ])
    return destroyed_links


def main(argv: Optional[Sequence[str]] = None) -> int:
    """Command-line entry point of the hook.

    Parses ``--autofix`` from *argv*, runs the detection and lists every
    destroyed symlink on stdout.

    Returns:
        1 if at least one destroyed symlink was found, otherwise 0.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--autofix', action='store_true', help='unstage broken symlinks')
    options = parser.parse_args(argv)

    found = find_destroyed_symlinks(autofix=options.autofix)
    if not found:
        return 0

    # Flush the text layer first so the header is emitted before the raw
    # bytes written straight to the underlying buffer below.
    print('Destroyed symlinks:', flush=True)
    raw_stdout = sys.stdout.buffer
    for link in found:
        # paths are bytes of unknown encoding, hence written undecoded
        raw_stdout.write(b'- ' + link + b'\n')
    raw_stdout.flush()
    return 1


if __name__ == '__main__':
    # The bare `exit` helper is injected by the `site` module for interactive
    # sessions and is missing under `python -S`; raising SystemExit directly
    # always works and still turns main()'s return value into the exit status.
    raise SystemExit(main())
1 change: 1 addition & 0 deletions setup.cfg
Expand Up @@ -43,6 +43,7 @@ console_scripts =
check-xml = pre_commit_hooks.check_xml:main
check-yaml = pre_commit_hooks.check_yaml:main
debug-statement-hook = pre_commit_hooks.debug_statement_hook:main
destroyed-symlinks = pre_commit_hooks.destroyed_symlinks:main
detect-aws-credentials = pre_commit_hooks.detect_aws_credentials:main
detect-private-key = pre_commit_hooks.detect_private_key:main
double-quote-string-fixer = pre_commit_hooks.string_fixer:main
Expand Down
66 changes: 66 additions & 0 deletions tests/destroyed_symlinks_test.py
@@ -0,0 +1,66 @@
import os
from subprocess import check_call
from subprocess import check_output

import pytest

from pre_commit_hooks.destroyed_symlinks import find_destroyed_symlinks
from pre_commit_hooks.destroyed_symlinks import main
from pre_commit_hooks.destroyed_symlinks import normalize_content

# Name of the symlink created in the temporary source repository and later
# staged/rewritten by the tests.
TEST_SYMLINK = 'test_symlink'


@pytest.fixture
def repo_with_destroyed_symlink(tmpdir):
    """Yield a clone in which a committed symlink is checked out as a plain file.

    A source repository containing one symlink is created and then cloned
    with ``core.symlinks=false``; the assertion before the ``yield`` verifies
    that the checked-out entry is not a symlink.
    """
    source_repo = tmpdir.join('src')
    os.makedirs(source_repo, exist_ok=True)
    test_repo = tmpdir.join('test')
    with source_repo.as_cwd():
        check_call(['git', 'init'])
        # the target does not need to exist; only the link itself is committed
        os.symlink('/doesnt/really/matters', TEST_SYMLINK)
        check_call(['git', 'add', '.'])
        check_call(['git', 'commit', '--no-gpg-sign', '-m', 'initial'])
        # sanity check: the tree entry must carry the symlink mode
        assert check_output(['git', 'cat-file', '-p', 'HEAD^{tree}']).startswith(b'120000')
    # clone with symlink support switched off for this one command
    check_call(['git', '-c', 'core.symlinks=false', 'clone', source_repo, test_repo])
    with test_repo.as_cwd():
        # NOTE(review): presumably re-enabled so that git sees the regular
        # file as a type change relative to the symlink in HEAD - confirm.
        check_call(['git', 'config', '--local', 'core.symlinks', 'true'])
    assert not os.path.islink(test_repo.join(TEST_SYMLINK))
    yield test_repo


# (raw input, expected normalized output) pairs for normalize_content
_NORMALIZE_CASES = [
    (b'qwer', b'qwer'),
    (b'qwer\n', b'qwer'),
    (b'qwer\nasdf', b'qwer\nasdf'),
    (b'qwer\r\nasdf', b'qwer\nasdf'),
    (b' qwer\r\n\tasdf \r\n', b'qwer\nasdf'),
]


@pytest.mark.parametrize(('content', 'result'), _NORMALIZE_CASES)
def test_normalize_content(content: bytes, result: bytes) -> None:
    """normalize_content strips each line and unifies line endings."""
    normalized = normalize_content(content)
    assert normalized == result


def test_find_destroyed_symlinks(repo_with_destroyed_symlink):
    """Walk the hook through the unstaged, staged, auto-fixed and altered states."""
    with repo_with_destroyed_symlink.as_cwd():
        # before the replaced file is staged, nothing is reported
        assert find_destroyed_symlinks(autofix=False) == []
        assert main([]) == 0

        # staging the regular file that replaced the link must be detected
        check_call(['git', 'add', TEST_SYMLINK])
        assert find_destroyed_symlinks(autofix=False) == [TEST_SYMLINK.encode()]
        assert main([]) != 0

        assert find_destroyed_symlinks(autofix=True) == [TEST_SYMLINK.encode()]
        # check that file is not staged anymore
        assert check_output(['git', 'status', '--porcelain=v2']).startswith(b'1 .T ')

        # same again, this time through the command-line entry point
        check_call(['git', 'add', TEST_SYMLINK])
        assert main(['--autofix']) != 0
        assert check_output(['git', 'status', '--porcelain=v2']).startswith(b'1 .T ')

        # A trailing newline changes the blob hash, so detection has to fall
        # back to comparing normalized contents. The file is closed explicitly
        # so the data is on disk before `git add` reads it.
        with open(TEST_SYMLINK, 'a') as f:
            print(file=f)  # add trailing newline
        check_call(['git', 'add', TEST_SYMLINK])
        assert find_destroyed_symlinks(autofix=False) == [TEST_SYMLINK.encode()]
        assert main([]) != 0

        # a blob above the 8192-byte limit is treated as a legitimate new file
        with open(TEST_SYMLINK, 'w') as f:
            print('0' * 8193, file=f)
        check_call(['git', 'add', TEST_SYMLINK])
        assert find_destroyed_symlinks(autofix=False) == []
        assert main([]) == 0

0 comments on commit 433aa14

Please sign in to comment.