Skip to content

Commit

Permalink
Add check for text file encodings
Browse files Browse the repository at this point in the history
  • Loading branch information
scop committed Nov 11, 2021
1 parent 56b4a7e commit 69d0dfb
Show file tree
Hide file tree
Showing 5 changed files with 58 additions and 0 deletions.
6 changes: 6 additions & 0 deletions .pre-commit-hooks.yaml
Expand Up @@ -32,6 +32,12 @@
entry: check-docstring-first
language: python
types: [python]
- id: check-encoding
name: check text files have desired encoding
description: checks that text files decode cleanly using an encoding.
entry: check-encoding
language: python
types: [text]
- id: check-executables-have-shebangs
name: check that executables have shebangs
description: ensures that (non-binary) executables have a shebang.
Expand Down
5 changes: 5 additions & 0 deletions README.md
Expand Up @@ -49,6 +49,11 @@ Check for files with names that would conflict on a case-insensitive filesystem
#### `check-docstring-first`
Checks for a common error of placing code before the docstring.

#### `check-encoding`
Checks that text files have the desired encoding.
- `--encoding` - specify the encoding to assert; if not specified, the default is
[platform dependent](https://docs.python.org/3/library/functions.html?highlight=encoding#open)

#### `check-executables-have-shebangs`
Checks that non-binary executables have a proper shebang.

Expand Down
29 changes: 29 additions & 0 deletions pre_commit_hooks/check_encoding.py
@@ -0,0 +1,29 @@
import argparse
import codecs
from typing import Optional
from typing import Sequence


def main(argv: Optional[Sequence[str]] = None) -> int:
    """Check that every given file decodes cleanly with one encoding.

    Args:
        argv: Command-line arguments: zero or more filenames plus an
            optional ``--encoding NAME``. ``None`` lets argparse fall back
            to ``sys.argv[1:]``.

    Returns:
        0 if every file was read without error, 1 if at least one file
        could not be opened or decoded, 2 if the requested encoding is
        not usable.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('filenames', nargs='*', help='Filenames to check.')
    parser.add_argument('--encoding', help='Encoding to assert.')
    args = parser.parse_args(argv)

    # Validate the encoding name once, before touching any file.  Relying on
    # open() alone means a misspelled encoding goes unnoticed when no
    # filenames are passed, and is masked by OSError when files are missing.
    if args.encoding is not None:
        try:
            codecs.lookup(args.encoding)
        except LookupError as exc:
            print(f'{__file__}: {exc}')
            return 2

    retval = 0
    for filename in args.filenames:
        try:
            with open(filename, encoding=args.encoding) as f:
                # Only the decoding side effect matters; the text is dropped.
                f.read()
        except LookupError as exc:
            # open() can still reject a name the codec registry knows but
            # that is not a text encoding; don't bother with the rest.
            print(f'{__file__}: {exc}')
            retval = 2
            break
        except Exception as exc:
            # Deliberately broad: any per-file failure (I/O error, decode
            # error, ...) is reported and the remaining files still checked.
            print(f'{filename}: {exc}')
            retval = 1
    return retval


if __name__ == '__main__':
    # Run the hook as a script and hand main()'s status to the interpreter
    # as the process exit code.
    exit_status = main()
    raise SystemExit(exit_status)
1 change: 1 addition & 0 deletions setup.cfg
Expand Up @@ -41,6 +41,7 @@ console_scripts =
check-byte-order-marker = pre_commit_hooks.check_byte_order_marker:main
check-case-conflict = pre_commit_hooks.check_case_conflict:main
check-docstring-first = pre_commit_hooks.check_docstring_first:main
check-encoding = pre_commit_hooks.check_encoding:main
check-executables-have-shebangs = pre_commit_hooks.check_executables_have_shebangs:main
check-json = pre_commit_hooks.check_json:main
check-merge-conflict = pre_commit_hooks.check_merge_conflict:main
Expand Down
17 changes: 17 additions & 0 deletions tests/check_encoding_test.py
@@ -0,0 +1,17 @@
import pytest

from pre_commit_hooks.check_encoding import main


@pytest.mark.parametrize(
    'content, encoding, expected',
    [
        # Pure ASCII bytes decode cleanly as ASCII.
        (b'Hello!', 'ascii', 0),
        # An encoding name Python does not know yields the dedicated status.
        (b'Hello!', 'unknown-encoding', 2),
        # Non-ASCII bytes fail to decode as ASCII.
        ('Hello ☃!'.encode(), 'ascii', 1),
    ],
)
def test_has_encoding(content, encoding, expected, tmpdir):
    """Write ``content`` to a temp file and check main()'s exit status."""
    target = tmpdir.join('path')
    target.write(content, 'wb')
    argv = ['--encoding', encoding, str(target)]
    exit_status = main(argv)
    assert exit_status == expected

0 comments on commit 69d0dfb

Please sign in to comment.