Skip to content

Commit

Permalink
Add unspecified-encoding checker #3826 (#4753)
Browse files Browse the repository at this point in the history
* Add unspecified-encoding checker #3826
This adds an unspecified-encoding checker that adds a warning
whenever open() is called without an explicit encoding argument.
This closes #3826

* Update tests to conform to unspecified-encoding
With addition of the unspecified-encoding checker calls of open()
need an encoding argument.
Where necessary this argument has been added, or the message has been
disabled.
This also includes small linting changes to a small number
of tests. Their test-data has been updated to reflect new line numbers.

* Update scripts to conform to unspecified-encoding
With addition of the unspecified-encoding checker calls of open()
need an encoding argument.
Where necessary this argument has been added.

* Update pylint to conform to unspecified-encoding
With addition of the unspecified-encoding checker calls of open()
need an encoding argument.
Where necessary this argument has been added.

Co-authored-by: Marc Mueller <30130371+cdce8p@users.noreply.github.com>
Co-authored-by: Marc Mueller <30130371+cdce8p@users.noreply.github.com>
  • Loading branch information
DanielNoord and cdce8p committed Jul 28, 2021
1 parent c04f92e commit a8b7dd7
Show file tree
Hide file tree
Showing 38 changed files with 221 additions and 84 deletions.
2 changes: 2 additions & 0 deletions CONTRIBUTORS.txt
Expand Up @@ -521,3 +521,5 @@ contributors:
* Yilei Yang: contributor

* Marcin Kurczewski (rr-): contributor

* Daniel van Noord (DanielNoord): contributor
4 changes: 4 additions & 0 deletions ChangeLog
Expand Up @@ -9,6 +9,10 @@ Release date: TBA
..
Put new features here and also in 'doc/whatsnew/2.10.rst'

* Added ``unspecified-encoding``: Emitted when open() is called without specifying an encoding

Closes #3826


What's New in Pylint 2.9.6?
===========================
Expand Down
3 changes: 3 additions & 0 deletions doc/whatsnew/2.10.rst
Expand Up @@ -12,6 +12,9 @@ Summary -- Release highlights
New checkers
============

* Added ``unspecified-encoding``: Emitted when open() is called without specifying an encoding

Closes #3826


Other Changes
Expand Down
2 changes: 1 addition & 1 deletion pylint/checkers/similar.py
Expand Up @@ -546,7 +546,7 @@ def Run(argv=None):
min_lines, ignore_comments, ignore_docstrings, ignore_imports, ignore_signatures
)
for filename in args:
with open(filename) as stream:
with open(filename, encoding="utf-8") as stream:
sim.append_stream(filename, stream)
sim.run()
sys.exit(0)
Expand Down
2 changes: 1 addition & 1 deletion pylint/checkers/spelling.py
Expand Up @@ -318,7 +318,7 @@ def open(self):
dict_name, self.config.spelling_private_dict_file
)
self.private_dict_file = open( # pylint: disable=consider-using-with
self.config.spelling_private_dict_file, "a"
self.config.spelling_private_dict_file, "a", encoding="utf-8"
)
else:
self.spelling_dict = enchant.Dict(dict_name)
Expand Down
54 changes: 51 additions & 3 deletions pylint/checkers/stdlib.py
Expand Up @@ -30,6 +30,7 @@
# Copyright (c) 2021 Marc Mueller <30130371+cdce8p@users.noreply.github.com>
# Copyright (c) 2021 Matus Valo <matusvalo@users.noreply.github.com>
# Copyright (c) 2021 victor <16359131+jiajunsu@users.noreply.github.com>
# Copyright (c) 2021 Daniel van Noord <13665637+DanielNoord@users.noreply.github.com>

# Licensed under the GPL: https://www.gnu.org/licenses/old-licenses/gpl-2.0.html
# For details: https://github.com/PyCQA/pylint/blob/main/LICENSE
Expand All @@ -44,12 +45,13 @@
from pylint.checkers import BaseChecker, DeprecatedMixin, utils
from pylint.interfaces import IAstroidChecker

OPEN_FILES = {"open", "file"}
OPEN_FILES_MODE = ("open", "file")
OPEN_FILES_ENCODING = ("open",)
UNITTEST_CASE = "unittest.case"
THREADING_THREAD = "threading.Thread"
COPY_COPY = "copy.copy"
OS_ENVIRON = "os._Environ"
ENV_GETTERS = {"os.getenv"}
ENV_GETTERS = ("os.getenv",)
SUBPROCESS_POPEN = "subprocess.Popen"
SUBPROCESS_RUN = "subprocess.run"
OPEN_MODULE = "_io"
Expand Down Expand Up @@ -425,6 +427,13 @@ class StdlibChecker(DeprecatedMixin, BaseChecker):
"deprecated-decorator",
"The decorator is marked as deprecated and will be removed in the future.",
),
"W1514": (
"Using open without explicitly specifying an encoding",
"unspecified-encoding",
"It is better to specify an encoding when opening documents. "
"Using the system default implicitly can create problems on other operating systems. "
"See https://www.python.org/dev/peps/pep-0597/",
),
}

def __init__(self, linter=None):
Expand Down Expand Up @@ -485,6 +494,7 @@ def _check_shallow_copy_environ(self, node):
"subprocess-popen-preexec-fn",
"subprocess-run-check",
"deprecated-class",
"unspecified-encoding",
)
def visit_call(self, node):
"""Visit a Call node."""
Expand All @@ -494,8 +504,18 @@ def visit_call(self, node):
if inferred is astroid.Uninferable:
continue
if inferred.root().name == OPEN_MODULE:
if getattr(node.func, "name", None) in OPEN_FILES:
if (
isinstance(node.func, astroid.Name)
and node.func.name in OPEN_FILES_MODE
):
self._check_open_mode(node)
if (
isinstance(node.func, astroid.Name)
and node.func.name in OPEN_FILES_ENCODING
or isinstance(node.func, astroid.Attribute)
and node.func.attrname in OPEN_FILES_ENCODING
):
self._check_open_encoded(node)
elif inferred.root().name == UNITTEST_CASE:
self._check_redundant_assert(node, inferred)
elif isinstance(inferred, astroid.ClassDef):
Expand Down Expand Up @@ -573,6 +593,34 @@ def _check_open_mode(self, node):
):
self.add_message("bad-open-mode", node=node, args=mode_arg.value)

def _check_open_encoded(self, node: astroid.Call) -> None:
"""Check that the encoded argument of an open call is valid."""
mode_arg = None
try:
mode_arg = utils.get_argument_from_call(node, position=1, keyword="mode")
except utils.NoSuchArgumentError:
pass

if mode_arg:
mode_arg = utils.safe_infer(mode_arg)
if not mode_arg or "b" not in mode_arg.value:
encoding_arg = None
try:
encoding_arg = utils.get_argument_from_call(
node, position=None, keyword="encoding"
)
except utils.NoSuchArgumentError:
self.add_message("unspecified-encoding", node=node)

if encoding_arg:
encoding_arg = utils.safe_infer(encoding_arg)

if (
isinstance(encoding_arg, astroid.Const)
and encoding_arg.value is None
):
self.add_message("unspecified-encoding", node=node)

def _check_env_function(self, node, infer):
env_name_kwarg = "key"
env_value_kwarg = "default"
Expand Down
2 changes: 1 addition & 1 deletion pylint/config/find_default_config_files.py
Expand Up @@ -9,7 +9,7 @@


def _toml_has_config(path):
with open(path) as toml_handle:
with open(path, encoding="utf-8") as toml_handle:
try:
content = toml.load(toml_handle)
except TomlDecodeError as error:
Expand Down
2 changes: 1 addition & 1 deletion pylint/config/option_manager_mixin.py
Expand Up @@ -267,7 +267,7 @@ def read_config_file(self, config_file=None, verbose=None):
parser = self.cfgfile_parser

if config_file.endswith(".toml"):
with open(config_file) as fp:
with open(config_file, encoding="utf-8") as fp:
content = toml.load(fp)

try:
Expand Down
9 changes: 6 additions & 3 deletions pylint/lint/pylinter.py
Expand Up @@ -571,7 +571,9 @@ def _load_reporters(self) -> None:
(reporter_output,) = reporter_output

# pylint: disable=consider-using-with
output_file = stack.enter_context(open(reporter_output, "w"))
output_file = stack.enter_context(
open(reporter_output, "w", encoding="utf-8")
)

reporter.set_output(output_file)
output_files.append(output_file)
Expand Down Expand Up @@ -617,8 +619,9 @@ def set_option(self, optname, value, action=None, optdict=None):
except KeyError:
meth = self._bw_options_methods[optname]
warnings.warn(
"%s is deprecated, replace it by %s"
% (optname, optname.split("-")[0]),
"{} is deprecated, replace it by {}".format(
optname, optname.split("-")[0]
),
DeprecationWarning,
)
value = utils._check_csv(value)
Expand Down
2 changes: 1 addition & 1 deletion pylint/lint/run.py
Expand Up @@ -373,7 +373,7 @@ def __init__(

if self._output:
try:
with open(self._output, "w") as output:
with open(self._output, "w", encoding="utf-8") as output:
linter.reporter.set_output(output)
linter.check(args)
score_value = linter.generate_reports()
Expand Down
2 changes: 1 addition & 1 deletion pylint/pyreverse/utils.py
Expand Up @@ -35,7 +35,7 @@ def get_default_options():
if home:
rcfile = os.path.join(home, RCFILE)
try:
with open(rcfile) as file_handle:
with open(rcfile, encoding="utf-8") as file_handle:
options = file_handle.read().split()
except OSError:
pass # ignore if no config file found
Expand Down
4 changes: 3 additions & 1 deletion pylint/pyreverse/writer.py
Expand Up @@ -187,7 +187,9 @@ def __init__(self, config):

def set_printer(self, file_name, basename):
"""initialize VCGWriter for a UML graph"""
self.graph_file = open(file_name, "w+") # pylint: disable=consider-using-with
self.graph_file = open( # pylint: disable=consider-using-with
file_name, "w+", encoding="utf-8"
)
self.printer = VCGPrinter(self.graph_file)
self.printer.open_graph(
title=basename,
Expand Down
4 changes: 2 additions & 2 deletions pylint/testutils/lint_module_test.py
Expand Up @@ -141,14 +141,14 @@ def multiset_difference(
# pylint: disable=consider-using-with
def _open_expected_file(self):
try:
return open(self._test_file.expected_output)
return open(self._test_file.expected_output, encoding="utf-8")
except FileNotFoundError:
return StringIO("")

# pylint: disable=consider-using-with
def _open_source_file(self):
if self._test_file.base == "invalid_encoded_data":
return open(self._test_file.source)
return open(self._test_file.source, encoding="utf-8")
if "latin1" in self._test_file.base:
return open(self._test_file.source, encoding="latin1")
return open(self._test_file.source, encoding="utf8")
Expand Down
4 changes: 2 additions & 2 deletions script/bump_changelog.py
Expand Up @@ -33,10 +33,10 @@ def main() -> None:
logging.debug(f"Launching bump_changelog with args: {args}")
if "dev" in args.version:
return
with open(DEFAULT_CHANGELOG_PATH) as f:
with open(DEFAULT_CHANGELOG_PATH, encoding="utf-8") as f:
content = f.read()
content = transform_content(content, args.version)
with open(DEFAULT_CHANGELOG_PATH, "w") as f:
with open(DEFAULT_CHANGELOG_PATH, "w", encoding="utf-8") as f:
f.write(content)


Expand Down
4 changes: 2 additions & 2 deletions script/fix_documentation.py
Expand Up @@ -82,7 +82,7 @@ def main(argv: Union[List[str], None] = None) -> int:

return_value: int = 0
for file_name in args.filenames:
with open(file_name) as fp:
with open(file_name, encoding="utf-8") as fp:
orignal_content = fp.read()
content = orignal_content
# Modify files
Expand All @@ -91,7 +91,7 @@ def main(argv: Union[List[str], None] = None) -> int:
content = changelog_insert_empty_lines(content, args.subtitle_prefix)
# If modified, write changes and eventually return 1
if orignal_content != content:
with open(file_name, "w") as fp:
with open(file_name, "w", encoding="utf-8") as fp:
fp.write(content)
return_value |= 1
return return_value
Expand Down
2 changes: 1 addition & 1 deletion tests/checkers/unittest_similar.py
Expand Up @@ -449,7 +449,7 @@ def test_get_map_data():
# Manually perform a 'map' type function
for source_fname in source_streams:
sim = similar.SimilarChecker(linter)
with open(source_fname) as stream:
with open(source_fname, encoding="utf-8") as stream:
sim.append_stream(source_fname, stream)
# The map bit, can you tell? ;)
data.extend(sim.get_map_data())
Expand Down
20 changes: 10 additions & 10 deletions tests/functional/b/bad_open_mode_py3.py
Expand Up @@ -3,22 +3,22 @@

NAME = "foo.bar"
open(NAME, "wb")
open(NAME, "w")
open(NAME, "w", encoding="utf-8")
open(NAME, "rb")
open(NAME, "x")
open(NAME, "x", encoding="utf-8")
open(NAME, "br")
open(NAME, "+r")
open(NAME, "+r", encoding="utf-8")
open(NAME, "xb")
open(NAME, "rwx") # [bad-open-mode]
open(NAME, "rr") # [bad-open-mode]
open(NAME, "+") # [bad-open-mode]
open(NAME, "xw") # [bad-open-mode]
open(NAME, "rwx", encoding="utf-8") # [bad-open-mode]
open(NAME, "rr", encoding="utf-8") # [bad-open-mode]
open(NAME, "+", encoding="utf-8") # [bad-open-mode]
open(NAME, "xw", encoding="utf-8") # [bad-open-mode]
open(NAME, "ab+")
open(NAME, "a+b")
open(NAME, "+ab")
open(NAME, "+rUb")
open(NAME, "x+b")
open(NAME, "Ua") # [bad-open-mode]
open(NAME, "Ur++") # [bad-open-mode]
open(NAME, "Ut")
open(NAME, "Ua", encoding="utf-8") # [bad-open-mode]
open(NAME, "Ur++", encoding="utf-8") # [bad-open-mode]
open(NAME, "Ut", encoding="utf-8")
open(NAME, "Ubr")
2 changes: 1 addition & 1 deletion tests/functional/c/consider/consider_using_with.py
Expand Up @@ -151,7 +151,7 @@ def test_suppress_in_exit_stack():
"""Regression test for issue #4654 (false positive)"""
with contextlib.ExitStack() as stack:
_ = stack.enter_context(
open("/sys/firmware/devicetree/base/hwid,location", "r")
open("/sys/firmware/devicetree/base/hwid,location", "r", encoding="utf-8")
) # must not trigger


Expand Down
30 changes: 15 additions & 15 deletions tests/functional/c/consider/consider_using_with_open.py
@@ -1,5 +1,5 @@
# pylint: disable=missing-function-docstring, missing-module-docstring, invalid-name, import-outside-toplevel
# pylint: disable=missing-class-docstring, too-few-public-methods, unused-variable, multiple-statements
# pylint: disable=missing-class-docstring, too-few-public-methods, unused-variable, multiple-statements, line-too-long
"""
The functional test for the standard ``open()`` function has to be moved in a separate file,
because PyPy has to be excluded for the tests as the ``open()`` function is uninferable in PyPy.
Expand All @@ -8,26 +8,26 @@
"""
from contextlib import contextmanager


myfile = open("test.txt") # [consider-using-with]
myfile = open("test.txt", encoding="utf-8") # [consider-using-with]


def test_open():
fh = open("test.txt") # [consider-using-with]
fh = open("test.txt", encoding="utf-8") # [consider-using-with]
fh.close()

with open("test.txt") as fh: # must not trigger
with open("test.txt", encoding="utf-8") as fh: # must not trigger
fh.read()


def test_open_in_enter():
"""Message must not trigger if the resource is allocated in a context manager."""

class MyContextManager:
def __init__(self):
self.file_handle = None

def __enter__(self):
self.file_handle = open("foo.txt", "w") # must not trigger
self.file_handle = open("foo.txt", "w", encoding="utf-8") # must not trigger

def __exit__(self, exc_type, exc_value, traceback):
self.file_handle.close()
Expand All @@ -36,31 +36,31 @@ def __exit__(self, exc_type, exc_value, traceback):
@contextmanager
def test_open_in_with_contextlib():
"""Message must not trigger if the resource is allocated in a context manager."""
file_handle = open("foo.txt", "w") # must not trigger
file_handle = open("foo.txt", "w", encoding="utf-8") # must not trigger
yield file_handle
file_handle.close()


def test_open_outside_assignment():
open("foo").read() # [consider-using-with]
content = open("foo").read() # [consider-using-with]
open("foo", encoding="utf-8").read() # [consider-using-with]
content = open("foo", encoding="utf-8").read() # [consider-using-with]


def test_open_inside_with_block():
with open("foo") as fh:
open("bar") # [consider-using-with]
with open("foo", encoding="utf-8") as fh:
open("bar", encoding="utf-8") # [consider-using-with]


def test_ternary_if_in_with_block(file1, file2, which):
"""Regression test for issue #4676 (false positive)"""
with (open(file1) if which else open(file2)) as input_file: # must not trigger
with (open(file1, encoding="utf-8") if which else open(file2, encoding="utf-8")) as input_file: # must not trigger
return input_file.read()


def test_single_line_with(file1):
with open(file1): return file1.read() # must not trigger
with open(file1, encoding="utf-8"): return file1.read() # must not trigger


def test_multiline_with_items(file1, file2, which):
with (open(file1) if which
else open(file2)) as input_file: return input_file.read()
with (open(file1, encoding="utf-8") if which
else open(file2, encoding="utf-8")) as input_file: return input_file.read()

0 comments on commit a8b7dd7

Please sign in to comment.