diff --git a/crates/ruff_linter/resources/test/fixtures/pylint/unspecified_encoding.py b/crates/ruff_linter/resources/test/fixtures/pylint/unspecified_encoding.py new file mode 100644 index 0000000000000..216fd83513a9c --- /dev/null +++ b/crates/ruff_linter/resources/test/fixtures/pylint/unspecified_encoding.py @@ -0,0 +1,44 @@ +import io +import sys +import tempfile +import io as hugo +import codecs + +# Errors. +open("test.txt") +io.TextIOWrapper(io.FileIO("test.txt")) +hugo.TextIOWrapper(hugo.FileIO("test.txt")) +tempfile.NamedTemporaryFile("w") +tempfile.TemporaryFile("w") +codecs.open("test.txt") +tempfile.SpooledTemporaryFile(0, "w") + +# Non-errors. +open("test.txt", encoding="utf-8") +open("test.bin", "wb") +open("test.bin", mode="wb") +open("test.txt", "r", -1, "utf-8") +open("test.txt", mode=sys.argv[1]) + +def func(*args, **kwargs): + open(*args) + open("text.txt", **kwargs) + +io.TextIOWrapper(io.FileIO("test.txt"), encoding="utf-8") +io.TextIOWrapper(io.FileIO("test.txt"), "utf-8") +tempfile.TemporaryFile("w", encoding="utf-8") +tempfile.TemporaryFile("w", -1, "utf-8") +tempfile.TemporaryFile("wb") +tempfile.TemporaryFile() +tempfile.NamedTemporaryFile("w", encoding="utf-8") +tempfile.NamedTemporaryFile("w", -1, "utf-8") +tempfile.NamedTemporaryFile("wb") +tempfile.NamedTemporaryFile() +codecs.open("test.txt", encoding="utf-8") +codecs.open("test.bin", "wb") +codecs.open("test.bin", mode="wb") +codecs.open("test.txt", "r", -1, "utf-8") +tempfile.SpooledTemporaryFile(0, "w", encoding="utf-8") +tempfile.SpooledTemporaryFile(0, "w", -1, "utf-8") +tempfile.SpooledTemporaryFile(0, "wb") +tempfile.SpooledTemporaryFile(0, ) diff --git a/crates/ruff_linter/src/checkers/ast/analyze/expression.rs b/crates/ruff_linter/src/checkers/ast/analyze/expression.rs index 58ec6f6e4bdcd..8d11a323368a8 100644 --- a/crates/ruff_linter/src/checkers/ast/analyze/expression.rs +++ b/crates/ruff_linter/src/checkers/ast/analyze/expression.rs @@ -786,6 +786,9 @@ pub(crate) fn expression(expr: &Expr, checker: &mut Checker) { if checker.enabled(Rule::SubprocessRunWithoutCheck) { pylint::rules::subprocess_run_without_check(checker, call); } + if checker.enabled(Rule::UnspecifiedEncoding) { + pylint::rules::unspecified_encoding(checker, call); + } if checker.any_enabled(&[ Rule::PytestRaisesWithoutException, Rule::PytestRaisesTooBroad, diff --git a/crates/ruff_linter/src/codes.rs b/crates/ruff_linter/src/codes.rs index 46e7bcda2c482..d1fc068d86e38 100644 --- a/crates/ruff_linter/src/codes.rs +++ b/crates/ruff_linter/src/codes.rs @@ -267,6 +267,7 @@ pub fn code_to_rule(linter: Linter, code: &str) -> Option<(RuleGroup, Rule)> { (Pylint, "W1508") => (RuleGroup::Stable, rules::pylint::rules::InvalidEnvvarDefault), (Pylint, "W1509") => (RuleGroup::Stable, rules::pylint::rules::SubprocessPopenPreexecFn), (Pylint, "W1510") => (RuleGroup::Stable, rules::pylint::rules::SubprocessRunWithoutCheck), + (Pylint, "W1514") => (RuleGroup::Preview, rules::pylint::rules::UnspecifiedEncoding), #[allow(deprecated)] (Pylint, "W1641") => (RuleGroup::Nursery, rules::pylint::rules::EqWithoutHash), (Pylint, "R0904") => (RuleGroup::Preview, rules::pylint::rules::TooManyPublicMethods), diff --git a/crates/ruff_linter/src/rules/pylint/mod.rs b/crates/ruff_linter/src/rules/pylint/mod.rs index bb88ed9119193..bd79d334d9276 100644 --- a/crates/ruff_linter/src/rules/pylint/mod.rs +++ b/crates/ruff_linter/src/rules/pylint/mod.rs @@ -133,6 +133,7 @@ mod tests { Rule::SubprocessRunWithoutCheck, Path::new("subprocess_run_without_check.py") )] + #[test_case(Rule::UnspecifiedEncoding, Path::new("unspecified_encoding.py"))] #[test_case(Rule::BadDunderMethodName, Path::new("bad_dunder_method_name.py"))] #[test_case(Rule::NoSelfUse, Path::new("no_self_use.py"))] #[test_case(Rule::MisplacedBareRaise, Path::new("misplaced_bare_raise.py"))] diff --git a/crates/ruff_linter/src/rules/pylint/rules/mod.rs b/crates/ruff_linter/src/rules/pylint/rules/mod.rs index 2f20fa0f62283..4a213ec29f19c 100644 --- a/crates/ruff_linter/src/rules/pylint/rules/mod.rs +++ b/crates/ruff_linter/src/rules/pylint/rules/mod.rs @@ -53,6 +53,7 @@ pub(crate) use type_name_incorrect_variance::*; pub(crate) use type_param_name_mismatch::*; pub(crate) use unexpected_special_method_signature::*; pub(crate) use unnecessary_direct_lambda_call::*; +pub(crate) use unspecified_encoding::*; pub(crate) use useless_else_on_loop::*; pub(crate) use useless_import_alias::*; pub(crate) use useless_return::*; @@ -114,6 +115,7 @@ mod type_name_incorrect_variance; mod type_param_name_mismatch; mod unexpected_special_method_signature; mod unnecessary_direct_lambda_call; +mod unspecified_encoding; mod useless_else_on_loop; mod useless_import_alias; mod useless_return; diff --git a/crates/ruff_linter/src/rules/pylint/rules/unspecified_encoding.rs b/crates/ruff_linter/src/rules/pylint/rules/unspecified_encoding.rs new file mode 100644 index 0000000000000..8d2b83dddec9d --- /dev/null +++ b/crates/ruff_linter/src/rules/pylint/rules/unspecified_encoding.rs @@ -0,0 +1,157 @@ +use ruff_diagnostics::{Diagnostic, Violation}; +use ruff_macros::{derive_message_formats, violation}; +use ruff_python_ast as ast; +use ruff_python_ast::call_path::{format_call_path, CallPath}; +use ruff_text_size::Ranged; + +use crate::checkers::ast::Checker; + +/// ## What it does +/// Checks for uses of `open` and related calls without an explicit `encoding` +/// argument. +/// +/// ## Why is this bad? +/// Using `open` in text mode without an explicit encoding can lead to +/// non-portable code, with differing behavior across platforms. +/// +/// Instead, consider using the `encoding` parameter to enforce a specific +/// encoding. +/// +/// ## Example +/// ```python +/// open("file.txt") +/// ``` +/// +/// Use instead: +/// ```python +/// open("file.txt", encoding="utf-8") +/// ``` +/// +/// ## References +/// - [Python documentation: `open`](https://docs.python.org/3/library/functions.html#open) +#[violation] +pub struct UnspecifiedEncoding { + function_name: String, + mode: Mode, +} + +impl Violation for UnspecifiedEncoding { + #[derive_message_formats] + fn message(&self) -> String { + let UnspecifiedEncoding { + function_name, + mode, + } = self; + + match mode { + Mode::Supported => { + format!("`{function_name}` in text mode without explicit `encoding` argument") + } + Mode::Unsupported => { + format!("`{function_name}` without explicit `encoding` argument") + } + } + } +} + +/// PLW1514 +pub(crate) fn unspecified_encoding(checker: &mut Checker, call: &ast::ExprCall) { + let Some((function_name, mode)) = checker + .semantic() + .resolve_call_path(&call.func) + .filter(|call_path| is_violation(call, call_path)) + .map(|call_path| { + ( + format_call_path(call_path.as_slice()), + Mode::from(&call_path), + ) + }) + else { + return; + }; + + checker.diagnostics.push(Diagnostic::new( + UnspecifiedEncoding { + function_name, + mode, + }, + call.func.range(), + )); +} + +/// Returns `true` if the given expression is a string literal containing a `b` character. +fn is_binary_mode(expr: &ast::Expr) -> Option { + Some(expr.as_constant_expr()?.value.as_str()?.value.contains('b')) +} + +/// Returns `true` if the given call lacks an explicit `encoding`. +fn is_violation(call: &ast::ExprCall, call_path: &CallPath) -> bool { + // If we have something like `*args`, which might contain the encoding argument, abort. + if call + .arguments + .args + .iter() + .any(ruff_python_ast::Expr::is_starred_expr) + { + return false; + } + // If we have something like `**kwargs`, which might contain the encoding argument, abort. + if call + .arguments + .keywords + .iter() + .any(|keyword| keyword.arg.is_none()) + { + return false; + } + match call_path.as_slice() { + ["" | "codecs" | "_io", "open"] => { + if let Some(mode_arg) = call.arguments.find_argument("mode", 1) { + if is_binary_mode(mode_arg).unwrap_or(true) { + // binary mode or unknown mode is no violation + return false; + } + } + // else mode not specified, defaults to text mode + call.arguments.find_argument("encoding", 3).is_none() + } + ["tempfile", "TemporaryFile" | "NamedTemporaryFile" | "SpooledTemporaryFile"] => { + let mode_pos = usize::from(call_path[1] == "SpooledTemporaryFile"); + if let Some(mode_arg) = call.arguments.find_argument("mode", mode_pos) { + if is_binary_mode(mode_arg).unwrap_or(true) { + // binary mode or unknown mode is no violation + return false; + } + } else { + // defaults to binary mode + return false; + } + call.arguments + .find_argument("encoding", mode_pos + 2) + .is_none() + } + ["io" | "_io", "TextIOWrapper"] => call.arguments.find_argument("encoding", 1).is_none(), + _ => false, + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum Mode { + /// The call supports a `mode` argument. + Supported, + /// The call does not support a `mode` argument. + Unsupported, +} + +impl From<&CallPath<'_>> for Mode { + fn from(value: &CallPath<'_>) -> Self { + match value.as_slice() { + ["" | "codecs" | "_io", "open"] => Mode::Supported, + ["tempfile", "TemporaryFile" | "NamedTemporaryFile" | "SpooledTemporaryFile"] => { + Mode::Supported + } + ["io" | "_io", "TextIOWrapper"] => Mode::Unsupported, + _ => Mode::Unsupported, + } + } +} diff --git a/crates/ruff_linter/src/rules/pylint/snapshots/ruff_linter__rules__pylint__tests__PLW1514_unspecified_encoding.py.snap b/crates/ruff_linter/src/rules/pylint/snapshots/ruff_linter__rules__pylint__tests__PLW1514_unspecified_encoding.py.snap new file mode 100644 index 0000000000000..d10f3d3b633fd --- /dev/null +++ b/crates/ruff_linter/src/rules/pylint/snapshots/ruff_linter__rules__pylint__tests__PLW1514_unspecified_encoding.py.snap @@ -0,0 +1,72 @@ +--- +source: crates/ruff_linter/src/rules/pylint/mod.rs +--- +unspecified_encoding.py:8:1: PLW1514 `open` in text mode without explicit `encoding` argument + | + 7 | # Errors. + 8 | open("test.txt") + | ^^^^ PLW1514 + 9 | io.TextIOWrapper(io.FileIO("test.txt")) +10 | hugo.TextIOWrapper(hugo.FileIO("test.txt")) + | + +unspecified_encoding.py:9:1: PLW1514 `io.TextIOWrapper` without explicit `encoding` argument + | + 7 | # Errors. + 8 | open("test.txt") + 9 | io.TextIOWrapper(io.FileIO("test.txt")) + | ^^^^^^^^^^^^^^^^ PLW1514 +10 | hugo.TextIOWrapper(hugo.FileIO("test.txt")) +11 | tempfile.NamedTemporaryFile("w") + | + +unspecified_encoding.py:10:1: PLW1514 `io.TextIOWrapper` without explicit `encoding` argument + | + 8 | open("test.txt") + 9 | io.TextIOWrapper(io.FileIO("test.txt")) +10 | hugo.TextIOWrapper(hugo.FileIO("test.txt")) + | ^^^^^^^^^^^^^^^^^^ PLW1514 +11 | tempfile.NamedTemporaryFile("w") +12 | tempfile.TemporaryFile("w") + | + +unspecified_encoding.py:11:1: PLW1514 `tempfile.NamedTemporaryFile` in text mode without explicit `encoding` argument + | + 9 | io.TextIOWrapper(io.FileIO("test.txt")) +10 | hugo.TextIOWrapper(hugo.FileIO("test.txt")) +11 | tempfile.NamedTemporaryFile("w") + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^ PLW1514 +12 | tempfile.TemporaryFile("w") +13 | codecs.open("test.txt") + | + +unspecified_encoding.py:12:1: PLW1514 `tempfile.TemporaryFile` in text mode without explicit `encoding` argument + | +10 | hugo.TextIOWrapper(hugo.FileIO("test.txt")) +11 | tempfile.NamedTemporaryFile("w") +12 | tempfile.TemporaryFile("w") + | ^^^^^^^^^^^^^^^^^^^^^^ PLW1514 +13 | codecs.open("test.txt") +14 | tempfile.SpooledTemporaryFile(0, "w") + | + +unspecified_encoding.py:13:1: PLW1514 `codecs.open` in text mode without explicit `encoding` argument + | +11 | tempfile.NamedTemporaryFile("w") +12 | tempfile.TemporaryFile("w") +13 | codecs.open("test.txt") + | ^^^^^^^^^^^ PLW1514 +14 | tempfile.SpooledTemporaryFile(0, "w") + | + +unspecified_encoding.py:14:1: PLW1514 `tempfile.SpooledTemporaryFile` in text mode without explicit `encoding` argument + | +12 | tempfile.TemporaryFile("w") +13 | codecs.open("test.txt") +14 | tempfile.SpooledTemporaryFile(0, "w") + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ PLW1514 +15 | +16 | # Non-errors. + | + + diff --git a/ruff.schema.json b/ruff.schema.json index 81248ece3bf9a..b37f4d78685e8 100644 --- a/ruff.schema.json +++ b/ruff.schema.json @@ -2997,6 +2997,7 @@ "PLW1509", "PLW151", "PLW1510", + "PLW1514", "PLW16", "PLW164", "PLW1641",