diff --git a/crates/ruff_linter/resources/test/fixtures/pylint/bad_open_mode.py b/crates/ruff_linter/resources/test/fixtures/pylint/bad_open_mode.py new file mode 100644 index 0000000000000..3ede40b308cea --- /dev/null +++ b/crates/ruff_linter/resources/test/fixtures/pylint/bad_open_mode.py @@ -0,0 +1,34 @@ +import pathlib + +NAME = "foo.bar" +open(NAME, "wb") +open(NAME, "w", encoding="utf-8") +open(NAME, "rb") +open(NAME, "x", encoding="utf-8") +open(NAME, "br") +open(NAME, "+r", encoding="utf-8") +open(NAME, "xb") +open(NAME, "rwx") # [bad-open-mode] +open(NAME, mode="rwx") # [bad-open-mode] +open(NAME, "rwx", encoding="utf-8") # [bad-open-mode] +open(NAME, "rr", encoding="utf-8") # [bad-open-mode] +open(NAME, "+", encoding="utf-8") # [bad-open-mode] +open(NAME, "xw", encoding="utf-8") # [bad-open-mode] +open(NAME, "ab+") +open(NAME, "a+b") +open(NAME, "+ab") +open(NAME, "+rUb") +open(NAME, "x+b") +open(NAME, "Ua", encoding="utf-8") # [bad-open-mode] +open(NAME, "Ur++", encoding="utf-8") # [bad-open-mode] +open(NAME, "Ut", encoding="utf-8") +open(NAME, "Ubr") + +mode = "rw" +open(NAME, mode) + +pathlib.Path(NAME).open("wb") +pathlib.Path(NAME).open(mode) +pathlib.Path(NAME).open("rwx") # [bad-open-mode] +pathlib.Path(NAME).open(mode="rwx") # [bad-open-mode] +pathlib.Path(NAME).open("rwx", encoding="utf-8") # [bad-open-mode] diff --git a/crates/ruff_linter/src/checkers/ast/analyze/expression.rs b/crates/ruff_linter/src/checkers/ast/analyze/expression.rs index 13ad26bed7933..94cb344068004 100644 --- a/crates/ruff_linter/src/checkers/ast/analyze/expression.rs +++ b/crates/ruff_linter/src/checkers/ast/analyze/expression.rs @@ -749,6 +749,9 @@ pub(crate) fn expression(expr: &Expr, checker: &mut Checker) { if checker.enabled(Rule::SysExitAlias) { pylint::rules::sys_exit_alias(checker, func); } + if checker.enabled(Rule::BadOpenMode) { + pylint::rules::bad_open_mode(checker, call); + } if checker.enabled(Rule::BadStrStripCall) { pylint::rules::bad_str_strip_call(checker, func, args); } diff --git a/crates/ruff_linter/src/codes.rs b/crates/ruff_linter/src/codes.rs index 1c836a092be94..ba8b99eb55442 100644 --- a/crates/ruff_linter/src/codes.rs +++ b/crates/ruff_linter/src/codes.rs @@ -270,6 +270,7 @@ pub fn code_to_rule(linter: Linter, code: &str) -> Option<(RuleGroup, Rule)> { (Pylint, "W0604") => (RuleGroup::Preview, rules::pylint::rules::GlobalAtModuleLevel), (Pylint, "W0603") => (RuleGroup::Stable, rules::pylint::rules::GlobalStatement), (Pylint, "W0711") => (RuleGroup::Stable, rules::pylint::rules::BinaryOpException), + (Pylint, "W1501") => (RuleGroup::Preview, rules::pylint::rules::BadOpenMode), (Pylint, "W1508") => (RuleGroup::Stable, rules::pylint::rules::InvalidEnvvarDefault), (Pylint, "W1509") => (RuleGroup::Stable, rules::pylint::rules::SubprocessPopenPreexecFn), (Pylint, "W1510") => (RuleGroup::Stable, rules::pylint::rules::SubprocessRunWithoutCheck), diff --git a/crates/ruff_linter/src/rules/pylint/mod.rs b/crates/ruff_linter/src/rules/pylint/mod.rs index 4b3b5afa335e3..2a80dcd63df7d 100644 --- a/crates/ruff_linter/src/rules/pylint/mod.rs +++ b/crates/ruff_linter/src/rules/pylint/mod.rs @@ -21,6 +21,7 @@ mod tests { #[test_case(Rule::AndOrTernary, Path::new("and_or_ternary.py"))] #[test_case(Rule::AssertOnStringLiteral, Path::new("assert_on_string_literal.py"))] #[test_case(Rule::AwaitOutsideAsync, Path::new("await_outside_async.py"))] + #[test_case(Rule::BadOpenMode, Path::new("bad_open_mode.py"))] #[test_case( Rule::BadStringFormatCharacter, Path::new("bad_string_format_character.py") diff --git a/crates/ruff_linter/src/rules/pylint/rules/bad_open_mode.rs b/crates/ruff_linter/src/rules/pylint/rules/bad_open_mode.rs new file mode 100644 index 0000000000000..2d98d00cc5026 --- /dev/null +++ b/crates/ruff_linter/src/rules/pylint/rules/bad_open_mode.rs @@ -0,0 +1,197 @@ +use bitflags::bitflags; + +use ruff_diagnostics::{Diagnostic, Violation}; +use ruff_macros::{derive_message_formats, violation}; +use ruff_python_ast::{self as ast, Constant, Expr}; +use ruff_python_semantic::SemanticModel; +use ruff_text_size::Ranged; + +use crate::checkers::ast::Checker; + +/// ## What it does +/// Check for an invalid `mode` argument in `open` calls. +/// +/// ## Why is this bad? +/// The `open` function accepts a `mode` argument that specifies how the file +/// should be opened (e.g., read-only, write-only, append-only, etc.). +/// +/// Python supports a variety of open modes: `r`, `w`, `a`, and `x`, to control +/// reading, writing, appending, and creating, respectively, along with +/// `b` (binary mode), `+` (read and write), and `U` (universal newlines), +/// the latter of which is only valid alongside `r`. This rule detects both +/// invalid combinations of modes and invalid characters in the mode string +/// itself. +/// +/// ## Example +/// ```python +/// with open("file", "rwx") as f: +/// return f.read() +/// ``` +/// +/// Use instead: +/// ```python +/// with open("file", "r") as f: +/// return f.read() +/// ``` +/// +/// ## References +/// - [Python documentation: `open`](https://docs.python.org/3/library/functions.html#open) +#[violation] +pub struct BadOpenMode { + mode: String, +} + +impl Violation for BadOpenMode { + #[derive_message_formats] + fn message(&self) -> String { + let BadOpenMode { mode } = self; + format!("`{mode}` is not a valid mode for `open`") + } +} + +/// PLW1501 +pub(crate) fn bad_open_mode(checker: &mut Checker, call: &ast::ExprCall) { + let Some(kind) = is_open(call.func.as_ref(), checker.semantic()) else { + return; + }; + + let Some(mode) = extract_mode(call, kind) else { + return; + }; + + let Expr::Constant(ast::ExprConstant { + value: Constant::Str(ast::StringConstant { value, .. }), + .. + }) = mode + else { + return; + }; + + if is_valid_mode(value) { + return; + } + + checker.diagnostics.push(Diagnostic::new( + BadOpenMode { + mode: value.to_string(), + }, + mode.range(), + )); +} + +#[derive(Debug, Copy, Clone)] +enum Kind { + /// A call to the builtin `open(...)`. + Builtin, + /// A call to `pathlib.Path(...).open(...)`. + Pathlib, +} + +/// If a function is a call to `open`, returns the kind of `open` call. +fn is_open(func: &Expr, semantic: &SemanticModel) -> Option { + match func { + // Ex) `pathlib.Path(...).open(...)` + Expr::Attribute(ast::ExprAttribute { attr, value, .. }) if attr.as_str() == "open" => { + match value.as_ref() { + Expr::Call(ast::ExprCall { func, .. }) => semantic + .resolve_call_path(func) + .is_some_and(|call_path| matches!(call_path.as_slice(), ["pathlib", "Path"])) + .then_some(Kind::Pathlib), + _ => None, + } + } + // Ex) `open(...)` + Expr::Name(ast::ExprName { id, .. }) => { + (id.as_str() == "open" && semantic.is_builtin("open")).then_some(Kind::Builtin) + } + _ => None, + } +} + +/// Returns the mode argument, if present. +fn extract_mode(call: &ast::ExprCall, kind: Kind) -> Option<&Expr> { + match kind { + Kind::Builtin => call.arguments.find_argument("mode", 1), + Kind::Pathlib => call.arguments.find_argument("mode", 0), + } +} + +bitflags! { + #[derive(Copy, Clone, Debug, PartialEq, Eq)] + pub(super) struct OpenMode: u8 { + /// `r` + const READ = 0b0001; + /// `w` + const WRITE = 0b0010; + /// `a` + const APPEND = 0b0100; + /// `x` + const CREATE = 0b1000; + /// `b` + const BINARY = 0b10000; + /// `t` + const TEXT = 0b10_0000; + /// `+` + const PLUS = 0b100_0000; + /// `U` + const UNIVERSAL_NEWLINES = 0b1000_0000; + + } +} + +impl TryFrom for OpenMode { + type Error = (); + + fn try_from(value: char) -> Result { + match value { + 'r' => Ok(Self::READ), + 'w' => Ok(Self::WRITE), + 'a' => Ok(Self::APPEND), + 'x' => Ok(Self::CREATE), + 'b' => Ok(Self::BINARY), + 't' => Ok(Self::TEXT), + '+' => Ok(Self::PLUS), + 'U' => Ok(Self::UNIVERSAL_NEWLINES), + _ => Err(()), + } + } +} + +/// Returns `true` if the open mode is valid. +fn is_valid_mode(mode: &str) -> bool { + // Flag duplicates and invalid characters. + let mut flags = OpenMode::empty(); + for char in mode.chars() { + let Ok(flag) = OpenMode::try_from(char) else { + return false; + }; + if flags.intersects(flag) { + return false; + } + flags.insert(flag); + } + + // Both text and binary mode cannot be set at the same time. + if flags.contains(OpenMode::TEXT | OpenMode::BINARY) { + return false; + } + + // The `U` mode is only valid with `r`. + if flags.contains(OpenMode::UNIVERSAL_NEWLINES) + && flags.intersects(OpenMode::WRITE | OpenMode::APPEND | OpenMode::CREATE) + { + return false; + } + + // Otherwise, reading, writing, creating, and appending are mutually exclusive. + [ + OpenMode::READ | OpenMode::UNIVERSAL_NEWLINES, + OpenMode::WRITE, + OpenMode::CREATE, + OpenMode::APPEND, + ] + .into_iter() + .filter(|flag| flags.intersects(*flag)) + .count() + == 1 +} diff --git a/crates/ruff_linter/src/rules/pylint/rules/mod.rs b/crates/ruff_linter/src/rules/pylint/rules/mod.rs index ae3676ba3fc5d..5ac1858a41f7e 100644 --- a/crates/ruff_linter/src/rules/pylint/rules/mod.rs +++ b/crates/ruff_linter/src/rules/pylint/rules/mod.rs @@ -2,6 +2,7 @@ pub(crate) use and_or_ternary::*; pub(crate) use assert_on_string_literal::*; pub(crate) use await_outside_async::*; pub(crate) use bad_dunder_method_name::*; +pub(crate) use bad_open_mode::*; pub(crate) use bad_str_strip_call::*; pub(crate) use bad_string_format_character::BadStringFormatCharacter; pub(crate) use bad_string_format_type::*; @@ -70,6 +71,7 @@ mod and_or_ternary; mod assert_on_string_literal; mod await_outside_async; mod bad_dunder_method_name; +mod bad_open_mode; mod bad_str_strip_call; pub(crate) mod bad_string_format_character; mod bad_string_format_type; diff --git a/crates/ruff_linter/src/rules/pylint/snapshots/ruff_linter__rules__pylint__tests__PLW1501_bad_open_mode.py.snap b/crates/ruff_linter/src/rules/pylint/snapshots/ruff_linter__rules__pylint__tests__PLW1501_bad_open_mode.py.snap new file mode 100644 index 0000000000000..6e24d45dd749d --- /dev/null +++ b/crates/ruff_linter/src/rules/pylint/snapshots/ruff_linter__rules__pylint__tests__PLW1501_bad_open_mode.py.snap @@ -0,0 +1,111 @@ +--- +source: crates/ruff_linter/src/rules/pylint/mod.rs +--- +bad_open_mode.py:11:12: PLW1501 `rwx` is not a valid mode for `open` + | + 9 | open(NAME, "+r", encoding="utf-8") +10 | open(NAME, "xb") +11 | open(NAME, "rwx") # [bad-open-mode] + | ^^^^^ PLW1501 +12 | open(NAME, mode="rwx") # [bad-open-mode] +13 | open(NAME, "rwx", encoding="utf-8") # [bad-open-mode] + | + +bad_open_mode.py:12:17: PLW1501 `rwx` is not a valid mode for `open` + | +10 | open(NAME, "xb") +11 | open(NAME, "rwx") # [bad-open-mode] +12 | open(NAME, mode="rwx") # [bad-open-mode] + | ^^^^^ PLW1501 +13 | open(NAME, "rwx", encoding="utf-8") # [bad-open-mode] +14 | open(NAME, "rr", encoding="utf-8") # [bad-open-mode] + | + +bad_open_mode.py:13:12: PLW1501 `rwx` is not a valid mode for `open` + | +11 | open(NAME, "rwx") # [bad-open-mode] +12 | open(NAME, mode="rwx") # [bad-open-mode] +13 | open(NAME, "rwx", encoding="utf-8") # [bad-open-mode] + | ^^^^^ PLW1501 +14 | open(NAME, "rr", encoding="utf-8") # [bad-open-mode] +15 | open(NAME, "+", encoding="utf-8") # [bad-open-mode] + | + +bad_open_mode.py:14:12: PLW1501 `rr` is not a valid mode for `open` + | +12 | open(NAME, mode="rwx") # [bad-open-mode] +13 | open(NAME, "rwx", encoding="utf-8") # [bad-open-mode] +14 | open(NAME, "rr", encoding="utf-8") # [bad-open-mode] + | ^^^^ PLW1501 +15 | open(NAME, "+", encoding="utf-8") # [bad-open-mode] +16 | open(NAME, "xw", encoding="utf-8") # [bad-open-mode] + | + +bad_open_mode.py:15:12: PLW1501 `+` is not a valid mode for `open` + | +13 | open(NAME, "rwx", encoding="utf-8") # [bad-open-mode] +14 | open(NAME, "rr", encoding="utf-8") # [bad-open-mode] +15 | open(NAME, "+", encoding="utf-8") # [bad-open-mode] + | ^^^ PLW1501 +16 | open(NAME, "xw", encoding="utf-8") # [bad-open-mode] +17 | open(NAME, "ab+") + | + +bad_open_mode.py:16:12: PLW1501 `xw` is not a valid mode for `open` + | +14 | open(NAME, "rr", encoding="utf-8") # [bad-open-mode] +15 | open(NAME, "+", encoding="utf-8") # [bad-open-mode] +16 | open(NAME, "xw", encoding="utf-8") # [bad-open-mode] + | ^^^^ PLW1501 +17 | open(NAME, "ab+") +18 | open(NAME, "a+b") + | + +bad_open_mode.py:22:12: PLW1501 `Ua` is not a valid mode for `open` + | +20 | open(NAME, "+rUb") +21 | open(NAME, "x+b") +22 | open(NAME, "Ua", encoding="utf-8") # [bad-open-mode] + | ^^^^ PLW1501 +23 | open(NAME, "Ur++", encoding="utf-8") # [bad-open-mode] +24 | open(NAME, "Ut", encoding="utf-8") + | + +bad_open_mode.py:23:12: PLW1501 `Ur++` is not a valid mode for `open` + | +21 | open(NAME, "x+b") +22 | open(NAME, "Ua", encoding="utf-8") # [bad-open-mode] +23 | open(NAME, "Ur++", encoding="utf-8") # [bad-open-mode] + | ^^^^^^ PLW1501 +24 | open(NAME, "Ut", encoding="utf-8") +25 | open(NAME, "Ubr") + | + +bad_open_mode.py:32:25: PLW1501 `rwx` is not a valid mode for `open` + | +30 | pathlib.Path(NAME).open("wb") +31 | pathlib.Path(NAME).open(mode) +32 | pathlib.Path(NAME).open("rwx") # [bad-open-mode] + | ^^^^^ PLW1501 +33 | pathlib.Path(NAME).open(mode="rwx") # [bad-open-mode] +34 | pathlib.Path(NAME).open("rwx", encoding="utf-8") # [bad-open-mode] + | + +bad_open_mode.py:33:30: PLW1501 `rwx` is not a valid mode for `open` + | +31 | pathlib.Path(NAME).open(mode) +32 | pathlib.Path(NAME).open("rwx") # [bad-open-mode] +33 | pathlib.Path(NAME).open(mode="rwx") # [bad-open-mode] + | ^^^^^ PLW1501 +34 | pathlib.Path(NAME).open("rwx", encoding="utf-8") # [bad-open-mode] + | + +bad_open_mode.py:34:25: PLW1501 `rwx` is not a valid mode for `open` + | +32 | pathlib.Path(NAME).open("rwx") # [bad-open-mode] +33 | pathlib.Path(NAME).open(mode="rwx") # [bad-open-mode] +34 | pathlib.Path(NAME).open("rwx", encoding="utf-8") # [bad-open-mode] + | ^^^^^ PLW1501 + | + + diff --git a/ruff.schema.json b/ruff.schema.json index 2c3d3fcf1476c..e67a8fc14dd12 100644 --- a/ruff.schema.json +++ b/ruff.schema.json @@ -3113,6 +3113,7 @@ "PLW1", "PLW15", "PLW150", + "PLW1501", "PLW1508", "PLW1509", "PLW151",