forked from astral-sh/ruff
-
Notifications
You must be signed in to change notification settings - Fork 0
/
unspecified_encoding.rs
118 lines (112 loc) · 3.68 KB
/
unspecified_encoding.rs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
use ruff_diagnostics::{Diagnostic, Violation};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast as ast;
use ruff_text_size::Ranged;
use crate::checkers::ast::Checker;
/// ## What it does
/// Checks for calls to `open` and similar functions (`codecs.open`,
/// `io.TextIOWrapper`, and the `tempfile` file constructors) that do not pass
/// an explicit `encoding` argument.
///
/// ## Why is this bad?
/// Using `open` in text mode without an explicit encoding can lead to
/// non-portable code whose behaviour differs between systems.
///
/// Instead, consider using the `encoding` parameter to explicitly enforce a
/// specific encoding.
///
/// ## Example
/// ```python
/// open("file.txt")
/// ```
///
/// Use instead:
/// ```python
/// open("file.txt", encoding="utf-8")
/// ```
///
/// ## References
/// - [Python documentation: `open`](https://docs.python.org/3/library/functions.html#open)
#[violation]
pub struct UnspecifiedEncoding {
/// Dotted name of the offending callee as shown in the message, e.g. `open`
/// or `tempfile.TemporaryFile`.
function_name: String,
}
impl Violation for UnspecifiedEncoding {
    /// Builds the user-facing message for this violation.
    ///
    /// The "in text mode" qualifier is only added when the reported function
    /// name is exactly `open`; every other function name gets the plain
    /// message.
    #[derive_message_formats]
    fn message(&self) -> String {
        // Note the trailing space: the fragment is spliced directly in front of
        // "without" in the format string below.
        let text_mode_note = match self.function_name.as_str() {
            "open" => "in text mode ",
            _ => "",
        };
        format!(
            "`{}` {}without explicit `encoding` argument",
            self.function_name, text_mode_note
        )
    }
}
/// Reports whether a `mode` argument requests binary mode.
///
/// Returns `Some(true)` when `expr` is a constant string containing `b`,
/// `Some(false)` when it is a constant string without `b`, and `None` when
/// `expr` is not a constant string, so the mode cannot be determined here.
fn is_binary_mode(expr: &ast::Expr) -> Option<bool> {
    expr.as_constant_expr()
        .and_then(|constant| constant.value.as_str())
        .map(|mode| mode.value.contains('b'))
}
/// Returns `true` if `call`, whose callee resolved to `path`, opens a file in
/// text mode without an explicit `encoding` argument.
///
/// `path` is the resolved call path, split into segments; builtins carry an
/// empty first segment (e.g. `["", "open"]`). Calls to anything other than the
/// functions matched below are never violations.
///
/// The check is deliberately conservative: if the call unpacks `*args` or
/// `**kwargs`, or if the `mode` argument is not a constant string, the call is
/// assumed to be fine because the encoding (or a binary mode) may be supplied
/// in a way that is not visible here.
fn is_violation(call: &ast::ExprCall, path: &[&str]) -> bool {
    // `*args` might contain the encoding as a positional argument.
    if call
        .arguments
        .args
        .iter()
        .any(ruff_python_ast::Expr::is_starred_expr)
    {
        return false;
    }
    // `**kwargs` (a keyword without a name) might contain `encoding=...`.
    if call.arguments.keywords.iter().any(|a| a.arg.is_none()) {
        return false;
    }
    match path {
        [module @ ("" | "codecs"), "open"] => {
            if let Some(mode_arg) = call.arguments.find_argument("mode", 1) {
                if is_binary_mode(mode_arg).unwrap_or(true) {
                    // binary mode or unknown mode is no violation
                    return false;
                }
            }
            // else mode not specified, defaults to text mode

            // The two functions disagree on where `encoding` sits positionally:
            //   open(file, mode, buffering, encoding, ...)        -> index 3
            //   codecs.open(filename, mode, encoding, errors, ...) -> index 2
            let encoding_pos = if *module == "codecs" { 2 } else { 3 };
            call.arguments
                .find_argument("encoding", encoding_pos)
                .is_none()
        }
        // io.TextIOWrapper(buffer, encoding, ...) always wraps in text mode.
        ["io", "TextIOWrapper"] => call.arguments.find_argument("encoding", 1).is_none(),
        ["tempfile", class @ ("TemporaryFile" | "NamedTemporaryFile" | "SpooledTemporaryFile")] => {
            // `SpooledTemporaryFile` takes `max_size` first, which shifts `mode`
            // (and every parameter after it) one position to the right.
            let mode_pos = usize::from(*class == "SpooledTemporaryFile");
            if let Some(mode_arg) = call.arguments.find_argument("mode", mode_pos) {
                if is_binary_mode(mode_arg).unwrap_or(true) {
                    // binary mode or unknown mode is no violation
                    return false;
                }
            } else {
                // defaults to binary mode
                return false;
            }
            // `encoding` follows `mode` and `buffering`.
            call.arguments
                .find_argument("encoding", mode_pos + 2)
                .is_none()
        }
        _ => false,
    }
}
/// PLW1514
///
/// Resolves the callee of `call` and, if the call is a violation according to
/// `is_violation`, pushes an `UnspecifiedEncoding` diagnostic covering the
/// callee expression onto the checker.
pub(crate) fn unspecified_encoding(checker: &mut Checker, call: &ast::ExprCall) {
    let Some(call_path) = checker.semantic().resolve_call_path(&call.func) else {
        return;
    };
    if !is_violation(call, call_path.as_slice()) {
        return;
    }

    // Builtins resolve with an empty leading segment; leave it out so the
    // reported name reads `open` rather than `.open`.
    let name_segments = match call_path.as_slice() {
        [first, rest @ ..] if first.is_empty() => rest,
        whole => whole,
    };
    let diagnostic = Diagnostic::new(
        UnspecifiedEncoding {
            function_name: name_segments.join("."),
        },
        call.func.range(),
    );

    // Release the call path before `checker` is borrowed mutably below.
    drop(call_path);
    checker.diagnostics.push(diagnostic);
}