Review: Introduce parentheses_iterator

astral-sh · Oct 19, 2023 · e4ef5b0 · e4ef5b0
1 parent 3dea426
commit e4ef5b0
Show file tree

Hide file tree

Showing 2 changed files with 57 additions and 51 deletions.
diff --git a/crates/ruff_python_ast/src/parenthesize.rs b/crates/ruff_python_ast/src/parenthesize.rs
@@ -4,35 +4,44 @@ use ruff_text_size::{Ranged, TextLen, TextRange};
 use crate::AnyNodeRef;
 use crate::ExpressionRef;
 
-/// Returns the [`TextRange`] of a given expression including parentheses, if the expression is
-/// parenthesized; or `None`, if the expression is not parenthesized.
-pub fn parenthesized_range(
-    expr: ExpressionRef,
-    parent: AnyNodeRef,
-    comment_ranges: &CommentRanges,
-    source: &str,
-) -> Option<TextRange> {
-    // If the parent is a node that brings its own parentheses, exclude the closing parenthesis
-    // from our search range. Otherwise, we risk matching on calls, like `func(x)`, for which
-    // the open and close parentheses are part of the `Arguments` node.
-    //
-    // There are a few other nodes that may have their own parentheses, but are fine to exclude:
-    // - `Parameters`: The parameters to a function definition. Any expressions would represent
-    //   default arguments, and so must be preceded by _at least_ the parameter name. As such,
-    //   we won't mistake any parentheses for the opening and closing parentheses on the
-    //   `Parameters` node itself.
-    // - `Tuple`: The elements of a tuple. The only risk is a single-element tuple (e.g., `(x,)`),
-    //    which must have a trailing comma anyway.
-    let exclusive_parent_end = if parent.is_arguments() {
-        parent.end() - ")".text_len()
+/// Returns an iterator over the ranges of the optional parentheses surrounding an expression.
+///
+/// E.g. for `((f()))` with `f()` as expression, the iterator returns the ranges (1, 6) and (0, 7).
+///
+/// Note that without a parent the range can be inaccurate, e.g. `f(a)` we falsely return a set of
+/// parentheses around `a` even if the parentheses actually belong to `f`. That is why you should
+/// generally prefer [`parenthesized_range`].
+pub fn parentheses_iterator<'a>(
+    expr: ExpressionRef<'a>,
+    parent: Option<AnyNodeRef>,
+    comment_ranges: &'a CommentRanges,
+    source: &'a str,
+) -> impl Iterator<Item = TextRange> + 'a {
+    let right_tokenizer = if let Some(parent) = parent {
+        // If the parent is a node that brings its own parentheses, exclude the closing parenthesis
+        // from our search range. Otherwise, we risk matching on calls, like `func(x)`, for which
+        // the open and close parentheses are part of the `Arguments` node.
+        //
+        // There are a few other nodes that may have their own parentheses, but are fine to exclude:
+        // - `Parameters`: The parameters to a function definition. Any expressions would represent
+        //   default arguments, and so must be preceded by _at least_ the parameter name. As such,
+        //   we won't mistake any parentheses for the opening and closing parentheses on the
+        //   `Parameters` node itself.
+        // - `Tuple`: The elements of a tuple. The only risk is a single-element tuple (e.g., `(x,)`),
+        //    which must have a trailing comma anyway.
+        let exclusive_parent_end = if parent.is_arguments() {
+            parent.end() - ")".text_len()
+        } else {
+            parent.end()
+        };
+        SimpleTokenizer::new(source, TextRange::new(expr.end(), exclusive_parent_end))
     } else {
-        parent.end()
+        SimpleTokenizer::starts_at(expr.end(), source)
     };
 
-    let right_tokenizer =
-        SimpleTokenizer::new(source, TextRange::new(expr.end(), exclusive_parent_end))
-            .skip_trivia()
-            .take_while(|token| token.kind == SimpleTokenKind::RParen);
+    let right_tokenizer = right_tokenizer
+        .skip_trivia()
+        .take_while(|token| token.kind == SimpleTokenKind::RParen);
 
     let left_tokenizer = BackwardsTokenizer::up_to(expr.start(), source, comment_ranges)
         .skip_trivia()
@@ -43,6 +52,16 @@ pub fn parenthesized_range(
     // the `right_tokenizer` is exhausted.
     right_tokenizer
         .zip(left_tokenizer)
-        .last()
         .map(|(right, left)| TextRange::new(left.start(), right.end()))
 }
+
+/// Returns the [`TextRange`] of a given expression including parentheses, if the expression is
+/// parenthesized; or `None`, if the expression is not parenthesized.
+pub fn parenthesized_range(
+    expr: ExpressionRef,
+    parent: AnyNodeRef,
+    comment_ranges: &CommentRanges,
+    source: &str,
+) -> Option<TextRange> {
+    parentheses_iterator(expr, Some(parent), comment_ranges, source).last()
+}
diff --git a/crates/ruff_python_formatter/src/expression/mod.rs b/crates/ruff_python_formatter/src/expression/mod.rs
@@ -5,11 +5,11 @@ use ruff_formatter::{
     write, FormatOwnedWithRule, FormatRefWithRule, FormatRule, FormatRuleWithOptions,
 };
 use ruff_python_ast as ast;
+use ruff_python_ast::parenthesize::parentheses_iterator;
 use ruff_python_ast::visitor::preorder::{walk_expr, PreorderVisitor};
-use ruff_python_ast::AnyNodeRef;
-use ruff_python_ast::{Constant, Expr, ExpressionRef, Operator};
-use ruff_python_trivia::{BackwardsTokenizer, CommentRanges, SimpleTokenKind, SimpleTokenizer};
-use ruff_text_size::{Ranged, TextRange};
+use ruff_python_ast::{AnyNodeRef, Constant, Expr, ExpressionRef, Operator};
+use ruff_python_trivia::CommentRanges;
+use ruff_text_size::Ranged;
 
 use crate::builders::parenthesize_if_expands;
 use crate::comments::{leading_comments, trailing_comments, LeadingDanglingTrailingComments};
@@ -187,8 +187,7 @@ fn format_with_parentheses_comments(
 ) -> FormatResult<()> {
     // First part: Split the comments
 
-    // TODO(konstin): This is copied from `parenthesized_range`, except that we don't have the
-    // parent, which is a problem:
+    // TODO(konstin): We don't have the parent, which is a problem:
     // ```python
     // f(
     //     # a
@@ -204,25 +203,13 @@ fn format_with_parentheses_comments(
     //     )
     // )
     // ```
-    let right_tokenizer = SimpleTokenizer::starts_at(expression.end(), f.context().source())
-        .skip_trivia()
-        .take_while(|token| token.kind == SimpleTokenKind::RParen);
-
-    let left_tokenizer = BackwardsTokenizer::up_to(
-        expression.start(),
-        f.context().source(),
+    let range_with_parens = parentheses_iterator(
+        expression.into(),
+        None,
         f.context().comments().ranges(),
+        f.context().source(),
     )
-    .skip_trivia()
-    .take_while(|token| token.kind == SimpleTokenKind::LParen);
-
-    // Zip closing parenthesis with opening parenthesis. The order is intentional, as testing for
-    // closing parentheses is cheaper, and `zip` will avoid progressing the `left_tokenizer` if
-    // the `right_tokenizer` is exhausted.
-    let range_with_parens = right_tokenizer
-        .zip(left_tokenizer)
-        .last()
-        .map(|(right, left)| TextRange::new(left.start(), right.end()));
+    .last();
 
     let (leading_split, trailing_split) = if let Some(range_with_parens) = range_with_parens {
         let leading_split = node_comments
@@ -250,7 +237,7 @@ fn format_with_parentheses_comments(
         Some((first, rest)) if first.line_position().is_end_of_line() => {
             (slice::from_ref(first), rest)
         }
-        _ => (&[], node_comments.leading),
+        _ => (Default::default(), node_comments.leading),
     };
 
     // Second Part: Format