From 43b5745ece6340ffb655e99c2f1769e72359a787 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Donny/=EA=B0=95=EB=8F=99=EC=9C=A4?= Date: Thu, 8 Dec 2022 12:17:20 +0900 Subject: [PATCH 1/6] Fast path --- crates/swc_css_utils/src/lib.rs | 40 +++++++++++++++++++++++++++++---- 1 file changed, 36 insertions(+), 4 deletions(-) diff --git a/crates/swc_css_utils/src/lib.rs b/crates/swc_css_utils/src/lib.rs index 7ee85a1c1b4b..19910e407102 100644 --- a/crates/swc_css_utils/src/lib.rs +++ b/crates/swc_css_utils/src/lib.rs @@ -1,6 +1,6 @@ #![deny(clippy::all)] -use std::{char::REPLACEMENT_CHARACTER, str}; +use std::{borrow::Cow, char::REPLACEMENT_CHARACTER, str}; use once_cell::sync::Lazy; use serde::{Deserialize, Serialize}; @@ -154,7 +154,39 @@ pub static NAMED_COLORS: Lazy> = Lazy::new(|| { }); // https://drafts.csswg.org/cssom/#serialize-an-identifier -pub fn serialize_ident(value: &str, raw: Option<&str>, minify: bool) -> String { +pub fn serialize_ident<'a>(value: &'a str, raw: Option<&str>, minify: bool) -> Cow<'a, str> { + // Fast-path + if value.chars().enumerate().all(|(i, c)| { + match c { + // Old browser hacks with `\0` and other - IE + REPLACEMENT_CHARACTER if raw.is_some() => false, + '\x00' => false, + '\x01'..='\x1f' | '\x7F' => false, + '0'..='9' if i == 0 => false, + '0'..='9' if i == 1 && &value[0..1] == "-" => false, + // If the character is the first character and is a "-" (U+002D), and there is no second + // character, then the escaped character. + '-' if i == 0 && value.len() == 1 => false, + // If the character is not handled by one of the above rules and is greater than or + // equal to U+0080, is "-" (U+002D) or "_" (U+005F), or is in one of the ranges [0-9] + // (U+0030 to U+0039), [A-Z] (U+0041 to U+005A), or \[a-z] (U+0061 to U+007A), then the + // character itself. + _ if !c.is_ascii() + || c == '-' + || c == '_' + || c.is_ascii_digit() + || c.is_ascii_uppercase() + || c.is_ascii_lowercase() => + { + true + } + // Otherwise, the escaped character. + _ => false, + } + }) { + return Cow::Borrowed(value); + } + let mut result = String::with_capacity(value.len()); // @@ -175,7 +207,7 @@ pub fn serialize_ident(value: &str, raw: Option<&str>, minify: bool) -> String { REPLACEMENT_CHARACTER if raw.is_some() => { result.push_str(raw.unwrap()); - return result; + return Cow::Owned(result); } // If the character is NULL (U+0000), then the REPLACEMENT CHARACTER (U+FFFD). '\x00' => { @@ -225,7 +257,7 @@ pub fn serialize_ident(value: &str, raw: Option<&str>, minify: bool) -> String { } } - result + Cow::Owned(result) } // https://github.com/servo/rust-cssparser/blob/4c5d065798ea1be649412532bde481dbd404f44a/src/serializer.rs#L166 From 5b97e2f370f6d5e22f1b1cadfb8df32ab31e3eaa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Donny/=EA=B0=95=EB=8F=99=EC=9C=A4?= Date: Thu, 8 Dec 2022 12:19:03 +0900 Subject: [PATCH 2/6] split --- crates/swc_css_utils/src/lib.rs | 60 ++++++++++++++++++--------------- 1 file changed, 32 insertions(+), 28 deletions(-) diff --git a/crates/swc_css_utils/src/lib.rs b/crates/swc_css_utils/src/lib.rs index 19910e407102..4fa11ea4e956 100644 --- a/crates/swc_css_utils/src/lib.rs +++ b/crates/swc_css_utils/src/lib.rs @@ -155,35 +155,39 @@ pub static NAMED_COLORS: Lazy> = Lazy::new(|| { // https://drafts.csswg.org/cssom/#serialize-an-identifier pub fn serialize_ident<'a>(value: &'a str, raw: Option<&str>, minify: bool) -> Cow<'a, str> { - // Fast-path - if value.chars().enumerate().all(|(i, c)| { - match c { - // Old browser hacks with `\0` and other - IE - REPLACEMENT_CHARACTER if raw.is_some() => false, - '\x00' => false, - '\x01'..='\x1f' | '\x7F' => false, - '0'..='9' if i == 0 => false, - '0'..='9' if i == 1 && &value[0..1] == "-" => false, - // If the character is the first character and is a "-" (U+002D), and there is no second - // character, then the escaped character. - '-' if i == 0 && value.len() == 1 => false, - // If the character is not handled by one of the above rules and is greater than or - // equal to U+0080, is "-" (U+002D) or "_" (U+005F), or is in one of the ranges [0-9] - // (U+0030 to U+0039), [A-Z] (U+0041 to U+005A), or \[a-z] (U+0061 to U+007A), then the - // character itself. - _ if !c.is_ascii() - || c == '-' - || c == '_' - || c.is_ascii_digit() - || c.is_ascii_uppercase() - || c.is_ascii_lowercase() => - { - true + fn can_skip(value: &str, raw: Option<&str>) -> bool { + value.chars().enumerate().all(|(i, c)| { + match c { + // Old browser hacks with `\0` and other - IE + REPLACEMENT_CHARACTER if raw.is_some() => false, + '\x00' => false, + '\x01'..='\x1f' | '\x7F' => false, + '0'..='9' if i == 0 => false, + '0'..='9' if i == 1 && &value[0..1] == "-" => false, + // If the character is the first character and is a "-" (U+002D), and there is no + // second character, then the escaped character. + '-' if i == 0 && value.len() == 1 => false, + // If the character is not handled by one of the above rules and is greater than or + // equal to U+0080, is "-" (U+002D) or "_" (U+005F), or is in one of the ranges + // [0-9] (U+0030 to U+0039), [A-Z] (U+0041 to U+005A), or \[a-z] + // (U+0061 to U+007A), then the character itself. + _ if !c.is_ascii() + || c == '-' + || c == '_' + || c.is_ascii_digit() + || c.is_ascii_uppercase() + || c.is_ascii_lowercase() => + { + true + } + // Otherwise, the escaped character. + _ => false, } - // Otherwise, the escaped character. - _ => false, - } - }) { + }) + } + + // Fast-path + if can_skip(value, raw) { return Cow::Borrowed(value); } From 3b6d743f24943a985758a26a82a90dc2148782ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Donny/=EA=B0=95=EB=8F=99=EC=9C=A4?= Date: Thu, 8 Dec 2022 12:35:50 +0900 Subject: [PATCH 3/6] Reduce allocation --- crates/swc_css_codegen/src/lib.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/crates/swc_css_codegen/src/lib.rs b/crates/swc_css_codegen/src/lib.rs index d0648c1ecaeb..907cd57169f3 100644 --- a/crates/swc_css_codegen/src/lib.rs +++ b/crates/swc_css_codegen/src/lib.rs @@ -2,7 +2,7 @@ #![allow(clippy::needless_update)] pub use std::fmt::Result; -use std::{str, str::from_utf8}; +use std::{borrow::Cow, str, str::from_utf8}; use serde::{Deserialize, Serialize}; use swc_atoms::*; @@ -1565,9 +1565,9 @@ where fn emit_ident(&mut self, n: &Ident) -> Result { if self.config.minify { let value = if self.ctx.allow_to_lowercase && self.config.minify { - n.value.to_ascii_lowercase().to_string() + Cow::Owned(n.value.to_ascii_lowercase()) } else { - n.value.to_string() + Cow::Borrowed(&n.value) }; let serialized = serialize_ident(&value, n.raw.as_deref(), true); From 264db94acc338566e4013d6c52a1803063602f88 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Donny/=EA=B0=95=EB=8F=99=EC=9C=A4?= Date: Thu, 8 Dec 2022 12:44:28 +0900 Subject: [PATCH 4/6] Extract --- crates/swc_css_utils/src/lib.rs | 58 +++++++++++++++------------------ 1 file changed, 26 insertions(+), 32 deletions(-) diff --git a/crates/swc_css_utils/src/lib.rs b/crates/swc_css_utils/src/lib.rs index 4fa11ea4e956..91a5a58a1f4a 100644 --- a/crates/swc_css_utils/src/lib.rs +++ b/crates/swc_css_utils/src/lib.rs @@ -153,41 +153,35 @@ pub static NAMED_COLORS: Lazy> = Lazy::new(|| { named_colors }); -// https://drafts.csswg.org/cssom/#serialize-an-identifier -pub fn serialize_ident<'a>(value: &'a str, raw: Option<&str>, minify: bool) -> Cow<'a, str> { - fn can_skip(value: &str, raw: Option<&str>) -> bool { - value.chars().enumerate().all(|(i, c)| { - match c { - // Old browser hacks with `\0` and other - IE - REPLACEMENT_CHARACTER if raw.is_some() => false, - '\x00' => false, - '\x01'..='\x1f' | '\x7F' => false, - '0'..='9' if i == 0 => false, - '0'..='9' if i == 1 && &value[0..1] == "-" => false, - // If the character is the first character and is a "-" (U+002D), and there is no - // second character, then the escaped character. - '-' if i == 0 && value.len() == 1 => false, - // If the character is not handled by one of the above rules and is greater than or - // equal to U+0080, is "-" (U+002D) or "_" (U+005F), or is in one of the ranges - // [0-9] (U+0030 to U+0039), [A-Z] (U+0041 to U+005A), or \[a-z] - // (U+0061 to U+007A), then the character itself. - _ if !c.is_ascii() - || c == '-' - || c == '_' - || c.is_ascii_digit() - || c.is_ascii_uppercase() - || c.is_ascii_lowercase() => - { - true - } - // Otherwise, the escaped character. - _ => false, +#[inline] +fn is_escape_not_required(value: &str, raw: Option<&str>) -> bool { + value.chars().enumerate().all(|(i, c)| { + match c { + REPLACEMENT_CHARACTER if raw.is_some() => false, + '\x00' => false, + '\x01'..='\x1f' | '\x7F' => false, + '0'..='9' if i == 0 => false, + '0'..='9' if i == 1 && &value[0..1] == "-" => false, + '-' if i == 0 && value.len() == 1 => false, + _ if !c.is_ascii() + || c == '-' + || c == '_' + || c.is_ascii_digit() + || c.is_ascii_uppercase() + || c.is_ascii_lowercase() => + { + true } - }) - } + // Otherwise, the escaped character. + _ => false, + } + }) +} +// https://drafts.csswg.org/cssom/#serialize-an-identifier +pub fn serialize_ident<'a>(value: &'a str, raw: Option<&str>, minify: bool) -> Cow<'a, str> { // Fast-path - if can_skip(value, raw) { + if is_escape_not_required(value, raw) { return Cow::Borrowed(value); } From 1aaaa2b09425c951599ebbb7ac9a6b9976b208f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Donny/=EA=B0=95=EB=8F=99=EC=9C=A4?= Date: Thu, 8 Dec 2022 12:53:32 +0900 Subject: [PATCH 5/6] Optimize --- crates/swc_css_utils/src/lib.rs | 32 +++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/crates/swc_css_utils/src/lib.rs b/crates/swc_css_utils/src/lib.rs index 91a5a58a1f4a..7b057c685b50 100644 --- a/crates/swc_css_utils/src/lib.rs +++ b/crates/swc_css_utils/src/lib.rs @@ -155,17 +155,35 @@ pub static NAMED_COLORS: Lazy> = Lazy::new(|| { #[inline] fn is_escape_not_required(value: &str, raw: Option<&str>) -> bool { - value.chars().enumerate().all(|(i, c)| { + if value.is_empty() { + return true; + } + + if raw.is_some() && value.contains(REPLACEMENT_CHARACTER) { + return false; + } + + if (b'0'..=b'9').contains(&value.as_bytes()[0]) { + return false; + } + + if value.len() == 1 && value.as_bytes()[0] == b'-' { + return false; + } + + if value.len() >= 2 + && value.as_bytes()[0] == b'-' + && (b'0'..=b'9').contains(&value.as_bytes()[1]) + { + return false; + } + + value.chars().all(|c| { match c { - REPLACEMENT_CHARACTER if raw.is_some() => false, '\x00' => false, '\x01'..='\x1f' | '\x7F' => false, - '0'..='9' if i == 0 => false, - '0'..='9' if i == 1 && &value[0..1] == "-" => false, - '-' if i == 0 && value.len() == 1 => false, + '-' | '_' => true, _ if !c.is_ascii() - || c == '-' - || c == '_' || c.is_ascii_digit() || c.is_ascii_uppercase() || c.is_ascii_lowercase() => From 81af0b6513759912644b675561900e2ddbb5478d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Donny/=EA=B0=95=EB=8F=99=EC=9C=A4?= Date: Thu, 8 Dec 2022 12:59:35 +0900 Subject: [PATCH 6/6] Iterate only once --- crates/swc_css_utils/src/lib.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/crates/swc_css_utils/src/lib.rs b/crates/swc_css_utils/src/lib.rs index 7b057c685b50..f64bc237a8ca 100644 --- a/crates/swc_css_utils/src/lib.rs +++ b/crates/swc_css_utils/src/lib.rs @@ -159,10 +159,6 @@ fn is_escape_not_required(value: &str, raw: Option<&str>) -> bool { return true; } - if raw.is_some() && value.contains(REPLACEMENT_CHARACTER) { - return false; - } - if (b'0'..=b'9').contains(&value.as_bytes()[0]) { return false; } @@ -180,6 +176,7 @@ fn is_escape_not_required(value: &str, raw: Option<&str>) -> bool { value.chars().all(|c| { match c { + REPLACEMENT_CHARACTER if raw.is_some() => false, '\x00' => false, '\x01'..='\x1f' | '\x7F' => false, '-' | '_' => true,