From eeb2e5b9223c03590cb095b3b36dec191aad644b Mon Sep 17 00:00:00 2001 From: David Tolnay Date: Wed, 28 Sep 2022 18:42:15 -0700 Subject: [PATCH 1/3] Add a test involving byte order mark Currently fails with: thread 'byte_order_mark' panicked at 'called `Result::unwrap()` on an `Err` value: LexError { span: Span }', tests/test.rs:637:48 --- tests/test.rs | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tests/test.rs b/tests/test.rs index 16f775ed..a7f47cd4 100644 --- a/tests/test.rs +++ b/tests/test.rs @@ -630,3 +630,13 @@ fn check_spans_internal(ts: TokenStream, lines: &mut &[(usize, usize, usize, usi } } } + +#[test] +fn byte_order_mark() { + let string = "\u{feff}foo"; + let tokens = string.parse::<TokenStream>().unwrap(); + match tokens.into_iter().next().unwrap() { + TokenTree::Ident(ident) => assert_eq!(ident, "foo"), + _ => unreachable!(), + } +} From e2327f08540b3d32778b9fd08ecee38406c773d7 Mon Sep 17 00:00:00 2001 From: David Tolnay Date: Wed, 28 Sep 2022 18:52:07 -0700 Subject: [PATCH 2/3] Strip a byte order mark if present --- src/fallback.rs | 8 +++++++- src/parse.rs | 4 ++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/src/fallback.rs b/src/fallback.rs index 4c476e5d..fe4f248d 100644 --- a/src/fallback.rs +++ b/src/fallback.rs @@ -182,7 +182,13 @@ impl FromStr for TokenStream { fn from_str(src: &str) -> Result<TokenStream, LexError> { // Create a dummy file & add it to the source map - let cursor = get_cursor(src); + let mut cursor = get_cursor(src); + + // Strip a byte order mark if present + const BYTE_ORDER_MARK: &str = "\u{feff}"; + if cursor.starts_with(BYTE_ORDER_MARK) { + cursor = cursor.advance(BYTE_ORDER_MARK.len()); + } parse::token_stream(cursor) } diff --git a/src/parse.rs b/src/parse.rs index d2b86a41..04c48336 100644 --- a/src/parse.rs +++ b/src/parse.rs @@ -14,7 +14,7 @@ pub(crate) struct Cursor<'a> { } impl<'a> Cursor<'a> { - fn advance(&self, bytes: usize) -> Cursor<'a> { + pub fn advance(&self, bytes: usize) -> Cursor<'a> { let (_front, 
rest) = self.rest.split_at(bytes); Cursor { rest, @@ -23,7 +23,7 @@ impl<'a> Cursor<'a> { } } - fn starts_with(&self, s: &str) -> bool { + pub fn starts_with(&self, s: &str) -> bool { self.rest.starts_with(s) } From 1d068c8f1a5a9b33eebdb9ebdf35bb409f2bafbf Mon Sep 17 00:00:00 2001 From: David Tolnay Date: Wed, 28 Sep 2022 18:53:26 -0700 Subject: [PATCH 3/3] Test that byte order mark can only go at beginning of input --- tests/test.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/test.rs b/tests/test.rs index a7f47cd4..8f5624db 100644 --- a/tests/test.rs +++ b/tests/test.rs @@ -639,4 +639,7 @@ fn byte_order_mark() { TokenTree::Ident(ident) => assert_eq!(ident, "foo"), _ => unreachable!(), } + + let string = "foo\u{feff}"; + string.parse::<TokenStream>().unwrap_err(); }