Skip to content

Commit

Permalink
Merge pull request #354 from dtolnay/bom
Browse files Browse the repository at this point in the history
Handle parsing input that starts with byte order mark
  • Loading branch information
dtolnay committed Sep 29, 2022
2 parents f26128d + 1d068c8 commit 6004b0a
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 3 deletions.
8 changes: 7 additions & 1 deletion src/fallback.rs
Expand Up @@ -182,7 +182,13 @@ impl FromStr for TokenStream {

/// Parses `src` into a `TokenStream`.
///
/// If the input begins with a byte order mark (U+FEFF), the cursor is moved
/// past it before `parse::token_stream` is called. Only a mark at the very
/// start is skipped; the rest of the input is handed to the parser unchanged.
/// The `Result` produced by `parse::token_stream` is returned as-is.
fn from_str(src: &str) -> Result<TokenStream, LexError> {
// Create a dummy file & add it to the source map
// NOTE(review): this immutable binding is immediately shadowed by the
// `let mut cursor` on the next line; it looks like the pre-change line of
// the diff rendered here rather than live code — confirm before relying on it.
let cursor = get_cursor(src);
let mut cursor = get_cursor(src);

// Strip a byte order mark if present
// `BYTE_ORDER_MARK.len()` is the mark's length in UTF-8 bytes, so the cursor
// is advanced by exactly the bytes that `starts_with` just matched.
const BYTE_ORDER_MARK: &str = "\u{feff}";
if cursor.starts_with(BYTE_ORDER_MARK) {
cursor = cursor.advance(BYTE_ORDER_MARK.len());
}

parse::token_stream(cursor)
}
Expand Down
4 changes: 2 additions & 2 deletions src/parse.rs
Expand Up @@ -14,7 +14,7 @@ pub(crate) struct Cursor<'a> {
}

impl<'a> Cursor<'a> {
fn advance(&self, bytes: usize) -> Cursor<'a> {
pub fn advance(&self, bytes: usize) -> Cursor<'a> {
let (_front, rest) = self.rest.split_at(bytes);
Cursor {
rest,
Expand All @@ -23,7 +23,7 @@ impl<'a> Cursor<'a> {
}
}

fn starts_with(&self, s: &str) -> bool {
pub fn starts_with(&self, s: &str) -> bool {
self.rest.starts_with(s)
}

Expand Down
13 changes: 13 additions & 0 deletions tests/test.rs
Expand Up @@ -630,3 +630,16 @@ fn check_spans_internal(ts: TokenStream, lines: &mut &[(usize, usize, usize, usi
}
}
}

#[test]
fn byte_order_mark() {
    // A byte order mark at the very start of the input is skipped, so the
    // first token produced is the identifier that follows it.
    let leading_bom = "\u{feff}foo";
    let stream: TokenStream = leading_bom.parse().unwrap();
    let first = stream.into_iter().next().unwrap();
    if let TokenTree::Ident(ident) = first {
        assert_eq!(ident, "foo");
    } else {
        unreachable!();
    }

    // A byte order mark anywhere else is not stripped and must be rejected.
    let trailing_bom = "foo\u{feff}";
    let outcome = trailing_bom.parse::<TokenStream>();
    outcome.unwrap_err();
}

0 comments on commit 6004b0a

Please sign in to comment.