From a6b08ba869255be4d357393b4e403436161d8f51 Mon Sep 17 00:00:00 2001 From: Christopher Morton Date: Mon, 17 Oct 2022 17:45:21 +0100 Subject: [PATCH] Add support for UTF-8 BOM encoding (#28) The UTF-8 Byte Order Mark `\ufeff` will be ignored when parsing. See #27 --- dotenv/src/iter.rs | 14 +++++++++++++- dotenv/tests/test-ignore-bom.rs | 22 ++++++++++++++++++++++ 2 files changed, 35 insertions(+), 1 deletion(-) create mode 100644 dotenv/tests/test-ignore-bom.rs diff --git a/dotenv/src/iter.rs b/dotenv/src/iter.rs index 6b9f2bc..2d4a830 100644 --- a/dotenv/src/iter.rs +++ b/dotenv/src/iter.rs @@ -22,7 +22,9 @@ impl Iter { } /// Loads all variables found in the `reader` into the environment. - pub fn load(self) -> Result<()> { + pub fn load(mut self) -> Result<()> { + self.remove_bom()?; + for item in self { let (key, value) = item?; if env::var(&key).is_err() { @@ -32,6 +34,16 @@ impl Iter { Ok(()) } + + fn remove_bom(&mut self) -> Result<()> { + let buffer = self.lines.buf.fill_buf().map_err(Error::Io)?; + // https://www.compart.com/en/unicode/U+FEFF + if buffer.starts_with(&[0xEF, 0xBB, 0xBF]) { + // remove the BOM from the bufreader + self.lines.buf.consume(3); + } + Ok(()) + } } struct QuotedLines { diff --git a/dotenv/tests/test-ignore-bom.rs b/dotenv/tests/test-ignore-bom.rs new file mode 100644 index 0000000..ef2e618 --- /dev/null +++ b/dotenv/tests/test-ignore-bom.rs @@ -0,0 +1,22 @@ +mod common; + +use crate::common::*; +use dotenvy::*; +use std::{env, error::Error, result::Result}; + +#[test] +fn test_ignore_bom() -> Result<(), Box<dyn Error>> { + let bom = "\u{feff}"; + let dir = tempdir_with_dotenv(&format!("{}TESTKEY=test_val", bom))?; + + let mut path = env::current_dir()?; + path.push(".env"); + + from_path(&path)?; + + assert_eq!(env::var("TESTKEY")?, "test_val"); + + env::set_current_dir(dir.path().parent().unwrap())?; + dir.close()?; + Ok(()) +}