Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add standard newline/quoting behavior to dotenv store
Rationale ========= The dotenv store as it exists right now performs splitting on newlines to determine where a new key-value pair or comment begins. This works remarkably well, up until you need to handle values that contain newlines. While I couldn't find an official dotenv file format spec, I sampled a number of open-source dotenv parsers and it seems that they typically apply the following rules: Newline handling: * If a value is unquoted and contains a literal `\n` (`0x5c6e`), it is interpreted literally and NOT converted to an actual newline (`0x0a`). * If a value is single-quoted and contains a literal `\n` (`0x5c6e`), it is interpreted literally and NOT converted to an actual newline (`0x0a`). * If a value is double-quoted and contains a literal `\n` (`0x5c6e`), it is converted to an actual newline (`0x0a`). * If a value is either single- or double-quoted, it may contain an actual newline (`0x0a`). Whitespace trimming: * If a value is unquoted and contains any leading or trailing whitespace, it is trimmed. * If a value is either single- or double-quoted and contains any leading or trailing whitespace, it is left untrimmed. Quotation handling: * Because quotations around values have special meaning, they are interpreted and are not included in the parsed value. Literal quotes may be included within a quoted string either by escaping them or using the opposite quotation mark. Because single- and double-quoted values may contain actual newlines, we cannot split our input data on newlines as this may be in the middle of a quoted value. This, along with the other rules around handling quoted values, prompted me to try and implement a more robust parsing solution. This commit is my first stab at that. Special Considerations ====================== This is _not_ a backwards-compatible change: * The `dotenv` files produced by this version of SOPS _cannot_ be read by an earlier version.
* The `dotenv` files produced by an earlier version of SOPS _can_ be read by this version, with the understanding that the semantics around quotations and newlines have changed. Examples ======== The below examples show how double-quoted values are passed to the running environment: ```console $ echo 'FOO="foo\\nbar\\nbaz"' > plaintext.env $ sops -e --output ciphertext.env plaintext.env $ sops exec-env ciphertext.env 'env | grep FOO | xxd' 00000000: 464f 4f3d 666f 6f5c 6e62 6172 5c6e 6261 FOO=foo\nbar\nba 00000010: 7a0a z. ``` ```console $ echo 'FOO="foo\nbar\nbaz"' > plaintext.env $ sops -e --output ciphertext.env plaintext.env $ sops exec-env ciphertext.env 'env | grep -A2 FOO | xxd' 00000000: 464f 4f3d 666f 6f0a 6261 720a 6261 7a0a FOO=foo.bar.baz. ```
- Loading branch information
Spencer Judd
committed
Feb 4, 2020
1 parent
1634350
commit 623fa10
Showing
4 changed files
with
306 additions
and
60 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,239 @@ | ||
package dotenv | ||
|
||
import ( | ||
"bufio" | ||
"bytes" | ||
"fmt" | ||
"regexp" | ||
"strings" | ||
|
||
"go.mozilla.org/sops/v3" | ||
) | ||
|
||
// Parser/tokenizer states. The tokenizer and Parse share a small state
// machine that tracks whether the next token is expected to be a key,
// a value, or comment text.
const (
	stateKey int = iota
	stateValue
	stateComment
)

// KeyRegexp matches valid dotenv keys: a leading letter or underscore
// followed by any number of letters, digits, or underscores.
var KeyRegexp = regexp.MustCompile(`^[A-Za-z_]+[A-Za-z0-9_]*$`)
|
||
func Parse(data []byte) (items []sops.TreeItem, err error) { | ||
nextState := stateKey | ||
var currentKey string | ||
|
||
for _, token := range tokenize(data) { | ||
state := nextState | ||
|
||
if token == "#" { | ||
nextState = stateComment | ||
continue | ||
} | ||
|
||
if token == "=" { | ||
nextState = stateValue | ||
continue | ||
} | ||
|
||
if state == stateComment && containsNewline(token) { | ||
nextState = stateKey | ||
continue | ||
} | ||
|
||
if isAllWhitespace(token) { | ||
continue | ||
} | ||
|
||
if state == stateComment { | ||
nextState = stateKey | ||
items = append(items, sops.TreeItem{Key: sops.Comment{token}, Value: nil}) | ||
} | ||
|
||
if state == stateKey { | ||
if KeyRegexp.MatchString(token) { | ||
currentKey = token | ||
} else { | ||
return nil, fmt.Errorf("invalid dotenv key: %q", token) | ||
} | ||
} | ||
|
||
if state == stateValue { | ||
nextState = stateKey | ||
items = append(items, sops.TreeItem{Key: currentKey, Value: parseValue(token)}) | ||
} | ||
} | ||
|
||
return | ||
} | ||
|
||
// parseValue converts a raw value token into its final string form,
// applying the dotenv quoting rules:
//
//   - Double-quoted values have the surrounding quotes removed and each
//     literal `\n` sequence expanded to an actual newline; an escaped
//     backslash before `n` (arriving here as `\` + `\n` after the first
//     expansion) is restored to a literal `\n`.
//   - Single-quoted values have the surrounding quotes removed and are
//     otherwise taken verbatim.
//   - Unquoted values are trimmed of leading/trailing spaces and tabs.
func parseValue(value string) string {
	// Guard against empty or single-character tokens so the index and
	// slice operations below cannot panic. tokenize does not currently
	// emit such value tokens (quoted tokens always carry both quotes),
	// but this keeps the function safe on its own.
	if value == "" {
		return ""
	}

	if len(value) > 1 && value[0] == '"' {
		// Expand every `\n` into a real newline first, then undo the
		// expansion for sequences that were an escaped backslash
		// followed by `n`.
		expanded := strings.Replace(value, "\\n", "\n", -1)
		expanded = strings.Replace(expanded, "\\\n", "\\n", -1)
		return expanded[1 : len(expanded)-1]
	}

	if len(value) > 1 && value[0] == '\'' {
		return value[1 : len(value)-1]
	}

	return strings.Trim(value, "\t ")
}
|
||
func tokenize(data []byte) []string { | ||
scanner := bufio.NewScanner(bytes.NewReader(data)) | ||
nextState := stateKey | ||
|
||
split := func(data []byte, atEOF bool) (advance int, token []byte, err error) { | ||
state := nextState | ||
|
||
if len(data) == 0 { | ||
return 0, nil, nil | ||
} | ||
|
||
if isWhitespace(data[0]) { | ||
advance, token, err = consumeWhitespace(data) | ||
if state == stateComment && containsNewline(string(token)) { | ||
nextState = stateKey | ||
} | ||
return | ||
} | ||
|
||
if state == stateKey && data[0] == '#' { | ||
advance, token, err = 1, []byte{data[0]}, nil | ||
nextState = stateComment | ||
return | ||
} | ||
|
||
if state == stateComment { | ||
advance, token, err = consumeLine(data) | ||
nextState = stateKey | ||
return | ||
} | ||
|
||
if data[0] == '=' { | ||
advance, token, err = 1, []byte{data[0]}, nil | ||
nextState = stateValue | ||
return | ||
} | ||
|
||
if state == stateKey { | ||
advance, token, err = consumeKey(data) | ||
nextState = stateValue | ||
return | ||
} | ||
|
||
if state == stateValue && (data[0] == '"' || data[0] == '\'') { | ||
advance, token, err = consumeQuoted(data) | ||
nextState = stateKey | ||
return | ||
} | ||
|
||
if state == stateValue { | ||
advance, token, err = consumeLine(data) | ||
nextState = stateKey | ||
return | ||
} | ||
|
||
return | ||
} | ||
|
||
scanner.Split(split) | ||
|
||
tokens := []string{} | ||
for scanner.Scan() { | ||
tokens = append(tokens, scanner.Text()) | ||
} | ||
|
||
return tokens | ||
} | ||
|
||
// consumeWhitespace returns the leading run of whitespace bytes
// (space, tab, CR, LF) in data and its length in bytes.
func consumeWhitespace(data []byte) (advance int, token []byte, err error) {
	n := 0
	for n < len(data) {
		b := data[n]
		if b != ' ' && b != '\t' && b != '\r' && b != '\n' {
			break
		}
		n++
	}
	advance = n
	// Appending a zero-length slice leaves token nil, matching the
	// "no whitespace" case.
	token = append(token, data[:n]...)
	return
}
|
||
// consumeQuoted scans a quoted value starting at data[0] (either `'` or
// `"`) and returns the token including its surrounding quote characters.
// A backslash escapes the byte that follows it; the backslash is kept in
// the token so parseValue can interpret the escape sequence later. An
// error is returned when the closing quote is never found.
func consumeQuoted(data []byte) (advance int, token []byte, err error) {
	advance = 1
	stopAt := data[0]
	escaping := false
	token = append(token, stopAt)

	for _, b := range data[1:] {
		advance += 1

		if escaping {
			// Keep the escape sequence verbatim, and consume the escaped
			// byte exactly once: it must not be re-examined as a quote or
			// another backslash. (This fixes `\\` immediately followed by
			// the closing quote failing to terminate the value.)
			token = append(token, '\\', b)
			escaping = false
			continue
		}

		if b == stopAt {
			token = append(token, stopAt)
			return
		}

		if b == '\\' {
			escaping = true
			continue
		}

		token = append(token, b)
	}

	return 0, nil, fmt.Errorf("missing closing quotation mark")
}
|
||
// consumeKey returns every byte up to (but not including) the next '='
// in data, or all of data when no '=' is present.
func consumeKey(data []byte) (advance int, token []byte, err error) {
	end := bytes.IndexByte(data, '=')
	if end < 0 {
		end = len(data)
	}
	advance = end
	// Appending a zero-length slice leaves token nil when the first byte
	// is already '='.
	token = append(token, data[:end]...)
	return
}
|
||
// consumeLine returns every byte up to (but not including) the next CR
// or LF in data, or all of data when neither is present.
func consumeLine(data []byte) (advance int, token []byte, err error) {
	end := bytes.IndexAny(data, "\r\n")
	if end < 0 {
		end = len(data)
	}
	advance = end
	// Appending a zero-length slice leaves token nil when the first byte
	// is already a line terminator.
	token = append(token, data[:end]...)
	return
}
|
||
// isWhitespace reports whether b is a space, tab, carriage return, or
// line feed.
func isWhitespace(b byte) bool {
	switch b {
	case ' ', '\t', '\r', '\n':
		return true
	default:
		return false
	}
}
|
||
// isAllWhitespace reports whether every byte of s is whitespace
// (space, tab, CR, LF). It is vacuously true for the empty string.
func isAllWhitespace(s string) bool {
	return strings.TrimLeft(s, " \t\r\n") == ""
}
|
||
// isNewline reports whether b is a carriage return or line feed.
func isNewline(b byte) bool {
	switch b {
	case '\r', '\n':
		return true
	default:
		return false
	}
}
|
||
// containsNewline reports whether s contains a carriage return or line
// feed anywhere.
func containsNewline(s string) bool {
	return strings.ContainsAny(s, "\r\n")
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.