From 1d3f0cb9f536088a97106cd2a97c6e49bde95398 Mon Sep 17 00:00:00 2001 From: Spencer Judd Date: Thu, 30 Jan 2020 11:06:47 -0500 Subject: [PATCH] Add standard newline/quoting behavior to dotenv store Rationale ========= The dotenv store as it exists right now performs splitting on newlines to determine where a new key-value pair or comment begins. This works remarkably well, up until you need to handle values that contain newlines. While I couldn't find an official dotenv file format spec, I sampled a number of open-source dotenv parsers and it seems that they typically apply the following rules: Newline handling: * If a value is unquoted and contains a literal `\n` (`0x5c6e`), it is interpreted literally and NOT converted to an actual newline (`0x0a`). * If a value is single-quoted and contains a literal `\n` (`0x5c6e`), it is interpreted literally and NOT converted to an actual newline (`0x0a`). * If a value is double-quoted and contains a literal `\n` (`0x5c6e`), it is converted to an actual newline (`0x0a`). * If a value is either single- or double-quoted, it may contain an actual newline (`0x0a`). Whitespace trimming: * If a value is unquoted and contains any leading or trailing whitespace, it is trimmed. * If a value is either single- or double-quoted and contains any leading or trailing whitespace, it is left untrimmed. Quotation handling: * Because quotations around values have special meaning, they are interpreted and are not included in the parsed value. Literal quotes may be included within a quoted string either by escaping them or using the opposite quotation mark. Because single- and double-quoted values may contain actual newlines, we cannot split our input data on newlines as this may be in the middle of a quoted value. This, along with the other rules around handling quoted values, prompted me to try and implement a more robust parsing solution. This commit is my first stab at that. 
Special Considerations ====================== This is _not_ a backwards-compatible change: * The `dotenv` files produced by this version of SOPS _cannot_ be read by an earlier version. * The `dotenv` files produced by an earlier version of SOPS _can_ be read by this version, with the understanding that the semantics around quotations and newlines have changed. Examples ======== The below examples show how double-quoted values are passed to the running environment: ```console $ echo 'FOO="foo\\nbar\\nbaz"' > plaintext.env $ sops -e --output ciphertext.env plaintext.env $ $ ~/go/bin/sops exec-env ciphertext.env 'env | grep FOO | xxd' 00000000: 464f 4f3d 666f 6f5c 6e62 6172 5c6e 6261 FOO=foo\nbar\nba 00000010: 7a0a z. ``` ```console $ echo 'FOO="foo\nbar\nbaz"' > plaintext.env $ sops -e --output ciphertext.env plaintext.env $ sops exec-env ciphertext.env 'env | grep -A2 FOO | xxd' 00000000: 464f 4f3d 666f 6f0a 6261 720a 6261 7a0a FOO=foo.bar.baz. ``` --- cmd/sops/subcommand/exec/exec.go | 19 +-- stores/dotenv/parser.go | 218 +++++++++++++++++++++++++++++++ stores/dotenv/store.go | 32 ++--- stores/dotenv/store_test.go | 75 +++++++---- 4 files changed, 284 insertions(+), 60 deletions(-) create mode 100644 stores/dotenv/parser.go diff --git a/cmd/sops/subcommand/exec/exec.go b/cmd/sops/subcommand/exec/exec.go index c5db688c9..e416b779e 100644 --- a/cmd/sops/subcommand/exec/exec.go +++ b/cmd/sops/subcommand/exec/exec.go @@ -1,13 +1,14 @@ package exec import ( - "bytes" + "fmt" "io/ioutil" "os" "runtime" "strings" "go.mozilla.org/sops/v3/logging" + "go.mozilla.org/sops/v3/stores/dotenv" "github.com/sirupsen/logrus" ) @@ -83,15 +84,17 @@ func ExecWithEnv(opts ExecOpts) { } env := os.Environ() - lines := bytes.Split(opts.Plaintext, []byte("\n")) - for _, line := range lines { - if len(line) == 0 { - continue - } - if line[0] == '#' { + + items, err := dotenv.Parse(opts.Plaintext) + if err != nil { + log.Fatal(err) + } + + for _, item := range items { + if item.Value == nil { 
continue } - env = append(env, string(line)) + env = append(env, fmt.Sprintf("%s=%s", item.Key.(string), item.Value.(string))) } cmd := BuildCommand(opts.Command) diff --git a/stores/dotenv/parser.go b/stores/dotenv/parser.go new file mode 100644 index 000000000..112db01fb --- /dev/null +++ b/stores/dotenv/parser.go @@ -0,0 +1,218 @@ +package dotenv + +import ( + "bufio" + "bytes" + "fmt" + "regexp" + "strings" + + "go.mozilla.org/sops/v3" +) + +const ( + stateKey int = iota + stateValue + stateComment +) + +var KeyRegexp = regexp.MustCompile(`^[A-Za-z_]+[A-Za-z0-9_]*$`) + +func Parse(data []byte) (items []sops.TreeItem, err error) { + nextState := stateKey + var currentKey string + + for _, token := range tokenize(data) { + state := nextState + + if token == "#" { + nextState = stateComment + continue + } + + if token == "=" { + nextState = stateValue + continue + } + + if isAllWhitespace(token) { + continue + } + + if state == stateComment { + nextState = stateKey + items = append(items, sops.TreeItem{Key: sops.Comment{token}, Value: nil}) + } + + if state == stateKey { + if KeyRegexp.MatchString(token) { + currentKey = token + } else { + return nil, fmt.Errorf("invalid dotenv key: %q", token) + } + } + + if state == stateValue { + nextState = stateKey + items = append(items, sops.TreeItem{Key: currentKey, Value: parseValue(token)}) + } + } + + return +} + +func parseValue(value string) string { + if value[0] == '"' { + expanded := strings.Replace(value, "\\n", "\n", -1) + expanded = strings.Replace(expanded, "\\\n", "\\n", -1) + return expanded[1 : len(expanded)-1] + } + + if value[0] == '\'' { + return value[1 : len(value)-1] + } + + return strings.Trim(value, "\t ") +} + +func tokenize(data []byte) []string { + scanner := bufio.NewScanner(bytes.NewReader(data)) + nextState := stateKey + + split := func(data []byte, atEOF bool) (advance int, token []byte, err error) { + state := nextState + + if len(data) == 0 { + return 0, nil, nil + } + + if 
isWhitespace(data[0]) { + advance, token, err = consumeWhitespace(data) + return + } + + if state == stateKey && data[0] == '#' { + advance, token, err = 1, []byte{data[0]}, nil + nextState = stateComment + return + } + + if state == stateComment { + advance, token, err = consumeLine(data) + nextState = stateKey + return + } + + if data[0] == '=' { + advance, token, err = 1, []byte{data[0]}, nil + nextState = stateValue + return + } + + if state == stateKey { + advance, token, err = consumeKey(data) + nextState = stateValue + return + } + + if state == stateValue && (data[0] == '"' || data[0] == '\'') { + advance, token, err = consumeQuoted(data) + nextState = stateKey + return + } + + if state == stateValue { + advance, token, err = consumeLine(data) + nextState = stateKey + return + } + + return + } + + scanner.Split(split) + + tokens := []string{} + for scanner.Scan() { + tokens = append(tokens, scanner.Text()) + } + + return tokens +} + +func consumeWhitespace(data []byte) (advance int, token []byte, err error) { + for _, b := range data { + if isWhitespace(b) { + advance += 1 + token = append(token, b) + } else { + break + } + } + return +} + +func consumeQuoted(data []byte) (advance int, token []byte, err error) { + advance = 1 + stopAt := data[0] + escaping := false + token = append(token, stopAt) + + for _, b := range data[1:] { + advance += 1 + + if !escaping && b == stopAt { + token = append(token, stopAt) + return + } + + if escaping { + token = append(token, '\\') + escaping = false + } + + if b == '\\' { + escaping = true + continue + } + + token = append(token, b) + } + + return 0, nil, fmt.Errorf("missing closing quotation mark") +} + +func consumeKey(data []byte) (advance int, token []byte, err error) { + for _, b := range data { + if b == '=' { + return + } + advance += 1 + token = append(token, b) + } + return +} + +func consumeLine(data []byte) (advance int, token []byte, err error) { + for _, b := range data { + advance += 1 + if b == '\n' || b 
== '\r' { + return + } + token = append(token, b) + } + return +} + +func isWhitespace(b byte) bool { + return b == ' ' || b == '\t' || b == '\r' || b == '\n' +} + +func isAllWhitespace(s string) bool { + for _, b := range []byte(s) { + if !isWhitespace(b) { + return false + } + } + return true +} diff --git a/stores/dotenv/store.go b/stores/dotenv/store.go index 8add8a097..c4fb91415 100644 --- a/stores/dotenv/store.go +++ b/stores/dotenv/store.go @@ -63,30 +63,11 @@ func (store *Store) LoadEncryptedFile(in []byte) (sops.Tree, error) { // sops runtime object func (store *Store) LoadPlainFile(in []byte) (sops.TreeBranches, error) { var branches sops.TreeBranches - var branch sops.TreeBranch - - for _, line := range bytes.Split(in, []byte("\n")) { - if len(line) == 0 { - continue - } - if line[0] == '#' { - branch = append(branch, sops.TreeItem{ - Key: sops.Comment{string(line[1:])}, - Value: nil, - }) - } else { - pos := bytes.Index(line, []byte("=")) - if pos == -1 { - return nil, fmt.Errorf("invalid dotenv input line: %s", line) - } - branch = append(branch, sops.TreeItem{ - Key: string(line[:pos]), - Value: strings.Replace(string(line[pos+1:]), "\\n", "\n", -1), - }) - } + items, err := Parse(in) + if err != nil { + return nil, err } - - branches = append(branches, branch) + branches = append(branches, items) return branches, nil } @@ -118,9 +99,12 @@ func (store *Store) EmitPlainFile(in sops.TreeBranches) ([]byte, error) { var line string if comment, ok := item.Key.(sops.Comment); ok { line = fmt.Sprintf("#%s\n", comment.Value) - } else { + } else if strings.HasPrefix(item.Key.(string), SopsPrefix) { value := strings.Replace(item.Value.(string), "\n", "\\n", -1) line = fmt.Sprintf("%s=%s\n", item.Key, value) + } else { + value := strings.Replace(item.Value.(string), `\n`, `\\n`, -1) + line = fmt.Sprintf("%s=\"%s\"\n", item.Key, value) } buffer.WriteString(line) } diff --git a/stores/dotenv/store_test.go b/stores/dotenv/store_test.go index f4bd2cc85..97b618596 
100644 --- a/stores/dotenv/store_test.go +++ b/stores/dotenv/store_test.go @@ -8,39 +8,58 @@ import ( "go.mozilla.org/sops/v3" ) -var PLAIN = []byte(strings.TrimLeft(` -VAR1=val1 -VAR2=val2 -#comment -VAR3_unencrypted=val3 -VAR4=val4\nval4 +var ORIGINAL_PLAIN = []byte(strings.TrimLeft(` +#Comment +UNQUOTED=value +UNQUOTED_ESCAPED_NEWLINE=escaped\nnewline +UNQUOTED_WHITESPACE= trimmed whitespace +SINGLEQUOTED='value' +SINGLEQUOTED_NEWLINE='real +newline' +SINGLEQUOTED_ESCAPED_NEWLINE='escaped\nnewline' +SINGLEQUOTED_WHITESPACE=' untrimmed whitespace ' +DOUBLEQUOTED="value" +DOUBLEQUOTED_NEWLINE="real +newline" +DOUBLEQUOTED_ESCAPED_NEWLINE="real\nnewline" +DOUBLEQUOTED_WHITESPACE=" untrimmed whitespace " +`, "\n")) + +var EMITTED_PLAIN = []byte(strings.TrimLeft(` +#Comment +UNQUOTED="value" +UNQUOTED_ESCAPED_NEWLINE="escaped\\nnewline" +UNQUOTED_WHITESPACE="trimmed whitespace" +SINGLEQUOTED="value" +SINGLEQUOTED_NEWLINE="real +newline" +SINGLEQUOTED_ESCAPED_NEWLINE="escaped\\nnewline" +SINGLEQUOTED_WHITESPACE=" untrimmed whitespace " +DOUBLEQUOTED="value" +DOUBLEQUOTED_NEWLINE="real +newline" +DOUBLEQUOTED_ESCAPED_NEWLINE="real +newline" +DOUBLEQUOTED_WHITESPACE=" untrimmed whitespace " `, "\n")) var BRANCH = sops.TreeBranch{ - sops.TreeItem{ - Key: "VAR1", - Value: "val1", - }, - sops.TreeItem{ - Key: "VAR2", - Value: "val2", - }, - sops.TreeItem{ - Key: sops.Comment{"comment"}, - Value: nil, - }, - sops.TreeItem{ - Key: "VAR3_unencrypted", - Value: "val3", - }, - sops.TreeItem{ - Key: "VAR4", - Value: "val4\nval4", - }, + sops.TreeItem{Key: sops.Comment{"Comment"}, Value: nil}, + sops.TreeItem{Key: "UNQUOTED", Value: "value"}, + sops.TreeItem{Key: "UNQUOTED_ESCAPED_NEWLINE", Value: "escaped\\nnewline"}, + sops.TreeItem{Key: "UNQUOTED_WHITESPACE", Value: "trimmed whitespace"}, + sops.TreeItem{Key: "SINGLEQUOTED", Value: "value"}, + sops.TreeItem{Key: "SINGLEQUOTED_NEWLINE", Value: "real\nnewline"}, + sops.TreeItem{Key: "SINGLEQUOTED_ESCAPED_NEWLINE", Value: 
"escaped\\nnewline"}, + sops.TreeItem{Key: "SINGLEQUOTED_WHITESPACE", Value: " untrimmed whitespace "}, + sops.TreeItem{Key: "DOUBLEQUOTED", Value: "value"}, + sops.TreeItem{Key: "DOUBLEQUOTED_NEWLINE", Value: "real\nnewline"}, + sops.TreeItem{Key: "DOUBLEQUOTED_ESCAPED_NEWLINE", Value: "real\nnewline"}, + sops.TreeItem{Key: "DOUBLEQUOTED_WHITESPACE", Value: " untrimmed whitespace "}, } func TestLoadPlainFile(t *testing.T) { - branches, err := (&Store{}).LoadPlainFile(PLAIN) + branches, err := (&Store{}).LoadPlainFile(ORIGINAL_PLAIN) assert.Nil(t, err) assert.Equal(t, BRANCH, branches[0]) } @@ -50,7 +69,7 @@ func TestEmitPlainFile(t *testing.T) { } bytes, err := (&Store{}).EmitPlainFile(branches) assert.Nil(t, err) - assert.Equal(t, PLAIN, bytes) + assert.Equal(t, EMITTED_PLAIN, bytes) } func TestEmitValueString(t *testing.T) {