Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add standard newline/quoting behavior to dotenv store
Rationale ========= The dotenv store as it exists right now performs splitting on newlines to determine where a new key-value pair or comment begins. This works remarkably well, up until you need to handle values that contain newlines. While I couldn't find an official dotenv file format spec, I sampled a number of open-source dotenv parsers and it seems that they typically apply the following rules: Newline handling: * If a value is unquoted and contains a literal `\n` (`0x5c6e`), it is interpreted literally and NOT converted to an actual newline (`0x0a`). * If a value is single-quoted and contains a literal `\n` (`0x5c6e`), it is interpreted literally and NOT converted to an actual newline (`0x0a`). * If a value is double-quoted and contains a literal `\n` (`0x5c6e`), it is converted to an actual newline (`0x0a`). * If a value is either single- or double-quoted, it may contain an actual newline (`0x0a`). Whitespace trimming: * If a value is unquoted and contains any leading or trailing whitespace, it is trimmed. * If a value is either single- or double-quoted and contains any leading or trailing whitespace, it is left untrimmed. Quotation handling: * Because quotations around values have special meaning, they are interpreted and are not included in the parsed value. Literal quotes may be included within a quoted string either by escaping them or using the opposite quotation mark. Because single- and double-quoted values may contain actual newlines, we cannot split our input data on newlines as this may be in the middle of a quoted value. This, along with the other rules around handling quoted values, prompted me to try and implement a more robust parsing solution. This commit is my first stab at that. Special Considerations ====================== This is _not_ a backwards-compatible change: * The `dotenv` files produced by this version of SOPS _cannot_ be read by an earlier version.
* The `dotenv` files produced by an earlier version of SOPS _can_ be read by this version, with the understanding that the semantics around quotations and newlines have changed. Examples ======== The below examples show how double-quoted values are passed to the running environment: ```console $ echo 'FOO="foo\\nbar\\nbaz"' > plaintext.env $ sops -e --output ciphertext.env plaintext.env $ sops exec-env ciphertext.env 'env | grep FOO | xxd' 00000000: 464f 4f3d 666f 6f5c 6e62 6172 5c6e 6261 FOO=foo\nbar\nba 00000010: 7a0a z. ``` ```console $ echo 'FOO="foo\nbar\nbaz"' > plaintext.env $ sops -e --output ciphertext.env plaintext.env $ sops exec-env ciphertext.env 'env | grep -A2 FOO | xxd' 00000000: 464f 4f3d 666f 6f0a 6261 720a 6261 7a0a FOO=foo.bar.baz. ```
- Loading branch information
Spencer Judd
committed
Feb 4, 2020
1 parent
1634350
commit 623fa10
Showing
4 changed files
with
306 additions
and
60 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,239 @@ | ||
package dotenv | ||
|
||
import ( | ||
"bufio" | ||
"bytes" | ||
"fmt" | ||
"regexp" | ||
"strings" | ||
|
||
"go.mozilla.org/sops/v3" | ||
) | ||
|
||
// Parser/tokenizer states. The tokenizer and Parse share a small state
// machine that tracks whether the next token is expected to be a key,
// a value, or comment text.
const (
	stateKey int = iota
	stateValue
	stateComment
)

// KeyRegexp matches valid dotenv keys: a leading letter or underscore
// followed by any number of letters, digits, or underscores.
var KeyRegexp = regexp.MustCompile(`^[A-Za-z_]+[A-Za-z0-9_]*$`)
|
||
func Parse(data []byte) (items []sops.TreeItem, err error) { | ||
nextState := stateKey | ||
var currentKey string | ||
|
||
for _, token := range tokenize(data) { | ||
state := nextState | ||
|
||
if token == "#" { | ||
nextState = stateComment | ||
continue | ||
} | ||
|
||
if token == "=" { | ||
nextState = stateValue | ||
continue | ||
} | ||
|
||
if state == stateComment && containsNewline(token) { | ||
nextState = stateKey | ||
continue | ||
} | ||
|
||
if isAllWhitespace(token) { | ||
continue | ||
} | ||
|
||
if state == stateComment { | ||
nextState = stateKey | ||
items = append(items, sops.TreeItem{Key: sops.Comment{token}, Value: nil}) | ||
} | ||
|
||
if state == stateKey { | ||
if KeyRegexp.MatchString(token) { | ||
currentKey = token | ||
} else { | ||
return nil, fmt.Errorf("invalid dotenv key: %q", token) | ||
} | ||
} | ||
|
||
if state == stateValue { | ||
nextState = stateKey | ||
items = append(items, sops.TreeItem{Key: currentKey, Value: parseValue(token)}) | ||
} | ||
} | ||
|
||
return | ||
} | ||
|
||
// parseValue converts a raw value token into its final string form,
// applying the dotenv quoting rules:
//
//   - Double-quoted values have the surrounding quotes removed and each
//     literal `\n` sequence expanded to an actual newline; an escaped
//     backslash before `n` (arriving here as `\` + `\n` after the first
//     expansion) is restored to a literal `\n`.
//   - Single-quoted values have the surrounding quotes removed and are
//     otherwise taken verbatim.
//   - Unquoted values are trimmed of leading/trailing spaces and tabs.
func parseValue(value string) string {
	// Guard against empty or single-character tokens so the index and
	// slice operations below cannot panic. tokenize does not currently
	// emit such value tokens (quoted tokens always carry both quotes),
	// but this keeps the function safe on its own.
	if value == "" {
		return ""
	}

	if len(value) > 1 && value[0] == '"' {
		// Expand every `\n` into a real newline first, then undo the
		// expansion for sequences that were an escaped backslash
		// followed by `n`.
		expanded := strings.Replace(value, "\\n", "\n", -1)
		expanded = strings.Replace(expanded, "\\\n", "\\n", -1)
		return expanded[1 : len(expanded)-1]
	}

	if len(value) > 1 && value[0] == '\'' {
		return value[1 : len(value)-1]
	}

	return strings.Trim(value, "\t ")
}
|
||
func tokenize(data []byte) []string { | ||
scanner := bufio.NewScanner(bytes.NewReader(data)) | ||
nextState := stateKey | ||
|
||
split := func(data []byte, atEOF bool) (advance int, token []byte, err error) { | ||
state := nextState | ||
|
||
if len(data) == 0 { | ||
return 0, nil, nil | ||
} | ||
|
||
if isWhitespace(data[0]) { | ||
advance, token, err = consumeWhitespace(data) | ||
if state == stateComment && containsNewline(string(token)) { | ||
nextState = stateKey | ||
} | ||
return | ||
} | ||
|
||
if state == stateKey && data[0] == '#' { | ||
advance, token, err = 1, []byte{data[0]}, nil | ||
nextState = stateComment | ||
return | ||
} | ||
|
||
if state == stateComment { | ||
advance, token, err = consumeLine(data) | ||
nextState = stateKey | ||
return | ||
} | ||
|
||
if data[0] == '=' { | ||
advance, token, err = 1, []byte{data[0]}, nil | ||
nextState = stateValue | ||
return | ||
} | ||
|
||
if state == stateKey { | ||
advance, token, err = consumeKey(data) | ||
nextState = stateValue | ||
return | ||
} | ||
|
||
if state == stateValue && (data[0] == '"' || data[0] == '\'') { | ||
advance, token, err = consumeQuoted(data) | ||
nextState = stateKey | ||
return | ||
} | ||
|
||
if state == stateValue { | ||
advance, token, err = consumeLine(data) | ||
nextState = stateKey | ||
return | ||
} | ||
|
||
return | ||
} | ||
|
||
scanner.Split(split) | ||
|
||
tokens := []string{} | ||
for scanner.Scan() { | ||
tokens = append(tokens, scanner.Text()) | ||
} | ||
|
||
return tokens | ||
} | ||
|
||
// consumeWhitespace returns the leading run of whitespace bytes
// (space, tab, CR, LF) in data and its length in bytes.
func consumeWhitespace(data []byte) (advance int, token []byte, err error) {
	n := 0
	for n < len(data) {
		b := data[n]
		if b != ' ' && b != '\t' && b != '\r' && b != '\n' {
			break
		}
		n++
	}
	advance = n
	// Appending a zero-length slice leaves token nil, matching the
	// "no whitespace" case.
	token = append(token, data[:n]...)
	return
}
|
||
// consumeQuoted scans a quoted value starting at data[0] (either `'` or
// `"`) and returns the token including its surrounding quote characters.
// A backslash escapes the byte that follows it; the backslash is kept in
// the token so parseValue can interpret the escape sequence later. An
// error is returned when the closing quote is never found.
func consumeQuoted(data []byte) (advance int, token []byte, err error) {
	advance = 1
	stopAt := data[0]
	escaping := false
	token = append(token, stopAt)

	for _, b := range data[1:] {
		advance += 1

		if escaping {
			// Keep the escape sequence verbatim, and consume the escaped
			// byte exactly once: it must not be re-examined as a quote or
			// another backslash. (This fixes `\\` immediately followed by
			// the closing quote failing to terminate the value.)
			token = append(token, '\\', b)
			escaping = false
			continue
		}

		if b == stopAt {
			token = append(token, stopAt)
			return
		}

		if b == '\\' {
			escaping = true
			continue
		}

		token = append(token, b)
	}

	return 0, nil, fmt.Errorf("missing closing quotation mark")
}
|
||
// consumeKey returns every byte up to (but not including) the next '='
// in data, or all of data when no '=' is present.
func consumeKey(data []byte) (advance int, token []byte, err error) {
	end := bytes.IndexByte(data, '=')
	if end < 0 {
		end = len(data)
	}
	advance = end
	// Appending a zero-length slice leaves token nil when the first byte
	// is already '='.
	token = append(token, data[:end]...)
	return
}
|
||
// consumeLine returns every byte up to (but not including) the next CR
// or LF in data, or all of data when neither is present.
func consumeLine(data []byte) (advance int, token []byte, err error) {
	end := bytes.IndexAny(data, "\r\n")
	if end < 0 {
		end = len(data)
	}
	advance = end
	// Appending a zero-length slice leaves token nil when the first byte
	// is already a line terminator.
	token = append(token, data[:end]...)
	return
}
|
||
// isWhitespace reports whether b is a space, tab, carriage return, or
// line feed.
func isWhitespace(b byte) bool {
	switch b {
	case ' ', '\t', '\r', '\n':
		return true
	default:
		return false
	}
}
|
||
// isAllWhitespace reports whether every byte of s is whitespace
// (space, tab, CR, LF). It is vacuously true for the empty string.
func isAllWhitespace(s string) bool {
	return strings.TrimLeft(s, " \t\r\n") == ""
}
|
||
// isNewline reports whether b is a carriage return or line feed.
func isNewline(b byte) bool {
	switch b {
	case '\r', '\n':
		return true
	default:
		return false
	}
}
|
||
// containsNewline reports whether s contains a carriage return or line
// feed anywhere.
func containsNewline(s string) bool {
	return strings.ContainsAny(s, "\r\n")
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.