Skip to content

Commit

Permalink
Add standard newline/quoting behavior to dotenv store
Browse files Browse the repository at this point in the history
Rationale
=========

The dotenv store as it exists right now performs splitting on newlines
to determine where a new key-value pair or comment begins. This works
remarkably well, up until you need to handle values that contain
newlines.

While I couldn't find an official dotenv file format spec, I sampled a
number of open-source dotenv parsers and it seems that they typically
apply the following rules:

Newline handling:

* If a value is unquoted and contains a literal `\n` (`0x5c6e`), it is
  interpreted literally and NOT converted to an actual newline
  (`0x0a`).

* If a value is single-quoted and contains a literal `\n` (`0x5c6e`), it
  is interpreted literally and NOT converted to an actual newline
  (`0x0a`).

* If a value is double-quoted and contains a literal `\n` (`0x5c6e`), it
  is converted to an actual newline (`0x0a`).

* If a value is either single- or double-quoted, it may contain an
  actual newline (`0x0a`).

Whitespace trimming:

* If a value is unquoted and contains any leading or trailing
  whitespace, it is trimmed.

* If a value is either single- or double-quoted and contains any leading
  or trailing whitespace, it is left untrimmed.

Quotation handling:

* Because quotations around values have special meaning, they are
  interpreted and are not included in the parsed value. Literal quotes
  may be included within a quoted string either by escaping them or using
  the opposite quotation mark.

Because single- and double-quoted values may contain actual newlines,
we cannot split our input data on newlines as this may be in the middle
of a quoted value. This, along with the other rules around handling
quoted values, prompted me to try and implement a more robust parsing
solution. This commit is my first stab at that.

Special Considerations
======================

This is _not_ a backwards-compatible change:

* The `dotenv` files produced by this version of SOPS _cannot_ be read
  by an earlier version.

* The `dotenv` files produced by an earlier version of SOPS _can_ be
  read by this version, with the understanding that the semantics around
  quotations and newlines have changed.

Examples
========

The below examples show how double-quoted values are passed to the
running environment:

```console
$ echo 'FOO="foo\\nbar\\nbaz"' > plaintext.env
$ sops -e --output ciphertext.env plaintext.env
$ ~/go/bin/sops exec-env ciphertext.env 'env | grep FOO | xxd'
00000000: 464f 4f3d 666f 6f5c 6e62 6172 5c6e 6261  FOO=foo\nbar\nba
00000010: 7a0a                                     z.
```

```console
$ echo 'FOO="foo\nbar\nbaz"' > plaintext.env
$ sops -e --output ciphertext.env plaintext.env
$ sops exec-env ciphertext.env 'env | grep -A2 FOO | xxd'
00000000: 464f 4f3d 666f 6f0a 6261 720a 6261 7a0a  FOO=foo.bar.baz.
```
  • Loading branch information
Spencer Judd committed Feb 4, 2020
1 parent 1634350 commit 1d3f0cb
Show file tree
Hide file tree
Showing 4 changed files with 284 additions and 60 deletions.
19 changes: 11 additions & 8 deletions cmd/sops/subcommand/exec/exec.go
@@ -1,13 +1,14 @@
package exec

import (
"bytes"
"fmt"
"io/ioutil"
"os"
"runtime"
"strings"

"go.mozilla.org/sops/v3/logging"
"go.mozilla.org/sops/v3/stores/dotenv"

"github.com/sirupsen/logrus"
)
Expand Down Expand Up @@ -83,15 +84,17 @@ func ExecWithEnv(opts ExecOpts) {
}

env := os.Environ()
lines := bytes.Split(opts.Plaintext, []byte("\n"))
for _, line := range lines {
if len(line) == 0 {
continue
}
if line[0] == '#' {

items, err := dotenv.Parse(opts.Plaintext)
if err != nil {
log.Fatal(err)
}

for _, item := range items {
if item.Value == nil {
continue
}
env = append(env, string(line))
env = append(env, fmt.Sprintf("%s=%s", item.Key.(string), item.Value.(string)))
}

cmd := BuildCommand(opts.Command)
Expand Down
218 changes: 218 additions & 0 deletions stores/dotenv/parser.go
@@ -0,0 +1,218 @@
package dotenv

import (
"bufio"
"bytes"
"fmt"
"regexp"
"strings"

"go.mozilla.org/sops/v3"
)

// Parser states shared by Parse and tokenize. Each one names the kind of
// token that is expected next.
const (
// stateKey: a variable name (or the "#" that opens a comment) is expected.
stateKey int = iota
// stateValue: the value belonging to the most recently read key is expected.
stateValue
// stateComment: the text of a comment is expected.
stateComment
)

// KeyRegexp matches the keys Parse accepts: one or more ASCII letters or
// underscores, followed by any number of ASCII letters, digits or underscores.
var KeyRegexp = regexp.MustCompile(`^[A-Za-z_]+[A-Za-z0-9_]*$`)

// Parse converts raw dotenv data into a flat list of tree items.
//
// The input is tokenized and the tokens are interpreted with a small state
// machine: a "#" token announces a comment, an "=" token announces a value,
// and whitespace-only tokens are skipped. Comments become items whose Key is
// a sops.Comment and whose Value is nil; key/value pairs become items with a
// string Key and the result of parseValue as Value.
//
// An error is returned (with nil items) when a token found in key position
// does not match KeyRegexp.
func Parse(data []byte) (items []sops.TreeItem, err error) {
	var pendingKey string
	expect := stateKey

	for _, tok := range tokenize(data) {
		// Structural tokens only steer the state machine; they never
		// produce an item themselves.
		switch {
		case tok == "#":
			expect = stateComment
			continue
		case tok == "=":
			expect = stateValue
			continue
		case isAllWhitespace(tok):
			continue
		}

		switch expect {
		case stateComment:
			expect = stateKey
			items = append(items, sops.TreeItem{Key: sops.Comment{tok}, Value: nil})
		case stateKey:
			if !KeyRegexp.MatchString(tok) {
				return nil, fmt.Errorf("invalid dotenv key: %q", tok)
			}
			// Remember the key until its value token arrives.
			pendingKey = tok
		case stateValue:
			expect = stateKey
			items = append(items, sops.TreeItem{Key: pendingKey, Value: parseValue(tok)})
		}
	}

	return items, nil
}

// parseValue turns a raw value token into the value handed to callers.
//
//   - A token enclosed in double quotes has the quotes removed; every literal
//     `\n` becomes a real newline, while `\\n` becomes a literal `\n`.
//   - A token enclosed in single quotes has the quotes removed and is
//     otherwise taken verbatim.
//   - Anything else is treated as unquoted and has leading and trailing
//     spaces and tabs trimmed.
//
// A token counts as quoted only when it is at least two bytes long and begins
// and ends with the same quote character. Shorter or unbalanced input (for
// example "" or a lone quote) falls through to the unquoted branch instead of
// panicking on an out-of-range index.
func parseValue(value string) string {
	if n := len(value); n >= 2 && value[n-1] == value[0] {
		switch value[0] {
		case '"':
			// Expand `\n` first; an escaped backslash in front of it then
			// shows up as backslash+newline and is folded back to `\n`.
			expanded := strings.Replace(value[1:n-1], "\\n", "\n", -1)
			return strings.Replace(expanded, "\\\n", "\\n", -1)
		case '\'':
			return value[1 : n-1]
		}
	}

	return strings.Trim(value, "\t ")
}

// tokenize splits raw dotenv data into lexical tokens: runs of whitespace,
// the "#" and "=" markers, keys, comment text, and quoted or unquoted values.
//
// The scanner is given a buffer large enough for the whole input and the
// split function declines to emit tokens until the scanner reports atEOF.
// Without this, the split function would see only the scanner's current
// buffer window, so a key, value or quoted string crossing a window boundary
// would be cut in two (or reported as an unterminated quote).
//
// NOTE: tokenize has no error return, so a split error such as an
// unterminated quoted value simply ends tokenization; the tokens produced up
// to that point are returned.
func tokenize(data []byte) []string {
	scanner := bufio.NewScanner(bytes.NewReader(data))
	// One spare byte keeps the buffer from ever being "full", which would
	// otherwise make the scanner try to grow past the configured maximum.
	scanner.Buffer(make([]byte, 0, len(data)+1), len(data)+1)

	nextState := stateKey

	split := func(rest []byte, atEOF bool) (advance int, token []byte, err error) {
		// Request more input until everything is buffered; an empty
		// remainder at EOF ends the scan.
		if !atEOF || len(rest) == 0 {
			return 0, nil, nil
		}

		state := nextState

		switch {
		case isWhitespace(rest[0]):
			return consumeWhitespace(rest)

		case state == stateKey && rest[0] == '#':
			nextState = stateComment
			return 1, []byte{rest[0]}, nil

		case state == stateComment:
			nextState = stateKey
			return consumeLine(rest)

		case rest[0] == '=':
			nextState = stateValue
			return 1, []byte{rest[0]}, nil

		case state == stateKey:
			nextState = stateValue
			return consumeKey(rest)

		case state == stateValue && (rest[0] == '"' || rest[0] == '\''):
			nextState = stateKey
			return consumeQuoted(rest)

		case state == stateValue:
			nextState = stateKey
			return consumeLine(rest)
		}

		return 0, nil, nil
	}

	scanner.Split(split)

	tokens := []string{}
	for scanner.Scan() {
		tokens = append(tokens, scanner.Text())
	}

	return tokens
}

// consumeWhitespace reads the leading run of whitespace bytes from data.
// It returns the length of that run as advance and a copy of the run as
// token; the token is nil when data does not start with whitespace. The
// error is always nil.
func consumeWhitespace(data []byte) (advance int, token []byte, err error) {
	for advance < len(data) && isWhitespace(data[advance]) {
		advance++
	}
	return advance, append(token, data[:advance]...), nil
}

// consumeQuoted reads a quoted string from the start of data. data[0] is the
// opening quote character; the string ends at the next occurrence of that
// character that is not preceded by an escaping backslash.
//
// On success advance is the number of bytes read and token is a copy of
// them, including both quotes and any backslashes. If no closing quote is
// found (or data is empty), it returns 0, nil and an error.
//
// A backslash escapes exactly one following byte. In particular an escaped
// backslash (`\\`) does not escape the byte after it, so `"a\\"` is a
// complete quoted string.
func consumeQuoted(data []byte) (advance int, token []byte, err error) {
	if len(data) == 0 {
		return 0, nil, fmt.Errorf("missing closing quotation mark")
	}

	quote := data[0]
	escaped := false

	for i := 1; i < len(data); i++ {
		b := data[i]
		switch {
		case escaped:
			// This byte was escaped by the previous backslash; it can
			// neither close the string nor start another escape.
			escaped = false
		case b == '\\':
			escaped = true
		case b == quote:
			return i + 1, append(token, data[:i+1]...), nil
		}
	}

	return 0, nil, fmt.Errorf("missing closing quotation mark")
}

// consumeKey reads bytes from data up to, but not including, the first '='.
// If data contains no '=', all of data is read. advance is the number of
// bytes read and token is a copy of them (nil when nothing was read). The
// error is always nil.
func consumeKey(data []byte) (advance int, token []byte, err error) {
	for advance < len(data) && data[advance] != '=' {
		advance++
	}
	return advance, append(token, data[:advance]...), nil
}

// consumeLine reads bytes from data up to and including the first '\n' or
// '\r'. advance counts the terminator; token is a copy of the bytes before
// it. If data holds no terminator, all of data is read and returned. The
// error is always nil.
func consumeLine(data []byte) (advance int, token []byte, err error) {
	terminator := -1
	for i := range data {
		if data[i] == '\n' || data[i] == '\r' {
			terminator = i
			break
		}
	}

	if terminator < 0 {
		return len(data), append(token, data...), nil
	}
	return terminator + 1, append(token, data[:terminator]...), nil
}

// isWhitespace reports whether b is a space, tab, carriage return or
// line feed.
func isWhitespace(b byte) bool {
	switch b {
	case ' ', '\t', '\r', '\n':
		return true
	}
	return false
}

// isAllWhitespace reports whether every byte of s satisfies isWhitespace.
// It returns true for the empty string.
func isAllWhitespace(s string) bool {
	for i := 0; i < len(s); i++ {
		if !isWhitespace(s[i]) {
			return false
		}
	}
	return true
}
32 changes: 8 additions & 24 deletions stores/dotenv/store.go
Expand Up @@ -63,30 +63,11 @@ func (store *Store) LoadEncryptedFile(in []byte) (sops.Tree, error) {
// sops runtime object
func (store *Store) LoadPlainFile(in []byte) (sops.TreeBranches, error) {
var branches sops.TreeBranches
var branch sops.TreeBranch

for _, line := range bytes.Split(in, []byte("\n")) {
if len(line) == 0 {
continue
}
if line[0] == '#' {
branch = append(branch, sops.TreeItem{
Key: sops.Comment{string(line[1:])},
Value: nil,
})
} else {
pos := bytes.Index(line, []byte("="))
if pos == -1 {
return nil, fmt.Errorf("invalid dotenv input line: %s", line)
}
branch = append(branch, sops.TreeItem{
Key: string(line[:pos]),
Value: strings.Replace(string(line[pos+1:]), "\\n", "\n", -1),
})
}
items, err := Parse(in)
if err != nil {
return nil, err
}

branches = append(branches, branch)
branches = append(branches, items)
return branches, nil
}

Expand Down Expand Up @@ -118,9 +99,12 @@ func (store *Store) EmitPlainFile(in sops.TreeBranches) ([]byte, error) {
var line string
if comment, ok := item.Key.(sops.Comment); ok {
line = fmt.Sprintf("#%s\n", comment.Value)
} else {
} else if strings.HasPrefix(item.Key.(string), SopsPrefix) {
value := strings.Replace(item.Value.(string), "\n", "\\n", -1)
line = fmt.Sprintf("%s=%s\n", item.Key, value)
} else {
value := strings.Replace(item.Value.(string), `\n`, `\\n`, -1)
line = fmt.Sprintf("%s=\"%s\"\n", item.Key, value)
}
buffer.WriteString(line)
}
Expand Down

0 comments on commit 1d3f0cb

Please sign in to comment.