Skip to content

Commit

Permalink
yamlfmt: created content analyzer (#106)
Browse files Browse the repository at this point in the history
* yamlfmt: created content analyzer

In some use cases the provided patterns of exclusion aren't enough, for
example when something generates a lot of yaml files in weird places, or
when single files should be ignored out of large directories. This PR
adds the following:
* The concept of and ability to read metadata, with the first type of
  metadata being "ignore"
* A new ContentAnalyzer interface and a BasicContentAnalyzer which will accept
  an array of regex pattern strings. This will first read the metadata
  from the content of each file to find the ignore metadata, then will
  match the content to the regex patterns provided to determine which
  paths will be excluded.

* yamlfmt: improve metadata errors

Change metadata errors into a wrapped error struct so that the line
number and path can be included in the resulting error. Make metadata
errors non-fatal to `yamlfmt` as a whole. Also add the docs in this
commit.
  • Loading branch information
braydonk committed Apr 2, 2023
1 parent cdca3ce commit 3a03733
Show file tree
Hide file tree
Showing 14 changed files with 501 additions and 9 deletions.
18 changes: 12 additions & 6 deletions Makefile
@@ -1,17 +1,23 @@
.PHONY: build
build:
go build ./cmd/yamlfmt

.PHONY: test
test:
go test ./...

.PHONY: test_v
test_v:
go test -v ./...

.PHONY: install
install:
go install ./cmd/yamlfmt

.PHONY: install_tools
install_tools:
go install github.com/google/addlicense@latest

.PHONY: addlicense
addlicense:
addlicense -c "Google LLC" -l apache .

test_diff:
go test -v -mod=mod github.com/google/yamlfmt/internal/diff

test_basic_formatter:
go test -v -mod=mod github.com/google/yamlfmt/formatters/basic
27 changes: 25 additions & 2 deletions command/command.go
Expand Up @@ -49,6 +49,7 @@ type Config struct {
Extensions []string `mapstructure:"extensions"`
Include []string `mapstructure:"include"`
Exclude []string `mapstructure:"exclude"`
RegexExclude []string `mapstructure:"regex_exclude"`
Doublestar bool `mapstructure:"doublestar"`
LineEnding yamlfmt.LineBreakStyle `mapstructure:"line_ending"`
FormatterConfig *FormatterConfig `mapstructure:"formatter,omitempty"`
Expand Down Expand Up @@ -110,10 +111,15 @@ func (c *Command) Run() error {
Quiet: c.Quiet,
}

paths, err := c.collectPaths()
collectedPaths, err := c.collectPaths()
if err != nil {
return err
}
paths, err := c.analyzePaths(collectedPaths)
if err != nil {
log.Printf("path analysis found the following errors:\n%v", err)
log.Println("Continuing...")
}

switch c.Operation {
case OperationFormat:
Expand All @@ -137,7 +143,11 @@ func (c *Command) Run() error {
if err != nil {
return err
}
log.Print(out)
if out.Message == "" {
log.Print("No files will be changed.")
} else {
log.Print(out)
}
case OperationStdin:
stdinYaml, err := readFromStdin()
if err != nil {
Expand All @@ -158,6 +168,15 @@ func (c *Command) collectPaths() ([]string, error) {
return collector.CollectPaths()
}

// analyzePaths runs the content analyzer over the collected paths and returns
// the paths the analyzer did not exclude. If the analyzer cannot be built, no
// paths are returned. Any error from the analysis itself is returned together
// with the included paths.
func (c *Command) analyzePaths(paths []string) ([]string, error) {
	contentAnalyzer, analyzerErr := c.makeAnalyzer()
	if analyzerErr != nil {
		return nil, analyzerErr
	}

	// The list of excluded paths is not needed by the command, only the
	// paths that remain.
	included, _, analysisErr := contentAnalyzer.ExcludePathsByContent(paths)
	return included, analysisErr
}

func (c *Command) makePathCollector() yamlfmt.PathCollector {
if c.Config.Doublestar {
return &yamlfmt.DoublestarCollector{
Expand All @@ -172,6 +191,10 @@ func (c *Command) makePathCollector() yamlfmt.PathCollector {
}
}

// makeAnalyzer builds the content analyzer used by the command from the
// RegexExclude patterns in the command's config. The error is whatever
// yamlfmt.NewBasicContentAnalyzer returns for those patterns.
func (c *Command) makeAnalyzer() (yamlfmt.ContentAnalyzer, error) {
return yamlfmt.NewBasicContentAnalyzer(c.Config.RegexExclude)
}

func readFromStdin() ([]byte, error) {
stdin := bufio.NewReader(os.Stdin)
data := []byte{}
Expand Down
76 changes: 76 additions & 0 deletions content_analyzer.go
@@ -0,0 +1,76 @@
package yamlfmt

import (
"os"
"regexp"

"github.com/google/yamlfmt/internal/collections"
)

// ContentAnalyzer filters a list of paths based on the content of the files
// at those paths.
type ContentAnalyzer interface {
// ExcludePathsByContent takes the candidate paths and returns two path
// lists and an error. In BasicContentAnalyzer the first list holds the
// paths that were not excluded, the second holds the excluded paths, and
// the error combines the problems found while analyzing.
ExcludePathsByContent(paths []string) ([]string, []string, error)
}

// BasicContentAnalyzer excludes files whose content carries ignore metadata
// or matches one of its regex patterns.
type BasicContentAnalyzer struct {
// RegexPatterns are the compiled patterns matched against the raw content
// of each file; a match excludes the file.
RegexPatterns []*regexp.Regexp
}

// NewBasicContentAnalyzer compiles each of the given pattern strings and
// returns a BasicContentAnalyzer holding the patterns that compiled. Patterns
// that fail to compile are left out of the analyzer; their errors are
// collected and returned combined.
func NewBasicContentAnalyzer(patterns []string) (BasicContentAnalyzer, error) {
	compiled := []*regexp.Regexp{}
	failures := collections.Errors{}

	for _, expr := range patterns {
		if re, err := regexp.Compile(expr); err != nil {
			failures = append(failures, err)
		} else {
			compiled = append(compiled, re)
		}
	}

	return BasicContentAnalyzer{RegexPatterns: compiled}, failures.Combine()
}

// ExcludePathsByContent reads every file in paths and decides from its content
// whether it should be excluded from formatting.
//
// A file is excluded when its metadata contains an ignore entry, or when its
// content matches any of the analyzer's regex patterns. Files that cannot be
// read are reported in the returned error but are not excluded.
//
// It returns the paths that were not excluded (in the order they appear in
// paths, each listed once), the paths that were excluded, and the combination
// of all read and metadata errors encountered.
func (a BasicContentAnalyzer) ExcludePathsByContent(paths []string) ([]string, []string, error) {
	pathsToFormat := collections.SliceToSet(paths)
	pathsExcluded := []string{}
	pathErrs := collections.Errors{}

	for _, path := range paths {
		content, err := os.ReadFile(path)
		if err != nil {
			// Record the failure and move on; the path stays in pathsToFormat.
			pathErrs = append(pathErrs, err)
			continue
		}

		// Search metadata for ignore. Metadata errors are recorded, but any
		// metadata that was read is still honored.
		metadata, mdErrs := ReadMetadata(content, path)
		if len(mdErrs) != 0 {
			pathErrs = append(pathErrs, mdErrs...)
		}
		ignoreFound := false
		for md := range metadata {
			if md.Type == MetadataIgnore {
				ignoreFound = true
				break
			}
		}
		if ignoreFound {
			pathsExcluded = append(pathsExcluded, path)
			pathsToFormat.Remove(path)
			continue
		}

		// Check if content matches any regex; one match is enough, so stop
		// at the first hit instead of running the remaining patterns.
		matched := false
		for _, pattern := range a.RegexPatterns {
			if pattern.Match(content) {
				matched = true
				break
			}
		}
		if matched {
			pathsExcluded = append(pathsExcluded, path)
			pathsToFormat.Remove(path)
		}
	}

	// Build the result by walking the input rather than the set, so the
	// output order is the input order instead of random map iteration order.
	includedPaths := make([]string, 0, len(pathsToFormat))
	for _, path := range paths {
		if _, ok := pathsToFormat[path]; !ok {
			continue
		}
		includedPaths = append(includedPaths, path)
		// Drop the path once emitted so a path given twice is listed once.
		pathsToFormat.Remove(path)
	}

	return includedPaths, pathsExcluded, pathErrs.Combine()
}
116 changes: 116 additions & 0 deletions content_analyzer_test.go
@@ -0,0 +1,116 @@
package yamlfmt_test

import (
"path/filepath"
"testing"

"github.com/google/yamlfmt"
"github.com/google/yamlfmt/internal/collections"
"github.com/google/yamlfmt/internal/tempfile"
)

const testdataBase = "testdata/content_analyzer"

// TestBasicContentAnalyzer replicates a testdata directory into a temporary
// directory, collects the yaml files in it, and checks that the
// BasicContentAnalyzer splits them into the expected included and excluded
// sets. Expectations are written as paths relative to the temporary directory.
func TestBasicContentAnalyzer(t *testing.T) {
	testCases := []struct {
		name             string
		testdataDir      string
		excludePatterns  []string
		expectedPaths    collections.Set[string]
		expectedExcluded collections.Set[string]
	}{
		{
			name:            "has ignore metadata",
			testdataDir:     "has_ignore",
			excludePatterns: []string{},
			expectedPaths: collections.Set[string]{
				"y.yaml": {},
			},
			expectedExcluded: collections.Set[string]{
				"x.yaml": {},
			},
		},
		{
			name:        "matches regex pattern",
			testdataDir: "regex_ignore",
			excludePatterns: []string{
				".*generated by.*",
			},
			expectedPaths: collections.Set[string]{
				"y.yaml": {},
			},
			expectedExcluded: collections.Set[string]{
				"x.yaml": {},
			},
		},
	}

	for _, tc := range testCases {
		// Capture the loop variable for the parallel subtest closure.
		tc := tc
		t.Run(tc.name, func(t *testing.T) {
			t.Parallel()
			tempPath := t.TempDir()
			testdataDir := filepath.Join(testdataBase, tc.testdataDir)
			paths, err := tempfile.ReplicateDirectory(testdataDir, tempPath)
			if err != nil {
				t.Fatalf("could not replicate testdata directory %s: %v", tc.testdataDir, err)
			}
			err = paths.CreateAll()
			if err != nil {
				t.Fatalf("could not create full test directory: %v", err)
			}
			contentAnalyzer, err := yamlfmt.NewBasicContentAnalyzer(tc.excludePatterns)
			if err != nil {
				t.Fatalf("could not create content analyzer: %v", err)
			}
			collector := &yamlfmt.FilepathCollector{
				Include:    []string{tempPath},
				Exclude:    []string{},
				Extensions: []string{"yaml", "yml"},
			}
			collectedPaths, err := collector.CollectPaths()
			if err != nil {
				t.Fatalf("CollectPaths failed: %v", err)
			}
			resultPaths, excludedPaths, err := contentAnalyzer.ExcludePathsByContent(collectedPaths)
			if err != nil {
				t.Fatalf("expected content analyzer to work, got error: %v", err)
			}
			resultPathsTrimmed, err := pathsTempdirTrimmed(resultPaths, tempPath)
			if err != nil {
				t.Fatalf("expected trimming tempdir from result not to have error: %v", err)
			}
			if !tc.expectedPaths.Equals(collections.SliceToSet(resultPathsTrimmed)) {
				// Report the trimmed paths so both sides of the message are
				// in the same form as the expectations.
				t.Fatalf("expected files:\n%v\ngot:\n%v", tc.expectedPaths, resultPathsTrimmed)
			}
			excludePathsTrimmed, err := pathsTempdirTrimmed(excludedPaths, tempPath)
			if err != nil {
				t.Fatalf("expected trimming tempdir from excluded not to have error: %v", err)
			}
			if !tc.expectedExcluded.Equals(collections.SliceToSet(excludePathsTrimmed)) {
				t.Fatalf("expected exclusions:\n%v\ngot:\n%v", tc.expectedExcluded, excludePathsTrimmed)
			}
		})
	}
}

// TestBadNewContentAnalyzer checks that NewBasicContentAnalyzer returns an
// error when it is given a pattern that does not compile.
func TestBadNewContentAnalyzer(t *testing.T) {
	// Illegal because the opening "(" is never closed.
	patterns := []string{"%^3412098(]fj"}
	if _, err := yamlfmt.NewBasicContentAnalyzer(patterns); err == nil {
		t.Fatalf("expected there to be an error")
	}
}

// pathsTempdirTrimmed returns each of the given paths rewritten relative to
// tempDir, in the same order. If any path cannot be made relative, it returns
// nil and that error.
func pathsTempdirTrimmed(paths []string, tempDir string) ([]string, error) {
	relative := make([]string, len(paths))
	for i := range paths {
		rel, err := filepath.Rel(tempDir, paths[i])
		if err != nil {
			return nil, err
		}
		relative[i] = rel
	}
	return relative, nil
}
1 change: 1 addition & 0 deletions docs/config-file.md
Expand Up @@ -22,6 +22,7 @@ The command package defines the main command engine that `cmd/yamlfmt` uses. It
| `doublestar` | bool | false | Use [doublestar](https://github.com/bmatcuk/doublestar) for include and exclude paths. (This was the default before 0.7.0) |
| `include` | []string | [] | The paths for the command to include for formatting. See [Specifying Paths][] for more details. |
| `exclude` | []string | [] | The paths for the command to exclude from formatting. See [Specifying Paths][] for more details. |
| `regex_exclude` | []string | [] | Regex patterns to match against file contents. If a file's content matches any of the patterns, the file will be excluded. Use [Golang regexes](https://regex101.com/). |
| `extensions` | []string | [] | The extensions to use for standard mode path collection. See [Specifying Paths][] for more details. |
| `formatter` | map[string]any | default basic formatter | Formatter settings. See [Formatter](#formatter) for more details. |

Expand Down
24 changes: 24 additions & 0 deletions docs/metadata.md
@@ -0,0 +1,24 @@
# Metadata

The `yamlfmt` library supports recognizing a limited amount of metadata from a yaml file.

## How to specify

Metadata is specified with a special token, followed by a colon, and then a type. For example, to add `ignore` metadata to a file:
```
# !yamlfmt!:ignore
```
If this string `!yamlfmt!:ignore` is anywhere in the file, the file will be dropped from the paths to format.

The format of `!yamlfmt!:type` is strict; there must be a colon separating the metadata identifier and the type, and there must be no whitespace separating anything within the metadata identifier block. For example, either of these will cause an error:
```
# !yamlfmt!: ignore
# !yamlfmt!ignore
```
Metadata errors are considered non-fatal, and `yamlfmt` will attempt to continue despite them.

## Types

| Type | Example | Description |
|:-------|:-------------------|:------------|
| ignore | `!yamlfmt!:ignore` | If found, `yamlfmt` will exclude the file from formatting. |
19 changes: 19 additions & 0 deletions internal/collections/set.go
Expand Up @@ -23,6 +23,25 @@ func (s Set[T]) ToSlice() []T {
return sl
}

// Clone returns a new Set built by adding every element of s to it.
func (s Set[T]) Clone() Set[T] {
	duplicate := make(Set[T], len(s))
	for member := range s {
		duplicate.Add(member)
	}
	return duplicate
}

// Equals reports whether s and rhs contain exactly the same elements.
func (s Set[T]) Equals(rhs Set[T]) bool {
	if len(s) != len(rhs) {
		return false
	}
	// With equal sizes, the sets are the same exactly when every element of
	// s is also present in rhs.
	for member := range s {
		if _, present := rhs[member]; !present {
			return false
		}
	}
	return true
}

func SliceToSet[T comparable](sl []T) Set[T] {
set := Set[T]{}
for _, el := range sl {
Expand Down

0 comments on commit 3a03733

Please sign in to comment.