diff --git a/Makefile b/Makefile index c693c40..6b52ce2 100644 --- a/Makefile +++ b/Makefile @@ -1,17 +1,23 @@ +.PHONY: build build: go build ./cmd/yamlfmt +.PHONY: test +test: + go test ./... + +.PHONY: test_v +test_v: + go test -v ./... + +.PHONY: install install: go install ./cmd/yamlfmt +.PHONY: install_tools install_tools: go install github.com/google/addlicense@latest +.PHONY: addlicense addlicense: addlicense -c "Google LLC" -l apache . - -test_diff: - go test -v -mod=mod github.com/google/yamlfmt/internal/diff - -test_basic_formatter: - go test -v -mod=mod github.com/google/yamlfmt/formatters/basic \ No newline at end of file diff --git a/command/command.go b/command/command.go index e4bdf0b..4cf6680 100644 --- a/command/command.go +++ b/command/command.go @@ -49,6 +49,7 @@ type Config struct { Extensions []string `mapstructure:"extensions"` Include []string `mapstructure:"include"` Exclude []string `mapstructure:"exclude"` + RegexExclude []string `mapstructure:"regex_exclude"` Doublestar bool `mapstructure:"doublestar"` LineEnding yamlfmt.LineBreakStyle `mapstructure:"line_ending"` FormatterConfig *FormatterConfig `mapstructure:"formatter,omitempty"` @@ -110,10 +111,15 @@ func (c *Command) Run() error { Quiet: c.Quiet, } - paths, err := c.collectPaths() + collectedPaths, err := c.collectPaths() if err != nil { return err } + paths, err := c.analyzePaths(collectedPaths) + if err != nil { + log.Printf("path analysis found the following errors:\n%v", err) + log.Println("Continuing...") + } switch c.Operation { case OperationFormat: @@ -137,7 +143,11 @@ func (c *Command) Run() error { if err != nil { return err } - log.Print(out) + if out.Message == "" { + log.Print("No files will be changed.") + } else { + log.Print(out) + } case OperationStdin: stdinYaml, err := readFromStdin() if err != nil { @@ -158,6 +168,15 @@ func (c *Command) collectPaths() ([]string, error) { return collector.CollectPaths() } +func (c *Command) analyzePaths(paths []string) ([]string, error) { + analyzer, err := c.makeAnalyzer() + if err != nil { + return nil, err + } + includePaths, _, err := analyzer.ExcludePathsByContent(paths) + return includePaths, err +} + func (c *Command) makePathCollector() yamlfmt.PathCollector { if c.Config.Doublestar { return &yamlfmt.DoublestarCollector{ @@ -172,6 +191,10 @@ func (c *Command) makePathCollector() yamlfmt.PathCollector { } } +func (c *Command) makeAnalyzer() (yamlfmt.ContentAnalyzer, error) { + return yamlfmt.NewBasicContentAnalyzer(c.Config.RegexExclude) +} + func readFromStdin() ([]byte, error) { stdin := bufio.NewReader(os.Stdin) data := []byte{} diff --git a/content_analyzer.go b/content_analyzer.go new file mode 100644 index 0000000..10da3de --- /dev/null +++ b/content_analyzer.go @@ -0,0 +1,76 @@ +package yamlfmt + +import ( + "os" + "regexp" + + "github.com/google/yamlfmt/internal/collections" +) + +type ContentAnalyzer interface { + ExcludePathsByContent(paths []string) ([]string, []string, error) +} + +type BasicContentAnalyzer struct { + RegexPatterns []*regexp.Regexp +} + +func NewBasicContentAnalyzer(patterns []string) (BasicContentAnalyzer, error) { + analyzer := BasicContentAnalyzer{RegexPatterns: []*regexp.Regexp{}} + compileErrs := collections.Errors{} + for _, pattern := range patterns { + re, err := regexp.Compile(pattern) + if err != nil { + compileErrs = append(compileErrs, err) + continue + } + analyzer.RegexPatterns = append(analyzer.RegexPatterns, re) + } + return analyzer, compileErrs.Combine() +} + +func (a BasicContentAnalyzer) ExcludePathsByContent(paths []string) ([]string, []string, error) { + pathsToFormat := collections.SliceToSet(paths) + pathsExcluded := []string{} + pathErrs := collections.Errors{} + + for _, path := range paths { + content, err := os.ReadFile(path) + if err != nil { + pathErrs = append(pathErrs, err) + continue + } + + // Search metadata for ignore + metadata, mdErrs := ReadMetadata(content, path) + if len(mdErrs) != 0 { + pathErrs = append(pathErrs, mdErrs...) + } + ignoreFound := false + for md := range metadata { + if md.Type == MetadataIgnore { + ignoreFound = true + break + } + } + if ignoreFound { + pathsExcluded = append(pathsExcluded, path) + pathsToFormat.Remove(path) + continue + } + + // Check if content matches any regex + matched := false + for _, pattern := range a.RegexPatterns { + if pattern.Match(content) { + matched = true + } + } + if matched { + pathsExcluded = append(pathsExcluded, path) + pathsToFormat.Remove(path) + } + } + + return pathsToFormat.ToSlice(), pathsExcluded, pathErrs.Combine() +} diff --git a/content_analyzer_test.go b/content_analyzer_test.go new file mode 100644 index 0000000..f31ee50 --- /dev/null +++ b/content_analyzer_test.go @@ -0,0 +1,116 @@ +package yamlfmt_test + +import ( + "path/filepath" + "testing" + + "github.com/google/yamlfmt" + "github.com/google/yamlfmt/internal/collections" + "github.com/google/yamlfmt/internal/tempfile" +) + +const testdataBase = "testdata/content_analyzer" + +func TestBasicContentAnalyzer(t *testing.T) { + testCases := []struct { + name string + testdataDir string + excludePatterns []string + expectedPaths collections.Set[string] + expectedExcluded collections.Set[string] + }{ + { + name: "has ignore metadata", + testdataDir: "has_ignore", + excludePatterns: []string{}, + expectedPaths: collections.Set[string]{ + "y.yaml": {}, + }, + expectedExcluded: collections.Set[string]{ + "x.yaml": {}, + }, + }, + { + name: "matches regex pattern", + testdataDir: "regex_ignore", + excludePatterns: []string{ + ".*generated by.*", + }, + expectedPaths: collections.Set[string]{ + "y.yaml": {}, + }, + expectedExcluded: collections.Set[string]{ + "x.yaml": {}, + }, + }, + } + + for _, tc := range testCases { + tc := tc + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + tempPath := t.TempDir() + testdataDir := filepath.Join(testdataBase, tc.testdataDir) + paths, err := tempfile.ReplicateDirectory(testdataDir, tempPath) + if err != nil { + t.Fatalf("could not replicate testdata directory %s: %v", tc.testdataDir, err) + } + err = paths.CreateAll() + if err != nil { + t.Fatalf("could not create full test directory: %v", err) + } + contentAnalyzer, err := yamlfmt.NewBasicContentAnalyzer(tc.excludePatterns) + if err != nil { + t.Fatalf("could not create content analyzer: %v", err) + } + collector := &yamlfmt.FilepathCollector{ + Include: []string{tempPath}, + Exclude: []string{}, + Extensions: []string{"yaml", "yml"}, + } + collectedPaths, err := collector.CollectPaths() + if err != nil { + t.Fatalf("CollectPaths failed: %v", err) + } + resultPaths, excludedPaths, err := contentAnalyzer.ExcludePathsByContent(collectedPaths) + if err != nil { + t.Fatalf("expected content analyzer to work, got error: %v", err) + } + resultPathsTrimmed, err := pathsTempdirTrimmed(resultPaths, tempPath) + if err != nil { + t.Fatalf("expected trimming tempdir from result not to have error: %v", err) + } + if !tc.expectedPaths.Equals(collections.SliceToSet(resultPathsTrimmed)) { + t.Fatalf("expected files:\n%v\ngot:\n%v", tc.expectedPaths, resultPaths) + } + excludePathsTrimmed, err := pathsTempdirTrimmed(excludedPaths, tempPath) + if err != nil { + t.Fatalf("expected trimming tempdir from excluded not to have error: %v", err) + } + if !tc.expectedExcluded.Equals(collections.SliceToSet(excludePathsTrimmed)) { + t.Fatalf("expected excludsions:\n%v\ngot:\n%v", tc.expectedExcluded, excludedPaths) + } + }) + } +} + +func TestBadNewContentAnalyzer(t *testing.T) { + // Illegal because no closing ) + badPattern := "%^3412098(]fj" + _, err := yamlfmt.NewBasicContentAnalyzer([]string{badPattern}) + if err == nil { + t.Fatalf("expected there to be an error") + } +} + +func pathsTempdirTrimmed(paths []string, tempDir string) ([]string, error) { + trimmedPaths := []string{} + for _, path := range paths { + trimmedPath, err := filepath.Rel(tempDir, path) + if err != nil { + return nil, err + } + trimmedPaths = append(trimmedPaths, trimmedPath) + } + return trimmedPaths, nil +} diff --git a/docs/config-file.md b/docs/config-file.md index 79080ee..0897819 100644 --- a/docs/config-file.md +++ b/docs/config-file.md @@ -22,6 +22,7 @@ The command package defines the main command engine that `cmd/yamlfmt` uses. It | `doublestar` | bool | false | Use [doublestar](https://github.com/bmatcuk/doublestar) for include and exclude paths. (This was the default before 0.7.0) | | `include` | []string | [] | The paths for the command to include for formatting. See [Specifying Paths][] for more details. | | `exclude` | []string | [] | The paths for the command to exclude from formatting. See [Specifying Paths][] for more details. | +| `regex_exclude` | []string | [] | Regex patterns to match file contents for, if the file content matches the regex the file will be excluded. Use [Golang regexes](https://regex101.com/). | | `extensions` | []string | [] | The extensions to use for standard mode path collection. See [Specifying Paths][] for more details. | | `formatter` | map[string]any | default basic formatter | Formatter settings. See [Formatter](#formatter) for more details. | diff --git a/docs/metadata.md b/docs/metadata.md new file mode 100644 index 0000000..86b7a20 --- /dev/null +++ b/docs/metadata.md @@ -0,0 +1,24 @@ +# Metadata + +The `yamlfmt` library supports recognizing a limited amount of metadata from a yaml file. + +## How to specify + +Metadata is specified with a special token, followed by a colon, and then a type. For example, to add `ignore` metadata to a file: +``` +# !yamlfmt!:ignore +``` +If this string `!yamlfmt!:ignore` is anywhere in the file, the file will be dropped from the paths to format. + +The format of `!yamlfmt!:type` is strict; there must be a colon separating the metadata identifier and the type, and there must be no whitespace separating anything within the metadata identifier block. For example either of these will cause an error: +``` +# !yamlfmt!: ignore +# !yamlfmt!ignore +``` +Metadata errors are considered non-fatal, and `yamlfmt` will attempt to continue despite them. + +## Types + +| Type | Example | Description | +|:-------|:-------------------|:------------| +| ignore | `!yamlfmt!:ignore` | If found, `yamlfmt` will exclude the file from formatting. | \ No newline at end of file diff --git a/internal/collections/set.go b/internal/collections/set.go index 4201cbd..4a04ec9 100644 --- a/internal/collections/set.go +++ b/internal/collections/set.go @@ -23,6 +23,25 @@ func (s Set[T]) ToSlice() []T { return sl } +func (s Set[T]) Clone() Set[T] { + newSet := Set[T]{} + for el := range s { + newSet.Add(el) + } + return newSet +} + +func (s Set[T]) Equals(rhs Set[T]) bool { + if len(s) != len(rhs) { + return false + } + rhsClone := rhs.Clone() + for el := range s { + rhsClone.Remove(el) + } + return len(rhsClone) == 0 +} + func SliceToSet[T comparable](sl []T) Set[T] { set := Set[T]{} for _, el := range sl { diff --git a/internal/tempfile/path.go b/internal/tempfile/path.go index f8a3a51..8f129d5 100644 --- a/internal/tempfile/path.go +++ b/internal/tempfile/path.go @@ -1,14 +1,18 @@ package tempfile import ( + "io/fs" "os" "path/filepath" + + "github.com/google/yamlfmt/internal/collections" ) type Path struct { BasePath string FileName string IsDir bool + Content []byte } func (p *Path) Create() error { @@ -17,7 +21,7 @@ func (p *Path) Create() error { if p.IsDir { err = os.Mkdir(file, os.ModePerm) } else { - err = os.WriteFile(file, []byte{}, os.ModePerm) + err = os.WriteFile(file, p.Content, os.ModePerm) } return err } @@ -25,3 +29,41 @@ func (p *Path) Create() error { func (p *Path) fullPath() string { return filepath.Join(p.BasePath, p.FileName) } + +type Paths []Path + +func (ps Paths) CreateAll() error { + errs := collections.Errors{} + for _, path := range ps { + errs = append(errs, path.Create()) + } + return errs.Combine() +} + +func ReplicateDirectory(dir string, newBase string) (Paths, error) { + paths := Paths{} + + err := filepath.Walk(dir, func(path string, info fs.FileInfo, walkErr error) error { + if walkErr != nil { + return walkErr + } + content := []byte{} + + if !info.IsDir() { + readContent, err := os.ReadFile(path) + if err != nil { + return err + } + content = readContent + } + + paths = append(paths, Path{ + BasePath: newBase, + FileName: info.Name(), + IsDir: info.IsDir(), + Content: content, + }) + return nil + }) + return paths, err +} diff --git a/metadata.go b/metadata.go new file mode 100644 index 0000000..deea91a --- /dev/null +++ b/metadata.go @@ -0,0 +1,100 @@ +package yamlfmt + +import ( + "errors" + "fmt" + "strings" + "unicode" + + "github.com/google/yamlfmt/internal/collections" +) + +const MetadataIdentifier = "!yamlfmt!" + +type MetadataType string + +const ( + MetadataIgnore MetadataType = "ignore" +) + +func IsMetadataType(mdValueStr string) bool { + mdTypes := collections.Set[MetadataType]{} + mdTypes.Add(MetadataIgnore) + return mdTypes.Contains(MetadataType(mdValueStr)) +} + +type Metadata struct { + Type MetadataType + LineNum int +} + +var ( + ErrMalformedMetadata = errors.New("metadata: malformed string") + ErrUnrecognizedMetadata = errors.New("metadata: unrecognized type") +) + +type MetadataError struct { + err error + path string + lineNum int + lineStr string +} + +func (e *MetadataError) Error() string { + return fmt.Sprintf( + "%v: %s:%d:%s", + e.err, + e.path, + e.lineNum, + e.lineStr, + ) +} + +func (e *MetadataError) Unwrap() error { + return e.err +} + +func ReadMetadata(content []byte, path string) (collections.Set[Metadata], collections.Errors) { + metadata := collections.Set[Metadata]{} + mdErrs := collections.Errors{} + // This could be `\r\n` but it won't affect the outcome of this operation. + contentLines := strings.Split(string(content), "\n") + for i, line := range contentLines { + mdidIndex := strings.Index(line, MetadataIdentifier) + if mdidIndex == -1 { + continue + } + mdStr := scanMetadata(line, mdidIndex) + mdComponents := strings.Split(mdStr, ":") + if len(mdComponents) != 2 { + mdErrs = append(mdErrs, &MetadataError{ + path: path, + lineNum: i + 1, + err: ErrMalformedMetadata, + lineStr: line, + }) + continue + } + if IsMetadataType(mdComponents[1]) { + metadata.Add(Metadata{LineNum: i + 1, Type: MetadataType(mdComponents[1])}) + } else { + mdErrs = append(mdErrs, &MetadataError{ + path: path, + lineNum: i + 1, + err: ErrUnrecognizedMetadata, + lineStr: line, + }) + } + } + return metadata, mdErrs +} + +func scanMetadata(line string, index int) string { + mdBytes := []byte{} + i := index + for i < len(line) && !unicode.IsSpace(rune(line[i])) { + mdBytes = append(mdBytes, line[i]) + i++ + } + return string(mdBytes) +} diff --git a/metadata_test.go b/metadata_test.go new file mode 100644 index 0000000..9b0c6f1 --- /dev/null +++ b/metadata_test.go @@ -0,0 +1,79 @@ +package yamlfmt_test + +import ( + "errors" + "testing" + + "github.com/google/yamlfmt" + "github.com/google/yamlfmt/internal/collections" +) + +type errorChecker func(t *testing.T, errs collections.Errors) + +func checkErrNil(t *testing.T, errs collections.Errors) { + combinedErr := errs.Combine() + if combinedErr != nil { + t.Fatalf("expected error to be nil, got: %v", combinedErr) + } +} + +func TestReadMetadata(t *testing.T) { + testCases := []struct { + name string + content string + expected collections.Set[yamlfmt.Metadata] + errCheck errorChecker + }{ + { + name: "contains no metadata", + content: "", + expected: collections.Set[yamlfmt.Metadata]{}, + errCheck: checkErrNil, + }, + { + name: "has ignore metadata", + content: "# !yamlfmt!:ignore\na: 1", + expected: collections.Set[yamlfmt.Metadata]{ + {Type: yamlfmt.MetadataIgnore, LineNum: 1}: {}, + }, + errCheck: checkErrNil, + }, + { + name: "has bad metadata", + content: "# !yamlfmt!fjghgh", + expected: collections.Set[yamlfmt.Metadata]{}, + errCheck: func(t *testing.T, errs collections.Errors) { + if len(errs) != 1 { + t.Fatalf("expected 1 error, got %d:\n%v", len(errs), errs.Combine()) + } + if errors.Unwrap(errs[0]) != yamlfmt.ErrMalformedMetadata { + t.Fatalf("expected ErrMalformedMetadata, got: %v", errs[0]) + } + }, + }, + { + name: "has unrecognized metadata type", + content: "# !yamlfmt!:lulsorandom", + expected: collections.Set[yamlfmt.Metadata]{}, + errCheck: func(t *testing.T, errs collections.Errors) { + if len(errs) != 1 { + t.Fatalf("expected 1 error, got %d:\n%v", len(errs), errs.Combine()) + } + if errors.Unwrap(errs[0]) != yamlfmt.ErrUnrecognizedMetadata { + t.Fatalf("expected ErrUnrecognizedMetadata, got: %v", errs[0]) + } + }, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + md, err := yamlfmt.ReadMetadata([]byte(tc.content), "test.yaml") + if !md.Equals(tc.expected) { + t.Fatalf("Mismatched metadata:\nexpected: %v\ngot: %v", tc.expected, md) + } + t.Logf("got error: %v", err) + tc.errCheck(t, err) + }) + } +} diff --git a/testdata/content_analyzer/has_ignore/x.yaml b/testdata/content_analyzer/has_ignore/x.yaml new file mode 100644 index 0000000..4a76bd3 --- /dev/null +++ b/testdata/content_analyzer/has_ignore/x.yaml @@ -0,0 +1,2 @@ +# !yamlfmt!:ignore +a: 1 diff --git a/testdata/content_analyzer/has_ignore/y.yaml b/testdata/content_analyzer/has_ignore/y.yaml new file mode 100644 index 0000000..a8926a5 --- /dev/null +++ b/testdata/content_analyzer/has_ignore/y.yaml @@ -0,0 +1 @@ +a: 1 diff --git a/testdata/content_analyzer/regex_ignore/x.yaml b/testdata/content_analyzer/regex_ignore/x.yaml new file mode 100644 index 0000000..c0ae45b --- /dev/null +++ b/testdata/content_analyzer/regex_ignore/x.yaml @@ -0,0 +1,2 @@ +# generated by: some tool +a: 1 diff --git a/testdata/content_analyzer/regex_ignore/y.yaml b/testdata/content_analyzer/regex_ignore/y.yaml new file mode 100644 index 0000000..a8926a5 --- /dev/null +++ b/testdata/content_analyzer/regex_ignore/y.yaml @@ -0,0 +1 @@ +a: 1