Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use cacheitem to restore HTTP cache. #4634

Merged
merged 1 commit into from May 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
98 changes: 3 additions & 95 deletions cli/internal/cache/cache_http.go
Expand Up @@ -20,6 +20,7 @@ import (
"github.com/DataDog/zstd"

"github.com/vercel/turbo/cli/internal/analytics"
"github.com/vercel/turbo/cli/internal/cacheitem"
"github.com/vercel/turbo/cli/internal/tarpatch"
"github.com/vercel/turbo/cli/internal/turbopath"
)
Expand Down Expand Up @@ -251,102 +252,9 @@ func (cache *httpCache) retrieve(hash string) (bool, []turbopath.AnchoredSystemP
return true, files, duration, nil
}

// restoreTar returns posix-style repo-relative paths of the files it
// restored. In the future, these should likely be repo-relative system paths
// so that they are suitable for being fed into cache.Put for other caches.
// For now, I think this is working because windows also accepts /-delimited paths.
func restoreTar(root turbopath.AbsoluteSystemPath, reader io.Reader) ([]turbopath.AnchoredSystemPath, error) {
files := []turbopath.AnchoredSystemPath{}
missingLinks := []*tar.Header{}
zr := zstd.NewReader(reader)
var closeError error
defer func() { closeError = zr.Close() }()
tr := tar.NewReader(zr)
for {
hdr, err := tr.Next()
if err != nil {
if err == io.EOF {
for _, link := range missingLinks {
err := restoreSymlink(root, link, true)
if err != nil {
return nil, err
}
}

return files, closeError
}
return nil, err
}
// hdr.Name is always a posix-style path
// FIXME: THIS IS A BUG.
restoredName := turbopath.AnchoredUnixPath(hdr.Name)
files = append(files, restoredName.ToSystemPath())
filename := restoredName.ToSystemPath().RestoreAnchor(root)
if isChild, err := root.ContainsPath(filename); err != nil {
return nil, err
} else if !isChild {
return nil, fmt.Errorf("cannot untar file to %v", filename)
}
switch hdr.Typeflag {
case tar.TypeDir:
if err := filename.MkdirAll(0775); err != nil {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should restore at the permissions set in the tar file, which is why I had to repair the test.

return nil, err
}
case tar.TypeReg:
if dir := filename.Dir(); dir != "." {
if err := dir.MkdirAll(0775); err != nil {
return nil, err
}
}
if f, err := filename.OpenFile(os.O_WRONLY|os.O_TRUNC|os.O_CREATE, os.FileMode(hdr.Mode)); err != nil {
return nil, err
} else if _, err := io.Copy(f, tr); err != nil {
return nil, err
} else if err := f.Close(); err != nil {
return nil, err
}
case tar.TypeSymlink:
if err := restoreSymlink(root, hdr, false); errors.Is(err, errNonexistentLinkTarget) {
missingLinks = append(missingLinks, hdr)
} else if err != nil {
return nil, err
}
default:
log.Printf("Unhandled file type %d for %s", hdr.Typeflag, hdr.Name)
}
}
}

var errNonexistentLinkTarget = errors.New("the link target does not exist")

func restoreSymlink(root turbopath.AbsoluteSystemPath, hdr *tar.Header, allowNonexistentTargets bool) error {
// Note that hdr.Linkname is really the link target
relativeLinkTarget := filepath.FromSlash(hdr.Linkname)
linkFilename := root.UntypedJoin(hdr.Name)
if err := linkFilename.EnsureDir(); err != nil {
return err
}

// TODO: check if this is an absolute path, or if we even care
linkTarget := linkFilename.Dir().UntypedJoin(relativeLinkTarget)
if _, err := linkTarget.Lstat(); err != nil {
if os.IsNotExist(err) {
if !allowNonexistentTargets {
return errNonexistentLinkTarget
}
// if we're allowing nonexistent link targets, proceed to creating the link
} else {
return err
}
}
// Ensure that the link we're about to create doesn't already exist
if err := linkFilename.Remove(); err != nil && !errors.Is(err, os.ErrNotExist) {
return err
}
if err := linkFilename.Symlink(relativeLinkTarget); err != nil {
return err
}
return nil
cache := cacheitem.FromReader(reader, true)
return cache.Restore(root)
}

func (cache *httpCache) Clean(_ turbopath.AbsoluteSystemPath) {
Expand Down
4 changes: 2 additions & 2 deletions cli/internal/cache/cache_http_test.go
Expand Up @@ -81,7 +81,7 @@ func makeValidTar(t *testing.T) *bytes.Buffer {
// my-pkg
h := &tar.Header{
Name: "my-pkg/",
Mode: int64(0644),
Mode: int64(0755),
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Mode 0644 is not a valid configuration for a directory (it lacks the execute/search bit).

Typeflag: tar.TypeDir,
}
if err := tw.WriteHeader(h); err != nil {
Expand Down Expand Up @@ -182,7 +182,7 @@ func TestRestoreTar(t *testing.T) {

expectedFiles := []turbopath.AnchoredSystemPath{
turbopath.AnchoredUnixPath("extra-file").ToSystemPath(),
turbopath.AnchoredUnixPath("my-pkg/").ToSystemPath(),
turbopath.AnchoredUnixPath("my-pkg").ToSystemPath(),
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This output is only consumed inside of the taskSummary. This change effectively aligns FS and HTTP cache outputs.

turbopath.AnchoredUnixPath("my-pkg/some-file").ToSystemPath(),
turbopath.AnchoredUnixPath("my-pkg/link-to-extra-file").ToSystemPath(),
turbopath.AnchoredUnixPath("my-pkg/broken-link").ToSystemPath(),
Expand Down
12 changes: 8 additions & 4 deletions cli/internal/cacheitem/cacheitem.go
Expand Up @@ -7,7 +7,6 @@ import (
"crypto/sha512"
"errors"
"io"
"os"

"github.com/vercel/turbo/cli/internal/turbopath"
)
Expand All @@ -32,7 +31,7 @@ type CacheItem struct {
tw *tar.Writer
zw io.WriteCloser
fileBuffer *bufio.Writer
handle *os.File
handle io.Reader
compressed bool
}

Expand All @@ -57,9 +56,14 @@ func (ci *CacheItem) Close() error {
}

if ci.handle != nil {
if err := ci.handle.Close(); err != nil {
return err
closer, isCloser := ci.handle.(io.Closer)

if isCloser {
if err := closer.Close(); err != nil {
return err
}
}

}

return nil
Expand Down
7 changes: 6 additions & 1 deletion cli/internal/cacheitem/create.go
Expand Up @@ -36,7 +36,12 @@ func Create(path turbopath.AbsoluteSystemPath) (*CacheItem, error) {
// Wires all the writers end-to-end:
// tar.Writer -> zstd.Writer -> fileBuffer -> file
func (ci *CacheItem) init() {
fileBuffer := bufio.NewWriterSize(ci.handle, 2^20) // Flush to disk in 1mb chunks.
writer, isWriter := ci.handle.(io.Writer)
if !isWriter {
panic("can't write to this cache item")
}

fileBuffer := bufio.NewWriterSize(writer, 2^20) // Flush to disk in 1mb chunks.

var tw *tar.Writer
if ci.compressed {
Expand Down
8 changes: 8 additions & 0 deletions cli/internal/cacheitem/restore.go
Expand Up @@ -14,6 +14,14 @@ import (
"github.com/vercel/turbo/cli/internal/turbopath"
)

// FromReader returns a CacheItem backed by the supplied reader rather than by
// a file opened from a path. It performs no I/O itself: it only records reader
// as the item's handle together with the compressed flag.
// NOTE(review): compressed presumably tells the restore path whether the
// stream must be decompressed before untarring — confirm against Restore.
func FromReader(reader io.Reader, compressed bool) *CacheItem {
return &CacheItem{
handle: reader,
compressed: compressed,
}
}

// Open returns an existing CacheItem at the specified path.
func Open(path turbopath.AbsoluteSystemPath) (*CacheItem, error) {
handle, err := sequential.OpenFile(path.ToString(), os.O_RDONLY, 0777)
Expand Down