Skip to content

Commit

Permalink
lockedfile: use a retry loop to suppress EDEADLK on AIX and Solaris (#92
Browse files Browse the repository at this point in the history
)

Apply upstream CL 222277 per @bcmills

Automatically applied via:

(command cd /path/to/go; git diff c6f678b6efd6622d335e6d4b659282bb2d16f5ba~1 c6f678b6efd6622d335e6d4b659282bb2d16f5ba) | git apply -p5

with a minor fix to then use github.com/rogpeppe/go-internal/testenv
instead of internal/testenv

Fixes #91
  • Loading branch information
myitcv committed May 6, 2020
1 parent a7f0f53 commit be09da5
Show file tree
Hide file tree
Showing 2 changed files with 164 additions and 5 deletions.
70 changes: 65 additions & 5 deletions lockedfile/internal/filelock/filelock_fcntl.go
Expand Up @@ -13,19 +13,19 @@
// or an F_OFD_SETLK command for 'fcntl', that allows for better concurrency and
// does not require per-inode bookkeeping in the application.
//
// TODO(bcmills): If we add a build tag for Illumos (see golang.org/issue/20603)
// then Illumos should use F_OFD_SETLK, and the resulting code would be as
// simple as filelock_unix.go. We will still need the code in this file for AIX
// or as long as Oracle Solaris provides only F_SETLK.
// TODO(golang.org/issue/35618): add a syscall.Flock binding for Illumos and
// switch it over to use filelock_unix.go.

package filelock

import (
"errors"
"io"
"math/rand"
"os"
"sync"
"syscall"
"time"
)

type lockType int16
Expand Down Expand Up @@ -91,7 +91,67 @@ func lock(f File, lt lockType) (err error) {
wait <- f
}

err = setlkw(f.Fd(), lt)
// Spurious EDEADLK errors arise on platforms that compute deadlock graphs at
// the process, rather than thread, level. Consider processes P and Q, with
// threads P.1, P.2, and Q.3. The following trace is NOT a deadlock, but will be
// reported as a deadlock on systems that consider only process granularity:
//
// P.1 locks file A.
// Q.3 locks file B.
// Q.3 blocks on file A.
// P.2 blocks on file B. (This is erroneously reported as a deadlock.)
// P.1 unlocks file A.
// Q.3 unblocks and locks file A.
// Q.3 unlocks files A and B.
// P.2 unblocks and locks file B.
// P.2 unlocks file B.
//
// These spurious errors were observed in practice on AIX and Solaris in
// cmd/go: see https://golang.org/issue/32817.
//
// We work around this bug by treating EDEADLK as always spurious. If there
// really is a lock-ordering bug between the interacting processes, it will
// become a livelock instead, but that's not appreciably worse than if we had
// a proper flock implementation (which generally does not even attempt to
// diagnose deadlocks).
//
// In the above example, that changes the trace to:
//
// P.1 locks file A.
// Q.3 locks file B.
// Q.3 blocks on file A.
// P.2 spuriously fails to lock file B and goes to sleep.
// P.1 unlocks file A.
// Q.3 unblocks and locks file A.
// Q.3 unlocks files A and B.
// P.2 wakes up and locks file B.
// P.2 unlocks file B.
//
// We know that the retry loop will not introduce a *spurious* livelock
// because, according to the POSIX specification, EDEADLK is only to be
// returned when “the lock is blocked by a lock from another process”.
// If that process is blocked on some lock that we are holding, then the
// resulting livelock is due to a real deadlock (and would manifest as such
// when using, for example, the flock implementation of this package).
// If the other process is *not* blocked on some other lock that we are
// holding, then it will eventually release the requested lock.

nextSleep := 1 * time.Millisecond
const maxSleep = 500 * time.Millisecond
for {
err = setlkw(f.Fd(), lt)
if err != syscall.EDEADLK {
break
}
time.Sleep(nextSleep)

nextSleep += nextSleep
if nextSleep > maxSleep {
nextSleep = maxSleep
}
// Apply 10% jitter to avoid synchronizing collisions when we finally unblock.
nextSleep += time.Duration((0.1*rand.Float64() - 0.05) * float64(nextSleep))
}

if err != nil {
unlock(f)
Expand Down
99 changes: 99 additions & 0 deletions lockedfile/lockedfile_test.go
Expand Up @@ -8,12 +8,16 @@
package lockedfile_test

import (
"fmt"
"io/ioutil"
"os"
"os/exec"
"path/filepath"
"testing"
"time"

"github.com/rogpeppe/go-internal/testenv"

"github.com/rogpeppe/go-internal/lockedfile"
)

Expand Down Expand Up @@ -172,3 +176,98 @@ func TestCanLockExistingFile(t *testing.T) {
f.Close()
wait(t)
}

// TestSpuriousEDEADLK verifies that the spurious EDEADLK reported in
// https://golang.org/issue/32817 no longer occurs.
func TestSpuriousEDEADLK(t *testing.T) {
// P.1 locks file A.
// Q.3 locks file B.
// Q.3 blocks on file A.
// P.2 blocks on file B. (Spurious EDEADLK occurs here.)
// P.1 unlocks file A.
// Q.3 unblocks and locks file A.
// Q.3 unlocks files A and B.
// P.2 unblocks and locks file B.
// P.2 unlocks file B.

testenv.MustHaveExec(t)

dirVar := t.Name() + "DIR"

if dir := os.Getenv(dirVar); dir != "" {
// Q.3 locks file B.
b, err := lockedfile.Edit(filepath.Join(dir, "B"))
if err != nil {
t.Fatal(err)
}
defer b.Close()

if err := ioutil.WriteFile(filepath.Join(dir, "locked"), []byte("ok"), 0666); err != nil {
t.Fatal(err)
}

// Q.3 blocks on file A.
a, err := lockedfile.Edit(filepath.Join(dir, "A"))
// Q.3 unblocks and locks file A.
if err != nil {
t.Fatal(err)
}
defer a.Close()

// Q.3 unlocks files A and B.
return
}

dir, remove := mustTempDir(t)
defer remove()

// P.1 locks file A.
a, err := lockedfile.Edit(filepath.Join(dir, "A"))
if err != nil {
t.Fatal(err)
}

cmd := exec.Command(os.Args[0], "-test.run="+t.Name())
cmd.Env = append(os.Environ(), fmt.Sprintf("%s=%s", dirVar, dir))

qDone := make(chan struct{})
waitQ := mustBlock(t, "Edit A and B in subprocess", func() {
out, err := cmd.CombinedOutput()
if err != nil {
t.Errorf("%v:\n%s", err, out)
}
close(qDone)
})

// Wait until process Q has either failed or locked file B.
// Otherwise, P.2 might not block on file B as intended.
locked:
for {
if _, err := os.Stat(filepath.Join(dir, "locked")); !os.IsNotExist(err) {
break locked
}
select {
case <-qDone:
break locked
case <-time.After(1 * time.Millisecond):
}
}

waitP2 := mustBlock(t, "Edit B", func() {
// P.2 blocks on file B. (Spurious EDEADLK occurs here.)
b, err := lockedfile.Edit(filepath.Join(dir, "B"))
// P.2 unblocks and locks file B.
if err != nil {
t.Error(err)
return
}
// P.2 unlocks file B.
b.Close()
})

// P.1 unlocks file A.
a.Close()

waitQ(t)
waitP2(t)
}

0 comments on commit be09da5

Please sign in to comment.