[ARVADOS] updated: 1.3.0-58-g076d8a0d4
Git user
git at public.curoverse.com
Wed Dec 12 15:22:39 EST 2018
Summary of changes:
services/crunch-run/background.go | 57 ++++++++++++++++++++++++++++++++-------
1 file changed, 48 insertions(+), 9 deletions(-)
via 076d8a0d4885f7bfa22962a745a5aacb48fa9be1 (commit)
from b205525d0b7c7b9042513fe77d2e8061534208ae (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
commit 076d8a0d4885f7bfa22962a745a5aacb48fa9be1
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date: Wed Dec 12 15:21:51 2018 -0500
14360: Fix race between per-container lock and garbage collection.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>
diff --git a/services/crunch-run/background.go b/services/crunch-run/background.go
index be5b2e685..a50853837 100644
--- a/services/crunch-run/background.go
+++ b/services/crunch-run/background.go
@@ -39,15 +39,31 @@ func Detach(uuid string, args []string, stdout, stderr io.Writer) int {
return exitcode(stderr, detach(uuid, args, stdout, stderr))
}
func detach(uuid string, args []string, stdout, stderr io.Writer) error {
- lockfile, err := os.OpenFile(filepath.Join(lockdir, lockprefix+uuid+locksuffix), os.O_CREATE|os.O_RDWR, 0700)
+ lockfile, err := func() (*os.File, error) {
+ // We must hold the dir-level lock between
+ // opening/creating the lockfile and acquiring LOCK_EX
+ // on it, to avoid racing with the ListProcess's
+ // alive-checking and garbage collection.
+ dirlock, err := lockall()
+ if err != nil {
+ return nil, err
+ }
+ defer dirlock.Close()
+ lockfile, err := os.OpenFile(filepath.Join(lockdir, lockprefix+uuid+locksuffix), os.O_CREATE|os.O_RDWR, 0700)
+ if err != nil {
+ return nil, err
+ }
+ err = syscall.Flock(int(lockfile.Fd()), syscall.LOCK_EX|syscall.LOCK_NB)
+ if err != nil {
+ lockfile.Close()
+ return nil, err
+ }
+ return lockfile, nil
+ }()
if err != nil {
return err
}
defer lockfile.Close()
- err = syscall.Flock(int(lockfile.Fd()), syscall.LOCK_EX|syscall.LOCK_NB)
- if err != nil {
- return err
- }
lockfile.Truncate(0)
outfile, err := ioutil.TempFile("", "crunch-run-"+uuid+"-stdout-")
@@ -157,19 +173,24 @@ func ListProcesses(stdout, stderr io.Writer) int {
}
defer f.Close()
- // TODO: Do this check without risk of disrupting lock
- // acquisition during races, e.g., by connecting to a
- // unix socket or checking /proc/$pid/fd/$n ->
- // lockfile.
+ // Ensure other processes don't acquire this lockfile
+ // after we have decided it is abandoned but before we
+ // have deleted it.
+ dirlock, err := lockall()
+ if err != nil {
+ return err
+ }
err = syscall.Flock(int(f.Fd()), syscall.LOCK_SH|syscall.LOCK_NB)
if err == nil {
// lockfile is stale
err := os.Remove(path)
+ dirlock.Close()
if err != nil {
fmt.Fprintln(stderr, err)
}
return nil
}
+ dirlock.Close()
var pi procinfo
err = json.NewDecoder(f).Decode(&pi)
@@ -196,3 +217,21 @@ func exitcode(stderr io.Writer, err error) int {
}
return 0
}
+
+// Acquire a dir-level lock. Must be held while creating or deleting
+// container-specific lockfiles, to avoid races during the intervals
+// when those container-specific lockfiles are open but not locked.
+//
+// Caller releases the lock by closing the returned file.
+func lockall() (*os.File, error) {
+ f, err := os.OpenFile(filepath.Join(lockdir, lockprefix+"all"+locksuffix), os.O_CREATE|os.O_RDWR, 0700)
+ if err != nil {
+ return nil, err
+ }
+ err = syscall.Flock(int(f.Fd()), syscall.LOCK_EX)
+ if err != nil {
+ f.Close()
+ return nil, err
+ }
+ return f, nil
+}
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list