[ARVADOS] created: 1.1.0-51-g0eb7924

Git user git at public.curoverse.com
Wed Nov 1 17:15:38 EDT 2017


        at  0eb79244613af42e17effc4437637da282bdaace (commit)


commit 0eb79244613af42e17effc4437637da282bdaace
Author: Peter Amstutz <pamstutz at veritasgenetics.com>
Date:   Wed Nov 1 17:10:20 2017 -0400

    12183: Update documentation.
    
    Improve error message about follow symlink chains that are too long.
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz at veritasgenetics.com>

diff --git a/doc/_includes/_mount_types.liquid b/doc/_includes/_mount_types.liquid
index 38770ea..db1661a 100644
--- a/doc/_includes/_mount_types.liquid
+++ b/doc/_includes/_mount_types.liquid
@@ -122,3 +122,7 @@ table(table table-bordered table-condensed).
 "output_path": "/tmp"
 </code></pre>|Specified path refers to the file *hello.txt* in the *alice* subdirectory|./*foo* 030326... 0:13:*bar*\n
 *Note:* Here the subdirectory *alice* is replaced with *foo* and the filename *hello.txt* from this subdirectory is replaced with *bar*.|
+
+h2(#symlinks-in-output). Symlinks in output
+
+When a container's output_path is a tmp mount backed by local disk, this output directory can contain symlinks to other files in the the output directory, or to collection mount points.  If the symlink leads to a collection mount, efficiently copy the collection into the output collection.  Symlinks leading to files or directories are expanded and created as regular files in the output collection.  Further, whether symlinks are relative or absolute, every symlink target (even targets that are symlinks themselves) must either remain in the output directory or point to a collection mount.
diff --git a/doc/api/methods/container_requests.html.textile.liquid b/doc/api/methods/container_requests.html.textile.liquid
index 6c4d3cd..7491fec 100644
--- a/doc/api/methods/container_requests.html.textile.liquid
+++ b/doc/api/methods/container_requests.html.textile.liquid
@@ -47,7 +47,7 @@ table(table table-bordered table-condensed).
 |environment|hash|Environment variables and values that should be set in the container environment (@docker run --env@). This augments and (when conflicts exist) overrides environment variables given in the image's Dockerfile.||
 |cwd|string|Initial working directory, given as an absolute path (in the container) or a path relative to the WORKDIR given in the image's Dockerfile.|Required.|
 |command|array of strings|Command to execute in the container.|Required. e.g., @["echo","hello"]@|
-|output_path|string|Path to a directory or file inside the container that should be preserved as container's output when it finishes. This path must be, or be inside, one of the mount targets. For best performance, point output_path to a writable collection mount. Also, see "Pre-populate output using Mount points":#pre-populate-output for details regarding optional output pre-population using mount points.|Required.|
+|output_path|string|Path to a directory or file inside the container that should be preserved as container's output when it finishes. This path must be one of the mount targets. For best performance, point output_path to a writable collection mount.  See "Pre-populate output using Mount points":#pre-populate-output for details regarding optional output pre-population using mount points and "Symlinks in output":#symlinks-in-output for additional details.|Required.|
 |output_name|string|Desired name for the output collection. If null, a name will be assigned automatically.||
 |output_ttl|integer|Desired lifetime for the output collection, in seconds. If zero, the output collection will not be deleted automatically.||
 |priority|integer|Higher value means spend more resources on this container_request, i.e., go ahead of other queued containers, bring up more nodes etc.|Priority 0 means a container should not be run on behalf of this request. Clients are expected to submit container requests with zero priority in order to preview the container that will be used to satisfy it. Priority can be null if and only if state!="Committed".|
diff --git a/services/crunch-run/crunchrun.go b/services/crunch-run/crunchrun.go
index 56cee10..ca46bb0 100644
--- a/services/crunch-run/crunchrun.go
+++ b/services/crunch-run/crunchrun.go
@@ -920,7 +920,7 @@ func (runner *ContainerRunner) WaitFinish() (err error) {
 	return nil
 }
 
-var NotInOutputDirError = fmt.Errorf("Must point to path within the output directory")
+var ErrNotInOutputDir = fmt.Errorf("Must point to path within the output directory")
 
 func (runner *ContainerRunner) derefOutputSymlink(path string, startinfo os.FileInfo) (tgt string, readlinktgt string, info os.FileInfo, err error) {
 	// Follow symlinks if necessary
@@ -929,9 +929,9 @@ func (runner *ContainerRunner) derefOutputSymlink(path string, startinfo os.File
 	readlinktgt = ""
 	nextlink := path
 	for followed := 0; info.Mode()&os.ModeSymlink != 0; followed++ {
-		if followed >= 16 {
+		if followed >= limitFollowSymlinks {
 			// Got stuck in a loop or just a pathological number of links, give up.
-			err = fmt.Errorf("Followed too many symlinks from path %q", path)
+			err = fmt.Errorf("Followed more than %v symlinks from path %q", limitFollowSymlinks, path)
 			return
 		}
 
@@ -953,7 +953,7 @@ func (runner *ContainerRunner) derefOutputSymlink(path string, startinfo os.File
 			// After dereferencing, symlink target must either be
 			// within output directory, or must point to a
 			// collection mount.
-			err = NotInOutputDirError
+			err = ErrNotInOutputDir
 			return
 		}
 
@@ -971,11 +971,14 @@ func (runner *ContainerRunner) derefOutputSymlink(path string, startinfo os.File
 	return
 }
 
+var limitFollowSymlinks = 10
+
 // UploadFile uploads files within the output directory, with special handling
 // for symlinks. If the symlink leads to a keep mount, copy the manifest text
 // from the keep mount into the output manifestText.  Ensure that whether
-// symlinks are relative or absolute, they must remain within the output
-// directory.
+// symlinks are relative or absolute, every symlink target (even targets that
+// are symlinks themselves) must either remain in the output directory or point
+// to a collection mount.
 //
 // Assumes initial value of "path" is absolute, and located within runner.HostOutputDir.
 func (runner *ContainerRunner) UploadOutputFile(
@@ -996,10 +999,10 @@ func (runner *ContainerRunner) UploadOutputFile(
 		return "", infoerr
 	}
 
-	if followed >= 8 {
+	if followed >= limitFollowSymlinks {
 		// Got stuck in a loop or just a pathological number of
 		// directory links, give up.
-		err = fmt.Errorf("Followed too many symlinks from path %q", path)
+		err = fmt.Errorf("Followed more than %v symlinks from path %q", limitFollowSymlinks, path)
 		return
 	}
 

commit 3371bf4ce0a99e6e944c7fcc54866cfec2718fc6
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date:   Mon Oct 16 16:39:36 2017 -0400

    12183: Handle circular directory symlinks
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz at veritasgenetics.com>

diff --git a/services/crunch-run/crunchrun.go b/services/crunch-run/crunchrun.go
index 3903d24..56cee10 100644
--- a/services/crunch-run/crunchrun.go
+++ b/services/crunch-run/crunchrun.go
@@ -920,6 +920,8 @@ func (runner *ContainerRunner) WaitFinish() (err error) {
 	return nil
 }
 
+var NotInOutputDirError = fmt.Errorf("Must point to path within the output directory")
+
 func (runner *ContainerRunner) derefOutputSymlink(path string, startinfo os.FileInfo) (tgt string, readlinktgt string, info os.FileInfo, err error) {
 	// Follow symlinks if necessary
 	info = startinfo
@@ -927,7 +929,7 @@ func (runner *ContainerRunner) derefOutputSymlink(path string, startinfo os.File
 	readlinktgt = ""
 	nextlink := path
 	for followed := 0; info.Mode()&os.ModeSymlink != 0; followed++ {
-		if followed >= 32 {
+		if followed >= 16 {
 			// Got stuck in a loop or just a pathological number of links, give up.
 			err = fmt.Errorf("Followed too many symlinks from path %q", path)
 			return
@@ -951,14 +953,15 @@ func (runner *ContainerRunner) derefOutputSymlink(path string, startinfo os.File
 			// After dereferencing, symlink target must either be
 			// within output directory, or must point to a
 			// collection mount.
-			break
+			err = NotInOutputDirError
+			return
 		}
 
 		info, err = os.Lstat(tgt)
 		if err != nil {
 			// tgt
-			err = fmt.Errorf("Symlink in output %q points to invalid location %q, must exist, be readable, and point to path within the output directory.",
-				path[len(runner.HostOutputDir):], readlinktgt)
+			err = fmt.Errorf("Symlink in output %q points to invalid location %q: %v",
+				path[len(runner.HostOutputDir):], readlinktgt, err)
 			return
 		}
 
@@ -982,7 +985,8 @@ func (runner *ContainerRunner) UploadOutputFile(
 	binds []string,
 	walkUpload *WalkUpload,
 	relocateFrom string,
-	relocateTo string) (manifestText string, err error) {
+	relocateTo string,
+	followed int) (manifestText string, err error) {
 
 	if info.Mode().IsDir() {
 		return
@@ -992,13 +996,20 @@ func (runner *ContainerRunner) UploadOutputFile(
 		return "", infoerr
 	}
 
+	if followed >= 8 {
+		// Got stuck in a loop or just a pathological number of
+		// directory links, give up.
+		err = fmt.Errorf("Followed too many symlinks from path %q", path)
+		return
+	}
+
 	// When following symlinks, the source path may need to be logically
 	// relocated to some other path within the output collection.  Remove
 	// the relocateFrom prefix and replace it with relocateTo.
 	relocated := relocateTo + path[len(relocateFrom):]
 
 	tgt, readlinktgt, info, derefErr := runner.derefOutputSymlink(path, info)
-	if derefErr != nil {
+	if derefErr != nil && derefErr != NotInOutputDirError {
 		return "", derefErr
 	}
 
@@ -1023,7 +1034,7 @@ func (runner *ContainerRunner) UploadOutputFile(
 
 	// If target is not a collection mount, it must be located within the
 	// output directory, otherwise it is an error.
-	if !strings.HasPrefix(tgt, runner.HostOutputDir+"/") {
+	if derefErr == NotInOutputDirError {
 		err = fmt.Errorf("Symlink in output %q points to invalid location %q, must point to path within the output directory.",
 			path[len(runner.HostOutputDir):], readlinktgt)
 		return
@@ -1040,7 +1051,8 @@ func (runner *ContainerRunner) UploadOutputFile(
 		// so they appear under the original symlink path.
 		err = filepath.Walk(tgt, func(walkpath string, walkinfo os.FileInfo, walkerr error) error {
 			var m string
-			m, walkerr = runner.UploadOutputFile(walkpath, walkinfo, walkerr, binds, walkUpload, tgt, relocated)
+			m, walkerr = runner.UploadOutputFile(walkpath, walkinfo, walkerr,
+				binds, walkUpload, tgt, relocated, followed+1)
 			if walkerr == nil {
 				manifestText = manifestText + m
 			}
@@ -1103,7 +1115,7 @@ func (runner *ContainerRunner) CaptureOutput() error {
 
 		var m string
 		err = filepath.Walk(runner.HostOutputDir, func(path string, info os.FileInfo, err error) error {
-			m, err = runner.UploadOutputFile(path, info, err, binds, walkUpload, "", "")
+			m, err = runner.UploadOutputFile(path, info, err, binds, walkUpload, "", "", 0)
 			if err == nil {
 				manifestText = manifestText + m
 			}
diff --git a/services/crunch-run/crunchrun_test.go b/services/crunch-run/crunchrun_test.go
index 098a642..d3c9990 100644
--- a/services/crunch-run/crunchrun_test.go
+++ b/services/crunch-run/crunchrun_test.go
@@ -1715,3 +1715,21 @@ func (s *TestSuite) TestEvalSymlinks(c *C) {
 		c.Assert(err, NotNil)
 	}
 }
+
+func (s *TestSuite) TestEvalSymlinkDir(c *C) {
+	cr := NewContainerRunner(&ArvTestClient{callraw: true}, &KeepTestClient{}, nil, "zzzzz-zzzzz-zzzzzzzzzzzzzzz")
+
+	realTemp, err := ioutil.TempDir("", "crunchrun_test-")
+	c.Assert(err, IsNil)
+	defer os.RemoveAll(realTemp)
+
+	cr.HostOutputDir = realTemp
+
+	// Absolute path outside output dir
+	os.Symlink(".", realTemp+"/loop")
+
+	v := "loop"
+	info, err := os.Lstat(realTemp + "/" + v)
+	_, err = cr.UploadOutputFile(realTemp+"/"+v, info, err, []string{}, nil, "", "", 0)
+	c.Assert(err, NotNil)
+}

commit cf3f89d7645fcc6aafa376fea4fd5518d9a15cf0
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date:   Mon Oct 16 15:26:55 2017 -0400

    12183: Fix tests.
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz at veritasgenetics.com>

diff --git a/services/crunch-run/crunchrun.go b/services/crunch-run/crunchrun.go
index 2696af5..3903d24 100644
--- a/services/crunch-run/crunchrun.go
+++ b/services/crunch-run/crunchrun.go
@@ -920,39 +920,12 @@ func (runner *ContainerRunner) WaitFinish() (err error) {
 	return nil
 }
 
-// UploadFile uploads files within the output directory, with special handling
-// for symlinks. If the symlink leads to a keep mount, copy the manifest text
-// from the keep mount into the output manifestText.  Ensure that whether
-// symlinks are relative or absolute, they must remain within the output
-// directory.
-//
-// Assumes initial value of "path" is absolute, and located within runner.HostOutputDir.
-func (runner *ContainerRunner) UploadOutputFile(
-	path string,
-	info os.FileInfo,
-	infoerr error,
-	binds []string,
-	walkUpload *WalkUpload,
-	relocateFrom string,
-	relocateTo string) (manifestText string, err error) {
-
-	if info.Mode().IsDir() {
-		return
-	}
-
-	if infoerr != nil {
-		return "", infoerr
-	}
-
-	// When following symlinks, the source path may need to be logically
-	// relocated to some other path within the output collection.  Remove
-	// the relocateFrom prefix and replace it with relocateTo.
-	relocated := relocateTo + path[len(relocateFrom):]
-
+func (runner *ContainerRunner) derefOutputSymlink(path string, startinfo os.FileInfo) (tgt string, readlinktgt string, info os.FileInfo, err error) {
 	// Follow symlinks if necessary
-	tgt := path
+	info = startinfo
+	tgt = path
+	readlinktgt = ""
 	nextlink := path
-	readlinktgt := ""
 	for followed := 0; info.Mode()&os.ModeSymlink != 0; followed++ {
 		if followed >= 32 {
 			// Got stuck in a loop or just a pathological number of links, give up.
@@ -989,15 +962,46 @@ func (runner *ContainerRunner) UploadOutputFile(
 			return
 		}
 
-		if info.Mode().IsRegular() {
-			// Symlink leads to regular file.  Need to read from
-			// the target but upload it at the original path.
-			return "", walkUpload.UploadFile(relocated, tgt)
-		}
-
 		nextlink = tgt
 	}
 
+	return
+}
+
+// UploadFile uploads files within the output directory, with special handling
+// for symlinks. If the symlink leads to a keep mount, copy the manifest text
+// from the keep mount into the output manifestText.  Ensure that whether
+// symlinks are relative or absolute, they must remain within the output
+// directory.
+//
+// Assumes initial value of "path" is absolute, and located within runner.HostOutputDir.
+func (runner *ContainerRunner) UploadOutputFile(
+	path string,
+	info os.FileInfo,
+	infoerr error,
+	binds []string,
+	walkUpload *WalkUpload,
+	relocateFrom string,
+	relocateTo string) (manifestText string, err error) {
+
+	if info.Mode().IsDir() {
+		return
+	}
+
+	if infoerr != nil {
+		return "", infoerr
+	}
+
+	// When following symlinks, the source path may need to be logically
+	// relocated to some other path within the output collection.  Remove
+	// the relocateFrom prefix and replace it with relocateTo.
+	relocated := relocateTo + path[len(relocateFrom):]
+
+	tgt, readlinktgt, info, derefErr := runner.derefOutputSymlink(path, info)
+	if derefErr != nil {
+		return "", derefErr
+	}
+
 	// go through mounts and try reverse map to collection reference
 	for _, bind := range binds {
 		mnt := runner.Container.Mounts[bind]
@@ -1026,7 +1030,7 @@ func (runner *ContainerRunner) UploadOutputFile(
 	}
 
 	if info.Mode().IsRegular() {
-		return "", walkUpload.UploadFile(relocated, path)
+		return "", walkUpload.UploadFile(relocated, tgt)
 	}
 
 	if info.Mode().IsDir() {
diff --git a/services/crunch-run/crunchrun_test.go b/services/crunch-run/crunchrun_test.go
index c67988e..098a642 100644
--- a/services/crunch-run/crunchrun_test.go
+++ b/services/crunch-run/crunchrun_test.go
@@ -1699,7 +1699,7 @@ func (s *TestSuite) TestEvalSymlinks(c *C) {
 	os.Symlink("/etc/passwd", realTemp+"/p1")
 
 	// Relative outside output dir
-	os.Symlink("..", realTemp+"/p2")
+	os.Symlink("../zip", realTemp+"/p2")
 
 	// Circular references
 	os.Symlink("p4", realTemp+"/p3")
@@ -1711,7 +1711,7 @@ func (s *TestSuite) TestEvalSymlinks(c *C) {
 
 	for _, v := range []string{"p1", "p2", "p3", "p4", "p5"} {
 		info, err := os.Lstat(realTemp + "/" + v)
-		_, err = cr.UploadOutputFile(realTemp+"/"+v, info, err, []string{}, nil, "", "")
+		_, _, _, err = cr.derefOutputSymlink(realTemp+"/"+v, info)
 		c.Assert(err, NotNil)
 	}
 }

commit 780e9fb871fd5ef04f9f01d077b739552ec0621d
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date:   Mon Oct 16 14:22:58 2017 -0400

    12183:  Refactor derefOutputSymlink into its own method.
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz at veritasgenetics.com>

diff --git a/services/crunch-run/crunchrun.go b/services/crunch-run/crunchrun.go
index 0f68714..2696af5 100644
--- a/services/crunch-run/crunchrun.go
+++ b/services/crunch-run/crunchrun.go
@@ -936,6 +936,10 @@ func (runner *ContainerRunner) UploadOutputFile(
 	relocateFrom string,
 	relocateTo string) (manifestText string, err error) {
 
+	if info.Mode().IsDir() {
+		return
+	}
+
 	if infoerr != nil {
 		return "", infoerr
 	}
@@ -945,25 +949,23 @@ func (runner *ContainerRunner) UploadOutputFile(
 	// the relocateFrom prefix and replace it with relocateTo.
 	relocated := relocateTo + path[len(relocateFrom):]
 
-	if info.Mode().IsRegular() {
-		return "", walkUpload.UploadFile(relocated, path)
-	}
-
-	// Not a regular file, try to follow symlinks
-	var nextlink = path
-	for n := 0; n < 32; n++ {
-		if info.Mode()&os.ModeSymlink == 0 {
-			// Not a symlink, don't do anything
+	// Follow symlinks if necessary
+	tgt := path
+	nextlink := path
+	readlinktgt := ""
+	for followed := 0; info.Mode()&os.ModeSymlink != 0; followed++ {
+		if followed >= 32 {
+			// Got stuck in a loop or just a pathological number of links, give up.
+			err = fmt.Errorf("Followed too many symlinks from path %q", path)
 			return
 		}
 
-		var readlinktgt string
 		readlinktgt, err = os.Readlink(nextlink)
 		if err != nil {
 			return
 		}
 
-		tgt := readlinktgt
+		tgt = readlinktgt
 		if !strings.HasPrefix(tgt, "/") {
 			// Relative symlink, resolve it to host path
 			tgt = filepath.Join(filepath.Dir(path), tgt)
@@ -972,38 +974,17 @@ func (runner *ContainerRunner) UploadOutputFile(
 			// Absolute symlink to container output path, adjust it to host output path.
 			tgt = filepath.Join(runner.HostOutputDir, tgt[len(runner.Container.OutputPath):])
 		}
-
-		// go through mounts and try reverse map to collection reference
-		for _, bind := range binds {
-			mnt := runner.Container.Mounts[bind]
-			if tgt == bind || strings.HasPrefix(tgt, bind+"/") {
-				// get path relative to bind
-				targetSuffix := tgt[len(bind):]
-
-				// Copy mount and adjust the path to add path relative to the bind
-				adjustedMount := mnt
-				adjustedMount.Path = filepath.Join(adjustedMount.Path, targetSuffix)
-
-				// Terminates in this keep mount, so add the
-				// manifest text at appropriate location.
-				outputSuffix := path[len(runner.HostOutputDir):]
-				manifestText, err = runner.getCollectionManifestForPath(adjustedMount, outputSuffix)
-				return
-			}
-		}
-
-		// If target is not a collection mount, it must be within the
-		// output directory, otherwise it is an error.
 		if !strings.HasPrefix(tgt, runner.HostOutputDir+"/") {
-			err = fmt.Errorf("Output directory symlink %q points to invalid location %q, must point to mount or output directory.",
-				path[len(runner.HostOutputDir):], readlinktgt)
-			return
+			// After dereferencing, symlink target must either be
+			// within output directory, or must point to a
+			// collection mount.
+			break
 		}
 
 		info, err = os.Lstat(tgt)
 		if err != nil {
-			// tgt doesn't exist or lacks permissions
-			err = fmt.Errorf("Output directory symlink %q points to invalid location %q, must point to mount or output directory.",
+			// tgt
+			err = fmt.Errorf("Symlink in output %q points to invalid location %q, must exist, be readable, and point to path within the output directory.",
 				path[len(runner.HostOutputDir):], readlinktgt)
 			return
 		}
@@ -1014,26 +995,56 @@ func (runner *ContainerRunner) UploadOutputFile(
 			return "", walkUpload.UploadFile(relocated, tgt)
 		}
 
-		if info.Mode().IsDir() {
-			// Symlink leads to directory.  Walk() doesn't follow
-			// directory symlinks, so we walk the target directory
-			// instead.  Within the walk, file paths are relocated
-			// so they appear under the original symlink path.
-			err = filepath.Walk(tgt, func(walkpath string, walkinfo os.FileInfo, walkerr error) error {
-				var m string
-				m, walkerr = runner.UploadOutputFile(walkpath, walkinfo, walkerr, binds, walkUpload, tgt, relocated)
-				if walkerr == nil {
-					manifestText = manifestText + m
-				}
-				return walkerr
-			})
+		nextlink = tgt
+	}
+
+	// go through mounts and try reverse map to collection reference
+	for _, bind := range binds {
+		mnt := runner.Container.Mounts[bind]
+		if tgt == bind || strings.HasPrefix(tgt, bind+"/") {
+			// get path relative to bind
+			targetSuffix := tgt[len(bind):]
+
+			// Copy mount and adjust the path to add path relative to the bind
+			adjustedMount := mnt
+			adjustedMount.Path = filepath.Join(adjustedMount.Path, targetSuffix)
+
+			// Terminates in this keep mount, so add the
+			// manifest text at appropriate location.
+			outputSuffix := path[len(runner.HostOutputDir):]
+			manifestText, err = runner.getCollectionManifestForPath(adjustedMount, outputSuffix)
 			return
 		}
+	}
 
-		nextlink = tgt
+	// If target is not a collection mount, it must be located within the
+	// output directory, otherwise it is an error.
+	if !strings.HasPrefix(tgt, runner.HostOutputDir+"/") {
+		err = fmt.Errorf("Symlink in output %q points to invalid location %q, must point to path within the output directory.",
+			path[len(runner.HostOutputDir):], readlinktgt)
+		return
 	}
-	// Got stuck in a loop or just a pathological number of links, give up.
-	err = fmt.Errorf("Followed too many symlinks from path %q", path)
+
+	if info.Mode().IsRegular() {
+		return "", walkUpload.UploadFile(relocated, path)
+	}
+
+	if info.Mode().IsDir() {
+		// Symlink leads to directory.  Walk() doesn't follow
+		// directory symlinks, so we walk the target directory
+		// instead.  Within the walk, file paths are relocated
+		// so they appear under the original symlink path.
+		err = filepath.Walk(tgt, func(walkpath string, walkinfo os.FileInfo, walkerr error) error {
+			var m string
+			m, walkerr = runner.UploadOutputFile(walkpath, walkinfo, walkerr, binds, walkUpload, tgt, relocated)
+			if walkerr == nil {
+				manifestText = manifestText + m
+			}
+			return walkerr
+		})
+		return
+	}
+
 	return
 }
 

commit aa9fbcd3fbcc653fef23ca9f583d16e535c6ffb9
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date:   Thu Oct 5 10:40:04 2017 -0400

    12183: Add comment about purpose of "relocated"
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz at veritasgenetics.com>

diff --git a/services/crunch-run/crunchrun.go b/services/crunch-run/crunchrun.go
index bd58201..0f68714 100644
--- a/services/crunch-run/crunchrun.go
+++ b/services/crunch-run/crunchrun.go
@@ -940,6 +940,9 @@ func (runner *ContainerRunner) UploadOutputFile(
 		return "", infoerr
 	}
 
+	// When following symlinks, the source path may need to be logically
+	// relocated to some other path within the output collection.  Remove
+	// the relocateFrom prefix and replace it with relocateTo.
 	relocated := relocateTo + path[len(relocateFrom):]
 
 	if info.Mode().IsRegular() {

commit 68c4f5c494e6d25ca50275f6feabd5731f96ac0e
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date:   Thu Oct 5 08:47:04 2017 -0400

    12183: Refactor file upload so there is one walk to handle both symlinks and
    regular files.
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz at veritasgenetics.com>

diff --git a/services/crunch-run/crunchrun.go b/services/crunch-run/crunchrun.go
index 83c4ca3..bd58201 100644
--- a/services/crunch-run/crunchrun.go
+++ b/services/crunch-run/crunchrun.go
@@ -920,42 +920,47 @@ func (runner *ContainerRunner) WaitFinish() (err error) {
 	return nil
 }
 
-// EvalSymlinks follows symlinks within the output directory.  If the symlink
-// leads to a keep mount, copy the manifest text from the keep mount into the
-// output manifestText.  Ensure that whether symlinks are relative or absolute,
-// they must remain within the output directory.
+// UploadFile uploads files within the output directory, with special handling
+// for symlinks. If the symlink leads to a keep mount, copy the manifest text
+// from the keep mount into the output manifestText.  Ensure that whether
+// symlinks are relative or absolute, they must remain within the output
+// directory.
 //
 // Assumes initial value of "path" is absolute, and located within runner.HostOutputDir.
-func (runner *ContainerRunner) EvalSymlinks(path string, binds []string) (manifestText string, symlinksToRemove []string, err error) {
-	var links []string
+func (runner *ContainerRunner) UploadOutputFile(
+	path string,
+	info os.FileInfo,
+	infoerr error,
+	binds []string,
+	walkUpload *WalkUpload,
+	relocateFrom string,
+	relocateTo string) (manifestText string, err error) {
 
-	defer func() {
-		if err != nil {
-			symlinksToRemove = append(symlinksToRemove, links...)
-		}
-	}()
+	if infoerr != nil {
+		return "", infoerr
+	}
 
-	for n := 0; n < 32; n++ {
-		var info os.FileInfo
-		info, err = os.Lstat(path)
-		if err != nil {
-			return
-		}
+	relocated := relocateTo + path[len(relocateFrom):]
+
+	if info.Mode().IsRegular() {
+		return "", walkUpload.UploadFile(relocated, path)
+	}
 
+	// Not a regular file, try to follow symlinks
+	var nextlink = path
+	for n := 0; n < 32; n++ {
 		if info.Mode()&os.ModeSymlink == 0 {
-			// Not a symlink, nothing to do.
+			// Not a symlink, don't do anything
 			return
 		}
 
-		// Remember symlink for cleanup later
-		links = append(links, path)
-
 		var readlinktgt string
-		readlinktgt, err = os.Readlink(path)
-		tgt := readlinktgt
+		readlinktgt, err = os.Readlink(nextlink)
 		if err != nil {
 			return
 		}
+
+		tgt := readlinktgt
 		if !strings.HasPrefix(tgt, "/") {
 			// Relative symlink, resolve it to host path
 			tgt = filepath.Join(filepath.Dir(path), tgt)
@@ -965,8 +970,6 @@ func (runner *ContainerRunner) EvalSymlinks(path string, binds []string) (manife
 			tgt = filepath.Join(runner.HostOutputDir, tgt[len(runner.Container.OutputPath):])
 		}
 
-		runner.CrunchLog.Printf("Resolve %q to %q", path, tgt)
-
 		// go through mounts and try reverse map to collection reference
 		for _, bind := range binds {
 			mnt := runner.Container.Mounts[bind]
@@ -978,51 +981,56 @@ func (runner *ContainerRunner) EvalSymlinks(path string, binds []string) (manife
 				adjustedMount := mnt
 				adjustedMount.Path = filepath.Join(adjustedMount.Path, targetSuffix)
 
-				for _, l := range links {
-					// The chain of one or more symlinks
-					// terminates in this keep mount, so
-					// add them all to the manifest text at
-					// appropriate locations.
-					var m string
-					outputSuffix := l[len(runner.HostOutputDir):]
-					m, err = runner.getCollectionManifestForPath(adjustedMount, outputSuffix)
-					if err != nil {
-						return
-					}
-					manifestText = manifestText + m
-					symlinksToRemove = append(symlinksToRemove, l)
-				}
+				// Terminates in this keep mount, so add the
+				// manifest text at appropriate location.
+				outputSuffix := path[len(runner.HostOutputDir):]
+				manifestText, err = runner.getCollectionManifestForPath(adjustedMount, outputSuffix)
 				return
 			}
 		}
 
-		// If target is not a mount, it must be within the output
-		// directory, otherwise it is an error.
+		// If target is not a collection mount, it must be within the
+		// output directory, otherwise it is an error.
 		if !strings.HasPrefix(tgt, runner.HostOutputDir+"/") {
 			err = fmt.Errorf("Output directory symlink %q points to invalid location %q, must point to mount or output directory.",
 				path[len(runner.HostOutputDir):], readlinktgt)
 			return
 		}
 
-		// Update symlink to host FS
-		err = os.Remove(path)
+		info, err = os.Lstat(tgt)
 		if err != nil {
-			err = fmt.Errorf("Error removing symlink %q: %v", path, err)
+			// tgt doesn't exist or lacks permissions
+			err = fmt.Errorf("Output directory symlink %q points to invalid location %q, must point to mount or output directory.",
+				path[len(runner.HostOutputDir):], readlinktgt)
 			return
 		}
 
-		err = os.Symlink(tgt, path)
-		if err != nil {
-			err = fmt.Errorf("Error updating symlink %q: %v", path, err)
+		if info.Mode().IsRegular() {
+			// Symlink leads to regular file.  Need to read from
+			// the target but upload it at the original path.
+			return "", walkUpload.UploadFile(relocated, tgt)
+		}
+
+		if info.Mode().IsDir() {
+			// Symlink leads to directory.  Walk() doesn't follow
+			// directory symlinks, so we walk the target directory
+			// instead.  Within the walk, file paths are relocated
+			// so they appear under the original symlink path.
+			err = filepath.Walk(tgt, func(walkpath string, walkinfo os.FileInfo, walkerr error) error {
+				var m string
+				m, walkerr = runner.UploadOutputFile(walkpath, walkinfo, walkerr, binds, walkUpload, tgt, relocated)
+				if walkerr == nil {
+					manifestText = manifestText + m
+				}
+				return walkerr
+			})
 			return
 		}
 
-		// Target is within the output directory, so loop and check if
-		// it is also a symlink.
-		path = tgt
+		nextlink = tgt
 	}
 	// Got stuck in a loop or just a pathological number of links, give up.
-	err = fmt.Errorf("Too many symlinks.")
+	err = fmt.Errorf("Followed too many symlinks from path %q", path)
 	return
 }
 
@@ -1072,40 +1080,25 @@ func (runner *ContainerRunner) CaptureOutput() error {
 	if err != nil {
 		// Regular directory
 
-		symlinksToRemove := make(map[string]bool)
+		cw := CollectionWriter{0, runner.Kc, nil, nil, sync.Mutex{}}
+		walkUpload := cw.BeginUpload(runner.HostOutputDir, runner.CrunchLog.Logger)
+
 		var m string
-		var srm []string
-		// Find symlinks to arv-mounted files & dirs.
 		err = filepath.Walk(runner.HostOutputDir, func(path string, info os.FileInfo, err error) error {
-			if err != nil {
-				return err
-			}
-			m, srm, err = runner.EvalSymlinks(path, binds)
-			for _, r := range srm {
-				symlinksToRemove[r] = true
-			}
+			m, err = runner.UploadOutputFile(path, info, err, binds, walkUpload, "", "")
 			if err == nil {
 				manifestText = manifestText + m
 			}
 			return err
 		})
-		for l, _ := range symlinksToRemove {
-			err2 := os.Remove(l)
-			if err2 != nil {
-				if err == nil {
-					err = fmt.Errorf("Error removing symlink %q: %v", err2)
-				} else {
-					err = fmt.Errorf("%v\nError removing symlink %q: %v",
-						err, err2)
-				}
-			}
-		}
+
+		cw.EndUpload(walkUpload)
+
 		if err != nil {
-			return fmt.Errorf("While checking output symlinks: %v", err)
+			return fmt.Errorf("While uploading output files: %v", err)
 		}
 
-		cw := CollectionWriter{0, runner.Kc, nil, nil, sync.Mutex{}}
-		m, err = cw.WriteTree(runner.HostOutputDir, runner.CrunchLog.Logger)
+		m, err = cw.ManifestText()
 		manifestText = manifestText + m
 		if err != nil {
 			return fmt.Errorf("While uploading output files: %v", err)
diff --git a/services/crunch-run/crunchrun_test.go b/services/crunch-run/crunchrun_test.go
index d3606ec..c67988e 100644
--- a/services/crunch-run/crunchrun_test.go
+++ b/services/crunch-run/crunchrun_test.go
@@ -1699,27 +1699,19 @@ func (s *TestSuite) TestEvalSymlinks(c *C) {
 	os.Symlink("/etc/passwd", realTemp+"/p1")
 
 	// Relative outside output dir
-	os.Symlink("../..", realTemp+"/p2")
+	os.Symlink("..", realTemp+"/p2")
 
 	// Circular references
 	os.Symlink("p4", realTemp+"/p3")
 	os.Symlink("p5", realTemp+"/p4")
 	os.Symlink("p3", realTemp+"/p5")
 
-	symlinksToRemove := make(map[string]bool)
+	// Target doesn't exist
+	os.Symlink("p99", realTemp+"/p6")
+
 	for _, v := range []string{"p1", "p2", "p3", "p4", "p5"} {
-		var srm []string
-		_, srm, err = cr.EvalSymlinks(realTemp+"/"+v, []string{})
+		info, err := os.Lstat(realTemp + "/" + v)
+		_, err = cr.UploadOutputFile(realTemp+"/"+v, info, err, []string{}, nil, "", "")
 		c.Assert(err, NotNil)
-		for _, r := range srm {
-			symlinksToRemove[r] = true
-		}
 	}
-	c.Assert(len(symlinksToRemove), Equals, 5)
-
-	c.Assert(map[string]bool{realTemp + "/" + "p1": true,
-		realTemp + "/" + "p2": true,
-		realTemp + "/" + "p3": true,
-		realTemp + "/" + "p4": true,
-		realTemp + "/" + "p5": true}, DeepEquals, symlinksToRemove)
 }
diff --git a/services/crunch-run/upload.go b/services/crunch-run/upload.go
index 3127683..95925e5 100644
--- a/services/crunch-run/upload.go
+++ b/services/crunch-run/upload.go
@@ -18,15 +18,14 @@ import (
 	"crypto/md5"
 	"errors"
 	"fmt"
+	"git.curoverse.com/arvados.git/sdk/go/keepclient"
+	"git.curoverse.com/arvados.git/sdk/go/manifest"
 	"io"
 	"log"
 	"os"
 	"path/filepath"
 	"strings"
 	"sync"
-
-	"git.curoverse.com/arvados.git/sdk/go/keepclient"
-	"git.curoverse.com/arvados.git/sdk/go/manifest"
 )
 
 // Block is a data block in a manifest stream
@@ -263,48 +262,27 @@ type WalkUpload struct {
 	mtx         sync.Mutex
 }
 
-// WalkFunc walks a directory tree, uploads each file found and adds it to the
-// CollectionWriter.
-func (m *WalkUpload) WalkFunc(path string, info os.FileInfo, err error) error {
-	if err != nil {
-		return err
-	}
-
-	targetPath, targetInfo := path, info
-	if info.Mode()&os.ModeSymlink != 0 {
-		// Update targetpath/info to reflect the symlink
-		// target, not the symlink itself
-		targetPath, err = filepath.EvalSymlinks(path)
-		if err != nil {
-			return err
-		}
-		targetInfo, err = os.Stat(targetPath)
-		if err != nil {
-			return fmt.Errorf("stat symlink %q target %q: %s", path, targetPath, err)
-		}
-		if targetInfo.IsDir() {
-			// Symlinks to directories don't get walked, so do it
-			// here.  We've previously checked that they stay in
-			// the output directory and don't result in an endless
-			// loop.
-			filepath.Walk(path+"/.", m.WalkFunc)
-		}
-	}
-
-	if targetInfo.Mode()&os.ModeType != 0 {
-		// Skip directories, pipes, other non-regular files
-		return nil
-	}
-
+func (m *WalkUpload) UploadFile(path string, sourcePath string) error {
 	var dir string
-	if len(path) > (len(m.stripPrefix) + len(info.Name()) + 1) {
-		dir = path[len(m.stripPrefix)+1 : (len(path) - len(info.Name()) - 1)]
+	basename := filepath.Base(path)
+	if len(path) > (len(m.stripPrefix) + len(basename) + 1) {
+		dir = path[len(m.stripPrefix)+1 : (len(path) - len(basename) - 1)]
 	}
 	if dir == "" {
 		dir = "."
 	}
 
-	fn := path[(len(path) - len(info.Name())):]
+	fn := path[(len(path) - len(basename)):]
+
+	info, err := os.Stat(sourcePath)
+	if err != nil {
+		return err
+	}
+	file, err := os.Open(sourcePath)
+	if err != nil {
+		return err
+	}
+	defer file.Close()
 
 	if m.streamMap[dir] == nil {
 		m.streamMap[dir] = &CollectionFileWriter{
@@ -334,16 +312,11 @@ func (m *WalkUpload) WalkFunc(path string, info os.FileInfo, err error) error {
 	// Reset the CollectionFileWriter for a new file
 	fileWriter.NewFile(fn)
 
-	file, err := os.Open(path)
-	if err != nil {
-		return err
-	}
-	defer file.Close()
-
 	m.status.Printf("Uploading %v/%v (%v bytes)", dir, fn, info.Size())
 
 	_, err = io.Copy(fileWriter, file)
 	if err != nil {
+		m.status.Printf("Uh oh")
 		return err
 	}
 
@@ -353,20 +326,15 @@ func (m *WalkUpload) WalkFunc(path string, info os.FileInfo, err error) error {
 	return nil
 }
 
-func (cw *CollectionWriter) WriteTree(root string, status *log.Logger) (manifest string, err error) {
+func (cw *CollectionWriter) BeginUpload(root string, status *log.Logger) *WalkUpload {
 	streamMap := make(map[string]*CollectionFileWriter)
-	wu := &WalkUpload{0, cw.IKeepClient, root, streamMap, status, nil, sync.Mutex{}}
-	err = filepath.Walk(root, wu.WalkFunc)
-
-	if err != nil {
-		return "", err
-	}
+	return &WalkUpload{0, cw.IKeepClient, root, streamMap, status, nil, sync.Mutex{}}
+}
 
+func (cw *CollectionWriter) EndUpload(wu *WalkUpload) {
 	cw.mtx.Lock()
-	for _, st := range streamMap {
+	for _, st := range wu.streamMap {
 		cw.Streams = append(cw.Streams, st)
 	}
 	cw.mtx.Unlock()
-
-	return cw.ManifestText()
 }
diff --git a/services/crunch-run/upload_test.go b/services/crunch-run/upload_test.go
index 96ea2b1..86dab41 100644
--- a/services/crunch-run/upload_test.go
+++ b/services/crunch-run/upload_test.go
@@ -5,13 +5,13 @@
 package main
 
 import (
+	. "gopkg.in/check.v1"
 	"io/ioutil"
 	"log"
 	"os"
+	"path/filepath"
 	"sync"
 	"syscall"
-
-	. "gopkg.in/check.v1"
 )
 
 type UploadTestSuite struct{}
@@ -19,6 +19,25 @@ type UploadTestSuite struct{}
 // Gocheck boilerplate
 var _ = Suite(&UploadTestSuite{})
 
+func writeTree(cw *CollectionWriter, root string, status *log.Logger) (mt string, err error) {
+	walkUpload := cw.BeginUpload(root, status)
+
+	err = filepath.Walk(root, func(path string, info os.FileInfo, err error) error {
+		info, _ = os.Stat(path)
+		if info.Mode().IsRegular() {
+			return walkUpload.UploadFile(path, path)
+		}
+		return nil
+	})
+
+	cw.EndUpload(walkUpload)
+	if err != nil {
+		return "", err
+	}
+	mt, err = cw.ManifestText()
+	return
+}
+
 func (s *TestSuite) TestSimpleUpload(c *C) {
 	tmpdir, _ := ioutil.TempDir("", "")
 	defer func() {
@@ -28,12 +47,12 @@ func (s *TestSuite) TestSimpleUpload(c *C) {
 	ioutil.WriteFile(tmpdir+"/"+"file1.txt", []byte("foo"), 0600)
 
 	cw := CollectionWriter{0, &KeepTestClient{}, nil, nil, sync.Mutex{}}
-	str, err := cw.WriteTree(tmpdir, log.New(os.Stdout, "", 0))
+	str, err := writeTree(&cw, tmpdir, log.New(os.Stdout, "", 0))
 	c.Check(err, IsNil)
 	c.Check(str, Equals, ". acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:file1.txt\n")
 }
 
-func (s *TestSuite) TestSimpleUploadThreefiles(c *C) {
+func (s *TestSuite) TestUploadThreeFiles(c *C) {
 	tmpdir, _ := ioutil.TempDir("", "")
 	defer func() {
 		os.RemoveAll(tmpdir)
@@ -49,7 +68,7 @@ func (s *TestSuite) TestSimpleUploadThreefiles(c *C) {
 	}
 
 	cw := CollectionWriter{0, &KeepTestClient{}, nil, nil, sync.Mutex{}}
-	str, err := cw.WriteTree(tmpdir, log.New(os.Stdout, "", 0))
+	str, err := writeTree(&cw, tmpdir, log.New(os.Stdout, "", 0))
 
 	c.Check(err, IsNil)
 	c.Check(str, Equals, ". aa65a413921163458c52fea478d5d3ee+9 0:3:file1.txt 3:3:file2.txt 6:3:file3.txt\n")
@@ -67,7 +86,7 @@ func (s *TestSuite) TestSimpleUploadSubdir(c *C) {
 	ioutil.WriteFile(tmpdir+"/subdir/file2.txt", []byte("bar"), 0600)
 
 	cw := CollectionWriter{0, &KeepTestClient{}, nil, nil, sync.Mutex{}}
-	str, err := cw.WriteTree(tmpdir, log.New(os.Stdout, "", 0))
+	str, err := writeTree(&cw, tmpdir, log.New(os.Stdout, "", 0))
 
 	c.Check(err, IsNil)
 
@@ -101,7 +120,7 @@ func (s *TestSuite) TestSimpleUploadLarge(c *C) {
 	ioutil.WriteFile(tmpdir+"/"+"file2.txt", []byte("bar"), 0600)
 
 	cw := CollectionWriter{0, &KeepTestClient{}, nil, nil, sync.Mutex{}}
-	str, err := cw.WriteTree(tmpdir, log.New(os.Stdout, "", 0))
+	str, err := writeTree(&cw, tmpdir, log.New(os.Stdout, "", 0))
 
 	c.Check(err, IsNil)
 	c.Check(str, Equals, ". 00ecf01e0d93385115c9f8bed757425d+67108864 485cd630387b6b1846fe429f261ea05f+1048514 0:68157375:file1.txt 68157375:3:file2.txt\n")
@@ -118,7 +137,7 @@ func (s *TestSuite) TestUploadEmptySubdir(c *C) {
 	ioutil.WriteFile(tmpdir+"/"+"file1.txt", []byte("foo"), 0600)
 
 	cw := CollectionWriter{0, &KeepTestClient{}, nil, nil, sync.Mutex{}}
-	str, err := cw.WriteTree(tmpdir, log.New(os.Stdout, "", 0))
+	str, err := writeTree(&cw, tmpdir, log.New(os.Stdout, "", 0))
 
 	c.Check(err, IsNil)
 	c.Check(str, Equals, `. acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:file1.txt
@@ -134,7 +153,7 @@ func (s *TestSuite) TestUploadEmptyFile(c *C) {
 	ioutil.WriteFile(tmpdir+"/"+"file1.txt", []byte(""), 0600)
 
 	cw := CollectionWriter{0, &KeepTestClient{}, nil, nil, sync.Mutex{}}
-	str, err := cw.WriteTree(tmpdir, log.New(os.Stdout, "", 0))
+	str, err := writeTree(&cw, tmpdir, log.New(os.Stdout, "", 0))
 
 	c.Check(err, IsNil)
 	c.Check(str, Equals, `. d41d8cd98f00b204e9800998ecf8427e+0 0:0:file1.txt
@@ -150,7 +169,7 @@ func (s *TestSuite) TestUploadError(c *C) {
 	ioutil.WriteFile(tmpdir+"/"+"file1.txt", []byte("foo"), 0600)
 
 	cw := CollectionWriter{0, &KeepErrorTestClient{}, nil, nil, sync.Mutex{}}
-	str, err := cw.WriteTree(tmpdir, log.New(os.Stdout, "", 0))
+	str, err := writeTree(&cw, tmpdir, log.New(os.Stdout, "", 0))
 
 	c.Check(err, NotNil)
 	c.Check(str, Equals, "")

commit 511ff9e86f2655e816f07f53401b911927d5f3e4
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date:   Mon Oct 2 16:07:18 2017 -0400

    12183: Simplify walking symlinks in upload.
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz at veritasgenetics.com>

diff --git a/services/crunch-run/upload.go b/services/crunch-run/upload.go
index 7b13109..3127683 100644
--- a/services/crunch-run/upload.go
+++ b/services/crunch-run/upload.go
@@ -19,7 +19,6 @@ import (
 	"errors"
 	"fmt"
 	"io"
-	"io/ioutil"
 	"log"
 	"os"
 	"path/filepath"
@@ -288,14 +287,7 @@ func (m *WalkUpload) WalkFunc(path string, info os.FileInfo, err error) error {
 			// here.  We've previously checked that they stay in
 			// the output directory and don't result in an endless
 			// loop.
-			var rd []os.FileInfo
-			rd, err = ioutil.ReadDir(path)
-			if err != nil {
-				return err
-			}
-			for _, ent := range rd {
-				err = filepath.Walk(filepath.Join(path, ent.Name()), m.WalkFunc)
-			}
+			filepath.Walk(path+"/.", m.WalkFunc)
 		}
 	}
 

commit f93e925c4a4322ee4ef2244a0eb99552a01fce9a
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date:   Mon Oct 2 15:34:38 2017 -0400

    12183: Improve error handling if unable to delete symlinks marked for deletion.
    
    Add unit test for EvalSymlinks.
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz at veritasgenetics.com>

diff --git a/services/crunch-run/crunchrun.go b/services/crunch-run/crunchrun.go
index c50f799..83c4ca3 100644
--- a/services/crunch-run/crunchrun.go
+++ b/services/crunch-run/crunchrun.go
@@ -942,7 +942,6 @@ func (runner *ContainerRunner) EvalSymlinks(path string, binds []string) (manife
 			return
 		}
 
-		var tgt string
 		if info.Mode()&os.ModeSymlink == 0 {
 			// Not a symlink, nothing to do.
 			return
@@ -951,7 +950,9 @@ func (runner *ContainerRunner) EvalSymlinks(path string, binds []string) (manife
 		// Remember symlink for cleanup later
 		links = append(links, path)
 
-		tgt, err = os.Readlink(path)
+		var readlinktgt string
+		readlinktgt, err = os.Readlink(path)
+		tgt := readlinktgt
 		if err != nil {
 			return
 		}
@@ -999,13 +1000,22 @@ func (runner *ContainerRunner) EvalSymlinks(path string, binds []string) (manife
 		// directory, otherwise it is an error.
 		if !strings.HasPrefix(tgt, runner.HostOutputDir+"/") {
 			err = fmt.Errorf("Output directory symlink %q points to invalid location %q, must point to mount or output directory.",
-				path[len(runner.HostOutputDir):], tgt)
+				path[len(runner.HostOutputDir):], readlinktgt)
 			return
 		}
 
 		// Update symlink to host FS
-		os.Remove(path)
-		os.Symlink(tgt, path)
+		err = os.Remove(path)
+		if err != nil {
+			err = fmt.Errorf("Error removing symlink %q: %v", path, err)
+			return
+		}
+
+		err = os.Symlink(tgt, path)
+		if err != nil {
+			err = fmt.Errorf("Error updating symlink %q: %v", path, err)
+			return
+		}
 
 		// Target is within the output directory, so loop and check if
 		// it is also a symlink.
@@ -1062,7 +1072,7 @@ func (runner *ContainerRunner) CaptureOutput() error {
 	if err != nil {
 		// Regular directory
 
-		var symlinksToRemove []string
+		symlinksToRemove := make(map[string]bool)
 		var m string
 		var srm []string
 		// Find symlinks to arv-mounted files & dirs.
@@ -1071,14 +1081,24 @@ func (runner *ContainerRunner) CaptureOutput() error {
 				return err
 			}
 			m, srm, err = runner.EvalSymlinks(path, binds)
-			symlinksToRemove = append(symlinksToRemove, srm...)
+			for _, r := range srm {
+				symlinksToRemove[r] = true
+			}
 			if err == nil {
 				manifestText = manifestText + m
 			}
 			return err
 		})
-		for _, l := range symlinksToRemove {
-			os.Remove(l)
+		for l, _ := range symlinksToRemove {
+			err2 := os.Remove(l)
+			if err2 != nil {
+				if err == nil {
+					err = fmt.Errorf("Error removing symlink %q: %v", err2)
+				} else {
+					err = fmt.Errorf("%v\nError removing symlink %q: %v",
+						err, err2)
+				}
+			}
 		}
 		if err != nil {
 			return fmt.Errorf("While checking output symlinks: %v", err)
diff --git a/services/crunch-run/crunchrun_test.go b/services/crunch-run/crunchrun_test.go
index 9081783..d3606ec 100644
--- a/services/crunch-run/crunchrun_test.go
+++ b/services/crunch-run/crunchrun_test.go
@@ -1685,3 +1685,41 @@ func (s *TestSuite) TestNumberRoundTrip(c *C) {
 	c.Check(err, IsNil)
 	c.Check(string(jsondata), Equals, `{"number":123456789123456789}`)
 }
+
+func (s *TestSuite) TestEvalSymlinks(c *C) {
+	cr := NewContainerRunner(&ArvTestClient{callraw: true}, &KeepTestClient{}, nil, "zzzzz-zzzzz-zzzzzzzzzzzzzzz")
+
+	realTemp, err := ioutil.TempDir("", "crunchrun_test-")
+	c.Assert(err, IsNil)
+	defer os.RemoveAll(realTemp)
+
+	cr.HostOutputDir = realTemp
+
+	// Absolute path outside output dir
+	os.Symlink("/etc/passwd", realTemp+"/p1")
+
+	// Relative outside output dir
+	os.Symlink("../..", realTemp+"/p2")
+
+	// Circular references
+	os.Symlink("p4", realTemp+"/p3")
+	os.Symlink("p5", realTemp+"/p4")
+	os.Symlink("p3", realTemp+"/p5")
+
+	symlinksToRemove := make(map[string]bool)
+	for _, v := range []string{"p1", "p2", "p3", "p4", "p5"} {
+		var srm []string
+		_, srm, err = cr.EvalSymlinks(realTemp+"/"+v, []string{})
+		c.Assert(err, NotNil)
+		for _, r := range srm {
+			symlinksToRemove[r] = true
+		}
+	}
+	c.Assert(len(symlinksToRemove), Equals, 5)
+
+	c.Assert(map[string]bool{realTemp + "/" + "p1": true,
+		realTemp + "/" + "p2": true,
+		realTemp + "/" + "p3": true,
+		realTemp + "/" + "p4": true,
+		realTemp + "/" + "p5": true}, DeepEquals, symlinksToRemove)
+}
diff --git a/services/crunch-run/upload.go b/services/crunch-run/upload.go
index faf20b4..7b13109 100644
--- a/services/crunch-run/upload.go
+++ b/services/crunch-run/upload.go
@@ -283,7 +283,7 @@ func (m *WalkUpload) WalkFunc(path string, info os.FileInfo, err error) error {
 		if err != nil {
 			return fmt.Errorf("stat symlink %q target %q: %s", path, targetPath, err)
 		}
-		if targetInfo.Mode()&os.ModeDir != 0 {
+		if targetInfo.IsDir() {
 			// Symlinks to directories don't get walked, so do it
 			// here.  We've previously checked that they stay in
 			// the output directory and don't result in an endless

commit 174d343cf86e02e083293746f86366e1d0a95786
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date:   Fri Sep 29 16:06:59 2017 -0400

    12183: Support uploading symlinks to directories.
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz at veritasgenetics.com>

diff --git a/services/crunch-run/crunchrun.go b/services/crunch-run/crunchrun.go
index 6117997..c50f799 100644
--- a/services/crunch-run/crunchrun.go
+++ b/services/crunch-run/crunchrun.go
@@ -926,12 +926,12 @@ func (runner *ContainerRunner) WaitFinish() (err error) {
 // they must remain within the output directory.
 //
 // Assumes initial value of "path" is absolute, and located within runner.HostOutputDir.
-func (runner *ContainerRunner) EvalSymlinks(path string, binds []string, symlinksToRemove *[]string) (manifestText string, err error) {
+func (runner *ContainerRunner) EvalSymlinks(path string, binds []string) (manifestText string, symlinksToRemove []string, err error) {
 	var links []string
 
 	defer func() {
 		if err != nil {
-			*symlinksToRemove = append(*symlinksToRemove, links...)
+			symlinksToRemove = append(symlinksToRemove, links...)
 		}
 	}()
 
@@ -989,7 +989,7 @@ func (runner *ContainerRunner) EvalSymlinks(path string, binds []string, symlink
 						return
 					}
 					manifestText = manifestText + m
-					*symlinksToRemove = append(*symlinksToRemove, l)
+					symlinksToRemove = append(symlinksToRemove, l)
 				}
 				return
 			}
@@ -1064,18 +1064,19 @@ func (runner *ContainerRunner) CaptureOutput() error {
 
 		var symlinksToRemove []string
 		var m string
+		var srm []string
 		// Find symlinks to arv-mounted files & dirs.
 		err = filepath.Walk(runner.HostOutputDir, func(path string, info os.FileInfo, err error) error {
 			if err != nil {
 				return err
 			}
-			m, err = runner.EvalSymlinks(path, binds, &symlinksToRemove)
+			m, srm, err = runner.EvalSymlinks(path, binds)
+			symlinksToRemove = append(symlinksToRemove, srm...)
 			if err == nil {
 				manifestText = manifestText + m
 			}
 			return err
 		})
-		runner.CrunchLog.Printf("sm %q", symlinksToRemove)
 		for _, l := range symlinksToRemove {
 			os.Remove(l)
 		}
diff --git a/services/crunch-run/crunchrun_test.go b/services/crunch-run/crunchrun_test.go
index dbf9cc7..9081783 100644
--- a/services/crunch-run/crunchrun_test.go
+++ b/services/crunch-run/crunchrun_test.go
@@ -1543,10 +1543,18 @@ func (s *TestSuite) TestOutputSymlinkToOutput(c *C) {
 		rf, _ := os.Create(t.realTemp + "/2/realfile")
 		rf.Write([]byte("foo"))
 		rf.Close()
+
+		os.Mkdir(t.realTemp+"/2/realdir", 0700)
+		rf, _ = os.Create(t.realTemp + "/2/realdir/subfile")
+		rf.Write([]byte("bar"))
+		rf.Close()
+
 		os.Symlink("/tmp/realfile", t.realTemp+"/2/file1")
 		os.Symlink("realfile", t.realTemp+"/2/file2")
 		os.Symlink("/tmp/file1", t.realTemp+"/2/file3")
 		os.Symlink("file2", t.realTemp+"/2/file4")
+		os.Symlink("realdir", t.realTemp+"/2/dir1")
+		os.Symlink("/tmp/realdir", t.realTemp+"/2/dir2")
 		t.logWriter.Close()
 	})
 
@@ -1557,7 +1565,12 @@ func (s *TestSuite) TestOutputSymlinkToOutput(c *C) {
 			collection := v["collection"].(arvadosclient.Dict)
 			if strings.Index(collection["name"].(string), "output") == 0 {
 				manifest := collection["manifest_text"].(string)
-				c.Check(manifest, Equals, ". 7a2c86e102dcc231bd232aad99686dfa+15 0:3:file1 3:3:file2 6:3:file3 9:3:file4 12:3:realfile\n")
+				c.Check(manifest, Equals,
+					`. 7a2c86e102dcc231bd232aad99686dfa+15 0:3:file1 3:3:file2 6:3:file3 9:3:file4 12:3:realfile
+./dir1 37b51d194a7513e45b56f6524f2d51f2+3 0:3:subfile
+./dir2 37b51d194a7513e45b56f6524f2d51f2+3 0:3:subfile
+./realdir 37b51d194a7513e45b56f6524f2d51f2+3 0:3:subfile
+`)
 			}
 		}
 	}
diff --git a/services/crunch-run/upload.go b/services/crunch-run/upload.go
index bb2776a..faf20b4 100644
--- a/services/crunch-run/upload.go
+++ b/services/crunch-run/upload.go
@@ -19,6 +19,7 @@ import (
 	"errors"
 	"fmt"
 	"io"
+	"io/ioutil"
 	"log"
 	"os"
 	"path/filepath"
@@ -282,6 +283,20 @@ func (m *WalkUpload) WalkFunc(path string, info os.FileInfo, err error) error {
 		if err != nil {
 			return fmt.Errorf("stat symlink %q target %q: %s", path, targetPath, err)
 		}
+		if targetInfo.Mode()&os.ModeDir != 0 {
+			// Symlinks to directories don't get walked, so do it
+			// here.  We've previously checked that they stay in
+			// the output directory and don't result in an endless
+			// loop.
+			var rd []os.FileInfo
+			rd, err = ioutil.ReadDir(path)
+			if err != nil {
+				return err
+			}
+			for _, ent := range rd {
+				err = filepath.Walk(filepath.Join(path, ent.Name()), m.WalkFunc)
+			}
+		}
 	}
 
 	if targetInfo.Mode()&os.ModeType != 0 {

commit e9eb0975e100e76d1da2a6997656ff2524a2ba31
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date:   Fri Sep 29 15:11:46 2017 -0400

    12183: Evaluate symlinks to handle chained links and absolute paths.
    
    Ensure that all symlinks either resolve within the output directory or to a
    keep mount.
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz at veritasgenetics.com>

diff --git a/services/crunch-run/crunchrun.go b/services/crunch-run/crunchrun.go
index 1665742..6117997 100644
--- a/services/crunch-run/crunchrun.go
+++ b/services/crunch-run/crunchrun.go
@@ -920,6 +920,102 @@ func (runner *ContainerRunner) WaitFinish() (err error) {
 	return nil
 }
 
+// EvalSymlinks follows symlinks within the output directory.  If the symlink
+// leads to a keep mount, copy the manifest text from the keep mount into the
+// output manifestText.  Ensure that whether symlinks are relative or absolute,
+// they must remain within the output directory.
+//
+// Assumes initial value of "path" is absolute, and located within runner.HostOutputDir.
+func (runner *ContainerRunner) EvalSymlinks(path string, binds []string, symlinksToRemove *[]string) (manifestText string, err error) {
+	var links []string
+
+	defer func() {
+		if err != nil {
+			*symlinksToRemove = append(*symlinksToRemove, links...)
+		}
+	}()
+
+	for n := 0; n < 32; n++ {
+		var info os.FileInfo
+		info, err = os.Lstat(path)
+		if err != nil {
+			return
+		}
+
+		var tgt string
+		if info.Mode()&os.ModeSymlink == 0 {
+			// Not a symlink, nothing to do.
+			return
+		}
+
+		// Remember symlink for cleanup later
+		links = append(links, path)
+
+		tgt, err = os.Readlink(path)
+		if err != nil {
+			return
+		}
+		if !strings.HasPrefix(tgt, "/") {
+			// Relative symlink, resolve it to host path
+			tgt = filepath.Join(filepath.Dir(path), tgt)
+		}
+		if strings.HasPrefix(tgt, runner.Container.OutputPath+"/") && !strings.HasPrefix(tgt, runner.HostOutputDir+"/") {
+			// Absolute symlink to container output path, adjust it to host output path.
+			tgt = filepath.Join(runner.HostOutputDir, tgt[len(runner.Container.OutputPath):])
+		}
+
+		runner.CrunchLog.Printf("Resolve %q to %q", path, tgt)
+
+		// go through mounts and try reverse map to collection reference
+		for _, bind := range binds {
+			mnt := runner.Container.Mounts[bind]
+			if tgt == bind || strings.HasPrefix(tgt, bind+"/") {
+				// get path relative to bind
+				targetSuffix := tgt[len(bind):]
+
+				// Copy mount and adjust the path to add path relative to the bind
+				adjustedMount := mnt
+				adjustedMount.Path = filepath.Join(adjustedMount.Path, targetSuffix)
+
+				for _, l := range links {
+					// The chain of one or more symlinks
+					// terminates in this keep mount, so
+					// add them all to the manifest text at
+					// appropriate locations.
+					var m string
+					outputSuffix := l[len(runner.HostOutputDir):]
+					m, err = runner.getCollectionManifestForPath(adjustedMount, outputSuffix)
+					if err != nil {
+						return
+					}
+					manifestText = manifestText + m
+					*symlinksToRemove = append(*symlinksToRemove, l)
+				}
+				return
+			}
+		}
+
+		// If target is not a mount, it must be within the output
+		// directory, otherwise it is an error.
+		if !strings.HasPrefix(tgt, runner.HostOutputDir+"/") {
+			err = fmt.Errorf("Output directory symlink %q points to invalid location %q, must point to mount or output directory.",
+				path[len(runner.HostOutputDir):], tgt)
+			return
+		}
+
+		// Update symlink to host FS
+		os.Remove(path)
+		os.Symlink(tgt, path)
+
+		// Target is within the output directory, so loop and check if
+		// it is also a symlink.
+		path = tgt
+	}
+	// Got stuck in a loop or just a pathological number of links, give up.
+	err = fmt.Errorf("Too many symlinks.")
+	return
+}
+
 // HandleOutput sets the output, unmounts the FUSE mount, and deletes temporary directories
 func (runner *ContainerRunner) CaptureOutput() error {
 	if runner.finalState != "Complete" {
@@ -966,73 +1062,28 @@ func (runner *ContainerRunner) CaptureOutput() error {
 	if err != nil {
 		// Regular directory
 
+		var symlinksToRemove []string
+		var m string
 		// Find symlinks to arv-mounted files & dirs.
 		err = filepath.Walk(runner.HostOutputDir, func(path string, info os.FileInfo, err error) error {
 			if err != nil {
 				return err
 			}
-			if info.Mode()&os.ModeSymlink == 0 {
-				return nil
-			}
-			// read link to get container internal path
-			// only support 1 level of symlinking here.
-			var tgt string
-			tgt, err = os.Readlink(path)
-			if err != nil {
-				return err
-			}
-
-			// get path relative to output dir
-			outputSuffix := path[len(runner.HostOutputDir):]
-
-			if strings.HasPrefix(tgt, "/") {
-				// go through mounts and try reverse map to collection reference
-				for _, bind := range binds {
-					mnt := runner.Container.Mounts[bind]
-					if tgt == bind || strings.HasPrefix(tgt, bind+"/") {
-						// get path relative to bind
-						targetSuffix := tgt[len(bind):]
-
-						// Copy mount and adjust the path to add path relative to the bind
-						adjustedMount := mnt
-						adjustedMount.Path = filepath.Join(adjustedMount.Path, targetSuffix)
-
-						// get manifest text
-						var m string
-						m, err = runner.getCollectionManifestForPath(adjustedMount, outputSuffix)
-						if err != nil {
-							return err
-						}
-						manifestText = manifestText + m
-						// delete symlink so WriteTree won't try to to dereference it.
-						os.Remove(path)
-						return nil
-					}
-				}
-			}
-
-			// Not a link to a mount.  Must be dereferencible and
-			// point into the output directory.
-			tgt, err = filepath.EvalSymlinks(path)
-			if err != nil {
-				os.Remove(path)
-				return err
-			}
-
-			// Symlink target must be within the output directory otherwise it's an error.
-			if !strings.HasPrefix(tgt, runner.HostOutputDir+"/") {
-				os.Remove(path)
-				return fmt.Errorf("Output directory symlink %q points to invalid location %q, must point to mount or output directory.",
-					outputSuffix, tgt)
+			m, err = runner.EvalSymlinks(path, binds, &symlinksToRemove)
+			if err == nil {
+				manifestText = manifestText + m
 			}
-			return nil
+			return err
 		})
+		runner.CrunchLog.Printf("sm %q", symlinksToRemove)
+		for _, l := range symlinksToRemove {
+			os.Remove(l)
+		}
 		if err != nil {
 			return fmt.Errorf("While checking output symlinks: %v", err)
 		}
 
 		cw := CollectionWriter{0, runner.Kc, nil, nil, sync.Mutex{}}
-		var m string
 		m, err = cw.WriteTree(runner.HostOutputDir, runner.CrunchLog.Logger)
 		manifestText = manifestText + m
 		if err != nil {
diff --git a/services/crunch-run/crunchrun_test.go b/services/crunch-run/crunchrun_test.go
index 5e77d7b..dbf9cc7 100644
--- a/services/crunch-run/crunchrun_test.go
+++ b/services/crunch-run/crunchrun_test.go
@@ -1523,6 +1523,46 @@ func (s *TestSuite) TestOutputError(c *C) {
 	c.Check(api.CalledWith("container.state", "Cancelled"), NotNil)
 }
 
+func (s *TestSuite) TestOutputSymlinkToOutput(c *C) {
+	helperRecord := `{
+		"command": ["/bin/sh", "-c", "echo $FROBIZ"],
+		"container_image": "d4ab34d3d4f8a72f5c4973051ae69fab+122",
+		"cwd": "/bin",
+		"environment": {"FROBIZ": "bilbo"},
+		"mounts": {
+        "/tmp": {"kind": "tmp"}
+    },
+		"output_path": "/tmp",
+		"priority": 1,
+		"runtime_constraints": {}
+	}`
+
+	extraMounts := []string{}
+
+	api, _, _ := FullRunHelper(c, helperRecord, extraMounts, 0, func(t *TestDockerClient) {
+		rf, _ := os.Create(t.realTemp + "/2/realfile")
+		rf.Write([]byte("foo"))
+		rf.Close()
+		os.Symlink("/tmp/realfile", t.realTemp+"/2/file1")
+		os.Symlink("realfile", t.realTemp+"/2/file2")
+		os.Symlink("/tmp/file1", t.realTemp+"/2/file3")
+		os.Symlink("file2", t.realTemp+"/2/file4")
+		t.logWriter.Close()
+	})
+
+	c.Check(api.CalledWith("container.exit_code", 0), NotNil)
+	c.Check(api.CalledWith("container.state", "Complete"), NotNil)
+	for _, v := range api.Content {
+		if v["collection"] != nil {
+			collection := v["collection"].(arvadosclient.Dict)
+			if strings.Index(collection["name"].(string), "output") == 0 {
+				manifest := collection["manifest_text"].(string)
+				c.Check(manifest, Equals, ". 7a2c86e102dcc231bd232aad99686dfa+15 0:3:file1 3:3:file2 6:3:file3 9:3:file4 12:3:realfile\n")
+			}
+		}
+	}
+}
+
 func (s *TestSuite) TestStdinCollectionMountPoint(c *C) {
 	helperRecord := `{
 		"command": ["/bin/sh", "-c", "echo $FROBIZ"],

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list