[ARVADOS] updated: 471270bc6ec9453fdd4c1faf97b65a8291543c6f

Git user git at public.curoverse.com
Thu Feb 2 19:18:23 EST 2017


Summary of changes:
 sdk/go/manifest/manifest.go           | 13 +++++++++++
 services/crunch-run/crunchrun.go      | 44 +++++++++++++++++------------------
 services/crunch-run/crunchrun_test.go | 16 ++++++-------
 3 files changed, 42 insertions(+), 31 deletions(-)

       via  471270bc6ec9453fdd4c1faf97b65a8291543c6f (commit)
      from  e6986ed88126f61ecad0f557c78981961f901044 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.


commit 471270bc6ec9453fdd4c1faf97b65a8291543c6f
Author: radhika <radhika at curoverse.com>
Date:   Thu Feb 2 19:16:25 2017 -0500

    9397: Use manifest.FileSegmentForPath to get manifest segment for a file path. Cache collections to avoid fetching
    the same collection repeatedly. If no manifest segment found for a mounted path, log that fact.

diff --git a/sdk/go/manifest/manifest.go b/sdk/go/manifest/manifest.go
index 22b1c97..f6656b4 100644
--- a/sdk/go/manifest/manifest.go
+++ b/sdk/go/manifest/manifest.go
@@ -265,6 +265,19 @@ func (m *Manifest) FileSegmentIterByName(filepath string) <-chan *FileSegment {
 	return ch
 }
 
+func (m *Manifest) FileSegmentForPath(filepath string) string {
+	dir := "."
+	file := filepath
+	if idx := strings.LastIndex(filepath, "/"); idx >= 0 {
+		dir = "./" + filepath[0:idx]
+		file = filepath[idx+1:]
+	}
+	for fs := range m.FileSegmentIterByName(filepath) {
+		return fmt.Sprintf("%v %v %v:%v:%v", dir, fs.Locator, fs.Offset, fs.Len, file)
+	}
+	return ""
+}
+
 // Blocks may appear multiple times within the same manifest if they
 // are used by multiple files. In that case this Iterator will output
 // the same block multiple times.
diff --git a/services/crunch-run/crunchrun.go b/services/crunch-run/crunchrun.go
index fa8ee13..876df03 100644
--- a/services/crunch-run/crunchrun.go
+++ b/services/crunch-run/crunchrun.go
@@ -713,6 +713,8 @@ func (runner *ContainerRunner) CaptureOutput() error {
 	return nil
 }
 
+var outputCollections = make(map[string]arvados.Collection)
+
 // Fetch the collection for the mnt.PortableDataHash
 // Return the manifest_text fragment corresponding to the specified mnt.Path
 //  after making any required updates.
@@ -730,12 +732,17 @@ func (runner *ContainerRunner) CaptureOutput() error {
 //    "path":"/subdir1/subdir2"
 //    "path":"/subdir/filename" etc
 func (runner *ContainerRunner) getCollectionManifestForPath(mnt arvados.Mount, bindSuffix string) (string, error) {
-	var collection arvados.Collection
-	err := runner.ArvClient.Get("collections", mnt.PortableDataHash, nil, &collection)
-	if err != nil {
-		return "", fmt.Errorf("While getting collection for %v: %v", mnt.PortableDataHash, err)
+	collection := outputCollections[mnt.PortableDataHash]
+	if collection.PortableDataHash == "" {
+		err := runner.ArvClient.Get("collections", mnt.PortableDataHash, nil, &collection)
+		if err != nil {
+			return "", fmt.Errorf("While getting collection for %v: %v", mnt.PortableDataHash, err)
+		}
+		outputCollections[mnt.PortableDataHash] = collection
 	}
 
+	manifest := manifest.Manifest{Text: collection.ManifestText}
+
 	manifestText := ""
 	if mnt.Path == "" || mnt.Path == "/" {
 		// no path specified; return the entire manifest text after making adjustments
@@ -770,30 +777,21 @@ func (runner *ContainerRunner) getCollectionManifestForPath(mnt arvados.Mount, b
 				break
 			} else {
 				// look for a matching file in this stream
-				if tokens[0] == pathSubdir {
-					// path refers to a file in this stream
-					for _, token := range tokens {
-						if strings.Index(token, ":"+pathFileName) > 0 {
-							// found the file in the stream; discard all other file tokens
-							for _, t := range tokens {
-								if strings.Index(t, ":") == -1 {
-									manifestText += (" " + t)
-								} else {
-									break // done reading all non-file tokens of this stream
-								}
-							}
-							manifestText = strings.Trim(manifestText, " ")
-							token = strings.Replace(token, ":"+pathFileName, ":"+bindFileName, -1)
-							manifestText += (" " + token + "\n")
-							manifestText = strings.Replace(manifestText, pathSubdir, bindSubdir, -1)
-							break
-						}
-					}
+				fs := manifest.FileSegmentForPath(mntPath[1:])
+				if fs != "" {
+					manifestText = strings.Replace(fs, ":"+pathFileName, ":"+bindFileName, -1)
+					manifestText = strings.Replace(manifestText, pathSubdir, bindSubdir, -1)
+					manifestText += "\n"
+					break
 				}
 			}
 		}
 	}
 
+	if manifestText == "" {
+		runner.CrunchLog.Printf("No manifest segment found for bind '%v' with path '%v'", bindSuffix, mnt.Path)
+	}
+
 	return manifestText, nil
 }
 
diff --git a/services/crunch-run/crunchrun_test.go b/services/crunch-run/crunchrun_test.go
index 51549ae..8d0322c 100644
--- a/services/crunch-run/crunchrun_test.go
+++ b/services/crunch-run/crunchrun_test.go
@@ -57,8 +57,8 @@ var hwImageId = "9c31ee32b3d15268a0754e8edc74d4f815ee014b693bc5109058e431dd5caea
 var otherManifest = ". 68a84f561b1d1708c6baff5e019a9ab3+46+Ae5d0af96944a3690becb1decdf60cc1c937f556d at 5693216f 0:46:md5sum.txt\n"
 var otherPDH = "a3e8f74c6f101eae01fa08bfb4e49b3a+54"
 
-var subdirManifest = ". 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396c73c0abcdefgh11234567890 at 569fa8c3 0:9:file1_in_main.txt 9:18:file2_in_main.txt 27:5649:zzzzz-8i9sb-bcdefghijkdhvnk.log.txt\n./subdir1 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396cabcdefghij6419876543234 at 569fa8c4 0:9:file1_in_subdir1.txt 9:18:file2_in_subdir1.txt\n./subdir1/subdir2 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396c73c0bcdefghijk544332211 at 569fa8c5 0:9:file1_in_subdir2.txt 9:19:file2_in_subdir2.txt\n"
-var subdirPDH = "a0def87f80dd594d4675809e83bd4f15+367"
+var normalizedManifestWithSubdirs = ". 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396c73c0abcdefgh11234567890 at 569fa8c3 0:9:file1_in_main.txt 9:18:file2_in_main.txt 27:5649:zzzzz-8i9sb-bcdefghijkdhvnk.log.txt\n./subdir1 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396cabcdefghij6419876543234 at 569fa8c4 0:9:file1_in_subdir1.txt 9:18:file2_in_subdir1.txt\n./subdir1/subdir2 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396c73c0bcdefghijk544332211 at 569fa8c5 0:9:file1_in_subdir2.txt 9:18:file2_in_subdir2.txt\n"
+var normalizedWithSubdirsPDH = "a0def87f80dd594d4675809e83bd4f15+367"
 
 var fakeAuthUUID = "zzzzz-gj3su-55pqoyepgi2glem"
 var fakeAuthToken = "a3ltuwzqcu2u4sc0q7yhpc2w7s00fdcqecg5d6e0u3pfohmbjt"
@@ -185,8 +185,8 @@ func (client *ArvTestClient) Get(resourceType string, uuid string, parameters ar
 			output.(*arvados.Collection).ManifestText = hwManifest
 		} else if uuid == otherPDH {
 			output.(*arvados.Collection).ManifestText = otherManifest
-		} else if uuid == subdirPDH {
-			output.(*arvados.Collection).ManifestText = subdirManifest
+		} else if uuid == normalizedWithSubdirsPDH {
+			output.(*arvados.Collection).ManifestText = normalizedManifestWithSubdirs
 		}
 	}
 	if resourceType == "containers" {
@@ -1194,7 +1194,7 @@ func (s *TestSuite) TestStdoutWithMultipleMountPointsUnderOutputDir(c *C) {
 
 				c.Check(-1, Not(Equals), strings.Index(manifest, "./a/b 307372fa8fd5c146b22ae7a45b49bc31+6 0:6:c.out"))
 
-				origManifestWithDotReplacedAsFoo := strings.Replace(subdirManifest, "./", "./foo/", -1)
+				origManifestWithDotReplacedAsFoo := strings.Replace(normalizedManifestWithSubdirs, "./", "./foo/", -1)
 				c.Check(-1, Not(Equals), strings.Index(manifest, "./foo"+origManifestWithDotReplacedAsFoo[1:]))
 
 				c.Check(-1, Not(Equals), strings.Index(manifest, "./foo 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396c73c0abcdefgh11234567890 at 569fa8c3 9:18:bar"))
@@ -1203,7 +1203,7 @@ func (s *TestSuite) TestStdoutWithMultipleMountPointsUnderOutputDir(c *C) {
 
 				c.Check(-1, Not(Equals), strings.Index(manifest, "./foo 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396cabcdefghij6419876543234 at 569fa8c4 9:18:sub1file2"))
 
-				c.Check(-1, Not(Equals), strings.Index(manifest, "./foo/bar 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396c73c0bcdefghijk544332211 at 569fa8c5 9:19:sub2file2"))
+				c.Check(-1, Not(Equals), strings.Index(manifest, "./foo/bar 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396c73c0bcdefghijk544332211 at 569fa8c5 9:18:sub2file2"))
 			}
 		}
 	}
@@ -1252,7 +1252,7 @@ func (s *TestSuite) TestStdoutWithMountPointForFileUnderOutputDir(c *C) {
 
 				c.Check(-1, Not(Equals), strings.Index(manifest, "./a/b 307372fa8fd5c146b22ae7a45b49bc31+6 0:6:c.out"))
 
-				origManifestWithDotReplacedAsFoo := strings.Replace(subdirManifest, "./", "./foo/", -1)
+				origManifestWithDotReplacedAsFoo := strings.Replace(normalizedManifestWithSubdirs, "./", "./foo/", -1)
 				c.Check(-1, Not(Equals), strings.Index(manifest, "./foo"+origManifestWithDotReplacedAsFoo[1:]))
 
 				c.Check(-1, Not(Equals), strings.Index(manifest, "./foo 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396c73c0abcdefgh11234567890 at 569fa8c3 9:18:bar"))
@@ -1261,7 +1261,7 @@ func (s *TestSuite) TestStdoutWithMountPointForFileUnderOutputDir(c *C) {
 
 				c.Check(-1, Not(Equals), strings.Index(manifest, "./foo 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396cabcdefghij6419876543234 at 569fa8c4 9:18:sub1file2"))
 
-				c.Check(-1, Not(Equals), strings.Index(manifest, "./foo/bar 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396c73c0bcdefghijk544332211 at 569fa8c5 9:19:sub2file2"))
+				c.Check(-1, Not(Equals), strings.Index(manifest, "./foo/bar 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396c73c0bcdefghijk544332211 at 569fa8c5 9:18:sub2file2"))
 			}
 		}
 	}

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list