[ARVADOS] updated: 471270bc6ec9453fdd4c1faf97b65a8291543c6f
Git user
git at public.curoverse.com
Thu Feb 2 19:18:23 EST 2017
Summary of changes:
sdk/go/manifest/manifest.go | 13 +++++++++++
services/crunch-run/crunchrun.go | 44 +++++++++++++++++------------------
services/crunch-run/crunchrun_test.go | 16 ++++++-------
3 files changed, 42 insertions(+), 31 deletions(-)
via 471270bc6ec9453fdd4c1faf97b65a8291543c6f (commit)
from e6986ed88126f61ecad0f557c78981961f901044 (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
commit 471270bc6ec9453fdd4c1faf97b65a8291543c6f
Author: radhika <radhika at curoverse.com>
Date: Thu Feb 2 19:16:25 2017 -0500
9397: Use manifest.FileSegmentForPath to get manifest segment for a file path. Cache collections to avoid fetching
the same collection repeatedly. If no manifest segment found for a mounted path, log that fact.
diff --git a/sdk/go/manifest/manifest.go b/sdk/go/manifest/manifest.go
index 22b1c97..f6656b4 100644
--- a/sdk/go/manifest/manifest.go
+++ b/sdk/go/manifest/manifest.go
@@ -265,6 +265,19 @@ func (m *Manifest) FileSegmentIterByName(filepath string) <-chan *FileSegment {
return ch
}
+func (m *Manifest) FileSegmentForPath(filepath string) string {
+ dir := "."
+ file := filepath
+ if idx := strings.LastIndex(filepath, "/"); idx >= 0 {
+ dir = "./" + filepath[0:idx]
+ file = filepath[idx+1:]
+ }
+ for fs := range m.FileSegmentIterByName(filepath) {
+ return fmt.Sprintf("%v %v %v:%v:%v", dir, fs.Locator, fs.Offset, fs.Len, file)
+ }
+ return ""
+}
+
// Blocks may appear multiple times within the same manifest if they
// are used by multiple files. In that case this Iterator will output
// the same block multiple times.
diff --git a/services/crunch-run/crunchrun.go b/services/crunch-run/crunchrun.go
index fa8ee13..876df03 100644
--- a/services/crunch-run/crunchrun.go
+++ b/services/crunch-run/crunchrun.go
@@ -713,6 +713,8 @@ func (runner *ContainerRunner) CaptureOutput() error {
return nil
}
+var outputCollections = make(map[string]arvados.Collection)
+
// Fetch the collection for the mnt.PortableDataHash
// Return the manifest_text fragment corresponding to the specified mnt.Path
// after making any required updates.
@@ -730,12 +732,17 @@ func (runner *ContainerRunner) CaptureOutput() error {
// "path":"/subdir1/subdir2"
// "path":"/subdir/filename" etc
func (runner *ContainerRunner) getCollectionManifestForPath(mnt arvados.Mount, bindSuffix string) (string, error) {
- var collection arvados.Collection
- err := runner.ArvClient.Get("collections", mnt.PortableDataHash, nil, &collection)
- if err != nil {
- return "", fmt.Errorf("While getting collection for %v: %v", mnt.PortableDataHash, err)
+ collection := outputCollections[mnt.PortableDataHash]
+ if collection.PortableDataHash == "" {
+ err := runner.ArvClient.Get("collections", mnt.PortableDataHash, nil, &collection)
+ if err != nil {
+ return "", fmt.Errorf("While getting collection for %v: %v", mnt.PortableDataHash, err)
+ }
+ outputCollections[mnt.PortableDataHash] = collection
}
+ manifest := manifest.Manifest{Text: collection.ManifestText}
+
manifestText := ""
if mnt.Path == "" || mnt.Path == "/" {
// no path specified; return the entire manifest text after making adjustments
@@ -770,30 +777,21 @@ func (runner *ContainerRunner) getCollectionManifestForPath(mnt arvados.Mount, b
break
} else {
// look for a matching file in this stream
- if tokens[0] == pathSubdir {
- // path refers to a file in this stream
- for _, token := range tokens {
- if strings.Index(token, ":"+pathFileName) > 0 {
- // found the file in the stream; discard all other file tokens
- for _, t := range tokens {
- if strings.Index(t, ":") == -1 {
- manifestText += (" " + t)
- } else {
- break // done reading all non-file tokens of this stream
- }
- }
- manifestText = strings.Trim(manifestText, " ")
- token = strings.Replace(token, ":"+pathFileName, ":"+bindFileName, -1)
- manifestText += (" " + token + "\n")
- manifestText = strings.Replace(manifestText, pathSubdir, bindSubdir, -1)
- break
- }
- }
+ fs := manifest.FileSegmentForPath(mntPath[1:])
+ if fs != "" {
+ manifestText = strings.Replace(fs, ":"+pathFileName, ":"+bindFileName, -1)
+ manifestText = strings.Replace(manifestText, pathSubdir, bindSubdir, -1)
+ manifestText += "\n"
+ break
}
}
}
}
+ if manifestText == "" {
+ runner.CrunchLog.Printf("No manifest segment found for bind '%v' with path '%v'", bindSuffix, mnt.Path)
+ }
+
return manifestText, nil
}
diff --git a/services/crunch-run/crunchrun_test.go b/services/crunch-run/crunchrun_test.go
index 51549ae..8d0322c 100644
--- a/services/crunch-run/crunchrun_test.go
+++ b/services/crunch-run/crunchrun_test.go
@@ -57,8 +57,8 @@ var hwImageId = "9c31ee32b3d15268a0754e8edc74d4f815ee014b693bc5109058e431dd5caea
var otherManifest = ". 68a84f561b1d1708c6baff5e019a9ab3+46+Ae5d0af96944a3690becb1decdf60cc1c937f556d at 5693216f 0:46:md5sum.txt\n"
var otherPDH = "a3e8f74c6f101eae01fa08bfb4e49b3a+54"
-var subdirManifest = ". 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396c73c0abcdefgh11234567890 at 569fa8c3 0:9:file1_in_main.txt 9:18:file2_in_main.txt 27:5649:zzzzz-8i9sb-bcdefghijkdhvnk.log.txt\n./subdir1 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396cabcdefghij6419876543234 at 569fa8c4 0:9:file1_in_subdir1.txt 9:18:file2_in_subdir1.txt\n./subdir1/subdir2 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396c73c0bcdefghijk544332211 at 569fa8c5 0:9:file1_in_subdir2.txt 9:19:file2_in_subdir2.txt\n"
-var subdirPDH = "a0def87f80dd594d4675809e83bd4f15+367"
+var normalizedManifestWithSubdirs = ". 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396c73c0abcdefgh11234567890 at 569fa8c3 0:9:file1_in_main.txt 9:18:file2_in_main.txt 27:5649:zzzzz-8i9sb-bcdefghijkdhvnk.log.txt\n./subdir1 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396cabcdefghij6419876543234 at 569fa8c4 0:9:file1_in_subdir1.txt 9:18:file2_in_subdir1.txt\n./subdir1/subdir2 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396c73c0bcdefghijk544332211 at 569fa8c5 0:9:file1_in_subdir2.txt 9:18:file2_in_subdir2.txt\n"
+var normalizedWithSubdirsPDH = "a0def87f80dd594d4675809e83bd4f15+367"
var fakeAuthUUID = "zzzzz-gj3su-55pqoyepgi2glem"
var fakeAuthToken = "a3ltuwzqcu2u4sc0q7yhpc2w7s00fdcqecg5d6e0u3pfohmbjt"
@@ -185,8 +185,8 @@ func (client *ArvTestClient) Get(resourceType string, uuid string, parameters ar
output.(*arvados.Collection).ManifestText = hwManifest
} else if uuid == otherPDH {
output.(*arvados.Collection).ManifestText = otherManifest
- } else if uuid == subdirPDH {
- output.(*arvados.Collection).ManifestText = subdirManifest
+ } else if uuid == normalizedWithSubdirsPDH {
+ output.(*arvados.Collection).ManifestText = normalizedManifestWithSubdirs
}
}
if resourceType == "containers" {
@@ -1194,7 +1194,7 @@ func (s *TestSuite) TestStdoutWithMultipleMountPointsUnderOutputDir(c *C) {
c.Check(-1, Not(Equals), strings.Index(manifest, "./a/b 307372fa8fd5c146b22ae7a45b49bc31+6 0:6:c.out"))
- origManifestWithDotReplacedAsFoo := strings.Replace(subdirManifest, "./", "./foo/", -1)
+ origManifestWithDotReplacedAsFoo := strings.Replace(normalizedManifestWithSubdirs, "./", "./foo/", -1)
c.Check(-1, Not(Equals), strings.Index(manifest, "./foo"+origManifestWithDotReplacedAsFoo[1:]))
c.Check(-1, Not(Equals), strings.Index(manifest, "./foo 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396c73c0abcdefgh11234567890 at 569fa8c3 9:18:bar"))
@@ -1203,7 +1203,7 @@ func (s *TestSuite) TestStdoutWithMultipleMountPointsUnderOutputDir(c *C) {
c.Check(-1, Not(Equals), strings.Index(manifest, "./foo 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396cabcdefghij6419876543234 at 569fa8c4 9:18:sub1file2"))
- c.Check(-1, Not(Equals), strings.Index(manifest, "./foo/bar 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396c73c0bcdefghijk544332211 at 569fa8c5 9:19:sub2file2"))
+ c.Check(-1, Not(Equals), strings.Index(manifest, "./foo/bar 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396c73c0bcdefghijk544332211 at 569fa8c5 9:18:sub2file2"))
}
}
}
@@ -1252,7 +1252,7 @@ func (s *TestSuite) TestStdoutWithMountPointForFileUnderOutputDir(c *C) {
c.Check(-1, Not(Equals), strings.Index(manifest, "./a/b 307372fa8fd5c146b22ae7a45b49bc31+6 0:6:c.out"))
- origManifestWithDotReplacedAsFoo := strings.Replace(subdirManifest, "./", "./foo/", -1)
+ origManifestWithDotReplacedAsFoo := strings.Replace(normalizedManifestWithSubdirs, "./", "./foo/", -1)
c.Check(-1, Not(Equals), strings.Index(manifest, "./foo"+origManifestWithDotReplacedAsFoo[1:]))
c.Check(-1, Not(Equals), strings.Index(manifest, "./foo 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396c73c0abcdefgh11234567890 at 569fa8c3 9:18:bar"))
@@ -1261,7 +1261,7 @@ func (s *TestSuite) TestStdoutWithMountPointForFileUnderOutputDir(c *C) {
c.Check(-1, Not(Equals), strings.Index(manifest, "./foo 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396cabcdefghij6419876543234 at 569fa8c4 9:18:sub1file2"))
- c.Check(-1, Not(Equals), strings.Index(manifest, "./foo/bar 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396c73c0bcdefghijk544332211 at 569fa8c5 9:19:sub2file2"))
+ c.Check(-1, Not(Equals), strings.Index(manifest, "./foo/bar 3e426d509afffb85e06c4c96a7c15e91+27+Aa124ac75e5168396c73c0bcdefghijk544332211 at 569fa8c5 9:18:sub2file2"))
}
}
}
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list