[ARVADOS] created: 1.2.0-283-g4ed6626cf

Git user git at public.curoverse.com
Mon Nov 5 10:00:47 EST 2018


        at  4ed6626cfba06e924f77dc54e1fe704eff1d113c (commit)


commit 4ed6626cfba06e924f77dc54e1fe704eff1d113c
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date:   Mon Nov 5 09:58:40 2018 -0500

    14406: Copy remote blocks to local storage during MarshalManifest.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>

diff --git a/sdk/go/arvados/fs_backend.go b/sdk/go/arvados/fs_backend.go
index 301f0b48b..9ae0fc3a5 100644
--- a/sdk/go/arvados/fs_backend.go
+++ b/sdk/go/arvados/fs_backend.go
@@ -21,6 +21,7 @@ type keepBackend struct {
 type keepClient interface {
 	ReadAt(locator string, p []byte, off int) (int, error)
 	PutB(p []byte) (string, int, error)
+	LocalLocator(locator string) (string, error)
 }
 
 type apiClient interface {
diff --git a/sdk/go/arvados/fs_collection.go b/sdk/go/arvados/fs_collection.go
index 7ba0bb8eb..2b9b7b30c 100644
--- a/sdk/go/arvados/fs_collection.go
+++ b/sdk/go/arvados/fs_collection.go
@@ -546,9 +546,10 @@ func (dn *dirnode) Child(name string, replace func(inode) (inode, error)) (inode
 	return dn.treenode.Child(name, replace)
 }
 
-// sync flushes in-memory data (for the children with the given names,
-// which must be children of dn) to persistent storage. Caller must
-// have write lock on dn and the named children.
+// sync flushes in-memory data and remote block references (for the
+// children with the given names, which must be children of dn) to
+// local persistent storage. Caller must have write lock on dn and the
+// named children.
 func (dn *dirnode) sync(names []string) error {
 	type shortBlock struct {
 		fn  *filenode
@@ -591,25 +592,33 @@ func (dn *dirnode) sync(names []string) error {
 			continue
 		}
 		for idx, seg := range fn.segments {
-			seg, ok := seg.(*memSegment)
-			if !ok {
-				continue
-			}
-			if seg.Len() > maxBlockSize/2 {
-				if err := flush([]shortBlock{{fn, idx}}); err != nil {
+			switch seg := seg.(type) {
+			case storedSegment:
+				loc, err := dn.fs.LocalLocator(seg.locator)
+				if err != nil {
 					return err
 				}
-				continue
-			}
-			if pendingLen+seg.Len() > maxBlockSize {
-				if err := flush(pending); err != nil {
-					return err
+				seg.locator = loc
+				fn.segments[idx] = seg
+			case *memSegment:
+				if seg.Len() > maxBlockSize/2 {
+					if err := flush([]shortBlock{{fn, idx}}); err != nil {
+						return err
+					}
+					continue
 				}
-				pending = nil
-				pendingLen = 0
+				if pendingLen+seg.Len() > maxBlockSize {
+					if err := flush(pending); err != nil {
+						return err
+					}
+					pending = nil
+					pendingLen = 0
+				}
+				pending = append(pending, shortBlock{fn, idx})
+				pendingLen += seg.Len()
+			default:
+				panic(fmt.Sprintf("can't sync segment type %T", seg))
 			}
-			pending = append(pending, shortBlock{fn, idx})
-			pendingLen += seg.Len()
 		}
 	}
 	return flush(pending)
diff --git a/sdk/go/arvados/fs_collection_test.go b/sdk/go/arvados/fs_collection_test.go
index 8d59540d0..5faa5794f 100644
--- a/sdk/go/arvados/fs_collection_test.go
+++ b/sdk/go/arvados/fs_collection_test.go
@@ -7,6 +7,7 @@ package arvados
 import (
 	"bytes"
 	"crypto/md5"
+	"crypto/sha1"
 	"errors"
 	"fmt"
 	"io"
@@ -16,6 +17,7 @@ import (
 	"os"
 	"regexp"
 	"runtime"
+	"strings"
 	"sync"
 	"testing"
 	"time"
@@ -27,7 +29,8 @@ import (
 var _ = check.Suite(&CollectionFSSuite{})
 
 type keepClientStub struct {
-	blocks map[string][]byte
+	blocks      map[string][]byte
+	refreshable map[string]bool
 	sync.RWMutex
 }
 
@@ -53,11 +56,28 @@ func (kcs *keepClientStub) PutB(p []byte) (string, int, error) {
 	return locator, 1, nil
 }
 
+var localOrRemoteSignature = regexp.MustCompile(`\+[AR][^+]*`)
+
+func (kcs *keepClientStub) LocalLocator(locator string) (string, error) {
+	kcs.Lock()
+	defer kcs.Unlock()
+	if strings.Contains(locator, "+R") {
+		if len(locator) < 32 {
+			return "", fmt.Errorf("bad locator: %q", locator)
+		}
+		if _, ok := kcs.blocks[locator[:32]]; !ok && !kcs.refreshable[locator[:32]] {
+			return "", fmt.Errorf("kcs.refreshable[%q]==false", locator)
+		}
+	}
+	fakeSig := fmt.Sprintf("+A%x@%x", sha1.Sum(nil), time.Now().Add(time.Hour*24*14).Unix())
+	return localOrRemoteSignature.ReplaceAllLiteralString(locator, fakeSig), nil
+}
+
 type CollectionFSSuite struct {
 	client *Client
 	coll   Collection
 	fs     CollectionFileSystem
-	kc     keepClient
+	kc     *keepClientStub
 }
 
 func (s *CollectionFSSuite) SetUpTest(c *check.C) {
@@ -399,6 +419,37 @@ func (s *CollectionFSSuite) TestSeekSparse(c *check.C) {
 	checkSize(11)
 }
 
+func (s *CollectionFSSuite) TestMarshalCopiesRemoteBlocks(c *check.C) {
+	foo := "foo"
+	bar := "bar"
+	hash := map[string]string{
+		foo: fmt.Sprintf("%x", md5.Sum([]byte(foo))),
+		bar: fmt.Sprintf("%x", md5.Sum([]byte(bar))),
+	}
+
+	fs, err := (&Collection{
+		ManifestText: ". " + hash[foo] + "+3+Rzaaaa-foo@bab " + hash[bar] + "+3+A12345@ffffff 0:2:fo.txt 2:4:obar.txt\n",
+	}).FileSystem(s.client, s.kc)
+	c.Assert(err, check.IsNil)
+	manifest, err := fs.MarshalManifest(".")
+	c.Check(manifest, check.Equals, "")
+	c.Check(err, check.NotNil)
+
+	s.kc.refreshable = map[string]bool{hash[bar]: true}
+
+	for _, sigIn := range []string{"Rzaaaa-foo@bab", "A12345@abcde"} {
+		fs, err = (&Collection{
+			ManifestText: ". " + hash[foo] + "+3+A12345@fffff " + hash[bar] + "+3+" + sigIn + " 0:2:fo.txt 2:4:obar.txt\n",
+		}).FileSystem(s.client, s.kc)
+		c.Assert(err, check.IsNil)
+		manifest, err := fs.MarshalManifest(".")
+		c.Check(err, check.IsNil)
+		// Both blocks should now have +A signatures.
+		c.Check(manifest, check.Matches, `.*\+A.* .*\+A.*\n`)
+		c.Check(manifest, check.Not(check.Matches), `.*\+R.*\n`)
+	}
+}
+
 func (s *CollectionFSSuite) TestMarshalSmallBlocks(c *check.C) {
 	maxBlockSize = 8
 	defer func() { maxBlockSize = 2 << 26 }()
diff --git a/sdk/go/keepclient/keepclient.go b/sdk/go/keepclient/keepclient.go
index 169f1457e..6a3e83796 100644
--- a/sdk/go/keepclient/keepclient.go
+++ b/sdk/go/keepclient/keepclient.go
@@ -198,9 +198,9 @@ func (kc *KeepClient) PutR(r io.Reader) (locator string, replicas int, err error
 	}
 }
 
-func (kc *KeepClient) getOrHead(method string, locator string) (io.ReadCloser, int64, string, error) {
+func (kc *KeepClient) getOrHead(method string, locator string, header http.Header) (io.ReadCloser, int64, string, http.Header, error) {
 	if strings.HasPrefix(locator, "d41d8cd98f00b204e9800998ecf8427e+0") {
-		return ioutil.NopCloser(bytes.NewReader(nil)), 0, "", nil
+		return ioutil.NopCloser(bytes.NewReader(nil)), 0, "", nil, nil
 	}
 
 	reqid := kc.getRequestID()
@@ -237,8 +237,15 @@ func (kc *KeepClient) getOrHead(method string, locator string) (io.ReadCloser, i
 				errs = append(errs, fmt.Sprintf("%s: %v", url, err))
 				continue
 			}
-			req.Header.Add("Authorization", "OAuth2 "+kc.Arvados.ApiToken)
-			req.Header.Add("X-Request-Id", reqid)
+			for k, v := range header {
+				req.Header[k] = append([]string(nil), v...)
+			}
+			if req.Header.Get("Authorization") == "" {
+				req.Header.Set("Authorization", "OAuth2 "+kc.Arvados.ApiToken)
+			}
+			if req.Header.Get("X-Request-Id") == "" {
+				req.Header.Set("X-Request-Id", reqid)
+			}
 			resp, err := kc.httpClient().Do(req)
 			if err != nil {
 				// Probably a network error, may be transient,
@@ -269,12 +276,12 @@ func (kc *KeepClient) getOrHead(method string, locator string) (io.ReadCloser, i
 			if expectLength < 0 {
 				if resp.ContentLength < 0 {
 					resp.Body.Close()
-					return nil, 0, "", fmt.Errorf("error reading %q: no size hint, no Content-Length header in response", locator)
+					return nil, 0, "", nil, fmt.Errorf("error reading %q: no size hint, no Content-Length header in response", locator)
 				}
 				expectLength = resp.ContentLength
 			} else if resp.ContentLength >= 0 && expectLength != resp.ContentLength {
 				resp.Body.Close()
-				return nil, 0, "", fmt.Errorf("error reading %q: size hint %d != Content-Length %d", locator, expectLength, resp.ContentLength)
+				return nil, 0, "", nil, fmt.Errorf("error reading %q: size hint %d != Content-Length %d", locator, expectLength, resp.ContentLength)
 			}
 			// Success
 			if method == "GET" {
@@ -282,10 +289,10 @@ func (kc *KeepClient) getOrHead(method string, locator string) (io.ReadCloser, i
 					Reader: resp.Body,
 					Hash:   md5.New(),
 					Check:  locator[0:32],
-				}, expectLength, url, nil
+				}, expectLength, url, resp.Header, nil
 			} else {
 				resp.Body.Close()
-				return nil, expectLength, url, nil
+				return nil, expectLength, url, resp.Header, nil
 			}
 		}
 		serversToTry = retryList
@@ -301,7 +308,29 @@ func (kc *KeepClient) getOrHead(method string, locator string) (io.ReadCloser, i
 			isTemp: len(serversToTry) > 0,
 		}}
 	}
-	return nil, 0, "", err
+	return nil, 0, "", nil, err
+}
+
+// LocalLocator returns a locator equivalent to the one supplied, but
+// with a valid signature from the local cluster. If the given locator
+// already has a local signature, it is returned unchanged.
+func (kc *KeepClient) LocalLocator(locator string) (string, error) {
+	if !strings.Contains(locator, "+R") {
+		// Either it has +A, or it's unsigned and we assume
+		// it's a local locator on a site with signatures
+		// disabled.
+		return locator, nil
+	}
+	sighdr := fmt.Sprintf("local, time=%s", time.Now().UTC().Format(time.RFC3339))
+	_, _, url, hdr, err := kc.getOrHead("HEAD", locator, http.Header{"X-Keep-Signature": []string{sighdr}})
+	if err != nil {
+		return "", err
+	}
+	loc := hdr.Get("X-Keep-Locator")
+	if loc == "" {
+		return "", fmt.Errorf("missing X-Keep-Locator header in HEAD response from %s", url)
+	}
+	return loc, nil
 }
 
 // Get() retrieves a block, given a locator. Returns a reader, the
@@ -312,7 +341,8 @@ func (kc *KeepClient) getOrHead(method string, locator string) (io.ReadCloser, i
 // reader returned by this method will return a BadChecksum error
 // instead of EOF.
 func (kc *KeepClient) Get(locator string) (io.ReadCloser, int64, string, error) {
-	return kc.getOrHead("GET", locator)
+	rdr, size, url, _, err := kc.getOrHead("GET", locator, nil)
+	return rdr, size, url, err
 }
 
 // ReadAt() retrieves a portion of block from the cache if it's
@@ -329,7 +359,7 @@ func (kc *KeepClient) ReadAt(locator string, p []byte, off int) (int, error) {
 // Returns the data size (content length) reported by the Keep service
 // and the URI reporting the data size.
 func (kc *KeepClient) Ask(locator string) (int64, string, error) {
-	_, size, url, err := kc.getOrHead("HEAD", locator)
+	_, size, url, _, err := kc.getOrHead("HEAD", locator, nil)
 	return size, url, err
 }
 
diff --git a/sdk/go/keepclient/keepclient_test.go b/sdk/go/keepclient/keepclient_test.go
index dc80ad7e1..176ad65bb 100644
--- a/sdk/go/keepclient/keepclient_test.go
+++ b/sdk/go/keepclient/keepclient_test.go
@@ -884,6 +884,19 @@ func (s *ServerRequiredSuite) TestPutGetHead(c *C) {
 		c.Check(n, Equals, int64(len(content)))
 		c.Check(url2, Matches, fmt.Sprintf("http://localhost:\\d+/%s", hash))
 	}
+	{
+		loc, err := kc.LocalLocator(hash)
+		c.Check(err, Equals, nil)
+		c.Assert(len(loc) >= 32, Equals, true)
+		c.Check(loc[:32], Equals, hash[:32])
+	}
+	{
+		content := []byte("the perth county conspiracy")
+		loc, err := kc.LocalLocator(fmt.Sprintf("%x+%d+Rzaaaa-abcde@12345", md5.Sum(content), len(content)))
+		c.Check(loc, Equals, "")
+		c.Check(err, ErrorMatches, `.*HEAD .*\+R.*`)
+		c.Check(err, ErrorMatches, `.*HTTP 400.*`)
+	}
 }
 
 type StubProxyHandler struct {
diff --git a/services/crunch-run/crunchrun.go b/services/crunch-run/crunchrun.go
index 66534b430..800556866 100644
--- a/services/crunch-run/crunchrun.go
+++ b/services/crunch-run/crunchrun.go
@@ -61,6 +61,7 @@ type IKeepClient interface {
 	PutB(buf []byte) (string, int, error)
 	ReadAt(locator string, p []byte, off int) (int, error)
 	ManifestFileReader(m manifest.Manifest, filename string) (arvados.File, error)
+	LocalLocator(locator string) (string, error)
 	ClearBlockCache()
 }
 
diff --git a/services/crunch-run/crunchrun_test.go b/services/crunch-run/crunchrun_test.go
index eb4f220e2..ab6afc77c 100644
--- a/services/crunch-run/crunchrun_test.go
+++ b/services/crunch-run/crunchrun_test.go
@@ -375,6 +375,10 @@ call:
 	return nil
 }
 
+func (client *KeepTestClient) LocalLocator(locator string) (string, error) {
+	return locator, nil
+}
+
 func (client *KeepTestClient) PutB(buf []byte) (string, int, error) {
 	client.Content = buf
 	return fmt.Sprintf("%x+%d", md5.Sum(buf), len(buf)), len(buf), nil
@@ -520,6 +524,10 @@ func (*KeepErrorTestClient) PutB(buf []byte) (string, int, error) {
 	return "", 0, errors.New("KeepError")
 }
 
+func (*KeepErrorTestClient) LocalLocator(string) (string, error) {
+	return "", errors.New("KeepError")
+}
+
 type KeepReadErrorTestClient struct {
 	KeepTestClient
 }
diff --git a/services/keepstore/proxy_remote.go b/services/keepstore/proxy_remote.go
index 7dea2b665..1f82f3f4f 100644
--- a/services/keepstore/proxy_remote.go
+++ b/services/keepstore/proxy_remote.go
@@ -66,7 +66,7 @@ func (rp *remoteProxy) Get(ctx context.Context, w http.ResponseWriter, r *http.R
 			remoteID := part[1:6]
 			remote, ok := cluster.RemoteClusters[remoteID]
 			if !ok {
-				http.Error(w, "remote cluster not configured", http.StatusBadGateway)
+				http.Error(w, "remote cluster not configured", http.StatusBadRequest)
 				return
 			}
 			kc, err := rp.remoteClient(remoteID, remote, token)

commit fd06e6ea3cd933ac31cce1de48e71456e1147845
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date:   Thu Nov 1 15:22:36 2018 -0400

    14406: Tone down slow stress test.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>

diff --git a/sdk/go/arvados/fs_collection_test.go b/sdk/go/arvados/fs_collection_test.go
index 96347737f..8d59540d0 100644
--- a/sdk/go/arvados/fs_collection_test.go
+++ b/sdk/go/arvados/fs_collection_test.go
@@ -490,7 +490,7 @@ func (s *CollectionFSSuite) TestConcurrentWriters(c *check.C) {
 			f, err := s.fs.OpenFile("/dir1/foo", os.O_RDWR, 0)
 			c.Assert(err, check.IsNil)
 			defer f.Close()
-			for i := 0; i < 6502; i++ {
+			for i := 0; i < 1024; i++ {
 				r := rand.Uint32()
 				switch {
 				case r%11 == 0:

commit 368b127e04f92946d4ee5f2373c1181f19532801
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date:   Thu Nov 1 14:54:13 2018 -0400

    14406: Remove debug printfs.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>

diff --git a/sdk/go/arvados/fs_collection.go b/sdk/go/arvados/fs_collection.go
index 460686c6c..7ba0bb8eb 100644
--- a/sdk/go/arvados/fs_collection.go
+++ b/sdk/go/arvados/fs_collection.go
@@ -8,7 +8,6 @@ import (
 	"encoding/json"
 	"fmt"
 	"io"
-	"log"
 	"os"
 	"path"
 	"regexp"
@@ -116,14 +115,12 @@ func (fs *collectionFileSystem) newNode(name string, perm os.FileMode, modTime t
 }
 
 func (fs *collectionFileSystem) Sync() error {
-	log.Printf("cfs.Sync()")
 	if fs.uuid == "" {
 		return nil
 	}
 	txt, err := fs.MarshalManifest(".")
 	if err != nil {
-		log.Printf("WARNING: (collectionFileSystem)Sync() failed: %s", err)
-		return err
+		return fmt.Errorf("sync failed: %s", err)
 	}
 	coll := &Collection{
 		UUID:         fs.uuid,
@@ -131,9 +128,9 @@ func (fs *collectionFileSystem) Sync() error {
 	}
 	err = fs.RequestAndDecode(nil, "PUT", "arvados/v1/collections/"+fs.uuid, fs.UpdateBody(coll), map[string]interface{}{"select": []string{"uuid"}})
 	if err != nil {
-		log.Printf("WARNING: (collectionFileSystem)Sync() failed: %s", err)
+		return fmt.Errorf("sync failed: update %s: %s", fs.uuid, err)
 	}
-	return err
+	return nil
 }
 
 func (fs *collectionFileSystem) MarshalManifest(prefix string) (string, error) {

commit bda72e9c07d01620fbd220cf10b2c13187523f65
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date:   Thu Nov 1 13:56:59 2018 -0400

    14406: Fix safety warning.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>

diff --git a/sdk/go/arvados/fs_collection.go b/sdk/go/arvados/fs_collection.go
index f6afadba5..460686c6c 100644
--- a/sdk/go/arvados/fs_collection.go
+++ b/sdk/go/arvados/fs_collection.go
@@ -618,7 +618,7 @@ func (dn *dirnode) sync(names []string) error {
 	return flush(pending)
 }
 
-// caller must have read lock.
+// caller must have write lock.
 func (dn *dirnode) marshalManifest(prefix string) (string, error) {
 	var streamLen int64
 	type filepart struct {

commit 8181eb2b50cb2462e0fd52d67203350ed802537b
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date:   Thu Nov 1 13:56:13 2018 -0400

    14406: Pass output through collectionFS if it has remote block refs.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>

diff --git a/services/crunch-run/crunchrun.go b/services/crunch-run/crunchrun.go
index 27136b452..66534b430 100644
--- a/services/crunch-run/crunchrun.go
+++ b/services/crunch-run/crunchrun.go
@@ -1293,6 +1293,17 @@ func (runner *ContainerRunner) CaptureOutput() error {
 	if err != nil {
 		return err
 	}
+	if n := len(regexp.MustCompile(` [0-9a-f]+\+\S*\+R`).FindAllStringIndex(txt, -1)); n > 0 {
+		runner.CrunchLog.Printf("Copying %d data blocks from remote input collections...", n)
+		fs, err := (&arvados.Collection{ManifestText: txt}).FileSystem(runner.client, runner.Kc)
+		if err != nil {
+			return err
+		}
+		txt, err = fs.MarshalManifest(".")
+		if err != nil {
+			return err
+		}
+	}
 	var resp arvados.Collection
 	err = runner.ArvClient.Create("collections", arvadosclient.Dict{
 		"ensure_unique_name": true,

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list