[ARVADOS] created: a77e3ab646401773cc24c88f62b4d82b8826aef0

Git user git at public.curoverse.com
Mon Sep 4 16:49:25 EDT 2017


        at  a77e3ab646401773cc24c88f62b4d82b8826aef0 (commit)


commit a77e3ab646401773cc24c88f62b4d82b8826aef0
Author: Tom Clegg <tclegg at veritasgenetics.com>
Date:   Mon Sep 4 16:42:08 2017 -0400

    12190: Don't test block existence before first write.
    
    "Amazon S3 provides read-after-write consistency for PUTS of new
    objects in your S3 bucket in all regions with one caveat. The caveat
    is that if you make a HEAD or GET request to the key name (to find if
    the object exists) before creating the object, Amazon S3 provides
    eventual consistency for read-after-write."
    
    -- http://docs.aws.amazon.com/AmazonS3/latest/dev/Introduction.html#ConsistencyModel
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg at veritasgenetics.com>

diff --git a/services/keepstore/s3_volume.go b/services/keepstore/s3_volume.go
index 0ab3e96..3f08f6e 100644
--- a/services/keepstore/s3_volume.go
+++ b/services/keepstore/s3_volume.go
@@ -340,6 +340,40 @@ func (v *S3Volume) Get(ctx context.Context, loc string, buf []byte) (int, error)
 
 // Compare the given data with the stored data.
 func (v *S3Volume) Compare(ctx context.Context, loc string, expect []byte) error {
+	errChan := make(chan error, 1)
+	go func() {
+		_, err := v.bucket.Head("recent/"+loc, nil)
+		errChan <- err
+	}()
+	var err error
+	select {
+	case <-ctx.Done():
+		return ctx.Err()
+	case err = <-errChan:
+	}
+	if err != nil {
+		// Checking for "loc" itself here would interfere with
+		// future GET requests.
+		//
+		// On AWS, if X doesn't exist, a HEAD or GET request
+		// for X causes X's non-existence to be cached. Thus,
+		// if we test for X, then create X and return a
+		// signature to our client, the client might still get
+		// 404 from all keepstores when trying to read it.
+		//
+		// To avoid this, we avoid doing HEAD X or GET X until
+		// we know X has been written.
+		//
+		// Note that X might exist even though recent/X
+		// doesn't: for example, the response to HEAD recent/X
+		// might itself come from a stale cache. In such
+		// cases, we will return a false negative and
+		// PutHandler might needlessly create another replica
+		// on a different volume. That's not ideal, but it's
+		// better than passing the eventually-consistent
+		// problem on to our clients.
+		return v.translateError(err)
+	}
 	rdr, err := v.getReaderWithContext(ctx, loc)
 	if err != nil {
 		return err
diff --git a/services/keepstore/s3_volume_test.go b/services/keepstore/s3_volume_test.go
index c2084ee..3d4a195 100644
--- a/services/keepstore/s3_volume_test.go
+++ b/services/keepstore/s3_volume_test.go
@@ -455,7 +455,11 @@ func (v *TestableS3Volume) Start() error {
 func (v *TestableS3Volume) PutRaw(loc string, block []byte) {
 	err := v.bucket.Put(loc, block, "application/octet-stream", s3ACL, s3.Options{})
 	if err != nil {
-		log.Printf("PutRaw: %+v", err)
+		log.Printf("PutRaw: %s: %+v", loc, err)
+	}
+	err = v.bucket.Put("recent/"+loc, nil, "application/octet-stream", s3ACL, s3.Options{})
+	if err != nil {
+		log.Printf("PutRaw: recent/%s: %+v", loc, err)
 	}
 }
 

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list