[ARVADOS] updated: 1.3.0-2813-g8f3b2dede

Git user git at public.arvados.org
Fri Jul 24 17:56:35 UTC 2020


Summary of changes:
 ...configure-s3-object-storage.html.textile.liquid |  4 ++--
 services/keepstore/s3aws_volume.go                 | 22 +++++++++++++++-------
 2 files changed, 17 insertions(+), 9 deletions(-)

       via  8f3b2dedef2677654197e9838939d9abe7cc3791 (commit)
      from  ea57684c255434bcd25ec150a3979ce783a2183c (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.


commit 8f3b2dedef2677654197e9838939d9abe7cc3791
Author: Ward Vandewege <ward at curii.com>
Date:   Fri Jul 24 13:55:51 2020 -0400

    10477: disable sha-256 calculation by the S3 driver; we don't need it
           and it slows uploads down because it's CPU bound.
    
    Arvados-DCO-1.1-Signed-off-by: Ward Vandewege <ward at curii.com>

diff --git a/doc/install/configure-s3-object-storage.html.textile.liquid b/doc/install/configure-s3-object-storage.html.textile.liquid
index cfd436dd6..40cbbb533 100644
--- a/doc/install/configure-s3-object-storage.html.textile.liquid
+++ b/doc/install/configure-s3-object-storage.html.textile.liquid
@@ -107,11 +107,11 @@ The @aws-sdk-go-v2@ driver has faster _single thread_ read and write performance
 table(table table-bordered table-condensed).
 ||_. goamz |_. aws-sdk-go-v2 |_. command line|
 |single thread read performance (average)|32.53 MiB/s|79.48 MiB/s|keep-exercise -repeat 4 -run-time 60s -vary-request -use-index -rthreads 1 -wthreads 1|
-|single thread write performance (average)|39.75 MiB/s|49.58 MiB/s|keep-exercise -repeat 4 -run-time 60s -vary-request -use-index -rthreads 1 -wthreads 1|
+|single thread write performance (average)|39.75 MiB/s|41.05 MiB/s|keep-exercise -repeat 4 -run-time 60s -vary-request -use-index -rthreads 1 -wthreads 1|
 
 Because both S3 and Keep are optimized for _aggregate_ throughput, the single thread performance is not as important as it may seem at first glance. When using 20 concurrent read or write threads, the numbers from both drivers are more closely aligned:
 
 table(table table-bordered table-condensed).
 ||_. goamz |_. aws-sdk-go-v2 |_. command line|
 |20 thread read performance (average)|585.60 MiB/s|898.93 MiB/s|keep-exercise -repeat 4 -run-time 60s -vary-request -use-index -rthreads 20 -wthreads 0|
-|20 thread write performance (average)|610.40 MiB/s|542.40 MiB/s|keep-exercise -repeat 4 -run-time 60s -vary-request -use-index -rthreads 0 -wthreads 20|
+|20 thread write performance (average)|610.40 MiB/s|688.25 MiB/s|keep-exercise -repeat 4 -run-time 60s -vary-request -use-index -rthreads 0 -wthreads 20|
diff --git a/services/keepstore/s3aws_volume.go b/services/keepstore/s3aws_volume.go
index d4fe9fd15..9bbb3c5b1 100644
--- a/services/keepstore/s3aws_volume.go
+++ b/services/keepstore/s3aws_volume.go
@@ -534,7 +534,9 @@ func (b *s3AWSbucket) PutReader(path string, r io.Reader, length int64, contType
 		Bucket: aws.String(b.bucket),
 		Key:    aws.String(path),
 		Body:   r,
-	})
+	}, s3manager.WithUploaderRequestOptions(func(r *aws.Request) {
+		r.HTTPRequest.Header.Set("X-Amz-Content-Sha256", "UNSIGNED-PAYLOAD")
+	}))
 
 	b.stats.TickOps("put")
 	b.stats.Tick(&b.stats.Ops, &b.stats.PutOps)
@@ -570,10 +572,6 @@ func (v *S3AWSVolume) WriteBlock(ctx context.Context, loc string, rdr io.Reader)
 	// See if this is the empty block
 	if contentMD5 != "d41d8cd98f00b204e9800998ecf8427e" {
 		uploadInput.ContentMD5 = &contentMD5
-		// Unlike the goamz S3 driver, we don't need to precompute ContentSHA256:
-		// the aws-sdk-go v2 SDK uses a ReadSeeker to avoid having to copy the
-		// block, so there is no extra memory use to be concerned about. See
-		// makeSha256Reader in aws/signer/v4/v4.go.
 	}
 
 	// Some experimentation indicated that using concurrency 5 yields the best
@@ -585,7 +583,15 @@ func (v *S3AWSVolume) WriteBlock(ctx context.Context, loc string, rdr io.Reader)
 		u.Concurrency = 5
 	})
 
-	_, err = uploader.UploadWithContext(ctx, &uploadInput, s3manager.WithUploaderRequestOptions())
+	// Unlike the goamz S3 driver, we don't need to precompute ContentSHA256:
+	// the aws-sdk-go v2 SDK uses a ReadSeeker to avoid having to copy the
+	// block, so there is no extra memory use to be concerned about. See
+	// makeSha256Reader in aws/signer/v4/v4.go. In fact, we explicitly disable
+	// calculating the Sha-256 because we don't need it; we already use md5sum
+	// hashes that match the name of the block.
+	_, err = uploader.UploadWithContext(ctx, &uploadInput, s3manager.WithUploaderRequestOptions(func(r *aws.Request) {
+		r.HTTPRequest.Header.Set("X-Amz-Content-Sha256", "UNSIGNED-PAYLOAD")
+	}))
 
 	v.bucket.stats.TickOps("put")
 	v.bucket.stats.Tick(&v.bucket.stats.Ops, &v.bucket.stats.PutOps)
@@ -599,7 +605,9 @@ func (v *S3AWSVolume) WriteBlock(ctx context.Context, loc string, rdr io.Reader)
 		Bucket: aws.String(v.bucket.bucket),
 		Key:    aws.String("recent/" + loc),
 		Body:   empty,
-	})
+	}, s3manager.WithUploaderRequestOptions(func(r *aws.Request) {
+		r.HTTPRequest.Header.Set("X-Amz-Content-Sha256", "UNSIGNED-PAYLOAD")
+	}))
 	v.bucket.stats.TickOps("put")
 	v.bucket.stats.Tick(&v.bucket.stats.Ops, &v.bucket.stats.PutOps)
 	v.bucket.stats.TickErr(err)

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list