[ARVADOS] updated: 1.3.0-2771-g544d7aae0

Git user git at public.arvados.org
Mon Jul 13 01:34:20 UTC 2020


Summary of changes:
 lib/deduplicationreport/report.go      | 23 ++++++------
 lib/deduplicationreport/report_test.go | 68 ++++++++++++++++------------------
 2 files changed, 43 insertions(+), 48 deletions(-)

       via  544d7aae0d58a25e8c761c638167c3564de06af5 (commit)
      from  2e0648fb2b8a006664e6225826d78916f682eff5 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.


commit 544d7aae0d58a25e8c761c638167c3564de06af5
Author: Ward Vandewege <ward at curii.com>
Date:   Sun Jul 12 21:34:02 2020 -0400

    16573: address review comments.
    
    Arvados-DCO-1.1-Signed-off-by: Ward Vandewege <ward at curii.com>

diff --git a/lib/deduplicationreport/report.go b/lib/deduplicationreport/report.go
index b7699fcb2..663e73427 100644
--- a/lib/deduplicationreport/report.go
+++ b/lib/deduplicationreport/report.go
@@ -22,7 +22,7 @@ import (
 func deDuplicate(inputs []string) (trimmed []string) {
 	seen := make(map[string]bool)
 	for _, uuid := range inputs {
-		if _, ok := seen[uuid]; !ok {
+		if !seen[uuid] {
 			seen[uuid] = true
 			trimmed = append(trimmed, uuid)
 		}
@@ -42,9 +42,9 @@ Usage:
      <collection-pdh>,<collection_uuid> ...
 
   This program analyzes the overlap in blocks used by 2 or more collections. It
-  prints a deduplication report that shows the nominal space used by the list
-  of collection, as well as the actual size and the amount of space that is
-  saved by Keep's deduplication.
+  prints a deduplication report that shows the nominal space used by the
+  collections, as well as the actual size and the amount of space that is saved
+  by Keep's deduplication.
 
   The list of collections may be provided in two ways. A list of collection
   uuids is sufficient. Alternatively, the PDH for each collection may also be
@@ -58,9 +58,9 @@ Example:
   Use the 'arv' and 'jq' commands to get the list of the 100
   largest collections and generate the deduplication report:
 
-  arv collection list --order 'file_size_total desc' | \
+  arv collection list --order 'file_size_total desc' --limit 100 | \
     jq -r '.items[] | [.portable_data_hash,.uuid] |@csv' | \
-    tail -n100 |sed -e 's/"//g'|tr '\n' ' ' | \
+    tail -n+2 |sed -e 's/"//g'|tr '\n' ' ' | \
     xargs %s
 
 Options:
@@ -80,8 +80,8 @@ Options:
 
 	inputs = deDuplicate(inputs)
 
-	if len(inputs) < 2 {
-		logger.Error("Error: at least 2 different collections UUIDs required")
+	if len(inputs) < 1 {
+		logger.Errorf("Error: no collections provided\n")
 		flags.Usage()
 		return 2, inputs
 	}
@@ -115,7 +115,7 @@ func report(prog string, args []string, loader *config.Loader, logger *logrus.Lo
 	// Arvados Client setup
 	arv, err := arvadosclient.MakeArvadosClient()
 	if err != nil {
-		logger.Errorf("error creating Arvados object: %s", err)
+		logger.Errorf("Error creating Arvados object: %s\n", err)
 		exitcode = 1
 		return
 	}
@@ -129,7 +129,6 @@ func report(prog string, args []string, loader *config.Loader, logger *logrus.Lo
 	pdhs := make(map[string]Col)
 	var nominalSize int64
 
-	fmt.Println()
 	for _, input := range inputs {
 		var uuid string
 		var pdh string
@@ -143,7 +142,7 @@ func report(prog string, args []string, loader *config.Loader, logger *logrus.Lo
 			uuid = input
 		}
 		if !strings.Contains(uuid, "-4zz18-") {
-			logger.Error("uuid must refer to collection object")
+			logger.Errorf("Error: uuid must refer to collection object\n")
 			exitcode = 1
 			return
 		}
@@ -201,7 +200,7 @@ func report(prog string, args []string, loader *config.Loader, logger *logrus.Lo
 	seen := make(map[string]bool)
 	for _, v := range blocks {
 		for pdh, size := range v {
-			if _, ok := seen[pdh]; !ok {
+			if !seen[pdh] {
 				seen[pdh] = true
 				totalSize += int64(size)
 			}
diff --git a/lib/deduplicationreport/report_test.go b/lib/deduplicationreport/report_test.go
index 6e7cd3af5..dc760f78a 100644
--- a/lib/deduplicationreport/report_test.go
+++ b/lib/deduplicationreport/report_test.go
@@ -39,10 +39,13 @@ func (*Suite) TestTwoIdenticalUUIDs(c *check.C) {
 	var stdout, stderr bytes.Buffer
 	// Run dedupreport with 2 identical uuids
 	exitcode := Command.RunCommand("deduplicationreport.test", []string{arvadostest.FooCollection, arvadostest.FooCollection}, &bytes.Buffer{}, &stdout, &stderr)
-	c.Check(exitcode, check.Equals, 2)
-	c.Check(stdout.String(), check.Equals, "")
+	c.Check(exitcode, check.Equals, 0)
+	//c.Check(stdout.String(), check.Equals, "")
+	c.Check(stdout.String(), check.Matches, "(?ms).*Collections:[[:space:]]+1.*")
+	c.Check(stdout.String(), check.Matches, "(?ms).*Nominal size of stored data:[[:space:]]+3 bytes \\(3 B\\).*")
+	c.Check(stdout.String(), check.Matches, "(?ms).*Actual size of stored data:[[:space:]]+3 bytes \\(3 B\\).*")
+	c.Check(stdout.String(), check.Matches, "(?ms).*Saved by Keep deduplication:[[:space:]]+0 bytes \\(0 B\\).*")
 	c.Log(stderr.String())
-	c.Check(stderr.String(), check.Matches, `(?ms).*Error: at least 2 different collections UUIDs required.*`)
 }
 
 func (*Suite) TestTwoUUIDsInvalidPDH(c *check.C) {
@@ -70,6 +73,7 @@ func (*Suite) TestManyUUIDsNoOverlap(c *check.C) {
 	// Run dedupreport with 5 UUIDs
 	exitcode := Command.RunCommand("deduplicationreport.test", []string{arvadostest.FooCollection, arvadostest.HelloWorldCollection, arvadostest.FooBarDirCollection, arvadostest.WazVersion1Collection, arvadostest.UserAgreementCollection}, &bytes.Buffer{}, &stdout, &stderr)
 	c.Check(exitcode, check.Equals, 0)
+	c.Check(stdout.String(), check.Matches, "(?ms).*Collections:[[:space:]]+5.*")
 	c.Check(stdout.String(), check.Matches, "(?ms).*Nominal size of stored data:[[:space:]]+249049 bytes \\(243 KiB\\).*")
 	c.Check(stdout.String(), check.Matches, "(?ms).*Actual size of stored data:[[:space:]]+249049 bytes \\(243 KiB\\).*")
 	c.Check(stdout.String(), check.Matches, "(?ms).*Saved by Keep deduplication:[[:space:]]+0 bytes \\(0 B\\).*")
@@ -83,42 +87,34 @@ func (*Suite) TestTwoOverlappingCollections(c *check.C) {
 	arv := arvados.NewClientFromEnv()
 
 	var c1 arvados.Collection
-	err := arv.RequestAndDecode(&c1, "POST", "arvados/v1/collections", nil, map[string]interface{}{"collection": map[string]interface{}{"manifest_text": ". d3b07384d113edec49eaa6238ad5ff00+4+A2705511e0c47c92cc73e9ddc95b9822ef774c406 at 5f0de808 0:4:foo\n"}})
-	c.Assert(err, check.Equals, nil)
-
-	var c2 arvados.Collection
-	err = arv.RequestAndDecode(&c2, "POST", "arvados/v1/collections", nil, map[string]interface{}{"collection": map[string]interface{}{"manifest_text": ". c157a79031e1c40f85931829bc5fc552+4+A1544eb0cee937934dc565d2b11836c804384c139 at 5f0e0bf9 d3b07384d113edec49eaa6238ad5ff00+4+A60746cad7ecc16fe26a0c17c55af90db675369c2 at 5f0e0bf9 0:4:bar 4:4:foo\n"}})
-	c.Assert(err, check.Equals, nil)
-
-	// Run dedupreport with 2 arguments: uuid uuid
-	exitcode := Command.RunCommand("deduplicationreport.test", []string{c1.UUID, c2.UUID}, &bytes.Buffer{}, &stdout, &stderr)
-	c.Check(exitcode, check.Equals, 0)
-	c.Check(stdout.String(), check.Matches, "(?ms).*Nominal size of stored data:[[:space:]]+12 bytes \\(12 B\\).*")
-	c.Check(stdout.String(), check.Matches, "(?ms).*Actual size of stored data:[[:space:]]+8 bytes \\(8 B\\).*")
-	c.Check(stdout.String(), check.Matches, "(?ms).*Saved by Keep deduplication:[[:space:]]+4 bytes \\(4 B\\).*")
-	c.Log(stderr.String())
-	c.Check(stderr.String(), check.Equals, "")
-}
-
-func (*Suite) TestTwoOverlappingCollectionsWithPDH(c *check.C) {
-	var stdout, stderr bytes.Buffer
-	// Create two collections
-	arv := arvados.NewClientFromEnv()
-
-	var c1 arvados.Collection
-	err := arv.RequestAndDecode(&c1, "POST", "arvados/v1/collections", nil, map[string]interface{}{"collection": map[string]interface{}{"manifest_text": ". d3b07384d113edec49eaa6238ad5ff00+4+A2705511e0c47c92cc73e9ddc95b9822ef774c406 at 5f0de808 0:4:foo\n"}})
+	err := arv.RequestAndDecode(&c1, "POST", "arvados/v1/collections", nil, map[string]interface{}{"collection": map[string]interface{}{"manifest_text": ". d3b07384d113edec49eaa6238ad5ff00+4 0:4:foo\n"}})
 	c.Assert(err, check.Equals, nil)
 
 	var c2 arvados.Collection
-	err = arv.RequestAndDecode(&c2, "POST", "arvados/v1/collections", nil, map[string]interface{}{"collection": map[string]interface{}{"manifest_text": ". c157a79031e1c40f85931829bc5fc552+4+A1544eb0cee937934dc565d2b11836c804384c139 at 5f0e0bf9 d3b07384d113edec49eaa6238ad5ff00+4+A60746cad7ecc16fe26a0c17c55af90db675369c2 at 5f0e0bf9 0:4:bar 4:4:foo\n"}})
+	err = arv.RequestAndDecode(&c2, "POST", "arvados/v1/collections", nil, map[string]interface{}{"collection": map[string]interface{}{"manifest_text": ". c157a79031e1c40f85931829bc5fc552+4 d3b07384d113edec49eaa6238ad5ff00+4 0:4:bar 4:4:foo\n"}})
 	c.Assert(err, check.Equals, nil)
 
-	// Run dedupreport with 2 arguments: pdh,uuid uuid
-	exitcode := Command.RunCommand("deduplicationreport.test", []string{c1.PortableDataHash + "," + c1.UUID, c2.UUID}, &bytes.Buffer{}, &stdout, &stderr)
-	c.Check(exitcode, check.Equals, 0)
-	c.Check(stdout.String(), check.Matches, "(?ms).*Nominal size of stored data:[[:space:]]+12 bytes \\(12 B\\).*")
-	c.Check(stdout.String(), check.Matches, "(?ms).*Actual size of stored data:[[:space:]]+8 bytes \\(8 B\\).*")
-	c.Check(stdout.String(), check.Matches, "(?ms).*Saved by Keep deduplication:[[:space:]]+4 bytes \\(4 B\\).*")
-	c.Log(stderr.String())
-	c.Check(stderr.String(), check.Equals, "")
+	for _, trial := range []struct {
+		field1 string
+		field2 string
+	}{
+		{
+			// Run dedupreport with 2 arguments: uuid uuid
+			field1: c1.UUID,
+			field2: c2.UUID,
+		},
+		{
+			// Run dedupreport with 2 arguments: pdh,uuid uuid
+			field1: c1.PortableDataHash + "," + c1.UUID,
+			field2: c2.UUID,
+		},
+	} {
+		exitcode := Command.RunCommand("deduplicationreport.test", []string{trial.field1, trial.field2}, &bytes.Buffer{}, &stdout, &stderr)
+		c.Check(exitcode, check.Equals, 0)
+		c.Check(stdout.String(), check.Matches, "(?ms).*Nominal size of stored data:[[:space:]]+12 bytes \\(12 B\\).*")
+		c.Check(stdout.String(), check.Matches, "(?ms).*Actual size of stored data:[[:space:]]+8 bytes \\(8 B\\).*")
+		c.Check(stdout.String(), check.Matches, "(?ms).*Saved by Keep deduplication:[[:space:]]+4 bytes \\(4 B\\).*")
+		c.Log(stderr.String())
+		c.Check(stderr.String(), check.Equals, "")
+	}
 }

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list