[ARVADOS] updated: 1.3.0-2771-g544d7aae0
Git user
git at public.arvados.org
Mon Jul 13 01:34:20 UTC 2020
Summary of changes:
lib/deduplicationreport/report.go | 23 ++++++------
lib/deduplicationreport/report_test.go | 68 ++++++++++++++++------------------
2 files changed, 43 insertions(+), 48 deletions(-)
via 544d7aae0d58a25e8c761c638167c3564de06af5 (commit)
from 2e0648fb2b8a006664e6225826d78916f682eff5 (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
commit 544d7aae0d58a25e8c761c638167c3564de06af5
Author: Ward Vandewege <ward at curii.com>
Date: Sun Jul 12 21:34:02 2020 -0400
16573: address review comments.
Arvados-DCO-1.1-Signed-off-by: Ward Vandewege <ward at curii.com>
diff --git a/lib/deduplicationreport/report.go b/lib/deduplicationreport/report.go
index b7699fcb2..663e73427 100644
--- a/lib/deduplicationreport/report.go
+++ b/lib/deduplicationreport/report.go
@@ -22,7 +22,7 @@ import (
func deDuplicate(inputs []string) (trimmed []string) {
seen := make(map[string]bool)
for _, uuid := range inputs {
- if _, ok := seen[uuid]; !ok {
+ if !seen[uuid] {
seen[uuid] = true
trimmed = append(trimmed, uuid)
}
@@ -42,9 +42,9 @@ Usage:
<collection-pdh>,<collection_uuid> ...
This program analyzes the overlap in blocks used by 2 or more collections. It
- prints a deduplication report that shows the nominal space used by the list
- of collection, as well as the actual size and the amount of space that is
- saved by Keep's deduplication.
+ prints a deduplication report that shows the nominal space used by the
+ collections, as well as the actual size and the amount of space that is saved
+ by Keep's deduplication.
The list of collections may be provided in two ways. A list of collection
uuids is sufficient. Alternatively, the PDH for each collection may also be
@@ -58,9 +58,9 @@ Example:
Use the 'arv' and 'jq' commands to get the list of the 100
largest collections and generate the deduplication report:
- arv collection list --order 'file_size_total desc' | \
+ arv collection list --order 'file_size_total desc' --limit 100 | \
jq -r '.items[] | [.portable_data_hash,.uuid] |@csv' | \
- tail -n100 |sed -e 's/"//g'|tr '\n' ' ' | \
+ tail -n+2 |sed -e 's/"//g'|tr '\n' ' ' | \
xargs %s
Options:
@@ -80,8 +80,8 @@ Options:
inputs = deDuplicate(inputs)
- if len(inputs) < 2 {
- logger.Error("Error: at least 2 different collections UUIDs required")
+ if len(inputs) < 1 {
+ logger.Errorf("Error: no collections provided\n")
flags.Usage()
return 2, inputs
}
@@ -115,7 +115,7 @@ func report(prog string, args []string, loader *config.Loader, logger *logrus.Lo
// Arvados Client setup
arv, err := arvadosclient.MakeArvadosClient()
if err != nil {
- logger.Errorf("error creating Arvados object: %s", err)
+ logger.Errorf("Error creating Arvados object: %s\n", err)
exitcode = 1
return
}
@@ -129,7 +129,6 @@ func report(prog string, args []string, loader *config.Loader, logger *logrus.Lo
pdhs := make(map[string]Col)
var nominalSize int64
- fmt.Println()
for _, input := range inputs {
var uuid string
var pdh string
@@ -143,7 +142,7 @@ func report(prog string, args []string, loader *config.Loader, logger *logrus.Lo
uuid = input
}
if !strings.Contains(uuid, "-4zz18-") {
- logger.Error("uuid must refer to collection object")
+ logger.Errorf("Error: uuid must refer to collection object\n")
exitcode = 1
return
}
@@ -201,7 +200,7 @@ func report(prog string, args []string, loader *config.Loader, logger *logrus.Lo
seen := make(map[string]bool)
for _, v := range blocks {
for pdh, size := range v {
- if _, ok := seen[pdh]; !ok {
+ if !seen[pdh] {
seen[pdh] = true
totalSize += int64(size)
}
diff --git a/lib/deduplicationreport/report_test.go b/lib/deduplicationreport/report_test.go
index 6e7cd3af5..dc760f78a 100644
--- a/lib/deduplicationreport/report_test.go
+++ b/lib/deduplicationreport/report_test.go
@@ -39,10 +39,13 @@ func (*Suite) TestTwoIdenticalUUIDs(c *check.C) {
var stdout, stderr bytes.Buffer
// Run dedupreport with 2 identical uuids
exitcode := Command.RunCommand("deduplicationreport.test", []string{arvadostest.FooCollection, arvadostest.FooCollection}, &bytes.Buffer{}, &stdout, &stderr)
- c.Check(exitcode, check.Equals, 2)
- c.Check(stdout.String(), check.Equals, "")
+ c.Check(exitcode, check.Equals, 0)
+ //c.Check(stdout.String(), check.Equals, "")
+ c.Check(stdout.String(), check.Matches, "(?ms).*Collections:[[:space:]]+1.*")
+ c.Check(stdout.String(), check.Matches, "(?ms).*Nominal size of stored data:[[:space:]]+3 bytes \\(3 B\\).*")
+ c.Check(stdout.String(), check.Matches, "(?ms).*Actual size of stored data:[[:space:]]+3 bytes \\(3 B\\).*")
+ c.Check(stdout.String(), check.Matches, "(?ms).*Saved by Keep deduplication:[[:space:]]+0 bytes \\(0 B\\).*")
c.Log(stderr.String())
- c.Check(stderr.String(), check.Matches, `(?ms).*Error: at least 2 different collections UUIDs required.*`)
}
func (*Suite) TestTwoUUIDsInvalidPDH(c *check.C) {
@@ -70,6 +73,7 @@ func (*Suite) TestManyUUIDsNoOverlap(c *check.C) {
// Run dedupreport with 5 UUIDs
exitcode := Command.RunCommand("deduplicationreport.test", []string{arvadostest.FooCollection, arvadostest.HelloWorldCollection, arvadostest.FooBarDirCollection, arvadostest.WazVersion1Collection, arvadostest.UserAgreementCollection}, &bytes.Buffer{}, &stdout, &stderr)
c.Check(exitcode, check.Equals, 0)
+ c.Check(stdout.String(), check.Matches, "(?ms).*Collections:[[:space:]]+5.*")
c.Check(stdout.String(), check.Matches, "(?ms).*Nominal size of stored data:[[:space:]]+249049 bytes \\(243 KiB\\).*")
c.Check(stdout.String(), check.Matches, "(?ms).*Actual size of stored data:[[:space:]]+249049 bytes \\(243 KiB\\).*")
c.Check(stdout.String(), check.Matches, "(?ms).*Saved by Keep deduplication:[[:space:]]+0 bytes \\(0 B\\).*")
@@ -83,42 +87,34 @@ func (*Suite) TestTwoOverlappingCollections(c *check.C) {
arv := arvados.NewClientFromEnv()
var c1 arvados.Collection
- err := arv.RequestAndDecode(&c1, "POST", "arvados/v1/collections", nil, map[string]interface{}{"collection": map[string]interface{}{"manifest_text": ". d3b07384d113edec49eaa6238ad5ff00+4+A2705511e0c47c92cc73e9ddc95b9822ef774c406@5f0de808 0:4:foo\n"}})
- c.Assert(err, check.Equals, nil)
-
- var c2 arvados.Collection
- err = arv.RequestAndDecode(&c2, "POST", "arvados/v1/collections", nil, map[string]interface{}{"collection": map[string]interface{}{"manifest_text": ". c157a79031e1c40f85931829bc5fc552+4+A1544eb0cee937934dc565d2b11836c804384c139@5f0e0bf9 d3b07384d113edec49eaa6238ad5ff00+4+A60746cad7ecc16fe26a0c17c55af90db675369c2@5f0e0bf9 0:4:bar 4:4:foo\n"}})
- c.Assert(err, check.Equals, nil)
-
- // Run dedupreport with 2 arguments: uuid uuid
- exitcode := Command.RunCommand("deduplicationreport.test", []string{c1.UUID, c2.UUID}, &bytes.Buffer{}, &stdout, &stderr)
- c.Check(exitcode, check.Equals, 0)
- c.Check(stdout.String(), check.Matches, "(?ms).*Nominal size of stored data:[[:space:]]+12 bytes \\(12 B\\).*")
- c.Check(stdout.String(), check.Matches, "(?ms).*Actual size of stored data:[[:space:]]+8 bytes \\(8 B\\).*")
- c.Check(stdout.String(), check.Matches, "(?ms).*Saved by Keep deduplication:[[:space:]]+4 bytes \\(4 B\\).*")
- c.Log(stderr.String())
- c.Check(stderr.String(), check.Equals, "")
-}
-
-func (*Suite) TestTwoOverlappingCollectionsWithPDH(c *check.C) {
- var stdout, stderr bytes.Buffer
- // Create two collections
- arv := arvados.NewClientFromEnv()
-
- var c1 arvados.Collection
- err := arv.RequestAndDecode(&c1, "POST", "arvados/v1/collections", nil, map[string]interface{}{"collection": map[string]interface{}{"manifest_text": ". d3b07384d113edec49eaa6238ad5ff00+4+A2705511e0c47c92cc73e9ddc95b9822ef774c406@5f0de808 0:4:foo\n"}})
+ err := arv.RequestAndDecode(&c1, "POST", "arvados/v1/collections", nil, map[string]interface{}{"collection": map[string]interface{}{"manifest_text": ". d3b07384d113edec49eaa6238ad5ff00+4 0:4:foo\n"}})
c.Assert(err, check.Equals, nil)
var c2 arvados.Collection
- err = arv.RequestAndDecode(&c2, "POST", "arvados/v1/collections", nil, map[string]interface{}{"collection": map[string]interface{}{"manifest_text": ". c157a79031e1c40f85931829bc5fc552+4+A1544eb0cee937934dc565d2b11836c804384c139@5f0e0bf9 d3b07384d113edec49eaa6238ad5ff00+4+A60746cad7ecc16fe26a0c17c55af90db675369c2@5f0e0bf9 0:4:bar 4:4:foo\n"}})
+ err = arv.RequestAndDecode(&c2, "POST", "arvados/v1/collections", nil, map[string]interface{}{"collection": map[string]interface{}{"manifest_text": ". c157a79031e1c40f85931829bc5fc552+4 d3b07384d113edec49eaa6238ad5ff00+4 0:4:bar 4:4:foo\n"}})
c.Assert(err, check.Equals, nil)
- // Run dedupreport with 2 arguments: pdh,uuid uuid
- exitcode := Command.RunCommand("deduplicationreport.test", []string{c1.PortableDataHash + "," + c1.UUID, c2.UUID}, &bytes.Buffer{}, &stdout, &stderr)
- c.Check(exitcode, check.Equals, 0)
- c.Check(stdout.String(), check.Matches, "(?ms).*Nominal size of stored data:[[:space:]]+12 bytes \\(12 B\\).*")
- c.Check(stdout.String(), check.Matches, "(?ms).*Actual size of stored data:[[:space:]]+8 bytes \\(8 B\\).*")
- c.Check(stdout.String(), check.Matches, "(?ms).*Saved by Keep deduplication:[[:space:]]+4 bytes \\(4 B\\).*")
- c.Log(stderr.String())
- c.Check(stderr.String(), check.Equals, "")
+ for _, trial := range []struct {
+ field1 string
+ field2 string
+ }{
+ {
+ // Run dedupreport with 2 arguments: uuid uuid
+ field1: c1.UUID,
+ field2: c2.UUID,
+ },
+ {
+ // Run dedupreport with 2 arguments: pdh,uuid uuid
+ field1: c1.PortableDataHash + "," + c1.UUID,
+ field2: c2.UUID,
+ },
+ } {
+ exitcode := Command.RunCommand("deduplicationreport.test", []string{trial.field1, trial.field2}, &bytes.Buffer{}, &stdout, &stderr)
+ c.Check(exitcode, check.Equals, 0)
+ c.Check(stdout.String(), check.Matches, "(?ms).*Nominal size of stored data:[[:space:]]+12 bytes \\(12 B\\).*")
+ c.Check(stdout.String(), check.Matches, "(?ms).*Actual size of stored data:[[:space:]]+8 bytes \\(8 B\\).*")
+ c.Check(stdout.String(), check.Matches, "(?ms).*Saved by Keep deduplication:[[:space:]]+4 bytes \\(4 B\\).*")
+ c.Log(stderr.String())
+ c.Check(stderr.String(), check.Equals, "")
+ }
}
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list