[ARVADOS] created: 48bafad68df6480b48103208c362c595f7ec1fb6
Git user
git at public.curoverse.com
Sun Apr 10 16:22:15 EDT 2016
at 48bafad68df6480b48103208c362c595f7ec1fb6 (commit)
commit 48bafad68df6480b48103208c362c595f7ec1fb6
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date: Sun Apr 10 16:21:58 2016 -0400
8912: Python script to consume list of collections with missing
blocks (produced by datamanager -extra-reports) and report which files within
the collection are affected.
diff --git a/tools/keep-block-to-file/keep_block_to_file.py b/tools/keep-block-to-file/keep_block_to_file.py
new file mode 100755
index 0000000..a430994
--- /dev/null
+++ b/tools/keep-block-to-file/keep_block_to_file.py
@@ -0,0 +1,31 @@
+#!/usr/bin/env python
+
+import re
+import sys
+import arvados.collection
+from arvados.keep import KeepLocator
+
+for collectionsWithMissing in sys.argv[1:]:
+
+ g = re.match(r"\d\d\d\d-\d\d-\d\dT\d\d:\d\d:\d\d-\d\d:\d\d_(.....-.....-...............)_missing\.txt", collectionsWithMissing)
+
+ collection = g.group(1)
+
+ blocklist = open(collectionsWithMissing)
+
+ missingblocks = set()
+ for b in blocklist:
+ missingblocks.add(b.strip())
+
+ def scanfiles(name, cur):
+ if isinstance(cur, arvados.collection.ArvadosFile):
+ segs = cur.segments()
+ for s in segs:
+ st = KeepLocator(s.locator).stripped()
+ if st in missingblocks:
+ print "\"%s\", \"%s\", \"%s\"" % (collection, name, st)
+ else:
+ for k, d in cur.items():
+ scanfiles("%s/%s" % (name, k), d)
+
+ scanfiles(".", arvados.collection.CollectionReader(collection))
commit 683f437ea698d66c8054c28ad2ed90ec2f3bcf03
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date: Sun Apr 10 15:59:40 2016 -0400
8912: Add flag -extra-reports to produce report of index and missing blocks.
diff --git a/services/datamanager/datamanager.go b/services/datamanager/datamanager.go
index 8e12835..3ed494c 100644
--- a/services/datamanager/datamanager.go
+++ b/services/datamanager/datamanager.go
@@ -24,6 +24,7 @@ var (
minutesBetweenRuns int
collectionBatchSize int
dryRun bool
+ extraReports bool
)
func init() {
@@ -47,6 +48,10 @@ func init() {
"dry-run",
false,
"Perform a dry run. Log how many blocks would be deleted/moved, but do not issue any changes to keepstore.")
+ flag.BoolVar(&extraReports,
+ "extra-reports",
+ false,
+ "Log extra reports: keepstore indexes, collections missing blocks.")
}
func main() {
@@ -129,6 +134,12 @@ func singlerun(arv arvadosclient.ArvadosClient) error {
replicationSummary := buckets.SummarizeBuckets(readCollections)
replicationCounts := replicationSummary.ComputeCounts()
+ if extraReports {
+ ts := time.Now()
+ summary.LogKeepIndex(ts.Format(time.RFC3339), keepServerInfo)
+ summary.LogMissingBlocks(ts.Format(time.RFC3339), readCollections, replicationSummary)
+ }
+
log.Printf("Blocks In Collections: %d, "+
"\nBlocks In Keep: %d.",
len(readCollections.BlockToDesiredReplication),
diff --git a/services/datamanager/summary/summary.go b/services/datamanager/summary/summary.go
index 9fb0316..8fbca38 100644
--- a/services/datamanager/summary/summary.go
+++ b/services/datamanager/summary/summary.go
@@ -9,6 +9,8 @@ import (
"git.curoverse.com/arvados.git/sdk/go/blockdigest"
"git.curoverse.com/arvados.git/services/datamanager/collection"
"git.curoverse.com/arvados.git/services/datamanager/keep"
+ "log"
+ "os"
"sort"
)
@@ -275,3 +277,37 @@ func (rlbs ReplicationLevelBlockSetMap) SummarizeBuckets(
return
}
+
+// LogKeepIndex writes one index report per entry of
+// keepServerInfo.ServerToContents. Each report is named
+// "<ts>_<server UUID>_index.txt"; the name is handed to os.Create
+// as-is, so reports land in the process's working directory unless ts
+// carries a path prefix. A report holds one line per key of the
+// server's BlockDigestToInfo map: the digest's H and L halves as
+// zero-padded 16 digit hex numbers, then "+" and the block size.
+//
+// Reporting is best-effort: a file that cannot be created, written or
+// closed is logged and the remaining servers are still processed.
+func LogKeepIndex(ts string, keepServerInfo keep.ReadServers) {
+	for svr, contents := range keepServerInfo.ServerToContents {
+		fn := fmt.Sprintf("%s_%s_index.txt", ts, svr.UUID)
+		idxfile, err := os.Create(fn)
+		if err != nil {
+			log.Printf("Could not open '%v' for writing: %v", fn, err)
+			continue
+		}
+		for digest := range contents.BlockDigestToInfo {
+			_, err = fmt.Fprintf(idxfile, "%016x%016x+%d\n", digest.Digest.H, digest.Digest.L, digest.Size)
+			if err != nil {
+				// Further writes to this file are unlikely to succeed;
+				// report once and move on to the next server.
+				log.Printf("Could not write to '%v': %v", fn, err)
+				break
+			}
+		}
+		// Close can surface a deferred write error, so it is checked too.
+		if err = idxfile.Close(); err != nil {
+			log.Printf("Could not close '%v': %v", fn, err)
+		}
+	}
+}
+
+// LogMissingBlocks writes one report per collection index found in
+// rs.CollectionsNotFullyInKeep. The index is resolved to a UUID through
+// readCollections.CollectionIndexToUUID and the report is named
+// "<ts>_<collection UUID>_missing.txt"; the name is handed to os.Create
+// as-is. A report lists every block of the collection (from its
+// BlockDigestToSize map) that is also present in
+// rs.CollectionBlocksNotInKeep, one per line: the digest's H and L
+// halves as zero-padded 16 digit hex numbers, then "+" and the size.
+//
+// Reporting is best-effort: a file that cannot be created, written or
+// closed is logged and the remaining collections are still processed.
+func LogMissingBlocks(ts string, readCollections collection.ReadCollections, rs ReplicationSummary) {
+	for ci := range rs.CollectionsNotFullyInKeep {
+		uuid := readCollections.CollectionIndexToUUID[ci]
+		fn := fmt.Sprintf("%s_%s_missing.txt", ts, uuid)
+		idxfile, err := os.Create(fn)
+		if err != nil {
+			log.Printf("Could not open '%v' for writing: %v", fn, err)
+			continue
+		}
+		// Named coll rather than collection so the imported collection
+		// package is not shadowed.
+		coll := readCollections.UUIDToCollection[uuid]
+		for digest, sz := range coll.BlockDigestToSize {
+			bds := blockdigest.DigestWithSize{Digest: digest, Size: uint32(sz)}
+			if _, ok := rs.CollectionBlocksNotInKeep[bds]; !ok {
+				continue
+			}
+			_, err = fmt.Fprintf(idxfile, "%016x%016x+%d\n", bds.Digest.H, bds.Digest.L, bds.Size)
+			if err != nil {
+				// Further writes to this file are unlikely to succeed;
+				// report once and move on to the next collection.
+				log.Printf("Could not write to '%v': %v", fn, err)
+				break
+			}
+		}
+		// One file is created per collection, so each must be closed
+		// before the next iteration or open descriptors pile up until
+		// the function returns. Close can also surface a write error.
+		if err = idxfile.Close(); err != nil {
+			log.Printf("Could not close '%v': %v", fn, err)
+		}
+	}
+}
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list