[ARVADOS] created: 48bafad68df6480b48103208c362c595f7ec1fb6

Git user git at public.curoverse.com
Sun Apr 10 16:22:15 EDT 2016


        at  48bafad68df6480b48103208c362c595f7ec1fb6 (commit)


commit 48bafad68df6480b48103208c362c595f7ec1fb6
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date:   Sun Apr 10 16:21:58 2016 -0400

    8912: Python script to consume list of collections with missing
    blocks (produced by datamanager -extra-reports) and report which files within
    the collection are affected.

diff --git a/tools/keep-block-to-file/keep_block_to_file.py b/tools/keep-block-to-file/keep_block_to_file.py
new file mode 100755
index 0000000..a430994
--- /dev/null
+++ b/tools/keep-block-to-file/keep_block_to_file.py
@@ -0,0 +1,31 @@
+#!/usr/bin/env python
+
+import re
+import sys
+import arvados.collection
+from arvados.keep import KeepLocator
+
+for collectionsWithMissing in sys.argv[1:]:
+
+    g = re.match(r"\d\d\d\d-\d\d-\d\dT\d\d:\d\d:\d\d-\d\d:\d\d_(.....-.....-...............)_missing\.txt", collectionsWithMissing)
+
+    collection = g.group(1)
+
+    blocklist = open(collectionsWithMissing)
+
+    missingblocks = set()
+    for b in blocklist:
+        missingblocks.add(b.strip())
+
+    def scanfiles(name, cur):
+        if isinstance(cur, arvados.collection.ArvadosFile):
+            segs = cur.segments()
+            for s in segs:
+                st = KeepLocator(s.locator).stripped()
+                if st in missingblocks:
+                    print "\"%s\", \"%s\", \"%s\"" % (collection, name, st)
+        else:
+            for k, d in cur.items():
+                scanfiles("%s/%s" % (name, k), d)
+
+    scanfiles(".", arvados.collection.CollectionReader(collection))

commit 683f437ea698d66c8054c28ad2ed90ec2f3bcf03
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date:   Sun Apr 10 15:59:40 2016 -0400

    8912: Add flag -extra-reports to produce report of index and missing blocks.

diff --git a/services/datamanager/datamanager.go b/services/datamanager/datamanager.go
index 8e12835..3ed494c 100644
--- a/services/datamanager/datamanager.go
+++ b/services/datamanager/datamanager.go
@@ -24,6 +24,7 @@ var (
 	minutesBetweenRuns  int
 	collectionBatchSize int
 	dryRun              bool
+	extraReports        bool
 )
 
 func init() {
@@ -47,6 +48,10 @@ func init() {
 		"dry-run",
 		false,
 		"Perform a dry run. Log how many blocks would be deleted/moved, but do not issue any changes to keepstore.")
+	flag.BoolVar(&extraReports,
+		"extra-reports",
+		false,
+		"Log extra reports: keepstore indexes, collections missing blocks.")
 }
 
 func main() {
@@ -129,6 +134,12 @@ func singlerun(arv arvadosclient.ArvadosClient) error {
 	replicationSummary := buckets.SummarizeBuckets(readCollections)
 	replicationCounts := replicationSummary.ComputeCounts()
 
+	if extraReports {
+		ts := time.Now()
+		summary.LogKeepIndex(ts.Format(time.RFC3339), keepServerInfo)
+		summary.LogMissingBlocks(ts.Format(time.RFC3339), readCollections, replicationSummary)
+	}
+
 	log.Printf("Blocks In Collections: %d, "+
 		"\nBlocks In Keep: %d.",
 		len(readCollections.BlockToDesiredReplication),
diff --git a/services/datamanager/summary/summary.go b/services/datamanager/summary/summary.go
index 9fb0316..8fbca38 100644
--- a/services/datamanager/summary/summary.go
+++ b/services/datamanager/summary/summary.go
@@ -9,6 +9,8 @@ import (
 	"git.curoverse.com/arvados.git/sdk/go/blockdigest"
 	"git.curoverse.com/arvados.git/services/datamanager/collection"
 	"git.curoverse.com/arvados.git/services/datamanager/keep"
+	"log"
+	"os"
 	"sort"
 )
 
@@ -275,3 +277,37 @@ func (rlbs ReplicationLevelBlockSetMap) SummarizeBuckets(
 
 	return
 }
+
+// LogKeepIndex writes one index report file for every server found in
+// keepServerInfo.ServerToContents. The file is named
+// "<ts>_<server UUID>_index.txt" and receives one line per key of the
+// server's BlockDigestToInfo map, formatted as "%016x%016x+%d" from the
+// digest's H and L fields and the block size. A file that cannot be
+// created is reported through the standard logger and skipped.
+func LogKeepIndex(ts string, keepServerInfo keep.ReadServers) {
+	for server, serverContents := range keepServerInfo.ServerToContents {
+		path := fmt.Sprintf("%s_%s_index.txt", ts, server.UUID)
+		out, createErr := os.Create(path)
+		if createErr != nil {
+			log.Printf("Could not open '%v' for writing: %v", path, createErr)
+			continue
+		}
+		for block := range serverContents.BlockDigestToInfo {
+			fmt.Fprintf(out, "%016x%016x+%d\n", block.Digest.H, block.Digest.L, block.Size)
+		}
+		out.Close()
+	}
+}
+
+// LogMissingBlocks writes one report file for every collection index in
+// rs.CollectionsNotFullyInKeep. The file is named
+// "<ts>_<collection UUID>_missing.txt" and lists, one per line, each block
+// of that collection that is also a key of rs.CollectionBlocksNotInKeep,
+// formatted as "%016x%016x+%d" from the digest's H and L fields and the
+// block size. A file that cannot be created is reported through the
+// standard logger and skipped.
+func LogMissingBlocks(ts string, readCollections collection.ReadCollections, rs ReplicationSummary) {
+	for ci := range rs.CollectionsNotFullyInKeep {
+		uuid := readCollections.CollectionIndexToUUID[ci]
+		fn := fmt.Sprintf("%s_%s_missing.txt", ts, uuid)
+		idxfile, err := os.Create(fn)
+		if err != nil {
+			log.Printf("Could not open '%v' for writing: %v", fn, err)
+			continue
+		}
+		// Named coll so the imported collection package is not shadowed.
+		coll := readCollections.UUIDToCollection[uuid]
+		for digest, sz := range coll.BlockDigestToSize {
+			bds := blockdigest.DigestWithSize{Digest: digest, Size: uint32(sz)}
+			if _, ok := rs.CollectionBlocksNotInKeep[bds]; ok {
+				fmt.Fprintf(idxfile, "%016x%016x+%d\n", bds.Digest.H, bds.Digest.L, bds.Size)
+			}
+		}
+		// Close explicitly on every iteration: a defer would keep all the
+		// report files open until the function returns, and not closing at
+		// all leaks one descriptor per collection.
+		if err := idxfile.Close(); err != nil {
+			log.Printf("Could not close '%v': %v", fn, err)
+		}
+	}
+}

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list