[ARVADOS] updated: f7538ca2df4a43ff60f87675f52a59edacfcdc7e

Fri Apr 11 18:48:27 EDT 2014

Summary of changes:
 services/datamanager/datamanager.py |   51 ++++++++++++++++++++++++++++++++--
 1 files changed, 48 insertions(+), 3 deletions(-)

       via  f7538ca2df4a43ff60f87675f52a59edacfcdc7e (commit)
      from  f4925bb0e7d9de484579b71087f181b9dbea5bff (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.


commit f7538ca2df4a43ff60f87675f52a59edacfcdc7e
Author: Misha Zatsman <misha at curoverse.com>
Date:   Fri Apr 11 03:20:56 2014 +0000

    Added reporting of persisted blocks which are under or over replicated. Also added reporting of blocks without collections. refs #2574

diff --git a/services/datamanager/datamanager.py b/services/datamanager/datamanager.py
index ad7ac9c..865bc36 100755
--- a/services/datamanager/datamanager.py
+++ b/services/datamanager/datamanager.py
@@ -251,7 +251,7 @@ def computeWeightedReplicationCosts(replication_levels):
   The cost of the fourth, fifth and sixth copies is shared by two
   users, so they each pay 3 copies / 2 users = 1.5 (plus the above costs)
 
-  Here are some sample other examples:
+  Here are some other examples:
   computeWeightedReplicationCosts([1,]) -> {1:1.0}
   computeWeightedReplicationCosts([2,]) -> {2:2.0}
   computeWeightedReplicationCosts([1,1]) -> {1:0.5}
@@ -357,6 +357,41 @@ def computeReplication(keep_blocks):
   for server_blocks in keep_blocks:
     for block_uuid, _ in server_blocks:
       block_to_replication[block_uuid] += 1
+  # THIS IS A HACK TO DEAL WITH KEEP SERVER DOUBLE-REPORTING!
+  # DELETE THIS WHEN THAT BUG IS FIXED OR THE KEEP SERVER IS REPLACED.
+  block_to_replication.update({k: v/2 for k,v in block_to_replication.items()})
+  log.debug('Seeing the following replication levels among blocks: %s',
+            str(set(block_to_replication.values())))
+
+def detectReplicationProblems():
+  blocks_not_in_any_collections.update(
+    set(block_to_replication.keys()).difference(block_to_collections.keys()))
+  underreplicated_persisted_blocks.update(
+    [uuid
+     for uuid, persister_replication in block_to_persister_replication.items()
+     if len(persister_replication) > 0 and
+     block_to_replication[uuid] < max(persister_replication.values())])
+  overreplicated_persisted_blocks.update(
+    [uuid
+     for uuid, persister_replication in block_to_persister_replication.items()
+     if len(persister_replication) > 0 and
+     block_to_replication[uuid] > max(persister_replication.values())])
+  log.info('Found %d blocks not in any collections, e.g. %s...',
+           len(blocks_not_in_any_collections),
+           ','.join(list(blocks_not_in_any_collections)[:5]))
+  log.info('Found %d underreplicated blocks, e.g. %s...',
+           len(underreplicated_persisted_blocks),
+           ','.join(list(underreplicated_persisted_blocks)[:5]))
+  log.info('Found %d overreplicated blocks, e.g. %s...',
+           len(overreplicated_persisted_blocks),
+           ','.join(list(overreplicated_persisted_blocks)[:5]))
+  # TODO:
+  #  Read blocks sorted by mtime
+  #  Cache window vs % free space
+  #  Which collections candidates will appear in
+  #  Youngest underreplicated read blocks that appear in collections.
+  #  Report Collections that have blocks which are missing from (or
+  #   underreplicated in) keep.
 
 
 # This is the main flow here
@@ -441,6 +476,11 @@ keep_servers = []
 keep_blocks = []
 block_to_replication = defaultdict(lambda: 0)
 
+# Stuff to report on
+blocks_not_in_any_collections = set()
+underreplicated_persisted_blocks = set()
+overreplicated_persisted_blocks = set()
+
 all_data_loaded = False
 
 def loadAllData():
@@ -479,7 +519,11 @@ def loadAllData():
 
   computeReplication(keep_blocks)
 
-  log.info('average replication level is %f', (float(sum(block_to_replication.values())) / len(block_to_replication)))
+  log.info('average replication level is %f',
+           (float(sum(block_to_replication.values())) /
+            len(block_to_replication)))
+
+  detectReplicationProblems()
 
   computeUserStorageUsage()
   printUserStorageUsage()
@@ -489,6 +533,7 @@ def loadAllData():
   global all_data_loaded
   all_data_loaded = True
 
+
 class DataManagerHandler(BaseHTTPRequestHandler):
   USER_PATH = 'user'
   COLLECTION_PATH = 'collection'
@@ -658,10 +703,10 @@ class DataManagerHandler(BaseHTTPRequestHandler):
 class ThreadedHTTPServer(ThreadingMixIn, HTTPServer):
   """Handle requests in a separate thread."""
 
+
 if __name__ == '__main__':
   args = parser.parse_args()
 
-
   if args.port == 0:
     loadAllData()
   else:

-----------------------------------------------------------------------


hooks/post-receive
--