[ARVADOS] updated: de9903cfc08ea7c3da459e7c4ee5a744d52a7c89

git at public.curoverse.com
Fri Apr 25 20:18:16 EDT 2014


Summary of changes:
 .../datamanager/{ => experimental}/datamanager.py  |   62 +++++++++++++++++---
 .../{ => experimental}/datamanager_test.py         |    0
 2 files changed, 53 insertions(+), 9 deletions(-)
 rename services/datamanager/{ => experimental}/datamanager.py (94%)
 rename services/datamanager/{ => experimental}/datamanager_test.py (100%)

       via  de9903cfc08ea7c3da459e7c4ee5a744d52a7c89 (commit)
       via  d034adbaf26a19e8fd48124cadd5108d2d3de642 (commit)
       via  a3650fa043500139ac36587a4b61ffed1a2dc1a3 (commit)
      from  f7538ca2df4a43ff60f87675f52a59edacfcdc7e (commit)

The revisions listed above that are new to this repository have not
appeared in any other notification email, so we list those revisions
in full below.


commit de9903cfc08ea7c3da459e7c4ee5a744d52a7c89
Author: Misha Zatsman <misha at curoverse.com>
Date:   Sat Apr 26 00:17:29 2014 +0000

    Added most of the garbage collection reporting for #2622. Deleted some trailing whitespace.

diff --git a/services/datamanager/experimental/datamanager.py b/services/datamanager/experimental/datamanager.py
index 08e688e..4bd2a26 100755
--- a/services/datamanager/experimental/datamanager.py
+++ b/services/datamanager/experimental/datamanager.py
@@ -76,7 +76,7 @@ class CollectionInfo:
     if not CollectionInfo.all_by_uuid.has_key(uuid):
       CollectionInfo(uuid)
     return CollectionInfo.all_by_uuid[uuid]
-  
+
 
 def extractUuid(candidate):
   """ Returns a canonical (hash+size) uuid from a valid uuid, or None if candidate is not a valid uuid."""
@@ -360,6 +360,31 @@ def computeReplication(keep_blocks):
   log.debug('Seeing the following replication levels among blocks: %s',
             str(set(block_to_replication.values())))
 
+
+def computeGarbageCollectionCandidates():
+  for server_blocks in keep_blocks:
+    block_to_latest_mtime.addValues(server_blocks)
+  empty_set = set()
+  garbage_collection_priority = sorted(
+    [(block,mtime)
+     for block,mtime in block_to_latest_mtime.items()
+     if len(block_to_persisters.get(block,empty_set)) == 0],
+    key = itemgetter(1))
+  global garbage_collection_report
+  garbage_collection_report = []
+  cumulative_disk_size = 0
+  for block,mtime in garbage_collection_priority:
+    disk_size = blockDiskUsage(block)
+    cumulative_disk_size += disk_size
+    garbage_collection_report.append((block,
+                                      mtime,
+                                      disk_size,
+                                      cumulative_disk_size))
+
+  print 'The oldest Garbage Collection Candidates: '
+  pprint.pprint(garbage_collection_report[:20])
+
+
 def detectReplicationProblems():
   blocks_not_in_any_collections.update(
     set(block_to_replication.keys()).difference(block_to_collections.keys()))
@@ -474,6 +499,24 @@ user_to_usage = defaultdict(lambda : [0,]*NUM_COLS)
 keep_servers = []
 keep_blocks = []
 block_to_replication = defaultdict(lambda: 0)
+block_to_latest_mtime = maxdict()
+
+garbage_collection_report = []
+"""A list of non-persisted blocks, sorted by increasing mtime
+
+Each entry is of the form (block uuid, latest mtime, disk size,
+cumulative size)
+
+* block uuid: The id of the block we want to delete
+* latest mtime: The latest mtime of the block across all keep servers.
+* disk size: The total disk space used by this block (block size
+multiplied by current replication level)
+* cumulative disk size: The sum of this block's disk size and all the
+blocks listed above it
+* TODO: disk free: The proportion of our disk space that would be free
+if we deleted this block and all the above. So this is (current disk
+space used - cumulative disk size) / total disk capacity
+"""
 
 # Stuff to report on
 blocks_not_in_any_collections = set()
@@ -518,6 +561,8 @@ def loadAllData():
 
   computeReplication(keep_blocks)
 
+  computeGarbageCollectionCandidates()
+
   log.info('average replication level is %f',
            (float(sum(block_to_replication.values())) /
             len(block_to_replication)))
@@ -555,10 +600,10 @@ class DataManagerHandler(BaseHTTPRequestHandler):
 
   def writeTop(self, title):
     self.wfile.write('<HTML><HEAD><TITLE>%s</TITLE></HEAD>\n<BODY>' % title)
-    
+
   def writeBottom(self):
     self.wfile.write('</BODY></HTML>\n')
-    
+
   def writeHomePage(self):
     self.send_response(200)
     self.end_headers()
@@ -676,7 +721,7 @@ class DataManagerHandler(BaseHTTPRequestHandler):
         blocks = replication_to_blocks[replication_level]
         self.wfile.write('<TD valign="top">%s\n' % '<BR>\n'.join(blocks))
       self.wfile.write('</TR></TABLE>\n')
-      
+
 
   def do_GET(self):
     if not all_data_loaded:
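
For readers skimming the diff, the new reporting boils down to: remember the
latest mtime seen for each block across all keep servers, restrict to blocks
that no collection persists, sort those oldest-first, and accumulate their
disk usage so each report row shows how much space deleting everything up to
that row would reclaim (the docstring's TODO would later combine this with
total capacity to report a free-space proportion). Below is a minimal,
self-contained sketch of that technique, not the prototype itself: the
maxdict behavior is inferred from its use in the diff, and
block_to_persisters and the disk-usage callable stand in for the
prototype's globals and blockDiskUsage().

from operator import itemgetter

def garbage_collection_candidates(keep_blocks, block_to_persisters,
                                  block_disk_usage):
    # keep_blocks: one [(block_uuid, mtime), ...] listing per keep server.
    # block_to_persisters: block_uuid -> set of persisting collections.
    # block_disk_usage: block_uuid -> bytes on disk (size * replication).
    # Equivalent of maxdict.addValues(): keep the latest mtime per block.
    block_to_latest_mtime = {}
    for server_blocks in keep_blocks:
        for block, mtime in server_blocks:
            if mtime > block_to_latest_mtime.get(block, float('-inf')):
                block_to_latest_mtime[block] = mtime
    # Non-persisted blocks, oldest first: the safest deletion candidates.
    candidates = sorted(
        ((block, mtime) for block, mtime in block_to_latest_mtime.items()
         if not block_to_persisters.get(block)),
        key=itemgetter(1))
    report = []
    cumulative_disk_size = 0
    for block, mtime in candidates:
        disk_size = block_disk_usage(block)
        cumulative_disk_size += disk_size
        report.append((block, mtime, disk_size, cumulative_disk_size))
    return report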

commit d034adbaf26a19e8fd48124cadd5108d2d3de642
Author: Misha Zatsman <misha at curoverse.com>
Date:   Thu Apr 24 21:19:41 2014 +0000

    Moved datamanager prototype to experimental directory to indicate that
    it shouldn't be used in production code.
    
    closes #2689

diff --git a/services/datamanager/datamanager.py b/services/datamanager/experimental/datamanager.py
similarity index 100%
rename from services/datamanager/datamanager.py
rename to services/datamanager/experimental/datamanager.py
diff --git a/services/datamanager/datamanager_test.py b/services/datamanager/experimental/datamanager_test.py
similarity index 100%
rename from services/datamanager/datamanager_test.py
rename to services/datamanager/experimental/datamanager_test.py

commit a3650fa043500139ac36587a4b61ffed1a2dc1a3
Author: Misha Zatsman <misha at curoverse.com>
Date:   Thu Apr 24 21:11:40 2014 +0000

    Removed hack which was compensating for a keep server bug which has since been fixed.
    Updated to match the new log format: the info field has been renamed to properties.

diff --git a/services/datamanager/datamanager.py b/services/datamanager/datamanager.py
index 865bc36..08e688e 100755
--- a/services/datamanager/datamanager.py
+++ b/services/datamanager/datamanager.py
@@ -332,7 +332,7 @@ def logUserStorageUsage():
       usage[UNWEIGHTED_PERSIST_SIZE_COL])
     info['persisted_collections_weighted_bytes'] = (
       usage[WEIGHTED_PERSIST_SIZE_COL])
-    body['info'] = info
+    body['properties'] = info
     # TODO(misha): Confirm that this will throw an exception if it
     # fails to create the log entry.
     arv.logs().create(body=body).execute()
@@ -357,9 +357,6 @@ def computeReplication(keep_blocks):
   for server_blocks in keep_blocks:
     for block_uuid, _ in server_blocks:
       block_to_replication[block_uuid] += 1
-  # THIS IS A HACK TO DEAL WITH KEEP SERVER DOUBLE-REPORTING!
-  # DELETE THIS WHEN THAT BUG IS FIXED OR THE KEEP SERVER IS REPLACED.
-  block_to_replication.update({k: v/2 for k,v in block_to_replication.items()})
   log.debug('Seeing the following replication levels among blocks: %s',
             str(set(block_to_replication.values())))
 
@@ -376,6 +373,7 @@ def detectReplicationProblems():
      for uuid, persister_replication in block_to_persister_replication.items()
      if len(persister_replication) > 0 and
      block_to_replication[uuid] > max(persister_replication.values())])
+
   log.info('Found %d blocks not in any collections, e.g. %s...',
            len(blocks_not_in_any_collections),
            ','.join(list(blocks_not_in_any_collections)[:5]))
@@ -385,10 +383,11 @@ def detectReplicationProblems():
   log.info('Found %d overreplicated blocks, e.g. %s...',
            len(overreplicated_persisted_blocks),
            ','.join(list(overreplicated_persisted_blocks)[:5]))
+
   # TODO:
   #  Read blocks sorted by mtime
   #  Cache window vs % free space
-  #  Collections which will candidates appear in
+  #  Collections which candidates will appear in
   #  Youngest underreplicated read blocks that appear in collections.
   #  Report Collections that have blocks which are missing from (or
   #   underreplicated in) keep.
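
The deleted hack halved every count because each keep server used to report
every block twice; with that server bug fixed, a block's replication level
is simply the number of keep servers whose listings contain it. A minimal
sketch of the counting, with made-up block uuids and mtimes for
illustration:

from collections import defaultdict

def compute_replication(keep_blocks):
    # Replication level = number of keep servers reporting the block.
    block_to_replication = defaultdict(int)
    for server_blocks in keep_blocks:
        for block_uuid, _mtime in server_blocks:
            block_to_replication[block_uuid] += 1
    return block_to_replication

# Hypothetical listings from two keep servers.
keep_blocks = [[('d41d8cd98f00b204e9800998ecf8427e+0', 1398300000)],
               [('d41d8cd98f00b204e9800998ecf8427e+0', 1398300050),
                ('900150983cd24fb0d6963f7d28e17f72+3', 1398300100)]]
print(dict(compute_replication(keep_blocks)))
# {'d41d8cd98f00b204e9800998ecf8427e+0': 2,
#  '900150983cd24fb0d6963f7d28e17f72+3': 1}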

-----------------------------------------------------------------------


hooks/post-receive