[ARVADOS] updated: 40cc2b01c2a3ff911549c2d31c8195905109633d

git at public.curoverse.com git at public.curoverse.com
Tue Apr 1 16:12:58 EDT 2014


Summary of changes:
 services/datamanager/datamanager.py |   28 +++++++++++++++++++---------
 1 files changed, 19 insertions(+), 9 deletions(-)

       via  40cc2b01c2a3ff911549c2d31c8195905109633d (commit)
       via  74236320d68bf092f7f76f7ccf4f7a974735c6ab (commit)
      from  d0bf7a1ff103285e54433d3bcb67c2138b534542 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.


commit 40cc2b01c2a3ff911549c2d31c8195905109633d
Author: Misha Zatsman <misha at curoverse.com>
Date:   Tue Apr 1 19:59:14 2014 +0000

    Modified algorithm to take disk replication into account when counting usage.

diff --git a/services/datamanager/datamanager.py b/services/datamanager/datamanager.py
index 84efc9d..2a642f2 100755
--- a/services/datamanager/datamanager.py
+++ b/services/datamanager/datamanager.py
@@ -182,21 +182,29 @@ def reportBusiestUsers():
     print '%s reading %d collections.' % (persister, len(collections))
 
 
+def blockDiskUsage(block_uuid):
+  """Returns the disk usage of a block given its uuid.
+
+  Will return 0 before reading the contents of the keep servers.
+  """
+  return byteSizeFromValidUuid(block_uuid) * block_to_replication[block_uuid]
+
+
 def reportUserDiskUsage():
   for user, blocks in reader_to_blocks.items():
     user_to_usage[user][UNWEIGHTED_READ_SIZE_COL] = sum(map(
-        byteSizeFromValidUuid,
+        blockDiskUsage,
         blocks))
     user_to_usage[user][WEIGHTED_READ_SIZE_COL] = sum(map(
-        lambda block_uuid:(float(byteSizeFromValidUuid(block_uuid))/
+        lambda block_uuid:(float(blockDiskUsage(block_uuid))/
                                  len(block_to_readers[block_uuid])),
         blocks))
   for user, blocks in persister_to_blocks.items():
     user_to_usage[user][UNWEIGHTED_PERSIST_SIZE_COL] = sum(map(
-        byteSizeFromValidUuid,
+        blockDiskUsage,
         blocks))
     user_to_usage[user][WEIGHTED_PERSIST_SIZE_COL] = sum(map(
-        lambda block_uuid:(float(byteSizeFromValidUuid(block_uuid))/
+        lambda block_uuid:(float(blockDiskUsage(block_uuid))/
                                  len(block_to_persisters[block_uuid])),
         blocks))
   print ('user: unweighted readable block size, weighted readable block size, '
@@ -297,8 +305,6 @@ WEIGHTED_PERSIST_SIZE_COL = 3
 NUM_COLS = 4
 user_to_usage = defaultdict(lambda : [0,]*NUM_COLS)
 
-reportUserDiskUsage()
-
 print 'Getting Keep Servers'
 keep_servers = getKeepServers()
 
@@ -310,3 +316,5 @@ keep_blocks = getKeepBlocks(keep_servers)
 block_to_replication = computeReplication(keep_blocks)
 
 print 'average replication level is %f' % (float(sum(block_to_replication.values())) / len(block_to_replication))
+
+reportUserDiskUsage()

commit 74236320d68bf092f7f76f7ccf4f7a974735c6ab
Author: Misha Zatsman <misha at curoverse.com>
Date:   Tue Apr 1 19:46:45 2014 +0000

    Added manifest size to calculations.

diff --git a/services/datamanager/datamanager.py b/services/datamanager/datamanager.py
index 847aa7e..84efc9d 100755
--- a/services/datamanager/datamanager.py
+++ b/services/datamanager/datamanager.py
@@ -151,16 +151,18 @@ def reportMostPopularCollections():
 
 def buildMaps():
   for collection_uuid,collection_info in CollectionInfo.all_by_uuid.items():
-    for block_uuid in collection_info.block_uuids:
+    # Add the block holding the manifest itself for all calculations
+    block_uuids = collection_info.block_uuids.union([collection_uuid,])
+    for block_uuid in block_uuids:
       block_to_collections[block_uuid].add(collection_uuid)
       block_to_readers[block_uuid].update(collection_info.reader_uuids)
       block_to_persisters[block_uuid].update(collection_info.persister_uuids)
     for reader_uuid in collection_info.reader_uuids:
       reader_to_collections[reader_uuid].add(collection_uuid)
-      reader_to_blocks[reader_uuid].update(collection_info.block_uuids)
+      reader_to_blocks[reader_uuid].update(block_uuids)
     for persister_uuid in collection_info.persister_uuids:
       persister_to_collections[persister_uuid].add(collection_uuid)
-      persister_to_blocks[persister_uuid].update(collection_info.block_uuids)
+      persister_to_blocks[persister_uuid].update(block_uuids)
 
 
 def itemsByValueLength(original):

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list