[ARVADOS] updated: e419f135d46050250fdeb10b6e2a2a254caa85d4

git at public.curoverse.com git at public.curoverse.com
Wed Apr 30 17:37:14 EDT 2014


Summary of changes:
 services/datamanager/experimental/datamanager.py |   73 ++++++++++++++++++++--
 1 files changed, 67 insertions(+), 6 deletions(-)

       via  e419f135d46050250fdeb10b6e2a2a254caa85d4 (commit)
       via  aade22d7587e9f5921edb18ad76792a60eaa92fb (commit)
       via  2e9a14d193db2948e14804539494f3b2384a87b8 (commit)
       via  ba4fcbd29b2615d9cc64010d756de49eac5109a5 (commit)
      from  a0a7b1a0c6f43a80449a76c00fb0c30858d38233 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.


commit e419f135d46050250fdeb10b6e2a2a254caa85d4
Author: Misha Zatsman <misha at curoverse.com>
Date:   Wed Apr 30 21:35:27 2014 +0000

    Added logging of block age vs free space histogram. Fixed an indentation bug. Fixed a bug where I forgot to declare a variable as global. Corrected histogram description in comments.

diff --git a/services/datamanager/experimental/datamanager.py b/services/datamanager/experimental/datamanager.py
index 00cc554..4224426 100755
--- a/services/datamanager/experimental/datamanager.py
+++ b/services/datamanager/experimental/datamanager.py
@@ -29,7 +29,7 @@ def fileSizeFormat(value):
                          byteunits[exponent])
 
 def percentageFloor(x):
-""" Returns a float which is the input rounded down to the neared 0.01.
+  """ Returns a float which is the input rounded down to the nearest 0.01.
 
 e.g. precentageFloor(0.941354) = 0.94
 """
@@ -444,6 +444,19 @@ def computeGarbageCollectionHistogram():
   return histogram
 
 
+def logGarbageCollectionHistogram():
+  body = {}
+  # TODO(misha): Decide whether we should specify an object_uuid in
+  # the body and if so, which uuid to use.
+  body['event_type'] = args.block_age_free_space_histogram_log_event_type
+  properties = {}
+  properties['histogram'] = garbage_collection_histogram
+  body['properties'] = properties
+  # TODO(misha): Confirm that this will throw an exception if it
+  # fails to create the log entry.
+  arv.logs().create(body=body).execute()
+
+
 def detectReplicationProblems():
   blocks_not_in_any_collections.update(
     set(block_to_replication.keys()).difference(block_to_collections.keys()))
@@ -524,6 +537,10 @@ parser.add_argument('--user-storage-log-event-type',
                     default='user-storage-report',
                     help=('The event type to set when logging user '
                           'storage usage to workbench.'))
+parser.add_argument('--block-age-free-space-histogram-log-event-type',
+                    default='block-age-free-space-histogram',
+                    help=('The event type to set when logging user '
+                          'storage usage to workbench.'))
 parser.add_argument('--garbage-collection-file',
                     default='',
                     help=('The file to write a garbage collection report, or '
@@ -591,9 +608,8 @@ garbage_collection_histogram = []
 Each entry is of the form (Disk Proportion, mtime).
 
 An entry of the form (0.52, 1388747781) means that if we deleted the
-olded non-presisted blocks until we had 52% of the disk free, the
-oldest non-persisted block we'd have left would have an mtime of
-1388747781.
+oldest non-persisted blocks until we had 52% of the disk free, then
+all blocks with an mtime greater than 1388747781 would be preserved.
 """
 
 # Stuff to report on
@@ -667,8 +683,12 @@ def loadAllData():
              args.garbage_collection_file)
     outputGarbageCollectionReport(args.garbage_collection_file)
 
+  global garbage_collection_histogram
   garbage_collection_histogram = computeGarbageCollectionHistogram()
 
+  if args.log_to_workbench:
+    logGarbageCollectionHistogram()
+
   detectReplicationProblems()
 
   computeUserStorageUsage()

commit aade22d7587e9f5921edb18ad76792a60eaa92fb
Author: Misha Zatsman <misha at curoverse.com>
Date:   Wed Apr 30 20:33:26 2014 +0000

    Renamed info to properties to match the name used by arvados.

diff --git a/services/datamanager/experimental/datamanager.py b/services/datamanager/experimental/datamanager.py
index 920b6d8..00cc554 100755
--- a/services/datamanager/experimental/datamanager.py
+++ b/services/datamanager/experimental/datamanager.py
@@ -335,14 +335,15 @@ def logUserStorageUsage():
     # the object_type field since we don't know which we have.
     body['object_uuid'] = user
     body['event_type'] = args.user_storage_log_event_type
-    info = {}
-    info['read_collections_total_bytes'] = usage[UNWEIGHTED_READ_SIZE_COL]
-    info['read_collections_weighted_bytes'] = usage[WEIGHTED_READ_SIZE_COL]
-    info['persisted_collections_total_bytes'] = (
+    properties = {}
+    properties['read_collections_total_bytes'] = usage[UNWEIGHTED_READ_SIZE_COL]
+    properties['read_collections_weighted_bytes'] = (
+      usage[WEIGHTED_READ_SIZE_COL])
+    properties['persisted_collections_total_bytes'] = (
       usage[UNWEIGHTED_PERSIST_SIZE_COL])
-    info['persisted_collections_weighted_bytes'] = (
+    properties['persisted_collections_weighted_bytes'] = (
       usage[WEIGHTED_PERSIST_SIZE_COL])
-    body['properties'] = info
+    body['properties'] = properties
     # TODO(misha): Confirm that this will throw an exception if it
     # fails to create the log entry.
     arv.logs().create(body=body).execute()

commit 2e9a14d193db2948e14804539494f3b2384a87b8
Author: Misha Zatsman <misha at curoverse.com>
Date:   Wed Apr 30 19:59:42 2014 +0000

    Added code to compute histogram from garbage collection list.

diff --git a/services/datamanager/experimental/datamanager.py b/services/datamanager/experimental/datamanager.py
index 957a3ce..920b6d8 100755
--- a/services/datamanager/experimental/datamanager.py
+++ b/services/datamanager/experimental/datamanager.py
@@ -28,6 +28,14 @@ def fileSizeFormat(value):
   return "%7.2f %-3s" % (float(value) / pow(1024, exponent),
                          byteunits[exponent])
 
+def percentageFloor(x):
+""" Returns a float which is the input rounded down to the neared 0.01.
+
+e.g. precentageFloor(0.941354) = 0.94
+"""
+  return math.floor(x*100) / 100.0
+
+
 def byteSizeFromValidUuid(valid_uuid):
   return int(valid_uuid.split('+')[1])
 
@@ -421,6 +429,20 @@ def outputGarbageCollectionReport(filename):
       gcwriter.writerow(line)
 
 
+def computeGarbageCollectionHistogram():
+  histogram = []
+  last_percentage = -1
+  for _,mtime,_,_,disk_free in garbage_collection_report:
+    curr_percentage = percentageFloor(disk_free)
+    if curr_percentage > last_percentage:
+      histogram.append( (curr_percentage, mtime) )
+    last_percentage = curr_percentage
+
+  log.info('Garbage collection histogram is: %s', histogram)
+
+  return histogram
+
+
 def detectReplicationProblems():
   blocks_not_in_any_collections.update(
     set(block_to_replication.keys()).difference(block_to_collections.keys()))
@@ -562,6 +584,17 @@ deleted this block and all the above. So this is (free disk space +
 cumulative disk size) / total disk capacity
 """
 
+garbage_collection_histogram = []
+""" Shows the tradeoff of keep block age vs keep disk free space.
+
+Each entry is of the form (Disk Proportion, mtime).
+
+An entry of the form (0.52, 1388747781) means that if we deleted the
+olded non-presisted blocks until we had 52% of the disk free, the
+oldest non-persisted block we'd have left would have an mtime of
+1388747781.
+"""
+
 # Stuff to report on
 blocks_not_in_any_collections = set()
 underreplicated_persisted_blocks = set()
@@ -610,7 +643,7 @@ def loadAllData():
   total_keep_space = sum(map(itemgetter(0), keep_stats))
   free_keep_space = sum(map(itemgetter(1), keep_stats))
 
-  # TODO(misha): Delete this hack when the keep serverse are fixed!
+  # TODO(misha): Delete this hack when the keep servers are fixed!
   # This hack deals with the fact that keep servers report each other's disks.
   total_keep_space /= len(keep_stats)
   free_keep_space /= len(keep_stats)
@@ -633,6 +666,8 @@ def loadAllData():
              args.garbage_collection_file)
     outputGarbageCollectionReport(args.garbage_collection_file)
 
+  garbage_collection_histogram = computeGarbageCollectionHistogram()
+
   detectReplicationProblems()
 
   computeUserStorageUsage()

commit ba4fcbd29b2615d9cc64010d756de49eac5109a5
Author: Misha Zatsman <misha at curoverse.com>
Date:   Wed Apr 30 17:37:25 2014 +0000

    Added hack to deal with the fact that keep servers are reporting each other's disks.

diff --git a/services/datamanager/experimental/datamanager.py b/services/datamanager/experimental/datamanager.py
index 12b8d6a..957a3ce 100755
--- a/services/datamanager/experimental/datamanager.py
+++ b/services/datamanager/experimental/datamanager.py
@@ -610,6 +610,11 @@ def loadAllData():
   total_keep_space = sum(map(itemgetter(0), keep_stats))
   free_keep_space = sum(map(itemgetter(1), keep_stats))
 
+  # TODO(misha): Delete this hack when the keep serverse are fixed!
+  # This hack deals with the fact that keep servers report each other's disks.
+  total_keep_space /= len(keep_stats)
+  free_keep_space /= len(keep_stats)
+
   log.info('Total disk space: %s, Free disk space: %s (%d%%).' %
            (fileSizeFormat(total_keep_space),
             fileSizeFormat(free_keep_space),

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list