[ARVADOS] updated: b586267c7ff0a5973ecf2f83b3fa5eb452669cfc

Mon Apr 7 14:52:59 EDT 2014

Summary of changes:
 services/datamanager/datamanager.py |  133 +++++++++++++++++++++++++++++++----
 1 file changed, 120 insertions(+), 13 deletions(-)

       via  b586267c7ff0a5973ecf2f83b3fa5eb452669cfc (commit)
       via  4efaeba6eb371d03aa8c8f052b63c2b3404de063 (commit)
      from  f76947a7c1ba973a11e563b0977d54a2ece2ce38 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.


commit b586267c7ff0a5973ecf2f83b3fa5eb452669cfc
Author: Misha Zatsman <misha at curoverse.com>
Date:   Mon Apr 7 18:28:57 2014 +0000

    Added handler for collections. Also renamed byte_size to byteSize to follow method naming convention.

diff --git a/services/datamanager/datamanager.py b/services/datamanager/datamanager.py
index 02f31ca..ae9fd0b 100755
--- a/services/datamanager/datamanager.py
+++ b/services/datamanager/datamanager.py
@@ -40,7 +40,7 @@ class CollectionInfo:
     self.persister_uuids = set()  # uuids of users who want this collection saved
     CollectionInfo.all_by_uuid[uuid] = self
 
-  def byte_size(self):
+  def byteSize(self):
     return sum(map(byteSizeFromValidUuid, self.block_uuids))
 
   def __str__(self):
@@ -50,7 +50,7 @@ class CollectionInfo:
             '               persister_uuids: %s' %
             (self.uuid,
              len(self.block_uuids),
-             fileSizeFormat(self.byte_size()),
+             fileSizeFormat(self.byteSize()),
              pprint.pformat(self.reader_uuids, indent = 15),
              pprint.pformat(self.persister_uuids, indent = 15)))
 
@@ -407,9 +407,11 @@ class DataManagerHandler(BaseHTTPRequestHandler):
       self.send_error(404,
                       'User (%s) Not Found.' % cgi.escape(uuid, quote=False))
     else:
+      # Here we assume that since the user exists, their uuid doesn't
+      # need to be HTML escaped.
       self.send_response(200)
       self.end_headers()
-      self.writeTop('Home')
+      self.writeTop('User %s' % uuid)
       self.wfile.write('<TABLE>')
       self.wfile.write('<TR><TH>user'
                        '<TH>unweighted readable block size'
@@ -432,6 +434,43 @@ class DataManagerHandler(BaseHTTPRequestHandler):
                                      reader_to_collections[uuid])))
       self.writeBottom()
 
+  def collectionExists(self, uuid):
+    return CollectionInfo.all_by_uuid.has_key(uuid)
+
+  def writeCollectionPage(self, uuid):
+    if not self.collectionExists(uuid):
+      self.send_error(404,
+                      'Collection (%s) Not Found.' % cgi.escape(uuid, quote=False))
+    else:
+      collection = CollectionInfo.get(uuid)
+      # Here we assume that since a collection exists, its id doesn't
+      # need to be HTML escaped.
+      self.send_response(200)
+      self.end_headers()
+      self.writeTop('Collection %s' % uuid)
+      self.wfile.write('<H1>Collection %s</H1>\n' % uuid)
+      self.wfile.write('<P>Total size %s (not factoring in replication).\n' %
+                       fileSizeFormat(collection.byteSize()))
+      self.wfile.write('<P>Readers: %s\n' %
+                       ', '.join(map(self.userLink, collection.reader_uuids)))
+      self.wfile.write('<P>Persisters: %s\n' %
+                       ', '.join(map(self.userLink,
+                                     collection.persister_uuids)))
+      replication_to_blocks = defaultdict(set)
+      for block in collection.block_uuids:
+        replication_to_blocks[block_to_replication[block]].add(block)
+      replication_levels = sorted(replication_to_blocks.keys())
+      self.wfile.write('<P>%d blocks in %d replication level(s):\n' %
+                       (len(collection.block_uuids), len(replication_levels)))
+      self.wfile.write('<TABLE><TR><TH>%s</TR>\n' %
+                       '<TH>'.join(['Replication Level ' + str(x) for x in replication_levels]))
+      self.wfile.write('<TR>\n')
+      for replication_level in replication_levels:
+        blocks = replication_to_blocks[replication_level]
+        self.wfile.write('<TD valign="top">%s\n' % '<BR>\n'.join(blocks))
+      self.wfile.write('</TR></TABLE>\n')
+      
+
   def do_GET(self):
     if not all_data_loaded:
       self.send_error(503,
@@ -447,6 +486,8 @@ class DataManagerHandler(BaseHTTPRequestHandler):
         self.writeHomePage()
       elif request_type == DataManagerHandler.USER_PATH:
         self.writeUserPage(split_path[1])
+      elif request_type == DataManagerHandler.COLLECTION_PATH:
+        self.writeCollectionPage(split_path[1])
       else:
         self.send_error(404, 'Unrecognized request path.')
     return

commit 4efaeba6eb371d03aa8c8f052b63c2b3404de063
Author: Misha Zatsman <misha at curoverse.com>
Date:   Mon Apr 7 16:57:52 2014 +0000

    Added user page. Started using send_error. Added port flag.

diff --git a/services/datamanager/datamanager.py b/services/datamanager/datamanager.py
index 458c70a..02f31ca 100755
--- a/services/datamanager/datamanager.py
+++ b/services/datamanager/datamanager.py
@@ -3,10 +3,10 @@
 import arvados
 
 import argparse
-
+import cgi
 import logging
-import pprint
 import math
+import pprint
 import re
 import threading
 import urllib2
@@ -253,6 +253,11 @@ parser.add_argument('-m',
                     type=int,
                     default=5000,
                     help=('The max results to get at once.'))
+parser.add_argument('-p',
+                    '--port',
+                    type=int,
+                    default=9090,
+                    help=('The port number to serve on.'))
 parser.add_argument('-v',
                     '--verbose',
                     help='increase output verbosity',
@@ -345,8 +350,25 @@ def loadAllData():
   global all_data_loaded
   all_data_loaded = True
 
-
 class DataManagerHandler(BaseHTTPRequestHandler):
+  USER_PATH = 'user'
+  COLLECTION_PATH = 'collection'
+  BLOCK_PATH = 'block'
+
+  def userLink(self, uuid):
+    return ('<A HREF="/%(path)s/%(uuid)s">%(uuid)s</A>' %
+            {'uuid': uuid,
+             'path': DataManagerHandler.USER_PATH})
+
+  def collectionLink(self, uuid):
+    return ('<A HREF="/%(path)s/%(uuid)s">%(uuid)s</A>' %
+            {'uuid': uuid,
+             'path': DataManagerHandler.COLLECTION_PATH})
+
+  def blockLink(self, uuid):
+    return ('<A HREF="/%(path)s/%(uuid)s">%(uuid)s</A>' %
+            {'uuid': uuid,
+             'path': DataManagerHandler.BLOCK_PATH})
 
   def writeTop(self, title):
     self.wfile.write('<HTML><HEAD><TITLE>%s</TITLE></HEAD>\n<BODY>' % title)
@@ -366,7 +388,7 @@ class DataManagerHandler(BaseHTTPRequestHandler):
                      '<TH>weighted persisted block size</TR>\n')
     for user, usage in user_to_usage.items():
       self.wfile.write('<TR><TD>%s<TD>%s<TD>%s<TD>%s<TD>%s</TR>\n' %
-                       (user,
+                       (self.userLink(user),
                         fileSizeFormat(usage[UNWEIGHTED_READ_SIZE_COL]),
                         fileSizeFormat(usage[WEIGHTED_READ_SIZE_COL]),
                         fileSizeFormat(usage[UNWEIGHTED_PERSIST_SIZE_COL]),
@@ -374,15 +396,59 @@ class DataManagerHandler(BaseHTTPRequestHandler):
     self.wfile.write('</TABLE>\n')
     self.writeBottom()
 
-  def do_GET(self):
-    if not all_data_loaded:
-      self.send_response(503)
+  def userExists(self, uuid):
+    # Currently this will return false for a user who exists but
+    # doesn't appear on any manifests.
+    # TODO(misha): Figure out if we need to fix this.
+    return user_to_usage.has_key(uuid)
+
+  def writeUserPage(self, uuid):
+    if not self.userExists(uuid):
+      self.send_error(404,
+                      'User (%s) Not Found.' % cgi.escape(uuid, quote=False))
+    else:
+      self.send_response(200)
       self.end_headers()
-      self.writeTop('Not ready')
-      self.wfile.write('Sorry, but I am still loading all the data I need.\n')
+      self.writeTop('Home')
+      self.wfile.write('<TABLE>')
+      self.wfile.write('<TR><TH>user'
+                       '<TH>unweighted readable block size'
+                       '<TH>weighted readable block size'
+                       '<TH>unweighted persisted block size'
+                       '<TH>weighted persisted block size</TR>\n')
+      usage = user_to_usage[uuid]
+      self.wfile.write('<TR><TD>%s<TD>%s<TD>%s<TD>%s<TD>%s</TR>\n' %
+                       (self.userLink(uuid),
+                        fileSizeFormat(usage[UNWEIGHTED_READ_SIZE_COL]),
+                        fileSizeFormat(usage[WEIGHTED_READ_SIZE_COL]),
+                        fileSizeFormat(usage[UNWEIGHTED_PERSIST_SIZE_COL]),
+                        fileSizeFormat(usage[WEIGHTED_PERSIST_SIZE_COL])))
+      self.wfile.write('</TABLE>\n')
+      self.wfile.write('<P>Persisting Collections: %s\n' %
+                       ', '.join(map(self.collectionLink,
+                                     persister_to_collections[uuid])))
+      self.wfile.write('<P>Reading Collections: %s\n' %
+                       ', '.join(map(self.collectionLink,
+                                     reader_to_collections[uuid])))
       self.writeBottom()
+
+  def do_GET(self):
+    if not all_data_loaded:
+      self.send_error(503,
+                      'Sorry, but I am still loading all the data I need.')
     else:
-      self.writeHomePage()
+      # Remove the leading '/' and process the request path
+      split_path = self.path[1:].split('/')
+      request_type = split_path[0]
+      log.debug('path (%s) split as %s with request_type %s' % (self.path,
+                                                                split_path,
+                                                                request_type))
+      if request_type == '':
+        self.writeHomePage()
+      elif request_type == DataManagerHandler.USER_PATH:
+        self.writeUserPage(split_path[1])
+      else:
+        self.send_error(404, 'Unrecognized request path.')
     return
 
 class ThreadedHTTPServer(ThreadingMixIn, HTTPServer):
@@ -393,5 +459,5 @@ class ThreadedHTTPServer(ThreadingMixIn, HTTPServer):
 loader = threading.Thread(target = loadAllData, name = 'loader')
 loader.start()
 
-server = ThreadedHTTPServer(('localhost', 9090), DataManagerHandler)
+server = ThreadedHTTPServer(('localhost', args.port), DataManagerHandler)
 server.serve_forever()

-----------------------------------------------------------------------


hooks/post-receive
--