[ARVADOS] updated: fdd28d6f009c605c61f4444dc2d9d142c3a1d395

Mon Jan 25 01:17:48 EST 2016

Summary of changes:
 .../crunchstat_summary/command.py                  | 12 +--
 .../crunchstat_summary/summarizer.py               | 92 ++++++++++++----------
 2 files changed, 55 insertions(+), 49 deletions(-)

  discards  d5f3f1c665fd78fd47094f86630e774408800234 (commit)
  discards  8abd532c45353466a9b949951e71efb3baef7a35 (commit)
       via  fdd28d6f009c605c61f4444dc2d9d142c3a1d395 (commit)
       via  d9ab654450fd23c043beab5c19c99806a301e51c (commit)
       via  56922b056a746e79c53b43186dbeff7e8a856546 (commit)
       via  e001d5f3e5cece6429c9b1305f98fdad89f451d0 (commit)
       via  b59cedb067aa5cf27045d1ad15fe9cea49502520 (commit)
       via  aa720c29abcf039a965231decb6a40d00e479437 (commit)

This update added new revisions after undoing existing revisions.  That is
to say, the old revision is not a strict subset of the new revision.  This
situation occurs when you --force push a change and generate a repository
containing something like this:

 * -- * -- B -- O -- O -- O (d5f3f1c665fd78fd47094f86630e774408800234)
            \
             N -- N -- N (fdd28d6f009c605c61f4444dc2d9d142c3a1d395)

When this happens, we assume that you have already received alert emails
for all of the O revisions, so this message reports only the revisions on
the N branch, starting from the common base, B.

Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full below.


commit fdd28d6f009c605c61f4444dc2d9d142c3a1d395
Author: Tom Clegg <tom at curoverse.com>
Date:   Mon Jan 25 01:16:44 2016 -0500

    8123: Explain existing_constraints and use a proper instance variable.

diff --git a/tools/crunchstat-summary/crunchstat_summary/summarizer.py b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
index f1e44b8..486f0e7 100644
--- a/tools/crunchstat-summary/crunchstat_summary/summarizer.py
+++ b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
@@ -26,8 +26,6 @@ class Task(object):
 
 
 class Summarizer(object):
-    existing_constraints = {}
-
     def __init__(self, logdata, label=None, skip_child_jobs=False):
         self._logdata = logdata
 
@@ -47,6 +45,12 @@ class Summarizer(object):
         self.seq_to_uuid = {}
         self.tasks = collections.defaultdict(Task)
 
+        # We won't bother recommending new runtime constraints if the
+        # constraints given when running the job are known to us and
+        # are already suitable.  If applicable, the subclass
+        # constructor will overwrite this with something useful.
+        self.existing_constraints = {}
+
         logger.debug("%s: logdata %s", self.label, repr(logdata))
 
     def run(self):
@@ -251,7 +255,7 @@ class Summarizer(object):
             logger.warning('%s: no CPU usage data', self.label)
             return
         used_cores = int(math.ceil(cpu_max_rate))
-        asked_cores =  self.existing_constraints.get('min_cores_per_node')
+        asked_cores = self.existing_constraints.get('min_cores_per_node')
         if asked_cores is None or used_cores < asked_cores:
             yield (
                 '#!! {} max CPU usage was {}% -- '
@@ -344,14 +348,13 @@ class JobSummarizer(CollectionSummarizer):
             self.job = arv.jobs().get(uuid=job).execute()
         else:
             self.job = job
-        self.label = self.job['uuid']
-        self.existing_constraints = self.job.get('runtime_constraints', {})
         if not self.job['log']:
             raise ValueError(
                 "job {} has no log; live summary not implemented".format(
                     self.job['uuid']))
         super(JobSummarizer, self).__init__(self.job['log'], **kwargs)
         self.label = self.job['uuid']
+        self.existing_constraints = self.job.get('runtime_constraints', {})
 
 
 class PipelineSummarizer(object):

commit d9ab654450fd23c043beab5c19c99806a301e51c
Author: Tom Clegg <tom at curoverse.com>
Date:   Mon Jan 25 01:08:27 2016 -0500

    8123: Fix accidental old-style class.

diff --git a/tools/crunchstat-summary/crunchstat_summary/summarizer.py b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
index 2fa32ae..f1e44b8 100644
--- a/tools/crunchstat-summary/crunchstat_summary/summarizer.py
+++ b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
@@ -354,7 +354,7 @@ class JobSummarizer(CollectionSummarizer):
         self.label = self.job['uuid']
 
 
-class PipelineSummarizer():
+class PipelineSummarizer(object):
     def __init__(self, pipeline_instance_uuid, **kwargs):
         arv = arvados.api('v1', model=OrderedJsonModel())
         instance = arv.pipeline_instances().get(

commit 56922b056a746e79c53b43186dbeff7e8a856546
Author: Tom Clegg <tom at curoverse.com>
Date:   Mon Jan 25 01:00:03 2016 -0500

    8123: Fix type check to accommodate unicode.

diff --git a/tools/crunchstat-summary/crunchstat_summary/summarizer.py b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
index e1c1d6c..2fa32ae 100644
--- a/tools/crunchstat-summary/crunchstat_summary/summarizer.py
+++ b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
@@ -340,7 +340,7 @@ class CollectionSummarizer(Summarizer):
 class JobSummarizer(CollectionSummarizer):
     def __init__(self, job, **kwargs):
         arv = arvados.api('v1')
-        if isinstance(job, str):
+        if isinstance(job, basestring):
             self.job = arv.jobs().get(uuid=job).execute()
         else:
             self.job = job

commit e001d5f3e5cece6429c9b1305f98fdad89f451d0
Author: Tom Clegg <tom at curoverse.com>
Date:   Mon Jan 25 00:59:46 2016 -0500

    8123: Use -v,-vv instead of --verbose,--debug.

diff --git a/tools/crunchstat-summary/crunchstat_summary/command.py b/tools/crunchstat-summary/crunchstat_summary/command.py
index a1e3eb2..a9dfc83 100644
--- a/tools/crunchstat-summary/crunchstat_summary/command.py
+++ b/tools/crunchstat-summary/crunchstat_summary/command.py
@@ -27,20 +27,14 @@ class ArgumentParser(argparse.ArgumentParser):
             '--format', type=str, choices=('html', 'text'), default='text',
             help='Report format')
         self.add_argument(
-            '--verbose', action='store_true',
-            help='Write progress messages to stderr')
-        self.add_argument(
-            '--debug', action='store_true',
-            help='Write debug messages to stderr')
+            '--verbose', '-v', action='count', default=0,
+            help='Log more information (once for progress, twice for debug)')
 
 
 class Command(object):
     def __init__(self, args):
         self.args = args
-        if args.debug:
-            logger.setLevel(logging.DEBUG)
-        elif args.verbose:
-            logger.setLevel(logging.INFO)
+        logger.setLevel(logging.WARNING - 10 * args.verbose)
 
     def run(self):
         kwargs = {

commit b59cedb067aa5cf27045d1ad15fe9cea49502520
Author: Tom Clegg <tom at curoverse.com>
Date:   Sun Jan 24 21:07:42 2016 -0500

    8123: Change --include-child-jobs to --skip-child-jobs (default False).

diff --git a/tools/crunchstat-summary/crunchstat_summary/command.py b/tools/crunchstat-summary/crunchstat_summary/command.py
index a5339dd..a1e3eb2 100644
--- a/tools/crunchstat-summary/crunchstat_summary/command.py
+++ b/tools/crunchstat-summary/crunchstat_summary/command.py
@@ -21,8 +21,8 @@ class ArgumentParser(argparse.ArgumentParser):
             '--log-file', type=str,
             help='Read log data from a regular file')
         self.add_argument(
-            '--include-child-jobs', action='store_true',
-            help='Include stats from child jobs')
+            '--skip-child-jobs', action='store_true',
+            help='Do not include stats from child jobs')
         self.add_argument(
             '--format', type=str, choices=('html', 'text'), default='text',
             help='Report format')
@@ -44,7 +44,7 @@ class Command(object):
 
     def run(self):
         kwargs = {
-            'include_child_jobs': self.args.include_child_jobs,
+            'skip_child_jobs': self.args.skip_child_jobs,
         }
         if self.args.pipeline_instance:
             self.summer = summarizer.PipelineSummarizer(self.args.pipeline_instance, **kwargs)
diff --git a/tools/crunchstat-summary/crunchstat_summary/summarizer.py b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
index f0a947c..e1c1d6c 100644
--- a/tools/crunchstat-summary/crunchstat_summary/summarizer.py
+++ b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
@@ -28,13 +28,13 @@ class Task(object):
 class Summarizer(object):
     existing_constraints = {}
 
-    def __init__(self, logdata, label=None, include_child_jobs=True):
+    def __init__(self, logdata, label=None, skip_child_jobs=False):
         self._logdata = logdata
 
         self.label = label
         self.starttime = None
         self.finishtime = None
-        self._include_child_jobs = include_child_jobs
+        self._skip_child_jobs = skip_child_jobs
 
         # stats_max: {category: {stat: val}}
         self.stats_max = collections.defaultdict(
@@ -72,8 +72,9 @@ class Summarizer(object):
             m = re.search(r'^\S+ \S+ \d+ (?P<seq>\d+) stderr Queued job (?P<uuid>\S+)$', line)
             if m:
                 uuid = m.group('uuid')
-                if not self._include_child_jobs:
-                    logger.warning('%s: omitting %s (try --include-child-job)',
+                if self._skip_child_jobs:
+                    logger.warning('%s: omitting stats from child job %s'
+                                   ' because --skip-child-jobs flag is on',
                                    self.label, uuid)
                     continue
                 logger.debug('%s: follow %s', self.label, uuid)

commit aa720c29abcf039a965231decb6a40d00e479437
Author: Tom Clegg <tom at curoverse.com>
Date:   Sun Jan 24 21:06:48 2016 -0500

    8123: Explain mysterious memory constraint logic.

diff --git a/tools/crunchstat-summary/crunchstat_summary/summarizer.py b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
index 93e018d..f0a947c 100644
--- a/tools/crunchstat-summary/crunchstat_summary/summarizer.py
+++ b/tools/crunchstat-summary/crunchstat_summary/summarizer.py
@@ -261,24 +261,56 @@ class Summarizer(object):
                 int(used_cores))
 
     def _recommend_ram(self):
-        """Recommend asking for (2048*0.95) MiB RAM if max rss was 1248 MiB"""
-
-        used_ram = self.stats_max['mem']['rss']
-        if used_ram == float('-Inf'):
+        """Recommend an economical RAM constraint for this job.
+
+        Nodes that are advertised as "8 gibibytes" actually have what
+        we might call "8 nearlygibs" of memory available for jobs.
+        Here, we calculate a whole number of nearlygibs that would
+        have sufficed to run the job, then recommend requesting a node
+        with that number of nearlygibs (expressed as mebibytes).
+
+        Requesting a node with "nearly 8 gibibytes" is our best hope
+        of getting a node that actually has nearly 8 gibibytes
+        available.  If the node manager is smart enough to account for
+        the discrepancy itself when choosing/creating a node, we'll
+        get an 8 GiB node with nearly 8 GiB available.  Otherwise, the
+        advertised size of the next-size-smaller node (say, 6 GiB)
+        will be too low to satisfy our request, so we will effectively
+        get rounded up to 8 GiB.
+
+        For example, if we need 7500 MiB, we can ask for 7500 MiB, and
+        we will generally get a node that is advertised as "8 GiB" and
+        has at least 7500 MiB available.  However, asking for 8192 MiB
+        would either result in an unnecessarily expensive 12 GiB node
+        (if node manager knows about the discrepancy), or an 8 GiB
+        node which has less than 8192 MiB available and is therefore
+        considered by crunch-dispatch to be too small to meet our
+        constraint.
+
+        When node manager learns how to predict the available memory
+        for each node type such that crunch-dispatch always agrees
+        that a node is big enough to run the job it was brought up
+        for, all this will be unnecessary.  We'll just ask for exactly
+        the memory we want -- even if that happens to be 8192 MiB.
+        """
+
+        used_bytes = self.stats_max['mem']['rss']
+        if used_bytes == float('-Inf'):
             logger.warning('%s: no memory usage data', self.label)
             return
-        used_ram = math.ceil(float(used_ram) / (1<<20))
-        asked_ram = self.existing_constraints.get('min_ram_mb_per_node')
-        if asked_ram is None or (
-                math.ceil((used_ram/AVAILABLE_RAM_RATIO)/(1<<10)) <
-                (asked_ram/AVAILABLE_RAM_RATIO)/(1<<10)):
+        used_mib = math.ceil(float(used_bytes) / 1048576)
+        asked_mib = self.existing_constraints.get('min_ram_mb_per_node')
+
+        nearlygibs = lambda mebibytes: mebibytes/AVAILABLE_RAM_RATIO/1024
+        if asked_mib is None or (
+                math.ceil(nearlygibs(used_mib)) < nearlygibs(asked_mib)):
             yield (
                 '#!! {} max RSS was {} MiB -- '
                 'try runtime_constraints "min_ram_mb_per_node":{}'
             ).format(
                 self.label,
-                int(used_ram),
-                int(math.ceil((used_ram/AVAILABLE_RAM_RATIO)/(1<<10))*(1<<10)*AVAILABLE_RAM_RATIO))
+                int(used_mib),
+                int(math.ceil(nearlygibs(used_mib))*AVAILABLE_RAM_RATIO*1024))
 
     def _format(self, val):
         """Return a string representation of a stat.

-----------------------------------------------------------------------


hooks/post-receive
--