[ARVADOS] updated: a686dcbfc02914af289fceec63d68645b4ffb13f

Tue Jan 6 11:05:10 EST 2015

Summary of changes:
 services/api/script/crunch-failure-report.py | 42 +++++++++++++---------------
 1 file changed, 19 insertions(+), 23 deletions(-)

       via  a686dcbfc02914af289fceec63d68645b4ffb13f (commit)
      from  e5dce6935b3ad0e1222c63cccc3fb4e4e4924d05 (commit)

Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full below.


commit a686dcbfc02914af289fceec63d68645b4ffb13f
Author: Tim Pierce <twp at curoverse.com>
Date:   Tue Jan 6 11:03:10 2015 -0500

    4598: account for queued and cancelled jobs, fix sorting
    
    Per code review:
    * Updated report to include job states "Cancelled" and "Queued" as well
      as Failed, Running and Complete, and to take these into account when
      calculating job counts.
    * Fixed sorting for failure classes.

diff --git a/services/api/script/crunch-failure-report.py b/services/api/script/crunch-failure-report.py
index e6521e2..5183684 100755
--- a/services/api/script/crunch-failure-report.py
+++ b/services/api/script/crunch-failure-report.py
@@ -124,15 +124,17 @@ def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr):
     # Find all jobs created within the specified window,
     # and their corresponding job logs.
     jobs_created = jobs_created_between_dates(api, start_time, end_time)
-    jobs_failed     = [job for job in jobs_created if job['state'] == 'Failed']
-    jobs_successful = [job for job in jobs_created if job['state'] == 'Complete']
+    jobs_by_state = {}
+    for job in jobs_created:
+        jobs_by_state.setdefault(job['state'], [])
+        jobs_by_state[job['state']].append(job)
 
     # Find failed jobs and record the job failure text.
 
     # failure_stats maps failure types (e.g. "sys/docker") to
     # a set of job UUIDs that failed for that reason.
     failure_stats = {}
-    for job in jobs_failed:
+    for job in jobs_by_state['Failed']:
         job_uuid = job['uuid']
         logs = job_logs(api, job)
         # Find the first permanent task failure, and collect the
@@ -165,21 +167,14 @@ def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr):
     print ""
 
     job_start_count = len(jobs_created)
-    job_success_count = len(jobs_successful)
-    job_fail_count = len(jobs_failed)
-    job_unfinished_count = job_start_count - job_success_count - job_fail_count
-
-    print "  {: <25s} {:4d}".format('Started',
-                                    job_start_count)
-    print "  {: <25s} {:4d} ({: >4.0%})".format('Successful',
-                                                job_success_count,
-                                                job_success_count / float(job_start_count))
-    print "  {: <25s} {:4d} ({: >4.0%})".format('Failed',
-                                                job_fail_count,
-                                                job_fail_count / float(job_start_count))
-    print "  {: <25s} {:4d} ({: >4.0%})".format('In progress',
-                                                job_unfinished_count,
-                                                job_unfinished_count / float(job_start_count))
+    print "  {: <25s} {:4d}".format('Started', job_start_count)
+    for state in ['Complete', 'Failed', 'Queued', 'Cancelled', 'Running']:
+        if state in jobs_by_state:
+            job_count = len(jobs_by_state[state])
+            job_percentage = job_count / float(job_start_count)
+            print "  {: <25s} {:4d} ({: >4.0%})".format(state,
+                                                        job_count,
+                                                        job_percentage)
     print ""
 
     # Report failure types.
@@ -188,17 +183,18 @@ def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr):
 
     # Generate a mapping from failed job uuids to job records, to assist
     # in generating detailed statistics for job failures.
-    jobs_failed_map = { job['uuid']: job for job in jobs_failed }
+    jobs_failed_map = { job['uuid']: job for job in jobs_by_state.get('Failed', []) }
 
     # sort the failure stats in descending order by occurrence.
-    sorted_failures = sorted(failure_stats.items(),
+    sorted_failures = sorted(failure_stats,
                              reverse=True,
-                             key=lambda failed_job_list: len(failed_job_list))
-    for failtype, job_uuids in sorted_failures:
+                             key=lambda failure_type: len(failure_stats[failure_type]))
+    for failtype in sorted_failures:
+        job_uuids = failure_stats[failtype]
         failstat = "  {: <25s} {:4d} ({: >4.0%})\n".format(
             failtype,
             len(job_uuids),
-            len(job_uuids) / float(job_fail_count))
+            len(job_uuids) / float(len(jobs_by_state['Failed'])))
         failure_summary = failure_summary + failstat
         failure_detail = failure_detail + failstat
         for j in job_uuids:

-----------------------------------------------------------------------


hooks/post-receive
--