[ARVADOS] updated: a686dcbfc02914af289fceec63d68645b4ffb13f
git at public.curoverse.com
git at public.curoverse.com
Tue Jan 6 11:05:10 EST 2015
Summary of changes:
services/api/script/crunch-failure-report.py | 42 +++++++++++++---------------
1 file changed, 19 insertions(+), 23 deletions(-)
via a686dcbfc02914af289fceec63d68645b4ffb13f (commit)
from e5dce6935b3ad0e1222c63cccc3fb4e4e4924d05 (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
commit a686dcbfc02914af289fceec63d68645b4ffb13f
Author: Tim Pierce <twp at curoverse.com>
Date: Tue Jan 6 11:03:10 2015 -0500
4598: account for queued and cancelled jobs, fix sorting
Per code review:
* Updated report to include job states "Cancelled" and "Queued" as well
as Failed, Running and Complete, and to take these into account when
calculating job counts.
* Fixed sorting for failure classes.
diff --git a/services/api/script/crunch-failure-report.py b/services/api/script/crunch-failure-report.py
index e6521e2..5183684 100755
--- a/services/api/script/crunch-failure-report.py
+++ b/services/api/script/crunch-failure-report.py
@@ -124,15 +124,17 @@ def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr):
# Find all jobs created within the specified window,
# and their corresponding job logs.
jobs_created = jobs_created_between_dates(api, start_time, end_time)
- jobs_failed = [job for job in jobs_created if job['state'] == 'Failed']
- jobs_successful = [job for job in jobs_created if job['state'] == 'Complete']
+ jobs_by_state = {}
+ for job in jobs_created:
+ jobs_by_state.setdefault(job['state'], [])
+ jobs_by_state[job['state']].append(job)
# Find failed jobs and record the job failure text.
# failure_stats maps failure types (e.g. "sys/docker") to
# a set of job UUIDs that failed for that reason.
failure_stats = {}
- for job in jobs_failed:
+ for job in jobs_by_state['Failed']:
job_uuid = job['uuid']
logs = job_logs(api, job)
# Find the first permanent task failure, and collect the
@@ -165,21 +167,14 @@ def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr):
print ""
job_start_count = len(jobs_created)
- job_success_count = len(jobs_successful)
- job_fail_count = len(jobs_failed)
- job_unfinished_count = job_start_count - job_success_count - job_fail_count
-
- print " {: <25s} {:4d}".format('Started',
- job_start_count)
- print " {: <25s} {:4d} ({: >4.0%})".format('Successful',
- job_success_count,
- job_success_count / float(job_start_count))
- print " {: <25s} {:4d} ({: >4.0%})".format('Failed',
- job_fail_count,
- job_fail_count / float(job_start_count))
- print " {: <25s} {:4d} ({: >4.0%})".format('In progress',
- job_unfinished_count,
- job_unfinished_count / float(job_start_count))
+ print " {: <25s} {:4d}".format('Started', job_start_count)
+ for state in ['Complete', 'Failed', 'Queued', 'Cancelled', 'Running']:
+ if state in jobs_by_state:
+ job_count = len(jobs_by_state[state])
+ job_percentage = job_count / float(job_start_count)
+ print " {: <25s} {:4d} ({: >4.0%})".format(state,
+ job_count,
+ job_percentage)
print ""
# Report failure types.
@@ -188,17 +183,18 @@ def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr):
# Generate a mapping from failed job uuids to job records, to assist
# in generating detailed statistics for job failures.
- jobs_failed_map = { job['uuid']: job for job in jobs_failed }
+ jobs_failed_map = { job['uuid']: job for job in jobs_by_state.get('Failed', []) }
# sort the failure stats in descending order by occurrence.
- sorted_failures = sorted(failure_stats.items(),
+ sorted_failures = sorted(failure_stats,
reverse=True,
- key=lambda failed_job_list: len(failed_job_list))
- for failtype, job_uuids in sorted_failures:
+ key=lambda failure_type: len(failure_stats[failure_type]))
+ for failtype in sorted_failures:
+ job_uuids = failure_stats[failtype]
failstat = " {: <25s} {:4d} ({: >4.0%})\n".format(
failtype,
len(job_uuids),
- len(job_uuids) / float(job_fail_count))
+ len(job_uuids) / float(len(jobs_by_state['Failed'])))
failure_summary = failure_summary + failstat
failure_detail = failure_detail + failstat
for j in job_uuids:
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list