[ARVADOS] created: 2.1.0-2362-g96f176d43
Git user
git at public.arvados.org
Tue Apr 19 20:11:38 UTC 2022
at 96f176d43e03de4ba9dbb8a446a9339cb5032cec (commit)
commit 96f176d43e03de4ba9dbb8a446a9339cb5032cec
Author: Peter Amstutz <peter.amstutz at curii.com>
Date: Tue Apr 19 15:41:24 2022 -0400
17301: Log all messages in details
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>
diff --git a/sdk/cwl/arvados_cwl/executor.py b/sdk/cwl/arvados_cwl/executor.py
index 5f24d2407..f04f30476 100644
--- a/sdk/cwl/arvados_cwl/executor.py
+++ b/sdk/cwl/arvados_cwl/executor.py
@@ -262,45 +262,22 @@ The 'jobs' API is no longer supported.
return
runtime_status = current.get('runtime_status', {})
# In case of status being an error, only report the first one.
- if kind == 'error':
- if not runtime_status.get('error'):
- runtime_status.update({
- 'error': message
- })
- if detail is not None:
- runtime_status.update({
- 'errorDetail': detail
- })
- # Further errors are only mentioned as a count.
- else:
- # Get anything before an optional 'and N more' string.
- try:
- error_msg = re.match(
- r'^(.*?)(?=\s*\(and \d+ more\)|$)', runtime_status.get('error')).groups()[0]
- more_failures = re.match(
- r'.*\(and (\d+) more\)', runtime_status.get('error'))
- except TypeError:
- # Ignore tests stubbing errors
- return
- if more_failures:
- failure_qty = int(more_failures.groups()[0])
- runtime_status.update({
- 'error': "%s (and %d more)" % (error_msg, failure_qty+1)
- })
- else:
- runtime_status.update({
- 'error': "%s (and 1 more)" % error_msg
- })
- elif kind in ['warning', 'activity']:
- # Record the last warning/activity status without regard of
- # previous occurences.
+ if kind in ('error', 'warning', 'activity'):
+ updatemessage = runtime_status.get(kind, "")
+ if updatemessage:
+ updatemessage += "\n"
+ updatemessage += message
+
+ # Subsequent messages tacked on as detail
+ updatedetail = runtime_status.get(kind+'Detail', "")
+ if updatedetail:
+ updatedetail += "\n"
+ if detail:
+ updatedetail += message + "\n" + detail
runtime_status.update({
- kind: message
+ kind: updatemessage,
+ kind+'Detail': updatedetail,
})
- if detail is not None:
- runtime_status.update({
- kind+"Detail": detail
- })
else:
# Ignore any other status kind
return
commit 6358388ad9f7676aa3b1ab149c9fbde4716929e5
Author: Peter Amstutz <peter.amstutz at curii.com>
Date: Tue Apr 19 15:40:56 2022 -0400
17301: Report warning about OOM killer when exit code 137
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>
diff --git a/sdk/cwl/arvados_cwl/arvcontainer.py b/sdk/cwl/arvados_cwl/arvcontainer.py
index e2c2f2e67..c85443a23 100644
--- a/sdk/cwl/arvados_cwl/arvcontainer.py
+++ b/sdk/cwl/arvados_cwl/arvcontainer.py
@@ -392,6 +392,10 @@ class ArvadosContainer(JobBase):
processStatus = "success"
else:
processStatus = "permanentFail"
+
+ if rcode == 137:
+ logger.warning("%s job was killed on the compute instance. The most common reason is that it attempted to allocate too much RAM and was targeted by the Out Of Memory (OOM) killer. Try resubmitting with a higher 'ramMin'.",
+ self.arvrunner.label(self))
else:
processStatus = "permanentFail"
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits mailing list