[ARVADOS] created: 2.1.0-2362-g96f176d43

Git user git at public.arvados.org
Tue Apr 19 20:11:38 UTC 2022


        at  96f176d43e03de4ba9dbb8a446a9339cb5032cec (commit)


commit 96f176d43e03de4ba9dbb8a446a9339cb5032cec
Author: Peter Amstutz <peter.amstutz at curii.com>
Date:   Tue Apr 19 15:41:24 2022 -0400

    17301: Log all messages in details
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>

diff --git a/sdk/cwl/arvados_cwl/executor.py b/sdk/cwl/arvados_cwl/executor.py
index 5f24d2407..f04f30476 100644
--- a/sdk/cwl/arvados_cwl/executor.py
+++ b/sdk/cwl/arvados_cwl/executor.py
@@ -262,45 +262,22 @@ The 'jobs' API is no longer supported.
                 return
             runtime_status = current.get('runtime_status', {})
             # In case of status being an error, only report the first one.
-            if kind == 'error':
-                if not runtime_status.get('error'):
-                    runtime_status.update({
-                        'error': message
-                    })
-                    if detail is not None:
-                        runtime_status.update({
-                            'errorDetail': detail
-                        })
-                # Further errors are only mentioned as a count.
-                else:
-                    # Get anything before an optional 'and N more' string.
-                    try:
-                        error_msg = re.match(
-                            r'^(.*?)(?=\s*\(and \d+ more\)|$)', runtime_status.get('error')).groups()[0]
-                        more_failures = re.match(
-                            r'.*\(and (\d+) more\)', runtime_status.get('error'))
-                    except TypeError:
-                        # Ignore tests stubbing errors
-                        return
-                    if more_failures:
-                        failure_qty = int(more_failures.groups()[0])
-                        runtime_status.update({
-                            'error': "%s (and %d more)" % (error_msg, failure_qty+1)
-                        })
-                    else:
-                        runtime_status.update({
-                            'error': "%s (and 1 more)" % error_msg
-                        })
-            elif kind in ['warning', 'activity']:
-                # Record the last warning/activity status without regard of
-                # previous occurences.
+            if kind in ('error', 'warning', 'activity'):
+                updatemessage = runtime_status.get(kind, "")
+                if updatemessage:
+                    updatemessage += "\n"
+                updatemessage += message
+
+                # Subsequent messages tacked on as detail
+                updatedetail = runtime_status.get(kind+'Detail', "")
+                if updatedetail:
+                   updatedetail += "\n"
+                if detail:
+                    updatedetail += message + "\n" + detail
                 runtime_status.update({
-                    kind: message
+                    kind: updatemessage,
+                    kind+'Detail': updatedetail,
                 })
-                if detail is not None:
-                    runtime_status.update({
-                        kind+"Detail": detail
-                    })
             else:
                 # Ignore any other status kind
                 return

commit 6358388ad9f7676aa3b1ab149c9fbde4716929e5
Author: Peter Amstutz <peter.amstutz at curii.com>
Date:   Tue Apr 19 15:40:56 2022 -0400

    17301: Report warning about OOM killer when exit code 137
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>

diff --git a/sdk/cwl/arvados_cwl/arvcontainer.py b/sdk/cwl/arvados_cwl/arvcontainer.py
index e2c2f2e67..c85443a23 100644
--- a/sdk/cwl/arvados_cwl/arvcontainer.py
+++ b/sdk/cwl/arvados_cwl/arvcontainer.py
@@ -392,6 +392,10 @@ class ArvadosContainer(JobBase):
                     processStatus = "success"
                 else:
                     processStatus = "permanentFail"
+
+                if rcode == 137:
+                    logger.warning("%s job was killed on the compute instance.  The most common reason is that it attempted to allocate too much RAM and was targeted by the Out Of Memory (OOM) killer.  Try resubmitting with a higher 'ramMin'.",
+                                 self.arvrunner.label(self))
             else:
                 processStatus = "permanentFail"
 

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list