[arvados] created: 2.1.0-2994-ge2267bd99
git repository hosting
git at public.arvados.org
Fri Oct 28 21:41:36 UTC 2022
at e2267bd99209651c61425f335230e515421b2ef4 (commit)
commit e2267bd99209651c61425f335230e515421b2ef4
Author: Peter Amstutz <peter.amstutz at curii.com>
Date: Fri Oct 28 17:30:39 2022 -0400
19678: Fix for parameters called 'name'
Also fix regression involving default file references appearing in
nested processes (inline declaration of a tool within a workflow).
Also fixed some dependency issues preventing arvados/jobs developer
image from building.
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>
diff --git a/sdk/cwl/arvados_cwl/runner.py b/sdk/cwl/arvados_cwl/runner.py
index a307cf7f6..486103919 100644
--- a/sdk/cwl/arvados_cwl/runner.py
+++ b/sdk/cwl/arvados_cwl/runner.py
@@ -465,10 +465,7 @@ def upload_dependencies(arvrunner, name, document_loader,
builder_job_order,
discovered)
- _jobloaderctx = jobloaderctx.copy()
- loader = schema_salad.ref_resolver.Loader(_jobloaderctx, fetcher_constructor=document_loader.fetcher_constructor)
-
- copied, _ = loader.resolve_all(copy.deepcopy(cmap(workflowobj)), base_url=uri, checklinks=False)
+ copied, _ = document_loader.resolve_all(copy.deepcopy(cmap(workflowobj)), base_url=uri, checklinks=False)
visit_class(copied, ("CommandLineTool", "Workflow"), discover_default_secondary_files)
for d in list(discovered):
@@ -698,9 +695,12 @@ def upload_job_order(arvrunner, name, tool, job_order, runtimeContext):
tool.tool["inputs"],
job_order)
+ _jobloaderctx = jobloaderctx.copy()
+ jobloader = schema_salad.ref_resolver.Loader(_jobloaderctx, fetcher_constructor=tool.doc_loader.fetcher_constructor)
+
jobmapper = upload_dependencies(arvrunner,
name,
- tool.doc_loader,
+ jobloader,
job_order,
job_order.get("id", "#"),
False,
@@ -728,28 +728,37 @@ def upload_workflow_deps(arvrunner, tool, runtimeContext):
merged_map = {}
tool_dep_cache = {}
+
+ todo = []
+
+ # Standard traversal is top down, we want to go bottom up, so use
+ # the visitor to accumalate a list of nodes to visit, then
+ # visit them in reverse order.
def upload_tool_deps(deptool):
if "id" in deptool:
- discovered_secondaryfiles = {}
- with Perf(metrics, "upload_dependencies %s" % shortname(deptool["id"])):
- pm = upload_dependencies(arvrunner,
- "%s dependencies" % (shortname(deptool["id"])),
- document_loader,
- deptool,
- deptool["id"],
- False,
- runtimeContext,
- include_primary=False,
- discovered_secondaryfiles=discovered_secondaryfiles,
- cache=tool_dep_cache)
- document_loader.idx[deptool["id"]] = deptool
- toolmap = {}
- for k,v in pm.items():
- toolmap[k] = v.resolved
- merged_map[deptool["id"]] = FileUpdates(toolmap, discovered_secondaryfiles)
+ todo.append(deptool)
tool.visit(upload_tool_deps)
+ for deptool in reversed(todo):
+ discovered_secondaryfiles = {}
+ with Perf(metrics, "upload_dependencies %s" % shortname(deptool["id"])):
+ pm = upload_dependencies(arvrunner,
+ "%s dependencies" % (shortname(deptool["id"])),
+ document_loader,
+ deptool,
+ deptool["id"],
+ False,
+ runtimeContext,
+ include_primary=False,
+ discovered_secondaryfiles=discovered_secondaryfiles,
+ cache=tool_dep_cache)
+ document_loader.idx[deptool["id"]] = deptool
+ toolmap = {}
+ for k,v in pm.items():
+ toolmap[k] = v.resolved
+ merged_map[deptool["id"]] = FileUpdates(toolmap, discovered_secondaryfiles)
+
return merged_map
def arvados_jobs_image(arvrunner, img, runtimeContext):
diff --git a/sdk/cwl/setup.py b/sdk/cwl/setup.py
index e70955c20..33c1ffcbf 100644
--- a/sdk/cwl/setup.py
+++ b/sdk/cwl/setup.py
@@ -42,7 +42,8 @@ setup(name='arvados-cwl-runner',
'setuptools',
'ciso8601 >= 2.0.0',
'networkx < 2.6',
- 'msgpack==1.0.3'
+ 'msgpack==1.0.3',
+ 'importlib-metadata<5'
],
data_files=[
('share/doc/arvados-cwl-runner', ['LICENSE-2.0.txt', 'README.rst']),
diff --git a/sdk/cwl/tests/19678-name-id.cwl b/sdk/cwl/tests/19678-name-id.cwl
new file mode 100644
index 000000000..afed34b93
--- /dev/null
+++ b/sdk/cwl/tests/19678-name-id.cwl
@@ -0,0 +1,26 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+class: Workflow
+cwlVersion: v1.1
+inputs:
+ - type:
+ fields:
+ - name: first
+ type: string
+ - name: last
+ type: string
+ type: record
+ id: name
+outputs:
+ - type:
+ fields:
+ - name: first
+ type: string
+ - name: last
+ type: string
+ type: record
+ id: processed_name
+ outputSource: name
+steps: []
diff --git a/sdk/cwl/tests/arvados-tests.yml b/sdk/cwl/tests/arvados-tests.yml
index 2f309cfe8..4ed4d4ac3 100644
--- a/sdk/cwl/tests/arvados-tests.yml
+++ b/sdk/cwl/tests/arvados-tests.yml
@@ -469,3 +469,13 @@
}
tool: 19109-upload-secondary.cwl
doc: "Test issue 19109 - correctly discover & upload secondary files"
+
+- job: 19678-name-id.yml
+ output: {
+ "processed_name": {
+ "first": "foo",
+ "last": "bar"
+ }
+ }
+ tool: 19678-name-id.cwl
+ doc: "Test issue 19678 - non-string type input parameter called 'name'"
diff --git a/sdk/dev-jobs.dockerfile b/sdk/dev-jobs.dockerfile
index b55b056b2..60db4a889 100644
--- a/sdk/dev-jobs.dockerfile
+++ b/sdk/dev-jobs.dockerfile
@@ -35,11 +35,17 @@ ADD cwl/salad_dist/$salad /tmp/
ADD cwl/cwltool_dist/$cwltool /tmp/
ADD cwl/dist/$runner /tmp/
+RUN $pipcmd install wheel
RUN cd /tmp/arvados-python-client-* && $pipcmd install .
RUN if test -d /tmp/schema-salad-* ; then cd /tmp/schema-salad-* && $pipcmd install . ; fi
RUN if test -d /tmp/cwltool-* ; then cd /tmp/cwltool-* && $pipcmd install . ; fi
RUN cd /tmp/arvados-cwl-runner-* && $pipcmd install .
+# Sometimes Python dependencies install successfully but don't
+# actually work. So run arvados-cwl-runner here to catch fun
+# dependency errors like pkg_resources.DistributionNotFound.
+RUN arvados-cwl-runner --version
+
# Install dependencies and set up system.
RUN /usr/sbin/adduser --disabled-password \
--gecos 'Crunch execution user' crunch && \
commit a5f492d001f158a01abfd79b65c5bf41708da7f9
Author: Peter Amstutz <peter.amstutz at curii.com>
Date: Fri Oct 28 14:36:32 2022 -0400
19678: Use minimal job loader context to expand job order, same as cwltool
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>
diff --git a/sdk/cwl/arvados_cwl/runner.py b/sdk/cwl/arvados_cwl/runner.py
index 1544d05cd..a307cf7f6 100644
--- a/sdk/cwl/arvados_cwl/runner.py
+++ b/sdk/cwl/arvados_cwl/runner.py
@@ -53,13 +53,14 @@ from cwltool.command_line_tool import CommandLineTool
import cwltool.workflow
from cwltool.process import (scandeps, UnsupportedRequirement, normalizeFilesDirs,
shortname, Process, fill_in_defaults)
-from cwltool.load_tool import fetch_document
+from cwltool.load_tool import fetch_document, jobloaderctx
from cwltool.utils import aslist, adjustFileObjs, adjustDirObjs, visit_class
from cwltool.builder import substitute
from cwltool.pack import pack
from cwltool.update import INTERNAL_VERSION
from cwltool.builder import Builder
import schema_salad.validate as validate
+import schema_salad.ref_resolver
import arvados.collection
import arvados.util
@@ -464,7 +465,10 @@ def upload_dependencies(arvrunner, name, document_loader,
builder_job_order,
discovered)
- copied, _ = document_loader.resolve_all(copy.deepcopy(cmap(workflowobj)), base_url=uri, checklinks=False)
+ _jobloaderctx = jobloaderctx.copy()
+ loader = schema_salad.ref_resolver.Loader(_jobloaderctx, fetcher_constructor=document_loader.fetcher_constructor)
+
+ copied, _ = loader.resolve_all(copy.deepcopy(cmap(workflowobj)), base_url=uri, checklinks=False)
visit_class(copied, ("CommandLineTool", "Workflow"), discover_default_secondary_files)
for d in list(discovered):
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list