[ARVADOS] created: 1.2.0-303-g135f4c65d

Git user git at public.curoverse.com
Tue Oct 30 14:14:33 EDT 2018


        at  135f4c65d237721062676e9d3f5f0dda3a67b1be (commit)


commit 135f4c65d237721062676e9d3f5f0dda3a67b1be
Author: Peter Amstutz <pamstutz at veritasgenetics.com>
Date:   Thu Oct 25 17:21:05 2018 -0400

    14198: Fix typo current -> current_container, add copyright header
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz at veritasgenetics.com>

diff --git a/sdk/cwl/arvados_cwl/executor.py b/sdk/cwl/arvados_cwl/executor.py
index 7256e1d0d..bf81853be 100644
--- a/sdk/cwl/arvados_cwl/executor.py
+++ b/sdk/cwl/arvados_cwl/executor.py
@@ -1,3 +1,7 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
 import argparse
 import logging
 import os
@@ -618,7 +622,7 @@ http://doc.arvados.org/install/install-api-server.html#disable_api_methods
             return (runnerjob.uuid, "success")
 
         current_container = get_current_container(self.api, self.num_retries, logger)
-        if current:
+        if current_container:
             logger.info("Running inside container %s", current_container.get("uuid"))
 
         self.poll_api = arvados.api('v1', timeout=runtimeContext.http_timeout)

commit 1da63bad34fe0fd831d6be6653d5320f4c69cf29
Author: Peter Amstutz <pamstutz at veritasgenetics.com>
Date:   Thu Oct 25 17:12:14 2018 -0400

    14198: Only assign runtime token when api token is for local cluster
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz at veritasgenetics.com>

diff --git a/lib/controller/fed_containers.go b/lib/controller/fed_containers.go
index a3c292583..ebe324de7 100644
--- a/lib/controller/fed_containers.go
+++ b/lib/controller/fed_containers.go
@@ -76,17 +76,19 @@ func remoteContainerRequestCreate(
 			return true
 		}
 
-		if len(currentUser.Authorization.Scopes) != 1 || currentUser.Authorization.Scopes[0] != "all" {
-			httpserver.Error(w, "Token scope is not [all]", http.StatusForbidden)
-			return true
-		}
-
-		newtok, err := h.handler.createAPItoken(req, currentUser.UUID, nil)
-		if err != nil {
-			httpserver.Error(w, err.Error(), http.StatusForbidden)
-			return true
+		if currentUser.Authorization.UUID[0:5] == h.handler.Cluster.ClusterID {
+			if len(currentUser.Authorization.Scopes) != 1 || currentUser.Authorization.Scopes[0] != "all" {
+				httpserver.Error(w, "Token scope is not [all]", http.StatusForbidden)
+				return true
+			}
+
+			newtok, err := h.handler.createAPItoken(req, currentUser.UUID, nil)
+			if err != nil {
+				httpserver.Error(w, err.Error(), http.StatusForbidden)
+				return true
+			}
+			containerRequest["runtime_token"] = newtok.TokenV2()
 		}
-		containerRequest["runtime_token"] = newtok.TokenV2()
 	}
 
 	newbody, err := json.Marshal(request)

commit 32cbd724621ccc70b1b1cc6690d9cf59ba0d62e5
Author: Peter Amstutz <pamstutz at veritasgenetics.com>
Date:   Thu Oct 25 17:11:59 2018 -0400

    14198: Log current container
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz at veritasgenetics.com>

diff --git a/sdk/cwl/arvados_cwl/executor.py b/sdk/cwl/arvados_cwl/executor.py
index 6eaa4b92c..7256e1d0d 100644
--- a/sdk/cwl/arvados_cwl/executor.py
+++ b/sdk/cwl/arvados_cwl/executor.py
@@ -617,6 +617,10 @@ http://doc.arvados.org/install/install-api-server.html#disable_api_methods
             runnerjob.run(submitargs)
             return (runnerjob.uuid, "success")
 
+        current_container = get_current_container(self.api, self.num_retries, logger)
+        if current:
+            logger.info("Running inside container %s", current_container.get("uuid"))
+
         self.poll_api = arvados.api('v1', timeout=runtimeContext.http_timeout)
         self.polling_thread = threading.Thread(target=self.poll_states)
         self.polling_thread.start()

commit 634b7292bca492b278a6e605cdd6a42cf01ef9a0
Author: Peter Amstutz <pamstutz at veritasgenetics.com>
Date:   Thu Oct 25 16:14:50 2018 -0400

    14198: Support expressions in TargetCluster[clusterID, ownerUUID]
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz at veritasgenetics.com>

diff --git a/sdk/cwl/arvados_cwl/arvcontainer.py b/sdk/cwl/arvados_cwl/arvcontainer.py
index d9466a83a..823b41ce9 100644
--- a/sdk/cwl/arvados_cwl/arvcontainer.py
+++ b/sdk/cwl/arvados_cwl/arvcontainer.py
@@ -36,7 +36,7 @@ metrics = logging.getLogger('arvados.cwl-runner.metrics')
 class ArvadosContainer(JobBase):
     """Submit and manage a Crunch container request for executing a CWL CommandLineTool."""
 
-    def __init__(self, runner,
+    def __init__(self, runner, cluster_target,
                  builder,   # type: Builder
                  joborder,  # type: Dict[Text, Union[Dict[Text, Any], List, Text]]
                  make_path_mapper,  # type: Callable[..., PathMapper]
@@ -46,6 +46,7 @@ class ArvadosContainer(JobBase):
     ):
         super(ArvadosContainer, self).__init__(builder, joborder, make_path_mapper, requirements, hints, name)
         self.arvrunner = runner
+        self.cluster_target = cluster_target
         self.running = False
         self.uuid = None
 
@@ -251,13 +252,11 @@ class ArvadosContainer(JobBase):
             scheduling_parameters["max_run_time"] = self.timelimit
 
         extra_submit_params = {}
-        cluster_target_req, _ = self.get_requirement("http://arvados.org/cwl#ClusterTarget")
-        if cluster_target_req:
-            cluster_id = cluster_target_req.get("clusterID")
-            if cluster_id:
-                extra_submit_params["cluster_id"] = cluster_id
-            if cluster_target_req.get("ownerUUID"):
-                container_request["owner_uuid"] = cluster_target_req.get("ownerUUID")
+        if self.cluster_target is not None:
+            if self.cluster_target.cluster_id:
+                extra_submit_params["cluster_id"] = self.cluster_target.cluster_id
+            if self.cluster_target.owner_uuid:
+                container_request["owner_uuid"] = self.cluster_target.owner_uuid
 
         container_request["output_name"] = "Output for step %s" % (self.name)
         container_request["output_ttl"] = self.output_ttl
diff --git a/sdk/cwl/arvados_cwl/arvtool.py b/sdk/cwl/arvados_cwl/arvtool.py
index 119acc303..e0997db5b 100644
--- a/sdk/cwl/arvados_cwl/arvtool.py
+++ b/sdk/cwl/arvados_cwl/arvtool.py
@@ -6,6 +6,7 @@ from cwltool.command_line_tool import CommandLineTool
 from .arvjob import ArvadosJob
 from .arvcontainer import ArvadosContainer
 from .pathmapper import ArvPathMapper
+from .context import ClusterTarget
 from functools import partial
 
 class ArvadosCommandTool(CommandLineTool):
@@ -17,7 +18,7 @@ class ArvadosCommandTool(CommandLineTool):
 
     def make_job_runner(self, runtimeContext):
         if runtimeContext.work_api == "containers":
-            return partial(ArvadosContainer, self.arvrunner)
+            return partial(ArvadosContainer, self.arvrunner, runtimeContext.cluster_target)
         elif runtimeContext.work_api == "jobs":
             return partial(ArvadosJob, self.arvrunner)
         else:
@@ -44,6 +45,12 @@ class ArvadosCommandTool(CommandLineTool):
 
         runtimeContext = runtimeContext.copy()
 
+        cluster_target_req, _ = self.get_requirement("http://arvados.org/cwl#ClusterTarget")
+        if runtimeContext.cluster_target is None or runtimeContext.cluster_target.instance != id(cluster_target_req):
+            runtimeContext.cluster_target = ClusterTarget(id(cluster_target_req),
+                                                          builder.do_eval(cluster_target_req.get("clusterID")),
+                                                          builder.do_eval(cluster_target_req.get("ownerUUID")))
+
         if runtimeContext.work_api == "containers":
             dockerReq, is_req = self.get_requirement("DockerRequirement")
             if dockerReq and dockerReq.get("dockerOutputDirectory"):
diff --git a/sdk/cwl/arvados_cwl/arvworkflow.py b/sdk/cwl/arvados_cwl/arvworkflow.py
index ae9062510..f86641bfd 100644
--- a/sdk/cwl/arvados_cwl/arvworkflow.py
+++ b/sdk/cwl/arvados_cwl/arvworkflow.py
@@ -131,158 +131,167 @@ class ArvadosWorkflow(Workflow):
         self.loadingContext = loadingContext
 
     def job(self, joborder, output_callback, runtimeContext):
+
+        cluster_target_req, _ = self.get_requirement("http://arvados.org/cwl#ClusterTarget")
+        if runtimeContext.cluster_target is None or runtimeContext.cluster_target.instance != id(cluster_target_req):
+            runtimeContext.cluster_target = ClusterTarget(id(cluster_target_req),
+                                                          builder.do_eval(cluster_target_req.get("clusterID")),
+                                                          builder.do_eval(cluster_target_req.get("ownerUUID")))
+
         req, _ = self.get_requirement("http://arvados.org/cwl#RunInSingleContainer")
-        if req:
-            with SourceLine(self.tool, None, WorkflowException, logger.isEnabledFor(logging.DEBUG)):
-                if "id" not in self.tool:
-                    raise WorkflowException("%s object must have 'id'" % (self.tool["class"]))
-            document_loader, workflowobj, uri = (self.doc_loader, self.doc_loader.fetch(self.tool["id"]), self.tool["id"])
+        if not req:
+            return super(ArvadosWorkflow, self).job(joborder, output_callback, runtimeContext)
 
-            discover_secondary_files(self.tool["inputs"], joborder)
+        # RunInSingleContainer is true
 
-            with Perf(metrics, "subworkflow upload_deps"):
-                upload_dependencies(self.arvrunner,
-                                    os.path.basename(joborder.get("id", "#")),
-                                    document_loader,
-                                    joborder,
-                                    joborder.get("id", "#"),
-                                    False)
+        with SourceLine(self.tool, None, WorkflowException, logger.isEnabledFor(logging.DEBUG)):
+            if "id" not in self.tool:
+                raise WorkflowException("%s object must have 'id'" % (self.tool["class"]))
+        document_loader, workflowobj, uri = (self.doc_loader, self.doc_loader.fetch(self.tool["id"]), self.tool["id"])
+
+        discover_secondary_files(self.tool["inputs"], joborder)
+
+        with Perf(metrics, "subworkflow upload_deps"):
+            upload_dependencies(self.arvrunner,
+                                os.path.basename(joborder.get("id", "#")),
+                                document_loader,
+                                joborder,
+                                joborder.get("id", "#"),
+                                False)
+
+            if self.wf_pdh is None:
+                workflowobj["requirements"] = dedup_reqs(self.requirements)
+                workflowobj["hints"] = dedup_reqs(self.hints)
+
+                packed = pack(document_loader, workflowobj, uri, self.metadata)
 
-                if self.wf_pdh is None:
-                    workflowobj["requirements"] = dedup_reqs(self.requirements)
-                    workflowobj["hints"] = dedup_reqs(self.hints)
-
-                    packed = pack(document_loader, workflowobj, uri, self.metadata)
-
-                    builder = Builder(joborder,
-                                      requirements=workflowobj["requirements"],
-                                      hints=workflowobj["hints"],
-                                      resources={})
-
-                    def visit(item):
-                        for t in ("hints", "requirements"):
-                            if t not in item:
-                                continue
-                            for req in item[t]:
-                                if req["class"] == "ResourceRequirement":
-                                    dyn = False
-                                    for k in max_res_pars + sum_res_pars:
-                                        if k in req:
-                                            if isinstance(req[k], basestring):
-                                                if item["id"] == "#main":
-                                                    # only the top-level requirements/hints may contain expressions
-                                                    self.dynamic_resource_req.append(req)
-                                                    dyn = True
-                                                    break
-                                                else:
-                                                    with SourceLine(req, k, WorkflowException):
-                                                        raise WorkflowException("Non-top-level ResourceRequirement in single container cannot have expressions")
-                                    if not dyn:
-                                        self.static_resource_req.append(req)
-
-                    visit_class(packed["$graph"], ("Workflow", "CommandLineTool"), visit)
-
-                    if self.static_resource_req:
-                        self.static_resource_req = [get_overall_res_req(self.static_resource_req)]
-
-                    upload_dependencies(self.arvrunner,
-                                        runtimeContext.name,
-                                        document_loader,
-                                        packed,
-                                        uri,
-                                        False)
-
-                    # Discover files/directories referenced by the
-                    # workflow (mainly "default" values)
-                    visit_class(packed, ("File", "Directory"), self.wf_reffiles.append)
-
-
-            if self.dynamic_resource_req:
                 builder = Builder(joborder,
-                                  requirements=self.requirements,
-                                  hints=self.hints,
+                                  requirements=workflowobj["requirements"],
+                                  hints=workflowobj["hints"],
                                   resources={})
 
-                # Evaluate dynamic resource requirements using current builder
-                rs = copy.copy(self.static_resource_req)
-                for dyn_rs in self.dynamic_resource_req:
-                    eval_req = {"class": "ResourceRequirement"}
-                    for a in max_res_pars + sum_res_pars:
-                        if a in dyn_rs:
-                            eval_req[a] = builder.do_eval(dyn_rs[a])
-                    rs.append(eval_req)
-                job_res_reqs = [get_overall_res_req(rs)]
-            else:
-                job_res_reqs = self.static_resource_req
-
-            with Perf(metrics, "subworkflow adjust"):
-                joborder_resolved = copy.deepcopy(joborder)
-                joborder_keepmount = copy.deepcopy(joborder)
-
-                reffiles = []
-                visit_class(joborder_keepmount, ("File", "Directory"), reffiles.append)
-
-                mapper = ArvPathMapper(self.arvrunner, reffiles+self.wf_reffiles, runtimeContext.basedir,
-                                       "/keep/%s",
-                                       "/keep/%s/%s")
-
-                # For containers API, we need to make sure any extra
-                # referenced files (ie referenced by the workflow but
-                # not in the inputs) are included in the mounts.
-                if self.wf_reffiles:
-                    runtimeContext = runtimeContext.copy()
-                    runtimeContext.extra_reffiles = copy.deepcopy(self.wf_reffiles)
-
-                def keepmount(obj):
-                    remove_redundant_fields(obj)
-                    with SourceLine(obj, None, WorkflowException, logger.isEnabledFor(logging.DEBUG)):
-                        if "location" not in obj:
-                            raise WorkflowException("%s object is missing required 'location' field: %s" % (obj["class"], obj))
-                    with SourceLine(obj, "location", WorkflowException, logger.isEnabledFor(logging.DEBUG)):
-                        if obj["location"].startswith("keep:"):
-                            obj["location"] = mapper.mapper(obj["location"]).target
-                            if "listing" in obj:
-                                del obj["listing"]
-                        elif obj["location"].startswith("_:"):
-                            del obj["location"]
-                        else:
-                            raise WorkflowException("Location is not a keep reference or a literal: '%s'" % obj["location"])
-
-                visit_class(joborder_keepmount, ("File", "Directory"), keepmount)
-
-                def resolved(obj):
-                    if obj["location"].startswith("keep:"):
-                        obj["location"] = mapper.mapper(obj["location"]).resolved
-
-                visit_class(joborder_resolved, ("File", "Directory"), resolved)
-
-                if self.wf_pdh is None:
-                    adjustFileObjs(packed, keepmount)
-                    adjustDirObjs(packed, keepmount)
-                    self.wf_pdh = upload_workflow_collection(self.arvrunner, shortname(self.tool["id"]), packed)
-
-            wf_runner = cmap({
-                "class": "CommandLineTool",
-                "baseCommand": "cwltool",
-                "inputs": self.tool["inputs"],
-                "outputs": self.tool["outputs"],
-                "stdout": "cwl.output.json",
-                "requirements": self.requirements+job_res_reqs+[
-                    {"class": "InlineJavascriptRequirement"},
-                    {
-                    "class": "InitialWorkDirRequirement",
-                    "listing": [{
-                            "entryname": "workflow.cwl",
-                            "entry": '$({"class": "File", "location": "keep:%s/workflow.cwl"})' % self.wf_pdh
-                        }, {
-                            "entryname": "cwl.input.yml",
-                            "entry": json.dumps(joborder_keepmount, indent=2, sort_keys=True, separators=(',',': ')).replace("\\", "\\\\").replace('$(', '\$(').replace('${', '\${')
-                        }]
-                }],
-                "hints": self.hints,
-                "arguments": ["--no-container", "--move-outputs", "--preserve-entire-environment", "workflow.cwl#main", "cwl.input.yml"],
-                "id": "#"
-            })
-            return ArvadosCommandTool(self.arvrunner, wf_runner, self.loadingContext).job(joborder_resolved, output_callback, runtimeContext)
+                def visit(item):
+                    for t in ("hints", "requirements"):
+                        if t not in item:
+                            continue
+                        for req in item[t]:
+                            if req["class"] == "ResourceRequirement":
+                                dyn = False
+                                for k in max_res_pars + sum_res_pars:
+                                    if k in req:
+                                        if isinstance(req[k], basestring):
+                                            if item["id"] == "#main":
+                                                # only the top-level requirements/hints may contain expressions
+                                                self.dynamic_resource_req.append(req)
+                                                dyn = True
+                                                break
+                                            else:
+                                                with SourceLine(req, k, WorkflowException):
+                                                    raise WorkflowException("Non-top-level ResourceRequirement in single container cannot have expressions")
+                                if not dyn:
+                                    self.static_resource_req.append(req)
+
+                visit_class(packed["$graph"], ("Workflow", "CommandLineTool"), visit)
+
+                if self.static_resource_req:
+                    self.static_resource_req = [get_overall_res_req(self.static_resource_req)]
+
+                upload_dependencies(self.arvrunner,
+                                    runtimeContext.name,
+                                    document_loader,
+                                    packed,
+                                    uri,
+                                    False)
+
+                # Discover files/directories referenced by the
+                # workflow (mainly "default" values)
+                visit_class(packed, ("File", "Directory"), self.wf_reffiles.append)
+
+
+        if self.dynamic_resource_req:
+            builder = Builder(joborder,
+                              requirements=self.requirements,
+                              hints=self.hints,
+                              resources={})
+
+            # Evaluate dynamic resource requirements using current builder
+            rs = copy.copy(self.static_resource_req)
+            for dyn_rs in self.dynamic_resource_req:
+                eval_req = {"class": "ResourceRequirement"}
+                for a in max_res_pars + sum_res_pars:
+                    if a in dyn_rs:
+                        eval_req[a] = builder.do_eval(dyn_rs[a])
+                rs.append(eval_req)
+            job_res_reqs = [get_overall_res_req(rs)]
         else:
-            return super(ArvadosWorkflow, self).job(joborder, output_callback, runtimeContext)
+            job_res_reqs = self.static_resource_req
+
+        with Perf(metrics, "subworkflow adjust"):
+            joborder_resolved = copy.deepcopy(joborder)
+            joborder_keepmount = copy.deepcopy(joborder)
+
+            reffiles = []
+            visit_class(joborder_keepmount, ("File", "Directory"), reffiles.append)
+
+            mapper = ArvPathMapper(self.arvrunner, reffiles+self.wf_reffiles, runtimeContext.basedir,
+                                   "/keep/%s",
+                                   "/keep/%s/%s")
+
+            # For containers API, we need to make sure any extra
+            # referenced files (ie referenced by the workflow but
+            # not in the inputs) are included in the mounts.
+            if self.wf_reffiles:
+                runtimeContext = runtimeContext.copy()
+                runtimeContext.extra_reffiles = copy.deepcopy(self.wf_reffiles)
+
+            def keepmount(obj):
+                remove_redundant_fields(obj)
+                with SourceLine(obj, None, WorkflowException, logger.isEnabledFor(logging.DEBUG)):
+                    if "location" not in obj:
+                        raise WorkflowException("%s object is missing required 'location' field: %s" % (obj["class"], obj))
+                with SourceLine(obj, "location", WorkflowException, logger.isEnabledFor(logging.DEBUG)):
+                    if obj["location"].startswith("keep:"):
+                        obj["location"] = mapper.mapper(obj["location"]).target
+                        if "listing" in obj:
+                            del obj["listing"]
+                    elif obj["location"].startswith("_:"):
+                        del obj["location"]
+                    else:
+                        raise WorkflowException("Location is not a keep reference or a literal: '%s'" % obj["location"])
+
+            visit_class(joborder_keepmount, ("File", "Directory"), keepmount)
+
+            def resolved(obj):
+                if obj["location"].startswith("keep:"):
+                    obj["location"] = mapper.mapper(obj["location"]).resolved
+
+            visit_class(joborder_resolved, ("File", "Directory"), resolved)
+
+            if self.wf_pdh is None:
+                adjustFileObjs(packed, keepmount)
+                adjustDirObjs(packed, keepmount)
+                self.wf_pdh = upload_workflow_collection(self.arvrunner, shortname(self.tool["id"]), packed)
+
+        wf_runner = cmap({
+            "class": "CommandLineTool",
+            "baseCommand": "cwltool",
+            "inputs": self.tool["inputs"],
+            "outputs": self.tool["outputs"],
+            "stdout": "cwl.output.json",
+            "requirements": self.requirements+job_res_reqs+[
+                {"class": "InlineJavascriptRequirement"},
+                {
+                "class": "InitialWorkDirRequirement",
+                "listing": [{
+                        "entryname": "workflow.cwl",
+                        "entry": '$({"class": "File", "location": "keep:%s/workflow.cwl"})' % self.wf_pdh
+                    }, {
+                        "entryname": "cwl.input.yml",
+                        "entry": json.dumps(joborder_keepmount, indent=2, sort_keys=True, separators=(',',': ')).replace("\\", "\\\\").replace('$(', '\$(').replace('${', '\${')
+                    }]
+            }],
+            "hints": self.hints,
+            "arguments": ["--no-container", "--move-outputs", "--preserve-entire-environment", "workflow.cwl#main", "cwl.input.yml"],
+            "id": "#"
+        })
+        return ArvadosCommandTool(self.arvrunner, wf_runner, self.loadingContext).job(joborder_resolved, output_callback, runtimeContext)
diff --git a/sdk/cwl/arvados_cwl/context.py b/sdk/cwl/arvados_cwl/context.py
index 4e1334c1c..23e7b91a0 100644
--- a/sdk/cwl/arvados_cwl/context.py
+++ b/sdk/cwl/arvados_cwl/context.py
@@ -3,11 +3,14 @@
 # SPDX-License-Identifier: Apache-2.0
 
 from cwltool.context import LoadingContext, RuntimeContext
+from collections import namedtuple
 
 class ArvLoadingContext(LoadingContext):
     def __init__(self, kwargs=None):
         super(ArvLoadingContext, self).__init__(kwargs)
 
+ClusterTarget = namedtuple("ClusterTarget", ("instance", "cluster_id", "owner_uuid"))
+
 class ArvRuntimeContext(RuntimeContext):
     def __init__(self, kwargs=None):
         self.work_api = None
@@ -31,6 +34,7 @@ class ArvRuntimeContext(RuntimeContext):
         self.current_container = None
         self.http_timeout = 300
         self.submit_runner_cluster = None
+        self.cluster_target = None
 
         super(ArvRuntimeContext, self).__init__(kwargs)
 

commit 2ac3c5ecaf80b4f3139ae1f28288a7356861a173
Author: Peter Amstutz <pamstutz at veritasgenetics.com>
Date:   Thu Oct 25 15:19:05 2018 -0400

    14198: Resolve Docker images to PDH and set "http://arvados.org/cwl#dockerCollectionPDH"
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz at veritasgenetics.com>

diff --git a/sdk/cwl/arvados_cwl/arvcontainer.py b/sdk/cwl/arvados_cwl/arvcontainer.py
index 7e22a7d39..d9466a83a 100644
--- a/sdk/cwl/arvados_cwl/arvcontainer.py
+++ b/sdk/cwl/arvados_cwl/arvcontainer.py
@@ -214,8 +214,7 @@ class ArvadosContainer(JobBase):
         container_request["container_image"] = arv_docker_get_image(self.arvrunner.api,
                                                                     docker_req,
                                                                     runtimeContext.pull_image,
-                                                                    self.arvrunner.project_uuid,
-                                                                    runtimeContext.submit_runner_cluster)
+                                                                    self.arvrunner.project_uuid)
 
         api_req, _ = self.get_requirement("http://arvados.org/cwl#APIRequirement")
         if api_req:
diff --git a/sdk/cwl/arvados_cwl/arvdocker.py b/sdk/cwl/arvados_cwl/arvdocker.py
index 6bca07c88..84006b47d 100644
--- a/sdk/cwl/arvados_cwl/arvdocker.py
+++ b/sdk/cwl/arvados_cwl/arvdocker.py
@@ -21,6 +21,9 @@ cached_lookups_lock = threading.Lock()
 def arv_docker_get_image(api_client, dockerRequirement, pull_image, project_uuid):
     """Check if a Docker image is available in Keep, if not, upload it using arv-keepdocker."""
 
+    if "http://arvados.org/cwl#dockerCollectionPDH" in dockerRequirement:
+        return dockerRequirement["http://arvados.org/cwl#dockerCollectionPDH"]
+
     if "dockerImageId" not in dockerRequirement and "dockerPull" in dockerRequirement:
         dockerRequirement = copy.deepcopy(dockerRequirement)
         dockerRequirement["dockerImageId"] = dockerRequirement["dockerPull"]
diff --git a/sdk/cwl/arvados_cwl/executor.py b/sdk/cwl/arvados_cwl/executor.py
index bbfb8ffc6..6eaa4b92c 100644
--- a/sdk/cwl/arvados_cwl/executor.py
+++ b/sdk/cwl/arvados_cwl/executor.py
@@ -37,6 +37,7 @@ from cwltool.pathmapper import adjustFileObjs, adjustDirObjs, get_listing
 from cwltool.command_line_tool import compute_checksums
 
 logger = logging.getLogger('arvados.cwl-runner')
+metrics = logging.getLogger('arvados.cwl-runner.metrics')
 
 class RuntimeStatusLoggingHandler(logging.Handler):
     """
diff --git a/sdk/cwl/arvados_cwl/runner.py b/sdk/cwl/arvados_cwl/runner.py
index 3b40552ac..31a424d30 100644
--- a/sdk/cwl/arvados_cwl/runner.py
+++ b/sdk/cwl/arvados_cwl/runner.py
@@ -26,7 +26,7 @@ from cwltool.pack import pack
 import arvados.collection
 import ruamel.yaml as yaml
 
-from .arvdocker import arv_docker_get_image
+import arvdocker
 from .pathmapper import ArvPathMapper, trim_listing
 from ._version import __version__
 from . import done
@@ -215,9 +215,9 @@ def upload_docker(arvrunner, tool):
                 # TODO: can be supported by containers API, but not jobs API.
                 raise SourceLine(docker_req, "dockerOutputDirectory", UnsupportedRequirement).makeError(
                     "Option 'dockerOutputDirectory' of DockerRequirement not supported.")
-            arv_docker_get_image(arvrunner.api, docker_req, True, arvrunner.project_uuid)
+            arvdocker.arv_docker_get_image(arvrunner.api, docker_req, True, arvrunner.project_uuid)
         else:
-            arv_docker_get_image(arvrunner.api, {"dockerPull": "arvados/jobs"}, True, arvrunner.project_uuid)
+            arvdocker.arv_docker_get_image(arvrunner.api, {"dockerPull": "arvados/jobs"}, True, arvrunner.project_uuid)
     elif isinstance(tool, cwltool.workflow.Workflow):
         for s in tool.steps:
             upload_docker(arvrunner, s.embedded_tool)
@@ -244,6 +244,9 @@ def packed_workflow(arvrunner, tool, merged_map):
                 v["location"] = merged_map[cur_id].resolved[v["location"]]
             if "location" in v and v["location"] in merged_map[cur_id].secondaryFiles:
                 v["secondaryFiles"] = merged_map[cur_id].secondaryFiles[v["location"]]
+            if v.get("class") == "DockerRequirement":
+                img = v.get("dockerImageId") or v.get("dockerPull")
+                v["http://arvados.org/cwl#dockerCollectionPDH"] = arvdocker.cached_lookups[img]
             for l in v:
                 visit(v[l], cur_id)
         if isinstance(v, list):
@@ -324,7 +327,7 @@ def arvados_jobs_image(arvrunner, img):
     """Determine if the right arvados/jobs image version is available.  If not, try to pull and upload it."""
 
     try:
-        return arv_docker_get_image(arvrunner.api, {"dockerPull": img}, True, arvrunner.project_uuid)
+        return arvdocker.arv_docker_get_image(arvrunner.api, {"dockerPull": img}, True, arvrunner.project_uuid)
     except Exception as e:
         raise Exception("Docker image %s is not available\n%s" % (img, e) )
 

commit a921230d1871b1bb6a03d425fd6b4c4016f9f807
Author: Peter Amstutz <pamstutz at veritasgenetics.com>
Date:   Thu Oct 25 15:03:23 2018 -0400

    14198: Add genericFederatedRequestHandler support for links
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz at veritasgenetics.com>

diff --git a/lib/controller/federation.go b/lib/controller/federation.go
index 0e016f301..8ad34157c 100644
--- a/lib/controller/federation.go
+++ b/lib/controller/federation.go
@@ -28,6 +28,7 @@ var containersRe = regexp.MustCompile(fmt.Sprintf(pathPattern, "containers", "dz
 var containerRequestsRe = regexp.MustCompile(fmt.Sprintf(pathPattern, "container_requests", "xvhdp"))
 var collectionRe = regexp.MustCompile(fmt.Sprintf(pathPattern, "collections", "4zz18"))
 var collectionByPDHRe = regexp.MustCompile(`^/arvados/v1/collections/([0-9a-fA-F]{32}\+[0-9]+)+$`)
+var linksRe = regexp.MustCompile(fmt.Sprintf(pathPattern, "links", "o0j2j"))
 
 func (h *Handler) remoteClusterRequest(remoteID string, req *http.Request) (*http.Response, context.CancelFunc, error) {
 	remote, ok := h.Cluster.RemoteClusters[remoteID]
@@ -90,9 +91,12 @@ func (h *Handler) setupProxyRemoteCluster(next http.Handler) http.Handler {
 	containersHandler := &genericFederatedRequestHandler{next, h, containersRe, nil}
 	containerRequestsHandler := &genericFederatedRequestHandler{next, h, containerRequestsRe,
 		[]federatedRequestDelegate{remoteContainerRequestCreate}}
+	linksHandler := &genericFederatedRequestHandler{next, h, linksRe, nil}
 
 	mux.Handle("/arvados/v1/workflows", wfHandler)
 	mux.Handle("/arvados/v1/workflows/", wfHandler)
+	mux.Handle("/arvados/v1/links", linksHandler)
+	mux.Handle("/arvados/v1/links/", linksHandler)
 	mux.Handle("/arvados/v1/containers", containersHandler)
 	mux.Handle("/arvados/v1/containers/", containersHandler)
 	mux.Handle("/arvados/v1/container_requests", containerRequestsHandler)

commit ddf5bc12a24635fe102f0eca889720c77ca90648
Author: Peter Amstutz <pamstutz at veritasgenetics.com>
Date:   Thu Oct 25 15:02:23 2018 -0400

    14198: Refactor and add support for --submit-runner-cluster
    
    Rename ArvCwlRunner to ArvCwlExecutor and move into its own file.
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz at veritasgenetics.com>

diff --git a/sdk/cwl/arvados_cwl/__init__.py b/sdk/cwl/arvados_cwl/__init__.py
index 0866f69d6..63fc3ea47 100644
--- a/sdk/cwl/arvados_cwl/__init__.py
+++ b/sdk/cwl/arvados_cwl/__init__.py
@@ -10,19 +10,9 @@ import argparse
 import logging
 import os
 import sys
-import threading
-import hashlib
-import copy
-import json
 import re
-from functools import partial
 import pkg_resources  # part of setuptools
-import Queue
-import time
-import signal
-import thread
 
-from cwltool.errors import WorkflowException
 import cwltool.main
 import cwltool.workflow
 import cwltool.process
@@ -36,23 +26,12 @@ from arvados.keep import KeepClient
 from arvados.errors import ApiError
 import arvados.commands._util as arv_cmd
 
-from .arvcontainer import ArvadosContainer, RunnerContainer
-from .arvjob import ArvadosJob, RunnerJob, RunnerTemplate
-from .runner import Runner, upload_docker, upload_job_order, upload_workflow_deps
-from .arvtool import ArvadosCommandTool
-from .arvworkflow import ArvadosWorkflow, upload_workflow
-from .fsaccess import CollectionFsAccess, CollectionFetcher, collectionResolver, CollectionCache
 from .perf import Perf
-from .pathmapper import NoFollowPathMapper
-from .task_queue import TaskQueue
-from .context import ArvLoadingContext, ArvRuntimeContext
-from .util import get_current_container
 from ._version import __version__
+from .executor import ArvCwlExecutor
 
-from cwltool.pack import pack
 from cwltool.process import shortname, UnsupportedRequirement, use_custom_schema
 from cwltool.pathmapper import adjustFileObjs, adjustDirObjs, get_listing
-from cwltool.command_line_tool import compute_checksums
 
 from arvados.api import OrderedJsonModel
 
@@ -66,677 +45,6 @@ arvados.log_handler.setFormatter(logging.Formatter(
 
 DEFAULT_PRIORITY = 500
 
-class RuntimeStatusLoggingHandler(logging.Handler):
-    """
-    Intercepts logging calls and report them as runtime statuses on runner
-    containers.
-    """
-    def __init__(self, runtime_status_update_func):
-        super(RuntimeStatusLoggingHandler, self).__init__()
-        self.runtime_status_update = runtime_status_update_func
-
-    def emit(self, record):
-        kind = None
-        if record.levelno >= logging.ERROR:
-            kind = 'error'
-        elif record.levelno >= logging.WARNING:
-            kind = 'warning'
-        if kind is not None:
-            log_msg = record.getMessage()
-            if '\n' in log_msg:
-                # If the logged message is multi-line, use its first line as status
-                # and the rest as detail.
-                status, detail = log_msg.split('\n', 1)
-                self.runtime_status_update(
-                    kind,
-                    "%s: %s" % (record.name, status),
-                    detail
-                )
-            else:
-                self.runtime_status_update(
-                    kind,
-                    "%s: %s" % (record.name, record.getMessage())
-                )
-
-class ArvCwlRunner(object):
-    """Execute a CWL tool or workflow, submit work (using either jobs or
-    containers API), wait for them to complete, and report output.
-
-    """
-
-    def __init__(self, api_client,
-                 arvargs=None,
-                 keep_client=None,
-                 num_retries=4,
-                 thread_count=4):
-
-        if arvargs is None:
-            arvargs = argparse.Namespace()
-            arvargs.work_api = None
-            arvargs.output_name = None
-            arvargs.output_tags = None
-            arvargs.thread_count = 1
-
-        self.api = api_client
-        self.processes = {}
-        self.workflow_eval_lock = threading.Condition(threading.RLock())
-        self.final_output = None
-        self.final_status = None
-        self.num_retries = num_retries
-        self.uuid = None
-        self.stop_polling = threading.Event()
-        self.poll_api = None
-        self.pipeline = None
-        self.final_output_collection = None
-        self.output_name = arvargs.output_name
-        self.output_tags = arvargs.output_tags
-        self.project_uuid = None
-        self.intermediate_output_ttl = 0
-        self.intermediate_output_collections = []
-        self.trash_intermediate = False
-        self.thread_count = arvargs.thread_count
-        self.poll_interval = 12
-        self.loadingContext = None
-
-        if keep_client is not None:
-            self.keep_client = keep_client
-        else:
-            self.keep_client = arvados.keep.KeepClient(api_client=self.api, num_retries=self.num_retries)
-
-        self.collection_cache = CollectionCache(self.api, self.keep_client, self.num_retries)
-
-        self.fetcher_constructor = partial(CollectionFetcher,
-                                           api_client=self.api,
-                                           fs_access=CollectionFsAccess("", collection_cache=self.collection_cache),
-                                           num_retries=self.num_retries)
-
-        self.work_api = None
-        expected_api = ["jobs", "containers"]
-        for api in expected_api:
-            try:
-                methods = self.api._rootDesc.get('resources')[api]['methods']
-                if ('httpMethod' in methods['create'] and
-                    (arvargs.work_api == api or arvargs.work_api is None)):
-                    self.work_api = api
-                    break
-            except KeyError:
-                pass
-
-        if not self.work_api:
-            if arvargs.work_api is None:
-                raise Exception("No supported APIs")
-            else:
-                raise Exception("Unsupported API '%s', expected one of %s" % (arvargs.work_api, expected_api))
-
-        if self.work_api == "jobs":
-            logger.warn("""
-*******************************
-Using the deprecated 'jobs' API.
-
-To get rid of this warning:
-
-Users: read about migrating at
-http://doc.arvados.org/user/cwl/cwl-style.html#migrate
-and use the option --api=containers
-
-Admins: configure the cluster to disable the 'jobs' API as described at:
-http://doc.arvados.org/install/install-api-server.html#disable_api_methods
-*******************************""")
-
-        self.loadingContext = ArvLoadingContext(vars(arvargs))
-        self.loadingContext.fetcher_constructor = self.fetcher_constructor
-        self.loadingContext.resolver = partial(collectionResolver, self.api, num_retries=self.num_retries)
-        self.loadingContext.construct_tool_object = self.arv_make_tool
-
-        # Add a custom logging handler to the root logger for runtime status reporting
-        # if running inside a container
-        if get_current_container(self.api, self.num_retries, logger):
-            root_logger = logging.getLogger('')
-            handler = RuntimeStatusLoggingHandler(self.runtime_status_update)
-            root_logger.addHandler(handler)
-
-    def arv_make_tool(self, toolpath_object, loadingContext):
-        if "class" in toolpath_object and toolpath_object["class"] == "CommandLineTool":
-            return ArvadosCommandTool(self, toolpath_object, loadingContext)
-        elif "class" in toolpath_object and toolpath_object["class"] == "Workflow":
-            return ArvadosWorkflow(self, toolpath_object, loadingContext)
-        else:
-            return cwltool.workflow.default_make_tool(toolpath_object, loadingContext)
-
-    def output_callback(self, out, processStatus):
-        with self.workflow_eval_lock:
-            if processStatus == "success":
-                logger.info("Overall process status is %s", processStatus)
-                state = "Complete"
-            else:
-                logger.error("Overall process status is %s", processStatus)
-                state = "Failed"
-            if self.pipeline:
-                self.api.pipeline_instances().update(uuid=self.pipeline["uuid"],
-                                                        body={"state": state}).execute(num_retries=self.num_retries)
-            self.final_status = processStatus
-            self.final_output = out
-            self.workflow_eval_lock.notifyAll()
-
-
-    def start_run(self, runnable, runtimeContext):
-        self.task_queue.add(partial(runnable.run, runtimeContext))
-
-    def process_submitted(self, container):
-        with self.workflow_eval_lock:
-            self.processes[container.uuid] = container
-
-    def process_done(self, uuid, record):
-        with self.workflow_eval_lock:
-            j = self.processes[uuid]
-            logger.info("%s %s is %s", self.label(j), uuid, record["state"])
-            self.task_queue.add(partial(j.done, record))
-            del self.processes[uuid]
-
-    def runtime_status_update(self, kind, message, detail=None):
-        """
-        Updates the runtime_status field on the runner container.
-        Called when there's a need to report errors, warnings or just
-        activity statuses, for example in the RuntimeStatusLoggingHandler.
-        """
-        with self.workflow_eval_lock:
-            current = get_current_container(self.api, self.num_retries, logger)
-            if current is None:
-                return
-            runtime_status = current.get('runtime_status', {})
-            # In case of status being an error, only report the first one.
-            if kind == 'error':
-                if not runtime_status.get('error'):
-                    runtime_status.update({
-                        'error': message
-                    })
-                    if detail is not None:
-                        runtime_status.update({
-                            'errorDetail': detail
-                        })
-                # Further errors are only mentioned as a count.
-                else:
-                    # Get anything before an optional 'and N more' string.
-                    try:
-                        error_msg = re.match(
-                            r'^(.*?)(?=\s*\(and \d+ more\)|$)', runtime_status.get('error')).groups()[0]
-                        more_failures = re.match(
-                            r'.*\(and (\d+) more\)', runtime_status.get('error'))
-                    except TypeError:
-                        # Ignore tests stubbing errors
-                        return
-                    if more_failures:
-                        failure_qty = int(more_failures.groups()[0])
-                        runtime_status.update({
-                            'error': "%s (and %d more)" % (error_msg, failure_qty+1)
-                        })
-                    else:
-                        runtime_status.update({
-                            'error': "%s (and 1 more)" % error_msg
-                        })
-            elif kind in ['warning', 'activity']:
-                # Record the last warning/activity status without regard of
-                # previous occurences.
-                runtime_status.update({
-                    kind: message
-                })
-                if detail is not None:
-                    runtime_status.update({
-                        kind+"Detail": detail
-                    })
-            else:
-                # Ignore any other status kind
-                return
-            try:
-                self.api.containers().update(uuid=current['uuid'],
-                                            body={
-                                                'runtime_status': runtime_status,
-                                            }).execute(num_retries=self.num_retries)
-            except Exception as e:
-                logger.info("Couldn't update runtime_status: %s", e)
-
-    def wrapped_callback(self, cb, obj, st):
-        with self.workflow_eval_lock:
-            cb(obj, st)
-            self.workflow_eval_lock.notifyAll()
-
-    def get_wrapped_callback(self, cb):
-        return partial(self.wrapped_callback, cb)
-
-    def on_message(self, event):
-        if event.get("object_uuid") in self.processes and event["event_type"] == "update":
-            uuid = event["object_uuid"]
-            if event["properties"]["new_attributes"]["state"] == "Running":
-                with self.workflow_eval_lock:
-                    j = self.processes[uuid]
-                    if j.running is False:
-                        j.running = True
-                        j.update_pipeline_component(event["properties"]["new_attributes"])
-                        logger.info("%s %s is Running", self.label(j), uuid)
-            elif event["properties"]["new_attributes"]["state"] in ("Complete", "Failed", "Cancelled", "Final"):
-                self.process_done(uuid, event["properties"]["new_attributes"])
-
-    def label(self, obj):
-        return "[%s %s]" % (self.work_api[0:-1], obj.name)
-
-    def poll_states(self):
-        """Poll status of jobs or containers listed in the processes dict.
-
-        Runs in a separate thread.
-        """
-
-        try:
-            remain_wait = self.poll_interval
-            while True:
-                if remain_wait > 0:
-                    self.stop_polling.wait(remain_wait)
-                if self.stop_polling.is_set():
-                    break
-                with self.workflow_eval_lock:
-                    keys = list(self.processes.keys())
-                if not keys:
-                    remain_wait = self.poll_interval
-                    continue
-
-                begin_poll = time.time()
-                if self.work_api == "containers":
-                    table = self.poll_api.container_requests()
-                elif self.work_api == "jobs":
-                    table = self.poll_api.jobs()
-
-                try:
-                    proc_states = table.list(filters=[["uuid", "in", keys]]).execute(num_retries=self.num_retries)
-                except Exception as e:
-                    logger.warn("Error checking states on API server: %s", e)
-                    remain_wait = self.poll_interval
-                    continue
-
-                for p in proc_states["items"]:
-                    self.on_message({
-                        "object_uuid": p["uuid"],
-                        "event_type": "update",
-                        "properties": {
-                            "new_attributes": p
-                        }
-                    })
-                finish_poll = time.time()
-                remain_wait = self.poll_interval - (finish_poll - begin_poll)
-        except:
-            logger.exception("Fatal error in state polling thread.")
-            with self.workflow_eval_lock:
-                self.processes.clear()
-                self.workflow_eval_lock.notifyAll()
-        finally:
-            self.stop_polling.set()
-
-    def add_intermediate_output(self, uuid):
-        if uuid:
-            self.intermediate_output_collections.append(uuid)
-
-    def trash_intermediate_output(self):
-        logger.info("Cleaning up intermediate output collections")
-        for i in self.intermediate_output_collections:
-            try:
-                self.api.collections().delete(uuid=i).execute(num_retries=self.num_retries)
-            except:
-                logger.warn("Failed to delete intermediate output: %s", sys.exc_info()[1], exc_info=(sys.exc_info()[1] if self.debug else False))
-            if sys.exc_info()[0] is KeyboardInterrupt or sys.exc_info()[0] is SystemExit:
-                break
-
-    def check_features(self, obj):
-        if isinstance(obj, dict):
-            if obj.get("writable") and self.work_api != "containers":
-                raise SourceLine(obj, "writable", UnsupportedRequirement).makeError("InitialWorkDir feature 'writable: true' not supported with --api=jobs")
-            if obj.get("class") == "DockerRequirement":
-                if obj.get("dockerOutputDirectory"):
-                    if self.work_api != "containers":
-                        raise SourceLine(obj, "dockerOutputDirectory", UnsupportedRequirement).makeError(
-                            "Option 'dockerOutputDirectory' of DockerRequirement not supported with --api=jobs.")
-                    if not obj.get("dockerOutputDirectory").startswith('/'):
-                        raise SourceLine(obj, "dockerOutputDirectory", validate.ValidationException).makeError(
-                            "Option 'dockerOutputDirectory' must be an absolute path.")
-            if obj.get("class") == "http://commonwl.org/cwltool#Secrets" and self.work_api != "containers":
-                raise SourceLine(obj, "class", UnsupportedRequirement).makeError("Secrets not supported with --api=jobs")
-            for v in obj.itervalues():
-                self.check_features(v)
-        elif isinstance(obj, list):
-            for i,v in enumerate(obj):
-                with SourceLine(obj, i, UnsupportedRequirement, logger.isEnabledFor(logging.DEBUG)):
-                    self.check_features(v)
-
-    def make_output_collection(self, name, storage_classes, tagsString, outputObj):
-        outputObj = copy.deepcopy(outputObj)
-
-        files = []
-        def capture(fileobj):
-            files.append(fileobj)
-
-        adjustDirObjs(outputObj, capture)
-        adjustFileObjs(outputObj, capture)
-
-        generatemapper = NoFollowPathMapper(files, "", "", separateDirs=False)
-
-        final = arvados.collection.Collection(api_client=self.api,
-                                              keep_client=self.keep_client,
-                                              num_retries=self.num_retries)
-
-        for k,v in generatemapper.items():
-            if k.startswith("_:"):
-                if v.type == "Directory":
-                    continue
-                if v.type == "CreateFile":
-                    with final.open(v.target, "wb") as f:
-                        f.write(v.resolved.encode("utf-8"))
-                    continue
-
-            if not k.startswith("keep:"):
-                raise Exception("Output source is not in keep or a literal")
-            sp = k.split("/")
-            srccollection = sp[0][5:]
-            try:
-                reader = self.collection_cache.get(srccollection)
-                srcpath = "/".join(sp[1:]) if len(sp) > 1 else "."
-                final.copy(srcpath, v.target, source_collection=reader, overwrite=False)
-            except arvados.errors.ArgumentError as e:
-                logger.error("Creating CollectionReader for '%s' '%s': %s", k, v, e)
-                raise
-            except IOError as e:
-                logger.warn("While preparing output collection: %s", e)
-
-        def rewrite(fileobj):
-            fileobj["location"] = generatemapper.mapper(fileobj["location"]).target
-            for k in ("listing", "contents", "nameext", "nameroot", "dirname"):
-                if k in fileobj:
-                    del fileobj[k]
-
-        adjustDirObjs(outputObj, rewrite)
-        adjustFileObjs(outputObj, rewrite)
-
-        with final.open("cwl.output.json", "w") as f:
-            json.dump(outputObj, f, sort_keys=True, indent=4, separators=(',',': '))
-
-        final.save_new(name=name, owner_uuid=self.project_uuid, storage_classes=storage_classes, ensure_unique_name=True)
-
-        logger.info("Final output collection %s \"%s\" (%s)", final.portable_data_hash(),
-                    final.api_response()["name"],
-                    final.manifest_locator())
-
-        final_uuid = final.manifest_locator()
-        tags = tagsString.split(',')
-        for tag in tags:
-             self.api.links().create(body={
-                "head_uuid": final_uuid, "link_class": "tag", "name": tag
-                }).execute(num_retries=self.num_retries)
-
-        def finalcollection(fileobj):
-            fileobj["location"] = "keep:%s/%s" % (final.portable_data_hash(), fileobj["location"])
-
-        adjustDirObjs(outputObj, finalcollection)
-        adjustFileObjs(outputObj, finalcollection)
-
-        return (outputObj, final)
-
-    def set_crunch_output(self):
-        if self.work_api == "containers":
-            current = get_current_container(self.api, self.num_retries, logger)
-            if current is None:
-                return
-            try:
-                self.api.containers().update(uuid=current['uuid'],
-                                             body={
-                                                 'output': self.final_output_collection.portable_data_hash(),
-                                             }).execute(num_retries=self.num_retries)
-                self.api.collections().update(uuid=self.final_output_collection.manifest_locator(),
-                                              body={
-                                                  'is_trashed': True
-                                              }).execute(num_retries=self.num_retries)
-            except Exception as e:
-                logger.info("Setting container output: %s", e)
-        elif self.work_api == "jobs" and "TASK_UUID" in os.environ:
-            self.api.job_tasks().update(uuid=os.environ["TASK_UUID"],
-                                   body={
-                                       'output': self.final_output_collection.portable_data_hash(),
-                                       'success': self.final_status == "success",
-                                       'progress':1.0
-                                   }).execute(num_retries=self.num_retries)
-
-    def arv_executor(self, tool, job_order, runtimeContext, logger=None):
-        self.debug = runtimeContext.debug
-
-        tool.visit(self.check_features)
-
-        self.project_uuid = runtimeContext.project_uuid
-        self.pipeline = None
-        self.fs_access = runtimeContext.make_fs_access(runtimeContext.basedir)
-        self.secret_store = runtimeContext.secret_store
-
-        self.trash_intermediate = runtimeContext.trash_intermediate
-        if self.trash_intermediate and self.work_api != "containers":
-            raise Exception("--trash-intermediate is only supported with --api=containers.")
-
-        self.intermediate_output_ttl = runtimeContext.intermediate_output_ttl
-        if self.intermediate_output_ttl and self.work_api != "containers":
-            raise Exception("--intermediate-output-ttl is only supported with --api=containers.")
-        if self.intermediate_output_ttl < 0:
-            raise Exception("Invalid value %d for --intermediate-output-ttl, cannot be less than zero" % self.intermediate_output_ttl)
-
-        if runtimeContext.submit_request_uuid and self.work_api != "containers":
-            raise Exception("--submit-request-uuid requires containers API, but using '{}' api".format(self.work_api))
-
-        if not runtimeContext.name:
-            runtimeContext.name = self.name = tool.tool.get("label") or tool.metadata.get("label") or os.path.basename(tool.tool["id"])
-
-        # Upload direct dependencies of workflow steps, get back mapping of files to keep references.
-        # Also uploads docker images.
-        merged_map = upload_workflow_deps(self, tool)
-
-        # Reload tool object which may have been updated by
-        # upload_workflow_deps
-        # Don't validate this time because it will just print redundant errors.
-        loadingContext = self.loadingContext.copy()
-        loadingContext.loader = tool.doc_loader
-        loadingContext.avsc_names = tool.doc_schema
-        loadingContext.metadata = tool.metadata
-        loadingContext.do_validate = False
-
-        tool = self.arv_make_tool(tool.doc_loader.idx[tool.tool["id"]],
-                                  loadingContext)
-
-        # Upload local file references in the job order.
-        job_order = upload_job_order(self, "%s input" % runtimeContext.name,
-                                     tool, job_order)
-
-        existing_uuid = runtimeContext.update_workflow
-        if existing_uuid or runtimeContext.create_workflow:
-            # Create a pipeline template or workflow record and exit.
-            if self.work_api == "jobs":
-                tmpl = RunnerTemplate(self, tool, job_order,
-                                      runtimeContext.enable_reuse,
-                                      uuid=existing_uuid,
-                                      submit_runner_ram=runtimeContext.submit_runner_ram,
-                                      name=runtimeContext.name,
-                                      merged_map=merged_map)
-                tmpl.save()
-                # cwltool.main will write our return value to stdout.
-                return (tmpl.uuid, "success")
-            elif self.work_api == "containers":
-                return (upload_workflow(self, tool, job_order,
-                                        self.project_uuid,
-                                        uuid=existing_uuid,
-                                        submit_runner_ram=runtimeContext.submit_runner_ram,
-                                        name=runtimeContext.name,
-                                        merged_map=merged_map),
-                        "success")
-
-        self.ignore_docker_for_reuse = runtimeContext.ignore_docker_for_reuse
-        self.eval_timeout = runtimeContext.eval_timeout
-
-        runtimeContext = runtimeContext.copy()
-        runtimeContext.use_container = True
-        runtimeContext.tmpdir_prefix = "tmp"
-        runtimeContext.work_api = self.work_api
-
-        if self.work_api == "containers":
-            if self.ignore_docker_for_reuse:
-                raise Exception("--ignore-docker-for-reuse not supported with containers API.")
-            runtimeContext.outdir = "/var/spool/cwl"
-            runtimeContext.docker_outdir = "/var/spool/cwl"
-            runtimeContext.tmpdir = "/tmp"
-            runtimeContext.docker_tmpdir = "/tmp"
-        elif self.work_api == "jobs":
-            if runtimeContext.priority != DEFAULT_PRIORITY:
-                raise Exception("--priority not implemented for jobs API.")
-            runtimeContext.outdir = "$(task.outdir)"
-            runtimeContext.docker_outdir = "$(task.outdir)"
-            runtimeContext.tmpdir = "$(task.tmpdir)"
-
-        if runtimeContext.priority < 1 or runtimeContext.priority > 1000:
-            raise Exception("--priority must be in the range 1..1000.")
-
-        runnerjob = None
-        if runtimeContext.submit:
-            # Submit a runner job to run the workflow for us.
-            if self.work_api == "containers":
-                if tool.tool["class"] == "CommandLineTool" and runtimeContext.wait:
-                    runtimeContext.runnerjob = tool.tool["id"]
-                    runnerjob = tool.job(job_order,
-                                         self.output_callback,
-                                         runtimeContext).next()
-                else:
-                    runnerjob = RunnerContainer(self, tool, job_order, runtimeContext.enable_reuse,
-                                                self.output_name,
-                                                self.output_tags,
-                                                submit_runner_ram=runtimeContext.submit_runner_ram,
-                                                name=runtimeContext.name,
-                                                on_error=runtimeContext.on_error,
-                                                submit_runner_image=runtimeContext.submit_runner_image,
-                                                intermediate_output_ttl=runtimeContext.intermediate_output_ttl,
-                                                merged_map=merged_map,
-                                                priority=runtimeContext.priority,
-                                                secret_store=self.secret_store)
-            elif self.work_api == "jobs":
-                runnerjob = RunnerJob(self, tool, job_order, runtimeContext.enable_reuse,
-                                      self.output_name,
-                                      self.output_tags,
-                                      submit_runner_ram=runtimeContext.submit_runner_ram,
-                                      name=runtimeContext.name,
-                                      on_error=runtimeContext.on_error,
-                                      submit_runner_image=runtimeContext.submit_runner_image,
-                                      merged_map=merged_map)
-        elif runtimeContext.cwl_runner_job is None and self.work_api == "jobs":
-            # Create pipeline for local run
-            self.pipeline = self.api.pipeline_instances().create(
-                body={
-                    "owner_uuid": self.project_uuid,
-                    "name": runtimeContext.name if runtimeContext.name else shortname(tool.tool["id"]),
-                    "components": {},
-                    "state": "RunningOnClient"}).execute(num_retries=self.num_retries)
-            logger.info("Pipeline instance %s", self.pipeline["uuid"])
-
-        if runnerjob and not runtimeContext.wait:
-            submitargs = runtimeContext.copy()
-            submitargs.submit = False
-            runnerjob.run(submitargs)
-            return (runnerjob.uuid, "success")
-
-        self.poll_api = arvados.api('v1', timeout=runtimeContext.http_timeout)
-        self.polling_thread = threading.Thread(target=self.poll_states)
-        self.polling_thread.start()
-
-        self.task_queue = TaskQueue(self.workflow_eval_lock, self.thread_count)
-
-        if runnerjob:
-            jobiter = iter((runnerjob,))
-        else:
-            if runtimeContext.cwl_runner_job is not None:
-                self.uuid = runtimeContext.cwl_runner_job.get('uuid')
-            jobiter = tool.job(job_order,
-                               self.output_callback,
-                               runtimeContext)
-
-        try:
-            self.workflow_eval_lock.acquire()
-            # Holds the lock while this code runs and releases it when
-            # it is safe to do so in self.workflow_eval_lock.wait(),
-            # at which point on_message can update job state and
-            # process output callbacks.
-
-            loopperf = Perf(metrics, "jobiter")
-            loopperf.__enter__()
-            for runnable in jobiter:
-                loopperf.__exit__()
-
-                if self.stop_polling.is_set():
-                    break
-
-                if self.task_queue.error is not None:
-                    raise self.task_queue.error
-
-                if runnable:
-                    with Perf(metrics, "run"):
-                        self.start_run(runnable, runtimeContext)
-                else:
-                    if (self.task_queue.in_flight + len(self.processes)) > 0:
-                        self.workflow_eval_lock.wait(3)
-                    else:
-                        logger.error("Workflow is deadlocked, no runnable processes and not waiting on any pending processes.")
-                        break
-                loopperf.__enter__()
-            loopperf.__exit__()
-
-            while (self.task_queue.in_flight + len(self.processes)) > 0:
-                if self.task_queue.error is not None:
-                    raise self.task_queue.error
-                self.workflow_eval_lock.wait(3)
-
-        except UnsupportedRequirement:
-            raise
-        except:
-            if sys.exc_info()[0] is KeyboardInterrupt or sys.exc_info()[0] is SystemExit:
-                logger.error("Interrupted, workflow will be cancelled")
-            else:
-                logger.error("Execution failed: %s", sys.exc_info()[1], exc_info=(sys.exc_info()[1] if self.debug else False))
-            if self.pipeline:
-                self.api.pipeline_instances().update(uuid=self.pipeline["uuid"],
-                                                     body={"state": "Failed"}).execute(num_retries=self.num_retries)
-            if runnerjob and runnerjob.uuid and self.work_api == "containers":
-                self.api.container_requests().update(uuid=runnerjob.uuid,
-                                                     body={"priority": "0"}).execute(num_retries=self.num_retries)
-        finally:
-            self.workflow_eval_lock.release()
-            self.task_queue.drain()
-            self.stop_polling.set()
-            self.polling_thread.join()
-            self.task_queue.join()
-
-        if self.final_status == "UnsupportedRequirement":
-            raise UnsupportedRequirement("Check log for details.")
-
-        if self.final_output is None:
-            raise WorkflowException("Workflow did not return a result.")
-
-        if runtimeContext.submit and isinstance(runnerjob, Runner):
-            logger.info("Final output collection %s", runnerjob.final_output)
-        else:
-            if self.output_name is None:
-                self.output_name = "Output of %s" % (shortname(tool.tool["id"]))
-            if self.output_tags is None:
-                self.output_tags = ""
-
-            storage_classes = runtimeContext.storage_classes.strip().split(",")
-            self.final_output, self.final_output_collection = self.make_output_collection(self.output_name, storage_classes, self.output_tags, self.final_output)
-            self.set_crunch_output()
-
-        if runtimeContext.compute_checksum:
-            adjustDirObjs(self.final_output, partial(get_listing, self.fs_access))
-            adjustFileObjs(self.final_output, partial(compute_checksums, self.fs_access))
-
-        if self.trash_intermediate and self.final_status == "success":
-            self.trash_intermediate_output()
-
-        return (self.final_output, self.final_status)
-
-
 def versionstring():
     """Print version string of key packages for provenance and debugging."""
 
@@ -831,9 +139,13 @@ def arg_parser():  # type: () -> argparse.ArgumentParser
                         help="Docker image for workflow runner job, default arvados/jobs:%s" % __version__,
                         default=None)
 
-    parser.add_argument("--submit-request-uuid", type=str,
+    exgroup = parser.add_mutually_exclusive_group()
+    exgroup.add_argument("--submit-request-uuid", type=str,
                         default=None,
-                        help="Update and commit supplied container request instead of creating a new one (containers API only).")
+                        help="Update and commit to supplied container request instead of creating a new one (containers API only).")
+    exgroup.add_argument("--submit-runner-cluster", type=str,
+                        help="Submit toplevel runner to a remote cluster (containers API only)",
+                        default=None)
 
     parser.add_argument("--name", type=str,
                         help="Name to use for workflow execution instance.",
@@ -942,6 +254,10 @@ def main(args, stdout, stderr, api_client=None, keep_client=None,
 
     add_arv_hints()
 
+    for key, val in cwltool.argparser.get_default_args().items():
+        if not hasattr(arvargs, key):
+            setattr(arvargs, key, val)
+
     try:
         if api_client is None:
             api_client = arvados.safeapi.ThreadSafeApiCache(
@@ -952,7 +268,7 @@ def main(args, stdout, stderr, api_client=None, keep_client=None,
             api_client.users().current().execute()
         if keep_client is None:
             keep_client = arvados.keep.KeepClient(api_client=api_client, num_retries=4)
-        runner = ArvCwlRunner(api_client, arvargs, keep_client=keep_client, num_retries=4)
+        executor = ArvCwlExecutor(api_client, arvargs, keep_client=keep_client, num_retries=4)
     except Exception as e:
         logger.error(e)
         return 1
@@ -977,22 +293,13 @@ def main(args, stdout, stderr, api_client=None, keep_client=None,
     else:
         arvados.log_handler.setFormatter(logging.Formatter('%(name)s %(levelname)s: %(message)s'))
 
-    for key, val in cwltool.argparser.get_default_args().items():
-        if not hasattr(arvargs, key):
-            setattr(arvargs, key, val)
-
-    runtimeContext = ArvRuntimeContext(vars(arvargs))
-    runtimeContext.make_fs_access = partial(CollectionFsAccess,
-                             collection_cache=runner.collection_cache)
-    runtimeContext.http_timeout = arvargs.http_timeout
-
     return cwltool.main.main(args=arvargs,
                              stdout=stdout,
                              stderr=stderr,
-                             executor=runner.arv_executor,
+                             executor=executor.arv_executor,
                              versionfunc=versionstring,
                              job_order_object=job_order_object,
                              logger_handler=arvados.log_handler,
                              custom_schema_callback=add_arv_hints,
-                             loadingContext=runner.loadingContext,
-                             runtimeContext=runtimeContext)
+                             loadingContext=executor.loadingContext,
+                             runtimeContext=executor.runtimeContext)
diff --git a/sdk/cwl/arvados_cwl/arvcontainer.py b/sdk/cwl/arvados_cwl/arvcontainer.py
index b46711af4..7e22a7d39 100644
--- a/sdk/cwl/arvados_cwl/arvcontainer.py
+++ b/sdk/cwl/arvados_cwl/arvcontainer.py
@@ -212,9 +212,10 @@ class ArvadosContainer(JobBase):
             docker_req = {"dockerImageId": "arvados/jobs"}
 
         container_request["container_image"] = arv_docker_get_image(self.arvrunner.api,
-                                                                     docker_req,
-                                                                     runtimeContext.pull_image,
-                                                                     self.arvrunner.project_uuid)
+                                                                    docker_req,
+                                                                    runtimeContext.pull_image,
+                                                                    self.arvrunner.project_uuid,
+                                                                    runtimeContext.submit_runner_cluster)
 
         api_req, _ = self.get_requirement("http://arvados.org/cwl#APIRequirement")
         if api_req:
@@ -490,14 +491,20 @@ class RunnerContainer(Runner):
         if self.arvrunner.project_uuid:
             job_spec["owner_uuid"] = self.arvrunner.project_uuid
 
+        extra_submit_params = {}
+        if runtimeContext.submit_runner_cluster:
+            extra_submit_params["cluster_id"] = runtimeContext.submit_runner_cluster
+
         if runtimeContext.submit_request_uuid:
             response = self.arvrunner.api.container_requests().update(
                 uuid=runtimeContext.submit_request_uuid,
-                body=job_spec
+                body=job_spec,
+                **extra_submit_params
             ).execute(num_retries=self.arvrunner.num_retries)
         else:
             response = self.arvrunner.api.container_requests().create(
-                body=job_spec
+                body=job_spec,
+                **extra_submit_params
             ).execute(num_retries=self.arvrunner.num_retries)
 
         self.uuid = response["uuid"]
diff --git a/sdk/cwl/arvados_cwl/context.py b/sdk/cwl/arvados_cwl/context.py
index 48a3edec5..4e1334c1c 100644
--- a/sdk/cwl/arvados_cwl/context.py
+++ b/sdk/cwl/arvados_cwl/context.py
@@ -30,5 +30,9 @@ class ArvRuntimeContext(RuntimeContext):
         self.storage_classes = "default"
         self.current_container = None
         self.http_timeout = 300
+        self.submit_runner_cluster = None
 
         super(ArvRuntimeContext, self).__init__(kwargs)
+
+        if self.submit_request_uuid:
+            self.submit_runner_cluster = self.submit_request_uuid[0:5]
diff --git a/sdk/cwl/arvados_cwl/__init__.py b/sdk/cwl/arvados_cwl/executor.py
similarity index 69%
copy from sdk/cwl/arvados_cwl/__init__.py
copy to sdk/cwl/arvados_cwl/executor.py
index 0866f69d6..bbfb8ffc6 100644
--- a/sdk/cwl/arvados_cwl/__init__.py
+++ b/sdk/cwl/arvados_cwl/executor.py
@@ -1,43 +1,26 @@
-#!/usr/bin/env python
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-# Implement cwl-runner interface for submitting and running work on Arvados, using
-# either the Crunch jobs API or Crunch containers API.
-
 import argparse
 import logging
 import os
 import sys
 import threading
-import hashlib
 import copy
 import json
 import re
 from functools import partial
-import pkg_resources  # part of setuptools
-import Queue
 import time
-import signal
-import thread
 
 from cwltool.errors import WorkflowException
-import cwltool.main
 import cwltool.workflow
-import cwltool.process
 from schema_salad.sourceline import SourceLine
 import schema_salad.validate as validate
-import cwltool.argparser
 
 import arvados
 import arvados.config
 from arvados.keep import KeepClient
 from arvados.errors import ApiError
-import arvados.commands._util as arv_cmd
 
-from .arvcontainer import ArvadosContainer, RunnerContainer
-from .arvjob import ArvadosJob, RunnerJob, RunnerTemplate
+from .arvcontainer import RunnerContainer
+from .arvjob import RunnerJob, RunnerTemplate
 from .runner import Runner, upload_docker, upload_job_order, upload_workflow_deps
 from .arvtool import ArvadosCommandTool
 from .arvworkflow import ArvadosWorkflow, upload_workflow
@@ -49,22 +32,11 @@ from .context import ArvLoadingContext, ArvRuntimeContext
 from .util import get_current_container
 from ._version import __version__
 
-from cwltool.pack import pack
 from cwltool.process import shortname, UnsupportedRequirement, use_custom_schema
 from cwltool.pathmapper import adjustFileObjs, adjustDirObjs, get_listing
 from cwltool.command_line_tool import compute_checksums
 
-from arvados.api import OrderedJsonModel
-
 logger = logging.getLogger('arvados.cwl-runner')
-metrics = logging.getLogger('arvados.cwl-runner.metrics')
-logger.setLevel(logging.INFO)
-
-arvados.log_handler.setFormatter(logging.Formatter(
-        '%(asctime)s %(name)s %(levelname)s: %(message)s',
-        '%Y-%m-%d %H:%M:%S'))
-
-DEFAULT_PRIORITY = 500
 
 class RuntimeStatusLoggingHandler(logging.Handler):
     """
@@ -98,7 +70,7 @@ class RuntimeStatusLoggingHandler(logging.Handler):
                     "%s: %s" % (record.name, record.getMessage())
                 )
 
-class ArvCwlRunner(object):
+class ArvCwlExecutor(object):
     """Execute a CWL tool or workflow, submit work (using either jobs or
     containers API), wait for them to complete, and report output.
 
@@ -195,6 +167,11 @@ http://doc.arvados.org/install/install-api-server.html#disable_api_methods
             handler = RuntimeStatusLoggingHandler(self.runtime_status_update)
             root_logger.addHandler(handler)
 
+        self.runtimeContext = ArvRuntimeContext(vars(arvargs))
+        self.runtimeContext.make_fs_access = partial(CollectionFsAccess,
+                                                     collection_cache=self.collection_cache)
+
+
     def arv_make_tool(self, toolpath_object, loadingContext):
         if "class" in toolpath_object and toolpath_object["class"] == "CommandLineTool":
             return ArvadosCommandTool(self, toolpath_object, loadingContext)
@@ -735,264 +712,3 @@ http://doc.arvados.org/install/install-api-server.html#disable_api_methods
             self.trash_intermediate_output()
 
         return (self.final_output, self.final_status)
-
-
-def versionstring():
-    """Print version string of key packages for provenance and debugging."""
-
-    arvcwlpkg = pkg_resources.require("arvados-cwl-runner")
-    arvpkg = pkg_resources.require("arvados-python-client")
-    cwlpkg = pkg_resources.require("cwltool")
-
-    return "%s %s, %s %s, %s %s" % (sys.argv[0], arvcwlpkg[0].version,
-                                    "arvados-python-client", arvpkg[0].version,
-                                    "cwltool", cwlpkg[0].version)
-
-
-def arg_parser():  # type: () -> argparse.ArgumentParser
-    parser = argparse.ArgumentParser(description='Arvados executor for Common Workflow Language')
-
-    parser.add_argument("--basedir", type=str,
-                        help="Base directory used to resolve relative references in the input, default to directory of input object file or current directory (if inputs piped/provided on command line).")
-    parser.add_argument("--outdir", type=str, default=os.path.abspath('.'),
-                        help="Output directory, default current directory")
-
-    parser.add_argument("--eval-timeout",
-                        help="Time to wait for a Javascript expression to evaluate before giving an error, default 20s.",
-                        type=float,
-                        default=20)
-
-    exgroup = parser.add_mutually_exclusive_group()
-    exgroup.add_argument("--print-dot", action="store_true",
-                         help="Print workflow visualization in graphviz format and exit")
-    exgroup.add_argument("--version", action="version", help="Print version and exit", version=versionstring())
-    exgroup.add_argument("--validate", action="store_true", help="Validate CWL document only.")
-
-    exgroup = parser.add_mutually_exclusive_group()
-    exgroup.add_argument("--verbose", action="store_true", help="Default logging")
-    exgroup.add_argument("--quiet", action="store_true", help="Only print warnings and errors.")
-    exgroup.add_argument("--debug", action="store_true", help="Print even more logging")
-
-    parser.add_argument("--metrics", action="store_true", help="Print timing metrics")
-
-    parser.add_argument("--tool-help", action="store_true", help="Print command line help for tool")
-
-    exgroup = parser.add_mutually_exclusive_group()
-    exgroup.add_argument("--enable-reuse", action="store_true",
-                        default=True, dest="enable_reuse",
-                        help="Enable job or container reuse (default)")
-    exgroup.add_argument("--disable-reuse", action="store_false",
-                        default=True, dest="enable_reuse",
-                        help="Disable job or container reuse")
-
-    parser.add_argument("--project-uuid", type=str, metavar="UUID", help="Project that will own the workflow jobs, if not provided, will go to home project.")
-    parser.add_argument("--output-name", type=str, help="Name to use for collection that stores the final output.", default=None)
-    parser.add_argument("--output-tags", type=str, help="Tags for the final output collection separated by commas, e.g., '--output-tags tag0,tag1,tag2'.", default=None)
-    parser.add_argument("--ignore-docker-for-reuse", action="store_true",
-                        help="Ignore Docker image version when deciding whether to reuse past jobs.",
-                        default=False)
-
-    exgroup = parser.add_mutually_exclusive_group()
-    exgroup.add_argument("--submit", action="store_true", help="Submit workflow to run on Arvados.",
-                        default=True, dest="submit")
-    exgroup.add_argument("--local", action="store_false", help="Run workflow on local host (submits jobs to Arvados).",
-                        default=True, dest="submit")
-    exgroup.add_argument("--create-template", action="store_true", help="(Deprecated) synonym for --create-workflow.",
-                         dest="create_workflow")
-    exgroup.add_argument("--create-workflow", action="store_true", help="Create an Arvados workflow (if using the 'containers' API) or pipeline template (if using the 'jobs' API). See --api.")
-    exgroup.add_argument("--update-workflow", type=str, metavar="UUID", help="Update an existing Arvados workflow or pipeline template with the given UUID.")
-
-    exgroup = parser.add_mutually_exclusive_group()
-    exgroup.add_argument("--wait", action="store_true", help="After submitting workflow runner job, wait for completion.",
-                        default=True, dest="wait")
-    exgroup.add_argument("--no-wait", action="store_false", help="Submit workflow runner job and exit.",
-                        default=True, dest="wait")
-
-    exgroup = parser.add_mutually_exclusive_group()
-    exgroup.add_argument("--log-timestamps", action="store_true", help="Prefix logging lines with timestamp",
-                        default=True, dest="log_timestamps")
-    exgroup.add_argument("--no-log-timestamps", action="store_false", help="No timestamp on logging lines",
-                        default=True, dest="log_timestamps")
-
-    parser.add_argument("--api", type=str,
-                        default=None, dest="work_api",
-                        choices=("jobs", "containers"),
-                        help="Select work submission API.  Default is 'jobs' if that API is available, otherwise 'containers'.")
-
-    parser.add_argument("--compute-checksum", action="store_true", default=False,
-                        help="Compute checksum of contents while collecting outputs",
-                        dest="compute_checksum")
-
-    parser.add_argument("--submit-runner-ram", type=int,
-                        help="RAM (in MiB) required for the workflow runner job (default 1024)",
-                        default=None)
-
-    parser.add_argument("--submit-runner-image", type=str,
-                        help="Docker image for workflow runner job, default arvados/jobs:%s" % __version__,
-                        default=None)
-
-    parser.add_argument("--submit-request-uuid", type=str,
-                        default=None,
-                        help="Update and commit supplied container request instead of creating a new one (containers API only).")
-
-    parser.add_argument("--name", type=str,
-                        help="Name to use for workflow execution instance.",
-                        default=None)
-
-    parser.add_argument("--on-error", type=str,
-                        help="Desired workflow behavior when a step fails.  One of 'stop' or 'continue'. "
-                        "Default is 'continue'.", default="continue", choices=("stop", "continue"))
-
-    parser.add_argument("--enable-dev", action="store_true",
-                        help="Enable loading and running development versions "
-                             "of CWL spec.", default=False)
-    parser.add_argument('--storage-classes', default="default", type=str,
-                        help="Specify comma separated list of storage classes to be used when saving workflow output to Keep.")
-
-    parser.add_argument("--intermediate-output-ttl", type=int, metavar="N",
-                        help="If N > 0, intermediate output collections will be trashed N seconds after creation.  Default is 0 (don't trash).",
-                        default=0)
-
-    parser.add_argument("--priority", type=int,
-                        help="Workflow priority (range 1..1000, higher has precedence over lower, containers api only)",
-                        default=DEFAULT_PRIORITY)
-
-    parser.add_argument("--disable-validate", dest="do_validate",
-                        action="store_false", default=True,
-                        help=argparse.SUPPRESS)
-
-    parser.add_argument("--disable-js-validation",
-                        action="store_true", default=False,
-                        help=argparse.SUPPRESS)
-
-    parser.add_argument("--thread-count", type=int,
-                        default=4, help="Number of threads to use for job submit and output collection.")
-
-    parser.add_argument("--http-timeout", type=int,
-                        default=5*60, dest="http_timeout", help="API request timeout in seconds. Default is 300 seconds (5 minutes).")
-
-    exgroup = parser.add_mutually_exclusive_group()
-    exgroup.add_argument("--trash-intermediate", action="store_true",
-                        default=False, dest="trash_intermediate",
-                         help="Immediately trash intermediate outputs on workflow success.")
-    exgroup.add_argument("--no-trash-intermediate", action="store_false",
-                        default=False, dest="trash_intermediate",
-                        help="Do not trash intermediate outputs (default).")
-
-    parser.add_argument("workflow", type=str, default=None, help="The workflow to execute")
-    parser.add_argument("job_order", nargs=argparse.REMAINDER, help="The input object to the workflow.")
-
-    return parser
-
-def add_arv_hints():
-    cwltool.command_line_tool.ACCEPTLIST_EN_RELAXED_RE = re.compile(r".*")
-    cwltool.command_line_tool.ACCEPTLIST_RE = cwltool.command_line_tool.ACCEPTLIST_EN_RELAXED_RE
-    res = pkg_resources.resource_stream(__name__, 'arv-cwl-schema.yml')
-    use_custom_schema("v1.0", "http://arvados.org/cwl", res.read())
-    res.close()
-    cwltool.process.supportedProcessRequirements.extend([
-        "http://arvados.org/cwl#RunInSingleContainer",
-        "http://arvados.org/cwl#OutputDirType",
-        "http://arvados.org/cwl#RuntimeConstraints",
-        "http://arvados.org/cwl#PartitionRequirement",
-        "http://arvados.org/cwl#APIRequirement",
-        "http://commonwl.org/cwltool#LoadListingRequirement",
-        "http://arvados.org/cwl#IntermediateOutput",
-        "http://arvados.org/cwl#ReuseRequirement",
-        "http://arvados.org/cwl#ClusterTarget"
-    ])
-
-def exit_signal_handler(sigcode, frame):
-    logger.error("Caught signal {}, exiting.".format(sigcode))
-    sys.exit(-sigcode)
-
-def main(args, stdout, stderr, api_client=None, keep_client=None,
-         install_sig_handlers=True):
-    parser = arg_parser()
-
-    job_order_object = None
-    arvargs = parser.parse_args(args)
-
-    if len(arvargs.storage_classes.strip().split(',')) > 1:
-        logger.error("Multiple storage classes are not supported currently.")
-        return 1
-
-    arvargs.use_container = True
-    arvargs.relax_path_checks = True
-    arvargs.print_supported_versions = False
-
-    if install_sig_handlers:
-        arv_cmd.install_signal_handlers()
-
-    if arvargs.update_workflow:
-        if arvargs.update_workflow.find('-7fd4e-') == 5:
-            want_api = 'containers'
-        elif arvargs.update_workflow.find('-p5p6p-') == 5:
-            want_api = 'jobs'
-        else:
-            want_api = None
-        if want_api and arvargs.work_api and want_api != arvargs.work_api:
-            logger.error('--update-workflow arg {!r} uses {!r} API, but --api={!r} specified'.format(
-                arvargs.update_workflow, want_api, arvargs.work_api))
-            return 1
-        arvargs.work_api = want_api
-
-    if (arvargs.create_workflow or arvargs.update_workflow) and not arvargs.job_order:
-        job_order_object = ({}, "")
-
-    add_arv_hints()
-
-    try:
-        if api_client is None:
-            api_client = arvados.safeapi.ThreadSafeApiCache(
-                api_params={"model": OrderedJsonModel(), "timeout": arvargs.http_timeout},
-                keep_params={"num_retries": 4})
-            keep_client = api_client.keep
-            # Make an API object now so errors are reported early.
-            api_client.users().current().execute()
-        if keep_client is None:
-            keep_client = arvados.keep.KeepClient(api_client=api_client, num_retries=4)
-        runner = ArvCwlRunner(api_client, arvargs, keep_client=keep_client, num_retries=4)
-    except Exception as e:
-        logger.error(e)
-        return 1
-
-    if arvargs.debug:
-        logger.setLevel(logging.DEBUG)
-        logging.getLogger('arvados').setLevel(logging.DEBUG)
-
-    if arvargs.quiet:
-        logger.setLevel(logging.WARN)
-        logging.getLogger('arvados').setLevel(logging.WARN)
-        logging.getLogger('arvados.arv-run').setLevel(logging.WARN)
-
-    if arvargs.metrics:
-        metrics.setLevel(logging.DEBUG)
-        logging.getLogger("cwltool.metrics").setLevel(logging.DEBUG)
-
-    if arvargs.log_timestamps:
-        arvados.log_handler.setFormatter(logging.Formatter(
-            '%(asctime)s %(name)s %(levelname)s: %(message)s',
-            '%Y-%m-%d %H:%M:%S'))
-    else:
-        arvados.log_handler.setFormatter(logging.Formatter('%(name)s %(levelname)s: %(message)s'))
-
-    for key, val in cwltool.argparser.get_default_args().items():
-        if not hasattr(arvargs, key):
-            setattr(arvargs, key, val)
-
-    runtimeContext = ArvRuntimeContext(vars(arvargs))
-    runtimeContext.make_fs_access = partial(CollectionFsAccess,
-                             collection_cache=runner.collection_cache)
-    runtimeContext.http_timeout = arvargs.http_timeout
-
-    return cwltool.main.main(args=arvargs,
-                             stdout=stdout,
-                             stderr=stderr,
-                             executor=runner.arv_executor,
-                             versionfunc=versionstring,
-                             job_order_object=job_order_object,
-                             logger_handler=arvados.log_handler,
-                             custom_schema_callback=add_arv_hints,
-                             loadingContext=runner.loadingContext,
-                             runtimeContext=runtimeContext)
diff --git a/sdk/cwl/arvados_cwl/runner.py b/sdk/cwl/arvados_cwl/runner.py
index 41166c512..3b40552ac 100644
--- a/sdk/cwl/arvados_cwl/runner.py
+++ b/sdk/cwl/arvados_cwl/runner.py
@@ -324,10 +324,10 @@ def arvados_jobs_image(arvrunner, img):
     """Determine if the right arvados/jobs image version is available.  If not, try to pull and upload it."""
 
     try:
-        arv_docker_get_image(arvrunner.api, {"dockerPull": img}, True, arvrunner.project_uuid)
+        return arv_docker_get_image(arvrunner.api, {"dockerPull": img}, True, arvrunner.project_uuid)
     except Exception as e:
         raise Exception("Docker image %s is not available\n%s" % (img, e) )
-    return img
+
 
 def upload_workflow_collection(arvrunner, name, packed):
     collection = arvados.collection.Collection(api_client=arvrunner.api,

commit 9234706ebe091a43c2c379d67704110946b048c8
Author: Peter Amstutz <pamstutz at veritasgenetics.com>
Date:   Thu Oct 25 10:36:47 2018 -0400

    14198: Use PDH for container_image instead of docker repo+tag
    
    Needed to support federated container requests.
    
    This reverts 6ea807b2caf6c934f170b2e4d89c23c4a08ca69c
    
    Based on the commit comment, that change was made to accommodate the
    Docker v1 to v2 image format migration (to enable the API server to
    select the image with the correct format).  However, the API server
    subsequently gained the ability to detect if it needed to substitute a
    PDH with a migrated image PDH in commit
    a72205728f94f5261b657766e01f5767dc15d4b5 so now we want to restore the
    original behavior of locally resolving the image PDH and using that in
    the container request.
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz at veritasgenetics.com>

diff --git a/sdk/cwl/arvados_cwl/arvdocker.py b/sdk/cwl/arvados_cwl/arvdocker.py
index 7508febb0..6bca07c88 100644
--- a/sdk/cwl/arvados_cwl/arvdocker.py
+++ b/sdk/cwl/arvados_cwl/arvdocker.py
@@ -31,7 +31,7 @@ def arv_docker_get_image(api_client, dockerRequirement, pull_image, project_uuid
     global cached_lookups_lock
     with cached_lookups_lock:
         if dockerRequirement["dockerImageId"] in cached_lookups:
-            return dockerRequirement["dockerImageId"]
+            return cached_lookups[dockerRequirement["dockerImageId"]]
 
     with SourceLine(dockerRequirement, "dockerImageId", WorkflowException, logger.isEnabledFor(logging.DEBUG)):
         sp = dockerRequirement["dockerImageId"].split(":")
@@ -70,10 +70,12 @@ def arv_docker_get_image(api_client, dockerRequirement, pull_image, project_uuid
         if not images:
             raise WorkflowException("Could not find Docker image %s:%s" % (image_name, image_tag))
 
+        pdh = api_client.collections().get(uuid=images[0][0]).execute()["portable_data_hash"]
+
         with cached_lookups_lock:
-            cached_lookups[dockerRequirement["dockerImageId"]] = True
+            cached_lookups[dockerRequirement["dockerImageId"]] = pdh
 
-    return dockerRequirement["dockerImageId"]
+    return pdh
 
 def arv_docker_clear_cache():
     global cached_lookups

commit 31faaec3a6473bab3ad656611914343bba29a875
Author: Peter Amstutz <pamstutz at veritasgenetics.com>
Date:   Wed Oct 24 14:45:21 2018 -0400

    14198: Initial support for ClusterTarget hint
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz at veritasgenetics.com>

diff --git a/sdk/cwl/arvados_cwl/__init__.py b/sdk/cwl/arvados_cwl/__init__.py
index 2e1ea50a3..0866f69d6 100644
--- a/sdk/cwl/arvados_cwl/__init__.py
+++ b/sdk/cwl/arvados_cwl/__init__.py
@@ -898,7 +898,8 @@ def add_arv_hints():
         "http://arvados.org/cwl#APIRequirement",
         "http://commonwl.org/cwltool#LoadListingRequirement",
         "http://arvados.org/cwl#IntermediateOutput",
-        "http://arvados.org/cwl#ReuseRequirement"
+        "http://arvados.org/cwl#ReuseRequirement",
+        "http://arvados.org/cwl#ClusterTarget"
     ])
 
 def exit_signal_handler(sigcode, frame):
diff --git a/sdk/cwl/arvados_cwl/arv-cwl-schema.yml b/sdk/cwl/arvados_cwl/arv-cwl-schema.yml
index 4f762192a..94eaf9560 100644
--- a/sdk/cwl/arvados_cwl/arv-cwl-schema.yml
+++ b/sdk/cwl/arvados_cwl/arv-cwl-schema.yml
@@ -232,4 +232,24 @@ $graph:
     coresMin:
       type: int?
       doc: Minimum cores allocated to cwl-runner
-      jsonldPredicate: "https://w3id.org/cwl/cwl#ResourceRequirement/coresMin"
\ No newline at end of file
+      jsonldPredicate: "https://w3id.org/cwl/cwl#ResourceRequirement/coresMin"
+
+- name: ClusterTarget
+  type: record
+  extends: cwl:ProcessRequirement
+  inVocab: false
+  doc: |
+    Specify where a workflow step should run
+  fields:
+    class:
+      type: string
+      doc: "Always 'arv:ClusterTarget'"
+      jsonldPredicate:
+        _id: "@type"
+        _type: "@vocab"
+    clusterID:
+      type: string?
+      doc: The cluster to run the container
+    ownerUUID:
+      type: string?
+      doc: The project that will own the container requests and intermediate collections
diff --git a/sdk/cwl/arvados_cwl/arvcontainer.py b/sdk/cwl/arvados_cwl/arvcontainer.py
index b4d01019f..b46711af4 100644
--- a/sdk/cwl/arvados_cwl/arvcontainer.py
+++ b/sdk/cwl/arvados_cwl/arvcontainer.py
@@ -250,6 +250,15 @@ class ArvadosContainer(JobBase):
         if self.timelimit is not None:
             scheduling_parameters["max_run_time"] = self.timelimit
 
+        extra_submit_params = {}
+        cluster_target_req, _ = self.get_requirement("http://arvados.org/cwl#ClusterTarget")
+        if cluster_target_req:
+            cluster_id = cluster_target_req.get("clusterID")
+            if cluster_id:
+                extra_submit_params["cluster_id"] = cluster_id
+            if cluster_target_req.get("ownerUUID"):
+                container_request["owner_uuid"] = cluster_target_req.get("ownerUUID")
+
         container_request["output_name"] = "Output for step %s" % (self.name)
         container_request["output_ttl"] = self.output_ttl
         container_request["mounts"] = mounts
@@ -277,11 +286,13 @@ class ArvadosContainer(JobBase):
             if runtimeContext.submit_request_uuid:
                 response = self.arvrunner.api.container_requests().update(
                     uuid=runtimeContext.submit_request_uuid,
-                    body=container_request
+                    body=container_request,
+                    **extra_submit_params
                 ).execute(num_retries=self.arvrunner.num_retries)
             else:
                 response = self.arvrunner.api.container_requests().create(
-                    body=container_request
+                    body=container_request,
+                    **extra_submit_params
                 ).execute(num_retries=self.arvrunner.num_retries)
 
             self.uuid = response["uuid"]

commit 4427f2c5f740d03d5ee38745159f61b6805843e7
Author: Peter Amstutz <pamstutz at veritasgenetics.com>
Date:   Mon Oct 29 17:09:08 2018 -0400

    14262: Rewrite collectionFederatedRequestHandler PDH search to use channels
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz at veritasgenetics.com>

diff --git a/lib/controller/fed_collections.go b/lib/controller/fed_collections.go
index 8a97c25c9..88b0f95a0 100644
--- a/lib/controller/fed_collections.go
+++ b/lib/controller/fed_collections.go
@@ -159,63 +159,6 @@ type searchRemoteClusterForPDH struct {
 	statusCode    *int
 }
 
-func (s *searchRemoteClusterForPDH) filterRemoteClusterResponse(resp *http.Response, requestError error) (newResponse *http.Response, err error) {
-	s.mtx.Lock()
-	defer s.mtx.Unlock()
-
-	if *s.sentResponse {
-		// Another request already returned a response
-		return nil, nil
-	}
-
-	if requestError != nil {
-		*s.errors = append(*s.errors, fmt.Sprintf("Request error contacting %q: %v", s.remoteID, requestError))
-		// Record the error and suppress response
-		return nil, nil
-	}
-
-	if resp.StatusCode != http.StatusOK {
-		// Suppress returning unsuccessful result.  Maybe
-		// another request will find it.
-		*s.errors = append(*s.errors, fmt.Sprintf("Response to %q from %q: %v", resp.Header.Get(httpserver.HeaderRequestID), s.remoteID, resp.Status))
-		if resp.StatusCode != http.StatusNotFound {
-			// Got a non-404 error response, convert into BadGateway
-			*s.statusCode = http.StatusBadGateway
-		}
-		return nil, nil
-	}
-
-	s.mtx.Unlock()
-
-	// This reads the response body.  We don't want to hold the
-	// lock while doing this because other remote requests could
-	// also have made it to this point, and we don't want a
-	// slow response holding the lock to block a faster response
-	// that is waiting on the lock.
-	newResponse, err = rewriteSignatures(s.remoteID, s.pdh, resp, nil)
-
-	s.mtx.Lock()
-
-	if *s.sentResponse {
-		// Another request already returned a response
-		return nil, nil
-	}
-
-	if err != nil {
-		// Suppress returning unsuccessful result.  Maybe
-		// another request will be successful.
-		*s.errors = append(*s.errors, fmt.Sprintf("Error parsing response from %q: %v", s.remoteID, err))
-		return nil, nil
-	}
-
-	// We have a successful response.  Suppress/cancel all the
-	// other requests/responses.
-	*s.sentResponse = true
-	s.cancelFunc()
-
-	return newResponse, nil
-}
-
 func (h *collectionFederatedRequestHandler) ServeHTTP(w http.ResponseWriter, req *http.Request) {
 	if req.Method != "GET" {
 		// Only handle GET requests right now
@@ -263,58 +206,107 @@ func (h *collectionFederatedRequestHandler) ServeHTTP(w http.ResponseWriter, req
 		return
 	}
 
-	sharedContext, cancelFunc := context.WithCancel(req.Context())
-	defer cancelFunc()
-	req = req.WithContext(sharedContext)
-
 	// Create a goroutine for each cluster in the
 	// RemoteClusters map.  The first valid result gets
 	// returned to the client.  When that happens, all
-	// other outstanding requests are cancelled or
-	// suppressed.
-	sentResponse := false
-	mtx := sync.Mutex{}
+	// other outstanding requests are cancelled
+	sharedContext, cancelFunc := context.WithCancel(req.Context())
+	req = req.WithContext(sharedContext)
 	wg := sync.WaitGroup{}
-	var errors []string
-	var errorCode int = http.StatusNotFound
+	pdh := m[1]
+	success := make(chan *http.Response)
+	errorChan := make(chan error)
 
 	// use channel as a semaphore to limit the number of concurrent
 	// requests at a time
 	sem := make(chan bool, h.handler.Cluster.RequestLimits.GetMultiClusterRequestConcurrency())
+
+	defer close(errorChan)
+	defer close(success)
 	defer close(sem)
+	defer cancelFunc()
+
 	for remoteID := range h.handler.Cluster.RemoteClusters {
 		if remoteID == h.handler.Cluster.ClusterID {
 			// No need to query local cluster again
 			continue
 		}
-		// blocks until it can put a value into the
-		// channel (which has a max queue capacity)
-		sem <- true
-		if sentResponse {
-			break
-		}
-		search := &searchRemoteClusterForPDH{m[1], remoteID, &mtx, &sentResponse,
-			&sharedContext, cancelFunc, &errors, &errorCode}
+
 		wg.Add(1)
-		go func() {
-			resp, cancel, err := h.handler.remoteClusterRequest(search.remoteID, req)
-			if cancel != nil {
-				defer cancel()
+		go func(remote string) {
+			defer wg.Done()
+			// blocks until it can put a value into the
+			// channel (which has a max queue capacity)
+			sem <- true
+			select {
+			case <-sharedContext.Done():
+				return
+			default:
+			}
+
+			resp, _, err := h.handler.remoteClusterRequest(remote, req)
+			wasSuccess := false
+			defer func() {
+				if resp != nil && !wasSuccess {
+					resp.Body.Close()
+				}
+			}()
+			// Don't need to do anything with the cancel
+			// function returned by remoteClusterRequest
+			// because the context inherits from
+			// sharedContext, so when sharedContext is
+			// cancelled it should cancel that one as
+			// well.
+			if err != nil {
+				errorChan <- err
+				return
 			}
-			newResp, err := search.filterRemoteClusterResponse(resp, err)
-			if newResp != nil || err != nil {
-				h.handler.proxy.ForwardResponse(w, newResp, err)
+			if resp.StatusCode != http.StatusOK {
+				errorChan <- HTTPError{resp.Status, resp.StatusCode}
+				return
+			}
+			select {
+			case <-sharedContext.Done():
+				return
+			default:
+			}
+
+			newResponse, err := rewriteSignatures(remote, pdh, resp, nil)
+			if err != nil {
+				errorChan <- err
+				return
+			}
+			select {
+			case <-sharedContext.Done():
+			case success <- newResponse:
+				wasSuccess = true
 			}
-			wg.Done()
 			<-sem
-		}()
+		}(remoteID)
 	}
-	wg.Wait()
+	go func() {
+		wg.Wait()
+		cancelFunc()
+	}()
 
-	if sentResponse {
-		return
-	}
+	var errors []string
+	errorCode := http.StatusNotFound
 
-	// No successful responses, so return the error
-	httpserver.Errors(w, errors, errorCode)
+	for {
+		select {
+		case newResp = <-success:
+			h.handler.proxy.ForwardResponse(w, newResp, nil)
+			return
+		case err := <-errorChan:
+			if httperr, ok := err.(HTTPError); ok {
+				if httperr.Code != http.StatusNotFound {
+					errorCode = http.StatusBadGateway
+				}
+			}
+			errors = append(errors, err.Error())
+		case <-sharedContext.Done():
+			httpserver.Errors(w, errors, errorCode)
+			return
+		}
+	}
 }

commit 703179225b04309485c0a1cefb794df6c919e84f
Author: Peter Amstutz <pamstutz at veritasgenetics.com>
Date:   Mon Oct 29 15:36:45 2018 -0400

    14262: Make sure cancel() from proxy.Do() gets called
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz at veritasgenetics.com>

diff --git a/lib/controller/fed_collections.go b/lib/controller/fed_collections.go
index 70dbdc3f5..8a97c25c9 100644
--- a/lib/controller/fed_collections.go
+++ b/lib/controller/fed_collections.go
@@ -34,7 +34,7 @@ func rewriteSignatures(clusterID string, expectHash string,
 		return resp, requestError
 	}
 
-	if resp.StatusCode != 200 {
+	if resp.StatusCode != http.StatusOK {
 		return resp, nil
 	}
 
@@ -140,7 +140,7 @@ func filterLocalClusterResponse(resp *http.Response, requestError error) (newRes
 		return resp, requestError
 	}
 
-	if resp.StatusCode == 404 {
+	if resp.StatusCode == http.StatusNotFound {
 		// Suppress returning this result, because we want to
 		// search the federation.
 		return nil, nil
@@ -174,12 +174,11 @@ func (s *searchRemoteClusterForPDH) filterRemoteClusterResponse(resp *http.Respo
 		return nil, nil
 	}
 
-	if resp.StatusCode != 200 {
+	if resp.StatusCode != http.StatusOK {
 		// Suppress returning unsuccessful result.  Maybe
 		// another request will find it.
-		// TODO collect and return error responses.
-		*s.errors = append(*s.errors, fmt.Sprintf("Response to %q from %q: %v", httpserver.GetRequestID(resp.Header), s.remoteID, resp.Status))
-		if resp.StatusCode != 404 {
+		*s.errors = append(*s.errors, fmt.Sprintf("Response to %q from %q: %v", resp.Header.Get(httpserver.HeaderRequestID), s.remoteID, resp.Status))
+		if resp.StatusCode != http.StatusNotFound {
 			// Got a non-404 error response, convert into BadGateway
 			*s.statusCode = http.StatusBadGateway
 		}
@@ -236,7 +235,10 @@ func (h *collectionFederatedRequestHandler) ServeHTTP(w http.ResponseWriter, req
 
 		if clusterId != "" && clusterId != h.handler.Cluster.ClusterID {
 			// request for remote collection by uuid
-			resp, err := h.handler.remoteClusterRequest(clusterId, req)
+			resp, cancel, err := h.handler.remoteClusterRequest(clusterId, req)
+			if cancel != nil {
+				defer cancel()
+			}
 			newResponse, err := rewriteSignatures(clusterId, "", resp, err)
 			h.handler.proxy.ForwardResponse(w, newResponse, err)
 			return
@@ -251,7 +253,10 @@ func (h *collectionFederatedRequestHandler) ServeHTTP(w http.ResponseWriter, req
 	// Request for collection by PDH.  Search the federation.
 
 	// First, query the local cluster.
-	resp, err := h.handler.localClusterRequest(req)
+	resp, localClusterRequestCancel, err := h.handler.localClusterRequest(req)
+	if localClusterRequestCancel != nil {
+		defer localClusterRequestCancel()
+	}
 	newResp, err := filterLocalClusterResponse(resp, err)
 	if newResp != nil || err != nil {
 		h.handler.proxy.ForwardResponse(w, newResp, err)
@@ -271,7 +276,7 @@ func (h *collectionFederatedRequestHandler) ServeHTTP(w http.ResponseWriter, req
 	mtx := sync.Mutex{}
 	wg := sync.WaitGroup{}
 	var errors []string
-	var errorCode int = 404
+	var errorCode int = http.StatusNotFound
 
 	// use channel as a semaphore to limit the number of concurrent
 	// requests at a time
@@ -292,7 +297,10 @@ func (h *collectionFederatedRequestHandler) ServeHTTP(w http.ResponseWriter, req
 			&sharedContext, cancelFunc, &errors, &errorCode}
 		wg.Add(1)
 		go func() {
-			resp, err := h.handler.remoteClusterRequest(search.remoteID, req)
+			resp, cancel, err := h.handler.remoteClusterRequest(search.remoteID, req)
+			if cancel != nil {
+				defer cancel()
+			}
 			newResp, err := search.filterRemoteClusterResponse(resp, err)
 			if newResp != nil || err != nil {
 				h.handler.proxy.ForwardResponse(w, newResp, err)
diff --git a/lib/controller/fed_containers.go b/lib/controller/fed_containers.go
index ccb2401bb..a3c292583 100644
--- a/lib/controller/fed_containers.go
+++ b/lib/controller/fed_containers.go
@@ -95,7 +95,10 @@ func remoteContainerRequestCreate(
 	req.ContentLength = int64(buf.Len())
 	req.Header.Set("Content-Length", fmt.Sprintf("%v", buf.Len()))
 
-	resp, err := h.handler.remoteClusterRequest(*clusterId, req)
+	resp, cancel, err := h.handler.remoteClusterRequest(*clusterId, req)
+	if cancel != nil {
+		defer cancel()
+	}
 	h.handler.proxy.ForwardResponse(w, resp, err)
 	return true
 }
diff --git a/lib/controller/fed_generic.go b/lib/controller/fed_generic.go
index 63e61e690..7d5b63d31 100644
--- a/lib/controller/fed_generic.go
+++ b/lib/controller/fed_generic.go
@@ -6,6 +6,7 @@ package controller
 
 import (
 	"bytes"
+	"context"
 	"encoding/json"
 	"fmt"
 	"io/ioutil"
@@ -65,12 +66,16 @@ func (h *genericFederatedRequestHandler) remoteQueryUUIDs(w http.ResponseWriter,
 		rc := multiClusterQueryResponseCollector{clusterID: clusterID}
 
 		var resp *http.Response
+		var cancel context.CancelFunc
 		if clusterID == h.handler.Cluster.ClusterID {
-			resp, err = h.handler.localClusterRequest(&remoteReq)
+			resp, cancel, err = h.handler.localClusterRequest(&remoteReq)
 		} else {
-			resp, err = h.handler.remoteClusterRequest(clusterID, &remoteReq)
+			resp, cancel, err = h.handler.remoteClusterRequest(clusterID, &remoteReq)
 		}
 		rc.collectResponse(resp, err)
+		if cancel != nil {
+			cancel()
+		}
 
 		if rc.error != nil {
 			return nil, "", rc.error
@@ -304,7 +309,10 @@ func (h *genericFederatedRequestHandler) ServeHTTP(w http.ResponseWriter, req *h
 	if clusterId == "" || clusterId == h.handler.Cluster.ClusterID {
 		h.next.ServeHTTP(w, req)
 	} else {
-		resp, err := h.handler.remoteClusterRequest(clusterId, req)
+		resp, cancel, err := h.handler.remoteClusterRequest(clusterId, req)
+		if cancel != nil {
+			defer cancel()
+		}
 		h.handler.proxy.ForwardResponse(w, resp, err)
 	}
 }
diff --git a/lib/controller/federation.go b/lib/controller/federation.go
index dc0aa908c..0e016f301 100644
--- a/lib/controller/federation.go
+++ b/lib/controller/federation.go
@@ -6,6 +6,7 @@ package controller
 
 import (
 	"bytes"
+	"context"
 	"database/sql"
 	"encoding/json"
 	"fmt"
@@ -28,10 +29,10 @@ var containerRequestsRe = regexp.MustCompile(fmt.Sprintf(pathPattern, "container
 var collectionRe = regexp.MustCompile(fmt.Sprintf(pathPattern, "collections", "4zz18"))
 var collectionByPDHRe = regexp.MustCompile(`^/arvados/v1/collections/([0-9a-fA-F]{32}\+[0-9]+)+$`)
 
-func (h *Handler) remoteClusterRequest(remoteID string, req *http.Request) (*http.Response, error) {
+func (h *Handler) remoteClusterRequest(remoteID string, req *http.Request) (*http.Response, context.CancelFunc, error) {
 	remote, ok := h.Cluster.RemoteClusters[remoteID]
 	if !ok {
-		return nil, HTTPError{fmt.Sprintf("no proxy available for cluster %v", remoteID), http.StatusNotFound}
+		return nil, nil, HTTPError{fmt.Sprintf("no proxy available for cluster %v", remoteID), http.StatusNotFound}
 	}
 	scheme := remote.Scheme
 	if scheme == "" {
@@ -39,7 +40,7 @@ func (h *Handler) remoteClusterRequest(remoteID string, req *http.Request) (*htt
 	}
 	saltedReq, err := h.saltAuthToken(req, remoteID)
 	if err != nil {
-		return nil, err
+		return nil, nil, err
 	}
 	urlOut := &url.URL{
 		Scheme:   scheme,
@@ -52,7 +53,7 @@ func (h *Handler) remoteClusterRequest(remoteID string, req *http.Request) (*htt
 	if remote.Insecure {
 		client = h.insecureClient
 	}
-	return h.proxy.ForwardRequest(saltedReq, urlOut, client)
+	return h.proxy.Do(saltedReq, urlOut, client)
 }
 
 // Buffer request body, parse form parameters in request, and then
diff --git a/lib/controller/federation_test.go b/lib/controller/federation_test.go
index 7842ad05d..f6bfca302 100644
--- a/lib/controller/federation_test.go
+++ b/lib/controller/federation_test.go
@@ -94,8 +94,8 @@ func (s *FederationSuite) SetUpTest(c *check.C) {
 func (s *FederationSuite) remoteMockHandler(w http.ResponseWriter, req *http.Request) {
 	b := &bytes.Buffer{}
 	io.Copy(b, req.Body)
-	req.Body = ioutil.NopCloser(b)
 	req.Body.Close()
+	req.Body = ioutil.NopCloser(b)
 	s.remoteMockRequests = append(s.remoteMockRequests, *req)
 }
 
diff --git a/lib/controller/handler.go b/lib/controller/handler.go
index 5e9012949..cbfaaddab 100644
--- a/lib/controller/handler.go
+++ b/lib/controller/handler.go
@@ -5,6 +5,7 @@
 package controller
 
 import (
+	"context"
 	"database/sql"
 	"errors"
 	"net"
@@ -121,10 +122,10 @@ func prepend(next http.Handler, middleware middlewareFunc) http.Handler {
 	})
 }
 
-func (h *Handler) localClusterRequest(req *http.Request) (*http.Response, error) {
+func (h *Handler) localClusterRequest(req *http.Request) (*http.Response, context.CancelFunc, error) {
 	urlOut, insecure, err := findRailsAPI(h.Cluster, h.NodeProfile)
 	if err != nil {
-		return nil, err
+		return nil, nil, err
 	}
 	urlOut = &url.URL{
 		Scheme:   urlOut.Scheme,
@@ -137,11 +138,14 @@ func (h *Handler) localClusterRequest(req *http.Request) (*http.Response, error)
 	if insecure {
 		client = h.insecureClient
 	}
-	return h.proxy.ForwardRequest(req, urlOut, client)
+	return h.proxy.Do(req, urlOut, client)
 }
 
 func (h *Handler) proxyRailsAPI(w http.ResponseWriter, req *http.Request, next http.Handler) {
-	resp, err := h.localClusterRequest(req)
+	resp, cancel, err := h.localClusterRequest(req)
+	if cancel != nil {
+		defer cancel()
+	}
 	n, err := h.proxy.ForwardResponse(w, resp, err)
 	if err != nil {
 		httpserver.Logger(req).WithError(err).WithField("bytesCopied", n).Error("error copying response body")
diff --git a/lib/controller/proxy.go b/lib/controller/proxy.go
index b7f3c4f72..c89b9b36a 100644
--- a/lib/controller/proxy.go
+++ b/lib/controller/proxy.go
@@ -45,11 +45,11 @@ var dropHeaders = map[string]bool{
 
 type ResponseFilter func(*http.Response, error) (*http.Response, error)
 
-// Forward a request to downstream service, and return response or error.
-func (p *proxy) ForwardRequest(
+// Forward a request to upstream service, and return response or error.
+func (p *proxy) Do(
 	reqIn *http.Request,
 	urlOut *url.URL,
-	client *http.Client) (*http.Response, error) {
+	client *http.Client) (*http.Response, context.CancelFunc, error) {
 
 	// Copy headers from incoming request, then add/replace proxy
 	// headers like Via and X-Forwarded-For.
@@ -70,8 +70,9 @@ func (p *proxy) ForwardRequest(
 	hdrOut.Add("Via", reqIn.Proto+" arvados-controller")
 
 	ctx := reqIn.Context()
+	var cancel context.CancelFunc
 	if p.RequestTimeout > 0 {
-		ctx, _ = context.WithDeadline(ctx, time.Now().Add(time.Duration(p.RequestTimeout)))
+		ctx, cancel = context.WithDeadline(ctx, time.Now().Add(time.Duration(p.RequestTimeout)))
 	}
 
 	reqOut := (&http.Request{
@@ -82,10 +83,11 @@ func (p *proxy) ForwardRequest(
 		Body:   reqIn.Body,
 	}).WithContext(ctx)
 
-	return client.Do(reqOut)
+	resp, err := client.Do(reqOut)
+	return resp, cancel, err
 }
 
-// Copy a response (or error) to the upstream client
+// Copy a response (or error) to the downstream client
 func (p *proxy) ForwardResponse(w http.ResponseWriter, resp *http.Response, err error) (int64, error) {
 	if err != nil {
 		if he, ok := err.(HTTPError); ok {
diff --git a/sdk/go/httpserver/id_generator.go b/sdk/go/httpserver/id_generator.go
index 6093a8a7b..14d89873b 100644
--- a/sdk/go/httpserver/id_generator.go
+++ b/sdk/go/httpserver/id_generator.go
@@ -12,6 +12,10 @@ import (
 	"time"
 )
 
+const (
+	HeaderRequestID = "X-Request-Id"
+)
+
 // IDGenerator generates alphanumeric strings suitable for use as
 // unique IDs (a given IDGenerator will never return the same ID
 // twice).
@@ -44,16 +48,12 @@ func (g *IDGenerator) Next() string {
 func AddRequestIDs(h http.Handler) http.Handler {
 	gen := &IDGenerator{Prefix: "req-"}
 	return http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) {
-		if req.Header.Get("X-Request-Id") == "" {
+		if req.Header.Get(HeaderRequestID) == "" {
 			if req.Header == nil {
 				req.Header = http.Header{}
 			}
-			req.Header.Set("X-Request-Id", gen.Next())
+			req.Header.Set(HeaderRequestID, gen.Next())
 		}
 		h.ServeHTTP(w, req)
 	})
 }
-
-func GetRequestID(h http.Header) string {
-	return h.Get("X-Request-Id")
-}

commit ac42fb64f13c7d1bbc21d99c98ee8a3769fc9684
Author: Peter Amstutz <pamstutz at veritasgenetics.com>
Date:   Mon Oct 29 14:56:26 2018 -0400

    14262: Only allow unknown PDH for images when there are remote_hosts
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz at veritasgenetics.com>

diff --git a/services/api/app/models/collection.rb b/services/api/app/models/collection.rb
index 718ffc0d0..487043ee3 100644
--- a/services/api/app/models/collection.rb
+++ b/services/api/app/models/collection.rb
@@ -496,7 +496,14 @@ class Collection < ArvadosModel
     if loc = Keep::Locator.parse(search_term)
       loc.strip_hints!
       coll_match = readable_by(*readers).where(portable_data_hash: loc.to_s).limit(1)
-      return get_compatible_images(readers, pattern, coll_match)
+      if coll_match.any? or Rails.configuration.remote_hosts.length == 0
+        return get_compatible_images(readers, pattern, coll_match)
+      else
+        # Allow bare pdh that doesn't exist in the local database so
+        # that federated container requests which refer to remotely
+        # stored containers will validate.
+        return [Collection.new(portable_data_hash: loc.to_s)]
+      end
     end
 
     if search_tag.nil? and (n = search_term.index(":"))
diff --git a/services/api/app/models/container.rb b/services/api/app/models/container.rb
index e469a49be..0d8453174 100644
--- a/services/api/app/models/container.rb
+++ b/services/api/app/models/container.rb
@@ -248,15 +248,6 @@ class Container < ArvadosModel
   def self.resolve_container_image(container_image)
     coll = Collection.for_latest_docker_image(container_image)
     if !coll
-      if loc = Keep::Locator.parse(container_image)
-        loc.strip_hints!
-        if !Collection.readable_by(current_user).where(portable_data_hash: loc.to_s).any?
-          # Allow bare pdh that doesn't exist in the local database so
-          # that federated container requests which refer to remotely
-          # stored containers will validate.
-          return loc.to_s
-        end
-      end
       raise ArvadosModel::UnresolvableContainerError.new "docker image #{container_image.inspect} not found"
     end
     coll.portable_data_hash
diff --git a/services/api/test/unit/container_request_test.rb b/services/api/test/unit/container_request_test.rb
index e4c3399c4..0fafb9903 100644
--- a/services/api/test/unit/container_request_test.rb
+++ b/services/api/test/unit/container_request_test.rb
@@ -500,7 +500,8 @@ class ContainerRequestTest < ActiveSupport::TestCase
     end
   end
 
-  ['ENOEXIST',
+  ['acbd18db4cc2f85cedef654fccc4a4d8+3',
+   'ENOEXIST',
    'arvados/apitestfixture:ENOEXIST',
   ].each do |img|
     test "container_image_for_container(#{img.inspect}) => 422" do
@@ -511,6 +512,12 @@ class ContainerRequestTest < ActiveSupport::TestCase
     end
   end
 
+  test "allow unrecognized container when there are remote_hosts" do
+    set_user_from_auth :active
+    Rails.configuration.remote_hosts = {"foooo" => "bar.com"}
+    Container.resolve_container_image('acbd18db4cc2f85cedef654fccc4a4d8+3')
+  end
+
   test "migrated docker image" do
     Rails.configuration.docker_image_formats = ['v2']
     add_docker19_migration_link

commit dcb3218d02d977a867502d044bd5041a6e695790
Author: Peter Amstutz <pamstutz at veritasgenetics.com>
Date:   Wed Oct 24 15:30:34 2018 -0400

    14262: Fix logic for when to allow unknown PDH for containers
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz at veritasgenetics.com>

diff --git a/services/api/app/models/container.rb b/services/api/app/models/container.rb
index 5d46ac29f..e469a49be 100644
--- a/services/api/app/models/container.rb
+++ b/services/api/app/models/container.rb
@@ -250,7 +250,7 @@ class Container < ArvadosModel
     if !coll
       if loc = Keep::Locator.parse(container_image)
         loc.strip_hints!
-        if !Collection.where(portable_data_hash: loc.to_s).any?
+        if !Collection.readable_by(current_user).where(portable_data_hash: loc.to_s).any?
           # Allow bare pdh that doesn't exist in the local database so
           # that federated container requests which refer to remotely
           # stored containers will validate.

commit 1ca342e29efe3e9f2c0b9b3dfc61ce806d5fdbb6
Author: Peter Amstutz <pamstutz at veritasgenetics.com>
Date:   Wed Oct 24 14:44:25 2018 -0400

    14198: Fix error responses in container POST
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz at veritasgenetics.com>

diff --git a/lib/controller/fed_containers.go b/lib/controller/fed_containers.go
index e8cc739b0..ccb2401bb 100644
--- a/lib/controller/fed_containers.go
+++ b/lib/controller/fed_containers.go
@@ -30,11 +30,18 @@ func remoteContainerRequestCreate(
 		return false
 	}
 
-	defer req.Body.Close()
+	if req.Header.Get("Content-Type") != "application/json" {
+		httpserver.Error(w, "Expected Content-Type: application/json, got "+req.Header.Get("Content-Type"), http.StatusBadRequest)
+		return true
+	}
+
+	originalBody := req.Body
+	defer originalBody.Close()
 	var request map[string]interface{}
 	err := json.NewDecoder(req.Body).Decode(&request)
 	if err != nil {
-		return false
+		httpserver.Error(w, err.Error(), http.StatusBadRequest)
+		return true
 	}
 
 	crString, ok := request["container_request"].(string)
@@ -42,7 +49,8 @@ func remoteContainerRequestCreate(
 		var crJson map[string]interface{}
 		err := json.Unmarshal([]byte(crString), &crJson)
 		if err != nil {
-			return false
+			httpserver.Error(w, err.Error(), http.StatusBadRequest)
+			return true
 		}
 
 		request["container_request"] = crJson
@@ -50,7 +58,8 @@ func remoteContainerRequestCreate(
 
 	containerRequest, ok := request["container_request"].(map[string]interface{})
 	if !ok {
-		return false
+		// Use toplevel object as the container_request object
+		containerRequest = request
 	}
 
 	// If runtime_token is not set, create a new token
@@ -68,7 +77,8 @@ func remoteContainerRequestCreate(
 		}
 
 		if len(currentUser.Authorization.Scopes) != 1 || currentUser.Authorization.Scopes[0] != "all" {
-			return false
+			httpserver.Error(w, "Token scope is not [all]", http.StatusForbidden)
+			return true
 		}
 
 		newtok, err := h.handler.createAPItoken(req, currentUser.UUID, nil)

commit 78471fbe6370154fe9478a67c29c669a605c22bb
Author: Peter Amstutz <pamstutz at veritasgenetics.com>
Date:   Tue Oct 23 14:30:39 2018 -0400

    14262: Handle container_request posted as a string parameter
    
    The value needs to be parsed as JSON a second time (this is how the
    Ruby 'arv' client submits it, unfortunately).
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz at veritasgenetics.com>

diff --git a/lib/controller/fed_collections.go b/lib/controller/fed_collections.go
index 62f98367c..70dbdc3f5 100644
--- a/lib/controller/fed_collections.go
+++ b/lib/controller/fed_collections.go
@@ -178,7 +178,7 @@ func (s *searchRemoteClusterForPDH) filterRemoteClusterResponse(resp *http.Respo
 		// Suppress returning unsuccessful result.  Maybe
 		// another request will find it.
 		// TODO collect and return error responses.
-		*s.errors = append(*s.errors, fmt.Sprintf("Response from %q: %v", s.remoteID, resp.Status))
+		*s.errors = append(*s.errors, fmt.Sprintf("Response to %q from %q: %v", httpserver.GetRequestID(resp.Header), s.remoteID, resp.Status))
 		if resp.StatusCode != 404 {
 			// Got a non-404 error response, convert into BadGateway
 			*s.statusCode = http.StatusBadGateway
diff --git a/lib/controller/fed_containers.go b/lib/controller/fed_containers.go
index 32ae25fc4..e8cc739b0 100644
--- a/lib/controller/fed_containers.go
+++ b/lib/controller/fed_containers.go
@@ -33,10 +33,23 @@ func remoteContainerRequestCreate(
 	defer req.Body.Close()
 	var request map[string]interface{}
 	err := json.NewDecoder(req.Body).Decode(&request)
+	if err != nil {
+		return false
+	}
+
+	crString, ok := request["container_request"].(string)
+	if ok {
+		var crJson map[string]interface{}
+		err := json.Unmarshal([]byte(crString), &crJson)
+		if err != nil {
+			return false
+		}
+
+		request["container_request"] = crJson
+	}
 
 	containerRequest, ok := request["container_request"].(map[string]interface{})
 	if !ok {
-		log.Printf("wah wah")
 		return false
 	}
 
diff --git a/sdk/go/httpserver/id_generator.go b/sdk/go/httpserver/id_generator.go
index 6452136d8..6093a8a7b 100644
--- a/sdk/go/httpserver/id_generator.go
+++ b/sdk/go/httpserver/id_generator.go
@@ -53,3 +53,7 @@ func AddRequestIDs(h http.Handler) http.Handler {
 		h.ServeHTTP(w, req)
 	})
 }
+
+func GetRequestID(h http.Header) string {
+	return h.Get("X-Request-Id")
+}

commit 9b5ef51ec402b8071d9d695bf08f4a59540ca864
Author: Peter Amstutz <pamstutz at veritasgenetics.com>
Date:   Tue Oct 23 13:29:53 2018 -0400

    14262: Fix crunch-run tests
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz at veritasgenetics.com>

diff --git a/services/crunch-run/crunchrun_test.go b/services/crunch-run/crunchrun_test.go
index eb4f220e2..2f254b5bd 100644
--- a/services/crunch-run/crunchrun_test.go
+++ b/services/crunch-run/crunchrun_test.go
@@ -443,6 +443,10 @@ func (s *TestSuite) TestLoadImage(c *C) {
 	cr, err := NewContainerRunner(s.client, &ArvTestClient{}, kc, s.docker, "zzzzz-zzzzz-zzzzzzzzzzzzzzz")
 	c.Assert(err, IsNil)
 
+	cr.MkArvClient = func(token string) (IArvadosClient, IKeepClient, error) {
+		return &ArvTestClient{}, kc, nil
+	}
+
 	_, err = cr.Docker.ImageRemove(nil, hwImageId, dockertypes.ImageRemoveOptions{})
 	c.Check(err, IsNil)
 
@@ -488,6 +492,9 @@ func (ArvErrorTestClient) Create(resourceType string,
 }
 
 func (ArvErrorTestClient) Call(method, resourceType, uuid, action string, parameters arvadosclient.Dict, output interface{}) error {
+	if method == "GET" && resourceType == "containers" && action == "auth" {
+		return nil
+	}
 	return errors.New("ArvError")
 }
 
@@ -548,9 +555,13 @@ func (s *TestSuite) TestLoadImageArvError(c *C) {
 	// (1) Arvados error
 	kc := &KeepTestClient{}
 	defer kc.Close()
-	cr, err := NewContainerRunner(s.client, ArvErrorTestClient{}, kc, nil, "zzzzz-zzzzz-zzzzzzzzzzzzzzz")
+	cr, err := NewContainerRunner(s.client, &ArvErrorTestClient{}, kc, nil, "zzzzz-zzzzz-zzzzzzzzzzzzzzz")
 	c.Assert(err, IsNil)
+
 	cr.Container.ContainerImage = hwPDH
+	cr.MkArvClient = func(token string) (IArvadosClient, IKeepClient, error) {
+		return &ArvErrorTestClient{}, &KeepTestClient{}, nil
+	}
 
 	err = cr.LoadImage()
 	c.Check(err.Error(), Equals, "While getting container image collection: ArvError")
@@ -558,9 +569,13 @@ func (s *TestSuite) TestLoadImageArvError(c *C) {
 
 func (s *TestSuite) TestLoadImageKeepError(c *C) {
 	// (2) Keep error
-	cr, err := NewContainerRunner(s.client, &ArvTestClient{}, &KeepErrorTestClient{}, s.docker, "zzzzz-zzzzz-zzzzzzzzzzzzzzz")
+	kc := &KeepErrorTestClient{}
+	cr, err := NewContainerRunner(s.client, &ArvTestClient{}, kc, s.docker, "zzzzz-zzzzz-zzzzzzzzzzzzzzz")
 	c.Assert(err, IsNil)
 	cr.Container.ContainerImage = hwPDH
+	cr.MkArvClient = func(token string) (IArvadosClient, IKeepClient, error) {
+		return &ArvTestClient{}, kc, nil
+	}
 
 	err = cr.LoadImage()
 	c.Assert(err, NotNil)
@@ -569,9 +584,13 @@ func (s *TestSuite) TestLoadImageKeepError(c *C) {
 
 func (s *TestSuite) TestLoadImageCollectionError(c *C) {
 	// (3) Collection doesn't contain image
-	cr, err := NewContainerRunner(s.client, &ArvTestClient{}, &KeepReadErrorTestClient{}, nil, "zzzzz-zzzzz-zzzzzzzzzzzzzzz")
+	kc := &KeepReadErrorTestClient{}
+	cr, err := NewContainerRunner(s.client, &ArvTestClient{}, kc, nil, "zzzzz-zzzzz-zzzzzzzzzzzzzzz")
 	c.Assert(err, IsNil)
 	cr.Container.ContainerImage = otherPDH
+	cr.MkArvClient = func(token string) (IArvadosClient, IKeepClient, error) {
+		return &ArvTestClient{}, kc, nil
+	}
 
 	err = cr.LoadImage()
 	c.Check(err.Error(), Equals, "First file in the container image collection does not end in .tar")
@@ -579,9 +598,13 @@ func (s *TestSuite) TestLoadImageCollectionError(c *C) {
 
 func (s *TestSuite) TestLoadImageKeepReadError(c *C) {
 	// (4) Collection doesn't contain image
-	cr, err := NewContainerRunner(s.client, &ArvTestClient{}, &KeepReadErrorTestClient{}, s.docker, "zzzzz-zzzzz-zzzzzzzzzzzzzzz")
+	kc := &KeepReadErrorTestClient{}
+	cr, err := NewContainerRunner(s.client, &ArvTestClient{}, kc, s.docker, "zzzzz-zzzzz-zzzzzzzzzzzzzzz")
 	c.Assert(err, IsNil)
 	cr.Container.ContainerImage = hwPDH
+	cr.MkArvClient = func(token string) (IArvadosClient, IKeepClient, error) {
+		return &ArvTestClient{}, kc, nil
+	}
 
 	err = cr.LoadImage()
 	c.Check(err, NotNil)
@@ -629,6 +652,10 @@ func (s *TestSuite) TestRunContainer(c *C) {
 	cr, err := NewContainerRunner(s.client, &ArvTestClient{}, kc, s.docker, "zzzzz-zzzzz-zzzzzzzzzzzzzzz")
 	c.Assert(err, IsNil)
 
+	cr.MkArvClient = func(token string) (IArvadosClient, IKeepClient, error) {
+		return &ArvTestClient{}, kc, nil
+	}
+
 	var logs TestLogs
 	cr.NewLogWriter = logs.NewTestLoggingWriter
 	cr.Container.ContainerImage = hwPDH
@@ -772,8 +799,8 @@ func (s *TestSuite) fullRunHelper(c *C, record string, extraMounts []string, exi
 		}
 		return d, err
 	}
-	cr.MkArvClient = func(token string) (IArvadosClient, error) {
-		return &ArvTestClient{secretMounts: secretMounts}, nil
+	cr.MkArvClient = func(token string) (IArvadosClient, IKeepClient, error) {
+		return &ArvTestClient{secretMounts: secretMounts}, &KeepTestClient{}, nil
 	}
 
 	if extraMounts != nil && len(extraMounts) > 0 {
@@ -1069,8 +1096,8 @@ func (s *TestSuite) testStopContainer(c *C, setup func(cr *ContainerRunner)) {
 	cr, err := NewContainerRunner(s.client, api, kc, s.docker, "zzzzz-zzzzz-zzzzzzzzzzzzzzz")
 	c.Assert(err, IsNil)
 	cr.RunArvMount = func([]string, string) (*exec.Cmd, error) { return nil, nil }
-	cr.MkArvClient = func(token string) (IArvadosClient, error) {
-		return &ArvTestClient{}, nil
+	cr.MkArvClient = func(token string) (IArvadosClient, IKeepClient, error) {
+		return &ArvTestClient{}, &KeepTestClient{}, nil
 	}
 	setup(cr)
 
@@ -1553,8 +1580,8 @@ func (s *TestSuite) stdoutErrorRunHelper(c *C, record string, fn func(t *TestDoc
 	c.Assert(err, IsNil)
 	am := &ArvMountCmdLine{}
 	cr.RunArvMount = am.ArvMountTest
-	cr.MkArvClient = func(token string) (IArvadosClient, error) {
-		return &ArvTestClient{}, nil
+	cr.MkArvClient = func(token string) (IArvadosClient, IKeepClient, error) {
+		return &ArvTestClient{}, &KeepTestClient{}, nil
 	}
 
 	err = cr.Run()

commit 07115948f7d6281d7d8dada4608f5b3c991a84c8
Author: Peter Amstutz <pamstutz at veritasgenetics.com>
Date:   Tue Oct 23 09:26:48 2018 -0400

    14262: Missing file
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz at veritasgenetics.com>

diff --git a/lib/controller/fed_containers.go b/lib/controller/fed_containers.go
new file mode 100644
index 000000000..32ae25fc4
--- /dev/null
+++ b/lib/controller/fed_containers.go
@@ -0,0 +1,78 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package controller
+
+import (
+	"bytes"
+	"encoding/json"
+	"fmt"
+	"io/ioutil"
+	"log"
+	"net/http"
+
+	"git.curoverse.com/arvados.git/sdk/go/auth"
+	"git.curoverse.com/arvados.git/sdk/go/httpserver"
+)
+
+func remoteContainerRequestCreate(
+	h *genericFederatedRequestHandler,
+	effectiveMethod string,
+	clusterId *string,
+	uuid string,
+	remainder string,
+	w http.ResponseWriter,
+	req *http.Request) bool {
+
+	if effectiveMethod != "POST" || uuid != "" || remainder != "" ||
+		*clusterId == "" || *clusterId == h.handler.Cluster.ClusterID {
+		return false
+	}
+
+	defer req.Body.Close()
+	var request map[string]interface{}
+	err := json.NewDecoder(req.Body).Decode(&request)
+
+	containerRequest, ok := request["container_request"].(map[string]interface{})
+	if !ok {
+		log.Printf("wah wah")
+		return false
+	}
+
+	// If runtime_token is not set, create a new token
+	if _, ok := containerRequest["runtime_token"]; !ok {
+		log.Printf("ok %v", ok)
+
+		// First make sure supplied token is valid.
+		creds := auth.NewCredentials()
+		creds.LoadTokensFromHTTPRequest(req)
+
+		currentUser, err := h.handler.validateAPItoken(req, creds.Tokens[0])
+		if err != nil {
+			httpserver.Error(w, err.Error(), http.StatusForbidden)
+			return true
+		}
+
+		if len(currentUser.Authorization.Scopes) != 1 || currentUser.Authorization.Scopes[0] != "all" {
+			return false
+		}
+
+		newtok, err := h.handler.createAPItoken(req, currentUser.UUID, nil)
+		if err != nil {
+			httpserver.Error(w, err.Error(), http.StatusForbidden)
+			return true
+		}
+		containerRequest["runtime_token"] = newtok.TokenV2()
+	}
+
+	newbody, err := json.Marshal(request)
+	buf := bytes.NewBuffer(newbody)
+	req.Body = ioutil.NopCloser(buf)
+	req.ContentLength = int64(buf.Len())
+	req.Header.Set("Content-Length", fmt.Sprintf("%v", buf.Len()))
+
+	resp, err := h.handler.remoteClusterRequest(*clusterId, req)
+	h.handler.proxy.ForwardResponse(w, resp, err)
+	return true
+}

commit dea40b3efdb8b6b948cda5a1601fcbabb831f2fc
Author: Peter Amstutz <pamstutz at veritasgenetics.com>
Date:   Mon Oct 22 17:01:52 2018 -0400

    14262: Fix tests
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz at veritasgenetics.com>

diff --git a/lib/controller/proxy.go b/lib/controller/proxy.go
index 9aecdc1b2..b7f3c4f72 100644
--- a/lib/controller/proxy.go
+++ b/lib/controller/proxy.go
@@ -71,9 +71,7 @@ func (p *proxy) ForwardRequest(
 
 	ctx := reqIn.Context()
 	if p.RequestTimeout > 0 {
-		var cancel context.CancelFunc
-		ctx, cancel = context.WithDeadline(ctx, time.Now().Add(time.Duration(p.RequestTimeout)))
-		defer cancel()
+		ctx, _ = context.WithDeadline(ctx, time.Now().Add(time.Duration(p.RequestTimeout)))
 	}
 
 	reqOut := (&http.Request{
diff --git a/sdk/go/arvados/client.go b/sdk/go/arvados/client.go
index 923cecdd5..254a0fa7d 100644
--- a/sdk/go/arvados/client.go
+++ b/sdk/go/arvados/client.go
@@ -103,7 +103,7 @@ var reqIDGen = httpserver.IDGenerator{Prefix: "req-"}
 // (*http.Client)Do().
 func (c *Client) Do(req *http.Request) (*http.Response, error) {
 	if c.AuthToken != "" {
-		req.Header.Add("Authorization", "OAuth2 "+c.AuthToken)
+		req.Header.Set("Authorization", "OAuth2 "+c.AuthToken)
 	}
 
 	if req.Header.Get("X-Request-Id") == "" {
@@ -215,7 +215,7 @@ func (c *Client) MakeRequest(method, path string, body io.Reader, params interfa
 	req.Header.Set("Content-type", "application/x-www-form-urlencoded")
 
 	if c.AuthToken != "" {
-		req.Header.Add("Authorization", "OAuth2 "+c.AuthToken)
+		req.Header.Set("Authorization", "OAuth2 "+c.AuthToken)
 	}
 
 	if req.Header.Get("X-Request-Id") == "" {
diff --git a/services/api/app/models/container.rb b/services/api/app/models/container.rb
index 7d8cc00f2..5d46ac29f 100644
--- a/services/api/app/models/container.rb
+++ b/services/api/app/models/container.rb
@@ -248,11 +248,14 @@ class Container < ArvadosModel
   def self.resolve_container_image(container_image)
     coll = Collection.for_latest_docker_image(container_image)
     if !coll
-      # Allow bare pdh without any additional checking otherwise
-      # federated container requests won't work.
       if loc = Keep::Locator.parse(container_image)
         loc.strip_hints!
-        return loc.to_s
+        if !Collection.where(portable_data_hash: loc.to_s).any?
+          # Allow bare pdh that doesn't exist in the local database so
+          # that federated container requests which refer to remotely
+          # stored containers will validate.
+          return loc.to_s
+        end
       end
       raise ArvadosModel::UnresolvableContainerError.new "docker image #{container_image.inspect} not found"
     end
diff --git a/services/api/test/unit/container_request_test.rb b/services/api/test/unit/container_request_test.rb
index 8ff216e28..e4c3399c4 100644
--- a/services/api/test/unit/container_request_test.rb
+++ b/services/api/test/unit/container_request_test.rb
@@ -500,8 +500,7 @@ class ContainerRequestTest < ActiveSupport::TestCase
     end
   end
 
-  ['acbd18db4cc2f85cedef654fccc4a4d8+3',
-   'ENOEXIST',
+  ['ENOEXIST',
    'arvados/apitestfixture:ENOEXIST',
   ].each do |img|
     test "container_image_for_container(#{img.inspect}) => 422" do

commit 4956c96c8bf86d6512231b8ea4118dee9e918779
Author: Peter Amstutz <pamstutz at veritasgenetics.com>
Date:   Mon Oct 22 12:13:38 2018 -0400

    14262: Tests for setting and checking container tokens.
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz at veritasgenetics.com>

diff --git a/lib/controller/federation.go b/lib/controller/federation.go
index 18f3e4479..dc0aa908c 100644
--- a/lib/controller/federation.go
+++ b/lib/controller/federation.go
@@ -185,9 +185,9 @@ func (h *Handler) createAPItoken(req *http.Request, userUUID string, scopes []st
 (uuid, api_token, expires_at, scopes,
 user_id,
 api_client_id, created_at, updated_at)
-VALUES ($1, $2, now() + INTERVAL '2 weeks', $3,
+VALUES ($1, $2, CURRENT_TIMESTAMP + INTERVAL '2 weeks', $3,
 (SELECT id FROM users WHERE users.uuid=$4 LIMIT 1),
-0, now(), now())`,
+0, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP)`,
 		uuid, token, string(scopesjson), userUUID)
 
 	if err != nil {
diff --git a/lib/controller/federation_test.go b/lib/controller/federation_test.go
index 23d5d7ca7..7842ad05d 100644
--- a/lib/controller/federation_test.go
+++ b/lib/controller/federation_test.go
@@ -5,8 +5,10 @@
 package controller
 
 import (
+	"bytes"
 	"encoding/json"
 	"fmt"
+	"io"
 	"io/ioutil"
 	"net/http"
 	"net/http/httptest"
@@ -90,6 +92,10 @@ func (s *FederationSuite) SetUpTest(c *check.C) {
 }
 
 func (s *FederationSuite) remoteMockHandler(w http.ResponseWriter, req *http.Request) {
+	b := &bytes.Buffer{}
+	io.Copy(b, req.Body)
+	req.Body = ioutil.NopCloser(b)
+	req.Body.Close()
 	s.remoteMockRequests = append(s.remoteMockRequests, *req)
 }
 
@@ -567,6 +573,67 @@ func (s *FederationSuite) TestCreateRemoteContainerRequest(c *check.C) {
 	c.Check(strings.HasPrefix(cr.UUID, "zzzzz-"), check.Equals, true)
 }
 
+func (s *FederationSuite) TestCreateRemoteContainerRequestCheckRuntimeToken(c *check.C) {
+	// Send request to zmock and check that outgoing request has
+	// runtime_token sent (because runtime_token isn't returned in
+	// the response).
+
+	defer s.localServiceReturns404(c).Close()
+	// pass cluster_id via query parameter, this allows arvados-controller
+	// to avoid parsing the body
+	req := httptest.NewRequest("POST", "/arvados/v1/container_requests?cluster_id=zmock",
+		strings.NewReader(`{
+  "container_request": {
+    "name": "hello world",
+    "state": "Uncommitted",
+    "output_path": "/",
+    "container_image": "123",
+    "command": ["abc"]
+  }
+}
+`))
+	req.Header.Set("Authorization", "Bearer "+arvadostest.ActiveToken)
+	req.Header.Set("Content-type", "application/json")
+	resp := s.testRequest(req)
+	c.Check(resp.StatusCode, check.Equals, http.StatusOK)
+	var cr struct {
+		arvados.ContainerRequest `json:"container_request"`
+	}
+	c.Check(json.NewDecoder(s.remoteMockRequests[0].Body).Decode(&cr), check.IsNil)
+	c.Check(strings.HasPrefix(cr.ContainerRequest.RuntimeToken, "v2/"), check.Equals, true)
+}
+
+func (s *FederationSuite) TestCreateRemoteContainerRequestCheckSetRuntimeToken(c *check.C) {
+	// Send request to zmock and check that the runtime_token
+	// supplied by the client is forwarded unchanged in the
+	// outgoing request.
+
+	defer s.localServiceReturns404(c).Close()
+	// pass cluster_id via query parameter, this allows arvados-controller
+	// to avoid parsing the body
+	req := httptest.NewRequest("POST", "/arvados/v1/container_requests?cluster_id=zmock",
+		strings.NewReader(`{
+  "container_request": {
+    "name": "hello world",
+    "state": "Uncommitted",
+    "output_path": "/",
+    "container_image": "123",
+    "command": ["abc"],
+    "runtime_token": "xyz"
+  }
+}
+`))
+	req.Header.Set("Authorization", "Bearer "+arvadostest.ActiveToken)
+	req.Header.Set("Content-type", "application/json")
+	resp := s.testRequest(req)
+	c.Check(resp.StatusCode, check.Equals, http.StatusOK)
+	var cr struct {
+		arvados.ContainerRequest `json:"container_request"`
+	}
+	c.Check(json.NewDecoder(s.remoteMockRequests[0].Body).Decode(&cr), check.IsNil)
+	c.Check(cr.ContainerRequest.RuntimeToken, check.Equals, "xyz")
+}
+
 func (s *FederationSuite) TestCreateRemoteContainerRequestError(c *check.C) {
 	defer s.localServiceReturns404(c).Close()
 	// pass cluster_id via query parameter, this allows arvados-controller
diff --git a/sdk/go/arvados/container.go b/sdk/go/arvados/container.go
index 2622c1370..b70b4ac91 100644
--- a/sdk/go/arvados/container.go
+++ b/sdk/go/arvados/container.go
@@ -56,6 +56,7 @@ type ContainerRequest struct {
 	UseExisting             bool                   `json:"use_existing"`
 	LogUUID                 string                 `json:"log_uuid"`
 	OutputUUID              string                 `json:"output_uuid"`
+	RuntimeToken            string                 `json:"runtime_token"`
 }
 
 // Mount is special behavior to attach to a filesystem path or device.

commit 6c3c7b2a8c6491e2ddc585ac194abaf685acec41
Author: Peter Amstutz <pamstutz at veritasgenetics.com>
Date:   Mon Oct 22 11:02:02 2018 -0400

    14262: Add createAPIToken, with test
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz at veritasgenetics.com>

diff --git a/lib/controller/fed_generic.go b/lib/controller/fed_generic.go
index 0630217b6..63e61e690 100644
--- a/lib/controller/fed_generic.go
+++ b/lib/controller/fed_generic.go
@@ -17,10 +17,20 @@ import (
 	"git.curoverse.com/arvados.git/sdk/go/httpserver"
 )
 
+type federatedRequestDelegate func(
+	h *genericFederatedRequestHandler,
+	effectiveMethod string,
+	clusterId *string,
+	uuid string,
+	remainder string,
+	w http.ResponseWriter,
+	req *http.Request) bool
+
 type genericFederatedRequestHandler struct {
-	next    http.Handler
-	handler *Handler
-	matcher *regexp.Regexp
+	next      http.Handler
+	handler   *Handler
+	matcher   *regexp.Regexp
+	delegates []federatedRequestDelegate
 }
 
 func (h *genericFederatedRequestHandler) remoteQueryUUIDs(w http.ResponseWriter,
@@ -285,6 +295,12 @@ func (h *genericFederatedRequestHandler) ServeHTTP(w http.ResponseWriter, req *h
 		return
 	}
 
+	for _, d := range h.delegates {
+		if d(h, effectiveMethod, &clusterId, m[1], m[3], w, req) {
+			return
+		}
+	}
+
 	if clusterId == "" || clusterId == h.handler.Cluster.ClusterID {
 		h.next.ServeHTTP(w, req)
 	} else {
diff --git a/lib/controller/federation.go b/lib/controller/federation.go
index 03d2f3fab..18f3e4479 100644
--- a/lib/controller/federation.go
+++ b/lib/controller/federation.go
@@ -7,6 +7,7 @@ package controller
 import (
 	"bytes"
 	"database/sql"
+	"encoding/json"
 	"fmt"
 	"io"
 	"io/ioutil"
@@ -17,6 +18,7 @@ import (
 
 	"git.curoverse.com/arvados.git/sdk/go/arvados"
 	"git.curoverse.com/arvados.git/sdk/go/auth"
+	"github.com/jmcvetta/randutil"
 )
 
 var pathPattern = `^/arvados/v1/%s(/([0-9a-z]{5})-%s-[0-9a-z]{15})?(.*)$`
@@ -82,12 +84,18 @@ func loadParamsFromForm(req *http.Request) error {
 
 func (h *Handler) setupProxyRemoteCluster(next http.Handler) http.Handler {
 	mux := http.NewServeMux()
-	mux.Handle("/arvados/v1/workflows", &genericFederatedRequestHandler{next, h, wfRe})
-	mux.Handle("/arvados/v1/workflows/", &genericFederatedRequestHandler{next, h, wfRe})
-	mux.Handle("/arvados/v1/containers", &genericFederatedRequestHandler{next, h, containersRe})
-	mux.Handle("/arvados/v1/containers/", &genericFederatedRequestHandler{next, h, containersRe})
-	mux.Handle("/arvados/v1/container_requests", &genericFederatedRequestHandler{next, h, containerRequestsRe})
-	mux.Handle("/arvados/v1/container_requests/", &genericFederatedRequestHandler{next, h, containerRequestsRe})
+
+	wfHandler := &genericFederatedRequestHandler{next, h, wfRe, nil}
+	containersHandler := &genericFederatedRequestHandler{next, h, containersRe, nil}
+	containerRequestsHandler := &genericFederatedRequestHandler{next, h, containerRequestsRe,
+		[]federatedRequestDelegate{remoteContainerRequestCreate}}
+
+	mux.Handle("/arvados/v1/workflows", wfHandler)
+	mux.Handle("/arvados/v1/workflows/", wfHandler)
+	mux.Handle("/arvados/v1/containers", containersHandler)
+	mux.Handle("/arvados/v1/containers/", containersHandler)
+	mux.Handle("/arvados/v1/container_requests", containerRequestsHandler)
+	mux.Handle("/arvados/v1/container_requests/", containerRequestsHandler)
 	mux.Handle("/arvados/v1/collections", next)
 	mux.Handle("/arvados/v1/collections/", &collectionFederatedRequestHandler{next, h})
 	mux.Handle("/", next)
@@ -118,12 +126,79 @@ type CurrentUser struct {
 	UUID          string
 }
 
-func (h *Handler) validateAPItoken(req *http.Request, user *CurrentUser) error {
+// validateAPItoken extracts the token from the provided http request,
+// checks it against the api_client_authorizations table in the database,
+// and fills in the token scope and user UUID.  Does not handle remote
+// tokens unless they are already in the database and not expired.
+func (h *Handler) validateAPItoken(req *http.Request, token string) (*CurrentUser, error) {
+	user := CurrentUser{Authorization: arvados.APIClientAuthorization{APIToken: token}}
 	db, err := h.db(req)
 	if err != nil {
-		return err
+		return nil, err
+	}
+
+	var uuid string
+	if strings.HasPrefix(token, "v2/") {
+		sp := strings.Split(token, "/")
+		uuid = sp[1]
+		token = sp[2]
+	}
+	user.Authorization.APIToken = token
+	var scopes string
+	err = db.QueryRowContext(req.Context(), `SELECT api_client_authorizations.uuid, api_client_authorizations.scopes, users.uuid FROM api_client_authorizations JOIN users on api_client_authorizations.user_id=users.id WHERE api_token=$1 AND (expires_at IS NULL OR expires_at > current_timestamp) LIMIT 1`, token).Scan(&user.Authorization.UUID, &scopes, &user.UUID)
+	if err != nil {
+		return nil, err
+	}
+	if uuid != "" && user.Authorization.UUID != uuid {
+		return nil, fmt.Errorf("UUID embedded in v2 token did not match record")
+	}
+	err = json.Unmarshal([]byte(scopes), &user.Authorization.Scopes)
+	if err != nil {
+		return nil, err
+	}
+	return &user, nil
+}
+
+func (h *Handler) createAPItoken(req *http.Request, userUUID string, scopes []string) (*arvados.APIClientAuthorization, error) {
+	db, err := h.db(req)
+	if err != nil {
+		return nil, err
+	}
+	rd, err := randutil.String(15, "abcdefghijklmnopqrstuvwxyz0123456789")
+	if err != nil {
+		return nil, err
+	}
+	uuid := fmt.Sprintf("%v-gj3su-%v", h.Cluster.ClusterID, rd)
+	token, err := randutil.String(50, "abcdefghijklmnopqrstuvwxyz0123456789")
+	if err != nil {
+		return nil, err
+	}
+	if len(scopes) == 0 {
+		scopes = append(scopes, "all")
 	}
-	return db.QueryRowContext(req.Context(), `SELECT api_client_authorizations.uuid, users.uuid FROM api_client_authorizations JOIN users on api_client_authorizations.user_id=users.id WHERE api_token=$1 AND (expires_at IS NULL OR expires_at > current_timestamp) LIMIT 1`, user.Authorization.APIToken).Scan(&user.Authorization.UUID, &user.UUID)
+	scopesjson, err := json.Marshal(scopes)
+	if err != nil {
+		return nil, err
+	}
+	_, err = db.ExecContext(req.Context(),
+		`INSERT INTO api_client_authorizations
+(uuid, api_token, expires_at, scopes,
+user_id,
+api_client_id, created_at, updated_at)
+VALUES ($1, $2, now() + INTERVAL '2 weeks', $3,
+(SELECT id FROM users WHERE users.uuid=$4 LIMIT 1),
+0, now(), now())`,
+		uuid, token, string(scopesjson), userUUID)
+
+	if err != nil {
+		return nil, err
+	}
+
+	return &arvados.APIClientAuthorization{
+		UUID:      uuid,
+		APIToken:  token,
+		ExpiresAt: "",
+		Scopes:    scopes}, nil
 }
 
 // Extract the auth token supplied in req, and replace it with a
@@ -165,11 +240,10 @@ func (h *Handler) saltAuthToken(req *http.Request, remote string) (updatedReq *h
 		// If the token exists in our own database, salt it
 		// for the remote. Otherwise, assume it was issued by
 		// the remote, and pass it through unmodified.
-		currentUser := CurrentUser{Authorization: arvados.APIClientAuthorization{APIToken: creds.Tokens[0]}}
-		err = h.validateAPItoken(req, &currentUser)
+		currentUser, err := h.validateAPItoken(req, creds.Tokens[0])
 		if err == sql.ErrNoRows {
 			// Not ours; pass through unmodified.
-			token = currentUser.Authorization.APIToken
+			token = creds.Tokens[0]
 		} else if err != nil {
 			return nil, err
 		} else {
diff --git a/lib/controller/handler_test.go b/lib/controller/handler_test.go
index 963fd1159..746b9242f 100644
--- a/lib/controller/handler_test.go
+++ b/lib/controller/handler_test.go
@@ -130,3 +130,39 @@ func (s *HandlerSuite) TestProxyRedirect(c *check.C) {
 	c.Check(resp.Code, check.Equals, http.StatusFound)
 	c.Check(resp.Header().Get("Location"), check.Matches, `https://0.0.0.0:1/auth/joshid\?return_to=foo&?`)
 }
+
+func (s *HandlerSuite) TestValidateV1APIToken(c *check.C) {
+	req := httptest.NewRequest("GET", "/arvados/v1/users/current", nil)
+	user, err := s.handler.(*Handler).validateAPItoken(req, arvadostest.ActiveToken)
+	c.Assert(err, check.IsNil)
+	c.Check(user.Authorization.UUID, check.Equals, arvadostest.ActiveTokenUUID)
+	c.Check(user.Authorization.APIToken, check.Equals, arvadostest.ActiveToken)
+	c.Check(user.Authorization.Scopes, check.DeepEquals, []string{"all"})
+	c.Check(user.UUID, check.Equals, arvadostest.ActiveUserUUID)
+}
+
+func (s *HandlerSuite) TestValidateV2APIToken(c *check.C) {
+	req := httptest.NewRequest("GET", "/arvados/v1/users/current", nil)
+	user, err := s.handler.(*Handler).validateAPItoken(req, arvadostest.ActiveTokenV2)
+	c.Assert(err, check.IsNil)
+	c.Check(user.Authorization.UUID, check.Equals, arvadostest.ActiveTokenUUID)
+	c.Check(user.Authorization.APIToken, check.Equals, arvadostest.ActiveToken)
+	c.Check(user.Authorization.Scopes, check.DeepEquals, []string{"all"})
+	c.Check(user.UUID, check.Equals, arvadostest.ActiveUserUUID)
+	c.Check(user.Authorization.TokenV2(), check.Equals, arvadostest.ActiveTokenV2)
+}
+
+func (s *HandlerSuite) TestCreateAPIToken(c *check.C) {
+	req := httptest.NewRequest("GET", "/arvados/v1/users/current", nil)
+	auth, err := s.handler.(*Handler).createAPItoken(req, arvadostest.ActiveUserUUID, nil)
+	c.Assert(err, check.IsNil)
+	c.Check(auth.Scopes, check.DeepEquals, []string{"all"})
+
+	user, err := s.handler.(*Handler).validateAPItoken(req, auth.TokenV2())
+	c.Assert(err, check.IsNil)
+	c.Check(user.Authorization.UUID, check.Equals, auth.UUID)
+	c.Check(user.Authorization.APIToken, check.Equals, auth.APIToken)
+	c.Check(user.Authorization.Scopes, check.DeepEquals, []string{"all"})
+	c.Check(user.UUID, check.Equals, arvadostest.ActiveUserUUID)
+	c.Check(user.Authorization.TokenV2(), check.Equals, auth.TokenV2())
+}
diff --git a/sdk/go/arvados/api_client_authorization.go b/sdk/go/arvados/api_client_authorization.go
index ec0239eb3..17cff235d 100644
--- a/sdk/go/arvados/api_client_authorization.go
+++ b/sdk/go/arvados/api_client_authorization.go
@@ -6,8 +6,10 @@ package arvados
 
 // APIClientAuthorization is an arvados#apiClientAuthorization resource.
 type APIClientAuthorization struct {
-	UUID     string `json:"uuid"`
-	APIToken string `json:"api_token"`
+	UUID      string   `json:"uuid,omitempty"`
+	APIToken  string   `json:"api_token,omitempty"`
+	ExpiresAt string   `json:"expires_at,omitempty"`
+	Scopes    []string `json:"scopes,omitempty"`
 }
 
 // APIClientAuthorizationList is an arvados#apiClientAuthorizationList resource.
diff --git a/sdk/go/arvados/client.go b/sdk/go/arvados/client.go
index cca9f9bf1..923cecdd5 100644
--- a/sdk/go/arvados/client.go
+++ b/sdk/go/arvados/client.go
@@ -193,37 +193,62 @@ func anythingToValues(params interface{}) (url.Values, error) {
 	return urlValues, nil
 }
 
-// RequestAndDecode performs an API request and unmarshals the
-// response (which must be JSON) into dst. Method and body arguments
-// are the same as for http.NewRequest(). The given path is added to
-// the server's scheme/host/port to form the request URL. The given
-// params are passed via POST form or query string.
-//
-// path must not contain a query string.
-func (c *Client) RequestAndDecode(dst interface{}, method, path string, body io.Reader, params interface{}) error {
-	if body, ok := body.(io.Closer); ok {
-		// Ensure body is closed even if we error out early
-		defer body.Close()
-	}
+func (c *Client) MakeRequest(method, path string, body io.Reader, params interface{}) (*http.Request, error) {
 	urlString := c.apiURL(path)
 	urlValues, err := anythingToValues(params)
 	if err != nil {
-		return err
+		return nil, err
 	}
 	if (method == "GET" || body != nil) && urlValues != nil {
 		// FIXME: what if params don't fit in URL
 		u, err := url.Parse(urlString)
 		if err != nil {
-			return err
+			return nil, err
 		}
 		u.RawQuery = urlValues.Encode()
 		urlString = u.String()
 	}
 	req, err := http.NewRequest(method, urlString, body)
 	if err != nil {
-		return err
+		return nil, err
 	}
 	req.Header.Set("Content-type", "application/x-www-form-urlencoded")
+
+	if c.AuthToken != "" {
+		req.Header.Add("Authorization", "OAuth2 "+c.AuthToken)
+	}
+
+	if req.Header.Get("X-Request-Id") == "" {
+		reqid, _ := c.context().Value(contextKeyRequestID).(string)
+		if reqid == "" {
+			reqid = reqIDGen.Next()
+		}
+		if req.Header == nil {
+			req.Header = http.Header{"X-Request-Id": {reqid}}
+		} else {
+			req.Header.Set("X-Request-Id", reqid)
+		}
+	}
+
+	return req, nil
+}
+
+// RequestAndDecode performs an API request and unmarshals the
+// response (which must be JSON) into dst. Method and body arguments
+// are the same as for http.NewRequest(). The given path is added to
+// the server's scheme/host/port to form the request URL. The given
+// params are passed via POST form or query string.
+//
+// path must not contain a query string.
+func (c *Client) RequestAndDecode(dst interface{}, method, path string, body io.Reader, params interface{}) error {
+	if body, ok := body.(io.Closer); ok {
+		// Ensure body is closed even if we error out early
+		defer body.Close()
+	}
+	req, err := c.MakeRequest(method, path, body, params)
+	if err != nil {
+		return err
+	}
 	return c.DoAndDecode(dst, req)
 }
 
diff --git a/sdk/go/arvadostest/fixtures.go b/sdk/go/arvadostest/fixtures.go
index 114faf17b..e0f248313 100644
--- a/sdk/go/arvadostest/fixtures.go
+++ b/sdk/go/arvadostest/fixtures.go
@@ -8,6 +8,7 @@ package arvadostest
 const (
 	SpectatorToken          = "zw2f4gwx8hw8cjre7yp6v1zylhrhn3m5gvjq73rtpwhmknrybu"
 	ActiveToken             = "3kg6k6lzmp9kj5cpkcoxie963cmvjahbt2fod9zru30k1jqdmi"
+	ActiveTokenUUID         = "zzzzz-gj3su-077z32aux8dg2s1"
 	ActiveTokenV2           = "v2/zzzzz-gj3su-077z32aux8dg2s1/3kg6k6lzmp9kj5cpkcoxie963cmvjahbt2fod9zru30k1jqdmi"
 	AdminToken              = "4axaw8zxe0qm22wa6urpp5nskcne8z88cvbupv653y1njyi05h"
 	AnonymousToken          = "4kg6k6lzmp9kj4cpkcoxie964cmvjahbt4fod9zru44k4jqdmi"
diff --git a/vendor/vendor.json b/vendor/vendor.json
index aa6b2d773..9abb9bb15 100644
--- a/vendor/vendor.json
+++ b/vendor/vendor.json
@@ -313,6 +313,12 @@
 			"revisionTime": "2015-07-11T00:45:18Z"
 		},
 		{
+			"checksumSHA1": "khL6oKjx81rAZKW+36050b7f5As=",
+			"path": "github.com/jmcvetta/randutil",
+			"revision": "2bb1b664bcff821e02b2a0644cd29c7e824d54f8",
+			"revisionTime": "2015-08-17T12:26:01Z"
+		},
+		{
 			"checksumSHA1": "oX6jFQD74oOApvDIhOzW2dXpg5Q=",
 			"path": "github.com/kevinburke/ssh_config",
 			"revision": "802051befeb51da415c46972b5caf36e7c33c53d",

commit 23ddce7f83a4ab2e39b5910766f54aafb7b5a99d
Author: Peter Amstutz <pamstutz at veritasgenetics.com>
Date:   Thu Oct 18 17:17:33 2018 -0400

    14262: Refactoring, split up federation code into smaller files
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz at veritasgenetics.com>

diff --git a/lib/controller/fed_collections.go b/lib/controller/fed_collections.go
new file mode 100644
index 000000000..62f98367c
--- /dev/null
+++ b/lib/controller/fed_collections.go
@@ -0,0 +1,312 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package controller
+
+import (
+	"bufio"
+	"bytes"
+	"context"
+	"crypto/md5"
+	"encoding/json"
+	"fmt"
+	"io"
+	"io/ioutil"
+	"net/http"
+	"strings"
+	"sync"
+
+	"git.curoverse.com/arvados.git/sdk/go/arvados"
+	"git.curoverse.com/arvados.git/sdk/go/httpserver"
+	"git.curoverse.com/arvados.git/sdk/go/keepclient"
+)
+
+type collectionFederatedRequestHandler struct {
+	next    http.Handler
+	handler *Handler
+}
+
+func rewriteSignatures(clusterID string, expectHash string,
+	resp *http.Response, requestError error) (newResponse *http.Response, err error) {
+
+	if requestError != nil {
+		return resp, requestError
+	}
+
+	if resp.StatusCode != 200 {
+		return resp, nil
+	}
+
+	originalBody := resp.Body
+	defer originalBody.Close()
+
+	var col arvados.Collection
+	err = json.NewDecoder(resp.Body).Decode(&col)
+	if err != nil {
+		return nil, err
+	}
+
+	// rewriting signatures will make manifest text 5-10% bigger so calculate
+	// capacity accordingly
+	updatedManifest := bytes.NewBuffer(make([]byte, 0, int(float64(len(col.ManifestText))*1.1)))
+
+	hasher := md5.New()
+	mw := io.MultiWriter(hasher, updatedManifest)
+	sz := 0
+
+	scanner := bufio.NewScanner(strings.NewReader(col.ManifestText))
+	scanner.Buffer(make([]byte, 1048576), len(col.ManifestText))
+	for scanner.Scan() {
+		line := scanner.Text()
+		tokens := strings.Split(line, " ")
+		if len(tokens) < 3 {
+			return nil, fmt.Errorf("Invalid stream (<3 tokens): %q", line)
+		}
+
+		n, err := mw.Write([]byte(tokens[0]))
+		if err != nil {
+			return nil, fmt.Errorf("Error updating manifest: %v", err)
+		}
+		sz += n
+		for _, token := range tokens[1:] {
+			n, err = mw.Write([]byte(" "))
+			if err != nil {
+				return nil, fmt.Errorf("Error updating manifest: %v", err)
+			}
+			sz += n
+
+			m := keepclient.SignedLocatorRe.FindStringSubmatch(token)
+			if m != nil {
+				// Rewrite the block signature to be a remote signature
+				_, err = fmt.Fprintf(updatedManifest, "%s%s%s+R%s-%s%s", m[1], m[2], m[3], clusterID, m[5][2:], m[8])
+				if err != nil {
+					return nil, fmt.Errorf("Error updating manifest: %v", err)
+				}
+
+				// for hash checking, ignore signatures
+				n, err = fmt.Fprintf(hasher, "%s%s", m[1], m[2])
+				if err != nil {
+					return nil, fmt.Errorf("Error updating manifest: %v", err)
+				}
+				sz += n
+			} else {
+				n, err = mw.Write([]byte(token))
+				if err != nil {
+					return nil, fmt.Errorf("Error updating manifest: %v", err)
+				}
+				sz += n
+			}
+		}
+		n, err = mw.Write([]byte("\n"))
+		if err != nil {
+			return nil, fmt.Errorf("Error updating manifest: %v", err)
+		}
+		sz += n
+	}
+
+	// Check that expected hash is consistent with
+	// portable_data_hash field of the returned record
+	if expectHash == "" {
+		expectHash = col.PortableDataHash
+	} else if expectHash != col.PortableDataHash {
+		return nil, fmt.Errorf("portable_data_hash %q on returned record did not match expected hash %q ", expectHash, col.PortableDataHash)
+	}
+
+	// Certify that the computed hash of the manifest_text matches our expectation
+	sum := hasher.Sum(nil)
+	computedHash := fmt.Sprintf("%x+%v", sum, sz)
+	if computedHash != expectHash {
+		return nil, fmt.Errorf("Computed manifest_text hash %q did not match expected hash %q", computedHash, expectHash)
+	}
+
+	col.ManifestText = updatedManifest.String()
+
+	newbody, err := json.Marshal(col)
+	if err != nil {
+		return nil, err
+	}
+
+	buf := bytes.NewBuffer(newbody)
+	resp.Body = ioutil.NopCloser(buf)
+	resp.ContentLength = int64(buf.Len())
+	resp.Header.Set("Content-Length", fmt.Sprintf("%v", buf.Len()))
+
+	return resp, nil
+}
+
+func filterLocalClusterResponse(resp *http.Response, requestError error) (newResponse *http.Response, err error) {
+	if requestError != nil {
+		return resp, requestError
+	}
+
+	if resp.StatusCode == 404 {
+		// Suppress returning this result, because we want to
+		// search the federation.
+		return nil, nil
+	}
+	return resp, nil
+}
+
+type searchRemoteClusterForPDH struct {
+	pdh           string
+	remoteID      string
+	mtx           *sync.Mutex
+	sentResponse  *bool
+	sharedContext *context.Context
+	cancelFunc    func()
+	errors        *[]string
+	statusCode    *int
+}
+
+func (s *searchRemoteClusterForPDH) filterRemoteClusterResponse(resp *http.Response, requestError error) (newResponse *http.Response, err error) {
+	s.mtx.Lock()
+	defer s.mtx.Unlock()
+
+	if *s.sentResponse {
+		// Another request already returned a response
+		return nil, nil
+	}
+
+	if requestError != nil {
+		*s.errors = append(*s.errors, fmt.Sprintf("Request error contacting %q: %v", s.remoteID, requestError))
+		// Record the error and suppress response
+		return nil, nil
+	}
+
+	if resp.StatusCode != 200 {
+		// Suppress returning unsuccessful result.  Maybe
+		// another request will find it.
+		// TODO collect and return error responses.
+		*s.errors = append(*s.errors, fmt.Sprintf("Response from %q: %v", s.remoteID, resp.Status))
+		if resp.StatusCode != 404 {
+			// Got a non-404 error response, convert into BadGateway
+			*s.statusCode = http.StatusBadGateway
+		}
+		return nil, nil
+	}
+
+	s.mtx.Unlock()
+
+	// This reads the response body.  We don't want to hold the
+	// lock while doing this because other remote requests could
+	// also have made it to this point, and we don't want a
+	// slow response holding the lock to block a faster response
+	// that is waiting on the lock.
+	newResponse, err = rewriteSignatures(s.remoteID, s.pdh, resp, nil)
+
+	s.mtx.Lock()
+
+	if *s.sentResponse {
+		// Another request already returned a response
+		return nil, nil
+	}
+
+	if err != nil {
+		// Suppress returning unsuccessful result.  Maybe
+		// another request will be successful.
+		*s.errors = append(*s.errors, fmt.Sprintf("Error parsing response from %q: %v", s.remoteID, err))
+		return nil, nil
+	}
+
+	// We have a successful response.  Suppress/cancel all the
+	// other requests/responses.
+	*s.sentResponse = true
+	s.cancelFunc()
+
+	return newResponse, nil
+}
+
+func (h *collectionFederatedRequestHandler) ServeHTTP(w http.ResponseWriter, req *http.Request) {
+	if req.Method != "GET" {
+		// Only handle GET requests right now
+		h.next.ServeHTTP(w, req)
+		return
+	}
+
+	m := collectionByPDHRe.FindStringSubmatch(req.URL.Path)
+	if len(m) != 2 {
+		// Not a collection PDH GET request
+		m = collectionRe.FindStringSubmatch(req.URL.Path)
+		clusterId := ""
+
+		if len(m) > 0 {
+			clusterId = m[2]
+		}
+
+		if clusterId != "" && clusterId != h.handler.Cluster.ClusterID {
+			// request for remote collection by uuid
+			resp, err := h.handler.remoteClusterRequest(clusterId, req)
+			newResponse, err := rewriteSignatures(clusterId, "", resp, err)
+			h.handler.proxy.ForwardResponse(w, newResponse, err)
+			return
+		}
+		// not a collection UUID request, or it is a request
+		// for a local UUID, either way, continue down the
+		// handler stack.
+		h.next.ServeHTTP(w, req)
+		return
+	}
+
+	// Request for collection by PDH.  Search the federation.
+
+	// First, query the local cluster.
+	resp, err := h.handler.localClusterRequest(req)
+	newResp, err := filterLocalClusterResponse(resp, err)
+	if newResp != nil || err != nil {
+		h.handler.proxy.ForwardResponse(w, newResp, err)
+		return
+	}
+
+	sharedContext, cancelFunc := context.WithCancel(req.Context())
+	defer cancelFunc()
+	req = req.WithContext(sharedContext)
+
+	// Create a goroutine for each cluster in the
+	// RemoteClusters map.  The first valid result gets
+	// returned to the client.  When that happens, all
+	// other outstanding requests are cancelled or
+	// suppressed.
+	sentResponse := false
+	mtx := sync.Mutex{}
+	wg := sync.WaitGroup{}
+	var errors []string
+	var errorCode int = 404
+
+	// use channel as a semaphore to limit the number of concurrent
+	// requests at a time
+	sem := make(chan bool, h.handler.Cluster.RequestLimits.GetMultiClusterRequestConcurrency())
+	defer close(sem)
+	for remoteID := range h.handler.Cluster.RemoteClusters {
+		if remoteID == h.handler.Cluster.ClusterID {
+			// No need to query local cluster again
+			continue
+		}
+		// blocks until it can put a value into the
+		// channel (which has a max queue capacity)
+		sem <- true
+		if sentResponse {
+			break
+		}
+		search := &searchRemoteClusterForPDH{m[1], remoteID, &mtx, &sentResponse,
+			&sharedContext, cancelFunc, &errors, &errorCode}
+		wg.Add(1)
+		go func() {
+			resp, err := h.handler.remoteClusterRequest(search.remoteID, req)
+			newResp, err := search.filterRemoteClusterResponse(resp, err)
+			if newResp != nil || err != nil {
+				h.handler.proxy.ForwardResponse(w, newResp, err)
+			}
+			wg.Done()
+			<-sem
+		}()
+	}
+	wg.Wait()
+
+	if sentResponse {
+		return
+	}
+
+	// No successful responses, so return the error
+	httpserver.Errors(w, errors, errorCode)
+}
diff --git a/lib/controller/fed_generic.go b/lib/controller/fed_generic.go
new file mode 100644
index 000000000..0630217b6
--- /dev/null
+++ b/lib/controller/fed_generic.go
@@ -0,0 +1,331 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package controller
+
+import (
+	"bytes"
+	"encoding/json"
+	"fmt"
+	"io/ioutil"
+	"net/http"
+	"net/url"
+	"regexp"
+	"sync"
+
+	"git.curoverse.com/arvados.git/sdk/go/httpserver"
+)
+
+type genericFederatedRequestHandler struct {
+	next    http.Handler
+	handler *Handler
+	matcher *regexp.Regexp
+}
+
+func (h *genericFederatedRequestHandler) remoteQueryUUIDs(w http.ResponseWriter,
+	req *http.Request,
+	clusterID string, uuids []string) (rp []map[string]interface{}, kind string, err error) {
+
+	found := make(map[string]bool)
+	prev_len_uuids := len(uuids) + 1
+	// Loop while
+	// (1) there are more uuids to query
+	// (2) we're making progress - on each iteration the set of
+	// uuids we are expecting for must shrink.
+	for len(uuids) > 0 && len(uuids) < prev_len_uuids {
+		var remoteReq http.Request
+		remoteReq.Header = req.Header
+		remoteReq.Method = "POST"
+		remoteReq.URL = &url.URL{Path: req.URL.Path}
+		remoteParams := make(url.Values)
+		remoteParams.Set("_method", "GET")
+		remoteParams.Set("count", "none")
+		if req.Form.Get("select") != "" {
+			remoteParams.Set("select", req.Form.Get("select"))
+		}
+		content, err := json.Marshal(uuids)
+		if err != nil {
+			return nil, "", err
+		}
+		remoteParams["filters"] = []string{fmt.Sprintf(`[["uuid", "in", %s]]`, content)}
+		enc := remoteParams.Encode()
+		remoteReq.Body = ioutil.NopCloser(bytes.NewBufferString(enc))
+
+		rc := multiClusterQueryResponseCollector{clusterID: clusterID}
+
+		var resp *http.Response
+		if clusterID == h.handler.Cluster.ClusterID {
+			resp, err = h.handler.localClusterRequest(&remoteReq)
+		} else {
+			resp, err = h.handler.remoteClusterRequest(clusterID, &remoteReq)
+		}
+		rc.collectResponse(resp, err)
+
+		if rc.error != nil {
+			return nil, "", rc.error
+		}
+
+		kind = rc.kind
+
+		if len(rc.responses) == 0 {
+			// We got zero responses, no point in doing
+			// another query.
+			return rp, kind, nil
+		}
+
+		rp = append(rp, rc.responses...)
+
+		// Go through the responses and determine what was
+		// returned.  If there are remaining items, loop
+		// around and do another request with just the
+		// stragglers.
+		for _, i := range rc.responses {
+			uuid, ok := i["uuid"].(string)
+			if ok {
+				found[uuid] = true
+			}
+		}
+
+		l := []string{}
+		for _, u := range uuids {
+			if !found[u] {
+				l = append(l, u)
+			}
+		}
+		prev_len_uuids = len(uuids)
+		uuids = l
+	}
+
+	return rp, kind, nil
+}
+
+func (h *genericFederatedRequestHandler) handleMultiClusterQuery(w http.ResponseWriter,
+	req *http.Request, clusterId *string) bool {
+
+	var filters [][]interface{}
+	err := json.Unmarshal([]byte(req.Form.Get("filters")), &filters)
+	if err != nil {
+		httpserver.Error(w, err.Error(), http.StatusBadRequest)
+		return true
+	}
+
+	// Split the list of uuids by prefix
+	queryClusters := make(map[string][]string)
+	expectCount := 0
+	for _, filter := range filters {
+		if len(filter) != 3 {
+			return false
+		}
+
+		if lhs, ok := filter[0].(string); !ok || lhs != "uuid" {
+			return false
+		}
+
+		op, ok := filter[1].(string)
+		if !ok {
+			return false
+		}
+
+		if op == "in" {
+			if rhs, ok := filter[2].([]interface{}); ok {
+				for _, i := range rhs {
+					if u, ok := i.(string); ok {
+						*clusterId = u[0:5]
+						queryClusters[u[0:5]] = append(queryClusters[u[0:5]], u)
+						expectCount += 1
+					}
+				}
+			}
+		} else if op == "=" {
+			if u, ok := filter[2].(string); ok {
+				*clusterId = u[0:5]
+				queryClusters[u[0:5]] = append(queryClusters[u[0:5]], u)
+				expectCount += 1
+			}
+		} else {
+			return false
+		}
+
+	}
+
+	if len(queryClusters) <= 1 {
+		// Query does not search for uuids across multiple
+		// clusters.
+		return false
+	}
+
+	// Validations
+	count := req.Form.Get("count")
+	if count != "" && count != `none` && count != `"none"` {
+		httpserver.Error(w, "Federated multi-object query must have 'count=none'", http.StatusBadRequest)
+		return true
+	}
+	if req.Form.Get("limit") != "" || req.Form.Get("offset") != "" || req.Form.Get("order") != "" {
+		httpserver.Error(w, "Federated multi-object may not provide 'limit', 'offset' or 'order'.", http.StatusBadRequest)
+		return true
+	}
+	if expectCount > h.handler.Cluster.RequestLimits.GetMaxItemsPerResponse() {
+		httpserver.Error(w, fmt.Sprintf("Federated multi-object request for %v objects which is more than max page size %v.",
+			expectCount, h.handler.Cluster.RequestLimits.GetMaxItemsPerResponse()), http.StatusBadRequest)
+		return true
+	}
+	if req.Form.Get("select") != "" {
+		foundUUID := false
+		var selects []string
+		err := json.Unmarshal([]byte(req.Form.Get("select")), &selects)
+		if err != nil {
+			httpserver.Error(w, err.Error(), http.StatusBadRequest)
+			return true
+		}
+
+		for _, r := range selects {
+			if r == "uuid" {
+				foundUUID = true
+				break
+			}
+		}
+		if !foundUUID {
+			httpserver.Error(w, "Federated multi-object request must include 'uuid' in 'select'", http.StatusBadRequest)
+			return true
+		}
+	}
+
+	// Perform concurrent requests to each cluster
+
+	// use channel as a semaphore to limit the number of concurrent
+	// requests at a time
+	sem := make(chan bool, h.handler.Cluster.RequestLimits.GetMultiClusterRequestConcurrency())
+	defer close(sem)
+	wg := sync.WaitGroup{}
+
+	req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
+	mtx := sync.Mutex{}
+	errors := []error{}
+	var completeResponses []map[string]interface{}
+	var kind string
+
+	for k, v := range queryClusters {
+		if len(v) == 0 {
+			// Nothing to query
+			continue
+		}
+
+		// blocks until it can put a value into the
+		// channel (which has a max queue capacity)
+		sem <- true
+		wg.Add(1)
+		go func(k string, v []string) {
+			rp, kn, err := h.remoteQueryUUIDs(w, req, k, v)
+			mtx.Lock()
+			if err == nil {
+				completeResponses = append(completeResponses, rp...)
+				kind = kn
+			} else {
+				errors = append(errors, err)
+			}
+			mtx.Unlock()
+			wg.Done()
+			<-sem
+		}(k, v)
+	}
+	wg.Wait()
+
+	if len(errors) > 0 {
+		var strerr []string
+		for _, e := range errors {
+			strerr = append(strerr, e.Error())
+		}
+		httpserver.Errors(w, strerr, http.StatusBadGateway)
+		return true
+	}
+
+	w.Header().Set("Content-Type", "application/json")
+	w.WriteHeader(http.StatusOK)
+	itemList := make(map[string]interface{})
+	itemList["items"] = completeResponses
+	itemList["kind"] = kind
+	json.NewEncoder(w).Encode(itemList)
+
+	return true
+}
+
+func (h *genericFederatedRequestHandler) ServeHTTP(w http.ResponseWriter, req *http.Request) {
+	m := h.matcher.FindStringSubmatch(req.URL.Path)
+	clusterId := ""
+
+	if len(m) > 0 && m[2] != "" {
+		clusterId = m[2]
+	}
+
+	// Get form parameters from URL and form body (if POST).
+	if err := loadParamsFromForm(req); err != nil {
+		httpserver.Error(w, err.Error(), http.StatusBadRequest)
+		return
+	}
+
+	// Check if the parameters have an explicit cluster_id
+	if req.Form.Get("cluster_id") != "" {
+		clusterId = req.Form.Get("cluster_id")
+	}
+
+	// Handle the POST-as-GET special case (workaround for large
+	// GET requests that potentially exceed maximum URL length,
+	// like multi-object queries where the filter has 100s of
+	// items)
+	effectiveMethod := req.Method
+	if req.Method == "POST" && req.Form.Get("_method") != "" {
+		effectiveMethod = req.Form.Get("_method")
+	}
+
+	if effectiveMethod == "GET" &&
+		clusterId == "" &&
+		req.Form.Get("filters") != "" &&
+		h.handleMultiClusterQuery(w, req, &clusterId) {
+		return
+	}
+
+	if clusterId == "" || clusterId == h.handler.Cluster.ClusterID {
+		h.next.ServeHTTP(w, req)
+	} else {
+		resp, err := h.handler.remoteClusterRequest(clusterId, req)
+		h.handler.proxy.ForwardResponse(w, resp, err)
+	}
+}
+
+type multiClusterQueryResponseCollector struct {
+	responses []map[string]interface{}
+	error     error
+	kind      string
+	clusterID string
+}
+
+func (c *multiClusterQueryResponseCollector) collectResponse(resp *http.Response,
+	requestError error) (newResponse *http.Response, err error) {
+	if requestError != nil {
+		c.error = requestError
+		return nil, nil
+	}
+
+	defer resp.Body.Close()
+	var loadInto struct {
+		Kind   string                   `json:"kind"`
+		Items  []map[string]interface{} `json:"items"`
+		Errors []string                 `json:"errors"`
+	}
+	err = json.NewDecoder(resp.Body).Decode(&loadInto)
+
+	if err != nil {
+		c.error = fmt.Errorf("error fetching from %v (%v): %v", c.clusterID, resp.Status, err)
+		return nil, nil
+	}
+	if resp.StatusCode != http.StatusOK {
+		c.error = fmt.Errorf("error fetching from %v (%v): %v", c.clusterID, resp.Status, loadInto.Errors)
+		return nil, nil
+	}
+
+	c.responses = loadInto.Items
+	c.kind = loadInto.Kind
+
+	return nil, nil
+}
diff --git a/lib/controller/federation.go b/lib/controller/federation.go
index c5089fa23..03d2f3fab 100644
--- a/lib/controller/federation.go
+++ b/lib/controller/federation.go
@@ -5,12 +5,8 @@
 package controller
 
 import (
-	"bufio"
 	"bytes"
-	"context"
-	"crypto/md5"
 	"database/sql"
-	"encoding/json"
 	"fmt"
 	"io"
 	"io/ioutil"
@@ -18,12 +14,9 @@ import (
 	"net/url"
 	"regexp"
 	"strings"
-	"sync"
 
 	"git.curoverse.com/arvados.git/sdk/go/arvados"
 	"git.curoverse.com/arvados.git/sdk/go/auth"
-	"git.curoverse.com/arvados.git/sdk/go/httpserver"
-	"git.curoverse.com/arvados.git/sdk/go/keepclient"
 )
 
 var pathPattern = `^/arvados/v1/%s(/([0-9a-z]{5})-%s-[0-9a-z]{15})?(.*)$`
@@ -33,17 +26,6 @@ var containerRequestsRe = regexp.MustCompile(fmt.Sprintf(pathPattern, "container
 var collectionRe = regexp.MustCompile(fmt.Sprintf(pathPattern, "collections", "4zz18"))
 var collectionByPDHRe = regexp.MustCompile(`^/arvados/v1/collections/([0-9a-fA-F]{32}\+[0-9]+)+$`)
 
-type genericFederatedRequestHandler struct {
-	next    http.Handler
-	handler *Handler
-	matcher *regexp.Regexp
-}
-
-type collectionFederatedRequestHandler struct {
-	next    http.Handler
-	handler *Handler
-}
-
 func (h *Handler) remoteClusterRequest(remoteID string, req *http.Request) (*http.Response, error) {
 	remote, ok := h.Cluster.RemoteClusters[remoteID]
 	if !ok {
@@ -98,597 +80,6 @@ func loadParamsFromForm(req *http.Request) error {
 	return nil
 }
 
-type multiClusterQueryResponseCollector struct {
-	responses []map[string]interface{}
-	error     error
-	kind      string
-	clusterID string
-}
-
-func (c *multiClusterQueryResponseCollector) collectResponse(resp *http.Response,
-	requestError error) (newResponse *http.Response, err error) {
-	if requestError != nil {
-		c.error = requestError
-		return nil, nil
-	}
-
-	defer resp.Body.Close()
-	var loadInto struct {
-		Kind   string                   `json:"kind"`
-		Items  []map[string]interface{} `json:"items"`
-		Errors []string                 `json:"errors"`
-	}
-	err = json.NewDecoder(resp.Body).Decode(&loadInto)
-
-	if err != nil {
-		c.error = fmt.Errorf("error fetching from %v (%v): %v", c.clusterID, resp.Status, err)
-		return nil, nil
-	}
-	if resp.StatusCode != http.StatusOK {
-		c.error = fmt.Errorf("error fetching from %v (%v): %v", c.clusterID, resp.Status, loadInto.Errors)
-		return nil, nil
-	}
-
-	c.responses = loadInto.Items
-	c.kind = loadInto.Kind
-
-	return nil, nil
-}
-
-func (h *genericFederatedRequestHandler) remoteQueryUUIDs(w http.ResponseWriter,
-	req *http.Request,
-	clusterID string, uuids []string) (rp []map[string]interface{}, kind string, err error) {
-
-	found := make(map[string]bool)
-	prev_len_uuids := len(uuids) + 1
-	// Loop while
-	// (1) there are more uuids to query
-	// (2) we're making progress - on each iteration the set of
-	// uuids we are expecting for must shrink.
-	for len(uuids) > 0 && len(uuids) < prev_len_uuids {
-		var remoteReq http.Request
-		remoteReq.Header = req.Header
-		remoteReq.Method = "POST"
-		remoteReq.URL = &url.URL{Path: req.URL.Path}
-		remoteParams := make(url.Values)
-		remoteParams.Set("_method", "GET")
-		remoteParams.Set("count", "none")
-		if req.Form.Get("select") != "" {
-			remoteParams.Set("select", req.Form.Get("select"))
-		}
-		content, err := json.Marshal(uuids)
-		if err != nil {
-			return nil, "", err
-		}
-		remoteParams["filters"] = []string{fmt.Sprintf(`[["uuid", "in", %s]]`, content)}
-		enc := remoteParams.Encode()
-		remoteReq.Body = ioutil.NopCloser(bytes.NewBufferString(enc))
-
-		rc := multiClusterQueryResponseCollector{clusterID: clusterID}
-
-		var resp *http.Response
-		if clusterID == h.handler.Cluster.ClusterID {
-			resp, err = h.handler.localClusterRequest(&remoteReq)
-		} else {
-			resp, err = h.handler.remoteClusterRequest(clusterID, &remoteReq)
-		}
-		rc.collectResponse(resp, err)
-
-		if rc.error != nil {
-			return nil, "", rc.error
-		}
-
-		kind = rc.kind
-
-		if len(rc.responses) == 0 {
-			// We got zero responses, no point in doing
-			// another query.
-			return rp, kind, nil
-		}
-
-		rp = append(rp, rc.responses...)
-
-		// Go through the responses and determine what was
-		// returned.  If there are remaining items, loop
-		// around and do another request with just the
-		// stragglers.
-		for _, i := range rc.responses {
-			uuid, ok := i["uuid"].(string)
-			if ok {
-				found[uuid] = true
-			}
-		}
-
-		l := []string{}
-		for _, u := range uuids {
-			if !found[u] {
-				l = append(l, u)
-			}
-		}
-		prev_len_uuids = len(uuids)
-		uuids = l
-	}
-
-	return rp, kind, nil
-}
-
-func (h *genericFederatedRequestHandler) handleMultiClusterQuery(w http.ResponseWriter,
-	req *http.Request, clusterId *string) bool {
-
-	var filters [][]interface{}
-	err := json.Unmarshal([]byte(req.Form.Get("filters")), &filters)
-	if err != nil {
-		httpserver.Error(w, err.Error(), http.StatusBadRequest)
-		return true
-	}
-
-	// Split the list of uuids by prefix
-	queryClusters := make(map[string][]string)
-	expectCount := 0
-	for _, filter := range filters {
-		if len(filter) != 3 {
-			return false
-		}
-
-		if lhs, ok := filter[0].(string); !ok || lhs != "uuid" {
-			return false
-		}
-
-		op, ok := filter[1].(string)
-		if !ok {
-			return false
-		}
-
-		if op == "in" {
-			if rhs, ok := filter[2].([]interface{}); ok {
-				for _, i := range rhs {
-					if u, ok := i.(string); ok {
-						*clusterId = u[0:5]
-						queryClusters[u[0:5]] = append(queryClusters[u[0:5]], u)
-						expectCount += 1
-					}
-				}
-			}
-		} else if op == "=" {
-			if u, ok := filter[2].(string); ok {
-				*clusterId = u[0:5]
-				queryClusters[u[0:5]] = append(queryClusters[u[0:5]], u)
-				expectCount += 1
-			}
-		} else {
-			return false
-		}
-
-	}
-
-	if len(queryClusters) <= 1 {
-		// Query does not search for uuids across multiple
-		// clusters.
-		return false
-	}
-
-	// Validations
-	count := req.Form.Get("count")
-	if count != "" && count != `none` && count != `"none"` {
-		httpserver.Error(w, "Federated multi-object query must have 'count=none'", http.StatusBadRequest)
-		return true
-	}
-	if req.Form.Get("limit") != "" || req.Form.Get("offset") != "" || req.Form.Get("order") != "" {
-		httpserver.Error(w, "Federated multi-object may not provide 'limit', 'offset' or 'order'.", http.StatusBadRequest)
-		return true
-	}
-	if expectCount > h.handler.Cluster.RequestLimits.GetMaxItemsPerResponse() {
-		httpserver.Error(w, fmt.Sprintf("Federated multi-object request for %v objects which is more than max page size %v.",
-			expectCount, h.handler.Cluster.RequestLimits.GetMaxItemsPerResponse()), http.StatusBadRequest)
-		return true
-	}
-	if req.Form.Get("select") != "" {
-		foundUUID := false
-		var selects []string
-		err := json.Unmarshal([]byte(req.Form.Get("select")), &selects)
-		if err != nil {
-			httpserver.Error(w, err.Error(), http.StatusBadRequest)
-			return true
-		}
-
-		for _, r := range selects {
-			if r == "uuid" {
-				foundUUID = true
-				break
-			}
-		}
-		if !foundUUID {
-			httpserver.Error(w, "Federated multi-object request must include 'uuid' in 'select'", http.StatusBadRequest)
-			return true
-		}
-	}
-
-	// Perform concurrent requests to each cluster
-
-	// use channel as a semaphore to limit the number of concurrent
-	// requests at a time
-	sem := make(chan bool, h.handler.Cluster.RequestLimits.GetMultiClusterRequestConcurrency())
-	defer close(sem)
-	wg := sync.WaitGroup{}
-
-	req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
-	mtx := sync.Mutex{}
-	errors := []error{}
-	var completeResponses []map[string]interface{}
-	var kind string
-
-	for k, v := range queryClusters {
-		if len(v) == 0 {
-			// Nothing to query
-			continue
-		}
-
-		// blocks until it can put a value into the
-		// channel (which has a max queue capacity)
-		sem <- true
-		wg.Add(1)
-		go func(k string, v []string) {
-			rp, kn, err := h.remoteQueryUUIDs(w, req, k, v)
-			mtx.Lock()
-			if err == nil {
-				completeResponses = append(completeResponses, rp...)
-				kind = kn
-			} else {
-				errors = append(errors, err)
-			}
-			mtx.Unlock()
-			wg.Done()
-			<-sem
-		}(k, v)
-	}
-	wg.Wait()
-
-	if len(errors) > 0 {
-		var strerr []string
-		for _, e := range errors {
-			strerr = append(strerr, e.Error())
-		}
-		httpserver.Errors(w, strerr, http.StatusBadGateway)
-		return true
-	}
-
-	w.Header().Set("Content-Type", "application/json")
-	w.WriteHeader(http.StatusOK)
-	itemList := make(map[string]interface{})
-	itemList["items"] = completeResponses
-	itemList["kind"] = kind
-	json.NewEncoder(w).Encode(itemList)
-
-	return true
-}
-
-func (h *genericFederatedRequestHandler) ServeHTTP(w http.ResponseWriter, req *http.Request) {
-	m := h.matcher.FindStringSubmatch(req.URL.Path)
-	clusterId := ""
-
-	if len(m) > 0 && m[2] != "" {
-		clusterId = m[2]
-	}
-
-	// Get form parameters from URL and form body (if POST).
-	if err := loadParamsFromForm(req); err != nil {
-		httpserver.Error(w, err.Error(), http.StatusBadRequest)
-		return
-	}
-
-	// Check if the parameters have an explicit cluster_id
-	if req.Form.Get("cluster_id") != "" {
-		clusterId = req.Form.Get("cluster_id")
-	}
-
-	// Handle the POST-as-GET special case (workaround for large
-	// GET requests that potentially exceed maximum URL length,
-	// like multi-object queries where the filter has 100s of
-	// items)
-	effectiveMethod := req.Method
-	if req.Method == "POST" && req.Form.Get("_method") != "" {
-		effectiveMethod = req.Form.Get("_method")
-	}
-
-	if effectiveMethod == "GET" &&
-		clusterId == "" &&
-		req.Form.Get("filters") != "" &&
-		h.handleMultiClusterQuery(w, req, &clusterId) {
-		return
-	}
-
-	if clusterId == "" || clusterId == h.handler.Cluster.ClusterID {
-		h.next.ServeHTTP(w, req)
-	} else {
-		resp, err := h.handler.remoteClusterRequest(clusterId, req)
-		h.handler.proxy.ForwardResponse(w, resp, err)
-	}
-}
-
-func rewriteSignatures(clusterID string, expectHash string,
-	resp *http.Response, requestError error) (newResponse *http.Response, err error) {
-
-	if requestError != nil {
-		return resp, requestError
-	}
-
-	if resp.StatusCode != 200 {
-		return resp, nil
-	}
-
-	originalBody := resp.Body
-	defer originalBody.Close()
-
-	var col arvados.Collection
-	err = json.NewDecoder(resp.Body).Decode(&col)
-	if err != nil {
-		return nil, err
-	}
-
-	// rewriting signatures will make manifest text 5-10% bigger so calculate
-	// capacity accordingly
-	updatedManifest := bytes.NewBuffer(make([]byte, 0, int(float64(len(col.ManifestText))*1.1)))
-
-	hasher := md5.New()
-	mw := io.MultiWriter(hasher, updatedManifest)
-	sz := 0
-
-	scanner := bufio.NewScanner(strings.NewReader(col.ManifestText))
-	scanner.Buffer(make([]byte, 1048576), len(col.ManifestText))
-	for scanner.Scan() {
-		line := scanner.Text()
-		tokens := strings.Split(line, " ")
-		if len(tokens) < 3 {
-			return nil, fmt.Errorf("Invalid stream (<3 tokens): %q", line)
-		}
-
-		n, err := mw.Write([]byte(tokens[0]))
-		if err != nil {
-			return nil, fmt.Errorf("Error updating manifest: %v", err)
-		}
-		sz += n
-		for _, token := range tokens[1:] {
-			n, err = mw.Write([]byte(" "))
-			if err != nil {
-				return nil, fmt.Errorf("Error updating manifest: %v", err)
-			}
-			sz += n
-
-			m := keepclient.SignedLocatorRe.FindStringSubmatch(token)
-			if m != nil {
-				// Rewrite the block signature to be a remote signature
-				_, err = fmt.Fprintf(updatedManifest, "%s%s%s+R%s-%s%s", m[1], m[2], m[3], clusterID, m[5][2:], m[8])
-				if err != nil {
-					return nil, fmt.Errorf("Error updating manifest: %v", err)
-				}
-
-				// for hash checking, ignore signatures
-				n, err = fmt.Fprintf(hasher, "%s%s", m[1], m[2])
-				if err != nil {
-					return nil, fmt.Errorf("Error updating manifest: %v", err)
-				}
-				sz += n
-			} else {
-				n, err = mw.Write([]byte(token))
-				if err != nil {
-					return nil, fmt.Errorf("Error updating manifest: %v", err)
-				}
-				sz += n
-			}
-		}
-		n, err = mw.Write([]byte("\n"))
-		if err != nil {
-			return nil, fmt.Errorf("Error updating manifest: %v", err)
-		}
-		sz += n
-	}
-
-	// Check that expected hash is consistent with
-	// portable_data_hash field of the returned record
-	if expectHash == "" {
-		expectHash = col.PortableDataHash
-	} else if expectHash != col.PortableDataHash {
-		return nil, fmt.Errorf("portable_data_hash %q on returned record did not match expected hash %q ", expectHash, col.PortableDataHash)
-	}
-
-	// Certify that the computed hash of the manifest_text matches our expectation
-	sum := hasher.Sum(nil)
-	computedHash := fmt.Sprintf("%x+%v", sum, sz)
-	if computedHash != expectHash {
-		return nil, fmt.Errorf("Computed manifest_text hash %q did not match expected hash %q", computedHash, expectHash)
-	}
-
-	col.ManifestText = updatedManifest.String()
-
-	newbody, err := json.Marshal(col)
-	if err != nil {
-		return nil, err
-	}
-
-	buf := bytes.NewBuffer(newbody)
-	resp.Body = ioutil.NopCloser(buf)
-	resp.ContentLength = int64(buf.Len())
-	resp.Header.Set("Content-Length", fmt.Sprintf("%v", buf.Len()))
-
-	return resp, nil
-}
-
-func filterLocalClusterResponse(resp *http.Response, requestError error) (newResponse *http.Response, err error) {
-	if requestError != nil {
-		return resp, requestError
-	}
-
-	if resp.StatusCode == 404 {
-		// Suppress returning this result, because we want to
-		// search the federation.
-		return nil, nil
-	}
-	return resp, nil
-}
-
-type searchRemoteClusterForPDH struct {
-	pdh           string
-	remoteID      string
-	mtx           *sync.Mutex
-	sentResponse  *bool
-	sharedContext *context.Context
-	cancelFunc    func()
-	errors        *[]string
-	statusCode    *int
-}
-
-func (s *searchRemoteClusterForPDH) filterRemoteClusterResponse(resp *http.Response, requestError error) (newResponse *http.Response, err error) {
-	s.mtx.Lock()
-	defer s.mtx.Unlock()
-
-	if *s.sentResponse {
-		// Another request already returned a response
-		return nil, nil
-	}
-
-	if requestError != nil {
-		*s.errors = append(*s.errors, fmt.Sprintf("Request error contacting %q: %v", s.remoteID, requestError))
-		// Record the error and suppress response
-		return nil, nil
-	}
-
-	if resp.StatusCode != 200 {
-		// Suppress returning unsuccessful result.  Maybe
-		// another request will find it.
-		// TODO collect and return error responses.
-		*s.errors = append(*s.errors, fmt.Sprintf("Response from %q: %v", s.remoteID, resp.Status))
-		if resp.StatusCode != 404 {
-			// Got a non-404 error response, convert into BadGateway
-			*s.statusCode = http.StatusBadGateway
-		}
-		return nil, nil
-	}
-
-	s.mtx.Unlock()
-
-	// This reads the response body.  We don't want to hold the
-	// lock while doing this because other remote requests could
-	// also have made it to this point, and we don't want a
-	// slow response holding the lock to block a faster response
-	// that is waiting on the lock.
-	newResponse, err = rewriteSignatures(s.remoteID, s.pdh, resp, nil)
-
-	s.mtx.Lock()
-
-	if *s.sentResponse {
-		// Another request already returned a response
-		return nil, nil
-	}
-
-	if err != nil {
-		// Suppress returning unsuccessful result.  Maybe
-		// another request will be successful.
-		*s.errors = append(*s.errors, fmt.Sprintf("Error parsing response from %q: %v", s.remoteID, err))
-		return nil, nil
-	}
-
-	// We have a successful response.  Suppress/cancel all the
-	// other requests/responses.
-	*s.sentResponse = true
-	s.cancelFunc()
-
-	return newResponse, nil
-}
-
-func (h *collectionFederatedRequestHandler) ServeHTTP(w http.ResponseWriter, req *http.Request) {
-	if req.Method != "GET" {
-		// Only handle GET requests right now
-		h.next.ServeHTTP(w, req)
-		return
-	}
-
-	m := collectionByPDHRe.FindStringSubmatch(req.URL.Path)
-	if len(m) != 2 {
-		// Not a collection PDH GET request
-		m = collectionRe.FindStringSubmatch(req.URL.Path)
-		clusterId := ""
-
-		if len(m) > 0 {
-			clusterId = m[2]
-		}
-
-		if clusterId != "" && clusterId != h.handler.Cluster.ClusterID {
-			// request for remote collection by uuid
-			resp, err := h.handler.remoteClusterRequest(clusterId, req)
-			newResponse, err := rewriteSignatures(clusterId, "", resp, err)
-			h.handler.proxy.ForwardResponse(w, newResponse, err)
-			return
-		}
-		// not a collection UUID request, or it is a request
-		// for a local UUID, either way, continue down the
-		// handler stack.
-		h.next.ServeHTTP(w, req)
-		return
-	}
-
-	// Request for collection by PDH.  Search the federation.
-
-	// First, query the local cluster.
-	resp, err := h.handler.localClusterRequest(req)
-	newResp, err := filterLocalClusterResponse(resp, err)
-	if newResp != nil || err != nil {
-		h.handler.proxy.ForwardResponse(w, newResp, err)
-		return
-	}
-
-	sharedContext, cancelFunc := context.WithCancel(req.Context())
-	defer cancelFunc()
-	req = req.WithContext(sharedContext)
-
-	// Create a goroutine for each cluster in the
-	// RemoteClusters map.  The first valid result gets
-	// returned to the client.  When that happens, all
-	// other outstanding requests are cancelled or
-	// suppressed.
-	sentResponse := false
-	mtx := sync.Mutex{}
-	wg := sync.WaitGroup{}
-	var errors []string
-	var errorCode int = 404
-
-	// use channel as a semaphore to limit the number of concurrent
-	// requests at a time
-	sem := make(chan bool, h.handler.Cluster.RequestLimits.GetMultiClusterRequestConcurrency())
-	defer close(sem)
-	for remoteID := range h.handler.Cluster.RemoteClusters {
-		if remoteID == h.handler.Cluster.ClusterID {
-			// No need to query local cluster again
-			continue
-		}
-		// blocks until it can put a value into the
-		// channel (which has a max queue capacity)
-		sem <- true
-		if sentResponse {
-			break
-		}
-		search := &searchRemoteClusterForPDH{m[1], remoteID, &mtx, &sentResponse,
-			&sharedContext, cancelFunc, &errors, &errorCode}
-		wg.Add(1)
-		go func() {
-			resp, err := h.handler.remoteClusterRequest(search.remoteID, req)
-			newResp, err := search.filterRemoteClusterResponse(resp, err)
-			if newResp != nil || err != nil {
-				h.handler.proxy.ForwardResponse(w, newResp, err)
-			}
-			wg.Done()
-			<-sem
-		}()
-	}
-	wg.Wait()
-
-	if sentResponse {
-		return
-	}
-
-	// No successful responses, so return the error
-	httpserver.Errors(w, errors, errorCode)
-}
-
 func (h *Handler) setupProxyRemoteCluster(next http.Handler) http.Handler {
 	mux := http.NewServeMux()
 	mux.Handle("/arvados/v1/workflows", &genericFederatedRequestHandler{next, h, wfRe})

commit b4e123604561174f928beab0f0a8fba7cbf2d33d
Author: Peter Amstutz <pamstutz at veritasgenetics.com>
Date:   Thu Oct 18 16:08:28 2018 -0400

    14262: Refactoring proxy
    
    Split proxy.Do() into ForwardRequest() and ForwardResponse().
    
    Inversion of control eliminates need for "filter" callback, since the
    caller can now modify the response in between the calls to
    ForwardRequest() and ForwardResponse().
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz at veritasgenetics.com>

diff --git a/lib/controller/federation.go b/lib/controller/federation.go
index f30365574..c5089fa23 100644
--- a/lib/controller/federation.go
+++ b/lib/controller/federation.go
@@ -44,17 +44,10 @@ type collectionFederatedRequestHandler struct {
 	handler *Handler
 }
 
-func (h *Handler) remoteClusterRequest(remoteID string, w http.ResponseWriter, req *http.Request, filter ResponseFilter) {
+func (h *Handler) remoteClusterRequest(remoteID string, req *http.Request) (*http.Response, error) {
 	remote, ok := h.Cluster.RemoteClusters[remoteID]
 	if !ok {
-		err := fmt.Errorf("no proxy available for cluster %v", remoteID)
-		if filter != nil {
-			_, err = filter(nil, err)
-		}
-		if err != nil {
-			httpserver.Error(w, err.Error(), http.StatusNotFound)
-		}
-		return
+		return nil, HTTPError{fmt.Sprintf("no proxy available for cluster %v", remoteID), http.StatusNotFound}
 	}
 	scheme := remote.Scheme
 	if scheme == "" {
@@ -62,13 +55,7 @@ func (h *Handler) remoteClusterRequest(remoteID string, w http.ResponseWriter, r
 	}
 	saltedReq, err := h.saltAuthToken(req, remoteID)
 	if err != nil {
-		if filter != nil {
-			_, err = filter(nil, err)
-		}
-		if err != nil {
-			httpserver.Error(w, err.Error(), http.StatusBadRequest)
-		}
-		return
+		return nil, err
 	}
 	urlOut := &url.URL{
 		Scheme:   scheme,
@@ -81,7 +68,7 @@ func (h *Handler) remoteClusterRequest(remoteID string, w http.ResponseWriter, r
 	if remote.Insecure {
 		client = h.insecureClient
 	}
-	h.proxy.Do(w, saltedReq, urlOut, client, filter)
+	return h.proxy.ForwardRequest(saltedReq, urlOut, client)
 }
 
 // Buffer request body, parse form parameters in request, and then
@@ -179,13 +166,14 @@ func (h *genericFederatedRequestHandler) remoteQueryUUIDs(w http.ResponseWriter,
 
 		rc := multiClusterQueryResponseCollector{clusterID: clusterID}
 
+		var resp *http.Response
 		if clusterID == h.handler.Cluster.ClusterID {
-			h.handler.localClusterRequest(w, &remoteReq,
-				rc.collectResponse)
+			resp, err = h.handler.localClusterRequest(&remoteReq)
 		} else {
-			h.handler.remoteClusterRequest(clusterID, w, &remoteReq,
-				rc.collectResponse)
+			resp, err = h.handler.remoteClusterRequest(clusterID, &remoteReq)
 		}
+		rc.collectResponse(resp, err)
+
 		if rc.error != nil {
 			return nil, "", rc.error
 		}
@@ -412,16 +400,14 @@ func (h *genericFederatedRequestHandler) ServeHTTP(w http.ResponseWriter, req *h
 	if clusterId == "" || clusterId == h.handler.Cluster.ClusterID {
 		h.next.ServeHTTP(w, req)
 	} else {
-		h.handler.remoteClusterRequest(clusterId, w, req, nil)
+		resp, err := h.handler.remoteClusterRequest(clusterId, req)
+		h.handler.proxy.ForwardResponse(w, resp, err)
 	}
 }
 
-type rewriteSignaturesClusterId struct {
-	clusterID  string
-	expectHash string
-}
+func rewriteSignatures(clusterID string, expectHash string,
+	resp *http.Response, requestError error) (newResponse *http.Response, err error) {
 
-func (rw rewriteSignaturesClusterId) rewriteSignatures(resp *http.Response, requestError error) (newResponse *http.Response, err error) {
 	if requestError != nil {
 		return resp, requestError
 	}
@@ -471,7 +457,7 @@ func (rw rewriteSignaturesClusterId) rewriteSignatures(resp *http.Response, requ
 			m := keepclient.SignedLocatorRe.FindStringSubmatch(token)
 			if m != nil {
 				// Rewrite the block signature to be a remote signature
-				_, err = fmt.Fprintf(updatedManifest, "%s%s%s+R%s-%s%s", m[1], m[2], m[3], rw.clusterID, m[5][2:], m[8])
+				_, err = fmt.Fprintf(updatedManifest, "%s%s%s+R%s-%s%s", m[1], m[2], m[3], clusterID, m[5][2:], m[8])
 				if err != nil {
 					return nil, fmt.Errorf("Error updating manifest: %v", err)
 				}
@@ -499,17 +485,17 @@ func (rw rewriteSignaturesClusterId) rewriteSignatures(resp *http.Response, requ
 
 	// Check that expected hash is consistent with
 	// portable_data_hash field of the returned record
-	if rw.expectHash == "" {
-		rw.expectHash = col.PortableDataHash
-	} else if rw.expectHash != col.PortableDataHash {
-		return nil, fmt.Errorf("portable_data_hash %q on returned record did not match expected hash %q ", rw.expectHash, col.PortableDataHash)
+	if expectHash == "" {
+		expectHash = col.PortableDataHash
+	} else if expectHash != col.PortableDataHash {
+		return nil, fmt.Errorf("portable_data_hash %q on returned record did not match expected hash %q ", expectHash, col.PortableDataHash)
 	}
 
 	// Certify that the computed hash of the manifest_text matches our expectation
 	sum := hasher.Sum(nil)
 	computedHash := fmt.Sprintf("%x+%v", sum, sz)
-	if computedHash != rw.expectHash {
-		return nil, fmt.Errorf("Computed manifest_text hash %q did not match expected hash %q", computedHash, rw.expectHash)
+	if computedHash != expectHash {
+		return nil, fmt.Errorf("Computed manifest_text hash %q did not match expected hash %q", computedHash, expectHash)
 	}
 
 	col.ManifestText = updatedManifest.String()
@@ -585,7 +571,7 @@ func (s *searchRemoteClusterForPDH) filterRemoteClusterResponse(resp *http.Respo
 	// also have made it to this point, and we don't want a
 	// slow response holding the lock to block a faster response
 	// that is waiting on the lock.
-	newResponse, err = rewriteSignaturesClusterId{s.remoteID, s.pdh}.rewriteSignatures(resp, nil)
+	newResponse, err = rewriteSignatures(s.remoteID, s.pdh, resp, nil)
 
 	s.mtx.Lock()
 
@@ -628,8 +614,9 @@ func (h *collectionFederatedRequestHandler) ServeHTTP(w http.ResponseWriter, req
 
 		if clusterId != "" && clusterId != h.handler.Cluster.ClusterID {
 			// request for remote collection by uuid
-			h.handler.remoteClusterRequest(clusterId, w, req,
-				rewriteSignaturesClusterId{clusterId, ""}.rewriteSignatures)
+			resp, err := h.handler.remoteClusterRequest(clusterId, req)
+			newResponse, err := rewriteSignatures(clusterId, "", resp, err)
+			h.handler.proxy.ForwardResponse(w, newResponse, err)
 			return
 		}
 		// not a collection UUID request, or it is a request
@@ -642,7 +629,10 @@ func (h *collectionFederatedRequestHandler) ServeHTTP(w http.ResponseWriter, req
 	// Request for collection by PDH.  Search the federation.
 
 	// First, query the local cluster.
-	if h.handler.localClusterRequest(w, req, filterLocalClusterResponse) {
+	resp, err := h.handler.localClusterRequest(req)
+	newResp, err := filterLocalClusterResponse(resp, err)
+	if newResp != nil || err != nil {
+		h.handler.proxy.ForwardResponse(w, newResp, err)
 		return
 	}
 
@@ -680,7 +670,11 @@ func (h *collectionFederatedRequestHandler) ServeHTTP(w http.ResponseWriter, req
 			&sharedContext, cancelFunc, &errors, &errorCode}
 		wg.Add(1)
 		go func() {
-			h.handler.remoteClusterRequest(search.remoteID, w, req, search.filterRemoteClusterResponse)
+			resp, err := h.handler.remoteClusterRequest(search.remoteID, req)
+			newResp, err := search.filterRemoteClusterResponse(resp, err)
+			if newResp != nil || err != nil {
+				h.handler.proxy.ForwardResponse(w, newResp, err)
+			}
 			wg.Done()
 			<-sem
 		}()
diff --git a/lib/controller/handler.go b/lib/controller/handler.go
index 0c31815cb..5e9012949 100644
--- a/lib/controller/handler.go
+++ b/lib/controller/handler.go
@@ -121,14 +121,10 @@ func prepend(next http.Handler, middleware middlewareFunc) http.Handler {
 	})
 }
 
-// localClusterRequest sets up a request so it can be proxied to the
-// local API server using proxy.Do().  Returns true if a response was
-// written, false if not.
-func (h *Handler) localClusterRequest(w http.ResponseWriter, req *http.Request, filter ResponseFilter) bool {
+func (h *Handler) localClusterRequest(req *http.Request) (*http.Response, error) {
 	urlOut, insecure, err := findRailsAPI(h.Cluster, h.NodeProfile)
 	if err != nil {
-		httpserver.Error(w, err.Error(), http.StatusInternalServerError)
-		return true
+		return nil, err
 	}
 	urlOut = &url.URL{
 		Scheme:   urlOut.Scheme,
@@ -141,12 +137,14 @@ func (h *Handler) localClusterRequest(w http.ResponseWriter, req *http.Request,
 	if insecure {
 		client = h.insecureClient
 	}
-	return h.proxy.Do(w, req, urlOut, client, filter)
+	return h.proxy.ForwardRequest(req, urlOut, client)
 }
 
 func (h *Handler) proxyRailsAPI(w http.ResponseWriter, req *http.Request, next http.Handler) {
-	if !h.localClusterRequest(w, req, nil) && next != nil {
-		next.ServeHTTP(w, req)
+	resp, err := h.localClusterRequest(req)
+	n, err := h.proxy.ForwardResponse(w, resp, err)
+	if err != nil {
+		httpserver.Logger(req).WithError(err).WithField("bytesCopied", n).Error("error copying response body")
 	}
 }
 
diff --git a/lib/controller/proxy.go b/lib/controller/proxy.go
index 951cb9d25..9aecdc1b2 100644
--- a/lib/controller/proxy.go
+++ b/lib/controller/proxy.go
@@ -19,6 +19,15 @@ type proxy struct {
 	RequestTimeout time.Duration
 }
 
+type HTTPError struct {
+	Message string
+	Code    int
+}
+
+func (h HTTPError) Error() string {
+	return h.Message
+}
+
 // headers that shouldn't be forwarded when proxying. See
 // https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers
 var dropHeaders = map[string]bool{
@@ -36,15 +45,11 @@ var dropHeaders = map[string]bool{
 
 type ResponseFilter func(*http.Response, error) (*http.Response, error)
 
-// Do sends a request, passes the result to the filter (if provided)
-// and then if the result is not suppressed by the filter, sends the
-// request to the ResponseWriter.  Returns true if a response was written,
-// false if not.
-func (p *proxy) Do(w http.ResponseWriter,
+// Forward a request to downstream service, and return response or error.
+func (p *proxy) ForwardRequest(
 	reqIn *http.Request,
 	urlOut *url.URL,
-	client *http.Client,
-	filter ResponseFilter) bool {
+	client *http.Client) (*http.Response, error) {
 
 	// Copy headers from incoming request, then add/replace proxy
 	// headers like Via and X-Forwarded-For.
@@ -79,50 +84,26 @@ func (p *proxy) Do(w http.ResponseWriter,
 		Body:   reqIn.Body,
 	}).WithContext(ctx)
 
-	resp, err := client.Do(reqOut)
-	if filter == nil && err != nil {
-		httpserver.Error(w, err.Error(), http.StatusBadGateway)
-		return true
-	}
-
-	// make sure original response body gets closed
-	var originalBody io.ReadCloser
-	if resp != nil {
-		originalBody = resp.Body
-		if originalBody != nil {
-			defer originalBody.Close()
-		}
-	}
-
-	if filter != nil {
-		resp, err = filter(resp, err)
+	return client.Do(reqOut)
+}
 
-		if err != nil {
+// Copy a response (or error) to the upstream client
+func (p *proxy) ForwardResponse(w http.ResponseWriter, resp *http.Response, err error) (int64, error) {
+	if err != nil {
+		if he, ok := err.(HTTPError); ok {
+			httpserver.Error(w, he.Message, he.Code)
+		} else {
 			httpserver.Error(w, err.Error(), http.StatusBadGateway)
-			return true
-		}
-		if resp == nil {
-			// filter() returned a nil response, this means suppress
-			// writing a response, for the case where there might
-			// be multiple response writers.
-			return false
-		}
-
-		// the filter gave us a new response body, make sure that gets closed too.
-		if resp.Body != originalBody {
-			defer resp.Body.Close()
 		}
+		return 0, nil
 	}
 
+	defer resp.Body.Close()
 	for k, v := range resp.Header {
 		for _, v := range v {
 			w.Header().Add(k, v)
 		}
 	}
 	w.WriteHeader(resp.StatusCode)
-	n, err := io.Copy(w, resp.Body)
-	if err != nil {
-		httpserver.Logger(reqIn).WithError(err).WithField("bytesCopied", n).Error("error copying response body")
-	}
-	return true
+	return io.Copy(w, resp.Body)
 }

commit 5643759e918f5919174afc32b2c6556e13548e6d
Author: Peter Amstutz <pamstutz at veritasgenetics.com>
Date:   Thu Oct 18 14:34:49 2018 -0400

    14262: Fix bug moving api_token to header
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz at veritasgenetics.com>

diff --git a/lib/controller/federation.go b/lib/controller/federation.go
index e5c56bd83..f30365574 100644
--- a/lib/controller/federation.go
+++ b/lib/controller/federation.go
@@ -14,7 +14,6 @@ import (
 	"fmt"
 	"io"
 	"io/ioutil"
-	"log"
 	"net/http"
 	"net/url"
 	"regexp"
@@ -61,7 +60,7 @@ func (h *Handler) remoteClusterRequest(remoteID string, w http.ResponseWriter, r
 	if scheme == "" {
 		scheme = "https"
 	}
-	req, err := h.saltAuthToken(req, remoteID)
+	saltedReq, err := h.saltAuthToken(req, remoteID)
 	if err != nil {
 		if filter != nil {
 			_, err = filter(nil, err)
@@ -74,15 +73,15 @@ func (h *Handler) remoteClusterRequest(remoteID string, w http.ResponseWriter, r
 	urlOut := &url.URL{
 		Scheme:   scheme,
 		Host:     remote.Host,
-		Path:     req.URL.Path,
-		RawPath:  req.URL.RawPath,
-		RawQuery: req.URL.RawQuery,
+		Path:     saltedReq.URL.Path,
+		RawPath:  saltedReq.URL.RawPath,
+		RawQuery: saltedReq.URL.RawQuery,
 	}
 	client := h.secureClient
 	if remote.Insecure {
 		client = h.insecureClient
 	}
-	h.proxy.Do(w, req, urlOut, client, filter)
+	h.proxy.Do(w, saltedReq, urlOut, client, filter)
 }
 
 // Buffer request body, parse form parameters in request, and then
@@ -777,7 +776,6 @@ func (h *Handler) saltAuthToken(req *http.Request, remote string) (updatedReq *h
 
 	token, err := auth.SaltToken(creds.Tokens[0], remote)
 
-	log.Printf("Salting %q %q to get %q %q", creds.Tokens[0], remote, token, err)
 	if err == auth.ErrObsoleteToken {
 		// If the token exists in our own database, salt it
 		// for the remote. Otherwise, assume it was issued by
@@ -801,14 +799,11 @@ func (h *Handler) saltAuthToken(req *http.Request, remote string) (updatedReq *h
 	}
 	updatedReq.Header = http.Header{}
 	for k, v := range req.Header {
-		if k == "Authorization" {
-			updatedReq.Header[k] = []string{"Bearer " + token}
-		} else {
+		if k != "Authorization" {
 			updatedReq.Header[k] = v
 		}
 	}
-
-	log.Printf("Salted %q %q to get %q", creds.Tokens[0], remote, token)
+	updatedReq.Header.Set("Authorization", "Bearer "+token)
 
 	// Remove api_token=... from the the query string, in case we
 	// end up forwarding the request.

commit 9469bd48034a3ffd4470dcd987c35cf5a97f819e
Author: Peter Amstutz <pamstutz at veritasgenetics.com>
Date:   Tue Oct 16 15:43:12 2018 -0400

    14262: Use container token for access to load Docker image
    
    Previously used Dispatcher token, which created a security race
    condition (you couldn't set a container image that you didn't have
    access to, but if your access was revoked in the meantime, the
    container would still run.)
    
    Also tweaked API server to allow a PDH for the container image spec
    with no further checking (so the API server doesn't have to go out and
    search the federation.)  This is no longer a security hazard since it
    is now using a user token and not the dispatcher token.
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz at veritasgenetics.com>

diff --git a/services/api/app/models/container.rb b/services/api/app/models/container.rb
index 0d8453174..7d8cc00f2 100644
--- a/services/api/app/models/container.rb
+++ b/services/api/app/models/container.rb
@@ -248,6 +248,12 @@ class Container < ArvadosModel
   def self.resolve_container_image(container_image)
     coll = Collection.for_latest_docker_image(container_image)
     if !coll
+      # Allow bare pdh without any additional checking otherwise
+      # federated container requests won't work.
+      if loc = Keep::Locator.parse(container_image)
+        loc.strip_hints!
+        return loc.to_s
+      end
       raise ArvadosModel::UnresolvableContainerError.new "docker image #{container_image.inspect} not found"
     end
     coll.portable_data_hash
diff --git a/services/crunch-run/crunchrun.go b/services/crunch-run/crunchrun.go
index 27136b452..d055106d3 100644
--- a/services/crunch-run/crunchrun.go
+++ b/services/crunch-run/crunchrun.go
@@ -122,7 +122,7 @@ type ContainerRunner struct {
 	SigChan         chan os.Signal
 	ArvMountExit    chan error
 	SecretMounts    map[string]arvados.Mount
-	MkArvClient     func(token string) (IArvadosClient, error)
+	MkArvClient     func(token string) (IArvadosClient, IKeepClient, error)
 	finalState      string
 	parentTemp      string
 
@@ -237,8 +237,17 @@ func (runner *ContainerRunner) LoadImage() (err error) {
 
 	runner.CrunchLog.Printf("Fetching Docker image from collection '%s'", runner.Container.ContainerImage)
 
+	tok, err := runner.ContainerToken()
+	if err != nil {
+		return fmt.Errorf("While getting container token (LoadImage): %v", err)
+	}
+	arvClient, kc, err := runner.MkArvClient(tok)
+	if err != nil {
+		return fmt.Errorf("While creating arv client (LoadImage): %v", err)
+	}
+
 	var collection arvados.Collection
-	err = runner.ArvClient.Get("collections", runner.Container.ContainerImage, nil, &collection)
+	err = arvClient.Get("collections", runner.Container.ContainerImage, nil, &collection)
 	if err != nil {
 		return fmt.Errorf("While getting container image collection: %v", err)
 	}
@@ -259,7 +268,7 @@ func (runner *ContainerRunner) LoadImage() (err error) {
 		runner.CrunchLog.Print("Loading Docker image from keep")
 
 		var readCloser io.ReadCloser
-		readCloser, err = runner.Kc.ManifestFileReader(manifest, img)
+		readCloser, err = kc.ManifestFileReader(manifest, img)
 		if err != nil {
 			return fmt.Errorf("While creating ManifestFileReader for container image: %v", err)
 		}
@@ -281,7 +290,7 @@ func (runner *ContainerRunner) LoadImage() (err error) {
 
 	runner.ContainerConfig.Image = imageID
 
-	runner.Kc.ClearBlockCache()
+	kc.ClearBlockCache()
 
 	return nil
 }
@@ -1679,7 +1688,7 @@ func (runner *ContainerRunner) fetchContainerRecord() error {
 		return fmt.Errorf("error getting container token: %v", err)
 	}
 
-	containerClient, err := runner.MkArvClient(containerToken)
+	containerClient, _, err := runner.MkArvClient(containerToken)
 	if err != nil {
 		return fmt.Errorf("error creating container API client: %v", err)
 	}
@@ -1719,13 +1728,17 @@ func NewContainerRunner(client *arvados.Client, api IArvadosClient, kc IKeepClie
 		}
 		return ps, nil
 	}
-	cr.MkArvClient = func(token string) (IArvadosClient, error) {
+	cr.MkArvClient = func(token string) (IArvadosClient, IKeepClient, error) {
 		cl, err := arvadosclient.MakeArvadosClient()
 		if err != nil {
-			return nil, err
+			return nil, nil, err
 		}
 		cl.ApiToken = token
-		return cl, nil
+		kc, err := keepclient.MakeKeepClient(cl)
+		if err != nil {
+			return nil, nil, err
+		}
+		return cl, kc, nil
 	}
 	var err error
 	cr.LogCollection, err = (&arvados.Collection{}).FileSystem(cr.client, cr.Kc)

commit 1a456cacf09f4ca0223f343bfd565848cb92def2
Author: Peter Amstutz <pamstutz at veritasgenetics.com>
Date:   Tue Oct 16 14:51:12 2018 -0400

    14262: saltAuthToken returns copy of request object
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz at veritasgenetics.com>

diff --git a/lib/controller/federation.go b/lib/controller/federation.go
index 5c6f6bf7a..e5c56bd83 100644
--- a/lib/controller/federation.go
+++ b/lib/controller/federation.go
@@ -14,6 +14,7 @@ import (
 	"fmt"
 	"io"
 	"io/ioutil"
+	"log"
 	"net/http"
 	"net/url"
 	"regexp"
@@ -47,16 +48,27 @@ type collectionFederatedRequestHandler struct {
 func (h *Handler) remoteClusterRequest(remoteID string, w http.ResponseWriter, req *http.Request, filter ResponseFilter) {
 	remote, ok := h.Cluster.RemoteClusters[remoteID]
 	if !ok {
-		httpserver.Error(w, "no proxy available for cluster "+remoteID, http.StatusNotFound)
+		err := fmt.Errorf("no proxy available for cluster %v", remoteID)
+		if filter != nil {
+			_, err = filter(nil, err)
+		}
+		if err != nil {
+			httpserver.Error(w, err.Error(), http.StatusNotFound)
+		}
 		return
 	}
 	scheme := remote.Scheme
 	if scheme == "" {
 		scheme = "https"
 	}
-	err := h.saltAuthToken(req, remoteID)
+	req, err := h.saltAuthToken(req, remoteID)
 	if err != nil {
-		httpserver.Error(w, err.Error(), http.StatusBadRequest)
+		if filter != nil {
+			_, err = filter(nil, err)
+		}
+		if err != nil {
+			httpserver.Error(w, err.Error(), http.StatusBadRequest)
+		}
 		return
 	}
 	urlOut := &url.URL{
@@ -655,6 +667,10 @@ func (h *collectionFederatedRequestHandler) ServeHTTP(w http.ResponseWriter, req
 	sem := make(chan bool, h.handler.Cluster.RequestLimits.GetMultiClusterRequestConcurrency())
 	defer close(sem)
 	for remoteID := range h.handler.Cluster.RemoteClusters {
+		if remoteID == h.handler.Cluster.ClusterID {
+			// No need to query local cluster again
+			continue
+		}
 		// blocks until it can put a value into the
 		// channel (which has a max queue capacity)
 		sem <- true
@@ -728,28 +744,40 @@ func (h *Handler) validateAPItoken(req *http.Request, user *CurrentUser) error {
 
 // Extract the auth token supplied in req, and replace it with a
 // salted token for the remote cluster.
-func (h *Handler) saltAuthToken(req *http.Request, remote string) error {
+func (h *Handler) saltAuthToken(req *http.Request, remote string) (updatedReq *http.Request, err error) {
+	updatedReq = (&http.Request{
+		Method:        req.Method,
+		URL:           req.URL,
+		Header:        req.Header,
+		Body:          req.Body,
+		ContentLength: req.ContentLength,
+		Host:          req.Host,
+	}).WithContext(req.Context())
+
 	creds := auth.NewCredentials()
-	creds.LoadTokensFromHTTPRequest(req)
-	if len(creds.Tokens) == 0 && req.Header.Get("Content-Type") == "application/x-www-form-encoded" {
+	creds.LoadTokensFromHTTPRequest(updatedReq)
+	if len(creds.Tokens) == 0 && updatedReq.Header.Get("Content-Type") == "application/x-www-form-encoded" {
 		// Override ParseForm's 10MiB limit by ensuring
 		// req.Body is a *http.maxBytesReader.
-		req.Body = http.MaxBytesReader(nil, req.Body, 1<<28) // 256MiB. TODO: use MaxRequestSize from discovery doc or config.
-		if err := creds.LoadTokensFromHTTPRequestBody(req); err != nil {
-			return err
+		updatedReq.Body = http.MaxBytesReader(nil, updatedReq.Body, 1<<28) // 256MiB. TODO: use MaxRequestSize from discovery doc or config.
+		if err := creds.LoadTokensFromHTTPRequestBody(updatedReq); err != nil {
+			return nil, err
 		}
 		// Replace req.Body with a buffer that re-encodes the
 		// form without api_token, in case we end up
 		// forwarding the request.
-		if req.PostForm != nil {
-			req.PostForm.Del("api_token")
+		if updatedReq.PostForm != nil {
+			updatedReq.PostForm.Del("api_token")
 		}
-		req.Body = ioutil.NopCloser(bytes.NewBufferString(req.PostForm.Encode()))
+		updatedReq.Body = ioutil.NopCloser(bytes.NewBufferString(updatedReq.PostForm.Encode()))
 	}
 	if len(creds.Tokens) == 0 {
-		return nil
+		return updatedReq, nil
 	}
+
 	token, err := auth.SaltToken(creds.Tokens[0], remote)
+
+	log.Printf("Salting %q %q to get %q %q", creds.Tokens[0], remote, token, err)
 	if err == auth.ErrObsoleteToken {
 		// If the token exists in our own database, salt it
 		// for the remote. Otherwise, assume it was issued by
@@ -760,26 +788,41 @@ func (h *Handler) saltAuthToken(req *http.Request, remote string) error {
 			// Not ours; pass through unmodified.
 			token = currentUser.Authorization.APIToken
 		} else if err != nil {
-			return err
+			return nil, err
 		} else {
 			// Found; make V2 version and salt it.
 			token, err = auth.SaltToken(currentUser.Authorization.TokenV2(), remote)
 			if err != nil {
-				return err
+				return nil, err
 			}
 		}
 	} else if err != nil {
-		return err
+		return nil, err
+	}
+	updatedReq.Header = http.Header{}
+	for k, v := range req.Header {
+		if k == "Authorization" {
+			updatedReq.Header[k] = []string{"Bearer " + token}
+		} else {
+			updatedReq.Header[k] = v
+		}
 	}
-	req.Header.Set("Authorization", "Bearer "+token)
+
+	log.Printf("Salted %q %q to get %q", creds.Tokens[0], remote, token)
 
 	// Remove api_token=... from the the query string, in case we
 	// end up forwarding the request.
-	if values, err := url.ParseQuery(req.URL.RawQuery); err != nil {
-		return err
+	if values, err := url.ParseQuery(updatedReq.URL.RawQuery); err != nil {
+		return nil, err
 	} else if _, ok := values["api_token"]; ok {
 		delete(values, "api_token")
-		req.URL.RawQuery = values.Encode()
+		updatedReq.URL = &url.URL{
+			Scheme:   req.URL.Scheme,
+			Host:     req.URL.Host,
+			Path:     req.URL.Path,
+			RawPath:  req.URL.RawPath,
+			RawQuery: values.Encode(),
+		}
 	}
-	return nil
+	return updatedReq, nil
 }

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list